mirror of
https://git.notmuchmail.org/git/notmuch
synced 2025-01-03 07:11:41 +01:00
index: Don't bother indexing quoted portions of messages (and signatures).
Our old notmuch-index-message.cc code had this, but I originally left it out when adding indexing back in. I was concerned primarily with mistakenly detecting signature markers and omitting important text (for example, I often use long lines of "----" as section separators). But now I see that there's a performance benefit to skipping the quotations (about 120 files/sec. instead of 95 files/sec.). I mitigated the bogus signature detection by recognizing nothing other than the all-time classic "-- ".
This commit is contained in:
parent
cfa228a3d4
commit
56218ddbb4
1 changed file with 55 additions and 1 deletion
56
index.cc
56
index.cc
|
@ -135,6 +135,60 @@ skip_re_in_subject (const char *subject)
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given a string representing the body of a message, generate terms
|
||||||
|
* for it, (skipping quoted portions and signatures).
|
||||||
|
*
|
||||||
|
* This function is evil in that it modifies the string passed to it,
|
||||||
|
* (changing some newlines into '\0').
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
_index_body_text (notmuch_message_t *message, char *body)
|
||||||
|
{
|
||||||
|
char *line, *line_end, *next_line;
|
||||||
|
|
||||||
|
if (body == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
next_line = body;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
line = next_line;
|
||||||
|
if (*line == '\0')
|
||||||
|
break;
|
||||||
|
|
||||||
|
next_line = strchr (line, '\n');
|
||||||
|
if (next_line == NULL) {
|
||||||
|
next_line = line + strlen (line);
|
||||||
|
}
|
||||||
|
line_end = next_line - 1;
|
||||||
|
|
||||||
|
/* Get to the next non-blank line. */
|
||||||
|
while (*next_line == '\n')
|
||||||
|
next_line++;
|
||||||
|
|
||||||
|
/* Skip blank lines. */
|
||||||
|
if (line_end < line)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Skip lines that are quotes. */
|
||||||
|
if (*line == '>')
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Also skip lines introducing a quote on the next line. */
|
||||||
|
if (*line_end == ':' && *next_line == '>')
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Finally, bail as soon as we see a signature. */
|
||||||
|
/* XXX: Should only do this if "near" the end of the message. */
|
||||||
|
if (strncmp (line, "-- ", 3) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
*(line_end + 1) = '\0';
|
||||||
|
|
||||||
|
_notmuch_message_gen_terms (message, NULL, line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Callback to generate terms for each mime part of a message. */
|
/* Callback to generate terms for each mime part of a message. */
|
||||||
static void
|
static void
|
||||||
_index_mime_part (notmuch_message_t *message,
|
_index_mime_part (notmuch_message_t *message,
|
||||||
|
@ -207,7 +261,7 @@ _index_mime_part (notmuch_message_t *message,
|
||||||
g_byte_array_append (byte_array, (guint8 *) "\0", 1);
|
g_byte_array_append (byte_array, (guint8 *) "\0", 1);
|
||||||
body = (char *) g_byte_array_free (byte_array, FALSE);
|
body = (char *) g_byte_array_free (byte_array, FALSE);
|
||||||
|
|
||||||
_notmuch_message_gen_terms (message, NULL, body);
|
_index_body_text (message, body);
|
||||||
|
|
||||||
free (body);
|
free (body);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue