mirror of
https://git.notmuchmail.org/git/notmuch
synced 2025-01-03 15:21:41 +01:00
n_m_remove_indexed_terms: reduce number of Xapian API calls.
Previously this function scanned every term attached to a given Xapian document. It turns out we know how to read only the terms we need to preserve (and we might have already done so). This commit replaces many calls to Xapian::Document::remove_term with one call to ::clear_terms, and a (typically much smaller) number of calls to ::add_term. Roughly speaking this is based on the assumption that most messages have more text than they have tags. According to the performance test suite, this yields a roughly 40% speedup on "notmuch reindex '*'".
This commit is contained in:
parent
3563079be3
commit
97939170b3
1 changed files with 38 additions and 28 deletions
|
@ -716,6 +716,8 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
|
|||
|
||||
/* Remove all terms generated by indexing, i.e. not tags or
|
||||
* properties, along with any automatic tags*/
|
||||
/* According to Xapian API docs, none of these calls throw
|
||||
* exceptions */
|
||||
notmuch_private_status_t
|
||||
_notmuch_message_remove_indexed_terms (notmuch_message_t *message)
|
||||
{
|
||||
|
@ -727,45 +729,53 @@ _notmuch_message_remove_indexed_terms (notmuch_message_t *message)
|
|||
tag_prefix = _find_prefix ("tag"),
|
||||
type_prefix = _find_prefix ("type");
|
||||
|
||||
for (i = message->doc.termlist_begin ();
|
||||
i != message->doc.termlist_end (); i++) {
|
||||
/* Make sure we have the data to restore to Xapian*/
|
||||
_notmuch_message_ensure_metadata (message,NULL);
|
||||
|
||||
const std::string term = *i;
|
||||
|
||||
if (term.compare (0, type_prefix.size (), type_prefix) == 0)
|
||||
continue;
|
||||
|
||||
if (term.compare (0, id_prefix.size (), id_prefix) == 0)
|
||||
continue;
|
||||
|
||||
if (term.compare (0, property_prefix.size (), property_prefix) == 0)
|
||||
continue;
|
||||
|
||||
if (term.compare (0, tag_prefix.size (), tag_prefix) == 0 &&
|
||||
term.compare (1, strlen("encrypted"), "encrypted") != 0 &&
|
||||
term.compare (1, strlen("signed"), "signed") != 0 &&
|
||||
term.compare (1, strlen("attachment"), "attachment") != 0)
|
||||
continue;
|
||||
|
||||
try {
|
||||
message->doc.remove_term ((*i));
|
||||
/* Empirically, it turns out to be faster to remove all the terms,
|
||||
* and add back the ones we want. */
|
||||
message->doc.clear_terms ();
|
||||
message->modified = true;
|
||||
} catch (const Xapian::InvalidArgumentError) {
|
||||
/* Ignore failure to remove non-existent term. */
|
||||
} catch (const Xapian::Error &error) {
|
||||
notmuch_database_t *notmuch = message->notmuch;
|
||||
|
||||
if (!notmuch->exception_reported) {
|
||||
_notmuch_database_log(notmuch_message_get_database (message), "A Xapian exception occurred creating message: %s\n",
|
||||
error.get_msg().c_str());
|
||||
notmuch->exception_reported = true;
|
||||
}
|
||||
return NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION;
|
||||
/* still a mail message */
|
||||
message->doc.add_term (type_prefix + "mail");
|
||||
|
||||
/* Put back message-id */
|
||||
message->doc.add_term (id_prefix + message->message_id);
|
||||
|
||||
/* Put back non-automatic tags */
|
||||
for (notmuch_tags_t *tags = notmuch_message_get_tags (message);
|
||||
notmuch_tags_valid (tags);
|
||||
notmuch_tags_move_to_next (tags)) {
|
||||
|
||||
const char *tag = notmuch_tags_get (tags);
|
||||
|
||||
if (STRNCMP_LITERAL (tag, "encrypted") != 0 &&
|
||||
STRNCMP_LITERAL (tag, "signed") != 0 &&
|
||||
STRNCMP_LITERAL (tag, "attachment") != 0) {
|
||||
std::string term = tag_prefix + tag;
|
||||
message->doc.add_term(term);
|
||||
}
|
||||
}
|
||||
|
||||
/* Put back properties */
|
||||
notmuch_message_properties_t *list;
|
||||
|
||||
for (list = notmuch_message_get_properties (message, "", false);
|
||||
notmuch_message_properties_valid (list); notmuch_message_properties_move_to_next (list)) {
|
||||
std::string term = property_prefix +
|
||||
notmuch_message_properties_key(list) + "=" +
|
||||
notmuch_message_properties_value(list);
|
||||
|
||||
message->doc.add_term(term);
|
||||
}
|
||||
|
||||
notmuch_message_properties_destroy (list);
|
||||
|
||||
return NOTMUCH_PRIVATE_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* Return true if p points at "new" or "cur". */
|
||||
static bool is_maildir (const char *p)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue