lib: directly traverse postlists in _n_message_delete

This is intended to fix the slow behaviour of "notmuch new" (and possibly
"notmuch reindex") when large numbers of files are deleted.

The underlying issue [1] seems to be the Xapian glass backend spending
a large amount of time in db.has_positions when running queries with
large-ish amounts of unflushed changes.

This commit removes two uses of Xapian queries [2], and replaces them with
an approximation of what Xapian would do after optimizing the
queries. This avoids the calls to has_positions (which are in any case
unneeded because we are only using boolean terms here).

[1] Thanks to "andres" on IRC for narrowing down the performance
bottleneck.

[2] Thanks to Olly Betts of Xapian fame for talking me through a fix
that does not require people to update Xapian.
This commit is contained in:
David Bremner 2021-04-16 09:00:39 -03:00
parent 8a64f725df
commit 9ad19e4454

View file

@ -1356,11 +1356,10 @@ notmuch_status_t
_notmuch_message_delete (notmuch_message_t *message)
{
notmuch_status_t status;
const char *mid, *tid, *query_string;
const char *mid, *tid;
notmuch_message_t *ghost;
notmuch_private_status_t private_status;
notmuch_database_t *notmuch;
notmuch_query_t *query;
unsigned int count = 0;
bool is_ghost;
@ -1382,16 +1381,33 @@ _notmuch_message_delete (notmuch_message_t *message)
if (is_ghost)
return NOTMUCH_STATUS_SUCCESS;
query_string = talloc_asprintf (message, "thread:%s", tid);
query = notmuch_query_create (notmuch, query_string);
if (query == NULL)
return NOTMUCH_STATUS_OUT_OF_MEMORY;
status = notmuch_query_count_messages (query, &count);
if (status) {
notmuch_query_destroy (query);
return status;
}
/* look for a non-ghost message in the same thread */
try {
Xapian::PostingIterator thread_doc, thread_doc_end;
Xapian::PostingIterator mail_doc, mail_doc_end;
_notmuch_database_find_doc_ids (message->notmuch, "thread", tid, &thread_doc,
&thread_doc_end);
_notmuch_database_find_doc_ids (message->notmuch, "type", "mail", &mail_doc, &mail_doc_end);
while (count == 0 &&
thread_doc != thread_doc_end &&
mail_doc != mail_doc_end) {
thread_doc.skip_to (*mail_doc);
if (thread_doc != thread_doc_end) {
if (*thread_doc == *mail_doc) {
count++;
} else {
mail_doc.skip_to (*thread_doc);
if (mail_doc != mail_doc_end && *thread_doc == *mail_doc)
count++;
}
}
}
} catch (Xapian::Error &error) {
LOG_XAPIAN_EXCEPTION (message, error);
return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
}
if (count > 0) {
/* reintroduce a ghost in its place because there are still
* other active messages in this thread: */
@ -1410,27 +1426,21 @@ _notmuch_message_delete (notmuch_message_t *message)
notmuch_message_destroy (ghost);
status = COERCE_STATUS (private_status, "Error converting to ghost message");
} else {
/* the thread is empty; drop all ghost messages from it */
notmuch_messages_t *messages;
status = _notmuch_query_search_documents (query,
"ghost",
&messages);
if (status == NOTMUCH_STATUS_SUCCESS) {
notmuch_status_t last_error = NOTMUCH_STATUS_SUCCESS;
while (notmuch_messages_valid (messages)) {
message = notmuch_messages_get (messages);
status = _notmuch_message_delete (message);
if (status) /* we'll report the last failure we see;
* if there is more than one failure, we
* forget about previous ones */
last_error = status;
notmuch_message_destroy (message);
notmuch_messages_move_to_next (messages);
/* the thread now contains only ghosts: delete them */
try {
Xapian::PostingIterator doc, doc_end;
_notmuch_database_find_doc_ids (message->notmuch, "thread", tid, &doc, &doc_end);
for (; doc != doc_end; doc++) {
message->notmuch->writable_xapian_db->delete_document (*doc);
}
status = last_error;
} catch (Xapian::Error &error) {
LOG_XAPIAN_EXCEPTION (message, error);
return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
}
}
notmuch_query_destroy (query);
return status;
}