From 98845fdbb2a4acaa0036f8e2e998e726b18e6b13 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 4 Jun 2010 10:16:53 -0700 Subject: [PATCH] Avoid database corruption by not adding partially-constructed mail documents. Previously we were using Xapian's add_document to allocate document ID values for notmuch_message_t objects. This had the drawback of adding a partially constructed mail document to the database. If notmuch was subsequently interrupted before fully populating this document, then later runs would be quite confused when seeing the partial documents. There are reports from the wild of people hitting internal errors of the form "Message ... has no thread ID" for example, (which is currently an unrecoverable error). We fix this by manually allocating document IDs without adding documents. With this change, we never call Xapian's add_document method, but only replace_document with either the current document ID of a message or a new one that we have allocated. --- lib/database-private.h | 1 + lib/database.cc | 26 ++++++++++ lib/directory.cc | 3 +- lib/message.cc | 107 ++++++++++++++++++++++++++--------------- lib/notmuch-private.h | 3 ++ 5 files changed, 99 insertions(+), 41 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 41918d76..bd72f670 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -43,6 +43,7 @@ struct _notmuch_database { notmuch_database_mode_t mode; Xapian::Database *xapian_db; + unsigned int last_doc_id; uint64_t last_thread_id; Xapian::QueryParser *query_parser; diff --git a/lib/database.cc b/lib/database.cc index 6afc8d93..dd1fc637 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -622,6 +622,7 @@ notmuch_database_open (const char *path, } } + notmuch->last_doc_id = notmuch->xapian_db->get_lastdocid (); last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id"); if (last_thread_id.empty ()) { notmuch->last_thread_id = 0; @@ -1169,6 +1170,31 @@ notmuch_database_get_directory (notmuch_database_t *notmuch, } } +/* Allocate a document ID that satisfies the following criteria: + * + * 1. The ID does not exist for any document in the Xapian database + * + * 2. The ID was not previously returned from this function + * + * 3. The ID is the smallest integer satisfying (1) and (2) + * + * This function will trigger an internal error if these constraints + * cannot all be satisfied, (that is, the pool of available document + * IDs has been exhausted). + */ +unsigned int +_notmuch_database_generate_doc_id (notmuch_database_t *notmuch) +{ + assert (notmuch->last_doc_id >= notmuch->xapian_db->get_lastdocid ()); + + notmuch->last_doc_id++; + + if (notmuch->last_doc_id == 0) + INTERNAL_ERROR ("Xapian document IDs are exhausted.\n"); + + return notmuch->last_doc_id; +} + static const char * _notmuch_database_generate_thread_id (notmuch_database_t *notmuch) { diff --git a/lib/directory.cc b/lib/directory.cc index 5e75b73e..5d673e21 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -231,7 +231,8 @@ _notmuch_directory_create (notmuch_database_t *notmuch, directory->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, Xapian::sortable_serialise (0)); - directory->document_id = db->add_document (directory->doc); + directory->document_id = _notmuch_database_generate_doc_id (notmuch); + db->replace_document (directory->document_id, directory->doc); talloc_free (local); } diff --git a/lib/message.cc b/lib/message.cc index 958ec734..71f5619f 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -57,35 +57,12 @@ _notmuch_message_destructor (notmuch_message_t *message) return 0; } -/* Create a new notmuch_message_t object for an existing document in - * the database. - * - * Here, 'talloc owner' is an optional talloc context to which the new - * message will belong. This allows for the caller to not bother - * calling notmuch_message_destroy on the message, and no that all - * memory will be reclaimed with 'talloc_owner' is free. The caller - * still can call notmuch_message_destroy when finished with the - * message if desired. - * - * The 'talloc_owner' argument can also be NULL, in which case the - * caller *is* responsible for calling notmuch_message_destroy. - * - * If no document exists in the database with document ID of 'doc_id' - * then this function returns NULL and optionally sets *status to - * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND. - * - * This function can also fail to due lack of available memory, - * returning NULL and optionally setting *status to - * NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY. - * - * The caller can pass NULL for status if uninterested in - * distinguishing these two cases. - */ -notmuch_message_t * -_notmuch_message_create (const void *talloc_owner, - notmuch_database_t *notmuch, - unsigned int doc_id, - notmuch_private_status_t *status) +static notmuch_message_t * +_notmuch_message_create_for_document (const void *talloc_owner, + notmuch_database_t *notmuch, + unsigned int doc_id, + Xapian::Document doc, + notmuch_private_status_t *status) { notmuch_message_t *message; @@ -130,16 +107,53 @@ _notmuch_message_create (const void *talloc_owner, talloc_set_destructor (message, _notmuch_message_destructor); + message->doc = doc; + + return message; +} + +/* Create a new notmuch_message_t object for an existing document in + * the database. + * + * Here, 'talloc owner' is an optional talloc context to which the new + * message will belong. This allows for the caller to not bother + * calling notmuch_message_destroy on the message, and no that all + * memory will be reclaimed with 'talloc_owner' is free. The caller + * still can call notmuch_message_destroy when finished with the + * message if desired. + * + * The 'talloc_owner' argument can also be NULL, in which case the + * caller *is* responsible for calling notmuch_message_destroy. + * + * If no document exists in the database with document ID of 'doc_id' + * then this function returns NULL and optionally sets *status to + * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND. + * + * This function can also fail to due lack of available memory, + * returning NULL and optionally setting *status to + * NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY. + * + * The caller can pass NULL for status if uninterested in + * distinguishing these two cases. + */ +notmuch_message_t * +_notmuch_message_create (const void *talloc_owner, + notmuch_database_t *notmuch, + unsigned int doc_id, + notmuch_private_status_t *status) +{ + Xapian::Document doc; + try { - message->doc = notmuch->xapian_db->get_document (doc_id); + doc = notmuch->xapian_db->get_document (doc_id); } catch (const Xapian::DocNotFoundError &error) { - talloc_free (message); if (status) *status = NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND; return NULL; } - return message; + return _notmuch_message_create_for_document (talloc_owner, notmuch, + doc_id, doc, status); } /* Create a new notmuch_message_t object for a specific message ID, @@ -148,11 +162,24 @@ _notmuch_message_create (const void *talloc_owner, * The 'notmuch' database will be the talloc owner of the returned * message. * - * If there is already a document with message ID 'message_id' in the - * database, then the returned message can be used to query/modify the - * document. Otherwise, a new document will be inserted into the - * database before this function returns, (and *status will be set - * to NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND). + * This function returns a valid notmuch_message_t whether or not + * there is already a document in the database with the given message + * ID. These two cases can be distinguished by the value of *status: + * + * + * NOTMUCH_PRIVATE_STATUS_SUCCESS: + * + * There is already a document with message ID 'message_id' in the + * database. The returned message can be used to query/modify the + * document. + * NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND: + * + * No document with 'message_id' exists in the database. The + * returned message contains a newly created document (not yet + * added to the database) and a document ID that is known not to + * exist in the database. The caller can modify the message, and a + * call to _notmuch_message_sync will add * the document to the + * database. * * If an error occurs, this function will return NULL and *status * will be set as appropriate. (The status pointer argument must @@ -192,7 +219,7 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); - doc_id = db->add_document (doc); + doc_id = _notmuch_database_generate_doc_id (notmuch); } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred creating message: %s\n", error.get_msg().c_str()); @@ -201,8 +228,8 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, return NULL; } - message = _notmuch_message_create (notmuch, notmuch, - doc_id, status_ret); + message = _notmuch_message_create_for_document (notmuch, notmuch, + doc_id, doc, status_ret); /* We want to inform the caller that we had to create a new * document. */ diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 3768d6f8..f9774a48 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -167,6 +167,9 @@ _notmuch_database_split_path (void *ctx, const char * _notmuch_database_get_directory_db_path (const char *path); +unsigned int +_notmuch_database_generate_doc_id (notmuch_database_t *notmuch); + notmuch_private_status_t _notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch, const char *prefix_name,