Avoid database corruption by not adding partially-constructed mail documents.

Previously we were using Xapian's add_document to allocate document ID
values for notmuch_message_t objects.  This had the drawback of adding
a partially constructed mail document to the database. If notmuch was
subsequently interrupted before fully populating this document, then
later runs would be quite confused when seeing the partial documents.

There are reports from the wild of people hitting internal errors of
the form "Message ... has no thread ID" for example, (which is
currently an unrecoverable error).

We fix this by manually allocating document IDs without adding
documents. With this change, we never call Xapian's add_document
method, but only replace_document with either the current document ID
of a message or a new one that we have allocated.
This commit is contained in:
Carl Worth 2010-06-04 10:16:53 -07:00
parent 361b9d4bd9
commit 98845fdbb2
5 changed files with 99 additions and 41 deletions

View file

@ -43,6 +43,7 @@ struct _notmuch_database {
notmuch_database_mode_t mode;
Xapian::Database *xapian_db;
unsigned int last_doc_id;
uint64_t last_thread_id;
Xapian::QueryParser *query_parser;

View file

@ -622,6 +622,7 @@ notmuch_database_open (const char *path,
}
}
notmuch->last_doc_id = notmuch->xapian_db->get_lastdocid ();
last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id");
if (last_thread_id.empty ()) {
notmuch->last_thread_id = 0;
@ -1169,6 +1170,31 @@ notmuch_database_get_directory (notmuch_database_t *notmuch,
}
}
/* Allocate a document ID that satisfies the following criteria:
*
* 1. The ID does not exist for any document in the Xapian database
*
* 2. The ID was not previously returned from this function
*
* 3. The ID is the smallest integer satisfying (1) and (2)
*
* This function will trigger an internal error if these constraints
* cannot all be satisfied, (that is, the pool of available document
* IDs has been exhausted).
*/
unsigned int
_notmuch_database_generate_doc_id (notmuch_database_t *notmuch)
{
assert (notmuch->last_doc_id >= notmuch->xapian_db->get_lastdocid ());
notmuch->last_doc_id++;
if (notmuch->last_doc_id == 0)
INTERNAL_ERROR ("Xapian document IDs are exhausted.\n");
return notmuch->last_doc_id;
}
static const char *
_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
{

View file

@ -231,7 +231,8 @@ _notmuch_directory_create (notmuch_database_t *notmuch,
directory->doc.add_value (NOTMUCH_VALUE_TIMESTAMP,
Xapian::sortable_serialise (0));
directory->document_id = db->add_document (directory->doc);
directory->document_id = _notmuch_database_generate_doc_id (notmuch);
db->replace_document (directory->document_id, directory->doc);
talloc_free (local);
}

View file

@ -57,35 +57,12 @@ _notmuch_message_destructor (notmuch_message_t *message)
return 0;
}
/* Create a new notmuch_message_t object for an existing document in
* the database.
*
* Here, 'talloc owner' is an optional talloc context to which the new
* message will belong. This allows for the caller to not bother
* calling notmuch_message_destroy on the message, and no that all
* memory will be reclaimed with 'talloc_owner' is free. The caller
* still can call notmuch_message_destroy when finished with the
* message if desired.
*
* The 'talloc_owner' argument can also be NULL, in which case the
* caller *is* responsible for calling notmuch_message_destroy.
*
* If no document exists in the database with document ID of 'doc_id'
* then this function returns NULL and optionally sets *status to
* NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND.
*
* This function can also fail to due lack of available memory,
* returning NULL and optionally setting *status to
* NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY.
*
* The caller can pass NULL for status if uninterested in
* distinguishing these two cases.
*/
notmuch_message_t *
_notmuch_message_create (const void *talloc_owner,
notmuch_database_t *notmuch,
unsigned int doc_id,
notmuch_private_status_t *status)
static notmuch_message_t *
_notmuch_message_create_for_document (const void *talloc_owner,
notmuch_database_t *notmuch,
unsigned int doc_id,
Xapian::Document doc,
notmuch_private_status_t *status)
{
notmuch_message_t *message;
@ -130,16 +107,53 @@ _notmuch_message_create (const void *talloc_owner,
talloc_set_destructor (message, _notmuch_message_destructor);
message->doc = doc;
return message;
}
/* Create a new notmuch_message_t object for an existing document in
* the database.
*
* Here, 'talloc owner' is an optional talloc context to which the new
* message will belong. This allows for the caller to not bother
* calling notmuch_message_destroy on the message, and no that all
* memory will be reclaimed with 'talloc_owner' is free. The caller
* still can call notmuch_message_destroy when finished with the
* message if desired.
*
* The 'talloc_owner' argument can also be NULL, in which case the
* caller *is* responsible for calling notmuch_message_destroy.
*
* If no document exists in the database with document ID of 'doc_id'
* then this function returns NULL and optionally sets *status to
* NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND.
*
* This function can also fail to due lack of available memory,
* returning NULL and optionally setting *status to
* NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY.
*
* The caller can pass NULL for status if uninterested in
* distinguishing these two cases.
*/
notmuch_message_t *
_notmuch_message_create (const void *talloc_owner,
notmuch_database_t *notmuch,
unsigned int doc_id,
notmuch_private_status_t *status)
{
Xapian::Document doc;
try {
message->doc = notmuch->xapian_db->get_document (doc_id);
doc = notmuch->xapian_db->get_document (doc_id);
} catch (const Xapian::DocNotFoundError &error) {
talloc_free (message);
if (status)
*status = NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND;
return NULL;
}
return message;
return _notmuch_message_create_for_document (talloc_owner, notmuch,
doc_id, doc, status);
}
/* Create a new notmuch_message_t object for a specific message ID,
@ -148,11 +162,24 @@ _notmuch_message_create (const void *talloc_owner,
* The 'notmuch' database will be the talloc owner of the returned
* message.
*
* If there is already a document with message ID 'message_id' in the
* database, then the returned message can be used to query/modify the
* document. Otherwise, a new document will be inserted into the
* database before this function returns, (and *status will be set
* to NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND).
* This function returns a valid notmuch_message_t whether or not
* there is already a document in the database with the given message
* ID. These two cases can be distinguished by the value of *status:
*
*
* NOTMUCH_PRIVATE_STATUS_SUCCESS:
*
* There is already a document with message ID 'message_id' in the
* database. The returned message can be used to query/modify the
* document.
* NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
*
* No document with 'message_id' exists in the database. The
* returned message contains a newly created document (not yet
* added to the database) and a document ID that is known not to
* exist in the database. The caller can modify the message, and a
* call to _notmuch_message_sync will add * the document to the
* database.
*
* If an error occurs, this function will return NULL and *status
* will be set as appropriate. (The status pointer argument must
@ -192,7 +219,7 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch,
doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id);
doc_id = db->add_document (doc);
doc_id = _notmuch_database_generate_doc_id (notmuch);
} catch (const Xapian::Error &error) {
fprintf (stderr, "A Xapian exception occurred creating message: %s\n",
error.get_msg().c_str());
@ -201,8 +228,8 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch,
return NULL;
}
message = _notmuch_message_create (notmuch, notmuch,
doc_id, status_ret);
message = _notmuch_message_create_for_document (notmuch, notmuch,
doc_id, doc, status_ret);
/* We want to inform the caller that we had to create a new
* document. */

View file

@ -167,6 +167,9 @@ _notmuch_database_split_path (void *ctx,
const char *
_notmuch_database_get_directory_db_path (const char *path);
unsigned int
_notmuch_database_generate_doc_id (notmuch_database_t *notmuch);
notmuch_private_status_t
_notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch,
const char *prefix_name,