Add support (and tests) for messages with really long message IDs.

Scott Henson reported an internal error that occurred when he tried to
add a message that referenced another message with a message ID well
over 300 characters in length. The bug here was running into a Xapian
limit for the length of metadata key names, (which is even more
restrictive than the Xapian limit for the length of terms).

We fix this by noticing long message ID values and instead using a
message ID of the form "notmuch-sha1-<sha1_sum_of_message_id>". That
is, we use SHA1 to generate a compressed, (but still unique), version
of the message ID.

We add support to the test suite to exercise this fix. The tests add a
message referencing the long message ID, then add the message with the
long message ID, then finally add another message referencing the long
ID. Each of these tests exercise different code paths where the
special handling is implemented.

A final test ensures that all three messages are stitched together
into a single thread---guaranteeing that the three code paths all act
consistently.
This commit is contained in:
Carl Worth 2010-06-04 12:39:36 -07:00
parent 77ab738343
commit 7b78eb4af6
3 changed files with 78 additions and 9 deletions

View file

@ -57,8 +57,12 @@ typedef struct {
* *
* type: mail * type: mail
* *
* id: Unique ID of mail, (from Message-ID header or generated * id: Unique ID of mail. This is from the Message-ID header
* as "notmuch-sha1-<sha1_sum_of_entire_file>. * if present and not too long (see NOTMUCH_MESSAGE_ID_MAX).
* If it's present and too long, then we use
* "notmuch-sha1-<sha1_sum_of_message_id>".
* If this header is not present, we use
* "notmuch-sha1-<sha1_sum_of_entire_file>".
* *
* thread: The ID of the thread to which the mail belongs * thread: The ID of the thread to which the mail belongs
* *
@ -145,9 +149,11 @@ typedef struct {
* *
* thread_id_* A pre-allocated thread ID for a particular * thread_id_* A pre-allocated thread ID for a particular
* message. This is actually an arbitarily large * message. This is actually an arbitarily large
* family of metadata name. Any particular name * family of metadata name. Any particular name is
* is formed by concatenating "thread_id_" with a * formed by concatenating "thread_id_" with a message
* message ID. The value stored is a thread ID. * ID (or the SHA1 sum of a message ID if it is very
* long---see description of 'id' in the mail
* document). The value stored is a thread ID.
* *
* These thread ID metadata values are stored * These thread ID metadata values are stored
* whenever a message references a parent message * whenever a message references a parent message
@ -334,6 +340,23 @@ find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id)
return notmuch->xapian_db->get_document (doc_id); return notmuch->xapian_db->get_document (doc_id);
} }
/* Generate a compressed version of 'message_id' of the form:
*
* notmuch-sha1-<sha1_sum_of_message_id>
*/
static char *
_message_id_compressed (void *ctx, const char *message_id)
{
char *sha1, *compressed;
sha1 = notmuch_sha1_of_string (message_id);
compressed = talloc_asprintf (ctx, "notmuch-sha1-%s", sha1);
free (sha1);
return compressed;
}
notmuch_message_t * notmuch_message_t *
notmuch_database_find_message (notmuch_database_t *notmuch, notmuch_database_find_message (notmuch_database_t *notmuch,
const char *message_id) const char *message_id)
@ -341,6 +364,9 @@ notmuch_database_find_message (notmuch_database_t *notmuch,
notmuch_private_status_t status; notmuch_private_status_t status;
unsigned int doc_id; unsigned int doc_id;
if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
message_id = _message_id_compressed (notmuch, message_id);
try { try {
status = _notmuch_database_find_unique_doc_id (notmuch, "id", status = _notmuch_database_find_unique_doc_id (notmuch, "id",
message_id, &doc_id); message_id, &doc_id);
@ -1217,7 +1243,11 @@ _notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
static char * static char *
_get_metadata_thread_id_key (void *ctx, const char *message_id) _get_metadata_thread_id_key (void *ctx, const char *message_id)
{ {
return talloc_asprintf (ctx, "thread_id_%s", message_id); if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
message_id = _message_id_compressed (ctx, message_id);
return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
message_id);
} }
/* Find the thread ID to which the message with 'message_id' belongs. /* Find the thread ID to which the message with 'message_id' belongs.
@ -1570,10 +1600,12 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
if (message_id == NULL) if (message_id == NULL)
message_id = talloc_strdup (message_file, header); message_id = talloc_strdup (message_file, header);
/* Reject a Message ID that's too long. */ /* If a message ID is too long, substitute its sha1 instead. */
if (message_id && strlen (message_id) + 1 > NOTMUCH_TERM_MAX) { if (message_id && strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX) {
char *compressed = _message_id_compressed (message_file,
message_id);
talloc_free (message_id); talloc_free (message_id);
message_id = NULL; message_id = compressed;
} }
} }

View file

@ -108,6 +108,16 @@ typedef enum {
* programmatically. */ * programmatically. */
#define NOTMUCH_TERM_MAX 245 #define NOTMUCH_TERM_MAX 245
#define NOTMUCH_METADATA_THREAD_ID_PREFIX "thread_id_"
/* For message IDs we have to be even more restrictive. Beyond fitting
* into the term limit, we also use message IDs to construct
* metadata-key values. And the documentation says that these should
* be restricted to about 200 characters. (The actual limit for the
* chert backend at least is 252.)
*/
#define NOTMUCH_MESSAGE_ID_MAX (200 - sizeof (NOTMUCH_METADATA_THREAD_ID_PREFIX))
typedef enum _notmuch_private_status { typedef enum _notmuch_private_status {
/* First, copy all the public status values. */ /* First, copy all the public status values. */
NOTMUCH_PRIVATE_STATUS_SUCCESS = NOTMUCH_STATUS_SUCCESS, NOTMUCH_PRIVATE_STATUS_SUCCESS = NOTMUCH_STATUS_SUCCESS,

View file

@ -1120,6 +1120,33 @@ References: <${gen_msg_id}>
On Tue, 05 Jan 2010 15:43:56 -0800, Sender <sender@example.com> wrote: On Tue, 05 Jan 2010 15:43:56 -0800, Sender <sender@example.com> wrote:
> from guessing test" > from guessing test"
printf "\nTesting messages with ridiculously-long message IDs...\n"
printf " Referencing long ID before adding...\t\t"
generate_message '[subject]="Reference of ridiculously-long message ID"' \
'[references]=\<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-\>'
output=$(NOTMUCH_NEW)
pass_if_equal "$output" "Added 1 new message to the database."
printf " Adding message with long ID...\t\t\t"
generate_message '[subject]="A ridiculously-long message ID"' \
'[id]=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-'
output=$(NOTMUCH_NEW)
pass_if_equal "$output" "Added 1 new message to the database."
printf " Referencing long ID after adding...\t\t"
generate_message '[subject]="Reply to ridiculously-long message ID"' \
'[in-reply-to]=\<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-\>'
output=$(NOTMUCH_NEW)
pass_if_equal "$output" "Added 1 new message to the database."
printf " Ensure all messages were threaded together...\t"
output=$($NOTMUCH search 'subject:"a ridiculously-long message ID"' | notmuch_search_sanitize)
pass_if_equal "$output" "thread:XXX 2001-01-05 [1/3] Notmuch Test Suite; A ridiculously-long message ID (inbox unread)"
echo "" echo ""
echo "Notmuch test suite complete." echo "Notmuch test suite complete."