From 6a4992bc611881b363583b4a20fc530c770aedd8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 22 Oct 2009 15:31:56 -0700 Subject: [PATCH] Generate message ID (using SHA1) when a mail message contains none. This is important as we're using the message ID as the unique key in our database. So previously, all messages with no message ID would be treated as the same message---not good at all. --- Makefile | 13 +++++++- database.cc | 44 ++++++++++++++++++--------- notmuch-private.h | 14 +++++++++ notmuch.h | 9 ++++++ sha1.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 sha1.c diff --git a/Makefile b/Makefile index ff654e11..13f2b28d 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,17 @@ MYCXXFLAGS=$(MYCFLAGS) `xapian-config --cxxflags` MYLDFLAGS=`pkg-config --libs glib-2.0 talloc` `xapian-config --libs` +MODULES= \ + notmuch.o \ + database.o \ + date.o \ + message.o \ + message-file.o \ + query.o \ + sha1.o \ + libsha1.o \ + xutil.o + all: $(PROGS) %.o: %.cc @@ -13,7 +24,7 @@ all: $(PROGS) %.o: %.c $(CC) -c $(CFLAGS) $(MYCFLAGS) $< -o $@ -notmuch: notmuch.o database.o date.o message.o message-file.o query.o xutil.o +notmuch: $(MODULES) $(CC) $(MYLDFLAGS) $^ -o $@ Makefile.dep: *.c *.cc diff --git a/database.cc b/database.cc index 6d62109e..578dce4b 100644 --- a/database.cc +++ b/database.cc @@ -36,6 +36,8 @@ notmuch_status_to_string (notmuch_status_t status) return "No error occurred"; case NOTMUCH_STATUS_XAPIAN_EXCEPTION: return "A Xapian exception occurred"; + case NOTMUCH_STATUS_FILE_ERROR: + return "Something went wrong trying to read or write a file"; case NOTMUCH_STATUS_FILE_NOT_EMAIL: return "File is not an email"; case NOTMUCH_STATUS_NULL_POINTER: @@ -488,6 +490,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, Xapian::WritableDatabase *db = notmuch->xapian_db; Xapian::Document doc; notmuch_message_file_t *message; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; 
GPtrArray *parents, *thread_ids; @@ -533,9 +536,18 @@ notmuch_database_add_message (notmuch_database_t *notmuch, if (message_id == NULL) message_id = xstrdup (header); } else { - /* XXX: Should generate a message_id here, (such as a SHA1 - * sum of the message itself) */ - message_id = NULL; + /* No message-id at all, let's generate one by taking a + * hash over the file's contents. */ + char *sha1 = notmuch_sha1_of_file (filename); + + /* If that failed too, something is really wrong. Give up. */ + if (sha1 == NULL) { + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + message_id = g_strdup_printf ("notmuch-sha1-%s", sha1); + free (sha1); } thread_ids = find_thread_ids (notmuch, parents, message_id); @@ -543,10 +555,11 @@ notmuch_database_add_message (notmuch_database_t *notmuch, for (i = 0; i < parents->len; i++) g_free (g_ptr_array_index (parents, i)); g_ptr_array_free (parents, TRUE); - if (message_id) { - add_term (doc, "msgid", message_id); - doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); - } + + add_term (doc, "msgid", message_id); + doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); + + free (message_id); if (thread_ids->len) { unsigned int i; @@ -565,7 +578,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, } doc.add_value (NOTMUCH_VALUE_THREAD, thread_id->str); g_string_free (thread_id, TRUE); - } else if (message_id) { + } else { /* If not part of any existing thread, generate a new thread_id. 
*/ thread_id_t thread_id; @@ -576,8 +589,6 @@ notmuch_database_add_message (notmuch_database_t *notmuch, g_ptr_array_free (thread_ids, TRUE); - free (message_id); - date = notmuch_message_file_get_header (message, "date"); time_value = notmuch_parse_date (date, NULL); @@ -592,18 +603,21 @@ notmuch_database_add_message (notmuch_database_t *notmuch, subject == NULL && to == NULL) { - notmuch_message_file_close (message); - return NOTMUCH_STATUS_FILE_NOT_EMAIL; + ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; + goto DONE; } else { db->add_document (doc); } } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred: %s.\n", error.get_msg().c_str()); - return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + goto DONE; } - notmuch_message_file_close (message); + DONE: + if (message) + notmuch_message_file_close (message); - return NOTMUCH_STATUS_SUCCESS; + return ret; } diff --git a/notmuch-private.h b/notmuch-private.h index bb3f62c1..2d64a458 100644 --- a/notmuch-private.h +++ b/notmuch-private.h @@ -192,6 +192,20 @@ notmuch_message_file_get_header (notmuch_message_file_t *message, time_t notmuch_parse_date (const char *str, int *tz_offset); +/* sha1.c */ + +/* Create a hexadecimal string version of the SHA-1 digest of the + * named file. + * + * This function returns a newly allocated string which the caller + * should free() when finished. + * + * If any error occurs while reading the file, (permission denied, + * file not found, etc.), this function returns NULL. + */ +char * +notmuch_sha1_of_file (const char *filename); + NOTMUCH_END_DECLS #endif diff --git a/notmuch.h b/notmuch.h index 912cbd26..bc2caaad 100644 --- a/notmuch.h +++ b/notmuch.h @@ -55,6 +55,10 @@ typedef int notmuch_bool_t; * * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception occurred * + * NOTMUCH_STATUS_FILE_ERROR: An error occurred trying to read or + * write to a file (this could be file not found, permission + * denied, etc.) 
+ * * NOTMUCH_STATUS_FILE_NOT_EMAIL: A file was presented that doesn't * appear to be an email message. * @@ -69,6 +73,7 @@ typedef int notmuch_bool_t; typedef enum _notmuch_status { NOTMUCH_STATUS_SUCCESS = 0, NOTMUCH_STATUS_XAPIAN_EXCEPTION, + NOTMUCH_STATUS_FILE_ERROR, NOTMUCH_STATUS_FILE_NOT_EMAIL, NOTMUCH_STATUS_NULL_POINTER, NOTMUCH_STATUS_TAG_TOO_LONG, @@ -180,6 +185,10 @@ notmuch_database_get_path (notmuch_database_t *database); * * NOTMUCH_STATUS_SUCCESS: Message successfully added to database. * + * NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the + * file, (such as permission denied, or file not found, + * etc.). Nothing added to the database. + * * NOTMUCH_STATUS_FILE_NOT_EMAIL: the contents of filename don't look * like an email message. Nothing added to the database. */ diff --git a/sha1.c b/sha1.c new file mode 100644 index 00000000..152d870c --- /dev/null +++ b/sha1.c @@ -0,0 +1,77 @@ +/* sha1.c - Interfaces to SHA-1 hash for the notmuch mail system + * + * Copyright © 2009 Carl Worth + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: Carl Worth + */ + +#include "notmuch-private.h" + +#include "libsha1.h" + +/* Just some simple interfaces on top of libsha1 so that we can leave + * libsha1 as untouched as possible. 
*/
+
+/* Compute the SHA-1 digest of the contents of 'filename' and return
+ * it as a newly allocated, NUL-terminated string of lowercase
+ * hexadecimal digits (two per digest byte). The caller should
+ * free() the string when finished.
+ *
+ * Returns NULL if the file cannot be opened, if a read error occurs,
+ * or if memory for the result cannot be allocated. The file is
+ * closed on every path out of this function.
+ */
+char *
+notmuch_sha1_of_file (const char *filename)
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    FILE *file;
+    unsigned char block[4096];
+    size_t bytes_read;
+    sha1_ctx sha1;
+    unsigned char digest[SHA1_DIGEST_SIZE];
+    char *result;
+    int read_failed;
+    int i;
+
+    /* Binary mode, so that the bytes hashed are exactly the bytes
+     * stored in the file, with no text-mode translation. */
+    file = fopen (filename, "rb");
+    if (file == NULL)
+        return NULL;
+
+    sha1_begin (&sha1);
+
+    /* With a non-zero element size and count, fread returns 0 only
+     * at end-of-file or on error; ferror below tells the two apart.
+     * sizeof (block) keeps the read size tied to the buffer size. */
+    while ((bytes_read = fread (block, 1, sizeof (block), file)) > 0)
+        sha1_hash (block, bytes_read, &sha1);
+
+    read_failed = ferror (file);
+
+    /* The file is not needed past this point. Closing it here, ahead
+     * of every remaining return, means no later failure can leak it. */
+    fclose (file);
+
+    if (read_failed)
+        return NULL;
+
+    sha1_end (digest, &sha1);
+
+    result = calloc (SHA1_DIGEST_SIZE * 2 + 1, 1);
+    if (result == NULL)
+        return NULL;
+
+    /* calloc zero-filled the buffer, so the final byte already holds
+     * the terminating NUL. */
+    for (i = 0; i < SHA1_DIGEST_SIZE; i++) {
+        result[2 * i] = hex_digits[digest[i] >> 4];
+        result[2 * i + 1] = hex_digits[digest[i] & 0x0f];
+    }
+
+    return result;
+}
+