From 3f406fdefca0400c1c2023674dfc5b36db55a1e7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sun, 13 Dec 2009 15:17:35 -0800 Subject: [PATCH 01/63] configure: Look for both Xapian 1.1 and 1.0 and allow user override. The in-development version of Xapian provides a config program named xapian-config-1.1 while the released version provides a program named xapian-config instead. By default, we now try each of these in turn, and we also allow the user to set a XAPIAN_CONFIG environment variable to explicitly specify a particular program. --- configure | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/configure b/configure index fa8e142b..d240b6ac 100755 --- a/configure +++ b/configure @@ -6,6 +6,7 @@ CC=${CC:-gcc} CXX=${CXX:-g++} CFLAGS=${CFLAGS:--O2} CXXFLAGS=${CXXFLAGS:-\$(CFLAGS)} +XAPIAN_CONFIG=${XAPIAN_CONFIG:-xapian-config-1.1 xapian-config} # Set the defaults for values the user can specify with command-line # options. @@ -37,6 +38,13 @@ First, some common variables can specified via environment variables: Each of these values can further be controlled by specifying them later on the "make" command line. +Other environment variables can be used to control configure itself, +(and for which there is no equivalent build-time control): + + XAPIAN_CONFIG The program to use to determine flags for + compiling and linking against the Xapian + library. [$XAPIAN_CONFIG] + Additionally, various options can be specified on the configure command line. @@ -97,14 +105,18 @@ else fi printf "Checking for Xapian development files... " -if xapian-config --version > /dev/null 2>&1; then - printf "Yes.\n" - have_xapian=1 - xapian_cxxflags=$(xapian-config --cxxflags) - xapian_ldflags=$(xapian-config --libs) -else +have_xapian=0 +for xapian_config in ${XAPIAN_CONFIG}; do + if ${xapian_config} --version > /dev/null 2>&1; then + printf "Yes.\n" + have_xapian=1 + xapian_cxxflags=$(${xapian_config} --cxxflags) + xapian_ldflags=$(${xapian_config} --libs) + break + fi +done +if [ ${have_xapian} = "0" ]; then printf "No.\n" - have_xapian=0 errors=$((errors + 1)) fi From e1669b155c89fb7f140c64d36bf25c246d024ab8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 14 Dec 2009 16:06:37 -0800 Subject: [PATCH 02/63] notmuch new: Restrict the "not much" pun to the first run. Several people complained that the humor wore thin very quickly. The most significant case of "not much mail" is when counting the user's initial mail collection. We've promised on the web page that no matter how much mail the user has, notmuch will consider it to be "not much" so let's say so. (This message was in place very early on, but was inadvertently dropped at some point.) --- notmuch-new.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 9d206167..501f04c9 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -485,7 +485,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) if (interrupted) return 1; - printf ("Found %d total files. \n", count); + printf ("Found %d total files (that's not much mail).\n", count); notmuch = notmuch_database_create (db_path); add_files_state.ignore_read_only_directories = FALSE; add_files_state.total_files = count; @@ -525,12 +525,12 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) } } if (add_files_state.added_messages) { - printf ("Added %d new %s to the database (not much, really).\n", + printf ("Added %d new %s to the database.\n", add_files_state.added_messages, add_files_state.added_messages == 1 ? "message" : "messages"); } else { - printf ("No new mail---and that's not much.\n"); + printf ("No new mail.\n"); } if (elapsed > 1 && ! add_files_state.saw_read_only_directory) { From 8c6b7d311c123bdddfc806359f1fd6b444245914 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 12:34:06 -0800 Subject: [PATCH 03/63] lib: Add missing value to notmuch_private_status_t enum. And fix the initialization such that the private enum will always have distinct values from the public enum even if we similarly miss the addition of a new public value in the future. --- lib/notmuch-private.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 116f63d6..b9566340 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -117,9 +117,10 @@ typedef enum _notmuch_private_status { NOTMUCH_PRIVATE_STATUS_FILE_NOT_EMAIL = NOTMUCH_STATUS_FILE_NOT_EMAIL, NOTMUCH_PRIVATE_STATUS_NULL_POINTER = NOTMUCH_STATUS_NULL_POINTER, NOTMUCH_PRIVATE_STATUS_TAG_TOO_LONG = NOTMUCH_STATUS_TAG_TOO_LONG, + NOTMUCH_PRIVATE_STATUS_UNBALANCED_FREEZE_THAW = NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW, /* Then add our own private values. */ - NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG, + NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG = NOTMUCH_STATUS_LAST_STATUS, NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND, NOTMUCH_PRIVATE_STATUS_LAST_STATUS From 3a9c3ec9e719f0e5adefe0ceafffeb34c7a3917e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 14 Dec 2009 15:57:44 -0800 Subject: [PATCH 04/63] notmuch new: Remove hack to ignore read-only directories in mail store. This was really the last thing keeping the initial run of "notmuch new" being different from all other runs. And I'm taking a fresh look at the performance of "notmuch new" anyway, so I think we can safely drop this optimization. --- notmuch-client.h | 2 -- notmuch-new.c | 21 --------------------- notmuch.1 | 8 -------- notmuch.c | 5 ----- 4 files changed, 36 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 50a30fed..eca99906 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -73,8 +73,6 @@ typedef void (*add_files_callback_t) (notmuch_message_t *message); typedef struct { - int ignore_read_only_directories; - int saw_read_only_directory; int output_is_a_tty; int verbose; diff --git a/notmuch-new.c b/notmuch-new.c index 501f04c9..21c9b6ac 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -144,17 +144,6 @@ add_files_recursive (notmuch_database_t *notmuch, struct dirent **namelist = NULL; int num_entries; - /* If we're told to, we bail out on encountering a read-only - * directory, (with this being a clear clue from the user to - * Notmuch that new mail won't be arriving there and we need not - * look. */ - if (state->ignore_read_only_directories && - (st->st_mode & S_IWUSR) == 0) - { - state->saw_read_only_directory = TRUE; - goto DONE; - } - path_mtime = st->st_mtime; path_dbtime = notmuch_database_get_timestamp (notmuch, path); @@ -487,12 +476,10 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) printf ("Found %d total files (that's not much mail).\n", count); notmuch = notmuch_database_create (db_path); - add_files_state.ignore_read_only_directories = FALSE; add_files_state.total_files = count; } else { notmuch = notmuch_database_open (db_path, NOTMUCH_DATABASE_MODE_READ_WRITE); - add_files_state.ignore_read_only_directories = TRUE; add_files_state.total_files = 0; } @@ -502,7 +489,6 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) talloc_free (dot_notmuch_path); dot_notmuch_path = NULL; - add_files_state.saw_read_only_directory = FALSE; add_files_state.processed_files = 0; add_files_state.added_messages = 0; gettimeofday (&add_files_state.tv_start, NULL); @@ -533,13 +519,6 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) printf ("No new mail.\n"); } - if (elapsed > 1 && ! add_files_state.saw_read_only_directory) { - printf ("\nTip: If you have any sub-directories that are archives (that is,\n" - "they will never receive new mail), marking these directories as\n" - "read-only (chmod u-w /path/to/dir) will make \"notmuch new\"\n" - "much more efficient (it won't even look in those directories).\n"); - } - if (ret) { printf ("\nNote: At least one error was encountered: %s\n", notmuch_status_to_string (ret)); diff --git a/notmuch.1 b/notmuch.1 index 369ecba1..282ad989 100644 --- a/notmuch.1 +++ b/notmuch.1 @@ -109,14 +109,6 @@ whenever new mail is delivered and you wish to incorporate it into the database. These subsequent runs will be much quicker than the initial run. -Note: -.B notmuch new -runs (other than the first run) will skip any read-only directories, -so you can use that to mark directories that will not receive any new -mail (and make -.B notmuch new -even faster). - Invoking .B notmuch with no command argument will run diff --git a/notmuch.c b/notmuch.c index 2ac8a592..87479f81 100644 --- a/notmuch.c +++ b/notmuch.c @@ -145,11 +145,6 @@ command_t commands[] = { "\t\t\tVerbose operation. Shows paths of message files as\n" "\t\t\tthey are being indexed.\n" "\n" - "\t\tNote: \"notmuch new\" runs (other than the first run) will\n" - "\t\tskip any read-only directories, so you can use that to mark\n" - "\t\tdirectories that will not receive any new mail (and make\n" - "\t\t\"notmuch new\" even faster).\n" - "\n" "\t\tInvoking notmuch with no command argument will run new if\n" "\t\tthe setup command has previously been completed, but new has\n" "\t\tnot previously been run." }, From ba12bf1f2625a34f960502a06ba8907445ef5257 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 12:32:11 -0800 Subject: [PATCH 05/63] lib: Abstract the extraction of a relative path from set_filename We'll soon be having multiple entry points that accept a filename path, so we want common code for getting a relative path from a potentially absolute path. --- lib/database.cc | 34 ++++++++++++++++++++++++++++++++++ lib/message.cc | 22 +++------------------- lib/notmuch-private.h | 4 ++++ 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index b6c4d07b..261be016 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -587,6 +587,40 @@ timestamp_db_key (const char *key) return strdup (key); } +/* Given a legal 'path' for the database, return the relative path. + * + * The return value will be a pointer to the originl path contents, + * and will be either the original string (if 'path' was relative) or + * a portion of the string (if path was absolute and begins with the + * database path). + */ +const char * +_notmuch_database_relative_path (notmuch_database_t *notmuch, + const char *path) +{ + const char *db_path, *relative; + unsigned int db_path_len; + + db_path = notmuch_database_get_path (notmuch); + db_path_len = strlen (db_path); + + relative = path; + + if (*relative == '/') { + while (*relative == '/' && *(relative+1) == '/') + relative++; + + if (strncmp (relative, db_path, db_path_len) == 0) + { + relative += db_path_len; + while (*relative == '/') + relative++; + } + } + + return relative; +} + notmuch_status_t notmuch_database_set_timestamp (notmuch_database_t *notmuch, const char *key, time_t timestamp) diff --git a/lib/message.cc b/lib/message.cc index 49519f1e..7c7ea7a1 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -396,9 +396,7 @@ void _notmuch_message_set_filename (notmuch_message_t *message, const char *filename) { - const char *s; - const char *db_path; - unsigned int db_path_len; + const char *relative; if (message->filename) { talloc_free (message->filename); @@ -408,22 +406,8 @@ _notmuch_message_set_filename (notmuch_message_t *message, if (filename == NULL) INTERNAL_ERROR ("Message filename cannot be NULL."); - s = filename; - - db_path = notmuch_database_get_path (message->notmuch); - db_path_len = strlen (db_path); - - if (*s == '/' && strlen (s) > db_path_len - && strncmp (s, db_path, db_path_len) == 0) - { - s += db_path_len; - while (*s == '/') s++; - - if (!*s) - INTERNAL_ERROR ("Message filename was same as db prefix."); - } - - message->doc.set_data (s); + relative = _notmuch_database_relative_path (message->notmuch, filename); + message->doc.set_data (relative); } const char * diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index b9566340..50f93eec 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -151,6 +151,10 @@ typedef enum _notmuch_private_status { const char * _find_prefix (const char *name); +const char * +_notmuch_database_relative_path (notmuch_database_t *notmuch, + const char *path); + /* thread.cc */ notmuch_thread_t * From 50ae83a17feb1fc2f48fb8e51ef73da08ae4e2f2 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 17 Dec 2009 14:33:34 -0800 Subject: [PATCH 06/63] lib: Rename set/get_timestamp to set/get_directory_mtime. I've been suitably scolded by Keith for doing a premature generalization that ended up just making the documentation more convoluted. Fix that. --- lib/database.cc | 80 ++++++++++++++++++++++++------------------------- lib/notmuch.h | 53 +++++++++++++++----------------- notmuch-new.c | 4 +-- 3 files changed, 66 insertions(+), 71 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 261be016..7efd3b4a 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -37,7 +37,7 @@ typedef struct { /* Here's the current schema for our database: * - * We currently have two different types of documents: mail and timestamps. + * We currently have two different types of documents: mail and directory. * * Mail document * ------------- @@ -75,21 +75,19 @@ typedef struct { * user in searching. But the database doesn't really care itself * about any of these. * - * Timestamp document + * Directory document * ------------------ - * A timestamp document is used by a client of the notmuch library to + * A directory document is used by a client of the notmuch library to * maintain data necessary to allow for efficient polling of mail - * directories. The notmuch library does no interpretation of - * timestamps, but merely allows the user to store and retrieve - * timestamps as name/value pairs. + * directories. * - * The timestamp document is indexed with a single prefixed term: + * The directory document is indexed with a single prefixed term: * - * timestamp: The user's key value (likely a directory name) + * directory: The directory path (an absolute path) * * and has a single value: * - * TIMESTAMP: The time_t value from the user. + * TIMESTAMP: The mtime of the directory (at last scan) */ /* With these prefix values we follow the conventions published here: @@ -111,7 +109,8 @@ prefix_t BOOLEAN_PREFIX_INTERNAL[] = { { "type", "T" }, { "reference", "XREFERENCE" }, { "replyto", "XREPLYTO" }, - { "timestamp", "XTIMESTAMP" }, + /* XXX: Need a flag day to rename XTIMESTAMP. */ + { "directory", "XTIMESTAMP" }, }; prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { @@ -561,30 +560,29 @@ notmuch_database_get_path (notmuch_database_t *notmuch) } static notmuch_private_status_t -find_timestamp_document (notmuch_database_t *notmuch, const char *db_key, +find_directory_document (notmuch_database_t *notmuch, const char *db_path, Xapian::Document *doc, unsigned int *doc_id) { - return find_unique_document (notmuch, "timestamp", db_key, doc, doc_id); + return find_unique_document (notmuch, "directory", db_path, doc, doc_id); } -/* We allow the user to use arbitrarily long keys for timestamps, - * (they're for filesystem paths after all, which have no limit we - * know about). But we have a term-length limit. So if we exceed that, - * we'll use the SHA-1 of the user's key as the actual key for - * constructing a database term. +/* We allow the user to use arbitrarily long paths for directories. But + * we have a term-length limit. So if we exceed that, we'll use the + * SHA-1 of the path for the database term. * - * Caution: This function returns a newly allocated string which the - * caller should free() when finished. + * Note: This function may return the original value of 'path'. If it + * does not, then the caller is responsible to free() the returned + * value. */ -static char * -timestamp_db_key (const char *key) +static const char * +directory_db_path (const char *path) { - int term_len = strlen (_find_prefix ("timestamp")) + strlen (key); + int term_len = strlen (_find_prefix ("directory")) + strlen (path); if (term_len > NOTMUCH_TERM_MAX) - return notmuch_sha1_of_string (key); + return notmuch_sha1_of_string (path); else - return strdup (key); + return path; } /* Given a legal 'path' for the database, return the relative path. @@ -622,15 +620,16 @@ _notmuch_database_relative_path (notmuch_database_t *notmuch, } notmuch_status_t -notmuch_database_set_timestamp (notmuch_database_t *notmuch, - const char *key, time_t timestamp) +notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, + const char *path, + time_t mtime) { Xapian::Document doc; Xapian::WritableDatabase *db; unsigned int doc_id; notmuch_private_status_t status; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - char *db_key = NULL; + const char *db_path = NULL; if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { fprintf (stderr, "Attempted to update a read-only database.\n"); @@ -638,17 +637,17 @@ notmuch_database_set_timestamp (notmuch_database_t *notmuch, } db = static_cast (notmuch->xapian_db); - db_key = timestamp_db_key (key); + db_path = directory_db_path (path); try { - status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + status = find_directory_document (notmuch, db_path, &doc, &doc_id); doc.add_value (NOTMUCH_VALUE_TIMESTAMP, - Xapian::sortable_serialise (timestamp)); + Xapian::sortable_serialise (mtime)); if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { char *term = talloc_asprintf (NULL, "%s%s", - _find_prefix ("timestamp"), db_key); + _find_prefix ("directory"), db_path); doc.add_term (term); talloc_free (term); @@ -658,31 +657,32 @@ notmuch_database_set_timestamp (notmuch_database_t *notmuch, } } catch (const Xapian::Error &error) { - fprintf (stderr, "A Xapian exception occurred setting timestamp: %s.\n", + fprintf (stderr, "A Xapian exception occurred setting directory mtime: %s.\n", error.get_msg().c_str()); notmuch->exception_reported = TRUE; ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; } - if (db_key) - free (db_key); + if (db_path != path) + free ((char *) db_path); return ret; } time_t -notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) +notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, + const char *path) { Xapian::Document doc; unsigned int doc_id; notmuch_private_status_t status; - char *db_key = NULL; + const char *db_path = NULL; time_t ret = 0; - db_key = timestamp_db_key (key); + db_path = directory_db_path (path); try { - status = find_timestamp_document (notmuch, db_key, &doc, &doc_id); + status = find_directory_document (notmuch, db_path, &doc, &doc_id); if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) goto DONE; @@ -694,8 +694,8 @@ notmuch_database_get_timestamp (notmuch_database_t *notmuch, const char *key) } DONE: - if (db_key) - free (db_key); + if (db_path != path) + free ((char *) db_path); return ret; } diff --git a/lib/notmuch.h b/lib/notmuch.h index 60834fb5..a98241de 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -178,56 +178,51 @@ notmuch_database_close (notmuch_database_t *database); const char * notmuch_database_get_path (notmuch_database_t *database); -/* Store a timestamp within the database. +/* Store an mtime within the database for 'path'. * - * The Notmuch database will not interpret this key nor the timestamp - * values at all. It will merely store them together and return the - * timestamp when notmuch_database_get_timestamp is called with the - * same value for 'key'. - * - * The intention is for the caller to use the timestamp to allow - * efficient identification of new messages to be added to the - * database. The recommended usage is as follows: + * The intention is for the caller to use the mtime to allow efficient + * identification of new messages to be added to the database. The + * recommended usage is as follows: * * o Read the mtime of a directory from the filesystem * * o Call add_message for all mail files in the directory * - * o Call notmuch_database_set_timestamp with the path of the - * directory as 'key' and the originally read mtime as 'value'. + * o Call notmuch_database_set_directory_mtime * * Then, when wanting to check for updates to the directory in the - * future, the client can call notmuch_database_get_timestamp and know - * that it only needs to add files if the mtime of the directory and - * files are newer than the stored timestamp. + * future, the client can call notmuch_database_get_directory_mtime + * and know that it only needs to add files if the mtime of the + * directory and files are newer than the stored timestamp. * - * Note: The notmuch_database_get_timestamp function does not allow - * the caller to distinguish a timestamp of 0 from a non-existent - * timestamp. So don't store a timestamp of 0 unless you are - * comfortable with that. + * Note: The notmuch_database_get_directory_mtime function does not + * allow the caller to distinguish a timestamp of 0 from a + * non-existent timestamp. So don't store a timestamp of 0 unless you + * are comfortable with that. * * Return value: * - * NOTMUCH_STATUS_SUCCESS: Timestamp successfully stored in database. + * NOTMUCH_STATUS_SUCCESS: mtime successfully stored in database. * * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception - * occurred. Timestamp not stored. + * occurred, mtime not stored. */ notmuch_status_t -notmuch_database_set_timestamp (notmuch_database_t *database, - const char *key, time_t timestamp); +notmuch_database_set_directory_mtime (notmuch_database_t *database, + const char *path, + time_t mtime); -/* Retrieve a timestamp from the database. +/* Retrieve the mtime from the database for 'path'. * - * Returns the timestamp value previously stored by calling - * notmuch_database_set_timestamp with the same value for 'key'. + * Returns the mtime value previously stored by calling + * notmuch_database_set_directory_mtime with the same 'path'. * - * Returns 0 if no timestamp is stored for 'key' or if any error - * occurred querying the database. + * Returns 0 if no mtime is stored for 'path' or if any error occurred + * querying the database. */ time_t -notmuch_database_get_timestamp (notmuch_database_t *database, - const char *key); +notmuch_database_get_directory_mtime (notmuch_database_t *database, + const char *path); /* Add a new message to the given notmuch database. * diff --git a/notmuch-new.c b/notmuch-new.c index 21c9b6ac..2a929c56 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -146,7 +146,7 @@ add_files_recursive (notmuch_database_t *notmuch, path_mtime = st->st_mtime; - path_dbtime = notmuch_database_get_timestamp (notmuch, path); + path_dbtime = notmuch_database_get_directory_mtime (notmuch, path); num_entries = scandir (path, &namelist, 0, ino_cmp); if (num_entries == -1) { @@ -277,7 +277,7 @@ add_files_recursive (notmuch_database_t *notmuch, next = NULL; } - status = notmuch_database_set_timestamp (notmuch, path, path_mtime); + status = notmuch_database_set_directory_mtime (notmuch, path, path_mtime); if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; From 9257622da8ed50502e59adec69e5daa043ef242a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 13:05:06 -0800 Subject: [PATCH 07/63] lib: Document that the filename is stored in the 'data' of a mail document Our database schema documentation previously didn't give any indication of where this most essential piece of information is stored. --- lib/database.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/database.cc b/lib/database.cc index 7efd3b4a..f7f9943f 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -75,6 +75,9 @@ typedef struct { * user in searching. But the database doesn't really care itself * about any of these. * + * Finally, the data portion of a mail document contains the path name + * of the mail message (relative to the database path). + * * Directory document * ------------------ * A directory document is used by a client of the notmuch library to From 154bf7ac677c41168c5c6a982fee3f22350adfef Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 13:11:00 -0800 Subject: [PATCH 08/63] database: Store directory paths as relative, not absolute. We were already storing relative mail filenames, so this is consistent with that. Additionally, it means that directory documents remain valid even if the database is relocated within its containing filesystem. --- lib/database.cc | 4 +++- lib/notmuch.h | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/database.cc b/lib/database.cc index f7f9943f..0ef59ea9 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -86,7 +86,7 @@ typedef struct { * * The directory document is indexed with a single prefixed term: * - * directory: The directory path (an absolute path) + * directory: The directory path (relative to the database path) * * and has a single value: * @@ -639,6 +639,8 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, return NOTMUCH_STATUS_READONLY_DATABASE; } + path = _notmuch_database_relative_path (notmuch, path); + db = static_cast (notmuch->xapian_db); db_path = directory_db_path (path); diff --git a/lib/notmuch.h b/lib/notmuch.h index a98241de..786b8e9f 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -179,6 +179,11 @@ const char * notmuch_database_get_path (notmuch_database_t *database); /* Store an mtime within the database for 'path'. + * + * Here,'path' should be the path of a directory relative to the path + * of 'database' (see notmuch_database_get_path), or else should be an + * absolute path with initial components that match the path of + * 'database'. * * The intention is for the caller to use the mtime to allow efficient * identification of new messages to be added to the database. The From 851c97aed727d284872992dcf6b64bc2069c1d0e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 13:18:18 -0800 Subject: [PATCH 09/63] database: Rename internal directory value from XTIMESTAMP to XDIRECTORY. The recent change from storing absolute paths to relative paths means that new directory documents will already be created, (and the old ones will just linger stale in the database). Given that, we might as well put a clean name on the term in the new documents, (and no real flag day is needed). --- lib/database.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 0ef59ea9..5c49f74c 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -112,8 +112,7 @@ prefix_t BOOLEAN_PREFIX_INTERNAL[] = { { "type", "T" }, { "reference", "XREFERENCE" }, { "replyto", "XREPLYTO" }, - /* XXX: Need a flag day to rename XTIMESTAMP. */ - { "directory", "XTIMESTAMP" }, + { "directory", "XDIRECTORY" }, }; prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { From e890b0cf4011fd9fd77ebd87343379e4a778888b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 13:20:26 -0800 Subject: [PATCH 10/63] database: Store the parent ID for each directory document. Storing the document ID of the parent of each directory document will allow us to find all child-directory documents for a given directory document. We will need this in order to detect directories that have been removed from the mail store, (though we aren't yet doing this). --- lib/database.cc | 86 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/lib/database.cc b/lib/database.cc index 5c49f74c..acd06de8 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -84,9 +84,11 @@ typedef struct { * maintain data necessary to allow for efficient polling of mail * directories. * - * The directory document is indexed with a single prefixed term: + * The directory document contains the following terms: * * directory: The directory path (relative to the database path) + * parent: The document ID of the parent directory document. + * Top-level directories will have a parent value of 0. * * and has a single value: * @@ -113,6 +115,7 @@ prefix_t BOOLEAN_PREFIX_INTERNAL[] = { { "reference", "XREFERENCE" }, { "replyto", "XREPLYTO" }, { "directory", "XDIRECTORY" }, + { "parent", "XPARENT" }, }; prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { @@ -587,6 +590,76 @@ directory_db_path (const char *path) return path; } +notmuch_status_t +_find_parent_id (notmuch_database_t *notmuch, + const char *path, + Xapian::docid *parent_id) +{ + const char *slash, *parent_db_path; + char *parent_path; + notmuch_private_status_t private_status; + notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; + + if (path == NULL || *path == '\0') { + *parent_id = 0; + return NOTMUCH_STATUS_SUCCESS; + } + + /* Find the last slash (not counting a trailing slash), if any. */ + + slash = path + strlen (path) - 1; + + /* First, skip trailing slashes. */ + while (slash != path) { + if (*slash != '/') + break; + + --slash; + } + + /* Then, find a slash. */ + while (slash != path) { + if (*slash == '/') + break; + + --slash; + } + + /* Finally, skip multiple slashes. */ + while (slash != path) { + if (*slash != '/') + break; + + --slash; + } + + if (slash == path) + parent_path = talloc_strdup (notmuch, ""); + else + parent_path = talloc_strndup (notmuch, path, slash - path + 1); + + parent_db_path = directory_db_path (parent_path); + + private_status = find_unique_doc_id (notmuch, "directory", + parent_db_path, parent_id); + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + status = notmuch_database_set_directory_mtime (notmuch, + parent_path, 0); + if (status) + return status; + private_status = find_unique_doc_id (notmuch, "directory", + parent_db_path, parent_id); + status = COERCE_STATUS (private_status, "_find_parent_id"); + } + + if (parent_db_path != parent_path) + free ((char *) parent_db_path); + + talloc_free (parent_path); + + return status; +} + /* Given a legal 'path' for the database, return the relative path. * * The return value will be a pointer to the originl path contents, @@ -632,6 +705,7 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, notmuch_private_status_t status; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; const char *db_path = NULL; + Xapian::docid parent_id; if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { fprintf (stderr, "Attempted to update a read-only database.\n"); @@ -655,6 +729,16 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, doc.add_term (term); talloc_free (term); + status = _find_parent_id (notmuch, path, &parent_id); + if (status) + return status; + + term = talloc_asprintf (NULL, "%s%u", + _find_prefix ("parent"), + parent_id); + doc.add_term (term); + talloc_free (term); + db->add_document (doc); } else { db->replace_document (doc_id, doc); From 406ec4b15d65f1104c7ff3ee654a5e9cd5b39f29 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 19 Dec 2009 15:11:55 -0800 Subject: [PATCH 11/63] database: Export _notmuch_database_find_parent_id for internal use. We'll soon have mail documents referring to their parent directory's directory documents, so we'll need access to _find_parent_id in files such as message.cc. --- lib/database.cc | 40 +++++++++++++++++++++++++++++++++------- lib/notmuch-private.h | 5 +++++ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index acd06de8..bf56f520 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -590,10 +590,32 @@ directory_db_path (const char *path) return path; } +/* Given a 'path' (relative to the database path) return the document + * ID of the directory document corresponding to the parent directory + * of 'path' in 'parent_id'. + * + * The original 'path' can represent either a regular file or a + * directory, (in either case, the document ID of the parent will be + * returned). Trailing slashes on 'path' will be ignored, and any + * cases of multiple '/' characters appearing in series will be + * treated as a single '/'. + * + * If no directory document exists in the database for the parent, (or + * for any of its parents up to the top-level database path), then + * directory documents will be created for these (each with an mtime + * of 0). + * + * Return value: + * + * NOTMUCH_STATUS_SUCCESS: Valid value available in parent_id. + * + * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception + * occurred and parent_id will be set to (unsigned) -1. + */ notmuch_status_t -_find_parent_id (notmuch_database_t *notmuch, - const char *path, - Xapian::docid *parent_id) +_notmuch_database_find_parent_id (notmuch_database_t *notmuch, + const char *path, + unsigned int *parent_id) { const char *slash, *parent_db_path; char *parent_path; @@ -657,6 +679,9 @@ _find_parent_id (notmuch_database_t *notmuch, talloc_free (parent_path); + if (status) + *parent_id = -1; + return status; } @@ -705,7 +730,7 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, notmuch_private_status_t status; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; const char *db_path = NULL; - Xapian::docid parent_id; + unsigned int parent_id; if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { fprintf (stderr, "Attempted to update a read-only database.\n"); @@ -729,9 +754,10 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, doc.add_term (term); talloc_free (term); - status = _find_parent_id (notmuch, path, &parent_id); - if (status) - return status; + ret = _notmuch_database_find_parent_id (notmuch, path, + &parent_id); + if (ret) + return ret; term = talloc_asprintf (NULL, "%s%u", _find_prefix ("parent"), diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 50f93eec..56929ffa 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -155,6 +155,11 @@ const char * _notmuch_database_relative_path (notmuch_database_t *notmuch, const char *path); +notmuch_status_t +_notmuch_database_find_parent_id (notmuch_database_t *notmuch, + const char *path, + unsigned int *parent_id); + /* thread.cc */ notmuch_thread_t * From 4c1cca888fc89cd5c072a2df609d6d6d47acdfaf Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sun, 20 Dec 2009 15:46:41 -0800 Subject: [PATCH 12/63] database: Store directory path in 'data' of directory documents. We're planning to have mail documents refer to directory documents for the path of the containing directory. To support this, we need the path in the data, (since the path in the 'directory' term can be irretrievable as it will be the SHA1 sum of the path for a very long path). --- lib/database.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/database.cc b/lib/database.cc index bf56f520..dbfec630 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -87,12 +87,19 @@ typedef struct { * The directory document contains the following terms: * * directory: The directory path (relative to the database path) + * Or the SHA1 sum of the directory path (if the + * path itself is too long to fit in a Xapian + * term). + * * parent: The document ID of the parent directory document. * Top-level directories will have a parent value of 0. * * and has a single value: * * TIMESTAMP: The mtime of the directory (at last scan) + * + * The data portion of a directory document contains the path of the + * directory (relative to the datbase path). */ /* With these prefix values we follow the conventions published here: @@ -754,6 +761,8 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, doc.add_term (term); talloc_free (term); + doc.set_data (path); + ret = _notmuch_database_find_parent_id (notmuch, path, &parent_id); if (ret) From 84742d86ab2fd3e5b5b601f073351454b993575e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 08:14:52 -0800 Subject: [PATCH 13/63] database: Split _find_parent_id into _split_path and _find_directory_id Some pending commits want the _split_path functionality separate from mapping a directory to a document ID. The split_path function now returns the basename as well as the directory name. --- lib/database.cc | 132 +++++++++++++++++++++++++----------------- lib/notmuch-private.h | 12 +++- 2 files changed, 88 insertions(+), 56 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index dbfec630..f122c2e4 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -597,40 +597,40 @@ directory_db_path (const char *path) return path; } -/* Given a 'path' (relative to the database path) return the document - * ID of the directory document corresponding to the parent directory - * of 'path' in 'parent_id'. +/* Given a path, split it into two parts: the directory part is all + * components except for the last, and the basename is that last + * component. Getting the return-value for either part is optional + * (the caller can pass NULL). * * The original 'path' can represent either a regular file or a - * directory, (in either case, the document ID of the parent will be - * returned). Trailing slashes on 'path' will be ignored, and any + * directory---the splitting will be carried out in the same way in + * either case. Trailing slashes on 'path' will be ignored, and any * cases of multiple '/' characters appearing in series will be * treated as a single '/'. * - * If no directory document exists in the database for the parent, (or - * for any of its parents up to the top-level database path), then - * directory documents will be created for these (each with an mtime - * of 0). + * Allocation (if any) will have 'ctx' as the talloc owner. But + * pointers will be returned within the original path string whenever + * possible. * - * Return value: - * - * NOTMUCH_STATUS_SUCCESS: Valid value available in parent_id. - * - * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception - * occurred and parent_id will be set to (unsigned) -1. + * Note: If 'path' is non-empty and contains no non-trailing slash, + * (that is, consists of a filename with no parent directory), then + * the directory returned will be an empty string. However, if 'path' + * is an empty string, then both directory and basename will be + * returned as NULL. */ notmuch_status_t -_notmuch_database_find_parent_id (notmuch_database_t *notmuch, - const char *path, - unsigned int *parent_id) +_notmuch_database_split_path (void *ctx, + const char *path, + const char **directory, + const char **basename) { - const char *slash, *parent_db_path; - char *parent_path; - notmuch_private_status_t private_status; - notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; + const char *slash; if (path == NULL || *path == '\0') { - *parent_id = 0; + if (directory) + *directory = NULL; + if (basename) + *basename = NULL; return NOTMUCH_STATUS_SUCCESS; } @@ -651,6 +651,9 @@ _notmuch_database_find_parent_id (notmuch_database_t *notmuch, if (*slash == '/') break; + if (basename) + *basename = slash; + --slash; } @@ -662,32 +665,54 @@ _notmuch_database_find_parent_id (notmuch_database_t *notmuch, --slash; } - if (slash == path) - parent_path = talloc_strdup (notmuch, ""); - else - parent_path = talloc_strndup (notmuch, path, slash - path + 1); - - parent_db_path = directory_db_path (parent_path); - - private_status = find_unique_doc_id (notmuch, "directory", - parent_db_path, parent_id); - if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - status = notmuch_database_set_directory_mtime (notmuch, - parent_path, 0); - if (status) - return status; - private_status = find_unique_doc_id (notmuch, "directory", - parent_db_path, parent_id); - status = COERCE_STATUS (private_status, "_find_parent_id"); + if (slash == path) { + if (directory) + *directory = talloc_strdup (ctx, ""); + if (basename) + *basename = path; + } else { + if (directory) + *directory = talloc_strndup (ctx, path, slash - path + 1); } - if (parent_db_path != parent_path) - free ((char *) parent_db_path); + return NOTMUCH_STATUS_SUCCESS; +} - talloc_free (parent_path); +notmuch_status_t +_notmuch_database_find_directory_id (notmuch_database_t *notmuch, + const char *path, + unsigned int *directory_id) +{ + notmuch_private_status_t private_status; + notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; + const char *db_path; + + if (path == NULL) { + *directory_id = 0; + return NOTMUCH_STATUS_SUCCESS; + } + + db_path = directory_db_path (path); + + private_status = find_unique_doc_id (notmuch, "directory", + db_path, directory_id); + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + status = notmuch_database_set_directory_mtime (notmuch, + path, 0); + if (status) + goto DONE; + + private_status = find_unique_doc_id (notmuch, "directory", + db_path, directory_id); + status = COERCE_STATUS (private_status, "_find_directory_id"); + } + + DONE: + if (db_path != path) + free ((char *) db_path); if (status) - *parent_id = -1; + *directory_id = -1; return status; } @@ -736,8 +761,9 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, unsigned int doc_id; notmuch_private_status_t status; notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - const char *db_path = NULL; + const char *parent, *db_path = NULL; unsigned int parent_id; + void *local = talloc_new (notmuch); if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { fprintf (stderr, "Attempted to update a read-only database.\n"); @@ -756,23 +782,20 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, Xapian::sortable_serialise (mtime)); if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - char *term = talloc_asprintf (NULL, "%s%s", + char *term = talloc_asprintf (local, "%s%s", _find_prefix ("directory"), db_path); doc.add_term (term); - talloc_free (term); doc.set_data (path); - ret = _notmuch_database_find_parent_id (notmuch, path, - &parent_id); - if (ret) - return ret; + _notmuch_database_split_path (local, path, &parent, NULL); - term = talloc_asprintf (NULL, "%s%u", + _notmuch_database_find_directory_id (notmuch, parent, &parent_id); + + term = talloc_asprintf (local, "%s%u", _find_prefix ("parent"), parent_id); doc.add_term (term); - talloc_free (term); db->add_document (doc); } else { @@ -801,6 +824,7 @@ notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, notmuch_private_status_t status; const char *db_path = NULL; time_t ret = 0; + void *local = talloc_new (notmuch); db_path = directory_db_path (path); @@ -820,6 +844,8 @@ notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, if (db_path != path) free ((char *) db_path); + talloc_free (local); + return ret; } diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 56929ffa..9f470c9f 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -156,9 +156,15 @@ _notmuch_database_relative_path (notmuch_database_t *notmuch, const char *path); notmuch_status_t -_notmuch_database_find_parent_id (notmuch_database_t *notmuch, - const char *path, - unsigned int *parent_id); +_notmuch_database_split_path (void *ctx, + const char *path, + const char **directory, + const char **basename); + +notmuch_status_t +_notmuch_database_find_directory_id (notmuch_database_t *database, + const char *path, + unsigned int *directory_id); /* thread.cc */ From 6ca6c089e9df7affe6bee0392197509a24ab2546 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 08:23:26 -0800 Subject: [PATCH 14/63] database: Store mail filename as a new 'direntry' term, not as 'data'. Instead of storing the complete message filename in the data portion of a mail document we now store a 'direntry' term that contains the document ID of a directory document and also the basename of the message filename within that directory. This will allow us to easily store multple filenames for a single message, and will also allow us to find mail documents for files that previously existed in a directory but that have since been deleted. --- lib/database.cc | 25 +++++++++++--- lib/index.cc | 2 +- lib/message.cc | 77 +++++++++++++++++++++++++++++++++++++------ lib/notmuch-private.h | 7 +++- 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index f122c2e4..7d09119c 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -63,6 +63,11 @@ typedef struct { * * tag: Any tags associated with this message by the user. * + * direntry: A colon-separated pair of values (INTEGER:STRING), + * where INTEGER is the document ID of a directory + * document, and STRING is the name of a file within + * that directory for this mail message. + * * A mail document also has two values: * * TIMESTAMP: The time_t value corresponding to the message's @@ -75,8 +80,7 @@ typedef struct { * user in searching. But the database doesn't really care itself * about any of these. * - * Finally, the data portion of a mail document contains the path name - * of the mail message (relative to the database path). + * The data portion of a mail document is empty. * * Directory document * ------------------ @@ -122,6 +126,7 @@ prefix_t BOOLEAN_PREFIX_INTERNAL[] = { { "reference", "XREFERENCE" }, { "replyto", "XREPLYTO" }, { "directory", "XDIRECTORY" }, + { "direntry", "XDIRENTRY" }, { "parent", "XPARENT" }, }; @@ -717,6 +722,18 @@ _notmuch_database_find_directory_id (notmuch_database_t *notmuch, return status; } +const char * +_notmuch_database_get_directory_path (void *ctx, + notmuch_database_t *notmuch, + unsigned int doc_id) +{ + Xapian::Document document; + + document = find_document_for_doc_id (notmuch, doc_id); + + return talloc_strdup (ctx, document.get_data ().c_str ()); +} + /* Given a legal 'path' for the database, return the relative path. * * The return value will be a pointer to the originl path contents, @@ -812,6 +829,8 @@ notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, if (db_path != path) free ((char *) db_path); + talloc_free (local); + return ret; } @@ -844,8 +863,6 @@ notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, if (db_path != path) free ((char *) db_path); - talloc_free (local); - return ret; } diff --git a/lib/index.cc b/lib/index.cc index 125fa6c9..e58bc870 100644 --- a/lib/index.cc +++ b/lib/index.cc @@ -31,7 +31,7 @@ _index_address_mailbox (notmuch_message_t *message, { InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); const char *name, *addr; - void *local = talloc_new (NULL); + void *local = talloc_new (message); name = internet_address_get_name (address); addr = internet_address_mailbox_get_addr (mailbox); diff --git a/lib/message.cc b/lib/message.cc index 7c7ea7a1..3f533423 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -392,11 +392,15 @@ notmuch_message_get_replies (notmuch_message_t *message) * * This change will not be reflected in the database until the next * call to _notmuch_message_set_sync. */ -void +notmuch_status_t _notmuch_message_set_filename (notmuch_message_t *message, const char *filename) { - const char *relative; + const char *relative, *directory, *basename; + char *term; + Xapian::docid directory_id; + notmuch_status_t status; + void *local = talloc_new (message); if (message->filename) { talloc_free (message->filename); @@ -407,26 +411,79 @@ _notmuch_message_set_filename (notmuch_message_t *message, INTERNAL_ERROR ("Message filename cannot be NULL."); relative = _notmuch_database_relative_path (message->notmuch, filename); - message->doc.set_data (relative); + + status = _notmuch_database_split_path (local, relative, + &directory, &basename); + if (status) + return status; + + status = _notmuch_database_find_directory_id (message->notmuch, directory, + &directory_id); + if (status) + return status; + + term = talloc_asprintf (local, "%s%u:%s", + _find_prefix ("direntry"), directory_id, basename); + + message->doc.add_term (term); + + talloc_free (local); + + return NOTMUCH_STATUS_SUCCESS; } const char * notmuch_message_get_filename (notmuch_message_t *message) { - std::string filename_str; - const char *db_path; + const char *prefix = _find_prefix ("direntry"); + int prefix_len = strlen (prefix); + Xapian::TermIterator i; + char *direntry, *colon; + const char *db_path, *directory, *basename; + unsigned int directory_id; + void *local = talloc_new (message); if (message->filename) return message->filename; - filename_str = message->doc.get_data (); + i = message->doc.termlist_begin (); + i.skip_to (prefix); + + if (i != message->doc.termlist_end ()) + direntry = talloc_strdup (local, (*i).c_str ()); + + if (i == message->doc.termlist_end () || + strncmp (direntry, prefix, prefix_len)) + { + INTERNAL_ERROR ("message with no filename"); + } + + direntry += prefix_len; + + directory_id = strtol (direntry, &colon, 10); + + if (colon == NULL || *colon != ':') + INTERNAL_ERROR ("malformed direntry"); + + basename = colon + 1; + + *colon = '\0'; + db_path = notmuch_database_get_path (message->notmuch); - if (filename_str[0] != '/') - message->filename = talloc_asprintf (message, "%s/%s", db_path, - filename_str.c_str ()); + directory = _notmuch_database_get_directory_path (local, + message->notmuch, + directory_id); + + if (strlen (directory)) + message->filename = talloc_asprintf (message, "%s/%s/%s", + db_path, directory, basename); else - message->filename = talloc_strdup (message, filename_str.c_str ()); + message->filename = talloc_asprintf (message, "%s/%s", + db_path, basename); + talloc_free ((void *) directory); + + talloc_free (local); return message->filename; } diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 9f470c9f..5a930bb3 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -166,6 +166,11 @@ _notmuch_database_find_directory_id (notmuch_database_t *database, const char *path, unsigned int *directory_id); +const char * +_notmuch_database_get_directory_path (void *ctx, + notmuch_database_t *notmuch, + unsigned int doc_id); + /* thread.cc */ notmuch_thread_t * @@ -205,7 +210,7 @@ _notmuch_message_gen_terms (notmuch_message_t *message, const char *prefix_name, const char *text); -void +notmuch_status_t _notmuch_message_set_filename (notmuch_message_t *message, const char *filename); From 1376a90db622b71e0997fca52c50ccf34faeed22 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 12:08:46 -0800 Subject: [PATCH 15/63] database: Allowing storing multiple filenames for a single message ID. The library interface is unchanged so far, (still just notmuch_database_add_message), but internally, the old _set_filename function is now _add_filename instead. --- lib/database.cc | 24 ++++++++++++------------ lib/message.cc | 7 ++----- lib/notmuch-private.h | 2 +- lib/notmuch.h | 10 ++++++++-- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 7d09119c..553c9f82 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -843,7 +843,6 @@ notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, notmuch_private_status_t status; const char *db_path = NULL; time_t ret = 0; - void *local = talloc_new (notmuch); db_path = directory_db_path (path); @@ -1188,24 +1187,25 @@ notmuch_database_add_message (notmuch_database_t *notmuch, goto DONE; } + _notmuch_message_add_filename (message, filename); + /* Is this a newly created message object? */ if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - _notmuch_message_set_filename (message, filename); _notmuch_message_add_term (message, "type", "mail"); + + ret = _notmuch_database_link_message (notmuch, message, + message_file); + if (ret) + goto DONE; + + date = notmuch_message_file_get_header (message_file, "date"); + _notmuch_message_set_date (message, date); + + _notmuch_message_index_file (message, filename); } else { ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; - goto DONE; } - ret = _notmuch_database_link_message (notmuch, message, message_file); - if (ret) - goto DONE; - - date = notmuch_message_file_get_header (message_file, "date"); - _notmuch_message_set_date (message, date); - - _notmuch_message_index_file (message, filename); - _notmuch_message_sync (message); } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred adding message: %s.\n", diff --git a/lib/message.cc b/lib/message.cc index 3f533423..7d586903 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -385,15 +385,12 @@ notmuch_message_get_replies (notmuch_message_t *message) return _notmuch_messages_create (message->replies); } -/* Set the filename for 'message' to 'filename'. - * - * XXX: We should still figure out if we think it's important to store - * multiple filenames for email messages with identical message IDs. +/* Add an additional 'filename' for 'message'. * * This change will not be reflected in the database until the next * call to _notmuch_message_set_sync. */ notmuch_status_t -_notmuch_message_set_filename (notmuch_message_t *message, +_notmuch_message_add_filename (notmuch_message_t *message, const char *filename) { const char *relative, *directory, *basename; diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 5a930bb3..e9712832 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -211,7 +211,7 @@ _notmuch_message_gen_terms (notmuch_message_t *message, const char *text); notmuch_status_t -_notmuch_message_set_filename (notmuch_message_t *message, +_notmuch_message_add_filename (notmuch_message_t *message, const char *filename); void diff --git a/lib/notmuch.h b/lib/notmuch.h index 786b8e9f..e96474f6 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -698,14 +698,20 @@ notmuch_message_get_thread_id (notmuch_message_t *message); notmuch_messages_t * notmuch_message_get_replies (notmuch_message_t *message); -/* Get the filename for the email corresponding to 'message'. +/* Get a filename for the email corresponding to 'message'. * * The returned filename is an absolute filename, (the initial * component will match notmuch_database_get_path() ). * * The returned string belongs to the message so should not be * modified or freed by the caller (nor should it be referenced after - * the message is destroyed). */ + * the message is destroyed). + * + * Note: If this message corresponds to multiple files in the mail + * store, (that is, multiple files contain identical message IDs), + * this function will arbitrarily return a single one of those + * filenames. + */ const char * notmuch_message_get_filename (notmuch_message_t *message); From 498edff50373785c9dcc889d0fb6bc9bfc13dfcb Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 15:09:56 -0800 Subject: [PATCH 16/63] database: Abstract _filename_to_direntry from _add_message The code to map a filename to a direntry is something that we're going to want in a future _remove_message function, so put it in a new function _notmuch_database_filename_to_direntry . --- lib/database.cc | 32 ++++++++++++++++++++++++++++++++ lib/message.cc | 21 +++++---------------- lib/notmuch-private.h | 6 ++++++ 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 553c9f82..3ed19772 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -734,6 +734,38 @@ _notmuch_database_get_directory_path (void *ctx, return talloc_strdup (ctx, document.get_data ().c_str ()); } +/* Given a legal 'filename' for the database, (either relative to + * database path or absolute with initial components identical to + * database path), return a new string (with 'ctx' as the talloc + * owner) suitable for use as a direntry term value. + */ +notmuch_status_t +_notmuch_database_filename_to_direntry (void *ctx, + notmuch_database_t *notmuch, + const char *filename, + char **direntry) +{ + const char *relative, *directory, *basename; + Xapian::docid directory_id; + notmuch_status_t status; + + relative = _notmuch_database_relative_path (notmuch, filename); + + status = _notmuch_database_split_path (ctx, relative, + &directory, &basename); + if (status) + return status; + + status = _notmuch_database_find_directory_id (notmuch, directory, + &directory_id); + if (status) + return status; + + *direntry = talloc_asprintf (ctx, "%u:%s", directory_id, basename); + + return NOTMUCH_STATUS_SUCCESS; +} + /* Given a legal 'path' for the database, return the relative path. * * The return value will be a pointer to the originl path contents, diff --git a/lib/message.cc b/lib/message.cc index 7d586903..bd179519 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -393,11 +393,9 @@ notmuch_status_t _notmuch_message_add_filename (notmuch_message_t *message, const char *filename) { - const char *relative, *directory, *basename; - char *term; - Xapian::docid directory_id; notmuch_status_t status; void *local = talloc_new (message); + char *direntry; if (message->filename) { talloc_free (message->filename); @@ -407,22 +405,13 @@ _notmuch_message_add_filename (notmuch_message_t *message, if (filename == NULL) INTERNAL_ERROR ("Message filename cannot be NULL."); - relative = _notmuch_database_relative_path (message->notmuch, filename); - - status = _notmuch_database_split_path (local, relative, - &directory, &basename); + status = _notmuch_database_filename_to_direntry (local, + message->notmuch, + filename, &direntry); if (status) return status; - status = _notmuch_database_find_directory_id (message->notmuch, directory, - &directory_id); - if (status) - return status; - - term = talloc_asprintf (local, "%s%u:%s", - _find_prefix ("direntry"), directory_id, basename); - - message->doc.add_term (term); + _notmuch_message_add_term (message, "direntry", direntry); talloc_free (local); diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index e9712832..cb93c397 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -171,6 +171,12 @@ _notmuch_database_get_directory_path (void *ctx, notmuch_database_t *notmuch, unsigned int doc_id); +notmuch_status_t +_notmuch_database_filename_to_direntry (void *ctx, + notmuch_database_t *notmuch, + const char *filename, + char **direntry); + /* thread.cc */ notmuch_thread_t * From d7e5f5827e21be7dd8993e5a877bdb73cdb64325 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 15:11:32 -0800 Subject: [PATCH 17/63] database: Make find_unique_doc_id enforce uniqueness (for a debug build) Catching any violation of this unique-ness constraint is very much in line with similar, existing INTERNAL_ERROR cases. --- lib/database.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 3ed19772..5d300732 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -246,10 +246,19 @@ find_unique_doc_id (notmuch_database_t *notmuch, if (i == end) { *doc_id = 0; return NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND; - } else { - *doc_id = *i; - return NOTMUCH_PRIVATE_STATUS_SUCCESS; } + + *doc_id = *i; + +#if DEBUG_DATABASE_SANITY + i++; + + if (i != end) + INTERNAL_ERROR ("Term %s:%s is not unique as expected.\n", + prefix_name, value); +#endif + + return NOTMUCH_PRIVATE_STATUS_SUCCESS; } static Xapian::Document From 44a74912c78526c1942a101a8172206a32885425 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 15:12:52 -0800 Subject: [PATCH 18/63] database: Add new find_doc_ids_for_term interface. The existing find_doc_ids function is convenient when the caller doesn't want to be bothered constructing a term. But when the caller *does* have the term already, that interface is just wasteful. So we export a lower-level interface that maps a pre-constructed term to a document-ID iterators. --- lib/database.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 5d300732..cd3346fd 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -213,6 +213,17 @@ notmuch_status_to_string (notmuch_status_t status) } } +static void +find_doc_ids_for_term (notmuch_database_t *notmuch, + const char *term, + Xapian::PostingIterator *begin, + Xapian::PostingIterator *end) +{ + *begin = notmuch->xapian_db->postlist_begin (term); + + *end = notmuch->xapian_db->postlist_end (term); +} + static void find_doc_ids (notmuch_database_t *notmuch, const char *prefix_name, @@ -220,15 +231,12 @@ find_doc_ids (notmuch_database_t *notmuch, Xapian::PostingIterator *begin, Xapian::PostingIterator *end) { - Xapian::PostingIterator i; char *term; term = talloc_asprintf (notmuch, "%s%s", _find_prefix (prefix_name), value); - *begin = notmuch->xapian_db->postlist_begin (term); - - *end = notmuch->xapian_db->postlist_end (term); + find_doc_ids_for_term (notmuch, term, begin, end); talloc_free (term); } From f11aaa3678ab9d02d4ea98db1164772ed637404b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 21 Dec 2009 15:14:32 -0800 Subject: [PATCH 19/63] database: Add new, public notmuch_database_remove_message This will allow applications to support the removal of messages, (such as when a file is deleted from the mail store). No removal support is provided yet in commands such as "notmuch new". --- lib/database.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/notmuch.h | 13 ++++++++++++ 2 files changed, 66 insertions(+) diff --git a/lib/database.cc b/lib/database.cc index cd3346fd..a3824956 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1278,6 +1278,59 @@ notmuch_database_add_message (notmuch_database_t *notmuch, return ret; } +notmuch_status_t +notmuch_database_remove_message (notmuch_database_t *notmuch, + const char *filename) +{ + Xapian::WritableDatabase *db; + void *local = talloc_new (notmuch); + const char *direntry_prefix = _find_prefix ("direntry"); + char *direntry, *term; + Xapian::PostingIterator i, end; + Xapian::Document document; + notmuch_status_t status; + + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { + fprintf (stderr, "Attempted to update a read-only database.\n"); + return NOTMUCH_STATUS_READONLY_DATABASE; + } + + db = static_cast (notmuch->xapian_db); + + status = _notmuch_database_filename_to_direntry (local, notmuch, + filename, &direntry); + if (status) + return status; + + term = talloc_asprintf (notmuch, "%s%s", direntry_prefix, direntry); + + find_doc_ids_for_term (notmuch, term, &i, &end); + + for ( ; i != end; i++) { + Xapian::TermIterator j; + + document = find_document_for_doc_id (notmuch, *i); + + document.remove_term (term); + + j = document.termlist_begin (); + j.skip_to (direntry_prefix); + + /* Was this the last direntry in the message? */ + if (j == document.termlist_end () || + strncmp ((*j).c_str (), direntry_prefix, strlen (direntry_prefix))) + { + db->delete_document (document.get_docid ()); + } else { + db->replace_document (document.get_docid (), document); + } + } + + talloc_free (local); + + return NOTMUCH_STATUS_SUCCESS; +} + notmuch_tags_t * _notmuch_convert_tags (void *ctx, Xapian::TermIterator &i, Xapian::TermIterator &end) diff --git a/lib/notmuch.h b/lib/notmuch.h index e96474f6..2e598656 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -267,6 +267,19 @@ notmuch_database_add_message (notmuch_database_t *database, const char *filename, notmuch_message_t **message); +/* Remove a message from the given notmuch database. + * + * Note that the only this particular filename association is removed + * from the database. If the same message (as determined by the + * message ID) is still available via other filenames, then the + * message will persist in the database for those filenames. When the + * last filename is removed for a particular message, the database + * content for that message will be entirely removed. + */ +notmuch_status_t +notmuch_database_remove_message (notmuch_database_t *database, + const char *filename); + /* Find a message with the given message_id. * * If the database contains a message with the given message_id, then From 95deec1b2767695bce5f4e3dc3336859825682f7 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 9 Dec 2009 00:51:52 -0800 Subject: [PATCH 20/63] Prototypes for directory tracking There's no functionality here yet---just a sketch of what the interface could look like. --- lib/notmuch.h | 87 ++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/lib/notmuch.h b/lib/notmuch.h index 2e598656..bfcd4702 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -114,6 +114,8 @@ typedef struct _notmuch_thread notmuch_thread_t; typedef struct _notmuch_messages notmuch_messages_t; typedef struct _notmuch_message notmuch_message_t; typedef struct _notmuch_tags notmuch_tags_t; +typedef struct _notmuch_directory notmuch_directory_t; +typedef struct _notmuch_files notmuch_files_t; /* Create a new, empty notmuch database located at 'path'. * @@ -178,56 +180,47 @@ notmuch_database_close (notmuch_database_t *database); const char * notmuch_database_get_path (notmuch_database_t *database); -/* Store an mtime within the database for 'path'. +/* Read the stored contents of a directory from the database. * - * Here,'path' should be the path of a directory relative to the path - * of 'database' (see notmuch_database_get_path), or else should be an - * absolute path with initial components that match the path of + * Here, 'dirname' should be a path relative to the path of + * 'database' (see notmuch_database_get_path), or else should be an + * absolute filename with initial components that match the path of * 'database'. * - * The intention is for the caller to use the mtime to allow efficient - * identification of new messages to be added to the database. The - * recommended usage is as follows: - * - * o Read the mtime of a directory from the filesystem - * - * o Call add_message for all mail files in the directory - * - * o Call notmuch_database_set_directory_mtime - * - * Then, when wanting to check for updates to the directory in the - * future, the client can call notmuch_database_get_directory_mtime - * and know that it only needs to add files if the mtime of the - * directory and files are newer than the stored timestamp. - * - * Note: The notmuch_database_get_directory_mtime function does not - * allow the caller to distinguish a timestamp of 0 from a - * non-existent timestamp. So don't store a timestamp of 0 unless you - * are comfortable with that. - * - * Return value: - * - * NOTMUCH_STATUS_SUCCESS: mtime successfully stored in database. - * - * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception - * occurred, mtime not stored. + * The stored mtime of the directory along with a list of messages + * and directories in the database contained in 'dirname' are + * returned in 'directory'. The entries are sorted by filename. */ notmuch_status_t -notmuch_database_set_directory_mtime (notmuch_database_t *database, - const char *path, - time_t mtime); +notmuch_database_read_directory (notmuch_database_t *database, + const char *filename, + notmuch_directory_t **directory); -/* Retrieve the mtime from the database for 'path'. - * - * Returns the mtime value previously stored by calling - * notmuch_database_set_directory_mtime with the same 'path'. - * - * Returns 0 if no mtime is stored for 'path' or if any error occurred - * querying the database. +/* Return the recorded 'mtime' for the given directory */ time_t -notmuch_database_get_directory_mtime (notmuch_database_t *database, - const char *path); +notmuch_directory_get_mtime (notmuch_directory_t *directory); + +/* Return a notmuch_files_t iterator for all regular files in 'directory'. + */ +notmuch_files_t * +notmuch_directory_get_files (notmuch_directory_t *directory); + +/* Return a notmuch_files_t iterator for all sub-directories of + * 'directory'. + */ +notmuch_files_t * +notmuch_directory_get_subdirs (notmuch_directory_t *directory); + +/* Does the given notmuch_files_t object contain any more results. + */ +notmuch_bool_t +notmuch_files_has_more (notmuch_files_t *files); + +/* Get the current filename from 'files' as a string. + */ +const char * +notmuch_files_get_filename (notmuch_files_t *files); /* Add a new message to the given notmuch database. * @@ -728,6 +721,16 @@ notmuch_message_get_replies (notmuch_message_t *message); const char * notmuch_message_get_filename (notmuch_message_t *message); +/* Remove a filename for the email corresponding to 'message'. + * + * This removes the association between a filename and a message, + * when the last filename is gone, the entire message is removed + * from the database. + */ +notmuch_status_t +notmuch_message_remove_filename (notmuch_message_t *message, + const char *filename); + /* Message flags */ typedef enum _notmuch_message_flag { NOTMUCH_MESSAGE_FLAG_MATCH, From ba07fe1819b59c9ecf7041834699d8959a604828 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 13:06:24 -0800 Subject: [PATCH 21/63] Revamp the proposed directory-tracking API slightly. This commit contains my changes to the API proposed by Keith. Nothing is dramatically different. There are minor things like changing notmuch_files_t to notmuch_filenames_t and then various things needed for completeness as noticed while implementing this, (such as notmuch_directory_destroy and notmuch_directory_set_mtime). --- lib/notmuch.h | 157 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 49 deletions(-) diff --git a/lib/notmuch.h b/lib/notmuch.h index bfcd4702..13efd5db 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -115,7 +115,7 @@ typedef struct _notmuch_messages notmuch_messages_t; typedef struct _notmuch_message notmuch_message_t; typedef struct _notmuch_tags notmuch_tags_t; typedef struct _notmuch_directory notmuch_directory_t; -typedef struct _notmuch_files notmuch_files_t; +typedef struct _notmuch_filenames notmuch_filenames_t; /* Create a new, empty notmuch database located at 'path'. * @@ -180,47 +180,19 @@ notmuch_database_close (notmuch_database_t *database); const char * notmuch_database_get_path (notmuch_database_t *database); -/* Read the stored contents of a directory from the database. +/* Retrieve a directory object from the database for 'path'. * - * Here, 'dirname' should be a path relative to the path of - * 'database' (see notmuch_database_get_path), or else should be an - * absolute filename with initial components that match the path of - * 'database'. + * Here, 'path' should be a path relative to the path of 'database' + * (see notmuch_database_get_path), or else should be an absolute path + * with initial components that match the path of 'database'. * - * The stored mtime of the directory along with a list of messages - * and directories in the database contained in 'dirname' are - * returned in 'directory'. The entries are sorted by filename. + * Note: The resulting notmuch_directory_t object will represent the + * state as it currently exists in the database, (and will not reflect + * subsequent changes). */ -notmuch_status_t -notmuch_database_read_directory (notmuch_database_t *database, - const char *filename, - notmuch_directory_t **directory); - -/* Return the recorded 'mtime' for the given directory - */ -time_t -notmuch_directory_get_mtime (notmuch_directory_t *directory); - -/* Return a notmuch_files_t iterator for all regular files in 'directory'. - */ -notmuch_files_t * -notmuch_directory_get_files (notmuch_directory_t *directory); - -/* Return a notmuch_files_t iterator for all sub-directories of - * 'directory'. - */ -notmuch_files_t * -notmuch_directory_get_subdirs (notmuch_directory_t *directory); - -/* Does the given notmuch_files_t object contain any more results. - */ -notmuch_bool_t -notmuch_files_has_more (notmuch_files_t *files); - -/* Get the current filename from 'files' as a string. - */ -const char * -notmuch_files_get_filename (notmuch_files_t *files); +notmuch_directory_t * +notmuch_database_get_directory (notmuch_database_t *database, + const char *path); /* Add a new message to the given notmuch database. * @@ -721,16 +693,6 @@ notmuch_message_get_replies (notmuch_message_t *message); const char * notmuch_message_get_filename (notmuch_message_t *message); -/* Remove a filename for the email corresponding to 'message'. - * - * This removes the association between a filename and a message, - * when the last filename is gone, the entire message is removed - * from the database. - */ -notmuch_status_t -notmuch_message_remove_filename (notmuch_message_t *message, - const char *filename); - /* Message flags */ typedef enum _notmuch_message_flag { NOTMUCH_MESSAGE_FLAG_MATCH, @@ -951,6 +913,103 @@ notmuch_tags_advance (notmuch_tags_t *tags); void notmuch_tags_destroy (notmuch_tags_t *tags); +/* Store an mtime within the database for 'directory'. + * + * The 'directory' should be an object retrieved from the database + * with notmuch_database_get_directory for a particular path. + * + * The intention is for the caller to use the mtime to allow efficient + * identification of new messages to be added to the database. The + * recommended usage is as follows: + * + * o Read the mtime of a directory from the filesystem + * + * o Call add_message for all mail files in the directory + * + * o Call notmuch_directory_set_mtime with the mtime read from the + * filesystem. + * + * Then, when wanting to check for updates to the directory in the + * future, the client can call notmuch_directory_get_mtime and know + * that it only needs to add files if the mtime of the directory and + * files are newer than the stored timestamp. + * + * Note: The notmuch_directory_get_mtime function does not allow the + * caller to distinguish a timestamp of 0 from a non-existent + * timestamp. So don't store a timestamp of 0 unless you are + * comfortable with that. + * + * Return value: + * + * NOTMUCH_STATUS_SUCCESS: mtime successfully stored in database. + * + * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception + * occurred, mtime not stored. + */ +notmuch_status_t +notmuch_directory_set_mtime (notmuch_directory_t *directory, + time_t mtime); + +/* Get the mtime of a directory, (as previously stored with + * notmuch_directory_set_mtime). + * + * Returns 0 if not mtime has previously been stored for this + * directory.*/ +time_t +notmuch_directory_get_mtime (notmuch_directory_t *directory); + +/* Get a notmuch_filenames_t iterator listing all the filenames of + * messages in the database within the given directory. + * + * The returned filenames will be the basename-entries only (not + * complete paths). */ +notmuch_filenames_t * +notmuch_directory_get_child_files (notmuch_directory_t *directory); + +/* Get a notmuch_filenams_t iterator listing all the filenames of + * sub-directories in the database within the given directory. + * + * The returned filenames will be the basename-entries only (not + * complete paths). */ +notmuch_filenames_t * +notmuch_directory_get_child_directories (notmuch_directory_t *directory); + +/* Destroy a notmuch_directory_t object. */ +void +notmuch_directory_destroy (notmuch_directory_t *directory); + +/* Does the given notmuch_filenames_t object contain any more + * filenames. + * + * When this function returns TRUE, notmuch_filenames_get will return + * a valid string. Whereas when this function returns FALSE, + * notmuch_filenames_get will return NULL. + */ +notmuch_bool_t +notmuch_filenames_has_more (notmuch_filenames_t *filenames); + +/* Get the current filename from 'filenames' as a string. + * + * Note: The returned string belongs to 'filenames' and has a lifetime + * identical to it (and the directory to which it ultimately belongs). + */ +const char * +notmuch_filenames_get (notmuch_filenames_t *filenames); + +/* Advance the 'filenames' iterator to the next filename. + */ +void +notmuch_filenames_advance (notmuch_filenames_t *filenames); + +/* Destroy a notmuch_filenames_t object. + * + * It's not strictly necessary to call this function. All memory from + * the notmuch_filenames_t object will be reclaimed when the + * containing directory object is destroyed. + */ +void +notmuch_filenames_destroy (notmuch_filenames_t *filenames); + NOTMUCH_END_DECLS #endif From d807e28f43579ecc91aa40ae3e42760991c2f810 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 13:29:23 -0800 Subject: [PATCH 22/63] lib: Implement new notmuch_directory_t API. This new directory ojbect provides all the infrastructure needed to detect when files or directories are deleted or renamed. There's still code needed on top of this (within "notmuch new") to actually do that detection. --- lib/Makefile.local | 1 + lib/database.cc | 234 ++++++++---------------------- lib/directory.cc | 329 ++++++++++++++++++++++++++++++++++++++++++ lib/message.cc | 4 +- lib/notmuch-private.h | 19 +++ notmuch-new.c | 7 +- 6 files changed, 413 insertions(+), 181 deletions(-) create mode 100644 lib/directory.cc diff --git a/lib/Makefile.local b/lib/Makefile.local index a7562c9b..70489e17 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -11,6 +11,7 @@ libnotmuch_c_srcs = \ libnotmuch_cxx_srcs = \ $(dir)/database.cc \ + $(dir)/directory.cc \ $(dir)/index.cc \ $(dir)/message.cc \ $(dir)/query.cc \ diff --git a/lib/database.cc b/lib/database.cc index a3824956..510d13cb 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -63,10 +63,11 @@ typedef struct { * * tag: Any tags associated with this message by the user. * - * direntry: A colon-separated pair of values (INTEGER:STRING), - * where INTEGER is the document ID of a directory - * document, and STRING is the name of a file within - * that directory for this mail message. + * file-direntry: A colon-separated pair of values + * (INTEGER:STRING), where INTEGER is the + * document ID of a directory document, and + * STRING is the name of a file within that + * directory for this mail message. * * A mail document also has two values: * @@ -88,22 +89,28 @@ typedef struct { * maintain data necessary to allow for efficient polling of mail * directories. * - * The directory document contains the following terms: + * All directory documents contain one term: * * directory: The directory path (relative to the database path) * Or the SHA1 sum of the directory path (if the * path itself is too long to fit in a Xapian * term). * - * parent: The document ID of the parent directory document. - * Top-level directories will have a parent value of 0. + * And all directory documents for directories other than top-level + * directories also contain the following term: * - * and has a single value: + * directory-direntry: A colon-separated pair of values + * (INTEGER:STRING), where INTEGER is the + * document ID of the parent directory + * document, and STRING is the name of this + * directory within that parent. + * + * All directory documents have a single value: * * TIMESTAMP: The mtime of the directory (at last scan) * * The data portion of a directory document contains the path of the - * directory (relative to the datbase path). + * directory (relative to the database path). */ /* With these prefix values we follow the conventions published here: @@ -122,25 +129,25 @@ typedef struct { */ prefix_t BOOLEAN_PREFIX_INTERNAL[] = { - { "type", "T" }, - { "reference", "XREFERENCE" }, - { "replyto", "XREPLYTO" }, - { "directory", "XDIRECTORY" }, - { "direntry", "XDIRENTRY" }, - { "parent", "XPARENT" }, + { "type", "T" }, + { "reference", "XREFERENCE" }, + { "replyto", "XREPLYTO" }, + { "directory", "XDIRECTORY" }, + { "file-direntry", "XFDIRENTRY" }, + { "directory-direntry", "XDDIRENTRY" }, }; prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { - { "thread", "G" }, - { "tag", "K" }, - { "id", "Q" } + { "thread", "G" }, + { "tag", "K" }, + { "id", "Q" } }; prefix_t PROBABILISTIC_PREFIX[]= { - { "from", "XFROM" }, - { "to", "XTO" }, - { "attachment", "XATTACHMENT" }, - { "subject", "XSUBJECT"} + { "from", "XFROM" }, + { "to", "XTO" }, + { "attachment", "XATTACHMENT" }, + { "subject", "XSUBJECT"} }; int @@ -241,11 +248,11 @@ find_doc_ids (notmuch_database_t *notmuch, talloc_free (term); } -static notmuch_private_status_t -find_unique_doc_id (notmuch_database_t *notmuch, - const char *prefix_name, - const char *value, - unsigned int *doc_id) +notmuch_private_status_t +_notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + unsigned int *doc_id) { Xapian::PostingIterator i, end; @@ -275,26 +282,6 @@ find_document_for_doc_id (notmuch_database_t *notmuch, unsigned doc_id) return notmuch->xapian_db->get_document (doc_id); } -static notmuch_private_status_t -find_unique_document (notmuch_database_t *notmuch, - const char *prefix_name, - const char *value, - Xapian::Document *document, - unsigned int *doc_id) -{ - notmuch_private_status_t status; - - status = find_unique_doc_id (notmuch, prefix_name, value, doc_id); - - if (status) { - *document = Xapian::Document (); - return status; - } - - *document = find_document_for_doc_id (notmuch, *doc_id); - return NOTMUCH_PRIVATE_STATUS_SUCCESS; -} - notmuch_message_t * notmuch_database_find_message (notmuch_database_t *notmuch, const char *message_id) @@ -302,7 +289,8 @@ notmuch_database_find_message (notmuch_database_t *notmuch, notmuch_private_status_t status; unsigned int doc_id; - status = find_unique_doc_id (notmuch, "id", message_id, &doc_id); + status = _notmuch_database_find_unique_doc_id (notmuch, "id", + message_id, &doc_id); if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) return NULL; @@ -593,13 +581,6 @@ notmuch_database_get_path (notmuch_database_t *notmuch) return notmuch->path; } -static notmuch_private_status_t -find_directory_document (notmuch_database_t *notmuch, const char *db_path, - Xapian::Document *doc, unsigned int *doc_id) -{ - return find_unique_document (notmuch, "directory", db_path, doc, doc_id); -} - /* We allow the user to use arbitrarily long paths for directories. But * we have a term-length limit. So if we exceed that, we'll use the * SHA-1 of the path for the database term. @@ -608,8 +589,8 @@ find_directory_document (notmuch_database_t *notmuch, const char *db_path, * does not, then the caller is responsible to free() the returned * value. */ -static const char * -directory_db_path (const char *path) +const char * +_notmuch_database_get_directory_db_path (const char *path) { int term_len = strlen (_find_prefix ("directory")) + strlen (path); @@ -705,38 +686,25 @@ _notmuch_database_find_directory_id (notmuch_database_t *notmuch, const char *path, unsigned int *directory_id) { - notmuch_private_status_t private_status; - notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; - const char *db_path; + notmuch_directory_t *directory; + notmuch_status_t status; if (path == NULL) { *directory_id = 0; return NOTMUCH_STATUS_SUCCESS; } - db_path = directory_db_path (path); - - private_status = find_unique_doc_id (notmuch, "directory", - db_path, directory_id); - if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - status = notmuch_database_set_directory_mtime (notmuch, - path, 0); - if (status) - goto DONE; - - private_status = find_unique_doc_id (notmuch, "directory", - db_path, directory_id); - status = COERCE_STATUS (private_status, "_find_directory_id"); + directory = _notmuch_directory_create (notmuch, path, &status); + if (status) { + *directory_id = -1; + return status; } - DONE: - if (db_path != path) - free ((char *) db_path); + *directory_id = _notmuch_directory_get_document_id (directory); - if (status) - *directory_id = -1; + notmuch_directory_destroy (directory); - return status; + return NOTMUCH_STATUS_SUCCESS; } const char * @@ -817,101 +785,13 @@ _notmuch_database_relative_path (notmuch_database_t *notmuch, return relative; } -notmuch_status_t -notmuch_database_set_directory_mtime (notmuch_database_t *notmuch, - const char *path, - time_t mtime) +notmuch_directory_t * +notmuch_database_get_directory (notmuch_database_t *notmuch, + const char *path) { - Xapian::Document doc; - Xapian::WritableDatabase *db; - unsigned int doc_id; - notmuch_private_status_t status; - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; - const char *parent, *db_path = NULL; - unsigned int parent_id; - void *local = talloc_new (notmuch); + notmuch_status_t status; - if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { - fprintf (stderr, "Attempted to update a read-only database.\n"); - return NOTMUCH_STATUS_READONLY_DATABASE; - } - - path = _notmuch_database_relative_path (notmuch, path); - - db = static_cast (notmuch->xapian_db); - db_path = directory_db_path (path); - - try { - status = find_directory_document (notmuch, db_path, &doc, &doc_id); - - doc.add_value (NOTMUCH_VALUE_TIMESTAMP, - Xapian::sortable_serialise (mtime)); - - if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { - char *term = talloc_asprintf (local, "%s%s", - _find_prefix ("directory"), db_path); - doc.add_term (term); - - doc.set_data (path); - - _notmuch_database_split_path (local, path, &parent, NULL); - - _notmuch_database_find_directory_id (notmuch, parent, &parent_id); - - term = talloc_asprintf (local, "%s%u", - _find_prefix ("parent"), - parent_id); - doc.add_term (term); - - db->add_document (doc); - } else { - db->replace_document (doc_id, doc); - } - - } catch (const Xapian::Error &error) { - fprintf (stderr, "A Xapian exception occurred setting directory mtime: %s.\n", - error.get_msg().c_str()); - notmuch->exception_reported = TRUE; - ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; - } - - if (db_path != path) - free ((char *) db_path); - - talloc_free (local); - - return ret; -} - -time_t -notmuch_database_get_directory_mtime (notmuch_database_t *notmuch, - const char *path) -{ - Xapian::Document doc; - unsigned int doc_id; - notmuch_private_status_t status; - const char *db_path = NULL; - time_t ret = 0; - - db_path = directory_db_path (path); - - try { - status = find_directory_document (notmuch, db_path, &doc, &doc_id); - - if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) - goto DONE; - - ret = Xapian::sortable_unserialise (doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); - } catch (Xapian::Error &error) { - ret = 0; - goto DONE; - } - - DONE: - if (db_path != path) - free ((char *) db_path); - - return ret; + return _notmuch_directory_create (notmuch, path, &status); } /* Find the thread ID to which the message with 'message_id' belongs. @@ -1284,7 +1164,7 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, { Xapian::WritableDatabase *db; void *local = talloc_new (notmuch); - const char *direntry_prefix = _find_prefix ("direntry"); + const char *prefix = _find_prefix ("file-direntry"); char *direntry, *term; Xapian::PostingIterator i, end; Xapian::Document document; @@ -1302,7 +1182,7 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, if (status) return status; - term = talloc_asprintf (notmuch, "%s%s", direntry_prefix, direntry); + term = talloc_asprintf (notmuch, "%s%s", prefix, direntry); find_doc_ids_for_term (notmuch, term, &i, &end); @@ -1314,11 +1194,11 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, document.remove_term (term); j = document.termlist_begin (); - j.skip_to (direntry_prefix); + j.skip_to (prefix); - /* Was this the last direntry in the message? */ + /* Was this the last file-direntry in the message? */ if (j == document.termlist_end () || - strncmp ((*j).c_str (), direntry_prefix, strlen (direntry_prefix))) + strncmp ((*j).c_str (), prefix, strlen (prefix))) { db->delete_document (document.get_docid ()); } else { diff --git a/lib/directory.cc b/lib/directory.cc new file mode 100644 index 00000000..196f7805 --- /dev/null +++ b/lib/directory.cc @@ -0,0 +1,329 @@ +/* directory.cc - Results of directory-based searches from a notmuch database + * + * Copyright © 2009 Carl Worth + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: Carl Worth + */ + +#include "notmuch-private.h" +#include "database-private.h" + +#include + +struct _notmuch_filenames { + Xapian::TermIterator iterator; + Xapian::TermIterator end; + int prefix_len; + char *filename; +}; + +/* We end up having to call the destructors explicitly because we had + * to use "placement new" in order to initialize C++ objects within a + * block that we allocated with talloc. So C++ is making talloc + * slightly less simple to use, (we wouldn't need + * talloc_set_destructor at all otherwise). + */ +static int +_notmuch_filenames_destructor (notmuch_filenames_t *filenames) +{ + filenames->iterator.~TermIterator (); + filenames->end.~TermIterator (); + + return 0; +} + +/* Create an iterator to iterate over the basenames of files (or + * directories) that all share a common parent directory. + * + * The code here is general enough to be reused for any case of + * iterating over the non-prefixed portion of terms sharing a common + * prefix. + */ +notmuch_filenames_t * +_notmuch_filenames_create (void *ctx, + notmuch_database_t *notmuch, + const char *prefix) +{ + notmuch_filenames_t *filenames; + + filenames = talloc (ctx, notmuch_filenames_t); + if (unlikely (filenames == NULL)) + return NULL; + + new (&filenames->iterator) Xapian::TermIterator (); + new (&filenames->end) Xapian::TermIterator (); + + talloc_set_destructor (filenames, _notmuch_filenames_destructor); + + filenames->iterator = notmuch->xapian_db->allterms_begin (prefix); + filenames->end = notmuch->xapian_db->allterms_end (prefix); + + filenames->prefix_len = strlen (prefix); + + filenames->filename = NULL; + + return filenames; +} + +notmuch_bool_t +notmuch_filenames_has_more (notmuch_filenames_t *filenames) +{ + return (filenames->iterator != filenames->end); +} + +const char * +notmuch_filenames_get (notmuch_filenames_t *filenames) +{ + if (filenames->filename == NULL) { + std::string term = *filenames->iterator; + + filenames->filename = talloc_strdup (filenames, + term.c_str () + + filenames->prefix_len); + } + + return filenames->filename; +} + +void +notmuch_filenames_advance (notmuch_filenames_t *filenames) +{ + if (filenames->filename) { + talloc_free (filenames->filename); + filenames->filename = NULL; + } + + if (filenames->iterator != filenames->end) + filenames->iterator++; +} + +void +notmuch_filenames_destroy (notmuch_filenames_t *filenames) +{ + talloc_free (filenames); +} + +struct _notmuch_directory { + notmuch_database_t *notmuch; + Xapian::docid document_id; + Xapian::Document doc; + time_t mtime; +}; + +/* We end up having to call the destructor explicitly because we had + * to use "placement new" in order to initialize C++ objects within a + * block that we allocated with talloc. So C++ is making talloc + * slightly less simple to use, (we wouldn't need + * talloc_set_destructor at all otherwise). + */ +static int +_notmuch_directory_destructor (notmuch_directory_t *directory) +{ + directory->doc.~Document (); + + return 0; +} + +static notmuch_private_status_t +find_directory_document (notmuch_database_t *notmuch, + const char *db_path, + Xapian::Document *document) +{ + notmuch_private_status_t status; + Xapian::docid doc_id; + + status = _notmuch_database_find_unique_doc_id (notmuch, "directory", + db_path, &doc_id); + if (status) { + *document = Xapian::Document (); + return status; + } + + *document = notmuch->xapian_db->get_document (doc_id); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; +} + +notmuch_directory_t * +_notmuch_directory_create (notmuch_database_t *notmuch, + const char *path, + notmuch_status_t *status_ret) +{ + Xapian::WritableDatabase *db; + notmuch_directory_t *directory; + notmuch_private_status_t private_status; + const char *db_path; + + *status_ret = NOTMUCH_STATUS_SUCCESS; + + path = _notmuch_database_relative_path (notmuch, path); + + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { + fprintf (stderr, "Attempted to update a read-only database.\n"); + *status_ret = NOTMUCH_STATUS_READONLY_DATABASE; + return NULL; + } + + db = static_cast (notmuch->xapian_db); + + directory = talloc (notmuch, notmuch_directory_t); + if (unlikely (directory == NULL)) + return NULL; + + directory->notmuch = notmuch; + + /* "placement new"---not actually allocating memory */ + new (&directory->doc) Xapian::Document; + + talloc_set_destructor (directory, _notmuch_directory_destructor); + + db_path = _notmuch_database_get_directory_db_path (path); + + try { + Xapian::TermIterator i, end; + + private_status = find_directory_document (notmuch, db_path, + &directory->doc); + directory->document_id = directory->doc.get_docid (); + + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { + void *local = talloc_new (directory); + const char *parent, *basename; + Xapian::docid parent_id; + char *term = talloc_asprintf (local, "%s%s", + _find_prefix ("directory"), db_path); + directory->doc.add_term (term); + + directory->doc.set_data (path); + + _notmuch_database_split_path (local, path, &parent, &basename); + + _notmuch_database_find_directory_id (notmuch, parent, &parent_id); + + if (basename) { + term = talloc_asprintf (local, "%s%u:%s", + _find_prefix ("directory-direntry"), + parent_id, basename); + directory->doc.add_term (term); + } + + directory->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (0)); + + directory->document_id = db->add_document (directory->doc); + talloc_free (local); + } + + directory->mtime = Xapian::sortable_unserialise ( + directory->doc.get_value (NOTMUCH_VALUE_TIMESTAMP)); + } catch (const Xapian::Error &error) { + fprintf (stderr, + "A Xapian exception occurred creating a directory: %s.\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + notmuch_directory_destroy (directory); + *status_ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION; + return NULL; + } + + if (db_path != path) + free ((char *) db_path); + + return directory; +} + +unsigned int +_notmuch_directory_get_document_id (notmuch_directory_t *directory) +{ + return directory->document_id; +} + +notmuch_status_t +notmuch_directory_set_mtime (notmuch_directory_t *directory, + time_t mtime) +{ + notmuch_database_t *notmuch = directory->notmuch; + Xapian::WritableDatabase *db; + + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { + fprintf (stderr, "Attempted to update a read-only database.\n"); + return NOTMUCH_STATUS_READONLY_DATABASE; + } + + db = static_cast (notmuch->xapian_db); + + try { + directory->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, + Xapian::sortable_serialise (mtime)); + + db->replace_document (directory->document_id, directory->doc); + } catch (const Xapian::Error &error) { + fprintf (stderr, + "A Xapian exception occurred setting directory mtime: %s.\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + return NOTMUCH_STATUS_XAPIAN_EXCEPTION; + } + + return NOTMUCH_STATUS_SUCCESS; +} + +time_t +notmuch_directory_get_mtime (notmuch_directory_t *directory) +{ + return directory->mtime; +} + +notmuch_filenames_t * +notmuch_directory_get_child_files (notmuch_directory_t *directory) +{ + char *term; + notmuch_filenames_t *child_files; + + term = talloc_asprintf (directory, "%s%u:", + _find_prefix ("file-direntry"), + directory->document_id); + + child_files = _notmuch_filenames_create (directory, + directory->notmuch, term); + + talloc_free (term); + + return child_files; +} + +notmuch_filenames_t * +notmuch_directory_get_child_directories (notmuch_directory_t *directory) +{ + char *term; + notmuch_filenames_t *child_directories; + + term = talloc_asprintf (directory, "%s%u:", + _find_prefix ("directory-direntry"), + directory->document_id); + + child_directories = _notmuch_filenames_create (directory, + directory->notmuch, term); + + talloc_free (term); + + return child_directories; +} + +void +notmuch_directory_destroy (notmuch_directory_t *directory) +{ + talloc_free (directory); +} diff --git a/lib/message.cc b/lib/message.cc index bd179519..82e8fce7 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -411,7 +411,7 @@ _notmuch_message_add_filename (notmuch_message_t *message, if (status) return status; - _notmuch_message_add_term (message, "direntry", direntry); + _notmuch_message_add_term (message, "file-direntry", direntry); talloc_free (local); @@ -421,7 +421,7 @@ _notmuch_message_add_filename (notmuch_message_t *message, const char * notmuch_message_get_filename (notmuch_message_t *message) { - const char *prefix = _find_prefix ("direntry"); + const char *prefix = _find_prefix ("file-direntry"); int prefix_len = strlen (prefix); Xapian::TermIterator i; char *direntry, *colon; diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index cb93c397..8b582640 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -161,6 +161,15 @@ _notmuch_database_split_path (void *ctx, const char **directory, const char **basename); +const char * +_notmuch_database_get_directory_db_path (const char *path); + +notmuch_private_status_t +_notmuch_database_find_unique_doc_id (notmuch_database_t *notmuch, + const char *prefix_name, + const char *value, + unsigned int *doc_id); + notmuch_status_t _notmuch_database_find_directory_id (notmuch_database_t *database, const char *path, @@ -177,6 +186,16 @@ _notmuch_database_filename_to_direntry (void *ctx, const char *filename, char **direntry); +/* directory.cc */ + +notmuch_directory_t * +_notmuch_directory_create (notmuch_database_t *notmuch, + const char *path, + notmuch_status_t *status_ret); + +unsigned int +_notmuch_directory_get_document_id (notmuch_directory_t *directory); + /* thread.cc */ notmuch_thread_t * diff --git a/notmuch-new.c b/notmuch-new.c index 2a929c56..ee6f196e 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -143,10 +143,13 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_message_t *message = NULL; struct dirent **namelist = NULL; int num_entries; + notmuch_directory_t *directory; path_mtime = st->st_mtime; - path_dbtime = notmuch_database_get_directory_mtime (notmuch, path); + directory = notmuch_database_get_directory (notmuch, path); + path_dbtime = notmuch_directory_get_mtime (directory); + num_entries = scandir (path, &namelist, 0, ino_cmp); if (num_entries == -1) { @@ -277,7 +280,7 @@ add_files_recursive (notmuch_database_t *notmuch, next = NULL; } - status = notmuch_database_set_directory_mtime (notmuch, path, path_mtime); + status = notmuch_directory_set_mtime (directory, path_mtime); if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; From 3f32fd8a1c06d417bdcb467bac2805f658cb5476 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:01:58 -0800 Subject: [PATCH 23/63] Add missing comment for NOTMUCH_STATUS_READONLY_DATABASE. And adjust the string representation of the same to match. --- lib/database.cc | 2 +- lib/notmuch.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/database.cc b/lib/database.cc index 510d13cb..205d0360 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -199,7 +199,7 @@ notmuch_status_to_string (notmuch_status_t status) case NOTMUCH_STATUS_OUT_OF_MEMORY: return "Out of memory"; case NOTMUCH_STATUS_READONLY_DATABASE: - return "The database is read-only"; + return "Attempt to write to a read-only database"; case NOTMUCH_STATUS_XAPIAN_EXCEPTION: return "A Xapian exception occurred"; case NOTMUCH_STATUS_FILE_ERROR: diff --git a/lib/notmuch.h b/lib/notmuch.h index 13efd5db..d9fe152a 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -57,6 +57,9 @@ typedef int notmuch_bool_t; * value. Instead we should map to things like DATABASE_LOCKED or * whatever. * + * NOTMUCH_STATUS_READONLY_DATABASE: An attempt was made to write to a + * database opened in read-only mode. + * * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception occurred * * NOTMUCH_STATUS_FILE_ERROR: An error occurred trying to read or From 341d49b0610fcf725da618d87fda577a3d512343 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:05:57 -0800 Subject: [PATCH 24/63] Makefiles: Use .DEFAULT to support arbitrary targets from sub directories. Taking advantage of the .DEFAULT construct means that we won't need to explicitly list targets such as "clean", etc. in each sub-Makefile. --- compat/Makefile | 4 ++-- lib/Makefile | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/compat/Makefile b/compat/Makefile index 9a29ffcf..fa25832e 100644 --- a/compat/Makefile +++ b/compat/Makefile @@ -1,5 +1,5 @@ all: $(MAKE) -C .. all -clean: - $(MAKE) -C .. clean +.DEFAULT: + $(MAKE) -C .. $@ diff --git a/lib/Makefile b/lib/Makefile index 9a29ffcf..b6859eac 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,7 @@ +# See Makfefile.local for the list of files to be compiled in this +# directory. all: $(MAKE) -C .. all -clean: - $(MAKE) -C .. clean +.DEFAULT: + $(MAKE) -C .. $@ From 63ef5cd07320f70db84d94cc88c4fd3ead534a87 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:11:21 -0800 Subject: [PATCH 25/63] Make the add_files function static within notmuch-new.c. No other files need this function so we don't need it exported in notmuch-client.h. --- notmuch-client.h | 18 ------------------ notmuch-new.c | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index eca99906..77766de2 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -70,20 +70,6 @@ #define STRNCMP_LITERAL(var, literal) \ strncmp ((var), (literal), sizeof (literal) - 1) -typedef void (*add_files_callback_t) (notmuch_message_t *message); - -typedef struct { - int output_is_a_tty; - int verbose; - - int total_files; - int processed_files; - int added_messages; - struct timeval tv_start; - - add_files_callback_t callback; -} add_files_state_t; - static inline void chomp_newline (char *str) { @@ -130,10 +116,6 @@ notmuch_time_print_formatted_seconds (double seconds); double notmuch_time_elapsed (struct timeval start, struct timeval end); -notmuch_status_t -add_files (notmuch_database_t *notmuch, const char *path, - add_files_state_t *state); - char * query_string_from_args (void *ctx, int argc, char *argv[]); diff --git a/notmuch-new.c b/notmuch-new.c index ee6f196e..9ee15812 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -22,6 +22,20 @@ #include +typedef void (*add_files_callback_t) (notmuch_message_t *message); + +typedef struct { + int output_is_a_tty; + int verbose; + + int total_files; + int processed_files; + int added_messages; + struct timeval tv_start; + + add_files_callback_t callback; +} add_files_state_t; + static volatile sig_atomic_t do_add_files_print_progress = 0; static void @@ -300,7 +314,7 @@ add_files_recursive (notmuch_database_t *notmuch, /* This is the top-level entry point for add_files. It does a couple * of error checks, sets up the progress-printing timer and then calls * into the recursive function. */ -notmuch_status_t +static notmuch_status_t add_files (notmuch_database_t *notmuch, const char *path, add_files_state_t *state) From 999f4c895c2442f50f943f6a8b391e1adc9cbba4 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:13:16 -0800 Subject: [PATCH 26/63] notmuch-new: Remove dead add_files_callback code. Always satisfying to delete code (even if tiny). --- notmuch-new.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 9ee15812..ca68a684 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -22,8 +22,6 @@ #include -typedef void (*add_files_callback_t) (notmuch_message_t *message); - typedef struct { int output_is_a_tty; int verbose; @@ -32,8 +30,6 @@ typedef struct { int processed_files; int added_messages; struct timeval tv_start; - - add_files_callback_t callback; } add_files_state_t; static volatile sig_atomic_t do_add_files_print_progress = 0; From 29908b9f1375904957e754531912d4ad12e94e74 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:23:52 -0800 Subject: [PATCH 27/63] notmuch new: Avoid updating directory timestamp if interrupted. This was a very dangerous bug. An interrupted "notmuch new" session would still update the timestamp for the directory in the database. This would result in mail files that were not processed due to the original interruption *never* being picked up by future runs of "notmuch new". Yikes! --- notmuch-new.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index ca68a684..4adbdc7f 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -290,9 +290,11 @@ add_files_recursive (notmuch_database_t *notmuch, next = NULL; } - status = notmuch_directory_set_mtime (directory, path_mtime); - if (status && ret == NOTMUCH_STATUS_SUCCESS) - ret = status; + if (! interrupted) { + status = notmuch_directory_set_mtime (directory, path_mtime); + if (status && ret == NOTMUCH_STATUS_SUCCESS) + ret = status; + } DONE: if (next) From 3fb7ee7754ee79ef1371001aa55d5063b1799806 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:31:56 -0800 Subject: [PATCH 28/63] notmuch new: Rename the various timestamp variables to be more clear. The previous name of "path_mtime" was very ambiguous. The new names are much more obvious (fs_mtime is the mtime from the filesystem and db_mtime is the mtime from the database). --- notmuch-new.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 4adbdc7f..7e5e0813 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -121,20 +121,20 @@ is_maildir (struct dirent **entries, int count) /* Examine 'path' recursively as follows: * - * o Ask the filesystem for the mtime of 'path' (path_mtime) + * o Ask the filesystem for the mtime of 'path' (fs_mtime) * - * o Ask the database for its timestamp of 'path' (path_dbtime) + * o Ask the database for its timestamp of 'path' (db_mtime) * - * o If 'path_mtime' > 'path_dbtime' + * o If 'fs_mtime' > 'db_mtime' * * o For each regular file in 'path' with mtime newer than the - * 'path_dbtime' call add_message to add the file to the + * 'db_mtime' call add_message to add the file to the * database. * * o For each sub-directory of path, recursively call into this * same function. * - * o Tell the database to update its time of 'path' to 'path_mtime' + * o Tell the database to update its time of 'path' to 'fs_mtime' * * The 'struct stat *st' must point to a structure that has already * been initialized for 'path' by calling stat(). @@ -148,17 +148,17 @@ add_files_recursive (notmuch_database_t *notmuch, DIR *dir = NULL; struct dirent *entry = NULL; char *next = NULL; - time_t path_mtime, path_dbtime; + time_t fs_mtime, db_mtime; notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; struct dirent **namelist = NULL; int num_entries; notmuch_directory_t *directory; - path_mtime = st->st_mtime; + fs_mtime = st->st_mtime; directory = notmuch_database_get_directory (notmuch, path); - path_dbtime = notmuch_directory_get_mtime (directory); + db_mtime = notmuch_directory_get_mtime (directory); num_entries = scandir (path, &namelist, 0, ino_cmp); @@ -180,7 +180,7 @@ add_files_recursive (notmuch_database_t *notmuch, /* If this directory hasn't been modified since the last * add_files, then we only need to look further for * sub-directories. */ - if (path_mtime <= path_dbtime && entry->d_type == DT_REG) + if (fs_mtime <= db_mtime && entry->d_type == DT_REG) continue; /* Ignore special directories to avoid infinite recursion. @@ -221,7 +221,7 @@ add_files_recursive (notmuch_database_t *notmuch, if (S_ISREG (st->st_mode)) { /* If the file hasn't been modified since the last * add_files, then we need not look at it. */ - if (path_dbtime == 0 || st->st_mtime > path_dbtime) { + if (db_mtime == 0 || st->st_mtime > db_mtime) { state->processed_files++; if (state->verbose) { @@ -291,7 +291,7 @@ add_files_recursive (notmuch_database_t *notmuch, } if (! interrupted) { - status = notmuch_directory_set_mtime (directory, path_mtime); + status = notmuch_directory_set_mtime (directory, fs_mtime); if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; } From 2ce46c31fe44ea1c7fc75a7da83e1278d06c28e1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:52:59 -0800 Subject: [PATCH 29/63] notmuch new: Fix internal documentation of add_files_recursive. To make it more clear that the mtime of a directory does not affect whether further sub-directories are examined, (they are examined unconditionally). --- notmuch-new.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 7e5e0813..6df4ad9f 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -131,8 +131,8 @@ is_maildir (struct dirent **entries, int count) * 'db_mtime' call add_message to add the file to the * database. * - * o For each sub-directory of path, recursively call into this - * same function. + * o For each sub-directory of path, recursively call into this + * same function. * * o Tell the database to update its time of 'path' to 'fs_mtime' * From dde214c768a948222786b4c2b5ec404a4ffacc8c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 15:59:11 -0800 Subject: [PATCH 30/63] notmuch new: Eliminate the check on the mtime of regular files before adding. This check was buggy in that moving a pre-existing file into the mail store, (where the file existed before the last run of "notmuch new"), does not update the mtime of the file. So the message would never be added to the database. The fix here is not practical in the long run, (since it causes *all* files in the mail store to be processed in every run of "notmuch new" (!)). But this change will let us drop a stat() call that we don't otherwise need and will help move us toward proper database-backed detection of new files, (which will fix the bug without the performance impact of the current fix). --- notmuch-new.c | 109 ++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 57 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 6df4ad9f..fe280d84 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -127,9 +127,8 @@ is_maildir (struct dirent **entries, int count) * * o If 'fs_mtime' > 'db_mtime' * - * o For each regular file in 'path' with mtime newer than the - * 'db_mtime' call add_message to add the file to the - * database. + * o For each regular file directly within 'path', call + * add_message to add the file to the database. * * o For each sub-directory of path, recursively call into this * same function. @@ -219,66 +218,62 @@ add_files_recursive (notmuch_database_t *notmuch, } if (S_ISREG (st->st_mode)) { - /* If the file hasn't been modified since the last - * add_files, then we need not look at it. */ - if (db_mtime == 0 || st->st_mtime > db_mtime) { - state->processed_files++; + state->processed_files++; - if (state->verbose) { - if (state->output_is_a_tty) - printf("\r\033[K"); + if (state->verbose) { + if (state->output_is_a_tty) + printf("\r\033[K"); - printf ("%i/%i: %s", - state->processed_files, - state->total_files, - next); + printf ("%i/%i: %s", + state->processed_files, + state->total_files, + next); - putchar((state->output_is_a_tty) ? '\r' : '\n'); - fflush (stdout); - } + putchar((state->output_is_a_tty) ? '\r' : '\n'); + fflush (stdout); + } - status = notmuch_database_add_message (notmuch, next, &message); - switch (status) { - /* success */ - case NOTMUCH_STATUS_SUCCESS: - state->added_messages++; - tag_inbox_and_unread (message); - break; - /* Non-fatal issues (go on to next file) */ - case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: - /* Stay silent on this one. */ - break; - case NOTMUCH_STATUS_FILE_NOT_EMAIL: - fprintf (stderr, "Note: Ignoring non-mail file: %s\n", - next); - break; - /* Fatal issues. Don't process anymore. */ - case NOTMUCH_STATUS_READONLY_DATABASE: - case NOTMUCH_STATUS_XAPIAN_EXCEPTION: - case NOTMUCH_STATUS_OUT_OF_MEMORY: - fprintf (stderr, "Error: %s. Halting processing.\n", - notmuch_status_to_string (status)); - ret = status; - goto DONE; - default: - case NOTMUCH_STATUS_FILE_ERROR: - case NOTMUCH_STATUS_NULL_POINTER: - case NOTMUCH_STATUS_TAG_TOO_LONG: - case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: - case NOTMUCH_STATUS_LAST_STATUS: - INTERNAL_ERROR ("add_message returned unexpected value: %d", status); - goto DONE; - } + status = notmuch_database_add_message (notmuch, next, &message); + switch (status) { + /* success */ + case NOTMUCH_STATUS_SUCCESS: + state->added_messages++; + tag_inbox_and_unread (message); + break; + /* Non-fatal issues (go on to next file) */ + case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + /* Stay silent on this one. */ + break; + case NOTMUCH_STATUS_FILE_NOT_EMAIL: + fprintf (stderr, "Note: Ignoring non-mail file: %s\n", + next); + break; + /* Fatal issues. Don't process anymore. */ + case NOTMUCH_STATUS_READONLY_DATABASE: + case NOTMUCH_STATUS_XAPIAN_EXCEPTION: + case NOTMUCH_STATUS_OUT_OF_MEMORY: + fprintf (stderr, "Error: %s. Halting processing.\n", + notmuch_status_to_string (status)); + ret = status; + goto DONE; + default: + case NOTMUCH_STATUS_FILE_ERROR: + case NOTMUCH_STATUS_NULL_POINTER: + case NOTMUCH_STATUS_TAG_TOO_LONG: + case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: + case NOTMUCH_STATUS_LAST_STATUS: + INTERNAL_ERROR ("add_message returned unexpected value: %d", status); + goto DONE; + } - if (message) { - notmuch_message_destroy (message); - message = NULL; - } + if (message) { + notmuch_message_destroy (message); + message = NULL; + } - if (do_add_files_print_progress) { - do_add_files_print_progress = 0; - add_files_print_progress (state); - } + if (do_add_files_print_progress) { + do_add_files_print_progress = 0; + add_files_print_progress (state); } } else if (S_ISDIR (st->st_mode)) { status = add_files_recursive (notmuch, next, st, state); From 2c4555f1a56602ff1dd55a63699810522ba4d91e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 16:06:46 -0800 Subject: [PATCH 31/63] notmuch new: Remove an unnecessary stat of every regular file in the mail store. We were previousl using the stat for two reasons. One was to obtain the mtime of the file. This usage was removed in the previous commit, (since the mtime is unreliable in the case of a file being moved into the mail store). The second reason was to identify regular and directory file types. But this information is already available in the result we get from scandir. What's left is simply a stat for each directory in the mailstore, (which we are still using to compare filesystem mtime with the mtime stored in the database). --- notmuch-new.c | 53 +++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index fe280d84..490101d0 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -141,7 +141,6 @@ is_maildir (struct dirent **entries, int count) static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, const char *path, - struct stat *st, add_files_state_t *state) { DIR *dir = NULL; @@ -153,8 +152,20 @@ add_files_recursive (notmuch_database_t *notmuch, struct dirent **namelist = NULL; int num_entries; notmuch_directory_t *directory; + struct stat st; - fs_mtime = st->st_mtime; + if (stat (path, &st)) { + fprintf (stderr, "Error reading directory %s: %s\n", + path, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } + + if (! S_ISDIR (st.st_mode)) { + fprintf (stderr, "Error: %s is not a directory.\n", path); + return NOTMUCH_STATUS_FILE_ERROR; + } + + fs_mtime = st.st_mtime; directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); @@ -199,25 +210,7 @@ add_files_recursive (notmuch_database_t *notmuch, next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); - if (stat (next, st)) { - int err = errno; - - switch (err) { - case ENOENT: - /* The file was removed between scandir and now... */ - case EPERM: - case EACCES: - /* We can't read this file so don't add it to the cache. */ - continue; - } - - fprintf (stderr, "Error reading %s: %s\n", - next, strerror (errno)); - ret = NOTMUCH_STATUS_FILE_ERROR; - goto DONE; - } - - if (S_ISREG (st->st_mode)) { + if (entry->d_type == DT_REG) { state->processed_files++; if (state->verbose) { @@ -275,8 +268,8 @@ add_files_recursive (notmuch_database_t *notmuch, do_add_files_print_progress = 0; add_files_print_progress (state); } - } else if (S_ISDIR (st->st_mode)) { - status = add_files_recursive (notmuch, next, st, state); + } else if (entry->d_type == DT_DIR) { + status = add_files_recursive (notmuch, next, state); if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; } @@ -312,23 +305,11 @@ add_files (notmuch_database_t *notmuch, const char *path, add_files_state_t *state) { - struct stat st; notmuch_status_t status; struct sigaction action; struct itimerval timerval; notmuch_bool_t timer_is_active = FALSE; - if (stat (path, &st)) { - fprintf (stderr, "Error reading directory %s: %s\n", - path, strerror (errno)); - return NOTMUCH_STATUS_FILE_ERROR; - } - - if (! S_ISDIR (st.st_mode)) { - fprintf (stderr, "Error: %s is not a directory.\n", path); - return NOTMUCH_STATUS_FILE_ERROR; - } - if (state->output_is_a_tty && ! debugger_is_active () && ! state->verbose) { /* Setup our handler for SIGALRM */ memset (&action, 0, sizeof (struct sigaction)); @@ -347,7 +328,7 @@ add_files (notmuch_database_t *notmuch, timer_is_active = TRUE; } - status = add_files_recursive (notmuch, path, &st, state); + status = add_files_recursive (notmuch, path, state); if (timer_is_active) { /* Now stop the timer. */ From 6f05dd8a8c51d23f47fce608e3eb25cd8e1c8bd1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 16:15:43 -0800 Subject: [PATCH 32/63] add_files_recursive: Use consistent naming for array and count variables. Previously we had an array named "namelist" and its count named "num_entries". We now use an array name of "fs_entries" and a count named "num_fs_entries" to try to preserve sanity. --- notmuch-new.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 490101d0..58431b7c 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -149,8 +149,8 @@ add_files_recursive (notmuch_database_t *notmuch, time_t fs_mtime, db_mtime; notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; - struct dirent **namelist = NULL; - int num_entries; + struct dirent **fs_entries = NULL; + int num_fs_entries; notmuch_directory_t *directory; struct stat st; @@ -170,9 +170,9 @@ add_files_recursive (notmuch_database_t *notmuch, directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); - num_entries = scandir (path, &namelist, 0, ino_cmp); + num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); - if (num_entries == -1) { + if (num_fs_entries == -1) { fprintf (stderr, "Error opening directory %s: %s\n", path, strerror (errno)); ret = NOTMUCH_STATUS_FILE_ERROR; @@ -182,10 +182,10 @@ add_files_recursive (notmuch_database_t *notmuch, int i=0; while (!interrupted) { - if (i == num_entries) + if (i == num_fs_entries) break; - entry= namelist[i++]; + entry = fs_entries[i++]; /* If this directory hasn't been modified since the last * add_files, then we only need to look further for @@ -202,7 +202,7 @@ add_files_recursive (notmuch_database_t *notmuch, strcmp (entry->d_name, "..") == 0 || (entry->d_type == DT_DIR && (strcmp (entry->d_name, "tmp") == 0) && - is_maildir (namelist, num_entries)) || + is_maildir (fs_entries, num_fs_entries)) || strcmp (entry->d_name, ".notmuch") ==0) { continue; @@ -291,8 +291,8 @@ add_files_recursive (notmuch_database_t *notmuch, free (entry); if (dir) closedir (dir); - if (namelist) - free (namelist); + if (fs_entries) + free (fs_entries); return ret; } @@ -358,21 +358,21 @@ count_files (const char *path, int *count) struct dirent *entry = NULL; char *next; struct stat st; - struct dirent **namelist = NULL; - int n_entries = scandir (path, &namelist, 0, ino_cmp); + struct dirent **fs_entries = NULL; + int num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); int i = 0; - if (n_entries == -1) { + if (num_fs_entries == -1) { fprintf (stderr, "Warning: failed to open directory %s: %s\n", path, strerror (errno)); goto DONE; } while (!interrupted) { - if (i == n_entries) + if (i == num_fs_entries) break; - entry= namelist[i++]; + entry = fs_entries[i++]; /* Ignore special directories to avoid infinite recursion. * Also ignore the .notmuch directory. @@ -411,8 +411,8 @@ count_files (const char *path, int *count) DONE: if (entry) free (entry); - if (namelist) - free (namelist); + if (fs_entries) + free (fs_entries); } int From 28ce73848d98d8ee2b661733402e2c10b13418d5 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 16:35:02 -0800 Subject: [PATCH 33/63] add_files_recursive: Separate scanning for directories and files for legibility. We now do two scans over the entries returned from scandir. The first scan is looking for directories (and making the recursive call). The second scan is looking for new files to add to the database. This is easier to read than the previous code which had a single loop and some if statements with ridiculously long bodies. It also has the advantage that once the directory scan is complete we can do a single comparison of the filesystem and database mtimes and entirely skip the second scan if it's not needed. --- notmuch-new.c | 152 +++++++++++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 69 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 58431b7c..f34d4676 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -125,14 +125,14 @@ is_maildir (struct dirent **entries, int count) * * o Ask the database for its timestamp of 'path' (db_mtime) * + * o For each sub-directory of path, recursively call into this + * same function. + * * o If 'fs_mtime' > 'db_mtime' * * o For each regular file directly within 'path', call * add_message to add the file to the database. * - * o For each sub-directory of path, recursively call into this - * same function. - * * o Tell the database to update its time of 'path' to 'fs_mtime' * * The 'struct stat *st' must point to a structure that has already @@ -150,7 +150,7 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; struct dirent **fs_entries = NULL; - int num_fs_entries; + int i, num_fs_entries; notmuch_directory_t *directory; struct stat st; @@ -179,18 +179,14 @@ add_files_recursive (notmuch_database_t *notmuch, goto DONE; } - int i=0; - - while (!interrupted) { - if (i == num_fs_entries) + /* First, recurse into all sub-directories. */ + for (i = 0; i < num_fs_entries; i++) { + if (interrupted) break; - entry = fs_entries[i++]; + entry = fs_entries[i]; - /* If this directory hasn't been modified since the last - * add_files, then we only need to look further for - * sub-directories. */ - if (fs_mtime <= db_mtime && entry->d_type == DT_REG) + if (entry->d_type != DT_DIR) continue; /* Ignore special directories to avoid infinite recursion. @@ -209,69 +205,87 @@ add_files_recursive (notmuch_database_t *notmuch, } next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + status = add_files_recursive (notmuch, next, state); + if (status && ret == NOTMUCH_STATUS_SUCCESS) + ret = status; + talloc_free (next); + next = NULL; + } - if (entry->d_type == DT_REG) { - state->processed_files++; + /* If this directory hasn't been modified since the last + * add_files, then we can skip the second pass where we look for + * new files in this directory. */ + if (fs_mtime <= db_mtime) + goto DONE; - if (state->verbose) { - if (state->output_is_a_tty) - printf("\r\033[K"); + /* Second, scan the regular files in this directory. */ + for (i = 0; i < num_fs_entries; i++) { + if (interrupted) + break; - printf ("%i/%i: %s", - state->processed_files, - state->total_files, - next); + entry = fs_entries[i]; - putchar((state->output_is_a_tty) ? '\r' : '\n'); - fflush (stdout); - } + if (entry->d_type != DT_REG) + continue; - status = notmuch_database_add_message (notmuch, next, &message); - switch (status) { - /* success */ - case NOTMUCH_STATUS_SUCCESS: - state->added_messages++; - tag_inbox_and_unread (message); - break; - /* Non-fatal issues (go on to next file) */ - case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: - /* Stay silent on this one. */ - break; - case NOTMUCH_STATUS_FILE_NOT_EMAIL: - fprintf (stderr, "Note: Ignoring non-mail file: %s\n", - next); - break; - /* Fatal issues. Don't process anymore. */ - case NOTMUCH_STATUS_READONLY_DATABASE: - case NOTMUCH_STATUS_XAPIAN_EXCEPTION: - case NOTMUCH_STATUS_OUT_OF_MEMORY: - fprintf (stderr, "Error: %s. Halting processing.\n", - notmuch_status_to_string (status)); - ret = status; - goto DONE; - default: - case NOTMUCH_STATUS_FILE_ERROR: - case NOTMUCH_STATUS_NULL_POINTER: - case NOTMUCH_STATUS_TAG_TOO_LONG: - case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: - case NOTMUCH_STATUS_LAST_STATUS: - INTERNAL_ERROR ("add_message returned unexpected value: %d", status); - goto DONE; - } + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); - if (message) { - notmuch_message_destroy (message); - message = NULL; - } + state->processed_files++; - if (do_add_files_print_progress) { - do_add_files_print_progress = 0; - add_files_print_progress (state); - } - } else if (entry->d_type == DT_DIR) { - status = add_files_recursive (notmuch, next, state); - if (status && ret == NOTMUCH_STATUS_SUCCESS) - ret = status; + if (state->verbose) { + if (state->output_is_a_tty) + printf("\r\033[K"); + + printf ("%i/%i: %s", + state->processed_files, + state->total_files, + next); + + putchar((state->output_is_a_tty) ? '\r' : '\n'); + fflush (stdout); + } + + status = notmuch_database_add_message (notmuch, next, &message); + switch (status) { + /* success */ + case NOTMUCH_STATUS_SUCCESS: + state->added_messages++; + tag_inbox_and_unread (message); + break; + /* Non-fatal issues (go on to next file) */ + case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + /* Stay silent on this one. */ + break; + case NOTMUCH_STATUS_FILE_NOT_EMAIL: + fprintf (stderr, "Note: Ignoring non-mail file: %s\n", + next); + break; + /* Fatal issues. Don't process anymore. */ + case NOTMUCH_STATUS_READONLY_DATABASE: + case NOTMUCH_STATUS_XAPIAN_EXCEPTION: + case NOTMUCH_STATUS_OUT_OF_MEMORY: + fprintf (stderr, "Error: %s. Halting processing.\n", + notmuch_status_to_string (status)); + ret = status; + goto DONE; + default: + case NOTMUCH_STATUS_FILE_ERROR: + case NOTMUCH_STATUS_NULL_POINTER: + case NOTMUCH_STATUS_TAG_TOO_LONG: + case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: + case NOTMUCH_STATUS_LAST_STATUS: + INTERNAL_ERROR ("add_message returned unexpected value: %d", status); + goto DONE; + } + + if (message) { + notmuch_message_destroy (message); + message = NULL; + } + + if (do_add_files_print_progress) { + do_add_files_print_progress = 0; + add_files_print_progress (state); } talloc_free (next); From 2a98b1d48795e66754435e06e28cdf58ab1dd154 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 16:42:14 -0800 Subject: [PATCH 34/63] add_files_recursive: Make the maildir detection more efficient. Previously, we were re-scanning the entire list of entries for every directory entry. Instead, we can simply check if the entries look like a maildir once, up-front. --- notmuch-new.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index f34d4676..af742977 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -100,12 +100,14 @@ static int ino_cmp(const struct dirent **a, const struct dirent **b) * Return 1 if the directory looks like a Maildir and 0 otherwise. */ static int -is_maildir (struct dirent **entries, int count) +_entries_resemble_maildir (struct dirent **entries, int count) { int i, found = 0; for (i = 0; i < count; i++) { - if (entries[i]->d_type != DT_DIR) continue; + if (entries[i]->d_type != DT_DIR) + continue; + if (strcmp(entries[i]->d_name, "new") == 0 || strcmp(entries[i]->d_name, "cur") == 0 || strcmp(entries[i]->d_name, "tmp") == 0) @@ -153,6 +155,7 @@ add_files_recursive (notmuch_database_t *notmuch, int i, num_fs_entries; notmuch_directory_t *directory; struct stat st; + notmuch_bool_t is_maildir; if (stat (path, &st)) { fprintf (stderr, "Error reading directory %s: %s\n", @@ -180,6 +183,8 @@ add_files_recursive (notmuch_database_t *notmuch, } /* First, recurse into all sub-directories. */ + is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries); + for (i = 0; i < num_fs_entries; i++) { if (interrupted) break; @@ -190,15 +195,14 @@ add_files_recursive (notmuch_database_t *notmuch, continue; /* Ignore special directories to avoid infinite recursion. - * Also ignore the .notmuch directory. + * Also ignore the .notmuch directory and any "tmp" directory + * that appears within a maildir. */ /* XXX: Eventually we'll want more sophistication to let the * user specify files to be ignored. */ if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0 || - (entry->d_type == DT_DIR && - (strcmp (entry->d_name, "tmp") == 0) && - is_maildir (fs_entries, num_fs_entries)) || + (is_maildir && strcmp (entry->d_name, "tmp") == 0) || strcmp (entry->d_name, ".notmuch") ==0) { continue; From 03d5175001185a7eb0b3219665ea0974147da8cc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 17:43:03 -0800 Subject: [PATCH 35/63] notmuch new: Detect deleted (renamed) files and directories. This takes advantage of the notmuch_directory_t interfaces added recently (with cooresponding storage of directory documents in the database) to detect when files or entire directories are deleted or renamed within the mail store. This also fixes the recent regression where *all* files would be processed by every run of "notmuch new", (now only new files are processed once again). The deleted files and directories are only detected so far. They aren't properly removed from the database. --- notmuch-new.c | 104 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 17 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index af742977..b2b6043a 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -87,11 +87,18 @@ add_files_print_progress (add_files_state_t *state) fflush (stdout); } -static int ino_cmp(const struct dirent **a, const struct dirent **b) +static int +dirent_sort_inode (const struct dirent **a, const struct dirent **b) { return ((*a)->d_ino < (*b)->d_ino) ? -1 : 1; } +static int +dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b) +{ + return strcmp ((*a)->d_name, (*b)->d_name); +} + /* Test if the directory looks like a Maildir directory. * * Search through the array of directory entries to see if we can find all @@ -124,21 +131,37 @@ _entries_resemble_maildir (struct dirent **entries, int count) /* Examine 'path' recursively as follows: * * o Ask the filesystem for the mtime of 'path' (fs_mtime) - * * o Ask the database for its timestamp of 'path' (db_mtime) * - * o For each sub-directory of path, recursively call into this - * same function. + * o Ask the filesystem for files and directories within 'path' + * (via scandir and stored in fs_entries) + * o Ask the database for files and directories within 'path' + * (db_files and db_subdirs) * - * o If 'fs_mtime' > 'db_mtime' + * o Pass 1: For each directory in fs_entries, recursively call into + * this same function. * - * o For each regular file directly within 'path', call - * add_message to add the file to the database. + * o Pass 2: If 'fs_mtime' > 'db_mtime', then walk fs_entries + * simultaneously with db_files and db_subdirs. Look for one of + * three interesting cases: + * + * 1. Regular file in fs_entries and not in db_files + * This is a new file to add_message into the database. + * + * 2. Filename in db_files not in fs_entries. + * This is a file that has been removed from the mail store. + * + * 3. Directory in db_subdirs not in fs_entries + * This is a directory that has been removed from the mail store. + * + * Note that the addition of a directory is not interesting here, + * since that will have been taken care of in pass 1. Also, we + * don't immediately act on file/directory removal since we must + * ensure that in the case of a rename that the new filename is + * added before the old filename is removed, (so that no + * information is lost from the database). * * o Tell the database to update its time of 'path' to 'fs_mtime' - * - * The 'struct stat *st' must point to a structure that has already - * been initialized for 'path' by calling stat(). */ static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, @@ -154,6 +177,8 @@ add_files_recursive (notmuch_database_t *notmuch, struct dirent **fs_entries = NULL; int i, num_fs_entries; notmuch_directory_t *directory; + notmuch_filenames_t *db_files = NULL; + notmuch_filenames_t *db_subdirs = NULL; struct stat st; notmuch_bool_t is_maildir; @@ -173,7 +198,12 @@ add_files_recursive (notmuch_database_t *notmuch, directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); - num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); + /* If the database knows about this directory, then we sort based + * on strcmp to match the database sorting. Otherwise, we can do + * inode-based sorting for faster filesystem operation. */ + num_fs_entries = scandir (path, &fs_entries, 0, + db_mtime ? + dirent_sort_strcmp_name : dirent_sort_inode); if (num_fs_entries == -1) { fprintf (stderr, "Error opening directory %s: %s\n", @@ -182,7 +212,7 @@ add_files_recursive (notmuch_database_t *notmuch, goto DONE; } - /* First, recurse into all sub-directories. */ + /* Pass 1: Recurse into all sub-directories. */ is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries); for (i = 0; i < num_fs_entries; i++) { @@ -217,21 +247,55 @@ add_files_recursive (notmuch_database_t *notmuch, } /* If this directory hasn't been modified since the last - * add_files, then we can skip the second pass where we look for - * new files in this directory. */ + * "notmuch new", then we can skip the second pass entirely. */ if (fs_mtime <= db_mtime) goto DONE; - /* Second, scan the regular files in this directory. */ - for (i = 0; i < num_fs_entries; i++) { + /* Pass 2: Scan for new files, removed files, and removed directories. */ + db_files = notmuch_directory_get_child_files (directory); + db_subdirs = notmuch_directory_get_child_directories (directory); + + for (i = 0; i < num_fs_entries; i++) + { if (interrupted) break; entry = fs_entries[i]; + /* Check if we've walked past any names in db_files or + * db_subdirs. If so, these have been deleted. */ + while (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0) + { + printf ("Detected deleted file %s/%s\n", path, + notmuch_filenames_get (db_files)); + + notmuch_filenames_advance (db_files); + } + + while (notmuch_filenames_has_more (db_subdirs) && + strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0) + { + if (strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) < 0) + printf ("Detected deleted directory %s/%s", path, + notmuch_filenames_get (db_subdirs)); + + notmuch_filenames_advance (db_subdirs); + } + if (entry->d_type != DT_REG) continue; + /* Don't add a file that we've added before. */ + if (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0) + { + notmuch_filenames_advance (db_files); + continue; + } + + /* We're not looking at a regular file that doesn't yet exist + * in the database, so add it. */ next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); state->processed_files++; @@ -311,6 +375,12 @@ add_files_recursive (notmuch_database_t *notmuch, closedir (dir); if (fs_entries) free (fs_entries); + if (db_subdirs) + notmuch_filenames_destroy (db_subdirs); + if (db_files) + notmuch_filenames_destroy (db_files); + if (directory) + notmuch_directory_destroy (directory); return ret; } @@ -377,7 +447,7 @@ count_files (const char *path, int *count) char *next; struct stat st; struct dirent **fs_entries = NULL; - int num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); + int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode); int i = 0; if (num_fs_entries == -1) { From 2e96464f9705be4ec772280cad71a6c9d5831e6f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 17:56:11 -0800 Subject: [PATCH 36/63] notmuch new: Store detected removed filenames for later processing. It is essential to defer the actual removal of any filenames from the database until we are entirely done adding any new files. This is to avoid any information loss from the database in the case of a renamed file or directory. Note that we're *still* not actually doing any removal---still just printing messages indicating the filenames that were detected as removed. But we're at least now printing those messages at a time when we actually *can* do the actual removal. --- notmuch-new.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index b2b6043a..f8fe66a3 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -22,6 +22,16 @@ #include +typedef struct _filename_node { + char *filename; + struct _filename_node *next; +} _filename_node_t; + +typedef struct _filename_list { + _filename_node_t *head; + _filename_node_t **tail; +} _filename_list_t; + typedef struct { int output_is_a_tty; int verbose; @@ -30,6 +40,9 @@ typedef struct { int processed_files; int added_messages; struct timeval tv_start; + + _filename_list_t *removed_files; + _filename_list_t *removed_directories; } add_files_state_t; static volatile sig_atomic_t do_add_files_print_progress = 0; @@ -52,6 +65,34 @@ handle_sigint (unused (int sig)) interrupted = 1; } +static _filename_list_t * +_filename_list_create (const void *ctx) +{ + _filename_list_t *list; + + list = talloc (ctx, _filename_list_t); + if (list == NULL) + return NULL; + + list->head = NULL; + list->tail = &list->head; + + return list; +} + +static void +_filename_list_add (_filename_list_t *list, + const char *filename) +{ + _filename_node_t *node = talloc (list, _filename_node_t); + + node->filename = talloc_strdup (list, filename); + node->next = NULL; + + *(list->tail) = node; + list->tail = &node->next; +} + static void tag_inbox_and_unread (notmuch_message_t *message) { @@ -267,8 +308,11 @@ add_files_recursive (notmuch_database_t *notmuch, while (notmuch_filenames_has_more (db_files) && strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0) { - printf ("Detected deleted file %s/%s\n", path, - notmuch_filenames_get (db_files)); + char *absolute = talloc_asprintf (state->removed_files, + "%s/%s", path, + notmuch_filenames_get (db_files)); + + _filename_list_add (state->removed_files, absolute); notmuch_filenames_advance (db_files); } @@ -276,9 +320,15 @@ add_files_recursive (notmuch_database_t *notmuch, while (notmuch_filenames_has_more (db_subdirs) && strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0) { - if (strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) < 0) - printf ("Detected deleted directory %s/%s", path, - notmuch_filenames_get (db_subdirs)); + const char *filename = notmuch_filenames_get (db_subdirs); + + if (strcmp (filename, entry->d_name) < 0) + { + char *absolute = talloc_asprintf (state->removed_directories, + "%s/%s", path, filename); + + _filename_list_add (state->removed_directories, absolute); + } notmuch_filenames_advance (db_subdirs); } @@ -516,6 +566,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) const char *db_path; char *dot_notmuch_path; struct sigaction action; + _filename_node_t *f; int i; add_files_state.verbose = 0; @@ -572,8 +623,22 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) add_files_state.added_messages = 0; gettimeofday (&add_files_state.tv_start, NULL); + add_files_state.removed_files = _filename_list_create (ctx); + add_files_state.removed_directories = _filename_list_create (ctx); + ret = add_files (notmuch, db_path, &add_files_state); + for (f = add_files_state.removed_files->head; f; f = f->next) { + printf ("Detected removed file: %s\n", f->filename); + } + + for (f = add_files_state.removed_directories->head; f; f = f->next) { + printf ("Detected removed directory: %s\n", f->filename); + } + + talloc_free (add_files_state.removed_files); + talloc_free (add_files_state.removed_directories); + gettimeofday (&tv_now, NULL); elapsed = notmuch_time_elapsed (add_files_state.tv_start, tv_now); From 3fa2385f7cc367725ea74a08237158bd5a4163d7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 18:59:18 -0800 Subject: [PATCH 37/63] notmuch new: Proper support for renamed and deleted files. The "notmuch new" command will now efficiently notice if any files or directories have been removed from the mail store and will appropriately update its database. Any given mail message (as determined by the message ID) may have multiple corresponding filenames, and notmuch will return one of them. When a filen is deleted, the corresponding filename will be removed from the message in the database. When the last filename is removed from a message, that message will be entirely removed from the database. All file additions are handled before any file removals so that rename is supported properly. --- notmuch-new.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index f8fe66a3..3f8b7fcf 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -629,11 +629,28 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) ret = add_files (notmuch, db_path, &add_files_state); for (f = add_files_state.removed_files->head; f; f = f->next) { - printf ("Detected removed file: %s\n", f->filename); + notmuch_database_remove_message (notmuch, f->filename); } for (f = add_files_state.removed_directories->head; f; f = f->next) { - printf ("Detected removed directory: %s\n", f->filename); + notmuch_directory_t *directory; + notmuch_filenames_t *files; + + directory = notmuch_database_get_directory (notmuch, f->filename); + + for (files = notmuch_directory_get_child_files (directory); + notmuch_filenames_has_more (files); + notmuch_filenames_advance (files)) + { + char *absolute; + + absolute = talloc_asprintf (ctx, "%s/%s", f->filename, + notmuch_filenames_get (files)); + notmuch_database_remove_message (notmuch, absolute); + talloc_free (absolute); + } + + notmuch_directory_destroy (directory); } talloc_free (add_files_state.removed_files); From 6ef6ddba80116e6c1cbaca494f42a63d924b741b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 19:14:07 -0800 Subject: [PATCH 38/63] Index content from citations and signatures. In the presentation we often omit citations and signatures, but this is not content that should be omitted from the index, (especially when the citation detection is wrong---see cases where a line beginning with "From" is corrupted to ">From" by mail processing tools). --- lib/index.cc | 60 ++++------------------------------------------------ 1 file changed, 4 insertions(+), 56 deletions(-) diff --git a/lib/index.cc b/lib/index.cc index e58bc870..7e2da085 100644 --- a/lib/index.cc +++ b/lib/index.cc @@ -123,60 +123,6 @@ skip_re_in_subject (const char *subject) return s; } -/* Given a string representing the body of a message, generate terms - * for it, (skipping quoted portions and signatures). - * - * This function is evil in that it modifies the string passed to it, - * (changing some newlines into '\0'). - */ -static void -_index_body_text (notmuch_message_t *message, char *body) -{ - char *line, *line_end, *next_line; - - if (body == NULL) - return; - - next_line = body; - - while (1) { - line = next_line; - if (*line == '\0') - break; - - next_line = strchr (line, '\n'); - if (next_line == NULL) { - next_line = line + strlen (line); - } - line_end = next_line - 1; - - /* Get to the next non-blank line. */ - while (*next_line == '\n') - next_line++; - - /* Skip blank lines. */ - if (line_end < line) - continue; - - /* Skip lines that are quotes. */ - if (*line == '>') - continue; - - /* Also skip lines introducing a quote on the next line. */ - if (*line_end == ':' && *next_line == '>') - continue; - - /* Finally, bail as soon as we see a signature. */ - /* XXX: Should only do this if "near" the end of the message. */ - if (strncmp (line, "-- ", 3) == 0) - break; - - *(line_end + 1) = '\0'; - - _notmuch_message_gen_terms (message, NULL, line); - } -} - /* Callback to generate terms for each mime part of a message. */ static void _index_mime_part (notmuch_message_t *message, @@ -249,9 +195,11 @@ _index_mime_part (notmuch_message_t *message, g_byte_array_append (byte_array, (guint8 *) "\0", 1); body = (char *) g_byte_array_free (byte_array, FALSE); - _index_body_text (message, body); + if (body) { + _notmuch_message_gen_terms (message, NULL, body); - free (body); + free (body); + } } notmuch_status_t From 777cd23d9db64c8015fe880a106e84c3634bf19f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:03:58 -0800 Subject: [PATCH 39/63] lib: Update documentation of notmuch_database_add_message. Previously, adding a filename with the same message ID as an existing message would do nothing. But we recently fixed this to instead add the new filename to the existing message document. So update the documentation to match now. --- lib/notmuch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/notmuch.h b/lib/notmuch.h index d9fe152a..52078e81 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -220,8 +220,8 @@ notmuch_database_get_directory (notmuch_database_t *database, * NOTMUCH_STATUS_SUCCESS: Message successfully added to database. * * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message - * ID as another message already in the database. Nothing added - * to the database. + * ID as another message already in the database. The new filename + * was successfully added to the message in the database. * * NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the * file, (such as permission denied, or file not found, From 4b418343f656c2de5503d907d075019626561373 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:06:00 -0800 Subject: [PATCH 40/63] lib: Indicate whether notmuch_database_remove_message removed anything. Similar to the return value of notmuch_database_add_message, we now enhance the return value of notmuch_database_remove_message to indicate whether the message document was entirely removed (SUCCESS) or whether only this filename was removed and the document exists under other filenamed (DUPLICATE_MESSAGE_ID). --- lib/database.cc | 4 +++- lib/notmuch.h | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 205d0360..dc967c8c 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1201,14 +1201,16 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, strncmp ((*j).c_str (), prefix, strlen (prefix))) { db->delete_document (document.get_docid ()); + status = NOTMUCH_STATUS_SUCCESS; } else { db->replace_document (document.get_docid (), document); + status = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; } } talloc_free (local); - return NOTMUCH_STATUS_SUCCESS; + return status; } notmuch_tags_t * diff --git a/lib/notmuch.h b/lib/notmuch.h index 52078e81..110061c2 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -237,12 +237,21 @@ notmuch_database_add_message (notmuch_database_t *database, /* Remove a message from the given notmuch database. * - * Note that the only this particular filename association is removed - * from the database. If the same message (as determined by the - * message ID) is still available via other filenames, then the - * message will persist in the database for those filenames. When the - * last filename is removed for a particular message, the database - * content for that message will be entirely removed. + * Note that only this particular filename association is removed from + * the database. If the same message (as determined by the message ID) + * is still available via other filenames, then the message will + * persist in the database for those filenames. When the last filename + * is removed for a particular message, the database content for that + * message will be entirely removed. + * + * Return value: + * + * NOTMUCH_STATUS_SUCCESS: The last filename was removed and the + * message was removed from the database. + * + * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: This filename was removed but + * the message persists in the database with at least one other + * filename. */ notmuch_status_t notmuch_database_remove_message (notmuch_database_t *database, From 9d4d7963a155d07f71f1f8f95031225cb9a5b705 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:07:49 -0800 Subject: [PATCH 41/63] notmuch new: Print counts of deleted and renamed messages. It's nice to be able to see a report indicating that the recently added support for detecting file rename and deletion is working. --- notmuch-new.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 3f8b7fcf..ab1d12a6 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -567,6 +567,8 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) char *dot_notmuch_path; struct sigaction action; _filename_node_t *f; + int renamed_files, removed_files; + notmuch_status_t status; int i; add_files_state.verbose = 0; @@ -628,8 +630,14 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) ret = add_files (notmuch, db_path, &add_files_state); + removed_files = 0; + renamed_files = 0; for (f = add_files_state.removed_files->head; f; f = f->next) { - notmuch_database_remove_message (notmuch, f->filename); + status = notmuch_database_remove_message (notmuch, f->filename); + if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) + renamed_files++; + else + removed_files++; } for (f = add_files_state.removed_directories->head; f; f = f->next) { @@ -646,7 +654,11 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) absolute = talloc_asprintf (ctx, "%s/%s", f->filename, notmuch_filenames_get (files)); - notmuch_database_remove_message (notmuch, absolute); + status = notmuch_database_remove_message (notmuch, absolute); + if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) + renamed_files++; + else + removed_files++; talloc_free (absolute); } @@ -659,6 +671,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) gettimeofday (&tv_now, NULL); elapsed = notmuch_time_elapsed (add_files_state.tv_start, tv_now); + if (add_files_state.processed_files) { printf ("Processed %d %s in ", add_files_state.processed_files, add_files_state.processed_files == 1 ? @@ -671,15 +684,30 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) printf (". \n"); } } + if (add_files_state.added_messages) { - printf ("Added %d new %s to the database.\n", + printf ("Added %d new %s to the database.", add_files_state.added_messages, add_files_state.added_messages == 1 ? "message" : "messages"); } else { - printf ("No new mail.\n"); + printf ("No new mail."); } + if (removed_files) { + printf (" Removed %d %s.", + removed_files, + removed_files == 1 ? "message" : "messages"); + } + + if (renamed_files) { + printf (" Detected %d file %s.", + renamed_files, + renamed_files == 1 ? "rename" : "renames"); + } + + printf ("\n"); + if (ret) { printf ("\nNote: At least one error was encountered: %s\n", notmuch_status_to_string (ret)); From bd72d95bac894b3611bb4467e10c9481af0456bf Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:08:51 -0800 Subject: [PATCH 42/63] Fix typo in comment. The difference between "now" and "not" ends up being fairly dramatic. --- notmuch-new.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notmuch-new.c b/notmuch-new.c index ab1d12a6..c15f6837 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -344,7 +344,7 @@ add_files_recursive (notmuch_database_t *notmuch, continue; } - /* We're not looking at a regular file that doesn't yet exist + /* We're now looking at a regular file that doesn't yet exist * in the database, so add it. */ next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); From 49f09958df22f80e4058b6f2892f6224a80c3d6d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:09:17 -0800 Subject: [PATCH 43/63] notmuch new: Fix regression preventing recursion through symlinks. In commit 3df737bc4addfce71c647792ee668725e5221a98 we switched from using stat() to using the d_type field in the result of scandir() to determine whether a filename is a regular file or a directory. This change introduced a regression in that the recursion would no longer traverse through a symlink to a directory. (Since stat() would resolve the symlink but with scandir() we see a distinct DT_LNK value in d_type). We fix this for directories by allowing both DT_DIR and DT_LNK values to recurse, and then downgrading the existing not-a-directory check within the recursion to not be an error. We also add a new not-a-directory check outside the recursion that is an error. --- notmuch-new.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index c15f6837..e2b5878f 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -229,10 +229,11 @@ add_files_recursive (notmuch_database_t *notmuch, return NOTMUCH_STATUS_FILE_ERROR; } - if (! S_ISDIR (st.st_mode)) { - fprintf (stderr, "Error: %s is not a directory.\n", path); - return NOTMUCH_STATUS_FILE_ERROR; - } + /* This is not an error since we may have recursed based on a + * symlink to a regular file, not a directory, and we don't know + * that until this stat. */ + if (! S_ISDIR (st.st_mode)) + return NOTMUCH_STATUS_SUCCESS; fs_mtime = st.st_mtime; @@ -262,7 +263,7 @@ add_files_recursive (notmuch_database_t *notmuch, entry = fs_entries[i]; - if (entry->d_type != DT_DIR) + if (entry->d_type != DT_DIR && entry->d_type != DT_LNK) continue; /* Ignore special directories to avoid infinite recursion. @@ -447,6 +448,7 @@ add_files (notmuch_database_t *notmuch, struct sigaction action; struct itimerval timerval; notmuch_bool_t timer_is_active = FALSE; + struct stat st; if (state->output_is_a_tty && ! debugger_is_active () && ! state->verbose) { /* Setup our handler for SIGALRM */ @@ -466,6 +468,17 @@ add_files (notmuch_database_t *notmuch, timer_is_active = TRUE; } + if (stat (path, &st)) { + fprintf (stderr, "Error reading directory %s: %s\n", + path, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } + + if (! S_ISDIR (st.st_mode)) { + fprintf (stderr, "Error: %s is not a directory.\n", path); + return NOTMUCH_STATUS_FILE_ERROR; + } + status = add_files_recursive (notmuch, path, state); if (timer_is_active) { From 39e81ca431906370d0fee351419e1314e57a7c58 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 10:30:08 -0800 Subject: [PATCH 44/63] notmuch new: Fix regression preventing addition of symlinked mail files. As described in the previous commit message, we introduced multiple symlink-based regressions in commit 3df737bc4addfce71c647792ee668725e5221a98 Here, we fix the case of symlinks to regular files by doing an extra stat of any DT_LNK files to determine if they do, in fact, link to regular files. --- notmuch-new.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/notmuch-new.c b/notmuch-new.c index e2b5878f..432d1262 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -334,8 +334,27 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_filenames_advance (db_subdirs); } - if (entry->d_type != DT_REG) + /* If we're looking at a symlink, we only want to add it if it + * links to a regular file, (and not to a directory, say). */ + if (entry->d_type == DT_LNK) { + int err; + + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + err = stat (next, &st); + talloc_free (next); + next = NULL; + + /* Don't emit an error for a link pointing nowhere, since + * the directory-traversal pass will have already done + * that. */ + if (err) + continue; + + if (! S_ISREG (st.st_mode)) + continue; + } else if (entry->d_type != DT_REG) { continue; + } /* Don't add a file that we've added before. */ if (notmuch_filenames_has_more (db_files) && From 59c09623c844f095c14400a9b4199be20e5d712e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 13:26:47 -0800 Subject: [PATCH 45/63] notmuch new: Fix to detect deletions of names at the end of the list. Previously we only scanned the list of filenames in the filesystem and detected a deletion whenever that scan skipped a name that existed in the database. That much was fine, but we *also* need to continue walking the list of names from the database when the filesystem list is exhausted. Without this, removing the last file or directory within any particular directory would go undetected. --- notmuch-new.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/notmuch-new.c b/notmuch-new.c index 432d1262..f0c306d5 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -430,6 +430,30 @@ add_files_recursive (notmuch_database_t *notmuch, next = NULL; } + /* Now that we've walked the whole filesystem list, anything left + * over in the database lists has been deleted. */ + while (notmuch_filenames_has_more (db_files)) + { + char *absolute = talloc_asprintf (state->removed_files, + "%s/%s", path, + notmuch_filenames_get (db_files)); + + _filename_list_add (state->removed_files, absolute); + + notmuch_filenames_advance (db_files); + } + + while (notmuch_filenames_has_more (db_subdirs)) + { + char *absolute = talloc_asprintf (state->removed_directories, + "%s/%s", path, + notmuch_filenames_get (db_subdirs)); + + _filename_list_add (state->removed_directories, absolute); + + notmuch_filenames_advance (db_subdirs); + } + if (! interrupted) { status = notmuch_directory_set_mtime (directory, fs_mtime); if (status && ret == NOTMUCH_STATUS_SUCCESS) From 7d8271dd9db5affa9fce589424d6f3b4d8b66cd7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 13:54:39 -0800 Subject: [PATCH 46/63] notmuch new: Fix bug resulting in file removal on initial build of database. The bug here was that we would see that the database did not know anything about a directory so would get results from the filesystem in inode rather than strcmp order. However, we wouldn't actually ask for the list of files from the database until after recursing into the sub-directories. So by the time we traverse the filenames looking for deletions, the database *does* have entries and we end up detecting erroneous deletions because our filename list from the filesystem isn't in strcmp order. So ask for the list of names from the database before doing any additions to avoid this problem. --- notmuch-new.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index f0c306d5..7d15fe91 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -239,6 +239,8 @@ add_files_recursive (notmuch_database_t *notmuch, directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); + db_files = notmuch_directory_get_child_files (directory); + db_subdirs = notmuch_directory_get_child_directories (directory); /* If the database knows about this directory, then we sort based * on strcmp to match the database sorting. Otherwise, we can do @@ -294,9 +296,6 @@ add_files_recursive (notmuch_database_t *notmuch, goto DONE; /* Pass 2: Scan for new files, removed files, and removed directories. */ - db_files = notmuch_directory_get_child_files (directory); - db_subdirs = notmuch_directory_get_child_directories (directory); - for (i = 0; i < num_fs_entries; i++) { if (interrupted) From 957ae198e708dc901e3a8c89b7364c2bc75ce371 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 14:35:11 -0800 Subject: [PATCH 47/63] lib: Treat NULL as a valid (and empty) notmuch_filenames_t iterator. This will be convenient to avoid some special-casing in higher-level code. --- lib/directory.cc | 12 ++++++++++++ lib/notmuch.h | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/lib/directory.cc b/lib/directory.cc index 196f7805..d5015e0a 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -81,12 +81,18 @@ _notmuch_filenames_create (void *ctx, notmuch_bool_t notmuch_filenames_has_more (notmuch_filenames_t *filenames) { + if (filenames == NULL) + return NULL; + return (filenames->iterator != filenames->end); } const char * notmuch_filenames_get (notmuch_filenames_t *filenames) { + if (filenames == NULL || filenames->iterator == filenames->end) + return NULL; + if (filenames->filename == NULL) { std::string term = *filenames->iterator; @@ -101,6 +107,9 @@ notmuch_filenames_get (notmuch_filenames_t *filenames) void notmuch_filenames_advance (notmuch_filenames_t *filenames) { + if (filenames == NULL) + return; + if (filenames->filename) { talloc_free (filenames->filename); filenames->filename = NULL; @@ -113,6 +122,9 @@ notmuch_filenames_advance (notmuch_filenames_t *filenames) void notmuch_filenames_destroy (notmuch_filenames_t *filenames) { + if (filenames == NULL) + return; + talloc_free (filenames); } diff --git a/lib/notmuch.h b/lib/notmuch.h index 110061c2..f3e1d286 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -996,6 +996,9 @@ notmuch_directory_destroy (notmuch_directory_t *directory); * When this function returns TRUE, notmuch_filenames_get will return * a valid string. Whereas when this function returns FALSE, * notmuch_filenames_get will return NULL. + * + * It is acceptable to pass NULL for 'filenames', in which case this + * function will always return FALSE. */ notmuch_bool_t notmuch_filenames_has_more (notmuch_filenames_t *filenames); @@ -1004,11 +1007,17 @@ notmuch_filenames_has_more (notmuch_filenames_t *filenames); * * Note: The returned string belongs to 'filenames' and has a lifetime * identical to it (and the directory to which it ultimately belongs). + * + * It is acceptable to pass NULL for 'filenames', in which case this + * function will always return NULL. */ const char * notmuch_filenames_get (notmuch_filenames_t *filenames); /* Advance the 'filenames' iterator to the next filename. + * + * It is acceptable to pass NULL for 'filenames', in which case this + * function will do nothing. */ void notmuch_filenames_advance (notmuch_filenames_t *filenames); @@ -1018,6 +1027,9 @@ notmuch_filenames_advance (notmuch_filenames_t *filenames); * It's not strictly necessary to call this function. All memory from * the notmuch_filenames_t object will be reclaimed when the * containing directory object is destroyed. + * + * It is acceptable to pass NULL for 'filenames', in which case this + * function will do nothing. */ void notmuch_filenames_destroy (notmuch_filenames_t *filenames); From 1a38cb841ca4ce87f6b57f9b1baf1a5f8b8e35b7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 6 Jan 2010 14:35:56 -0800 Subject: [PATCH 48/63] notmuch new: Never ask the database for any names from a new directory. When we know that we are adding a new directory to the database, (and we therefore are using inode rather than strcmp-based sorting of the filenames), then we *never* want to see any names from the database. If we get any names that could only make us inadvertently remove files that we just added. Since it's not obvious from the Xapian documentation whether new terms being added as part of new documents will appear in the in-progress all-terms iteration we are using, (and this might differ based on Xapian backend and also might differ based on how many new directories are added and whether a flush threshold is reached). For all of these reasons, we play it safe and use NULL rather than a real notmuch_filenames_t iterator in this case to avoid any problem. --- notmuch-new.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 7d15fe91..d1298aff 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -221,7 +221,7 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_filenames_t *db_files = NULL; notmuch_filenames_t *db_subdirs = NULL; struct stat st; - notmuch_bool_t is_maildir; + notmuch_bool_t is_maildir, new_directory; if (stat (path, &st)) { fprintf (stderr, "Error reading directory %s: %s\n", @@ -239,15 +239,23 @@ add_files_recursive (notmuch_database_t *notmuch, directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); - db_files = notmuch_directory_get_child_files (directory); - db_subdirs = notmuch_directory_get_child_directories (directory); + + if (db_mtime == 0) { + new_directory = TRUE; + db_files = NULL; + db_subdirs = NULL; + } else { + new_directory = FALSE; + db_files = notmuch_directory_get_child_files (directory); + db_subdirs = notmuch_directory_get_child_directories (directory); + } /* If the database knows about this directory, then we sort based * on strcmp to match the database sorting. Otherwise, we can do * inode-based sorting for faster filesystem operation. */ num_fs_entries = scandir (path, &fs_entries, 0, - db_mtime ? - dirent_sort_strcmp_name : dirent_sort_inode); + new_directory ? + dirent_sort_inode : dirent_sort_strcmp_name); if (num_fs_entries == -1) { fprintf (stderr, "Error opening directory %s: %s\n", From a274848f95e3eb074b48d14cf22774c75b80c735 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 09:22:34 -0800 Subject: [PATCH 49/63] notmuch_message_get_filename: Support old-style filename storage. When a notmuch database is upgraded to the new database format, (to support file rename and deletion), any message documents corresponding to deleted files will not currently be upgraded. This means that a search matching these documents will find no filenames in the expected place. Go ahead and return the filename as originally stored, (rather than aborting with an internal error), in this case. --- lib/message.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/message.cc b/lib/message.cc index 82e8fce7..0fc54668 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -441,7 +441,19 @@ notmuch_message_get_filename (notmuch_message_t *message) if (i == message->doc.termlist_end () || strncmp (direntry, prefix, prefix_len)) { - INTERNAL_ERROR ("message with no filename"); + /* A message document created by an old version of notmuch + * (prior to rename support) will have the filename in the + * data of the document rather than as a file-direntry term. */ + const char *data; + + data = message->doc.get_data ().c_str (); + + if (data == NULL) + INTERNAL_ERROR ("message with no filename"); + + message->filename = talloc_strdup (message, data); + + return message->filename; } direntry += prefix_len; From 6ed606c19edfe06d2dfd48854fc97f8502eaaf7c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 09:31:58 -0800 Subject: [PATCH 50/63] lib: Clarify internal documentation of _notmuch_database_filename_to_direntry The original wording made it sound like this function was just doing some string manipulation. But this function actually creates new directory documents as a side effect. So make that explicit in its documentation. --- lib/database.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/database.cc b/lib/database.cc index dc967c8c..125b37eb 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -723,6 +723,9 @@ _notmuch_database_get_directory_path (void *ctx, * database path or absolute with initial components identical to * database path), return a new string (with 'ctx' as the talloc * owner) suitable for use as a direntry term value. + * + * The necessary directory documents will be created in the database + * as needed. */ notmuch_status_t _notmuch_database_filename_to_direntry (void *ctx, From f93b7218c3e2d11c5b3cdd4c367a42ca7a7ede77 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 10:19:44 -0800 Subject: [PATCH 51/63] lib: Consolidate checks for read-only database. Previously, many checks were deep in the library just before a cast operation. These have now been replaced with internal errors and new checks have instead been added at the beginning of all top-levelentry points requiring a read-write database. The new checks now also use a single function for checking and printing the error message. This will give us a convenient location to extend the check, (such as based on database version as well). --- lib/database.cc | 28 ++++++++++++------ lib/directory.cc | 15 ++++------ lib/message.cc | 67 +++++++++++++++++++++++++++++++------------ lib/notmuch-private.h | 3 ++ lib/notmuch.h | 29 +++++++++++++++++-- 5 files changed, 104 insertions(+), 38 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 125b37eb..807e39ed 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -475,6 +475,17 @@ notmuch_database_create (const char *path) return notmuch; } +notmuch_status_t +_notmuch_database_ensure_writable (notmuch_database_t *notmuch) +{ + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { + fprintf (stderr, "Cannot write to a read-only database.\n"); + return NOTMUCH_STATUS_READONLY_DATABASE; + } + + return NOTMUCH_STATUS_SUCCESS; +} + notmuch_database_t * notmuch_database_open (const char *path, notmuch_database_mode_t mode) @@ -1034,11 +1045,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch, if (message_ret) *message_ret = NULL; + ret = _notmuch_database_ensure_writable (notmuch); + if (ret) + return ret; + message_file = notmuch_message_file_open (filename); - if (message_file == NULL) { - ret = NOTMUCH_STATUS_FILE_ERROR; - goto DONE; - } + if (message_file == NULL) + return NOTMUCH_STATUS_FILE_ERROR; notmuch_message_file_restrict_headers (message_file, "date", @@ -1173,10 +1186,9 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, Xapian::Document document; notmuch_status_t status; - if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { - fprintf (stderr, "Attempted to update a read-only database.\n"); - return NOTMUCH_STATUS_READONLY_DATABASE; - } + status = _notmuch_database_ensure_writable (notmuch); + if (status) + return status; db = static_cast (notmuch->xapian_db); diff --git a/lib/directory.cc b/lib/directory.cc index d5015e0a..461c0cc3 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -182,11 +182,8 @@ _notmuch_directory_create (notmuch_database_t *notmuch, path = _notmuch_database_relative_path (notmuch, path); - if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { - fprintf (stderr, "Attempted to update a read-only database.\n"); - *status_ret = NOTMUCH_STATUS_READONLY_DATABASE; - return NULL; - } + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) + INTERNAL_ERROR ("Failure to ensure database is writable"); db = static_cast (notmuch->xapian_db); @@ -268,11 +265,11 @@ notmuch_directory_set_mtime (notmuch_directory_t *directory, { notmuch_database_t *notmuch = directory->notmuch; Xapian::WritableDatabase *db; + notmuch_status_t status; - if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { - fprintf (stderr, "Attempted to update a read-only database.\n"); - return NOTMUCH_STATUS_READONLY_DATABASE; - } + status = _notmuch_database_ensure_writable (notmuch); + if (status) + return status; db = static_cast (notmuch->xapian_db); diff --git a/lib/message.cc b/lib/message.cc index 0fc54668..ea9c8bd9 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -174,11 +174,6 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, unsigned int doc_id; char *term; - if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { - *status_ret = NOTMUCH_PRIVATE_STATUS_READONLY_DATABASE; - return NULL; - } - *status_ret = NOTMUCH_PRIVATE_STATUS_SUCCESS; message = notmuch_database_find_message (notmuch, message_id); @@ -192,6 +187,9 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, return NULL; } + if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) + INTERNAL_ERROR ("Failure to ensure database is writable."); + db = static_cast (notmuch->xapian_db); try { doc.add_term (term); @@ -706,7 +704,12 @@ _notmuch_message_remove_term (notmuch_message_t *message, notmuch_status_t notmuch_message_add_tag (notmuch_message_t *message, const char *tag) { - notmuch_private_status_t status; + notmuch_private_status_t private_status; + notmuch_status_t status; + + status = _notmuch_database_ensure_writable (message->notmuch); + if (status) + return status; if (tag == NULL) return NOTMUCH_STATUS_NULL_POINTER; @@ -714,10 +717,10 @@ notmuch_message_add_tag (notmuch_message_t *message, const char *tag) if (strlen (tag) > NOTMUCH_TAG_MAX) return NOTMUCH_STATUS_TAG_TOO_LONG; - status = _notmuch_message_add_term (message, "tag", tag); - if (status) { + private_status = _notmuch_message_add_term (message, "tag", tag); + if (private_status) { INTERNAL_ERROR ("_notmuch_message_add_term return unexpected value: %d\n", - status); + private_status); } if (! message->frozen) @@ -729,7 +732,12 @@ notmuch_message_add_tag (notmuch_message_t *message, const char *tag) notmuch_status_t notmuch_message_remove_tag (notmuch_message_t *message, const char *tag) { - notmuch_private_status_t status; + notmuch_private_status_t private_status; + notmuch_status_t status; + + status = _notmuch_database_ensure_writable (message->notmuch); + if (status) + return status; if (tag == NULL) return NOTMUCH_STATUS_NULL_POINTER; @@ -737,10 +745,10 @@ notmuch_message_remove_tag (notmuch_message_t *message, const char *tag) if (strlen (tag) > NOTMUCH_TAG_MAX) return NOTMUCH_STATUS_TAG_TOO_LONG; - status = _notmuch_message_remove_term (message, "tag", tag); - if (status) { + private_status = _notmuch_message_remove_term (message, "tag", tag); + if (private_status) { INTERNAL_ERROR ("_notmuch_message_remove_term return unexpected value: %d\n", - status); + private_status); } if (! message->frozen) @@ -749,39 +757,60 @@ notmuch_message_remove_tag (notmuch_message_t *message, const char *tag) return NOTMUCH_STATUS_SUCCESS; } -void +notmuch_status_t notmuch_message_remove_all_tags (notmuch_message_t *message) { - notmuch_private_status_t status; + notmuch_private_status_t private_status; + notmuch_status_t status; notmuch_tags_t *tags; const char *tag; + status = _notmuch_database_ensure_writable (message->notmuch); + if (status) + return status; + for (tags = notmuch_message_get_tags (message); notmuch_tags_has_more (tags); notmuch_tags_advance (tags)) { tag = notmuch_tags_get (tags); - status = _notmuch_message_remove_term (message, "tag", tag); - if (status) { + private_status = _notmuch_message_remove_term (message, "tag", tag); + if (private_status) { INTERNAL_ERROR ("_notmuch_message_remove_term return unexpected value: %d\n", - status); + private_status); } } if (! message->frozen) _notmuch_message_sync (message); + + return NOTMUCH_STATUS_SUCCESS; } -void +notmuch_status_t notmuch_message_freeze (notmuch_message_t *message) { + notmuch_status_t status; + + status = _notmuch_database_ensure_writable (message->notmuch); + if (status) + return status; + message->frozen++; + + return NOTMUCH_STATUS_SUCCESS; } notmuch_status_t notmuch_message_thaw (notmuch_message_t *message) { + notmuch_status_t status; + + status = _notmuch_database_ensure_writable (message->notmuch); + if (status) + return status; + if (message->frozen > 0) { message->frozen--; if (message->frozen == 0) diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 8b582640..4eb82619 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -151,6 +151,9 @@ typedef enum _notmuch_private_status { const char * _find_prefix (const char *name); +notmuch_status_t +_notmuch_database_ensure_writable (notmuch_database_t *notmuch); + const char * _notmuch_database_relative_path (notmuch_database_t *notmuch, const char *path); diff --git a/lib/notmuch.h b/lib/notmuch.h index f3e1d286..d0337304 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -229,6 +229,9 @@ notmuch_database_get_directory (notmuch_database_t *database, * * NOTMUCH_STATUS_FILE_NOT_EMAIL: the contents of filename don't look * like an email message. Nothing added to the database. + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so no message can be added. */ notmuch_status_t notmuch_database_add_message (notmuch_database_t *database, @@ -252,6 +255,9 @@ notmuch_database_add_message (notmuch_database_t *database, * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: This filename was removed but * the message persists in the database with at least one other * filename. + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so no message can be removed. */ notmuch_status_t notmuch_database_remove_message (notmuch_database_t *database, @@ -789,6 +795,9 @@ notmuch_message_get_tags (notmuch_message_t *message); * * NOTMUCH_STATUS_TAG_TOO_LONG: The length of 'tag' is too long * (exceeds NOTMUCH_TAG_MAX) + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so message cannot be modified. */ notmuch_status_t notmuch_message_add_tag (notmuch_message_t *message, const char *tag); @@ -803,6 +812,9 @@ notmuch_message_add_tag (notmuch_message_t *message, const char *tag); * * NOTMUCH_STATUS_TAG_TOO_LONG: The length of 'tag' is too long * (exceeds NOTMUCH_TAG_MAX) + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so message cannot be modified. */ notmuch_status_t notmuch_message_remove_tag (notmuch_message_t *message, const char *tag); @@ -811,8 +823,11 @@ notmuch_message_remove_tag (notmuch_message_t *message, const char *tag); * * See notmuch_message_freeze for an example showing how to safely * replace tag values. + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so message cannot be modified. */ -void +notmuch_status_t notmuch_message_remove_all_tags (notmuch_message_t *message); /* Freeze the current state of 'message' within the database. @@ -847,8 +862,15 @@ notmuch_message_remove_all_tags (notmuch_message_t *message); * somehow getting interrupted. This could result in the message being * left with no tags if the interruption happened after * notmuch_message_remove_all_tags but before notmuch_message_add_tag. + * + * Return value: + * + * NOTMUCH_STATUS_SUCCESS: Message successfully frozen. + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so message cannot be modified. */ -void +notmuch_status_t notmuch_message_freeze (notmuch_message_t *message); /* Thaw the current 'message', synchronizing any changes that may have @@ -957,6 +979,9 @@ notmuch_tags_destroy (notmuch_tags_t *tags); * * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception * occurred, mtime not stored. + * + * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * mode so directory mtime cannot be modified. */ notmuch_status_t notmuch_directory_set_mtime (notmuch_directory_t *directory, From 807aef93d3bf02884f7a37b44b894c11d9e1df58 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 10:29:05 -0800 Subject: [PATCH 52/63] Prefer READ_ONLY consistently over READONLY. Previously we had NOTMUCH_DATABASE_MODE_READ_ONLY but NOTMUCH_STATUS_READONLY_DATABASE which was ugly and confusing. Rename the latter to NOTMUCH_STATUS_READ_ONLY_DATABASE for consistency. --- lib/database.cc | 4 ++-- lib/notmuch-private.h | 2 +- lib/notmuch.h | 20 ++++++++++---------- notmuch-new.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 807e39ed..3de7f295 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -198,7 +198,7 @@ notmuch_status_to_string (notmuch_status_t status) return "No error occurred"; case NOTMUCH_STATUS_OUT_OF_MEMORY: return "Out of memory"; - case NOTMUCH_STATUS_READONLY_DATABASE: + case NOTMUCH_STATUS_READ_ONLY_DATABASE: return "Attempt to write to a read-only database"; case NOTMUCH_STATUS_XAPIAN_EXCEPTION: return "A Xapian exception occurred"; @@ -480,7 +480,7 @@ _notmuch_database_ensure_writable (notmuch_database_t *notmuch) { if (notmuch->mode == NOTMUCH_DATABASE_MODE_READ_ONLY) { fprintf (stderr, "Cannot write to a read-only database.\n"); - return NOTMUCH_STATUS_READONLY_DATABASE; + return NOTMUCH_STATUS_READ_ONLY_DATABASE; } return NOTMUCH_STATUS_SUCCESS; diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 4eb82619..27b1317d 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -112,7 +112,7 @@ typedef enum _notmuch_private_status { /* First, copy all the public status values. */ NOTMUCH_PRIVATE_STATUS_SUCCESS = NOTMUCH_STATUS_SUCCESS, NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY = NOTMUCH_STATUS_OUT_OF_MEMORY, - NOTMUCH_PRIVATE_STATUS_READONLY_DATABASE = NOTMUCH_STATUS_READONLY_DATABASE, + NOTMUCH_PRIVATE_STATUS_READ_ONLY_DATABASE = NOTMUCH_STATUS_READ_ONLY_DATABASE, NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION = NOTMUCH_STATUS_XAPIAN_EXCEPTION, NOTMUCH_PRIVATE_STATUS_FILE_NOT_EMAIL = NOTMUCH_STATUS_FILE_NOT_EMAIL, NOTMUCH_PRIVATE_STATUS_NULL_POINTER = NOTMUCH_STATUS_NULL_POINTER, diff --git a/lib/notmuch.h b/lib/notmuch.h index d0337304..eae0fb50 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -57,8 +57,8 @@ typedef int notmuch_bool_t; * value. Instead we should map to things like DATABASE_LOCKED or * whatever. * - * NOTMUCH_STATUS_READONLY_DATABASE: An attempt was made to write to a - * database opened in read-only mode. + * NOTMUCH_STATUS_READ_ONLY_DATABASE: An attempt was made to write to + * a database opened in read-only mode. * * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception occurred * @@ -89,7 +89,7 @@ typedef int notmuch_bool_t; typedef enum _notmuch_status { NOTMUCH_STATUS_SUCCESS = 0, NOTMUCH_STATUS_OUT_OF_MEMORY, - NOTMUCH_STATUS_READONLY_DATABASE, + NOTMUCH_STATUS_READ_ONLY_DATABASE, NOTMUCH_STATUS_XAPIAN_EXCEPTION, NOTMUCH_STATUS_FILE_ERROR, NOTMUCH_STATUS_FILE_NOT_EMAIL, @@ -230,7 +230,7 @@ notmuch_database_get_directory (notmuch_database_t *database, * NOTMUCH_STATUS_FILE_NOT_EMAIL: the contents of filename don't look * like an email message. Nothing added to the database. * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so no message can be added. */ notmuch_status_t @@ -256,7 +256,7 @@ notmuch_database_add_message (notmuch_database_t *database, * the message persists in the database with at least one other * filename. * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so no message can be removed. */ notmuch_status_t @@ -796,7 +796,7 @@ notmuch_message_get_tags (notmuch_message_t *message); * NOTMUCH_STATUS_TAG_TOO_LONG: The length of 'tag' is too long * (exceeds NOTMUCH_TAG_MAX) * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so message cannot be modified. */ notmuch_status_t @@ -813,7 +813,7 @@ notmuch_message_add_tag (notmuch_message_t *message, const char *tag); * NOTMUCH_STATUS_TAG_TOO_LONG: The length of 'tag' is too long * (exceeds NOTMUCH_TAG_MAX) * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so message cannot be modified. */ notmuch_status_t @@ -824,7 +824,7 @@ notmuch_message_remove_tag (notmuch_message_t *message, const char *tag); * See notmuch_message_freeze for an example showing how to safely * replace tag values. * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so message cannot be modified. */ notmuch_status_t @@ -867,7 +867,7 @@ notmuch_message_remove_all_tags (notmuch_message_t *message); * * NOTMUCH_STATUS_SUCCESS: Message successfully frozen. * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so message cannot be modified. */ notmuch_status_t @@ -980,7 +980,7 @@ notmuch_tags_destroy (notmuch_tags_t *tags); * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception * occurred, mtime not stored. * - * NOTMUCH_STATUS_READONLY_DATABASE: Database was opened in read-only + * NOTMUCH_STATUS_READ_ONLY_DATABASE: Database was opened in read-only * mode so directory mtime cannot be modified. */ notmuch_status_t diff --git a/notmuch-new.c b/notmuch-new.c index d1298aff..f33ba366 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -406,7 +406,7 @@ add_files_recursive (notmuch_database_t *notmuch, next); break; /* Fatal issues. Don't process anymore. */ - case NOTMUCH_STATUS_READONLY_DATABASE: + case NOTMUCH_STATUS_READ_ONLY_DATABASE: case NOTMUCH_STATUS_XAPIAN_EXCEPTION: case NOTMUCH_STATUS_OUT_OF_MEMORY: fprintf (stderr, "Error: %s. Halting processing.\n", From cb8e4bc9c0406391cedde012dfcd7ea8f6b8ef46 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 18:17:38 -0800 Subject: [PATCH 53/63] TODO: Add a couple of ideas that came up during recent coding. The notmuch_query_count_messages functions duplicates a lot of code undesirably. --- TODO | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/TODO b/TODO index 43956691..bdfe64c6 100644 --- a/TODO +++ b/TODO @@ -74,6 +74,8 @@ for selecting what gets printed). Add a "--count-only" (or so?) option to "notmuch search" for returning the count of search results. +Add documented syntax for searching all threads/messages. + Give "notmuch restore" some progress indicator. Until we get the Xapian bugs fixed that are making this operation slow, we really need to let the user know that things are still moving. @@ -146,6 +148,10 @@ notmuch initially sees all changes as interesting, and quickly learns from the user which changes are not interesting (such as the very common mailing-list footer). +Fix notmuch_query_count_messages to share code with +notmuch_query_search_messages rather than duplicating code. (And +consider renaming it as well.) + General ------- Audit everything for dealing with out-of-memory (and drop xutil.c). From 21f8fd6967c3ef8e726652bbd6944e29975508b5 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 18:20:28 -0800 Subject: [PATCH 54/63] notmuch new: Fix deletion support to recurse on removed directories. Previously, when notmuch detected that a directory had been deleted it was only removing files immediately in that directory. We now correctly recurse to also remove any directories (and files, etc.) within sub-directories, etc. --- notmuch-new.c | 67 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index f33ba366..739ca118 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -616,6 +616,49 @@ count_files (const char *path, int *count) free (fs_entries); } +/* Recursively remove all filenames from the database referring to + * 'path' (or to any of its children). */ +static void +_remove_directory (void *ctx, + notmuch_database_t *notmuch, + const char *path, + int *renamed_files, + int *removed_files) +{ + notmuch_directory_t *directory; + notmuch_filenames_t *files, *subdirs; + notmuch_status_t status; + char *absolute; + + directory = notmuch_database_get_directory (notmuch, path); + + for (files = notmuch_directory_get_child_files (directory); + notmuch_filenames_has_more (files); + notmuch_filenames_advance (files)) + { + absolute = talloc_asprintf (ctx, "%s/%s", path, + notmuch_filenames_get (files)); + status = notmuch_database_remove_message (notmuch, absolute); + if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) + *renamed_files = *renamed_files + 1; + else + *removed_files = *removed_files + 1; + talloc_free (absolute); + } + + for (subdirs = notmuch_directory_get_child_directories (directory); + notmuch_filenames_has_more (subdirs); + notmuch_filenames_advance (subdirs)) + { + absolute = talloc_asprintf (ctx, "%s/%s", path, + notmuch_filenames_get (subdirs)); + _remove_directory (ctx, notmuch, absolute, renamed_files, removed_files); + talloc_free (absolute); + } + + notmuch_directory_destroy (directory); +} + int notmuch_new_command (void *ctx, int argc, char *argv[]) { @@ -704,28 +747,8 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) } for (f = add_files_state.removed_directories->head; f; f = f->next) { - notmuch_directory_t *directory; - notmuch_filenames_t *files; - - directory = notmuch_database_get_directory (notmuch, f->filename); - - for (files = notmuch_directory_get_child_files (directory); - notmuch_filenames_has_more (files); - notmuch_filenames_advance (files)) - { - char *absolute; - - absolute = talloc_asprintf (ctx, "%s/%s", f->filename, - notmuch_filenames_get (files)); - status = notmuch_database_remove_message (notmuch, absolute); - if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) - renamed_files++; - else - removed_files++; - talloc_free (absolute); - } - - notmuch_directory_destroy (directory); + _remove_directory (ctx, notmuch, f->filename, + &renamed_files, &removed_files); } talloc_free (add_files_state.removed_files); From 909f52bd8c4bdfa11cd3e75e3d0959e0293689bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 18:26:31 -0800 Subject: [PATCH 55/63] lib: Implement versioning in the database and provide upgrade function. The recent support for renames in the database is our first time (since notmuch has had more than a single user) that we have a database format change. To support smooth upgrades we now encode a database format version number in the Xapian metadata. Going forward notmuch will emit a warning if used to read from a database with a newer version than it natively supports, and will refuse to write to a database with a newer version. The library also provides functions to query the database format version: notmuch_database_get_version to ask if notmuch wants a newer version than that: notmuch_database_needs_upgrade and a function to actually perform that upgrade: notmuch_database_upgrade --- lib/database-private.h | 2 + lib/database.cc | 216 ++++++++++++++++++++++++++++++++++++++++- lib/message.cc | 23 ++++- lib/notmuch-private.h | 3 + lib/notmuch.h | 38 +++++++- 5 files changed, 274 insertions(+), 8 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 643b0507..5891584e 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -33,6 +33,8 @@ struct _notmuch_database { Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; + + notmuch_bool_t needs_upgrade; }; /* Convert tags from Xapian internal format to notmuch format. diff --git a/lib/database.cc b/lib/database.cc index 3de7f295..6b8c9989 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -22,6 +22,8 @@ #include +#include +#include #include #include /* g_free, GPtrArray, GHashTable */ @@ -35,7 +37,12 @@ typedef struct { const char *prefix; } prefix_t; -/* Here's the current schema for our database: +#define NOTMUCH_DATABASE_VERSION 1 + +#define STRINGIFY(s) _SUB_STRINGIFY(s) +#define _SUB_STRINGIFY(s) #s + +/* Here's the current schema for our database (for NOTMUCH_DATABASE_VERSION): * * We currently have two different types of documents: mail and directory. * @@ -467,6 +474,7 @@ notmuch_database_create (const char *path) notmuch = notmuch_database_open (path, NOTMUCH_DATABASE_MODE_READ_WRITE); + notmuch_database_upgrade (notmuch, NULL, NULL); DONE: if (notmuch_path) @@ -494,7 +502,7 @@ notmuch_database_open (const char *path, char *notmuch_path = NULL, *xapian_path = NULL; struct stat st; int err; - unsigned int i; + unsigned int i, version; if (asprintf (¬much_path, "%s/%s", path, ".notmuch") == -1) { notmuch_path = NULL; @@ -522,13 +530,42 @@ notmuch_database_open (const char *path, if (notmuch->path[strlen (notmuch->path) - 1] == '/') notmuch->path[strlen (notmuch->path) - 1] = '\0'; + notmuch->needs_upgrade = FALSE; notmuch->mode = mode; try { if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) { notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, Xapian::DB_CREATE_OR_OPEN); + version = notmuch_database_get_version (notmuch); + + if (version > NOTMUCH_DATABASE_VERSION) { + fprintf (stderr, + "Error: Notmuch database at %s\n" + " has a newer database format version (%u) than supported by this\n" + " version of notmuch (%u). Refusing to open this database in\n" + " read-write mode.\n", + notmuch_path, version, NOTMUCH_DATABASE_VERSION); + notmuch->mode = NOTMUCH_DATABASE_MODE_READ_ONLY; + notmuch_database_close (notmuch); + notmuch = NULL; + goto DONE; + } + + if (version < NOTMUCH_DATABASE_VERSION) + notmuch->needs_upgrade = TRUE; } else { notmuch->xapian_db = new Xapian::Database (xapian_path); + version = notmuch_database_get_version (notmuch); + if (version > NOTMUCH_DATABASE_VERSION) + { + fprintf (stderr, + "Warning: Notmuch database at %s\n" + " has a newer database format version (%u) than supported by this\n" + " version of notmuch (%u). Some operations may behave incorrectly,\n" + " (but the database will not be harmed since it is being opened\n" + " in read-only mode).\n", + notmuch_path, version, NOTMUCH_DATABASE_VERSION); + } } notmuch->query_parser = new Xapian::QueryParser; notmuch->term_gen = new Xapian::TermGenerator; @@ -592,6 +629,181 @@ notmuch_database_get_path (notmuch_database_t *notmuch) return notmuch->path; } +unsigned int +notmuch_database_get_version (notmuch_database_t *notmuch) +{ + unsigned int version; + string version_string; + const char *str; + char *end; + + version_string = notmuch->xapian_db->get_metadata ("version"); + if (version_string.empty ()) + return 0; + + str = version_string.c_str (); + if (str == NULL || *str == '\0') + return 0; + + version = strtoul (str, &end, 10); + if (*end != '\0') + INTERNAL_ERROR ("Malformed database version: %s", str); + + return version; +} + +notmuch_bool_t +notmuch_database_needs_upgrade (notmuch_database_t *notmuch) +{ + return notmuch->needs_upgrade; +} + +static volatile sig_atomic_t do_progress_notify = 0; + +static void +handle_sigalrm (unused (int signal)) +{ + do_progress_notify = 1; +} + +/* Upgrade the current database. + * + * After opening a database in read-write mode, the client should + * check if an upgrade is needed (notmuch_database_needs_upgrade) and + * if so, upgrade with this function before making any modifications. + * + * The optional progress_notify callback can be used by the caller to + * provide progress indication to the user. If non-NULL it will be + * called periodically with 'count' as the number of messages upgraded + * so far and 'total' the overall number of messages that will be + * converted. + */ +notmuch_status_t +notmuch_database_upgrade (notmuch_database_t *notmuch, + void (*progress_notify) (void *closure, + unsigned int count, + unsigned int total), + void *closure) +{ + Xapian::WritableDatabase *db; + struct sigaction action; + struct itimerval timerval; + notmuch_bool_t timer_is_active = FALSE; + unsigned int version; + notmuch_status_t status; + + status = _notmuch_database_ensure_writable (notmuch); + if (status) + return status; + + db = static_cast (notmuch->xapian_db); + + version = notmuch_database_get_version (notmuch); + + if (version >= NOTMUCH_DATABASE_VERSION) + return NOTMUCH_STATUS_SUCCESS; + + if (progress_notify) { + /* Setup our handler for SIGALRM */ + memset (&action, 0, sizeof (struct sigaction)); + action.sa_handler = handle_sigalrm; + sigemptyset (&action.sa_mask); + action.sa_flags = SA_RESTART; + sigaction (SIGALRM, &action, NULL); + + /* Then start a timer to send SIGALRM once per second. */ + timerval.it_interval.tv_sec = 1; + timerval.it_interval.tv_usec = 0; + timerval.it_value.tv_sec = 1; + timerval.it_value.tv_usec = 0; + setitimer (ITIMER_REAL, &timerval, NULL); + + timer_is_active = TRUE; + } + + /* Before version 1, each message document had its filename in the + * data field. Move that into the new format by calling + * notmuch_message_add_filename. + */ + if (version < 1) { + unsigned int count = 0, total; + notmuch_query_t *query = notmuch_query_create (notmuch, ""); + notmuch_messages_t *messages; + notmuch_message_t *message; + + total = notmuch_query_count_messages (query); + + for (messages = notmuch_query_search_messages (query); + notmuch_messages_has_more (messages); + notmuch_messages_advance (messages)) + { + if (do_progress_notify) + progress_notify (closure, count, total); + + message = notmuch_messages_get (messages); + + _notmuch_message_upgrade_filename_storage (message); + + count++; + } + } + + /* Also, before version 1 we stored directory timestamps in + * XTIMESTAMP documents instead of the current XDIRECTORY + * documents. So convert those as well. */ + if (version < 1) { + Xapian::TermIterator t, t_end; + + t_end = notmuch->xapian_db->allterms_end ("XTIMESTAMP"); + + for (t = notmuch->xapian_db->allterms_begin ("XTIMESTAMP"); + t != t_end; + t++) + { + Xapian::PostingIterator p, p_end; + std::string term = *t; + + p_end = notmuch->xapian_db->postlist_end (term); + + for (p = notmuch->xapian_db->postlist_begin (term); + p != p_end; + p++) + { + Xapian::Document document; + time_t mtime; + notmuch_directory_t *directory; + + document = find_document_for_doc_id (notmuch, *p); + mtime = Xapian::sortable_unserialise ( + document.get_value (NOTMUCH_VALUE_TIMESTAMP)); + + directory = notmuch_database_get_directory (notmuch, + term.c_str() + 10); + notmuch_directory_set_mtime (directory, mtime); + notmuch_directory_destroy (directory); + } + } + } + + db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION)); + db->flush (); + + if (timer_is_active) { + /* Now stop the timer. */ + timerval.it_interval.tv_sec = 0; + timerval.it_interval.tv_usec = 0; + timerval.it_value.tv_sec = 0; + timerval.it_value.tv_usec = 0; + setitimer (ITIMER_REAL, &timerval, NULL); + + /* And disable the signal handler. */ + action.sa_handler = SIG_IGN; + sigaction (SIGALRM, &action, NULL); + } + + return NOTMUCH_STATUS_SUCCESS; +} + /* We allow the user to use arbitrarily long paths for directories. But * we have a term-length limit. So if we exceed that, we'll use the * SHA-1 of the path for the database term. diff --git a/lib/message.cc b/lib/message.cc index ea9c8bd9..baeaa469 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -416,6 +416,24 @@ _notmuch_message_add_filename (notmuch_message_t *message, return NOTMUCH_STATUS_SUCCESS; } +/* Move the filename from the data field (as it was in database format + * version 0) to a file-direntry term instead (as in database format + * version 1). + */ +void +_notmuch_message_upgrade_filename_storage (notmuch_message_t *message) +{ + char *filename; + + filename = talloc_strdup (message, message->doc.get_data ().c_str ()); + if (filename && *filename != '\0') { + _notmuch_message_add_filename (message, filename); + message->doc.set_data (""); + _notmuch_message_sync (message); + } + talloc_free (filename); +} + const char * notmuch_message_get_filename (notmuch_message_t *message) { @@ -441,7 +459,10 @@ notmuch_message_get_filename (notmuch_message_t *message) { /* A message document created by an old version of notmuch * (prior to rename support) will have the filename in the - * data of the document rather than as a file-direntry term. */ + * data of the document rather than as a file-direntry term. + * + * It would be nice to do the upgrade of the document directly + * here, but the database is likely open in read-only mode. */ const char *data; data = message->doc.get_data ().c_str (); diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 27b1317d..f9adea72 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -238,6 +238,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message, const char *prefix_name, const char *text); +void +_notmuch_message_upgrade_filename_storage (notmuch_message_t *message); + notmuch_status_t _notmuch_message_add_filename (notmuch_message_t *message, const char *filename); diff --git a/lib/notmuch.h b/lib/notmuch.h index eae0fb50..d8508dfd 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -183,15 +183,43 @@ notmuch_database_close (notmuch_database_t *database); const char * notmuch_database_get_path (notmuch_database_t *database); +/* Return the database format version of the given database. */ +unsigned int +notmuch_database_get_version (notmuch_database_t *database); + +/* Does this database need to be upgraded before writing to it? + * + * If this function returns TRUE then no functions that modify the + * database (notmuch_database_add_message, notmuch_message_add_tag, + * notmuch_directory_set_mtime, etc.) will work unless the function + * notmuch_database_upgrade is called successfully first. */ +notmuch_bool_t +notmuch_database_needs_upgrade (notmuch_database_t *database); + +/* Upgrade the current database. + * + * After opening a database in read-write mode, the client should + * check if an upgrade is needed (notmuch_database_needs_upgrade) and + * if so, upgrade with this function before making any modifications. + * + * The optional progress_notify callback can be used by the caller to + * provide progress indication to the user. If non-NULL it will be + * called periodically with 'count' as the number of messages upgraded + * so far and 'total' the overall number of messages that will be + * converted. + */ +notmuch_status_t +notmuch_database_upgrade (notmuch_database_t *database, + void (*progress_notify) (void *closure, + unsigned int count, + unsigned int total), + void *closure); + /* Retrieve a directory object from the database for 'path'. * * Here, 'path' should be a path relative to the path of 'database' * (see notmuch_database_get_path), or else should be an absolute path * with initial components that match the path of 'database'. - * - * Note: The resulting notmuch_directory_t object will represent the - * state as it currently exists in the database, (and will not reflect - * subsequent changes). */ notmuch_directory_t * notmuch_database_get_directory (notmuch_database_t *database, @@ -990,7 +1018,7 @@ notmuch_directory_set_mtime (notmuch_directory_t *directory, /* Get the mtime of a directory, (as previously stored with * notmuch_directory_set_mtime). * - * Returns 0 if not mtime has previously been stored for this + * Returns 0 if no mtime has previously been stored for this * directory.*/ time_t notmuch_directory_get_mtime (notmuch_directory_t *directory); From e307e990c9394b6426e55b28c748c265358a1c89 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 18:30:32 -0800 Subject: [PATCH 56/63] notmuch new: Automatically upgrade the database if necessary. This takes advantage of the recently added library support to detect if the database needs to be upgraded and then automatically performs that upgrade, (with a nice progress report). --- notmuch-new.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/notmuch-new.c b/notmuch-new.c index 739ca118..0d06253d 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -616,6 +616,28 @@ count_files (const char *path, int *count) free (fs_entries); } +static void +upgrade_print_progress (void *closure, + unsigned int count, + unsigned int total) +{ + add_files_state_t *state = closure; + struct timeval tv_now; + double elapsed_overall, rate_overall, time_remaining; + + gettimeofday (&tv_now, NULL); + + elapsed_overall = notmuch_time_elapsed (state->tv_start, tv_now); + rate_overall = count / elapsed_overall; + time_remaining = ((total - count) / rate_overall); + + printf ("Upgraded %d of %d messages (", count, total); + notmuch_time_print_formatted_seconds (time_remaining); + printf (" remaining). \r"); + + fflush (stdout); +} + /* Recursively remove all filenames from the database referring to * 'path' (or to any of its children). */ static void @@ -718,6 +740,18 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) } else { notmuch = notmuch_database_open (db_path, NOTMUCH_DATABASE_MODE_READ_WRITE); + if (notmuch == NULL) + return 1; + + if (notmuch_database_needs_upgrade (notmuch)) { + printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n"); + gettimeofday (&add_files_state.tv_start, NULL); + notmuch_database_upgrade (notmuch, upgrade_print_progress, + &add_files_state); + printf ("Your notmuch database has now been upgraded to database format version %u.\n", + notmuch_database_get_version (notmuch)); + } + add_files_state.total_files = 0; } From 1c86b48329c0cd69b4353accda4d28739ec1c0f3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 7 Jan 2010 21:24:44 -0800 Subject: [PATCH 57/63] notmuch new: Fix progress notification on database upgrade. This was firing continuously rather than just once per second as intended. --- lib/database.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/database.cc b/lib/database.cc index 6b8c9989..831b4a16 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -737,8 +737,10 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, notmuch_messages_has_more (messages); notmuch_messages_advance (messages)) { - if (do_progress_notify) + if (do_progress_notify) { progress_notify (closure, count, total); + do_progress_notify = 0; + } message = notmuch_messages_get (messages); From c485c51585d8e51d289eb9830203fa322d7a0740 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 8 Jan 2010 08:45:16 -0800 Subject: [PATCH 58/63] notmuch new: Don't prevent database upgrade from being interrupted. Our signal handler is designed to quickly flush out changes and then exit. But if a database upgrade is in progress when the user interrupts, then we just want to immediately abort. We could do something fancy like add a return value to our progress_notify function to allow it to tell the upgrade process to abort. But it's actually much cleaner and robust to delay the installation of our signal handler so that the default abort happens on SIGINT. --- notmuch-new.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 0d06253d..55c3dc1d 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -711,13 +711,6 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) } } - /* Setup our handler for SIGINT */ - memset (&action, 0, sizeof (struct sigaction)); - action.sa_handler = handle_sigint; - sigemptyset (&action.sa_mask); - action.sa_flags = SA_RESTART; - sigaction (SIGINT, &action, NULL); - config = notmuch_config_open (ctx, NULL, NULL); if (config == NULL) return 1; @@ -758,6 +751,15 @@ notmuch_new_command (void *ctx, int argc, char *argv[]) if (notmuch == NULL) return 1; + /* Setup our handler for SIGINT. We do this after having + * potentially done a database upgrade we this interrupt handler + * won't support. */ + memset (&action, 0, sizeof (struct sigaction)); + action.sa_handler = handle_sigint; + sigemptyset (&action.sa_mask); + action.sa_flags = SA_RESTART; + sigaction (SIGINT, &action, NULL); + talloc_free (dot_notmuch_path); dot_notmuch_path = NULL; From 5fe5e802ab3101a375ec1262770955904e169e47 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 8 Jan 2010 09:52:25 -0800 Subject: [PATCH 59/63] lib: Delete stale timestamp documents during database upgrade. Once we move the timestamp to the new directory document, we don't need the old one anymore. --- lib/database.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/database.cc b/lib/database.cc index 831b4a16..d0262722 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -783,6 +783,8 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, term.c_str() + 10); notmuch_directory_set_mtime (directory, mtime); notmuch_directory_destroy (directory); + + db->delete_document (*p); } } } From d12801c8b4d04a50fcb912b75017244fb09658e8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 9 Jan 2010 11:13:12 -0800 Subject: [PATCH 60/63] lib: Split the database upgrade into two phases for safer operation. The first phase copies data from the old format to the new format without deleting anything. This allows an old notmuch to still use the database if the upgrade process gets interrupted. The second phase performs the deletion (after updating the database version number). If the second phase is interrupted, there will be some unused data in the database, but it shouldn't cause any actual harm. --- lib/database.cc | 78 ++++++++++++++++++++++++++++++++++++++++--- lib/message.cc | 22 +++++------- lib/notmuch-private.h | 16 +++++++++ 3 files changed, 97 insertions(+), 19 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index d0262722..19f960e2 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -722,7 +722,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, } /* Before version 1, each message document had its filename in the - * data field. Move that into the new format by calling + * data field. Copy that into the new format by calling * notmuch_message_add_filename. */ if (version < 1) { @@ -730,6 +730,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, notmuch_query_t *query = notmuch_query_create (notmuch, ""); notmuch_messages_t *messages; notmuch_message_t *message; + char *filename; total = notmuch_query_count_messages (query); @@ -744,15 +745,24 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, message = notmuch_messages_get (messages); - _notmuch_message_upgrade_filename_storage (message); + filename = _notmuch_message_talloc_copy_data (message); + if (filename && *filename != '\0') { + _notmuch_message_add_filename (message, filename); + _notmuch_message_sync (message); + } + talloc_free (filename); + + notmuch_message_destroy (message); count++; } + + notmuch_query_destroy (query); } /* Also, before version 1 we stored directory timestamps in * XTIMESTAMP documents instead of the current XDIRECTORY - * documents. So convert those as well. */ + * documents. So copy those as well. */ if (version < 1) { Xapian::TermIterator t, t_end; @@ -783,8 +793,6 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, term.c_str() + 10); notmuch_directory_set_mtime (directory, mtime); notmuch_directory_destroy (directory); - - db->delete_document (*p); } } } @@ -792,6 +800,66 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION)); db->flush (); + /* Now that the upgrade is complete we can remove the old data + * and documents that are no longer needed. */ + if (version < 1) { + unsigned int count = 0, total; + notmuch_query_t *query = notmuch_query_create (notmuch, ""); + notmuch_messages_t *messages; + notmuch_message_t *message; + char *filename; + + total = notmuch_query_count_messages (query); + + for (messages = notmuch_query_search_messages (query); + notmuch_messages_has_more (messages); + notmuch_messages_advance (messages)) + { + if (do_progress_notify) { + progress_notify (closure, count, total); + do_progress_notify = 0; + } + + message = notmuch_messages_get (messages); + + filename = _notmuch_message_talloc_copy_data (message); + if (filename && *filename != '\0') { + _notmuch_message_clear_data (message); + _notmuch_message_sync (message); + } + talloc_free (filename); + + notmuch_message_destroy (message); + + count++; + } + + notmuch_query_destroy (query); + } + + if (version < 1) { + Xapian::TermIterator t, t_end; + + t_end = notmuch->xapian_db->allterms_end ("XTIMESTAMP"); + + for (t = notmuch->xapian_db->allterms_begin ("XTIMESTAMP"); + t != t_end; + t++) + { + Xapian::PostingIterator p, p_end; + std::string term = *t; + + p_end = notmuch->xapian_db->postlist_end (term); + + for (p = notmuch->xapian_db->postlist_begin (term); + p != p_end; + p++) + { + db->delete_document (*p); + } + } + } + if (timer_is_active) { /* Now stop the timer. */ timerval.it_interval.tv_sec = 0; diff --git a/lib/message.cc b/lib/message.cc index baeaa469..1cda55a1 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -416,22 +416,16 @@ _notmuch_message_add_filename (notmuch_message_t *message, return NOTMUCH_STATUS_SUCCESS; } -/* Move the filename from the data field (as it was in database format - * version 0) to a file-direntry term instead (as in database format - * version 1). - */ -void -_notmuch_message_upgrade_filename_storage (notmuch_message_t *message) +char * +_notmuch_message_talloc_copy_data (notmuch_message_t *message) { - char *filename; + return talloc_strdup (message, message->doc.get_data ().c_str ()); +} - filename = talloc_strdup (message, message->doc.get_data ().c_str ()); - if (filename && *filename != '\0') { - _notmuch_message_add_filename (message, filename); - message->doc.set_data (""); - _notmuch_message_sync (message); - } - talloc_free (filename); +void +_notmuch_message_clear_data (notmuch_message_t *message) +{ + message->doc.set_data (""); } const char * diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index f9adea72..c7fb0ef8 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -258,6 +258,22 @@ _notmuch_message_sync (notmuch_message_t *message); void _notmuch_message_close (notmuch_message_t *message); +/* Get a copy of the data in this message document. + * + * Caller should talloc_free the result when done. + * + * This function is intended to support database upgrade and really + * shouldn't be used otherwise. */ +char * +_notmuch_message_talloc_copy_data (notmuch_message_t *message); + +/* Clear the data in this message document. + * + * This function is intended to support database upgrade and really + * shouldn't be used otherwise. */ +void +_notmuch_message_clear_data (notmuch_message_t *message); + /* index.cc */ notmuch_status_t From 45b1856782beca246cd2670ea6b122b0c9e06fc0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 9 Jan 2010 11:16:40 -0800 Subject: [PATCH 61/63] lib: Explicitly set BoolWeight when searching. All notmuch searches currently sort by value (either date or message ID) so it's just wasted effort for Xapian to compute relevance values for each result. We now explicitly tell Xapian that we're uninterested in the relevance values. --- lib/query.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/query.cc b/lib/query.cc index 9106b92d..2c8d1672 100644 --- a/lib/query.cc +++ b/lib/query.cc @@ -134,6 +134,8 @@ notmuch_query_search_messages (notmuch_query_t *query) mail_query, string_query); } + enquire.set_weighting_scheme (Xapian::BoolWeight()); + switch (query->sort) { case NOTMUCH_SORT_OLDEST_FIRST: enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, FALSE); From ccf2e0cc4211c276da1db43cdca7ee11018c391d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 9 Jan 2010 11:18:27 -0800 Subject: [PATCH 62/63] lib: Add non-content terms with a WDF value of 0. The WDF is the "within-document frequency" value for a particular term. It's intended to provide an indication of how frequent a term is within a document, (for use in computing relevance). Xapian's term generator already computes WDF values when we use that, (which we do for indexing all mail content). We don't use the term generator when adding single terms for things that don't actually appear in the mail document, (such as tags, the filename, etc.). In this case, the WDF value for these terms doesn't matter much. But Xapian's flint backend can be more efficient with changes to terms that don't affect the document "length". So there's a performance advantage for manipulating tags (with the flint backend) if the WDF of these terms is 0. --- lib/directory.cc | 4 ++-- lib/message.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/directory.cc b/lib/directory.cc index 461c0cc3..bb6314ad 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -213,7 +213,7 @@ _notmuch_directory_create (notmuch_database_t *notmuch, Xapian::docid parent_id; char *term = talloc_asprintf (local, "%s%s", _find_prefix ("directory"), db_path); - directory->doc.add_term (term); + directory->doc.add_term (term, 0); directory->doc.set_data (path); @@ -225,7 +225,7 @@ _notmuch_directory_create (notmuch_database_t *notmuch, term = talloc_asprintf (local, "%s%u:%s", _find_prefix ("directory-direntry"), parent_id, basename); - directory->doc.add_term (term); + directory->doc.add_term (term, 0); } directory->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, diff --git a/lib/message.cc b/lib/message.cc index 1cda55a1..f0e905b7 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -192,7 +192,7 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, db = static_cast (notmuch->xapian_db); try { - doc.add_term (term); + doc.add_term (term, 0); talloc_free (term); doc.add_value (NOTMUCH_VALUE_MESSAGE_ID, message_id); @@ -646,7 +646,7 @@ _notmuch_message_add_term (notmuch_message_t *message, if (strlen (term) > NOTMUCH_TERM_MAX) return NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG; - message->doc.add_term (term); + message->doc.add_term (term, 0); talloc_free (term); From c340c1bd1140c0a1b7e0f24ef3ebac806f5fc3e6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 9 Jan 2010 17:38:23 -0800 Subject: [PATCH 63/63] notmuch new: Print upgrade progress report as a percentage. Previously we were printing a number of messages upgraded so far. The original motivation for this was to accurately reflect the fact that there are two passes, (so each message is processed twice and it's not accurate to represent with a single count). But as it turns out, the second pass takes zero time (relatively speaking) so we're still not accounting for it. If nothing else, the percentage-based reporting makes for a cleaner API for the progress_notify function. --- lib/database.cc | 34 ++++++++++++++++++---------------- lib/notmuch.h | 9 ++++----- notmuch-new.c | 26 +++++++++++++++----------- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 19f960e2..cce78478 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -681,8 +681,7 @@ handle_sigalrm (unused (int signal)) notmuch_status_t notmuch_database_upgrade (notmuch_database_t *notmuch, void (*progress_notify) (void *closure, - unsigned int count, - unsigned int total), + double progress), void *closure) { Xapian::WritableDatabase *db; @@ -691,6 +690,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, notmuch_bool_t timer_is_active = FALSE; unsigned int version; notmuch_status_t status; + unsigned int count = 0, total = 0; status = _notmuch_database_ensure_writable (notmuch); if (status) @@ -726,11 +726,11 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, * notmuch_message_add_filename. */ if (version < 1) { - unsigned int count = 0, total; notmuch_query_t *query = notmuch_query_create (notmuch, ""); notmuch_messages_t *messages; notmuch_message_t *message; char *filename; + Xapian::TermIterator t, t_end; total = notmuch_query_count_messages (query); @@ -739,7 +739,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, notmuch_messages_advance (messages)) { if (do_progress_notify) { - progress_notify (closure, count, total); + progress_notify (closure, (double) count / total); do_progress_notify = 0; } @@ -758,13 +758,10 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, } notmuch_query_destroy (query); - } - /* Also, before version 1 we stored directory timestamps in - * XTIMESTAMP documents instead of the current XDIRECTORY - * documents. So copy those as well. */ - if (version < 1) { - Xapian::TermIterator t, t_end; + /* Also, before version 1 we stored directory timestamps in + * XTIMESTAMP documents instead of the current XDIRECTORY + * documents. So copy those as well. */ t_end = notmuch->xapian_db->allterms_end ("XTIMESTAMP"); @@ -785,6 +782,11 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, time_t mtime; notmuch_directory_t *directory; + if (do_progress_notify) { + progress_notify (closure, (double) count / total); + do_progress_notify = 0; + } + document = find_document_for_doc_id (notmuch, *p); mtime = Xapian::sortable_unserialise ( document.get_value (NOTMUCH_VALUE_TIMESTAMP)); @@ -803,20 +805,17 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, /* Now that the upgrade is complete we can remove the old data * and documents that are no longer needed. */ if (version < 1) { - unsigned int count = 0, total; notmuch_query_t *query = notmuch_query_create (notmuch, ""); notmuch_messages_t *messages; notmuch_message_t *message; char *filename; - total = notmuch_query_count_messages (query); - for (messages = notmuch_query_search_messages (query); notmuch_messages_has_more (messages); notmuch_messages_advance (messages)) { if (do_progress_notify) { - progress_notify (closure, count, total); + progress_notify (closure, (double) count / total); do_progress_notify = 0; } @@ -830,8 +829,6 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, talloc_free (filename); notmuch_message_destroy (message); - - count++; } notmuch_query_destroy (query); @@ -855,6 +852,11 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, p != p_end; p++) { + if (do_progress_notify) { + progress_notify (closure, (double) count / total); + do_progress_notify = 0; + } + db->delete_document (*p); } } diff --git a/lib/notmuch.h b/lib/notmuch.h index d8508dfd..15c9db40 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -204,15 +204,14 @@ notmuch_database_needs_upgrade (notmuch_database_t *database); * * The optional progress_notify callback can be used by the caller to * provide progress indication to the user. If non-NULL it will be - * called periodically with 'count' as the number of messages upgraded - * so far and 'total' the overall number of messages that will be - * converted. + * called periodically with 'progress' as a floating-point value in + * the range of [0.0 .. 1.0] indicating the progress made so far in + * the upgrade process. */ notmuch_status_t notmuch_database_upgrade (notmuch_database_t *database, void (*progress_notify) (void *closure, - unsigned int count, - unsigned int total), + double progress), void *closure); /* Retrieve a directory object from the database for 'path'. diff --git a/notmuch-new.c b/notmuch-new.c index 55c3dc1d..b740ee2b 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -618,22 +618,26 @@ count_files (const char *path, int *count) static void upgrade_print_progress (void *closure, - unsigned int count, - unsigned int total) + double progress) { add_files_state_t *state = closure; - struct timeval tv_now; - double elapsed_overall, rate_overall, time_remaining; - gettimeofday (&tv_now, NULL); + printf ("Upgrading database: %.2f%% complete", progress * 100.0); - elapsed_overall = notmuch_time_elapsed (state->tv_start, tv_now); - rate_overall = count / elapsed_overall; - time_remaining = ((total - count) / rate_overall); + if (progress > 0) { + struct timeval tv_now; + double elapsed, time_remaining; - printf ("Upgraded %d of %d messages (", count, total); - notmuch_time_print_formatted_seconds (time_remaining); - printf (" remaining). \r"); + gettimeofday (&tv_now, NULL); + + elapsed = notmuch_time_elapsed (state->tv_start, tv_now); + time_remaining = (elapsed / progress) * (1.0 - progress); + printf (" ("); + notmuch_time_print_formatted_seconds (time_remaining); + printf (" remaining)"); + } + + printf (". \r"); fflush (stdout); }