From 1fa8e40561aafb0ac4f51e0aba171a702b66fd86 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sat, 8 Feb 2014 21:20:42 +0200 Subject: [PATCH] lib: make folder: prefix literal In xapian terms, convert folder: prefix from probabilistic to boolean prefix, matching the paths, relative from the maildir root, of the message files, ignoring the maildir new and cur leaf directories. folder:foo matches all message files in foo, foo/new, and foo/cur. folder:foo/new does *not* match message files in foo/new. folder:"" matches all message files in the top level maildir and its new and cur subdirectories. This change constitutes a database change: bump the database version and add database upgrade support for folder: terms. The upgrade also adds path: terms. Finally, fix the folder search test for literal folder: search, as some of the folder: matching capabilities are lost in the probabilistic to boolean prefix change. --- lib/database.cc | 44 ++++++++++++++++++- lib/message.cc | 80 ++++++++++++++++++++++++++++++----- lib/notmuch-private.h | 3 ++ test/T100-search-by-folder.sh | 24 +++++++++-- 4 files changed, 135 insertions(+), 16 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 93cc7f57..aef748f7 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -42,7 +42,7 @@ typedef struct { const char *prefix; } prefix_t; -#define NOTMUCH_DATABASE_VERSION 1 +#define NOTMUCH_DATABASE_VERSION 2 #define STRINGIFY(s) _SUB_STRINGIFY(s) #define _SUB_STRINGIFY(s) #s @@ -210,6 +210,13 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { { "is", "K" }, { "id", "Q" }, { "path", "P" }, + /* + * Without the ":", since this is a multi-letter prefix, Xapian + * will add a colon itself if the first letter of the path is + * upper-case ASCII. Including the ":" forces there to always be a + * colon, which keeps our own logic simpler. + */ + { "folder", "XFOLDER:" }, }; static prefix_t PROBABILISTIC_PREFIX[]= { @@ -217,7 +224,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= { { "to", "XTO" }, { "attachment", "XATTACHMENT" }, { "subject", "XSUBJECT"}, - { "folder", "XFOLDER"} }; const char * @@ -1168,6 +1174,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, } } + /* + * Prior to version 2, the "folder:" prefix was probabilistic and + * stemmed. Change it to the current boolean prefix. Add "path:" + * prefixes while at it. + */ + if (version < 2) { + notmuch_query_t *query = notmuch_query_create (notmuch, ""); + notmuch_messages_t *messages; + notmuch_message_t *message; + + count = 0; + total = notmuch_query_count_messages (query); + + for (messages = notmuch_query_search_messages (query); + notmuch_messages_valid (messages); + notmuch_messages_move_to_next (messages)) { + if (do_progress_notify) { + progress_notify (closure, (double) count / total); + do_progress_notify = 0; + } + + message = notmuch_messages_get (messages); + + _notmuch_message_upgrade_folder (message); + _notmuch_message_sync (message); + + notmuch_message_destroy (message); + + count++; + } + + notmuch_query_destroy (query); + } + db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION)); db->flush (); diff --git a/lib/message.cc b/lib/message.cc index 21abe8e1..9243b769 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) } } +/* Return true if p points at "new" or "cur". */ +static bool is_maildir (const char *p) +{ + return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0; +} + +/* Add "folder:" term for directory. */ +static notmuch_status_t +_notmuch_message_add_folder_terms (notmuch_message_t *message, + const char *directory) +{ + char *folder, *last; + + folder = talloc_strdup (NULL, directory); + if (! folder) + return NOTMUCH_STATUS_OUT_OF_MEMORY; + + /* + * If the message file is in a leaf directory named "new" or + * "cur", presume maildir and index the parent directory. Thus a + * "folder:" prefix search matches messages in the specified + * maildir folder, i.e. in the specified directory and its "new" + * and "cur" subdirectories. + * + * Note that this means the "folder:" prefix can't be used for + * distinguishing between message files in "new" or "cur". The + * "path:" prefix needs to be used for that. + * + * Note the deliberate difference to _filename_is_in_maildir(). We + * don't want to index different things depending on the existence + * or non-existence of all maildir sibling directories "new", + * "cur", and "tmp". Doing so would be surprising, and difficult + * for the user to fix in case all subdirectories were not in + * place during indexing. + */ + last = strrchr (folder, '/'); + if (last) { + if (is_maildir (last + 1)) + *last = '\0'; + } else if (is_maildir (folder)) { + *folder = '\0'; + } + + _notmuch_message_add_term (message, "folder", folder); + + talloc_free (folder); + + return NOTMUCH_STATUS_SUCCESS; +} + #define RECURSIVE_SUFFIX "/**" /* Add "path:" terms for directory. */ @@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message) directory = _notmuch_database_get_directory_path (ctx, message->notmuch, directory_id); - if (strlen (directory)) - _notmuch_message_gen_terms (message, "folder", directory); + _notmuch_message_add_folder_terms (message, directory); _notmuch_message_add_path_terms (message, directory); } @@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message, * notmuch_directory_get_child_files() . */ _notmuch_message_add_term (message, "file-direntry", direntry); - /* New terms allow user to search with folder: specification. */ - _notmuch_message_gen_terms (message, "folder", directory); - + _notmuch_message_add_folder_terms (message, directory); _notmuch_message_add_path_terms (message, directory); talloc_free (local); @@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message, const char *filename) { void *local = talloc_new (message); - const char *folder_prefix = _find_prefix ("folder"); - char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix); char *direntry; notmuch_private_status_t private_status; notmuch_status_t status; @@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message, /* Re-synchronize "folder:" and "path:" terms for this message. */ /* Remove all "folder:" terms. */ - _notmuch_message_remove_terms (message, folder_prefix); - - /* Remove all "folder:" stemmed terms. */ - _notmuch_message_remove_terms (message, zfolder_prefix); + _notmuch_message_remove_terms (message, _find_prefix ("folder")); /* Remove all "path:" terms. */ _notmuch_message_remove_terms (message, _find_prefix ("path")); @@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message, return status; } +/* Upgrade the "folder:" prefix from V1 to V2. */ +#define FOLDER_PREFIX_V1 "XFOLDER" +#define ZFOLDER_PREFIX_V1 "Z" FOLDER_PREFIX_V1 +void +_notmuch_message_upgrade_folder (notmuch_message_t *message) +{ + /* Remove all old "folder:" terms. */ + _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1); + + /* Remove all old "folder:" stemmed terms. */ + _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1); + + /* Add new boolean "folder:" and "path:" terms. */ + _notmuch_message_add_directory_terms (message, message); +} + char * _notmuch_message_talloc_copy_data (notmuch_message_t *message) { diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index af185c7c..59eb2bc2 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message, void _notmuch_message_upgrade_filename_storage (notmuch_message_t *message); +void +_notmuch_message_upgrade_folder (notmuch_message_t *message); + notmuch_status_t _notmuch_message_add_filename (notmuch_message_t *message, const char *filename); diff --git a/test/T100-search-by-folder.sh b/test/T100-search-by-folder.sh index 5cc2ca8d..a7f63dd1 100755 --- a/test/T100-search-by-folder.sh +++ b/test/T100-search-by-folder.sh @@ -3,6 +3,7 @@ test_description='"notmuch search" by folder: (with variations)' . ./test-lib.sh add_message '[dir]=bad' '[subject]="To the bone"' +add_message '[dir]=.' '[subject]="Top level"' add_message '[dir]=bad/news' '[subject]="Bears"' mkdir -p "${MAIL_DIR}/duplicate/bad/news" cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news" @@ -12,29 +13,46 @@ add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"' add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"' test_begin_subtest "Single-world folder: specification (multiple results)" -output=$(notmuch search folder:bad | notmuch_search_sanitize) +output=$(notmuch search folder:bad folder:bad/news folder:things/bad | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread) thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread) thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)" +test_begin_subtest "Top level folder" +output=$(notmuch search folder:'""' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Top level (inbox unread)" + test_begin_subtest "Two-word path to narrow results to one" output=$(notmuch search folder:bad/news | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)" +test_begin_subtest "Folder search with --output=files" +output=$(notmuch search --output=files folder:bad/news | notmuch_search_files_sanitize) +test_expect_equal "$output" "MAIL_DIR/bad/news/msg-003 +MAIL_DIR/duplicate/bad/news/msg-003" + test_begin_subtest "After removing duplicate instance of matching path" rm -r "${MAIL_DIR}/bad/news" notmuch new output=$(notmuch search folder:bad/news | notmuch_search_sanitize) +test_expect_equal "$output" "" + +test_begin_subtest "Folder search with --output=files part #2" +output=$(notmuch search --output=files folder:duplicate/bad/news | notmuch_search_files_sanitize) +test_expect_equal "$output" "MAIL_DIR/duplicate/bad/news/msg-003" + +test_begin_subtest "After removing duplicate instance of matching path part #2" +output=$(notmuch search folder:duplicate/bad/news | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)" test_begin_subtest "After rename, old path returns nothing" mv "${MAIL_DIR}/duplicate/bad/news" "${MAIL_DIR}/duplicate/bad/olds" notmuch new -output=$(notmuch search folder:bad/news | notmuch_search_sanitize) +output=$(notmuch search folder:duplicate/bad/news | notmuch_search_sanitize) test_expect_equal "$output" "" test_begin_subtest "After rename, new path returns result" -output=$(notmuch search folder:bad/olds | notmuch_search_sanitize) +output=$(notmuch search folder:duplicate/bad/olds | notmuch_search_sanitize) test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)" test_done