diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst index f7a39ceb..fd8bf634 100644 --- a/doc/man7/notmuch-search-terms.rst +++ b/doc/man7/notmuch-search-terms.rst @@ -44,6 +44,9 @@ results to those whose value matches a regular expression (see notmuch search 'from:"/bob@.*[.]example[.]com/"' +body: + Match terms in the body of messages. + from: or from:// The **from:** prefix is used to match the name or address of the sender of an email message. @@ -249,7 +252,7 @@ follows. Boolean **tag:**, **id:**, **thread:**, **folder:**, **path:**, **property:** Probabilistic - **to:**, **attachment:**, **mimetype:** + **body:**, **to:**, **attachment:**, **mimetype:** Special **from:**, **query:**, **subject:** diff --git a/lib/database-private.h b/lib/database-private.h index a499b259..293f2db4 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -108,6 +108,12 @@ enum _notmuch_features { * * Introduced: version 3. */ NOTMUCH_FEATURE_LAST_MOD = 1 << 6, + + /* If set, unprefixed terms are stored only for the message body, + * not for headers. + * + * Introduced: version 3. */ + NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY = 1 << 7, }; /* In C++, a named enum is its own type, so define bitwise operators diff --git a/lib/database.cc b/lib/database.cc index 09ab9cb0..d2732f5e 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -122,9 +122,12 @@ typedef struct { * LAST_MOD: The revision number as of the last tag or * filename change. * - * In addition, terms from the content of the message are added with - * "from", "to", "attachment", and "subject" prefixes for use by the - * user in searching. + * The prefixed terms described above are also searchable without an + * explicit field name, but as of notmuch 0.29 this is due to + * query-parser setup, not extra terms in the database. In addition, + * terms from the content of the message are added without a prefix + * for use by the user in searching. 
Note that the prefix name "body" + * is used to refer to the empty prefix string in the database. * * The path of the containing folder is added with the "folder" prefix * (see _notmuch_message_add_folder_terms). Sub-paths of the the path @@ -266,6 +269,8 @@ prefix_t prefix_table[] = { { "directory", "XDIRECTORY", NOTMUCH_FIELD_NO_FLAGS }, { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, + { "body", "", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROBABILISTIC}, { "thread", "G", NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, { "tag", "K", NOTMUCH_FIELD_EXTERNAL | @@ -309,6 +314,8 @@ prefix_t prefix_table[] = { static void _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch) { + if (prefix->prefix) + notmuch->query_parser->add_prefix ("",prefix->prefix); if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC) notmuch->query_parser->add_prefix (prefix->name, prefix->prefix); else @@ -333,6 +340,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch) *notmuch->query_parser, notmuch))->release (); /* we treat all field-processor fields as boolean in order to get the raw input */ + if (prefix->prefix) + notmuch->query_parser->add_prefix ("",prefix->prefix); notmuch->query_parser->add_boolean_prefix (prefix->name, fp); } else { _setup_query_field_default (prefix, notmuch); @@ -390,6 +399,10 @@ static const struct { "indexed MIME types", "w"}, { NOTMUCH_FEATURE_LAST_MOD, "modification tracking", "w"}, + /* Existing databases will work fine for all queries not involving + * 'body:' */ + { NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY, + "index body and headers separately", "w"}, }; const char * @@ -663,6 +676,7 @@ notmuch_database_create_verbose (const char *path, * new databases have them. 
*/ notmuch->features |= NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES; notmuch->features |= NOTMUCH_FEATURE_INDEXED_MIMETYPES; + notmuch->features |= NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY; status = notmuch_database_upgrade (notmuch, NULL, NULL); if (status) { diff --git a/lib/message.cc b/lib/message.cc index 6f2f6345..38a48933 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -1419,8 +1419,9 @@ _notmuch_message_add_term (notmuch_message_t *message, } /* Parse 'text' and add a term to 'message' for each parsed word. Each - * term will be added both prefixed (if prefix_name is not NULL) and - * also non-prefixed). */ + * term will be added with the appropriate prefix if prefix_name is + * non-NULL. + */ notmuch_private_status_t _notmuch_message_gen_terms (notmuch_message_t *message, const char *prefix_name, @@ -1432,22 +1433,17 @@ _notmuch_message_gen_terms (notmuch_message_t *message, return NOTMUCH_PRIVATE_STATUS_NULL_POINTER; term_gen->set_document (message->doc); + term_gen->set_termpos (message->termpos); if (prefix_name) { - const char *prefix = _find_prefix (prefix_name); - - term_gen->set_termpos (message->termpos); - term_gen->index_text (text, 1, prefix); - /* Create a gap between this an the next terms so they don't - * appear to be a phrase. */ - message->termpos = term_gen->get_termpos () + 100; - _notmuch_message_invalidate_metadata (message, prefix_name); + term_gen->index_text (text, 1, _find_prefix (prefix_name)); + } else { + term_gen->index_text (text); } - term_gen->set_termpos (message->termpos); - term_gen->index_text (text); - /* Create a term gap, as above. */ + /* Create a gap between this and the next terms so they don't + * appear to be a phrase. 
*/ message->termpos = term_gen->get_termpos () + 100; return NOTMUCH_PRIVATE_STATUS_SUCCESS; diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh index 69ebec68..2124dde2 100755 --- a/test/T530-upgrade.sh +++ b/test/T530-upgrade.sh @@ -117,4 +117,20 @@ MAIL_DIR/bar/new/21:2, MAIL_DIR/bar/new/22:2, MAIL_DIR/cur/51:2," +test_begin_subtest "body: same as unprefixed before reindex" +notmuch search --output=messages body:close > OUTPUT +notmuch search --output=messages close > EXPECTED +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "body: subset of unprefixed after reindex" +notmuch reindex '*' +notmuch search --output=messages body:close | sort > BODY +notmuch search --output=messages close | sort > UNPREFIXED +diff -e UNPREFIXED BODY | cut -c2- > OUTPUT +cat <<EOF > EXPECTED +d +d +EOF +test_expect_equal_file EXPECTED OUTPUT + test_done diff --git a/test/T740-body.sh b/test/T740-body.sh new file mode 100755 index 00000000..548b30a4 --- /dev/null +++ b/test/T740-body.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +test_description='search body' +. 
$(dirname "$0")/test-lib.sh || exit 1 + +add_message "[body]=thebody-1" "[subject]=subject-1" +add_message "[body]=nothing-to-see-here-1" "[subject]=thebody-1" + +test_begin_subtest 'search with body: prefix' +notmuch search body:thebody | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'search without body: prefix' +notmuch search thebody | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; thebody-1 (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'negated body: prefix' +notmuch search thebody and not body:thebody | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; thebody-1 (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'search unprefixed for prefixed term' +notmuch search subject | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'search with body: prefix for term only in subject' +notmuch search body:subject | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_done