mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-12-22 09:24:54 +01:00
lib: add 'body:' field, stop indexing headers twice.
The new `body:` field (in Xapian terms) or prefix (in slightly sloppier notmuch terms) allows matching terms that occur only in the body. Unprefixed query terms should continue to match anywhere (header or body) in the message. This follows a suggestion of Olly Betts to use the facility (since Xapian 1.0.4) to add the same field with multiple prefixes. The double indexing of previous versions is thus replaced with a query-time expansion of unprefixed query terms to the various prefixed equivalents. Reindexing will be needed for 'body:' searches to work correctly; otherwise they will also match messages where the term occurs in headers (demonstrated by the new tests in T530-upgrade.sh).
This commit is contained in:
parent
9fbc5cb578
commit
319dd95ebb
6 changed files with 95 additions and 17 deletions
|
@ -44,6 +44,9 @@ results to those whose value matches a regular expression (see
|
||||||
|
|
||||||
notmuch search 'from:"/bob@.*[.]example[.]com/"'
|
notmuch search 'from:"/bob@.*[.]example[.]com/"'
|
||||||
|
|
||||||
|
body:<word-or-quoted-phrase>
|
||||||
|
Match terms in the body of messages.
|
||||||
|
|
||||||
from:<name-or-address> or from:/<regex>/
|
from:<name-or-address> or from:/<regex>/
|
||||||
The **from:** prefix is used to match the name or address of
|
The **from:** prefix is used to match the name or address of
|
||||||
the sender of an email message.
|
the sender of an email message.
|
||||||
|
@ -249,7 +252,7 @@ follows.
|
||||||
Boolean
|
Boolean
|
||||||
**tag:**, **id:**, **thread:**, **folder:**, **path:**, **property:**
|
**tag:**, **id:**, **thread:**, **folder:**, **path:**, **property:**
|
||||||
Probabilistic
|
Probabilistic
|
||||||
**to:**, **attachment:**, **mimetype:**
|
**body:**, **to:**, **attachment:**, **mimetype:**
|
||||||
Special
|
Special
|
||||||
**from:**, **query:**, **subject:**
|
**from:**, **query:**, **subject:**
|
||||||
|
|
||||||
|
|
|
@ -108,6 +108,12 @@ enum _notmuch_features {
|
||||||
*
|
*
|
||||||
* Introduced: version 3. */
|
* Introduced: version 3. */
|
||||||
NOTMUCH_FEATURE_LAST_MOD = 1 << 6,
|
NOTMUCH_FEATURE_LAST_MOD = 1 << 6,
|
||||||
|
|
||||||
|
/* If set, unprefixed terms are stored only for the message body,
|
||||||
|
* not for headers.
|
||||||
|
*
|
||||||
|
* Introduced: version 3. */
|
||||||
|
NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY = 1 << 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* In C++, a named enum is its own type, so define bitwise operators
|
/* In C++, a named enum is its own type, so define bitwise operators
|
||||||
|
|
|
@ -122,9 +122,12 @@ typedef struct {
|
||||||
* LAST_MOD: The revision number as of the last tag or
|
* LAST_MOD: The revision number as of the last tag or
|
||||||
* filename change.
|
* filename change.
|
||||||
*
|
*
|
||||||
* In addition, terms from the content of the message are added with
|
* The prefixed terms described above are also searchable without an
|
||||||
* "from", "to", "attachment", and "subject" prefixes for use by the
|
* explicit field name, but as of notmuch 0.29 this is due to
|
||||||
* user in searching.
|
* query-parser setup, not extra terms in the database. In addition,
|
||||||
|
* terms from the content of the message are added without a prefix
|
||||||
|
* for use by the user in searching. Note that the prefix name "body"
|
||||||
|
* is used to refer to the empty prefix string in the database.
|
||||||
*
|
*
|
||||||
* The path of the containing folder is added with the "folder" prefix
|
* The path of the containing folder is added with the "folder" prefix
|
||||||
* (see _notmuch_message_add_folder_terms). Sub-paths of the path
|
* (see _notmuch_message_add_folder_terms). Sub-paths of the path
|
||||||
|
@ -266,6 +269,8 @@ prefix_t prefix_table[] = {
|
||||||
{ "directory", "XDIRECTORY", NOTMUCH_FIELD_NO_FLAGS },
|
{ "directory", "XDIRECTORY", NOTMUCH_FIELD_NO_FLAGS },
|
||||||
{ "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
|
{ "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
|
||||||
{ "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
|
{ "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
|
||||||
|
{ "body", "", NOTMUCH_FIELD_EXTERNAL |
|
||||||
|
NOTMUCH_FIELD_PROBABILISTIC},
|
||||||
{ "thread", "G", NOTMUCH_FIELD_EXTERNAL |
|
{ "thread", "G", NOTMUCH_FIELD_EXTERNAL |
|
||||||
NOTMUCH_FIELD_PROCESSOR },
|
NOTMUCH_FIELD_PROCESSOR },
|
||||||
{ "tag", "K", NOTMUCH_FIELD_EXTERNAL |
|
{ "tag", "K", NOTMUCH_FIELD_EXTERNAL |
|
||||||
|
@ -309,6 +314,8 @@ prefix_t prefix_table[] = {
|
||||||
static void
|
static void
|
||||||
_setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
|
_setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
|
||||||
{
|
{
|
||||||
|
if (prefix->prefix)
|
||||||
|
notmuch->query_parser->add_prefix ("",prefix->prefix);
|
||||||
if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC)
|
if (prefix->flags & NOTMUCH_FIELD_PROBABILISTIC)
|
||||||
notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
|
notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
|
||||||
else
|
else
|
||||||
|
@ -333,6 +340,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
|
||||||
*notmuch->query_parser, notmuch))->release ();
|
*notmuch->query_parser, notmuch))->release ();
|
||||||
|
|
||||||
/* we treat all field-processor fields as boolean in order to get the raw input */
|
/* we treat all field-processor fields as boolean in order to get the raw input */
|
||||||
|
if (prefix->prefix)
|
||||||
|
notmuch->query_parser->add_prefix ("",prefix->prefix);
|
||||||
notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
|
notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
|
||||||
} else {
|
} else {
|
||||||
_setup_query_field_default (prefix, notmuch);
|
_setup_query_field_default (prefix, notmuch);
|
||||||
|
@ -390,6 +399,10 @@ static const struct {
|
||||||
"indexed MIME types", "w"},
|
"indexed MIME types", "w"},
|
||||||
{ NOTMUCH_FEATURE_LAST_MOD,
|
{ NOTMUCH_FEATURE_LAST_MOD,
|
||||||
"modification tracking", "w"},
|
"modification tracking", "w"},
|
||||||
|
/* Existing databases will work fine for all queries not involving
|
||||||
|
* 'body:' */
|
||||||
|
{ NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY,
|
||||||
|
"index body and headers separately", "w"},
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *
|
const char *
|
||||||
|
@ -663,6 +676,7 @@ notmuch_database_create_verbose (const char *path,
|
||||||
* new databases have them. */
|
* new databases have them. */
|
||||||
notmuch->features |= NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES;
|
notmuch->features |= NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES;
|
||||||
notmuch->features |= NOTMUCH_FEATURE_INDEXED_MIMETYPES;
|
notmuch->features |= NOTMUCH_FEATURE_INDEXED_MIMETYPES;
|
||||||
|
notmuch->features |= NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY;
|
||||||
|
|
||||||
status = notmuch_database_upgrade (notmuch, NULL, NULL);
|
status = notmuch_database_upgrade (notmuch, NULL, NULL);
|
||||||
if (status) {
|
if (status) {
|
||||||
|
|
|
@ -1419,8 +1419,9 @@ _notmuch_message_add_term (notmuch_message_t *message,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse 'text' and add a term to 'message' for each parsed word. Each
|
/* Parse 'text' and add a term to 'message' for each parsed word. Each
|
||||||
* term will be added both prefixed (if prefix_name is not NULL) and
|
* term will be added with the appropriate prefix if prefix_name is
|
||||||
* also non-prefixed). */
|
* non-NULL.
|
||||||
|
*/
|
||||||
notmuch_private_status_t
|
notmuch_private_status_t
|
||||||
_notmuch_message_gen_terms (notmuch_message_t *message,
|
_notmuch_message_gen_terms (notmuch_message_t *message,
|
||||||
const char *prefix_name,
|
const char *prefix_name,
|
||||||
|
@ -1432,22 +1433,17 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
|
||||||
return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;
|
return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;
|
||||||
|
|
||||||
term_gen->set_document (message->doc);
|
term_gen->set_document (message->doc);
|
||||||
|
term_gen->set_termpos (message->termpos);
|
||||||
|
|
||||||
if (prefix_name) {
|
if (prefix_name) {
|
||||||
const char *prefix = _find_prefix (prefix_name);
|
|
||||||
|
|
||||||
term_gen->set_termpos (message->termpos);
|
|
||||||
term_gen->index_text (text, 1, prefix);
|
|
||||||
/* Create a gap between this and the next terms so they don't
|
|
||||||
* appear to be a phrase. */
|
|
||||||
message->termpos = term_gen->get_termpos () + 100;
|
|
||||||
|
|
||||||
_notmuch_message_invalidate_metadata (message, prefix_name);
|
_notmuch_message_invalidate_metadata (message, prefix_name);
|
||||||
|
term_gen->index_text (text, 1, _find_prefix (prefix_name));
|
||||||
|
} else {
|
||||||
|
term_gen->index_text (text);
|
||||||
}
|
}
|
||||||
|
|
||||||
term_gen->set_termpos (message->termpos);
|
/* Create a gap between this and the next terms so they don't
|
||||||
term_gen->index_text (text);
|
* appear to be a phrase. */
|
||||||
/* Create a term gap, as above. */
|
|
||||||
message->termpos = term_gen->get_termpos () + 100;
|
message->termpos = term_gen->get_termpos () + 100;
|
||||||
|
|
||||||
return NOTMUCH_PRIVATE_STATUS_SUCCESS;
|
return NOTMUCH_PRIVATE_STATUS_SUCCESS;
|
||||||
|
|
|
@ -117,4 +117,20 @@ MAIL_DIR/bar/new/21:2,
|
||||||
MAIL_DIR/bar/new/22:2,
|
MAIL_DIR/bar/new/22:2,
|
||||||
MAIL_DIR/cur/51:2,"
|
MAIL_DIR/cur/51:2,"
|
||||||
|
|
||||||
|
test_begin_subtest "body: same as unprefixed before reindex"
|
||||||
|
notmuch search --output=messages body:close > OUTPUT
|
||||||
|
notmuch search --output=messages close > EXPECTED
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest "body: subset of unprefixed after reindex"
|
||||||
|
notmuch reindex '*'
|
||||||
|
notmuch search --output=messages body:close | sort > BODY
|
||||||
|
notmuch search --output=messages close | sort > UNPREFIXED
|
||||||
|
diff -e UNPREFIXED BODY | cut -c2- > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
d
|
||||||
|
d
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
|
43
test/T740-body.sh
Executable file
43
test/T740-body.sh
Executable file
|
@ -0,0 +1,43 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
test_description='search body'
|
||||||
|
. $(dirname "$0")/test-lib.sh || exit 1
|
||||||
|
|
||||||
|
add_message "[body]=thebody-1" "[subject]=subject-1"
|
||||||
|
add_message "[body]=nothing-to-see-here-1" "[subject]=thebody-1"
|
||||||
|
|
||||||
|
test_begin_subtest 'search with body: prefix'
|
||||||
|
notmuch search body:thebody | notmuch_search_sanitize > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread)
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest 'search without body: prefix'
|
||||||
|
notmuch search thebody | notmuch_search_sanitize > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread)
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; thebody-1 (inbox unread)
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest 'negated body: prefix'
|
||||||
|
notmuch search thebody and not body:thebody | notmuch_search_sanitize > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; thebody-1 (inbox unread)
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest 'search unprefixed for prefixed term'
|
||||||
|
notmuch search subject | notmuch_search_sanitize > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; subject-1 (inbox unread)
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest 'search with body: prefix for term only in subject'
|
||||||
|
notmuch search body:subject | notmuch_search_sanitize > OUTPUT
|
||||||
|
cat <<EOF > EXPECTED
|
||||||
|
EOF
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_done
|
Loading…
Reference in a new issue