lib/parse-sexp: add term prefix backed fields

We use "boolean" to describe fields that should generate terms
literally without stemming or phrase splitting.  This terminology
might not be ideal but it is already enshrined in
notmuch-search-terms(7).
This commit is contained in:
David Bremner 2021-08-24 08:17:23 -07:00
parent 90d9c2ad5c
commit 8322f536f5
3 changed files with 160 additions and 1 deletions

View file

@ -81,6 +81,14 @@ string) into words, ignore punctuation. Phrase splitting is applied to
terms in phrase (probabilistic) fields. Both phrase splitting and
stemming apply only in phrase fields.
Each term or phrase field has an associated combining operator
(``and`` or ``or``) used to combine the queries from each element of
the tail of the list. This is generally ``or`` for those fields where
a message has one such attribute, and ``and`` otherwise.
Term or phrase fields can contain arbitrarily complex queries made up
from terms, operators, and modifiers, but not other fields.
.. _field-table:
.. table:: Fields with supported modifiers
@ -112,7 +120,7 @@ stemming apply only in phrase fields.
+------------+-----------+-----------+-----------+-----------+----------+
| mimetype | or | phrase | yes | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
| path | or | term | yes | yes | yes |
| path | or | term | no | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| property | and | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
@ -151,10 +159,18 @@ EXAMPLES
Match the *phrase* "quick" followed by "fox" in phrase fields (or
outside a field). Match the literal string in a term field.
``(id 1234@invalid blah@test)``
Match Message-Id "1234@invalid" *or* Message-Id "blah@test".
``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox".
``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
Match in the "To" or "Cc" headers, "bob@example.com",
"mallory@example.org", and also "bob@example.com.au" since it
contains the adjacent triple "bob", "example", "com".
NOTES
=====

View file

@ -10,8 +10,26 @@
/* Bit flags attached to each s-expression prefix.  Each flag occupies
 * its own bit so that flags can be or-ed together and tested with a
 * bitwise and. */
typedef enum {
    /* no special handling requested */
    SEXP_FLAG_NONE    = 0x0,
    /* set on the table entries that name a field (e.g. "body", "tag") */
    SEXP_FLAG_FIELD   = 0x1,
    /* terms are generated literally, without stemming or phrase
     * splitting */
    SEXP_FLAG_BOOLEAN = 0x2,
} _sexp_flag_t;
/*
* define bitwise operators to hide casts */
/* Union of two flag sets.  The arithmetic is done on unsigned values
 * and the result converted back, so callers can combine flags without
 * writing the casts themselves. */
inline _sexp_flag_t
operator| (_sexp_flag_t a, _sexp_flag_t b)
{
    const unsigned lhs_bits = static_cast<unsigned>(a);
    const unsigned rhs_bits = static_cast<unsigned>(b);

    return static_cast<_sexp_flag_t>(lhs_bits | rhs_bits);
}
/* Intersection of two flag sets; yields a value with no bits set when
 * the operands share no flag, which makes the result usable directly
 * in a condition. */
inline _sexp_flag_t
operator& (_sexp_flag_t a, _sexp_flag_t b)
{
    const unsigned lhs_bits = static_cast<unsigned>(a);
    const unsigned rhs_bits = static_cast<unsigned>(b);

    return static_cast<_sexp_flag_t>(lhs_bits & rhs_bits);
}
typedef struct {
const char *name;
Xapian::Query::op xapian_op;
@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] =
{
/* One entry per keyword that may appear at the head of an s-expression.
 * Each entry lists, in order:
 *   - the keyword as written in the query,
 *   - the Xapian operator used to combine the queries built from the
 *     elements of the list's tail,
 *   - a Xapian::Query constant: MatchAll accompanies OP_AND and
 *     OP_AND_NOT, MatchNothing accompanies OP_OR (presumably the
 *     starting value for that combination -- confirm against the code
 *     that consumes this table),
 *   - the SEXP_FLAG_* bits for the keyword.
 * Entries carrying SEXP_FLAG_BOOLEAN have their terms generated
 * literally (no stemming, no phrase splitting). */
/* logical operator, not a field */
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
{ "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
/* logical operator, not a field */
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
SEXP_FLAG_NONE },
/* logical operator, not a field */
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE },
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD
| SEXP_FLAG_BOOLEAN },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
/* empty entry; presumably the end-of-table sentinel -- confirm against
 * the lookup loop */
{ }
};
@ -110,6 +155,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
}
if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
output = Xapian::Query ("Z" + term_prefix + stem (term));
return NOTMUCH_STATUS_SUCCESS;

View file

@ -101,6 +101,99 @@ thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
# 'attachment': the s-expression query must produce exactly the same
# output as the equivalent infix query.
test_begin_subtest "Search by 'attachment'"
notmuch search attachment:notmuch-help.patch > EXPECTED
notmuch search --query=sexp '(attachment notmuch-help.patch)' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
# 'body': each subtest adds the message(s) it needs and then checks the
# sanitized search output against a literal expected line.
test_begin_subtest "Search by 'body'"
add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [body]=bodysearchtest
output=$(notmuch search --query=sexp '(body bodysearchtest)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
# Phrase search: the second message contains the same words but not as
# the adjacent phrase, so it must not appear in the result.
test_begin_subtest "Search by 'body' (phrase)"
add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"'
add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not match the body search"'
output=$(notmuch search --query=sexp '(body "body search phrase")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)"
# Non-ASCII search term in the body.
test_begin_subtest "Search by 'body' (utf-8):"
add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="message body utf8: bödý"'
output=$(notmuch search --query=sexp '(body bödý)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
# 'from': bare word, full address, display name, and name plus address.
test_begin_subtest "Search by 'from'"
add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom
output=$(notmuch search --query=sexp '(from searchbyfrom)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom; search by from (inbox unread)"
test_begin_subtest "Search by 'from' (address)"
add_message '[subject]="search by from (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom@example.com
output=$(notmuch search --query=sexp '(from searchbyfrom@example.com)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom@example.com; search by from (address) (inbox unread)"
test_begin_subtest "Search by 'from' (name)"
add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name <test@example.com>"'
output=$(notmuch search --query=sexp '(from "Search By From Name")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
# No message is added here: this subtest is expected to find the message
# created by the previous "(name)" subtest.
test_begin_subtest "Search by 'from' (name and address)"
output=$(notmuch search --query=sexp '(from "Search By From Name <test@example.com>")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
# Fixture for the 'folder' subtests: messages placed in several
# directories of the mail store.
add_message '[dir]=bad' '[subject]="To the bone"'
add_message '[dir]=.' '[subject]="Top level"'
add_message '[dir]=bad/news' '[subject]="Bears"'
# Copy the bad/news message to a second location ($gen_msg_filename is
# presumably set by the preceding add_message -- confirm in the test
# library).  The expected output below shows this message as [1/1(2)].
mkdir -p "${MAIL_DIR}/duplicate/bad/news"
cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
add_message '[dir]=things' '[subject]="These are a few"'
add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
# Several folder names in one list: a message in any of the listed
# folders is expected in the result.
test_begin_subtest "Search by 'folder' (multiple)"
output=$(notmuch search --query=sexp '(folder bad bad/news things/bad)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
# The empty string as folder name; compared against the infix query
# folder:"" rather than a literal expected line.
test_begin_subtest "Search by 'folder': top level."
notmuch search folder:'""' > EXPECTED
notmuch search --query=sexp '(folder "")' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
# 'id': look up the message just added (${gen_msg_id} is presumably set
# by the preceding add_message -- confirm in the test library).
test_begin_subtest "Search by 'id'"
add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp "(id ${gen_msg_id})" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
# Two ids in one list: an id that matches nothing must not prevent the
# other id from matching.
test_begin_subtest "Search by 'id' (or)"
add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp "(id non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
# 'is' with two tags is compared against "is:inbox AND is:unread".  The
# inbox tag is removed from messages tagged searchbytag for the duration
# of the two searches and restored before the comparison.
test_begin_subtest "Search by 'is' (multiple)"
notmuch tag -inbox tag:searchbytag
notmuch search is:inbox AND is:unread | notmuch_search_sanitize > EXPECTED
notmuch search --query=sexp '(is inbox unread)' | notmuch_search_sanitize > OUTPUT
notmuch tag +inbox tag:searchbytag
test_expect_equal_file EXPECTED OUTPUT
# 'mid': same two checks as for 'id' above.
test_begin_subtest "Search by 'mid'"
add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp "(mid ${gen_msg_id})" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
test_begin_subtest "Search by 'mid' (or)"
add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp "(mid non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
# 'mimetype': the two-element list (mimetype text html) is expected to
# give the same output as the infix query mimetype:text/html.
test_begin_subtest "Search by 'mimetype'"
notmuch search mimetype:text/html > EXPECTED
notmuch search --query=sexp '(mimetype text html)' > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
# A single unquoted token containing punctuation and non-ASCII
# characters, searched in the subject.
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
@ -118,6 +211,7 @@ notmuch search --query=sexp '(subject (or utf8 "compatibility issues"))' | notmu
cat <<EOF > EXPECTED
thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT