lib/parse-sexp: 'starts-with' wildcard searches

The many tests are potentially overkill, but they could catch typos in the
prefixes table. As a simplifying assumption, for now we assume a
single argument to the wildcard operator, as this matches the Xapian
semantics. The name 'starts-with' is chosen to emphasize the supported
case of wildcards in current (1.4.x) Xapian.
This commit is contained in:
David Bremner 2021-08-24 08:17:24 -07:00
parent 8322f536f5
commit 011d06f4d6
3 changed files with 255 additions and 15 deletions

View file

@ -162,10 +162,20 @@ EXAMPLES
``(id 1234@invalid blah@test)`` ``(id 1234@invalid blah@test)``
Matches Message-Id "1234@invalid" *or* Message-Id "blah@test" Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
``(starts-with prelim)``
Match any words starting with "prelim".
``(subject quick "brown fox")`` ``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox". the phrase "brown fox".
``(subject (starts-with prelim))``
Matches any word starting with "prelim", inside a message subject.
``(subject (starts-with quick) "brown fox")``
Match messages whose subject contains "quick brown fox", but also
"brown fox quicksand".
``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))`` ``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
Match in the "To" or "Cc" headers, "bob@example.com", Match in the "To" or "Cc" headers, "bob@example.com",
"mallory@example.org", and also "bob@example.com.au" since it "mallory@example.org", and also "bob@example.com.au" since it
@ -180,6 +190,9 @@ NOTES
.. [#aka-bool] a.k.a. boolean prefixes .. [#aka-bool] a.k.a. boolean prefixes
.. [#not-body] Due to the way ``body`` is implemented in notmuch,
this modifier is not supported in the ``body`` field.
.. |q1| replace:: :math:`q_1` .. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2` .. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n` .. |qn| replace:: :math:`q_n`

View file

@ -11,6 +11,8 @@ typedef enum {
SEXP_FLAG_NONE = 0, SEXP_FLAG_NONE = 0,
SEXP_FLAG_FIELD = 1 << 0, SEXP_FLAG_FIELD = 1 << 0,
SEXP_FLAG_BOOLEAN = 1 << 1, SEXP_FLAG_BOOLEAN = 1 << 1,
SEXP_FLAG_SINGLE = 1 << 2,
SEXP_FLAG_WILDCARD = 1 << 3,
} _sexp_flag_t; } _sexp_flag_t;
/* /*
@ -42,38 +44,39 @@ static _sexp_prefix_t prefixes[] =
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_NONE }, SEXP_FLAG_NONE },
{ "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD },
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
SEXP_FLAG_NONE }, SEXP_FLAG_NONE },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE }, SEXP_FLAG_NONE },
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
| SEXP_FLAG_BOOLEAN }, { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll,
SEXP_FLAG_SINGLE },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD }, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
{ } { }
}; };
@ -142,6 +145,25 @@ _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query &
return NOTMUCH_STATUS_SUCCESS; return NOTMUCH_STATUS_SUCCESS;
} }
/* Build a wildcard ("starts with") query for 'match'.
 *
 * When 'parent' is non-NULL, the term prefix returned by _find_prefix
 * for parent->name is prepended to the lowercased match string, and
 * 'parent' must carry SEXP_FLAG_WILDCARD; otherwise an error is logged
 * and NOTMUCH_STATUS_BAD_QUERY_SYNTAX is returned. With no parent the
 * match string is used without a prefix.
 *
 * On success 'output' is set to an OP_WILDCARD query and
 * NOTMUCH_STATUS_SUCCESS is returned.
 */
static notmuch_status_t
_sexp_parse_wildcard (notmuch_database_t *notmuch,
                      const _sexp_prefix_t *parent,
                      std::string match,
                      Xapian::Query &output)
{
    /* The prefix lookup is deliberately kept ahead of the flag check. */
    std::string field_prefix;

    if (parent)
        field_prefix = _find_prefix (parent->name);

    const bool wildcard_allowed = ! parent || (parent->flags & SEXP_FLAG_WILDCARD);

    if (! wildcard_allowed) {
        _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name);
        return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
    }

    const std::string pattern = field_prefix + Xapian::Unicode::tolower (match);

    output = Xapian::Query (Xapian::Query::OP_WILDCARD, pattern);
    return NOTMUCH_STATUS_SUCCESS;
}
/* Here we expect the s-expression to be a proper list, with first /* Here we expect the s-expression to be a proper list, with first
* element defining an operation, or as a special case the empty * element defining an operation, or as a special case the empty
* list */ * list */
@ -150,7 +172,6 @@ static notmuch_status_t
_sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx, _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx,
Xapian::Query &output) Xapian::Query &output)
{ {
if (sx->ty == SEXP_VALUE) { if (sx->ty == SEXP_VALUE) {
std::string term = Xapian::Unicode::tolower (sx->val); std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer); Xapian::Stem stem = *(notmuch->stemmer);
@ -190,6 +211,16 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
parent = prefix; parent = prefix;
} }
if ((prefix->flags & SEXP_FLAG_SINGLE) &&
(! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) {
_notmuch_database_log (notmuch, "'%s' expects single atom as argument\n",
prefix->name);
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output); sx->list->next, output);
} }

View file

@ -222,6 +222,170 @@ thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac
EOF EOF
test_expect_equal_file EXPECTED OUTPUT test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'tag'"
add_message '[subject]="search by tag"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
notmuch tag +searchbytag id:${gen_msg_id}
output=$(notmuch search --query=sexp '(tag searchbytag)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)"
test_begin_subtest "Search by 'tag' (multiple)"
# Temporarily remove 'inbox' from the message tagged 'searchbytag' while the
# classic-query (EXPECTED) and s-expression (OUTPUT) results are captured;
# the tag is put back before the comparison below.
notmuch tag -inbox tag:searchbytag
notmuch search tag:inbox AND tag:unread | notmuch_search_sanitize > EXPECTED
notmuch search --query=sexp '(tag inbox unread)' | notmuch_search_sanitize > OUTPUT
notmuch tag +inbox tag:searchbytag
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'tag' and 'subject'"
notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED
notmuch search --query=sexp '(and (tag inbox) (subject maildir))' | notmuch_search_sanitize > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'thread'"
add_message '[subject]="search by thread"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
thread_id=$(notmuch search id:${gen_msg_id} | sed -e "s/thread:\([a-f0-9]*\).*/\1/")
output=$(notmuch search --query=sexp "(thread ${thread_id})" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by thread (inbox unread)"
test_begin_subtest "Search by 'to'"
add_message '[subject]="search by to"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto
output=$(notmuch search --query=sexp '(to searchbyto)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)"
test_begin_subtest "Search by 'to' (address)"
add_message '[subject]="search by to (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto@example.com
output=$(notmuch search --query=sexp '(to searchbyto@example.com)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)"
test_begin_subtest "Search by 'to' (name)"
add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[to]="Search By To Name <test@example.com>"'
output=$(notmuch search --query=sexp '(to "Search By To Name")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
test_begin_subtest "Search by 'to' (name and address)"
output=$(notmuch search --query=sexp '(to "Search By To Name <test@example.com>")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"
test_begin_subtest "starts-with, no prefix"
output=$(notmuch search --query=sexp '(starts-with prelim)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)"
test_begin_subtest "starts-with, case-insensitive"
notmuch search --query=sexp '(starts-with FreeB)' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2009-11-18 [3/4] Alexander Botero-Lowry, Jjgod Jiang; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, no prefix, all messages"
notmuch search --query=sexp '(starts-with "")' | notmuch_search_sanitize > OUTPUT
notmuch search '*' | notmuch_search_sanitize > EXPECTED
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, attachment"
output=$(notmuch search --query=sexp '(attachment (starts-with not))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)'
test_begin_subtest "starts-with, folder"
notmuch search --output=files --query=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize > OUTPUT
cat <<EOF > EXPECTED
MAIL_DIR/bad/msg-010
MAIL_DIR/bad/news/msg-012
MAIL_DIR/duplicate/bad/news/msg-012
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, from"
notmuch search --query=sexp '(from (starts-with Mik))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2009-11-17 [1/1] Mikhail Gusarov; [notmuch] [PATCH] Handle rename of message file (inbox unread)
thread:XXX 2009-11-17 [2/7] Mikhail Gusarov| Lars Kellogg-Stedman, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
thread:XXX 2009-11-17 [2/5] Mikhail Gusarov| Carl Worth, Keith Packard; [notmuch] [PATCH 2/2] Include <stdint.h> to get uint32_t in C++ file with gcc 4.4 (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, id"
notmuch search --query=sexp --output=messages '(id (starts-with 877))' > OUTPUT
cat <<EOF > EXPECTED
id:877h1wv7mg.fsf@inf-8657.int-evry.fr
id:877htoqdbo.fsf@yoom.home.cworth.org
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, is"
output=$(notmuch search --query=sexp '(is (starts-with searchby))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
test_begin_subtest "starts-with, mid"
notmuch search --query=sexp --output=messages '(mid (starts-with 877))' > OUTPUT
cat <<EOF > EXPECTED
id:877h1wv7mg.fsf@inf-8657.int-evry.fr
id:877htoqdbo.fsf@yoom.home.cworth.org
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, mimetype"
notmuch search --query=sexp '(mimetype (starts-with sig))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
thread:XXX 2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
thread:XXX 2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
add_message '[subject]="message with properties"'
notmuch restore <<EOF
#= ${gen_msg_id} foo=bar
EOF
test_begin_subtest "starts-with, property"
notmuch search --query=sexp '(property (starts-with foo=))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; message with properties (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, subject"
notmuch search --query=sexp '(subject (starts-with FreeB))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, tag"
output=$(notmuch search --query=sexp '(tag (starts-with searchby))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)'
# Fixture: add a message and strip its 'unread' and 'inbox' tags. The three
# negated wildcard subtests below each expect this message as their only
# result, shown with an empty tag list.
add_message '[subject]="no tags"'
notag_mid=${gen_msg_id}
notmuch tag -unread -inbox id:${notag_mid}
# Negation placed inside the 'tag' field.
test_begin_subtest "negated starts-with, tag"
output=$(notmuch search --query=sexp '(tag (not (starts-with in)))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
# Negation wrapped around the whole 'tag' expression.
test_begin_subtest "negated starts-with, tag 2"
output=$(notmuch search --query=sexp '(not (tag (starts-with in)))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
# Empty wildcard argument inside the negated 'tag' expression.
test_begin_subtest "negated starts-with, tag 3"
output=$(notmuch search --query=sexp '(not (tag (starts-with "")))' | notmuch_search_sanitize)
test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()'
test_begin_subtest "starts-with, thread"
notmuch search --query=sexp '(thread (starts-with "00"))' > OUTPUT
notmuch search '*' > EXPECTED
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, to"
notmuch search --query=sexp '(to (starts-with "search"))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Unbalanced parens" test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139). # A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query=sexp '('" test_expect_code 1 "notmuch search --query=sexp '('"
@ -258,4 +422,36 @@ nested field: 'subject' inside 'subject'
EOF EOF
test_expect_equal_file EXPECTED OUTPUT test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, no argument"
notmuch search --query=sexp '(starts-with)' >OUTPUT 2>&1
cat <<EOF > EXPECTED
notmuch search: Syntax error in query
'starts-with' expects single atom as argument
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, list argument"
notmuch search --query=sexp '(starts-with (stuff))' >OUTPUT 2>&1
cat <<EOF > EXPECTED
notmuch search: Syntax error in query
'starts-with' expects single atom as argument
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, too many arguments"
notmuch search --query=sexp '(starts-with a b c)' >OUTPUT 2>&1
cat <<EOF > EXPECTED
notmuch search: Syntax error in query
'starts-with' expects single atom as argument
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "starts-with, illegal field"
notmuch search --query=sexp '(body (starts-with foo))' >OUTPUT 2>&1
cat <<EOF > EXPECTED
notmuch search: Syntax error in query
'body' does not support wildcard queries
EOF
test_expect_equal_file EXPECTED OUTPUT
test_done test_done