From 011d06f4d6fec8083581676ba9f9448fe7bea1be Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:24 -0700 Subject: [PATCH] lib/parse-sexp: 'starts-with' wildcard searches The many tests are potentially overkill, but they could catch typos in the prefixes table. As a simplifying assumption, for now we assume a single argument to the wildcard operator, as this matches the Xapian semantics. The name 'starts-with' is chosen to emphasize the supported case of wildcards in current (1.4.x) Xapian. --- doc/man7/notmuch-sexp-queries.rst | 13 ++ lib/parse-sexp.cc | 61 +++++++--- test/T081-sexpr-search.sh | 196 ++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+), 15 deletions(-) diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst index 6e68fcc3..c83ce3d0 100644 --- a/doc/man7/notmuch-sexp-queries.rst +++ b/doc/man7/notmuch-sexp-queries.rst @@ -162,10 +162,20 @@ EXAMPLES ``(id 1234@invalid blah@test)`` Matches Message-Id "1234@invalid" *or* Message-Id "blah@test" +``(starts-with prelim)`` + Match any words starting with "prelim". + ``(subject quick "brown fox")`` Match messages whose subject contains "quick" (anywhere, stemmed) and the phrase "brown fox". +``(subject (starts-with prelim))`` + Matches any word starting with "prelim", inside a message subject. + +``(subject (starts-with quick) "brown fox")`` + Match messages whose subject contains "quick brown fox", but also + "brown fox quicksand". + ``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))`` Match in the "To" or "Cc" headers, "bob@example.com", "mallory@example.org", and also "bob@example.com.au" since it @@ -180,6 +190,9 @@ NOTES .. [#aka-bool] a.k.a. boolean prefixes +.. [#not-body] Due to the way ``body`` is implemented in notmuch, + this modifier is not supported in the ``body`` field. + .. |q1| replace:: :math:`q_1` .. |q2| replace:: :math:`q_2` ..
|qn| replace:: :math:`q_n` diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 26b7e5f1..692b3849 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -11,6 +11,8 @@ typedef enum { SEXP_FLAG_NONE = 0, SEXP_FLAG_FIELD = 1 << 0, SEXP_FLAG_BOOLEAN = 1 << 1, + SEXP_FLAG_SINGLE = 1 << 2, + SEXP_FLAG_WILDCARD = 1 << 3, } _sexp_flag_t; /* @@ -42,38 +44,39 @@ static _sexp_prefix_t prefixes[] = { "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_NONE }, { "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD }, { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, SEXP_FLAG_NONE }, { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, SEXP_FLAG_NONE }, { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD - | SEXP_FLAG_BOOLEAN }, 
+ SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE }, { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { } }; @@ -142,6 +145,25 @@ _sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query & return NOTMUCH_STATUS_SUCCESS; } +static notmuch_status_t +_sexp_parse_wildcard (notmuch_database_t *notmuch, + const _sexp_prefix_t *parent, + std::string match, + Xapian::Query &output) +{ + + std::string term_prefix = parent ? _find_prefix (parent->name) : ""; + + if (parent && ! 
(parent->flags & SEXP_FLAG_WILDCARD)) { + _notmuch_database_log (notmuch, "'%s' does not support wildcard queries\n", parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + output = Xapian::Query (Xapian::Query::OP_WILDCARD, + term_prefix + Xapian::Unicode::tolower (match)); + return NOTMUCH_STATUS_SUCCESS; +} + /* Here we expect the s-expression to be a proper list, with first * element defining and operation, or as a special case the empty * list */ @@ -150,7 +172,6 @@ static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, const sexp_t *sx, Xapian::Query &output) { - if (sx->ty == SEXP_VALUE) { std::string term = Xapian::Unicode::tolower (sx->val); Xapian::Stem stem = *(notmuch->stemmer); @@ -190,6 +211,16 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent parent = prefix; } + if ((prefix->flags & SEXP_FLAG_SINGLE) && + (! sx->list->next || sx->list->next->next || sx->list->next->ty != SEXP_VALUE)) { + _notmuch_database_log (notmuch, "'%s' expects single atom as argument\n", + prefix->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + if (prefix->xapian_op == Xapian::Query::OP_WILDCARD) + return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output); + return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, sx->list->next, output); } diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh index 96d58ee2..24c6edd1 100755 --- a/test/T081-sexpr-search.sh +++ b/test/T081-sexpr-search.sh @@ -222,6 +222,170 @@ thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac EOF test_expect_equal_file EXPECTED OUTPUT +test_begin_subtest "Search by 'tag'" +add_message '[subject]="search by tag"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +notmuch tag +searchbytag id:${gen_msg_id} +output=$(notmuch search --query=sexp '(tag searchbytag)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 
2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)" + +test_begin_subtest "Search by 'tag' (multiple)" +notmuch tag -inbox tag:searchbytag +notmuch search tag:inbox AND tag:unread | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(tag inbox unread)' | notmuch_search_sanitize > OUTPUT +notmuch tag +inbox tag:searchbytag +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'tag' and 'subject'" +notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(and (tag inbox) (subject maildir))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Search by 'thread'" +add_message '[subject]="search by thread"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' +thread_id=$(notmuch search id:${gen_msg_id} | sed -e "s/thread:\([a-f0-9]*\).*/\1/") +output=$(notmuch search --query=sexp "(thread ${thread_id})" | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by thread (inbox unread)" + +test_begin_subtest "Search by 'to'" +add_message '[subject]="search by to"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto +output=$(notmuch search --query=sexp '(to searchbyto)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread)" + +test_begin_subtest "Search by 'to' (address)" +add_message '[subject]="search by to (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [to]=searchbyto@example.com +output=$(notmuch search --query=sexp '(to searchbyto@example.com)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread)" + +test_begin_subtest "Search by 'to' (name)" +add_message '[subject]="search by to (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[to]="Search By To Name "' +output=$(notmuch search 
--query=sexp '(to "Search By To Name")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + +test_begin_subtest "Search by 'to' (name and address)" +output=$(notmuch search --query=sexp '(to "Search By To Name ")' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + +test_begin_subtest "starts-with, no prefix" +output=$(notmuch search --query=sexp '(starts-with prelim)' | notmuch_search_sanitize) +test_expect_equal "$output" "thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)" + +test_begin_subtest "starts-with, case-insensitive" +notmuch search --query=sexp '(starts-with FreeB)' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2009-11-18 [3/4] Alexander Botero-Lowry, Jjgod Jiang; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread) +thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, no prefix, all messages" +notmuch search --query=sexp '(starts-with "")' | notmuch_search_sanitize > OUTPUT +notmuch search '*' | notmuch_search_sanitize > EXPECTED +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, attachment" +output=$(notmuch search --query=sexp '(attachment (starts-with not))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? 
(attachment inbox signed unread)' + +test_begin_subtest "starts-with, folder" +notmuch search --output=files --query=sexp '(folder (starts-with bad))' | notmuch_dir_sanitize > OUTPUT +cat < EXPECTED +MAIL_DIR/bad/msg-010 +MAIL_DIR/bad/news/msg-012 +MAIL_DIR/duplicate/bad/news/msg-012 +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, from" +notmuch search --query=sexp '(from (starts-with Mik))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2009-11-17 [1/1] Mikhail Gusarov; [notmuch] [PATCH] Handle rename of message file (inbox unread) +thread:XXX 2009-11-17 [2/7] Mikhail Gusarov| Lars Kellogg-Stedman, Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread) +thread:XXX 2009-11-17 [2/5] Mikhail Gusarov| Carl Worth, Keith Packard; [notmuch] [PATCH 2/2] Include to get uint32_t in C++ file with gcc 4.4 (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, id" +notmuch search --query=sexp --output=messages '(id (starts-with 877))' > OUTPUT +cat < EXPECTED +id:877h1wv7mg.fsf@inf-8657.int-evry.fr +id:877htoqdbo.fsf@yoom.home.cworth.org +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, is" +output=$(notmuch search --query=sexp '(is (starts-with searchby))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)' + +test_begin_subtest "starts-with, mid" +notmuch search --query=sexp --output=messages '(mid (starts-with 877))' > OUTPUT +cat < EXPECTED +id:877h1wv7mg.fsf@inf-8657.int-evry.fr +id:877htoqdbo.fsf@yoom.home.cworth.org +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, mimetype" +notmuch search --query=sexp '(mimetype (starts-with sig))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? 
(attachment inbox signed unread) +thread:XXX 2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread) +thread:XXX 2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +add_message '[subject]="message with properties"' +notmuch restore < OUTPUT +cat < EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; message with properties (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, subject" +notmuch search --query=sexp '(subject (starts-with FreeB))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2009-11-17 [2/2] Alex Botero-Lowry, Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, tag" +output=$(notmuch search --query=sexp '(tag (starts-with searchby))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by tag (inbox searchbytag unread)' + +add_message '[subject]="no tags"' +notag_mid=${gen_msg_id} +notmuch tag -unread -inbox id:${notag_mid} + +test_begin_subtest "negated starts-with, tag" +output=$(notmuch search --query=sexp '(tag (not (starts-with in)))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()' + +test_begin_subtest "negated starts-with, tag 2" +output=$(notmuch search --query=sexp '(not (tag (starts-with in)))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()' + +test_begin_subtest "negated starts-with, tag 3" +output=$(notmuch search --query=sexp '(not (tag (starts-with "")))' | notmuch_search_sanitize) +test_expect_equal "$output" 'thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; no tags ()' + 
+test_begin_subtest "starts-with, thread" +notmuch search --query=sexp '(thread (starts-with "00"))' > OUTPUT +notmuch search '*' > EXPECTED +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, to" +notmuch search --query=sexp '(to (starts-with "search"))' | notmuch_search_sanitize > OUTPUT +cat < EXPECTED +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (inbox unread) +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (address) (inbox unread) +thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + test_begin_subtest "Unbalanced parens" # A code 1 indicates the error was handled (a crash will return e.g. 139). test_expect_code 1 "notmuch search --query=sexp '('" @@ -258,4 +422,36 @@ nested field: 'subject' inside 'subject' EOF test_expect_equal_file EXPECTED OUTPUT +test_begin_subtest "starts-with, no argument" +notmuch search --query=sexp '(starts-with)' >OUTPUT 2>&1 +cat < EXPECTED +notmuch search: Syntax error in query +'starts-with' expects single atom as argument +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, list argument" +notmuch search --query=sexp '(starts-with (stuff))' >OUTPUT 2>&1 +cat < EXPECTED +notmuch search: Syntax error in query +'starts-with' expects single atom as argument +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, too many arguments" +notmuch search --query=sexp '(starts-with a b c)' >OUTPUT 2>&1 +cat < EXPECTED +notmuch search: Syntax error in query +'starts-with' expects single atom as argument +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "starts-with, illegal field" +notmuch search --query=sexp '(body (starts-with foo))' >OUTPUT 2>&1 +cat < EXPECTED +notmuch search: Syntax error in query +'body' does not support wildcard queries +EOF +test_expect_equal_file EXPECTED OUTPUT + test_done