From 1870b3ae4bae5e68296e63acc81965eea04a1a7a Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:29 -0700 Subject: [PATCH] lib/parse-sexp: support regular expressions At least to the degree that the Xapian QueryParser based parser also supports them. Support short alias 'rx' as it seems to make more complex queries nicer to read. --- doc/man7/notmuch-sexp-queries.rst | 8 ++++ lib/parse-sexp.cc | 54 ++++++++++++++++++----- test/T081-sexpr-search.sh | 72 +++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 10 deletions(-) diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst index f32bab9c..7eaffe56 100644 --- a/doc/man7/notmuch-sexp-queries.rst +++ b/doc/man7/notmuch-sexp-queries.rst @@ -144,6 +144,11 @@ MODIFIERS *Modifiers* refer to any prefixes (first elements of compound queries) that are neither operators nor fields. +``(regex`` *atom* ``)`` ``(rx`` *atom* ``)`` + Interpret *atom* as a POSIX.2 regular expression (see + :manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of + phrase fields (see :any:`field-table`). + ``(starts-with`` *subword* ``)`` Matches any term starting with *subword*. This applies in either phrase or term :any:`fields `, or outside of fields [#not-body]_. Note that @@ -205,6 +210,9 @@ NOTES .. [#aka-bool] a.k.a. boolean prefixes +.. [#not-phrase] Due to the implemention of phrase fields in Xapian, + regex queries could only match individual words. + .. [#not-body] Due the the way ``body`` is implemented in notmuch, this modifier is not supported in the ``body`` field. diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 0192bda9..84914296 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -13,6 +13,8 @@ typedef enum { SEXP_FLAG_BOOLEAN = 1 << 1, SEXP_FLAG_SINGLE = 1 << 2, SEXP_FLAG_WILDCARD = 1 << 3, + SEXP_FLAG_REGEX = 1 << 4, + SEXP_FLAG_DO_REGEX = 1 << 5, } _sexp_flag_t; /* @@ -48,15 +50,15 @@ static _sexp_prefix_t prefixes[] = { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD }, { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, @@ -64,17 +66,21 @@ static _sexp_prefix_t prefixes[] = { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, SEXP_FLAG_NONE }, { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, + { "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, + { "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll, SEXP_FLAG_SINGLE }, { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { } @@ -180,6 +186,30 @@ _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, cons } } + +notmuch_status_t +_sexp_parse_regex (notmuch_database_t *notmuch, + const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent, + std::string val, Xapian::Query &output) +{ + if (! parent) { + _notmuch_database_log (notmuch, "illegal '%s' outside field\n", + prefix->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + if (! (parent->flags & SEXP_FLAG_REGEX)) { + _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n", + prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + std::string msg; /* ignored */ + + return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name, + val, output, msg); +} + /* Here we expect the s-expression to be a proper list, with first * element defining and operation, or as a special case the empty * list */ @@ -254,6 +284,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent if (prefix->xapian_op == Xapian::Query::OP_WILDCARD) return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output); + if (prefix->flags & SEXP_FLAG_DO_REGEX) { + return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output); + } + return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, sx->list->next, output); } diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh index be243fc0..6cfd59a8 100755 --- a/test/T081-sexpr-search.sh +++ b/test/T081-sexpr-search.sh @@ -565,4 +565,76 @@ output=$(notmuch search --query=sexp '(subject deleted)' | notmuch_search_saniti test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread) thread:XXX 2001-01-05 [2/2] Notmuch Test Suite; Deleted (deleted inbox unread)" +test_begin_subtest "regex at top level" +notmuch search --query=sexp '(rx foo)' >& OUTPUT +cat < EXPECTED +notmuch search: Syntax error in query +illegal 'rx' outside field +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regex in illegal field" +notmuch search --query=sexp '(body (regex foo))' >& OUTPUT +cat < EXPECTED +notmuch search: Syntax error in query +'regex' not supported in field 'body' +EOF +test_expect_equal_file EXPECTED OUTPUT + +notmuch search --output=messages from:cworth > cworth.msg-ids + +test_begin_subtest "regexp 'from' search" +notmuch search --output=messages --query=sexp '(from (rx cworth))' > OUTPUT +test_expect_equal_file cworth.msg-ids OUTPUT + +test_begin_subtest "regexp search for 'from' 2" +notmuch search from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(and (from (rx cworth@cworth.org)) (subject patch))' \ + | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'folder' search" +notmuch search 'folder:/^bar$/' | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(folder (rx ^bar$))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'id' search" +notmuch search --output=messages --query=sexp '(id (rx yoom))' > OUTPUT +test_expect_equal_file cworth.msg-ids OUTPUT + +test_begin_subtest "unanchored 'is' search" +notmuch search tag:signed or tag:inbox > EXPECTED +notmuch search --query=sexp '(is (rx i))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "anchored 'is' search" +notmuch search tag:signed > EXPECTED +notmuch search --query=sexp '(is (rx ^si))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "combine regexp mid and subject" +notmuch search subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(and (subject (rx -C)) (mid (rx y..m)))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'path' search" +notmuch search 'path:/^bar$/' | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(path (rx ^bar$))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'property' search" +notmuch search property:foo=bar > EXPECTED +notmuch search --query=sexp '(property (rx foo=.*))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "anchored 'tag' search" +notmuch search tag:signed > EXPECTED +notmuch search --query=sexp '(tag (rx ^si))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'thread' search" +notmuch search --output=threads '*' | grep '7$' > EXPECTED +notmuch search --output=threads --query=sexp '(thread (rx 7$))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + test_done