diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst index f32bab9c..7eaffe56 100644 --- a/doc/man7/notmuch-sexp-queries.rst +++ b/doc/man7/notmuch-sexp-queries.rst @@ -144,6 +144,11 @@ MODIFIERS *Modifiers* refer to any prefixes (first elements of compound queries) that are neither operators nor fields. +``(regex`` *atom* ``)`` ``(rx`` *atom* ``)`` + Interpret *atom* as a POSIX.2 regular expression (see + :manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of + phrase fields (see :any:`field-table`). + ``(starts-with`` *subword* ``)`` Matches any term starting with *subword*. This applies in either phrase or term :any:`fields `, or outside of fields [#not-body]_. Note that @@ -205,6 +210,9 @@ NOTES .. [#aka-bool] a.k.a. boolean prefixes +.. [#not-phrase] Due to the implemention of phrase fields in Xapian, + regex queries could only match individual words. + .. [#not-body] Due the the way ``body`` is implemented in notmuch, this modifier is not supported in the ``body`` field. diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc index 0192bda9..84914296 100644 --- a/lib/parse-sexp.cc +++ b/lib/parse-sexp.cc @@ -13,6 +13,8 @@ typedef enum { SEXP_FLAG_BOOLEAN = 1 << 1, SEXP_FLAG_SINGLE = 1 << 2, SEXP_FLAG_WILDCARD = 1 << 3, + SEXP_FLAG_REGEX = 1 << 4, + SEXP_FLAG_DO_REGEX = 1 << 5, } _sexp_flag_t; /* @@ -48,15 +50,15 @@ static _sexp_prefix_t prefixes[] = { "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD }, { "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, @@ -64,17 +66,21 @@ static _sexp_prefix_t prefixes[] = { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, SEXP_FLAG_NONE }, { "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, + { "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, + { "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll, + SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX }, { "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll, SEXP_FLAG_SINGLE }, { "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, - SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD }, + SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX }, { "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll, SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD }, { } @@ -180,6 +186,30 @@ _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, cons } } + +notmuch_status_t +_sexp_parse_regex (notmuch_database_t *notmuch, + const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent, + std::string val, Xapian::Query &output) +{ + if (! parent) { + _notmuch_database_log (notmuch, "illegal '%s' outside field\n", + prefix->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + if (! (parent->flags & SEXP_FLAG_REGEX)) { + _notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n", + prefix->name, parent->name); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + std::string msg; /* ignored */ + + return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name, + val, output, msg); +} + /* Here we expect the s-expression to be a proper list, with first * element defining and operation, or as a special case the empty * list */ @@ -254,6 +284,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent if (prefix->xapian_op == Xapian::Query::OP_WILDCARD) return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output); + if (prefix->flags & SEXP_FLAG_DO_REGEX) { + return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output); + } + return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, sx->list->next, output); } diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh index be243fc0..6cfd59a8 100755 --- a/test/T081-sexpr-search.sh +++ b/test/T081-sexpr-search.sh @@ -565,4 +565,76 @@ output=$(notmuch search --query=sexp '(subject deleted)' | notmuch_search_saniti test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread) thread:XXX 2001-01-05 [2/2] Notmuch Test Suite; Deleted (deleted inbox unread)" +test_begin_subtest "regex at top level" +notmuch search --query=sexp '(rx foo)' >& OUTPUT +cat < EXPECTED +notmuch search: Syntax error in query +illegal 'rx' outside field +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regex in illegal field" +notmuch search --query=sexp '(body (regex foo))' >& OUTPUT +cat < EXPECTED +notmuch search: Syntax error in query +'regex' not supported in field 'body' +EOF +test_expect_equal_file EXPECTED OUTPUT + +notmuch search --output=messages from:cworth > cworth.msg-ids + +test_begin_subtest "regexp 'from' search" +notmuch search --output=messages --query=sexp '(from (rx cworth))' > OUTPUT +test_expect_equal_file cworth.msg-ids OUTPUT + +test_begin_subtest "regexp search for 'from' 2" +notmuch search from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(and (from (rx cworth@cworth.org)) (subject patch))' \ + | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'folder' search" +notmuch search 'folder:/^bar$/' | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(folder (rx ^bar$))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'id' search" +notmuch search --output=messages --query=sexp '(id (rx yoom))' > OUTPUT +test_expect_equal_file cworth.msg-ids OUTPUT + +test_begin_subtest "unanchored 'is' search" +notmuch search tag:signed or tag:inbox > EXPECTED +notmuch search --query=sexp '(is (rx i))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "anchored 'is' search" +notmuch search tag:signed > EXPECTED +notmuch search --query=sexp '(is (rx ^si))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "combine regexp mid and subject" +notmuch search subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(and (subject (rx -C)) (mid (rx y..m)))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'path' search" +notmuch search 'path:/^bar$/' | notmuch_search_sanitize > EXPECTED +notmuch search --query=sexp '(path (rx ^bar$))' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'property' search" +notmuch search property:foo=bar > EXPECTED +notmuch search --query=sexp '(property (rx foo=.*))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "anchored 'tag' search" +notmuch search tag:signed > EXPECTED +notmuch search --query=sexp '(tag (rx ^si))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "regexp 'thread' search" +notmuch search --output=threads '*' | grep '7$' > EXPECTED +notmuch search --output=threads --query=sexp '(thread (rx 7$))' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + test_done