mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-12-22 09:24:54 +01:00
lib/parse-sexp: support regular expressions
At least to the degree that the Xapian QueryParser based parser also supports them. Support short alias 'rx' as it seems to make more complex queries nicer to read.
This commit is contained in:
parent
5cb452c325
commit
1870b3ae4b
3 changed files with 124 additions and 10 deletions
|
@ -144,6 +144,11 @@ MODIFIERS
|
|||
*Modifiers* refer to any prefixes (first elements of compound queries)
|
||||
that are neither operators nor fields.
|
||||
|
||||
``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
|
||||
Interpret *atom* as a POSIX.2 regular expression (see
|
||||
:manpage:`regex(7)`). This applies in term fields and a subset [#not-phrase]_ of
|
||||
phrase fields (see :any:`field-table`).
|
||||
|
||||
``(starts-with`` *subword* ``)``
|
||||
Matches any term starting with *subword*. This applies in either
|
||||
phrase or term :any:`fields <fields>`, or outside of fields [#not-body]_. Note that
|
||||
|
@ -205,6 +210,9 @@ NOTES
|
|||
|
||||
.. [#aka-bool] a.k.a. boolean prefixes
|
||||
|
||||
.. [#not-phrase] Due to the implementation of phrase fields in Xapian,
|
||||
regex queries could only match individual words.
|
||||
|
||||
.. [#not-body] Due to the way ``body`` is implemented in notmuch,
|
||||
this modifier is not supported in the ``body`` field.
|
||||
|
||||
|
|
|
@ -13,6 +13,8 @@ typedef enum {
|
|||
SEXP_FLAG_BOOLEAN = 1 << 1,
|
||||
SEXP_FLAG_SINGLE = 1 << 2,
|
||||
SEXP_FLAG_WILDCARD = 1 << 3,
|
||||
SEXP_FLAG_REGEX = 1 << 4,
|
||||
SEXP_FLAG_DO_REGEX = 1 << 5,
|
||||
} _sexp_flag_t;
|
||||
|
||||
/*
|
||||
|
@ -48,15 +50,15 @@ static _sexp_prefix_t prefixes[] =
|
|||
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD },
|
||||
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
|
||||
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
|
||||
|
@ -64,17 +66,21 @@ static _sexp_prefix_t prefixes[] =
|
|||
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_NONE },
|
||||
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "regex", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
|
||||
{ "rx", Xapian::Query::OP_INVALID, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
|
||||
{ "starts-with", Xapian::Query::OP_WILDCARD, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_SINGLE },
|
||||
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
|
||||
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||
SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
|
||||
{ }
|
||||
|
@ -180,6 +186,30 @@ _sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, cons
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
/* Handle a regex modifier (a prefix carrying SEXP_FLAG_DO_REGEX) by
 * turning its single argument 'val' into a Xapian query.
 *
 * 'prefix' is the modifier's own table entry; here it is only used for
 * its name in error messages. 'parent' is the enclosing field's table
 * entry, or NULL when the modifier is not inside a field.
 *
 * Logs a message and returns NOTMUCH_STATUS_BAD_QUERY_SYNTAX when
 * there is no enclosing field, or when that field does not have
 * SEXP_FLAG_REGEX set. Otherwise returns the status of
 * _notmuch_regexp_to_query, to which 'output' is handed by reference.
 */
notmuch_status_t
|
||||
_sexp_parse_regex (notmuch_database_t *notmuch,
|
||||
const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
|
||||
std::string val, Xapian::Query &output)
|
||||
{
|
||||
/* A regex needs a field to apply to; reject it when there is none. */
if (! parent) {
|
||||
_notmuch_database_log (notmuch, "illegal '%s' outside field\n",
|
||||
prefix->name);
|
||||
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
|
||||
}
|
||||
|
||||
/* Only fields flagged SEXP_FLAG_REGEX accept a regex argument. */
if (! (parent->flags & SEXP_FLAG_REGEX)) {
|
||||
_notmuch_database_log (notmuch, "'%s' not supported in field '%s'\n",
|
||||
prefix->name, parent->name);
|
||||
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
|
||||
}
|
||||
|
||||
/* The callee requires a message string argument; its contents are
 * not reported from here. */
std::string msg; /* ignored */
|
||||
|
||||
/* The parent field's name selects the field the regex is matched
 * against; no value slot is supplied (Xapian::BAD_VALUENO). */
return _notmuch_regexp_to_query (notmuch, Xapian::BAD_VALUENO, parent->name,
|
||||
val, output, msg);
|
||||
}
|
||||
|
||||
/* Here we expect the s-expression to be a proper list, with first
|
||||
* element defining an operation, or as a special case the empty
|
||||
* list */
|
||||
|
@ -254,6 +284,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
|
|||
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
|
||||
return _sexp_parse_wildcard (notmuch, parent, sx->list->next->val, output);
|
||||
|
||||
if (prefix->flags & SEXP_FLAG_DO_REGEX) {
|
||||
return _sexp_parse_regex (notmuch, prefix, parent, sx->list->next->val, output);
|
||||
}
|
||||
|
||||
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
|
||||
sx->list->next, output);
|
||||
}
|
||||
|
|
|
@ -565,4 +565,76 @@ output=$(notmuch search --query=sexp '(subject deleted)' | notmuch_search_saniti
|
|||
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Not deleted (inbox unread)
|
||||
thread:XXX 2001-01-05 [2/2] Notmuch Test Suite; Deleted (deleted inbox unread)"
|
||||
|
||||
test_begin_subtest "regex at top level"
|
||||
notmuch search --query=sexp '(rx foo)' >& OUTPUT
|
||||
cat <<EOF > EXPECTED
|
||||
notmuch search: Syntax error in query
|
||||
illegal 'rx' outside field
|
||||
EOF
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regex in illegal field"
|
||||
notmuch search --query=sexp '(body (regex foo))' >& OUTPUT
|
||||
cat <<EOF > EXPECTED
|
||||
notmuch search: Syntax error in query
|
||||
'regex' not supported in field 'body'
|
||||
EOF
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
notmuch search --output=messages from:cworth > cworth.msg-ids
|
||||
|
||||
test_begin_subtest "regexp 'from' search"
|
||||
notmuch search --output=messages --query=sexp '(from (rx cworth))' > OUTPUT
|
||||
test_expect_equal_file cworth.msg-ids OUTPUT
|
||||
|
||||
test_begin_subtest "regexp search for 'from' 2"
|
||||
notmuch search from:/cworth@cworth.org/ and subject:patch | notmuch_search_sanitize > EXPECTED
|
||||
notmuch search --query=sexp '(and (from (rx cworth@cworth.org)) (subject patch))' \
|
||||
| notmuch_search_sanitize > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regexp 'folder' search"
|
||||
notmuch search 'folder:/^bar$/' | notmuch_search_sanitize > EXPECTED
|
||||
notmuch search --query=sexp '(folder (rx ^bar$))' | notmuch_search_sanitize > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regexp 'id' search"
|
||||
notmuch search --output=messages --query=sexp '(id (rx yoom))' > OUTPUT
|
||||
test_expect_equal_file cworth.msg-ids OUTPUT
|
||||
|
||||
test_begin_subtest "unanchored 'is' search"
|
||||
notmuch search tag:signed or tag:inbox > EXPECTED
|
||||
notmuch search --query=sexp '(is (rx i))' > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "anchored 'is' search"
|
||||
notmuch search tag:signed > EXPECTED
|
||||
notmuch search --query=sexp '(is (rx ^si))' > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "combine regexp mid and subject"
|
||||
notmuch search subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > EXPECTED
|
||||
notmuch search --query=sexp '(and (subject (rx -C)) (mid (rx y..m)))' | notmuch_search_sanitize > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regexp 'path' search"
|
||||
notmuch search 'path:/^bar$/' | notmuch_search_sanitize > EXPECTED
|
||||
notmuch search --query=sexp '(path (rx ^bar$))' | notmuch_search_sanitize > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regexp 'property' search"
|
||||
notmuch search property:foo=bar > EXPECTED
|
||||
notmuch search --query=sexp '(property (rx foo=.*))' > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "anchored 'tag' search"
|
||||
notmuch search tag:signed > EXPECTED
|
||||
notmuch search --query=sexp '(tag (rx ^si))' > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_begin_subtest "regexp 'thread' search"
|
||||
notmuch search --output=threads '*' | grep '7$' > EXPECTED
|
||||
notmuch search --output=threads --query=sexp '(thread (rx 7$))' > OUTPUT
|
||||
test_expect_equal_file EXPECTED OUTPUT
|
||||
|
||||
test_done
|
||||
|
|
Loading…
Reference in a new issue