lib/parse-sexp: support subject field

The tests marked as known broken fail because we do not yet handle phrase searches.
This commit is contained in:
David Bremner 2021-08-24 08:17:20 -07:00
parent f83cd2a05a
commit 200e164dc7
3 changed files with 133 additions and 5 deletions

View file

@ -36,9 +36,8 @@ An s-expression query is either an atom, the empty list, or a
a *field*, *logical operation*, or *modifier*, and 0 or more a *field*, *logical operation*, or *modifier*, and 0 or more
subqueries. subqueries.
``*`` ``*`` ``()``
``()`` Match all messages.
The empty list matches all messages
*term* *term*
Match all messages containing *term*, possibly after Match all messages containing *term*, possibly after
@ -64,6 +63,59 @@ subqueries.
FIELDS FIELDS
`````` ``````
*Fields* (also called *prefixes* in notmuch documentation)
correspond to attributes of mail messages. Some are inherent (and
immutable) like ``subject``, while others, such as ``tag`` and ``property``, are
settable by the user. Each concrete field in
:any:`the table below <field-table>`
is discussed further under "Search prefixes" in
:any:`notmuch-search-terms(7)`. The row *user* refers to user-defined
fields, described in :any:`notmuch-config(1)`.
.. _field-table:
.. table:: Fields with supported modifiers
+------------+-----------+-----------+-----------+-----------+----------+
| field | combine | type | expand | wildcard | regex |
+============+===========+===========+===========+===========+==========+
| *none* | and | | no | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
| *user* | and | phrase | no | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
| attachment | and | phrase | yes | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
| body | and | phrase | no | no | no |
+------------+-----------+-----------+-----------+-----------+----------+
| date | | range | no | no | no |
+------------+-----------+-----------+-----------+-----------+----------+
| folder | or | phrase | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| from | and | phrase | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| id | or | term | no | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| is | and | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| lastmod | | range | no | no | no |
+------------+-----------+-----------+-----------+-----------+----------+
| mid | or | term | no | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| mimetype | or | phrase | yes | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
| path | or | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| property | and | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| subject | and | phrase | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| tag | and | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| thread | or | term | yes | yes | yes |
+------------+-----------+-----------+-----------+-----------+----------+
| to | and | phrase | yes | yes | no |
+------------+-----------+-----------+-----------+-----------+----------+
.. _modifiers: .. _modifiers:
MODIFIERS MODIFIERS
@ -86,6 +138,10 @@ EXAMPLES
``(not Bob Marley)`` ``(not Bob Marley)``
Match messages containing neither "Bob" nor "Marley", nor their stems, Match messages containing neither "Bob" nor "Marley", nor their stems,
``(subject quick "brown fox")``
Match messages whose subject contains "quick" (anywhere, stemmed) and
the phrase "brown fox".
.. |q1| replace:: :math:`q_1` .. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2` .. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n` .. |qn| replace:: :math:`q_n`

View file

@ -9,6 +9,7 @@
typedef enum { typedef enum {
SEXP_FLAG_NONE = 0, SEXP_FLAG_NONE = 0,
SEXP_FLAG_FIELD = 1 << 0,
} _sexp_flag_t; } _sexp_flag_t;
typedef struct { typedef struct {
@ -26,6 +27,8 @@ static _sexp_prefix_t prefixes[] =
SEXP_FLAG_NONE }, SEXP_FLAG_NONE },
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing, { "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
SEXP_FLAG_NONE }, SEXP_FLAG_NONE },
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
SEXP_FLAG_FIELD },
{ } { }
}; };
@ -76,8 +79,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
if (sx->ty == SEXP_VALUE) { if (sx->ty == SEXP_VALUE) {
std::string term = Xapian::Unicode::tolower (sx->val); std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer); Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
if (sx->aty == SEXP_BASIC) if (sx->aty == SEXP_BASIC)
term = "Z" + stem (term); term = "Z" + term_prefix + stem (term);
else
term = term_prefix + term;
output = Xapian::Query (term); output = Xapian::Query (term);
return NOTMUCH_STATUS_SUCCESS; return NOTMUCH_STATUS_SUCCESS;
@ -97,6 +103,15 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) { for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
if (strcmp (prefix->name, sx->list->val) == 0) { if (strcmp (prefix->name, sx->list->val) == 0) {
if (prefix->flags & SEXP_FLAG_FIELD) {
if (parent) {
_notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
prefix->name, parent->name);
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
parent = prefix;
}
return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial, return _sexp_combine_query (notmuch, parent, prefix->xapian_op, prefix->initial,
sx->list->next, output); sx->list->next, output);
} }

View file

@ -62,6 +62,55 @@ test_begin_subtest "single term in body, unstemmed version"
notmuch search --query=sexp '"arriv"' > OUTPUT notmuch search --query=sexp '"arriv"' > OUTPUT
test_expect_equal_file /dev/null OUTPUT test_expect_equal_file /dev/null OUTPUT
test_begin_subtest "Search by 'subject'"
add_message [subject]=subjectsearchtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp '(subject subjectsearchtest)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)"
test_begin_subtest "Search by 'subject' (case insensitive)"
notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > EXPECTED
notmuch search --query=sexp '(subject "Maildir")' | notmuch_search_sanitize > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'subject' (utf-8):"
add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
output=$(notmuch search --query=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Search by 'subject' (utf-8, and):"
output=$(notmuch search --query=sexp '(subject (and utf8 sübjéct))' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Search by 'subject' (utf-8, and outside):"
output=$(notmuch search --query=sexp '(and (subject utf8) (subject sübjéct))' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Search by 'subject' (utf-8, or):"
notmuch search --query=sexp '(subject (or utf8 subjectsearchtest))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'subject' (utf-8, or outside):"
notmuch search --query=sexp '(or (subject utf8) (subject subjectsearchtest))' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; subjectsearchtest (inbox unread)
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
test_subtest_known_broken
output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
test_subtest_known_broken
output=$(notmuch search --query=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
test_begin_subtest "Unbalanced parens" test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139). # A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query=sexp '('" test_expect_code 1 "notmuch search --query=sexp '('"
@ -90,4 +139,12 @@ unexpected list in field/operation position
EOF EOF
test_expect_equal_file EXPECTED OUTPUT test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "illegal nesting"
notmuch search --query=sexp '(subject (subject foo))' >OUTPUT 2>&1
cat <<EOF > EXPECTED
notmuch search: Syntax error in query
nested field: 'subject' inside 'subject'
EOF
test_expect_equal_file EXPECTED OUTPUT
test_done test_done