mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-11-25 04:18:08 +01:00
lib/parse-sexp: add term prefix backed fields
We use "boolean" to describe fields that should generate terms literally, without stemming or phrase splitting. This terminology might not be ideal, but it is already enshrined in notmuch-search-terms(7).
This commit is contained in:
parent
90d9c2ad5c
commit
8322f536f5
3 changed files with 160 additions and 1 deletions
|
@ -81,6 +81,14 @@ string) into words, ignore punctuation. Phrase splitting is applied to
|
||||||
terms in phrase (probabilistic) fields. Both phrase splitting and
|
terms in phrase (probabilistic) fields. Both phrase splitting and
|
||||||
stemming apply only in phrase fields.
|
stemming apply only in phrase fields.
|
||||||
|
|
||||||
|
Each term or phrase field has an associated combining operator
|
||||||
|
(``and`` or ``or``) used to combine the queries from each element of
|
||||||
|
the tail of the list. This is generally ``or`` for those fields where
|
||||||
|
a message has one such attribute, and ``and`` otherwise.
|
||||||
|
|
||||||
|
Term or phrase fields can contain arbitrarily complex queries made up
|
||||||
|
from terms, operators, and modifiers, but not other fields.
|
||||||
|
|
||||||
.. _field-table:
|
.. _field-table:
|
||||||
|
|
||||||
.. table:: Fields with supported modifiers
|
.. table:: Fields with supported modifiers
|
||||||
|
@ -112,7 +120,7 @@ stemming apply only in phrase fields.
|
||||||
+------------+-----------+-----------+-----------+-----------+----------+
|
+------------+-----------+-----------+-----------+-----------+----------+
|
||||||
| mimetype | or | phrase | yes | yes | no |
|
| mimetype | or | phrase | yes | yes | no |
|
||||||
+------------+-----------+-----------+-----------+-----------+----------+
|
+------------+-----------+-----------+-----------+-----------+----------+
|
||||||
| path | or | term | yes | yes | yes |
|
| path | or | term | no | yes | yes |
|
||||||
+------------+-----------+-----------+-----------+-----------+----------+
|
+------------+-----------+-----------+-----------+-----------+----------+
|
||||||
| property | and | term | yes | yes | yes |
|
| property | and | term | yes | yes | yes |
|
||||||
+------------+-----------+-----------+-----------+-----------+----------+
|
+------------+-----------+-----------+-----------+-----------+----------+
|
||||||
|
@ -151,10 +159,18 @@ EXAMPLES
|
||||||
Match the *phrase* "quick" followed by "fox" in phrase fields (or
|
Match the *phrase* "quick" followed by "fox" in phrase fields (or
|
||||||
outside a field). Match the literal string in a term field.
|
outside a field). Match the literal string in a term field.
|
||||||
|
|
||||||
|
``(id 1234@invalid blah@test)``
|
||||||
|
Match Message-Id "1234@invalid" *or* Message-Id "blah@test".
|
||||||
|
|
||||||
``(subject quick "brown fox")``
|
``(subject quick "brown fox")``
|
||||||
Match messages whose subject contains "quick" (anywhere, stemmed) and
|
Match messages whose subject contains "quick" (anywhere, stemmed) and
|
||||||
the phrase "brown fox".
|
the phrase "brown fox".
|
||||||
|
|
||||||
|
``(to (or bob@example.com mallory@example.org))`` ``(or (to bob@example.com) (to mallory@example.org))``
|
||||||
|
Match "bob@example.com" or "mallory@example.org" in the "To" or "Cc"
|
||||||
|
headers. This also matches "bob@example.com.au", since it
|
||||||
|
contains the adjacent triple "bob", "example", "com".
|
||||||
|
|
||||||
NOTES
|
NOTES
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|
|
@ -10,8 +10,26 @@
|
||||||
typedef enum {
|
typedef enum {
|
||||||
SEXP_FLAG_NONE = 0,
|
SEXP_FLAG_NONE = 0,
|
||||||
SEXP_FLAG_FIELD = 1 << 0,
|
SEXP_FLAG_FIELD = 1 << 0,
|
||||||
|
SEXP_FLAG_BOOLEAN = 1 << 1,
|
||||||
} _sexp_flag_t;
|
} _sexp_flag_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* define bitwise operators to hide casts */
|
||||||
|
|
||||||
|
/* Bitwise OR for _sexp_flag_t: merges the bits of both operands and
 * returns the result as a flag value, keeping the integer casts inside
 * this helper instead of at every call site. */
inline _sexp_flag_t
operator| (_sexp_flag_t a, _sexp_flag_t b)
{
    const unsigned lhs_bits = static_cast<unsigned>(a);
    const unsigned rhs_bits = static_cast<unsigned>(b);

    return static_cast<_sexp_flag_t>(lhs_bits | rhs_bits);
}
|
||||||
|
|
||||||
|
/* Bitwise AND for _sexp_flag_t: keeps only the bits set in both
 * operands and returns them as a flag value; the casts through
 * unsigned stay inside this helper. */
inline _sexp_flag_t
operator& (_sexp_flag_t a, _sexp_flag_t b)
{
    const unsigned common_bits = static_cast<unsigned>(a) & static_cast<unsigned>(b);

    return static_cast<_sexp_flag_t>(common_bits);
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
Xapian::Query::op xapian_op;
|
Xapian::Query::op xapian_op;
|
||||||
|
@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] =
|
||||||
{
|
{
|
||||||
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
{ "and", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
SEXP_FLAG_NONE },
|
SEXP_FLAG_NONE },
|
||||||
|
{ "attachment", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD },
|
||||||
|
{ "body", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD },
|
||||||
|
{ "from", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD },
|
||||||
|
{ "folder", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "id", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "is", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "mid", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "mimetype", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD },
|
||||||
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
|
{ "not", Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll,
|
||||||
SEXP_FLAG_NONE },
|
SEXP_FLAG_NONE },
|
||||||
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
{ "or", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
SEXP_FLAG_NONE },
|
SEXP_FLAG_NONE },
|
||||||
|
{ "path", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "property", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD
|
||||||
|
| SEXP_FLAG_BOOLEAN },
|
||||||
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
{ "subject", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
SEXP_FLAG_FIELD },
|
SEXP_FLAG_FIELD },
|
||||||
|
{ "tag", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "thread", Xapian::Query::OP_OR, Xapian::Query::MatchNothing,
|
||||||
|
SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
|
||||||
|
{ "to", Xapian::Query::OP_AND, Xapian::Query::MatchAll,
|
||||||
|
SEXP_FLAG_FIELD },
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -110,6 +155,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
|
||||||
std::string term = Xapian::Unicode::tolower (sx->val);
|
std::string term = Xapian::Unicode::tolower (sx->val);
|
||||||
Xapian::Stem stem = *(notmuch->stemmer);
|
Xapian::Stem stem = *(notmuch->stemmer);
|
||||||
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
|
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
|
||||||
|
if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
|
||||||
|
output = Xapian::Query (term_prefix + sx->val);
|
||||||
|
return NOTMUCH_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
|
if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
|
||||||
output = Xapian::Query ("Z" + term_prefix + stem (term));
|
output = Xapian::Query ("Z" + term_prefix + stem (term));
|
||||||
return NOTMUCH_STATUS_SUCCESS;
|
return NOTMUCH_STATUS_SUCCESS;
|
||||||
|
|
|
@ -101,6 +101,99 @@ thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
|
||||||
EOF
|
EOF
|
||||||
test_expect_equal_file EXPECTED OUTPUT
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'attachment'"
|
||||||
|
notmuch search attachment:notmuch-help.patch > EXPECTED
|
||||||
|
notmuch search --query=sexp '(attachment notmuch-help.patch)' > OUTPUT
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'body'"
|
||||||
|
add_message '[subject]="body search"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [body]=bodysearchtest
|
||||||
|
output=$(notmuch search --query=sexp '(body bodysearchtest)' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'body' (phrase)"
|
||||||
|
add_message '[subject]="body search (phrase)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="body search (phrase)"'
|
||||||
|
add_message '[subject]="negative result"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="This phrase should not match the body search"'
|
||||||
|
output=$(notmuch search --query=sexp '(body "body search phrase")' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; body search (phrase) (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'body' (utf-8):"
|
||||||
|
add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[body]="message body utf8: bödý"'
|
||||||
|
output=$(notmuch search --query=sexp '(body bödý)' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'from'"
|
||||||
|
add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom
|
||||||
|
output=$(notmuch search --query=sexp '(from searchbyfrom)' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom; search by from (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'from' (address)"
|
||||||
|
add_message '[subject]="search by from (address)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' [from]=searchbyfrom@example.com
|
||||||
|
output=$(notmuch search --query=sexp '(from searchbyfrom@example.com)' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] searchbyfrom@example.com; search by from (address) (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'from' (name)"
|
||||||
|
add_message '[subject]="search by from (name)"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"' '[from]="Search By From Name <test@example.com>"'
|
||||||
|
output=$(notmuch search --query=sexp '(from "Search By From Name")' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'from' (name and address)"
|
||||||
|
output=$(notmuch search --query=sexp '(from "Search By From Name <test@example.com>")' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"
|
||||||
|
|
||||||
|
add_message '[dir]=bad' '[subject]="To the bone"'
|
||||||
|
add_message '[dir]=.' '[subject]="Top level"'
|
||||||
|
add_message '[dir]=bad/news' '[subject]="Bears"'
|
||||||
|
mkdir -p "${MAIL_DIR}/duplicate/bad/news"
|
||||||
|
cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
|
||||||
|
|
||||||
|
add_message '[dir]=things' '[subject]="These are a few"'
|
||||||
|
add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
|
||||||
|
add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'folder' (multiple)"
|
||||||
|
output=$(notmuch search --query=sexp '(folder bad bad/news things/bad)' | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
|
||||||
|
thread:XXX 2001-01-05 [1/1(2)] Notmuch Test Suite; Bears (inbox unread)
|
||||||
|
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'folder': top level."
|
||||||
|
notmuch search folder:'""' > EXPECTED
|
||||||
|
notmuch search --query=sexp '(folder "")' > OUTPUT
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'id'"
|
||||||
|
add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
|
||||||
|
output=$(notmuch search --query=sexp "(id ${gen_msg_id})" | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'id' (or)"
|
||||||
|
add_message '[subject]="search by id"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
|
||||||
|
output=$(notmuch search --query=sexp "(id non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by id (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'is' (multiple)"
|
||||||
|
notmuch tag -inbox tag:searchbytag
|
||||||
|
notmuch search is:inbox AND is:unread | notmuch_search_sanitize > EXPECTED
|
||||||
|
notmuch search --query=sexp '(is inbox unread)' | notmuch_search_sanitize > OUTPUT
|
||||||
|
notmuch tag +inbox tag:searchbytag
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'mid'"
|
||||||
|
add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
|
||||||
|
output=$(notmuch search --query=sexp "(mid ${gen_msg_id})" | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'mid' (or)"
|
||||||
|
add_message '[subject]="search by mid"' '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
|
||||||
|
output=$(notmuch search --query=sexp "(mid non-existent-mid ${gen_msg_id})" | notmuch_search_sanitize)
|
||||||
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by mid (inbox unread)"
|
||||||
|
|
||||||
|
test_begin_subtest "Search by 'mimetype'"
|
||||||
|
notmuch search mimetype:text/html > EXPECTED
|
||||||
|
notmuch search --query=sexp '(mimetype text html)' > OUTPUT
|
||||||
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
|
test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
|
||||||
output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
|
output=$(notmuch search --query=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
|
||||||
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
|
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
|
||||||
|
@ -118,6 +211,7 @@ notmuch search --query=sexp '(subject (or utf8 "compatibility issues"))' | notmu
|
||||||
cat <<EOF > EXPECTED
|
cat <<EOF > EXPECTED
|
||||||
thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
|
thread:XXX 2009-11-18 [4/4] Jjgod Jiang, Alexander Botero-Lowry; [notmuch] Mac OS X/Darwin compatibility issues (inbox unread)
|
||||||
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
|
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)
|
||||||
|
thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)
|
||||||
EOF
|
EOF
|
||||||
test_expect_equal_file EXPECTED OUTPUT
|
test_expect_equal_file EXPECTED OUTPUT
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue