lib/parse-sexp: stem unquoted atoms

This is somewhat less DWIM than the Xapian query parser, but it has
the advantage of simplicity.
This commit is contained in:
David Bremner 2021-08-24 08:17:18 -07:00
parent 3202e0d1fe
commit a2785c3919
3 changed files with 19 additions and 6 deletions

View file

@ -41,8 +41,10 @@ subqueries.
The empty list matches all messages
*term*
Match all messages containing *term*, possibly after stemming
or phrase splitting.
Match all messages containing *term*, possibly after
stemming or phrase splitting. For discussion of stemming in
notmuch see :any:`notmuch-search-terms(7)`. Stemming only applies
to unquoted terms (basic values) in s-expression queries.
``(`` *field* |q1| |q2| ... |qn| ``)``
Restrict the queries |q1| to |qn| to *field*, and combine with *and*
@ -76,6 +78,10 @@ EXAMPLES
``Wizard``
Match all messages containing the word "wizard", ignoring case.
``added``
Match all messages containing "added", but also those containing "add", "additional",
"Additional", "adds", etc., via stemming.
.. |q1| replace:: :math:`q_1`
.. |q2| replace:: :math:`q_2`
.. |qn| replace:: :math:`q_n`

View file

@ -1,5 +1,4 @@
#include <xapian.h>
#include "notmuch-private.h"
#include "database-private.h"
#if HAVE_SFSEXP
#include "sexp.h"
@ -18,7 +17,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
{
if (sx->ty == SEXP_VALUE) {
output = Xapian::Query (Xapian::Unicode::tolower (sx->val));
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
if (sx->aty == SEXP_BASIC)
term = "Z" + stem (term);
output = Xapian::Query (term);
return NOTMUCH_STATUS_SUCCESS;
}

View file

@ -29,11 +29,14 @@ EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "single term in body, stemmed version"
test_subtest_known_broken
notmuch search arriv > EXPECTED
notmuch search --query=sexp arriv > OUTPUT
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "single term in body, unstemmed version"
notmuch search --query=sexp '"arriv"' > OUTPUT
test_expect_equal_file /dev/null OUTPUT
test_begin_subtest "Unbalanced parens"
# A code 1 indicates the error was handled (a crash will return e.g. 139).
test_expect_code 1 "notmuch search --query=sexp '('"