From be7e83de96b706af418fc9f139ded4d50bf342f6 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Tue, 24 Aug 2021 08:17:16 -0700 Subject: [PATCH] lib/parse-sexp: parse single terms and the empty list. There is not much of a parser here yet, but it already does some useful error reporting. Most functionality sketched in the documentation is not implemented yet; detailed documentation will follow with the implementation. --- doc/conf.py | 4 ++ doc/index.rst | 1 + doc/man7/notmuch-sexp-queries.rst | 81 +++++++++++++++++++++++++++++++ lib/Makefile.local | 3 +- lib/database-private.h | 7 +++ lib/parse-sexp.cc | 55 +++++++++++++++++++++ lib/query.cc | 8 +-- test/T080-search.sh | 7 --- test/T081-sexpr-search.sh | 65 +++++++++++++++++++++++++ 9 files changed, 217 insertions(+), 14 deletions(-) create mode 100644 doc/man7/notmuch-sexp-queries.rst create mode 100644 lib/parse-sexp.cc create mode 100755 test/T081-sexpr-search.sh diff --git a/doc/conf.py b/doc/conf.py index 3ec55a61..1fbd102b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -159,6 +159,10 @@ man_pages = [ u'syntax for notmuch queries', [notmuch_authors], 7), + ('man7/notmuch-sexp-queries', 'notmuch-sexp-queries', + u's-expression syntax for notmuch queries', + [notmuch_authors], 7), + ('man1/notmuch-show', 'notmuch-show', u'show messages matching the given search terms', [notmuch_authors], 1), diff --git a/doc/index.rst b/doc/index.rst index a3bf3480..fbdcf779 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -24,6 +24,7 @@ Contents: man1/notmuch-restore man1/notmuch-search man7/notmuch-search-terms + man7/notmuch-sexp-queries man1/notmuch-show man1/notmuch-tag python-bindings diff --git a/doc/man7/notmuch-sexp-queries.rst b/doc/man7/notmuch-sexp-queries.rst new file mode 100644 index 00000000..1118f854 --- /dev/null +++ b/doc/man7/notmuch-sexp-queries.rst @@ -0,0 +1,81 @@ +.. 
_notmuch-sexp-queries(7): + +==================== +notmuch-sexp-queries +==================== + +SYNOPSIS +======== + +**notmuch** **search** ``--query=sexp`` '(and (to santa) (date december))' + +DESCRIPTION +=========== + + +S-EXPRESSIONS +------------- + +An *s-expression* is either an atom, or a list of whitespace delimited +s-expressions inside parentheses. Atoms are either + +*basic value* + A basic value is an unquoted string containing no whitespace, double quotes, or + parentheses. + +*quoted string* + Double quotes (") delimit strings possibly containing whitespace + or parentheses. These can contain double quote characters by + escaping with backslash. E.g. ``"this is a quote \""``. + +S-EXPRESSION QUERIES +-------------------- + +An s-expression query is either an atom, the empty list, or a +*compound query* consisting of a prefix atom (first element) defining +a *field*, *logical operation*, or *modifier*, and 0 or more +subqueries. + +``*`` +``()`` + The empty list matches all messages + +*term* + Match all messages containing *term*, possibly after stemming + or phrase splitting. + +``(`` *field* |q1| |q2| ... |qn| ``)`` + Restrict the queries |q1| to |qn| to *field*, and combine with *and* + (for most fields) or *or*. See :any:`fields` for more information. + +``(`` *operator* |q1| |q2| ... |qn| ``)`` + Combine queries |q1| to |qn|. See :any:`operators` for more information. + +``(`` *modifier* |q1| |q2| ... |qn| ``)`` + Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a regular expression). + See :any:`modifiers` for more information. + +.. _fields: + +FIELDS +`````` + +.. _operators: + +OPERATORS +````````` + +.. _modifiers: + +MODIFIERS +````````` + +EXAMPLES +======== + +``Wizard`` + Match all messages containing the word "wizard", ignoring case. + +.. |q1| replace:: :math:`q_1` +.. |q2| replace:: :math:`q_2` +.. 
|qn| replace:: :math:`q_n` diff --git a/lib/Makefile.local b/lib/Makefile.local index e2d4b91d..1378a74b 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -63,7 +63,8 @@ libnotmuch_cxx_srcs = \ $(dir)/features.cc \ $(dir)/prefix.cc \ $(dir)/open.cc \ - $(dir)/init.cc + $(dir)/init.cc \ + $(dir)/parse-sexp.cc libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o) diff --git a/lib/database-private.h b/lib/database-private.h index 9706c17e..f206efaf 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -300,4 +300,11 @@ _notmuch_database_setup_standard_query_fields (notmuch_database_t *notmuch); notmuch_status_t _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch); +#if __cplusplus +/* parse-sexp.cc */ +notmuch_status_t +_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, + Xapian::Query &output); +#endif + #endif diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc new file mode 100644 index 00000000..66dbdb41 --- /dev/null +++ b/lib/parse-sexp.cc @@ -0,0 +1,55 @@ +#include <xapian.h> +#include "notmuch-private.h" + +#if HAVE_SFSEXP +#include "sexp.h" + + +/* _sexp is used for file scope symbols to avoid clashing with + * definitions from sexp.h */ + +/* Here we expect the s-expression to be either an atom (a single + * term), or a proper list with first element defining an operation, + * or as a special case the empty list */ + +static notmuch_status_t +_sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx, + Xapian::Query &output) +{ + + if (sx->ty == SEXP_VALUE) { + output = Xapian::Query (Xapian::Unicode::tolower (sx->val)); + return NOTMUCH_STATUS_SUCCESS; + } + + /* Empty list */ + if (! 
sx->list) { + output = Xapian::Query::MatchAll; + return NOTMUCH_STATUS_SUCCESS; + } + + if (sx->list->ty == SEXP_VALUE) + _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val); + else + _notmuch_database_log (notmuch, + "unexpected list in field/operation position\n"); + + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; +} + +notmuch_status_t +_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr, + Xapian::Query &output) +{ + const sexp_t *sx = NULL; + char *buf = talloc_strdup (notmuch, querystr); + + sx = parse_sexp (buf, strlen (querystr)); + if (! sx) { + _notmuch_database_log (notmuch, "invalid s-expression: '%s'\n", querystr); + return NOTMUCH_STATUS_BAD_QUERY_SYNTAX; + } + + return _sexp_to_xapian_query (notmuch, sx, output); +} +#endif diff --git a/lib/query.cc b/lib/query.cc index a3fe3793..435f7229 100644 --- a/lib/query.cc +++ b/lib/query.cc @@ -23,10 +23,6 @@ #include <glib.h> /* GHashTable, GPtrArray */ -#if HAVE_SFSEXP -#include "sexp.h" -#endif - struct _notmuch_query { notmuch_database_t *notmuch; const char *query_string; @@ -210,8 +206,8 @@ _notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query) if (query->parsed) return NOTMUCH_STATUS_SUCCESS; - query->xapian_query = Xapian::Query::MatchAll; - return NOTMUCH_STATUS_SUCCESS; + return _notmuch_sexp_string_to_xapian_query (query->notmuch, query->query_string, + query->xapian_query); } static notmuch_status_t diff --git a/test/T080-search.sh b/test/T080-search.sh index 9bda1eb9..a3f0dead 100755 --- a/test/T080-search.sh +++ b/test/T080-search.sh @@ -189,11 +189,4 @@ test_begin_subtest "parts do not have adjacent term positions" output=$(notmuch search id:termpos and '"c x"') test_expect_equal "$output" "" -if [[ NOTMUCH_HAVE_SFSEXP = 1 ]]; then - test_begin_subtest "sexpr query: all messages" - notmuch search '*' > EXPECTED - notmuch search --query=sexp '()' > OUTPUT - test_expect_equal_file EXPECTED OUTPUT -fi - test_done diff --git 
a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh new file mode 100755 index 00000000..46cc712c --- /dev/null +++ b/test/T081-sexpr-search.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +test_description='"notmuch search" in several variations' +. $(dirname "$0")/test-lib.sh || exit 1 + +if [ $NOTMUCH_HAVE_SFSEXP -ne 1 ]; then + printf "Skipping due to missing sfsexp library\n" + test_done +fi + +add_email_corpus + +test_begin_subtest "all messages: ()" +notmuch search '*' > EXPECTED +notmuch search --query=sexp "()" > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "single term in body" +notmuch search --query=sexp 'wizard' | notmuch_search_sanitize>OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "single term in body (case insensitive)" +notmuch search --query=sexp 'Wizard' | notmuch_search_sanitize>OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2009-11-18 [1/3] Carl Worth| Jan Janak; [notmuch] What a great idea! (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "single term in body, stemmed version" +test_subtest_known_broken +notmuch search arriv > EXPECTED +notmuch search --query=sexp arriv > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "Unbalanced parens" +# A code 1 indicates the error was handled (a crash will return e.g. 139). 
+test_expect_code 1 "notmuch search --query=sexp '('" + +test_begin_subtest "Unbalanced parens, error message" +notmuch search --query=sexp '(' >OUTPUT 2>&1 +cat <<EOF > EXPECTED +notmuch search: Syntax error in query +invalid s-expression: '(' +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "unknown prefix" +notmuch search --query=sexp '(foo)' >OUTPUT 2>&1 +cat <<EOF > EXPECTED +notmuch search: Syntax error in query +unknown prefix 'foo' +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "list as prefix" +notmuch search --query=sexp '((foo))' >OUTPUT 2>&1 +cat <<EOF > EXPECTED +notmuch search: Syntax error in query +unexpected list in field/operation position +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_done