lib/thread-fp: factor out query expansion, rewrite in Xapian

It will be convenient not to have to construct a notmuch query object
when parsing subqueries, so this commit rewrites the query
expansion (currently only used for thread:{} queries) using only
Xapian. As a bonus, it seems about 15% faster in initial experiments.
This commit is contained in:
David Bremner 2021-08-24 08:17:32 -07:00
parent b3bbaf1bc2
commit 4083fd8bec
4 changed files with 73 additions and 21 deletions

View file

@ -40,6 +40,10 @@
#include <xapian.h>
#if HAVE_SFSEXP
#include <sexp.h>
#endif
/* Bit masks for _notmuch_database::features. Features are named,
* independent aspects of the database schema.
*
@ -313,11 +317,21 @@ notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
/* Run 'subquery' against the database and set 'output' to the OR of
 * every term, taken from the matching documents, that starts with
 * the term prefix registered for 'field'.
 *
 * Returns NOTMUCH_STATUS_SUCCESS on success. If Xapian throws, the
 * error text is stored in 'msg' and NOTMUCH_STATUS_XAPIAN_EXCEPTION
 * is returned.
 */
notmuch_status_t
_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
Xapian::Query &output, std::string &msg);
/* regexp-fields.cc */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
std::string regexp_str,
Xapian::Query &output, std::string &msg);
#endif
#if HAVE_SFSEXP
/* parse-sexp.cc */
notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
#endif
#endif
#endif

View file

@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t *parent
Xapian::Query &output)
{
if (sx->ty == SEXP_VALUE) {
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {

View file

@ -821,3 +821,51 @@ notmuch_query_get_database (const notmuch_query_t *query)
{
return query->notmuch;
}
/* Expand 'subquery' into a query over the 'field' terms of its matches.
 *
 * The subquery is run against notmuch->xapian_db; for every matching
 * document, each term that starts with the term prefix of 'field'
 * (as returned by _find_prefix) is collected. 'output' is then set to
 * the OR of all collected terms.
 *
 * notmuch:  database to search.
 * field:    field name whose term prefix selects the terms to collect.
 * subquery: query whose matching documents are inspected.
 * output:   receives the expanded query; assigned only on success.
 * msg:      receives the Xapian error message on failure.
 *
 * Returns NOTMUCH_STATUS_SUCCESS, or NOTMUCH_STATUS_XAPIAN_EXCEPTION
 * if Xapian throws (the error is also logged on the database).
 */
notmuch_status_t
_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
                       Xapian::Query &output, std::string &msg)
{
    std::set<std::string> terms;
    const std::string term_prefix = _find_prefix (field);

    if (_debug_query ()) {
        fprintf (stderr, "Expanding subquery:\n%s\n",
                 subquery.get_description ().c_str ());
    }

    try {
        Xapian::Enquire enquire (*notmuch->xapian_db);
        Xapian::MSet mset;

        /* Only the set of matching documents is used below, not
         * their ranking. */
        enquire.set_weighting_scheme (Xapian::BoolWeight ());
        enquire.set_query (subquery);

        /* Request up to as many matches as there are documents. */
        mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());

        for (Xapian::MSetIterator iterator = mset.begin (); iterator != mset.end (); ++iterator) {
            Xapian::docid doc_id = *iterator;
            Xapian::Document doc = notmuch->xapian_db->get_document (doc_id);
            Xapian::TermIterator i = doc.termlist_begin ();

            /* Jump to the first candidate term, then stop at the
             * first term that no longer starts with term_prefix. */
            for (i.skip_to (term_prefix);
                 i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 0); ++i) {
                terms.insert (*i);
            }
        }

        output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());

        if (_debug_query ()) {
            /* Report the result of the expansion, not the input
             * subquery (which was already printed above). */
            fprintf (stderr, "Expanded query:\n%s\n",
                     output.get_description ().c_str ());
        }
    } catch (const Xapian::Error &error) {
        _notmuch_database_log (notmuch,
                               "A Xapian exception occurred expanding query: %s\n",
                               error.get_msg ().c_str ());
        msg = error.get_msg ();
        return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
    }

    return NOTMUCH_STATUS_SUCCESS;
}

View file

@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
throw Xapian::QueryParserError ("missing } in '" + str + "'");
} else {
Xapian::Query subquery;
Xapian::Query query;
std::string msg;
std::string subquery_str = str.substr (1, str.size () - 2);
notmuch_query_t *subquery = notmuch_query_create (notmuch, subquery_str.c_str ());
notmuch_messages_t *messages;
std::set<std::string> terms;
if (! subquery)
throw Xapian::QueryParserError ("failed to create subquery for '" + subquery_str +
"'");
status = notmuch_query_search_messages (subquery, &messages);
status = _notmuch_query_string_to_xapian_query (notmuch, subquery_str, subquery, msg);
if (status)
throw Xapian::QueryParserError ("failed to search messages for '" + subquery_str +
"'");
throw Xapian::QueryParserError (msg);
for (; notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) {
std::string term = thread_prefix;
notmuch_message_t *message;
message = notmuch_messages_get (messages);
term += _notmuch_message_get_thread_id_only (message);
terms.insert (term);
}
return Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
status = _notmuch_query_expand (notmuch, "thread", subquery, query, msg);
if (status)
throw Xapian::QueryParserError (msg);
return query;
}
} else {
/* literal thread id */