mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-12-22 17:34:54 +01:00
6273966d0b
This replaces two instances of Xapian::Query::MatchAll with the
equivalent but thread-safe alternative Xapian::Query(std::string()).
Xapian::Query::MatchAll maintains an internal pointer to a refcounted
Xapian::Internal::QueryTerm.
None of this is thread-safe but that wouldn't be an issue if
Xapian::Query::MatchAll wasn't static. Because it's static, the
refcounting goes awry when Notmuch is called from multiple threads.
This is actually documented by Xapian:
4715de3a9f/xapian-core/include/xapian/query.h (L65)
While static, Xapian::Query::MatchNothing is safe because it doesn't
maintain an internal object and as such, doesn't use references.
Two best-effort tests making use of TSan were added to showcase the
issue (I couldn't figure out a way to deterministically reproduce it
without making an unmaintainable mess).
First, when two databases are created in parallel, a query that uses
Xapian::Query::MatchAll is made (lib/query.cc), resulting in the
following backtrace on a segfault:
#0 0x00007ffff76822af in Xapian::Query::get_terms_begin (this=0x7fffe80137f0) at api/query.cc:141
#1 0x00007ffff7f933f5 in _notmuch_query_cache_terms (query=0x7fffe80137c0) at lib/query.cc:176
#2 0x00007ffff7f93784 in _notmuch_query_ensure_parsed_xapian (query=0x7fffe80137c0) at lib/query.cc:225
#3 0x00007ffff7f9381a in _notmuch_query_ensure_parsed (query=0x7fffe80137c0) at lib/query.cc:260
#4 0x00007ffff7f93bfe in _notmuch_query_search_documents (query=0x7fffe80137c0, type=0x7ffff7fa9b1e "mail", out=0x7ffff666da18) at lib/query.cc:361
#5 0x00007ffff7f93ba4 in notmuch_query_search_messages (query=0x7fffe80137c0, out=0x7ffff666da18) at lib/query.cc:349
#6 0x00007ffff7f83d98 in notmuch_database_upgrade (notmuch=0x7fffe8000bd0, progress_notify=0x0, closure=0x0) at lib/database.cc:934
#7 0x00007ffff7fa110f in notmuch_database_create_with_config (database_path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", config_path=0x7ffff7faab3c "", profile=0x0, database=0x0, status_string=0x7ffff666dc90) at lib/open.cc:754
#8 0x00007ffff7fa0d6f in notmuch_database_create_verbose (path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", database=0x0, status_string=0x7ffff666dc90) at lib/open.cc:653
#9 0x00007ffff7fa0ceb in notmuch_database_create (path=0x7ffff666dcb0 "/tmp/notmuch.MZ2AGr", database=0x0) at lib/open.cc:637
...
Second, some queries would make use of Xapian::Query::MatchAll
(lib/regexp-fields.cc), resulting in the following backtrace on a
segfault:
#0 0x00007f629828b690 in Xapian::Internal::QueryBranch::gather_terms (this=0x7f628800def0, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1245
#1 0x00007f629828c260 in Xapian::Internal::QueryScaleWeight::gather_terms (this=0x7f628800df70, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1434
#2 0x00007f629828b69f in Xapian::Internal::QueryBranch::gather_terms (this=0x7f628800dd90, void_terms=0x7f629726d5a0) at api/queryinternal.cc:1245
#3 0x00007f6298282571 in Xapian::Query::get_unique_terms_begin (this=0x7f628800dcd8) at api/query.cc:166
#4 0x00007f629841a59b in Xapian::Weight::Internal::accumulate_stats (this=0x7f628800dca0, subdb=..., rset=...) at weight/weightinternal.cc:86
#5 0x00007f62983c15ba in LocalSubMatch::prepare_match (this=0x7f628800df20, nowait=true, total_stats=...) at matcher/localsubmatch.cc:172
#6 0x00007f62983c8fcc in prepare_sub_matches (leaves=std::vector of length 1, capacity 1 = {...}, stats=...) at matcher/multimatch.cc:237
#7 0x00007f62983c98a3 in MultiMatch::MultiMatch (this=0x7f629726d9a0, db_=..., query_=..., qlen=3, omrset=0x0, collapse_max_=0, collapse_key_=4294967295, percent_cutoff_=0, weight_cutoff_=0, order_=Xapian::Enquire::ASCENDING, sort_key_=0, sort_by_=Xapian::Enquire::Internal::VAL, sort_value_forward_=true, time_limit_=0, stats=..., weight_=0x7f6288008d50, matchspies_=std::vector of length 0, capacity 0, have_sorter=false, have_mdecider=false) at matcher/multimatch.cc:353
#8 0x00007f629826fcba in Xapian::Enquire::Internal::get_mset (this=0x7f628800e0b0, first=0, maxitems=0, check_at_least=0, rset=0x0, mdecider=0x0) at api/omenquire.cc:569
#9 0x00007f629827181c in Xapian::Enquire::get_mset (this=0x7f629726db80, first=0, maxitems=0, check_at_least=0, rset=0x0, mdecider=0x0) at api/omenquire.cc:937
#10 0x00007f6298be529a in _notmuch_query_search_documents (query=0x7f6288009750, type=0x7f6298bfaafe "mail", out=0x7f629726dcc0) at lib/query.cc:447
#11 0x00007f6298be4ae8 in notmuch_query_search_messages (query=0x7f6288009750, out=0x7f629726dcc0) at lib/query.cc:349
...
Printing Xapian::Query::MatchAll->internal.px->_refs in these
circumstances can help to quickly identify this scenario.
This is motivated by some test frameworks (like Rust's Cargo) that
run unit tests in parallel and would easily encounter this issue,
unless client code gates every call to Notmuch behind a lock.
This is what can be expected from the tests when they fail:
== stderr ==
+==================
+WARNING: ThreadSanitizer: data race (pid=207931)
+ Read of size 1 at 0x7b10000001a0 by thread T2:
+ #0 memcpy <null> (libtsan.so.2+0x62506)
+ #1 void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char*>(char*, char*, std::forward_iterator_tag) [clone .isra.0] <null> (libxapian.so.30+0x872b3)
+
+ Previous write of size 8 at 0x7b10000001a0 by thread T1:
+ #0 operator new(unsigned long) <null> (libtsan.so.2+0x8ba83)
+ #1 Xapian::Query::Query(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, unsigned int, unsigned int) <null> (libxapian.so.30+0x855cd)
...
252 lines
6.5 KiB
C++
252 lines
6.5 KiB
C++
/* regexp-fields.cc - field processor glue for regex supporting fields
|
|
*
|
|
* This file is part of notmuch.
|
|
*
|
|
* Copyright © 2015 Austin Clements
|
|
* Copyright © 2016 David Bremner
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see https://www.gnu.org/licenses/ .
|
|
*
|
|
* Author: Austin Clements <aclements@csail.mit.edu>
|
|
* David Bremner <david@tethera.net>
|
|
*/
|
|
|
|
#include "regexp-fields.h"
|
|
#include "notmuch-private.h"
|
|
#include "database-private.h"
|
|
#include "xapian-extra.h"
|
|
|
|
/* Compile 'str' as a POSIX extended regular expression into 'regexp'.
 *
 * The pattern is compiled with REG_NOSUB, so it can only be used for
 * match / no-match tests, not for extracting sub-matches.
 *
 * Returns NOTMUCH_STATUS_SUCCESS when compilation succeeds; 'msg' is
 * left untouched in that case.  Otherwise 'msg' is set to
 * "Regexp error: " followed by the description obtained from
 * regerror, and NOTMUCH_STATUS_ILLEGAL_ARGUMENT is returned.
 */
notmuch_status_t
compile_regex (regex_t &regexp, const char *str, std::string &msg)
{
    const int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);

    if (err == 0)
        return NOTMUCH_STATUS_SUCCESS;

    /* Called with an empty buffer, regerror only reports the size it
     * needs; that size counts the terminating NUL. */
    const size_t len = regerror (err, &regexp, NULL, 0);
    std::string buffer (len, '\0');

    if (len > 0)
        (void) regerror (err, &regexp, &buffer[0], len);

    msg = "Regexp error: ";
    /* Append as a C string so that the terminating NUL written by
     * regerror does not end up embedded in msg. */
    msg.append (buffer.c_str ());

    return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
}
|
|
|
|
/* Build a posting source that tests the values stored in 'slot'
 * against the regular expression 'regexp'.
 *
 * The expression is compiled immediately with compile_regex; when
 * that fails, a Xapian::QueryParserError carrying the message
 * produced by compile_regex is thrown.
 */
RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
    : slot_ (slot)
{
    std::string msg;
    notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);

    if (status)
        throw Xapian::QueryParserError (msg);
}
|
|
|
|
/* Free the regular expression compiled by the constructor. */
RegexpPostingSource::~RegexpPostingSource ()
{
    regfree (&regexp_);
}
|
|
|
|
void
|
|
RegexpPostingSource::init (const Xapian::Database &db)
|
|
{
|
|
db_ = db;
|
|
it_ = db_.valuestream_begin (slot_);
|
|
end_ = db.valuestream_end (slot_);
|
|
started_ = false;
|
|
}
|
|
|
|
/* Lower bound on the number of matching documents: always 0. */
Xapian::doccount
RegexpPostingSource::get_termfreq_min () const
{
    return 0;
}
|
|
|
|
/* Estimated number of matching documents: half of the upper bound
 * reported by get_termfreq_max (). */
Xapian::doccount
RegexpPostingSource::get_termfreq_est () const
{
    const Xapian::doccount upper_bound = get_termfreq_max ();

    return upper_bound / 2;
}
|
|
|
|
/* Upper bound on the number of matching documents: the value
 * frequency the database reports for slot_. */
Xapian::doccount
RegexpPostingSource::get_termfreq_max () const
{
    return db_.get_value_freq (slot_);
}
|
|
|
|
/* Document id at the current position of the value stream iterator. */
Xapian::docid
RegexpPostingSource::get_docid () const
{
    return it_.get_docid ();
}
|
|
|
|
bool
|
|
RegexpPostingSource::at_end () const
|
|
{
|
|
return it_ == end_;
|
|
}
|
|
|
|
void
|
|
RegexpPostingSource::next (unused (double min_wt))
|
|
{
|
|
if (started_ && ! at_end ())
|
|
++it_;
|
|
started_ = true;
|
|
|
|
for (; ! at_end (); ++it_) {
|
|
std::string value = *it_;
|
|
if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
|
|
break;
|
|
}
|
|
}
|
|
|
|
void
|
|
RegexpPostingSource::skip_to (Xapian::docid did, unused (double min_wt))
|
|
{
|
|
started_ = true;
|
|
it_.skip_to (did);
|
|
for (; ! at_end (); ++it_) {
|
|
std::string value = *it_;
|
|
if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool
|
|
RegexpPostingSource::check (Xapian::docid did, unused (double min_wt))
|
|
{
|
|
started_ = true;
|
|
if (! it_.check (did) || at_end ())
|
|
return false;
|
|
return (regexec (®exp_, (*it_).c_str (), 0, NULL, 0) == 0);
|
|
}
|
|
|
|
/* Map a field prefix to the value slot holding that field.
 *
 * Recognised prefixes are "from", "subject" and "mid"; any other
 * prefix yields Xapian::BAD_VALUENO.
 */
static inline Xapian::valueno
_find_slot (std::string prefix)
{
    if (prefix == "from")
        return NOTMUCH_VALUE_FROM;

    if (prefix == "subject")
        return NOTMUCH_VALUE_SUBJECT;

    if (prefix == "mid")
        return NOTMUCH_VALUE_MESSAGE_ID;

    return Xapian::BAD_VALUENO;
}
|
|
|
|
/* Create a field processor for the field named 'field_'.
 *
 * The value slot comes from _find_slot and the term prefix from
 * _find_prefix, both looked up by field name; the option flags, the
 * query parser and the database handle are stored as given.
 */
RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
                                            notmuch_field_flag_t options_,
                                            Xapian::QueryParser &parser_,
                                            notmuch_database_t *notmuch_)
    : slot (_find_slot (field_))
    , field (field_)
    , term_prefix (_find_prefix (field_.c_str ()))
    , options (options_)
    , parser (parser_)
    , notmuch (notmuch_)
{
}
|
|
|
|
/* Turn the regular expression 'regexp_str' for 'field' into a Xapian
 * query stored in 'output'.
 *
 * When 'slot' is Xapian::BAD_VALUENO it is looked up from 'field'
 * with _find_slot.  If no slot is found, every term in the database
 * starting with the field's term prefix is tested against the
 * expression (with the prefix stripped) and the matching terms are
 * combined with OP_OR.  Otherwise the query wraps a
 * RegexpPostingSource for that slot.
 *
 * Returns NOTMUCH_STATUS_SUCCESS on success.  If the expression does
 * not compile, the error is appended to the database log, 'msg' holds
 * the description set by compile_regex, and the status returned by
 * compile_regex is passed on.
 */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
                          std::string regexp_str,
                          Xapian::Query &output, std::string &msg)
{
    regex_t regexp;
    notmuch_status_t status;

    status = compile_regex (regexp, regexp_str.c_str (), msg);
    if (status) {
        _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
        return status;
    }

    /* The compiled expression is owned by this function: free it on
     * every way out, including exceptions raised while iterating
     * over terms or constructing the query. */
    struct regex_guard {
        explicit regex_guard (regex_t *re_) : re (re_) { }
        ~regex_guard () { regfree (re); }
        regex_t *re;
    } guard (&regexp);

    if (slot == Xapian::BAD_VALUENO)
        slot = _find_slot (field);

    if (slot == Xapian::BAD_VALUENO) {
        std::string term_prefix = _find_prefix (field.c_str ());
        std::vector<std::string> terms;

        for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
             it != notmuch->xapian_db->allterms_end (); ++it) {
            /* Match against the term text only, not its prefix. */
            if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
                         0, NULL, 0) == 0)
                terms.push_back (*it);
        }
        output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
    } else {
        /* The posting source compiles its own copy of the expression
         * from regexp_str; the local one only validated it. */
        RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
        output = Xapian::Query (postings->release ());
    }
    return NOTMUCH_STATUS_SUCCESS;
}
|
|
|
|
/* Build the query for the text 'str' given after this field's prefix.
 *
 * - Empty text: for probabilistic fields, every document except those
 *   having any term with this field's prefix; otherwise a query for
 *   the bare prefix term.
 * - Text starting with '/': must also end with '/' and is handed to
 *   _notmuch_regexp_to_query as a regular expression; a missing
 *   closing delimiter or a failed conversion raises
 *   Xapian::QueryParserError.
 * - Anything else: probabilistic fields are parsed by the query
 *   parser (quoted as a phrase unless the text is parenthesised or a
 *   single-word wildcard); boolean fields become a single term, with
 *   one trailing '/' removed.
 */
Xapian::Query
RegexpFieldProcessor::operator() (const std::string & str)
{
    const bool probabilistic = (options & NOTMUCH_FIELD_PROBABILISTIC) != 0;

    if (str.empty ()) {
        if (! probabilistic)
            return Xapian::Query (term_prefix);

        return Xapian::Query (Xapian::Query::OP_AND_NOT,
                              xapian_query_match_all (),
                              Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
    }

    const char first = str.at (0);
    const char last = *str.rbegin ();

    if (first == '/') {
        /* A lone "/" has no closing delimiter either. */
        if (str.length () <= 1 || last != '/')
            throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");

        Xapian::Query query;
        std::string msg;
        const std::string regexp_str = str.substr (1, str.size () - 2);
        const notmuch_status_t status =
            _notmuch_regexp_to_query (notmuch, slot, field, regexp_str, query, msg);

        if (status)
            throw Xapian::QueryParserError (msg);
        return query;
    }

    if (probabilistic) {
        /* TODO replace this with a nicer API level triggering of
         * phrase parsing, when possible */
        const bool parenthesised = (first == '(' && last == ')');
        const bool single_word_wildcard =
            (last == '*' && str.find (' ') == std::string::npos);

        std::string query_str;
        if (parenthesised || single_word_wildcard)
            query_str = str;
        else
            query_str = '"' + str + '"';

        return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
    }

    /* Boolean prefix */
    std::string term = term_prefix;

    if (str.length () > 1 && last == '/')
        term += str.substr (0, str.size () - 1);
    else
        term += str;

    return Xapian::Query (term);
}
|