2017-02-27 03:34:20 +01:00
|
|
|
/* regexp-fields.cc - field processor glue for regex supporting fields
|
|
|
|
*
|
|
|
|
* This file is part of notmuch.
|
|
|
|
*
|
|
|
|
* Copyright © 2015 Austin Clements
|
|
|
|
* Copyright © 2016 David Bremner
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see https://www.gnu.org/licenses/ .
|
|
|
|
*
|
|
|
|
* Author: Austin Clements <aclements@csail.mit.edu>
|
|
|
|
* David Bremner <david@tethera.net>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "regexp-fields.h"
|
|
|
|
#include "notmuch-private.h"
|
|
|
|
#include "database-private.h"
|
|
|
|
|
2021-08-24 17:17:28 +02:00
|
|
|
/*
 * Compile the regular expression 'str' into 'regexp'.
 *
 * The expression is compiled with REG_EXTENDED | REG_NOSUB.
 *
 * Returns NOTMUCH_STATUS_SUCCESS when regcomp succeeds; 'msg' is left
 * untouched in that case. Otherwise 'msg' is set to "Regexp error: "
 * followed by the text produced by regerror, and
 * NOTMUCH_STATUS_ILLEGAL_ARGUMENT is returned.
 */
notmuch_status_t
compile_regex (regex_t &regexp, const char *str, std::string &msg)
{
    int err = regcomp (&regexp, str, REG_EXTENDED | REG_NOSUB);

    if (err != 0) {
        /* Called with an empty buffer, regerror only reports the size
         * it needs; that size counts the terminating NUL byte. */
        size_t len = regerror (err, &regexp, NULL, 0);
        /* Let std::string own the scratch space so that nothing leaks
         * if one of the string operations below throws. */
        std::string buffer (len, '\0');

        (void) regerror (err, &regexp, &buffer[0], len);

        msg = "Regexp error: ";
        /* Append up to, but not including, the NUL terminator so the
         * message does not end in an embedded '\0'. */
        msg.append (buffer.c_str ());

        return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
    }
    return NOTMUCH_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
|
/*
 * Record the value slot to scan and compile 'regexp' into the member
 * regexp_ via compile_regex.
 *
 * Throws Xapian::QueryParserError, carrying the message filled in by
 * compile_regex, when the expression does not compile.
 */
RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
    : slot_ (slot)
{
    std::string msg;
    notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);

    if (status)
        throw Xapian::QueryParserError (msg);
}
|
|
|
|
|
|
|
|
/* Release the storage held by the compiled regular expression regexp_. */
RegexpPostingSource::~RegexpPostingSource ()
{
    regfree (&regexp_);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
RegexpPostingSource::init (const Xapian::Database &db)
|
|
|
|
{
|
|
|
|
db_ = db;
|
|
|
|
it_ = db_.valuestream_begin (slot_);
|
|
|
|
end_ = db.valuestream_end (slot_);
|
|
|
|
started_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lower bound on the number of matching documents: always zero. */
Xapian::doccount
RegexpPostingSource::get_termfreq_min () const
{
    const Xapian::doccount lower_bound = 0;

    return lower_bound;
}
|
|
|
|
|
|
|
|
/* Estimated number of matching documents: half of get_termfreq_max (). */
Xapian::doccount
RegexpPostingSource::get_termfreq_est () const
{
    const Xapian::doccount upper_bound = get_termfreq_max ();

    return upper_bound / 2;
}
|
|
|
|
|
|
|
|
/*
 * Upper bound on the number of matching documents: the value frequency
 * the database reports for slot_.
 */
Xapian::doccount
RegexpPostingSource::get_termfreq_max () const
{
    const Xapian::doccount value_frequency = db_.get_value_freq (slot_);

    return value_frequency;
}
|
|
|
|
|
|
|
|
/* Document id at the current position of the value-stream iterator. */
Xapian::docid
RegexpPostingSource::get_docid () const
{
    const Xapian::docid current = it_.get_docid ();

    return current;
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
RegexpPostingSource::at_end () const
|
|
|
|
{
|
|
|
|
return it_ == end_;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
RegexpPostingSource::next (unused (double min_wt))
|
|
|
|
{
|
|
|
|
if (started_ && ! at_end ())
|
|
|
|
++it_;
|
|
|
|
started_ = true;
|
|
|
|
|
|
|
|
for (; ! at_end (); ++it_) {
|
|
|
|
std::string value = *it_;
|
|
|
|
if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-07 13:52:39 +01:00
|
|
|
void
|
|
|
|
RegexpPostingSource::skip_to (Xapian::docid did, unused (double min_wt))
|
|
|
|
{
|
|
|
|
started_ = true;
|
|
|
|
it_.skip_to (did);
|
|
|
|
for (; ! at_end (); ++it_) {
|
|
|
|
std::string value = *it_;
|
|
|
|
if (regexec (®exp_, value.c_str (), 0, NULL, 0) == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
RegexpPostingSource::check (Xapian::docid did, unused (double min_wt))
|
|
|
|
{
|
|
|
|
started_ = true;
|
2019-06-13 12:55:35 +02:00
|
|
|
if (! it_.check (did) || at_end ())
|
2017-03-07 13:52:39 +01:00
|
|
|
return false;
|
|
|
|
return (regexec (®exp_, (*it_).c_str (), 0, NULL, 0) == 0);
|
|
|
|
}
|
|
|
|
|
2019-06-13 12:55:35 +02:00
|
|
|
/*
 * Map a field name onto the Xapian value slot used for it.
 *
 * "from", "subject" and "mid" map to NOTMUCH_VALUE_FROM,
 * NOTMUCH_VALUE_SUBJECT and NOTMUCH_VALUE_MESSAGE_ID respectively; any
 * other name yields Xapian::BAD_VALUENO.
 */
static inline Xapian::valueno
_find_slot (std::string prefix)
{
    if (prefix == "from")
        return NOTMUCH_VALUE_FROM;

    if (prefix == "subject")
        return NOTMUCH_VALUE_SUBJECT;

    if (prefix == "mid")
        return NOTMUCH_VALUE_MESSAGE_ID;

    /* No value slot is associated with this field. */
    return Xapian::BAD_VALUENO;
}
|
|
|
|
|
2021-08-24 17:17:28 +02:00
|
|
|
/*
 * Set up a field processor for 'field_'.
 *
 * The value slot is looked up with _find_slot and the term prefix with
 * _find_prefix; the options, query parser reference and database
 * pointer are stored as given.
 */
RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
                                            notmuch_field_flag_t options_,
                                            Xapian::QueryParser &parser_,
                                            notmuch_database_t *notmuch_)
    : slot (_find_slot (field_)),
    field (field_),
    term_prefix (_find_prefix (field_.c_str ())),
    options (options_),
    parser (parser_),
    notmuch (notmuch_)
{
}
|
|
|
|
|
2021-08-24 17:17:28 +02:00
|
|
|
/*
 * Build a Xapian query in 'output' matching 'regexp_str' against 'field'.
 *
 * The expression is first compiled with compile_regex. On failure the
 * message is appended to the database log, 'msg' holds the error text
 * and the failing status is returned without touching 'output'.
 *
 * If 'slot' is Xapian::BAD_VALUENO it is looked up from 'field' with
 * _find_slot. When a slot is available the query wraps a
 * RegexpPostingSource for that slot. Otherwise every database term
 * starting with the field's term prefix is tested against the
 * expression (with the prefix stripped) and the matching terms are
 * combined with OP_OR.
 *
 * Returns NOTMUCH_STATUS_SUCCESS once 'output' has been assigned.
 */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
                          std::string regexp_str,
                          Xapian::Query &output, std::string &msg)
{
    regex_t regexp;
    notmuch_status_t status;

    status = compile_regex (regexp, regexp_str.c_str (), msg);
    if (status) {
        _notmuch_database_log_append (notmuch, "error compiling regex %s", msg.c_str ());
        return status;
    }

    /* The expression compiled, so it owns storage that must be handed
     * back with regfree on every way out of this function, including
     * exceptions thrown by the calls below. */
    struct regex_guard {
        regex_t *re;
        explicit regex_guard (regex_t *re_) : re (re_) { }
        ~regex_guard () { regfree (re); }
    } guard (&regexp);

    if (slot == Xapian::BAD_VALUENO)
        slot = _find_slot (field);

    if (slot == Xapian::BAD_VALUENO) {
        std::string term_prefix = _find_prefix (field.c_str ());
        std::vector<std::string> terms;

        for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
             it != notmuch->xapian_db->allterms_end (); ++it) {
            /* Dereference once; the same string is used both for
             * matching and for the resulting query. */
            const std::string term = *it;

            /* Match against the part of the term after the prefix. */
            if (regexec (&regexp, term.c_str () + term_prefix.size (),
                         0, NULL, 0) == 0)
                terms.push_back (term);
        }
        output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end ());
    } else {
        /* The posting source compiles its own copy of the expression
         * from regexp_str; the one compiled above only served to
         * validate it and is released by the guard. */
        RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
        output = Xapian::Query (postings->release ());
    }
    return NOTMUCH_STATUS_SUCCESS;
}
|
|
|
|
|
2017-02-27 03:34:20 +01:00
|
|
|
/*
 * Turn the text following a field prefix into a Xapian query.
 *
 * - Empty text: for probabilistic fields, everything except documents
 *   having any term with the field's prefix; otherwise the bare term
 *   prefix.
 * - Text starting with '/': must also end with '/' and is handed to
 *   _notmuch_regexp_to_query with the delimiters removed. A missing
 *   closing delimiter or a failing conversion raises
 *   Xapian::QueryParserError.
 * - Anything else: probabilistic fields are re-parsed with the query
 *   parser, boolean fields become a single prefixed term.
 */
Xapian::Query
RegexpFieldProcessor::operator() (const std::string & str)
{
    const bool probabilistic = (options & NOTMUCH_FIELD_PROBABILISTIC) != 0;

    if (str.empty ()) {
        if (! probabilistic)
            return Xapian::Query (term_prefix);

        return Xapian::Query (Xapian::Query::OP_AND_NOT,
                              Xapian::Query::MatchAll,
                              Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
    }

    /* str is known to be non-empty from here on. */
    const char first = str[0];
    const char last = str[str.size () - 1];

    if (first == '/') {
        /* A lone "/" has no closing delimiter either. */
        if (str.size () < 2 || last != '/')
            throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");

        Xapian::Query result;
        std::string error;
        const std::string pattern = str.substr (1, str.size () - 2);

        if (_notmuch_regexp_to_query (notmuch, slot, field, pattern, result, error))
            throw Xapian::QueryParserError (error);

        return result;
    }

    if (probabilistic) {
        /* TODO replace this with a nicer API level triggering of
         * phrase parsing, when possible */
        const bool parenthesized = (first == '(' && last == ')');
        const bool single_word_wildcard =
            (last == '*' && str.find (' ') == std::string::npos);

        /* Parenthesized groups and single-word wildcards are passed
         * through untouched; everything else is quoted as a phrase. */
        const std::string phrase =
            (parenthesized || single_word_wildcard) ? str : '"' + str + '"';

        return parser.parse_query (phrase, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
    }

    /* Boolean prefix: one trailing '/' is dropped unless it is the
     * whole string. */
    const bool trailing_slash = (str.size () > 1 && last == '/');

    return Xapian::Query (term_prefix
                          + (trailing_slash ? str.substr (0, str.size () - 1) : str));
}
|