From 81bd72cebbffcc11be4198a099974a0e0722c86e Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Tue, 7 Mar 2017 08:52:39 -0400 Subject: [PATCH] lib: Fix RegexpPostingSource Remove incorrect skipping to first match from init(), and add explicit skip_to() and check() methods to work around xapian-core bug (the check() method will also improve speed when filtering by one of these). --- lib/regexp-fields.cc | 26 +++++++++++++++++++++----- lib/regexp-fields.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index b4174750..8e740a81 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -62,11 +62,6 @@ RegexpPostingSource::init (const Xapian::Database &db) it_ = db_.valuestream_begin (slot_); end_ = db.valuestream_end (slot_); started_ = false; - - /* make sure we start on a matching value */ - while (!at_end() && regexec (&regexp_, (*it_).c_str (), 0, NULL, 0) != 0) { - ++it_; - } } Xapian::doccount @@ -113,6 +108,27 @@ RegexpPostingSource::next (unused (double min_wt)) } } +void +RegexpPostingSource::skip_to (Xapian::docid did, unused (double min_wt)) +{ + started_ = true; + it_.skip_to (did); + for (; ! 
at_end (); ++it_) { + std::string value = *it_; + if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0) + break; + } +} + +bool +RegexpPostingSource::check (Xapian::docid did, unused (double min_wt)) +{ + started_ = true; + if (!it_.check (did) || at_end ()) + return false; + return (regexec (&regexp_, (*it_).c_str (), 0, NULL, 0) == 0); +} + static inline Xapian::valueno _find_slot (std::string prefix) { if (prefix == "from") diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h index bac11999..a4ba7ad8 100644 --- a/lib/regexp-fields.h +++ b/lib/regexp-fields.h @@ -56,6 +56,8 @@ class RegexpPostingSource : public Xapian::PostingSource Xapian::docid get_docid () const; bool at_end () const; void next (unused (double min_wt)); + void skip_to (Xapian::docid did, unused (double min_wt)); + bool check (Xapian::docid did, unused (double min_wt)); };