mirror of
https://git.notmuchmail.org/git/notmuch
synced 2025-01-13 12:03:17 +01:00
c62c22c9fb
This resolves an old bug reported by David Edmondson in 2014. The fix is only needed for the "boolean" case, as probabilistic / phrase searching already ignores punctuation. This fix is only for the infix (xapian provided) query parser. [1]: id:cunoasuolcv.fsf@gargravarr.hh.sledj.net
389 lines
12 KiB
C++
389 lines
12 KiB
C++
/* database-private.h - For peeking into the internals of notmuch_database_t
|
|
*
|
|
* Copyright © 2009 Carl Worth
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see https://www.gnu.org/licenses/ .
|
|
*
|
|
* Author: Carl Worth <cworth@cworth.org>
|
|
*/
|
|
|
|
#ifndef NOTMUCH_DATABASE_PRIVATE_H
|
|
#define NOTMUCH_DATABASE_PRIVATE_H
|
|
|
|
/* According to WG14/N1124, a C++ implementation won't provide us a
|
|
* macro like PRIx64 (which gives a printf format string for
|
|
* formatting a uint64_t as hexadecimal) unless we define
|
|
* __STDC_FORMAT_MACROS before including inttypes.h. That's annoying,
|
|
* but there it is.
|
|
*/
|
|
#define __STDC_FORMAT_MACROS
|
|
#include <inttypes.h>
|
|
|
|
#include "notmuch-private.h"
|
|
|
|
#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
|
|
|
|
#ifdef SILENCE_XAPIAN_DEPRECATION_WARNINGS
|
|
#define XAPIAN_DEPRECATED(D) D
|
|
#endif
|
|
|
|
#include <xapian.h>
|
|
|
|
#if HAVE_SFSEXP
|
|
#include <sexp.h>
|
|
#endif
|
|
|
|
/* Bit masks for _notmuch_database::features. Features are named,
|
|
* independent aspects of the database schema.
|
|
*
|
|
* A database stores the set of features that it "uses" (implicitly
|
|
* before database version 3 and explicitly as of version 3).
|
|
*
|
|
* A given library version will "recognize" a particular set of
|
|
* features; if a database uses a feature that the library does not
|
|
* recognize, the library will refuse to open it. It is assumed the
|
|
* set of recognized features grows monotonically over time. A
|
|
* library version will "implement" some subset of the recognized
|
|
* features: some operations may require that the database use (or not
|
|
* use) some feature, while other operations may support both
|
|
* databases that use and that don't use some feature.
|
|
*
|
|
* On disk, the database stores string names for these features (see
|
|
* the feature_names array). These enum bit values are never
|
|
* persisted to disk and may change freely.
|
|
*/
|
|
enum _notmuch_features {
|
|
/* If set, file names are stored in "file-direntry" terms. If
|
|
* unset, file names are stored in document data.
|
|
*
|
|
* Introduced: version 1. */
|
|
NOTMUCH_FEATURE_FILE_TERMS = 1 << 0,
|
|
|
|
/* If set, directory timestamps are stored in documents with
|
|
* XDIRECTORY terms and relative paths. If unset, directory
|
|
* timestamps are stored in documents with XTIMESTAMP terms and
|
|
* absolute paths.
|
|
*
|
|
* Introduced: version 1. */
|
|
NOTMUCH_FEATURE_DIRECTORY_DOCS = 1 << 1,
|
|
|
|
/* If set, the from, subject, and message-id headers are stored in
|
|
* message document values. If unset, message documents *may*
|
|
* have these values, but if the value is empty, it must be
|
|
* retrieved from the message file.
|
|
*
|
|
* Introduced: optional in version 1, required as of version 3.
|
|
*/
|
|
NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES = 1 << 2,
|
|
|
|
/* If set, folder terms are boolean and path terms exist. If
|
|
* unset, folder terms are probabilistic and stemmed and path
|
|
* terms do not exist.
|
|
*
|
|
* Introduced: version 2. */
|
|
NOTMUCH_FEATURE_BOOL_FOLDER = 1 << 3,
|
|
|
|
/* If set, missing messages are stored in ghost mail documents.
|
|
* If unset, thread IDs of ghost messages are stored as database
|
|
* metadata instead of in ghost documents.
|
|
*
|
|
* Introduced: version 3. */
|
|
NOTMUCH_FEATURE_GHOSTS = 1 << 4,
|
|
|
|
|
|
/* If set, then the database was created after the introduction of
|
|
* indexed mime types. If unset, then the database may contain a
|
|
* mixture of messages with indexed and non-indexed mime types.
|
|
*
|
|
* Introduced: version 3. */
|
|
NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5,
|
|
|
|
/* If set, messages store the revision number of the last
|
|
* modification in NOTMUCH_VALUE_LAST_MOD.
|
|
*
|
|
* Introduced: version 3. */
|
|
NOTMUCH_FEATURE_LAST_MOD = 1 << 6,
|
|
|
|
/* If set, unprefixed terms are stored only for the message body,
|
|
* not for headers.
|
|
*
|
|
* Introduced: version 3. */
|
|
NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY = 1 << 7,
|
|
};
|
|
|
|
/* In C++, a named enum is its own type, so define bitwise operators
|
|
* on _notmuch_features. */
|
|
inline _notmuch_features
|
|
operator| (_notmuch_features a, _notmuch_features b)
|
|
{
|
|
return static_cast<_notmuch_features>(
|
|
static_cast<unsigned>(a) | static_cast<unsigned>(b));
|
|
}
|
|
|
|
inline _notmuch_features
|
|
operator& (_notmuch_features a, _notmuch_features b)
|
|
{
|
|
return static_cast<_notmuch_features>(
|
|
static_cast<unsigned>(a) & static_cast<unsigned>(b));
|
|
}
|
|
|
|
inline _notmuch_features
|
|
operator~ (_notmuch_features a)
|
|
{
|
|
return static_cast<_notmuch_features>(~static_cast<unsigned>(a));
|
|
}
|
|
|
|
inline _notmuch_features&
|
|
operator|= (_notmuch_features &a, _notmuch_features b)
|
|
{
|
|
a = a | b;
|
|
return a;
|
|
}
|
|
|
|
inline _notmuch_features&
|
|
operator&= (_notmuch_features &a, _notmuch_features b)
|
|
{
|
|
a = a & b;
|
|
return a;
|
|
}
|
|
|
|
/*
|
|
* Configuration options for xapian database fields */
|
|
typedef enum {
|
|
NOTMUCH_FIELD_NO_FLAGS = 0,
|
|
NOTMUCH_FIELD_EXTERNAL = 1 << 0,
|
|
NOTMUCH_FIELD_PROBABILISTIC = 1 << 1,
|
|
NOTMUCH_FIELD_PROCESSOR = 1 << 2,
|
|
NOTMUCH_FIELD_STRIP_TRAILING_SLASH = 1 << 3,
|
|
} notmuch_field_flag_t;
|
|
|
|
/*
|
|
* define bitwise operators to hide casts */
|
|
inline notmuch_field_flag_t
|
|
operator| (notmuch_field_flag_t a, notmuch_field_flag_t b)
|
|
{
|
|
return static_cast<notmuch_field_flag_t>(
|
|
static_cast<unsigned>(a) | static_cast<unsigned>(b));
|
|
}
|
|
|
|
inline notmuch_field_flag_t
|
|
operator& (notmuch_field_flag_t a, notmuch_field_flag_t b)
|
|
{
|
|
return static_cast<notmuch_field_flag_t>(
|
|
static_cast<unsigned>(a) & static_cast<unsigned>(b));
|
|
}
|
|
|
|
#define NOTMUCH_QUERY_PARSER_FLAGS (Xapian::QueryParser::FLAG_BOOLEAN | \
|
|
Xapian::QueryParser::FLAG_PHRASE | \
|
|
Xapian::QueryParser::FLAG_LOVEHATE | \
|
|
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | \
|
|
Xapian::QueryParser::FLAG_WILDCARD | \
|
|
Xapian::QueryParser::FLAG_PURE_NOT)
|
|
|
|
/*
|
|
* explicit and implied parameters to open */
|
|
typedef enum {
|
|
NOTMUCH_PARAM_NONE = 0,
|
|
/* database passed explicitely */
|
|
NOTMUCH_PARAM_DATABASE = 1 << 0,
|
|
/* config file passed explicitely */
|
|
NOTMUCH_PARAM_CONFIG = 1 << 1,
|
|
/* profile name passed explicitely */
|
|
NOTMUCH_PARAM_PROFILE = 1 << 2,
|
|
/* split (e.g. XDG) configuration */
|
|
NOTMUCH_PARAM_SPLIT = 1 << 3,
|
|
} notmuch_open_param_t;
|
|
|
|
/*
|
|
* define bitwise operators to hide casts */
|
|
|
|
inline notmuch_open_param_t
|
|
operator| (notmuch_open_param_t a, notmuch_open_param_t b)
|
|
{
|
|
return static_cast<notmuch_open_param_t>(
|
|
static_cast<unsigned>(a) | static_cast<unsigned>(b));
|
|
}
|
|
|
|
inline notmuch_open_param_t&
|
|
operator|= (notmuch_open_param_t &a, notmuch_open_param_t b)
|
|
{
|
|
a = a | b;
|
|
return a;
|
|
}
|
|
|
|
inline notmuch_open_param_t
|
|
operator& (notmuch_open_param_t a, notmuch_open_param_t b)
|
|
{
|
|
return static_cast<notmuch_open_param_t>(
|
|
static_cast<unsigned>(a) & static_cast<unsigned>(b));
|
|
}
|
|
|
|
struct _notmuch_database {
|
|
bool exception_reported;
|
|
|
|
/* Path to actual database */
|
|
const char *xapian_path;
|
|
|
|
/* Path to config loaded, if any */
|
|
const char *config_path;
|
|
|
|
int atomic_nesting;
|
|
/* true if changes have been made in this atomic section */
|
|
bool atomic_dirty;
|
|
Xapian::Database *xapian_db;
|
|
Xapian::WritableDatabase *writable_xapian_db;
|
|
bool open;
|
|
/* Bit mask of features used by this database. This is a
|
|
* bitwise-OR of NOTMUCH_FEATURE_* values (above). */
|
|
enum _notmuch_features features;
|
|
|
|
unsigned int last_doc_id;
|
|
|
|
/* 16 bytes (+ terminator) for hexadecimal representation of
|
|
* a 64-bit integer. */
|
|
char thread_id_str[17];
|
|
uint64_t last_thread_id;
|
|
|
|
/* How many transactions have successfully completed since we last committed */
|
|
int transaction_count;
|
|
/* when to commit and reset the counter */
|
|
int transaction_threshold;
|
|
|
|
/* error reporting; this value persists only until the
|
|
* next library call. May be NULL */
|
|
char *status_string;
|
|
|
|
/* Highest committed revision number. Modifications are recorded
|
|
* under a higher revision number, which can be generated with
|
|
* notmuch_database_new_revision. */
|
|
unsigned long revision;
|
|
const char *uuid;
|
|
|
|
/* Keep track of the number of times the database has been re-opened
|
|
* (or other global invalidations of notmuch's caching)
|
|
*/
|
|
unsigned long view;
|
|
Xapian::QueryParser *query_parser;
|
|
Xapian::Stem *stemmer;
|
|
Xapian::TermGenerator *term_gen;
|
|
Xapian::RangeProcessor *value_range_processor;
|
|
Xapian::RangeProcessor *date_range_processor;
|
|
Xapian::RangeProcessor *last_mod_range_processor;
|
|
|
|
/* XXX it's slightly gross to use two parallel string->string maps
|
|
* here, but at least they are small */
|
|
notmuch_string_map_t *user_prefix;
|
|
notmuch_string_map_t *user_header;
|
|
|
|
/* Cached and possibly overridden configuration */
|
|
notmuch_string_map_t *config;
|
|
|
|
/* Track what parameters were specified when opening */
|
|
notmuch_open_param_t params;
|
|
};
|
|
|
|
/* Prior to database version 3, features were implied by the database
|
|
* version number, so hard-code them for earlier versions. */
|
|
#define NOTMUCH_FEATURES_V0 ((enum _notmuch_features) 0)
|
|
#define NOTMUCH_FEATURES_V1 (NOTMUCH_FEATURES_V0 | NOTMUCH_FEATURE_FILE_TERMS | \
|
|
NOTMUCH_FEATURE_DIRECTORY_DOCS)
|
|
#define NOTMUCH_FEATURES_V2 (NOTMUCH_FEATURES_V1 | NOTMUCH_FEATURE_BOOL_FOLDER)
|
|
|
|
/* Current database features. If any of these are missing from a
|
|
* database, request an upgrade.
|
|
* NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES and
|
|
* NOTMUCH_FEATURE_INDEXED_MIMETYPES are not included because upgrade
|
|
* doesn't currently introduce the features (though brand new databases
|
|
* will have it). */
|
|
#define NOTMUCH_FEATURES_CURRENT \
|
|
(NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \
|
|
NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \
|
|
NOTMUCH_FEATURE_LAST_MOD)
|
|
|
|
/* Return the list of terms from the given iterator matching a prefix.
|
|
* The prefix will be stripped from the strings in the returned list.
|
|
* The list will be allocated using ctx as the talloc context.
|
|
*
|
|
* The function returns NULL on failure.
|
|
*/
|
|
notmuch_string_list_t *
|
|
_notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i,
|
|
Xapian::TermIterator &end,
|
|
const char *prefix);
|
|
|
|
void
|
|
_notmuch_database_find_doc_ids (notmuch_database_t *notmuch,
|
|
const char *prefix_name,
|
|
const char *value,
|
|
Xapian::PostingIterator *begin,
|
|
Xapian::PostingIterator *end);
|
|
|
|
#define NOTMUCH_DATABASE_VERSION 3
|
|
|
|
/* features.cc */
|
|
|
|
_notmuch_features
|
|
_notmuch_database_parse_features (const void *ctx, const char *features, unsigned int version,
|
|
char mode, char **incompat_out);
|
|
|
|
char *
|
|
_notmuch_database_print_features (const void *ctx, unsigned int features);
|
|
|
|
/* prefix.cc */
|
|
notmuch_status_t
|
|
_notmuch_database_setup_standard_query_fields (notmuch_database_t *notmuch);
|
|
|
|
notmuch_status_t
|
|
_notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
|
|
|
|
#if __cplusplus
|
|
/* query.cc */
|
|
notmuch_status_t
|
|
_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
|
|
std::string query_string,
|
|
Xapian::Query &output,
|
|
std::string &msg);
|
|
/* parse-sexp.cc */
|
|
notmuch_status_t
|
|
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
|
|
Xapian::Query &output);
|
|
|
|
notmuch_status_t
|
|
_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
|
|
Xapian::Query &output, std::string &msg);
|
|
|
|
/* regexp-fields.cc */
|
|
notmuch_status_t
|
|
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
|
|
std::string regexp_str,
|
|
Xapian::Query &output, std::string &msg);
|
|
|
|
/* thread-fp.cc */
|
|
notmuch_status_t
|
|
_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string name,
|
|
Xapian::Query &output);
|
|
|
|
#if HAVE_SFSEXP
|
|
/* parse-sexp.cc */
|
|
notmuch_status_t
|
|
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
|
|
Xapian::Query &output);
|
|
#endif
|
|
|
|
/* parse-time-vrp.h */
|
|
notmuch_status_t
|
|
_notmuch_date_strings_to_query (Xapian::valueno slot, const std::string &from, const std::string &to,
|
|
Xapian::Query &output, std::string &msg);
|
|
#endif
|
|
#endif
|