notmuch/lib/database-private.h

396 lines
13 KiB
C
Raw Normal View History

/* database-private.h - For peeking into the internals of notmuch_database_t
*
* Copyright © 2009 Carl Worth
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see https://www.gnu.org/licenses/ .
*
* Author: Carl Worth <cworth@cworth.org>
*/
#ifndef NOTMUCH_DATABASE_PRIVATE_H
#define NOTMUCH_DATABASE_PRIVATE_H
/* According to WG14/N1124, a C++ implementation won't provide us a
* macro like PRIx64 (which gives a printf format string for
* formatting a uint64_t as hexadecimal) unless we define
* __STDC_FORMAT_MACROS before including inttypes.h. That's annoying,
* but there it is.
*/
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include "notmuch-private.h"
#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
#ifdef SILENCE_XAPIAN_DEPRECATION_WARNINGS
#define XAPIAN_DEPRECATED(D) D
#endif
#include <xapian.h>
#if HAVE_SFSEXP
#include <sexp.h>
#endif
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* Bit masks for _notmuch_database::features. Features are named,
* independent aspects of the database schema.
*
* A database stores the set of features that it "uses" (implicitly
* before database version 3 and explicitly as of version 3).
*
* A given library version will "recognize" a particular set of
* features; if a database uses a feature that the library does not
* recognize, the library will refuse to open it. It is assumed the
* set of recognized features grows monotonically over time. A
* library version will "implement" some subset of the recognized
* features: some operations may require that the database use (or not
* use) some feature, while other operations may support both
* databases that use and that don't use some feature.
*
* On disk, the database stores string names for these features (see
* the feature_names array). These enum bit values are never
* persisted to disk and may change freely.
*/
enum _notmuch_features {
/* If set, file names are stored in "file-direntry" terms. If
* unset, file names are stored in document data.
*
* Introduced: version 1. */
NOTMUCH_FEATURE_FILE_TERMS = 1 << 0,
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* If set, directory timestamps are stored in documents with
* XDIRECTORY terms and relative paths. If unset, directory
* timestamps are stored in documents with XTIMESTAMP terms and
* absolute paths.
*
* Introduced: version 1. */
NOTMUCH_FEATURE_DIRECTORY_DOCS = 1 << 1,
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* If set, the from, subject, and message-id headers are stored in
* message document values. If unset, message documents *may*
* have these values, but if the value is empty, it must be
* retrieved from the message file.
*
* Introduced: optional in version 1, required as of version 3.
*/
NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES = 1 << 2,
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* If set, folder terms are boolean and path terms exist. If
* unset, folder terms are probabilistic and stemmed and path
* terms do not exist.
*
* Introduced: version 2. */
NOTMUCH_FEATURE_BOOL_FOLDER = 1 << 3,
/* If set, missing messages are stored in ghost mail documents.
* If unset, thread IDs of ghost messages are stored as database
* metadata instead of in ghost documents.
*
* Introduced: version 3. */
NOTMUCH_FEATURE_GHOSTS = 1 << 4,
/* If set, then the database was created after the introduction of
* indexed mime types. If unset, then the database may contain a
* mixture of messages with indexed and non-indexed mime types.
*
* Introduced: version 3. */
NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5,
lib: Add per-message last modification tracking This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit. An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions: write |-X-A--------------| read 1 |---B---| read 2 |---| The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X. This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream.
2014-10-13 08:20:01 +02:00
/* If set, messages store the revision number of the last
* modification in NOTMUCH_VALUE_LAST_MOD.
*
* Introduced: version 3. */
NOTMUCH_FEATURE_LAST_MOD = 1 << 6,
/* If set, unprefixed terms are stored only for the message body,
* not for headers.
*
* Introduced: version 3. */
NOTMUCH_FEATURE_UNPREFIX_BODY_ONLY = 1 << 7,
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
};
/* In C++, a named enum is its own type, so define bitwise operators
* on _notmuch_features. */
inline _notmuch_features
operator| (_notmuch_features a, _notmuch_features b)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
{
return static_cast<_notmuch_features>(
static_cast<unsigned>(a) | static_cast<unsigned>(b));
}
inline _notmuch_features
operator& (_notmuch_features a, _notmuch_features b)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
{
return static_cast<_notmuch_features>(
static_cast<unsigned>(a) & static_cast<unsigned>(b));
}
inline _notmuch_features
operator~ (_notmuch_features a)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
{
return static_cast<_notmuch_features>(~static_cast<unsigned>(a));
}
inline _notmuch_features&
operator|= (_notmuch_features &a, _notmuch_features b)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
{
a = a | b;
return a;
}
inline _notmuch_features&
operator&= (_notmuch_features &a, _notmuch_features b)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
{
a = a & b;
return a;
}
/*
* Configuration options for xapian database fields */
typedef enum {
NOTMUCH_FIELD_NO_FLAGS = 0,
NOTMUCH_FIELD_EXTERNAL = 1 << 0,
NOTMUCH_FIELD_PROBABILISTIC = 1 << 1,
NOTMUCH_FIELD_PROCESSOR = 1 << 2,
NOTMUCH_FIELD_STRIP_TRAILING_SLASH = 1 << 3,
} notmuch_field_flag_t;
/*
* define bitwise operators to hide casts */
inline notmuch_field_flag_t
operator| (notmuch_field_flag_t a, notmuch_field_flag_t b)
{
return static_cast<notmuch_field_flag_t>(
static_cast<unsigned>(a) | static_cast<unsigned>(b));
}
inline notmuch_field_flag_t
operator& (notmuch_field_flag_t a, notmuch_field_flag_t b)
{
return static_cast<notmuch_field_flag_t>(
static_cast<unsigned>(a) & static_cast<unsigned>(b));
}
#define NOTMUCH_QUERY_PARSER_FLAGS (Xapian::QueryParser::FLAG_BOOLEAN | \
Xapian::QueryParser::FLAG_PHRASE | \
Xapian::QueryParser::FLAG_LOVEHATE | \
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | \
Xapian::QueryParser::FLAG_WILDCARD | \
Xapian::QueryParser::FLAG_PURE_NOT)
/*
* explicit and implied parameters to open */
typedef enum {
NOTMUCH_PARAM_NONE = 0,
/* database passed explicitely */
NOTMUCH_PARAM_DATABASE = 1 << 0,
/* config file passed explicitely */
NOTMUCH_PARAM_CONFIG = 1 << 1,
/* profile name passed explicitely */
NOTMUCH_PARAM_PROFILE = 1 << 2,
/* split (e.g. XDG) configuration */
NOTMUCH_PARAM_SPLIT = 1 << 3,
} notmuch_open_param_t;
/*
* define bitwise operators to hide casts */
inline notmuch_open_param_t
operator| (notmuch_open_param_t a, notmuch_open_param_t b)
{
return static_cast<notmuch_open_param_t>(
static_cast<unsigned>(a) | static_cast<unsigned>(b));
}
inline notmuch_open_param_t&
operator|= (notmuch_open_param_t &a, notmuch_open_param_t b)
{
a = a | b;
return a;
}
inline notmuch_open_param_t
operator& (notmuch_open_param_t a, notmuch_open_param_t b)
{
return static_cast<notmuch_open_param_t>(
static_cast<unsigned>(a) & static_cast<unsigned>(b));
}
struct _notmuch_database {
bool exception_reported;
/* Path to actual database */
const char *xapian_path;
/* Path to config loaded, if any */
const char *config_path;
int atomic_nesting;
/* true if changes have been made in this atomic section */
bool atomic_dirty;
Xapian::Database *xapian_db;
Xapian::WritableDatabase *writable_xapian_db;
bool open;
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* Bit mask of features used by this database. This is a
* bitwise-OR of NOTMUCH_FEATURE_* values (above). */
enum _notmuch_features features;
unsigned int last_doc_id;
/* 16 bytes (+ terminator) for hexadecimal representation of
* a 64-bit integer. */
char thread_id_str[17];
uint64_t last_thread_id;
/* How many transactions have successfully completed since we last committed */
int transaction_count;
/* when to commit and reset the counter */
int transaction_threshold;
/* error reporting; this value persists only until the
* next library call. May be NULL */
char *status_string;
lib: Add per-message last modification tracking This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit. An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions: write |-X-A--------------| read 1 |---B---| read 2 |---| The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X. This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream.
2014-10-13 08:20:01 +02:00
/* Highest committed revision number. Modifications are recorded
* under a higher revision number, which can be generated with
* notmuch_database_new_revision. */
unsigned long revision;
const char *uuid;
lib: Add per-message last modification tracking This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit. An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions: write |-X-A--------------| read 1 |---B---| read 2 |---| The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X. This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream.
2014-10-13 08:20:01 +02:00
/* Keep track of the number of times the database has been re-opened
* (or other global invalidations of notmuch's caching)
*/
unsigned long view;
Xapian::QueryParser *query_parser;
Xapian::Stem *stemmer;
Xapian::TermGenerator *term_gen;
Xapian::RangeProcessor *value_range_processor;
Xapian::RangeProcessor *date_range_processor;
Xapian::RangeProcessor *last_mod_range_processor;
/* XXX it's slightly gross to use two parallel string->string maps
* here, but at least they are small */
notmuch_string_map_t *user_prefix;
notmuch_string_map_t *user_header;
/* Cached and possibly overridden configuration */
notmuch_string_map_t *config;
/* Track what parameters were specified when opening */
notmuch_open_param_t params;
/* list of regular expressions to check for text indexing */
regex_t *index_as_text;
size_t index_as_text_length;
};
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* Prior to database version 3, features were implied by the database
* version number, so hard-code them for earlier versions. */
#define NOTMUCH_FEATURES_V0 ((enum _notmuch_features) 0)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
#define NOTMUCH_FEATURES_V1 (NOTMUCH_FEATURES_V0 | NOTMUCH_FEATURE_FILE_TERMS | \
NOTMUCH_FEATURE_DIRECTORY_DOCS)
#define NOTMUCH_FEATURES_V2 (NOTMUCH_FEATURES_V1 | NOTMUCH_FEATURE_BOOL_FOLDER)
/* Current database features. If any of these are missing from a
* database, request an upgrade.
* NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES and
* NOTMUCH_FEATURE_INDEXED_MIMETYPES are not included because upgrade
* doesn't currently introduce the features (though brand new databases
* will have it). */
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
#define NOTMUCH_FEATURES_CURRENT \
(NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \
lib: Add per-message last modification tracking This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit. An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions: write |-X-A--------------| read 1 |---B---| read 2 |---| The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X. This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream.
2014-10-13 08:20:01 +02:00
NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \
NOTMUCH_FEATURE_LAST_MOD)
lib: Database version 3: Introduce fine-grained "features" Previously, our database schema was versioned by a single number. Each database schema change had to occur "atomically" in Notmuch's development history: before some commit, Notmuch used version N, after that commit, it used version N+1. Hence, each new schema version could introduce only one change, the task of developing a schema change fell on a single person, and it all had to happen and be perfect in a single commit series. This made introducing a new schema version hard. We've seen only two schema changes in the history of Notmuch. This commit introduces database schema version 3; hopefully the last schema version we'll need for a while. With this version, we switch from a single version number to "features": a set of named, independent aspects of the database schema. Features should make backwards compatibility easier. For many things, it should be easy to support databases both with and without a feature, which will allow us to make upgrades optional and will enable "unstable" features that can be developed and tested over time. Features also make forwards compatibility easier. The features recorded in a database include "compatibility flags," which can indicate to an older version of Notmuch when it must support a given feature to open the database for read or for write. This lets us replace the old vague "I don't recognize this version, so something might go wrong, but I promise to try my best" warnings upon opening a database with an unknown version with precise errors. If a database is safe to open for read/write despite unknown features, an older version will know that and issue no message at all. If the database is not safe to open for read/write because of unknown features, an older version will know that, too, and can tell the user exactly which required features it lacks support for.
2014-08-25 19:26:00 +02:00
/* Return the list of terms from the given iterator matching a prefix.
* The prefix will be stripped from the strings in the returned list.
* The list will be allocated using ctx as the talloc context.
*
* The function returns NULL on failure.
*/
notmuch_string_list_t *
_notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i,
Xapian::TermIterator &end,
const char *prefix);
void
_notmuch_database_find_doc_ids (notmuch_database_t *notmuch,
const char *prefix_name,
const char *value,
Xapian::PostingIterator *begin,
Xapian::PostingIterator *end);
#define NOTMUCH_DATABASE_VERSION 3
/* features.cc */
_notmuch_features
_notmuch_database_parse_features (const void *ctx, const char *features, unsigned int version,
char mode, char **incompat_out);
char *
_notmuch_database_print_features (const void *ctx, unsigned int features);
/* prefix.cc */
notmuch_status_t
_notmuch_database_setup_standard_query_fields (notmuch_database_t *notmuch);
notmuch_status_t
_notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
#if __cplusplus
/* query.cc */
notmuch_status_t
_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
std::string query_string,
Xapian::Query &output,
std::string &msg);
notmuch_status_t
_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, Xapian::Query subquery,
Xapian::Query &output, std::string &msg);
/* regexp-fields.cc */
notmuch_status_t
_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, std::string field,
std::string regexp_str,
Xapian::Query &output, std::string &msg);
/* thread-fp.cc */
notmuch_status_t
_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string name,
Xapian::Query &output);
#if HAVE_SFSEXP
/* parse-sexp.cc */
notmuch_status_t
_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char *querystr,
Xapian::Query &output);
#endif
/* parse-time-vrp.h */
notmuch_status_t
_notmuch_date_strings_to_query (Xapian::valueno slot, const std::string &from, const std::string &to,
Xapian::Query &output, std::string &msg);
/* lastmod-fp.h */
notmuch_status_t
_notmuch_lastmod_strings_to_query (notmuch_database_t *notmuch,
const std::string &from, const std::string &to,
Xapian::Query &output, std::string &msg);
#endif
#endif