notmuch/lib/add-message.cc
Daniel Kahn Gillmor 5b93fa6e70 lib: add notmuch_message_reindex
This new function asks the database to reindex a given message.
The parameter `indexopts` is currently ignored, but is intended to
provide an extensible API to support e.g. changing the encryption or
filtering status (e.g. whether and how certain non-plaintext parts are
indexed).
2017-08-01 21:17:47 -04:00

577 lines
17 KiB
C++

#include "database-private.h"
/* Parse a References header value, putting a (talloc'ed under 'ctx')
* copy of each referenced message-id into 'hash'.
*
* We explicitly avoid including any reference identical to
* 'message_id' in the result (to avoid mass confusion when a single
* message references itself cyclically---and yes, mail messages are
* not infrequent in the wild that do this---don't ask me why).
*
* Return the last reference parsed, if it is not equal to message_id.
*/
static char *
parse_references (void *ctx,
const char *message_id,
GHashTable *hash,
const char *refs)
{
char *ref, *last_ref = NULL;
if (refs == NULL || *refs == '\0')
return NULL;
while (*refs) {
ref = _notmuch_message_id_parse (ctx, refs, &refs);
if (ref && strcmp (ref, message_id)) {
g_hash_table_add (hash, ref);
last_ref = ref;
}
}
/* The return value of this function is used to add a parent
* reference to the database. We should avoid making a message
* its own parent, thus the above check.
*/
return talloc_strdup(ctx, last_ref);
}
static const char *
_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
{
/* 16 bytes (+ terminator) for hexadecimal representation of
* a 64-bit integer. */
static char thread_id[17];
Xapian::WritableDatabase *db;
db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
notmuch->last_thread_id++;
sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id);
db->set_metadata ("last_thread_id", thread_id);
return thread_id;
}
static char *
_get_metadata_thread_id_key (void *ctx, const char *message_id)
{
if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
message_id = _notmuch_message_id_compressed (ctx, message_id);
return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
message_id);
}
static notmuch_status_t
_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
void *ctx,
const char *message_id,
const char **thread_id_ret);
/* Find the thread ID to which the message with 'message_id' belongs.
*
* Note: 'thread_id_ret' must not be NULL!
* On success '*thread_id_ret' is set to a newly talloced string belonging to
* 'ctx'.
*
* Note: If there is no message in the database with the given
* 'message_id' then a new thread_id will be allocated for this
* message ID and stored in the database metadata so that the
* thread ID can be looked up if the message is added to the database
* later.
*/
static notmuch_status_t
_resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
void *ctx,
const char *message_id,
const char **thread_id_ret)
{
notmuch_private_status_t status;
notmuch_message_t *message;
if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
thread_id_ret);
/* Look for this message (regular or ghost) */
message = _notmuch_message_create_for_message_id (
notmuch, message_id, &status);
if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
/* Message exists */
*thread_id_ret = talloc_steal (
ctx, notmuch_message_get_thread_id (message));
} else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
/* Message did not exist. Give it a fresh thread ID and
* populate this message as a ghost message. */
*thread_id_ret = talloc_strdup (
ctx, _notmuch_database_generate_thread_id (notmuch));
if (! *thread_id_ret) {
status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
} else {
status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
if (status == 0)
/* Commit the new ghost message */
_notmuch_message_sync (message);
}
} else {
/* Create failed. Fall through. */
}
notmuch_message_destroy (message);
return COERCE_STATUS (status, "Error creating ghost message");
}
/* Pre-ghost messages _resolve_message_id_to_thread_id */
static notmuch_status_t
_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
void *ctx,
const char *message_id,
const char **thread_id_ret)
{
notmuch_status_t status;
notmuch_message_t *message;
std::string thread_id_string;
char *metadata_key;
Xapian::WritableDatabase *db;
status = notmuch_database_find_message (notmuch, message_id, &message);
if (status)
return status;
if (message) {
*thread_id_ret = talloc_steal (ctx,
notmuch_message_get_thread_id (message));
notmuch_message_destroy (message);
return NOTMUCH_STATUS_SUCCESS;
}
/* Message has not been seen yet.
*
* We may have seen a reference to it already, in which case, we
* can return the thread ID stored in the metadata. Otherwise, we
* generate a new thread ID and store it there.
*/
db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
metadata_key = _get_metadata_thread_id_key (ctx, message_id);
thread_id_string = notmuch->xapian_db->get_metadata (metadata_key);
if (thread_id_string.empty()) {
*thread_id_ret = talloc_strdup (ctx,
_notmuch_database_generate_thread_id (notmuch));
db->set_metadata (metadata_key, *thread_id_ret);
} else {
*thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str());
}
talloc_free (metadata_key);
return NOTMUCH_STATUS_SUCCESS;
}
static notmuch_status_t
_merge_threads (notmuch_database_t *notmuch,
const char *winner_thread_id,
const char *loser_thread_id)
{
Xapian::PostingIterator loser, loser_end;
notmuch_message_t *message = NULL;
notmuch_private_status_t private_status;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
_notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end);
for ( ; loser != loser_end; loser++) {
message = _notmuch_message_create (notmuch, notmuch,
*loser, &private_status);
if (message == NULL) {
ret = COERCE_STATUS (private_status,
"Cannot find document for doc_id from query");
goto DONE;
}
_notmuch_message_remove_term (message, "thread", loser_thread_id);
_notmuch_message_add_term (message, "thread", winner_thread_id);
_notmuch_message_sync (message);
notmuch_message_destroy (message);
message = NULL;
}
DONE:
if (message)
notmuch_message_destroy (message);
return ret;
}
static void
_my_talloc_free_for_g_hash (void *ptr)
{
talloc_free (ptr);
}
notmuch_status_t
_notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
notmuch_message_t *message,
notmuch_message_file_t *message_file,
const char **thread_id)
{
GHashTable *parents = NULL;
const char *refs, *in_reply_to, *in_reply_to_message_id;
const char *last_ref_message_id, *this_message_id;
GList *l, *keys = NULL;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
parents = g_hash_table_new_full (g_str_hash, g_str_equal,
_my_talloc_free_for_g_hash, NULL);
this_message_id = notmuch_message_get_message_id (message);
refs = _notmuch_message_file_get_header (message_file, "references");
last_ref_message_id = parse_references (message,
this_message_id,
parents, refs);
in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
in_reply_to_message_id = parse_references (message,
this_message_id,
parents, in_reply_to);
/* For the parent of this message, use the last message ID of the
* References header, if available. If not, fall back to the
* first message ID in the In-Reply-To header. */
if (last_ref_message_id) {
_notmuch_message_add_term (message, "replyto",
last_ref_message_id);
} else if (in_reply_to_message_id) {
_notmuch_message_add_term (message, "replyto",
in_reply_to_message_id);
}
keys = g_hash_table_get_keys (parents);
for (l = keys; l; l = l->next) {
char *parent_message_id;
const char *parent_thread_id = NULL;
parent_message_id = (char *) l->data;
_notmuch_message_add_term (message, "reference",
parent_message_id);
ret = _resolve_message_id_to_thread_id (notmuch,
message,
parent_message_id,
&parent_thread_id);
if (ret)
goto DONE;
if (*thread_id == NULL) {
*thread_id = talloc_strdup (message, parent_thread_id);
_notmuch_message_add_term (message, "thread", *thread_id);
} else if (strcmp (*thread_id, parent_thread_id)) {
ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
if (ret)
goto DONE;
}
}
DONE:
if (keys)
g_list_free (keys);
if (parents)
g_hash_table_unref (parents);
return ret;
}
static notmuch_status_t
_notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
notmuch_message_t *message,
const char **thread_id)
{
const char *message_id = notmuch_message_get_message_id (message);
Xapian::PostingIterator child, children_end;
notmuch_message_t *child_message = NULL;
const char *child_thread_id;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
notmuch_private_status_t private_status;
_notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end);
for ( ; child != children_end; child++) {
child_message = _notmuch_message_create (message, notmuch,
*child, &private_status);
if (child_message == NULL) {
ret = COERCE_STATUS (private_status,
"Cannot find document for doc_id from query");
goto DONE;
}
child_thread_id = notmuch_message_get_thread_id (child_message);
if (*thread_id == NULL) {
*thread_id = talloc_strdup (message, child_thread_id);
_notmuch_message_add_term (message, "thread", *thread_id);
} else if (strcmp (*thread_id, child_thread_id)) {
_notmuch_message_remove_term (child_message, "reference",
message_id);
_notmuch_message_sync (child_message);
ret = _merge_threads (notmuch, *thread_id, child_thread_id);
if (ret)
goto DONE;
}
notmuch_message_destroy (child_message);
child_message = NULL;
}
DONE:
if (child_message)
notmuch_message_destroy (child_message);
return ret;
}
/* Fetch and clear the stored thread_id for message, or NULL if none. */
static char *
_consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
notmuch_message_t *message)
{
const char *message_id;
std::string stored_id;
char *metadata_key;
message_id = notmuch_message_get_message_id (message);
metadata_key = _get_metadata_thread_id_key (ctx, message_id);
/* Check if we have already seen related messages to this one.
* If we have then use the thread_id that we stored at that time.
*/
stored_id = notmuch->xapian_db->get_metadata (metadata_key);
if (stored_id.empty ()) {
return NULL;
} else {
Xapian::WritableDatabase *db;
db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
/* Clear the metadata for this message ID. We don't need it
* anymore. */
db->set_metadata (metadata_key, "");
return talloc_strdup (ctx, stored_id.c_str ());
}
}
/* Given a blank or ghost 'message' and its corresponding
* 'message_file' link it to existing threads in the database.
*
* First, if is_ghost, this retrieves the thread ID already stored in
* the message (which will be the case if a message was previously
* added that referenced this one). If the message is blank
* (!is_ghost), it doesn't have a thread ID yet (we'll generate one
* later in this function). If the database does not support ghost
* messages, this checks for a thread ID stored in database metadata
* for this message ID.
*
* Second, we look at 'message_file' and its link-relevant headers
* (References and In-Reply-To) for message IDs.
*
* Finally, we look in the database for existing message that
* reference 'message'.
*
* In all cases, we assign to the current message the first thread ID
* found. We will also merge any existing, distinct threads where this
* message belongs to both, (which is not uncommon when messages are
* processed out of order).
*
* Finally, if no thread ID has been found through referenced messages, we
* call _notmuch_message_generate_thread_id to generate a new thread
* ID. This should only happen for new, top-level messages, (no
* References or In-Reply-To header in this message, and no previously
* added message refers to this message).
*/
static notmuch_status_t
_notmuch_database_link_message (notmuch_database_t *notmuch,
notmuch_message_t *message,
notmuch_message_file_t *message_file,
notmuch_bool_t is_ghost)
{
void *local = talloc_new (NULL);
notmuch_status_t status;
const char *thread_id = NULL;
/* Check if the message already had a thread ID */
if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
if (is_ghost)
thread_id = notmuch_message_get_thread_id (message);
} else {
thread_id = _consume_metadata_thread_id (local, notmuch, message);
if (thread_id)
_notmuch_message_add_term (message, "thread", thread_id);
}
status = _notmuch_database_link_message_to_parents (notmuch, message,
message_file,
&thread_id);
if (status)
goto DONE;
if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
/* In general, it shouldn't be necessary to link children,
* since the earlier indexing of those children will have
* stored a thread ID for the missing parent. However, prior
* to ghost messages, these stored thread IDs were NOT
* rewritten during thread merging (and there was no
* performant way to do so), so if indexed children were
* pulled into a different thread ID by a merge, it was
* necessary to pull them *back* into the stored thread ID of
* the parent. With ghost messages, we just rewrite the
* stored thread IDs during merging, so this workaround isn't
* necessary. */
status = _notmuch_database_link_message_to_children (notmuch, message,
&thread_id);
if (status)
goto DONE;
}
/* If not part of any existing thread, generate a new thread ID. */
if (thread_id == NULL) {
thread_id = _notmuch_database_generate_thread_id (notmuch);
_notmuch_message_add_term (message, "thread", thread_id);
}
DONE:
talloc_free (local);
return status;
}
notmuch_status_t
notmuch_database_add_message (notmuch_database_t *notmuch,
const char *filename,
notmuch_message_t **message_ret)
{
notmuch_message_file_t *message_file;
notmuch_message_t *message = NULL;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
notmuch_private_status_t private_status;
notmuch_bool_t is_ghost = FALSE, is_new = FALSE;
const char *date;
const char *from, *to, *subject;
char *message_id = NULL;
if (message_ret)
*message_ret = NULL;
ret = _notmuch_database_ensure_writable (notmuch);
if (ret)
return ret;
message_file = _notmuch_message_file_open (notmuch, filename);
if (message_file == NULL)
return NOTMUCH_STATUS_FILE_ERROR;
/* Adding a message may change many documents. Do this all
* atomically. */
ret = notmuch_database_begin_atomic (notmuch);
if (ret)
goto DONE;
ret = _notmuch_message_file_get_headers (message_file,
&from, &subject, &to, &date,
&message_id);
if (ret)
goto DONE;
try {
/* Now that we have a message ID, we get a message object,
* (which may or may not reference an existing document in the
* database). */
message = _notmuch_message_create_for_message_id (notmuch,
message_id,
&private_status);
talloc_free (message_id);
/* We cannot call notmuch_message_get_flag for a new message */
switch (private_status) {
case NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
is_ghost = FALSE;
is_new = TRUE;
break;
case NOTMUCH_PRIVATE_STATUS_SUCCESS:
is_ghost = notmuch_message_get_flag (message, NOTMUCH_MESSAGE_FLAG_GHOST);
is_new = FALSE;
break;
default:
ret = COERCE_STATUS (private_status,
"Unexpected status value from _notmuch_message_create_for_message_id");
goto DONE;
}
_notmuch_message_add_filename (message, filename);
if (is_new || is_ghost) {
_notmuch_message_add_term (message, "type", "mail");
if (is_ghost)
/* Convert ghost message to a regular message */
_notmuch_message_remove_term (message, "type", "ghost");
}
ret = _notmuch_database_link_message (notmuch, message,
message_file, is_ghost);
if (ret)
goto DONE;
if (is_new || is_ghost)
_notmuch_message_set_header_values (message, date, from, subject);
ret = _notmuch_message_index_file (message, message_file);
if (ret)
goto DONE;
if (! is_new && !is_ghost)
ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
_notmuch_message_sync (message);
} catch (const Xapian::Error &error) {
_notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n",
error.get_msg().c_str());
notmuch->exception_reported = TRUE;
ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
goto DONE;
}
DONE:
if (message) {
if ((ret == NOTMUCH_STATUS_SUCCESS ||
ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret)
*message_ret = message;
else
notmuch_message_destroy (message);
}
if (message_file)
_notmuch_message_file_close (message_file);
ret2 = notmuch_database_end_atomic (notmuch);
if ((ret == NOTMUCH_STATUS_SUCCESS ||
ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) &&
ret2 != NOTMUCH_STATUS_SUCCESS)
ret = ret2;
return ret;
}