From d9b0ae918fd9d535e819b8859eca579002146661 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 26 Nov 2010 23:34:29 -0500 Subject: [PATCH 1/6] Use a single unified pass to fetch scalar message metadata. This performs a single pass over a message's term list to fetch the thread ID, message ID, and reply-to, rather than requiring a pass for each. Xapian decompresses the term list anew for each iteration, so this reduces the amount of time spent decompressing message metadata. This reduces my inbox search from 3.102 seconds to 2.555 seconds (1.2X faster). --- lib/message.cc | 201 ++++++++++++++++++++++++------------------------- 1 file changed, 100 insertions(+), 101 deletions(-) diff --git a/lib/message.cc b/lib/message.cc index 0590f764..25afd85b 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -256,6 +256,92 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch, return message; } +static char * +_notmuch_message_get_term (notmuch_message_t *message, + Xapian::TermIterator &i, Xapian::TermIterator &end, + const char *prefix) +{ + int prefix_len = strlen (prefix); + const char *term = NULL; + char *value; + + i.skip_to (prefix); + + if (i != end) + term = (*i).c_str (); + + if (!term || strncmp (term, prefix, prefix_len)) + return NULL; + + value = talloc_strdup (message, term + prefix_len); + +#if DEBUG_DATABASE_SANITY + i++; + + if (i != end && strncmp ((*i).c_str (), prefix, prefix_len) == 0) { + INTERNAL_ERROR ("Mail (doc_id: %d) has duplicate %s terms: %s and %s\n", + message->doc_id, prefix, value, + (*i).c_str () + prefix_len); + } +#endif + + return value; +} + +void +_notmuch_message_ensure_metadata (notmuch_message_t *message) +{ + Xapian::TermIterator i, end; + const char *thread_prefix = _find_prefix ("thread"), + *id_prefix = _find_prefix ("id"), + *replyto_prefix = _find_prefix ("replyto"); + + /* We do this all in a single pass because Xapian decompresses the + * term list every time you iterate over it. Thus, while this is + * slightly more costly than looking up individual fields if only + * one field of the message object is actually used, it's a huge + * win as more fields are used. */ + + i = message->doc.termlist_begin (); + end = message->doc.termlist_end (); + + /* Get thread */ + if (!message->thread_id) + message->thread_id = + _notmuch_message_get_term (message, i, end, thread_prefix); + + /* Get id */ + assert (strcmp (thread_prefix, id_prefix) < 0); + if (!message->message_id) + message->message_id = + _notmuch_message_get_term (message, i, end, id_prefix); + + /* Get reply to */ + assert (strcmp (id_prefix, replyto_prefix) < 0); + if (!message->in_reply_to) + message->in_reply_to = + _notmuch_message_get_term (message, i, end, replyto_prefix); + /* It's perfectly valid for a message to have no In-Reply-To + * header. For these cases, we return an empty string. */ + if (!message->in_reply_to) + message->in_reply_to = talloc_strdup (message, ""); +} + +static void +_notmuch_message_invalidate_metadata (notmuch_message_t *message, + const char *prefix_name) +{ + if (strcmp ("thread", prefix_name) == 0) { + talloc_free (message->thread_id); + message->thread_id = NULL; + } + + if (strcmp ("replyto", prefix_name) == 0) { + talloc_free (message->in_reply_to); + message->in_reply_to = NULL; + } +} + unsigned int _notmuch_message_get_doc_id (notmuch_message_t *message) { @@ -265,32 +351,11 @@ _notmuch_message_get_doc_id (notmuch_message_t *message) const char * notmuch_message_get_message_id (notmuch_message_t *message) { - Xapian::TermIterator i; - - if (message->message_id) - return message->message_id; - - i = message->doc.termlist_begin (); - i.skip_to (_find_prefix ("id")); - - if (i == message->doc.termlist_end ()) - INTERNAL_ERROR ("Message with document ID of %d has no message ID.\n", + if (!message->message_id) + _notmuch_message_ensure_metadata (message); + if (!message->message_id) + INTERNAL_ERROR ("Message with document ID of %u has no message ID.\n", message->doc_id); - - message->message_id = talloc_strdup (message, (*i).c_str () + 1); - -#if DEBUG_DATABASE_SANITY - i++; - - if (i != message->doc.termlist_end () && - strncmp ((*i).c_str (), _find_prefix ("id"), - strlen (_find_prefix ("id"))) == 0) - { - INTERNAL_ERROR ("Mail (doc_id: %d) has duplicate message IDs", - message->doc_id); - } -#endif - return message->message_id; } @@ -329,89 +394,19 @@ notmuch_message_get_header (notmuch_message_t *message, const char *header) const char * _notmuch_message_get_in_reply_to (notmuch_message_t *message) { - const char *prefix = _find_prefix ("replyto"); - int prefix_len = strlen (prefix); - Xapian::TermIterator i; - std::string in_reply_to; - - if (message->in_reply_to) - return message->in_reply_to; - - i = message->doc.termlist_begin (); - i.skip_to (prefix); - - if (i != message->doc.termlist_end ()) - in_reply_to = *i; - - /* It's perfectly valid for a message to have no In-Reply-To - * header. For these cases, we return an empty string. */ - if (i == message->doc.termlist_end () || - strncmp (in_reply_to.c_str (), prefix, prefix_len)) - { - message->in_reply_to = talloc_strdup (message, ""); - return message->in_reply_to; - } - - message->in_reply_to = talloc_strdup (message, - in_reply_to.c_str () + prefix_len); - -#if DEBUG_DATABASE_SANITY - i++; - - in_reply_to = *i; - - if (i != message->doc.termlist_end () && - strncmp ((*i).c_str (), prefix, prefix_len) == 0) - { - INTERNAL_ERROR ("Message %s has duplicate In-Reply-To IDs: %s and %s\n", - notmuch_message_get_message_id (message), - message->in_reply_to, - (*i).c_str () + prefix_len); - } -#endif - + if (!message->in_reply_to) + _notmuch_message_ensure_metadata (message); return message->in_reply_to; } const char * notmuch_message_get_thread_id (notmuch_message_t *message) { - const char *prefix = _find_prefix ("thread"); - Xapian::TermIterator i; - std::string id; - - /* This code is written with the assumption that "thread" has a - * single-character prefix. */ - assert (strlen (prefix) == 1); - - if (message->thread_id) - return message->thread_id; - - i = message->doc.termlist_begin (); - i.skip_to (prefix); - - if (i != message->doc.termlist_end ()) - id = *i; - - if (i == message->doc.termlist_end () || id[0] != *prefix) - INTERNAL_ERROR ("Message with document ID of %d has no thread ID.\n", + if (!message->thread_id) + _notmuch_message_ensure_metadata (message); + if (!message->thread_id) + INTERNAL_ERROR ("Message with document ID of %u has no thread ID.\n", message->doc_id); - - message->thread_id = talloc_strdup (message, id.c_str () + 1); - -#if DEBUG_DATABASE_SANITY - i++; - id = *i; - - if (i != message->doc.termlist_end () && id[0] == *prefix) - { - INTERNAL_ERROR ("Message %s has duplicate thread IDs: %s and %s\n", - notmuch_message_get_message_id (message), - message->thread_id, - id.c_str () + 1); - } -#endif - return message->thread_id; } @@ -809,6 +804,8 @@ _notmuch_message_add_term (notmuch_message_t *message, talloc_free (term); + _notmuch_message_invalidate_metadata (message, prefix_name); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; } @@ -874,6 +871,8 @@ _notmuch_message_remove_term (notmuch_message_t *message, talloc_free (term); + _notmuch_message_invalidate_metadata (message, prefix_name); + return NOTMUCH_PRIVATE_STATUS_SUCCESS; } From f3c1eebfaf8526129ae6946cbcd44a3c602563d6 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 8 Dec 2010 19:26:05 -0500 Subject: [PATCH 2/6] Implement an internal generic string list and use it. This replaces the guts of the filename list and tag list, making those interfaces simple iterators over the generic string list. The directory, message filename, and tags-related code now build generic string lists and then wraps them in specific iterators. The real wins come in later patches, when we use these for even more generic functionality. As a nice side-effect, this also eliminates the annoying dependency on GList in the tag list. --- lib/Makefile.local | 1 + lib/database.cc | 12 +++--- lib/directory.cc | 7 ++-- lib/filenames.c | 52 +++--------------------- lib/message.cc | 17 ++++---- lib/messages.c | 11 ++--- lib/notmuch-private.h | 65 ++++++++++++++---------------- lib/string-list.c | 94 +++++++++++++++++++++++++++++++++++++++++++ lib/tags.c | 58 ++++---------------------- lib/thread.cc | 10 ++--- 10 files changed, 166 insertions(+), 161 deletions(-) create mode 100644 lib/string-list.c diff --git a/lib/Makefile.local b/lib/Makefile.local index d02a515c..43190231 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -50,6 +50,7 @@ extra_cflags += -I$(srcdir)/$(dir) -fPIC libnotmuch_c_srcs = \ $(notmuch_compat_srcs) \ $(dir)/filenames.c \ + $(dir)/string-list.c \ $(dir)/libsha1.c \ $(dir)/message-file.c \ $(dir)/messages.c \ diff --git a/lib/database.cc b/lib/database.cc index d88b168b..9c777156 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1762,15 +1762,15 @@ _notmuch_convert_tags (void *ctx, Xapian::TermIterator &i, Xapian::TermIterator &end) { const char *prefix = _find_prefix ("tag"); - notmuch_tags_t *tags; + notmuch_string_list_t *list; std::string tag; /* Currently this iteration is written with the assumption that * "tag" has a single-character prefix. */ assert (strlen (prefix) == 1); - tags = _notmuch_tags_create (ctx); - if (unlikely (tags == NULL)) + list = _notmuch_string_list_create (ctx); + if (unlikely (list == NULL)) return NULL; i.skip_to (prefix); @@ -1781,14 +1781,14 @@ _notmuch_convert_tags (void *ctx, Xapian::TermIterator &i, if (tag.empty () || tag[0] != *prefix) break; - _notmuch_tags_add_tag (tags, tag.c_str () + 1); + _notmuch_string_list_append (list, tag.c_str () + 1); i++; } - _notmuch_tags_prepare_iterator (tags); + _notmuch_string_list_sort (list); - return tags; + return _notmuch_tags_create (ctx, list); } notmuch_tags_t * diff --git a/lib/directory.cc b/lib/directory.cc index 946be4f4..aeee9caf 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -33,11 +33,11 @@ _create_filenames_for_terms_with_prefix (void *ctx, notmuch_database_t *notmuch, const char *prefix) { - notmuch_filename_list_t *filename_list; + notmuch_string_list_t *filename_list; Xapian::TermIterator i, end; int prefix_len = strlen (prefix); - filename_list = _notmuch_filename_list_create (ctx); + filename_list = _notmuch_string_list_create (ctx); if (unlikely (filename_list == NULL)) return NULL; @@ -47,8 +47,7 @@ _create_filenames_for_terms_with_prefix (void *ctx, { std::string term = *i; - _notmuch_filename_list_add_filename (filename_list, term.c_str () + - prefix_len); + _notmuch_string_list_append (filename_list, term.c_str () + prefix_len); } return _notmuch_filenames_create (ctx, filename_list); diff --git a/lib/filenames.c b/lib/filenames.c index f078c955..f1ea2430 100644 --- a/lib/filenames.c +++ b/lib/filenames.c @@ -21,56 +21,14 @@ #include "notmuch-private.h" struct _notmuch_filenames { - notmuch_filename_node_t *iterator; + notmuch_string_node_t *iterator; }; -/* Create a new notmuch_filename_list_t object, with 'ctx' as its - * talloc owner. - * - * This function can return NULL in case of out-of-memory. - */ -notmuch_filename_list_t * -_notmuch_filename_list_create (const void *ctx) -{ - notmuch_filename_list_t *list; - - list = talloc (ctx, notmuch_filename_list_t); - if (unlikely (list == NULL)) - return NULL; - - list->head = NULL; - list->tail = &list->head; - - return list; -} - -void -_notmuch_filename_list_add_filename (notmuch_filename_list_t *list, - const char *filename) -{ - /* Create and initialize new node. */ - notmuch_filename_node_t *node = talloc (list, - notmuch_filename_node_t); - - node->filename = talloc_strdup (node, filename); - node->next = NULL; - - /* Append the node to the list. */ - *(list->tail) = node; - list->tail = &node->next; -} - -void -_notmuch_filename_list_destroy (notmuch_filename_list_t *list) -{ - talloc_free (list); -} - -/* The notmuch_filenames_t is an iterator object for a - * notmuch_filename_list_t */ +/* The notmuch_filenames_t iterates over a notmuch_string_list_t of + * file names */ notmuch_filenames_t * _notmuch_filenames_create (const void *ctx, - notmuch_filename_list_t *list) + notmuch_string_list_t *list) { notmuch_filenames_t *filenames; @@ -99,7 +57,7 @@ notmuch_filenames_get (notmuch_filenames_t *filenames) if (filenames->iterator == NULL) return NULL; - return filenames->iterator->filename; + return filenames->iterator->string; } void diff --git a/lib/message.cc b/lib/message.cc index 25afd85b..b87506ac 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -32,7 +32,7 @@ struct _notmuch_message { char *message_id; char *thread_id; char *in_reply_to; - notmuch_filename_list_t *filename_list; + notmuch_string_list_t *filename_list; char *author; notmuch_message_file_t *message_file; notmuch_message_list_t *replies; @@ -440,7 +440,7 @@ _notmuch_message_add_filename (notmuch_message_t *message, INTERNAL_ERROR ("Message filename cannot be NULL."); if (message->filename_list) { - _notmuch_filename_list_destroy (message->filename_list); + talloc_free (message->filename_list); message->filename_list = NULL; } @@ -492,7 +492,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message, Xapian::TermIterator i, last; if (message->filename_list) { - _notmuch_filename_list_destroy (message->filename_list); + talloc_free (message->filename_list); message->filename_list = NULL; } @@ -582,7 +582,7 @@ _notmuch_message_ensure_filename_list (notmuch_message_t *message) if (message->filename_list) return; - message->filename_list = _notmuch_filename_list_create (message); + message->filename_list = _notmuch_string_list_create (message); i = message->doc.termlist_begin (); i.skip_to (prefix); @@ -603,7 +603,7 @@ _notmuch_message_ensure_filename_list (notmuch_message_t *message) if (data == NULL) INTERNAL_ERROR ("message with no filename"); - _notmuch_filename_list_add_filename (message->filename_list, data); + _notmuch_string_list_append (message->filename_list, data); return; } @@ -644,8 +644,7 @@ _notmuch_message_ensure_filename_list (notmuch_message_t *message) filename = talloc_asprintf (message, "%s/%s", db_path, basename); - _notmuch_filename_list_add_filename (message->filename_list, - filename); + _notmuch_string_list_append (message->filename_list, filename); talloc_free (local); } @@ -660,12 +659,12 @@ notmuch_message_get_filename (notmuch_message_t *message) return NULL; if (message->filename_list->head == NULL || - message->filename_list->head->filename == NULL) + message->filename_list->head->string == NULL) { INTERNAL_ERROR ("message with no filename"); } - return message->filename_list->head->filename; + return message->filename_list->head->string; } notmuch_filenames_t * diff --git a/lib/messages.c b/lib/messages.c index 120a48a9..7bcd1abf 100644 --- a/lib/messages.c +++ b/lib/messages.c @@ -146,13 +146,14 @@ notmuch_messages_destroy (notmuch_messages_t *messages) notmuch_tags_t * notmuch_messages_collect_tags (notmuch_messages_t *messages) { - notmuch_tags_t *tags, *msg_tags; + notmuch_string_list_t *tags; + notmuch_tags_t *msg_tags; notmuch_message_t *msg; GHashTable *htable; GList *keys, *l; const char *tag; - tags = _notmuch_tags_create (messages); + tags = _notmuch_string_list_create (messages); if (tags == NULL) return NULL; htable = g_hash_table_new_full (g_str_hash, g_str_equal, free, NULL); @@ -170,12 +171,12 @@ notmuch_messages_collect_tags (notmuch_messages_t *messages) keys = g_hash_table_get_keys (htable); for (l = keys; l; l = l->next) { - _notmuch_tags_add_tag (tags, (char *)l->data); + _notmuch_string_list_append (tags, (char *)l->data); } g_list_free (keys); g_hash_table_destroy (htable); - _notmuch_tags_prepare_iterator (tags); - return tags; + _notmuch_string_list_sort (tags); + return _notmuch_tags_create (messages, tags); } diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index a1b82b3e..0856751c 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -457,48 +457,45 @@ notmuch_sha1_of_string (const char *str); char * notmuch_sha1_of_file (const char *filename); +/* string-list.c */ + +typedef struct _notmuch_string_node { + char *string; + struct _notmuch_string_node *next; +} notmuch_string_node_t; + +typedef struct _notmuch_string_list { + int length; + notmuch_string_node_t *head; + notmuch_string_node_t **tail; +} notmuch_string_list_t; + +notmuch_string_list_t * +_notmuch_string_list_create (const void *ctx); + +/* Add 'string' to 'list'. + * + * The list will create its own talloced copy of 'string'. + */ +void +_notmuch_string_list_append (notmuch_string_list_t *list, + const char *string); + +void +_notmuch_string_list_sort (notmuch_string_list_t *list); + /* tags.c */ notmuch_tags_t * -_notmuch_tags_create (void *ctx); - -void -_notmuch_tags_add_tag (notmuch_tags_t *tags, const char *tag); - -void -_notmuch_tags_prepare_iterator (notmuch_tags_t *tags); +_notmuch_tags_create (const void *ctx, notmuch_string_list_t *list); /* filenames.c */ -typedef struct _notmuch_filename_node { - char *filename; - struct _notmuch_filename_node *next; -} notmuch_filename_node_t; - -typedef struct _notmuch_filename_list { - notmuch_filename_node_t *head; - notmuch_filename_node_t **tail; -} notmuch_filename_list_t; - -notmuch_filename_list_t * -_notmuch_filename_list_create (const void *ctx); - -/* Add 'filename' to 'list'. - * - * The list will create its own talloced copy of 'filename'. - */ -void -_notmuch_filename_list_add_filename (notmuch_filename_list_t *list, - const char *filename); - -void -_notmuch_filename_list_destroy (notmuch_filename_list_t *list); - -/* The notmuch_filenames_t is an iterator object for a - * notmuch_filename_list_t */ +/* The notmuch_filenames_t iterates over a notmuch_string_list_t of + * file names */ notmuch_filenames_t * _notmuch_filenames_create (const void *ctx, - notmuch_filename_list_t *list); + notmuch_string_list_t *list); #pragma GCC visibility pop diff --git a/lib/string-list.c b/lib/string-list.c new file mode 100644 index 00000000..d92a0bab --- /dev/null +++ b/lib/string-list.c @@ -0,0 +1,94 @@ +/* strings.c - Iterator for a list of strings + * + * Copyright © 2010 Intel Corporation + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . + * + * Author: Carl Worth + */ + +#include "notmuch-private.h" + +/* Create a new notmuch_string_list_t object, with 'ctx' as its + * talloc owner. + * + * This function can return NULL in case of out-of-memory. + */ +notmuch_string_list_t * +_notmuch_string_list_create (const void *ctx) +{ + notmuch_string_list_t *list; + + list = talloc (ctx, notmuch_string_list_t); + if (unlikely (list == NULL)) + return NULL; + + list->length = 0; + list->head = NULL; + list->tail = &list->head; + + return list; +} + +void +_notmuch_string_list_append (notmuch_string_list_t *list, + const char *string) +{ + /* Create and initialize new node. */ + notmuch_string_node_t *node = talloc (list, notmuch_string_node_t); + + node->string = talloc_strdup (node, string); + node->next = NULL; + + /* Append the node to the list. */ + *(list->tail) = node; + list->tail = &node->next; + list->length++; +} + +static int +cmpnode (const void *pa, const void *pb) +{ + notmuch_string_node_t *a = *(notmuch_string_node_t * const *)pa; + notmuch_string_node_t *b = *(notmuch_string_node_t * const *)pb; + + return strcmp (a->string, b->string); +} + +void +_notmuch_string_list_sort (notmuch_string_list_t *list) +{ + notmuch_string_node_t **nodes, *node; + int i; + + if (list->length == 0) + return; + + nodes = talloc_array (list, notmuch_string_node_t *, list->length); + if (unlikely (nodes == NULL)) + INTERNAL_ERROR ("Could not allocate memory for list sort"); + + for (i = 0, node = list->head; node; i++, node = node->next) + nodes[i] = node; + + qsort (nodes, list->length, sizeof (*nodes), cmpnode); + + for (i = 0; i < list->length - 1; ++i) + nodes[i]->next = nodes[i+1]; + nodes[i]->next = NULL; + list->head = nodes[0]; + list->tail = &nodes[i]->next; + + talloc_free (nodes); +} diff --git a/lib/tags.c b/lib/tags.c index 8fe4a3f0..c58924f8 100644 --- a/lib/tags.c +++ b/lib/tags.c @@ -20,30 +20,18 @@ #include "notmuch-private.h" -#include /* GList */ - struct _notmuch_tags { - int sorted; - GList *tags; - GList *iterator; + notmuch_string_node_t *iterator; }; -/* XXX: Should write some talloc-friendly list to avoid the need for - * this. */ -static int -_notmuch_tags_destructor (notmuch_tags_t *tags) -{ - g_list_free (tags->tags); - - return 0; -} - /* Create a new notmuch_tags_t object, with 'ctx' as its talloc owner. + * The returned iterator will talloc_steal the 'list', since the list + * is almost always transient. * * This function can return NULL in case of out-of-memory. */ notmuch_tags_t * -_notmuch_tags_create (void *ctx) +_notmuch_tags_create (const void *ctx, notmuch_string_list_t *list) { notmuch_tags_t *tags; @@ -51,44 +39,12 @@ _notmuch_tags_create (void *ctx) if (unlikely (tags == NULL)) return NULL; - talloc_set_destructor (tags, _notmuch_tags_destructor); - - tags->sorted = 1; - tags->tags = NULL; - tags->iterator = NULL; + tags->iterator = list->head; + talloc_steal (tags, list); return tags; } -/* Add a new tag to 'tags'. The tags object will create its own copy - * of the string. - * - * Note: The tags object will not do anything to prevent duplicate - * tags being stored, so the caller really shouldn't pass - * duplicates. */ -void -_notmuch_tags_add_tag (notmuch_tags_t *tags, const char *tag) -{ - tags->tags = g_list_prepend (tags->tags, talloc_strdup (tags, tag)); - tags->sorted = 0; -} - -/* Prepare 'tag' for iteration. - * - * The internal creator of 'tags' should call this function before - * returning 'tags' to the user to call the public functions such as - * notmuch_tags_valid, notmuch_tags_get, and - * notmuch_tags_move_to_next. */ -void -_notmuch_tags_prepare_iterator (notmuch_tags_t *tags) -{ - if (! tags->sorted) - tags->tags = g_list_sort (tags->tags, (GCompareFunc) strcmp); - tags->sorted = 1; - - tags->iterator = tags->tags; -} - notmuch_bool_t notmuch_tags_valid (notmuch_tags_t *tags) { @@ -101,7 +57,7 @@ notmuch_tags_get (notmuch_tags_t *tags) if (tags->iterator == NULL) return NULL; - return (char *) tags->iterator->data; + return (char *) tags->iterator->string; } void diff --git a/lib/thread.cc b/lib/thread.cc index 5190a663..ace5ce7f 100644 --- a/lib/thread.cc +++ b/lib/thread.cc @@ -537,23 +537,23 @@ notmuch_thread_get_newest_date (notmuch_thread_t *thread) notmuch_tags_t * notmuch_thread_get_tags (notmuch_thread_t *thread) { - notmuch_tags_t *tags; + notmuch_string_list_t *tags; GList *keys, *l; - tags = _notmuch_tags_create (thread); + tags = _notmuch_string_list_create (thread); if (unlikely (tags == NULL)) return NULL; keys = g_hash_table_get_keys (thread->tags); for (l = keys; l; l = l->next) - _notmuch_tags_add_tag (tags, (char *) l->data); + _notmuch_string_list_append (tags, (char *) l->data); g_list_free (keys); - _notmuch_tags_prepare_iterator (tags); + _notmuch_string_list_sort (tags); - return tags; + return _notmuch_tags_create (thread, tags); } void From 206938ec9b4ddee28793f2f052a5314d6d7ab08d Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 9 Dec 2010 00:32:35 -0500 Subject: [PATCH 3/6] Add a generic function to get a list of terms with some prefix. Replace _notmuch_convert_tags with this and simplify _create_filenames_for_terms_with_prefix. This will also come in handy shortly to get the message file name list. --- lib/database-private.h | 16 +++++++--------- lib/database.cc | 37 +++++++++++++++---------------------- lib/directory.cc | 19 ++++--------------- lib/message.cc | 6 +++++- 4 files changed, 31 insertions(+), 47 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 140b54ed..f7050097 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -53,18 +53,16 @@ struct _notmuch_database { Xapian::ValueRangeProcessor *value_range_processor; }; -/* Convert tags from Xapian internal format to notmuch format. - * - * The function gets a TermIterator as argument and uses that iterator to find - * all tag terms in the object. The tags are then converted to a - * notmuch_tags_t list and returned. The function needs to allocate memory for - * the resulting list and it uses the argument ctx as talloc context. +/* Return the list of terms from the given iterator matching a prefix. + * The prefix will be stripped from the strings in the returned list. + * The list will be allocated using ctx as the talloc context. * * The function returns NULL on failure. */ -notmuch_tags_t * -_notmuch_convert_tags (void *ctx, Xapian::TermIterator &i, - Xapian::TermIterator &end); +notmuch_string_list_t * +_notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i, + Xapian::TermIterator &end, + const char *prefix); #pragma GCC visibility pop diff --git a/lib/database.cc b/lib/database.cc index 9c777156..7f79cf47 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1757,49 +1757,42 @@ notmuch_database_remove_message (notmuch_database_t *notmuch, return status; } -notmuch_tags_t * -_notmuch_convert_tags (void *ctx, Xapian::TermIterator &i, - Xapian::TermIterator &end) +notmuch_string_list_t * +_notmuch_database_get_terms_with_prefix (void *ctx, Xapian::TermIterator &i, + Xapian::TermIterator &end, + const char *prefix) { - const char *prefix = _find_prefix ("tag"); + int prefix_len = strlen (prefix); notmuch_string_list_t *list; - std::string tag; - - /* Currently this iteration is written with the assumption that - * "tag" has a single-character prefix. */ - assert (strlen (prefix) == 1); list = _notmuch_string_list_create (ctx); if (unlikely (list == NULL)) return NULL; - i.skip_to (prefix); - - while (i != end) { - tag = *i; - - if (tag.empty () || tag[0] != *prefix) + for (i.skip_to (prefix); i != end; i++) { + /* Terminate loop at first term without desired prefix. */ + if (strncmp ((*i).c_str (), prefix, prefix_len)) break; - _notmuch_string_list_append (list, tag.c_str () + 1); - - i++; + _notmuch_string_list_append (list, (*i).c_str () + prefix_len); } - _notmuch_string_list_sort (list); - - return _notmuch_tags_create (ctx, list); + return list; } notmuch_tags_t * notmuch_database_get_all_tags (notmuch_database_t *db) { Xapian::TermIterator i, end; + notmuch_string_list_t *tags; try { i = db->xapian_db->allterms_begin(); end = db->xapian_db->allterms_end(); - return _notmuch_convert_tags(db, i, end); + tags = _notmuch_database_get_terms_with_prefix (db, i, end, + _find_prefix ("tag")); + _notmuch_string_list_sort (tags); + return _notmuch_tags_create (db, tags); } catch (const Xapian::Error &error) { fprintf (stderr, "A Xapian exception occurred getting tags: %s.\n", error.get_msg().c_str()); diff --git a/lib/directory.cc b/lib/directory.cc index aeee9caf..70e1693e 100644 --- a/lib/directory.cc +++ b/lib/directory.cc @@ -23,10 +23,6 @@ /* Create an iterator to iterate over the basenames of files (or * directories) that all share a common parent directory. - * - * The code here is general enough to be reused for any case of - * iterating over the non-prefixed portion of terms sharing a common - * prefix. */ static notmuch_filenames_t * _create_filenames_for_terms_with_prefix (void *ctx, @@ -35,21 +31,14 @@ _create_filenames_for_terms_with_prefix (void *ctx, { notmuch_string_list_t *filename_list; Xapian::TermIterator i, end; - int prefix_len = strlen (prefix); - filename_list = _notmuch_string_list_create (ctx); + i = notmuch->xapian_db->allterms_begin(); + end = notmuch->xapian_db->allterms_end(); + filename_list = _notmuch_database_get_terms_with_prefix (ctx, i, end, + prefix); if (unlikely (filename_list == NULL)) return NULL; - end = notmuch->xapian_db->allterms_end (prefix); - - for (i = notmuch->xapian_db->allterms_begin (prefix); i != end; i++) - { - std::string term = *i; - - _notmuch_string_list_append (filename_list, term.c_str () + prefix_len); - } - return _notmuch_filenames_create (ctx, filename_list); } diff --git a/lib/message.cc b/lib/message.cc index b87506ac..da4a102c 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -711,9 +711,13 @@ notmuch_tags_t * notmuch_message_get_tags (notmuch_message_t *message) { Xapian::TermIterator i, end; + notmuch_string_list_t *tags; i = message->doc.termlist_begin(); end = message->doc.termlist_end(); - return _notmuch_convert_tags(message, i, end); + tags = _notmuch_database_get_terms_with_prefix (message, i, end, + _find_prefix ("tag")); + _notmuch_string_list_sort (tags); + return _notmuch_tags_create (message, tags); } const char * From f271071330fed2947abfa7e9956a85a978924548 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 8 Dec 2010 22:19:55 -0500 Subject: [PATCH 4/6] Add the file name list to the unified message metadata pass. Even if the caller never uses the file names, there is little cost to simply fetching the file name terms. However, retrieving the full paths requires additional database work, so the expansion from terms to full paths is performed lazily. This also simplifies clearing the filename cache, since that's now handled by the generic metadata cache code. This further reduces my inbox search from 3.102 seconds before the unified metadata pass to 2.206 seconds (1.4X faster). --- lib/message.cc | 58 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/lib/message.cc b/lib/message.cc index da4a102c..43f8e700 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -32,6 +32,7 @@ struct _notmuch_message { char *message_id; char *thread_id; char *in_reply_to; + notmuch_string_list_t *filename_term_list; notmuch_string_list_t *filename_list; char *author; notmuch_message_file_t *message_file; @@ -102,6 +103,7 @@ _notmuch_message_create_for_document (const void *talloc_owner, message->message_id = NULL; message->thread_id = NULL; message->in_reply_to = NULL; + message->filename_term_list = NULL; message->filename_list = NULL; message->message_file = NULL; message->author = NULL; @@ -294,6 +296,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message) Xapian::TermIterator i, end; const char *thread_prefix = _find_prefix ("thread"), *id_prefix = _find_prefix ("id"), + *filename_prefix = _find_prefix ("file-direntry"), *replyto_prefix = _find_prefix ("replyto"); /* We do this all in a single pass because Xapian decompresses the @@ -316,8 +319,17 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message) message->message_id = _notmuch_message_get_term (message, i, end, id_prefix); + /* Get filename list. Here we get only the terms. We lazily + * expand them to full file names when needed in + * _notmuch_message_ensure_filename_list. */ + assert (strcmp (id_prefix, filename_prefix) < 0); + if (!message->filename_term_list && !message->filename_list) + message->filename_term_list = + _notmuch_database_get_terms_with_prefix (message, i, end, + filename_prefix); + /* Get reply to */ - assert (strcmp (id_prefix, replyto_prefix) < 0); + assert (strcmp (filename_prefix, replyto_prefix) < 0); if (!message->in_reply_to) message->in_reply_to = _notmuch_message_get_term (message, i, end, replyto_prefix); @@ -336,6 +348,12 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message, message->thread_id = NULL; } + if (strcmp ("file-direntry", prefix_name) == 0) { + talloc_free (message->filename_term_list); + talloc_free (message->filename_list); + message->filename_term_list = message->filename_list = NULL; + } + if (strcmp ("replyto", prefix_name) == 0) { talloc_free (message->in_reply_to); message->in_reply_to = NULL; @@ -439,11 +457,6 @@ _notmuch_message_add_filename (notmuch_message_t *message, if (filename == NULL) INTERNAL_ERROR ("Message filename cannot be NULL."); - if (message->filename_list) { - talloc_free (message->filename_list); - message->filename_list = NULL; - } - relative = _notmuch_database_relative_path (message->notmuch, filename); status = _notmuch_database_split_path (local, relative, &directory, NULL); @@ -491,11 +504,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message, notmuch_status_t status; Xapian::TermIterator i, last; - if (message->filename_list) { - talloc_free (message->filename_list); - message->filename_list = NULL; - } - status = _notmuch_database_filename_to_direntry (local, message->notmuch, filename, &direntry); if (status) @@ -575,21 +583,18 @@ _notmuch_message_clear_data (notmuch_message_t *message) static void _notmuch_message_ensure_filename_list (notmuch_message_t *message) { - const char *prefix = _find_prefix ("file-direntry"); - int prefix_len = strlen (prefix); - Xapian::TermIterator i; + notmuch_string_node_t *node; if (message->filename_list) return; + if (!message->filename_term_list) + _notmuch_message_ensure_metadata (message); + message->filename_list = _notmuch_string_list_create (message); + node = message->filename_term_list->head; - i = message->doc.termlist_begin (); - i.skip_to (prefix); - - if (i == message->doc.termlist_end () || - strncmp ((*i).c_str (), prefix, prefix_len)) - { + if (!node) { /* A message document created by an old version of notmuch * (prior to rename support) will have the filename in the * data of the document rather than as a file-direntry term. @@ -608,19 +613,13 @@ _notmuch_message_ensure_filename_list (notmuch_message_t *message) return; } - for (; i != message->doc.termlist_end (); i++) { + for (; node; node = node->next) { void *local = talloc_new (message); const char *db_path, *directory, *basename, *filename; char *colon, *direntry = NULL; unsigned int directory_id; - /* Terminate loop at first term without desired prefix. */ - if (strncmp ((*i).c_str (), prefix, prefix_len)) - break; - - direntry = talloc_strdup (local, (*i).c_str ()); - - direntry += prefix_len; + direntry = node->string; directory_id = strtol (direntry, &colon, 10); @@ -648,6 +647,9 @@ _notmuch_message_ensure_filename_list (notmuch_message_t *message) talloc_free (local); } + + talloc_free (message->filename_term_list); + message->filename_term_list = NULL; } const char * From d19c5de17a606e08860a5de951c780038dec2f89 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 9 Dec 2010 01:54:34 -0500 Subject: [PATCH 5/6] Add the tag list to the unified message metadata pass. Now each caller of notmuch_message_get_tags only gets a new iterator, instead of a whole new list. In principle this could cause problems with iterating while modifying tags, but through the magic of talloc references, we keep the old tag list alive even after the cache in the message object is invalidated. This reduces my index search from the 3.102 seconds before the unified metadata pass to 1.811 seconds (1.7X faster). Combined with the thread search optimization in b3caef1f0659dac8183441357c8fee500a940889, that makes this query 2.5X faster than when I started. --- lib/message.cc | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/lib/message.cc b/lib/message.cc index 43f8e700..ecda75af 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -32,6 +32,7 @@ struct _notmuch_message { char *message_id; char *thread_id; char *in_reply_to; + notmuch_string_list_t *tag_list; notmuch_string_list_t *filename_term_list; notmuch_string_list_t *filename_list; char *author; @@ -103,6 +104,7 @@ _notmuch_message_create_for_document (const void *talloc_owner, message->message_id = NULL; message->thread_id = NULL; message->in_reply_to = NULL; + message->tag_list = NULL; message->filename_term_list = NULL; message->filename_list = NULL; message->message_file = NULL; @@ -295,6 +297,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message) { Xapian::TermIterator i, end; const char *thread_prefix = _find_prefix ("thread"), + *tag_prefix = _find_prefix ("tag"), *id_prefix = _find_prefix ("id"), *filename_prefix = _find_prefix ("file-direntry"), *replyto_prefix = _find_prefix ("replyto"); @@ -313,8 +316,17 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message) message->thread_id = _notmuch_message_get_term (message, i, end, thread_prefix); + /* Get tags */ + assert (strcmp (thread_prefix, tag_prefix) < 0); + if (!message->tag_list) { + message->tag_list = + _notmuch_database_get_terms_with_prefix (message, i, end, + tag_prefix); + _notmuch_string_list_sort (message->tag_list); + } + /* Get id */ - assert (strcmp (thread_prefix, id_prefix) < 0); + assert (strcmp (tag_prefix, id_prefix) < 0); if (!message->message_id) message->message_id = _notmuch_message_get_term (message, i, end, id_prefix); @@ -348,6 +360,11 @@ _notmuch_message_invalidate_metadata (notmuch_message_t *message, message->thread_id = NULL; } + if (strcmp ("tag", prefix_name) == 0) { + talloc_unlink (message, message->tag_list); + message->tag_list = NULL; + } + if (strcmp ("file-direntry", prefix_name) == 0) { talloc_free (message->filename_term_list); talloc_free (message->filename_list); @@ -712,14 +729,20 @@ notmuch_message_get_date (notmuch_message_t *message) notmuch_tags_t * notmuch_message_get_tags (notmuch_message_t *message) { - Xapian::TermIterator i, end; - notmuch_string_list_t *tags; - i = message->doc.termlist_begin(); - end = message->doc.termlist_end(); - tags = _notmuch_database_get_terms_with_prefix (message, i, end, - _find_prefix ("tag")); - _notmuch_string_list_sort (tags); - return _notmuch_tags_create (message, tags); + notmuch_tags_t *tags; + + if (!message->tag_list) + _notmuch_message_ensure_metadata (message); + + tags = _notmuch_tags_create (message, message->tag_list); + /* _notmuch_tags_create steals the reference to the tag_list, but + * in this case it's still used by the message, so we add an + * *additional* talloc reference to the list. As a result, it's + * possible to modify the message tags (which talloc_unlink's the + * current list from the message) while still iterating because + * the iterator will keep the current list alive. */ + talloc_reference (message, message->tag_list); + return tags; } const char * @@ -1287,6 +1310,7 @@ notmuch_message_remove_all_tags (notmuch_message_t *message) if (! message->frozen) _notmuch_message_sync (message); + talloc_free (tags); return NOTMUCH_STATUS_SUCCESS; } From b599bbe6721f6320c732ec7187f798ce60781887 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 21 Mar 2011 02:33:55 -0400 Subject: [PATCH 6/6] Fixup string list author --- lib/string-list.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/string-list.c b/lib/string-list.c index d92a0bab..da72746d 100644 --- a/lib/string-list.c +++ b/lib/string-list.c @@ -16,6 +16,7 @@ * along with this program. If not, see http://www.gnu.org/licenses/ . * * Author: Carl Worth + * Austin Clements */ #include "notmuch-private.h"