tag: Automatically limit to messages whose tags will actually change.

This optimizes the user's tagging query to exclude messages that won't
be affected by the tagging operation, saving computation and IO for
redundant tagging operations.

For example,
  notmuch tag +notmuch to:notmuch@notmuchmail.org
will now use the query
  ( to:notmuch@notmuchmail.org ) and (not tag:"notmuch")

In the past, we've often suggested that people do this exact
transformation by hand for slow tagging operations.  This makes that
unnecessary.
This commit is contained in:
Austin Clements 2011-11-09 08:44:35 -05:00 committed by David Bremner
parent 6cb61729d4
commit da67bf12ce
2 changed files with 98 additions and 0 deletions

13
NEWS
View file

@ -1,3 +1,16 @@
Notmuch 0.11 (201x-xx-xx)
=========================
Performance
-----------
Automatic tag query optimization
"notmuch tag" now automatically optimizes the user's query to
exclude messages whose tags won't change. In the past, we've
suggested that people do this by hand; this is no longer necessary.
Notmuch 0.10.1 (2011-11-25)
===========================

View file

@ -30,6 +30,81 @@ handle_sigint (unused (int sig))
interrupted = 1;
}
/* Write TAG into BUF as a double-quoted boolean term and return BUF.
 *
 * A boolean term enclosed in double quotes may contain any character;
 * a double quote inside the term is represented by writing it twice.
 *
 * No bounds checking is performed.  BUF must have room for the worst
 * case of every character being doubled, plus the two enclosing quotes
 * and the terminating NUL: 2 * strlen (tag) + 3 bytes. */
static char *
_escape_tag (char *buf, const char *tag)
{
    size_t src;
    size_t dst = 0;

    /* Opening quote. */
    buf[dst++] = '"';

    for (src = 0; tag[src] != '\0'; src++) {
        /* Emit an extra quote ahead of each embedded quote. */
        if (tag[src] == '"')
            buf[dst++] = '"';
        buf[dst++] = tag[src];
    }

    /* Closing quote and terminator. */
    buf[dst++] = '"';
    buf[dst] = '\0';

    return buf;
}
/* Build a narrowed form of ORIG_QUERY_STRING that only matches messages
 * whose tags the requested operation would actually change.
 *
 * ADD_TAGS and REMOVE_TAGS are arrays of indexes into ARGV (with
 * ADD_TAGS_COUNT and REMOVE_TAGS_COUNT entries respectively).  The
 * first character of each referenced argument is skipped (e.g. the
 * '+' in "+notmuch"); the remainder is used as the tag name.
 *
 * A message is kept if it lacks any tag being added or carries any tag
 * being removed, so the generated clause is an "or" of "not tag:X"
 * terms (additions) and "tag:X" terms (removals).  For example, adding
 * "notmuch" to the query "to:notmuch@notmuchmail.org" yields
 *   ( to:notmuch@notmuchmail.org ) and (not tag:"notmuch")
 *
 * When ORIG_QUERY_STRING is exactly "*", the result consists of the
 * parenthesized tag clause alone.
 * NOTE(review): presumably because "*" is only special as a complete
 * query and cannot be and-ed with other terms -- confirm.
 *
 * Returns a string allocated with CTX as its talloc context, or NULL
 * if an allocation fails. */
static char *
_optimize_tag_query (void *ctx, const char *orig_query_string, char *argv[],
                     int *add_tags, int add_tags_count,
                     int *remove_tags, int remove_tags_count)
{
    /* This is subtler than it looks.  Xapian ignores the '-' operator
     * at the beginning of both queries and parenthesized groups and,
     * furthermore, the presence of a '-' operator at the beginning of
     * a group can inhibit parsing of the previous operator.  Hence,
     * the user-provided query MUST appear first (though it is safe to
     * parenthesize it), and the exclusion part of the query must not
     * use the '-' operator (the NOT operator is fine). */
    const char *separator = "";
    char *tag_buf, *result;
    unsigned int longest = 0;
    int n;

    /* Find the longest tag name so that a single scratch buffer can be
     * reused for escaping every tag. */
    for (n = 0; n < add_tags_count; n++) {
        size_t len = strlen (argv[add_tags[n]] + 1);

        if (len > longest)
            longest = (unsigned int) len;
    }
    for (n = 0; n < remove_tags_count; n++) {
        size_t len = strlen (argv[remove_tags[n]] + 1);

        if (len > longest)
            longest = (unsigned int) len;
    }

    /* Worst case: every character doubled, plus the two enclosing
     * quotes and the terminating NUL. */
    tag_buf = talloc_array (ctx, char, longest * 2 + 3);
    if (tag_buf == NULL)
        return NULL;

    /* Start the new query: the user's query (if any) comes first. */
    if (strcmp (orig_query_string, "*") != 0)
        result = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
    else
        result = talloc_strdup (ctx, "(");

    /* Messages missing a tag that is being added would change.  Once
     * an append fails, result is NULL and the remaining iterations
     * are skipped. */
    for (n = 0; result != NULL && n < add_tags_count; n++) {
        const char *quoted = _escape_tag (tag_buf, argv[add_tags[n]] + 1);

        result = talloc_asprintf_append_buffer (result, "%snot tag:%s",
                                                separator, quoted);
        separator = " or ";
    }

    /* Messages carrying a tag that is being removed would change. */
    for (n = 0; result != NULL && n < remove_tags_count; n++) {
        const char *quoted = _escape_tag (tag_buf, argv[remove_tags[n]] + 1);

        result = talloc_asprintf_append_buffer (result, "%stag:%s",
                                                separator, quoted);
        separator = " or ";
    }

    /* Close the tag clause. */
    if (result != NULL)
        result = talloc_strdup_append_buffer (result, ")");

    /* The scratch buffer is no longer needed, success or failure. */
    talloc_free (tag_buf);

    return result;
}
int
notmuch_tag_command (void *ctx, unused (int argc), unused (char *argv[]))
{
@ -93,6 +168,16 @@ notmuch_tag_command (void *ctx, unused (int argc), unused (char *argv[]))
return 1;
}
/* Optimize the query so it excludes messages that already have
* the specified set of tags. */
query_string = _optimize_tag_query (ctx, query_string, argv,
add_tags, add_tags_count,
remove_tags, remove_tags_count);
if (query_string == NULL) {
fprintf (stderr, "Out of memory.\n");
return 1;
}
config = notmuch_config_open (ctx, NULL, NULL);
if (config == NULL)
return 1;