Move boolean-term quoting out of notmuch-tag.c into a shared helper.

_escape_tag in notmuch-tag.c is removed; _optimize_tag_query now calls
make_boolean_term (added to util/string-util.[ch]).  The helper writes
"prefix:term" into a caller-supplied talloc buffer, growing it when it
is too small, and quotes the term only if it contains a byte <= ' ',
a ')', a '"' or a byte > 127.

Review notes (behaviour is left exactly as in the patch):
 * An empty term is not quoted, so the output is just "prefix:".
 * When talloc_realloc fails, *buf is overwritten with NULL and *len
   has already been set to the new size.
 * A space also triggers quoting, although the header comment only
   mentions non-printable characters.

diff --git a/notmuch-tag.c b/notmuch-tag.c
index 88d559be..fc9d43a7 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include "string-util.h"
 
 static volatile sig_atomic_t interrupted;
 
@@ -35,25 +36,6 @@ handle_sigint (unused (int sig))
     interrupted = 1;
 }
 
-static char *
-_escape_tag (char *buf, const char *tag)
-{
-    const char *in = tag;
-    char *out = buf;
-
-    /* Boolean terms surrounded by double quotes can contain any
-     * character. Double quotes are quoted by doubling them. */
-    *out++ = '"';
-    while (*in) {
-        if (*in == '"')
-            *out++ = '"';
-        *out++ = *in++;
-    }
-    *out++ = '"';
-    *out = 0;
-    return buf;
-}
-
 typedef struct {
     const char *tag;
     notmuch_bool_t remove;
@@ -71,25 +53,16 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
      * parenthesize and the exclusion part of the query must not use
      * the '-' operator (though the NOT operator is fine). */
 
-    char *escaped, *query_string;
+    char *escaped = NULL;
+    size_t escaped_len = 0;
+    char *query_string;
     const char *join = "";
-    int i;
-    unsigned int max_tag_len = 0;
+    size_t i;
 
     /* Don't optimize if there are no tag changes. */
     if (tag_ops[0].tag == NULL)
         return talloc_strdup (ctx, orig_query_string);
 
-    /* Allocate a buffer for escaping tags. This is large enough to
-     * hold a fully escaped tag with every character doubled plus
-     * enclosing quotes and a NUL. */
-    for (i = 0; tag_ops[i].tag; i++)
-        if (strlen (tag_ops[i].tag) > max_tag_len)
-            max_tag_len = strlen (tag_ops[i].tag);
-    escaped = talloc_array (ctx, char, max_tag_len * 2 + 3);
-    if (! escaped)
-        return NULL;
-
     /* Build the new query string */
     if (strcmp (orig_query_string, "*") == 0)
         query_string = talloc_strdup (ctx, "(");
@@ -97,10 +70,17 @@ _optimize_tag_query (void *ctx, const char *orig_query_string,
         query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
 
     for (i = 0; tag_ops[i].tag && query_string; i++) {
+        /* XXX in case of OOM, query_string will be deallocated when
+         * ctx is, which might be at shutdown */
+        if (make_boolean_term (ctx,
+                               "tag", tag_ops[i].tag,
+                               &escaped, &escaped_len))
+            return NULL;
+
         query_string = talloc_asprintf_append_buffer (
-            query_string, "%s%stag:%s", join,
+            query_string, "%s%s%s", join,
             tag_ops[i].remove ? "" : "not ",
-            _escape_tag (escaped, tag_ops[i].tag));
+            escaped);
         join = " or ";
     }
 
diff --git a/util/string-util.c b/util/string-util.c
index 44f8cd3a..7a71049a 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -20,6 +20,9 @@
 
 
 #include "string-util.h"
+#include "talloc.h"
+
+#include <errno.h>
 
 char *
 strtok_len (char *s, const char *delim, size_t *len)
@@ -32,3 +35,75 @@ strtok_len (char *s, const char *delim, size_t *len)
 
     return *len ? s : NULL;
 }
+
+static int
+is_unquoted_terminator (unsigned char c)
+{
+    return c == 0 || c <= ' ' || c == ')';
+}
+
+int
+make_boolean_term (void *ctx, const char *prefix, const char *term,
+                   char **buf, size_t *len)
+{
+    const char *in;
+    char *out;
+    size_t needed = 3;
+    int need_quoting = 0;
+
+    /* Do we need quoting? To be paranoid, we quote anything
+     * containing a quote, even though it only matters at the
+     * beginning, and anything containing non-ASCII text. */
+    for (in = term; *in && !need_quoting; in++)
+        if (is_unquoted_terminator (*in) || *in == '"'
+            || (unsigned char)*in > 127)
+            need_quoting = 1;
+
+    if (need_quoting)
+        for (in = term; *in; in++)
+            needed += (*in == '"') ? 2 : 1;
+    else
+        needed = strlen (term) + 1;
+
+    /* Reserve space for the prefix */
+    if (prefix)
+        needed += strlen (prefix) + 1;
+
+    if ((*buf == NULL) || (needed > *len)) {
+        *len = 2 * needed;
+        *buf = talloc_realloc (ctx, *buf, char, *len);
+    }
+
+    if (! *buf) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    out = *buf;
+
+    /* Copy in the prefix */
+    if (prefix) {
+        strcpy (out, prefix);
+        out += strlen (prefix);
+        *out++ = ':';
+    }
+
+    if (! need_quoting) {
+        strcpy (out, term);
+        return 0;
+    }
+
+    /* Quote term by enclosing it in double quotes and doubling any
+     * internal double quotes. */
+    *out++ = '"';
+    in = term;
+    while (*in) {
+        if (*in == '"')
+            *out++ = '"';
+        *out++ = *in++;
+    }
+    *out++ = '"';
+    *out = '\0';
+
+    return 0;
+}
diff --git a/util/string-util.h b/util/string-util.h
index ac7676c8..719c276a 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -19,4 +19,19 @@
 
 char *strtok_len (char *s, const char *delim, size_t *len);
 
+/* Construct a boolean term query with the specified prefix (e.g.,
+ * "id") and search term, quoting term as necessary. Specifically, if
+ * term contains any non-printable ASCII characters, non-ASCII
+ * characters, close parenthesis or double quotes, it will be enclosed
+ * in double quotes and any internal double quotes will be doubled
+ * (e.g. a"b -> "a""b"). The result will be a valid notmuch query and
+ * can be parsed by parse_boolean_term.
+ *
+ * Output is into buf; it may be talloc_realloced.
+ * Return: 0 on success, -1 on error. errno will be set to ENOMEM if
+ * there is an allocation failure.
+ */
+int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
+                       char **buf, size_t *len);
+
 #endif