util: Function to parse boolean term queries

This parses the subset of Xapian's boolean term quoting rules that are
used by make_boolean_term.  This is provided as a generic string
utility, but will be used shortly in notmuch restore to parse and
optimize for ID queries.
This commit is contained in:
Austin Clements 2013-01-06 15:22:39 -05:00 committed by David Bremner
parent 25cf5f5dc4
commit 7c3a995d6b
2 changed files with 98 additions and 0 deletions

View file

@ -22,6 +22,7 @@
#include "string-util.h" #include "string-util.h"
#include "talloc.h" #include "talloc.h"
#include <ctype.h>
#include <errno.h> #include <errno.h>
char * char *
@ -107,3 +108,84 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
return 0; return 0;
} }
/* Return a pointer to the first character of str that is not
 * whitespace according to isspace(), or to the terminating NUL if
 * the rest of the string is all whitespace. */
static const char *
skip_space (const char *str)
{
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        /* <ctype.h> functions require a value representable as
         * unsigned char, so convert before classifying. */
        if (! isspace ((unsigned char) *cursor))
            break;
    }
    return cursor;
}
/* Parse a boolean term query of the form PREFIX:TERM from str.
 *
 * Leading whitespace before the prefix and trailing whitespace after
 * the term are skipped; any other trailing text is a parse error.
 * TERM is either
 *   - quoted: it starts with '"', ends at the first '"' that is not
 *     immediately followed by another '"', and each doubled '""'
 *     inside it stands for a single literal '"'; or
 *   - unquoted: it runs up to the first character for which
 *     is_unquoted_terminator() (defined elsewhere in this file) is
 *     true.
 *
 * The prefix must be non-empty: ":term" is rejected as a parse error
 * because it does not contain a prefix at all.
 *
 * On success, *prefix_out and *term_out are NUL-terminated strings
 * allocated with talloc context ctx, and 0 is returned.
 *
 * On failure, -1 is returned, errno is set to EINVAL (parse error) or
 * ENOMEM (allocation failure), anything allocated so far is freed,
 * and both *prefix_out and *term_out are set to NULL so the caller
 * is never left holding a freed pointer.
 */
int
parse_boolean_term (void *ctx, const char *str,
                    char **prefix_out, char **term_out)
{
    int err = EINVAL;

    *prefix_out = *term_out = NULL;

    /* Parse prefix */
    str = skip_space (str);
    const char *pos = strchr (str, ':');

    /* No colon at all, or a colon with nothing in front of it, means
     * there is no prefix to parse. */
    if (! pos || pos == str)
        goto FAIL;
    *prefix_out = talloc_strndup (ctx, str, pos - str);
    if (! *prefix_out) {
        err = ENOMEM;
        goto FAIL;
    }
    ++pos;

    /* Implement de-quoting compatible with make_boolean_term. */
    if (*pos == '"') {
        /* pos still points at the opening quote, so strlen (pos)
         * bytes is enough for every remaining character plus the
         * terminating NUL. */
        char *out = talloc_array (ctx, char, strlen (pos));
        int closed = 0;

        if (! out) {
            err = ENOMEM;
            goto FAIL;
        }
        /* Publish the buffer immediately so the FAIL path frees it. */
        *term_out = out;
        /* Skip the opening quote, find the closing quote, and
         * un-double doubled internal quotes. */
        for (++pos; *pos; ) {
            if (*pos == '"') {
                ++pos;
                if (*pos != '"') {
                    /* Found the closing quote. */
                    closed = 1;
                    pos = skip_space (pos);
                    break;
                }
            }
            *out++ = *pos++;
        }
        /* Did the term terminate without a closing quote or is there
         * trailing text after the closing quote? */
        if (! closed || *pos)
            goto FAIL;
        *out = '\0';
    } else {
        const char *start = pos;

        /* Check for text after the boolean term. */
        while (! is_unquoted_terminator (*pos))
            ++pos;
        if (*skip_space (pos)) {
            err = EINVAL;
            goto FAIL;
        }
        /* No trailing text; dup the string so the caller can free
         * it. */
        *term_out = talloc_strndup (ctx, start, pos - start);
        if (! *term_out) {
            err = ENOMEM;
            goto FAIL;
        }
    }
    return 0;

  FAIL:
    talloc_free (*prefix_out);
    talloc_free (*term_out);
    /* Do not hand freed pointers back to the caller. */
    *prefix_out = *term_out = NULL;
    /* Set errno last so the cleanup calls above cannot clobber it. */
    errno = err;
    return -1;
}

View file

@ -34,4 +34,20 @@ char *strtok_len (char *s, const char *delim, size_t *len);
int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term, int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
char **buf, size_t *len); char **buf, size_t *len);
/* Parse a boolean term query consisting of a prefix, a colon, and a
 * term that may be quoted as described for make_boolean_term.  If the
 * term is not quoted, then it ends at the first whitespace or close
 * parenthesis.  str may contain leading or trailing whitespace,
 * but anything else is considered a parse error.  This is compatible
 * with anything produced by make_boolean_term, and supports a subset
 * of the quoting styles supported by Xapian (and hence notmuch).
 * *prefix_out and *term_out will be talloc'd with context ctx.
 *
 * Return: 0 on success, -1 on error.  errno will be set to EINVAL if
 * there is a parse error or ENOMEM if there is an allocation failure.
 * On error, any string already allocated for *prefix_out or
 * *term_out has been freed, so the caller must not use or free
 * either pointer.
 */
int
parse_boolean_term (void *ctx, const char *str,
char **prefix_out, char **term_out);
#endif #endif