mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-11-21 18:38:08 +01:00
util: Function to parse boolean term queries
This parses the subset of Xapian's boolean term quoting rules that are used by make_boolean_term. This is provided as a generic string utility, but will be used shortly in notmuch restore to parse and optimize for ID queries.
This commit is contained in:
parent
25cf5f5dc4
commit
7c3a995d6b
2 changed files with 98 additions and 0 deletions
|
@ -22,6 +22,7 @@
|
||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
#include "talloc.h"
|
#include "talloc.h"
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
char *
|
char *
|
||||||
|
@ -107,3 +108,84 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return a pointer to the first character of str that isspace() does
 * not classify as whitespace, or to the terminating NUL if the rest of
 * the string is entirely whitespace. */
static const char *
skip_space (const char *str)
{
    const char *cursor = str;

    for (;;) {
        /* ctype functions need an unsigned char value. */
        unsigned char ch = (unsigned char) *cursor;

        if (ch == '\0' || ! isspace (ch))
            break;
        cursor++;
    }
    return cursor;
}
|
||||||
|
|
||||||
|
int
|
||||||
|
parse_boolean_term (void *ctx, const char *str,
|
||||||
|
char **prefix_out, char **term_out)
|
||||||
|
{
|
||||||
|
int err = EINVAL;
|
||||||
|
*prefix_out = *term_out = NULL;
|
||||||
|
|
||||||
|
/* Parse prefix */
|
||||||
|
str = skip_space (str);
|
||||||
|
const char *pos = strchr (str, ':');
|
||||||
|
if (! pos)
|
||||||
|
goto FAIL;
|
||||||
|
*prefix_out = talloc_strndup (ctx, str, pos - str);
|
||||||
|
if (! *prefix_out) {
|
||||||
|
err = ENOMEM;
|
||||||
|
goto FAIL;
|
||||||
|
}
|
||||||
|
++pos;
|
||||||
|
|
||||||
|
/* Implement de-quoting compatible with make_boolean_term. */
|
||||||
|
if (*pos == '"') {
|
||||||
|
char *out = talloc_array (ctx, char, strlen (pos));
|
||||||
|
int closed = 0;
|
||||||
|
if (! out) {
|
||||||
|
err = ENOMEM;
|
||||||
|
goto FAIL;
|
||||||
|
}
|
||||||
|
*term_out = out;
|
||||||
|
/* Skip the opening quote, find the closing quote, and
|
||||||
|
* un-double doubled internal quotes. */
|
||||||
|
for (++pos; *pos; ) {
|
||||||
|
if (*pos == '"') {
|
||||||
|
++pos;
|
||||||
|
if (*pos != '"') {
|
||||||
|
/* Found the closing quote. */
|
||||||
|
closed = 1;
|
||||||
|
pos = skip_space (pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*out++ = *pos++;
|
||||||
|
}
|
||||||
|
/* Did the term terminate without a closing quote or is there
|
||||||
|
* trailing text after the closing quote? */
|
||||||
|
if (!closed || *pos)
|
||||||
|
goto FAIL;
|
||||||
|
*out = '\0';
|
||||||
|
} else {
|
||||||
|
const char *start = pos;
|
||||||
|
/* Check for text after the boolean term. */
|
||||||
|
while (! is_unquoted_terminator (*pos))
|
||||||
|
++pos;
|
||||||
|
if (*skip_space (pos)) {
|
||||||
|
err = EINVAL;
|
||||||
|
goto FAIL;
|
||||||
|
}
|
||||||
|
/* No trailing text; dup the string so the caller can free
|
||||||
|
* it. */
|
||||||
|
*term_out = talloc_strndup (ctx, start, pos - start);
|
||||||
|
if (! *term_out) {
|
||||||
|
err = ENOMEM;
|
||||||
|
goto FAIL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
FAIL:
|
||||||
|
talloc_free (*prefix_out);
|
||||||
|
talloc_free (*term_out);
|
||||||
|
errno = err;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
|
@ -34,4 +34,20 @@ char *strtok_len (char *s, const char *delim, size_t *len);
|
||||||
int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
|
int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
|
||||||
char **buf, size_t *len);
|
char **buf, size_t *len);
|
||||||
|
|
||||||
|
/* Parse a boolean term query consisting of a prefix, a colon, and a
|
||||||
|
* term that may be quoted as described for make_boolean_term. If the
|
||||||
|
* term is not quoted, then it ends at the first whitespace or close
|
||||||
|
* parenthesis. str may contain leading or trailing whitespace,
|
||||||
|
* but anything else is considered a parse error. This is compatible
|
||||||
|
* with anything produced by make_boolean_term, and supports a subset
|
||||||
|
* of the quoting styles supported by Xapian (and hence notmuch).
|
||||||
|
* *prefix_out and *term_out will be talloc'd with context ctx.
|
||||||
|
*
|
||||||
|
* Return: 0 on success, -1 on error. errno will be set to EINVAL if
|
||||||
|
* there is a parse error or ENOMEM if there is an allocation failure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
parse_boolean_term (void *ctx, const char *str,
|
||||||
|
char **prefix_out, char **term_out);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue