mirror of
https://git.notmuchmail.org/git/notmuch
synced 2025-01-07 09:11:43 +01:00
bcb695a716
Add strcmp_null, a strcmp that handles NULL strings; in strcmp terms a NULL string is considered to be less than a non-NULL string.
257 lines
5.5 KiB
C
257 lines
5.5 KiB
C
/* string-util.c - Extra or enhanced routines for null terminated strings.
 *
 * Copyright (c) 2012 Jani Nikula
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/ .
 *
 * Author: Jani Nikula <jani@nikula.org>
 */
|
|
|
|
|
|
#include "string-util.h"
|
|
#include "talloc.h"
|
|
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
|
|
/* Locate the next token in s without modifying s.
 *
 * Leading bytes that occur in delim are skipped; the token then runs
 * up to (but not including) the next byte found in delim, or to the
 * terminating NUL.  The token's length is stored in *len.
 *
 * Returns a pointer to the first byte of the token, or NULL when no
 * token remains (in which case *len is 0).
 */
char *
strtok_len (char *s, const char *delim, size_t *len)
{
    char *token = s + strspn (s, delim);
    size_t token_len = strcspn (token, delim);

    *len = token_len;
    if (token_len == 0)
        return NULL;

    return token;
}
|
|
|
|
/* Const-qualified front end for strtok_len: same tokenizing contract,
 * but it accepts and returns pointers to const data. */
const char *
strtok_len_c (const char *s, const char *delim, size_t *len)
{
    /* The cast only drops the qualifier for the call: strtok_len does
     * not write through s, and the C type system cannot express one
     * function covering both the const and non-const signatures. */
    char *unqualified = (char *) s;
    const char *token = strtok_len (unqualified, delim, len);

    return token;
}
|
|
|
|
/* Return a copy of str, allocated with talloc_strdup against ctx, in
 * which control characters have been made harmless: tabs and newlines
 * become spaces and every other byte below 32 becomes '?'.
 *
 * Returns NULL if str is NULL or if the copy cannot be allocated.
 */
char *
sanitize_string (const void *ctx, const char *str)
{
    char *copy, *p;

    if (str == NULL)
        return NULL;

    copy = talloc_strdup (ctx, str);
    if (copy == NULL)
        return NULL;

    p = copy;
    while (*p != '\0') {
        /* Look at the byte as unsigned so bytes >= 128 are never
         * mistaken for control characters. */
        unsigned char byte = (unsigned char) *p;

        if (byte == '\t' || byte == '\n')
            *p = ' ';
        else if (byte < 32)
            *p = '?';
        p++;
    }

    return copy;
}
|
|
|
|
/* Return 1 if c ends an unquoted boolean term, 0 otherwise.  The
 * terminators are the NUL byte, every byte up to and including space,
 * and a closing parenthesis. */
static int
is_unquoted_terminator (unsigned char c)
{
    if (c == ')')
        return 1;

    /* Covers NUL as well as space and the control bytes below it. */
    return c <= ' ';
}
|
|
|
|
int
|
|
make_boolean_term (void *ctx, const char *prefix, const char *term,
|
|
char **buf, size_t *len)
|
|
{
|
|
const char *in;
|
|
char *out;
|
|
size_t needed = 3;
|
|
int need_quoting = 0;
|
|
|
|
/* Do we need quoting? To be paranoid, we quote anything
|
|
* containing a quote or '(', even though these only matter at the
|
|
* beginning, and anything containing non-ASCII text. */
|
|
if (! term[0])
|
|
need_quoting = 1;
|
|
for (in = term; *in && !need_quoting; in++)
|
|
if (is_unquoted_terminator (*in) || *in == '"' || *in == '('
|
|
|| (unsigned char)*in > 127)
|
|
need_quoting = 1;
|
|
|
|
if (need_quoting)
|
|
for (in = term; *in; in++)
|
|
needed += (*in == '"') ? 2 : 1;
|
|
else
|
|
needed = strlen (term) + 1;
|
|
|
|
/* Reserve space for the prefix */
|
|
if (prefix)
|
|
needed += strlen (prefix) + 1;
|
|
|
|
if ((*buf == NULL) || (needed > *len)) {
|
|
*len = 2 * needed;
|
|
*buf = talloc_realloc (ctx, *buf, char, *len);
|
|
}
|
|
|
|
if (! *buf) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
|
|
out = *buf;
|
|
|
|
/* Copy in the prefix */
|
|
if (prefix) {
|
|
strcpy (out, prefix);
|
|
out += strlen (prefix);
|
|
*out++ = ':';
|
|
}
|
|
|
|
if (! need_quoting) {
|
|
strcpy (out, term);
|
|
return 0;
|
|
}
|
|
|
|
/* Quote term by enclosing it in double quotes and doubling any
|
|
* internal double quotes. */
|
|
*out++ = '"';
|
|
in = term;
|
|
while (*in) {
|
|
if (*in == '"')
|
|
*out++ = '"';
|
|
*out++ = *in++;
|
|
}
|
|
*out++ = '"';
|
|
*out = '\0';
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Return a pointer to the first byte of str that isspace does not
 * classify as whitespace; this may be the terminating NUL. */
static const char *
skip_space (const char *str)
{
    const char *p;

    for (p = str; *p != '\0'; p++) {
        if (! isspace ((unsigned char) *p))
            break;
    }

    return p;
}
|
|
|
|
int
|
|
parse_boolean_term (void *ctx, const char *str,
|
|
char **prefix_out, char **term_out)
|
|
{
|
|
int err = EINVAL;
|
|
*prefix_out = *term_out = NULL;
|
|
|
|
/* Parse prefix */
|
|
str = skip_space (str);
|
|
const char *pos = strchr (str, ':');
|
|
if (! pos || pos == str)
|
|
goto FAIL;
|
|
*prefix_out = talloc_strndup (ctx, str, pos - str);
|
|
if (! *prefix_out) {
|
|
err = ENOMEM;
|
|
goto FAIL;
|
|
}
|
|
++pos;
|
|
|
|
/* Implement de-quoting compatible with make_boolean_term. */
|
|
if (*pos == '"') {
|
|
char *out = talloc_array (ctx, char, strlen (pos));
|
|
int closed = 0;
|
|
if (! out) {
|
|
err = ENOMEM;
|
|
goto FAIL;
|
|
}
|
|
*term_out = out;
|
|
/* Skip the opening quote, find the closing quote, and
|
|
* un-double doubled internal quotes. */
|
|
for (++pos; *pos; ) {
|
|
if (*pos == '"') {
|
|
++pos;
|
|
if (*pos != '"') {
|
|
/* Found the closing quote. */
|
|
closed = 1;
|
|
pos = skip_space (pos);
|
|
break;
|
|
}
|
|
}
|
|
*out++ = *pos++;
|
|
}
|
|
/* Did the term terminate without a closing quote or is there
|
|
* trailing text after the closing quote? */
|
|
if (!closed || *pos)
|
|
goto FAIL;
|
|
*out = '\0';
|
|
} else {
|
|
const char *start = pos;
|
|
/* Check for text after the boolean term. */
|
|
while (! is_unquoted_terminator (*pos))
|
|
++pos;
|
|
if (*skip_space (pos)) {
|
|
err = EINVAL;
|
|
goto FAIL;
|
|
}
|
|
/* No trailing text; dup the string so the caller can free
|
|
* it. */
|
|
*term_out = talloc_strndup (ctx, start, pos - start);
|
|
if (! *term_out) {
|
|
err = ENOMEM;
|
|
goto FAIL;
|
|
}
|
|
}
|
|
return 0;
|
|
|
|
FAIL:
|
|
talloc_free (*prefix_out);
|
|
talloc_free (*term_out);
|
|
errno = err;
|
|
return -1;
|
|
}
|
|
|
|
/* strcmp that tolerates NULL arguments.
 *
 * Two NULLs compare equal (0); a NULL string sorts before any
 * non-NULL string (-1 when only s1 is NULL, 1 when only s2 is NULL).
 * When both are non-NULL the result is that of strcmp.
 */
int
strcmp_null (const char *s1, const char *s2)
{
    if (s1 == NULL)
        return (s2 == NULL) ? 0 : -1;

    if (s2 == NULL)
        return 1;

    return strcmp (s1, s2);
}
|
|
|
|
/* Case-insensitive string equality on untyped pointers.
 *
 * a and b are passed to strcasecmp as C strings.  Returns 1 when they
 * compare equal ignoring case, 0 otherwise.
 */
int
strcase_equal (const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;

    if (strcasecmp (lhs, rhs) != 0)
        return 0;

    return 1;
}
|
|
|
|
/* Case-insensitive djb2 hash of the C string at ptr.
 *
 * Each byte is folded to lower case with tolower before being mixed
 * in, so strings that differ only in case hash to the same value.
 * A NULL or empty string hashes to the djb2 seed, 5381.
 */
unsigned int
strcase_hash (const void *ptr)
{
    /* Read the bytes as unsigned char: passing a negative plain char
     * (any byte >= 0x80 where char is signed) to tolower is undefined
     * behavior. */
    const unsigned char *s = ptr;
    unsigned int hash = 5381;

    if (s == NULL)
        return hash;

    for (; *s != '\0'; s++) {
        /* hash * 33 + c */
        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);
    }

    return hash;
}
|