mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-11-21 10:28:09 +01:00
util: add unicode_word_utf8
This originally used Xapian::Unicode::is_wordchar, but that forces clients to link directly to libxapian, which seems like it might be busywork if nothing else.
This commit is contained in:
parent
46ab6013a2
commit
781125c9e9
3 changed files with 57 additions and 1 deletions
|
@ -5,7 +5,8 @@ extra_cflags += -I$(srcdir)/$(dir)
|
|||
|
||||
libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
|
||||
$(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
|
||||
$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
|
||||
$(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c \
|
||||
$(dir)/unicode-util.c
|
||||
|
||||
libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
|
||||
|
||||
|
|
43
util/unicode-util.c
Normal file
43
util/unicode-util.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
#include "unicode-util.h"
|
||||
|
||||
/* Based on Xapian::Unicode::is_wordchar, to avoid forcing clients to
|
||||
link directly to libxapian.
|
||||
*/
|
||||
|
||||
static bool
|
||||
unicode_is_wordchar (notmuch_unichar ch)
|
||||
{
|
||||
switch (g_unichar_type (ch)) {
|
||||
case G_UNICODE_UPPERCASE_LETTER:
|
||||
case G_UNICODE_LOWERCASE_LETTER:
|
||||
case G_UNICODE_TITLECASE_LETTER:
|
||||
case G_UNICODE_MODIFIER_LETTER:
|
||||
case G_UNICODE_OTHER_LETTER:
|
||||
case G_UNICODE_NON_SPACING_MARK:
|
||||
case G_UNICODE_ENCLOSING_MARK:
|
||||
case G_UNICODE_SPACING_MARK:
|
||||
case G_UNICODE_DECIMAL_NUMBER:
|
||||
case G_UNICODE_LETTER_NUMBER:
|
||||
case G_UNICODE_OTHER_NUMBER:
|
||||
case G_UNICODE_CONNECT_PUNCTUATION:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
unicode_word_utf8 (const char *utf8_str)
|
||||
{
|
||||
gunichar *decoded = g_utf8_to_ucs4_fast (utf8_str, -1, NULL);
|
||||
const gunichar *p = decoded;
|
||||
bool ret;
|
||||
|
||||
while (*p && unicode_is_wordchar (*p))
|
||||
p++;
|
||||
|
||||
ret = (*p == '\0');
|
||||
|
||||
g_free (decoded);
|
||||
return ret;
|
||||
}
|
12
util/unicode-util.h
Normal file
12
util/unicode-util.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
#ifndef UNICODE_UTIL_H
|
||||
#define UNICODE_UTIL_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <gmodule.h>
|
||||
|
||||
/* Return true if the UTF-8 encoded string would tokenize as a single
|
||||
 * word, according to Xapian. */
|
||||
bool unicode_word_utf8 (const char *str);
|
||||
typedef gunichar notmuch_unichar;
|
||||
|
||||
#endif
|
Loading…
Reference in a new issue