From 3f5809bf28becbddfed9ff33d6f1242346904c23 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Thu, 5 Jan 2023 20:02:05 -0400 Subject: [PATCH] lib: parse index.as_text We pre-parse into a list of compiled regular expressions to avoid calling regcomp on the hot (indexing) path. As explained in the code comment, this cannot be done lazily with reasonable error reporting, at least not without touching a lot of the code in index.cc. --- lib/database-private.h | 4 ++++ lib/open.cc | 53 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/lib/database-private.h b/lib/database-private.h index b9be4e22..61232f1a 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -291,6 +291,10 @@ struct _notmuch_database { /* Track what parameters were specified when opening */ notmuch_open_param_t params; + + /* list of regular expressions to check for text indexing */ + regex_t *index_as_text; + size_t index_as_text_length; }; /* Prior to database version 3, features were implied by the database diff --git a/lib/open.cc b/lib/open.cc index 67ff868c..54d1faf3 100644 --- a/lib/open.cc +++ b/lib/open.cc @@ -320,6 +320,8 @@ _alloc_notmuch (const char *database_path, const char *config_path, const char * notmuch->transaction_count = 0; notmuch->transaction_threshold = 0; notmuch->view = 1; + notmuch->index_as_text = NULL; + notmuch->index_as_text_length = 0; notmuch->params = NOTMUCH_PARAM_NONE; if (database_path) @@ -427,6 +429,53 @@ _load_database_state (notmuch_database_t *notmuch) notmuch, notmuch->xapian_db->get_uuid ().c_str ()); } +/* XXX This should really be done lazily, but the error reporting path in the indexing code + * would need to be redone to report any errors. 
+ */ +notmuch_status_t +_ensure_index_as_text (notmuch_database_t *notmuch, char **message) +{ + int nregex = 0; + regex_t *regexv = NULL; + + if (notmuch->index_as_text) + return NOTMUCH_STATUS_SUCCESS; + + for (notmuch_config_values_t *list = notmuch_config_get_values (notmuch, + NOTMUCH_CONFIG_INDEX_AS_TEXT); + notmuch_config_values_valid (list); + notmuch_config_values_move_to_next (list)) { + regex_t *new_regex; + int rerr; + const char *str = notmuch_config_values_get (list); + size_t len = strlen (str); + + /* str must be non-empty, because n_c_get_values skips empty + * strings */ + assert (len > 0); + + regexv = talloc_realloc (notmuch, regexv, regex_t, nregex + 1); + new_regex = &regexv[nregex]; + + rerr = regcomp (new_regex, str, REG_EXTENDED | REG_NOSUB); + if (rerr) { + size_t error_size = regerror (rerr, new_regex, NULL, 0); + char *error = (char *) talloc_size (str, error_size); + + regerror (rerr, new_regex, error, error_size); + IGNORE_RESULT (asprintf (message, "Error in index.as_text: %s: %s\n", error, str)); + + return NOTMUCH_STATUS_ILLEGAL_ARGUMENT; + } + nregex++; + } + + notmuch->index_as_text = regexv; + notmuch->index_as_text_length = nregex; + + return NOTMUCH_STATUS_SUCCESS; +} + static notmuch_status_t _finish_open (notmuch_database_t *notmuch, const char *profile, @@ -531,6 +580,10 @@ _finish_open (notmuch_database_t *notmuch, if (status) goto DONE; + status = _ensure_index_as_text (notmuch, &message); + if (status) + goto DONE; + autocommit_str = notmuch_config_get (notmuch, NOTMUCH_CONFIG_AUTOCOMMIT); if (unlikely (! autocommit_str)) { INTERNAL_ERROR ("missing configuration for autocommit");