From e5beec39d6021c7b8c21e6d84050660ad6c69a96 Mon Sep 17 00:00:00 2001 From: Daniel Kahn Gillmor Date: Sun, 4 Jun 2017 09:32:35 -0300 Subject: [PATCH] add "notmuch reindex" subcommand This new subcommand takes a set of search terms, and re-indexes the list of matching messages. --- Makefile.local | 1 + doc/conf.py | 4 + doc/index.rst | 1 + doc/man1/notmuch-reindex.rst | 29 +++++++ doc/man1/notmuch.rst | 4 +- doc/man7/notmuch-search-terms.rst | 7 +- notmuch-client.h | 3 + notmuch-reindex.c | 134 ++++++++++++++++++++++++++++++ notmuch.c | 2 + performance-test/M04-reindex.sh | 11 +++ performance-test/T03-reindex.sh | 13 +++ test/T670-duplicate-mid.sh | 7 ++ test/T700-reindex.sh | 79 ++++++++++++++++++ 13 files changed, 291 insertions(+), 4 deletions(-) create mode 100644 doc/man1/notmuch-reindex.rst create mode 100644 notmuch-reindex.c create mode 100755 performance-test/M04-reindex.sh create mode 100755 performance-test/T03-reindex.sh create mode 100755 test/T700-reindex.sh diff --git a/Makefile.local b/Makefile.local index 6bc78ef8..af12ca7f 100644 --- a/Makefile.local +++ b/Makefile.local @@ -225,6 +225,7 @@ notmuch_client_srcs = \ notmuch-dump.c \ notmuch-insert.c \ notmuch-new.c \ + notmuch-reindex.c \ notmuch-reply.c \ notmuch-restore.c \ notmuch-search.c \ diff --git a/doc/conf.py b/doc/conf.py index a3d82696..aa864b3c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -95,6 +95,10 @@ man_pages = [ u'incorporate new mail into the notmuch database', [notmuch_authors], 1), + ('man1/notmuch-reindex', 'notmuch-reindex', + u're-index matching messages', + [notmuch_authors], 1), + ('man1/notmuch-reply', 'notmuch-reply', u'constructs a reply template for a set of messages', [notmuch_authors], 1), diff --git a/doc/index.rst b/doc/index.rst index 344606d9..aa6c9f40 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -18,6 +18,7 @@ Contents: man5/notmuch-hooks man1/notmuch-insert man1/notmuch-new + man1/notmuch-reindex man1/notmuch-reply man1/notmuch-restore man1/notmuch-search 
diff --git a/doc/man1/notmuch-reindex.rst b/doc/man1/notmuch-reindex.rst new file mode 100644 index 00000000..e39cc4ee --- /dev/null +++ b/doc/man1/notmuch-reindex.rst @@ -0,0 +1,29 @@ +=============== +notmuch-reindex +=============== + +SYNOPSIS +======== + +**notmuch** **reindex** [*option* ...] <*search-term*> ... + +DESCRIPTION +=========== + +Re-index all messages matching the search terms. + +See **notmuch-search-terms(7)** for details of the supported syntax for +<*search-term*\ >. + +The **reindex** command searches for all messages matching the +supplied search terms, and re-creates the full-text index on these +messages using the supplied options. + +SEE ALSO +======== + +**notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**, +**notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**, +**notmuch-new(1)**, +**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**, +**notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)** diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst index cb350d1a..40fd335b 100644 --- a/doc/man1/notmuch.rst +++ b/doc/man1/notmuch.rst @@ -163,8 +163,8 @@ SEE ALSO **notmuch-address(1)**, **notmuch-compact(1)**, **notmuch-config(1)**, **notmuch-count(1)**, **notmuch-dump(1)**, **notmuch-hooks(5)**, -**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reply(1)**, -**notmuch-restore(1)**, **notmuch-search(1)**, +**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reindex(1)**, +**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)** The notmuch website: **https://notmuchmail.org** diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst index 47cab48d..dd76972e 100644 --- a/doc/man7/notmuch-search-terms.rst +++ b/doc/man7/notmuch-search-terms.rst @@ -9,6 +9,8 @@ SYNOPSIS **notmuch** **dump** [--format=(batch-tag|sup)] [--] [--output=<*file*>] [--] [<*search-term*> ...] 
+**notmuch** **reindex** [option ...] <*search-term*> ... + **notmuch** **search** [option ...] <*search-term*> ... **notmuch** **show** [option ...] <*search-term*> ... @@ -421,5 +423,6 @@ SEE ALSO **notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**, **notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**, -**notmuch-new(1)**, **notmuch-reply(1)**, **notmuch-restore(1)**, -**notmuch-search(1)**, **notmuch-show(1)**, **notmuch-tag(1)** +**notmuch-new(1)**, **notmuch-reindex(1)**, **notmuch-reply(1)**, +**notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-show(1)**, +**notmuch-tag(1)** diff --git a/notmuch-client.h b/notmuch-client.h index ae37360b..1d3c0829 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -200,6 +200,9 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]); int notmuch_insert_command (notmuch_config_t *config, int argc, char *argv[]); +int +notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]); + int notmuch_reply_command (notmuch_config_t *config, int argc, char *argv[]); diff --git a/notmuch-reindex.c b/notmuch-reindex.c new file mode 100644 index 00000000..44223042 --- /dev/null +++ b/notmuch-reindex.c @@ -0,0 +1,134 @@ +/* notmuch - Not much of an email program, (just index and search) + * + * Copyright © 2016 Daniel Kahn Gillmor + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . 
+ * + * Author: Daniel Kahn Gillmor + */ + +#include "notmuch-client.h" +#include "string-util.h" + +static volatile sig_atomic_t interrupted; + +static void +handle_sigint (unused (int sig)) +{ + static char msg[] = "Stopping... \n"; + + /* This write is "opportunistic", so it's okay to ignore the + * result. It is not required for correctness, and if it does + * fail or produce a short write, we want to get out of the signal + * handler as quickly as possible, not retry it. */ + IGNORE_RESULT (write (2, msg, sizeof (msg) - 1)); + interrupted = 1; +} + +/* reindex all messages matching 'query_string' using the passed-in indexopts + */ +static int +reindex_query (notmuch_database_t *notmuch, const char *query_string, + notmuch_param_t *indexopts) +{ + notmuch_query_t *query; + notmuch_messages_t *messages; + notmuch_message_t *message; + notmuch_status_t status; + + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + + query = notmuch_query_create (notmuch, query_string); + if (query == NULL) { + fprintf (stderr, "Out of memory.\n"); + return 1; + } + + /* reindexing is not interested in any special sort order */ + notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED); + + status = notmuch_query_search_messages (query, &messages); + if (print_status_query ("notmuch reindex", query, status)) + return status; + + ret = notmuch_database_begin_atomic (notmuch); + for (; + notmuch_messages_valid (messages) && ! 
interrupted; + notmuch_messages_move_to_next (messages)) { + message = notmuch_messages_get (messages); + + ret = notmuch_message_reindex(message, indexopts); + if (ret != NOTMUCH_STATUS_SUCCESS) + break; + } + + if (!ret) + ret = notmuch_database_end_atomic (notmuch); + + notmuch_query_destroy (query); + + return ret || interrupted; +} + +int +notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]) +{ + char *query_string = NULL; + notmuch_database_t *notmuch; + struct sigaction action; + int opt_index; + int ret; + notmuch_param_t *indexopts = NULL; + + /* Set up our handler for SIGINT */ + memset (&action, 0, sizeof (struct sigaction)); + action.sa_handler = handle_sigint; + sigemptyset (&action.sa_mask); + action.sa_flags = SA_RESTART; + sigaction (SIGINT, &action, NULL); + + notmuch_opt_desc_t options[] = { + { NOTMUCH_OPT_INHERIT, (void *) &notmuch_shared_options, NULL, 0, 0 }, + { 0, 0, 0, 0, 0 } + }; + + opt_index = parse_arguments (argc, argv, options, 1); + if (opt_index < 0) + return EXIT_FAILURE; + + notmuch_process_shared_options (argv[0]); + + if (notmuch_database_open (notmuch_config_get_database_path (config), + NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch)) + return EXIT_FAILURE; + + notmuch_exit_if_unmatched_db_uuid (notmuch); + + query_string = query_string_from_args (config, argc-opt_index, argv+opt_index); + if (query_string == NULL) { + fprintf (stderr, "Out of memory\n"); + return EXIT_FAILURE; + } + + if (*query_string == '\0') { + fprintf (stderr, "Error: notmuch reindex requires at least one search term.\n"); + return EXIT_FAILURE; + } + + ret = reindex_query (notmuch, query_string, indexopts); + + notmuch_database_destroy (notmuch); + + return ret || interrupted ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/notmuch.c b/notmuch.c index 8e332ce6..201c7454 100644 --- a/notmuch.c +++ b/notmuch.c @@ -123,6 +123,8 @@ static command_t commands[] = { "Restore the tags from the given dump file (see 'dump')." 
}, { "compact", notmuch_compact_command, NOTMUCH_CONFIG_OPEN, "Compact the notmuch database." }, + { "reindex", notmuch_reindex_command, NOTMUCH_CONFIG_OPEN, + "Re-index all messages matching the search terms." }, { "config", notmuch_config_command, NOTMUCH_CONFIG_OPEN, "Get or set settings in the notmuch configuration file." }, { "help", notmuch_help_command, NOTMUCH_CONFIG_CREATE, /* create but don't save config */ diff --git a/performance-test/M04-reindex.sh b/performance-test/M04-reindex.sh new file mode 100755 index 00000000..d36e061b --- /dev/null +++ b/performance-test/M04-reindex.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +test_description='reindex' + +. ./perf-test-lib.sh || exit 1 + +memory_start + +memory_run 'reindex *' "notmuch reindex '*'" + +memory_done diff --git a/performance-test/T03-reindex.sh b/performance-test/T03-reindex.sh new file mode 100755 index 00000000..7af2d22d --- /dev/null +++ b/performance-test/T03-reindex.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +test_description='tagging' + +. ./perf-test-lib.sh || exit 1 + +time_start + +time_run 'reindex *' "notmuch reindex '*'" +time_run 'reindex *' "notmuch reindex '*'" +time_run 'reindex *' "notmuch reindex '*'" + +time_done diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh index 2013c6c7..ea5e1d6a 100755 --- a/test/T670-duplicate-mid.sh +++ b/test/T670-duplicate-mid.sh @@ -30,4 +30,11 @@ EOF notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT test_expect_equal_file EXPECTED OUTPUT +rm ${MAIL_DIR}/copy3 +test_begin_subtest 'reindex drops terms in duplicate file' +cp /dev/null EXPECTED +notmuch reindex '*' +notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + test_done diff --git a/test/T700-reindex.sh b/test/T700-reindex.sh new file mode 100755 index 00000000..051fbb3c --- /dev/null +++ b/test/T700-reindex.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +test_description='reindexing messages' +. 
./test-lib.sh || exit 1 + +add_email_corpus + +notmuch tag +usertag1 '*' + +notmuch search '*' | notmuch_search_sanitize > initial-threads +notmuch search --output=messages '*' > initial-message-ids +notmuch dump > initial-dump + +test_begin_subtest 'reindex preserves threads' +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file initial-threads OUTPUT + +test_begin_subtest 'reindex after removing duplicate file preserves threads' +# remove one copy +sed 's,3/3(4),3/3,' < initial-threads > EXPECTED +mv $MAIL_DIR/bar/18:2, duplicate-msg-1.eml +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'reindex preserves message-ids' +notmuch reindex '*' +notmuch search --output=messages '*' > OUTPUT +test_expect_equal_file initial-message-ids OUTPUT + +test_begin_subtest 'reindex preserves tags' +notmuch reindex '*' +notmuch dump > OUTPUT +test_expect_equal_file initial-dump OUTPUT + +test_begin_subtest 'reindex moves a message between threads' +notmuch search --output=threads id:87iqd9rn3l.fsf@vertex.dottedmag > EXPECTED +# re-parent +sed -i 's/1258471718-6781-1-git-send-email-dottedmag@dottedmag.net/87iqd9rn3l.fsf@vertex.dottedmag/' $MAIL_DIR/02:2,* +notmuch reindex id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net +notmuch search --output=threads id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest 'reindex detects removal of all files' +notmuch search --output=messages not id:20091117232137.GA7669@griffis1.net> EXPECTED +# remove both copies +mv $MAIL_DIR/cur/51:2,* duplicate-message-2.eml +notmuch reindex id:20091117232137.GA7669@griffis1.net +notmuch search --output=messages '*' > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "reindex preserves properties" +cat <<EOF > prop-dump +#= 
1258471718-6781-1-git-send-email-dottedmag@dottedmag.net userprop=userval +#= 1258471718-6781-2-git-send-email-dottedmag@dottedmag.net userprop=userval +#= 1258491078-29658-1-git-send-email-dottedmag@dottedmag.net userprop=userval1 +#= 20091117190054.GU3165@dottiness.seas.harvard.edu userprop=userval +#= 20091117203301.GV3165@dottiness.seas.harvard.edu userprop=userval3 +#= 87fx8can9z.fsf@vertex.dottedmag userprop=userval2 +#= 87iqd9rn3l.fsf@vertex.dottedmag userprop=userval +#= 87lji4lx9v.fsf@yoom.home.cworth.org userprop=userval3 +#= 87lji5cbwo.fsf@yoom.home.cworth.org userprop=userval +#= cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com userprop=userval +EOF +notmuch restore < prop-dump +notmuch reindex '*' +notmuch dump | grep '^#=' | sort > OUTPUT +test_expect_equal_file prop-dump OUTPUT +test_done + +add_email_corpus lkml + +test_begin_subtest "reindex of lkml corpus preserves threads" +notmuch search '*' | notmuch_search_sanitize > EXPECTED +notmuch reindex '*' +notmuch search '*' | notmuch_search_sanitize > OUTPUT +test_expect_equal_file EXPECTED OUTPUT + +test_done