From bd4347499f6ddab1af0967d596ad97996eea74f8 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Sat, 2 Jul 2022 20:45:11 -0300 Subject: [PATCH] CLI/git: current cache contents of index Rather than shelling out once per message to get the list of files corresponding to tags, it is much faster (although potentially a bit memory intensive) to read them all at once. --- notmuch-git.py | 58 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/notmuch-git.py b/notmuch-git.py index a75de135..4d9887c8 100644 --- a/notmuch-git.py +++ b/notmuch-git.py @@ -738,6 +738,7 @@ class PrivateIndex: self.lastmod = None self.checksum = None self._load_cache_file() + self.file_tree = None self._index_tags() def __enter__(self): @@ -763,6 +764,43 @@ class PrivateIndex: _LOG.error("Error decoding cache") _sys.exit(1) + @timed + def _read_file_tree(self): + self.file_tree = {} + + with _git( + args=['ls-files', 'tags'], + additional_env={'GIT_INDEX_FILE': self.index_path}, + stdout=_subprocess.PIPE) as git: + for file in git.stdout: + dir=_os.path.dirname(file) + tag=_os.path.basename(file).rstrip() + if dir not in self.file_tree: + self.file_tree[dir]=[tag] + else: + self.file_tree[dir].append(tag) + + + def _clear_tags_for_message(self, id): + """ + Clear any existing index entries for message 'id' + + Neither 'id' nor the tags in 'tags' should be encoded/escaped. + """ + + if self.file_tree == None: + self._read_file_tree() + + dir = _id_path(id) + + if dir not in self.file_tree: + return + + for file in self.file_tree[dir]: + line = '0 0000000000000000000000000000000000000000\t{:s}/{:s}\n'.format(dir,file) + yield line + + @timed def _index_tags(self): "Write notmuch tags to private git index." @@ -798,7 +836,7 @@ class PrivateIndex: if tag.startswith(prefix)] id = _xapian_unquote(string=id) if clear_tags: - for line in _clear_tags_for_message(index=self.index_path, id=id): + for line in self._clear_tags_for_message(id=id): git.stdin.write(line) for line in _index_tags_for_message( id=id, status='A', tags=tags): @@ -835,24 +873,6 @@ def _read_index_checksum (index_path): except FileNotFoundError: return None - -def _clear_tags_for_message(index, id): - """ - Clear any existing index entries for message 'id' - - Neither 'id' nor the tags in 'tags' should be encoded/escaped. - """ - - dir = _id_path(id) - - with _git( - args=['ls-files', dir], - additional_env={'GIT_INDEX_FILE': index}, - stdout=_subprocess.PIPE) as git: - for file in git.stdout: - line = '0 0000000000000000000000000000000000000000\t{:s}\n'.format(file.strip()) - yield line - def _read_database_lastmod(): with _spawn( args=['notmuch', 'count', '--lastmod', '*'],