CLI/git: cache current contents of index

Rather than shelling out once per message to get the list of files
corresponding to tags, it is much faster (although potentially a bit
memory intensive) to read them all at once.
This commit is contained in:
David Bremner 2022-07-02 20:45:11 -03:00
parent c66f0dea7a
commit bd4347499f

View file

@ -738,6 +738,7 @@ class PrivateIndex:
self.lastmod = None self.lastmod = None
self.checksum = None self.checksum = None
self._load_cache_file() self._load_cache_file()
self.file_tree = None
self._index_tags() self._index_tags()
def __enter__(self): def __enter__(self):
@ -763,6 +764,43 @@ class PrivateIndex:
_LOG.error("Error decoding cache") _LOG.error("Error decoding cache")
_sys.exit(1) _sys.exit(1)
@timed
def _read_file_tree(self):
    """
    Read the whole 'tags' listing of the private index into self.file_tree.

    Runs a single 'git ls-files tags' with GIT_INDEX_FILE pointing at
    self.index_path.  Afterwards self.file_tree is a dict mapping each
    directory name printed by git to the list of file names (basename
    only, trailing whitespace stripped) listed inside that directory.
    """
    tree = {}
    with _git(
            args=['ls-files', 'tags'],
            additional_env={'GIT_INDEX_FILE': self.index_path},
            stdout=_subprocess.PIPE) as git:
        for line in git.stdout:
            # Each line still carries its line terminator; it ends up in
            # the basename part, so it is stripped from there.
            parent, name = _os.path.split(line)
            tree.setdefault(parent, []).append(name.rstrip())
    # Publish the cache only once the listing has been read completely, so
    # that an error raised while reading never leaves a partial tree behind
    # that later lookups would mistake for the full index contents.
    self.file_tree = tree
def _clear_tags_for_message(self, id):
    """
    Clear any existing index entries for message 'id'

    Generator: yields one line per file cached in self.file_tree under
    the directory _id_path(id), and nothing when that directory is not
    cached.  The cache is filled by self._read_file_tree() on first use.

    'id' should not be encoded/escaped.
    """
    if self.file_tree is None:
        # Lazily read the whole index once instead of once per message.
        self._read_file_tree()
    msg_dir = _id_path(id)
    for tag_file in self.file_tree.get(msg_dir, ()):
        # Mode 0 with an all-zero object name; presumably consumed by
        # 'git update-index --index-info' to drop the path -- confirm
        # against the caller that writes these lines to git's stdin.
        yield '0 0000000000000000000000000000000000000000\t{:s}/{:s}\n'.format(
            msg_dir, tag_file)
@timed @timed
def _index_tags(self): def _index_tags(self):
"Write notmuch tags to private git index." "Write notmuch tags to private git index."
@ -798,7 +836,7 @@ class PrivateIndex:
if tag.startswith(prefix)] if tag.startswith(prefix)]
id = _xapian_unquote(string=id) id = _xapian_unquote(string=id)
if clear_tags: if clear_tags:
for line in _clear_tags_for_message(index=self.index_path, id=id): for line in self._clear_tags_for_message(id=id):
git.stdin.write(line) git.stdin.write(line)
for line in _index_tags_for_message( for line in _index_tags_for_message(
id=id, status='A', tags=tags): id=id, status='A', tags=tags):
@ -835,24 +873,6 @@ def _read_index_checksum (index_path):
except FileNotFoundError: except FileNotFoundError:
return None return None
def _clear_tags_for_message(index, id):
    """
    Yield one index-clearing line per file listed for message 'id'.

    'index' is handed to git as GIT_INDEX_FILE.  'id' should not be
    encoded/escaped.
    """
    template = '0 0000000000000000000000000000000000000000\t{:s}\n'
    listing = _git(
        args=['ls-files', _id_path(id)],
        additional_env={'GIT_INDEX_FILE': index},
        stdout=_subprocess.PIPE)
    with listing as git:
        for entry in git.stdout:
            # Surrounding whitespace (including the line terminator) is
            # dropped before the path is placed into the output line.
            yield template.format(entry.strip())
def _read_database_lastmod(): def _read_database_lastmod():
with _spawn( with _spawn(
args=['notmuch', 'count', '--lastmod', '*'], args=['notmuch', 'count', '--lastmod', '*'],