diff --git a/doc/man1/notmuch-git.rst b/doc/man1/notmuch-git.rst index fa7a748e..59d02fb4 100644 --- a/doc/man1/notmuch-git.rst +++ b/doc/man1/notmuch-git.rst @@ -235,14 +235,46 @@ REPOSITORY CONTENTS =================== The tags are stored in the git repo (and exported) as a set of empty -files. For a message with Message-Id *id*, for each tag *tag*, there +files. These empty files are contained within a directory named after +the message-id. + +In what follows `encode()` represents a POSIX filesystem safe +encoding. The encoding preserves alphanumerics, and the characters +`+-_@=.,:`. All other octets are replaced with `%` followed by a two +digit hex number. + +Currently :any:`notmuch-git` can read any format version, but can only +create (via :any:`init`) :ref:`version 1 <format_version_1>` repositories. + +.. _format_version_0: + +Version 0 +--------- + +This is the legacy format created by the `nmbug` tool prior to release +0.37. For a message with Message-Id *id*, for each tag *tag*, there is an empty file with path tags/ `encode` (*id*) / `encode` (*tag*) -The encoding preserves alphanumerics, and the characters `+-_@=.,:`. -All other octets are replaced with `%` followed by a two digit hex -number. +.. _format_version_1: + +Version 1 +--------- + +In format version 1 and later, the format version is contained in a +top level file called FORMAT. + +For a message with Message-Id *id*, for each tag *tag*, there +is an empty file with path + + tags/ `hash1` (*id*) / `hash2` (*id*) / `encode` (*id*) / `encode` (*tag*) + +The hash functions each represent one byte of the `blake2b` hex +digest. + +Compared to :ref:`version 0 <format_version_0>`, this reduces the +number of subdirectories within each directory. .. 
_repo_location: diff --git a/notmuch-git.py b/notmuch-git.py index b4253c0d..aebff764 100644 --- a/notmuch-git.py +++ b/notmuch-git.py @@ -46,10 +46,12 @@ _LOG.addHandler(_logging.StreamHandler()) NOTMUCH_GIT_DIR = None TAG_PREFIX = None +FORMAT_VERSION = 0 _HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}') _TAG_DIRECTORY = 'tags/' -_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)') +_TAG_FILE_REGEX = ( _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)'), + _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){2}(?P<id>[^/]*)/(?P<tag>[^/]*)')) # magic hash for Git (git hash-object -t blob /dev/null) _EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' @@ -265,7 +267,7 @@ def archive(treeish='HEAD', args=()): Each tag $tag for message with Message-Id $id is written to an empty file - tags/encode($id)/encode($tag) + tags/hash1(id)/hash2(id)/encode($id)/encode($tag) The encoding preserves alphanumerics, and the characters "+-_@=.:," (not the quotes). All other octets are replaced with @@ -469,9 +471,17 @@ def init(remote=None): _git(args=['config', 'core.logallrefupdates', 'true'], wait=True) # create an empty blob (e69de29bb2d1d6434b8b29ae775ad8c2e48c5391) _git(args=['hash-object', '-w', '--stdin'], input='', wait=True) + # create a blob for the FORMAT file + (status, stdout, _) = _git(args=['hash-object', '-w', '--stdin'], stdout=_subprocess.PIPE, + input='1\n', wait=True) + verhash=stdout.rstrip() + _LOG.debug('hash of FORMAT blob = {:s}'.format(verhash)) + # Add FORMAT to the index + _git(args=['update-index', '--add', '--cacheinfo', '100644,{:s},FORMAT'.format(verhash)], wait=True) + _git( args=[ - 'commit', '--allow-empty', '-m', 'Start a new nmbug repository' + 'commit', '-m', 'Start a new notmuch-git repository' ], additional_env={'GIT_WORK_TREE': NOTMUCH_GIT_DIR}, wait=True) @@ -821,7 +831,7 @@ def _clear_tags_for_message(index, id): Neither 'id' nor the tags in 'tags' should be encoded/escaped. 
""" - dir = 'tags/{id}'.format(id=_hex_quote(string=id)) + dir = _id_path(id) with _git( args=['ls-files', dir], @@ -838,6 +848,21 @@ def _read_database_lastmod(): (count,uuid,lastmod_str) = notmuch.stdout.readline().split() return (count,uuid,int(lastmod_str)) +def _id_path(id): + hid=_hex_quote(string=id) + from hashlib import blake2b + + if FORMAT_VERSION==0: + return 'tags/{hid}'.format(hid=hid) + elif FORMAT_VERSION==1: + idhash = blake2b(hid.encode('utf8'), digest_size=2).hexdigest() + return 'tags/{dir1}/{dir2}/{hid}'.format( + hid=hid, + dir1=idhash[0:2],dir2=idhash[2:]) + else: + _LOG.error("Unknown format version",FORMAT_VERSION) + _sys.exit(1) + def _index_tags_for_message(id, status, tags): """ Update the Git index to either create or delete an empty file. @@ -852,8 +877,7 @@ def _index_tags_for_message(id, status, tags): hash = '0000000000000000000000000000000000000000' for tag in tags: - path = 'tags/{id}/{tag}'.format( - id=_hex_quote(string=id), tag=_hex_quote(string=tag)) + path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag)) yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path) @@ -869,7 +893,7 @@ def _diff_refs(filter, a='HEAD', b='@{upstream}'): def _unpack_diff_lines(stream): "Iterate through (id, tag) tuples in a diff stream." 
for line in stream: - match = _TAG_FILE_REGEX.match(line.strip()) + match = _TAG_FILE_REGEX[FORMAT_VERSION].match(line.strip()) if not match: message = 'non-tag line in diff: {!r}'.format(line.strip()) if line.startswith(_TAG_DIRECTORY): @@ -907,6 +931,17 @@ def _notmuch_config_get(key): _sys.exit(1) return stdout.rstrip() +def read_format_version(): + try: + (status, stdout, stderr) = _git( + args=['cat-file', 'blob', 'master:FORMAT'], + stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, wait=True) + except SubprocessError as e: + _LOG.debug("failed to read FORMAT file from git, assuming format version 0") + return 0 + + return int(stdout) + # based on BaseDirectory.save_data_path from pyxdg (LGPL2+) def xdg_data_path(profile): resource = _os.path.join('notmuch',profile,'git') @@ -1104,6 +1139,9 @@ if __name__ == '__main__': _LOG.debug('prefix = {:s}'.format(TAG_PREFIX)) _LOG.debug('repository = {:s}'.format(NOTMUCH_GIT_DIR)) + FORMAT_VERSION = read_format_version() + _LOG.debug('FORMAT_VERSION={:d}'.format(FORMAT_VERSION)) + if args.func == help: arg_names = ['command'] else: diff --git a/test/T850-git.sh b/test/T850-git.sh index 7ea50939..342cc31b 100755 --- a/test/T850-git.sh +++ b/test/T850-git.sh @@ -40,10 +40,10 @@ notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu test_begin_subtest "committing new prefix works with force" notmuch tag +new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -l debug -p 'new-prefix::' -C force-prefix.git commit --force -git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT +git -C force-prefix.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | xargs dirname | sort -u > OUTPUT notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu cat <<EOF >EXPECTED -id:20091117190054.GU3165@dottiness.seas.harvard.edu +20091117190054.GU3165@dottiness.seas.harvard.edu EOF test_expect_equal_file_nonempty EXPECTED 
OUTPUT @@ -62,8 +62,8 @@ test_expect_equal_file_nonempty EXPECTED OUTPUT test_begin_subtest "commit" notmuch git -C tags.git commit --force -git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT -notmuch search --output=messages '*' | sort > EXPECTED +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | xargs dirname | sort -u > OUTPUT +notmuch search --output=messages '*' | sed s/^id:// | sort > EXPECTED test_expect_equal_file_nonempty EXPECTED OUTPUT test_begin_subtest "commit --force succeeds" @@ -88,22 +88,22 @@ test_expect_equal_file_nonempty BEFORE AFTER test_begin_subtest "commit (incremental)" notmuch tag +test id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort > OUTPUT echo "--------------------------------------------------" >> OUTPUT notmuch tag -test id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort >> OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/test -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/test +20091117190054.GU3165@dottiness.seas.harvard.edu/unread -------------------------------------------------- -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread 
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF test_expect_equal_file_nonempty EXPECTED OUTPUT @@ -111,18 +111,18 @@ test_begin_subtest "commit (change prefix)" notmuch tag +test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git -p 'test::' commit --force git -C tags.git ls-tree -r --name-only HEAD | - grep 20091117190054 | sort > OUTPUT + grep 20091117190054 | notmuch_git_sanitize | sort > OUTPUT echo "--------------------------------------------------" >> OUTPUT notmuch tag -test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu notmuch git -C tags.git commit --force -git -C tags.git ls-tree -r --name-only HEAD | +git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \ grep 20091117190054 | sort >> OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/one +20091117190054.GU3165@dottiness.seas.harvard.edu/one -------------------------------------------------- -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF test_expect_equal_file_nonempty EXPECTED OUTPUT @@ -151,12 +151,12 @@ test_expect_equal_file_nonempty BEFORE AFTER test_begin_subtest "archive" notmuch git -C tags.git archive | tar tf - | \ - grep 20091117190054.GU3165@dottiness.seas.harvard.edu | sort > OUTPUT + grep 20091117190054.GU3165@dottiness.seas.harvard.edu | notmuch_git_sanitize | sort > OUTPUT cat <<EOF > EXPECTED -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/ -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox -tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed 
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread +20091117190054.GU3165@dottiness.seas.harvard.edu/ +20091117190054.GU3165@dottiness.seas.harvard.edu/inbox +20091117190054.GU3165@dottiness.seas.harvard.edu/signed +20091117190054.GU3165@dottiness.seas.harvard.edu/unread EOF notmuch git -C tags.git checkout test_expect_equal_file EXPECTED OUTPUT diff --git a/test/test-lib.sh b/test/test-lib.sh index 4eb58ea0..e9f32582 100644 --- a/test/test-lib.sh +++ b/test/test-lib.sh @@ -559,6 +559,10 @@ notmuch_date_sanitize () { -e 's/^Date: Fri, 05 Jan 2001 .*0000/Date: GENERATED_DATE/' } +# remove redundant parts of notmuch-git internal paths +notmuch_git_sanitize () { + sed -e 's,tags/\([0-9a-f]\{2\}/\)\{2\},,' -e '/FORMAT/d' +} notmuch_uuid_sanitize () { sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g' }