lib: Separate all phrases indexed by _notmuch_message_gen_terms

This adds a 100 termpos gap between all phrases indexed by
_notmuch_message_gen_terms.  This fixes a bug where terms from the end
of one header and the beginning of another header could match together
in a single phrase and a separate bug where term positions of
un-prefixed terms overlapped.

This fix only affects newly indexed messages.  Messages that are
already indexed won't benefit from this fix without re-indexing, but
the fix won't make things any worse for existing messages.
This commit is contained in:
Austin Clements 2014-06-15 22:40:34 -04:00 committed by David Bremner
parent c1805576a0
commit dc64ab6720
2 changed files with 7 additions and 4 deletions

View file

@ -1023,16 +1023,21 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;
term_gen->set_document (message->doc);
term_gen->set_termpos (message->termpos);
if (prefix_name) {
const char *prefix = _find_prefix (prefix_name);
term_gen->set_termpos (message->termpos);
term_gen->index_text (text, 1, prefix);
message->termpos = term_gen->get_termpos ();
/* Create a gap between this and the next terms so they don't
* appear to be a phrase. */
message->termpos = term_gen->get_termpos () + 100;
}
term_gen->set_termpos (message->termpos);
term_gen->index_text (text);
/* Create a term gap, as above. */
message->termpos = term_gen->get_termpos () + 100;
return NOTMUCH_PRIVATE_STATUS_SUCCESS;
}

View file

@ -170,14 +170,12 @@ EOF
notmuch new > /dev/null
test_begin_subtest "headers do not have adjacent term positions"
test_subtest_known_broken
# Regression test for a bug where term positions for non-prefixed
# terms weren't updated
output=$(notmuch search id:termpos and '"com dest"')
test_expect_equal "$output" ""
test_begin_subtest "parts have non-overlapping term positions"
test_subtest_known_broken
output=$(notmuch search id:termpos and '"a y c"')
test_expect_equal "$output" ""