lib/message_file: open gzipped files

Rather than storing the lower-level stdio FILE object, we store a
GMime stream. This allows both transparent decompression and passing
the stream directly into GMime for parsing. As a side effect, we can let
GMime close the underlying OS stream (indeed, that stream is no longer
visible here).

This change is enough to get notmuch-{new,search} working, but there is still
some work required for notmuch-show, to be done in a following commit.
This commit is contained in:
David Bremner 2019-03-24 00:32:43 -03:00
parent 98b3eebc37
commit 852167479f
2 changed files with 125 additions and 21 deletions

View file

@ -27,8 +27,8 @@
#include <glib.h> /* GHashTable */ #include <glib.h> /* GHashTable */
struct _notmuch_message_file { struct _notmuch_message_file {
/* File object */ /* open stream to (possibly gzipped) file */
FILE *file; GMimeStream *stream;
char *filename; char *filename;
/* Cache for decoded headers */ /* Cache for decoded headers */
@ -46,9 +46,6 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message)
if (message->message) if (message->message)
g_object_unref (message->message); g_object_unref (message->message);
if (message->file)
fclose (message->file);
return 0; return 0;
} }
@ -64,15 +61,14 @@ _notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
if (unlikely (message == NULL)) if (unlikely (message == NULL))
return NULL; return NULL;
/* Only needed for error messages during parsing. */
message->filename = talloc_strdup (message, filename); message->filename = talloc_strdup (message, filename);
if (message->filename == NULL) if (message->filename == NULL)
goto FAIL; goto FAIL;
talloc_set_destructor (message, _notmuch_message_file_destructor); talloc_set_destructor (message, _notmuch_message_file_destructor);
message->file = fopen (filename, "r"); message->stream = g_mime_stream_gzfile_open (filename);
if (message->file == NULL) if (message->stream == NULL)
goto FAIL; goto FAIL;
return message; return message;
@ -105,17 +101,17 @@ _notmuch_message_file_close (notmuch_message_file_t *message)
} }
static bool static bool
_is_mbox (FILE *file) _is_mbox (GMimeStream *stream)
{ {
char from_buf[5]; char from_buf[5];
bool ret = false; bool ret = false;
/* Is this mbox? */ /* Is this mbox? */
if (fread (from_buf, sizeof (from_buf), 1, file) == 1 && if (g_mime_stream_read (stream, from_buf, sizeof (from_buf)) == sizeof(from_buf) &&
strncmp (from_buf, "From ", 5) == 0) strncmp (from_buf, "From ", 5) == 0)
ret = true; ret = true;
rewind (file); g_mime_stream_reset (stream);
return ret; return ret;
} }
@ -123,7 +119,6 @@ _is_mbox (FILE *file)
notmuch_status_t notmuch_status_t
_notmuch_message_file_parse (notmuch_message_file_t *message) _notmuch_message_file_parse (notmuch_message_file_t *message)
{ {
GMimeStream *stream;
GMimeParser *parser; GMimeParser *parser;
notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
static int initialized = 0; static int initialized = 0;
@ -132,7 +127,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
if (message->message) if (message->message)
return NOTMUCH_STATUS_SUCCESS; return NOTMUCH_STATUS_SUCCESS;
is_mbox = _is_mbox (message->file); is_mbox = _is_mbox (message->stream);
if (! initialized) { if (! initialized) {
g_mime_init (); g_mime_init ();
@ -144,12 +139,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
if (! message->headers) if (! message->headers)
return NOTMUCH_STATUS_OUT_OF_MEMORY; return NOTMUCH_STATUS_OUT_OF_MEMORY;
stream = g_mime_stream_file_new (message->file); parser = g_mime_parser_new_with_stream (message->stream);
/* We'll own and fclose the FILE* ourselves. */
g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), false);
parser = g_mime_parser_new_with_stream (stream);
g_mime_parser_set_scan_from (parser, is_mbox); g_mime_parser_set_scan_from (parser, is_mbox);
message->message = g_mime_parser_construct_message (parser, NULL); message->message = g_mime_parser_construct_message (parser, NULL);
@ -167,7 +157,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
} }
DONE: DONE:
g_object_unref (stream); g_mime_stream_reset (message->stream);
g_object_unref (parser); g_object_unref (parser);
if (status) { if (status) {
@ -179,7 +169,6 @@ _notmuch_message_file_parse (notmuch_message_file_t *message)
message->message = NULL; message->message = NULL;
} }
rewind (message->file);
} }
return status; return status;

115
test/T750-gzip.sh Executable file
View file

@ -0,0 +1,115 @@
#!/usr/bin/env bash
# Tests for reading gzip-compressed message files. The subtests below
# exercise "notmuch new", "notmuch search" and "notmuch show" against a
# mail store that mixes compressed and uncompressed messages.
test_description='support for gzipped messages'
# Load the shared test helper library that sits next to this script;
# abort immediately if it cannot be sourced.
. $(dirname "$0")/test-lib.sh || exit 1
#######################################################################
# notmuch new
#
# Each subtest below creates one or two fresh messages, compresses one
# of them in place with gzip, and checks the summary line printed by
# "notmuch new". The plain and --full-scan variants are both covered.
#
# NOTE(review): generate_message and NOTMUCH_NEW are not defined in this
# file; generate_message presumably sets gen_msg_filename to the path of
# the message it just wrote -- confirm against test-lib.
#
# The filename is quoted when passed to gzip so that a test directory
# whose path contains whitespace or glob characters is still handled as
# a single argument.
test_begin_subtest "Single new gzipped message"
generate_message
gzip "$gen_msg_filename"
output=$(NOTMUCH_NEW --debug)
test_expect_equal "$output" "Added 1 new message to the database."
test_begin_subtest "Single new gzipped message (full-scan)"
generate_message
gzip "$gen_msg_filename"
output=$(NOTMUCH_NEW --debug --full-scan 2>&1)
test_expect_equal "$output" "Added 1 new message to the database."
# Two messages are generated here, but only the first is compressed:
# gzip runs before the second generate_message call.
test_begin_subtest "Multiple new messages, one gzipped"
generate_message
gzip "$gen_msg_filename"
generate_message
output=$(NOTMUCH_NEW --debug)
test_expect_equal "$output" "Added 2 new messages to the database."
test_begin_subtest "Multiple new messages, one gzipped (full-scan)"
generate_message
gzip "$gen_msg_filename"
generate_message
output=$(NOTMUCH_NEW --debug --full-scan 2>&1)
test_expect_equal "$output" "Added 2 new messages to the database."
# A message is first indexed uncompressed and then gzipped in place.
# The next "notmuch new" run is expected to treat the change from the
# plain name to the .gz name as a rename of an existing message rather
# than as new mail.
test_begin_subtest "Renamed (gzipped) message"
generate_message
# Debugging aid: show which file this subtest operates on. (The variable
# name was previously misspelled, so this printed an empty line.)
echo "$gen_msg_filename"
notmuch new > /dev/null
gzip "$gen_msg_filename"
output=$(NOTMUCH_NEW --debug)
test_expect_equal "$output" "(D) add_files, pass 2: queuing passed file ${gen_msg_filename} for deletion from database
No new mail. Detected 1 file rename."
######################################################################
# notmuch search
#
# These subtests query the database built up by the "notmuch new"
# subtests earlier in this script, so they depend on those having run.
test_begin_subtest "notmuch search with partially gzipped mail store"
# Every indexed message should be listed, whether or not its file is
# compressed. The subjects in the expected output match the names of
# the subtests that generated the messages.
notmuch search '*' | notmuch_search_sanitize > OUTPUT
cat <<EOF > EXPECTED
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Single new gzipped message (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Single new gzipped message (full-scan) (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (full-scan) (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (full-scan) (inbox unread)
thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Renamed (gzipped) message (inbox unread)
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "notmuch search --output=files with partially gzipped mail store"
# Compressed messages are expected under their .gz names. msg-004 and
# msg-006 are the second messages of the two "Multiple new messages"
# subtests, which were never gzipped.
notmuch search --output=files '*' | notmuch_search_files_sanitize > OUTPUT
cat <<EOF > EXPECTED
MAIL_DIR/msg-001.gz
MAIL_DIR/msg-002.gz
MAIL_DIR/msg-003.gz
MAIL_DIR/msg-004
MAIL_DIR/msg-005.gz
MAIL_DIR/msg-006
MAIL_DIR/msg-007.gz
EOF
test_expect_equal_file EXPECTED OUTPUT
######################################################################
# notmuch show
#
# Displays one uncompressed and one compressed message from the store
# populated by the "notmuch new" subtests earlier in this script.
test_begin_subtest "show un-gzipped message"
# Baseline: msg-006 was never compressed.
notmuch show id:msg-006@notmuch-test-suite | notmuch_show_sanitize > OUTPUT
cat <<EOF > EXPECTED
message{ id:msg-006@notmuch-test-suite depth:0 match:1 excluded:0 filename:/XXX/mail/msg-006
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Multiple new messages, one gzipped (full-scan)
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: Fri, 05 Jan 2001 15:43:51 +0000
header}
body{
part{ ID: 1, Content-type: text/plain
This is just a test message (#6)
part}
body}
message}
EOF
test_expect_equal_file EXPECTED OUTPUT
test_begin_subtest "show gzipped message"
# Marked as a known failure: showing a gzipped message (msg-007.gz) is
# not expected to work yet; notmuch-show support is left to a follow-up
# change.
test_subtest_known_broken
notmuch show id:msg-007@notmuch-test-suite | notmuch_show_sanitize > OUTPUT
cat <<EOF > EXPECTED
message{ id:msg-007@notmuch-test-suite depth:0 match:1 excluded:0 filename:/XXX/mail/msg-007.gz
header{
Notmuch Test Suite <test_suite@notmuchmail.org> (2001-01-05) (inbox unread)
Subject: Renamed (gzipped) message
From: Notmuch Test Suite <test_suite@notmuchmail.org>
To: Notmuch Test Suite <test_suite@notmuchmail.org>
Date: Fri, 05 Jan 2001 15:43:50 +0000
header}
body{
part{ ID: 1, Content-type: text/plain
This is just a test message (#7)
part}
body}
message}
EOF
test_expect_equal_file EXPECTED OUTPUT
# Signal the end of the test script to the harness.
test_done