From 852167479f552cd396b1fdcbe9b1cb4db40e5e0a Mon Sep 17 00:00:00 2001 From: David Bremner Date: Sun, 24 Mar 2019 00:32:43 -0300 Subject: [PATCH] lib/message_file: open gzipped files Rather than storing the lower level stdio FILE object, we store a GMime stream. This allows both transparent decompression, and passing the stream into GMime for parsing. As a side effect, we can let GMime close the underlying OS stream (indeed, that stream isn't visible here anymore). This change is enough to get notmuch-{new,search} working, but there is still some work required for notmuch-show, to be done in a following commit. --- lib/message-file.c | 31 ++++-------- test/T750-gzip.sh | 115 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 21 deletions(-) create mode 100755 test/T750-gzip.sh diff --git a/lib/message-file.c b/lib/message-file.c index 18802974..50855067 100644 --- a/lib/message-file.c +++ b/lib/message-file.c @@ -27,8 +27,8 @@ #include <glib.h> /* GHashTable */ struct _notmuch_message_file { - /* File object */ - FILE *file; + /* open stream to (possibly gzipped) file */ + GMimeStream *stream; char *filename; /* Cache for decoded headers */ @@ -46,9 +46,6 @@ _notmuch_message_file_destructor (notmuch_message_file_t *message) if (message->message) g_object_unref (message->message); - if (message->file) - fclose (message->file); - return 0; } @@ -64,15 +61,14 @@ _notmuch_message_file_open_ctx (notmuch_database_t *notmuch, if (unlikely (message == NULL)) return NULL; - /* Only needed for error messages during parsing. 
*/ message->filename = talloc_strdup (message, filename); if (message->filename == NULL) goto FAIL; talloc_set_destructor (message, _notmuch_message_file_destructor); - message->file = fopen (filename, "r"); - if (message->file == NULL) + message->stream = g_mime_stream_gzfile_open (filename); + if (message->stream == NULL) goto FAIL; return message; @@ -105,17 +101,17 @@ _notmuch_message_file_close (notmuch_message_file_t *message) } static bool -_is_mbox (FILE *file) +_is_mbox (GMimeStream *stream) { char from_buf[5]; bool ret = false; /* Is this mbox? */ - if (fread (from_buf, sizeof (from_buf), 1, file) == 1 && + if (g_mime_stream_read (stream, from_buf, sizeof (from_buf)) == sizeof(from_buf) && strncmp (from_buf, "From ", 5) == 0) ret = true; - rewind (file); + g_mime_stream_reset (stream); return ret; } @@ -123,7 +119,6 @@ _is_mbox (FILE *file) notmuch_status_t _notmuch_message_file_parse (notmuch_message_file_t *message) { - GMimeStream *stream; GMimeParser *parser; notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; static int initialized = 0; @@ -132,7 +127,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message) if (message->message) return NOTMUCH_STATUS_SUCCESS; - is_mbox = _is_mbox (message->file); + is_mbox = _is_mbox (message->stream); if (! initialized) { g_mime_init (); @@ -144,12 +139,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message) if (! message->headers) return NOTMUCH_STATUS_OUT_OF_MEMORY; - stream = g_mime_stream_file_new (message->file); - - /* We'll own and fclose the FILE* ourselves. 
*/ - g_mime_stream_file_set_owner (GMIME_STREAM_FILE (stream), false); - - parser = g_mime_parser_new_with_stream (stream); + parser = g_mime_parser_new_with_stream (message->stream); g_mime_parser_set_scan_from (parser, is_mbox); message->message = g_mime_parser_construct_message (parser, NULL); @@ -167,7 +157,7 @@ _notmuch_message_file_parse (notmuch_message_file_t *message) } DONE: - g_object_unref (stream); + g_mime_stream_reset (message->stream); g_object_unref (parser); if (status) { @@ -179,7 +169,6 @@ _notmuch_message_file_parse (notmuch_message_file_t *message) message->message = NULL; } - rewind (message->file); } return status; diff --git a/test/T750-gzip.sh b/test/T750-gzip.sh new file mode 100755 index 00000000..96464956 --- /dev/null +++ b/test/T750-gzip.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +test_description='support for gzipped messages' +. $(dirname "$0")/test-lib.sh || exit 1 + +####################################################################### +# notmuch new +test_begin_subtest "Single new gzipped message" +generate_message +gzip $gen_msg_filename +output=$(NOTMUCH_NEW --debug) +test_expect_equal "$output" "Added 1 new message to the database." + +test_begin_subtest "Single new gzipped message (full-scan)" +generate_message +gzip $gen_msg_filename +output=$(NOTMUCH_NEW --debug --full-scan 2>&1) +test_expect_equal "$output" "Added 1 new message to the database." + +test_begin_subtest "Multiple new messages, one gzipped" +generate_message +gzip $gen_msg_filename +generate_message +output=$(NOTMUCH_NEW --debug) +test_expect_equal "$output" "Added 2 new messages to the database." + +test_begin_subtest "Multiple new messages, one gzipped (full-scan)" +generate_message +gzip $gen_msg_filename +generate_message +output=$(NOTMUCH_NEW --debug --full-scan 2>&1) +test_expect_equal "$output" "Added 2 new messages to the database." 
+ +test_begin_subtest "Renamed (gzipped) message" +generate_message +echo $gen_message_filename +notmuch new > /dev/null +gzip $gen_msg_filename +output=$(NOTMUCH_NEW --debug) +test_expect_equal "$output" "(D) add_files, pass 2: queuing passed file ${gen_msg_filename} for deletion from database +No new mail. Detected 1 file rename." + +###################################################################### +# notmuch search + +test_begin_subtest "notmuch search with partially gzipped mail store" +notmuch search '*' | notmuch_search_sanitize > OUTPUT +cat <<EOF > EXPECTED +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Single new gzipped message (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Single new gzipped message (full-scan) (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (full-scan) (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Multiple new messages, one gzipped (full-scan) (inbox unread) +thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Renamed (gzipped) message (inbox unread) +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "notmuch search --output=files with partially gzipped mail store" +notmuch search --output=files '*' | notmuch_search_files_sanitize > OUTPUT +cat <<EOF > EXPECTED +MAIL_DIR/msg-001.gz +MAIL_DIR/msg-002.gz +MAIL_DIR/msg-003.gz +MAIL_DIR/msg-004 +MAIL_DIR/msg-005.gz +MAIL_DIR/msg-006 +MAIL_DIR/msg-007.gz +EOF +test_expect_equal_file EXPECTED OUTPUT + +###################################################################### +# notmuch show + +test_begin_subtest "show un-gzipped message" +notmuch show id:msg-006@notmuch-test-suite | notmuch_show_sanitize > OUTPUT +cat <<EOF > EXPECTED + message{ id:msg-006@notmuch-test-suite depth:0 match:1 excluded:0 
filename:/XXX/mail/msg-006 + header{ +Notmuch Test Suite (2001-01-05) (inbox unread) +Subject: Multiple new messages, one gzipped (full-scan) +From: Notmuch Test Suite +To: Notmuch Test Suite +Date: Fri, 05 Jan 2001 15:43:51 +0000 + header} + body{ + part{ ID: 1, Content-type: text/plain +This is just a test message (#6) + part} + body} + message} +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_begin_subtest "show gzipped message" +test_subtest_known_broken +notmuch show id:msg-007@notmuch-test-suite | notmuch_show_sanitize > OUTPUT +cat <<EOF > EXPECTED + message{ id:msg-007@notmuch-test-suite depth:0 match:1 excluded:0 filename:/XXX/mail/msg-007.gz + header{ +Notmuch Test Suite (2001-01-05) (inbox unread) +Subject: Renamed (gzipped) message +From: Notmuch Test Suite +To: Notmuch Test Suite +Date: Fri, 05 Jan 2001 15:43:50 +0000 + header} + body{ + part{ ID: 1, Content-type: text/plain +This is just a test message (#7) + part} + body} + message} +EOF +test_expect_equal_file EXPECTED OUTPUT + +test_done