2009-10-19 12:54:40 -07:00
|
|
|
/* message.c - Utility functions for parsing an email message for notmuch.
 *
 * Copyright © 2009 Carl Worth
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see https://www.gnu.org/licenses/ .
 *
 * Author: Carl Worth <cworth@cworth.org>
 */
|
|
|
|
|
|
|
|
#include <stdarg.h>
|
|
|
|
|
|
|
|
#include "notmuch-private.h"
|
|
|
|
|
2009-11-02 14:32:20 -08:00
|
|
|
#include <gmime/gmime.h>
|
|
|
|
|
2009-10-19 13:40:56 -07:00
|
|
|
#include <glib.h> /* GHashTable */
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-20 15:09:51 -07:00
|
|
|
struct _notmuch_message_file {
|
2019-03-24 00:32:43 -03:00
|
|
|
/* open stream to (possibly gzipped) file */
|
|
|
|
GMimeStream *stream;
|
2014-03-31 00:21:49 +03:00
|
|
|
char *filename;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
/* Cache for decoded headers */
|
2009-10-19 12:54:40 -07:00
|
|
|
GHashTable *headers;
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
GMimeMessage *message;
|
2009-10-19 12:54:40 -07:00
|
|
|
};
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
static int
|
|
|
|
_notmuch_message_file_destructor (notmuch_message_file_t *message)
|
|
|
|
{
|
|
|
|
if (message->headers)
|
|
|
|
g_hash_table_destroy (message->headers);
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
if (message->message)
|
|
|
|
g_object_unref (message->message);
|
|
|
|
|
2019-05-09 21:23:24 -03:00
|
|
|
if (message->stream)
|
|
|
|
g_object_unref (message->stream);
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
|
|
|
|
* the talloc owner. */
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_t *
|
2014-12-26 18:34:49 +01:00
|
|
|
_notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
|
|
|
|
void *ctx, const char *filename)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_t *message;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
message = talloc_zero (ctx, notmuch_message_file_t);
|
|
|
|
if (unlikely (message == NULL))
|
|
|
|
return NULL;
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
message->filename = talloc_strdup (message, filename);
|
|
|
|
if (message->filename == NULL)
|
|
|
|
goto FAIL;
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
talloc_set_destructor (message, _notmuch_message_file_destructor);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2019-03-24 00:32:43 -03:00
|
|
|
message->stream = g_mime_stream_gzfile_open (filename);
|
|
|
|
if (message->stream == NULL)
|
2009-10-19 12:54:40 -07:00
|
|
|
goto FAIL;
|
|
|
|
|
|
|
|
return message;
|
|
|
|
|
|
|
|
FAIL:
|
2014-12-26 18:34:49 +01:00
|
|
|
_notmuch_database_log (notmuch, "Error opening %s: %s\n",
|
2019-06-13 07:55:35 -03:00
|
|
|
filename, strerror (errno));
|
2014-05-13 05:44:05 -04:00
|
|
|
_notmuch_message_file_close (message);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
notmuch_message_file_t *
|
2014-12-26 18:34:49 +01:00
|
|
|
_notmuch_message_file_open (notmuch_database_t *notmuch,
|
|
|
|
const char *filename)
|
2009-10-26 17:35:31 -07:00
|
|
|
{
|
2014-12-26 18:34:49 +01:00
|
|
|
return _notmuch_message_file_open_ctx (notmuch, NULL, filename);
|
2009-10-26 17:35:31 -07:00
|
|
|
}
|
|
|
|
|
2017-06-04 09:32:27 -03:00
|
|
|
const char *
|
|
|
|
_notmuch_message_file_get_filename (notmuch_message_file_t *message_file)
|
|
|
|
{
|
|
|
|
return message_file->filename;
|
|
|
|
}
|
|
|
|
|
2009-10-19 12:54:40 -07:00
|
|
|
void
|
2014-05-13 05:44:05 -04:00
|
|
|
_notmuch_message_file_close (notmuch_message_file_t *message)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2009-10-26 15:17:10 -07:00
|
|
|
talloc_free (message);
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2017-10-07 11:44:05 +03:00
|
|
|
static bool
|
2019-03-24 00:32:43 -03:00
|
|
|
_is_mbox (GMimeStream *stream)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2014-03-31 00:21:49 +03:00
|
|
|
char from_buf[5];
|
2017-10-07 11:44:05 +03:00
|
|
|
bool ret = false;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
/* Is this mbox? */
|
2019-06-13 07:55:35 -03:00
|
|
|
if (g_mime_stream_read (stream, from_buf, sizeof (from_buf)) == sizeof (from_buf) &&
|
2014-03-31 00:21:49 +03:00
|
|
|
strncmp (from_buf, "From ", 5) == 0)
|
2017-10-07 11:44:05 +03:00
|
|
|
ret = true;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2019-03-24 00:32:43 -03:00
|
|
|
g_mime_stream_reset (stream);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
return ret;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
notmuch_status_t
|
|
|
|
_notmuch_message_file_parse (notmuch_message_file_t *message)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2014-03-31 00:21:49 +03:00
|
|
|
GMimeParser *parser;
|
|
|
|
notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
|
|
|
|
static int initialized = 0;
|
2017-10-07 11:44:05 +03:00
|
|
|
bool is_mbox;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
if (message->message)
|
|
|
|
return NOTMUCH_STATUS_SUCCESS;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2019-03-24 00:32:43 -03:00
|
|
|
is_mbox = _is_mbox (message->stream);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
if (! initialized) {
|
2019-05-02 09:19:42 -04:00
|
|
|
g_mime_init ();
|
2014-03-31 00:21:49 +03:00
|
|
|
initialized = 1;
|
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
|
|
|
|
free, g_free);
|
|
|
|
if (! message->headers)
|
|
|
|
return NOTMUCH_STATUS_OUT_OF_MEMORY;
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2019-03-24 00:32:43 -03:00
|
|
|
parser = g_mime_parser_new_with_stream (message->stream);
|
2014-06-05 08:34:09 +02:00
|
|
|
g_mime_parser_set_scan_from (parser, is_mbox);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2019-05-02 09:19:46 -04:00
|
|
|
message->message = g_mime_parser_construct_message (parser, NULL);
|
2014-03-31 00:21:49 +03:00
|
|
|
if (! message->message) {
|
|
|
|
status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
|
|
|
|
goto DONE;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2014-11-23 13:15:12 +02:00
|
|
|
if (is_mbox && ! g_mime_parser_eos (parser)) {
|
2014-06-05 08:34:09 +02:00
|
|
|
/*
|
2014-11-23 13:15:12 +02:00
|
|
|
* This is a multi-message mbox. (For historical reasons, we
|
|
|
|
* do support single-message mboxes.)
|
2014-06-05 08:34:09 +02:00
|
|
|
*/
|
2014-11-23 13:15:12 +02:00
|
|
|
status = NOTMUCH_STATUS_FILE_NOT_EMAIL;
|
2014-06-05 08:34:09 +02:00
|
|
|
}
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
DONE:
|
2019-03-24 00:32:43 -03:00
|
|
|
g_mime_stream_reset (message->stream);
|
2014-03-31 00:21:49 +03:00
|
|
|
g_object_unref (parser);
|
|
|
|
|
|
|
|
if (status) {
|
|
|
|
g_hash_table_destroy (message->headers);
|
|
|
|
message->headers = NULL;
|
|
|
|
|
|
|
|
if (message->message) {
|
|
|
|
g_object_unref (message->message);
|
|
|
|
message->message = NULL;
|
|
|
|
}
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
}
|
2014-03-31 00:21:49 +03:00
|
|
|
|
|
|
|
return status;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
notmuch_status_t
|
|
|
|
_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
|
|
|
|
GMimeMessage **mime_message)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2014-03-31 00:21:49 +03:00
|
|
|
notmuch_status_t status;
|
2009-11-02 14:32:20 -08:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
status = _notmuch_message_file_parse (message);
|
|
|
|
if (status)
|
|
|
|
return status;
|
2010-04-26 12:58:34 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
*mime_message = message->message;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
return NOTMUCH_STATUS_SUCCESS;
|
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
/*
 * Get all instances of a header decoded and concatenated.
 *
 * The result must be freed using g_free().
 *
 * Return NULL on errors, empty string for non-existing headers.
 */
|
2017-05-17 07:18:55 -03:00
|
|
|
|
|
|
|
/* Decode 'value' and append it to 'combined', separated by one space.
 *
 * Takes ownership of 'combined' (which may be NULL, meaning "nothing
 * accumulated yet") and returns the new accumulated string, to be
 * released with g_free(). If decoding fails, 'combined' is freed and
 * NULL is returned. */
static char *
_extend_header (char *combined, const char *value)
{
    char *text = g_mime_utils_header_decode_text (NULL, value);
    char *joined;

    if (text == NULL) {
        /* g_free accepts NULL, so no guard is needed. */
        g_free (combined);
        return NULL;
    }

    /* First instance: the decoded text is the whole result. */
    if (combined == NULL)
        return text;

    joined = g_strdup_printf ("%s %s", combined, text);
    g_free (text);
    g_free (combined);

    return joined;
}
|
|
|
|
|
2017-05-17 07:40:09 -03:00
|
|
|
static char *
|
|
|
|
_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
|
|
|
|
const char *header)
|
|
|
|
{
|
|
|
|
char *combined = NULL;
|
|
|
|
GMimeHeaderList *headers;
|
|
|
|
|
|
|
|
headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
|
|
|
|
if (! headers)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
2019-06-13 07:55:35 -03:00
|
|
|
for (int i = 0; i < g_mime_header_list_get_count (headers); i++) {
|
2017-05-17 07:40:09 -03:00
|
|
|
const char *value;
|
|
|
|
GMimeHeader *g_header = g_mime_header_list_get_header_at (headers, i);
|
|
|
|
|
|
|
|
if (strcasecmp (g_mime_header_get_name (g_header), header) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* GMime retains ownership of value, we hope */
|
|
|
|
value = g_mime_header_get_value (g_header);
|
|
|
|
|
|
|
|
combined = _extend_header (combined, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return empty string for non-existing headers. */
|
|
|
|
if (! combined)
|
|
|
|
combined = g_strdup ("");
|
|
|
|
|
|
|
|
return combined;
|
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
const char *
|
2014-05-13 05:44:05 -04:00
|
|
|
_notmuch_message_file_get_header (notmuch_message_file_t *message,
|
2019-06-13 07:55:35 -03:00
|
|
|
const char *header)
|
2014-03-31 00:21:49 +03:00
|
|
|
{
|
|
|
|
const char *value;
|
|
|
|
char *decoded;
|
2009-11-17 21:28:37 +06:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
if (_notmuch_message_file_parse (message))
|
|
|
|
return NULL;
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
/* If we have a cached decoded value, use it. */
|
|
|
|
value = g_hash_table_lookup (message->headers, header);
|
|
|
|
if (value)
|
|
|
|
return value;
|
|
|
|
|
|
|
|
if (strcasecmp (header, "received") == 0) {
|
|
|
|
/*
|
|
|
|
* The Received: header is special. We concatenate all
|
|
|
|
* instances of the header as we use this when analyzing the
|
|
|
|
* path the mail has taken from sender to recipient.
|
|
|
|
*/
|
|
|
|
decoded = _notmuch_message_file_get_combined_header (message, header);
|
|
|
|
} else {
|
|
|
|
value = g_mime_object_get_header (GMIME_OBJECT (message->message),
|
|
|
|
header);
|
|
|
|
if (value)
|
2019-05-02 09:19:45 -04:00
|
|
|
decoded = g_mime_utils_header_decode_text (NULL, value);
|
2014-03-31 00:21:49 +03:00
|
|
|
else
|
|
|
|
decoded = g_strdup ("");
|
2009-10-19 16:38:44 -07:00
|
|
|
}
|
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
if (! decoded)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Cache the decoded value. We also own the strings. */
|
|
|
|
g_hash_table_insert (message->headers, xstrdup (header), decoded);
|
2009-10-22 15:46:22 -07:00
|
|
|
|
2014-03-31 00:21:49 +03:00
|
|
|
return decoded;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
2017-06-04 09:32:27 -03:00
|
|
|
|
|
|
|
notmuch_status_t
|
|
|
|
_notmuch_message_file_get_headers (notmuch_message_file_t *message_file,
|
|
|
|
const char **from_out,
|
|
|
|
const char **subject_out,
|
|
|
|
const char **to_out,
|
|
|
|
const char **date_out,
|
|
|
|
char **message_id_out)
|
|
|
|
{
|
|
|
|
notmuch_status_t ret;
|
|
|
|
const char *header;
|
|
|
|
const char *from, *to, *subject, *date;
|
|
|
|
char *message_id = NULL;
|
|
|
|
|
|
|
|
/* Parse message up front to get better error status. */
|
|
|
|
ret = _notmuch_message_file_parse (message_file);
|
|
|
|
if (ret)
|
|
|
|
goto DONE;
|
|
|
|
|
|
|
|
/* Before we do any real work, (especially before doing a
|
|
|
|
* potential SHA-1 computation on the entire file's contents),
|
|
|
|
* let's make sure that what we're looking at looks like an
|
|
|
|
* actual email message.
|
|
|
|
*/
|
|
|
|
from = _notmuch_message_file_get_header (message_file, "from");
|
|
|
|
subject = _notmuch_message_file_get_header (message_file, "subject");
|
|
|
|
to = _notmuch_message_file_get_header (message_file, "to");
|
|
|
|
date = _notmuch_message_file_get_header (message_file, "date");
|
|
|
|
|
|
|
|
if ((from == NULL || *from == '\0') &&
|
|
|
|
(subject == NULL || *subject == '\0') &&
|
|
|
|
(to == NULL || *to == '\0')) {
|
|
|
|
ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
|
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now that we're sure it's mail, the first order of business
|
|
|
|
* is to find a message ID (or else create one ourselves).
|
|
|
|
*/
|
|
|
|
header = _notmuch_message_file_get_header (message_file, "message-id");
|
|
|
|
if (header && *header != '\0') {
|
|
|
|
message_id = _notmuch_message_id_parse (message_file, header, NULL);
|
|
|
|
|
|
|
|
/* So the header value isn't RFC-compliant, but it's
|
|
|
|
* better than no message-id at all.
|
|
|
|
*/
|
|
|
|
if (message_id == NULL)
|
|
|
|
message_id = talloc_strdup (message_file, header);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (message_id == NULL ) {
|
|
|
|
/* No message-id at all, let's generate one by taking a
|
|
|
|
* hash over the file's contents.
|
|
|
|
*/
|
|
|
|
char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file));
|
|
|
|
|
|
|
|
/* If that failed too, something is really wrong. Give up. */
|
|
|
|
if (sha1 == NULL) {
|
|
|
|
ret = NOTMUCH_STATUS_FILE_ERROR;
|
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1);
|
|
|
|
free (sha1);
|
|
|
|
}
|
2019-06-13 07:55:35 -03:00
|
|
|
DONE:
|
2017-06-04 09:32:27 -03:00
|
|
|
if (ret == NOTMUCH_STATUS_SUCCESS) {
|
|
|
|
if (from_out)
|
|
|
|
*from_out = from;
|
|
|
|
if (subject_out)
|
|
|
|
*subject_out = subject;
|
|
|
|
if (to_out)
|
|
|
|
*to_out = to;
|
|
|
|
if (date_out)
|
|
|
|
*date_out = date;
|
|
|
|
if (message_id_out)
|
|
|
|
*message_id_out = message_id;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|