2009-10-19 12:54:40 -07:00
|
|
|
/* message.c - Utility functions for parsing an email message for notmuch.
|
|
|
|
*
|
|
|
|
* Copyright © 2009 Carl Worth
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see http://www.gnu.org/licenses/ .
|
|
|
|
*
|
|
|
|
* Author: Carl Worth <cworth@cworth.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdarg.h>
|
|
|
|
|
|
|
|
#include "notmuch-private.h"
|
|
|
|
|
2009-11-02 14:32:20 -08:00
|
|
|
#include <gmime/gmime.h>
|
|
|
|
|
2009-10-19 13:40:56 -07:00
|
|
|
#include <glib.h> /* GHashTable */
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
/* Growable buffer used to accumulate one (unfolded) header value. */
typedef struct {
    char *str;      /* Heap buffer holding the value text (NULL when size is 0). */
    size_t size;    /* Number of bytes currently allocated for 'str'. */
    size_t len;     /* Number of bytes of 'str' in use, excluding the NUL. */
} header_value_closure_t;
|
|
|
|
|
2009-10-20 15:09:51 -07:00
|
|
|
struct _notmuch_message_file {
|
2009-10-19 16:38:44 -07:00
|
|
|
/* File object */
|
|
|
|
FILE *file;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
/* Header storage */
|
|
|
|
int restrict_headers;
|
|
|
|
GHashTable *headers;
|
2009-10-19 23:08:49 -07:00
|
|
|
int broken_headers;
|
|
|
|
int good_headers;
|
2009-10-29 08:51:12 -07:00
|
|
|
size_t header_size; /* Length of full message header in bytes. */
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
/* Parsing state */
|
2009-10-19 16:38:44 -07:00
|
|
|
char *line;
|
|
|
|
size_t line_size;
|
|
|
|
header_value_closure_t value;
|
|
|
|
|
2009-10-19 12:54:40 -07:00
|
|
|
int parsing_started;
|
|
|
|
int parsing_finished;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Key-equality callback for the header hash table: returns non-zero
 * when the two NUL-terminated strings 'a' and 'b' are equal ignoring
 * case, and zero otherwise. */
static int
strcase_equal (const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;

    if (strcasecmp (lhs, rhs) != 0)
        return 0;

    return 1;
}
|
|
|
|
|
|
|
|
/* Hash callback for the header hash table: a case-insensitive djb2
 * hash of the NUL-terminated string 'ptr'.
 *
 * Strings that differ only in letter case hash to the same value, which
 * keeps this function consistent with strcase_equal. A NULL pointer
 * hashes like the empty string (the djb2 seed, 5381).
 */
static unsigned int
strcase_hash (const void *ptr)
{
    const unsigned char *s = ptr;

    /* This is the djb2 hash. */
    unsigned int hash = 5381;

    if (s == NULL)
        return hash;

    /* Iterate as unsigned char: passing a negative plain-char value
     * (any byte >= 0x80 where char is signed) to tolower() is
     * undefined behavior. */
    for (; *s != '\0'; s++)
        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);

    return hash;
}
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
static int
|
|
|
|
_notmuch_message_file_destructor (notmuch_message_file_t *message)
|
|
|
|
{
|
|
|
|
if (message->line)
|
|
|
|
free (message->line);
|
|
|
|
|
|
|
|
if (message->value.size)
|
|
|
|
free (message->value.str);
|
|
|
|
|
|
|
|
if (message->headers)
|
|
|
|
g_hash_table_destroy (message->headers);
|
|
|
|
|
|
|
|
if (message->file)
|
|
|
|
fclose (message->file);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create a new notmuch_message_file_t for 'filename' with 'ctx' as
|
|
|
|
* the talloc owner. */
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_t *
|
2009-10-26 17:35:31 -07:00
|
|
|
_notmuch_message_file_open_ctx (void *ctx, const char *filename)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_t *message;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
message = talloc_zero (ctx, notmuch_message_file_t);
|
|
|
|
if (unlikely (message == NULL))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
talloc_set_destructor (message, _notmuch_message_file_destructor);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
message->file = fopen (filename, "r");
|
|
|
|
if (message->file == NULL)
|
2009-10-19 12:54:40 -07:00
|
|
|
goto FAIL;
|
|
|
|
|
|
|
|
message->headers = g_hash_table_new_full (strcase_hash,
|
|
|
|
strcase_equal,
|
|
|
|
free,
|
2012-12-21 19:52:01 +02:00
|
|
|
g_free);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
message->parsing_started = 0;
|
|
|
|
message->parsing_finished = 0;
|
|
|
|
|
|
|
|
return message;
|
|
|
|
|
|
|
|
FAIL:
|
|
|
|
fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_close (message);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-10-26 17:35:31 -07:00
|
|
|
/* Open 'filename' as a message file with no talloc owner (a NULL
 * context is passed to _notmuch_message_file_open_ctx). Returns
 * whatever that function returns, i.e. NULL on failure. */
notmuch_message_file_t *
notmuch_message_file_open (const char *filename)
{
    void *no_owner = NULL;

    return _notmuch_message_file_open_ctx (no_owner, filename);
}
|
|
|
|
|
2009-10-19 12:54:40 -07:00
|
|
|
void
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_close (notmuch_message_file_t *message)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
2009-10-26 15:17:10 -07:00
|
|
|
talloc_free (message);
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
|
|
|
|
va_list va_headers)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
|
|
|
char *header;
|
|
|
|
|
2009-10-25 10:22:41 -07:00
|
|
|
if (message->parsing_started)
|
|
|
|
INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
while (1) {
|
|
|
|
header = va_arg (va_headers, char*);
|
|
|
|
if (header == NULL)
|
|
|
|
break;
|
|
|
|
g_hash_table_insert (message->headers,
|
|
|
|
xstrdup (header), NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
message->restrict_headers = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
|
|
|
va_list va_headers;
|
|
|
|
|
|
|
|
va_start (va_headers, message);
|
|
|
|
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_restrict_headersv (message, va_headers);
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2009-10-25 15:58:05 -07:00
|
|
|
static void
|
2009-10-19 16:38:44 -07:00
|
|
|
copy_header_unfolding (header_value_closure_t *value,
|
|
|
|
const char *chunk)
|
|
|
|
{
|
|
|
|
char *last;
|
|
|
|
|
|
|
|
if (chunk == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
while (*chunk == ' ' || *chunk == '\t')
|
|
|
|
chunk++;
|
|
|
|
|
|
|
|
if (value->len + 1 + strlen (chunk) + 1 > value->size) {
|
2009-10-25 15:39:53 -07:00
|
|
|
unsigned int new_size = value->size;
|
2009-10-19 16:38:44 -07:00
|
|
|
if (value->size == 0)
|
|
|
|
new_size = strlen (chunk) + 1;
|
|
|
|
else
|
|
|
|
while (value->len + 1 + strlen (chunk) + 1 > new_size)
|
|
|
|
new_size *= 2;
|
|
|
|
value->str = xrealloc (value->str, new_size);
|
|
|
|
value->size = new_size;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
last = value->str + value->len;
|
|
|
|
if (value->len) {
|
|
|
|
*last = ' ';
|
|
|
|
last++;
|
|
|
|
value->len++;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
strcpy (last, chunk);
|
|
|
|
value->len += strlen (chunk);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
last = value->str + value->len - 1;
|
|
|
|
if (*last == '\n') {
|
|
|
|
*last = '\0';
|
|
|
|
value->len--;
|
|
|
|
}
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
|
|
|
|
2009-10-29 08:51:12 -07:00
|
|
|
/* As a special-case, a value of NULL for header_desired will force
|
|
|
|
* the entire header to be parsed if it is not parsed already. This is
|
2010-04-26 12:58:34 -07:00
|
|
|
* used by the _notmuch_message_file_get_headers_end function.
|
|
|
|
* Another special case is the Received: header. For this header we
|
|
|
|
* want to concatenate all instances of the header instead of just
|
|
|
|
* hashing the first instance as we use this when analyzing the path
|
|
|
|
* the mail has taken from sender to recipient.
|
|
|
|
*/
|
2009-10-19 12:54:40 -07:00
|
|
|
const char *
|
2009-10-20 15:09:51 -07:00
|
|
|
notmuch_message_file_get_header (notmuch_message_file_t *message,
|
|
|
|
const char *header_desired)
|
2009-10-19 12:54:40 -07:00
|
|
|
{
|
|
|
|
int contains;
|
2010-04-26 12:58:34 -07:00
|
|
|
char *header, *decoded_value, *header_sofar, *combined_header;
|
2009-10-19 16:38:44 -07:00
|
|
|
const char *s, *colon;
|
2010-04-26 12:58:34 -07:00
|
|
|
int match, newhdr, hdrsofar, is_received;
|
2009-11-02 14:32:20 -08:00
|
|
|
static int initialized = 0;
|
|
|
|
|
2010-04-26 12:58:34 -07:00
|
|
|
is_received = (strcmp(header_desired,"received") == 0);
|
|
|
|
|
2009-11-02 14:32:20 -08:00
|
|
|
if (! initialized) {
|
2013-09-11 20:36:43 +03:00
|
|
|
g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
|
2009-11-02 14:32:20 -08:00
|
|
|
initialized = 1;
|
|
|
|
}
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
message->parsing_started = 1;
|
|
|
|
|
2009-10-29 08:51:12 -07:00
|
|
|
if (header_desired == NULL)
|
|
|
|
contains = 0;
|
|
|
|
else
|
|
|
|
contains = g_hash_table_lookup_extended (message->headers,
|
|
|
|
header_desired, NULL,
|
2009-11-02 14:32:20 -08:00
|
|
|
(gpointer *) &decoded_value);
|
2009-10-29 08:51:12 -07:00
|
|
|
|
2009-11-02 14:32:20 -08:00
|
|
|
if (contains && decoded_value)
|
|
|
|
return decoded_value;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
if (message->parsing_finished)
|
2009-11-11 23:00:58 -08:00
|
|
|
return "";
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
#define NEXT_HEADER_LINE(closure) \
|
2009-10-29 08:51:12 -07:00
|
|
|
while (1) { \
|
2009-10-19 16:38:44 -07:00
|
|
|
ssize_t bytes_read = getline (&message->line, \
|
|
|
|
&message->line_size, \
|
|
|
|
message->file); \
|
|
|
|
if (bytes_read == -1) { \
|
|
|
|
message->parsing_finished = 1; \
|
|
|
|
break; \
|
|
|
|
} \
|
|
|
|
if (*message->line == '\n') { \
|
|
|
|
message->parsing_finished = 1; \
|
|
|
|
break; \
|
|
|
|
} \
|
|
|
|
if (closure && \
|
|
|
|
(*message->line == ' ' || *message->line == '\t')) \
|
|
|
|
{ \
|
|
|
|
copy_header_unfolding ((closure), message->line); \
|
|
|
|
} \
|
2009-10-29 08:51:12 -07:00
|
|
|
if (*message->line == ' ' || *message->line == '\t') \
|
|
|
|
message->header_size += strlen (message->line); \
|
|
|
|
else \
|
|
|
|
break; \
|
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
|
|
|
|
if (message->line == NULL)
|
|
|
|
NEXT_HEADER_LINE (NULL);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
while (1) {
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
if (message->parsing_finished)
|
|
|
|
break;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
colon = strchr (message->line, ':');
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
if (colon == NULL) {
|
2009-10-19 23:08:49 -07:00
|
|
|
message->broken_headers++;
|
|
|
|
/* A simple heuristic for giving up on things that just
|
|
|
|
* don't look like mail messages. */
|
|
|
|
if (message->broken_headers >= 10 &&
|
|
|
|
message->good_headers < 5)
|
|
|
|
{
|
|
|
|
message->parsing_finished = 1;
|
2009-11-17 18:37:45 -08:00
|
|
|
break;
|
2009-10-19 23:08:49 -07:00
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
NEXT_HEADER_LINE (NULL);
|
2009-10-19 12:54:40 -07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-10-29 08:51:12 -07:00
|
|
|
message->header_size += strlen (message->line);
|
|
|
|
|
2009-10-19 23:08:49 -07:00
|
|
|
message->good_headers++;
|
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
header = xstrndup (message->line, colon - message->line);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
|
|
|
if (message->restrict_headers &&
|
|
|
|
! g_hash_table_lookup_extended (message->headers,
|
|
|
|
header, NULL, NULL))
|
|
|
|
{
|
|
|
|
free (header);
|
2009-10-19 16:38:44 -07:00
|
|
|
NEXT_HEADER_LINE (NULL);
|
2009-10-19 12:54:40 -07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
s = colon + 1;
|
2009-10-19 16:38:44 -07:00
|
|
|
while (*s == ' ' || *s == '\t')
|
|
|
|
s++;
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
message->value.len = 0;
|
|
|
|
copy_header_unfolding (&message->value, s);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
NEXT_HEADER_LINE (&message->value);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2010-04-26 12:58:34 -07:00
|
|
|
if (header_desired == NULL)
|
2009-10-29 08:51:12 -07:00
|
|
|
match = 0;
|
|
|
|
else
|
|
|
|
match = (strcasecmp (header, header_desired) == 0);
|
2009-10-19 12:54:40 -07:00
|
|
|
|
2009-11-02 14:32:20 -08:00
|
|
|
decoded_value = g_mime_utils_header_decode_text (message->value.str);
|
2010-04-26 12:58:34 -07:00
|
|
|
header_sofar = (char *)g_hash_table_lookup (message->headers, header);
|
|
|
|
/* we treat the Received: header special - we want to concat ALL of
|
|
|
|
* the Received: headers we encounter.
|
|
|
|
* for everything else we return the first instance of a header */
|
2011-05-17 12:10:32 +10:00
|
|
|
if (strcasecmp(header, "received") == 0) {
|
2010-04-26 12:58:34 -07:00
|
|
|
if (header_sofar == NULL) {
|
|
|
|
/* first Received: header we encountered; just add it */
|
|
|
|
g_hash_table_insert (message->headers, header, decoded_value);
|
|
|
|
} else {
|
|
|
|
/* we need to add the header to those we already collected */
|
|
|
|
newhdr = strlen(decoded_value);
|
|
|
|
hdrsofar = strlen(header_sofar);
|
2012-12-21 19:52:01 +02:00
|
|
|
combined_header = g_malloc(hdrsofar + newhdr + 2);
|
2010-04-26 12:58:34 -07:00
|
|
|
strncpy(combined_header,header_sofar,hdrsofar);
|
|
|
|
*(combined_header+hdrsofar) = ' ';
|
|
|
|
strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1);
|
2012-12-21 19:52:01 +02:00
|
|
|
g_free (decoded_value);
|
2010-04-26 12:58:34 -07:00
|
|
|
g_hash_table_insert (message->headers, header, combined_header);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (header_sofar == NULL) {
|
|
|
|
/* Only insert if we don't have a value for this header, yet. */
|
|
|
|
g_hash_table_insert (message->headers, header, decoded_value);
|
2011-05-28 14:51:51 -07:00
|
|
|
} else {
|
|
|
|
free (header);
|
2012-12-21 19:52:01 +02:00
|
|
|
g_free (decoded_value);
|
2011-05-28 14:51:51 -07:00
|
|
|
decoded_value = header_sofar;
|
2010-04-26 12:58:34 -07:00
|
|
|
}
|
2010-04-06 12:45:30 -07:00
|
|
|
}
|
2010-04-26 12:58:34 -07:00
|
|
|
/* if we found a match we can bail - unless of course we are
|
|
|
|
* collecting all the Received: headers */
|
|
|
|
if (match && !is_received)
|
2009-11-02 14:32:20 -08:00
|
|
|
return decoded_value;
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|
2009-10-19 16:38:44 -07:00
|
|
|
|
2009-11-17 21:28:37 +06:00
|
|
|
if (message->parsing_finished) {
|
|
|
|
fclose (message->file);
|
|
|
|
message->file = NULL;
|
|
|
|
}
|
|
|
|
|
2009-10-19 16:38:44 -07:00
|
|
|
if (message->line)
|
|
|
|
free (message->line);
|
|
|
|
message->line = NULL;
|
|
|
|
|
|
|
|
if (message->value.size) {
|
|
|
|
free (message->value.str);
|
|
|
|
message->value.str = NULL;
|
|
|
|
message->value.size = 0;
|
|
|
|
message->value.len = 0;
|
|
|
|
}
|
|
|
|
|
2010-04-26 12:58:34 -07:00
|
|
|
/* For the Received: header we actually might end up here even
|
|
|
|
* though we found the header (as we force continued parsing
|
|
|
|
* in that case). So let's check if that's the header we were
|
|
|
|
* looking for and return the value that we found (if any)
|
|
|
|
*/
|
|
|
|
if (is_received)
|
|
|
|
return (char *)g_hash_table_lookup (message->headers, "received");
|
|
|
|
|
2009-10-22 15:46:22 -07:00
|
|
|
/* We've parsed all headers and never found the one we're looking
|
|
|
|
* for. It's probably just not there, but let's check that we
|
|
|
|
* didn't make a mistake preventing us from seeing it. */
|
2009-10-29 08:51:12 -07:00
|
|
|
if (message->restrict_headers && header_desired &&
|
2009-10-22 15:46:22 -07:00
|
|
|
! g_hash_table_lookup_extended (message->headers,
|
|
|
|
header_desired, NULL, NULL))
|
|
|
|
{
|
2009-10-25 10:22:41 -07:00
|
|
|
INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
|
|
|
|
"included in call to notmuch_message_file_restrict_headers\n",
|
|
|
|
header_desired);
|
2009-10-22 15:46:22 -07:00
|
|
|
}
|
|
|
|
|
2009-11-11 23:00:58 -08:00
|
|
|
return "";
|
2009-10-19 12:54:40 -07:00
|
|
|
}
|