notmuch/lib/message-file.c
Dirk Hohndel 5b8b0377cb Make Received: header special in notmuch_message_file_get_header
With this patch the Received: header becomes special in the way
we treat headers - this is the only header for which we concatenate
all the instances we find (instead of just returning the first one).

This will be used in the From guessing code for replies as we need to
be able to walk ALL of the Received: headers in a message to have a
good chance to guess which mailbox this email was delivered to.

Signed-off-by: Dirk Hohndel <hohndel@infradead.org>
2010-04-26 14:44:06 -07:00

395 lines
9.9 KiB
C

/* message-file.c - Utility functions for parsing an email message for notmuch.
*
* Copyright © 2009 Carl Worth
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/ .
*
* Author: Carl Worth <cworth@cworth.org>
*/
#include <stdarg.h>
#include "notmuch-private.h"
#include <gmime/gmime.h>
#include <glib.h> /* GHashTable */
/* Growable buffer used to accumulate a single header value while its
 * folded continuation lines are being joined (see
 * copy_header_unfolding). */
typedef struct {
char *str; /* Buffer holding the value; grown with xrealloc. */
size_t size; /* Number of bytes currently allocated for 'str'. */
size_t len; /* Length of the string in 'str', not counting the NUL. */
} header_value_closure_t;
/* State for incrementally parsing the header of one message file.
 * Headers are read lazily: parsing advances only as far as needed to
 * answer each notmuch_message_file_get_header request. */
struct _notmuch_message_file {
/* File object */
FILE *file;
/* Header storage */
/* Set to 1 by notmuch_message_file_restrict_headersv; when non-zero,
 * only headers whose names are already keys in 'headers' are stored. */
int restrict_headers;
/* Maps header name to decoded value; names are hashed and compared
 * case-insensitively (strcase_hash / strcase_equal). */
GHashTable *headers;
/* Number of header lines seen that contained no ':'. */
int broken_headers;
/* Number of header lines seen that did contain a ':'. */
int good_headers;
size_t header_size; /* Length of full message header in bytes. */
/* Parsing state */
/* Buffer (and its allocated size) handed to getline; between calls it
 * holds the next header line that has not been processed yet. */
char *line;
size_t line_size;
/* Scratch buffer for the value of the header currently being unfolded. */
header_value_closure_t value;
/* Set once notmuch_message_file_get_header has been called. */
int parsing_started;
/* Set at end of file, at the blank line that ends the header, or when
 * the input is judged not to look like a mail message. */
int parsing_finished;
};
/* GEqualFunc for the header table: two header names are the same key
 * when they match without regard to case.
 *
 * Returns non-zero if the strings 'a' and 'b' are equal ignoring
 * case, zero otherwise. */
static int
strcase_equal (const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;
    int difference = strcasecmp (lhs, rhs);

    return difference == 0;
}
/* GHashFunc for the header table: a case-insensitive djb2 hash, so
 * that names which strcase_equal considers equal hash identically.
 *
 * 'ptr' is a NUL-terminated string; a NULL pointer hashes like the
 * empty string. */
static unsigned int
strcase_hash (const void *ptr)
{
    const char *s = ptr;
    /* This is the djb2 hash. */
    unsigned int hash = 5381;

    while (s && *s) {
        /* tolower() is only defined for EOF and values representable
         * as unsigned char, so bytes >= 0x80 must not be passed as a
         * (possibly negative) plain char. */
        hash = ((hash << 5) + hash) + (unsigned int) tolower ((unsigned char) *s);
        s++;
    }

    return hash;
}
/* talloc destructor for a notmuch_message_file_t: releases everything
 * the object holds that talloc does not manage itself (the getline
 * buffer, the header-value scratch buffer, the header table and the
 * open file).
 *
 * Always returns 0. */
static int
_notmuch_message_file_destructor (notmuch_message_file_t *message)
{
    /* Line buffer allocated by getline. */
    if (message->line != NULL)
        free (message->line);

    /* The scratch buffer exists only once something has been sized. */
    if (message->value.size != 0)
        free (message->value.str);

    /* Destroying the table also frees its keys and values. */
    if (message->headers != NULL)
        g_hash_table_destroy (message->headers);

    if (message->file != NULL)
        fclose (message->file);

    return 0;
}
/* Open 'filename' for header parsing and return a new
 * notmuch_message_file_t owned by the talloc context 'ctx'.
 *
 * Returns NULL if the object cannot be allocated or the file cannot
 * be opened; in the latter case a message is printed to stderr. */
notmuch_message_file_t *
_notmuch_message_file_open_ctx (void *ctx, const char *filename)
{
    notmuch_message_file_t *msg = talloc_zero (ctx, notmuch_message_file_t);

    if (unlikely (msg == NULL))
        return NULL;

    /* From here on, freeing 'msg' releases everything it holds. */
    talloc_set_destructor (msg, _notmuch_message_file_destructor);

    msg->file = fopen (filename, "r");
    if (msg->file == NULL) {
        fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
        notmuch_message_file_close (msg);
        return NULL;
    }

    /* Header names are matched case-insensitively; the table owns
     * (and frees) both its keys and its values. */
    msg->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
                                          free, free);

    msg->parsing_started = 0;
    msg->parsing_finished = 0;

    return msg;
}
/* Open 'filename' for header parsing with no talloc parent context.
 *
 * Returns NULL on failure, as _notmuch_message_file_open_ctx does. */
notmuch_message_file_t *
notmuch_message_file_open (const char *filename)
{
    notmuch_message_file_t *message;

    message = _notmuch_message_file_open_ctx (NULL, filename);

    return message;
}
/* Close 'message' and release all of its resources.
 *
 * The object is simply handed to talloc_free; the destructor installed
 * when it was opened does the actual cleanup. */
void
notmuch_message_file_close (notmuch_message_file_t *message)
{
    (void) talloc_free (message);
}
/* Restrict 'message' to the header names given in 'va_headers', a
 * NULL-terminated list of char* arguments.
 *
 * Each name is added to the header table with no value yet, and the
 * message is flagged so that only these headers are stored while
 * parsing. It is an internal error to call this once parsing has
 * started. */
void
notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
                                        va_list va_headers)
{
    char *name;

    if (message->parsing_started) {
        INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");
    }

    for (name = va_arg (va_headers, char *);
         name != NULL;
         name = va_arg (va_headers, char *))
    {
        /* The table takes ownership of the duplicated name. */
        g_hash_table_insert (message->headers, xstrdup (name), NULL);
    }

    message->restrict_headers = 1;
}
/* Variadic convenience wrapper around
 * notmuch_message_file_restrict_headersv.
 *
 * The arguments after 'message' are header names (char *), and the
 * list must be terminated by a NULL pointer. */
void
notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
{
    va_list va_headers;

    va_start (va_headers, message);

    notmuch_message_file_restrict_headersv (message, va_headers);

    /* Every va_start must be paired with a va_end in the same
     * function before it returns. */
    va_end (va_headers);
}
/* Append 'chunk' to the header value being accumulated in 'value',
 * undoing header folding as we go.
 *
 * Leading spaces and tabs of 'chunk' are skipped. If 'value' already
 * holds text, a single space separates it from the new chunk. A
 * trailing newline on the resulting value is removed. The buffer is
 * grown as needed and 'value->len' / 'value->size' are kept up to
 * date. A NULL 'chunk' is ignored. */
static void
copy_header_unfolding (header_value_closure_t *value,
                       const char *chunk)
{
    size_t chunk_len, needed;
    char *dest;

    if (chunk == NULL)
        return;

    while (*chunk == ' ' || *chunk == '\t')
        chunk++;

    chunk_len = strlen (chunk);

    /* Existing text + separating space + chunk + terminating NUL. */
    needed = value->len + 1 + chunk_len + 1;

    if (needed > value->size) {
        size_t new_size = value->size;

        if (new_size == 0) {
            new_size = needed;
        } else {
            while (new_size < needed)
                new_size *= 2;
        }

        value->str = xrealloc (value->str, new_size);
        value->size = new_size;
    }

    dest = value->str + value->len;
    if (value->len) {
        *dest++ = ' ';
        value->len++;
    }

    /* Copies the terminating NUL as well. */
    memcpy (dest, chunk, chunk_len + 1);
    value->len += chunk_len;

    /* Strip the newline left by getline. The length check matters:
     * with an empty value (e.g. a final "Header:" line that has no
     * newline) there is no last character to look at. */
    if (value->len > 0 && value->str[value->len - 1] == '\n') {
        value->str[value->len - 1] = '\0';
        value->len--;
    }
}
/* Return the decoded value of the header named 'header_desired'
 * (matched case-insensitively), parsing further into the message
 * header only as far as necessary.
 *
 * As a special-case, a value of NULL for header_desired will force
 * the entire header to be parsed if it is not parsed already. This is
 * used by the _notmuch_message_file_get_headers_end function.
 *
 * Another special case is the Received: header. For this header we
 * want to concatenate all instances of the header (separated by a
 * single space) instead of just hashing the first instance, as we use
 * this when analyzing the path the mail has taken from sender to
 * recipient. Asking for "received" therefore always parses the whole
 * header first.
 *
 * Returns a string owned by 'message'. For every header other than
 * Received:, "" is returned when the header is not present.
 *
 * NOTE(review): for Received:, the call that finishes parsing returns
 * the table lookup result, which is NULL when no Received: header was
 * found; later calls return "". This is kept as-is because callers
 * may rely on it -- confirm before unifying.
 */
const char *
notmuch_message_file_get_header (notmuch_message_file_t *message,
                                 const char *header_desired)
{
    int contains;
    char *header, *decoded_value, *header_sofar, *combined_header;
    const char *s, *colon;
    int match, is_received;
    size_t newhdr, hdrsofar;
    static int initialized = 0;

    /* header_desired may be NULL (see above), so it must be checked
     * before being compared. Header names are case-insensitive. */
    is_received = (header_desired != NULL &&
                   strcasecmp (header_desired, "received") == 0);

    if (! initialized) {
        g_mime_init (0);
        initialized = 1;
    }

    message->parsing_started = 1;

    if (header_desired == NULL)
        contains = 0;
    else
        contains = g_hash_table_lookup_extended (message->headers,
                                                 header_desired, NULL,
                                                 (gpointer *) &decoded_value);

    /* A stored value can be returned right away, except that the
     * Received: value is only complete once all headers were seen. */
    if (contains && decoded_value &&
        (! is_received || message->parsing_finished))
        return decoded_value;

    if (message->parsing_finished)
        return "";

/* Read header lines until the start of the next header is in
 * message->line. Continuation lines (starting with space or tab) are
 * appended to 'closure' if it is non-NULL and counted in header_size.
 * End of file or an empty line marks the end of parsing. */
#define NEXT_HEADER_LINE(closure)                               \
    while (1) {                                                 \
        ssize_t bytes_read = getline (&message->line,           \
                                      &message->line_size,      \
                                      message->file);           \
        if (bytes_read == -1) {                                 \
            message->parsing_finished = 1;                      \
            break;                                              \
        }                                                       \
        if (*message->line == '\n') {                           \
            message->parsing_finished = 1;                      \
            break;                                              \
        }                                                       \
        if (closure &&                                          \
            (*message->line == ' ' || *message->line == '\t'))  \
        {                                                       \
            copy_header_unfolding ((closure), message->line);   \
        }                                                       \
        if (*message->line == ' ' || *message->line == '\t')    \
            message->header_size += strlen (message->line);     \
        else                                                    \
            break;                                              \
    }

    if (message->line == NULL)
        NEXT_HEADER_LINE (NULL);

    while (! message->parsing_finished) {
        colon = strchr (message->line, ':');

        if (colon == NULL) {
            message->broken_headers++;
            /* A simple heuristic for giving up on things that just
             * don't look like mail messages. */
            if (message->broken_headers >= 10 &&
                message->good_headers < 5)
            {
                message->parsing_finished = 1;
                break;
            }
            NEXT_HEADER_LINE (NULL);
            continue;
        }

        message->header_size += strlen (message->line);

        message->good_headers++;

        header = xstrndup (message->line, colon - message->line);

        if (message->restrict_headers &&
            ! g_hash_table_lookup_extended (message->headers,
                                            header, NULL, NULL))
        {
            free (header);
            NEXT_HEADER_LINE (NULL);
            continue;
        }

        s = colon + 1;
        while (*s == ' ' || *s == '\t')
            s++;

        message->value.len = 0;
        copy_header_unfolding (&message->value, s);

        NEXT_HEADER_LINE (&message->value);

        /* Must be computed before 'header' is handed to the table or
         * freed below. */
        if (header_desired == NULL)
            match = 0;
        else
            match = (strcasecmp (header, header_desired) == 0);

        decoded_value = g_mime_utils_header_decode_text (message->value.str);
        header_sofar = (char *) g_hash_table_lookup (message->headers, header);

        if (header_sofar == NULL) {
            /* First instance of this header: store it. The table takes
             * ownership of the value, and of the key (or frees it if
             * an equal key is already present). */
            g_hash_table_insert (message->headers, header, decoded_value);
        } else if (strcasecmp (header, "received") == 0) {
            /* We treat the Received: header special - we want to
             * concat ALL of the Received: headers we encounter,
             * whichever header the caller asked for. */
            newhdr = strlen (decoded_value);
            hdrsofar = strlen (header_sofar);
            combined_header = xmalloc (hdrsofar + newhdr + 2);
            memcpy (combined_header, header_sofar, hdrsofar);
            combined_header[hdrsofar] = ' ';
            /* newhdr + 1 also copies the terminating NUL. */
            memcpy (combined_header + hdrsofar + 1, decoded_value, newhdr + 1);
            /* The pieces are now part of combined_header; the old
             * stored value is freed by the table on insert. */
            free (decoded_value);
            decoded_value = combined_header;
            g_hash_table_insert (message->headers, header, combined_header);
        } else {
            /* For everything else only the first instance counts;
             * drop this duplicate instead of leaking it. */
            free (header);
            free (decoded_value);
            decoded_value = header_sofar;
        }

        /* if we found a match we can bail - unless of course we are
         * collecting all the Received: headers */
        if (match && ! is_received)
            return decoded_value;
    }

    if (message->parsing_finished) {
        fclose (message->file);
        message->file = NULL;
    }

    free (message->line);
    message->line = NULL;

    if (message->value.size) {
        free (message->value.str);
        message->value.str = NULL;
        message->value.size = 0;
        message->value.len = 0;
    }

    /* For the Received: header we actually might end up here even
     * though we found the header (as we force continued parsing
     * in that case). So let's check if that's the header we were
     * looking for and return the value that we found (if any)
     */
    if (is_received)
        return (char *) g_hash_table_lookup (message->headers, "received");

    /* We've parsed all headers and never found the one we're looking
     * for. It's probably just not there, but let's check that we
     * didn't make a mistake preventing us from seeing it. */
    if (message->restrict_headers && header_desired &&
        ! g_hash_table_lookup_extended (message->headers,
                                        header_desired, NULL, NULL))
    {
        INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
                        "included in call to notmuch_message_file_restrict_headers\n",
                        header_desired);
    }

    return "";
}