/* message.c - Utility functions for parsing an email message for notmuch.
 *
 * Copyright © 2009 Carl Worth
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/ .
 *
 * Author: Carl Worth <cworth@cworth.org>
 */

#include <stdarg.h>

#include "notmuch-private.h"

#include <gmime/gmime.h>

#include <glib.h> /* GHashTable */

/* Growable buffer used to accumulate the (unfolded) value of the
 * header currently being parsed. */
typedef struct {
    char *str;   /* Heap buffer holding the NUL-terminated value so far. */
    size_t size; /* Number of bytes allocated for 'str' (0 if none). */
    size_t len;  /* Length of the string in 'str', excluding the NUL. */
} header_value_closure_t;

/* State for lazily parsing the headers of a single message file. */
struct _notmuch_message_file {
    /* File object; set to NULL once header parsing has finished and
     * the file has been closed. */
    FILE *file;

    /* Header storage */
    /* Non-zero once notmuch_message_file_restrict_headersv has been
     * called; only headers already present as keys are then stored. */
    int restrict_headers;
    /* Maps header name (compared case-insensitively) to its decoded
     * value; a NULL value marks a header that is wanted but has not
     * been seen yet. */
    GHashTable *headers;
    /* Number of lines seen so far that contain no ':'. */
    int broken_headers;
    /* Number of lines seen so far that do contain a ':'. */
    int good_headers;
    size_t header_size; /* Length of full message header in bytes. */

    /* Parsing state */
    /* Buffer and capacity handed to getline(); 'line' holds the most
     * recently read, not yet consumed, header line. */
    char *line;
    size_t line_size;
    /* Scratch buffer for the value of the header being unfolded. */
    header_value_closure_t value;

    /* Set to 1 on the first call to notmuch_message_file_get_header. */
    int parsing_started;
    /* Set to 1 at EOF, at the blank line ending the headers, or when
     * the file is judged not to look like a mail message. */
    int parsing_finished;
};

/* GHashTable equality callback: returns 1 when the two NUL-terminated
 * strings 'a' and 'b' are equal ignoring case, 0 otherwise. */
static int
strcase_equal (const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;

    if (strcasecmp (lhs, rhs) != 0)
	return 0;

    return 1;
}

static unsigned int
strcase_hash (const void *ptr)
{
    const char *s = ptr;

    /* This is the djb2 hash. */
    unsigned int hash = 5381;
    while (s && *s) {
	hash = ((hash << 5) + hash) + tolower (*s);
	s++;
    }

    return hash;
}

/* talloc destructor for a notmuch_message_file_t.
 *
 * Releases everything the message owns outside of talloc: the
 * getline buffer, the header-value scratch buffer, the header hash
 * table and the open file. Always returns 0.
 */
static int
_notmuch_message_file_destructor (notmuch_message_file_t *message)
{
    FILE *file = message->file;
    GHashTable *headers = message->headers;

    if (message->line != NULL)
	free (message->line);

    /* The scratch buffer is only allocated once its size is non-zero. */
    if (message->value.size != 0)
	free (message->value.str);

    if (headers != NULL)
	g_hash_table_destroy (headers);

    if (file != NULL)
	fclose (file);

    return 0;
}

/* Open 'filename' for header parsing and return a new
 * notmuch_message_file_t owned by the talloc context 'ctx'.
 *
 * Returns NULL if the allocation fails, or if the file cannot be
 * opened (in which case an error message is printed to stderr).
 */
notmuch_message_file_t *
_notmuch_message_file_open_ctx (void *ctx, const char *filename)
{
    notmuch_message_file_t *msg = talloc_zero (ctx, notmuch_message_file_t);

    if (unlikely (msg == NULL))
	return NULL;

    /* From here on, freeing 'msg' also releases its non-talloc
     * resources. */
    talloc_set_destructor (msg, _notmuch_message_file_destructor);

    msg->file = fopen (filename, "r");
    if (msg->file == NULL) {
	fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
	notmuch_message_file_close (msg);
	return NULL;
    }

    /* Keys and values are both heap strings released with free(). */
    msg->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
					  free, free);

    msg->parsing_started = 0;
    msg->parsing_finished = 0;

    return msg;
}

/* Open 'filename' for header parsing with no talloc parent.
 *
 * Returns NULL on failure; see _notmuch_message_file_open_ctx. */
notmuch_message_file_t *
notmuch_message_file_open (const char *filename)
{
    notmuch_message_file_t *message;

    message = _notmuch_message_file_open_ctx (NULL, filename);

    return message;
}

/* Release 'message' via talloc_free; the destructor registered when
 * the message was opened runs as part of that. */
void
notmuch_message_file_close (notmuch_message_file_t *message)
{
    (void) talloc_free (message);
}

/* Restrict the headers stored for 'message' to those named in
 * 'va_headers', a list of (char *) header names terminated by NULL.
 *
 * Each name is recorded in the header table with a NULL value. It is
 * an internal error to call this once parsing has started.
 */
void
notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
					va_list va_headers)
{
    const char *name;

    if (message->parsing_started)
	INTERNAL_ERROR ("notmuch_message_file_restrict_headers called after parsing has started");

    for (name = va_arg (va_headers, char*);
	 name != NULL;
	 name = va_arg (va_headers, char*))
    {
	g_hash_table_insert (message->headers, xstrdup (name), NULL);
    }

    message->restrict_headers = 1;
}

/* Variadic convenience wrapper around
 * notmuch_message_file_restrict_headersv.
 *
 * The arguments following 'message' are (char *) header names; the
 * list must be terminated by a NULL pointer.
 */
void
notmuch_message_file_restrict_headers (notmuch_message_file_t *message, ...)
{
    va_list va_headers;

    va_start (va_headers, message);

    notmuch_message_file_restrict_headersv (message, va_headers);

    /* Every va_start must be paired with va_end before returning. */
    va_end (va_headers);
}

/* Append 'chunk' to the header value accumulated in 'value',
 * unfolding it in the process.
 *
 * Leading spaces and tabs of 'chunk' are skipped. If 'value' already
 * holds text, a single space separates it from the new chunk. A
 * trailing newline, if present, is removed. The buffer is grown as
 * needed. A NULL 'chunk' is ignored.
 */
static void
copy_header_unfolding (header_value_closure_t *value,
		       const char *chunk)
{
    size_t chunk_len, needed;
    char *dest;

    if (chunk == NULL)
	return;

    while (*chunk == ' ' || *chunk == '\t')
	chunk++;

    chunk_len = strlen (chunk);

    /* Existing text + separating space + chunk + terminating NUL. */
    needed = value->len + 1 + chunk_len + 1;

    if (needed > value->size) {
	size_t new_size = value->size ? value->size : needed;

	while (new_size < needed)
	    new_size *= 2;

	value->str = xrealloc (value->str, new_size);
	value->size = new_size;
    }

    dest = value->str + value->len;
    if (value->len) {
	*dest++ = ' ';
	value->len++;
    }

    memcpy (dest, chunk, chunk_len + 1);
    value->len += chunk_len;

    /* Strip the trailing newline. The length check matters: with an
     * empty value there is no last character, and looking at
     * str[len - 1] would read before the start of the buffer. */
    if (value->len > 0 && value->str[value->len - 1] == '\n') {
	value->len--;
	value->str[value->len] = '\0';
    }
}

/* As a special-case, a value of NULL for header_desired will force
 * the entire header to be parsed if it is not parsed already. This is
 * used by the _notmuch_message_file_get_headers_end function.
 * Another special case is the Received: header. For this header we
 * want to concatenate all instances of the header instead of just
 * hashing the first instance as we use this when analyzing the path
 * the mail has taken from sender to recipient.
 */
const char *
notmuch_message_file_get_header (notmuch_message_file_t *message,
				 const char *header_desired)
{
    int contains;
    char *header, *decoded_value, *header_sofar, *combined_header;
    const char *s, *colon;
    int match, newhdr, hdrsofar, is_received;
    static int initialized = 0;

    is_received = (strcmp(header_desired,"received") == 0);

    if (! initialized) {
	g_mime_init (0);
	initialized = 1;
    }

    message->parsing_started = 1;

    if (header_desired == NULL)
	contains = 0;
    else
	contains = g_hash_table_lookup_extended (message->headers,
						 header_desired, NULL,
						 (gpointer *) &decoded_value);

    if (contains && decoded_value)
	return decoded_value;

    if (message->parsing_finished)
	return "";

#define NEXT_HEADER_LINE(closure)				\
    while (1) {							\
	ssize_t bytes_read = getline (&message->line,		\
				      &message->line_size,	\
				      message->file);		\
	if (bytes_read == -1) {					\
	    message->parsing_finished = 1;			\
	    break;						\
	}							\
	if (*message->line == '\n') {				\
	    message->parsing_finished = 1;			\
	    break;						\
	}							\
	if (closure &&						\
	    (*message->line == ' ' || *message->line == '\t'))	\
	{							\
	    copy_header_unfolding ((closure), message->line);	\
	}							\
	if (*message->line == ' ' || *message->line == '\t')	\
	    message->header_size += strlen (message->line);	\
	else							\
	    break;						\
    }

    if (message->line == NULL)
	NEXT_HEADER_LINE (NULL);

    while (1) {

	if (message->parsing_finished)
	    break;

	colon = strchr (message->line, ':');

	if (colon == NULL) {
	    message->broken_headers++;
	    /* A simple heuristic for giving up on things that just
	     * don't look like mail messages. */
	    if (message->broken_headers >= 10 &&
		message->good_headers < 5)
	    {
		message->parsing_finished = 1;
		break;
	    }
	    NEXT_HEADER_LINE (NULL);
	    continue;
	}

	message->header_size += strlen (message->line);

	message->good_headers++;

	header = xstrndup (message->line, colon - message->line);

	if (message->restrict_headers &&
	    ! g_hash_table_lookup_extended (message->headers,
					    header, NULL, NULL))
	{
	    free (header);
	    NEXT_HEADER_LINE (NULL);
	    continue;
	}

	s = colon + 1;
	while (*s == ' ' || *s == '\t')
	    s++;

	message->value.len = 0;
	copy_header_unfolding (&message->value, s);

	NEXT_HEADER_LINE (&message->value);

	if (header_desired == NULL)
	    match = 0;
	else
	    match = (strcasecmp (header, header_desired) == 0);

	decoded_value = g_mime_utils_header_decode_text (message->value.str);
	header_sofar = (char *)g_hash_table_lookup (message->headers, header);
	/* we treat the Received: header special - we want to concat ALL of 
	 * the Received: headers we encounter.
	 * for everything else we return the first instance of a header */
	if (strcasecmp(header, "received") == 0) {
	    if (header_sofar == NULL) {
		/* first Received: header we encountered; just add it */
		g_hash_table_insert (message->headers, header, decoded_value);
	    } else {
		/* we need to add the header to those we already collected */
		newhdr = strlen(decoded_value);
		hdrsofar = strlen(header_sofar);
		combined_header = xmalloc(hdrsofar + newhdr + 2);
		strncpy(combined_header,header_sofar,hdrsofar);
		*(combined_header+hdrsofar) = ' ';
		strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1);
		free (decoded_value);
		g_hash_table_insert (message->headers, header, combined_header);
	    }
	} else {
	    if (header_sofar == NULL) {
		/* Only insert if we don't have a value for this header, yet. */
		g_hash_table_insert (message->headers, header, decoded_value);
	    } else {
		free (header);
		free (decoded_value);
		decoded_value = header_sofar;
	    }
	}
	/* if we found a match we can bail - unless of course we are
	 * collecting all the Received: headers */
	if (match && !is_received)
	    return decoded_value;
    }

    if (message->parsing_finished) {
        fclose (message->file);
        message->file = NULL;
    }

    if (message->line)
	free (message->line);
    message->line = NULL;

    if (message->value.size) {
	free (message->value.str);
	message->value.str = NULL;
	message->value.size = 0;
	message->value.len = 0;
    }

    /* For the Received: header we actually might end up here even
     * though we found the header (as we force continued parsing
     * in that case). So let's check if that's the header we were
     * looking for and return the value that we found (if any)
     */
    if (is_received)
	return (char *)g_hash_table_lookup (message->headers, "received");

    /* We've parsed all headers and never found the one we're looking
     * for. It's probably just not there, but let's check that we
     * didn't make a mistake preventing us from seeing it. */
    if (message->restrict_headers && header_desired &&
	! g_hash_table_lookup_extended (message->headers,
					header_desired, NULL, NULL))
    {
	INTERNAL_ERROR ("Attempt to get header \"%s\" which was not\n"
			"included in call to notmuch_message_file_restrict_headers\n",
			header_desired);
    }

    return "";
}