/* message.c - Utility functions for parsing an email message for notmuch.
 *
 * Copyright © 2009 Carl Worth
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see https://www.gnu.org/licenses/ .
 *
 * Author: Carl Worth <cworth@cworth.org>
 */

#include <stdarg.h>

#include "notmuch-private.h"

#include <gmime/gmime.h>

#include <glib.h> /* GHashTable */

/* Per-file state: the open stream, its path, and (once parsed) the
 * GMime message plus a cache of decoded header values. Instances are
 * allocated zeroed with talloc_zero, so every pointer starts out NULL. */
struct _notmuch_message_file {
    /* File object: opened with fopen() when the message file is
     * created and closed by the talloc destructor. */
    FILE *file;
    /* talloc copy of the path given at open time; used in error
     * messages and when hashing the file contents. */
    char *filename;

    /* Cache for decoded headers. Created by
     * _notmuch_message_file_parse with strcase_hash/strcase_equal as
     * key functions; keys are released with free() and values with
     * g_free(). NULL until a parse has succeeded. */
    GHashTable *headers;

    /* Parsed message; NULL until _notmuch_message_file_parse has
     * succeeded. */
    GMimeMessage *message;
};

/* talloc destructor for a notmuch_message_file_t (installed in
 * _notmuch_message_file_open_ctx).
 *
 * Releases, in this order, the decoded-header cache, the parsed GMime
 * message and the underlying FILE stream. Each member is skipped when
 * it was never set. Always returns 0.
 */
static int
_notmuch_message_file_destructor (notmuch_message_file_t *message)
{
    /* Drops every cached key/value pair along with the table. */
    if (message->headers != NULL)
	g_hash_table_destroy (message->headers);

    /* Give up our reference on the parsed message. */
    if (message->message != NULL)
	g_object_unref (message->message);

    /* The stream is ours to close. */
    if (message->file != NULL)
	fclose (message->file);

    return 0;
}

/* Open 'filename' and wrap it in a new notmuch_message_file_t that is
 * allocated with 'ctx' as its talloc owner.
 *
 * Returns the new object, or NULL on failure. If the object itself
 * cannot be allocated, NULL is returned without logging. If copying the
 * filename or opening the file fails, an error is logged through
 * 'notmuch' and the partially built object is released.
 */
notmuch_message_file_t *
_notmuch_message_file_open_ctx (notmuch_database_t *notmuch,
				void *ctx, const char *filename)
{
    notmuch_message_file_t *msg_file;

    msg_file = talloc_zero (ctx, notmuch_message_file_t);
    if (unlikely (! msg_file))
	return NULL;

    /* The stored name is only needed for error messages during parsing. */
    msg_file->filename = talloc_strdup (msg_file, filename);
    if (msg_file->filename != NULL) {
	/* From here on, freeing the object also closes the stream. */
	talloc_set_destructor (msg_file, _notmuch_message_file_destructor);

	msg_file->file = fopen (filename, "r");
	if (msg_file->file != NULL)
	    return msg_file;
    }

    /* Either the name copy or the fopen() failed. */
    _notmuch_database_log (notmuch, "Error opening %s: %s\n",
			  filename, strerror (errno));
    _notmuch_message_file_close (msg_file);

    return NULL;
}

/* Convenience wrapper around _notmuch_message_file_open_ctx that
 * passes NULL as the talloc owner.
 *
 * Returns the new message file, or NULL on failure.
 */
notmuch_message_file_t *
_notmuch_message_file_open (notmuch_database_t *notmuch,
			    const char *filename)
{
    void *no_owner = NULL;

    return _notmuch_message_file_open_ctx (notmuch, no_owner, filename);
}

/* Return the path that 'message_file' was opened with.
 *
 * The string is the copy stored inside 'message_file' (allocated with
 * the message file as its talloc parent); the caller must not free it.
 */
const char *
_notmuch_message_file_get_filename (notmuch_message_file_t *message_file)
{
    return message_file->filename;
}

/* Release 'message' through talloc_free. For objects created by
 * _notmuch_message_file_open_ctx this runs the destructor installed
 * there, which frees the header cache, drops the parsed message and
 * closes the file.
 */
void
_notmuch_message_file_close (notmuch_message_file_t *message)
{
    talloc_free (message);
}

/* Report whether 'file' begins with the five bytes "From ", i.e.
 * whether it looks like an mbox.
 *
 * Reads from the current position of 'file' and always rewinds the
 * stream to its beginning before returning. A file shorter than five
 * bytes is reported as not being an mbox.
 */
static notmuch_bool_t
_is_mbox (FILE *file)
{
    char head[5];
    size_t items_read;
    notmuch_bool_t looks_like_mbox;

    items_read = fread (head, sizeof (head), 1, file);

    if (items_read == 1 && strncmp (head, "From ", 5) == 0)
	looks_like_mbox = TRUE;
    else
	looks_like_mbox = FALSE;

    rewind (file);

    return looks_like_mbox;
}

/* Parse the file behind 'message' with GMime, unless that has already
 * been done.
 *
 * On success message->message holds the parsed message and
 * message->headers an empty cache for decoded header values.
 *
 * Returns:
 *   NOTMUCH_STATUS_SUCCESS         parsed now, or already parsed earlier
 *   NOTMUCH_STATUS_OUT_OF_MEMORY   the header cache could not be created
 *   NOTMUCH_STATUS_FILE_NOT_EMAIL  GMime could not build a message, or
 *                                  the file is an mbox that holds more
 *                                  than one message
 *
 * After a FILE_NOT_EMAIL failure the cache and message are reset to
 * NULL and the stream is rewound, so a later call starts from scratch.
 */
notmuch_status_t
_notmuch_message_file_parse (notmuch_message_file_t *message)
{
    /* GMime is initialised on the first call only. */
    static int gmime_ready = 0;
    notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
    notmuch_bool_t mbox;
    GMimeStream *input;
    GMimeParser *mime_parser;

    /* Nothing to do when a previous call already succeeded. */
    if (message->message != NULL)
	return NOTMUCH_STATUS_SUCCESS;

    mbox = _is_mbox (message->file);

    if (gmime_ready == 0) {
	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
	gmime_ready = 1;
    }

    /* Keys are released with free(), values with g_free(). */
    message->headers = g_hash_table_new_full (strcase_hash, strcase_equal,
					      free, g_free);
    if (message->headers == NULL)
	return NOTMUCH_STATUS_OUT_OF_MEMORY;

    input = g_mime_stream_file_new (message->file);

    /* The FILE* stays ours; the destructor is what fclose()s it. */
    g_mime_stream_file_set_owner (GMIME_STREAM_FILE (input), FALSE);

    mime_parser = g_mime_parser_new_with_stream (input);
    g_mime_parser_set_scan_from (mime_parser, mbox);

    message->message = g_mime_parser_construct_message (mime_parser);

    /*
     * Reject the file when no message could be built, or when it is
     * an mbox with data left after the first message, i.e. a
     * multi-message mbox. (For historical reasons, single-message
     * mboxes are supported.) The end-of-stream test only runs when a
     * message was actually constructed.
     */
    if (message->message == NULL ||
	(mbox && ! g_mime_parser_eos (mime_parser)))
	status = NOTMUCH_STATUS_FILE_NOT_EMAIL;

    g_object_unref (input);
    g_object_unref (mime_parser);

    if (! status)
	return status;

    /* Failure: undo everything so the object looks unparsed again. */
    g_hash_table_destroy (message->headers);
    message->headers = NULL;

    if (message->message != NULL) {
	g_object_unref (message->message);
	message->message = NULL;
    }

    rewind (message->file);

    return status;
}

notmuch_status_t
_notmuch_message_file_get_mime_message (notmuch_message_file_t *message,
					GMimeMessage **mime_message)
{
    notmuch_status_t status;

    status = _notmuch_message_file_parse (message);
    if (status)
	return status;

    *mime_message = message->message;

    return NOTMUCH_STATUS_SUCCESS;
}

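/*
 * _notmuch_message_file_get_combined_header (defined after the
 * _extend_header helper below): get all instances of a header,
 * decoded and concatenated.
 *
 * The result must be freed using g_free().
 *
 * Returns NULL on errors and an empty string for non-existing headers.
 */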

/* Decode the raw header 'value' and append it to 'combined'.
 *
 * When 'combined' is NULL the decoded text is returned as is;
 * otherwise the result is a new string "<combined> <decoded>" and the
 * old 'combined' is freed. In every case the caller must stop using
 * the pointer it passed in and use the return value instead.
 *
 * Returns NULL if decoding fails or the joined string cannot be built;
 * 'combined' has been freed in that case too. The result is to be
 * released with g_free().
 */
static char *
_extend_header (char *combined, const char *value)
{
    char *piece;
    char *joined;

    piece = g_mime_utils_header_decode_text (value);
    if (piece == NULL) {
	/* g_free() accepts NULL, so no separate check is needed. */
	g_free (combined);
	return NULL;
    }

    /* First instance: the decoded text is the whole result. */
    if (combined == NULL)
	return piece;

    joined = g_strdup_printf ("%s %s", combined, piece);
    g_free (piece);
    g_free (combined);

    /* 'joined' may be NULL; that is passed on as the error value. */
    return joined;
}

#if (GMIME_MAJOR_VERSION < 3)
/* Variant compiled when GMIME_MAJOR_VERSION < 3.
 *
 * Collect every instance of 'header' (compared case-insensitively) in
 * the parsed message, decode each one and join them with single
 * spaces, in header-list order.
 *
 * Returns a string to be released with g_free():
 *   - the joined value when at least one instance exists,
 *   - an empty string when the header does not occur,
 *   - NULL on error (no header list, no iterator, or a value that
 *     could not be decoded or appended).
 */
static char *
_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
					   const char *header)
{
    GMimeHeaderList *headers;
    GMimeHeaderIter *iter;
    char *combined = NULL;

    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
    if (! headers)
	return NULL;

    iter = g_mime_header_iter_new ();
    if (! iter)
	return NULL;

    if (! g_mime_header_list_get_iter (headers, iter))
	goto DONE;

    do {
	const char *value;

	if (strcasecmp (g_mime_header_iter_get_name (iter), header) != 0)
	    continue;

	/* Note that GMime retains ownership of value... */
	value = g_mime_header_iter_get_value (iter);

	combined = _extend_header (combined, value);

	/* _extend_header has already freed the partial result. Stop
	 * here so the failure is reported as NULL instead of being
	 * masked by later instances or by the empty-string default. */
	if (! combined)
	    goto DONE;
    } while (g_mime_header_iter_next (iter));

    /* Return empty string for non-existing headers. */
    if (! combined)
	combined = g_strdup ("");

  DONE:
    g_mime_header_iter_free (iter);

    return combined;
}
#else
/* Variant compiled when GMIME_MAJOR_VERSION >= 3.
 *
 * Collect every instance of 'header' (compared case-insensitively) in
 * the parsed message, decode each one and join them with single
 * spaces, in header-list order.
 *
 * Returns a string to be released with g_free():
 *   - the joined value when at least one instance exists,
 *   - an empty string when the header does not occur,
 *   - NULL on error (no header list, or a value that could not be
 *     decoded or appended).
 */
static char *
_notmuch_message_file_get_combined_header (notmuch_message_file_t *message,
					   const char *header)
{
    char *combined = NULL;
    GMimeHeaderList *headers;
    int count;

    headers = g_mime_object_get_header_list (GMIME_OBJECT (message->message));
    if (! headers)
	return NULL;

    /* The list is not modified while we walk it, so ask once. */
    count = g_mime_header_list_get_count (headers);

    for (int i = 0; i < count; i++) {
	const char *value;
	GMimeHeader *g_header = g_mime_header_list_get_header_at (headers, i);

	if (strcasecmp (g_mime_header_get_name (g_header), header) != 0)
	    continue;

	/* GMime retains ownership of value, we hope */
	value = g_mime_header_get_value (g_header);

	combined = _extend_header (combined, value);

	/* _extend_header has already freed the partial result. Stop
	 * here so the failure is reported as NULL instead of being
	 * masked by later instances or by the empty-string default. */
	if (! combined)
	    return NULL;
    }

    /* Return empty string for non-existing headers. */
    if (! combined)
	combined = g_strdup ("");

    return combined;
}
#endif

/* Return the decoded value of 'header' from 'message', parsing the
 * file first if that has not happened yet.
 *
 * Results are cached per header name, so repeated calls hand back the
 * same string. The string belongs to the cache inside 'message' and
 * must not be freed by the caller.
 *
 * Returns NULL if the file cannot be parsed or the value cannot be
 * decoded. A header that is absent yields an empty string (which is
 * itself cached).
 */
const char *
_notmuch_message_file_get_header (notmuch_message_file_t *message,
				 const char *header)
{
    const char *cached;
    char *decoded;

    if (_notmuch_message_file_parse (message))
	return NULL;

    /* Fast path: this header was decoded before. */
    cached = g_hash_table_lookup (message->headers, header);
    if (cached)
	return cached;

    if (strcasecmp (header, "received") != 0) {
	const char *raw;

	raw = g_mime_object_get_header (GMIME_OBJECT (message->message),
					header);
	decoded = raw ? g_mime_utils_header_decode_text (raw) : g_strdup ("");
    } else {
	/*
	 * Received: is the one header where all instances matter:
	 * they are concatenated because they are used when analyzing
	 * the path the mail has taken from sender to recipient.
	 */
	decoded = _notmuch_message_file_get_combined_header (message, header);
    }

    if (decoded == NULL)
	return NULL;

    /* The table takes over both the duplicated key and the value. */
    g_hash_table_insert (message->headers, xstrdup (header), decoded);

    return decoded;
}

/* Fetch the basic headers of 'message_file' and determine its message ID.
 *
 * Each output pointer may be NULL if the caller is not interested in
 * that value. Outputs are written only when the function succeeds.
 *
 *   from_out, subject_out, to_out, date_out
 *       Decoded header values as returned by
 *       _notmuch_message_file_get_header; they belong to the header
 *       cache of 'message_file'. A value may be NULL or empty.
 *   message_id_out
 *       The message ID, allocated with talloc: either parsed from the
 *       Message-ID header, the raw header text if it does not parse,
 *       or "notmuch-sha1-<hash>" generated from the file contents when
 *       there is no usable header.
 *
 * Returns:
 *   NOTMUCH_STATUS_SUCCESS         all requested outputs are set
 *   NOTMUCH_STATUS_FILE_NOT_EMAIL  From, Subject and To are all missing
 *                                  or empty
 *   NOTMUCH_STATUS_FILE_ERROR      a fallback message ID could not be
 *                                  generated (hashing the file or
 *                                  allocating the ID string failed)
 *   any error from _notmuch_message_file_parse
 */
notmuch_status_t
_notmuch_message_file_get_headers (notmuch_message_file_t *message_file,
				   const char **from_out,
				   const char **subject_out,
				   const char **to_out,
				   const char **date_out,
				   char **message_id_out)
{
    notmuch_status_t ret;
    const char *header;
    /* Initialised so the early "goto DONE" paths never leave these
     * indeterminate. */
    const char *from = NULL, *to = NULL, *subject = NULL, *date = NULL;
    char *message_id = NULL;

    /* Parse message up front to get better error status. */
    ret = _notmuch_message_file_parse (message_file);
    if (ret)
	goto DONE;

    /* Before we do any real work, (especially before doing a
     * potential SHA-1 computation on the entire file's contents),
     * let's make sure that what we're looking at looks like an
     * actual email message.
     */
    from = _notmuch_message_file_get_header (message_file, "from");
    subject = _notmuch_message_file_get_header (message_file, "subject");
    to = _notmuch_message_file_get_header (message_file, "to");
    date = _notmuch_message_file_get_header (message_file, "date");

    if ((from == NULL || *from == '\0') &&
	(subject == NULL || *subject == '\0') &&
	(to == NULL || *to == '\0')) {
	ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
	goto DONE;
    }

    /* Now that we're sure it's mail, the first order of business
     * is to find a message ID (or else create one ourselves).
     */
    header = _notmuch_message_file_get_header (message_file, "message-id");
    if (header && *header != '\0') {
	message_id = _notmuch_message_id_parse (message_file, header, NULL);

	/* So the header value isn't RFC-compliant, but it's
	 * better than no message-id at all.
	 */
	if (message_id == NULL)
	    message_id = talloc_strdup (message_file, header);
    }

    if (message_id == NULL) {
	/* No message-id at all, let's generate one by taking a
	 * hash over the file's contents.
	 */
	char *sha1 = _notmuch_sha1_of_file (_notmuch_message_file_get_filename (message_file));

	/* If that failed too, something is really wrong. Give up. */
	if (sha1 == NULL) {
	    ret = NOTMUCH_STATUS_FILE_ERROR;
	    goto DONE;
	}

	message_id = talloc_asprintf (message_file, "notmuch-sha1-%s", sha1);
	free (sha1);

	/* talloc_asprintf can fail. Never report success with a NULL
	 * message ID; treat this like the hashing failure above. */
	if (message_id == NULL) {
	    ret = NOTMUCH_STATUS_FILE_ERROR;
	    goto DONE;
	}
    }
 DONE:
    if (ret == NOTMUCH_STATUS_SUCCESS) {
	if (from_out)
	    *from_out = from;
	if (subject_out)
	    *subject_out = subject;
	if (to_out)
	    *to_out = to;
	if (date_out)
	    *date_out = date;
	if (message_id_out)
	    *message_id_out = message_id;
    }
    return ret;
}