From 5986cfe5e71bad6126a05e733fa9ea777d7cc0eb Mon Sep 17 00:00:00 2001
From: Carl Worth
Date: Tue, 13 Oct 2009 07:23:14 -0700
Subject: [PATCH] Add sup-compatible prefixes and achieve sup-compatible print
 output

What I've done here is to instrument sup-sync to print the text and
terms objects it constructs just before indexing a message. Then I've
made my g_mime_test program achieve (nearly) identical output for an
example email message, (just missing the body text).

Next we can start shoving this data into a Xapian index.
---
Review notes (text between "---" and the diff is not part of the
commit message):

 - Header names, e-mail addresses and the usage placeholder that were
   written in angle brackets are missing from this copy. The #include
   targets below are reconstructed from the functions the code calls
   and must be confirmed against the tree; blank context lines are
   reconstructed from the original hunk line counts.
 - Compared with the patch as first posted, the added code builds the
   sender and recipient lists once and releases them, does not parse a
   NULL sender, checks the entry type before the mailbox cast, checks
   the gmtime_r result, and removes the now-unused loop counter.
 - Hunk counts and the diffstat match the lines below; the index line
   still carries the blob ids of the original patch.

 g_mime_test.c | 222 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 199 insertions(+), 23 deletions(-)

diff --git a/g_mime_test.c b/g_mime_test.c
index eb624b69..9ffb0ac1 100644
--- a/g_mime_test.c
+++ b/g_mime_test.c
@@ -21,18 +21,157 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 #include <gmime/gmime.h>
 
 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
 
-static void
-print_header (const char *name, const char *value)
+/* These prefix values are specifically chosen to be compatible
+ * with sup, (http://sup.rubyforge.org), written by
+ * William Morgan, and released
+ * under the GNU GPL v2.
+ */
+
+/* Maps one field name to the prefix printed for its terms. */
+typedef struct {
+    const char *name;
+    const char *prefix;
+} prefix_t;
+
+static const prefix_t NORMAL_PREFIX[] = {
+    { "subject", "S" },
+    { "body", "B" },
+    { "from_name", "FN" },
+    { "to_name", "TN" },
+    { "name", "N" },
+    { "attachment", "A" }
+};
+
+static const prefix_t BOOLEAN_PREFIX[] = {
+    { "type", "K" },
+    { "from_email", "FE" },
+    { "to_email", "TE" },
+    { "email", "E" },
+    { "date", "D" },
+    { "label", "L" },
+    { "source_id", "I" },
+    { "attachment_extension", "O" },
+    { "msgid", "Q" },
+    { "thread", "H" },
+    { "ref", "R" }
+};
+
+/* Return the prefix listed for the field called name, looking in
+ * NORMAL_PREFIX first and then BOOLEAN_PREFIX, or "" when neither
+ * table lists it. */
+static const char *
+find_prefix (const char *name)
 {
-    printf ("%s:", name);
-    if (value)
-        printf ("\t%s", value);
-    printf ("\n");
+    size_t i;
+
+    for (i = 0; i < ARRAY_SIZE (NORMAL_PREFIX); i++)
+        if (strcmp (name, NORMAL_PREFIX[i].name) == 0)
+            return NORMAL_PREFIX[i].prefix;
+
+    for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX); i++)
+        if (strcmp (name, BOOLEAN_PREFIX[i].name) == 0)
+            return BOOLEAN_PREFIX[i].prefix;
+
+    return "";
+}
+
+/* Output form used by print_term: zero prints ["value", "prefix"]
+ * pairs, non-zero prints "prefixvalue" strings. */
+static int TERM_COMBINED = 0;
+
+/* Print one term for the field prefix_name, followed by ", ".
+ * Prints nothing when value is NULL.  The value is written as-is;
+ * quote characters inside it are not escaped. */
+static void
+print_term (const char *prefix_name, const char *value)
+{
+    const char *prefix;
+
+    if (value == NULL)
+        return;
+
+    prefix = find_prefix (prefix_name);
+
+    if (TERM_COMBINED)
+        printf ("\"%s%s\", ", prefix, value);
+    else
+        printf ("[\"%s\", \"%s\"], ", value, prefix);
+}
+
+/* Print the display name of address under prefix_name, if it has one. */
+static void
+add_address_name (InternetAddress *address, const char *prefix_name)
+{
+    const char *name;
+
+    name = internet_address_get_name (address);
+
+    if (name)
+        print_term (prefix_name, name);
+}
+
+/* Print each display name in addresses three times: under
+ * address_type, under "name" and under "body".  A NULL list
+ * prints nothing. */
+static void
+add_address_names (InternetAddressList *addresses, const char *address_type)
+{
+    int i;
+    InternetAddress *address;
+
+    if (addresses == NULL)
+        return;
+
+    for (i = 0; i < internet_address_list_length (addresses); i++) {
+        address = internet_address_list_get_address (addresses, i);
+        add_address_name (address, address_type);
+        add_address_name (address, "name");
+        add_address_name (address, "body");
+    }
+}
+
+/* Print the mailbox address of address under prefix_name, if it
+ * has one. */
+static void
+add_address_addr (InternetAddress *address, const char *prefix_name)
+{
+    InternetAddressMailbox *mailbox;
+    const char *addr;
+
+    /* Only mailbox entries may be cast with INTERNET_ADDRESS_MAILBOX;
+     * skip any other kind of entry, such as a group. */
+    if (! INTERNET_ADDRESS_IS_MAILBOX (address))
+        return;
+
+    mailbox = INTERNET_ADDRESS_MAILBOX (address);
+    addr = internet_address_mailbox_get_addr (mailbox);
+
+    if (addr)
+        print_term (prefix_name, addr);
+}
+
+/* Print each mailbox address in addresses twice: under
+ * address_type and under "email".  A NULL list prints nothing. */
+static void
+add_address_addrs (InternetAddressList *addresses, const char *address_type)
+{
+    int i;
+    InternetAddress *address;
+
+    if (addresses == NULL)
+        return;
+
+    for (i = 0; i < internet_address_list_length (addresses); i++) {
+        address = internet_address_list_get_address (addresses, i);
+        add_address_addr (address, address_type);
+        add_address_addr (address, "email");
+    }
 }
 
 int
@@ -41,22 +180,17 @@ main (int argc, char **argv)
     GMimeStream *stream;
     GMimeParser *parser;
     GMimeMessage *message;
+    InternetAddressList *sender, *recipients;
 
     const char *filename;
     FILE *file;
 
-    const char *sup_entry_headers[] = {
-        "From",
-        "Subject",
-        "Date",
-        "References",
-        "CC",
-        "To",
-        "In-Reply-To"
-    };
-    const char *value;
-    int i;
+    const char *from, *subject, *message_id;
 
+    time_t date;
+    struct tm gm_time_tm;
+    char time_str[16]; /* YYYYMMDDHHMMSS + 1 for Y100k compatibility ;-) */
+
     if (argc < 2) {
         fprintf (stderr, "Usage: %s \n",
                  argv[0]);
@@ -79,15 +213,57 @@ main (int argc, char **argv)
 
     message = g_mime_parser_construct_message (parser);
 
-    value = g_mime_message_get_message_id (message);
-    print_header ("message_id", value);
+    /* Both output sections walk the same two address lists, so build
+     * each list once here and release it at the end. */
+    from = g_mime_message_get_sender (message);
+    sender = from ? internet_address_list_parse_string (from) : NULL;
+    recipients = g_mime_message_get_all_recipients (message);
 
-    for (i = 0; i < ARRAY_SIZE (sup_entry_headers); i++) {
-        value = g_mime_object_get_header (GMIME_OBJECT (message),
-                                          sup_entry_headers[i]);
-        print_header (sup_entry_headers[i], value);
+    printf ("text is:\n[");
+
+    add_address_names (sender, "from_name");
+    add_address_names (recipients, "to_name");
+
+    subject = g_mime_message_get_subject (message);
+    print_term ("subject", subject);
+    print_term ("body", subject);
+
+    printf ("]\nterms is:\n[");
+
+    TERM_COMBINED = 1;
+
+    add_address_addrs (sender, "from_email");
+    add_address_addrs (recipients, "to_email");
+
+    g_mime_message_get_date (message, &date, NULL);
+
+    if (gmtime_r (&date, &gm_time_tm) == NULL ||
+        strftime (time_str, sizeof (time_str),
+                  "%Y%m%d%H%M%S", &gm_time_tm) == 0) {
+        fprintf (stderr, "Internal error formatting time\n");
+        exit (1);
     }
 
+    print_term ("date", time_str);
+
+    print_term ("label", "inbox");
+    print_term ("label", "unread");
+    print_term ("type", "mail");
+
+    message_id = g_mime_message_get_message_id (message);
+    print_term ("msgid", message_id);
+
+    print_term ("source_id", "1");
+
+    print_term ("thread", message_id);
+
+    printf ("]\n");
+
+    /* The sender and recipient lists were created above for this function. */
+    if (recipients)
+        g_object_unref (recipients);
+    if (sender)
+        g_object_unref (sender);
     g_object_unref (message);
     g_object_unref (parser);
     g_object_unref (stream);