diff --git a/AUTHORS b/AUTHORS index fb6c6b67..6d0f2de8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -12,9 +12,6 @@ thanks to the GNU General Public License). This includes: Brian Gladman (with Mikhail Gusarov ) Implementation of SHA-1 (nice and small) (libsha1.c) -Jeffrey Stedfast - Parsing of myriad date formats in email messages (date.c) - Please see the various files in the Notmuch distribution for individual copyright statements. diff --git a/Makefile b/Makefile index 348d8772..8fa81c61 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,6 @@ NOTMUCH_LDFLAGS=`pkg-config --libs glib-2.0 gmime-2.4 talloc` \ LIBRARY= \ database.o \ - date.o \ index.o \ libsha1.o \ message.o \ diff --git a/date.c b/date.c deleted file mode 100644 index 44f5a99a..00000000 --- a/date.c +++ /dev/null @@ -1,670 +0,0 @@ -/* date.c - Date-parsing utility for the notmuch mail system. - * - * Copyright © 2000-2009 Jeffrey Stedfast - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see http://www.gnu.org/licenses/ - */ - -/* This code was originally written by from Jeffrey Stedfast - * as part of his GMime library (http://spruce.sourceforge.net/gmime/) - * - * Carl Worth imported it into notmuch and removed - * some glib-isms. - */ - -#include "notmuch-private.h" - -#include - -#include /* For g_ascii_strncasecmp only. */ - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#define d(x) - -#define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2) - -/* date parser macros */ -#define NUMERIC_CHARS "1234567890" -#define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday" -#define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember" -#define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()" -#define TIMEZONE_NUMERIC_CHARS "-+1234567890" -#define TIME_CHARS "1234567890:" - -#define DATE_TOKEN_NON_NUMERIC (1 << 0) -#define DATE_TOKEN_NON_WEEKDAY (1 << 1) -#define DATE_TOKEN_NON_MONTH (1 << 2) -#define DATE_TOKEN_NON_TIME (1 << 3) -#define DATE_TOKEN_HAS_COLON (1 << 4) -#define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5) -#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6) -#define DATE_TOKEN_HAS_SIGN (1 << 7) - -static unsigned char gmime_datetok_table[256] = { - 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111, - 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107, - 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111, - 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105, - 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, - 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, -}; - -/* hrm, is there a library for this stuff? */ -static struct { - const char *name; - int offset; -} tz_offsets [] = { - { "UT", 0 }, - { "GMT", 0 }, - { "EST", -500 }, /* these are all US timezones. bloody yanks */ - { "EDT", -400 }, - { "CST", -600 }, - { "CDT", -500 }, - { "MST", -700 }, - { "MDT", -600 }, - { "PST", -800 }, - { "PDT", -700 }, - { "Z", 0 }, - { "A", -100 }, - { "M", -1200 }, - { "N", 100 }, - { "Y", 1200 }, -}; - -static const char *tm_months[] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" -}; - -static const char *tm_days[] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" -}; - -/* This is where it gets ugly... */ - -typedef struct _date_token { - struct _date_token *next; - unsigned char mask; - const char *start; - size_t len; -} date_token; - -#define date_token_free(tok) free (tok) -#define date_token_new() malloc (sizeof (date_token)) - -static date_token * -datetok (const char *date) -{ - date_token *tokens = NULL, *token, *tail = (date_token *) &tokens; - const char *start, *end; - unsigned char mask; - - start = date; - while (*start) { - /* kill leading whitespace */ - while (*start == ' ' || *start == '\t') - start++; - - if (*start == '\0') - break; - - mask = gmime_datetok_table[(unsigned char) *start]; - - /* find the end of this token */ - end = start + 1; - while (*end && !strchr ("-/,\t\r\n ", *end)) - mask |= gmime_datetok_table[(unsigned char) *end++]; - - if (end != start) { - token = date_token_new (); - token->next = NULL; - token->start = start; - token->len = end - start; - token->mask = mask; - - tail->next = token; - tail = token; - } - - if (*end) - start = end + 1; - else - break; - } - - return tokens; -} - -static int -decode_int (const char *in, size_t inlen) -{ - register const char *inptr; - int sign = 1, val = 0; - const char *inend; - - inptr = in; - inend = in + inlen; - - if (*inptr == '-') { - sign = -1; - inptr++; - } else if (*inptr == '+') - inptr++; - - for ( ; inptr < inend; inptr++) { - if (!(*inptr >= '0' && *inptr <= '9')) - return -1; - else - val = (val * 10) + (*inptr - '0'); - } - - val *= sign; - - return val; -} - -#if 0 -static int -get_days_in_month (int month, int year) -{ - switch (month) { - case 1: - case 3: - case 5: - case 7: - case 8: - case 10: - case 12: - return 31; - case 4: - case 6: - case 9: - case 11: - return 30; - case 2: - if (g_date_is_leap_year (year)) - return 29; - else - return 28; - default: - return 0; - } -} -#endif - -static int -get_wday (const char *in, size_t inlen) -{ - int wday; - - if (in == NULL) - return -1; - - if (inlen < 3) - return -1; - - for (wday = 0; wday < 7; wday++) { - if (!g_ascii_strncasecmp (in, tm_days[wday], 3)) - return wday; - } - - return -1; /* unknown week day */ -} - -static int -get_mday (const char *in, size_t inlen) -{ - int mday; - - if (in == NULL) - return -1; - - mday = decode_int (in, inlen); - - if (mday < 0 || mday > 31) - mday = -1; - - return mday; -} - -static int -get_month (const char *in, size_t inlen) -{ - int i; - - if (in == NULL) - return -1; - - if (inlen < 3) - return -1; - - for (i = 0; i < 12; i++) { - if (!g_ascii_strncasecmp (in, tm_months[i], 3)) - return i; - } - - return -1; /* unknown month */ -} - -static int -get_year (const char *in, size_t inlen) -{ - int year; - - if (in == NULL) - return -1; - - if ((year = decode_int (in, inlen)) == -1) - return -1; - - if (year < 100) - year += (year < 70) ? 2000 : 1900; - - if (year < 1969) - return -1; - - return year; -} - -static int -get_time (const char *in, size_t inlen, int *hour, int *min, int *sec) -{ - register const char *inptr; - int *val, colons = 0; - const char *inend; - - *hour = *min = *sec = 0; - - inend = in + inlen; - val = hour; - for (inptr = in; inptr < inend; inptr++) { - if (*inptr == ':') { - colons++; - switch (colons) { - case 1: - val = min; - break; - case 2: - val = sec; - break; - default: - return FALSE; - } - } else if (!(*inptr >= '0' && *inptr <= '9')) - return FALSE; - else - *val = (*val * 10) + (*inptr - '0'); - } - - return TRUE; -} - -static int -get_tzone (date_token **token) -{ - const char *inptr, *inend; - size_t inlen; - int i, t; - - for (i = 0; *token && i < 2; *token = (*token)->next, i++) { - inptr = (*token)->start; - inlen = (*token)->len; - inend = inptr + inlen; - - if (*inptr == '+' || *inptr == '-') { - return decode_int (inptr, inlen); - } else { - if (*inptr == '(') { - inptr++; - if (*(inend - 1) == ')') - inlen -= 2; - else - inlen--; - } - - for (t = 0; t < 15; t++) { - size_t len = strlen (tz_offsets[t].name); - - if (len != inlen) - continue; - - if (!strncmp (inptr, tz_offsets[t].name, len)) - return tz_offsets[t].offset; - } - } - } - - return -1; -} - -#define HAVE_TIMEZONE - -static time_t -mktime_utc (struct tm *tm) -{ - time_t tt; - long tz; - - tm->tm_isdst = -1; - tt = mktime (tm); - -#if defined (G_OS_WIN32) - _get_timezone (&tz); - if (tm->tm_isdst > 0) { - int dst; - - _get_dstbias (&dst); - tz += dst; - } -#elif defined (HAVE_TM_GMTOFF) - tz = -tm->tm_gmtoff; -#elif defined (HAVE_TIMEZONE) - if (tm->tm_isdst > 0) { -#if defined (HAVE_ALTZONE) - tz = altzone; -#else /* !defined (HAVE_ALTZONE) */ - tz = (timezone - 3600); -#endif - } else { - tz = timezone; - } -#elif defined (HAVE__TIMEZONE) - tz = _timezone; -#else -#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc. -#endif - - return tt - tz; -} - -static time_t -parse_rfc822_date (date_token *tokens, int *tzone) -{ - int hour, min, sec, offset, n; - date_token *token; - struct tm tm; - time_t t; - - if (tokens == NULL) - return 0; - - token = tokens; - - memset ((void *) &tm, 0, sizeof (struct tm)); - - if ((n = get_wday (token->start, token->len)) != -1) { - /* not all dates may have this... */ - tm.tm_wday = n; - token = token->next; - } - - /* get the mday */ - if (!token || (n = get_mday (token->start, token->len)) == -1) - return (time_t) 0; - - tm.tm_mday = n; - token = token->next; - - /* get the month */ - if (!token || (n = get_month (token->start, token->len)) == -1) - return (time_t) 0; - - tm.tm_mon = n; - token = token->next; - - /* get the year */ - if (!token || (n = get_year (token->start, token->len)) == -1) - return (time_t) 0; - - tm.tm_year = n - 1900; - token = token->next; - - /* get the hour/min/sec */ - if (!token || !get_time (token->start, token->len, &hour, &min, &sec)) - return (time_t) 0; - - tm.tm_hour = hour; - tm.tm_min = min; - tm.tm_sec = sec; - token = token->next; - - /* get the timezone */ - if (!token || (n = get_tzone (&token)) == -1) { - /* I guess we assume tz is GMT? */ - offset = 0; - } else { - offset = n; - } - - t = mktime_utc (&tm); - - /* t is now GMT of the time we want, but not offset by the timezone ... */ - - /* this should convert the time to the GMT equiv time */ - t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60; - - if (tzone) - *tzone = offset; - - return t; -} - - -#define date_token_mask(t) (((date_token *) t)->mask) -#define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0) -#define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0) -#define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0) -#define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON)) -#define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0) -#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN)) -#define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t)) - -static time_t -parse_broken_date (date_token *tokens, int *tzone) -{ - int got_wday, got_month, got_tzone; - int hour, min, sec, offset, n; - date_token *token; - struct tm tm; - time_t t; - - memset ((void *) &tm, 0, sizeof (struct tm)); - got_wday = got_month = got_tzone = FALSE; - offset = 0; - - token = tokens; - while (token) { - if (is_weekday (token) && !got_wday) { - if ((n = get_wday (token->start, token->len)) != -1) { - d(printf ("weekday; ")); - got_wday = TRUE; - tm.tm_wday = n; - goto next; - } - } - - if (is_month (token) && !got_month) { - if ((n = get_month (token->start, token->len)) != -1) { - d(printf ("month; ")); - got_month = TRUE; - tm.tm_mon = n; - goto next; - } - } - - if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) { - if (get_time (token->start, token->len, &hour, &min, &sec)) { - d(printf ("time; ")); - tm.tm_hour = hour; - tm.tm_min = min; - tm.tm_sec = sec; - goto next; - } - } - - if (is_tzone (token) && !got_tzone) { - date_token *t = token; - - if ((n = get_tzone (&t)) != -1) { - d(printf ("tzone; ")); - got_tzone = TRUE; - offset = n; - goto next; - } - } - - if (is_numeric (token)) { - if (token->len == 4 && !tm.tm_year) { - if ((n = get_year (token->start, token->len)) != -1) { - d(printf ("year; ")); - tm.tm_year = n - 1900; - goto next; - } - } else { - /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */ - if (!got_month && token->next && is_numeric (token->next)) { - if ((n = decode_int (token->start, token->len)) > 12) { - goto mday; - } else if (n > 0) { - d(printf ("mon; ")); - got_month = TRUE; - tm.tm_mon = n - 1; - } - goto next; - } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) { - mday: - d(printf ("mday; ")); - tm.tm_mday = n; - goto next; - } else if (!tm.tm_year) { - if ((n = get_year (token->start, token->len)) != -1) { - d(printf ("2-digit year; ")); - tm.tm_year = n - 1900; - } - goto next; - } - } - } - - d(printf ("???; ")); - - next: - - token = token->next; - } - - d(printf ("\n")); - - t = mktime_utc (&tm); - - /* t is now GMT of the time we want, but not offset by the timezone ... */ - - /* this should convert the time to the GMT equiv time */ - t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60; - - if (tzone) - *tzone = offset; - - return t; -} - -#if 0 -static void -gmime_datetok_table_init (void) -{ - int i; - - memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table)); - - for (i = 0; i < 256; i++) { - if (!strchr (NUMERIC_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC; - - if (!strchr (WEEKDAY_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY; - - if (!strchr (MONTH_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH; - - if (!strchr (TIME_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME; - - if (!strchr (TIMEZONE_ALPHA_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA; - - if (!strchr (TIMEZONE_NUMERIC_CHARS, i)) - gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC; - - if (((char) i) == ':') - gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON; - - if (strchr ("+-", i)) - gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN; - } - - printf ("static unsigned char gmime_datetok_table[256] = {"); - for (i = 0; i < 256; i++) { - if (i % 16 == 0) - printf ("\n\t"); - printf ("%3d,", gmime_datetok_table[i]); - } - printf ("\n};\n"); -} -#endif - -time_t -notmuch_parse_date (const char *str, int *tz_offset) -{ - date_token *token, *tokens; - time_t date; - - if (str == NULL) - return 0; - - if (!(tokens = datetok (str))) { - if (tz_offset) - *tz_offset = 0; - - return 0; - } - - if (!(date = parse_rfc822_date (tokens, tz_offset))) - date = parse_broken_date (tokens, tz_offset); - - /* cleanup */ - while (tokens) { - token = tokens; - tokens = tokens->next; - date_token_free (token); - } - - return date; -} diff --git a/message.cc b/message.cc index 2a900149..154efa0f 100644 --- a/message.cc +++ b/message.cc @@ -21,6 +21,8 @@ #include "notmuch-private.h" #include "database-private.h" +#include + #include struct _notmuch_message { @@ -428,7 +430,7 @@ _notmuch_message_set_date (notmuch_message_t *message, { time_t time_value; - time_value = notmuch_parse_date (date, NULL); + time_value = g_mime_utils_header_decode_date (date, NULL); message->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, Xapian::sortable_serialise (time_value));