notmuch/parse-time-string/parse-time-string.c

/*
 * parse time string - user friendly date and time parser
 * Copyright © 2012 Jani Nikula
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Author: Jani Nikula <jani@nikula.org>
 */

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>

#include "compat.h"
#include "parse-time-string.h"

/*
 * IMPLEMENTATION DETAILS
 *
 * At a high level, the parsing is done in two phases: 1) actual
 * parsing of the input string and storing the parsed data into
 * 'struct state', and 2) processing of the data in 'struct state'
 * according to current time (or provided reference time) and
 * rounding. This is evident in the main entry point function
 * parse_time_string().
 *
 * 1) The parsing phase - parse_input()
 *
 * Parsing is greedy and happens from left to right. The parsing is as
 * unambiguous as possible; only unambiguous date/time formats are
 * accepted. Redundant or contradictory absolute date/time in the
 * input (e.g. date specified multiple times/ways) is not
 * accepted. Relative date/time on the other hand just accumulates if
 * present multiple times (e.g. "5 days 5 days" just turns into 10
 * days).
 *
 * Parsing decisions are made on the input format, not value. For
 * example, "20/5/2005" fails because the recognized format here is
 * MM/D/YYYY, even though the values would suggest DD/M/YYYY.
 *
 * Parsing is mostly stateless in the sense that parsing decisions are
 * not made based on the values of previously parsed data, or whether
 * certain data is present in the first place. (There are a few
 * exceptions to the latter part, though, such as parsing of time zone
 * that would otherwise look like plain time.)
 *
 * When the parser encounters a number that is not greedily parsed as
 * part of a format, the interpretation is postponed until the next
 * token is parsed. The parser for the next token may consume the
 * previously postponed number. For example, when parsing "20 May" the
 * meaning of "20" is not known until "May" is parsed. If the parser
 * for the next token does not consume the postponed number, the
 * number is handled as a "lone" number before parser for the next
 * token finishes.
 *
 * 2) The processing phase - create_output()
 *
 * Once the parser in phase 1 has finished, 'struct state' contains
 * all the information from the input string, and it's no longer
 * needed. Since the parser does not even handle the concept of "now",
 * the processing initializes the fields referring to the current
 * date/time.
 *
 * If requested, the result is rounded towards past or future. The
 * idea behind rounding is to support parsing date/time ranges in an
 * obvious way. For example, for a range defined as two dates (without
 * time), one would typically want to have an inclusive range from the
 * beginning of start date to the end of the end date. The caller
 * would use rounding towards past in the start date, and towards
 * future in the end date.
 *
 * The absolute date and time is shifted by the relative date and
 * time, and time zone adjustments are made. Daylight saving time
 * (DST) is specifically *not* handled at all.
 *
 * Finally, the result is stored to time_t.
 */

#define unused(x) x __attribute__ ((unused))

/* XXX: Redefine these to add i18n support. The keyword table uses
 * N_() to mark strings to be translated; they are accessed
 * dynamically using _(). */
#define _(s) (s)        /* i18n: define as gettext (s) */
#define N_(s) (s)       /* i18n: define as gettext_noop (s) */

#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))

/*
 * Field indices in the tm and set arrays of struct state.
 *
 * NOTE: There's some code that depends on the ordering of this enum.
 */
enum field {
    /* Keep SEC...YEAR in this order. */
    TM_ABS_SEC,         /* seconds */
    TM_ABS_MIN,         /* minutes */
    TM_ABS_HOUR,        /* hours */
    TM_ABS_MDAY,        /* day of the month */
    TM_ABS_MON,         /* month */
    TM_ABS_YEAR,        /* year */

    TM_WDAY,            /* day of the week. special: may be relative */
    TM_ABS_ISDST,       /* daylight saving time */

    TM_AMPM,            /* am vs. pm */
    TM_TZ,              /* timezone in minutes */

    /* Keep SEC...YEAR in this order. */
    TM_REL_SEC,         /* seconds relative to absolute or reference time */
    TM_REL_MIN,         /* minutes ... */
    TM_REL_HOUR,        /* hours ... */
    TM_REL_DAY,         /* days ... */
    TM_REL_MON,         /* months ... */
    TM_REL_YEAR,        /* years ... */
    TM_REL_WEEK,        /* weeks ... */

    TM_NONE,            /* not a field */

    TM_SIZE		= TM_NONE,
    TM_FIRST_ABS	= TM_ABS_SEC,
    TM_FIRST_REL	= TM_REL_SEC,
};

/* Values for the set array of struct state. */
enum field_set {
    FIELD_UNSET,        /* The field has not been touched by parser. */
    FIELD_SET,          /* The field has been set by parser. */
    FIELD_NOW,          /* The field will be set to reference time. */
};

static enum field
next_abs_field (enum field field)
{
    /* NOTE: Depends on the enum ordering. */
    return field < TM_ABS_YEAR ? field + 1 : TM_NONE;
}

static enum field
abs_to_rel_field (enum field field)
{
    assert (field <= TM_ABS_YEAR);

    /* NOTE: Depends on the enum ordering. */
    return field + (TM_FIRST_REL - TM_FIRST_ABS);
}

/* Get the smallest acceptable value for field. */
static int
get_field_epoch_value (enum field field)
{
    if (field == TM_ABS_MDAY || field == TM_ABS_MON)
	return 1;
    else if (field == TM_ABS_YEAR)
	return 1970;
    else
	return 0;
}

/* The parsing state. */
struct state {
    int tm[TM_SIZE];                    /* parsed date and time */
    enum field_set set[TM_SIZE];        /* set status of tm */

    enum field last_field;              /* Previously set field. */
    char delim;

    int postponed_length;               /* Number of digits in postponed value. */
    int postponed_value;
    char postponed_delim;               /* The delimiter preceding postponed number. */
};

/*
 * Helpers for postponed numbers.
 *
 * postponed_length is the number of digits in postponed value. 0
 * means there is no postponed number. -1 means there is a postponed
 * number, but it comes from a keyword, and it doesn't have digits.
 */
static int
get_postponed_length (struct state *state)
{
    return state->postponed_length;
}

/*
 * Consume a previously postponed number. Return true if a number was
 * in fact postponed, false otherwise. Store the postponed number's
 * value in *v, length in the input string in *n (or -1 if the number
 * was written out and parsed as a keyword), and the preceding
 * delimiter to *d. If a number was not postponed, *v, *n and *d are
 * unchanged.
 */
static bool
consume_postponed_number (struct state *state, int *v, int *n, char *d)
{
    if (! state->postponed_length)
	return false;

    if (n)
	*n = state->postponed_length;

    if (v)
	*v = state->postponed_value;

    if (d)
	*d = state->postponed_delim;

    state->postponed_length = 0;
    state->postponed_value = 0;
    state->postponed_delim = 0;

    return true;
}

static int parse_postponed_number (struct state *state, enum field next_field);

/*
 * Postpone a number to be handled later. If one exists already,
 * handle it first. n may be -1 to indicate a keyword that has no
 * number length.
 */
static int
set_postponed_number (struct state *state, int v, int n)
{
    int r;
    char d = state->delim;

    /* Parse a previously postponed number, if any. */
    r = parse_postponed_number (state, TM_NONE);
    if (r)
	return r;

    state->postponed_length = n;
    state->postponed_value = v;
    state->postponed_delim = d;

    return 0;
}

static void
set_delim (struct state *state, char delim)
{
    state->delim = delim;
}

static void
unset_delim (struct state *state)
{
    state->delim = 0;
}

/*
 * Field set/get/mod helpers.
 */

/* Return true if field has been set. */
static bool
is_field_set (struct state *state, enum field field)
{
    assert (field < ARRAY_SIZE (state->tm));

    return state->set[field] != FIELD_UNSET;
}

static void
unset_field (struct state *state, enum field field)
{
    assert (field < ARRAY_SIZE (state->tm));

    state->set[field] = FIELD_UNSET;
    state->tm[field] = 0;
}

/*
 * Set field to value. A field can only be set once to ensure the
 * input does not contain redundant and potentially conflicting data.
 */
static int
set_field (struct state *state, enum field field, int value)
{
    int r;

    /* Fields can only be set once. */
    if (is_field_set (state, field))
	return -PARSE_TIME_ERR_ALREADYSET;

    state->set[field] = FIELD_SET;

    /* Parse a previously postponed number, if any. */
    r = parse_postponed_number (state, field);
    if (r)
	return r;

    unset_delim (state);

    state->tm[field] = value;
    state->last_field = field;

    return 0;
}

/*
 * Mark n fields in fields to be set to the reference date/time in the
 * specified time zone, or local timezone if not specified. The fields
 * will be initialized after parsing is complete and timezone is
 * known.
 */
static int
set_fields_to_now (struct state *state, enum field *fields, size_t n)
{
    size_t i;
    int r;

    for (i = 0; i < n; i++) {
	r = set_field (state, fields[i], 0);
	if (r)
	    return r;
	state->set[fields[i]] = FIELD_NOW;
    }

    return 0;
}

/* Modify field by adding value to it. To be used on relative fields,
 * which can be modified multiple times (to accumulate). */
static int
add_to_field (struct state *state, enum field field, int value)
{
    int r;

    assert (field < ARRAY_SIZE (state->tm));

    state->set[field] = FIELD_SET;

    /* Parse a previously postponed number, if any. */
    r = parse_postponed_number (state, field);
    if (r)
	return r;

    unset_delim (state);

    state->tm[field] += value;
    state->last_field = field;

    return 0;
}

/*
 * Get field value. Make sure the field is set before query. It's most
 * likely an error to call this while parsing (for example fields set
 * as FIELD_NOW will only be set to some value after parsing).
 */
static int
get_field (struct state *state, enum field field)
{
    assert (field < ARRAY_SIZE (state->tm));

    return state->tm[field];
}

/*
 * Validity checkers.
 */
static bool
is_valid_12hour (int h)
{
    return h >= 1 && h <= 12;
}

static bool
is_valid_time (int h, int m, int s)
{
    /* Allow 24:00:00 to denote end of day. */
    if (h == 24 && m == 0 && s == 0)
	return true;

    return h >= 0 && h <= 23 && m >= 0 && m <= 59 && s >= 0 && s <= 59;
}

static bool
is_valid_mday (int mday)
{
    return mday >= 1 && mday <= 31;
}

static bool
is_valid_mon (int mon)
{
    return mon >= 1 && mon <= 12;
}

static bool
is_valid_year (int year)
{
    return year >= 1970;
}

static bool
is_valid_date (int year, int mon, int mday)
{
    return is_valid_year (year) && is_valid_mon (mon) && is_valid_mday (mday);
}

/* Unset indicator for time and date set helpers. */
#define UNSET -1

/* Time set helper. No input checking. Use UNSET (-1) to leave unset. */
static int
set_abs_time (struct state *state, int hour, int min, int sec)
{
    int r;

    if (hour != UNSET) {
	if ((r = set_field (state, TM_ABS_HOUR, hour)))
	    return r;
    }

    if (min != UNSET) {
	if ((r = set_field (state, TM_ABS_MIN, min)))
	    return r;
    }

    if (sec != UNSET) {
	if ((r = set_field (state, TM_ABS_SEC, sec)))
	    return r;
    }

    return 0;
}

/* Date set helper. No input checking. Use UNSET (-1) to leave unset. */
static int
set_abs_date (struct state *state, int year, int mon, int mday)
{
    int r;

    if (year != UNSET) {
	if ((r = set_field (state, TM_ABS_YEAR, year)))
	    return r;
    }

    if (mon != UNSET) {
	if ((r = set_field (state, TM_ABS_MON, mon)))
	    return r;
    }

    if (mday != UNSET) {
	if ((r = set_field (state, TM_ABS_MDAY, mday)))
	    return r;
    }

    return 0;
}

/*
 * Keyword parsing and handling.
 */
struct keyword;
typedef int (*setter_t)(struct state *state, struct keyword *kw);

struct keyword {
    const char *name;   /* keyword */
    enum field field;   /* field to set, or FIELD_NONE if N/A */
    int value;          /* value to set, or 0 if N/A */
    setter_t set;       /* function to use for setting, if non-NULL */
};

/*
 * Setter callback functions for keywords.
 */
static int
kw_set_rel (struct state *state, struct keyword *kw)
{
    int multiplier = 1;

    /* Get a previously set multiplier, if any. */
    consume_postponed_number (state, &multiplier, NULL, NULL);

    /* Accumulate relative field values. */
    return add_to_field (state, kw->field, multiplier * kw->value);
}

static int
kw_set_number (struct state *state, struct keyword *kw)
{
    /* -1 = no length, from keyword. */
    return set_postponed_number (state, kw->value, -1);
}

static int
kw_set_month (struct state *state, struct keyword *kw)
{
    int n = get_postponed_length (state);

    /* Consume postponed number if it could be mday. This handles "20
     * January". */
    if (n == 1 || n == 2) {
	int r, v;

	consume_postponed_number (state, &v, NULL, NULL);

	if (! is_valid_mday (v))
	    return -PARSE_TIME_ERR_INVALIDDATE;

	r = set_field (state, TM_ABS_MDAY, v);
	if (r)
	    return r;
    }

    return set_field (state, kw->field, kw->value);
}

static int
kw_set_ampm (struct state *state, struct keyword *kw)
{
    int n = get_postponed_length (state);

    /* Consume postponed number if it could be hour. This handles
     * "5pm". */
    if (n == 1 || n == 2) {
	int r, v;

	consume_postponed_number (state, &v, NULL, NULL);

	if (! is_valid_12hour (v))
	    return -PARSE_TIME_ERR_INVALIDTIME;

	r = set_abs_time (state, v, 0, 0);
	if (r)
	    return r;
    }

    return set_field (state, kw->field, kw->value);
}

static int
kw_set_timeofday (struct state *state, struct keyword *kw)
{
    return set_abs_time (state, kw->value, 0, 0);
}

static int
kw_set_today (struct state *state, unused (struct keyword *kw))
{
    enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };

    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
}

static int
kw_set_now (struct state *state, unused (struct keyword *kw))
{
    enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };

    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
}

static int
kw_set_ordinal (struct state *state, struct keyword *kw)
{
    int n, v;

    /* Require a postponed number. */
    if (! consume_postponed_number (state, &v, &n, NULL))
	return -PARSE_TIME_ERR_DATEFORMAT;

    /* Ordinals are mday. */
    if (n != 1 && n != 2)
	return -PARSE_TIME_ERR_DATEFORMAT;

    /* Be strict about st, nd, rd, and lax about th. */
    if (strcasecmp (kw->name, "st") == 0 && v != 1 && v != 21 && v != 31)
	return -PARSE_TIME_ERR_INVALIDDATE;
    else if (strcasecmp (kw->name, "nd") == 0 && v != 2 && v != 22)
	return -PARSE_TIME_ERR_INVALIDDATE;
    else if (strcasecmp (kw->name, "rd") == 0 && v != 3 && v != 23)
	return -PARSE_TIME_ERR_INVALIDDATE;
    else if (strcasecmp (kw->name, "th") == 0 && ! is_valid_mday (v))
	return -PARSE_TIME_ERR_INVALIDDATE;

    return set_field (state, TM_ABS_MDAY, v);
}

static int
kw_ignore (unused (struct state *state), unused (struct keyword *kw))
{
    return 0;
}

/*
 * Accepted keywords.
 *
 * A keyword may optionally contain a '|' to indicate the minimum
 * match length. Without one, full match is required. It's advisable
 * to keep the minimum match parts unique across all keywords. If
 * they're not, the first match wins.
 *
 * If keyword begins with '*', then the matching will be case
 * sensitive. Otherwise the matching is case insensitive.
 *
 * If .set is NULL, the field specified by .field will be set to
 * .value.
 *
 * Note: Observe how "m" and "mi" match minutes, "M" and "mo" and
 * "mont" match months, but "mon" matches Monday.
 */
static struct keyword keywords[] = {
    /* Weekdays. */
    { N_ ("sun|day"),    TM_WDAY,        0,      NULL },
    { N_ ("mon|day"),    TM_WDAY,        1,      NULL },
    { N_ ("tue|sday"),   TM_WDAY,        2,      NULL },
    { N_ ("wed|nesday"), TM_WDAY,        3,      NULL },
    { N_ ("thu|rsday"),  TM_WDAY,        4,      NULL },
    { N_ ("fri|day"),    TM_WDAY,        5,      NULL },
    { N_ ("sat|urday"),  TM_WDAY,        6,      NULL },

    /* Months. */
    { N_ ("jan|uary"),   TM_ABS_MON,     1,      kw_set_month },
    { N_ ("feb|ruary"),  TM_ABS_MON,     2,      kw_set_month },
    { N_ ("mar|ch"),     TM_ABS_MON,     3,      kw_set_month },
    { N_ ("apr|il"),     TM_ABS_MON,     4,      kw_set_month },
    { N_ ("may"),        TM_ABS_MON,     5,      kw_set_month },
    { N_ ("jun|e"),      TM_ABS_MON,     6,      kw_set_month },
    { N_ ("jul|y"),      TM_ABS_MON,     7,      kw_set_month },
    { N_ ("aug|ust"),    TM_ABS_MON,     8,      kw_set_month },
    { N_ ("sep|tember"), TM_ABS_MON,     9,      kw_set_month },
    { N_ ("oct|ober"),   TM_ABS_MON,     10,     kw_set_month },
    { N_ ("nov|ember"),  TM_ABS_MON,     11,     kw_set_month },
    { N_ ("dec|ember"),  TM_ABS_MON,     12,     kw_set_month },

    /* Durations. */
    { N_ ("y|ears"),     TM_REL_YEAR,    1,      kw_set_rel },
    { N_ ("mo|nths"),    TM_REL_MON,     1,      kw_set_rel },
    { N_ ("*M"),         TM_REL_MON,     1,      kw_set_rel },
    { N_ ("w|eeks"),     TM_REL_WEEK,    1,      kw_set_rel },
    { N_ ("d|ays"),      TM_REL_DAY,     1,      kw_set_rel },
    { N_ ("h|ours"),     TM_REL_HOUR,    1,      kw_set_rel },
    { N_ ("hr|s"),       TM_REL_HOUR,    1,      kw_set_rel },
    { N_ ("mi|nutes"),   TM_REL_MIN,     1,      kw_set_rel },
    { N_ ("mins"),       TM_REL_MIN,     1,      kw_set_rel },
    { N_ ("*m"),         TM_REL_MIN,     1,      kw_set_rel },
    { N_ ("s|econds"),   TM_REL_SEC,     1,      kw_set_rel },
    { N_ ("secs"),       TM_REL_SEC,     1,      kw_set_rel },

    /* Numbers. */
    { N_ ("one"),        TM_NONE,        1,      kw_set_number },
    { N_ ("two"),        TM_NONE,        2,      kw_set_number },
    { N_ ("three"),      TM_NONE,        3,      kw_set_number },
    { N_ ("four"),       TM_NONE,        4,      kw_set_number },
    { N_ ("five"),       TM_NONE,        5,      kw_set_number },
    { N_ ("six"),        TM_NONE,        6,      kw_set_number },
    { N_ ("seven"),      TM_NONE,        7,      kw_set_number },
    { N_ ("eight"),      TM_NONE,        8,      kw_set_number },
    { N_ ("nine"),       TM_NONE,        9,      kw_set_number },
    { N_ ("ten"),        TM_NONE,        10,     kw_set_number },
    { N_ ("dozen"),      TM_NONE,        12,     kw_set_number },
    { N_ ("hundred"),    TM_NONE,        100,    kw_set_number },

    /* Special number forms. */
    { N_ ("this"),       TM_NONE,        0,      kw_set_number },
    { N_ ("last"),       TM_NONE,        1,      kw_set_number },

    /* Other special keywords. */
    { N_ ("yesterday"),  TM_REL_DAY,     1,      kw_set_rel },
    { N_ ("today"),      TM_NONE,        0,      kw_set_today },
    { N_ ("now"),        TM_NONE,        0,      kw_set_now },
    { N_ ("noon"),       TM_NONE,        12,     kw_set_timeofday },
    { N_ ("midnight"),   TM_NONE,        0,      kw_set_timeofday },
    { N_ ("am"),         TM_AMPM,        0,      kw_set_ampm },
    { N_ ("a.m."),       TM_AMPM,        0,      kw_set_ampm },
    { N_ ("pm"),         TM_AMPM,        1,      kw_set_ampm },
    { N_ ("p.m."),       TM_AMPM,        1,      kw_set_ampm },
    { N_ ("st"),         TM_NONE,        0,      kw_set_ordinal },
    { N_ ("nd"),         TM_NONE,        0,      kw_set_ordinal },
    { N_ ("rd"),         TM_NONE,        0,      kw_set_ordinal },
    { N_ ("th"),         TM_NONE,        0,      kw_set_ordinal },
    { N_ ("ago"),        TM_NONE,        0,      kw_ignore },

    /* Timezone codes: offset in minutes. XXX: Add more codes. */
    { N_ ("pst"),        TM_TZ,          -8 * 60,  NULL },
    { N_ ("mst"),        TM_TZ,          -7 * 60,  NULL },
    { N_ ("cst"),        TM_TZ,          -6 * 60,  NULL },
    { N_ ("est"),        TM_TZ,          -5 * 60,  NULL },
    { N_ ("ast"),        TM_TZ,          -4 * 60,  NULL },
    { N_ ("nst"),        TM_TZ,          -(3 * 60 + 30),     NULL },

    { N_ ("gmt"),        TM_TZ,          0,      NULL },
    { N_ ("utc"),        TM_TZ,          0,      NULL },

    { N_ ("wet"),        TM_TZ,          0,      NULL },
    { N_ ("cet"),        TM_TZ,          1 * 60,   NULL },
    { N_ ("eet"),        TM_TZ,          2 * 60,   NULL },
    { N_ ("fet"),        TM_TZ,          3 * 60,   NULL },

    { N_ ("wat"),        TM_TZ,          1 * 60,   NULL },
    { N_ ("cat"),        TM_TZ,          2 * 60,   NULL },
    { N_ ("eat"),        TM_TZ,          3 * 60,   NULL },
};

/*
 * Compare strings str and keyword. Return the number of matching
 * chars on match, 0 for no match.
 *
 * All of the alphabetic characters (isalpha) in str up to the first
 * non-alpha character (or end of string) must match the
 * keyword. Consequently, the value returned on match is the number of
 * consecutive alphabetic characters in str.
 *
 * Abbreviated match is accepted if the keyword contains a '|'
 * character, and str matches keyword up to that character. Any alpha
 * characters after that in str must still match the keyword following
 * the '|' character. If no '|' is present, all of keyword must match.
 *
 * Excessive, consecutive, and misplaced (at the beginning or end) '|'
 * characters in keyword are handled gracefully. Only the first one
 * matters.
 *
 * If match_case is true, the matching is case sensitive.
 */
static size_t
match_keyword (const char *str, const char *keyword, bool match_case)
{
    const char *s = str;
    bool prefix_matched = false;

    for (;;) {
	while (*keyword == '|') {
	    prefix_matched = true;
	    keyword++;
	}

	if (! *s || ! isalpha ((unsigned char) *s) || ! *keyword)
	    break;

	if (match_case) {
	    if (*s != *keyword)
		return 0;
	} else {
	    if (tolower ((unsigned char) *s) !=
		tolower ((unsigned char) *keyword))
		return 0;
	}
	s++;
	keyword++;
    }

    /* did not match all of the keyword in input string */
    if (*s && isalpha ((unsigned char) *s))
	return 0;

    /* did not match enough of keyword */
    if (*keyword && ! prefix_matched)
	return 0;

    return s - str;
}

/*
 * Parse a keyword. Return < 0 on error, number of parsed chars on
 * success.
 */
static ssize_t
parse_keyword (struct state *state, const char *s)
{
    unsigned int i;
    size_t n = 0;
    struct keyword *kw = NULL;
    int r;

    for (i = 0; i < ARRAY_SIZE (keywords); i++) {
	const char *keyword = _ (keywords[i].name);
	bool mcase = false;

	/* Match case if keyword begins with '*'. */
	if (*keyword == '*') {
	    mcase = true;
	    keyword++;
	}

	n = match_keyword (s, keyword, mcase);
	if (n) {
	    kw = &keywords[i];
	    break;
	}
    }

    if (! kw)
	return -PARSE_TIME_ERR_KEYWORD;

    if (kw->set)
	r = kw->set (state, kw);
    else
	r = set_field (state, kw->field, kw->value);

    if (r < 0)
	return r;

    return n;
}

/*
 * Non-keyword parsers and their helpers.
 */

static int
set_user_tz (struct state *state, char sign, int hour, int min)
{
    int tz = hour * 60 + min;

    assert (sign == '+' || sign == '-');

    if (hour < 0 || hour > 14 || min < 0 || min > 59 || min % 15)
	return -PARSE_TIME_ERR_INVALIDTIME;

    if (sign == '-')
	tz = -tz;

    return set_field (state, TM_TZ, tz);
}

/*
 * Parse a previously postponed number if one exists. Independent
 * parsing of a postponed number when it wasn't consumed during
 * parsing of the following token.
 */
static int
parse_postponed_number (struct state *state, unused (enum field next_field))
{
    int v, n;
    char d;

    /* Bail out if there's no postponed number. */
    if (! consume_postponed_number (state, &v, &n, &d))
	return 0;

    if (n == 1 || n == 2) {
	/* Notable exception: Previous field affects parsing. This
	 * handles "January 20". */
	if (state->last_field == TM_ABS_MON) {
	    /* D[D] */
	    if (! is_valid_mday (v))
		return -PARSE_TIME_ERR_INVALIDDATE;

	    return set_field (state, TM_ABS_MDAY, v);
	} else if (n == 2) {
	    /* XXX: Only allow if last field is hour, min, or sec? */
	    if (d == '+' || d == '-') {
		/* +/-HH */
		return set_user_tz (state, d, v, 0);
	    }
	}
    } else if (n == 4) {
	/* Notable exception: Value affects parsing. Time zones are
	 * always at most 1400 and we don't understand years before
	 * 1970. */
	if (! is_valid_year (v)) {
	    if (d == '+' || d == '-') {
		/* +/-HHMM */
		return set_user_tz (state, d, v / 100, v % 100);
	    }
	} else {
	    /* YYYY */
	    return set_field (state, TM_ABS_YEAR, v);
	}
    } else if (n == 6) {
	/* HHMMSS */
	int hour = v / 10000;
	int min = (v / 100) % 100;
	int sec = v % 100;

	if (! is_valid_time (hour, min, sec))
	    return -PARSE_TIME_ERR_INVALIDTIME;

	return set_abs_time (state, hour, min, sec);
    } else if (n == 8) {
	/* YYYYMMDD */
	int year = v / 10000;
	int mon = (v / 100) % 100;
	int mday = v % 100;

	if (! is_valid_date (year, mon, mday))
	    return -PARSE_TIME_ERR_INVALIDDATE;

	return set_abs_date (state, year, mon, mday);
    }

    return -PARSE_TIME_ERR_FORMAT;
}

static int tm_get_field (const struct tm *tm, enum field field);

static int
set_timestamp (struct state *state, time_t t)
{
    struct tm tm;
    enum field f;
    int r;

    if (gmtime_r (&t, &tm) == NULL)
	return -PARSE_TIME_ERR_LIB;

    for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
	r = set_field (state, f, tm_get_field (&tm, f));
	if (r)
	    return r;
    }

    r = set_field (state, TM_TZ, 0);
    if (r)
	return r;

    /* XXX: Prevent TM_AMPM with timestamp, e.g. "@123456 pm" */

    return 0;
}

/* Parse a single number. Typically postpone parsing until later. */
static int
parse_single_number (struct state *state, unsigned long v,
		     unsigned long n)
{
    assert (n);

    if (state->delim == '@')
	return set_timestamp (state, (time_t) v);

    if (v > INT_MAX)
	return -PARSE_TIME_ERR_FORMAT;

    return set_postponed_number (state, v, n);
}

static bool
is_time_sep (char c)
{
    return c == ':';
}

static bool
is_date_sep (char c)
{
    return c == '/' || c == '-' || c == '.';
}

static bool
is_sep (char c)
{
    return is_time_sep (c) || is_date_sep (c);
}

/* Two-digit year: 00...69 is 2000s, 70...99 1900s, if n == 0 keep
 * unset. */
static int
expand_year (unsigned long year, size_t n)
{
    if (n == 2) {
	return (year < 70 ? 2000 : 1900) + year;
    } else if (n == 4) {
	return year;
    } else {
	return UNSET;
    }
}

/* Parse a date number triplet. */
static int
parse_date (struct state *state, char sep,
	    unsigned long v1, unsigned long v2, unsigned long v3,
	    size_t n1, size_t n2, size_t n3)
{
    int year = UNSET, mon = UNSET, mday = UNSET;

    assert (is_date_sep (sep));

    switch (sep) {
    case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
	if (n1 != 1 && n1 != 2)
	    return -PARSE_TIME_ERR_DATEFORMAT;

	if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
	    /* M[M]/D[D][/YY[YY]] */
	    year = expand_year (v3, n3);
	    mon = v1;
	    mday = v2;
	} else if (n2 == 4 && n3 == 0) {
	    /* M[M]/YYYY */
	    year = v2;
	    mon = v1;
	} else {
	    return -PARSE_TIME_ERR_DATEFORMAT;
	}
	break;

    case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
	if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
	    /* YYYY-MM[-DD] */
	    year = v1;
	    mon = v2;
	    if (n3)
		mday = v3;
	} else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
	    /* DD-MM[-YY[YY]] */
	    year = expand_year (v3, n3);
	    mon = v2;
	    mday = v1;
	} else if (n1 == 2 && n2 == 4 && n3 == 0) {
	    /* MM-YYYY */
	    year = v2;
	    mon = v1;
	} else {
	    return -PARSE_TIME_ERR_DATEFORMAT;
	}
	break;

    case '.': /* Date: D[D].M[M][.[YY[YY]]] */
	if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
	    (n3 != 0 && n3 != 2 && n3 != 4))
	    return -PARSE_TIME_ERR_DATEFORMAT;

	year = expand_year (v3, n3);
	mon = v2;
	mday = v1;
	break;
    }

    if (year != UNSET && ! is_valid_year (year))
	return -PARSE_TIME_ERR_INVALIDDATE;

    if (mon != UNSET && ! is_valid_mon (mon))
	return -PARSE_TIME_ERR_INVALIDDATE;

    if (mday != UNSET && ! is_valid_mday (mday))
	return -PARSE_TIME_ERR_INVALIDDATE;

    return set_abs_date (state, year, mon, mday);
}

/* Parse a time number triplet. */
static int
parse_time (struct state *state, char sep,
	    unsigned long v1, unsigned long v2, unsigned long v3,
	    size_t n1, size_t n2, size_t n3)
{
    assert (is_time_sep (sep));

    if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
	return -PARSE_TIME_ERR_TIMEFORMAT;

    /*
     * Notable exception: Previously set fields affect
     * parsing. Interpret (+|-)HH:MM as time zone only if hour and
     * minute have been set.
     *
     * XXX: This could be fixed by restricting the delimiters
     * preceding time. For '+' it would be justified, but for '-' it
     * might be inconvenient. However prefer to allow '-' as an
     * insignificant delimiter preceding time for convenience, and
     * handle '+' the same way for consistency between positive and
     * negative time zones.
     */
    if (is_field_set (state, TM_ABS_HOUR) &&
	is_field_set (state, TM_ABS_MIN) &&
	n1 == 2 && n2 == 2 && n3 == 0 &&
	(state->delim == '+' || state->delim == '-')) {
	return set_user_tz (state, state->delim, v1, v2);
    }

    if (! is_valid_time (v1, v2, n3 ? v3 : 0))
	return -PARSE_TIME_ERR_INVALIDTIME;

    return set_abs_time (state, v1, v2, n3 ? (int) v3 : UNSET);
}

/* strtoul helper that assigns length. */
static unsigned long
strtoul_len (const char *s, const char **endp, size_t *len)
{
    unsigned long val = strtoul (s, (char **) endp, 10);

    *len = *endp - s;
    return val;
}

/*
 * Parse a (group of) number(s). Return < 0 on error, number of parsed
 * chars on success.
 */
static ssize_t
parse_number (struct state *state, const char *s)
{
    int r;
    unsigned long v1, v2, v3 = 0;
    size_t n1, n2, n3 = 0;
    const char *p = s;
    char sep;

    v1 = strtoul_len (p, &p, &n1);

    if (! is_sep (*p) || ! isdigit ((unsigned char) *(p + 1))) {
	/* A single number. */
	r = parse_single_number (state, v1, n1);
	if (r)
	    return r;

	return p - s;
    }

    sep = *p;
    v2 = strtoul_len (p + 1, &p, &n2);

    /* A group of two or three numbers? */
    if (*p == sep && isdigit ((unsigned char) *(p + 1)))
	v3 = strtoul_len (p + 1, &p, &n3);

    if (is_time_sep (sep))
	r = parse_time (state, sep, v1, v2, v3, n1, n2, n3);
    else
	r = parse_date (state, sep, v1, v2, v3, n1, n2, n3);

    if (r)
	return r;

    return p - s;
}

/*
 * Parse delimiter(s). Throw away all except the last one, which is
 * stored for parsing the next non-delimiter. Return < 0 on error,
 * number of parsed chars on success.
 *
 * XXX: We might want to be more strict here.
 */
static ssize_t
parse_delim (struct state *state, const char *s)
{
    const char *p = s;

    /*
     * Skip non-alpha and non-digit, and store the last for further
     * processing.
     */
    while (*p && ! isalnum ((unsigned char) *p)) {
	set_delim (state, *p);
	p++;
    }

    return p - s;
}

/*
 * Parse a date/time string. Return < 0 on error, number of parsed
 * chars on success.
 */
static ssize_t
parse_input (struct state *state, const char *s)
{
    const char *p = s;
    ssize_t n;
    int r;

    while (*p) {
	if (isalpha ((unsigned char) *p)) {
	    n = parse_keyword (state, p);
	} else if (isdigit ((unsigned char) *p)) {
	    n = parse_number (state, p);
	} else {
	    n = parse_delim (state, p);
	}

	if (n <= 0) {
	    if (n == 0)
		n = -PARSE_TIME_ERR;

	    return n;
	}

	p += n;
    }

    /* Parse a previously postponed number, if any. */
    r = parse_postponed_number (state, TM_NONE);
    if (r < 0)
	return r;

    return p - s;
}

/*
 * Processing the parsed input.
 */

/*
 * Initialize reference time to tm. Use time zone in state if
 * specified, otherwise local time. Use now for reference time if
 * non-NULL, otherwise current time.
 */
static int
initialize_now (struct state *state, const time_t *ref, struct tm *tm)
{
    time_t t;

    if (ref) {
	t = *ref;
    } else {
	if (time (&t) == (time_t) -1)
	    return -PARSE_TIME_ERR_LIB;
    }

    if (is_field_set (state, TM_TZ)) {
	/* Some other time zone. */

	/* Adjust now according to the TZ. */
	t += get_field (state, TM_TZ) * 60;

	/* It's not gm, but this doesn't mess with the TZ. */
	if (gmtime_r (&t, tm) == NULL)
	    return -PARSE_TIME_ERR_LIB;
    } else {
	/* Local time. */
	if (localtime_r (&t, tm) == NULL)
	    return -PARSE_TIME_ERR_LIB;
    }

    return 0;
}

/*
 * Normalize tm according to mktime(3); if structure members are
 * outside their valid interval, they will be normalized (so that, for
 * example, 40 October is changed into 9 November), and tm_wday and
 * tm_yday are set to values determined from the contents of the other
 * fields.
 *
 * Both mktime(3) and localtime_r(3) use local time, but they cancel
 * each other out here, making this function agnostic to time zone.
 */
static int
normalize_tm (struct tm *tm)
{
    time_t t = mktime (tm);

    if (t == (time_t) -1)
	return -PARSE_TIME_ERR_LIB;

    if (! localtime_r (&t, tm))
	return -PARSE_TIME_ERR_LIB;

    return 0;
}

/* Get field out of a struct tm. */
static int
tm_get_field (const struct tm *tm, enum field field)
{
    switch (field) {
    case TM_ABS_SEC:    return tm->tm_sec;
    case TM_ABS_MIN:    return tm->tm_min;
    case TM_ABS_HOUR:   return tm->tm_hour;
    case TM_ABS_MDAY:   return tm->tm_mday;
    case TM_ABS_MON:    return tm->tm_mon + 1; /* 0- to 1-based */
    case TM_ABS_YEAR:   return 1900 + tm->tm_year;
    case TM_WDAY:       return tm->tm_wday;
    case TM_ABS_ISDST:  return tm->tm_isdst;
    default:
	assert (false);
	break;
    }

    return 0;
}

/* Modify hour according to am/pm setting. */
static int
fixup_ampm (struct state *state)
{
    int hour, hdiff = 0;

    if (! is_field_set (state, TM_AMPM))
	return 0;

    if (! is_field_set (state, TM_ABS_HOUR))
	return -PARSE_TIME_ERR_TIMEFORMAT;

    hour = get_field (state, TM_ABS_HOUR);
    if (! is_valid_12hour (hour))
	return -PARSE_TIME_ERR_INVALIDTIME;

    if (get_field (state, TM_AMPM)) {
	/* 12pm is noon. */
	if (hour != 12)
	    hdiff = 12;
    } else {
	/* 12am is midnight, beginning of day. */
	if (hour == 12)
	    hdiff = -12;
    }

    add_to_field (state, TM_REL_HOUR, -hdiff);

    return 0;
}

/* Combine absolute and relative fields, and round. */
static int
create_output (struct state *state, time_t *t_out, const time_t *ref,
	       int round)
{
    struct tm tm = { .tm_isdst = -1 };
    struct tm now;
    time_t t;
    enum field f;
    int r;
    int week_round = PARSE_TIME_NO_ROUND;

    r = initialize_now (state, ref, &now);
    if (r)
	return r;

    /* Initialize fields flagged as "now" to reference time. */
    for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
	if (state->set[f] == FIELD_NOW) {
	    state->tm[f] = tm_get_field (&now, f);
	    state->set[f] = FIELD_SET;
	}
    }

    /*
     * If WDAY is set but MDAY is not, we consider WDAY relative
     *
     * XXX: This fails on stuff like "two months monday" because two
     * months ago wasn't the same day as today. Postpone until we know
     * date?
     */
    if (is_field_set (state, TM_WDAY) &&
	! is_field_set (state, TM_ABS_MDAY)) {
	int wday = get_field (state, TM_WDAY);
	int today = tm_get_field (&now, TM_WDAY);
	int rel_days;

	if (today > wday)
	    rel_days = today - wday;
	else
	    rel_days = today + 7 - wday;

	/* This also prevents special week rounding from happening. */
	add_to_field (state, TM_REL_DAY, rel_days);

	unset_field (state, TM_WDAY);
    }

    r = fixup_ampm (state);
    if (r)
	return r;

    /*
     * Iterate fields from most accurate to least accurate, and set
     * unset fields according to requested rounding.
     */
    for (f = TM_ABS_SEC; f != TM_NONE; f = next_abs_field (f)) {
	if (round != PARSE_TIME_NO_ROUND) {
	    enum field r = abs_to_rel_field (f);

	    if (is_field_set (state, f) || is_field_set (state, r)) {
		if (round >= PARSE_TIME_ROUND_UP && f != TM_ABS_SEC) {
		    /*
		     * This is the most accurate field
		     * specified. Round up adjusting it towards
		     * future.
		     */
		    add_to_field (state, r, -1);

		    /*
		     * Go back a second if the result is to be used
		     * for inclusive comparisons.
		     */
		    if (round == PARSE_TIME_ROUND_UP_INCLUSIVE)
			add_to_field (state, TM_REL_SEC, 1);
		}
		round = PARSE_TIME_NO_ROUND; /* No more rounding. */
	    } else {
		if (f == TM_ABS_MDAY &&
		    is_field_set (state, TM_REL_WEEK)) {
		    /* Week is most accurate. */
		    week_round = round;
		    round = PARSE_TIME_NO_ROUND;
		} else {
		    set_field (state, f, get_field_epoch_value (f));
		}
	    }
	}

	if (! is_field_set (state, f))
	    set_field (state, f, tm_get_field (&now, f));
    }

    /* Special case: rounding with week accuracy. */
    if (week_round != PARSE_TIME_NO_ROUND) {
	/* Temporarily set more accurate fields to now. */
	set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
	set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
	set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
	set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
    }

    /*
     * Set all fields. They may contain out of range values before
     * normalization by mktime(3).
     */
    tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
    tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
    tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
    tm.tm_mday = get_field (state, TM_ABS_MDAY) -
		 get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
    tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
    tm.tm_mon--; /* 1- to 0-based */
    tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;

    /*
     * It's always normal time.
     *
     * XXX: This is probably not a solution that universally
     * works. Just make sure DST is not taken into account. We don't
     * want rounding to be affected by DST.
     */
    tm.tm_isdst = -1;

    /* Special case: rounding with week accuracy. */
    if (week_round != PARSE_TIME_NO_ROUND) {
	/* Normalize to get proper tm.wday. */
	r = normalize_tm (&tm);
	if (r < 0)
	    return r;

	/* Set more accurate fields back to zero. */
	tm.tm_sec = 0;
	tm.tm_min = 0;
	tm.tm_hour = 0;
	tm.tm_isdst = -1;

	/* Monday is the true 1st day of week, but this is easier. */
	if (week_round >= PARSE_TIME_ROUND_UP) {
	    tm.tm_mday += 7 - tm.tm_wday;
	    if (week_round == PARSE_TIME_ROUND_UP_INCLUSIVE)
		tm.tm_sec--;
	} else {
	    tm.tm_mday -= tm.tm_wday;
	}
    }

    if (is_field_set (state, TM_TZ)) {
	/* tm is in specified TZ, convert to UTC for timegm(3). */
	tm.tm_min -= get_field (state, TM_TZ);
	t = timegm (&tm);
    } else {
	/* tm is in local time. */
	t = mktime (&tm);
    }

    if (t == (time_t) -1)
	return -PARSE_TIME_ERR_LIB;

    *t_out = t;

    return 0;
}

/* Internally, all errors are < 0. parse_time_string() returns errors > 0. */
#define EXTERNAL_ERR(r) (-r)

int
parse_time_string (const char *s, time_t *t, const time_t *ref, int round)
{
    struct state state = { .last_field = TM_NONE };
    int r;

    if (! s || ! t)
	return EXTERNAL_ERR (-PARSE_TIME_ERR);

    r = parse_input (&state, s);
    if (r < 0)
	return EXTERNAL_ERR (r);

    r = create_output (&state, t, ref, round);
    if (r < 0)
	return EXTERNAL_ERR (r);

    return 0;
}