From e26a2bf48bea7de68fa421c96330928dce07de39 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 19 Oct 2009 13:04:59 -0700 Subject: [PATCH] date.c: Remove a bunch of undesired code. We're only interested in the date-parsing code here. --- date.c | 1805 -------------------------------------------------------- 1 file changed, 1805 deletions(-) diff --git a/date.c b/date.c index c8e5c14a..c6cbd813 100644 --- a/date.c +++ b/date.c @@ -74,19 +74,6 @@ #define d(x) - -/** - * SECTION: gmime-utils - * @title: gmime-utils - * @short_description: MIME utility functions - * @see_also: - * - * Utility functions to parse, encode and decode various MIME tokens - * and encodings. - **/ - -extern gboolean _g_mime_enable_rfc2047_workarounds (void); - #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2) /* date parser macros */ @@ -106,11 +93,6 @@ extern gboolean _g_mime_enable_rfc2047_workarounds (void); #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6) #define DATE_TOKEN_HAS_SIGN (1 << 7) -static unsigned char tohex[16] = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' -}; - static unsigned char gmime_datetok_table[256] = { 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, @@ -161,40 +143,6 @@ static char *tm_days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; - -/** - * g_mime_utils_header_format_date: - * @date: time_t date representation - * @tz_offset: Timezone offset - * - * Allocates a string buffer containing the rfc822 formatted date - * string represented by @time and @tz_offset. - * - * Returns: a valid string representation of the date. - **/ -char * -g_mime_utils_header_format_date (time_t date, int tz_offset) -{ - struct tm tm; - - date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60; - -#if defined (HAVE_GMTIME_R) - gmtime_r (&date, &tm); -#elif defined (HAVE_GMTIME_S) - gmtime_s (&tm, &date); -#else - memcpy (&tm, gmtime (&date), sizeof (tm)); -#endif - - return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d", - tm_days[tm.tm_wday], tm.tm_mday, - tm_months[tm.tm_mon], - tm.tm_year + 1900, - tm.tm_hour, tm.tm_min, tm.tm_sec, - tz_offset); -} - /* This is where it gets ugly... */ typedef struct _date_token { @@ -758,1756 +706,3 @@ g_mime_utils_header_decode_date (const char *str, int *tz_offset) return date; } - - -/** - * g_mime_utils_generate_message_id: - * @fqdn: Fully qualified domain name - * - * Generates a unique Message-Id. - * - * Returns: a unique string in an addr-spec format suitable for use as - * a Message-Id. - **/ -char * -g_mime_utils_generate_message_id (const char *fqdn) -{ -#ifdef G_THREADS_ENABLED - static GStaticMutex mutex = G_STATIC_MUTEX_INIT; -#define MUTEX_LOCK() g_static_mutex_lock (&mutex) -#define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex) -#else -#define MUTEX_LOCK() -#define MUTEX_UNLOCK() -#endif - static unsigned long int count = 0; - const char *hostname = NULL; - char *name = NULL; - char *msgid; - - if (!fqdn) { -#ifdef HAVE_UTSNAME_DOMAINNAME - struct utsname unam; - - uname (&unam); - - hostname = unam.nodename; - - if (unam.domainname[0]) - name = g_strdup_printf ("%s.%s", hostname, unam.domainname); -#else /* ! HAVE_UTSNAME_DOMAINNAME */ - char host[MAXHOSTNAMELEN + 1]; - -#ifdef HAVE_GETHOSTNAME - host[MAXHOSTNAMELEN] = '\0'; - if (gethostname (host, MAXHOSTNAMELEN) == 0) { -#ifdef HAVE_GETDOMAINNAME - size_t domainlen = MAXHOSTNAMELEN; - char *domain; - int rv; - - domain = g_malloc (domainlen); - - while ((rv = getdomainname (domain, domainlen)) == -1 && errno == EINVAL) { - domainlen += MAXHOSTNAMELEN; - domain = g_realloc (domain, domainlen); - } - - if (rv == 0 && domain[0]) { - if (host[0]) { - name = g_strdup_printf ("%s.%s", host, domain); - g_free (domain); - } else { - name = domain; - } - } -#endif /* HAVE_GETDOMAINNAME */ - } else { - host[0] = '\0'; - } -#endif /* HAVE_GETHOSTNAME */ - hostname = host; -#endif /* HAVE_UTSNAME_DOMAINNAME */ - -#ifdef HAVE_GETADDRINFO - if (!name && hostname[0]) { - /* we weren't able to get a domain name */ - struct addrinfo hints, *res; - - memset (&hints, 0, sizeof (hints)); - hints.ai_flags = AI_CANONNAME; - - if (getaddrinfo (hostname, NULL, &hints, &res) == 0) { - name = g_strdup (res->ai_canonname); - freeaddrinfo (res); - } - } -#endif /* HAVE_GETADDRINFO */ - - fqdn = name != NULL ? name : (hostname[0] ? hostname : "localhost.localdomain"); - } - - MUTEX_LOCK (); - msgid = g_strdup_printf ("%lu.%lu.%lu@%s", (unsigned long int) time (NULL), - (unsigned long int) getpid (), count++, fqdn); - MUTEX_UNLOCK (); - - g_free (name); - - return msgid; -} - -static char * -decode_addrspec (const char **in) -{ - const char *word, *inptr; - GString *addrspec; - char *str; - - decode_lwsp (in); - inptr = *in; - - if (!(word = decode_word (&inptr))) { - w(g_warning ("No local-part in addr-spec: %s", *in)); - return NULL; - } - - addrspec = g_string_new (""); - g_string_append_len (addrspec, word, (size_t) (inptr - word)); - - /* get the rest of the local-part */ - decode_lwsp (&inptr); - while (*inptr == '.') { - g_string_append_c (addrspec, *inptr++); - if ((word = decode_word (&inptr))) { - g_string_append_len (addrspec, word, (size_t) (inptr - word)); - decode_lwsp (&inptr); - } else { - w(g_warning ("Invalid local-part in addr-spec: %s", *in)); - goto exception; - } - } - - /* we should be at the '@' now... */ - if (*inptr++ != '@') { - w(g_warning ("Invalid addr-spec; missing '@': %s", *in)); - goto exception; - } - - g_string_append_c (addrspec, '@'); - if (!decode_domain (&inptr, addrspec)) { - w(g_warning ("No domain in addr-spec: %s", *in)); - goto exception; - } - - str = addrspec->str; - g_string_free (addrspec, FALSE); - - *in = inptr; - - return str; - - exception: - - g_string_free (addrspec, TRUE); - - return NULL; -} - -static char * -decode_msgid (const char **in) -{ - const char *inptr = *in; - char *msgid = NULL; - - decode_lwsp (&inptr); - if (*inptr != '<') { - w(g_warning ("Invalid msg-id; missing '<': %s", *in)); - } else { - inptr++; - } - - decode_lwsp (&inptr); - if ((msgid = decode_addrspec (&inptr))) { - decode_lwsp (&inptr); - if (*inptr != '>') { - w(g_warning ("Invalid msg-id; missing '>': %s", *in)); - } else { - inptr++; - } - - *in = inptr; - } else { - w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in)); - *in = inptr; - while (*inptr && *inptr != '>') - inptr++; - - msgid = g_strndup (*in, (size_t) (inptr - *in)); - *in = inptr; - } - - return msgid; -} - - -/** - * g_mime_utils_decode_message_id: - * @message_id: string containing a message-id - * - * Decodes a msg-id as defined by rfc822. - * - * Returns: the addr-spec portion of the msg-id. - **/ -char * -g_mime_utils_decode_message_id (const char *message_id) -{ - g_return_val_if_fail (message_id != NULL, NULL); - - return decode_msgid (&message_id); -} - - -/** - * g_mime_references_decode: - * @text: string containing a list of msg-ids - * - * Decodes a list of msg-ids as in the References and/or In-Reply-To - * headers defined in rfc822. - * - * Returns: a list of referenced msg-ids. - **/ -GMimeReferences * -g_mime_references_decode (const char *text) -{ - GMimeReferences *refs, *tail, *ref; - const char *word, *inptr = text; - char *msgid; - - g_return_val_if_fail (text != NULL, NULL); - - refs = NULL; - tail = (GMimeReferences *) &refs; - - while (*inptr) { - decode_lwsp (&inptr); - if (*inptr == '<') { - /* looks like a msg-id */ - if ((msgid = decode_msgid (&inptr))) { - ref = g_new (GMimeReferences, 1); - ref->next = NULL; - ref->msgid = msgid; - tail->next = ref; - tail = ref; - } else { - w(g_warning ("Invalid References header: %s", inptr)); - break; - } - } else if (*inptr) { - /* looks like part of a phrase */ - if (!(word = decode_word (&inptr))) { - w(g_warning ("Invalid References header: %s", inptr)); - break; - } - } - } - - return refs; -} - - -/** - * g_mime_references_append: - * @refs: the address of a #GMimeReferences list - * @msgid: a message-id string - * - * Appends a reference to msgid to the list of references. - **/ -void -g_mime_references_append (GMimeReferences **refs, const char *msgid) -{ - GMimeReferences *ref; - - g_return_if_fail (refs != NULL); - g_return_if_fail (msgid != NULL); - - ref = (GMimeReferences *) refs; - while (ref->next) - ref = ref->next; - - ref->next = g_new (GMimeReferences, 1); - ref->next->msgid = g_strdup (msgid); - ref->next->next = NULL; -} - - -/** - * g_mime_references_free: - * @refs: a #GMimeReferences list - * - * Frees the #GMimeReferences list. - **/ -void -g_mime_references_free (GMimeReferences *refs) -{ - GMimeReferences *ref, *next; - - ref = refs; - while (ref) { - next = ref->next; - g_free (ref->msgid); - g_free (ref); - ref = next; - } -} - - -/** - * g_mime_references_clear: - * @refs: address of a #GMimeReferences list - * - * Clears the #GMimeReferences list and resets it to %NULL. - **/ -void -g_mime_references_clear (GMimeReferences **refs) -{ - g_return_if_fail (refs != NULL); - - g_mime_references_free (*refs); - *refs = NULL; -} - - -/** - * g_mime_references_get_next: - * @ref: a #GMimeReferences list - * - * Advances to the next reference node in the #GMimeReferences list. - * - * Returns: the next reference node in the #GMimeReferences list. - **/ -const GMimeReferences * -g_mime_references_get_next (const GMimeReferences *ref) -{ - return ref ? ref->next : NULL; -} - - -/** - * g_mime_references_get_message_id: - * @ref: a #GMimeReferences list - * - * Gets the Message-Id reference from the #GMimeReferences node. - * - * Returns: the Message-Id reference from the #GMimeReferences node. - **/ -const char * -g_mime_references_get_message_id (const GMimeReferences *ref) -{ - return ref ? ref->msgid : NULL; -} - - -static gboolean -is_rfc2047_token (const char *inptr, size_t len) -{ - if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0) - return FALSE; - - inptr += 2; - len -= 2; - - /* skip past the charset */ - while (*inptr != '?' && len > 0) { - inptr++; - len--; - } - - if (*inptr != '?' || len < 4) - return FALSE; - - if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B') - return FALSE; - - inptr += 2; - len -= 2; - - if (*inptr != '?') - return FALSE; - - return TRUE; -} - -static char * -header_fold (const char *in, gboolean structured) -{ - gboolean last_was_lwsp = FALSE; - register const char *inptr; - size_t len, outlen, i; - size_t fieldlen; - GString *out; - char *ret; - - inptr = in; - len = strlen (in); - if (len <= GMIME_FOLD_LEN + 1) - return g_strdup (in); - - out = g_string_new (""); - fieldlen = strcspn (inptr, ": \t\n"); - g_string_append_len (out, inptr, fieldlen); - outlen = fieldlen; - inptr += fieldlen; - - while (*inptr && *inptr != '\n') { - len = strcspn (inptr, " \t\n"); - - if (len > 1 && outlen + len > GMIME_FOLD_LEN) { - if (outlen > 1 && out->len > fieldlen + 2) { - if (last_was_lwsp) { - if (structured) - out->str[out->len - 1] = '\t'; - - g_string_insert_c (out, out->len - 1, '\n'); - } else - g_string_append (out, "\n\t"); - outlen = 1; - } - - if (!structured && !is_rfc2047_token (inptr, len)) { - /* check for very long words, just cut them up */ - while (outlen + len > GMIME_FOLD_LEN) { - for (i = 0; i < GMIME_FOLD_LEN - outlen; i++) - g_string_append_c (out, inptr[i]); - inptr += GMIME_FOLD_LEN - outlen; - len -= GMIME_FOLD_LEN - outlen; - g_string_append (out, "\n\t"); - outlen = 1; - } - } else { - g_string_append_len (out, inptr, len); - outlen += len; - inptr += len; - } - last_was_lwsp = FALSE; - } else if (len > 0) { - g_string_append_len (out, inptr, len); - outlen += len; - inptr += len; - last_was_lwsp = FALSE; - } else { - last_was_lwsp = TRUE; - if (*inptr == '\t') { - /* tabs are a good place to fold, odds - are that this is where the previous - mailer folded it */ - g_string_append (out, "\n\t"); - outlen = 1; - while (is_blank (*inptr)) - inptr++; - } else { - g_string_append_c (out, *inptr++); - outlen++; - } - } - } - - if (*inptr == '\n' && out->str[out->len - 1] != '\n') - g_string_append_c (out, '\n'); - - ret = out->str; - g_string_free (out, FALSE); - - return ret; -} - - -/** - * g_mime_utils_structured_header_fold: - * @str: input string - * - * Folds a structured header according to the rules in rfc822. - * - * Returns: an allocated string containing the folded header. - **/ -char * -g_mime_utils_structured_header_fold (const char *str) -{ - return header_fold (str, TRUE); -} - - -/** - * g_mime_utils_unstructured_header_fold: - * @str: input string - * - * Folds an unstructured header according to the rules in rfc822. - * - * Returns: an allocated string containing the folded header. - **/ -char * -g_mime_utils_unstructured_header_fold (const char *str) -{ - return header_fold (str, FALSE); -} - - -/** - * g_mime_utils_header_fold: - * @str: input string - * - * Folds a structured header according to the rules in rfc822. - * - * Returns: an allocated string containing the folded header. - **/ -char * -g_mime_utils_header_fold (const char *str) -{ - return header_fold (str, TRUE); -} - - -/** - * g_mime_utils_header_printf: - * @format: string format - * @Varargs: arguments - * - * Allocates a buffer containing a formatted header specified by the - * @Varargs. - * - * Returns: an allocated string containing the folded header specified - * by @format and the following arguments. - **/ -char * -g_mime_utils_header_printf (const char *format, ...) -{ - char *buf, *ret; - va_list ap; - - va_start (ap, format); - buf = g_strdup_vprintf (format, ap); - va_end (ap); - - ret = header_fold (buf, TRUE); - g_free (buf); - - return ret; -} - -static gboolean -need_quotes (const char *string) -{ - gboolean quoted = FALSE; - const char *inptr; - - inptr = string; - - while (*inptr) { - if (*inptr == '\\') - inptr++; - else if (*inptr == '"') - quoted = !quoted; - else if (!quoted && (is_tspecial (*inptr) || *inptr == '.')) - return TRUE; - - if (*inptr) - inptr++; - } - - return FALSE; -} - -/** - * g_mime_utils_quote_string: - * @str: input string - * - * Quotes @string as needed according to the rules in rfc2045. - * - * Returns: an allocated string containing the escaped and quoted (if - * needed to be) input string. The decision to quote the string is - * based on whether or not the input string contains any 'tspecials' - * as defined by rfc2045. - **/ -char * -g_mime_utils_quote_string (const char *str) -{ - gboolean quote; - const char *c; - char *qstring; - GString *out; - - out = g_string_new (""); - - if ((quote = need_quotes (str))) - g_string_append_c (out, '"'); - - for (c = str; *c; c++) { - if ((*c == '"' && quote) || *c == '\\') - g_string_append_c (out, '\\'); - - g_string_append_c (out, *c); - } - - if (quote) - g_string_append_c (out, '"'); - - qstring = out->str; - g_string_free (out, FALSE); - - return qstring; -} - - -/** - * g_mime_utils_unquote_string: - * @str: input string - * - * Unquotes and unescapes a string. - **/ -void -g_mime_utils_unquote_string (char *str) -{ - /* if the string is quoted, unquote it */ - register char *inptr = str; - int escaped = FALSE; - int quoted = FALSE; - - if (!str) - return; - - while (*inptr) { - if (*inptr == '\\') { - if (escaped) - *str++ = *inptr++; - else - inptr++; - escaped = !escaped; - } else if (*inptr == '"') { - if (escaped) { - *str++ = *inptr++; - escaped = FALSE; - } else { - quoted = !quoted; - inptr++; - } - } else { - *str++ = *inptr++; - escaped = FALSE; - } - } - - *str = '\0'; -} - - -/** - * g_mime_utils_text_is_8bit: - * @text: text to check for 8bit chars - * @len: text length - * - * Determines if @text contains 8bit characters within the first @len - * bytes. - * - * Returns: %TRUE if the text contains 8bit characters or %FALSE - * otherwise. - **/ -gboolean -g_mime_utils_text_is_8bit (const unsigned char *text, size_t len) -{ - register const unsigned char *inptr; - const unsigned char *inend; - - g_return_val_if_fail (text != NULL, FALSE); - - inend = text + len; - for (inptr = text; *inptr && inptr < inend; inptr++) - if (*inptr > (unsigned char) 127) - return TRUE; - - return FALSE; -} - - -/** - * g_mime_utils_best_encoding: - * @text: text to encode - * @len: text length - * - * Determines the best content encoding for the first @len bytes of - * @text. - * - * Returns: a #GMimeContentEncoding that is determined to be the best - * encoding type for the specified block of text. ("best" in this - * particular case means smallest output size) - **/ -GMimeContentEncoding -g_mime_utils_best_encoding (const unsigned char *text, size_t len) -{ - const unsigned char *ch, *inend; - size_t count = 0; - - inend = text + len; - for (ch = text; ch < inend; ch++) - if (*ch > (unsigned char) 127) - count++; - - if ((float) count <= len * 0.17) - return GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE; - else - return GMIME_CONTENT_ENCODING_BASE64; -} - - -/** - * charset_convert: - * @cd: iconv converter - * @inbuf: input text buffer to convert - * @inleft: length of the input buffer - * @outp: pointer to output buffer - * @outlenp: pointer to output buffer length - * @ninval: the number of invalid bytes in @inbuf - * - * Converts the input buffer from one charset to another using the - * @cd. On completion, @outp will point to the output buffer - * containing the converted text (nul-terminated), @outlenp will be - * the size of the @outp buffer (note: not the strlen() of @outp) and - * @ninval will contain the number of bytes which could not be - * converted. - * - * Bytes which cannot be converted from @inbuf will appear as '?' - * characters in the output buffer. - * - * If *@outp is non-NULL, then it is assumed that it points to a - * pre-allocated buffer of length *@outlenp. This is done so that the - * same output buffer can be reused multiple times. - * - * Returns: the string length of the output buffer. - **/ -static size_t -charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size_t *outlenp, size_t *ninval) -{ - size_t outlen, outleft, rc, n = 0; - char *outbuf, *out; - - if (*outp == NULL) { - outleft = outlen = (inleft * 2) + 16; - outbuf = out = g_malloc (outlen + 1); - } else { - outleft = outlen = *outlenp; - outbuf = out = *outp; - } - - do { - rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft); - if (rc == (size_t) -1) { - if (errno == EINVAL) { - /* incomplete sequence at the end of the input buffer */ - n += inleft; - break; - } - -#ifdef G_OS_WIN32 - /* seems that GnuWin32's libiconv 1.9 does not set errno in - * the E2BIG case, so we have to fake it */ - if (outleft <= inleft) - errno = E2BIG; -#endif - - if (errno == E2BIG) { - /* need to grow the output buffer */ - outlen += (inleft * 2) + 16; - rc = (size_t) (outbuf - out); - out = g_realloc (out, outlen + 1); - outleft = outlen - rc; - outbuf = out + rc; - } else { - /* invalid byte(-sequence) in the input buffer */ - *outbuf++ = '?'; - outleft--; - inleft--; - inbuf++; - n++; - } - } - } while (inleft > 0); - - iconv (cd, NULL, NULL, &outbuf, &outleft); - *outbuf++ = '\0'; - - *outlenp = outlen; - *outp = out; - *ninval = n; - - return (outbuf - out); -} - - -#define USER_CHARSETS_INCLUDE_UTF8 (1 << 0) -#define USER_CHARSETS_INCLUDE_LOCALE (1 << 1) - - -/** - * g_mime_utils_decode_8bit: - * @text: input text in unknown 8bit/multibyte character set - * @len: input text length - * - * Attempts to convert text in an unknown 8bit/multibyte charset into - * UTF-8 by finding the charset which will convert the most bytes into - * valid UTF-8 characters as possible. If no exact match can be found, - * it will choose the best match and convert invalid byte sequences - * into question-marks (?) in the returned string buffer. - * - * Returns: a UTF-8 string representation of @text. - **/ -char * -g_mime_utils_decode_8bit (const char *text, size_t len) -{ - const char **charsets, **user_charsets, *locale, *best; - size_t outleft, outlen, min, ninval; - unsigned int included = 0; - iconv_t cd; - char *out; - int i = 0; - - g_return_val_if_fail (text != NULL, NULL); - - locale = g_mime_locale_charset (); - if (locale && !g_ascii_strcasecmp (locale, "UTF-8")) - included |= USER_CHARSETS_INCLUDE_LOCALE; - - if ((user_charsets = g_mime_user_charsets ())) { - while (user_charsets[i]) - i++; - } - - charsets = g_alloca (sizeof (char *) * (i + 3)); - i = 0; - - if (user_charsets) { - while (user_charsets[i]) { - /* keep a record of whether or not the user-supplied - * charsets include UTF-8 and/or the default fallback - * charset so that we avoid doubling our efforts for - * these 2 charsets. We could have used a hash table - * to keep track of unique charsets, but we can - * (hopefully) assume that user_charsets is a unique - * list of charsets with no duplicates. */ - if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8")) - included |= USER_CHARSETS_INCLUDE_UTF8; - - if (locale && !g_ascii_strcasecmp (user_charsets[i], locale)) - included |= USER_CHARSETS_INCLUDE_LOCALE; - - charsets[i] = user_charsets[i]; - i++; - } - } - - if (!(included & USER_CHARSETS_INCLUDE_UTF8)) - charsets[i++] = "UTF-8"; - - if (!(included & USER_CHARSETS_INCLUDE_LOCALE)) - charsets[i++] = locale; - - charsets[i] = NULL; - - min = len; - best = charsets[0]; - - outleft = (len * 2) + 16; - out = g_malloc (outleft + 1); - - for (i = 0; charsets[i]; i++) { - if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1) - continue; - - outlen = charset_convert (cd, text, len, &out, &outleft, &ninval); - - g_mime_iconv_close (cd); - - if (ninval == 0) - return g_realloc (out, outlen + 1); - - if (ninval < min) { - best = charsets[i]; - min = ninval; - } - } - - /* if we get here, then none of the charsets fit the 8bit text flawlessly... - * try to find the one that fit the best and use that to convert what we can, - * replacing any byte we can't convert with a '?' */ - - if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) { - /* this shouldn't happen... but if we are here, then - * it did... the only thing we can do at this point - * is replace the 8bit garbage and pray */ - register const char *inptr = text; - const char *inend = inptr + len; - char *outbuf = out; - - while (inptr < inend) { - if (is_ascii (*inptr)) - *outbuf++ = *inptr++; - else - *outbuf++ = '?'; - } - - *outbuf++ = '\0'; - - return g_realloc (out, (size_t) (outbuf - out)); - } - - outlen = charset_convert (cd, text, len, &out, &outleft, &ninval); - - g_mime_iconv_close (cd); - - return g_realloc (out, outlen + 1); -} - - -/* this decodes rfc2047's version of quoted-printable */ -static ssize_t -quoted_decode (const unsigned char *in, size_t len, unsigned char *out) -{ - register const unsigned char *inptr; - register unsigned char *outptr; - const unsigned char *inend; - unsigned char c, c1; - - inend = in + len; - outptr = out; - - inptr = in; - while (inptr < inend) { - c = *inptr++; - if (c == '=') { - if (inend - inptr >= 2) { - c = toupper (*inptr++); - c1 = toupper (*inptr++); - *outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4) - | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') & 0x0f); - } else { - /* data was truncated */ - return -1; - } - } else if (c == '_') { - /* _'s are an rfc2047 shortcut for encoding spaces */ - *outptr++ = ' '; - } else { - *outptr++ = c; - } - } - - return (ssize_t) (outptr - out); -} - -#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) - -static char * -rfc2047_decode_word (const char *in, size_t inlen) -{ - const unsigned char *instart = (const unsigned char *) in; - const register unsigned char *inptr = instart + 2; - const unsigned char *inend = instart + inlen - 2; - unsigned char *decoded; - const char *charset; - size_t len, ninval; - char *charenc, *p; - guint32 save = 0; - ssize_t declen; - int state = 0; - iconv_t cd; - char *buf; - - /* skip over the charset */ - if (!(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?') - return NULL; - - inptr++; - - switch (*inptr) { - case 'B': - case 'b': - inptr += 2; - len = (size_t) (inend - inptr); - decoded = g_alloca (len); - declen = g_mime_encoding_base64_decode_step (inptr, len, decoded, &state, &save); - - if (declen == -1) { - d(fprintf (stderr, "encountered broken 'Q' encoding\n")); - return NULL; - } - break; - case 'Q': - case 'q': - inptr += 2; - len = (size_t) (inend - inptr); - decoded = g_alloca (len); - declen = quoted_decode (inptr, len, decoded); - - if (declen == -1) { - d(fprintf (stderr, "encountered broken 'Q' encoding\n")); - return NULL; - } - break; - default: - d(fprintf (stderr, "unknown encoding\n")); - return NULL; - } - - len = (inptr - 3) - (instart + 2); - charenc = g_alloca (len + 1); - memcpy (charenc, in + 2, len); - charenc[len] = '\0'; - charset = charenc; - - /* rfc2231 updates rfc2047 encoded words... - * The ABNF given in RFC 2047 for encoded-words is: - * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" - * This specification changes this ABNF to: - * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" - */ - - /* trim off the 'language' part if it's there... */ - if ((p = strchr (charset, '*'))) - *p = '\0'; - - /* slight optimization? */ - if (!g_ascii_strcasecmp (charset, "UTF-8")) { - p = (char *) decoded; - len = declen; - - //while (!g_utf8_validate (p, len, (const char **) &p)) { - // len = declen - (p - (char *) decoded); - // *p = '?'; - //} - - return g_strndup ((char *) decoded, declen); - } - - if (!charset[0] || (cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) { - w(g_warning ("Cannot convert from %s to UTF-8, header display may " - "be corrupt: %s", charset[0] ? charset : "unspecified charset", - g_strerror (errno))); - - return g_mime_utils_decode_8bit ((char *) decoded, declen); - } - - len = declen; - buf = g_malloc (len + 1); - - charset_convert (cd, (char *) decoded, declen, &buf, &len, &ninval); - - g_mime_iconv_close (cd); - -#if w(!)0 - if (ninval > 0) { - g_warning ("Failed to completely convert \"%.*s\" to UTF-8, display may be " - "corrupt: %s", declen, decoded, g_strerror (errno)); - } -#endif - - return buf; -} - - -/** - * g_mime_utils_header_decode_text: - * @text: header text to decode - * - * Decodes an rfc2047 encoded 'text' header. - * - * Note: See g_mime_set_user_charsets() for details on how charset - * conversion is handled for unencoded 8bit text and/or wrongly - * specified rfc2047 encoded-word tokens. - * - * Returns: a newly allocated UTF-8 string representing the the decoded - * header. - **/ -char * -g_mime_utils_header_decode_text (const char *text) -{ - gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds (); - register const char *inptr = text; - gboolean encoded = FALSE; - const char *lwsp, *word; - size_t nlwsp, n; - gboolean ascii; - char *decoded; - GString *out; - - if (text == NULL) - return g_strdup (""); - - out = g_string_sized_new (strlen (text) + 1); - - while (*inptr != '\0') { - lwsp = inptr; - while (is_lwsp (*inptr)) - inptr++; - - nlwsp = (size_t) (inptr - lwsp); - - if (*inptr != '\0') { - word = inptr; - ascii = TRUE; - - if (enable_rfc2047_workarounds) { - if (!strncmp (inptr, "=?", 2)) { - inptr += 2; - - /* skip past the charset (if one is even declared, sigh) */ - while (*inptr && *inptr != '?') { - ascii = ascii && is_ascii (*inptr); - inptr++; - } - - /* sanity check encoding type */ - if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?') - goto non_rfc2047; - - inptr += 3; - - /* find the end of the rfc2047 encoded word token */ - while (*inptr && strncmp (inptr, "?=", 2) != 0) { - ascii = ascii && is_ascii (*inptr); - inptr++; - } - - if (!strncmp (inptr, "?=", 2)) - inptr += 2; - } else { - non_rfc2047: - /* stop if we encounter a possible rfc2047 encoded - * token even if it's inside another word, sigh. */ - while (*inptr && !is_lwsp (*inptr) && - strncmp (inptr, "=?", 2) != 0) { - ascii = ascii && is_ascii (*inptr); - inptr++; - } - } - } else { - while (*inptr && !is_lwsp (*inptr)) { - ascii = ascii && is_ascii (*inptr); - inptr++; - } - } - - n = (size_t) (inptr - word); - if (is_rfc2047_encoded_word (word, n)) { - if ((decoded = rfc2047_decode_word (word, n))) { - /* rfc2047 states that you must ignore all - * whitespace between encoded words */ - if (!encoded) - g_string_append_len (out, lwsp, nlwsp); - - g_string_append (out, decoded); - g_free (decoded); - - encoded = TRUE; - } else { - /* append lwsp and invalid rfc2047 encoded-word token */ - g_string_append_len (out, lwsp, nlwsp + n); - encoded = FALSE; - } - } else { - /* append lwsp */ - g_string_append_len (out, lwsp, nlwsp); - - /* append word token */ - if (!ascii) { - /* *sigh* I hate broken mailers... */ - decoded = g_mime_utils_decode_8bit (word, n); - g_string_append (out, decoded); - g_free (decoded); - } else { - g_string_append_len (out, word, n); - } - - encoded = FALSE; - } - } else { - /* appending trailing lwsp */ - g_string_append_len (out, lwsp, nlwsp); - break; - } - } - - decoded = out->str; - g_string_free (out, FALSE); - - return decoded; -} - - -/** - * g_mime_utils_header_decode_phrase: - * @phrase: header to decode - * - * Decodes an rfc2047 encoded 'phrase' header. - * - * Note: See g_mime_set_user_charsets() for details on how charset - * conversion is handled for unencoded 8bit text and/or wrongly - * specified rfc2047 encoded-word tokens. - * - * Returns: a newly allocated UTF-8 string representing the the decoded - * header. - **/ -char * -g_mime_utils_header_decode_phrase (const char *phrase) -{ - register const char *inptr = phrase; - gboolean encoded = FALSE; - const char *lwsp, *text; - size_t nlwsp, n; - gboolean ascii; - char *decoded; - GString *out; - - if (phrase == NULL) - return g_strdup (""); - - out = g_string_sized_new (strlen (phrase) + 1); - - while (*inptr != '\0') { - lwsp = inptr; - while (is_lwsp (*inptr)) - inptr++; - - nlwsp = (size_t) (inptr - lwsp); - - text = inptr; - if (is_atom (*inptr)) { - while (is_atom (*inptr)) - inptr++; - - n = (size_t) (inptr - text); - if (is_rfc2047_encoded_word (text, n)) { - if ((decoded = rfc2047_decode_word (text, n))) { - /* rfc2047 states that you must ignore all - * whitespace between encoded words */ - if (!encoded) - g_string_append_len (out, lwsp, nlwsp); - - g_string_append (out, decoded); - g_free (decoded); - - encoded = TRUE; - } else { - /* append lwsp and invalid rfc2047 encoded-word token */ - g_string_append_len (out, lwsp, nlwsp + n); - encoded = FALSE; - } - } else { - /* append lwsp and atom token */ - g_string_append_len (out, lwsp, nlwsp + n); - encoded = FALSE; - } - } else { - g_string_append_len (out, lwsp, nlwsp); - - ascii = TRUE; - while (*inptr && !is_lwsp (*inptr)) { - ascii = ascii && is_ascii (*inptr); - inptr++; - } - - n = (size_t) (inptr - text); - - if (!ascii) { - /* *sigh* I hate broken mailers... */ - decoded = g_mime_utils_decode_8bit (text, n); - g_string_append (out, decoded); - g_free (decoded); - } else { - g_string_append_len (out, text, n); - } - - encoded = FALSE; - } - } - - decoded = out->str; - g_string_free (out, FALSE); - - return decoded; -} - - -/* rfc2047 version of quoted-printable */ -static size_t -quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask) -{ - register const unsigned char *inptr = (const unsigned char *) in; - const unsigned char *inend = inptr + len; - register unsigned char *outptr = out; - unsigned char c; - - while (inptr < inend) { - c = *inptr++; - if (c == ' ') { - *outptr++ = '_'; - } else if (c != '_' && gmime_special_table[c] & safemask) { - *outptr++ = c; - } else { - *outptr++ = '='; - *outptr++ = tohex[(c >> 4) & 0xf]; - *outptr++ = tohex[c & 0xf]; - } - } - - return (outptr - out); -} - -static void -rfc2047_encode_word (GString *string, const char *word, size_t len, - const char *charset, gushort safemask) -{ - register char *inptr, *outptr; - iconv_t cd = (iconv_t) -1; - unsigned char *encoded; - size_t enclen, pos; - char *uword = NULL; - guint32 save = 0; - int state = 0; - char encoding; - - if (g_ascii_strcasecmp (charset, "UTF-8") != 0) - cd = g_mime_iconv_open (charset, "UTF-8"); - - if (cd != (iconv_t) -1) { - uword = g_mime_iconv_strndup (cd, (char *) word, len); - g_mime_iconv_close (cd); - } - - if (uword) { - len = strlen (uword); - word = uword; - } else { - charset = "UTF-8"; - } - - switch (g_mime_utils_best_encoding ((const unsigned char *) word, len)) { - case GMIME_CONTENT_ENCODING_BASE64: - enclen = GMIME_BASE64_ENCODE_LEN (len); - encoded = g_alloca (enclen + 1); - - encoding = 'b'; - - pos = g_mime_encoding_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save); - encoded[pos] = '\0'; - - /* remove \n chars as headers need to be wrapped differently */ - if (G_UNLIKELY ((inptr = strchr ((char *) encoded, '\n')))) { - outptr = inptr++; - while (G_LIKELY (*inptr)) { - if (G_LIKELY (*inptr != '\n')) - *outptr++ = *inptr; - - inptr++; - } - - *outptr = '\0'; - } - - break; - case GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE: - enclen = GMIME_QP_ENCODE_LEN (len); - encoded = g_alloca (enclen + 1); - - encoding = 'q'; - - pos = quoted_encode (word, len, encoded, safemask); - encoded[pos] = '\0'; - - break; - default: - encoded = NULL; - encoding = '\0'; - g_assert_not_reached (); - } - - g_free (uword); - - g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded); -} - - -typedef enum { - WORD_ATOM, - WORD_QSTRING, - WORD_2047 -} rfc822_word_t; - -typedef struct _rfc822_word { - struct _rfc822_word *next; - const char *start, *end; - rfc822_word_t type; - int encoding; -} rfc822_word; - -#define rfc822_word_free(word) g_slice_free (rfc822_word, word) -#define rfc822_word_new() g_slice_new (rfc822_word) - -/* okay, so 'unstructured text' fields don't actually contain 'word' - * tokens, but we can group stuff similarly... */ -static rfc822_word * -rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase) -{ - rfc822_word *words, *tail, *word; - rfc822_word_t type = WORD_ATOM; - const char *inptr, *start, *last; - int count = 0, encoding = 0; - - words = NULL; - tail = (rfc822_word *) &words; - - last = start = inptr = in; - while (inptr && *inptr) { - const char *newinptr; - gunichar c; - - newinptr = g_utf8_next_char (inptr); - c = g_utf8_get_char (inptr); - if (newinptr == NULL || !g_unichar_validate (c)) { - w(g_warning ("Invalid UTF-8 sequence encountered")); - inptr++; - continue; - } - - inptr = newinptr; - - if (c < 256 && is_lwsp (c)) { - if (count > 0) { - word = rfc822_word_new (); - word->next = NULL; - word->start = start; - word->end = last; - word->type = type; - word->encoding = encoding; - - tail->next = word; - tail = word; - count = 0; - } - - start = inptr; - type = WORD_ATOM; - encoding = 0; - } else { - count++; - if (phrase && c < 128) { - /* phrases can have qstring words */ - if (!is_atom (c)) - type = MAX (type, WORD_QSTRING); - } else if (c > 127 && c < 256) { - type = WORD_2047; - encoding = MAX (encoding, 1); - } else if (c >= 256) { - type = WORD_2047; - encoding = 2; - } - - if (count >= GMIME_FOLD_PREENCODED) { - word = rfc822_word_new (); - word->next = NULL; - word->start = start; - word->end = inptr; - word->type = type; - word->encoding = encoding; - - tail->next = word; - tail = word; - count = 0; - - /* Note: don't reset 'type' as it - * needs to be preserved when breaking - * long words */ - start = inptr; - encoding = 0; - } - } - - last = inptr; - } - - if (count > 0) { - word = rfc822_word_new (); - word->next = NULL; - word->start = start; - word->end = last; - word->type = type; - word->encoding = encoding; - - tail->next = word; - tail = word; - } - -#if d(!)0 - printf ("rfc822 word tokens:\n"); - word = words; - while (word) { - printf ("\t'%.*s'; type=%d, encoding=%d\n", - word->end - word->start, word->start, - word->type, word->encoding); - - word = word->next; - } -#endif - - return words; -} - -#define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8)) - -static gboolean -should_merge_words (rfc822_word *word, rfc822_word *next) -{ - switch (word->type) { - case WORD_ATOM: - if (next->type == WORD_2047) - return FALSE; - - return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, next->type)); - case WORD_QSTRING: - /* avoid merging with words that need to be rfc2047 encoded */ - if (next->type == WORD_2047) - return FALSE; - - return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_QSTRING)); - case WORD_2047: - if (next->type == WORD_ATOM) { - /* whether we merge or not is dependent upon: - * 1. the number of atoms in a row after 'word' - * 2. if there is another encword after the string of atoms. - */ - int natoms = 0; - - while (next && next->type == WORD_ATOM) { - next = next->next; - natoms++; - } - - /* if all the words after the encword are atoms, don't merge */ - if (!next || natoms > 3) - return FALSE; - } - - /* avoid merging with qstrings */ - if (next->type == WORD_QSTRING) - return FALSE; - - return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_2047)); - default: - return FALSE; - } -} - -static void -rfc2047_encode_merge_rfc822_words (rfc822_word **wordsp) -{ - rfc822_word *word, *next, *words = *wordsp; - - /* first pass: merge qstrings with adjacent qstrings and encwords with adjacent encwords */ - word = words; - while (word && word->next) { - next = word->next; - - if (word->type != WORD_ATOM && word->type == next->type && - MERGED_WORD_LT_FOLDLEN (next->end - word->start, word->type)) { - /* merge the words */ - word->encoding = MAX (word->encoding, next->encoding); - - word->end = next->end; - word->next = next->next; - - rfc822_word_free (next); - - next = word; - } - - word = next; - } - - /* second pass: now merge atoms with the other words */ - word = words; - while (word && word->next) { - next = word->next; - - if (should_merge_words (word, next)) { - /* the resulting word type is the MAX of the 2 types */ - word->type = MAX (word->type, next->type); - - word->encoding = MAX (word->encoding, next->encoding); - - word->end = next->end; - word->next = next->next; - - rfc822_word_free (next); - - continue; - } - - word = next; - } - - *wordsp = words; -} - -static void -g_string_append_len_quoted (GString *out, const char *in, size_t len) -{ - register const char *inptr; - const char *inend; - - g_string_append_c (out, '"'); - - inptr = in; - inend = in + len; - - while (inptr < inend) { - if (*inptr == '"' || *inptr == '\\') - g_string_append_c (out, '\\'); - - g_string_append_c (out, *inptr); - - inptr++; - } - - g_string_append_c (out, '"'); -} - -static char * -rfc2047_encode (const char *in, gushort safemask) -{ - rfc822_word *words, *word, *prev = NULL; - const char **charsets, *charset; - const char *start; - GMimeCharset mask; - GString *out; - char *outstr; - size_t len; - int i; - - if (!(words = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE))) - return g_strdup (in); - - rfc2047_encode_merge_rfc822_words (&words); - - charsets = g_mime_user_charsets (); - - out = g_string_new (""); - - /* output words now with spaces between them */ - word = words; - while (word) { - /* append correct number of spaces between words */ - if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) { - /* one or both of the words are not encoded so we write the spaces out untouched */ - len = word->start - prev->end; - g_string_append_len (out, prev->end, len); - } - - switch (word->type) { - case WORD_ATOM: - g_string_append_len (out, word->start, (size_t) (word->end - word->start)); - break; - case WORD_QSTRING: - g_assert (safemask & IS_PSAFE); - g_string_append_len_quoted (out, word->start, (size_t) (word->end - word->start)); - break; - case WORD_2047: - if (prev && prev->type == WORD_2047) { - /* include the whitespace chars between these 2 words in the - resulting rfc2047 encoded word. */ - len = word->end - prev->end; - start = prev->end; - - /* encoded words need to be separated by linear whitespace */ - g_string_append_c (out, ' '); - } else { - len = word->end - word->start; - start = word->start; - } - - switch (word->encoding) { - case 0: /* us-ascii */ - rfc2047_encode_word (out, start, len, "us-ascii", safemask); - break; - case 1: /* iso-8859-1 */ - rfc2047_encode_word (out, start, len, "iso-8859-1", safemask); - break; - default: - charset = NULL; - g_mime_charset_init (&mask); - g_mime_charset_step (&mask, start, len); - - for (i = 0; charsets && charsets[i]; i++) { - if (g_mime_charset_can_encode (&mask, charsets[i], start, len)) { - charset = charsets[i]; - break; - } - } - - if (!charset) - charset = g_mime_charset_best_name (&mask); - - rfc2047_encode_word (out, start, len, charset, safemask); - break; - } - - break; - } - - rfc822_word_free (prev); - - prev = word; - word = word->next; - } - - rfc822_word_free (prev); - - outstr = out->str; - g_string_free (out, FALSE); - - return outstr; -} - - -/** - * g_mime_utils_header_encode_phrase: - * @phrase: phrase to encode - * - * Encodes a 'phrase' header according to the rules in rfc2047. - * - * Returns: the encoded 'phrase'. Useful for encoding internet - * addresses. - **/ -char * -g_mime_utils_header_encode_phrase (const char *phrase) -{ - if (phrase == NULL) - return NULL; - - return rfc2047_encode (phrase, IS_PSAFE); -} - - -/** - * g_mime_utils_header_encode_text: - * @text: text to encode - * - * Encodes a 'text' header according to the rules in rfc2047. - * - * Returns: the encoded header. Useful for encoding - * headers like "Subject". - **/ -char * -g_mime_utils_header_encode_text (const char *text) -{ - if (text == NULL) - return NULL; - - return rfc2047_encode (text, IS_ESAFE); -}