mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-11-26 04:45:20 +01:00
4f9aa77d80
As authorized by LGPL-2 term (3).
2513 lines
55 KiB
C
2513 lines
55 KiB
C
/* date.c - Date-parsing utility for the notmuch mail system.
|
|
*
|
|
* Copyright © 2000-2009 Jeffrey Stedfast
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see http://www.gnu.org/licenses/
|
|
*/
|
|
|
|
/* This code was originally written by Jeffrey Stedfast
|
|
* as part of his GMime library (http://spruce.sourceforge.net/gmime/)
|
|
*
|
|
* Carl Worth <cworth@cworth.org> imported it into notmuch and removed
|
|
* some glib-isms.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <glib.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#ifdef HAVE_SYS_PARAM_H
|
|
#include <sys/param.h> /* for MAXHOSTNAMELEN */
|
|
#else
|
|
#define MAXHOSTNAMELEN 64
|
|
#endif
|
|
#ifdef HAVE_UTSNAME_DOMAINNAME
|
|
#include <sys/utsname.h> /* for uname() */
|
|
#endif
|
|
#include <sys/types.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h> /* Unix header for getpid() */
|
|
#endif
|
|
#ifdef G_OS_WIN32
|
|
#include <winsock2.h>
|
|
#include <ws2tcpip.h>
|
|
#include <process.h>
|
|
#define getpid() _getpid()
|
|
#endif
|
|
#ifdef HAVE_NETDB_H
|
|
#include <netdb.h>
|
|
#endif
|
|
#include <ctype.h>
#include <errno.h>
#include <limits.h>
|
|
|
|
#include "gmime-utils.h"
|
|
#include "gmime-table-private.h"
|
|
#include "gmime-parse-utils.h"
|
|
#include "gmime-part.h"
|
|
#include "gmime-charset.h"
|
|
#include "gmime-iconv.h"
|
|
#include "gmime-iconv-utils.h"
|
|
|
|
#ifdef ENABLE_WARNINGS
|
|
#define w(x) x
|
|
#else
|
|
#define w(x)
|
|
#endif /* ENABLE_WARNINGS */
|
|
|
|
#define d(x)
|
|
|
|
|
|
/**
|
|
* SECTION: gmime-utils
|
|
* @title: gmime-utils
|
|
* @short_description: MIME utility functions
|
|
* @see_also:
|
|
*
|
|
* Utility functions to parse, encode and decode various MIME tokens
|
|
* and encodings.
|
|
**/
|
|
|
|
extern gboolean _g_mime_enable_rfc2047_workarounds (void);
|
|
|
|
#define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
|
|
|
|
/* date parser macros */
|
|
#define NUMERIC_CHARS "1234567890"
|
|
#define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
|
|
#define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
|
|
#define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
|
|
#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
|
|
#define TIME_CHARS "1234567890:"
|
|
|
|
#define DATE_TOKEN_NON_NUMERIC (1 << 0)
|
|
#define DATE_TOKEN_NON_WEEKDAY (1 << 1)
|
|
#define DATE_TOKEN_NON_MONTH (1 << 2)
|
|
#define DATE_TOKEN_NON_TIME (1 << 3)
|
|
#define DATE_TOKEN_HAS_COLON (1 << 4)
|
|
#define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
|
|
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
|
|
#define DATE_TOKEN_HAS_SIGN (1 << 7)
|
|
|
|
/* Upper-case hexadecimal digit for each nibble value 0..15. */
static unsigned char tohex[16] = {
	'0', '1', '2', '3',
	'4', '5', '6', '7',
	'8', '9', 'A', 'B',
	'C', 'D', 'E', 'F'
};
|
|
|
|
/* Per-byte classification masks used by the date tokenizer.  Each entry is
 * an OR of DATE_TOKEN_* bits: a NON_* bit is set when the byte can NOT occur
 * in that kind of token, and the HAS_* bits flag ':' and sign characters.
 * The values are the output of gmime_datetok_table_init() (compiled out). */
static unsigned char gmime_datetok_table[256] = {
	/* 0x00 */ 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0x10 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0x20 */ 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
	/* 0x30 */  38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
	/* 0x40 */ 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
	/* 0x50 */  79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
	/* 0x60 */ 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
	/* 0x70 */ 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
	/* 0x80 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0x90 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xa0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xb0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xc0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xd0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xe0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
	/* 0xf0 */ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
};
|
|
|
|
/* Named timezones recognised by the date parser.  Offsets are written the
 * same way as numeric zones (+/-HHMM as a decimal integer), so -500 means
 * five hours behind UTC.  Only UT/GMT, the US zones and a few single-letter
 * military zones are listed; anything else makes get_tzone() report failure.
 * The table is read-only, hence const. */
static const struct {
	const char *name;
	int offset;
} tz_offsets [] = {
	{ "UT", 0 },
	{ "GMT", 0 },
	{ "EST", -500 }, /* US timezones */
	{ "EDT", -400 },
	{ "CST", -600 },
	{ "CDT", -500 },
	{ "MST", -700 },
	{ "MDT", -600 },
	{ "PST", -800 },
	{ "PDT", -700 },
	{ "Z", 0 },      /* single-letter (military) zones */
	{ "A", -100 },
	{ "M", -1200 },
	{ "N", 100 },
	{ "Y", 1200 },
};
|
|
|
|
/* Three-letter English month abbreviations, indexed by struct tm's tm_mon
 * (0 = January).  Read-only, so both the array and the strings are const. */
static const char * const tm_months[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
|
|
|
|
/* Three-letter English weekday abbreviations, indexed by struct tm's
 * tm_wday (0 = Sunday).  Read-only, so array and strings are const. */
static const char * const tm_days[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_format_date:
|
|
* @date: time_t date representation
|
|
* @tz_offset: Timezone offset
|
|
*
|
|
* Allocates a string buffer containing the rfc822 formatted date
|
|
* string represented by @time and @tz_offset.
|
|
*
|
|
* Returns: a valid string representation of the date.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_format_date (time_t date, int tz_offset)
|
|
{
|
|
struct tm tm;
|
|
|
|
date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
|
|
|
|
#if defined (HAVE_GMTIME_R)
|
|
gmtime_r (&date, &tm);
|
|
#elif defined (HAVE_GMTIME_S)
|
|
gmtime_s (&tm, &date);
|
|
#else
|
|
memcpy (&tm, gmtime (&date), sizeof (tm));
|
|
#endif
|
|
|
|
return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
|
|
tm_days[tm.tm_wday], tm.tm_mday,
|
|
tm_months[tm.tm_mon],
|
|
tm.tm_year + 1900,
|
|
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
|
tz_offset);
|
|
}
|
|
|
|
/* This is where it gets ugly... */
|
|
|
|
/* One lexical token of a date string, kept in a singly linked list.
 * The token text is NOT copied: start points into the caller's string and
 * len gives its length, so it is not NUL-terminated. */
typedef struct _date_token {
	struct _date_token *next;   /* following token, or NULL at the end */
	unsigned char mask;         /* OR of gmime_datetok_table[] entries for every byte */
	const char *start;          /* first byte of the token */
	size_t len;                 /* number of bytes in the token */
} date_token;

/* tokens are allocated from and returned to the GLib slice allocator */
#define date_token_free(tok) g_slice_free (date_token, tok)
#define date_token_new() g_slice_new (date_token)
|
|
|
|
static date_token *
|
|
datetok (const char *date)
|
|
{
|
|
date_token *tokens = NULL, *token, *tail = (date_token *) &tokens;
|
|
const char *start, *end;
|
|
unsigned char mask;
|
|
|
|
start = date;
|
|
while (*start) {
|
|
/* kill leading whitespace */
|
|
while (*start == ' ' || *start == '\t')
|
|
start++;
|
|
|
|
if (*start == '\0')
|
|
break;
|
|
|
|
mask = gmime_datetok_table[(unsigned char) *start];
|
|
|
|
/* find the end of this token */
|
|
end = start + 1;
|
|
while (*end && !strchr ("-/,\t\r\n ", *end))
|
|
mask |= gmime_datetok_table[(unsigned char) *end++];
|
|
|
|
if (end != start) {
|
|
token = date_token_new ();
|
|
token->next = NULL;
|
|
token->start = start;
|
|
token->len = end - start;
|
|
token->mask = mask;
|
|
|
|
tail->next = token;
|
|
tail = token;
|
|
}
|
|
|
|
if (*end)
|
|
start = end + 1;
|
|
else
|
|
break;
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
|
|
/* Parse the first @inlen bytes of @in as a decimal integer with an
 * optional leading '+' or '-'.
 *
 * Returns the value, or -1 if a non-digit is encountered or the value
 * does not fit in an int (the input comes from message headers, so
 * arbitrarily long digit runs must not overflow).  Note that -1 is also
 * what a literal "-1" decodes to; callers treat -1 as failure.  An empty
 * input, or a lone sign, decodes to 0. */
static int
decode_int (const char *in, size_t inlen)
{
	const char *cur = in;
	const char *stop = in + inlen;
	int negative = 0;
	int val = 0;
	
	if (inlen > 0) {
		if (*cur == '-') {
			negative = 1;
			cur++;
		} else if (*cur == '+') {
			cur++;
		}
	}
	
	for ( ; cur < stop; cur++) {
		int digit;
		
		if (*cur < '0' || *cur > '9')
			return -1;
		
		digit = *cur - '0';
		
		/* would val * 10 + digit exceed INT_MAX? */
		if (val > (INT_MAX - digit) / 10)
			return -1;
		
		val = (val * 10) + digit;
	}
	
	return negative ? -val : val;
}
|
|
|
|
#if 0
/* Number of days in @month (1 = January .. 12 = December) of @year,
 * or 0 when @month is out of range.  Currently compiled out. */
static int
get_days_in_month (int month, int year)
{
	static const int days[12] = {
		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
	};
	
	if (month < 1 || month > 12)
		return 0;
	
	if (month == 2 && g_date_is_leap_year (year))
		return 29;
	
	return days[month - 1];
}
#endif
|
|
|
|
static int
|
|
get_wday (const char *in, size_t inlen)
|
|
{
|
|
int wday;
|
|
|
|
g_return_val_if_fail (in != NULL, -1);
|
|
|
|
if (inlen < 3)
|
|
return -1;
|
|
|
|
for (wday = 0; wday < 7; wday++) {
|
|
if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
|
|
return wday;
|
|
}
|
|
|
|
return -1; /* unknown week day */
|
|
}
|
|
|
|
/* Decode a day-of-month token.  Returns the decoded value when it lies in
 * 0..31, otherwise -1 (which also covers a decode_int() failure). */
static int
get_mday (const char *in, size_t inlen)
{
	int value;
	
	g_return_val_if_fail (in != NULL, -1);
	
	value = decode_int (in, inlen);
	
	return (value >= 0 && value <= 31) ? value : -1;
}
|
|
|
|
static int
|
|
get_month (const char *in, size_t inlen)
|
|
{
|
|
int i;
|
|
|
|
g_return_val_if_fail (in != NULL, -1);
|
|
|
|
if (inlen < 3)
|
|
return -1;
|
|
|
|
for (i = 0; i < 12; i++) {
|
|
if (!g_ascii_strncasecmp (in, tm_months[i], 3))
|
|
return i;
|
|
}
|
|
|
|
return -1; /* unknown month */
|
|
}
|
|
|
|
/* Decode a year token.  Values below 100 are treated as two-digit years:
 * anything below 70 is placed in the 2000s, the rest in the 1900s.
 * Returns the full year, or -1 if the token does not decode or the
 * resulting year is earlier than 1969. */
static int
get_year (const char *in, size_t inlen)
{
	int value;
	
	g_return_val_if_fail (in != NULL, -1);
	
	value = decode_int (in, inlen);
	if (value == -1)
		return -1;
	
	if (value < 100) {
		if (value < 70)
			value += 2000;
		else
			value += 1900;
	}
	
	return value < 1969 ? -1 : value;
}
|
|
|
|
static gboolean
|
|
get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
|
|
{
|
|
register const char *inptr;
|
|
int *val, colons = 0;
|
|
const char *inend;
|
|
|
|
*hour = *min = *sec = 0;
|
|
|
|
inend = in + inlen;
|
|
val = hour;
|
|
for (inptr = in; inptr < inend; inptr++) {
|
|
if (*inptr == ':') {
|
|
colons++;
|
|
switch (colons) {
|
|
case 1:
|
|
val = min;
|
|
break;
|
|
case 2:
|
|
val = sec;
|
|
break;
|
|
default:
|
|
return FALSE;
|
|
}
|
|
} else if (!(*inptr >= '0' && *inptr <= '9'))
|
|
return FALSE;
|
|
else
|
|
*val = (*val * 10) + (*inptr - '0');
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static int
|
|
get_tzone (date_token **token)
|
|
{
|
|
const char *inptr, *inend;
|
|
size_t inlen;
|
|
int i, t;
|
|
|
|
for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
|
|
inptr = (*token)->start;
|
|
inlen = (*token)->len;
|
|
inend = inptr + inlen;
|
|
|
|
if (*inptr == '+' || *inptr == '-') {
|
|
return decode_int (inptr, inlen);
|
|
} else {
|
|
if (*inptr == '(') {
|
|
inptr++;
|
|
if (*(inend - 1) == ')')
|
|
inlen -= 2;
|
|
else
|
|
inlen--;
|
|
}
|
|
|
|
for (t = 0; t < 15; t++) {
|
|
size_t len = strlen (tz_offsets[t].name);
|
|
|
|
if (len != inlen)
|
|
continue;
|
|
|
|
if (!strncmp (inptr, tz_offsets[t].name, len))
|
|
return tz_offsets[t].offset;
|
|
}
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
/* Convert the broken-down time @tm to a time_t as though its fields were
 * expressed in UTC.  mktime() interprets @tm as local time, so the local
 * zone's displacement is determined (by whichever mechanism the platform
 * offers) and subtracted back out.  @tm is normalised by mktime() as a
 * side effect and its tm_isdst is overwritten. */
static time_t
mktime_utc (struct tm *tm)
{
	time_t local;
	long west;	/* correction subtracted from mktime()'s result */
	
	/* let mktime() work out whether DST is in effect */
	tm->tm_isdst = -1;
	local = mktime (tm);
	
#if defined (G_OS_WIN32)
	_get_timezone (&west);
	if (tm->tm_isdst > 0) {
		int bias;
		
		_get_dstbias (&bias);
		west += bias;
	}
#elif defined (HAVE_TM_GMTOFF)
	west = -tm->tm_gmtoff;
#elif defined (HAVE_TIMEZONE)
	if (tm->tm_isdst > 0) {
#if defined (HAVE_ALTZONE)
		west = altzone;
#else /* !defined (HAVE_ALTZONE) */
		west = (timezone - 3600);
#endif
	} else {
		west = timezone;
	}
#elif defined (HAVE__TIMEZONE)
	west = _timezone;
#else
#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
#endif
	
	return local - west;
}
|
|
|
|
static time_t
|
|
parse_rfc822_date (date_token *tokens, int *tzone)
|
|
{
|
|
int hour, min, sec, offset, n;
|
|
date_token *token;
|
|
struct tm tm;
|
|
time_t t;
|
|
|
|
g_return_val_if_fail (tokens != NULL, (time_t) 0);
|
|
|
|
token = tokens;
|
|
|
|
memset ((void *) &tm, 0, sizeof (struct tm));
|
|
|
|
if ((n = get_wday (token->start, token->len)) != -1) {
|
|
/* not all dates may have this... */
|
|
tm.tm_wday = n;
|
|
token = token->next;
|
|
}
|
|
|
|
/* get the mday */
|
|
if (!token || (n = get_mday (token->start, token->len)) == -1)
|
|
return (time_t) 0;
|
|
|
|
tm.tm_mday = n;
|
|
token = token->next;
|
|
|
|
/* get the month */
|
|
if (!token || (n = get_month (token->start, token->len)) == -1)
|
|
return (time_t) 0;
|
|
|
|
tm.tm_mon = n;
|
|
token = token->next;
|
|
|
|
/* get the year */
|
|
if (!token || (n = get_year (token->start, token->len)) == -1)
|
|
return (time_t) 0;
|
|
|
|
tm.tm_year = n - 1900;
|
|
token = token->next;
|
|
|
|
/* get the hour/min/sec */
|
|
if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
|
|
return (time_t) 0;
|
|
|
|
tm.tm_hour = hour;
|
|
tm.tm_min = min;
|
|
tm.tm_sec = sec;
|
|
token = token->next;
|
|
|
|
/* get the timezone */
|
|
if (!token || (n = get_tzone (&token)) == -1) {
|
|
/* I guess we assume tz is GMT? */
|
|
offset = 0;
|
|
} else {
|
|
offset = n;
|
|
}
|
|
|
|
t = mktime_utc (&tm);
|
|
|
|
/* t is now GMT of the time we want, but not offset by the timezone ... */
|
|
|
|
/* this should convert the time to the GMT equiv time */
|
|
t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
|
|
|
|
if (tzone)
|
|
*tzone = offset;
|
|
|
|
return t;
|
|
}
|
|
|
|
|
|
/* Token classification helpers.  A token's mask has a NON_* bit set for
 * every class at least one of its bytes rules out, so "could be X" means
 * the corresponding NON_X bit is clear.  Time and numeric-timezone tokens
 * additionally require a ':' or a sign respectively.  The macro argument
 * is parenthesised so that any pointer expression may be passed. */
#define date_token_mask(t)  (((date_token *) (t))->mask)
#define is_numeric(t)       ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
#define is_weekday(t)       ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
#define is_month(t)         ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
#define is_time(t)          (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
#define is_tzone_alpha(t)   ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
#define is_tzone(t)         (is_tzone_alpha (t) || is_tzone_numeric (t))
|
|
|
|
static time_t
|
|
parse_broken_date (date_token *tokens, int *tzone)
|
|
{
|
|
gboolean got_wday, got_month, got_tzone;
|
|
int hour, min, sec, offset, n;
|
|
date_token *token;
|
|
struct tm tm;
|
|
time_t t;
|
|
|
|
memset ((void *) &tm, 0, sizeof (struct tm));
|
|
got_wday = got_month = got_tzone = FALSE;
|
|
offset = 0;
|
|
|
|
token = tokens;
|
|
while (token) {
|
|
if (is_weekday (token) && !got_wday) {
|
|
if ((n = get_wday (token->start, token->len)) != -1) {
|
|
d(printf ("weekday; "));
|
|
got_wday = TRUE;
|
|
tm.tm_wday = n;
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
if (is_month (token) && !got_month) {
|
|
if ((n = get_month (token->start, token->len)) != -1) {
|
|
d(printf ("month; "));
|
|
got_month = TRUE;
|
|
tm.tm_mon = n;
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
|
|
if (get_time (token->start, token->len, &hour, &min, &sec)) {
|
|
d(printf ("time; "));
|
|
tm.tm_hour = hour;
|
|
tm.tm_min = min;
|
|
tm.tm_sec = sec;
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
if (is_tzone (token) && !got_tzone) {
|
|
date_token *t = token;
|
|
|
|
if ((n = get_tzone (&t)) != -1) {
|
|
d(printf ("tzone; "));
|
|
got_tzone = TRUE;
|
|
offset = n;
|
|
goto next;
|
|
}
|
|
}
|
|
|
|
if (is_numeric (token)) {
|
|
if (token->len == 4 && !tm.tm_year) {
|
|
if ((n = get_year (token->start, token->len)) != -1) {
|
|
d(printf ("year; "));
|
|
tm.tm_year = n - 1900;
|
|
goto next;
|
|
}
|
|
} else {
|
|
/* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
|
|
if (!got_month && token->next && is_numeric (token->next)) {
|
|
if ((n = decode_int (token->start, token->len)) > 12) {
|
|
goto mday;
|
|
} else if (n > 0) {
|
|
d(printf ("mon; "));
|
|
got_month = TRUE;
|
|
tm.tm_mon = n - 1;
|
|
}
|
|
goto next;
|
|
} else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
|
|
mday:
|
|
d(printf ("mday; "));
|
|
tm.tm_mday = n;
|
|
goto next;
|
|
} else if (!tm.tm_year) {
|
|
if ((n = get_year (token->start, token->len)) != -1) {
|
|
d(printf ("2-digit year; "));
|
|
tm.tm_year = n - 1900;
|
|
}
|
|
goto next;
|
|
}
|
|
}
|
|
}
|
|
|
|
d(printf ("???; "));
|
|
|
|
next:
|
|
|
|
token = token->next;
|
|
}
|
|
|
|
d(printf ("\n"));
|
|
|
|
t = mktime_utc (&tm);
|
|
|
|
/* t is now GMT of the time we want, but not offset by the timezone ... */
|
|
|
|
/* this should convert the time to the GMT equiv time */
|
|
t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
|
|
|
|
if (tzone)
|
|
*tzone = offset;
|
|
|
|
return t;
|
|
}
|
|
|
|
#if 0
/* Generator for gmime_datetok_table[] (compiled out): recomputes every
 * entry from the *_CHARS sets and prints the table as C source on stdout. */
static void
gmime_datetok_table_init (void)
{
	int ch;
	
	for (ch = 0; ch < 256; ch++) {
		unsigned char mask = 0;
		
		if (!strchr (NUMERIC_CHARS, ch))
			mask |= DATE_TOKEN_NON_NUMERIC;
		if (!strchr (WEEKDAY_CHARS, ch))
			mask |= DATE_TOKEN_NON_WEEKDAY;
		if (!strchr (MONTH_CHARS, ch))
			mask |= DATE_TOKEN_NON_MONTH;
		if (!strchr (TIME_CHARS, ch))
			mask |= DATE_TOKEN_NON_TIME;
		if (!strchr (TIMEZONE_ALPHA_CHARS, ch))
			mask |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
		if (!strchr (TIMEZONE_NUMERIC_CHARS, ch))
			mask |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
		if (((char) ch) == ':')
			mask |= DATE_TOKEN_HAS_COLON;
		if (strchr ("+-", ch))
			mask |= DATE_TOKEN_HAS_SIGN;
		
		gmime_datetok_table[ch] = mask;
	}
	
	printf ("static unsigned char gmime_datetok_table[256] = {");
	for (ch = 0; ch < 256; ch++) {
		if (ch % 16 == 0)
			printf ("\n\t");
		printf ("%3d,", gmime_datetok_table[ch]);
	}
	printf ("\n};\n");
}
#endif
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_decode_date:
|
|
* @str: input date string
|
|
* @tz_offset: timezone offset
|
|
*
|
|
* Decodes the rfc822 date string and saves the GMT offset into
|
|
* @tz_offset if non-NULL.
|
|
*
|
|
* Returns: the time_t representation of the date string specified by
|
|
* @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
|
|
* of the timezone offset will be stored.
|
|
**/
|
|
time_t
|
|
g_mime_utils_header_decode_date (const char *str, int *tz_offset)
|
|
{
|
|
date_token *token, *tokens;
|
|
time_t date;
|
|
|
|
if (!(tokens = datetok (str))) {
|
|
if (tz_offset)
|
|
*tz_offset = 0;
|
|
|
|
return (time_t) 0;
|
|
}
|
|
|
|
if (!(date = parse_rfc822_date (tokens, tz_offset)))
|
|
date = parse_broken_date (tokens, tz_offset);
|
|
|
|
/* cleanup */
|
|
while (tokens) {
|
|
token = tokens;
|
|
tokens = tokens->next;
|
|
date_token_free (token);
|
|
}
|
|
|
|
return date;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_generate_message_id:
|
|
* @fqdn: Fully qualified domain name
|
|
*
|
|
* Generates a unique Message-Id.
|
|
*
|
|
* Returns: a unique string in an addr-spec format suitable for use as
|
|
* a Message-Id.
|
|
**/
|
|
char *
|
|
g_mime_utils_generate_message_id (const char *fqdn)
|
|
{
|
|
#ifdef G_THREADS_ENABLED
|
|
static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
|
|
#define MUTEX_LOCK() g_static_mutex_lock (&mutex)
|
|
#define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex)
|
|
#else
|
|
#define MUTEX_LOCK()
|
|
#define MUTEX_UNLOCK()
|
|
#endif
|
|
static unsigned long int count = 0;
|
|
const char *hostname = NULL;
|
|
char *name = NULL;
|
|
char *msgid;
|
|
|
|
if (!fqdn) {
|
|
#ifdef HAVE_UTSNAME_DOMAINNAME
|
|
struct utsname unam;
|
|
|
|
uname (&unam);
|
|
|
|
hostname = unam.nodename;
|
|
|
|
if (unam.domainname[0])
|
|
name = g_strdup_printf ("%s.%s", hostname, unam.domainname);
|
|
#else /* ! HAVE_UTSNAME_DOMAINNAME */
|
|
char host[MAXHOSTNAMELEN + 1];
|
|
|
|
#ifdef HAVE_GETHOSTNAME
|
|
host[MAXHOSTNAMELEN] = '\0';
|
|
if (gethostname (host, MAXHOSTNAMELEN) == 0) {
|
|
#ifdef HAVE_GETDOMAINNAME
|
|
size_t domainlen = MAXHOSTNAMELEN;
|
|
char *domain;
|
|
int rv;
|
|
|
|
domain = g_malloc (domainlen);
|
|
|
|
while ((rv = getdomainname (domain, domainlen)) == -1 && errno == EINVAL) {
|
|
domainlen += MAXHOSTNAMELEN;
|
|
domain = g_realloc (domain, domainlen);
|
|
}
|
|
|
|
if (rv == 0 && domain[0]) {
|
|
if (host[0]) {
|
|
name = g_strdup_printf ("%s.%s", host, domain);
|
|
g_free (domain);
|
|
} else {
|
|
name = domain;
|
|
}
|
|
}
|
|
#endif /* HAVE_GETDOMAINNAME */
|
|
} else {
|
|
host[0] = '\0';
|
|
}
|
|
#endif /* HAVE_GETHOSTNAME */
|
|
hostname = host;
|
|
#endif /* HAVE_UTSNAME_DOMAINNAME */
|
|
|
|
#ifdef HAVE_GETADDRINFO
|
|
if (!name && hostname[0]) {
|
|
/* we weren't able to get a domain name */
|
|
struct addrinfo hints, *res;
|
|
|
|
memset (&hints, 0, sizeof (hints));
|
|
hints.ai_flags = AI_CANONNAME;
|
|
|
|
if (getaddrinfo (hostname, NULL, &hints, &res) == 0) {
|
|
name = g_strdup (res->ai_canonname);
|
|
freeaddrinfo (res);
|
|
}
|
|
}
|
|
#endif /* HAVE_GETADDRINFO */
|
|
|
|
fqdn = name != NULL ? name : (hostname[0] ? hostname : "localhost.localdomain");
|
|
}
|
|
|
|
MUTEX_LOCK ();
|
|
msgid = g_strdup_printf ("%lu.%lu.%lu@%s", (unsigned long int) time (NULL),
|
|
(unsigned long int) getpid (), count++, fqdn);
|
|
MUTEX_UNLOCK ();
|
|
|
|
g_free (name);
|
|
|
|
return msgid;
|
|
}
|
|
|
|
static char *
|
|
decode_addrspec (const char **in)
|
|
{
|
|
const char *word, *inptr;
|
|
GString *addrspec;
|
|
char *str;
|
|
|
|
decode_lwsp (in);
|
|
inptr = *in;
|
|
|
|
if (!(word = decode_word (&inptr))) {
|
|
w(g_warning ("No local-part in addr-spec: %s", *in));
|
|
return NULL;
|
|
}
|
|
|
|
addrspec = g_string_new ("");
|
|
g_string_append_len (addrspec, word, (size_t) (inptr - word));
|
|
|
|
/* get the rest of the local-part */
|
|
decode_lwsp (&inptr);
|
|
while (*inptr == '.') {
|
|
g_string_append_c (addrspec, *inptr++);
|
|
if ((word = decode_word (&inptr))) {
|
|
g_string_append_len (addrspec, word, (size_t) (inptr - word));
|
|
decode_lwsp (&inptr);
|
|
} else {
|
|
w(g_warning ("Invalid local-part in addr-spec: %s", *in));
|
|
goto exception;
|
|
}
|
|
}
|
|
|
|
/* we should be at the '@' now... */
|
|
if (*inptr++ != '@') {
|
|
w(g_warning ("Invalid addr-spec; missing '@': %s", *in));
|
|
goto exception;
|
|
}
|
|
|
|
g_string_append_c (addrspec, '@');
|
|
if (!decode_domain (&inptr, addrspec)) {
|
|
w(g_warning ("No domain in addr-spec: %s", *in));
|
|
goto exception;
|
|
}
|
|
|
|
str = addrspec->str;
|
|
g_string_free (addrspec, FALSE);
|
|
|
|
*in = inptr;
|
|
|
|
return str;
|
|
|
|
exception:
|
|
|
|
g_string_free (addrspec, TRUE);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Decode one msg-id ("<" addr-spec ">") starting at *@in and advance
 * *@in past what was consumed.  Missing angle brackets are tolerated.
 * If no valid addr-spec is found, everything up to (not including) the
 * next '>' or the end of the string is returned verbatim instead.
 * Returns a newly allocated string. */
static char *
decode_msgid (const char **in)
{
	const char *cur = *in;
	const char *raw;
	char *msgid;
	
	decode_lwsp (&cur);
	if (*cur == '<') {
		cur++;
	} else {
		w(g_warning ("Invalid msg-id; missing '<': %s", *in));
	}
	
	decode_lwsp (&cur);
	msgid = decode_addrspec (&cur);
	if (msgid != NULL) {
		decode_lwsp (&cur);
		if (*cur == '>') {
			cur++;
		} else {
			w(g_warning ("Invalid msg-id; missing '>': %s", *in));
		}
		
		*in = cur;
		
		return msgid;
	}
	
	w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in));
	
	/* fall back to the raw text up to the closing bracket */
	raw = cur;
	while (*cur && *cur != '>')
		cur++;
	
	*in = cur;
	
	return g_strndup (raw, (size_t) (cur - raw));
}
|
|
|
|
|
|
/**
 * g_mime_utils_decode_message_id:
 * @message_id: string containing a message-id
 *
 * Decodes a msg-id as defined by rfc822.
 *
 * Returns: the addr-spec portion of the msg-id, as a newly allocated
 * string, or %NULL if @message_id is %NULL.
 **/
char *
g_mime_utils_decode_message_id (const char *message_id)
{
	const char *cursor;
	
	g_return_val_if_fail (message_id != NULL, NULL);
	
	cursor = message_id;
	
	return decode_msgid (&cursor);
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_decode:
|
|
* @text: string containing a list of msg-ids
|
|
*
|
|
* Decodes a list of msg-ids as in the References and/or In-Reply-To
|
|
* headers defined in rfc822.
|
|
*
|
|
* Returns: a list of referenced msg-ids.
|
|
**/
|
|
GMimeReferences *
|
|
g_mime_references_decode (const char *text)
|
|
{
|
|
GMimeReferences *refs, *tail, *ref;
|
|
const char *word, *inptr = text;
|
|
char *msgid;
|
|
|
|
g_return_val_if_fail (text != NULL, NULL);
|
|
|
|
refs = NULL;
|
|
tail = (GMimeReferences *) &refs;
|
|
|
|
while (*inptr) {
|
|
decode_lwsp (&inptr);
|
|
if (*inptr == '<') {
|
|
/* looks like a msg-id */
|
|
if ((msgid = decode_msgid (&inptr))) {
|
|
ref = g_new (GMimeReferences, 1);
|
|
ref->next = NULL;
|
|
ref->msgid = msgid;
|
|
tail->next = ref;
|
|
tail = ref;
|
|
} else {
|
|
w(g_warning ("Invalid References header: %s", inptr));
|
|
break;
|
|
}
|
|
} else if (*inptr) {
|
|
/* looks like part of a phrase */
|
|
if (!(word = decode_word (&inptr))) {
|
|
w(g_warning ("Invalid References header: %s", inptr));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return refs;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_append:
|
|
* @refs: the address of a #GMimeReferences list
|
|
* @msgid: a message-id string
|
|
*
|
|
* Appends a reference to msgid to the list of references.
|
|
**/
|
|
void
|
|
g_mime_references_append (GMimeReferences **refs, const char *msgid)
|
|
{
|
|
GMimeReferences *ref;
|
|
|
|
g_return_if_fail (refs != NULL);
|
|
g_return_if_fail (msgid != NULL);
|
|
|
|
ref = (GMimeReferences *) refs;
|
|
while (ref->next)
|
|
ref = ref->next;
|
|
|
|
ref->next = g_new (GMimeReferences, 1);
|
|
ref->next->msgid = g_strdup (msgid);
|
|
ref->next->next = NULL;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_free:
|
|
* @refs: a #GMimeReferences list
|
|
*
|
|
* Frees the #GMimeReferences list.
|
|
**/
|
|
void
|
|
g_mime_references_free (GMimeReferences *refs)
|
|
{
|
|
GMimeReferences *ref, *next;
|
|
|
|
ref = refs;
|
|
while (ref) {
|
|
next = ref->next;
|
|
g_free (ref->msgid);
|
|
g_free (ref);
|
|
ref = next;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_clear:
|
|
* @refs: address of a #GMimeReferences list
|
|
*
|
|
* Clears the #GMimeReferences list and resets it to %NULL.
|
|
**/
|
|
void
|
|
g_mime_references_clear (GMimeReferences **refs)
|
|
{
|
|
g_return_if_fail (refs != NULL);
|
|
|
|
g_mime_references_free (*refs);
|
|
*refs = NULL;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_get_next:
|
|
* @ref: a #GMimeReferences list
|
|
*
|
|
* Advances to the next reference node in the #GMimeReferences list.
|
|
*
|
|
* Returns: the next reference node in the #GMimeReferences list.
|
|
**/
|
|
const GMimeReferences *
|
|
g_mime_references_get_next (const GMimeReferences *ref)
|
|
{
|
|
return ref ? ref->next : NULL;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_references_get_message_id:
|
|
* @ref: a #GMimeReferences list
|
|
*
|
|
* Gets the Message-Id reference from the #GMimeReferences node.
|
|
*
|
|
* Returns: the Message-Id reference from the #GMimeReferences node.
|
|
**/
|
|
const char *
|
|
g_mime_references_get_message_id (const GMimeReferences *ref)
|
|
{
|
|
return ref ? ref->msgid : NULL;
|
|
}
|
|
|
|
|
|
static gboolean
|
|
is_rfc2047_token (const char *inptr, size_t len)
|
|
{
|
|
if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0)
|
|
return FALSE;
|
|
|
|
inptr += 2;
|
|
len -= 2;
|
|
|
|
/* skip past the charset */
|
|
while (*inptr != '?' && len > 0) {
|
|
inptr++;
|
|
len--;
|
|
}
|
|
|
|
if (*inptr != '?' || len < 4)
|
|
return FALSE;
|
|
|
|
if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B')
|
|
return FALSE;
|
|
|
|
inptr += 2;
|
|
len -= 2;
|
|
|
|
if (*inptr != '?')
|
|
return FALSE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static char *
|
|
header_fold (const char *in, gboolean structured)
|
|
{
|
|
gboolean last_was_lwsp = FALSE;
|
|
register const char *inptr;
|
|
size_t len, outlen, i;
|
|
size_t fieldlen;
|
|
GString *out;
|
|
char *ret;
|
|
|
|
inptr = in;
|
|
len = strlen (in);
|
|
if (len <= GMIME_FOLD_LEN + 1)
|
|
return g_strdup (in);
|
|
|
|
out = g_string_new ("");
|
|
fieldlen = strcspn (inptr, ": \t\n");
|
|
g_string_append_len (out, inptr, fieldlen);
|
|
outlen = fieldlen;
|
|
inptr += fieldlen;
|
|
|
|
while (*inptr && *inptr != '\n') {
|
|
len = strcspn (inptr, " \t\n");
|
|
|
|
if (len > 1 && outlen + len > GMIME_FOLD_LEN) {
|
|
if (outlen > 1 && out->len > fieldlen + 2) {
|
|
if (last_was_lwsp) {
|
|
if (structured)
|
|
out->str[out->len - 1] = '\t';
|
|
|
|
g_string_insert_c (out, out->len - 1, '\n');
|
|
} else
|
|
g_string_append (out, "\n\t");
|
|
outlen = 1;
|
|
}
|
|
|
|
if (!structured && !is_rfc2047_token (inptr, len)) {
|
|
/* check for very long words, just cut them up */
|
|
while (outlen + len > GMIME_FOLD_LEN) {
|
|
for (i = 0; i < GMIME_FOLD_LEN - outlen; i++)
|
|
g_string_append_c (out, inptr[i]);
|
|
inptr += GMIME_FOLD_LEN - outlen;
|
|
len -= GMIME_FOLD_LEN - outlen;
|
|
g_string_append (out, "\n\t");
|
|
outlen = 1;
|
|
}
|
|
} else {
|
|
g_string_append_len (out, inptr, len);
|
|
outlen += len;
|
|
inptr += len;
|
|
}
|
|
last_was_lwsp = FALSE;
|
|
} else if (len > 0) {
|
|
g_string_append_len (out, inptr, len);
|
|
outlen += len;
|
|
inptr += len;
|
|
last_was_lwsp = FALSE;
|
|
} else {
|
|
last_was_lwsp = TRUE;
|
|
if (*inptr == '\t') {
|
|
/* tabs are a good place to fold, odds
|
|
are that this is where the previous
|
|
mailer folded it */
|
|
g_string_append (out, "\n\t");
|
|
outlen = 1;
|
|
while (is_blank (*inptr))
|
|
inptr++;
|
|
} else {
|
|
g_string_append_c (out, *inptr++);
|
|
outlen++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (*inptr == '\n' && out->str[out->len - 1] != '\n')
|
|
g_string_append_c (out, '\n');
|
|
|
|
ret = out->str;
|
|
g_string_free (out, FALSE);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_structured_header_fold:
|
|
* @str: input string
|
|
*
|
|
* Folds a structured header according to the rules in rfc822.
|
|
*
|
|
* Returns: an allocated string containing the folded header.
|
|
**/
|
|
char *
|
|
g_mime_utils_structured_header_fold (const char *str)
|
|
{
|
|
return header_fold (str, TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_unstructured_header_fold:
|
|
* @str: input string
|
|
*
|
|
* Folds an unstructured header according to the rules in rfc822.
|
|
*
|
|
* Returns: an allocated string containing the folded header.
|
|
**/
|
|
char *
|
|
g_mime_utils_unstructured_header_fold (const char *str)
|
|
{
|
|
return header_fold (str, FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_fold:
|
|
* @str: input string
|
|
*
|
|
* Folds a structured header according to the rules in rfc822.
|
|
*
|
|
* Returns: an allocated string containing the folded header.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_fold (const char *str)
|
|
{
|
|
return header_fold (str, TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_printf:
|
|
* @format: string format
|
|
* @Varargs: arguments
|
|
*
|
|
* Allocates a buffer containing a formatted header specified by the
|
|
* @Varargs.
|
|
*
|
|
* Returns: an allocated string containing the folded header specified
|
|
* by @format and the following arguments.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_printf (const char *format, ...)
|
|
{
|
|
char *buf, *ret;
|
|
va_list ap;
|
|
|
|
va_start (ap, format);
|
|
buf = g_strdup_vprintf (format, ap);
|
|
va_end (ap);
|
|
|
|
ret = header_fold (buf, TRUE);
|
|
g_free (buf);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static gboolean
|
|
need_quotes (const char *string)
|
|
{
|
|
gboolean quoted = FALSE;
|
|
const char *inptr;
|
|
|
|
inptr = string;
|
|
|
|
while (*inptr) {
|
|
if (*inptr == '\\')
|
|
inptr++;
|
|
else if (*inptr == '"')
|
|
quoted = !quoted;
|
|
else if (!quoted && (is_tspecial (*inptr) || *inptr == '.'))
|
|
return TRUE;
|
|
|
|
if (*inptr)
|
|
inptr++;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/**
|
|
* g_mime_utils_quote_string:
|
|
* @str: input string
|
|
*
|
|
* Quotes @string as needed according to the rules in rfc2045.
|
|
*
|
|
* Returns: an allocated string containing the escaped and quoted (if
|
|
* needed to be) input string. The decision to quote the string is
|
|
* based on whether or not the input string contains any 'tspecials'
|
|
* as defined by rfc2045.
|
|
**/
|
|
char *
|
|
g_mime_utils_quote_string (const char *str)
|
|
{
|
|
gboolean quote;
|
|
const char *c;
|
|
char *qstring;
|
|
GString *out;
|
|
|
|
out = g_string_new ("");
|
|
|
|
if ((quote = need_quotes (str)))
|
|
g_string_append_c (out, '"');
|
|
|
|
for (c = str; *c; c++) {
|
|
if ((*c == '"' && quote) || *c == '\\')
|
|
g_string_append_c (out, '\\');
|
|
|
|
g_string_append_c (out, *c);
|
|
}
|
|
|
|
if (quote)
|
|
g_string_append_c (out, '"');
|
|
|
|
qstring = out->str;
|
|
g_string_free (out, FALSE);
|
|
|
|
return qstring;
|
|
}
|
|
|
|
|
|
/**
 * g_mime_utils_unquote_string:
 * @str: input string (modified in place; may be %NULL)
 *
 * Unquotes and unescapes a string in place: every unescaped
 * double-quote is dropped, and a backslash is dropped while the
 * character that follows it is kept verbatim. A lone backslash at
 * the very end of the string is discarded.
 **/
void
g_mime_utils_unquote_string (char *str)
{
	char *in, *out;
	int escaped = 0;

	if (!str)
		return;

	/* the output never outgrows the input, so we can rewrite @str in place */
	in = out = str;

	while (*in) {
		char c = *in++;

		if (escaped) {
			/* previous char was a backslash: keep this one literally */
			*out++ = c;
			escaped = 0;
		} else if (c == '\\') {
			escaped = 1;
		} else if (c != '"') {
			*out++ = c;
		}
	}

	*out = '\0';
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_text_is_8bit:
|
|
* @text: text to check for 8bit chars
|
|
* @len: text length
|
|
*
|
|
* Determines if @text contains 8bit characters within the first @len
|
|
* bytes.
|
|
*
|
|
* Returns: %TRUE if the text contains 8bit characters or %FALSE
|
|
* otherwise.
|
|
**/
|
|
gboolean
|
|
g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
|
|
{
|
|
register const unsigned char *inptr;
|
|
const unsigned char *inend;
|
|
|
|
g_return_val_if_fail (text != NULL, FALSE);
|
|
|
|
inend = text + len;
|
|
for (inptr = text; *inptr && inptr < inend; inptr++)
|
|
if (*inptr > (unsigned char) 127)
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_best_encoding:
|
|
* @text: text to encode
|
|
* @len: text length
|
|
*
|
|
* Determines the best content encoding for the first @len bytes of
|
|
* @text.
|
|
*
|
|
* Returns: a #GMimeContentEncoding that is determined to be the best
|
|
* encoding type for the specified block of text. ("best" in this
|
|
* particular case means smallest output size)
|
|
**/
|
|
GMimeContentEncoding
|
|
g_mime_utils_best_encoding (const unsigned char *text, size_t len)
|
|
{
|
|
const unsigned char *ch, *inend;
|
|
size_t count = 0;
|
|
|
|
inend = text + len;
|
|
for (ch = text; ch < inend; ch++)
|
|
if (*ch > (unsigned char) 127)
|
|
count++;
|
|
|
|
if ((float) count <= len * 0.17)
|
|
return GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE;
|
|
else
|
|
return GMIME_CONTENT_ENCODING_BASE64;
|
|
}
|
|
|
|
|
|
/**
 * charset_convert:
 * @cd: iconv converter
 * @inbuf: input text buffer to convert
 * @inleft: length of the input buffer
 * @outp: pointer to output buffer
 * @outlenp: pointer to output buffer length
 * @ninval: the number of invalid bytes in @inbuf
 *
 * Converts the input buffer from one charset to another using the
 * @cd. On completion, @outp will point to the output buffer
 * containing the converted text (nul-terminated), @outlenp will be
 * the size of the @outp buffer (note: not the strlen() of @outp) and
 * @ninval will contain the number of bytes which could not be
 * converted.
 *
 * Bytes which cannot be converted from @inbuf will appear as '?'
 * characters in the output buffer.
 *
 * If *@outp is non-NULL, then it is assumed that it points to a
 * pre-allocated buffer of length *@outlenp, with one additional byte
 * available for the nul terminator. This is done so that the same
 * output buffer can be reused multiple times. The buffer may be
 * reallocated, in which case *@outp is updated.
 *
 * Returns: the number of bytes written to the output buffer,
 * including the terminating nul byte.
 **/
static size_t
charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size_t *outlenp, size_t *ninval)
{
	size_t outlen, outleft, used, rc, n = 0;
	char *outbuf, *out;
	int too_small;

	if (*outp == NULL) {
		outleft = outlen = (inleft * 2) + 16;
		outbuf = out = g_malloc (outlen + 1);
	} else {
		outleft = outlen = *outlenp;
		outbuf = out = *outp;
	}

	do {
		rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
		if (rc == (size_t) -1) {
			if (errno == EINVAL) {
				/* incomplete sequence at the end of the input buffer */
				n += inleft;
				break;
			}

#ifdef G_OS_WIN32
			/* seems that GnuWin32's libiconv 1.9 does not set errno in
			 * the E2BIG case, so we have to fake it */
			if (outleft <= inleft)
				errno = E2BIG;
#endif

			/* latch the verdict now: the allocator below may clobber errno */
			too_small = (errno == E2BIG);

			if (too_small || outleft == 0) {
				/* need to grow the output buffer; this also covers the
				 * case of an invalid input byte arriving while the
				 * buffer is exactly full, where writing the '?' below
				 * would consume the terminator's byte and wrap
				 * outleft around to a huge value */
				outlen += (inleft * 2) + 16;
				used = (size_t) (outbuf - out);
				out = g_realloc (out, outlen + 1);
				outleft = outlen - used;
				outbuf = out + used;
			}

			if (!too_small) {
				/* invalid byte(-sequence) in the input buffer */
				*outbuf++ = '?';
				outleft--;
				inleft--;
				inbuf++;
				n++;
			}
		}
	} while (inleft > 0);

	/* flush any pending shift state, then terminate (the +1 slack byte) */
	iconv (cd, NULL, NULL, &outbuf, &outleft);
	*outbuf++ = '\0';

	*outlenp = outlen;
	*outp = out;
	*ninval = n;

	return (outbuf - out);
}
|
|
|
|
|
|
/* bit flags recording which charsets are already covered by the candidate list */
#define USER_CHARSETS_INCLUDE_UTF8    (1 << 0)
#define USER_CHARSETS_INCLUDE_LOCALE  (1 << 1)


/**
 * g_mime_utils_decode_8bit:
 * @text: input text in unknown 8bit/multibyte character set
 * @len: input text length
 *
 * Attempts to convert text in an unknown 8bit/multibyte charset into
 * UTF-8 by finding the charset which will convert the most bytes into
 * valid UTF-8 characters as possible. If no exact match can be found,
 * it will choose the best match and convert invalid byte sequences
 * into question-marks (?) in the returned string buffer.
 *
 * The candidates are tried in order: the user-supplied charsets,
 * then UTF-8 and the locale charset unless already covered.
 *
 * Returns: a UTF-8 string representation of @text.
 **/
char *
g_mime_utils_decode_8bit (const char *text, size_t len)
{
	const char **charsets, **user_charsets, *locale, *best;
	size_t outleft, outlen, min, ninval;
	unsigned int included = 0;
	iconv_t cd;
	char *out;
	int i = 0;

	g_return_val_if_fail (text != NULL, NULL);

	/* a UTF-8 locale is covered by the UTF-8 entry added below */
	locale = g_mime_locale_charset ();
	if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
		included |= USER_CHARSETS_INCLUDE_LOCALE;

	if ((user_charsets = g_mime_user_charsets ())) {
		while (user_charsets[i])
			i++;
	}

	/* room for every user charset plus UTF-8, the locale charset and
	 * the NULL terminator */
	charsets = g_alloca (sizeof (char *) * (i + 3));
	i = 0;

	if (user_charsets) {
		while (user_charsets[i]) {
			/* keep a record of whether or not the user-supplied
			 * charsets include UTF-8 and/or the default fallback
			 * charset so that we avoid doubling our efforts for
			 * these 2 charsets. We could have used a hash table
			 * to keep track of unique charsets, but we can
			 * (hopefully) assume that user_charsets is a unique
			 * list of charsets with no duplicates. */
			if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
				included |= USER_CHARSETS_INCLUDE_UTF8;

			if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
				included |= USER_CHARSETS_INCLUDE_LOCALE;

			charsets[i] = user_charsets[i];
			i++;
		}
	}

	if (!(included & USER_CHARSETS_INCLUDE_UTF8))
		charsets[i++] = "UTF-8";

	/* a NULL locale stored here simply terminates the list one slot early */
	if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
		charsets[i++] = locale;

	charsets[i] = NULL;

	min = len;
	best = charsets[0];

	outleft = (len * 2) + 16;
	out = g_malloc (outleft + 1);

	for (i = 0; charsets[i]; i++) {
		if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
			continue;

		outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);

		g_mime_iconv_close (cd);

		/* a flawless conversion wins immediately */
		if (ninval == 0)
			return g_realloc (out, outlen + 1);

		if (ninval < min) {
			best = charsets[i];
			min = ninval;
		}
	}

	/* if we get here, then none of the charsets fit the 8bit text flawlessly...
	 * try to find the one that fit the best and use that to convert what we can,
	 * replacing any byte we can't convert with a '?' */

	if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
		/* this shouldn't happen... but if we are here, then
		 * it did... the only thing we can do at this point
		 * is replace the 8bit garbage and pray */
		const char *inptr = text;
		const char *inend = inptr + len;
		char *outbuf = out;

		while (inptr < inend) {
			if (is_ascii (*inptr))
				*outbuf++ = *inptr;
			else
				*outbuf++ = '?';

			/* always consume the input byte: advancing only on the
			 * ascii path would loop forever on the first 8bit byte
			 * and write '?' far beyond the end of the buffer */
			inptr++;
		}

		*outbuf++ = '\0';

		return g_realloc (out, (size_t) (outbuf - out));
	}

	outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);

	g_mime_iconv_close (cd);

	return g_realloc (out, outlen + 1);
}
|
|
|
|
|
|
/* Decodes rfc2047's flavour of quoted-printable: "=XX" becomes the
 * byte with hex value XX, '_' becomes a space and everything else is
 * copied through. The hex digits are not validated. Returns the number
 * of bytes written to @out, or -1 if an '=' is not followed by two
 * more input bytes. */
static ssize_t
quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
{
	const unsigned char *src = in;
	const unsigned char *const end = in + len;
	unsigned char *dst = out;
	unsigned char hi, lo;

	while (src < end) {
		const unsigned char ch = *src++;

		if (ch == '_') {
			/* _'s are an rfc2047 shortcut for encoding spaces */
			*dst++ = ' ';
			continue;
		}

		if (ch != '=') {
			*dst++ = ch;
			continue;
		}

		/* an escape needs two more bytes; otherwise data was truncated */
		if (end - src < 2)
			return -1;

		hi = toupper (*src++);
		lo = toupper (*src++);

		*dst++ = (((hi >= 'A' ? hi - 'A' + 10 : hi - '0') & 0x0f) << 4)
			| ((lo >= 'A' ? lo - 'A' + 10 : lo - '0') & 0x0f);
	}

	return (ssize_t) (dst - out);
}
|
|
|
|
/* True when the @len bytes at @atom are at least 7 bytes long, start
 * with "=?" and end with "?=". Both arguments are evaluated more than
 * once, so they must be free of side effects; they are parenthesized
 * so that arbitrary expressions can be passed safely. */
#define is_rfc2047_encoded_word(atom, len) ((len) >= 7 && !strncmp ((atom), "=?", 2) && !strncmp ((atom) + (len) - 2, "?=", 2))
|
|
|
|
static char *
|
|
rfc2047_decode_word (const char *in, size_t inlen)
|
|
{
|
|
const unsigned char *instart = (const unsigned char *) in;
|
|
const register unsigned char *inptr = instart + 2;
|
|
const unsigned char *inend = instart + inlen - 2;
|
|
unsigned char *decoded;
|
|
const char *charset;
|
|
size_t len, ninval;
|
|
char *charenc, *p;
|
|
guint32 save = 0;
|
|
ssize_t declen;
|
|
int state = 0;
|
|
iconv_t cd;
|
|
char *buf;
|
|
|
|
/* skip over the charset */
|
|
if (!(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
|
|
return NULL;
|
|
|
|
inptr++;
|
|
|
|
switch (*inptr) {
|
|
case 'B':
|
|
case 'b':
|
|
inptr += 2;
|
|
len = (size_t) (inend - inptr);
|
|
decoded = g_alloca (len);
|
|
declen = g_mime_encoding_base64_decode_step (inptr, len, decoded, &state, &save);
|
|
|
|
if (declen == -1) {
|
|
d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
|
|
return NULL;
|
|
}
|
|
break;
|
|
case 'Q':
|
|
case 'q':
|
|
inptr += 2;
|
|
len = (size_t) (inend - inptr);
|
|
decoded = g_alloca (len);
|
|
declen = quoted_decode (inptr, len, decoded);
|
|
|
|
if (declen == -1) {
|
|
d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
|
|
return NULL;
|
|
}
|
|
break;
|
|
default:
|
|
d(fprintf (stderr, "unknown encoding\n"));
|
|
return NULL;
|
|
}
|
|
|
|
len = (inptr - 3) - (instart + 2);
|
|
charenc = g_alloca (len + 1);
|
|
memcpy (charenc, in + 2, len);
|
|
charenc[len] = '\0';
|
|
charset = charenc;
|
|
|
|
/* rfc2231 updates rfc2047 encoded words...
|
|
* The ABNF given in RFC 2047 for encoded-words is:
|
|
* encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
|
|
* This specification changes this ABNF to:
|
|
* encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
|
|
*/
|
|
|
|
/* trim off the 'language' part if it's there... */
|
|
if ((p = strchr (charset, '*')))
|
|
*p = '\0';
|
|
|
|
/* slight optimization? */
|
|
if (!g_ascii_strcasecmp (charset, "UTF-8")) {
|
|
p = (char *) decoded;
|
|
len = declen;
|
|
|
|
//while (!g_utf8_validate (p, len, (const char **) &p)) {
|
|
// len = declen - (p - (char *) decoded);
|
|
// *p = '?';
|
|
//}
|
|
|
|
return g_strndup ((char *) decoded, declen);
|
|
}
|
|
|
|
if (!charset[0] || (cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
|
|
w(g_warning ("Cannot convert from %s to UTF-8, header display may "
|
|
"be corrupt: %s", charset[0] ? charset : "unspecified charset",
|
|
g_strerror (errno)));
|
|
|
|
return g_mime_utils_decode_8bit ((char *) decoded, declen);
|
|
}
|
|
|
|
len = declen;
|
|
buf = g_malloc (len + 1);
|
|
|
|
charset_convert (cd, (char *) decoded, declen, &buf, &len, &ninval);
|
|
|
|
g_mime_iconv_close (cd);
|
|
|
|
#if w(!)0
|
|
if (ninval > 0) {
|
|
g_warning ("Failed to completely convert \"%.*s\" to UTF-8, display may be "
|
|
"corrupt: %s", declen, decoded, g_strerror (errno));
|
|
}
|
|
#endif
|
|
|
|
return buf;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_decode_text:
|
|
* @text: header text to decode
|
|
*
|
|
* Decodes an rfc2047 encoded 'text' header.
|
|
*
|
|
* Note: See g_mime_set_user_charsets() for details on how charset
|
|
* conversion is handled for unencoded 8bit text and/or wrongly
|
|
* specified rfc2047 encoded-word tokens.
|
|
*
|
|
* Returns: a newly allocated UTF-8 string representing the the decoded
|
|
* header.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_decode_text (const char *text)
|
|
{
|
|
gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
|
|
register const char *inptr = text;
|
|
gboolean encoded = FALSE;
|
|
const char *lwsp, *word;
|
|
size_t nlwsp, n;
|
|
gboolean ascii;
|
|
char *decoded;
|
|
GString *out;
|
|
|
|
if (text == NULL)
|
|
return g_strdup ("");
|
|
|
|
out = g_string_sized_new (strlen (text) + 1);
|
|
|
|
while (*inptr != '\0') {
|
|
lwsp = inptr;
|
|
while (is_lwsp (*inptr))
|
|
inptr++;
|
|
|
|
nlwsp = (size_t) (inptr - lwsp);
|
|
|
|
if (*inptr != '\0') {
|
|
word = inptr;
|
|
ascii = TRUE;
|
|
|
|
if (enable_rfc2047_workarounds) {
|
|
if (!strncmp (inptr, "=?", 2)) {
|
|
inptr += 2;
|
|
|
|
/* skip past the charset (if one is even declared, sigh) */
|
|
while (*inptr && *inptr != '?') {
|
|
ascii = ascii && is_ascii (*inptr);
|
|
inptr++;
|
|
}
|
|
|
|
/* sanity check encoding type */
|
|
if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
|
|
goto non_rfc2047;
|
|
|
|
inptr += 3;
|
|
|
|
/* find the end of the rfc2047 encoded word token */
|
|
while (*inptr && strncmp (inptr, "?=", 2) != 0) {
|
|
ascii = ascii && is_ascii (*inptr);
|
|
inptr++;
|
|
}
|
|
|
|
if (!strncmp (inptr, "?=", 2))
|
|
inptr += 2;
|
|
} else {
|
|
non_rfc2047:
|
|
/* stop if we encounter a possible rfc2047 encoded
|
|
* token even if it's inside another word, sigh. */
|
|
while (*inptr && !is_lwsp (*inptr) &&
|
|
strncmp (inptr, "=?", 2) != 0) {
|
|
ascii = ascii && is_ascii (*inptr);
|
|
inptr++;
|
|
}
|
|
}
|
|
} else {
|
|
while (*inptr && !is_lwsp (*inptr)) {
|
|
ascii = ascii && is_ascii (*inptr);
|
|
inptr++;
|
|
}
|
|
}
|
|
|
|
n = (size_t) (inptr - word);
|
|
if (is_rfc2047_encoded_word (word, n)) {
|
|
if ((decoded = rfc2047_decode_word (word, n))) {
|
|
/* rfc2047 states that you must ignore all
|
|
* whitespace between encoded words */
|
|
if (!encoded)
|
|
g_string_append_len (out, lwsp, nlwsp);
|
|
|
|
g_string_append (out, decoded);
|
|
g_free (decoded);
|
|
|
|
encoded = TRUE;
|
|
} else {
|
|
/* append lwsp and invalid rfc2047 encoded-word token */
|
|
g_string_append_len (out, lwsp, nlwsp + n);
|
|
encoded = FALSE;
|
|
}
|
|
} else {
|
|
/* append lwsp */
|
|
g_string_append_len (out, lwsp, nlwsp);
|
|
|
|
/* append word token */
|
|
if (!ascii) {
|
|
/* *sigh* I hate broken mailers... */
|
|
decoded = g_mime_utils_decode_8bit (word, n);
|
|
g_string_append (out, decoded);
|
|
g_free (decoded);
|
|
} else {
|
|
g_string_append_len (out, word, n);
|
|
}
|
|
|
|
encoded = FALSE;
|
|
}
|
|
} else {
|
|
/* appending trailing lwsp */
|
|
g_string_append_len (out, lwsp, nlwsp);
|
|
break;
|
|
}
|
|
}
|
|
|
|
decoded = out->str;
|
|
g_string_free (out, FALSE);
|
|
|
|
return decoded;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_decode_phrase:
|
|
* @phrase: header to decode
|
|
*
|
|
* Decodes an rfc2047 encoded 'phrase' header.
|
|
*
|
|
* Note: See g_mime_set_user_charsets() for details on how charset
|
|
* conversion is handled for unencoded 8bit text and/or wrongly
|
|
* specified rfc2047 encoded-word tokens.
|
|
*
|
|
* Returns: a newly allocated UTF-8 string representing the the decoded
|
|
* header.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_decode_phrase (const char *phrase)
|
|
{
|
|
register const char *inptr = phrase;
|
|
gboolean encoded = FALSE;
|
|
const char *lwsp, *text;
|
|
size_t nlwsp, n;
|
|
gboolean ascii;
|
|
char *decoded;
|
|
GString *out;
|
|
|
|
if (phrase == NULL)
|
|
return g_strdup ("");
|
|
|
|
out = g_string_sized_new (strlen (phrase) + 1);
|
|
|
|
while (*inptr != '\0') {
|
|
lwsp = inptr;
|
|
while (is_lwsp (*inptr))
|
|
inptr++;
|
|
|
|
nlwsp = (size_t) (inptr - lwsp);
|
|
|
|
text = inptr;
|
|
if (is_atom (*inptr)) {
|
|
while (is_atom (*inptr))
|
|
inptr++;
|
|
|
|
n = (size_t) (inptr - text);
|
|
if (is_rfc2047_encoded_word (text, n)) {
|
|
if ((decoded = rfc2047_decode_word (text, n))) {
|
|
/* rfc2047 states that you must ignore all
|
|
* whitespace between encoded words */
|
|
if (!encoded)
|
|
g_string_append_len (out, lwsp, nlwsp);
|
|
|
|
g_string_append (out, decoded);
|
|
g_free (decoded);
|
|
|
|
encoded = TRUE;
|
|
} else {
|
|
/* append lwsp and invalid rfc2047 encoded-word token */
|
|
g_string_append_len (out, lwsp, nlwsp + n);
|
|
encoded = FALSE;
|
|
}
|
|
} else {
|
|
/* append lwsp and atom token */
|
|
g_string_append_len (out, lwsp, nlwsp + n);
|
|
encoded = FALSE;
|
|
}
|
|
} else {
|
|
g_string_append_len (out, lwsp, nlwsp);
|
|
|
|
ascii = TRUE;
|
|
while (*inptr && !is_lwsp (*inptr)) {
|
|
ascii = ascii && is_ascii (*inptr);
|
|
inptr++;
|
|
}
|
|
|
|
n = (size_t) (inptr - text);
|
|
|
|
if (!ascii) {
|
|
/* *sigh* I hate broken mailers... */
|
|
decoded = g_mime_utils_decode_8bit (text, n);
|
|
g_string_append (out, decoded);
|
|
g_free (decoded);
|
|
} else {
|
|
g_string_append_len (out, text, n);
|
|
}
|
|
|
|
encoded = FALSE;
|
|
}
|
|
}
|
|
|
|
decoded = out->str;
|
|
g_string_free (out, FALSE);
|
|
|
|
return decoded;
|
|
}
|
|
|
|
|
|
/* rfc2047 version of quoted-printable */
|
|
static size_t
|
|
quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
|
|
{
|
|
register const unsigned char *inptr = (const unsigned char *) in;
|
|
const unsigned char *inend = inptr + len;
|
|
register unsigned char *outptr = out;
|
|
unsigned char c;
|
|
|
|
while (inptr < inend) {
|
|
c = *inptr++;
|
|
if (c == ' ') {
|
|
*outptr++ = '_';
|
|
} else if (c != '_' && gmime_special_table[c] & safemask) {
|
|
*outptr++ = c;
|
|
} else {
|
|
*outptr++ = '=';
|
|
*outptr++ = tohex[(c >> 4) & 0xf];
|
|
*outptr++ = tohex[c & 0xf];
|
|
}
|
|
}
|
|
|
|
return (outptr - out);
|
|
}
|
|
|
|
static void
|
|
rfc2047_encode_word (GString *string, const char *word, size_t len,
|
|
const char *charset, gushort safemask)
|
|
{
|
|
register char *inptr, *outptr;
|
|
iconv_t cd = (iconv_t) -1;
|
|
unsigned char *encoded;
|
|
size_t enclen, pos;
|
|
char *uword = NULL;
|
|
guint32 save = 0;
|
|
int state = 0;
|
|
char encoding;
|
|
|
|
if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
|
|
cd = g_mime_iconv_open (charset, "UTF-8");
|
|
|
|
if (cd != (iconv_t) -1) {
|
|
uword = g_mime_iconv_strndup (cd, (char *) word, len);
|
|
g_mime_iconv_close (cd);
|
|
}
|
|
|
|
if (uword) {
|
|
len = strlen (uword);
|
|
word = uword;
|
|
} else {
|
|
charset = "UTF-8";
|
|
}
|
|
|
|
switch (g_mime_utils_best_encoding ((const unsigned char *) word, len)) {
|
|
case GMIME_CONTENT_ENCODING_BASE64:
|
|
enclen = GMIME_BASE64_ENCODE_LEN (len);
|
|
encoded = g_alloca (enclen + 1);
|
|
|
|
encoding = 'b';
|
|
|
|
pos = g_mime_encoding_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save);
|
|
encoded[pos] = '\0';
|
|
|
|
/* remove \n chars as headers need to be wrapped differently */
|
|
if (G_UNLIKELY ((inptr = strchr ((char *) encoded, '\n')))) {
|
|
outptr = inptr++;
|
|
while (G_LIKELY (*inptr)) {
|
|
if (G_LIKELY (*inptr != '\n'))
|
|
*outptr++ = *inptr;
|
|
|
|
inptr++;
|
|
}
|
|
|
|
*outptr = '\0';
|
|
}
|
|
|
|
break;
|
|
case GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE:
|
|
enclen = GMIME_QP_ENCODE_LEN (len);
|
|
encoded = g_alloca (enclen + 1);
|
|
|
|
encoding = 'q';
|
|
|
|
pos = quoted_encode (word, len, encoded, safemask);
|
|
encoded[pos] = '\0';
|
|
|
|
break;
|
|
default:
|
|
encoded = NULL;
|
|
encoding = '\0';
|
|
g_assert_not_reached ();
|
|
}
|
|
|
|
g_free (uword);
|
|
|
|
g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded);
|
|
}
|
|
|
|
|
|
/* How a word token has to be written out. The values are ordered so
 * that taking the MAX() of two types yields the more demanding form. */
typedef enum {
	WORD_ATOM    = 0,
	WORD_QSTRING = 1,
	WORD_2047    = 2
} rfc822_word_t;

typedef struct _rfc822_word rfc822_word;

/* One token of the header text, kept in a singly-linked list. */
struct _rfc822_word {
	rfc822_word *next;
	const char *start;    /* first byte of the token */
	const char *end;      /* one past the last byte of the token */
	rfc822_word_t type;
	int encoding;         /* 0 = us-ascii, 1 = iso-8859-1, 2 = anything else */
};

/* list nodes come from (and go back to) the GLib slice allocator */
#define rfc822_word_new() g_slice_new (rfc822_word)
#define rfc822_word_free(word) g_slice_free (rfc822_word, word)
|
|
|
|
/* okay, so 'unstructured text' fields don't actually contain 'word'
|
|
* tokens, but we can group stuff similarly... */
|
|
static rfc822_word *
|
|
rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase)
|
|
{
|
|
rfc822_word *words, *tail, *word;
|
|
rfc822_word_t type = WORD_ATOM;
|
|
const char *inptr, *start, *last;
|
|
int count = 0, encoding = 0;
|
|
|
|
words = NULL;
|
|
tail = (rfc822_word *) &words;
|
|
|
|
last = start = inptr = in;
|
|
while (inptr && *inptr) {
|
|
const char *newinptr;
|
|
gunichar c;
|
|
|
|
newinptr = g_utf8_next_char (inptr);
|
|
c = g_utf8_get_char (inptr);
|
|
if (newinptr == NULL || !g_unichar_validate (c)) {
|
|
w(g_warning ("Invalid UTF-8 sequence encountered"));
|
|
inptr++;
|
|
continue;
|
|
}
|
|
|
|
inptr = newinptr;
|
|
|
|
if (c < 256 && is_lwsp (c)) {
|
|
if (count > 0) {
|
|
word = rfc822_word_new ();
|
|
word->next = NULL;
|
|
word->start = start;
|
|
word->end = last;
|
|
word->type = type;
|
|
word->encoding = encoding;
|
|
|
|
tail->next = word;
|
|
tail = word;
|
|
count = 0;
|
|
}
|
|
|
|
start = inptr;
|
|
type = WORD_ATOM;
|
|
encoding = 0;
|
|
} else {
|
|
count++;
|
|
if (phrase && c < 128) {
|
|
/* phrases can have qstring words */
|
|
if (!is_atom (c))
|
|
type = MAX (type, WORD_QSTRING);
|
|
} else if (c > 127 && c < 256) {
|
|
type = WORD_2047;
|
|
encoding = MAX (encoding, 1);
|
|
} else if (c >= 256) {
|
|
type = WORD_2047;
|
|
encoding = 2;
|
|
}
|
|
|
|
if (count >= GMIME_FOLD_PREENCODED) {
|
|
word = rfc822_word_new ();
|
|
word->next = NULL;
|
|
word->start = start;
|
|
word->end = inptr;
|
|
word->type = type;
|
|
word->encoding = encoding;
|
|
|
|
tail->next = word;
|
|
tail = word;
|
|
count = 0;
|
|
|
|
/* Note: don't reset 'type' as it
|
|
* needs to be preserved when breaking
|
|
* long words */
|
|
start = inptr;
|
|
encoding = 0;
|
|
}
|
|
}
|
|
|
|
last = inptr;
|
|
}
|
|
|
|
if (count > 0) {
|
|
word = rfc822_word_new ();
|
|
word->next = NULL;
|
|
word->start = start;
|
|
word->end = last;
|
|
word->type = type;
|
|
word->encoding = encoding;
|
|
|
|
tail->next = word;
|
|
tail = word;
|
|
}
|
|
|
|
#if d(!)0
|
|
printf ("rfc822 word tokens:\n");
|
|
word = words;
|
|
while (word) {
|
|
printf ("\t'%.*s'; type=%d, encoding=%d\n",
|
|
word->end - word->start, word->start,
|
|
word->type, word->encoding);
|
|
|
|
word = word->next;
|
|
}
|
|
#endif
|
|
|
|
return words;
|
|
}
|
|
|
|
#define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8))
|
|
|
|
static gboolean
|
|
should_merge_words (rfc822_word *word, rfc822_word *next)
|
|
{
|
|
switch (word->type) {
|
|
case WORD_ATOM:
|
|
if (next->type == WORD_2047)
|
|
return FALSE;
|
|
|
|
return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, next->type));
|
|
case WORD_QSTRING:
|
|
/* avoid merging with words that need to be rfc2047 encoded */
|
|
if (next->type == WORD_2047)
|
|
return FALSE;
|
|
|
|
return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_QSTRING));
|
|
case WORD_2047:
|
|
if (next->type == WORD_ATOM) {
|
|
/* whether we merge or not is dependent upon:
|
|
* 1. the number of atoms in a row after 'word'
|
|
* 2. if there is another encword after the string of atoms.
|
|
*/
|
|
int natoms = 0;
|
|
|
|
while (next && next->type == WORD_ATOM) {
|
|
next = next->next;
|
|
natoms++;
|
|
}
|
|
|
|
/* if all the words after the encword are atoms, don't merge */
|
|
if (!next || natoms > 3)
|
|
return FALSE;
|
|
}
|
|
|
|
/* avoid merging with qstrings */
|
|
if (next->type == WORD_QSTRING)
|
|
return FALSE;
|
|
|
|
return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_2047));
|
|
default:
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
static void
|
|
rfc2047_encode_merge_rfc822_words (rfc822_word **wordsp)
|
|
{
|
|
rfc822_word *word, *next, *words = *wordsp;
|
|
|
|
/* first pass: merge qstrings with adjacent qstrings and encwords with adjacent encwords */
|
|
word = words;
|
|
while (word && word->next) {
|
|
next = word->next;
|
|
|
|
if (word->type != WORD_ATOM && word->type == next->type &&
|
|
MERGED_WORD_LT_FOLDLEN (next->end - word->start, word->type)) {
|
|
/* merge the words */
|
|
word->encoding = MAX (word->encoding, next->encoding);
|
|
|
|
word->end = next->end;
|
|
word->next = next->next;
|
|
|
|
rfc822_word_free (next);
|
|
|
|
next = word;
|
|
}
|
|
|
|
word = next;
|
|
}
|
|
|
|
/* second pass: now merge atoms with the other words */
|
|
word = words;
|
|
while (word && word->next) {
|
|
next = word->next;
|
|
|
|
if (should_merge_words (word, next)) {
|
|
/* the resulting word type is the MAX of the 2 types */
|
|
word->type = MAX (word->type, next->type);
|
|
|
|
word->encoding = MAX (word->encoding, next->encoding);
|
|
|
|
word->end = next->end;
|
|
word->next = next->next;
|
|
|
|
rfc822_word_free (next);
|
|
|
|
continue;
|
|
}
|
|
|
|
word = next;
|
|
}
|
|
|
|
*wordsp = words;
|
|
}
|
|
|
|
static void
|
|
g_string_append_len_quoted (GString *out, const char *in, size_t len)
|
|
{
|
|
register const char *inptr;
|
|
const char *inend;
|
|
|
|
g_string_append_c (out, '"');
|
|
|
|
inptr = in;
|
|
inend = in + len;
|
|
|
|
while (inptr < inend) {
|
|
if (*inptr == '"' || *inptr == '\\')
|
|
g_string_append_c (out, '\\');
|
|
|
|
g_string_append_c (out, *inptr);
|
|
|
|
inptr++;
|
|
}
|
|
|
|
g_string_append_c (out, '"');
|
|
}
|
|
|
|
static char *
|
|
rfc2047_encode (const char *in, gushort safemask)
|
|
{
|
|
rfc822_word *words, *word, *prev = NULL;
|
|
const char **charsets, *charset;
|
|
const char *start;
|
|
GMimeCharset mask;
|
|
GString *out;
|
|
char *outstr;
|
|
size_t len;
|
|
int i;
|
|
|
|
if (!(words = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE)))
|
|
return g_strdup (in);
|
|
|
|
rfc2047_encode_merge_rfc822_words (&words);
|
|
|
|
charsets = g_mime_user_charsets ();
|
|
|
|
out = g_string_new ("");
|
|
|
|
/* output words now with spaces between them */
|
|
word = words;
|
|
while (word) {
|
|
/* append correct number of spaces between words */
|
|
if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
|
|
/* one or both of the words are not encoded so we write the spaces out untouched */
|
|
len = word->start - prev->end;
|
|
g_string_append_len (out, prev->end, len);
|
|
}
|
|
|
|
switch (word->type) {
|
|
case WORD_ATOM:
|
|
g_string_append_len (out, word->start, (size_t) (word->end - word->start));
|
|
break;
|
|
case WORD_QSTRING:
|
|
g_assert (safemask & IS_PSAFE);
|
|
g_string_append_len_quoted (out, word->start, (size_t) (word->end - word->start));
|
|
break;
|
|
case WORD_2047:
|
|
if (prev && prev->type == WORD_2047) {
|
|
/* include the whitespace chars between these 2 words in the
|
|
resulting rfc2047 encoded word. */
|
|
len = word->end - prev->end;
|
|
start = prev->end;
|
|
|
|
/* encoded words need to be separated by linear whitespace */
|
|
g_string_append_c (out, ' ');
|
|
} else {
|
|
len = word->end - word->start;
|
|
start = word->start;
|
|
}
|
|
|
|
switch (word->encoding) {
|
|
case 0: /* us-ascii */
|
|
rfc2047_encode_word (out, start, len, "us-ascii", safemask);
|
|
break;
|
|
case 1: /* iso-8859-1 */
|
|
rfc2047_encode_word (out, start, len, "iso-8859-1", safemask);
|
|
break;
|
|
default:
|
|
charset = NULL;
|
|
g_mime_charset_init (&mask);
|
|
g_mime_charset_step (&mask, start, len);
|
|
|
|
for (i = 0; charsets && charsets[i]; i++) {
|
|
if (g_mime_charset_can_encode (&mask, charsets[i], start, len)) {
|
|
charset = charsets[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!charset)
|
|
charset = g_mime_charset_best_name (&mask);
|
|
|
|
rfc2047_encode_word (out, start, len, charset, safemask);
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
rfc822_word_free (prev);
|
|
|
|
prev = word;
|
|
word = word->next;
|
|
}
|
|
|
|
rfc822_word_free (prev);
|
|
|
|
outstr = out->str;
|
|
g_string_free (out, FALSE);
|
|
|
|
return outstr;
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_encode_phrase:
|
|
* @phrase: phrase to encode
|
|
*
|
|
* Encodes a 'phrase' header according to the rules in rfc2047.
|
|
*
|
|
* Returns: the encoded 'phrase'. Useful for encoding internet
|
|
* addresses.
|
|
**/
|
|
char *
|
|
g_mime_utils_header_encode_phrase (const char *phrase)
|
|
{
|
|
if (phrase == NULL)
|
|
return NULL;
|
|
|
|
return rfc2047_encode (phrase, IS_PSAFE);
|
|
}
|
|
|
|
|
|
/**
|
|
* g_mime_utils_header_encode_text:
|
|
* @text: text to encode
|
|
*
|
|
* Encodes a 'text' header according to the rules in rfc2047.
|
|
*
|
|
* Returns: the encoded header. Useful for encoding
|
|
* headers like "Subject".
|
|
**/
|
|
char *
|
|
g_mime_utils_header_encode_text (const char *text)
|
|
{
|
|
if (text == NULL)
|
|
return NULL;
|
|
|
|
return rfc2047_encode (text, IS_ESAFE);
|
|
}
|