mirror of
https://git.notmuchmail.org/git/notmuch
synced 2024-12-22 17:34:54 +01:00
0e777a8f80
Since we're currently just trying to stitch together In-Reply-To and References headers we don't need that much sophistication. It's when we later add full-text searching that GMime will be useful. So for now, even though my own code here is surely very buggy compared to GMime it's also a lot faster. And speed is what we're after for the initial index creation.
300 lines
6.9 KiB
C
300 lines
6.9 KiB
C
/* message.c - Utility functions for parsing an email message for notmuch.
|
|
*
|
|
* Copyright © 2009 Carl Worth
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see http://www.gnu.org/licenses/ .
|
|
*
|
|
* Author: Carl Worth <cworth@cworth.org>
|
|
*/
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include "notmuch-private.h"
|
|
|
|
#include <glib.h>
|
|
|
|
struct _notmuch_message {
|
|
/* File objects */
|
|
int fd;
|
|
void *map;
|
|
|
|
/* Header storage */
|
|
int restrict_headers;
|
|
GHashTable *headers;
|
|
|
|
/* Parsing state */
|
|
char *start;
|
|
size_t size;
|
|
const char *next_line;
|
|
int parsing_started;
|
|
int parsing_finished;
|
|
};
|
|
|
|
/* Equality callback for the header hash table: returns 1 when the two
 * NUL-terminated strings 'a' and 'b' are equal ignoring case, and 0
 * otherwise. */
static int
strcase_equal (const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;

    if (strcasecmp (lhs, rhs) != 0)
        return 0;

    return 1;
}
|
|
|
|
/* Hash callback for the header hash table: a case-insensitive djb2
 * hash of the NUL-terminated string 'ptr'.
 *
 * Strings that differ only in case hash to the same value, which is
 * required for this function to be paired with a case-insensitive
 * equality callback. A NULL 'ptr' hashes like the empty string.
 */
static unsigned int
strcase_hash (const void *ptr)
{
    /* Read the bytes as unsigned char: passing a negative plain char
     * (any byte >= 0x80 where char is signed) to tolower() is
     * undefined behavior. */
    const unsigned char *s = ptr;

    /* This is the djb2 hash. */
    unsigned int hash = 5381;

    if (s == NULL)
        return hash;

    while (*s) {
        /* hash * 33 + c */
        hash = ((hash << 5) + hash) + (unsigned int) tolower (*s);
        s++;
    }

    return hash;
}
|
|
|
|
notmuch_message_t *
|
|
notmuch_message_open (const char *filename)
|
|
{
|
|
notmuch_message_t *message;
|
|
struct stat st;
|
|
|
|
message = xcalloc (1, sizeof (notmuch_message_t));
|
|
|
|
message->fd = open (filename, O_RDONLY);
|
|
if (message->fd < 0)
|
|
goto FAIL;
|
|
|
|
if (fstat (message->fd, &st) < 0)
|
|
goto FAIL;
|
|
|
|
message->map = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
|
|
message->fd, 0);
|
|
if (message->map == MAP_FAILED)
|
|
goto FAIL;
|
|
|
|
message->headers = g_hash_table_new_full (strcase_hash,
|
|
strcase_equal,
|
|
free,
|
|
free);
|
|
|
|
message->start = (char *) message->map;
|
|
message->size = st.st_size;
|
|
message->next_line = message->start;
|
|
message->parsing_started = 0;
|
|
message->parsing_finished = 0;
|
|
|
|
return message;
|
|
|
|
FAIL:
|
|
fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
|
|
notmuch_message_close (message);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
notmuch_message_close (notmuch_message_t *message)
|
|
{
|
|
if (message == NULL)
|
|
return;
|
|
|
|
if (message->headers)
|
|
g_hash_table_unref (message->headers);
|
|
|
|
if (message->map)
|
|
munmap (message->map, message->size);
|
|
if (message->fd)
|
|
close (message->fd);
|
|
|
|
free (message);
|
|
}
|
|
|
|
void
|
|
notmuch_message_restrict_headersv (notmuch_message_t *message,
|
|
va_list va_headers)
|
|
{
|
|
char *header;
|
|
|
|
if (message->parsing_started ) {
|
|
fprintf (stderr, "Error: notmuch_message_restrict_headers called after parsing has started\n");
|
|
exit (1);
|
|
}
|
|
|
|
while (1) {
|
|
header = va_arg (va_headers, char*);
|
|
if (header == NULL)
|
|
break;
|
|
g_hash_table_insert (message->headers,
|
|
xstrdup (header), NULL);
|
|
}
|
|
|
|
message->restrict_headers = 1;
|
|
}
|
|
|
|
void
|
|
notmuch_message_restrict_headers (notmuch_message_t *message, ...)
|
|
{
|
|
va_list va_headers;
|
|
|
|
va_start (va_headers, message);
|
|
|
|
notmuch_message_restrict_headersv (message, va_headers);
|
|
}
|
|
|
|
/* The message is parsed straight out of the mmapped file, which is
 * not NUL-terminated, so string functions such as strchr() cannot be
 * used on it. The file may not even end with a newline. Every pointer
 * advance during parsing must therefore be guarded by a bounds check,
 * which is what WITHIN provides.
 *
 * WITHIN (s) is true while the offset of 's' from message->start is
 * less than message->size - 1. It expects a variable named 'message'
 * to be in scope at the point of use.
 *
 * NOTE(review): with this bound the final byte of the buffer is
 * treated as being outside it -- confirm that this is intentional.
 */
#define WITHIN(s) \
    ((size_t) ((s) - message->start) < message->size - 1)
|
|
|
|
/* In each of the macros below, "without overrunning the buffer" means
 * that the macro only dereferences 'ptr' while WITHIN (ptr) holds and
 * stops advancing as soon as it does not. Callers should therefore
 * test the resulting pointer with WITHIN before dereferencing it.
 *
 * Every macro modifies 'ptr' in place and evaluates it several times,
 * so 'ptr' must be a side-effect-free lvalue. Each one expands to a
 * single statement without a trailing semicolon, so it can safely be
 * used as the body of an if/else. */

/* Advance 'ptr' until pointing at a non-space character in the same
 * line, (without overrunning the buffer) */
#define SKIP_SPACE_IN_LINE(ptr) \
    do { \
        while (WITHIN (ptr) && (*(ptr) == ' ' || *(ptr) == '\t')) \
            (ptr)++; \
    } while (0)

/* Advance 'ptr' until pointing at a non-space character, (without
 * overrunning the buffer). The character is converted to unsigned
 * char because isspace() is undefined for negative values. */
#define SKIP_SPACE(ptr) \
    do { \
        while (WITHIN (ptr) && isspace ((unsigned char) *(ptr))) \
            (ptr)++; \
    } while (0)

/* Advance 'ptr' to the first occurrence of 'c' within the same
 * line, (without overrunning the buffer). If 'c' does not occur,
 * 'ptr' is left at the newline ending the line or at the point where
 * WITHIN became false. */
#define ADVANCE_TO(ptr, c) \
    do { \
        while (WITHIN (ptr) && *(ptr) != '\n' && *(ptr) != (c)) \
            (ptr)++; \
    } while (0)

/* Advance 'ptr' to the beginning of the next line not starting with
 * an initial tab or space character, (without overrunning the
 * buffer). Lines starting with a tab or space are continuations of
 * the previous header and are skipped. */
#define ADVANCE_TO_NEXT_HEADER_LINE(ptr) \
    do { \
        ADVANCE_TO ((ptr), '\n'); \
        if (WITHIN (ptr)) \
            (ptr)++; \
    } while (WITHIN (ptr) && \
             (*(ptr) == '\t' || *(ptr) == ' '))
|
|
|
|
/* Return a newly allocated, NUL-terminated copy of the header value
 * stored in the half-open range ['start', 'end').
 *
 * Newline characters are dropped, and a tab that immediately follows
 * a newline (the start of a continuation line) is replaced by a
 * single space. All other bytes are copied unchanged. The result
 * comes from xmalloc.
 */
char *
copy_header_value (const char *start, const char *end)
{
    char *copy, *out;
    const char *in;
    int after_newline = 0;

    /* The copy can only shrink, so the input length plus room for
     * the terminator is always enough. */
    copy = xmalloc (end - start + 1);
    out = copy;

    for (in = start; in < end; in++) {
        if (*in == '\n') {
            after_newline = 1;
            continue;
        }

        *out++ = (after_newline && *in == '\t') ? ' ' : *in;
        after_newline = 0;
    }

    *out = '\0';

    return copy;
}
|
|
|
|
const char *
|
|
notmuch_message_get_header (notmuch_message_t *message,
|
|
const char *header_desired)
|
|
{
|
|
int contains;
|
|
const char *s, *colon;
|
|
char *header, *value;
|
|
int match;
|
|
|
|
message->parsing_started = 1;
|
|
|
|
contains = g_hash_table_lookup_extended (message->headers,
|
|
header_desired, NULL,
|
|
(gpointer *) &value);
|
|
if (contains)
|
|
return value;
|
|
|
|
if (message->parsing_finished)
|
|
return NULL;
|
|
|
|
while (1) {
|
|
s = message->next_line;
|
|
|
|
if (*s == '\n') {
|
|
message->parsing_finished = 1;
|
|
return NULL;
|
|
}
|
|
|
|
if (*s == '\t') {
|
|
fprintf (stderr, "Warning: Unexpected continued value\n");
|
|
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
|
|
continue;
|
|
}
|
|
|
|
colon = s;
|
|
ADVANCE_TO (colon, ':');
|
|
|
|
if (! WITHIN (colon) || *colon == '\n') {
|
|
fprintf (stderr, "Warning: Unexpected non-header line: %s\n", s);
|
|
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
|
|
continue;
|
|
}
|
|
|
|
header = xstrndup (s, colon - s);
|
|
|
|
if (message->restrict_headers &&
|
|
! g_hash_table_lookup_extended (message->headers,
|
|
header, NULL, NULL))
|
|
{
|
|
free (header);
|
|
message->next_line = colon;
|
|
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
|
|
continue;
|
|
}
|
|
|
|
s = colon + 1;
|
|
SKIP_SPACE_IN_LINE (s);
|
|
|
|
message->next_line = s;
|
|
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
|
|
|
|
value = copy_header_value (s, message->next_line);
|
|
|
|
match = (strcasecmp (header, header_desired) == 0);
|
|
|
|
g_hash_table_insert (message->headers, header, value);
|
|
|
|
if (match)
|
|
return value;
|
|
}
|
|
}
|