Rework message parsing to use getline rather than mmap.

The line-based parsing can be a bit awkward when wanting to peek
ahead (say, for folded header values), but being able to trust
that a string terminator exists on every line is so convenient
that it cleans up the code considerably.
This commit is contained in:
Carl Worth 2009-10-19 16:38:44 -07:00
parent 45f0d7bcab
commit 371091139a
2 changed files with 103 additions and 117 deletions

218
message.c
View file

@ -24,19 +24,25 @@
#include <glib.h> /* GHashTable */ #include <glib.h> /* GHashTable */
typedef struct {
char *str;
size_t size;
size_t len;
} header_value_closure_t;
struct _notmuch_message { struct _notmuch_message {
/* File objects */ /* File object */
int fd; FILE *file;
void *map;
/* Header storage */ /* Header storage */
int restrict_headers; int restrict_headers;
GHashTable *headers; GHashTable *headers;
/* Parsing state */ /* Parsing state */
char *start; char *line;
size_t size; size_t line_size;
const char *next_line; header_value_closure_t value;
int parsing_started; int parsing_started;
int parsing_finished; int parsing_finished;
}; };
@ -66,20 +72,11 @@ notmuch_message_t *
notmuch_message_open (const char *filename) notmuch_message_open (const char *filename)
{ {
notmuch_message_t *message; notmuch_message_t *message;
struct stat st;
message = xcalloc (1, sizeof (notmuch_message_t)); message = xcalloc (1, sizeof (notmuch_message_t));
message->fd = open (filename, O_RDONLY); message->file = fopen (filename, "r");
if (message->fd < 0) if (message->file == NULL)
goto FAIL;
if (fstat (message->fd, &st) < 0)
goto FAIL;
message->map = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
message->fd, 0);
if (message->map == MAP_FAILED)
goto FAIL; goto FAIL;
message->headers = g_hash_table_new_full (strcase_hash, message->headers = g_hash_table_new_full (strcase_hash,
@ -87,9 +84,6 @@ notmuch_message_open (const char *filename)
free, free,
free); free);
message->start = (char *) message->map;
message->size = st.st_size;
message->next_line = message->start;
message->parsing_started = 0; message->parsing_started = 0;
message->parsing_finished = 0; message->parsing_finished = 0;
@ -111,10 +105,8 @@ notmuch_message_close (notmuch_message_t *message)
if (message->headers) if (message->headers)
g_hash_table_unref (message->headers); g_hash_table_unref (message->headers);
if (message->map) if (message->file)
munmap (message->map, message->size); fclose (message->file);
if (message->fd)
close (message->fd);
free (message); free (message);
} }
@ -151,80 +143,44 @@ notmuch_message_restrict_headers (notmuch_message_t *message, ...)
notmuch_message_restrict_headersv (message, va_headers); notmuch_message_restrict_headersv (message, va_headers);
} }
/* With our mmapped file, we don't get the benefit of terminated void
* strings, so we can't use things like strchr(). We don't even know copy_header_unfolding (header_value_closure_t *value,
* if there's a newline at the end of the file so we also have to be const char *chunk)
* careful of that. Basically, every time we advance a pointer while
* parsing we must ensure we don't go beyond our buffer.
*/
#define WITHIN(s) (((s) - message->start) < (message->size -1))
/* In each of the macros below, "without overrunning the buffer" means
* that the macro will never dereference a character beyond the end of
* the buffer. However, all of the macros may return a pointer
* pointing to the first character beyond the buffer. So callers
* should test with WITHIN before dereferencing the result. */
/* Advance 'ptr' until pointing at a non-space character in the same
* line, (without overrunning the buffer) */
#define SKIP_SPACE_IN_LINE(ptr) \
while (WITHIN (ptr) && (*(ptr) == ' ' || *(ptr) == '\t')) \
(ptr)++;
/* Advance 'ptr' until pointing at a non-space character, (without
* overrunning the buffer) */
#define SKIP_SPACE(ptr) \
while (WITHIN (ptr) && isspace(*(ptr))) \
(ptr)++;
/* Advance 'ptr' to the first occurrence of 'c' within the same
* line, (without overrunning the buffer). */
#define ADVANCE_TO(ptr, c) \
while (WITHIN (ptr) && *(ptr) != '\n' && \
*(ptr) != (c)) \
{ \
(ptr)++; \
}
/* Advance 'ptr' to the beginning of the next line not starting with
* an initial tab character, (without overrunning the buffer). */
#define ADVANCE_TO_NEXT_HEADER_LINE(ptr) \
do { \
ADVANCE_TO ((ptr), '\n'); \
if (WITHIN (ptr)) \
(ptr)++; \
} while (WITHIN (ptr) && \
(*(ptr) == '\t' || *(ptr) == ' '));
char *
copy_header_value (const char *start, const char *end)
{ {
const char *s; char *last;
char *result, *r;
int was_newline = 0;
result = xmalloc (end - start + 1); if (chunk == NULL)
return;
s = start; while (*chunk == ' ' || *chunk == '\t')
r = result; chunk++;
while (s < end) { if (value->len + 1 + strlen (chunk) + 1 > value->size) {
if (*s == '\n') { int new_size = value->size;
was_newline = 1; if (value->size == 0)
} else { new_size = strlen (chunk) + 1;
if (*s == '\t' && was_newline) else
*r = ' '; while (value->len + 1 + strlen (chunk) + 1 > new_size)
else new_size *= 2;
*r = *s; value->str = xrealloc (value->str, new_size);
r++; value->size = new_size;
was_newline = 0;
}
s++;
} }
*r = '\0'; last = value->str + value->len;
if (value->len) {
*last = ' ';
last++;
value->len++;
}
return result; strcpy (last, chunk);
value->len += strlen (chunk);
last = value->str + value->len - 1;
if (*last == '\n') {
*last = '\0';
value->len--;
}
} }
const char * const char *
@ -232,8 +188,8 @@ notmuch_message_get_header (notmuch_message_t *message,
const char *header_desired) const char *header_desired)
{ {
int contains; int contains;
const char *s, *colon;
char *header, *value; char *header, *value;
const char *s, *colon;
int match; int match;
message->parsing_started = 1; message->parsing_started = 1;
@ -247,54 +203,82 @@ notmuch_message_get_header (notmuch_message_t *message,
if (message->parsing_finished) if (message->parsing_finished)
return NULL; return NULL;
#define NEXT_HEADER_LINE(closure) \
do { \
ssize_t bytes_read = getline (&message->line, \
&message->line_size, \
message->file); \
if (bytes_read == -1) { \
message->parsing_finished = 1; \
break; \
} \
if (*message->line == '\n') { \
message->parsing_finished = 1; \
break; \
} \
if (closure && \
(*message->line == ' ' || *message->line == '\t')) \
{ \
copy_header_unfolding ((closure), message->line); \
} \
} while (*message->line == ' ' || *message->line == '\t');
if (message->line == NULL)
NEXT_HEADER_LINE (NULL);
while (1) { while (1) {
s = message->next_line;
if (*s == '\n') { if (message->parsing_finished)
message->parsing_finished = 1; break;
return NULL;
}
if (*s == '\t') { colon = strchr (message->line, ':');
fprintf (stderr, "Warning: Unexpected continued value\n");
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line); if (colon == NULL) {
fprintf (stderr, "Warning: Unexpected non-header line: %s\n",
message->line);
NEXT_HEADER_LINE (NULL);
continue; continue;
} }
colon = s; header = xstrndup (message->line, colon - message->line);
ADVANCE_TO (colon, ':');
if (! WITHIN (colon) || *colon == '\n') {
fprintf (stderr, "Warning: Unexpected non-header line: %s\n", s);
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
continue;
}
header = xstrndup (s, colon - s);
if (message->restrict_headers && if (message->restrict_headers &&
! g_hash_table_lookup_extended (message->headers, ! g_hash_table_lookup_extended (message->headers,
header, NULL, NULL)) header, NULL, NULL))
{ {
free (header); free (header);
message->next_line = colon; NEXT_HEADER_LINE (NULL);
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
continue; continue;
} }
s = colon + 1; s = colon + 1;
SKIP_SPACE_IN_LINE (s); while (*s == ' ' || *s == '\t')
s++;
message->next_line = s; message->value.len = 0;
ADVANCE_TO_NEXT_HEADER_LINE (message->next_line); copy_header_unfolding (&message->value, s);
value = copy_header_value (s, message->next_line); NEXT_HEADER_LINE (&message->value);
match = (strcasecmp (header, header_desired) == 0); match = (strcasecmp (header, header_desired) == 0);
g_hash_table_insert (message->headers, header, value); g_hash_table_insert (message->headers, header,
xstrdup (message->value.str));
if (match) if (match)
return value; return value;
} }
if (message->line)
free (message->line);
message->line = NULL;
if (message->value.size) {
free (message->value.str);
message->value.str = NULL;
message->value.size = 0;
message->value.len = 0;
}
return NULL;
} }

View file

@ -23,6 +23,8 @@
#include "notmuch.h" #include "notmuch.h"
#define _GNU_SOURCE /* For getline */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdarg.h> #include <stdarg.h>