Add an initial heuristic for ignoring non-mail files.

notmuch_database_add_message() returns NOTMUCH_STATUS_FILE_NOT_EMAIL,
and adds nothing to the database, when a file has none of the From,
Subject, or To headers. The header parser stops reading a file once it
has seen 10 lines without a colon while fewer than 5 lines with a colon
have been seen. add_files() prints a note for such files and does not
count them.

Review notes on this revision of the patch:

 * "to" is now part of the restricted header list. The parser is
   restricted to the listed headers, so presumably an unlisted To
   would always read back as NULL and the three-way test would
   silently become a two-way test.
 * add_files() only checks for a progress report after the count has
   advanced; an ignored file left the count unchanged and could
   trigger the same report again.
 * Fixed the "indvidual" typo in the setup text and closed the
   parenthesis in the notmuch_database_add_message() comment.
 * Whitespace was reconstructed from a flattened copy of the patch;
   apply with --ignore-whitespace if context does not match exactly.
   The post-image blob ids on the index lines of database.cc,
   notmuch.c and notmuch.h are those of the earlier revision.
 * NOTE(review): the early return in database.cc closes the message,
   but the hunk does not show whether anything else allocated earlier
   in the function must be released on that path -- confirm.

diff --git a/database.cc b/database.cc
index efc38762..70f80f9f 100644
--- a/database.cc
+++ b/database.cc
@@ -479,6 +479,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
     GPtrArray *parents, *thread_ids;
 
     const char *refs, *in_reply_to, *date, *header;
+    const char *from, *to, *subject;
     char *message_id;
 
     time_t time_value;
@@ -487,10 +488,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
     message = notmuch_message_open (filename);
 
     notmuch_message_restrict_headers (message,
-				      "references",
+				      "date",
+				      "from",
 				      "in-reply-to",
 				      "message-id",
-				      "date",
+				      "references",
+				      "subject",
+				      "to",
 				      (char *) NULL);
 
     try {
@@ -567,7 +571,21 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	doc.add_value (NOTMUCH_VALUE_DATE,
 		       Xapian::sortable_serialise (time_value));
 
-	db->add_document (doc);
+	from = notmuch_message_get_header (message, "from");
+	subject = notmuch_message_get_header (message, "subject");
+	to = notmuch_message_get_header (message, "to");
+
+	/* Heuristic: a file with none of From, Subject, or To is not
+	 * treated as mail and is not added to the database. */
+	if (from == NULL &&
+	    subject == NULL &&
+	    to == NULL)
+	{
+	    notmuch_message_close (message);
+	    return NOTMUCH_STATUS_FILE_NOT_EMAIL;
+	} else {
+	    db->add_document (doc);
+	}
     } catch (const Xapian::Error &error) {
 	fprintf (stderr, "A Xapian exception occurred: %s.\n",
 		 error.get_msg().c_str());
diff --git a/message.c b/message.c
index 03583c8d..1a5994ff 100644
--- a/message.c
+++ b/message.c
@@ -37,6 +37,8 @@ struct _notmuch_message {
     /* Header storage */
     int restrict_headers;
     GHashTable *headers;
+    int broken_headers;
+    int good_headers;
 
     /* Parsing state */
     char *line;
@@ -234,12 +236,21 @@ notmuch_message_get_header (notmuch_message_t *message,
 	colon = strchr (message->line, ':');
 
 	if (colon == NULL) {
-	    fprintf (stderr, "Warning: Unexpected non-header line: %s\n",
-		     message->line);
+	    message->broken_headers++;
+	    /* A simple heuristic for giving up on things that just
+	     * don't look like mail messages. */
+	    if (message->broken_headers >= 10 &&
+		message->good_headers < 5)
+	    {
+		message->parsing_finished = 1;
+		continue;
+	    }
 	    NEXT_HEADER_LINE (NULL);
 	    continue;
 	}
 
+	message->good_headers++;
+
 	header = xstrndup (message->line, colon - message->line);
 
 	if (message->restrict_headers &&
diff --git a/notmuch.c b/notmuch.c
index 91ea3451..01000c2a 100644
--- a/notmuch.c
+++ b/notmuch.c
@@ -145,6 +145,7 @@ add_files (notmuch_database_t *notmuch, const char *path,
     int err;
     char *next;
     struct stat st;
+    notmuch_status_t status;
 
     dir = opendir (path);
 
@@ -187,8 +188,15 @@ add_files (notmuch_database_t *notmuch, const char *path,
 	stat (next, &st);
 
 	if (S_ISREG (st.st_mode)) {
-	    notmuch_database_add_message (notmuch, next);
-	    state->count++;
-	    if (state->count % 1000 == 0)
-		add_files_print_progress (state);
+	    status = notmuch_database_add_message (notmuch, next);
+	    if (status == NOTMUCH_STATUS_FILE_NOT_EMAIL) {
+		fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
+			 next);
+	    } else {
+		state->count++;
+		/* Report progress only when the count has advanced, so
+		 * an ignored file cannot re-trigger the same report. */
+		if (state->count % 1000 == 0)
+		    add_files_print_progress (state);
+	    }
 	} else if (S_ISDIR (st.st_mode)) {
@@ -293,9 +301,10 @@ setup_command (int argc, char *argv[])
     printf ("Notmuch needs to know the top-level directory of your email archive,\n"
 	    "(where you already have mail stored and where messages will be delivered\n"
 	    "in the future). This directory can contain any number of sub-directories\n"
-	    "but the only files it contains should be individual email messages.\n"
-	    "Either maildir or mh format directories are fine, but you will want to\n"
-	    "move away any auxiliary files maintained by other email programs.\n\n");
+	    "and primarily just files with individual email messages (eg. maildir or mh\n"
+	    "archives are perfect). If there are other, non-email files (such as\n"
+	    "indexes maintained by other email programs) then notmuch will do its\n"
+	    "best to detect those and ignore them.\n\n");
 
     printf ("Mail storage that uses mbox format, (where one mbox file contains many\n"
 	    "messages), will not work with notmuch. If that's how your mail is currently\n"
diff --git a/notmuch.h b/notmuch.h
index 873c88d2..e0b57db2 100644
--- a/notmuch.h
+++ b/notmuch.h
@@ -40,10 +40,14 @@ NOTMUCH_BEGIN_DECLS
  * NOTMUCH_STATUS_SUCCESS: No error occurred.
  *
  * NOTMUCH_STATUS_XAPIAN_EXCEPTION: A Xapian exception occurred
+ *
+ * NOTMUCH_STATUS_FILE_NOT_EMAIL: A file was presented that doesn't
+ *	appear to be an email message.
  */
 typedef enum _notmuch_status {
     NOTMUCH_STATUS_SUCCESS = 0,
-    NOTMUCH_STATUS_XAPIAN_EXCEPTION
+    NOTMUCH_STATUS_XAPIAN_EXCEPTION,
+    NOTMUCH_STATUS_FILE_NOT_EMAIL
 } notmuch_status_t;
 
 /* An opaque data structure representing a notmuch database. See
@@ -116,7 +120,15 @@ notmuch_database_get_path (notmuch_database_t *database);
  * single mail message (not a multi-message mbox) that is expected to
  * remain at its current location, (since the notmuch database will
  * reference the filename, and will not copy the entire contents of
- * the file. */
+ * the file).
+ *
+ * Return value:
+ *
+ * NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
+ *
+ * NOTMUCH_STATUS_FILE_NOT_EMAIL: the contents of filename don't look
+ *	like an email message. Nothing added to the database.
+ */
 notmuch_status_t
 notmuch_database_add_message (notmuch_database_t *database,
 			      const char *filename);