diff --git a/Makefile b/Makefile
index b4d77b4e..a1a7a15c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,27 +2,30 @@ PROGS=notmuch
 
 WARN_FLAGS=-Wall -Wextra -Wmissing-declarations -Wwrite-strings -Wswitch-enum
 
-CDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 talloc`
-CXXDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 talloc` `xapian-config --cxxflags`
+CDEPENDS_FLAGS=`pkg-config --cflags glib-2.0 gmime-2.4 talloc`
+CXXDEPENDS_FLAGS=$(CDEPENDS_FLAGS) `xapian-config --cxxflags`
 
 MYCFLAGS=$(WARN_FLAGS) -O0 -g $(CDEPENDS_FLAGS)
 MYCXXFLAGS=$(WARN_FLAGS) -O0 -g $(CXXDEPENDS_FLAGS)
 
-MYLDFLAGS=`pkg-config --libs glib-2.0 talloc` `xapian-config --libs`
+MYLDFLAGS=`pkg-config --libs glib-2.0 gmime-2.4 talloc` `xapian-config --libs`
 
-MODULES= \
-	notmuch.o \
+LIBRARY= \
 	database.o \
 	date.o \
+	index.o \
+	libsha1.o \
 	message.o \
 	message-file.o \
 	query.o \
 	sha1.o \
 	tags.o \
 	thread.o \
-	libsha1.o \
 	xutil.o
 
+MAIN= \
+	notmuch.o
+
 all: $(PROGS)
 
 %.o: %.cc
@@ -31,7 +34,7 @@ all: $(PROGS)
 %.o: %.c
 	$(CC) -c $(CFLAGS) $(MYCFLAGS) $< -o $@
 
-notmuch: $(MODULES)
+notmuch: $(MAIN) $(LIBRARY)
 	$(CC) $(MYLDFLAGS) $^ -o $@
 
 Makefile.dep: *.c *.cc
diff --git a/database-private.h b/database-private.h
index a5cca5a4..76e26ce0 100644
--- a/database-private.h
+++ b/database-private.h
@@ -29,6 +29,7 @@ struct _notmuch_database {
     char *path;
     Xapian::WritableDatabase *xapian_db;
     Xapian::QueryParser *query_parser;
+    Xapian::TermGenerator *term_gen;
 };
 
 #endif
diff --git a/database.cc b/database.cc
index 71246eb4..583bee82 100644
--- a/database.cc
+++ b/database.cc
@@ -114,6 +114,15 @@ prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
     { "id", "Q" }
 };
 
+/* Field names registered with the query parser through add_prefix()
+ * and resolved by _find_prefix() when terms are generated for a field. */
+prefix_t PROBABILISTIC_PREFIX[] = {
+    { "from", "XFROM" },
+    { "to", "XTO" },
+    { "attachment", "XATTACHMENT" },
+    { "subject", "XSUBJECT" }
+};
+
 int
 _internal_error (const char *format, ...)
 {
@@ -141,6 +150,10 @@ _find_prefix (const char *name)
         if (strcmp (name, BOOLEAN_PREFIX_EXTERNAL[i].name) == 0)
             return BOOLEAN_PREFIX_EXTERNAL[i].prefix;
 
+    for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++)
+        if (strcmp (name, PROBABILISTIC_PREFIX[i].name) == 0)
+            return PROBABILISTIC_PREFIX[i].prefix;
+
     INTERNAL_ERROR ("No prefix exists for '%s'\n", name);
 
     return "";
@@ -478,14 +491,24 @@ notmuch_database_open (const char *path)
         notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
                                                            Xapian::DB_CREATE_OR_OPEN);
         notmuch->query_parser = new Xapian::QueryParser;
+        notmuch->term_gen = new Xapian::TermGenerator;
+        notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
+
         notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
         notmuch->query_parser->set_database (*notmuch->xapian_db);
+        notmuch->query_parser->set_stemmer (Xapian::Stem ("english"));
+        notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME);
 
         for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
             prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
             notmuch->query_parser->add_boolean_prefix (prefix->name,
                                                        prefix->prefix);
         }
+
+        for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) {
+            prefix_t *prefix = &PROBABILISTIC_PREFIX[i];
+            notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
+        }
     } catch (const Xapian::Error &error) {
         fprintf (stderr, "A Xapian exception occurred: %s\n",
                  error.get_msg().c_str());
@@ -508,6 +531,7 @@ notmuch_database_close (notmuch_database_t *notmuch)
 {
     notmuch->xapian_db->flush ();
 
+    delete notmuch->term_gen;
     delete notmuch->query_parser;
     delete notmuch->xapian_db;
     talloc_free (notmuch);
@@ -924,9 +948,14 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
         {
             ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
             goto DONE;
-        } else {
-            _notmuch_message_sync (message);
         }
+
+        /* Don't sync a message whose file could not be indexed. */
+        ret = _notmuch_message_index_file (message, filename);
+        if (ret)
+            goto DONE;
+
+        _notmuch_message_sync (message);
     } catch (const Xapian::Error &error) {
         fprintf (stderr, "A Xapian 
exception occurred: %s.\n",
                  error.get_msg().c_str());
diff --git a/index.cc b/index.cc
new file mode 100644
index 00000000..88634fc7
--- /dev/null
+++ b/index.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2009 Carl Worth
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Carl Worth
+ */
+
+#include "notmuch-private.h"
+
+#include <gmime/gmime.h>
+
+#include <xapian.h>
+
+/* Index a single (NAME + address) email "mailbox" under 'prefix_name'.
+ *
+ * Both the address and the display name are indexed. When there is no
+ * display name, the part of the address before the '@' is indexed in
+ * its place. */
+static void
+_index_address_mailbox (notmuch_message_t *message,
+                        const char *prefix_name,
+                        InternetAddress *address)
+{
+    InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);
+    const char *name, *addr = internet_address_mailbox_get_addr (mailbox);
+    char *local_part = NULL;
+
+    if (addr)
+        _notmuch_message_gen_terms (message, prefix_name, addr);
+
+    name = internet_address_get_name (address);
+
+    /* Only look inside addr when there is one: strchr (NULL, ...) is
+     * undefined behavior. */
+    if (! name && addr) {
+        const char *at = strchr (addr, '@');
+
+        if (at) {
+            local_part = strndup (addr, at - addr);
+            name = local_part;
+        }
+    }
+
+    if (name)
+        _notmuch_message_gen_terms (message, prefix_name, name);
+
+    /* The fallback name is our own copy; free (NULL) is a no-op. */
+    free (local_part);
+}
+
+static void
+_index_address_list (notmuch_message_t *message,
+                     const char *prefix_name,
+                     InternetAddressList *addresses);
+
+/* The outer loop over the InternetAddressList wasn't quite enough.
+ * There can actually be a tree here where a single member of the list
+ * is a "group" containing another list. Recurse please.
+ */
+static void
+_index_address_group (notmuch_message_t *message,
+                      const char *prefix_name,
+                      InternetAddress *address)
+{
+    InternetAddressGroup *group;
+    InternetAddressList *list;
+
+    group = INTERNET_ADDRESS_GROUP (address);
+    list = internet_address_group_get_members (group);
+
+    if (! list)
+        return;
+
+    _index_address_list (message, prefix_name, list);
+}
+
+/* Index every entry of 'addresses' under 'prefix_name', recursing
+ * into groups. A NULL list is silently ignored. */
+static void
+_index_address_list (notmuch_message_t *message,
+                     const char *prefix_name,
+                     InternetAddressList *addresses)
+{
+    int i;
+    InternetAddress *address;
+
+    if (addresses == NULL)
+        return;
+
+    for (i = 0; i < internet_address_list_length (addresses); i++) {
+        address = internet_address_list_get_address (addresses, i);
+        if (INTERNET_ADDRESS_IS_MAILBOX (address)) {
+            _index_address_mailbox (message, prefix_name, address);
+        } else if (INTERNET_ADDRESS_IS_GROUP (address)) {
+            _index_address_group (message, prefix_name, address);
+        } else {
+            INTERNAL_ERROR ("GMime InternetAddress is neither a mailbox nor a group.\n");
+        }
+    }
+}
+
+/* Return a pointer into 'subject' just past any leading whitespace
+ * and "re:" prefixes (matched case-insensitively). Returns NULL when
+ * 'subject' is NULL. */
+static const char *
+skip_re_in_subject (const char *subject)
+{
+    const char *s = subject;
+
+    if (subject == NULL)
+        return NULL;
+
+    while (*s) {
+        /* Cast: passing a negative char to isspace() is undefined. */
+        while (*s && isspace ((unsigned char) *s))
+            s++;
+        if (strncasecmp (s, "re:", 3) == 0)
+            s += 3;
+        else
+            break;
+    }
+
+    return s;
+}
+
+/* Generate terms for 'part' and, when it is a container, for each of
+ * the parts nested inside it. */
+static void
+_index_mime_part (notmuch_message_t *message,
+                  GMimeObject *part)
+{
+    GMimeStream *stream;
+    GMimeDataWrapper *wrapper;
+    GByteArray *byte_array;
+    GMimeContentDisposition *disposition;
+    char *body;
+
+    /* Guard against a NULL part: the unknown-part warning below would
+     * otherwise dereference it through G_OBJECT_TYPE. */
+    if (part == NULL)
+        return;
+
+    if (GMIME_IS_MULTIPART (part)) {
+        GMimeMultipart *multipart = GMIME_MULTIPART (part);
+        int i;
+
+        for (i = 0; i < g_mime_multipart_get_count (multipart); i++) {
+            if (GMIME_IS_MULTIPART_SIGNED (multipart)) {
+                /* Don't index the signature. */
+                if (i == 1)
+                    continue;
+                if (i > 1)
+                    fprintf (stderr, "Warning: Unexpected extra parts of multipart/signed. Indexing anyway.\n");
+            }
+            _index_mime_part (message,
+                              g_mime_multipart_get_part (multipart, i));
+        }
+        return;
+    }
+
+    if (GMIME_IS_MESSAGE_PART (part)) {
+        GMimeMessage *mime_message;
+
+        mime_message = g_mime_message_part_get_message (GMIME_MESSAGE_PART (part));
+
+        if (mime_message)
+            _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+        return;
+    }
+
+    if (! (GMIME_IS_PART (part))) {
+        fprintf (stderr, "Warning: Not indexing unknown mime part: %s.\n",
+                 g_type_name (G_OBJECT_TYPE (part)));
+        return;
+    }
+
+    disposition = g_mime_object_get_content_disposition (part);
+    /* Disposition types are case-insensitive (RFC 2183). */
+    if (disposition && disposition->disposition &&
+        strcasecmp (disposition->disposition, GMIME_DISPOSITION_ATTACHMENT) == 0)
+    {
+        const char *filename = g_mime_part_get_filename (GMIME_PART (part));
+
+        _notmuch_message_add_term (message, "tag", "attachment");
+        _notmuch_message_gen_terms (message, "attachment", filename);
+
+        /* XXX: Would be nice to call out to something here to parse
+         * the attachment into text and then index that. */
+        return;
+    }
+
+    byte_array = g_byte_array_new ();
+
+    stream = g_mime_stream_mem_new_with_byte_array (byte_array);
+    g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
+    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+    if (wrapper)
+        g_mime_data_wrapper_write_to_stream (wrapper, stream);
+
+    g_object_unref (stream);
+
+    g_byte_array_append (byte_array, (guint8 *) "\0", 1);
+    body = (char *) g_byte_array_free (byte_array, FALSE);
+
+    _notmuch_message_gen_terms (message, NULL, body);
+
+    /* The buffer came from GLib, so release it with g_free. */
+    g_free (body);
+}
+
+/* Parse the mail file 'filename' with GMime and add terms to 'message'
+ * for its sender, recipients, subject and MIME parts.
+ *
+ * Returns NOTMUCH_STATUS_FILE_ERROR if the file cannot be opened,
+ * NOTMUCH_STATUS_FILE_NOT_EMAIL if no message can be parsed from it,
+ * and NOTMUCH_STATUS_SUCCESS otherwise. */
+notmuch_status_t
+_notmuch_message_index_file (notmuch_message_t *message,
+                             const char *filename)
+{
+    GMimeStream *stream = NULL;
+    GMimeParser *parser = NULL;
+    GMimeMessage *mime_message = NULL;
+    InternetAddressList *addresses;
+    FILE *file = NULL;
+    const char *from, *subject;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    static int initialized = 0;
+
+    if (! initialized) {
+        g_mime_init (0);
+        initialized = 1;
+    }
+
+    file = fopen (filename, "r");
+    if (! file) {
+        fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Evil GMime steals my FILE* here so I won't fclose it. */
+    stream = g_mime_stream_file_new (file);
+
+    parser = g_mime_parser_new_with_stream (stream);
+
+    mime_message = g_mime_parser_construct_message (parser);
+    if (! mime_message) {
+        ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
+        goto DONE;
+    }
+
+    from = g_mime_message_get_sender (mime_message);
+    if (from) {
+        addresses = internet_address_list_parse_string (from);
+        if (addresses) {
+            _index_address_list (message, "from", addresses);
+            g_object_unref (addresses);
+        }
+    }
+
+    /* The recipient list is newly allocated, so drop it when done. */
+    addresses = g_mime_message_get_all_recipients (mime_message);
+    if (addresses) {
+        _index_address_list (message, "to", addresses);
+        g_object_unref (addresses);
+    }
+
+    subject = g_mime_message_get_subject (mime_message);
+    subject = skip_re_in_subject (subject);
+    _notmuch_message_gen_terms (message, "subject", subject);
+
+    _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+
+  DONE:
+    if (mime_message)
+        g_object_unref (mime_message);
+
+    if (parser)
+        g_object_unref (parser);
+
+    if (stream)
+        g_object_unref (stream);
+
+    return ret;
+}
diff --git a/message.cc b/message.cc
index 66747b5c..60ddf8a8 100644
--- a/message.cc
+++ b/message.cc
@@ -442,6 +442,32 @@ _notmuch_message_add_term (notmuch_message_t *message,
     return NOTMUCH_PRIVATE_STATUS_SUCCESS;
 }
 
+/* Parse 'text' and add a term to 'message' for each parsed word. Each
+ * term will be added both prefixed (if prefix_name is not NULL) and
+ * also unprefixed. */
+notmuch_private_status_t
+_notmuch_message_gen_terms (notmuch_message_t *message,
+                            const char *prefix_name,
+                            const char *text)
+{
+    Xapian::TermGenerator *term_gen = message->notmuch->term_gen;
+
+    if (text == NULL)
+        return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;
+
+    term_gen->set_document (message->doc);
+
+    if (prefix_name) {
+        const char *prefix = _find_prefix (prefix_name);
+
+        term_gen->index_text (text, 1, prefix);
+    }
+
+    term_gen->index_text (text);
+
+    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
+}
+
 /* Remove a name:value term from 'message', (the actual term will be
  * encoded by prefixing the value with a short prefix).
See * NORMAL_PREFIX and BOOLEAN_PREFIX arrays for the mapping of term diff --git a/notmuch-private.h b/notmuch-private.h index c80f219a..440860ba 100644 --- a/notmuch-private.h +++ b/notmuch-private.h @@ -187,6 +187,11 @@ _notmuch_message_remove_term (notmuch_message_t *message, const char *prefix_name, const char *value); +notmuch_private_status_t +_notmuch_message_gen_terms (notmuch_message_t *message, + const char *prefix_name, + const char *text); + void _notmuch_message_set_filename (notmuch_message_t *message, const char *filename); @@ -205,6 +210,12 @@ _notmuch_message_set_date (notmuch_message_t *message, void _notmuch_message_sync (notmuch_message_t *message); +/* index.cc */ + +notmuch_status_t +_notmuch_message_index_file (notmuch_message_t *message, + const char *filename); + /* message-file.c */ /* XXX: I haven't decided yet whether these will actually get exported