notmuch/lib/thread.cc
Dirk Hohndel cd19671f51 Simple attempt to display author names in a friendlier way
This patch only addresses the typical Outlook/Exchange case
where we have "Last, First" <first.last@company.com> or
"Last, First MI" <first.mi.last@company.com>.

In the future we should be more flexible as to the formats
we recognize, but for now we address this one as it is the
Exchange default setting and therefore the most common one.

Signed-off-by: Dirk Hohndel <hohndel@infradead.org>
2010-04-26 11:45:29 -07:00

575 lines
17 KiB
C++

/* thread.cc - Results of thread-based searches from a notmuch database
*
* Copyright © 2009 Carl Worth
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/ .
*
* Author: Carl Worth <cworth@cworth.org>
*/
#include "notmuch-private.h"
#include "database-private.h"
#include <xapian.h>
#include <gmime/gmime.h>
#include <glib.h> /* GHashTable */
/* Aggregated state for one thread of messages. Instances are built by
 * _notmuch_thread_create and read back through the notmuch_thread_get_*
 * accessors below. */
struct _notmuch_thread {
/* Database handle the thread was created from. */
notmuch_database_t *notmuch;
/* Copy of the thread ID passed to _notmuch_thread_create. */
char *thread_id;
/* Subject shown for the thread: taken from the first message added and
 * replaced by _thread_set_subject_from_message for matched messages. */
char *subject;
/* Set of author names already added (only the keys are used). */
GHashTable *authors_hash;
/* Author names joined with ", "; _thread_move_matched_author rewrites
 * this string in place and may insert a "| " separator. */
char *authors;
/* Pointer into 'authors' maintained by _thread_move_matched_author;
 * NULL until that function first runs on a thread with authors. */
char *nonmatched_authors;
/* Set of tag names seen on any message of the thread (keys only). */
GHashTable *tags;
/* List the thread's messages are added to; _resolve_thread_relationships
 * later unlinks messages whose parent is found in message_hash. */
notmuch_message_list_t *message_list;
/* Maps message ID strings to the message objects held by the thread. */
GHashTable *message_hash;
/* Number of messages added via _thread_add_message. */
int total_messages;
/* Number of messages passed to _thread_add_matched_message. */
int matched_messages;
/* Oldest and newest dates among the matched messages (0 until the
 * first matched message is added). */
time_t oldest;
time_t newest;
};
/* Destructor callback for a thread object: drops the reference held on
 * each of the three GLib hash tables stored in the thread.
 *
 * Always returns 0.
 */
static int
_notmuch_thread_destructor (notmuch_thread_t *thread)
{
    GHashTable *owned_tables[] = {
        thread->authors_hash,
        thread->tags,
        thread->message_hash
    };
    unsigned int i;

    for (i = 0; i < sizeof (owned_tables) / sizeof (owned_tables[0]); i++)
        g_hash_table_unref (owned_tables[i]);

    return 0;
}
/* Record 'author' as a participant of 'thread'.
 *
 * Each distinct name is added only once (thread->authors_hash is the set
 * of names seen so far) and is appended to the ", "-separated
 * thread->authors string. A NULL author is ignored.
 *
 * Appending allocates a new string. Because thread->nonmatched_authors
 * points *into* the authors string, it is re-based onto the new string at
 * the same offset before the superseded string is released; otherwise it
 * would keep pointing into a different allocation. If the longer string
 * cannot be allocated the existing list is left untouched.
 */
static void
_thread_add_author (notmuch_thread_t *thread,
                    const char *author)
{
    char *grown;

    if (author == NULL)
        return;

    if (g_hash_table_lookup_extended (thread->authors_hash,
                                      author, NULL, NULL))
        return;

    g_hash_table_insert (thread->authors_hash, xstrdup (author), NULL);

    if (thread->authors == NULL) {
        thread->authors = talloc_strdup (thread, author);
        return;
    }

    grown = talloc_asprintf (thread, "%s, %s", thread->authors, author);
    if (grown == NULL)
        return;

    if (thread->nonmatched_authors != NULL) {
        size_t old_len = strlen (thread->authors);
        size_t nm_offset = (size_t) (thread->nonmatched_authors -
                                     thread->authors);

        /* Never let the marker land beyond the text that existed before
         * the new author was appended. */
        if (nm_offset > old_len)
            nm_offset = old_len;

        thread->nonmatched_authors = grown + nm_offset;
    }

    talloc_free (thread->authors);
    thread->authors = grown;
}
/*
 * Move authors of matched messages in the thread to
 * the front of the authors list, but keep them in
 * existing order within their group.
 *
 * thread->authors is rewritten in place (its length never changes) and
 * thread->nonmatched_authors is advanced so that it points just past the
 * authors that have been moved to the front. While both groups are
 * non-empty the matched group is terminated by "| " instead of ", ".
 *
 * Does nothing when the thread has no authors string or 'author' is NULL.
 */
static void
_thread_move_matched_author (notmuch_thread_t *thread,
const char *author)
{
char *authors_copy;
char *current_author;
char *last_pipe,*next_pipe;
int idx,nm_start,author_len,authors_len;
if (thread->authors == NULL || author == NULL)
return;
/* first call for this thread: nothing has been matched yet, so the
 * non-matched region is the whole list */
if (thread->nonmatched_authors == NULL)
thread->nonmatched_authors = thread->authors;
author_len = strlen(author);
authors_len = strlen(thread->authors);
if (author_len == authors_len) {
/* just one author */
/* NOTE(review): only the lengths are compared, and the pointer is
 * advanced on every call that takes this path -- confirm that repeated
 * calls for the same sole author cannot move it past the end of the
 * string. */
thread->nonmatched_authors += author_len;
return;
}
/* Case-insensitive substring search over the whole list, matched part
 * included. NOTE(review): a name that also occurs inside another
 * author's name would be found at that other position -- confirm
 * whether that can happen with real data. */
current_author = strcasestr(thread->authors, author);
if (current_author == NULL)
return;
/* how far inside the nonmatched authors is our author? */
/* (this subtraction relies on nonmatched_authors pointing into the
 * current thread->authors buffer) */
idx = current_author - thread->nonmatched_authors;
if (idx < 0) {
/* already among matched authors */
return;
}
/* are there any authors in the list after our author? */
/* (as written, the test is true when the non-matched region is longer
 * than this author's name, i.e. it holds more than just this author) */
if (thread->nonmatched_authors + author_len < thread->authors + authors_len) {
/* we have to make changes, so let's get a temp copy */
/* NOTE(review): the copy is allocated on the thread context, is not
 * checked for NULL and is not freed in this function. */
authors_copy = talloc_strdup(thread,thread->authors);
/* nm_start is the offset into where the non-matched authors start */
nm_start = thread->nonmatched_authors - thread->authors;
/* copy this author and add the "| " - the if clause above tells us there's more */
strncpy(thread->nonmatched_authors,author,author_len);
strncpy(thread->nonmatched_authors+author_len,"| ",2);
thread->nonmatched_authors += author_len+2;
if (idx > 0) {
/* we are actually moving authors around, not just changing the separator
* first copy the authors that came BEFORE our author */
/* (idx-2 bytes: the preceding text without its trailing 2-byte
 * separator) */
strncpy(thread->nonmatched_authors, authors_copy+nm_start, idx-2);
/* finally, if there are authors AFTER our author, copy those */
if(author_len+nm_start+idx < authors_len) {
/* re-insert the separator, then copy the tail that followed our
 * author and its own 2-byte separator in the saved copy */
strncpy(thread->nonmatched_authors + idx - 2,", ",2);
strncpy(thread->nonmatched_authors + idx, authors_copy+nm_start + idx + author_len + 2,
authors_len - (nm_start + idx + author_len + 2));
}
}
/* finally let's make sure there's just one '|' in the authors string */
/* (every '|' except the last one found is turned into ',') */
last_pipe = strchr(thread->authors,'|');
while (last_pipe) {
next_pipe = strchr(last_pipe+1,'|');
if (next_pipe)
*last_pipe = ',';
last_pipe = next_pipe;
}
} else {
/* the non-matched region is no longer than this author's name */
thread->nonmatched_authors += author_len;
/* so now all authors are matched - let's remove the '|' */
/* (only the first '|' in the string is replaced) */
last_pipe = strchr(thread->authors,'|');
if (last_pipe)
*last_pipe = ',';
}
return;
}
/* Clean up the ugly "Lastname, Firstname" format that some mail systems
 * (most notably, Exchange) create, turning it into "Firstname Lastname".
 * To make sure that we don't change other potential situations where a
 * comma is in the name, we check that we match one of these patterns
 * "Last, First" <first.last@company.com>
 * "Last, First MI" <first.mi.last@company.com>
 *
 * The rewrite is only attempted when the first comma is preceded by a
 * last name, followed by exactly ", " and then more text; the rewritten
 * name with blanks replaced by dots must then occur (ignoring case) in
 * 'from'. In every other case the name is returned unchanged.
 *
 * Returns a string allocated on 'thread' (either the rewritten name or a
 * copy of 'author'), or NULL if 'author' is NULL or allocation fails.
 */
char *
_thread_cleanup_author (notmuch_thread_t *thread,
                        const char *author, const char *from)
{
    char *clean_author, *candidate, *test_author, *p;
    const char *comma, *first;
    int lname, matches;

    clean_author = talloc_strdup (thread, author);
    if (clean_author == NULL)
        return NULL;

    comma = strchr (author, ',');

    /* Without the exact "Last, First" shape the length arithmetic below
     * would be meaningless (e.g. a trailing comma), so punt early. */
    if (comma == NULL || comma == author ||
        comma[1] != ' ' || comma[2] == '\0' || from == NULL)
        return clean_author;

    first = comma + 2;
    lname = (int) (comma - author);

    /* let's assemble what we think is the correct name */
    candidate = talloc_asprintf (thread, "%s %.*s", first, lname, author);
    if (candidate == NULL)
        return clean_author;

    /* make a temporary copy and see if it matches the email */
    test_author = talloc_strdup (thread, candidate);
    if (test_author == NULL) {
        talloc_free (candidate);
        return clean_author;
    }

    for (p = test_author; *p != '\0'; p++) {
        if (*p == ' ')
            *p = '.';
    }

    matches = (strcasestr (from, test_author) != NULL);
    talloc_free (test_author);

    if (! matches) {
        /* we didn't identify this as part of the email address
         * so let's punt and return the original author */
        talloc_free (candidate);
        return clean_author;
    }

    talloc_free (clean_author);
    return candidate;
}
/* Attach 'message' to 'thread'.
 *
 * The thread takes ownership of the message via talloc_steal, appends it
 * to the message list, indexes it by message ID, and folds its author,
 * subject (only if the thread has none yet) and tags into the thread's
 * aggregate state.
 */
static void
_thread_add_message (notmuch_thread_t *thread,
                     notmuch_message_t *message)
{
    const char *from_header;
    InternetAddressList *from_list;
    notmuch_tags_t *tag_iter;

    _notmuch_message_list_add_message (thread->message_list,
                                       talloc_steal (thread, message));
    thread->total_messages += 1;

    g_hash_table_insert (thread->message_hash,
                         xstrdup (notmuch_message_get_message_id (message)),
                         message);

    /* Author: the display name of the first From address, falling back
     * to the bare mailbox address when no name is present. */
    from_header = notmuch_message_get_header (message, "from");
    from_list = from_header ? internet_address_list_parse_string (from_header)
                            : NULL;

    if (from_list != NULL) {
        InternetAddress *first_address;

        first_address = internet_address_list_get_address (from_list, 0);
        if (first_address != NULL) {
            const char *raw_name;
            char *display_name;

            raw_name = internet_address_get_name (first_address);
            if (raw_name == NULL) {
                InternetAddressMailbox *as_mailbox;

                as_mailbox = INTERNET_ADDRESS_MAILBOX (first_address);
                raw_name = internet_address_mailbox_get_addr (as_mailbox);
            }

            display_name = _thread_cleanup_author (thread, raw_name,
                                                   from_header);
            _thread_add_author (thread, display_name);
            notmuch_message_set_author (message, display_name);
        }
        g_object_unref (G_OBJECT (from_list));
    }

    /* Subject: only the first message to arrive sets it here. */
    if (thread->subject == NULL) {
        const char *header_subject;

        header_subject = notmuch_message_get_header (message, "subject");
        if (header_subject == NULL)
            header_subject = "";
        thread->subject = talloc_strdup (thread, header_subject);
    }

    /* Tags: add every tag of the message to the thread-wide set. */
    tag_iter = notmuch_message_get_tags (message);
    while (notmuch_tags_valid (tag_iter)) {
        const char *tag_name = notmuch_tags_get (tag_iter);

        g_hash_table_insert (thread->tags, xstrdup (tag_name), NULL);
        notmuch_tags_move_to_next (tag_iter);
    }
}
/* Replace the thread's subject with the subject of 'message'.
 *
 * A single leading reply marker ("Re: ", "Aw: ", "Vs: " or "Sv: ",
 * compared case-insensitively) is stripped first. Nothing happens when
 * the message has no subject header.
 *
 * The new string is allocated before the old one is released, so on
 * allocation failure the thread keeps its previous subject. The cleaned
 * string is stored directly rather than duplicated a second time.
 */
static void
_thread_set_subject_from_message (notmuch_thread_t *thread,
                                  notmuch_message_t *message)
{
    static const char *const reply_prefixes[] = {
        "Re: ", "Aw: ", "Vs: ", "Sv: "
    };
    const char *subject;
    char *cleaned_subject;
    unsigned int i;

    subject = notmuch_message_get_header (message, "subject");
    if (! subject)
        return;

    for (i = 0; i < sizeof (reply_prefixes) / sizeof (reply_prefixes[0]); i++) {
        if (strncasecmp (subject, reply_prefixes[i], 4) == 0) {
            subject += 4;
            break;
        }
    }

    cleaned_subject = talloc_strdup (thread, subject);
    if (cleaned_subject == NULL)
        return;

    if (thread->subject)
        talloc_free (thread->subject);

    thread->subject = cleaned_subject;
}
/* Account for 'message' as one that matched the search.
 *
 * Updates the oldest/newest dates and the matched count, flags the
 * thread's own copy of the message (looked up by message ID) as a match,
 * moves its author to the matched part of the authors list, and possibly
 * takes over its subject as the thread subject.
 *
 * If the message ID is not present in the thread's hash, no flag is set
 * and no author is moved; the looked-up pointer is never read in that
 * case.
 */
static void
_thread_add_matched_message (notmuch_thread_t *thread,
                             notmuch_message_t *message,
                             notmuch_sort_t sort)
{
    time_t date;
    notmuch_message_t *hashed_message = NULL;

    date = notmuch_message_get_date (message);

    /* The first matched message initialises both bounds. */
    if (date < thread->oldest || ! thread->matched_messages)
        thread->oldest = date;

    if (date > thread->newest || ! thread->matched_messages)
        thread->newest = date;

    thread->matched_messages++;

    if (g_hash_table_lookup_extended (thread->message_hash,
                                      notmuch_message_get_message_id (message),
                                      NULL,
                                      (void **) &hashed_message)) {
        notmuch_message_set_flag (hashed_message,
                                  NOTMUCH_MESSAGE_FLAG_MATCH, 1);
        _thread_move_matched_author (thread,
                                     notmuch_message_get_author (hashed_message));
    }

    /* 'newest' was just raised to at least 'date', so for oldest-first
     * sorting every matched message takes over the subject; for any other
     * sort only a message that is the newest so far does. */
    if ((sort == NOTMUCH_SORT_OLDEST_FIRST && date <= thread->newest) ||
        (sort != NOTMUCH_SORT_OLDEST_FIRST && date == thread->newest))
    {
        _thread_set_subject_from_message (thread, message);
    }
}
/* Turn the flat message list of 'thread' into a tree.
 *
 * Every message whose In-Reply-To header names another message present
 * in thread->message_hash is unlinked from the list and handed to
 * _notmuch_message_add_reply as a reply of that parent. Messages without
 * such a parent stay in the list.
 *
 * A message whose In-Reply-To names itself is left in the list: attaching
 * it to itself would remove it from the list without making it reachable
 * from any other message.
 */
static void
_resolve_thread_relationships (notmuch_thread_t *thread)
{
    notmuch_message_node_t **prev, *node;
    notmuch_message_t *message, *parent;
    const char *in_reply_to;

    prev = &thread->message_list->head;
    while ((node = *prev)) {
        message = node->message;
        parent = NULL;
        in_reply_to = _notmuch_message_get_in_reply_to (message);
        if (in_reply_to && *in_reply_to != '\0' &&
            g_hash_table_lookup_extended (thread->message_hash,
                                          in_reply_to, NULL,
                                          (void **) &parent) &&
            parent != NULL && parent != message)
        {
            /* Unlink the node; 'prev' stays put so that the node which
             * slides into this position is examined next. */
            *prev = node->next;
            if (thread->message_list->tail == &node->next)
                thread->message_list->tail = prev;
            node->next = NULL;
            _notmuch_message_add_reply (parent, node);
        } else {
            prev = &node->next;
        }
    }

    /* XXX: After scanning through the entire list looking for parents
     * via "In-Reply-To", we should do a second pass that looks at the
     * list of messages IDs in the "References" header instead. (And
     * for this the parent would be the "deepest" message of all the
     * messages found in the "References" list.)
     *
     * Doing this will allow messages and sub-threads to be positioned
     * correctly in the thread even when an intermediate message is
     * missing from the thread.
     */
}
/* Create a new notmuch_thread_t object for the given thread ID,
 * treating any messages matching 'query_string' as "matched".
 *
 * Creating the thread will trigger two database searches. The first
 * is for all messages belonging to the thread, (to get the first
 * subject line, the total count of messages, and all authors). The
 * second search is for all messages that are in the thread and that
 * also match the given query_string. This is to allow for a separate
 * count of matched messages, and to allow a viewer to display these
 * messages differently.
 *
 * Here, 'ctx' is talloc context for the resulting thread object.
 *
 * This function returns NULL in the case of any error (including a NULL
 * 'query_string'). On failure any query created here is destroyed and a
 * partially built thread is freed rather than left attached to 'ctx'.
 */
notmuch_thread_t *
_notmuch_thread_create (void *ctx,
                        notmuch_database_t *notmuch,
                        const char *thread_id,
                        const char *query_string,
                        notmuch_sort_t sort)
{
    notmuch_thread_t *thread;
    const char *thread_id_query_string;
    notmuch_query_t *thread_id_query;
    notmuch_messages_t *messages;
    notmuch_message_t *message;
    notmuch_bool_t matched_is_subset_of_thread;

    if (unlikely (query_string == NULL))
        return NULL;

    thread_id_query_string = talloc_asprintf (ctx, "thread:%s", thread_id);
    if (unlikely (thread_id_query_string == NULL))
        return NULL;

    /* Under normal circumstances we need to do two database
     * queries. One is for the thread itself (thread_id_query_string)
     * and the second is to determine which messages in that thread
     * match the original query (matched_query_string).
     *
     * But under two circumstances, we use only the
     * thread_id_query_string:
     *
     *	1. If the original query_string *is* just the thread
     *	   specification.
     *
     *	2. If the original query_string matches all messages ("" or
     *	   "*").
     *
     * In either of these cases, we can be more efficient by running
     * just the thread_id query (since we know all messages in the
     * thread will match the query_string).
     *
     * Beyond the performance advantage, in the second case, it's
     * important to not try to create a concatenated query because our
     * parser handles "" and "*" as special cases and will not do the
     * right thing with a query string of "* and thread:<foo>".
     **/
    matched_is_subset_of_thread = 1;
    if (strcmp (query_string, thread_id_query_string) == 0 ||
        strcmp (query_string, "") == 0 ||
        strcmp (query_string, "*") == 0)
    {
        matched_is_subset_of_thread = 0;
    }

    thread_id_query = notmuch_query_create (notmuch, thread_id_query_string);
    if (unlikely (thread_id_query == NULL))
        return NULL;

    thread = talloc (ctx, notmuch_thread_t);
    if (unlikely (thread == NULL)) {
        notmuch_query_destroy (thread_id_query);
        return NULL;
    }

    /* Give every field a defined value, and create all three hash
     * tables, *before* installing the destructor: the destructor
     * unrefs the tables unconditionally. */
    thread->notmuch = notmuch;
    thread->thread_id = NULL;
    thread->subject = NULL;
    thread->authors = NULL;
    thread->nonmatched_authors = NULL;
    thread->message_list = NULL;
    thread->total_messages = 0;
    thread->matched_messages = 0;
    thread->oldest = 0;
    thread->newest = 0;

    thread->authors_hash = g_hash_table_new_full (g_str_hash, g_str_equal,
                                                  free, NULL);
    thread->tags = g_hash_table_new_full (g_str_hash, g_str_equal,
                                          free, NULL);
    thread->message_hash = g_hash_table_new_full (g_str_hash, g_str_equal,
                                                  free, NULL);

    talloc_set_destructor (thread, _notmuch_thread_destructor);

    thread->thread_id = talloc_strdup (thread, thread_id);
    thread->message_list = _notmuch_message_list_create (thread);
    if (unlikely (thread->thread_id == NULL ||
                  thread->message_list == NULL))
    {
        notmuch_query_destroy (thread_id_query);
        talloc_free (thread);
        return NULL;
    }

    /* First pass: every message of the thread, oldest first. */
    notmuch_query_set_sort (thread_id_query, NOTMUCH_SORT_OLDEST_FIRST);

    for (messages = notmuch_query_search_messages (thread_id_query);
         notmuch_messages_valid (messages);
         notmuch_messages_move_to_next (messages))
    {
        message = notmuch_messages_get (messages);
        _thread_add_message (thread, message);

        /* Single-query case: every message of the thread is a match. */
        if (! matched_is_subset_of_thread)
            _thread_add_matched_message (thread, message, sort);

        _notmuch_message_close (message);
    }

    notmuch_query_destroy (thread_id_query);

    /* Second pass: only the messages that also match the user's query. */
    if (matched_is_subset_of_thread)
    {
        const char *matched_query_string;
        notmuch_query_t *matched_query;

        matched_query_string = talloc_asprintf (ctx, "%s AND (%s)",
                                                thread_id_query_string,
                                                query_string);
        if (unlikely (matched_query_string == NULL)) {
            talloc_free (thread);
            return NULL;
        }

        matched_query = notmuch_query_create (notmuch, matched_query_string);
        if (unlikely (matched_query == NULL)) {
            talloc_free (thread);
            return NULL;
        }

        for (messages = notmuch_query_search_messages (matched_query);
             notmuch_messages_valid (messages);
             notmuch_messages_move_to_next (messages))
        {
            message = notmuch_messages_get (messages);
            _thread_add_matched_message (thread, message, sort);
            _notmuch_message_close (message);
        }

        notmuch_query_destroy (matched_query);
    }

    _resolve_thread_relationships (thread);

    return thread;
}
/* Return a messages iterator created over the thread's message list. */
notmuch_messages_t *
notmuch_thread_get_toplevel_messages (notmuch_thread_t *thread)
{
    notmuch_message_list_t *toplevel = thread->message_list;

    return _notmuch_messages_create (toplevel);
}
/* Return the thread's ID string as stored in the thread object. */
const char *
notmuch_thread_get_thread_id (notmuch_thread_t *thread)
{
    const char *id = thread->thread_id;

    return id;
}
/* Return the thread's total message count. */
int
notmuch_thread_get_total_messages (notmuch_thread_t *thread)
{
    int total = thread->total_messages;

    return total;
}
/* Return the thread's matched message count. */
int
notmuch_thread_get_matched_messages (notmuch_thread_t *thread)
{
    int matched = thread->matched_messages;

    return matched;
}
/* Return the thread's authors string (NULL if no author was added). */
const char *
notmuch_thread_get_authors (notmuch_thread_t *thread)
{
    const char *author_list = thread->authors;

    return author_list;
}
/* Return the subject string currently stored for the thread. */
const char *
notmuch_thread_get_subject (notmuch_thread_t *thread)
{
    const char *current_subject = thread->subject;

    return current_subject;
}
/* Return the thread's stored 'oldest' date. */
time_t
notmuch_thread_get_oldest_date (notmuch_thread_t *thread)
{
    time_t oldest_date = thread->oldest;

    return oldest_date;
}
/* Return the thread's stored 'newest' date. */
time_t
notmuch_thread_get_newest_date (notmuch_thread_t *thread)
{
    time_t newest_date = thread->newest;

    return newest_date;
}
/* Build a tags object holding every key of the thread's tag set.
 *
 * The tags object is created with the thread as its context; each key of
 * thread->tags is added to it and the iterator is prepared before it is
 * returned. Returns NULL if the tags object cannot be created.
 */
notmuch_tags_t *
notmuch_thread_get_tags (notmuch_thread_t *thread)
{
    notmuch_tags_t *result;
    GList *tag_names, *cursor;

    result = _notmuch_tags_create (thread);
    if (unlikely (result == NULL))
        return NULL;

    tag_names = g_hash_table_get_keys (thread->tags);

    cursor = tag_names;
    while (cursor != NULL) {
        _notmuch_tags_add_tag (result, (char *) cursor->data);
        cursor = cursor->next;
    }

    g_list_free (tag_names);

    _notmuch_tags_prepare_iterator (result);

    return result;
}
/* Destroy a thread object by freeing it with talloc_free; the result of
 * that call is deliberately not reported to the caller. */
void
notmuch_thread_destroy (notmuch_thread_t *thread)
{
    (void) talloc_free (thread);
}