From 2ce552b5f783b6c761f473990df9ce93ef03dcf0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 21 Oct 2009 23:01:17 -0700 Subject: [PATCH 1/2] Fix lifetime-maintenance bug with std::string and c_str() Here's more evidence that C++ is a nightmare to program in---or that I'm smart enough to realize that C++ is more clever than I will ever be. Most of my issues with C++ have to do with it hiding things from me that, as a C programmer, I'd really like to be aware of, and expect to be aware of. For example, the specific problem here is that there's a short-lived (temporary) std::string, from which I just want to copy the C string. I try to do that on the next line, but before I can, C++ has already called the destructor on the std::string (the temporary is destroyed at the end of the statement that created it, leaving the pointer returned by c_str() dangling). Now, C++ isn't alone in doing garbage collecting like this. But in a *real* garbage-collecting system, everything would work that way. For example, here, I'm still holding a pointer to the C string contents, so if the garbage collector were aware of that reference, then it might clean up the std::string container and leave the data I'm still using. But that's not what we get with C++. Instead, some things are reference counted and collected (like the std::string), and some things just aren't (like the C string it contains). The end result is that it's very fragile. It forces me to be aware of the timing of hidden functions. In a "real" system I wouldn't have to be aware of that timing, and in C the function just wouldn't be hidden. 
--- message.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/message.cc b/message.cc index 8ca8fdee..24dbae91 100644 --- a/message.cc +++ b/message.cc @@ -176,14 +176,14 @@ notmuch_thread_ids_t * notmuch_message_get_thread_ids (notmuch_message_t *message) { notmuch_thread_ids_t *thread_ids; - const char *id_str; + std::string id_str; thread_ids = talloc (message, notmuch_thread_ids_t); if (unlikely (thread_ids == NULL)) return NULL; - id_str = message->doc.get_value (NOTMUCH_VALUE_THREAD).c_str (); - thread_ids->next = talloc_strdup (message, id_str); + id_str = message->doc.get_value (NOTMUCH_VALUE_THREAD); + thread_ids->next = talloc_strdup (message, id_str.c_str ()); /* Initialize thread_ids->current and terminate first ID. */ notmuch_thread_ids_advance (thread_ids); From c58ee818b5e116d00172c8406149106c97c2e377 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 21 Oct 2009 23:10:19 -0700 Subject: [PATCH 2/2] Bring back the insert_thread_id function. We deleted this in favor of our fancy new thread_ids iterator from the message object. But one of the previous callers of insert_thread_id isn't using notmuch_message_t yet. I made the mistake of thinking I could just call g_hash_table_insert directly, but the problem was that nobody was splitting up the thread_id string at its commas. So with this, we were inserting bogus comma-separated IDs into the hash table, so thread_id values were ballooning out of control. Should be much better now. 
--- database.cc | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/database.cc b/database.cc index e46fe5d8..77b2eff2 100644 --- a/database.cc +++ b/database.cc @@ -111,6 +111,29 @@ find_message_by_docid (Xapian::Database *db, Xapian::docid docid) return db->get_document (docid); } +static void +insert_thread_id (GHashTable *thread_ids, Xapian::Document doc) +{ + string value_string; + const char *value, *id, *comma; + + value_string = doc.get_value (NOTMUCH_VALUE_THREAD); + value = value_string.c_str(); + if (strlen (value)) { + id = value; + while (*id) { + comma = strchr (id, ','); + if (comma == NULL) + comma = id + strlen (id); + g_hash_table_insert (thread_ids, + strndup (id, comma - id), NULL); + id = comma; + if (*id) + id++; + } + } +} + notmuch_message_t * notmuch_database_find_message (notmuch_database_t *notmuch, const char *message_id) @@ -152,16 +175,8 @@ find_thread_ids (notmuch_database_t *notmuch, find_messages_by_term (db, "ref", message_id, &child, &children_end); for ( ; child != children_end; child++) { - const char *thread_id; doc = find_message_by_docid (db, *child); - - thread_id = doc.get_value (NOTMUCH_VALUE_THREAD).c_str (); - if (strlen (thread_id) == 0) { - fprintf (stderr, "Database error: Message with doc_id %u has empty thread-id value (value index %d)\n", - *child, NOTMUCH_VALUE_THREAD); - } else { - g_hash_table_insert (thread_ids, strdup (thread_id), NULL); - } + insert_thread_id (thread_ids, doc); } for (i = 0; i < parents->len; i++) {