mirror of
https://git.notmuchmail.org/git/notmuch
synced 2025-01-05 08:11:41 +01:00
Read mail directory in inode number order
This gives a rather decent reduction in the number of seeks required when reading a Maildir that isn't in the page cache. Most filesystems give some locality on disk based on inode numbers. In ext[234] this is the inode tables; in XFS, groups of sequential inode numbers are together on disk and the most significant bits indicate the allocation group (i.e., inode 1,000,000 is always after inode 1,000). With this patch, we read in the whole directory and sort by inode number before stat()ing the contents. Ideally, the directory is sequential and then we make one scan through the file system stat()ing. Since the universe is not ideal, we'll probably seek while reading the directory and a fair bit while reading the inodes themselves. However, with readahead, and stat()ing in inode order, we should be in the best place possible to hit the cache. In a (not very good) benchmark of "how long does it take to find the first 15,000 messages in my Maildir after 'echo 3 > /proc/sys/vm/drop_caches'", this patch consistently cut at least 8 seconds off the scan time. Without the patch: 50 seconds. With the patch: 38-42 seconds. (I did this in a previous maildir reading project and saw large improvements there too.)
This commit is contained in:
parent
0656fb518d
commit
a45ff8c361
1 changed files with 15 additions and 17 deletions
|
@ -73,6 +73,11 @@ add_files_print_progress (add_files_state_t *state)
|
||||||
fflush (stdout);
|
fflush (stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Comparison callback for scandir(): order directory entries by
 * ascending inode number.
 *
 * 'a' and 'b' each point to a pointer to a directory entry.
 *
 * Returns a negative value if *a has the smaller inode number, a
 * positive value if it has the larger one, and 0 if both are equal.
 *
 * Returning 0 for equal inode numbers matters: two hard links in the
 * same directory share an inode number, and a comparator that claims
 * both "a > b" and "b > a" gives the sort inconsistent answers.
 */
static int ino_cmp(const struct dirent **a, const struct dirent **b)
{
    /* Compare rather than subtract: d_ino is an unsigned type that
     * may be wider than int, so a difference could wrap or truncate. */
    if ((*a)->d_ino < (*b)->d_ino)
	return -1;
    if ((*a)->d_ino > (*b)->d_ino)
	return 1;
    return 0;
}
|
||||||
|
|
||||||
/* Examine 'path' recursively as follows:
|
/* Examine 'path' recursively as follows:
|
||||||
*
|
*
|
||||||
* o Ask the filesystem for the mtime of 'path' (path_mtime)
|
* o Ask the filesystem for the mtime of 'path' (path_mtime)
|
||||||
|
@ -100,13 +105,12 @@ add_files_recursive (notmuch_database_t *notmuch,
|
||||||
add_files_state_t *state)
|
add_files_state_t *state)
|
||||||
{
|
{
|
||||||
DIR *dir = NULL;
|
DIR *dir = NULL;
|
||||||
struct dirent *e, *entry = NULL;
|
struct dirent *entry = NULL;
|
||||||
int entry_length;
|
|
||||||
int err;
|
|
||||||
char *next = NULL;
|
char *next = NULL;
|
||||||
time_t path_mtime, path_dbtime;
|
time_t path_mtime, path_dbtime;
|
||||||
notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
|
notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
|
||||||
notmuch_message_t *message = NULL;
|
notmuch_message_t *message = NULL;
|
||||||
|
struct dirent **namelist = NULL;
|
||||||
|
|
||||||
/* If we're told to, we bail out on encountering a read-only
|
/* If we're told to, we bail out on encountering a read-only
|
||||||
* directory, (with this being a clear clue from the user to
|
* directory, (with this being a clear clue from the user to
|
||||||
|
@ -122,31 +126,23 @@ add_files_recursive (notmuch_database_t *notmuch,
|
||||||
path_mtime = st->st_mtime;
|
path_mtime = st->st_mtime;
|
||||||
|
|
||||||
path_dbtime = notmuch_database_get_timestamp (notmuch, path);
|
path_dbtime = notmuch_database_get_timestamp (notmuch, path);
|
||||||
|
int n_entries= scandir(path, &namelist, 0, ino_cmp);
|
||||||
|
|
||||||
dir = opendir (path);
|
if (n_entries == -1) {
|
||||||
if (dir == NULL) {
|
|
||||||
fprintf (stderr, "Error opening directory %s: %s\n",
|
fprintf (stderr, "Error opening directory %s: %s\n",
|
||||||
path, strerror (errno));
|
path, strerror (errno));
|
||||||
ret = NOTMUCH_STATUS_FILE_ERROR;
|
ret = NOTMUCH_STATUS_FILE_ERROR;
|
||||||
goto DONE;
|
goto DONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
entry_length = offsetof (struct dirent, d_name) +
|
int i=0;
|
||||||
pathconf (path, _PC_NAME_MAX) + 1;
|
|
||||||
entry = malloc (entry_length);
|
|
||||||
|
|
||||||
while (!interrupted) {
|
while (!interrupted) {
|
||||||
err = readdir_r (dir, entry, &e);
|
if (i == n_entries)
|
||||||
if (err) {
|
|
||||||
fprintf (stderr, "Error reading directory: %s\n",
|
|
||||||
strerror (errno));
|
|
||||||
ret = NOTMUCH_STATUS_FILE_ERROR;
|
|
||||||
goto DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (e == NULL)
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
entry= namelist[i++];
|
||||||
|
|
||||||
/* If this directory hasn't been modified since the last
|
/* If this directory hasn't been modified since the last
|
||||||
* add_files, then we only need to look further for
|
* add_files, then we only need to look further for
|
||||||
* sub-directories. */
|
* sub-directories. */
|
||||||
|
@ -243,6 +239,8 @@ add_files_recursive (notmuch_database_t *notmuch,
|
||||||
free (entry);
|
free (entry);
|
||||||
if (dir)
|
if (dir)
|
||||||
closedir (dir);
|
closedir (dir);
|
||||||
|
if (namelist)
|
||||||
|
free (namelist);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue