From d079837eeeadc37d266113a1fd2deb0a01aaee91 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 May 2007 01:24:19 -0400 Subject: Lazily open pack index files on demand In some repository configurations the user may have many packfiles, but all of the recent commits/trees/tags/blobs are likely to be in the most recent packfile (the one with the newest mtime). It is therefore common to be able to complete an entire operation by accessing only one packfile, even if there are 25 packfiles available to the repository. Rather than opening and mmaping the corresponding .idx file for every pack found, we now only open and map the .idx when we suspect there might be an object of interest in there. Of course we cannot know in advance which packfile contains an object, so we still need to scan the entire packed_git list to locate anything. But odds are users want to access objects in the most recently created packfiles first, and that may be all they ever need for the current operation. Junio observed in b867092f that placing recent packfiles before older ones can slightly improve access times for recent objects, without degrading it for historical object access. This change improves upon Junio's observations by trying even harder to avoid the .idx files that we won't need. Signed-off-by: Shawn O. 
Pearce Signed-off-by: Junio C Hamano --- sha1_file.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 12d2ef2011..6a5ba63500 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -530,6 +530,21 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) return 0; } +int open_pack_index (struct packed_git *p) +{ + char *idx_name; + int ret; + + if (p->index_data) + return 0; + + idx_name = xstrdup(p->pack_name); + strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); + ret = check_packed_git_idx(idx_name, p); + free(idx_name); + return ret; +} + static void scan_windows(struct packed_git *p, struct packed_git **lru_p, struct pack_window **lru_w, @@ -605,6 +620,9 @@ static int open_packed_git_1(struct packed_git *p) unsigned char *idx_sha1; long fd_flag; + if (!p->index_data && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + p->pack_fd = open(p->pack_name, O_RDONLY); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) return -1; @@ -757,8 +775,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) return NULL; memcpy(p->pack_name, path, path_len); strcpy(p->pack_name + path_len, ".pack"); - if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode) || - check_packed_git_idx(path, p)) { + if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); return NULL; } @@ -766,6 +783,10 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) /* ok, it looks sane as far as we can check without * actually mapping the pack file. 
*/ + p->index_version = 0; + p->index_data = NULL; + p->index_size = 0; + p->num_objects = 0; p->pack_size = st.st_size; p->next = NULL; p->windows = NULL; @@ -1572,10 +1593,15 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, return data; } -const unsigned char *nth_packed_object_sha1(const struct packed_git *p, +const unsigned char *nth_packed_object_sha1(struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; + if (!index) { + if (open_pack_index(p)) + return NULL; + index = p->index_data; + } if (n >= p->num_objects) return NULL; index += 4 * 256; @@ -1612,6 +1638,12 @@ off_t find_pack_entry_one(const unsigned char *sha1, const unsigned char *index = p->index_data; unsigned hi, lo; + if (!index) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + index = p->index_data; + } if (p->index_version > 1) { level1_ofs += 2; index += 8; -- cgit v1.2.3 From 7dc24aa5a62cc5f77e6637674581c837f4bdf78e Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 May 2007 01:24:40 -0400 Subject: Micro-optimize prepare_alt_odb Calling getenv() is not that expensive, but it's also not free, and it's certainly not cheaper than testing to see if alt_odb_tail is not null. Because we are calling prepare_alt_odb() from within find_sha1_file every time we cannot find an object file locally we want to skip out of prepare_alt_odb() as early as possible once we have initialized our alternate list. Signed-off-by: Shawn O. 
Pearce Signed-off-by: Junio C Hamano --- sha1_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 6a5ba63500..a3637d7e5b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -376,11 +376,12 @@ void prepare_alt_odb(void) { const char *alt; + if (alt_odb_tail) + return; + alt = getenv(ALTERNATE_DB_ENVIRONMENT); if (!alt) alt = ""; - if (alt_odb_tail) - return; alt_odb_tail = &alt_odb_list; link_alt_odb_entries(alt, alt + strlen(alt), ':', NULL, 0); -- cgit v1.2.3 From bc8e478a285ff549a3e5182461b064313d400de3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 May 2007 02:13:42 -0400 Subject: Style nit - don't put space after function names Our style is to not put a space after a function name. I did here, and Junio applied the patch with the incorrect formatting. So I'm cleaning up after myself since I noticed it upon review. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- sha1_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index a3637d7e5b..3093ac9f5f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -531,7 +531,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) return 0; } -int open_pack_index (struct packed_git *p) +int open_pack_index(struct packed_git *p) { char *idx_name; int ret; -- cgit v1.2.3