From 99bf115c879af7e38ef0ca9596fc9db1d6598d5f Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:24 -0700 Subject: repository: introduce parsed objects field Convert the existing global cache for parsed objects (obj_hash) into repository-specific parsed object caches. Existing code that uses obj_hash are modified to use the parsed object cache of the_repository; future patches will use the parsed object caches of other repositories. Another future use case for a pool of objects is ease of memory management in revision walking: If we can free the rev-list related memory early in pack-objects (e.g. part of repack operation) then it could lower memory pressure significantly when running on large repos. While this has been discussed on the mailing list lately, this series doesn't implement this. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 63 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index 5044d08e96..f7c624a7ba 100644 --- a/object.c +++ b/object.c @@ -8,17 +8,14 @@ #include "object-store.h" #include "packfile.h" -static struct object **obj_hash; -static int nr_objs, obj_hash_size; - unsigned int get_max_object_index(void) { - return obj_hash_size; + return the_repository->parsed_objects->obj_hash_size; } struct object *get_indexed_object(unsigned int idx) { - return obj_hash[idx]; + return the_repository->parsed_objects->obj_hash[idx]; } static const char *object_type_strings[] = { @@ -90,15 +87,16 @@ struct object *lookup_object(const unsigned char *sha1) unsigned int i, first; struct object *obj; - if (!obj_hash) + if (!the_repository->parsed_objects->obj_hash) return NULL; - first = i = hash_obj(sha1, obj_hash_size); - while ((obj = obj_hash[i]) != NULL) { + first = i = hash_obj(sha1, + the_repository->parsed_objects->obj_hash_size); + while ((obj = the_repository->parsed_objects->obj_hash[i]) != NULL) { if (!hashcmp(sha1, obj->oid.hash)) break; i++; - if (i == obj_hash_size) + if (i == the_repository->parsed_objects->obj_hash_size) i = 0; } if (obj && i != first) { @@ -107,7 +105,8 @@ struct object *lookup_object(const unsigned char *sha1) * that we do not need to walk the hash table the next * time we look for it. */ - SWAP(obj_hash[i], obj_hash[first]); + SWAP(the_repository->parsed_objects->obj_hash[i], + the_repository->parsed_objects->obj_hash[first]); } return obj; } @@ -124,19 +123,19 @@ static void grow_object_hash(void) * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size; + int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(obj_hash); - obj_hash = new_hash; - obj_hash_size = new_hash_size; + free(the_repository->parsed_objects->obj_hash); + the_repository->parsed_objects->obj_hash = new_hash; + the_repository->parsed_objects->obj_hash_size = new_hash_size; } void *create_object(const unsigned char *sha1, void *o) @@ -147,11 +146,12 @@ void *create_object(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (obj_hash_size - 1 <= nr_objs * 2) + if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) grow_object_hash(); - insert_obj_hash(obj, obj_hash, obj_hash_size); - nr_objs++; + insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, + the_repository->parsed_objects->obj_hash_size); + the_repository->parsed_objects->nr_objs++; return obj; } @@ -431,8 +431,8 @@ void clear_object_flags(unsigned flags) { int i; - for (i=0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i=0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj) obj->flags &= ~flags; } @@ -442,13 +442,20 @@ void clear_commit_marks_all(unsigned int flags) { int i; - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj && obj->type == OBJ_COMMIT) obj->flags &= ~flags; } } +struct parsed_object_pool *parsed_object_pool_new(void) +{ + struct parsed_object_pool *o = xmalloc(sizeof(*o)); + memset(o, 0, sizeof(*o)); + return o; +} + struct raw_object_store *raw_object_store_new(void) { struct raw_object_store *o = xmalloc(sizeof(*o)); @@ -488,3 +495,13 @@ void raw_object_store_clear(struct raw_object_store *o) close_all_packs(o); o->packed_git = NULL; } + +void parsed_object_pool_clear(struct parsed_object_pool *o) +{ + /* + * TOOD free objects in o->obj_hash. + * + * As objects are allocated in slabs (see alloc.c), we do + * not need to free each object, but each slab instead. + */ +} -- cgit v1.2.3 From 68f95d382b51b134b138c91f94adb8d9ef2f557a Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:25 -0700 Subject: object: add repository argument to create_object Add a repository argument to allow the callers of create_object to be more specific about which repository to act on. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index f7c624a7ba..2de029275b 100644 --- a/object.c +++ b/object.c @@ -138,7 +138,7 @@ static void grow_object_hash(void) the_repository->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object(const unsigned char *sha1, void *o) +void *create_object_the_repository(const unsigned char *sha1, void *o) { struct object *obj = o; @@ -178,7 +178,8 @@ struct object *lookup_unknown_object(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); if (!obj) - obj = create_object(sha1, alloc_object_node()); + obj = create_object(the_repository, sha1, + alloc_object_node()); return obj; } -- cgit v1.2.3 From c077a4526ba456ada28d16db9c945afd9a4a57de Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 8 May 2018 12:37:26 -0700 Subject: object: add repository argument to grow_object_hash Add a repository argument to allow the caller of grow_object_hash to be more specific about which repository to handle. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index 2de029275b..91edc30770 100644 --- a/object.c +++ b/object.c @@ -116,7 +116,8 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -static void grow_object_hash(void) +#define grow_object_hash(r) grow_object_hash_##r() +static void grow_object_hash_the_repository(void) { int i; /* @@ -147,7 +148,7 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) hashcpy(obj->oid.hash, sha1); if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(); + grow_object_hash(the_repository); insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, the_repository->parsed_objects->obj_hash_size); -- cgit v1.2.3 From 13e3fdcb767fe860953f8c27eb1985bd5d15674d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:31 -0700 Subject: alloc: add repository argument to alloc_object_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'object.c') diff --git a/object.c b/object.c index 91edc30770..b8c3f923c5 100644 --- a/object.c +++ b/object.c @@ -180,7 +180,7 @@ struct object *lookup_unknown_object(const unsigned char *sha1) struct object *obj = lookup_object(sha1); if (!obj) obj = create_object(the_repository, sha1, - alloc_object_node()); + alloc_object_node(the_repository)); return obj; } -- cgit v1.2.3 From dd5d9deb0155a27cb2d74d1a0faf0af84ef5f355 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:33 -0700 Subject: alloc: add repository argument to alloc_commit_index This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'object.c') diff --git a/object.c b/object.c index b8c3f923c5..a365a91085 100644 --- a/object.c +++ b/object.c @@ -162,7 +162,7 @@ void *object_as_type(struct object *obj, enum object_type type, int quiet) return obj; else if (obj->type == OBJ_NONE) { if (type == OBJ_COMMIT) - ((struct commit *)obj)->index = alloc_commit_index(); + ((struct commit *)obj)->index = alloc_commit_index(the_repository); obj->type = type; return obj; } -- cgit v1.2.3 From 346a817a7271b58811a8004fa225ab88bc94e9c3 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:34 -0700 Subject: object: allow grow_object_hash to handle arbitrary repositories Reviewed-by: Jonathan Tan Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index a365a91085..0fcd6f6df4 100644 --- a/object.c +++ b/object.c @@ -116,27 +116,27 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -#define grow_object_hash(r) grow_object_hash_##r() -static void grow_object_hash_the_repository(void) +static void grow_object_hash(struct repository *r) { int i; /* * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; + int new_hash_size = r->parsed_objects->obj_hash_size < 32 ? 32 : 2 * r->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { - struct object *obj = the_repository->parsed_objects->obj_hash[i]; + for (i = 0; i < r->parsed_objects->obj_hash_size; i++) { + struct object *obj = r->parsed_objects->obj_hash[i]; + if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(the_repository->parsed_objects->obj_hash); - the_repository->parsed_objects->obj_hash = new_hash; - the_repository->parsed_objects->obj_hash_size = new_hash_size; + free(r->parsed_objects->obj_hash); + r->parsed_objects->obj_hash = new_hash; + r->parsed_objects->obj_hash_size = new_hash_size; } void *create_object_the_repository(const unsigned char *sha1, void *o) -- cgit v1.2.3 From 341e45e46bba094ef1274957ef5891f43e91b344 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:35 -0700 Subject: object: allow create_object to handle arbitrary repositories Reviewed-by: Jonathan Tan Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index 0fcd6f6df4..49b952e929 100644 --- a/object.c +++ b/object.c @@ -139,7 +139,7 @@ static void grow_object_hash(struct repository *r) r->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object_the_repository(const unsigned char *sha1, void *o) +void *create_object(struct repository *r, const unsigned char *sha1, void *o) { struct object *obj = o; @@ -147,12 +147,12 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(the_repository); + if (r->parsed_objects->obj_hash_size - 1 <= r->parsed_objects->nr_objs * 2) + grow_object_hash(r); - insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, - the_repository->parsed_objects->obj_hash_size); - the_repository->parsed_objects->nr_objs++; + insert_obj_hash(obj, r->parsed_objects->obj_hash, + r->parsed_objects->obj_hash_size); + r->parsed_objects->nr_objs++; return obj; } -- cgit v1.2.3 From 14ba97f81c7b94e10d591b363688a073023f332d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 15 May 2018 14:48:42 -0700 Subject: alloc: allow arbitrary repositories for alloc functions We have to convert all of the alloc functions at once, because alloc_report uses a funky macro for reporting. It is better for the sake of mechanical conversion to convert multiple functions at once rather than changing the structure of the reporting function. We record all memory allocation in alloc.c, and free them in clear_alloc_state, which is called for all repositories except the_repository. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index 49b952e929..8e29f63bf2 100644 --- a/object.c +++ b/object.c @@ -5,6 +5,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #include "object-store.h" #include "packfile.h" @@ -455,6 +456,13 @@ struct parsed_object_pool *parsed_object_pool_new(void) { struct parsed_object_pool *o = xmalloc(sizeof(*o)); memset(o, 0, sizeof(*o)); + + o->blob_state = allocate_alloc_state(); + o->tree_state = allocate_alloc_state(); + o->commit_state = allocate_alloc_state(); + o->tag_state = allocate_alloc_state(); + o->object_state = allocate_alloc_state(); + return o; } @@ -501,9 +509,39 @@ void raw_object_store_clear(struct raw_object_store *o) void parsed_object_pool_clear(struct parsed_object_pool *o) { /* - * TOOD free objects in o->obj_hash. - * * As objects are allocated in slabs (see alloc.c), we do * not need to free each object, but each slab instead. + * + * Before doing so, we need to free any additional memory + * the objects may hold. */ + unsigned i; + + for (i = 0; i < o->obj_hash_size; i++) { + struct object *obj = o->obj_hash[i]; + + if (!obj) + continue; + + if (obj->type == OBJ_TREE) + free_tree_buffer((struct tree*)obj); + else if (obj->type == OBJ_COMMIT) + release_commit_memory((struct commit*)obj); + else if (obj->type == OBJ_TAG) + release_tag_memory((struct tag*)obj); + } + + FREE_AND_NULL(o->obj_hash); + o->obj_hash_size = 0; + + clear_alloc_state(o->blob_state); + clear_alloc_state(o->tree_state); + clear_alloc_state(o->commit_state); + clear_alloc_state(o->tag_state); + clear_alloc_state(o->object_state); + FREE_AND_NULL(o->blob_state); + FREE_AND_NULL(o->tree_state); + FREE_AND_NULL(o->commit_state); + FREE_AND_NULL(o->tag_state); + FREE_AND_NULL(o->object_state); } -- cgit v1.2.3