From 38e95844e8f9bfefb34443650e99f740581a394b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:30 -0300 Subject: convert: make convert_attrs() and convert structs public Move convert_attrs() declaration from convert.c to convert.h, together with the conv_attrs struct and the crlf_action enum. This function and the data structures will be used outside convert.c in the upcoming parallel checkout implementation. Note that crlf_action is renamed to convert_crlf_action, which is more appropriate for the global namespace. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'convert.c') diff --git a/convert.c b/convert.c index ee360c2f07..f13b001273 100644 --- a/convert.c +++ b/convert.c @@ -24,17 +24,6 @@ #define CONVERT_STAT_BITS_TXT_CRLF 0x2 #define CONVERT_STAT_BITS_BIN 0x4 -enum crlf_action { - CRLF_UNDEFINED, - CRLF_BINARY, - CRLF_TEXT, - CRLF_TEXT_INPUT, - CRLF_TEXT_CRLF, - CRLF_AUTO, - CRLF_AUTO_INPUT, - CRLF_AUTO_CRLF -}; - struct text_stat { /* NUL, CR, LF and CRLF counts */ unsigned nul, lonecr, lonelf, crlf; @@ -172,7 +161,7 @@ static int text_eol_is_crlf(void) return 0; } -static enum eol output_eol(enum crlf_action crlf_action) +static enum eol output_eol(enum convert_crlf_action crlf_action) { switch (crlf_action) { case CRLF_BINARY: @@ -246,7 +235,7 @@ static int has_crlf_in_index(const struct index_state *istate, const char *path) } static int will_convert_lf_to_crlf(struct text_stat *stats, - enum crlf_action crlf_action) + enum convert_crlf_action crlf_action) { if (output_eol(crlf_action) != EOL_CRLF) return 0; @@ -499,7 +488,7 @@ static int encode_to_worktree(const char *path, const char *src, size_t src_len, static int crlf_to_git(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *buf, - enum crlf_action crlf_action, int conv_flags) + enum convert_crlf_action crlf_action, int conv_flags) { struct text_stat stats; char *dst; @@ -585,8 +574,8 @@ static int crlf_to_git(const struct index_state *istate, return 1; } -static int crlf_to_worktree(const char *src, size_t len, - struct strbuf *buf, enum crlf_action crlf_action) +static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf, + enum convert_crlf_action crlf_action) { char *to_free = NULL; struct text_stat stats; @@ -1247,7 +1236,7 @@ static const char *git_path_check_encoding(struct attr_check_item *check) return value; } -static enum crlf_action git_path_check_crlf(struct attr_check_item *check) +static enum convert_crlf_action git_path_check_crlf(struct attr_check_item *check) { const char *value = check->value; @@ -1297,18 +1286,10 @@ static int git_path_check_ident(struct attr_check_item *check) return !!ATTR_TRUE(value); } -struct conv_attrs { - struct convert_driver *drv; - enum crlf_action attr_action; /* What attr says */ - enum crlf_action crlf_action; /* When no attr is set, use core.autocrlf */ - int ident; - const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */ -}; - static struct attr_check *check; -static void convert_attrs(const struct index_state *istate, - struct conv_attrs *ca, const char *path) +void convert_attrs(const struct index_state *istate, + struct conv_attrs *ca, const char *path) { struct attr_check_item *ccheck = NULL; -- cgit v1.2.3 From 55b4ad0eada6924e5e59600266b5d50cbf22733d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:31 -0300 Subject: convert: add [async_]convert_to_working_tree_ca() variants Separate the attribute gathering from the actual conversion by adding _ca() variants of the conversion functions. These variants receive a precomputed 'struct conv_attrs', not relying, thus, on an index state. They will be used in a future patch adding parallel checkout support, for two reasons: - We will already load the conversion attributes in checkout_entry(), before conversion, to decide whether a path is eligible for parallel checkout. Therefore, it would be wasteful to load them again later, for the actual conversion. - The parallel workers will be responsible for reading, converting and writing blobs to the working tree. They won't have access to the main process' index state, so they cannot load the attributes. Instead, they will receive the preloaded ones and call the _ca() variant of the conversion functions. Furthermore, the attributes machinery is optimized to handle paths in sequential order, so it's better to leave it for the main process, anyway. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 60 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 28 deletions(-) (limited to 'convert.c') diff --git a/convert.c b/convert.c index f13b001273..0307374241 100644 --- a/convert.c +++ b/convert.c @@ -1447,19 +1447,16 @@ void convert_to_git_filter_fd(const struct index_state *istate, ident_to_git(dst->buf, dst->len, dst, ca.ident); } -static int convert_to_working_tree_internal(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - int normalizing, - const struct checkout_metadata *meta, - struct delayed_checkout *dco) +static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + int normalizing, + const struct checkout_metadata *meta, + struct delayed_checkout *dco) { int ret = 0, ret_filter = 0; - struct conv_attrs ca; - - convert_attrs(istate, &ca, path); - ret |= ident_to_worktree(src, len, dst, ca.ident); + ret |= ident_to_worktree(src, len, dst, ca->ident); if (ret) { src = dst->buf; len = dst->len; @@ -1469,49 +1466,56 @@ static int convert_to_working_tree_internal(const struct index_state *istate, * is a smudge or process filter (even if the process filter doesn't * support smudge). The filters might expect CRLFs. */ - if ((ca.drv && (ca.drv->smudge || ca.drv->process)) || !normalizing) { - ret |= crlf_to_worktree(src, len, dst, ca.crlf_action); + if ((ca->drv && (ca->drv->smudge || ca->drv->process)) || !normalizing) { + ret |= crlf_to_worktree(src, len, dst, ca->crlf_action); if (ret) { src = dst->buf; len = dst->len; } } - ret |= encode_to_worktree(path, src, len, dst, ca.working_tree_encoding); + ret |= encode_to_worktree(path, src, len, dst, ca->working_tree_encoding); if (ret) { src = dst->buf; len = dst->len; } ret_filter = apply_filter( - path, src, len, -1, dst, ca.drv, CAP_SMUDGE, meta, dco); - if (!ret_filter && ca.drv && ca.drv->required) - die(_("%s: smudge filter %s failed"), path, ca.drv->name); + path, src, len, -1, dst, ca->drv, CAP_SMUDGE, meta, dco); + if (!ret_filter && ca->drv && ca->drv->required) + die(_("%s: smudge filter %s failed"), path, ca->drv->name); return ret | ret_filter; } -int async_convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta, - void *dco) +int async_convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta, + void *dco) { - return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, dco); + return convert_to_working_tree_ca_internal(ca, path, src, len, dst, 0, + meta, dco); } -int convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta) +int convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta) { - return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, NULL); + return convert_to_working_tree_ca_internal(ca, path, src, len, dst, 0, + meta, NULL); } int renormalize_buffer(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst) { - int ret = convert_to_working_tree_internal(istate, path, src, len, dst, 1, NULL, NULL); + struct conv_attrs ca; + int ret; + + convert_attrs(istate, &ca, path); + ret = convert_to_working_tree_ca_internal(&ca, path, src, len, dst, 1, + NULL, NULL); if (ret) { src = dst->buf; len = dst->len; -- cgit v1.2.3 From 3e9e82c0d897814d77cc755d25acb9df6e8bf48f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:32 -0300 Subject: convert: add get_stream_filter_ca() variant Like the previous patch, we will also need to call get_stream_filter() with a precomputed `struct conv_attrs`, when we add support for parallel checkout workers. So add the _ca() variant which takes the conversion attributes struct as a parameter. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'convert.c') diff --git a/convert.c b/convert.c index 0307374241..9af6aafc5a 100644 --- a/convert.c +++ b/convert.c @@ -1942,34 +1942,31 @@ static struct stream_filter *ident_filter(const struct object_id *oid) } /* - * Return an appropriately constructed filter for the path, or NULL if + * Return an appropriately constructed filter for the given ca, or NULL if * the contents cannot be filtered without reading the whole thing * in-core. * * Note that you would be crazy to set CRLF, smudge/clean or ident to a * large binary blob you would want us not to slurp into the memory! */ -struct stream_filter *get_stream_filter(const struct index_state *istate, - const char *path, - const struct object_id *oid) +struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, + const struct object_id *oid) { - struct conv_attrs ca; struct stream_filter *filter = NULL; - convert_attrs(istate, &ca, path); - if (ca.drv && (ca.drv->process || ca.drv->smudge || ca.drv->clean)) + if (ca->drv && (ca->drv->process || ca->drv->smudge || ca->drv->clean)) return NULL; - if (ca.working_tree_encoding) + if (ca->working_tree_encoding) return NULL; - if (ca.crlf_action == CRLF_AUTO || ca.crlf_action == CRLF_AUTO_CRLF) + if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) return NULL; - if (ca.ident) + if (ca->ident) filter = ident_filter(oid); - if (output_eol(ca.crlf_action) == EOL_CRLF) + if (output_eol(ca->crlf_action) == EOL_CRLF) filter = cascade_filter(filter, lf_to_crlf_filter()); else filter = cascade_filter(filter, &null_filter_singleton); @@ -1977,6 +1974,15 @@ struct stream_filter *get_stream_filter(const struct index_state *istate, return filter; } +struct stream_filter *get_stream_filter(const struct index_state *istate, + const char *path, + const struct object_id *oid) +{ + struct conv_attrs ca; + convert_attrs(istate, &ca, path); + return get_stream_filter_ca(&ca, oid); +} + void free_stream_filter(struct stream_filter *filter) { filter->vtbl->free(filter); -- cgit v1.2.3 From f59d15bb420b649a6ee3163a3418aac6b813d331 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:33 -0300 Subject: convert: add classification for conv_attrs struct Create `enum conv_attrs_classification` to express the different ways that attributes are handled for a blob during checkout. This will be used in a later commit when deciding whether to add a file to the parallel or delayed queue during checkout. For now, we can also use it in get_stream_filter_ca() to simplify the function (as the classifying logic is the same). Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'convert.c') diff --git a/convert.c b/convert.c index 9af6aafc5a..b9d25d9a47 100644 --- a/convert.c +++ b/convert.c @@ -1954,13 +1954,7 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, { struct stream_filter *filter = NULL; - if (ca->drv && (ca->drv->process || ca->drv->smudge || ca->drv->clean)) - return NULL; - - if (ca->working_tree_encoding) - return NULL; - - if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) + if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE) return NULL; if (ca->ident) @@ -2016,3 +2010,21 @@ void clone_checkout_metadata(struct checkout_metadata *dst, if (blob) oidcpy(&dst->blob, blob); } + +enum conv_attrs_classification classify_conv_attrs(const struct conv_attrs *ca) +{ + if (ca->drv) { + if (ca->drv->process) + return CA_CLASS_INCORE_PROCESS; + if (ca->drv->smudge || ca->drv->clean) + return CA_CLASS_INCORE_FILTER; + } + + if (ca->working_tree_encoding) + return CA_CLASS_INCORE; + + if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) + return CA_CLASS_INCORE; + + return CA_CLASS_STREAMABLE; +} -- cgit v1.2.3