From c8716bf36190c9d14995f95dbc9abc19517cc55a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Wed, 9 Apr 2025 11:57:53 +0200 Subject: [PATCH 1/7] promisor-remote: refactor to get rid of 'struct strvec' In a following commit, we will use the new 'promisor-remote' protocol capability introduced by d460267613 (Add 'promisor-remote' capability to protocol v2, 2025-02-18) to pass and process more information about promisor remotes than just their name and url. For that purpose, we will need to store information about other fields, especially information that might or might not be available for different promisor remotes. Unfortunately using 'struct strvec', as we currently do, to store information about the promisor remotes with one 'struct strvec' for each field like "name" or "url" does not scale easily in that case. We would need one 'struct strvec' for each new field, and then we would have to pass all these 'struct strvec' around. Let's refactor this and introduce a new 'struct promisor_info'. It will only store promisor remote information in its members. For now it has only a 'name' member for the promisor remote name and an 'url' member for its URL. We will use a 'struct string_list' to store the instances of 'struct promisor_info'. For each 'item' in the string_list, 'item->string' will point to the promisor remote name and 'item->util' will point to the corresponding 'struct promisor_info' instance. Explicit members are used within 'struct promisor_info' for type safety and clarity regarding the specific information being handled, rather than a generic key-value store. We want to specify and document each field and its content, so adding new members to the struct as more fields are supported is fine. Signed-off-by: Christian Couder --- promisor-remote.c | 107 ++++++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 08b0da89622..c3df8f071ef 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -314,9 +314,35 @@ static int allow_unsanitized(char ch) return ch > 32 && ch < 127; } -static void promisor_info_vecs(struct repository *repo, - struct strvec *names, - struct strvec *urls) +/* + * Struct for promisor remotes involved in the "promisor-remote" + * protocol capability. + * + * Except for "name", each in this struct and its + * should correspond (either on the client side or on the server side) + * to a "remote.." config variable set to where + * "" is a promisor remote name. + */ +struct promisor_info { + const char *name; + const char *url; +}; + +static void promisor_info_list_clear(struct string_list *list) +{ + for (size_t i = 0; i < list->nr; i++) { + struct promisor_info *p = list->items[i].util; + free((char *)p->name); + free((char *)p->url); + } + string_list_clear(list, 1); +} + +/* + * Populate 'list' with promisor remote information from the config. + * The 'util' pointer of each list item will hold a 'struct promisor_info'. + */ +static void promisor_config_info_list(struct repository *repo, struct string_list *list) { struct promisor_remote *r; @@ -328,8 +354,14 @@ static void promisor_info_vecs(struct repository *repo, /* Only add remotes with a non empty URL */ if (!repo_config_get_string_tmp(the_repository, url_key, &url) && *url) { - strvec_push(names, r->name); - strvec_push(urls, url); + struct promisor_info *new_info = xcalloc(1, sizeof(*new_info)); + struct string_list_item *item; + + new_info->name = xstrdup(r->name); + new_info->url = xstrdup(url); + + item = string_list_append(list, new_info->name); + item->util = new_info; } free(url_key); @@ -340,47 +372,36 @@ char *promisor_remote_info(struct repository *repo) { struct strbuf sb = STRBUF_INIT; int advertise_promisors = 0; - struct strvec names = STRVEC_INIT; - struct strvec urls = STRVEC_INIT; + struct string_list config_info = STRING_LIST_INIT_NODUP; + struct string_list_item *item; repo_config_get_bool(the_repository, "promisor.advertise", &advertise_promisors); if (!advertise_promisors) return NULL; - promisor_info_vecs(repo, &names, &urls); + promisor_config_info_list(repo, &config_info); - if (!names.nr) + if (!config_info.nr) return NULL; - for (size_t i = 0; i < names.nr; i++) { - if (i) + for_each_string_list_item(item, &config_info) { + struct promisor_info *p = item->util; + + if (item != config_info.items) strbuf_addch(&sb, ';'); + strbuf_addstr(&sb, "name="); - strbuf_addstr_urlencode(&sb, names.v[i], allow_unsanitized); + strbuf_addstr_urlencode(&sb, p->name, allow_unsanitized); strbuf_addstr(&sb, ",url="); - strbuf_addstr_urlencode(&sb, urls.v[i], allow_unsanitized); + strbuf_addstr_urlencode(&sb, p->url, allow_unsanitized); } - strvec_clear(&names); - strvec_clear(&urls); + promisor_info_list_clear(&config_info); return strbuf_detach(&sb, NULL); } -/* - * Find first index of 'nicks' where there is 'nick'. 'nick' is - * compared case sensitively to the strings in 'nicks'. If not found - * 'nicks->nr' is returned. - */ -static size_t remote_nick_find(struct strvec *nicks, const char *nick) -{ - for (size_t i = 0; i < nicks->nr; i++) - if (!strcmp(nicks->v[i], nick)) - return i; - return nicks->nr; -} - enum accept_promisor { ACCEPT_NONE = 0, ACCEPT_KNOWN_URL, @@ -390,19 +411,23 @@ enum accept_promisor { static int should_accept_remote(enum accept_promisor accept, const char *remote_name, const char *remote_url, - struct strvec *names, struct strvec *urls) + struct string_list *config_info) { - size_t i; + struct promisor_info *p; + struct string_list_item *item; if (accept == ACCEPT_ALL) return 1; - i = remote_nick_find(names, remote_name); + /* Get config info for that promisor remote */ + item = string_list_lookup(config_info, remote_name); - if (i >= names->nr) + if (!item) /* We don't know about that remote */ return 0; + p = item->util; + if (accept == ACCEPT_KNOWN_NAME) return 1; @@ -414,11 +439,11 @@ static int should_accept_remote(enum accept_promisor accept, return 0; } - if (!strcmp(urls->v[i], remote_url)) + if (!strcmp(p->url, remote_url)) return 1; warning(_("known remote named '%s' but with URL '%s' instead of '%s'"), - remote_name, urls->v[i], remote_url); + remote_name, p->url, remote_url); return 0; } @@ -430,8 +455,7 @@ static void filter_promisor_remote(struct repository *repo, struct strbuf **remotes; const char *accept_str; enum accept_promisor accept = ACCEPT_NONE; - struct strvec names = STRVEC_INIT; - struct strvec urls = STRVEC_INIT; + struct string_list config_info = STRING_LIST_INIT_NODUP; if (!repo_config_get_string_tmp(the_repository, "promisor.acceptfromserver", &accept_str)) { if (!*accept_str || !strcasecmp("None", accept_str)) @@ -450,8 +474,10 @@ static void filter_promisor_remote(struct repository *repo, if (accept == ACCEPT_NONE) return; - if (accept != ACCEPT_ALL) - promisor_info_vecs(repo, &names, &urls); + if (accept != ACCEPT_ALL) { + promisor_config_info_list(repo, &config_info); + string_list_sort(&config_info); + } /* Parse remote info received */ @@ -482,7 +508,7 @@ static void filter_promisor_remote(struct repository *repo, if (remote_url) decoded_url = url_percent_decode(remote_url); - if (decoded_name && should_accept_remote(accept, decoded_name, decoded_url, &names, &urls)) + if (decoded_name && should_accept_remote(accept, decoded_name, decoded_url, &config_info)) strvec_push(accepted, decoded_name); strbuf_list_free(elems); @@ -490,8 +516,7 @@ static void filter_promisor_remote(struct repository *repo, free(decoded_url); } - strvec_clear(&names); - strvec_clear(&urls); + promisor_info_list_clear(&config_info); strbuf_list_free(remotes); } -- GitLab From a07976245a21bc358a142302132ba8d0c22acf74 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Wed, 9 Apr 2025 12:28:25 +0200 Subject: [PATCH 2/7] promisor-remote: allow a server to advertise more fields For now the "promisor-remote" protocol capability can only pass "name" and "url" information from a server to a client in the form "name=,url=". To allow clients to make more informed decisions about which promisor remotes they accept, let's make it possible to pass more information by introducing a new "promisor.sendFields" configuration variable. On the server side, information about a remote `foo` is stored in configuration variables named `remote.foo.`. To make it clearer and simpler, we use `field` and `field name` like this: * `field name` refers to the part of such a configuration variable, and * `field` refers to both the `field name` and the value of such a configuration variable. The "promisor.sendFields" configuration variable should contain a comma or space separated list of field names that will be looked up in the configuration of the remote on the server to find the values that will be passed to the client. Only a set of predefined field names are allowed. The only field names in this set are "partialCloneFilter" and "token". The "partialCloneFilter" field name specifies the filter definition used by the promisor remote, and the "token" field name can provide an authentication credential for accessing it. For example, if "promisor.sendFields" is set to "partialCloneFilter", and the server has the "remote.foo.partialCloneFilter" config variable set to a value, then that value will be passed in the "partialCloneFilter" field in the form "partialCloneFilter=" after the "name" and "url" fields. A following commit will allow the client to use the information to decide if it accepts the remote or not. For now the client doesn't do anything with the additional information it receives. Signed-off-by: Christian Couder --- Documentation/config/promisor.adoc | 22 +++++ Documentation/gitprotocol-v2.adoc | 64 +++++++++---- promisor-remote.c | 129 ++++++++++++++++++++++++-- t/t5710-promisor-remote-capability.sh | 31 +++++++ 4 files changed, 221 insertions(+), 25 deletions(-) diff --git a/Documentation/config/promisor.adoc b/Documentation/config/promisor.adoc index 2638b01f830..b4a72c21521 100644 --- a/Documentation/config/promisor.adoc +++ b/Documentation/config/promisor.adoc @@ -9,6 +9,28 @@ promisor.advertise:: "false", which means the "promisor-remote" capability is not advertised. +promisor.sendFields:: + A comma or space separated list of additional remote related + field names. A server sends these field names and the + associated field values from its configuration when + advertising its promisor remotes using the "promisor-remote" + capability, see linkgit:gitprotocol-v2[5]. Currently, only the + "partialCloneFilter" and "token" field names are supported. ++ +`partialCloneFilter`:: contains the partial clone filter +used for the remote. ++ +`token`:: contains an authentication token for the remote. ++ +When a field name is part of this list and a corresponding +"remote.foo." config variable is set on the server to a +non-empty value, then the field name and value are sent when +advertising the promisor remote "foo". ++ +This list has no effect unless the "promisor.advertise" config +variable is set to "true", and the "name" and "url" fields are always +advertised regardless of this setting. + promisor.acceptFromServer:: If set to "all", a client will accept all the promisor remotes a server might advertise using the "promisor-remote" diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index 9a57005d777..c7db103299a 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -785,33 +785,64 @@ retrieving the header from a bundle at the indicated URI, and thus save themselves and the server(s) the request(s) needed to inspect the headers of that bundle or bundles. -promisor-remote= -~~~~~~~~~~~~~~~~~~~~~~~~~~ +promisor-remote= +~~~~~~~~~~~~~~~~~~~~~~~~~ The server may advertise some promisor remotes it is using or knows about to a client which may want to use them as its promisor remotes, -instead of this repository. In this case should be of the +instead of this repository. In this case should be of the form: - pr-infos = pr-info | pr-infos ";" pr-info + pr-info = pr-fields | pr-info ";" pr-fields - pr-info = "name=" pr-name | "name=" pr-name "," "url=" pr-url + pr-fields = pr-field | pr-fields "," pr-field -where `pr-name` is the urlencoded name of a promisor remote, and -`pr-url` the urlencoded URL of that promisor remote. + pr-field = field-name "=" field-value -In this case, if the client decides to use one or more promisor -remotes the server advertised, it can reply with -"promisor-remote=" where should be of the form: +where all the `field-name` and `field-value` in a given `pr-fields` +are field names and values related to a single promisor remote. A +given `field-name` MUST NOT appear more than once in given +`pr-fields`. + +The server MUST advertise at least the "name" and "url" field names +along with the associated field values, which are the name of a valid +remote and its URL, in each `pr-fields`. The "name" and "url" fields +MUST appear first in each pr-fields, in that order. + +After these mandatory fields, the server MAY advertise the following +optional fields in any order: + +`partialCloneFilter`:: The filter specification used by the remote. +Clients can use this to determine if the remote's filtering strategy +is compatible with their needs (e.g., checking if both use "blob:none"). +It corresponds to the "remote..partialCloneFilter" config setting. + +`token`:: An authentication token that clients can use when +connecting to the remote. It corresponds to the "remote..token" +config setting. + +No other fields are defined by the protocol at this time. Field names +are case-sensitive and MUST be transmitted exactly as specified +above. Clients MUST ignore fields they don't recognize to allow for +future protocol extensions. + +For now, the client can only use information transmitted through these +fields to decide if it accepts the advertised promisor remote. In the +future that information might be used for other purposes though. + +Field values MUST be urlencoded. + +If the client decides to use one or more promisor remotes the server +advertised, it can reply with "promisor-remote=" where + should be of the form: pr-names = pr-name | pr-names ";" pr-name where `pr-name` is the urlencoded name of a promisor remote the server advertised and the client accepts. -Note that, everywhere in this document, `pr-name` MUST be a valid -remote name, and the ';' and ',' characters MUST be encoded if they -appear in `pr-name` or `pr-url`. +Note that, everywhere in this document, the ';' and ',' characters +MUST be encoded if they appear in `pr-name` or `field-value`. If the server doesn't know any promisor remote that could be good for a client to use, or prefers a client not to use any promisor remote it @@ -822,9 +853,10 @@ In this case, or if the client doesn't want to use any promisor remote the server advertised, the client shouldn't advertise the "promisor-remote" capability at all in its reply. -The "promisor.advertise" and "promisor.acceptFromServer" configuration -options can be used on the server and client side to control what they -advertise or accept respectively. See the documentation of these +On the server side, the "promisor.advertise" and "promisor.sendFields" +configuration options can be used to control what it advertises. On +the client side, the "promisor.acceptFromServer" configuration option +can be used to control what it accepts. See the documentation of these configuration options for more information. Note that in the future it would be nice if the "promisor-remote" diff --git a/promisor-remote.c b/promisor-remote.c index c3df8f071ef..98ba59e9529 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -314,6 +314,75 @@ static int allow_unsanitized(char ch) return ch > 32 && ch < 127; } +static const char promisor_field_filter[] = "partialCloneFilter"; +static const char promisor_field_token[] = "token"; + +/* + * List of optional field names that can be used in the + * "promisor-remote" protocol capability (others must be + * ignored). Each field should correspond to a configurable property + * of a remote that can be relevant for the client. + */ +static const char *known_fields[] = { + promisor_field_filter, /* Filter used for partial clone */ + promisor_field_token, /* Authentication token for the remote */ + NULL +}; + +/* + * Check if 'field' is in the list of the known field names for the + * "promisor-remote" protocol capability. + */ +static int is_known_field(const char *field) +{ + const char **p; + + for (p = known_fields; *p; p++) + if (!strcasecmp(*p, field)) + return 1; + return 0; +} + +static int is_valid_field(struct string_list_item *item, void *cb_data) +{ + const char *field = item->string; + const char *config_key = (const char *)cb_data; + + if (!is_known_field(field)) { + warning(_("unsupported field '%s' in '%s' config"), field, config_key); + return 0; + } + return 1; +} + +static char *fields_from_config(struct string_list *fields_list, const char *config_key) +{ + char *fields = NULL; + + if (!repo_config_get_string(the_repository, config_key, &fields) && *fields) { + string_list_split_in_place_f(fields_list, fields, ",", -1, + STRING_LIST_SPLIT_TRIM | + STRING_LIST_SPLIT_NONEMPTY); + filter_string_list(fields_list, 0, is_valid_field, (void *)config_key); + } + + return fields; +} + +static struct string_list *fields_sent(void) +{ + static struct string_list fields_list = STRING_LIST_INIT_NODUP; + static int initialized; + + if (!initialized) { + fields_list.cmp = strcasecmp; + fields_from_config(&fields_list, "promisor.sendFields"); + initialized = 1; + } + + return &fields_list; +} + /* * Struct for promisor remotes involved in the "promisor-remote" * protocol capability. @@ -326,6 +395,8 @@ static int allow_unsanitized(char ch) struct promisor_info { const char *name; const char *url; + const char *filter; + const char *token; }; static void promisor_info_list_clear(struct string_list *list) @@ -334,15 +405,47 @@ static void promisor_info_list_clear(struct string_list *list) struct promisor_info *p = list->items[i].util; free((char *)p->name); free((char *)p->url); + free((char *)p->filter); + free((char *)p->token); } string_list_clear(list, 1); } +static void set_one_field(struct promisor_info *p, + const char *field, const char *value) +{ + if (!strcasecmp(field, promisor_field_filter)) + p->filter = xstrdup(value); + else if (!strcasecmp(field, promisor_field_token)) + p->token = xstrdup(value); + else + BUG("invalid field '%s'", field); +} + +static void set_fields(struct promisor_info *p, + struct string_list *field_names) +{ + struct string_list_item *item; + + for_each_string_list_item(item, field_names) { + char *key = xstrfmt("remote.%s.%s", p->name, item->string); + const char *val; + if (!repo_config_get_string_tmp(the_repository, key, &val) && *val) + set_one_field(p, item->string, val); + free(key); + } +} + /* * Populate 'list' with promisor remote information from the config. - * The 'util' pointer of each list item will hold a 'struct promisor_info'. + * The 'util' pointer of each list item will hold a 'struct + * promisor_info'. Except "name" and "url", only members of that + * struct specified by the 'field_names' list are set (using values + * from the configuration). */ -static void promisor_config_info_list(struct repository *repo, struct string_list *list) +static void promisor_config_info_list(struct repository *repo, + struct string_list *list, + struct string_list *field_names) { struct promisor_remote *r; @@ -360,6 +463,9 @@ static void promisor_config_info_list(struct repository *repo, struct string_lis new_info->name = xstrdup(r->name); new_info->url = xstrdup(url); + if (field_names) + set_fields(new_info, field_names); + item = string_list_append(list, new_info->name); item->util = new_info; } @@ -380,7 +486,7 @@ char *promisor_remote_info(struct repository *repo) if (!advertise_promisors) return NULL; - promisor_config_info_list(repo, &config_info); + promisor_config_info_list(repo, &config_info, fields_sent()); if (!config_info.nr) return NULL; @@ -395,6 +501,15 @@ char *promisor_remote_info(struct repository *repo) strbuf_addstr_urlencode(&sb, p->name, allow_unsanitized); strbuf_addstr(&sb, ",url="); strbuf_addstr_urlencode(&sb, p->url, allow_unsanitized); + + if (p->filter) { + strbuf_addf(&sb, ",%s=", promisor_field_filter); + strbuf_addstr_urlencode(&sb, p->filter, allow_unsanitized); + } + if (p->token) { + strbuf_addf(&sb, ",%s=", promisor_field_token); + strbuf_addstr_urlencode(&sb, p->token, allow_unsanitized); + } } promisor_info_list_clear(&config_info); @@ -475,7 +590,7 @@ static void filter_promisor_remote(struct repository *repo, return; if (accept != ACCEPT_ALL) { - promisor_config_info_list(repo, &config_info); + promisor_config_info_list(repo, &config_info, NULL); string_list_sort(&config_info); } @@ -494,13 +609,9 @@ static void filter_promisor_remote(struct repository *repo, elems = strbuf_split(remotes[i], ','); for (size_t j = 0; elems[j]; j++) { - int res; strbuf_strip_suffix(elems[j], ","); - res = skip_prefix(elems[j]->buf, "name=", &remote_name) || + if (!skip_prefix(elems[j]->buf, "name=", &remote_name)) skip_prefix(elems[j]->buf, "url=", &remote_url); - if (!res) - warning(_("unknown element '%s' from remote info"), - elems[j]->buf); } if (remote_name) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index cb061b1f35e..204528b2e0c 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -295,6 +295,37 @@ test_expect_success "clone with 'KnownUrl' and empty url, so not advertised" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with promisor.sendFields" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + git -C server remote add otherLop "https://invalid.invalid" && + git -C server config remote.otherLop.token "fooBar" && + git -C server config remote.otherLop.stuff "baz" && + git -C server config remote.otherLop.partialCloneFilter "blob:limit=10k" && + test_when_finished "git -C server remote remove otherLop" && + test_config -C server promisor.sendFields "partialCloneFilter, token" && + test_when_finished "rm trace" && + + # Clone from server to create a client + GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="file://$(pwd)/lop" \ + -c promisor.acceptfromserver=All \ + --no-local --filter="blob:limit=5k" server client && + + # Check that fields are properly transmitted + ENCODED_URL=$(echo "file://$(pwd)/lop" | sed -e "s/ /%20/g") && + PR1="name=lop,url=$ENCODED_URL,partialCloneFilter=blob:none" && + PR2="name=otherLop,url=https://invalid.invalid,partialCloneFilter=blob:limit=10k,token=fooBar" && + test_grep "clone< promisor-remote=$PR1;$PR2" trace && + test_grep "clone> promisor-remote=lop;otherLop" trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + test_expect_success "clone with promisor.advertise set to 'true' but don't delete the client" ' git -C server config promisor.advertise true && -- GitLab From e38f2711f593075bf26655005df75d0d2e6f4eb5 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 16 May 2025 14:43:23 +0200 Subject: [PATCH 3/7] promisor-remote: use string constants for 'name' and 'url' too A previous commit started to define `promisor_field_filter` and `promisor_field_token`, and used them instead of the "partialCloneFilter" and "token" string literals. Let's do the same for "name" and "url" to avoid repeating them several times and for consistency with the other fields. For skipping "name=" or "url=" in advertisements, let's introduce a skip_field_name_prefix() helper function to keep parsing clean and easy to understand. Signed-off-by: Christian Couder --- promisor-remote.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 98ba59e9529..3913e32c116 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -314,6 +314,12 @@ static int allow_unsanitized(char ch) return ch > 32 && ch < 127; } +/* + * All the fields used in "promisor-remote" protocol capability, + * including the mandatory "name" and "url" ones. + */ +static const char promisor_field_name[] = "name"; +static const char promisor_field_url[] = "url"; static const char promisor_field_filter[] = "partialCloneFilter"; static const char promisor_field_token[] = "token"; @@ -497,9 +503,9 @@ char *promisor_remote_info(struct repository *repo) if (item != config_info.items) strbuf_addch(&sb, ';'); - strbuf_addstr(&sb, "name="); + strbuf_addf(&sb, "%s=", promisor_field_name); strbuf_addstr_urlencode(&sb, p->name, allow_unsanitized); - strbuf_addstr(&sb, ",url="); + strbuf_addf(&sb, ",%s=", promisor_field_url); strbuf_addstr_urlencode(&sb, p->url, allow_unsanitized); if (p->filter) { @@ -563,6 +569,15 @@ static int should_accept_remote(enum accept_promisor accept, return 0; } +static int skip_field_name_prefix(const char *elem, const char *field_name, const char **value) +{ + const char *p; + if (!skip_prefix(elem, field_name, &p) || *p != '=') + return 0; + *value = p + 1; + return 1; +} + static void filter_promisor_remote(struct repository *repo, struct strvec *accepted, const char *info) @@ -610,8 +625,8 @@ static void filter_promisor_remote(struct repository *repo, for (size_t j = 0; elems[j]; j++) { strbuf_strip_suffix(elems[j], ","); - if (!skip_prefix(elems[j]->buf, "name=", &remote_name)) - skip_prefix(elems[j]->buf, "url=", &remote_url); + if (!skip_field_name_prefix(elems[j]->buf, promisor_field_name, &remote_name)) + skip_field_name_prefix(elems[j]->buf, promisor_field_url, &remote_url); } if (remote_name) -- GitLab From 42639188020144bebe9830d40ed0959d695c72ad Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Sun, 18 May 2025 12:16:19 +0200 Subject: [PATCH 4/7] promisor-remote: refactor how we parse advertised fields In a follow up commit we are going to parse more fields, like a filter and a token, coming from the server when it advertises promisor remotes using the "promisor-remote" capability. To prepare for this, let's refactor the code that parses the advertised fields coming from the server into a new parse_one_advertised_remote() function that will populate a `struct promisor_info` with the content of the fields it parsed. While at it, let's also pass this `struct promisor_info` to the should_accept_remote() function, instead of passing it the parsed name and url. These changes will make it simpler to both parse more fields and access the content of these parsed fields in follow up commits. Signed-off-by: Christian Couder --- promisor-remote.c | 86 +++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 3913e32c116..c22128d09e3 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -405,16 +405,20 @@ struct promisor_info { const char *token; }; +static void promisor_info_free(struct promisor_info *p) +{ + free((char *)p->name); + free((char *)p->url); + free((char *)p->filter); + free((char *)p->token); + free(p); +} + static void promisor_info_list_clear(struct string_list *list) { - for (size_t i = 0; i < list->nr; i++) { - struct promisor_info *p = list->items[i].util; - free((char *)p->name); - free((char *)p->url); - free((char *)p->filter); - free((char *)p->token); - } - string_list_clear(list, 1); + for (size_t i = 0; i < list->nr; i++) + promisor_info_free(list->items[i].util); + string_list_clear(list, 0); } static void set_one_field(struct promisor_info *p, @@ -531,11 +535,13 @@ enum accept_promisor { }; static int should_accept_remote(enum accept_promisor accept, - const char *remote_name, const char *remote_url, + struct promisor_info *advertised, struct string_list *config_info) { struct promisor_info *p; struct string_list_item *item; + const char *remote_name = advertised->name; + const char *remote_url = advertised->url; if (accept == ACCEPT_ALL) return 1; @@ -578,6 +584,41 @@ static int skip_field_name_prefix(const char *elem, const char *field_name, cons return 1; } +static struct promisor_info *parse_one_advertised_remote(const char *remote_info) +{ + struct promisor_info *info = xcalloc(1, sizeof(*info)); + struct string_list elem_list = STRING_LIST_INIT_DUP; + struct string_list_item *item; + + string_list_split(&elem_list, remote_info, ",", -1); + + for_each_string_list_item(item, &elem_list) { + const char *elem = item->string; + const char *p = strchr(elem, '='); + + if (!p) { + warning(_("invalid element '%s' from remote info"), elem); + continue; + } + + if (skip_field_name_prefix(elem, promisor_field_name, &p)) + info->name = url_percent_decode(p); + else if (skip_field_name_prefix(elem, promisor_field_url, &p)) + info->url = url_percent_decode(p); + } + + string_list_clear(&elem_list, 0); + + if (!info->name || !info->url) { + warning(_("server advertised a promisor remote without a name or URL: %s"), + remote_info); + promisor_info_free(info); + return NULL; + } + + return info; +} + static void filter_promisor_remote(struct repository *repo, struct strvec *accepted, const char *info) @@ -614,32 +655,19 @@ static void filter_promisor_remote(struct repository *repo, remotes = strbuf_split_str(info, ';', 0); for (size_t i = 0; remotes[i]; i++) { - struct strbuf **elems; - const char *remote_name = NULL; - const char *remote_url = NULL; - char *decoded_name = NULL; - char *decoded_url = NULL; + struct promisor_info *advertised; strbuf_strip_suffix(remotes[i], ";"); - elems = strbuf_split(remotes[i], ','); - for (size_t j = 0; elems[j]; j++) { - strbuf_strip_suffix(elems[j], ","); - if (!skip_field_name_prefix(elems[j]->buf, promisor_field_name, &remote_name)) - skip_field_name_prefix(elems[j]->buf, promisor_field_url, &remote_url); - } + advertised = parse_one_advertised_remote(remotes[i]->buf); - if (remote_name) - decoded_name = url_percent_decode(remote_name); - if (remote_url) - decoded_url = url_percent_decode(remote_url); + if (!advertised) + continue; - if (decoded_name && should_accept_remote(accept, decoded_name, decoded_url, &config_info)) - strvec_push(accepted, decoded_name); + if (should_accept_remote(accept, advertised, &config_info)) + strvec_push(accepted, advertised->name); - strbuf_list_free(elems); - free(decoded_name); - free(decoded_url); + promisor_info_free(advertised); } promisor_info_list_clear(&config_info); -- GitLab From c1f55b84f0e959522bd78e7023013cf3bf153d4a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 2 Sep 2025 10:27:24 +0200 Subject: [PATCH 5/7] promisor-remote: use string_list_split() in filter_promisor_remote() A previous commit introduced a new parse_one_advertised_remote() function that takes a `const char *` argument. This function is called from filter_promisor_remote() and parses all the fields for one remote. This means that in filter_promisor_remote() we no longer need to split the remote information that will be passed to parse_one_advertised_remote() into an array of relatively heavy and complex `struct strbuf`. To use something lighter, let's then replace strbuf_split_str() with string_list_split() in filter_promisor_remote() to parse the remote information that is passed to parse_one_advertised_remote(). Signed-off-by: Christian Couder --- promisor-remote.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index c22128d09e3..afec0d081dd 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -623,10 +623,11 @@ static void filter_promisor_remote(struct repository *repo, struct strvec *accepted, const char *info) { - struct strbuf **remotes; const char *accept_str; enum accept_promisor accept = ACCEPT_NONE; struct string_list config_info = STRING_LIST_INIT_NODUP; + struct string_list remote_info = STRING_LIST_INIT_DUP; + struct string_list_item *item; if (!repo_config_get_string_tmp(the_repository, "promisor.acceptfromserver", &accept_str)) { if (!*accept_str || !strcasecmp("None", accept_str)) @@ -652,14 +653,12 @@ static void filter_promisor_remote(struct repository *repo, /* Parse remote info received */ - remotes = strbuf_split_str(info, ';', 0); + string_list_split(&remote_info, info, ";", -1); - for (size_t i = 0; remotes[i]; i++) { + for_each_string_list_item(item, &remote_info) { struct promisor_info *advertised; - strbuf_strip_suffix(remotes[i], ";"); - - advertised = parse_one_advertised_remote(remotes[i]->buf); + advertised = parse_one_advertised_remote(item->string); if (!advertised) continue; @@ -671,7 +670,7 @@ static void filter_promisor_remote(struct repository *repo, } promisor_info_list_clear(&config_info); - strbuf_list_free(remotes); + string_list_clear(&remote_info, 0); } char *promisor_remote_reply(const char *info) -- GitLab From 0e37b7149fb1f990fcab2e106c079079b802bb35 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 11 Apr 2025 18:54:19 +0200 Subject: [PATCH 6/7] promisor-remote: allow a client to check fields A previous commit allowed a server to pass additional fields through the "promisor-remote" protocol capability after the "name" and "url" fields, specifically the "partialCloneFilter" and "token" fields. Let's make it possible for a client to check if these fields match what it expects before accepting a promisor remote. We allow this by introducing a new "promisor.checkFields" configuration variable. It should contain a comma or space separated list of fields that will be checked. By limiting the protocol to specific well-defined fields, we ensure both server and client have a shared understanding of field semantics and usage. Signed-off-by: Christian Couder --- Documentation/config/promisor.adoc | 39 ++++++++++++ promisor-remote.c | 89 ++++++++++++++++++++++++--- t/t5710-promisor-remote-capability.sh | 34 ++++++++++ 3 files changed, 154 insertions(+), 8 deletions(-) diff --git a/Documentation/config/promisor.adoc b/Documentation/config/promisor.adoc index b4a72c21521..93e5e0d9b55 100644 --- a/Documentation/config/promisor.adoc +++ b/Documentation/config/promisor.adoc @@ -50,3 +50,42 @@ promisor.acceptFromServer:: lazily fetchable from this promisor remote from its responses to "fetch" and "clone" requests from the client. Name and URL comparisons are case sensitive. See linkgit:gitprotocol-v2[5]. + +promisor.checkFields:: + A comma or space separated list of additional remote related + field names. A client checks if the values of these fields + transmitted by a server correspond to the values of these + fields in its own configuration before accepting a promisor + remote. Currently, "partialCloneFilter" and "token" are the + only supported field names. ++ +If one of these field names (e.g., "token") is being checked for an +advertised promisor remote (e.g., "foo"), three conditions must be met +for the check of this specific field to pass: ++ +1. The corresponding local configuration (e.g., `remote.foo.token`) + must be set. +2. The server must advertise the "token" field for remote "foo". +3. The value of the locally configured `remote.foo.token` must exactly + match the value advertised by the server for the "token" field. ++ +If any of these conditions is not met for any field name listed in +`promisor.checkFields`, the advertised remote "foo" is rejected. ++ +For the "partialCloneFilter" field, this allows the client to ensure +that the server's filter matches what it expects locally, preventing +inconsistencies in filtering behavior. For the "token" field, this can +be used to verify that authentication credentials match expected +values. ++ +Field values are compared case-sensitively. ++ +The "name" and "url" fields are always checked according to the +`promisor.acceptFromServer` policy, independently of this setting. ++ +The field names and values should be passed by the server through the +"promisor-remote" capability by using the `promisor.sendFields` config +variable. The fields are checked only if the +`promisor.acceptFromServer` config variable is not set to "None". If +set to "None", this config variable has no effect. See +linkgit:gitprotocol-v2[5]. diff --git a/promisor-remote.c b/promisor-remote.c index afec0d081dd..a6cfade2237 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -389,6 +389,20 @@ static struct string_list *fields_sent(void) return &fields_list; } +static struct string_list *fields_checked(void) +{ + static struct string_list fields_list = STRING_LIST_INIT_NODUP; + static int initialized; + + if (!initialized) { + fields_list.cmp = strcasecmp; + fields_from_config(&fields_list, "promisor.checkFields"); + initialized = 1; + } + + return &fields_list; +} + /* * Struct for promisor remotes involved in the "promisor-remote" * protocol capability. @@ -534,6 +548,61 @@ enum accept_promisor { ACCEPT_ALL }; +static int match_field_against_config(const char *field, const char *value, + struct promisor_info *config_info) +{ + if (config_info->filter && !strcasecmp(field, promisor_field_filter)) + return !strcmp(config_info->filter, value); + else if (config_info->token && !strcasecmp(field, promisor_field_token)) + return !strcmp(config_info->token, value); + + return 0; +} + +static int all_fields_match(struct promisor_info *advertised, + struct string_list *config_info, + int in_list) +{ + struct string_list *fields = fields_checked(); + struct string_list_item *item_checked; + + for_each_string_list_item(item_checked, fields) { + int match = 0; + const char *field = item_checked->string; + const char *value = NULL; + struct string_list_item *item; + + if (!strcasecmp(field, promisor_field_filter)) + value = advertised->filter; + else if (!strcasecmp(field, promisor_field_token)) + value = advertised->token; + + if (!value) + return 0; + + if (in_list) { + for_each_string_list_item(item, config_info) { + struct promisor_info *p = item->util; + if (match_field_against_config(field, value, p)) { + match = 1; + break; + } + } + } else { + item = string_list_lookup(config_info, advertised->name); + if (item) { + struct promisor_info *p = item->util; + match = match_field_against_config(field, value, p); + } + } + + if (!match) + return 0; + } + + return 1; +} + static int should_accept_remote(enum accept_promisor accept, struct promisor_info *advertised, struct string_list *config_info) @@ -544,7 +613,7 @@ static int should_accept_remote(enum accept_promisor accept, const char *remote_url = advertised->url; if (accept == ACCEPT_ALL) - return 1; + return all_fields_match(advertised, config_info, 1); /* Get config info for that promisor remote */ item = string_list_lookup(config_info, remote_name); @@ -556,7 +625,7 @@ static int should_accept_remote(enum accept_promisor accept, p = item->util; if (accept == ACCEPT_KNOWN_NAME) - return 1; + return all_fields_match(advertised, config_info, 0); if (accept != ACCEPT_KNOWN_URL) BUG("Unhandled 'enum accept_promisor' value '%d'", accept); @@ -567,7 +636,7 @@ static int should_accept_remote(enum accept_promisor accept, } if (!strcmp(p->url, remote_url)) - return 1; + return all_fields_match(advertised, config_info, 0); warning(_("known remote named '%s' but with URL '%s' instead of '%s'"), remote_name, p->url, remote_url); @@ -605,6 +674,10 @@ static struct promisor_info *parse_one_advertised_remote(const char *remote_info info->name = url_percent_decode(p); else if (skip_field_name_prefix(elem, promisor_field_url, &p)) info->url = url_percent_decode(p); + else if (skip_field_name_prefix(elem, promisor_field_filter, &p)) + info->filter = url_percent_decode(p); + else if (skip_field_name_prefix(elem, promisor_field_token, &p)) + info->token = url_percent_decode(p); } string_list_clear(&elem_list, 0); @@ -646,11 +719,6 @@ static void filter_promisor_remote(struct repository *repo, if (accept == ACCEPT_NONE) return; - if (accept != ACCEPT_ALL) { - promisor_config_info_list(repo, &config_info, NULL); - string_list_sort(&config_info); - } - /* Parse remote info received */ string_list_split(&remote_info, info, ";", -1); @@ -663,6 +731,11 @@ static void filter_promisor_remote(struct repository *repo, if (!advertised) continue; + if (!config_info.nr) { + promisor_config_info_list(repo, &config_info, fields_checked()); + string_list_sort(&config_info); + } + if (should_accept_remote(accept, advertised, &config_info)) strvec_push(accepted, advertised->name); diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 204528b2e0c..023735d6a84 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -326,6 +326,40 @@ test_expect_success "clone with promisor.sendFields" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with promisor.checkFields" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client" && + + git -C server remote add otherLop "https://invalid.invalid" && + git -C server config remote.otherLop.token "fooBar" && + git -C server config remote.otherLop.stuff "baz" && + git -C server config remote.otherLop.partialCloneFilter "blob:limit=10k" && + test_when_finished "git -C server remote remove otherLop" && + test_config -C server promisor.sendFields "partialCloneFilter, token" && + test_when_finished "rm trace" && + + # Clone from server to create a client + GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.partialCloneFilter="blob:none" \ + -c promisor.acceptfromserver=All \ + -c promisor.checkFields=partialcloneFilter \ + --no-local --filter="blob:limit=5k" server client && + + # Check that fields are properly transmitted + ENCODED_URL=$(echo "file://$(pwd)/lop" | sed -e "s/ /%20/g") && + PR1="name=lop,url=$ENCODED_URL,partialCloneFilter=blob:none" && + PR2="name=otherLop,url=https://invalid.invalid,partialCloneFilter=blob:limit=10k,token=fooBar" && + test_grep "clone< promisor-remote=$PR1;$PR2" trace && + test_grep "clone> promisor-remote=lop" trace && + test_grep ! "clone> promisor-remote=lop;otherLop" trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + test_expect_success "clone with promisor.advertise set to 'true' but don't delete the client" ' git -C server config promisor.advertise true && -- GitLab From 123d41a7fc037df1595085dd4bd39e366439e208 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 2 Sep 2025 14:55:16 +0200 Subject: [PATCH 7/7] promisor-remote: use string_list_split() in mark_remotes_as_accepted() Previous commits replaced some strbuf_split*() calls with calls to string_list_split*() in "promisor-remote.c". For consistency, let's also replace the strbuf_split_str() call in mark_remotes_as_accepted() with a call to string_list_split(), as we don't need the splitted strings to be managed by a `struct strbuf`. Using the lighter-weight `string_list` API is enough for our needs. While at it let's remove a useless call to `strbuf_strip_suffix()`. Signed-off-by: Christian Couder --- promisor-remote.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index a6cfade2237..77ebf537e2b 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -769,16 +769,15 @@ char *promisor_remote_reply(const char *info) void mark_promisor_remotes_as_accepted(struct repository *r, const char *remotes) { - struct strbuf **accepted_remotes = strbuf_split_str(remotes, ';', 0); + struct string_list accepted_remotes = STRING_LIST_INIT_DUP; + struct string_list_item *item; - for (size_t i = 0; accepted_remotes[i]; i++) { - struct promisor_remote *p; - char *decoded_remote; + string_list_split(&accepted_remotes, remotes, ";", -1); - strbuf_strip_suffix(accepted_remotes[i], ";"); - decoded_remote = url_percent_decode(accepted_remotes[i]->buf); + for_each_string_list_item(item, &accepted_remotes) { + char *decoded_remote = url_percent_decode(item->string); + struct promisor_remote *p = repo_promisor_remote_find(r, decoded_remote); - p = repo_promisor_remote_find(r, decoded_remote); if (p) p->accepted = 1; else @@ -788,5 +787,5 @@ void mark_promisor_remotes_as_accepted(struct repository *r, const char *remotes free(decoded_remote); } - strbuf_list_free(accepted_remotes); + string_list_clear(&accepted_remotes, 0); } -- GitLab