From fb556a76d374311f6b063a7040a1a105e3049671 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 24 Jul 2024 12:14:53 +0200 Subject: [PATCH 1/3] clone: remove double bundle list clear code The bundle list transport->bundles is filled by transport_get_remote_bundle_uri(). Only when the list is not used, it is cleared right away by calling clear_bundle_list(). This looks like we leak memory allocated for the list when transport->bundles *is* used. But in fact, transport->bundles is cleaned up in transport_disconnect() near the end of cmd_clone(). Remove the double clean up of transport->bundles, and depend solely on transport_disconnect() to take care of it. Also add a test case that hits this code, but due to other leaks we cannot mark it as leak-free. Signed-off-by: Toon Claes --- builtin/clone.c | 3 --- t/t5558-clone-bundle-uri.sh | 28 +++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index af6017d41a3..aa507395a01 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1419,9 +1419,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) else if (fetch_bundle_list(the_repository, transport->bundles)) warning(_("failed to fetch advertised bundles")); - } else { - clear_bundle_list(transport->bundles); - FREE_AND_NULL(transport->bundles); } } diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh index cd05321e176..2d6e690fbeb 100755 --- a/t/t5558-clone-bundle-uri.sh +++ b/t/t5558-clone-bundle-uri.sh @@ -1,6 +1,6 @@ #!/bin/sh -test_description='test fetching bundles with --bundle-uri' +test_description='test clone with use of bundle-uri' . ./test-lib.sh . "$TEST_DIRECTORY"/lib-bundle.sh @@ -438,6 +438,32 @@ test_expect_success 'negotiation: bundle list with all wanted commits' ' test_grep ! 
"clone> want " trace-packet.txt ' +test_expect_success 'bundles advertised by the server' ' + test_when_finished rm -f trace*.txt && + git clone clone-from clone-advertiser && + git -C clone-advertiser config uploadpack.advertiseBundleURIs true && + git -C clone-advertiser config bundle.version 1 && + git -C clone-advertiser config bundle.mode all && + git -C clone-advertiser config bundle.bundle-1.uri "file://$(pwd)/clone-from/bundle-1.bundle" && + git -C clone-advertiser config bundle.bundle-2.uri "file://$(pwd)/clone-from/bundle-2.bundle" && + git -C clone-advertiser config bundle.bundle-3.uri "file://$(pwd)/clone-from/bundle-3.bundle" && + git -C clone-advertiser config bundle.bundle-4.uri "file://$(pwd)/clone-from/bundle-4.bundle" && + + GIT_TRACE_PACKET="$(pwd)/trace-packet.txt" \ + git -c transfer.bundleURI=true clone clone-advertiser clone-advertised && + git -C clone-advertised for-each-ref --format="%(refname)" >refs && + grep "refs/bundles/" refs >actual && + cat >expect <<-\EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/merge + refs/bundles/right + EOF + test_cmp expect actual && + # We already have all needed commits so no "want" needed. + test_grep ! "clone> want " trace-packet.txt +' + ######################################################################### # HTTP tests begin here -- GitLab From 0f7c53bbe138fd3c2735ede90d587749cdf42d64 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 24 Jul 2024 15:17:17 +0200 Subject: [PATCH 2/3] transport: introduce transport_has_remote_bundle_uri() The public function transport_get_remote_bundle_uri() exists to fetch the bundle URI(s) from the remote. This function is only called from builtin/clone.c (not taking test-tool into account). There it ignores the return value, because it doesn't matter whether the server didn't return any bundles or if it failed trying to fetch them, clone can continue without bundle URIs. 
After calling that function, clone checks whether any bundle URIs were collected in the bundle list and, if so, starts fetching them. Add public function transport_has_remote_bundle_uri() instead. This calls the (now made private) transport_get_remote_bundle_uri() function and returns whether any bundle URI was received. This makes reuse of the code easier and avoids code duplication when we add bundle URI support to git-fetch(1). Signed-off-by: Toon Claes --- builtin/clone.c | 23 +++++++---------------- t/helper/test-bundle-uri.c | 2 +- transport.c | 14 +++++++++++++- transport.h | 7 ++++--- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index aa507395a01..25535c1814d 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1404,22 +1404,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) bundle_uri); else if (has_heuristic) git_config_set_gently("fetch.bundleuri", bundle_uri); - } else { - /* - * Populate transport->got_remote_bundle_uri and - * transport->bundle_uri. We might get nothing. - */ - transport_get_remote_bundle_uri(transport); - - if (transport->bundles && - hashmap_get_size(&transport->bundles->bundles)) { - /* At this point, we need the_repository to match the cloned repo. */ - if (repo_init(the_repository, git_dir, work_tree)) - warning(_("failed to initialize the repo, skipping bundle URI")); - else if (fetch_bundle_list(the_repository, - transport->bundles)) - warning(_("failed to fetch advertised bundles")); - } + } else if (transport_has_remote_bundle_uri(transport)) { + /* At this point, we need the_repository to match the cloned repo. 
*/ + if (repo_init(the_repository, git_dir, work_tree)) + warning(_("failed to initialize the repo, skipping bundle URI")); + else if (fetch_bundle_list(the_repository, + transport->bundles)) + warning(_("failed to fetch advertised bundles")); } if (refs) diff --git a/t/helper/test-bundle-uri.c b/t/helper/test-bundle-uri.c index 0c5fa723d8d..bd558d5e57a 100644 --- a/t/helper/test-bundle-uri.c +++ b/t/helper/test-bundle-uri.c @@ -90,7 +90,7 @@ static int cmd_ls_remote(int argc, const char **argv) } transport = transport_get(remote, NULL); - if (transport_get_remote_bundle_uri(transport) < 0) { + if (!transport_has_remote_bundle_uri(transport)) { error(_("could not get the bundle-uri list")); status = 1; goto cleanup; diff --git a/transport.c b/transport.c index 12cc5b4d967..1a7d86fa402 100644 --- a/transport.c +++ b/transport.c @@ -1536,7 +1536,7 @@ int transport_fetch_refs(struct transport *transport, struct ref *refs) return rc; } -int transport_get_remote_bundle_uri(struct transport *transport) +static int transport_get_remote_bundle_uri(struct transport *transport) { int value = 0; const struct transport_vtable *vtable = transport->vtable; @@ -1561,6 +1561,18 @@ int transport_get_remote_bundle_uri(struct transport *transport) if (vtable->get_bundle_uri(transport) < 0) return error(_("could not retrieve server-advertised bundle-uri list")); + + return 0; +} + +int transport_has_remote_bundle_uri(struct transport *transport) +{ + transport_get_remote_bundle_uri(transport); + + if (transport->bundles && + hashmap_get_size(&transport->bundles->bundles)) + return 1; + return 0; } diff --git a/transport.h b/transport.h index 6393cd9823c..5ea9641558f 100644 --- a/transport.h +++ b/transport.h @@ -294,10 +294,11 @@ const struct ref *transport_get_remote_refs(struct transport *transport, struct transport_ls_refs_options *transport_options); /** - * Retrieve bundle URI(s) from a remote. Populates "struct - * transport"'s "bundle_uri" and "got_remote_bundle_uri". 
+ * Try to fetch bundle URI(s) from a remote and return 1 if one or more + * bundle URI(s) are received from the server. + * Populates "struct transport"'s "bundles" and "got_remote_bundle_uri". */ -int transport_get_remote_bundle_uri(struct transport *transport); +int transport_has_remote_bundle_uri(struct transport *transport); /* * Fetch the hash algorithm used by a remote. -- GitLab From db8f303dde18b77c82587af1d3e215c1afca1b79 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 24 Jul 2024 15:58:58 +0200 Subject: [PATCH 3/3] fetch: use bundle URIs when having creationToken heuristic At the moment, bundle URIs are only used by git-clone(1). For a clone, the use of bundle URIs is trivial, because the repository is empty so downloading bundles will never result in downloading objects that are in the repository already. For git-fetch(1), it is more complicated to use bundle URIs. We want to avoid downloading bundles that only contain objects that are in the local repository already. One way to achieve this becomes available when the "creationToken" heuristic is used for bundle URIs. We attempt to download and unbundle the minimum number of bundles by creationToken in decreasing order. If we fail to unbundle (after a successful download) then move to the next non-downloaded bundle and attempt downloading. Once we succeed in applying a bundle, move to the previous unapplied bundle and attempt to unbundle it again. At the end the highest applied creationToken is written to `fetch.bundleCreationToken` in the git-config. The next time bundles are advertised by the server, bundles with a lower creationToken value are ignored. This was already implemented by 7903efb717 (bundle-uri: download in creationToken order, 2023-01-31) in fetch_bundles_by_token(). Using the creationToken heuristic is optional, but without it the client has no idea which bundles are new, how to sort them, and which only have objects the client already has. 
With this knowledge, make git-fetch(1) use bundle URIs from the server, but only when the creationToken heuristic is used. Signed-off-by: Toon Claes --- builtin/fetch.c | 13 ++++++++++ t/t5584-fetch-bundle-uri.sh | 49 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100755 t/t5584-fetch-bundle-uri.sh diff --git a/builtin/fetch.c b/builtin/fetch.c index 693f02b9580..98e811f4382 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1694,6 +1694,19 @@ static int do_fetch(struct transport *transport, retcode = 1; } + if (transport_has_remote_bundle_uri(transport)) { + /* + * Only use bundle-URIs when they use the creationToken + * heuristic, this allows us to ensure not downloading bundles + * we don't need. You can read the comments in + * fetch_bundles_by_token() to understand how this works. + */ + if (transport->bundles->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) { + if (fetch_bundle_list(the_repository, transport->bundles)) + warning(_("failed to fetch advertised bundles")); + } + } + if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map, &fetch_head, config)) { retcode = 1; diff --git a/t/t5584-fetch-bundle-uri.sh b/t/t5584-fetch-bundle-uri.sh new file mode 100755 index 00000000000..6c2383646e8 --- /dev/null +++ b/t/t5584-fetch-bundle-uri.sh @@ -0,0 +1,49 @@ +#!/bin/sh + +test_description='test use of bundle URI in "git fetch"' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. 
./test-lib.sh + +test_expect_success 'set up repos and bundles' ' + git init source && + test_commit -C source A && + git clone --no-local source go-A-to-C && + test_commit -C source B && + git clone --no-local source go-B-to-C && + git clone --no-local source go-B-to-D && + git -C source bundle create B.bundle main && + test_commit -C source C && + git -C source bundle create B-to-C.bundle B..main && + git -C source config uploadpack.advertiseBundleURIs true && + git -C source config bundle.version 1 && + git -C source config bundle.mode all && + git -C source config bundle.heuristic creationToken && + git -C source config bundle.bundle-B.uri "file://$(pwd)/source/B.bundle" && + git -C source config bundle.bundle-B.creationToken 1 && + git -C source config bundle.bundle-B-to-C.uri "file://$(pwd)/source/B-to-C.bundle" && + git -C source config bundle.bundle-B-to-C.creationToken 2 +' + +test_expect_success 'fetches one bundle URI to get up-to-date' ' + git -C go-B-to-C -c transfer.bundleURI=true fetch origin && + test 1 = $(ls go-B-to-C/.git/objects/bundles | wc -l) && + test 2 = $(git -C go-B-to-C config fetch.bundleCreationToken) +' + +test_expect_success 'fetches two bundle URIs to get up-to-date' ' + git -C go-A-to-C -c transfer.bundleURI=true fetch origin && + test 2 = $(ls go-A-to-C/.git/objects/bundles | wc -l) && + test 2 = $(git -C go-A-to-C config fetch.bundleCreationToken) +' + +test_expect_success 'fetches one bundle URI and objects from remote' ' + test_commit -C source D && + git -C go-B-to-D -c transfer.bundleURI=true fetch origin && + test 1 = $(ls go-B-to-D/.git/objects/bundles | wc -l) && + test 2 = $(git -C go-B-to-D config fetch.bundleCreationToken) +' + +test_done -- GitLab