From 7aaaa1516ada884d2121db66fb2ba23e18b570e8 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Tue, 3 Dec 2024 12:47:13 -0500 Subject: [PATCH 1/8] git-compat-util: add strtoul_ul() with error handling We already have strtoul_ui() and similar functions that provide proper error handling using strtoul from the standard library. However, there isn't currently a variant that returns an unsigned long. This commit introduces strtoul_ul() to address this gap, enabling the return of an unsigned long with proper error handling. --- git-compat-util.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index e123288e8f1..0e9a43351ad 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1353,6 +1353,26 @@ static inline int strtoul_ui(char const *s, int base, unsigned int *result) return 0; } +/* + * Parse s as an unsigned long with strtoul(). Return 0 and set *result on + * success; return -1 on a '-' sign, no digits, trailing junk or strtoul error. + */ +static inline int strtoul_ul(char const *s, int base, unsigned long *result) +{ + unsigned long ul; + char *p; + + errno = 0; + /* negative values would be accepted by strtoul */ + if (strchr(s, '-')) + return -1; + ul = strtoul(s, &p, base); + if (errno || *p || p == s) + return -1; + *result = ul; + return 0; +} + static inline int strtol_i(char const *s, int base, int *result) { long ul; -- GitLab From 49a715bb605c8435a670d2f5a9b239ee596bd6ad Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Wed, 19 Jun 2024 14:41:33 -0400 Subject: [PATCH 2/8] cat-file: add declaration of variable i inside its for loop Some code used in this series declares variable i and only uses it in a for loop, not in any other logic outside the loop. Change the declaration of i to be inside the for loop for readability. While at it, we also change its type from "int" to "size_t" where the latter makes more sense. 
Helped-by: Christian Couder Signed-off-by: Eric Ju --- builtin/cat-file.c | 11 +++-------- fetch-pack.c | 3 +-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b13561cf73b..69ea642dc66 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -676,12 +676,10 @@ static void dispatch_calls(struct batch_options *opt, struct queued_cmd *cmd, int nr) { - int i; - if (!opt->buffer_output) die(_("flush is only for --buffer mode")); - for (i = 0; i < nr; i++) + for (int i = 0; i < nr; i++) cmd[i].fn(opt, cmd[i].line, output, data); fflush(stdout); @@ -689,9 +687,7 @@ static void dispatch_calls(struct batch_options *opt, static void free_cmds(struct queued_cmd *cmd, size_t *nr) { - size_t i; - - for (i = 0; i < *nr; i++) + for (size_t i = 0; i < *nr; i++) FREE_AND_NULL(cmd[i].line); *nr = 0; @@ -717,7 +713,6 @@ static void batch_objects_command(struct batch_options *opt, size_t alloc = 0, nr = 0; while (strbuf_getdelim_strip_crlf(&input, stdin, opt->input_delim) != EOF) { - int i; const struct parse_cmd *cmd = NULL; const char *p = NULL, *cmd_end; struct queued_cmd call = {0}; @@ -727,7 +722,7 @@ static void batch_objects_command(struct batch_options *opt, if (isspace(*input.buf)) die(_("whitespace before command: '%s'"), input.buf); - for (i = 0; i < ARRAY_SIZE(commands); i++) { + for (size_t i = 0; i < ARRAY_SIZE(commands); i++) { if (!skip_prefix(input.buf, commands[i].name, &cmd_end)) continue; diff --git a/fetch-pack.c b/fetch-pack.c index 1ed5e11dd56..71fb2ca0543 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1331,9 +1331,8 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf, if (advertise_sid && server_supports_v2("session-id")) packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); if (server_options && server_options->nr) { - int i; ensure_server_supports_v2("server-option"); - for (i = 0; i < server_options->nr; i++) + for (size_t i = 0; i < server_options->nr; 
i++) packet_buf_write(req_buf, "server-option=%s", server_options->items[i].string); } -- GitLab From 56db14a5e8cba5cc42856114cc28328c03d45175 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Mon, 2 Dec 2024 12:28:40 -0500 Subject: [PATCH 3/8] t1006: split test utility functions into new "lib-cat-file.sh" This refactor extracts utility functions from the cat-file's test script "t1006-cat-file.sh" into a new "lib-cat-file.sh" dedicated library file. The goal is to improve code reuse and readability, enabling future tests to leverage these utilities without duplicating code. --- t/lib-cat-file.sh | 16 ++++++++++++++++ t/t1006-cat-file.sh | 13 +------------ 2 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 t/lib-cat-file.sh diff --git a/t/lib-cat-file.sh b/t/lib-cat-file.sh new file mode 100644 index 00000000000..44af232d741 --- /dev/null +++ b/t/lib-cat-file.sh @@ -0,0 +1,16 @@ +# Library of git-cat-file related test functions. + +# Print a string without a trailing newline. +echo_without_newline () { + printf '%s' "$*" +} + +# Print a string without newlines and replace them with a NULL character (\0). +echo_without_newline_nul () { + echo_without_newline "$@" | tr '\n' '\0' +} + +# Calculate the length of a string. +strlen () { + echo_without_newline "$1" | wc -c | sed -e 's/^ *//' +} diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 398865d6ebe..1c27c10c6f7 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -3,6 +3,7 @@ test_description='git cat-file' . ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-cat-file.sh test_cmdmode_usage () { test_expect_code 129 "$@" 2>err && @@ -98,18 +99,6 @@ do ' done -echo_without_newline () { - printf '%s' "$*" -} - -echo_without_newline_nul () { - echo_without_newline "$@" | tr '\n' '\0' -} - -strlen () { - echo_without_newline "$1" | wc -c | sed -e 's/^ *//' -} - run_tests () { type=$1 oid=$2 -- GitLab From edf1d5e2c42665a0f5e51ce7187b9c02d06f02d6 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Thu, 28 Jul 2022 23:02:05 +0000 Subject: [PATCH 4/8] fetch-pack: refactor packet writing Refactor write_fetch_command_and_capabilities() to a more general-purpose function, write_command_and_capabilities(), enabling it to serve both fetch and additional commands. In this context, "command" refers to the "operations" supported by Git's wire protocol https://git-scm.com/docs/protocol-v2, such as a Git subcommand (e.g., git-fetch(1)) or a server-side operation like "object-info" as implemented in commit a2ba162c (object-info: support for retrieving object info, 2021-04-20). Furthermore, write_command_and_capabilities() is moved to connect.c, making it accessible to additional commands in the future. To move write_command_and_capabilities() to connect.c, we need to adjust how `advertise_sid` is managed. Previously, in fetch_pack.c, `advertise_sid` was a static variable, modified using git_config_get_bool(). In connect.c, we now initialize `advertise_sid` at the beginning by directly using git_config_get_bool(). This change is safe because: In the original fetch-pack.c code, there are only two places that write `advertise_sid` : 1. In function do_fetch_pack: if (!server_supports("session-id")) advertise_sid = 0; 2. In function fetch_pack_config(): git_config_get_bool("transfer.advertisesid", &advertise_sid); About 1, since do_fetch_pack() is only relevant for protocol v1, this assignment can be ignored in our refactor, as write_command_and_capabilities() is only used in protocol v2. 
About 2, git_config_get_bool() is from config.h and it is an out-of-box dependency of connect.c, so we can reuse it directly. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju --- connect.c | 34 ++++++++++++++++++++++++++++++++++ connect.h | 8 ++++++++ fetch-pack.c | 35 ++--------------------------------- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/connect.c b/connect.c index 32804353310..ecb4e38ef8e 100644 --- a/connect.c +++ b/connect.c @@ -697,6 +697,40 @@ int server_supports(const char *feature) return !!server_feature_value(feature, NULL); } +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options) +{ + const char *hash_name; + int advertise_sid = 0; /* stays off when transfer.advertisesid is unset */ + + git_config_get_bool("transfer.advertisesid", &advertise_sid); + + ensure_server_supports_v2(command); + packet_buf_write(req_buf, "command=%s", command); + if (server_supports_v2("agent")) + packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); + if (advertise_sid && server_supports_v2("session-id")) + packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); + if (server_options && server_options->nr) { + ensure_server_supports_v2("server-option"); + for (size_t i = 0; i < server_options->nr; i++) + packet_buf_write(req_buf, "server-option=%s", + server_options->items[i].string); + } + + if (server_feature_v2("object-format", &hash_name)) { + const int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo_by_ptr(the_hash_algo) != hash_algo) + die(_("mismatched algorithms: client %s; server %s"), + the_hash_algo->name, hash_name); + packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + die(_("the server does not support algorithm '%s'"), + the_hash_algo->name); + } + packet_buf_delim(req_buf); +} + enum protocol { 
PROTO_LOCAL = 1, PROTO_FILE, diff --git a/connect.h 
b/connect.h index 1645126c17f..d904c73a856 100644 --- a/connect.h +++ b/connect.h @@ -30,4 +30,12 @@ void check_stateless_delimiter(int stateless_rpc, struct packet_reader *reader, const char *error); +/* + * Writes a command along with the requested + * server capabilities/features into a request buffer. + */ +struct string_list; +void write_command_and_capabilities(struct strbuf *req_buf, const char *command, + const struct string_list *server_options); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index 71fb2ca0543..19b4a092ead 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1319,37 +1319,6 @@ static int add_haves(struct fetch_negotiator *negotiator, return haves_added; } -static void write_fetch_command_and_capabilities(struct strbuf *req_buf, - const struct string_list *server_options) -{ - const char *hash_name; - - ensure_server_supports_v2("fetch"); - packet_buf_write(req_buf, "command=fetch"); - if (server_supports_v2("agent")) - packet_buf_write(req_buf, "agent=%s", git_user_agent_sanitized()); - if (advertise_sid && server_supports_v2("session-id")) - packet_buf_write(req_buf, "session-id=%s", trace2_session_id()); - if (server_options && server_options->nr) { - ensure_server_supports_v2("server-option"); - for (size_t i = 0; i < server_options->nr; i++) - packet_buf_write(req_buf, "server-option=%s", - server_options->items[i].string); - } - - if (server_feature_v2("object-format", &hash_name)) { - int hash_algo = hash_algo_by_name(hash_name); - if (hash_algo_by_ptr(the_hash_algo) != hash_algo) - die(_("mismatched algorithms: client %s; server %s"), - the_hash_algo->name, hash_name); - packet_buf_write(req_buf, "object-format=%s", the_hash_algo->name); - } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { - die(_("the server does not support algorithm '%s'"), - the_hash_algo->name); - } - packet_buf_delim(req_buf); -} - static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const 
struct ref *wants, struct oidset *common, @@ -1360,7 +1329,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, int done_sent = 0; struct strbuf req_buf = STRBUF_INIT; - write_fetch_command_and_capabilities(&req_buf, args->server_options); + write_command_and_capabilities(&req_buf, "fetch", args->server_options); if (args->use_thin_pack) packet_buf_write(&req_buf, "thin-pack"); @@ -2188,7 +2157,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, the_repository, "%d", negotiation_round); strbuf_reset(&req_buf); - write_fetch_command_and_capabilities(&req_buf, server_options); + write_command_and_capabilities(&req_buf, "fetch", server_options); packet_buf_write(&req_buf, "wait-for-done"); -- GitLab From 1167966b5358ac0e534eb3d53cc4ca5e8cba0096 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Thu, 28 Jul 2022 23:02:06 +0000 Subject: [PATCH 5/8] fetch-pack: move fetch initialization There are some variables initialized at the start of the do_fetch_pack_v2() state machine. Currently, they are initialized in FETCH_CHECK_LOCAL, which is the initial state set at the beginning of the function. However, a subsequent patch will allow for another initial state, while still requiring these initialized variables. Move the initialization to be before the state machine, so that they are set regardless of the initial state. Note that there is no change in behavior, because we're moving code from the beginning of the first state to just before the execution of the state machine. 
Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju --- fetch-pack.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 19b4a092ead..35dccea073d 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1650,18 +1650,18 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, reader.me = "fetch-pack"; } + /* v2 supports these by default */ + allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; + use_sideband = 2; + if (args->depth > 0 || args->deepen_since || args->deepen_not) + args->deepen = 1; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); - /* v2 supports these by default */ - allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1; - use_sideband = 2; - if (args->depth > 0 || args->deepen_since || args->deepen_not) - args->deepen = 1; - /* Filter 'ref' by 'sought' and those that aren't local */ mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); -- GitLab From df3ff7deb99062c2f3dd3d41eeefc9f0874cb8f9 Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Thu, 28 Jul 2022 23:02:08 +0000 Subject: [PATCH 6/8] serve: advertise object-info feature In order for a client to know what object-info components a server can provide, advertise supported object-info features. This will allow a client to decide whether to query the server for object-info or fetch as a fallback. 
Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju --- serve.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/serve.c b/serve.c index e3ccf1505ca..f39c43fc0f1 100644 --- a/serve.c +++ b/serve.c @@ -89,7 +89,7 @@ static void session_id_receive(struct repository *r UNUSED, trace2_data_string("transfer", NULL, "client-sid", client_sid); } -static int object_info_advertise(struct repository *r, struct strbuf *value UNUSED) +static int object_info_advertise(struct repository *r, struct strbuf *value) { if (advertise_object_info == -1 && repo_config_get_bool(r, "transfer.advertiseobjectinfo", @@ -97,6 +97,8 @@ static int object_info_advertise(struct repository *r, struct strbuf *value UNUS /* disabled by default */ advertise_object_info = 0; } + if (value && advertise_object_info) + strbuf_addstr(value, "size"); return advertise_object_info; } -- GitLab From 211f18cbb6e3b334ab53f2f7b6e5c8d55da594ed Mon Sep 17 00:00:00 2001 From: Calvin Wan Date: Thu, 28 Jul 2022 23:02:09 +0000 Subject: [PATCH 7/8] transport: add client support for object-info Sometimes, it is beneficial to retrieve information about an object without downloading it entirely. The server-side logic for this functionality was implemented in commit "a2ba162cda (object-info: support for retrieving object info, 2021-04-20)." And the wire format is documented at https://git-scm.com/docs/protocol-v2#_object_info. This commit introduces client functions to interact with the server. Currently, the client supports requesting a list of object IDs with the 'size' feature from a v2 server. If the server does not advertise this feature (i.e., transfer.advertiseobjectinfo is set to false), the client will return an error and exit. Notice that the entire request is written into req_buf before being sent to the remote. This approach follows the pattern used in the `send_fetch_request()` logic within fetch-pack.c. 
Streaming the request is not addressed in this patch. Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju --- Makefile | 1 + fetch-object-info.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ fetch-object-info.h | 22 ++++++++++++ fetch-pack.c | 3 ++ fetch-pack.h | 2 ++ transport-helper.c | 11 ++++-- transport.c | 28 ++++++++++++++- transport.h | 11 ++++++ 8 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 fetch-object-info.c create mode 100644 fetch-object-info.h diff --git a/Makefile b/Makefile index 7315507381e..d31a4d0f20b 100644 --- a/Makefile +++ b/Makefile @@ -1030,6 +1030,7 @@ LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec-cmd.o LIB_OBJS += fetch-negotiator.o LIB_OBJS += fetch-pack.o +LIB_OBJS += fetch-object-info.o LIB_OBJS += fmt-merge-msg.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o diff --git a/fetch-object-info.c b/fetch-object-info.c new file mode 100644 index 00000000000..b279e06dc8c --- /dev/null +++ b/fetch-object-info.c @@ -0,0 +1,85 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "hex.h" +#include "pkt-line.h" +#include "connect.h" +#include "oid-array.h" +#include "object-store-ll.h" +#include "fetch-object-info.h" +#include "string-list.h" + +/* Sends git-cat-file object-info command and its arguments into the request buffer. 
*/ +static void send_object_info_request(const int fd_out, struct object_info_args *args) +{ + struct strbuf req_buf = STRBUF_INIT; + + write_command_and_capabilities(&req_buf, "object-info", args->server_options); + + if (unsorted_string_list_has_string(args->object_info_options, "size")) + packet_buf_write(&req_buf, "size"); + + if (args->oids) + for (size_t i = 0; i < args->oids->nr; i++) + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); + + packet_buf_flush(&req_buf); + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) + die_errno(_("unable to write request to remote")); + + strbuf_release(&req_buf); +} + +int fetch_object_info(const enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + const int stateless_rpc, const int fd_out) +{ + int size_index = -1; + + switch (version) { + case protocol_v2: + if (!server_supports_v2("object-info")) + die(_("object-info capability is not enabled on the server")); + send_object_info_request(fd_out, args); + break; + case protocol_v1: + case protocol_v0: + die(_("unsupported protocol version. 
expected v2")); + case protocol_unknown_version: + BUG("unknown protocol version"); + } + + for (size_t i = 0; i < args->object_info_options->nr; i++) { + if (packet_reader_read(reader) != PACKET_READ_NORMAL) { + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); + return -1; + } + if (!string_list_has_string(args->object_info_options, reader->line)) + return -1; + if (!strcmp(reader->line, "size")) { + size_index = i; + for (size_t j = 0; j < args->oids->nr; j++) + object_info_data[j].sizep = xcalloc(1, sizeof(*object_info_data[j].sizep)); + } + } + + for (size_t i = 0; packet_reader_read(reader) == PACKET_READ_NORMAL && i < args->oids->nr; i++) { + struct string_list object_info_values = STRING_LIST_INIT_DUP; + + string_list_split(&object_info_values, reader->line, ' ', -1); + if (0 <= size_index) { /* untrusted reply: the size field may be missing */ + if (object_info_values.nr <= (size_t)size_index + 1 || !strcmp(object_info_values.items[1 + size_index].string, "")) + die("object-info: not our ref %s", + object_info_values.items[0].string); + + if (strtoul_ul(object_info_values.items[1 + size_index].string, 10, object_info_data[i].sizep)) + die("object-info: ref %s has invalid size %s", + object_info_values.items[0].string, + object_info_values.items[1 + size_index].string); + } + + string_list_clear(&object_info_values, 0); + } + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); + + return 0; +} diff --git a/fetch-object-info.h b/fetch-object-info.h new file mode 100644 index 00000000000..6184d04d723 --- /dev/null +++ b/fetch-object-info.h @@ -0,0 +1,22 @@ +#ifndef FETCH_OBJECT_INFO_H +#define FETCH_OBJECT_INFO_H + +#include "pkt-line.h" +#include "protocol.h" +#include "object-store-ll.h" + +struct object_info_args { + struct string_list *object_info_options; + const struct string_list *server_options; + struct oid_array *oids; +}; + +/* + * Sends git-cat-file object-info command into the request buf and 
read the + * results from packets. 
+ */ +int fetch_object_info(enum protocol_version version, struct object_info_args *args, + struct packet_reader *reader, struct object_info *object_info_data, + int stateless_rpc, int fd_out); + +#endif /* FETCH_OBJECT_INFO_H */ diff --git a/fetch-pack.c b/fetch-pack.c index 35dccea073d..92e8a7291ce 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1656,6 +1656,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (args->depth > 0 || args->deepen_since || args->deepen_not) args->deepen = 1; + if (args->object_info) + state = FETCH_SEND_REQUEST; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: diff --git a/fetch-pack.h b/fetch-pack.h index 9d3470366f8..119d3369f10 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -16,6 +16,7 @@ struct fetch_pack_args { const struct string_list *deepen_not; struct list_objects_filter_options filter_options; const struct string_list *server_options; + struct object_info *object_info_data; /* * If not NULL, during packfile negotiation, fetch-pack will send "have" @@ -42,6 +43,7 @@ struct fetch_pack_args { unsigned reject_shallow_remote:1; unsigned deepen:1; unsigned refetch:1; + unsigned object_info:1; /* * Indicate that the remote of this request is a promisor remote. The diff --git a/transport-helper.c b/transport-helper.c index d457b425501..9da1547b2c4 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -710,8 +710,8 @@ static int fetch_refs(struct transport *transport, /* * If we reach here, then the server, the client, and/or the transport - * helper does not support protocol v2. --negotiate-only requires - * protocol v2. + * helper does not support protocol v2. --negotiate-only and cat-file + * remote-object-info require protocol v2. 
*/ if (data->transport_options.acked_commits) { warning(_("--negotiate-only requires protocol v2")); @@ -727,6 +727,13 @@ static int fetch_refs(struct transport *transport, free_refs(dummy); } + /* fail the command explicitly to avoid further commands input. */ + if (transport->smart_options->object_info) + die(_("remote-object-info requires protocol v2")); + + if (!data->get_refs_list_called) + get_refs_list_using_list(transport, 0); + count = 0; for (i = 0; i < nr_heads; i++) if (!(to_fetch[i]->status & REF_STATUS_UPTODATE)) diff --git a/transport.c b/transport.c index 6c2801bcbd9..95be3771a6c 100644 --- a/transport.c +++ b/transport.c @@ -9,6 +9,7 @@ #include "hook.h" #include "pkt-line.h" #include "fetch-pack.h" +#include "fetch-object-info.h" #include "remote.h" #include "connect.h" #include "send-pack.h" @@ -465,8 +466,33 @@ static int fetch_refs_via_pack(struct transport *transport, args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; + args.object_info = transport->smart_options->object_info; + + if (transport->smart_options->object_info + && transport->smart_options->object_info_oids->nr > 0) { + struct packet_reader reader; + struct object_info_args obj_info_args = { 0 }; + + obj_info_args.server_options = transport->server_options; + obj_info_args.oids = transport->smart_options->object_info_oids; + obj_info_args.object_info_options = transport->smart_options->object_info_options; + string_list_sort(obj_info_args.object_info_options); + + connect_setup(transport, 0); + packet_reader_init(&reader, data->fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_DIE_ON_ERR_PACKET); + + data->version = discover_version(&reader); + transport->hash_algo = reader.hash_algo; + + ret = fetch_object_info(data->version, &obj_info_args, &reader, + data->options.object_info_data, transport->stateless_rpc, + 
data->fd[1]); + goto cleanup; - if (!data->finished_handshake) { + } else if (!data->finished_handshake) { int i; int must_list_refs = 0; for (i = 0; i < nr_heads; i++) { diff --git a/transport.h b/transport.h index 892f19454a7..5a4f27451ae 100644 --- a/transport.h +++ b/transport.h @@ -5,6 +5,7 @@ #include "remote.h" #include "list-objects-filter-options.h" #include "string-list.h" +#include "object-store.h" struct git_transport_options { unsigned thin : 1; @@ -30,6 +31,12 @@ struct git_transport_options { */ unsigned connectivity_checked:1; + /* + * Transport will attempt to retrieve only object-info. + * If object-info is not supported, the operation will error and exit. + */ + unsigned object_info : 1; + int depth; const char *deepen_since; const struct string_list *deepen_not; @@ -53,6 +60,10 @@ struct git_transport_options { * common commits to this oidset instead of fetching any packfiles. */ struct oidset *acked_commits; + + struct oid_array *object_info_oids; + struct object_info *object_info_data; + struct string_list *object_info_options; }; enum transport_family { -- GitLab From 482d32d3f3d16ea2bbfa4d8844db043a34ce8c43 Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Mon, 28 Oct 2024 14:35:54 -0400 Subject: [PATCH 8/8] cat-file: add remote-object-info to batch-command Since the `info` command in `cat-file --batch-command` prints object info for a given object, it is natural to add another command in `cat-file --batch-command` to print object info for a given object from a remote. Add `remote-object-info` to `cat-file --batch-command`. While `info` takes object ids one at a time, this creates overhead when making requests to a server. So `remote-object-info` instead can take multiple object ids at once. 
The `cat-file --batch-command` command is generally implemented in the following manner: - Receive and parse input from user - Call respective function attached to command - Get object info, print object info In --buffer mode, this changes to: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue - Call respective function attached to command - Get object info, print object info Notice how the getting and printing of object info is accomplished one at a time. As described above, this creates a problem for making requests to a server. Therefore, `remote-object-info` is implemented in the following manner: - Receive and parse input from user If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Parse input, get object info, print object info And finally for --buffer mode `remote-object-info`: - Receive and parse input from user - Store respective function attached to command in a queue - After flush, loop through commands in queue: If command is `remote-object-info`: - Get object info from remote - Loop through and print each object info Else: - Call respective function attached to command - Get object info, print object info To summarize, `remote-object-info` gets object info from the remote and then loops through the object info passed in, printing the info. In order for `remote-object-info` to avoid remote communication overhead in the non-buffer mode, the objects are passed in as such: remote-object-info <remote> <oid> <oid> ... <oid> rather than remote-object-info <remote> <oid> remote-object-info <remote> <oid> ... 
remote-object-info <remote> <oid> Helped-by: Jonathan Tan Helped-by: Christian Couder Signed-off-by: Calvin Wan Signed-off-by: Eric Ju --- Documentation/git-cat-file.adoc | 25 +- builtin/cat-file.c | 140 ++++ object-file.c | 11 + object-store-ll.h | 3 + t/t1017-cat-file-remote-object-info.sh | 877 +++++++++++++++++++++++++ 5 files changed, 1052 insertions(+), 4 deletions(-) create mode 100755 t/t1017-cat-file-remote-object-info.sh diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index d5890ae3686..128b911ab0a 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -149,6 +149,14 @@ info <object>:: Print object info for object reference `<object>`. This corresponds to the output of `--batch-check`. +remote-object-info <remote> <object>...:: + Print object info for object references `<object>` at specified + `<remote>` without downloading objects from the remote. + The `<remote>` is required to be a URL-encoded string. + Raise an error when the `object-info` capability is not supported by the remote. + Raise an error when no object references are provided. + This command may be combined with `--buffer`. + flush:: Used with `--buffer` to execute all preceding commands that were issued since the beginning or since the last flush was issued. When `--buffer` @@ -290,7 +298,8 @@ newline. The available atoms are: The full hex representation of the object name. `objecttype`:: - The type of the object (the same as `cat-file -t` reports). + The type of the object (the same as `cat-file -t` reports). See + `CAVEATS` below. Not supported by `remote-object-info`. `objectsize`:: The size, in bytes, of the object (the same as `cat-file -s` @@ -298,13 +307,14 @@ newline. The available atoms are: `objectsize:disk`:: The size, in bytes, that the object takes up on disk. See the - note about on-disk sizes in the `CAVEATS` section below. + note about on-disk sizes in the `CAVEATS` section below. Not + supported by `remote-object-info`. 
`deltabase`:: If the object is stored as a delta on-disk, this expands to the full hex representation of the delta base object name. Otherwise, expands to the null OID (all zeroes). See `CAVEATS` - below. + below. Not supported by `remote-object-info`. `rest`:: If this atom is used in the output string, input lines are split @@ -314,7 +324,10 @@ newline. The available atoms are: line) are output in place of the `%(rest)` atom. If no format is specified, the default format is `%(objectname) -%(objecttype) %(objectsize)`. +%(objecttype) %(objectsize)`, except for `remote-object-info` commands which use +`%(objectname) %(objectsize)` for now because "%(objecttype)" is not supported yet. +WARNING: When "%(objecttype)" is supported, the default format WILL be unified, so +DO NOT RELY on the current default format to stay the same!!! If `--batch` is specified, or if `--batch-command` is used with the `contents` command, the object information is followed by the object contents (consisting @@ -396,6 +409,10 @@ scripting purposes. CAVEATS ------- +Note that since %(objecttype), %(objectsize:disk) and %(deltabase) are +currently not supported by the `remote-object-info` command, we will raise +an error and exit when they appear in the format string. + Note that the sizes of objects on disk are reported accurately, but care should be taken in drawing conclusions about which refs or objects are responsible for disk usage. 
The size of a packed non-delta object may be diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 69ea642dc66..564c7550485 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -27,6 +27,9 @@ #include "promisor-remote.h" #include "mailmap.h" #include "write-or-die.h" +#include "alias.h" +#include "remote.h" +#include "transport.h" enum batch_mode { BATCH_MODE_CONTENTS, @@ -48,6 +51,8 @@ struct batch_options { }; static const char *force_path; +static struct object_info *remote_object_info; +static struct oid_array object_info_oids = OID_ARRAY_INIT; static struct string_list mailmap = STRING_LIST_INIT_NODUP; static int use_mailmap; @@ -579,6 +584,104 @@ static void batch_one_object(const char *obj_name, object_context_release(&ctx); } +/* + * Check whether the format contains unsupported placeholders + * for the remote-object-info command. + */ +static char *check_placeholder_supportability(const char *format) { + /* + * '%(objectname)' and '%(objectsize)' are the only placeholders + * currently supported in remote-object-info. + */ + const char *unsupported_placeholders[] = { + "%(objecttype)", "%(objectsize:disk)", + "%(deltabase)", "%(rest)" + }; + struct strbuf buf = STRBUF_INIT; + bool first = true; + for (size_t i = 0; i < ARRAY_SIZE(unsupported_placeholders); i++) { + const char *found = strstr(format, unsupported_placeholders[i]); + if (found) { + if (!first) + strbuf_addstr(&buf, " "); + strbuf_addstr(&buf, unsupported_placeholders[i]); + first=false; + } + } + return strbuf_detach(&buf, NULL); +} + +static int get_remote_info(struct batch_options *opt, const char *url, const char *oid_list) +{ + int retval = 0; + struct remote *remote = NULL; + struct object_id oid; + struct string_list object_info_options = STRING_LIST_INIT_NODUP; + char *unsupported_placeholders = NULL; + static struct transport *gtransport; + + /* + * Change the format to "%(objectname) %(objectsize)" when + * remote-object-info command is used. 
Once we start supporting objecttype + * the default format should change to DEFAULT_FORMAT. + */ + if (!opt->format) + opt->format = "%(objectname) %(objectsize)"; + + + remote = remote_get(url); + if (!remote) + die(_("must supply valid remote when using remote-object-info")); + + oid_array_clear(&object_info_oids); + while (*oid_list) { + if (*oid_list == ' ') { + oid_list++; + continue; + } + if (parse_oid_hex(oid_list, &oid, &oid_list)) + die(_("Not a valid object name %s"), oid_list); + oid_array_append(&object_info_oids, &oid); + } + + if (!object_info_oids.nr) + die(_("remote-object-info requires objects")); + + gtransport = transport_get(remote, NULL); + if (!gtransport->smart_options) { + retval = -1; + } else { + CALLOC_ARRAY(remote_object_info, object_info_oids.nr); + gtransport->smart_options->object_info = 1; + gtransport->smart_options->object_info_oids = &object_info_oids; + + unsupported_placeholders = check_placeholder_supportability(opt->format); + if (strlen(unsupported_placeholders) > 0) + die(_("%s not supported with remote-object-info"), unsupported_placeholders); + + /* + * When %(objectname) is the only placeholder in the format, + * the `remote-object-info` command will still query the object size + * from the server but only display the object name. This is because + * the server-side object-info command does not support querying just + * the object name alone. 
+ */ + if (strstr(opt->format, "%(objectsize)") || strstr(opt->format, "%(objectname)")) + string_list_append(&object_info_options, "size"); + + if (object_info_options.nr > 0) { + gtransport->smart_options->object_info_options = &object_info_options; + gtransport->smart_options->object_info_data = remote_object_info; + retval = transport_fetch_refs(gtransport, NULL); + } + } + + string_list_clear(&object_info_options, 0); + free(unsupported_placeholders); + transport_disconnect(gtransport); + return retval; +} + struct object_cb_data { struct batch_options *opt; struct expand_data *expand; @@ -670,6 +773,42 @@ static void parse_cmd_info(struct batch_options *opt, batch_one_object(line, output, opt, data); } +static void parse_cmd_remote_object_info(struct batch_options *opt, + const char *line, struct strbuf *output, + struct expand_data *data) +{ + char *url; + const char *space; + space = strchr(line, ' '); + if (!space) + die(_("remote-object-info requires objects")); + url = xmemdupz(line, space - line); + + if (get_remote_info(opt, url, space + 1)) + goto cleanup; + + data->skip_object_info = 1; + for (size_t i = 0; i < object_info_oids.nr; i++) { + data->oid = object_info_oids.oid[i]; + if (remote_object_info[i].sizep) { + /* + * When reaching here, it means remote-object-info can retrieve + * information from server without downloading them. 
+ */ + data->size = *remote_object_info[i].sizep; + opt->batch_mode = BATCH_MODE_INFO; + batch_object_write(oid_to_hex(&data->oid), output, opt, data, NULL, 0); + } + } + data->skip_object_info = 0; + +cleanup: + for (size_t i = 0; i < object_info_oids.nr; i++) + free_object_info_contents(&remote_object_info[i]); + free(url); + free(remote_object_info); +} + static void dispatch_calls(struct batch_options *opt, struct strbuf *output, struct expand_data *data, @@ -701,6 +840,7 @@ static const struct parse_cmd { } commands[] = { { "contents", parse_cmd_contents, 1}, { "info", parse_cmd_info, 1}, + { "remote-object-info", parse_cmd_remote_object_info, 1}, { "flush", NULL, 0}, }; diff --git a/object-file.c b/object-file.c index 726e41a0475..020f038659a 100644 --- a/object-file.c +++ b/object-file.c @@ -3161,3 +3161,14 @@ int read_loose_object(const char *path, munmap(map, mapsize); return ret; } + +void free_object_info_contents(struct object_info *object_info) +{ + if (!object_info) + return; + free(object_info->typep); + free(object_info->sizep); + free(object_info->disk_sizep); + free(object_info->delta_base_oid); + free(object_info->type_name); +} diff --git a/object-store-ll.h b/object-store-ll.h index cd3bd5bd99f..20208e1d4fb 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -553,4 +553,7 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags); +/* Free pointers inside of object_info, but not object_info itself */ +void free_object_info_contents(struct object_info *object_info); + #endif /* OBJECT_STORE_LL_H */ diff --git a/t/t1017-cat-file-remote-object-info.sh b/t/t1017-cat-file-remote-object-info.sh new file mode 100755 index 00000000000..c30ab91d3a5 --- /dev/null +++ b/t/t1017-cat-file-remote-object-info.sh @@ -0,0 +1,877 @@ +#!/bin/sh + +test_description='git cat-file --batch-command with remote-object-info command' + 
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-cat-file.sh + +hello_content="Hello World" +hello_size=$(strlen "$hello_content") +hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) + +# This is how we get 13: +# 13 = <file mode> + <a space> + <file name> + <a null>, where +# file mode is 100644, which is 6 characters; +# file name is hello, which is 5 characters +# a space is 1 character and a null is 1 character +tree_size=$(($(test_oid rawsz) + 13)) + +commit_message="Initial commit" + +# This is how we get 137: +# 137 = <tree header> + <a space> + <a newline> + +# <author line> + <a newline> + +# <committer line> + <a newline> + +# <a newline> + +# <commit message> +# An easier way to calculate is: 1. use `git cat-file commit <commit hash> | wc -c`, +# to get 177, 2. then deduct 40 hex characters to get 137 +commit_size=$(($(test_oid hexsz) + 137)) + +tag_header_without_oid="type blob +tag hellotag +tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" +tag_header_without_timestamp="object $hello_oid +$tag_header_without_oid" +tag_description="This is a tag" +tag_content="$tag_header_without_timestamp 0 +0000 + +$tag_description" + +tag_oid=$(echo_without_newline "$tag_content" | git hash-object -t tag --stdin -w) +tag_size=$(strlen "$tag_content") + +set_transport_variables () { + hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) + tree_oid=$(git -C "$1" write-tree) + commit_oid=$(echo_without_newline "$commit_message" | git -C "$1" commit-tree $tree_oid) + tag_oid=$(echo_without_newline "$tag_content" | git -C "$1" hash-object -t tag --stdin -w) + tag_size=$(strlen "$tag_content") +} + +# This section tests --batch-command with remote-object-info command +# Since "%(objecttype)" is currently not supported by the command remote-object-info, +# the format is set to "%(objectname) %(objectsize)" in some test cases. + +# Test --batch-command remote-object-info with 'git://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability +.
"$TEST_DIRECTORY"/lib-git-daemon.sh +start_git_daemon --export-all --enable=receive-pack +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent + +test_expect_success 'create repo to be served by git-daemon' ' + git init "$daemon_parent" && + echo_without_newline "$hello_content" > $daemon_parent/hello && + git -C "$daemon_parent" update-index --add hello && + git -C "$daemon_parent" config transfer.advertiseobjectinfo true && + git clone "$GIT_DAEMON_URL/parent" -n "$daemon_parent/daemon_client_empty" +' + +test_expect_success 'batch-command remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + test_write_lines $GIT_DAEMON_URL/parent $hello_oid && + + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid + remote-object-info $GIT_DAEMON_URL/parent $tree_oid + remote-object-info $GIT_DAEMON_URL/parent $commit_oid + remote-object-info $GIT_DAEMON_URL/parent $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// multiple sha1 per line' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo 
"$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// default format' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + GIT_TRACE_PACKET=1 git cat-file --batch-command >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid + remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// just %(objectname)' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "$hello_oid" >expect && + echo "$tree_oid" >>expect && + echo "$commit_oid" >>expect && + echo "$tag_oid" >>expect && + GIT_TRACE_PACKET=1 git cat-file --batch-command="%(objectname)" >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid + remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// just %(objectsize)' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + echo "$hello_size" >expect && + echo "$tree_size" >>expect && + echo 
"$commit_size" >>expect && + echo "$tag_size" >>expect && + GIT_TRACE_PACKET=1 git cat-file --batch-command="%(objectsize)" >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid + remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info git:// fails on unsupported placeholders' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + test_must_fail git cat-file --batch-command="%(deltabase) %(objecttype)" 2>err <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid + EOF + test_grep "%(objecttype) %(deltabase) not supported with remote-object-info" err + ) +' + +test_expect_success 'batch-command --buffer remote-object-info git://' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid + remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info git:// default format' ' + ( + set_transport_variables "$daemon_parent" && + cd "$daemon_parent/daemon_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf 
"%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + batch_input="remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid +remote-object-info $GIT_DAEMON_URL/parent $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +# Test --batch-command remote-object-info with 'git://' and +# transfer.advertiseobjectinfo set to false, i.e. server does not have object-info capability +test_expect_success 'batch-command remote-object-info git:// fails when transfer.advertiseobjectinfo=false' ' + ( + git -C "$daemon_parent" config transfer.advertiseobjectinfo false && + set_transport_variables "$daemon_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info $GIT_DAEMON_URL/parent $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "$daemon_parent" config transfer.advertiseobjectinfo true + + ) +' + +stop_git_daemon + +# Test --batch-command remote-object-info with 'file://' transport with +# transfer.advertiseobjectinfo set to true, i.e. 
server has object-info capability +# shellcheck disable=SC2016 +test_expect_success 'create repo to be served by file:// transport' ' + git init server && + git -C server config protocol.version 2 && + git -C server config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > server/hello && + git -C server update-index --add hello && + git clone -n "file://$(pwd)/server" file_client_empty +' + +test_expect_success 'batch-command remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $file_server_url $hello_oid + remote-object-info $file_server_url $tree_oid + remote-object-info $file_server_url $commit_oid + remote-object-info $file_server_url $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// multiple sha1 per line' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These 
results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info file://' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid + remote-object-info $file_server_url $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// default format' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect 
&& + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid + remote-object-info $file_server_url $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// just "%(objectname)"' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + echo "$hello_oid" >expect && + echo "$tree_oid" >>expect && + echo "$commit_oid" >>expect && + echo "$tag_oid" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command="%(objectname)" >actual <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid + remote-object-info $file_server_url $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// just "%(objectsize)"' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + echo "$hello_size" >expect && + echo "$tree_size" >>expect && + echo "$commit_size" >>expect && + echo "$tag_size" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + git cat-file --batch-command="%(objectsize)" >actual <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid + remote-object-info $file_server_url $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info file:// fails on mixed supported and unsupported placeholders' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + test_must_fail git cat-file --batch-command="%(objectname) %(objecttype)" 2>err <<-EOF && + remote-object-info $file_server_url $hello_oid + EOF + test_grep "%(objecttype) not supported 
with remote-object-info" err + ) +' + +test_expect_success 'batch-command -Z remote-object-info file:// default format' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + cd file_client_empty && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + printf "%s\0" "$hello_oid missing" >>expect && + printf "%s\0" "$tree_oid missing" >>expect && + printf "%s\0" "$commit_oid missing" >>expect && + printf "%s\0" "$tag_oid missing" >>expect && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + batch_input="remote-object-info $file_server_url $hello_oid $tree_oid +remote-object-info $file_server_url $commit_oid $tag_oid +info $hello_oid +info $tree_oid +info $commit_oid +info $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && + test_cmp expect actual + ) +' + +# Test --batch-command remote-object-info with 'file://' and +# transfer.advertiseobjectinfo set to false, i.e. 
server does not have object-info capability +test_expect_success 'batch-command remote-object-info file:// fails when transfer.advertiseobjectinfo=false' ' + ( + set_transport_variables "server" && + server_path="$(pwd)/server" && + git -C "${server_path}" config transfer.advertiseobjectinfo false && + + file_server_url=$(echo "file://${server_path}" | test_uri_escape) && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info $file_server_url $hello_oid $tree_oid $commit_oid $tag_oid + EOF + test_grep "object-info capability is not enabled on the server" err && + + # revert server state back + git -C "${server_path}" config transfer.advertiseobjectinfo true + ) +' + +# Test --batch-command remote-object-info with 'http://' transport with +# transfer.advertiseobjectinfo set to true, i.e. server has object-info capability + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'create repo to be served by http:// transport' ' + git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo true && + echo_without_newline "$hello_content" > $HTTPD_DOCUMENT_ROOT_PATH/http_parent/hello && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" update-index --add hello && + git clone "$HTTPD_URL/smart/http_parent" -n "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" +' + +test_expect_success 'batch-command remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did 
not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid + remote-object-info $HTTPD_URL/smart/http_parent $tree_oid + remote-object-info $HTTPD_URL/smart/http_parent $commit_oid + remote-object-info $HTTPD_URL/smart/http_parent $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// one line' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command --buffer remote-object-info http://' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid 
$commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname) %(objectsize)" --buffer >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid + remote-object-info $HTTPD_URL/smart/http_parent $commit_oid $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + flush + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// default format' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + echo "$hello_oid $hello_size" >expect && + echo "$tree_oid $tree_size" >>expect && + echo "$commit_oid $commit_size" >>expect && + echo "$tag_oid $tag_size" >>expect && + + git cat-file --batch-command >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid + remote-object-info $HTTPD_URL/smart/http_parent $commit_oid $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command -Z remote-object-info http:// default format' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + printf "%s\0" "$hello_oid $hello_size" >expect && + printf "%s\0" "$tree_oid $tree_size" >>expect && + printf "%s\0" "$commit_oid $commit_size" >>expect && + printf "%s\0" "$tag_oid $tag_size" >>expect && + + batch_input="remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid +remote-object-info $HTTPD_URL/smart/http_parent $commit_oid $tag_oid +" && + echo_without_newline_nul "$batch_input" >commands_null_delimited && + + git cat-file --batch-command -Z < commands_null_delimited >actual && 
+ test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// just %(objectname)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_oid" >expect && + echo "$tree_oid" >>expect && + echo "$commit_oid" >>expect && + echo "$tag_oid" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectname)" >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid + remote-object-info $HTTPD_URL/smart/http_parent $tree_oid + remote-object-info $HTTPD_URL/smart/http_parent $commit_oid + remote-object-info $HTTPD_URL/smart/http_parent $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'batch-command remote-object-info http:// just %(objectsize)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_client_empty" && + + # These results prove remote-object-info can get object info from the remote + echo "$hello_size" >expect && + echo "$tree_size" >>expect && + echo "$commit_size" >>expect && + echo "$tag_size" >>expect && + + # These results prove remote-object-info did not download objects from the remote + echo "$hello_oid missing" >>expect && + echo "$tree_oid missing" >>expect && + echo "$commit_oid missing" >>expect && + echo "$tag_oid missing" >>expect && + + git cat-file --batch-command="%(objectsize)" >actual <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid + remote-object-info $HTTPD_URL/smart/http_parent $tree_oid + remote-object-info 
$HTTPD_URL/smart/http_parent $commit_oid + remote-object-info $HTTPD_URL/smart/http_parent $tag_oid + info $hello_oid + info $tree_oid + info $commit_oid + info $tag_oid + EOF + test_cmp expect actual + ) +' + +test_expect_success 'remote-object-info fails on mixed supported and unsupported placeholders' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objectname) %(objectsize:disk)" 2>err <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid + EOF + test_grep "%(objectsize:disk) not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on mixed supported and unsupported placeholders (another order)' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(deltabase) %(objectname) %(objectsize)" 2>err <<-EOF && + remote-object-info $HTTPD_URL/smart/http_parent $hello_oid + EOF + test_grep "%(deltabase) not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on all unsupported placeholders' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command="%(objecttype) %(objectsize:disk) %(deltabase) %(rest) " 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(objecttype) %(objectsize:disk) %(deltabase) %(rest) not supported with remote-object-info" err + ) +' + +test_expect_success 'remote-object-info fails on al unsupported placeholders with arbitrary spaces' ' + ( + set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + + test_must_fail git cat-file --batch-command=" %(objecttype) %(objectsize:disk) %(deltabase) 
%(rest) " 2>err <<-EOF && + remote-object-info "$HTTPD_URL/smart/http_parent" $hello_oid + EOF + test_grep "%(objecttype) %(objectsize:disk) %(deltabase) %(rest) not supported with remote-object-info" err + ) +'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $hello_oid
+		EOF
+		test_grep "remote-object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on server with legacy protocol with default format' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git -c protocol.version=0 cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $hello_oid
+		EOF
+		test_grep "remote-object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+		# A single quote here would close the test body, so match the OID via a regex.
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $malformed_object_id
+		EOF
+		test_grep "Not a valid object name .*$malformed_object_id" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on malformed OID with default format' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+		# A single quote here would close the test body, so match the OID via a regex.
+		test_must_fail git cat-file --batch-command 2>err <<-EOF &&
+		remote-object-info "$HTTPD_URL/smart/http_parent" $malformed_object_id
+		EOF
+		test_grep "Not a valid object name .*$malformed_object_id" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on missing OID' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+		test_commit -C missing_oid_repo message1 c.txt &&
+		cd missing_oid_repo &&
+
+		object_id=$(git rev-parse message1:c.txt) &&
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $object_id
+		EOF
+		test_grep "object-info: not our ref $object_id" err
+	)
+'
+
+test_expect_success 'remote-object-info fails on not providing OID' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent
+		EOF
+		test_grep "remote-object-info requires objects" err
+	)
+'
+test_expect_success 'remote-object-info fails on not providing OID and has spaces' '
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		some_spaces=" " &&
+
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $some_spaces
+		EOF
+		test_grep "remote-object-info requires objects" err
+	)
+'
+
+# Test --batch-command remote-object-info with 'http://' transport and
+# transfer.advertiseobjectinfo set to false, i.e. server does not have object-info capability
+test_expect_success 'batch-command remote-object-info http:// fails when transfer.advertiseobjectinfo=false' '
+	# Restore the server setting even when a step below fails; the cleanup
+	# has to be registered here because it cannot be used inside a subshell.
+	test_when_finished "git -C \"\$HTTPD_DOCUMENT_ROOT_PATH/http_parent\" config transfer.advertiseobjectinfo true" &&
+	(
+		set_transport_variables "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config transfer.advertiseobjectinfo false &&
+
+		test_must_fail git cat-file --batch-command="%(objectname) %(objectsize)" 2>err <<-EOF &&
+		remote-object-info $HTTPD_URL/smart/http_parent $hello_oid $tree_oid $commit_oid $tag_oid
+		EOF
+		test_grep "object-info capability is not enabled on the server" err
+	)
+'
+# DO NOT add non-httpd-specific tests here, because the last part of this
+# test script is only executed when httpd is available and enabled.
+
+test_done
-- 
GitLab