From 9da4aff33d51f3d3a7a2daf28044617a6ff49963 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Mon, 4 Mar 2019 15:44:24 +0100 Subject: [PATCH 01/13] Create executable that glues packs --- glue-packs.go | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 glue-packs.go diff --git a/glue-packs.go b/glue-packs.go new file mode 100644 index 00000000000..0f8b080a15e --- /dev/null +++ b/glue-packs.go @@ -0,0 +1,192 @@ +package main + +import ( + "bytes" + "crypto/sha1" + "encoding/binary" + "fmt" + "hash" + "io" + "log" + "os" +) + +const packMagic = "PACK\x00\x00\x00\x02" + +func main() { + if len(os.Args) != 2 { + fmt.Println("Usage: glue-packs PACK1 < PACK2") + os.Exit(1) + } + + if err := _main(); err != nil { + log.Fatal(err) + } +} + +func _main() error { + pack1, err := os.Open(os.Args[1]) + if err != nil { + return err + } + defer pack1.Close() + + nPack1, err := numPackObjects(pack1) + if err != nil { + return err + } + log.Printf("%s: %d objects", os.Args[1], nPack1) + + nPack2, err := numPackObjects(os.Stdin) + if err != nil { + return err + } + log.Printf("stdin: %d objects", nPack2) + + summer := sha1.New() + stdout := io.MultiWriter(os.Stdout, summer) + + if _, err := fmt.Fprint(stdout, packMagic); err != nil { + return err + } + + size := make([]byte, 4) + binary.BigEndian.PutUint32(size, nPack1+nPack2) + if _, err := stdout.Write(size); err != nil { + return err + } + + pack1Writer := &shaSplitter{w: stdout} + if _, err := io.Copy(pack1Writer, pack1); err != nil { + return err + } + + pack2Writer := &shaSplitter{w: stdout} + if _, err := io.Copy(pack2Writer, os.Stdin); err != nil { + return err + } + + if _, err := stdout.Write(summer.Sum(nil)); err != nil { + return err + } + + return nil +} + +func numPackObjects(pack io.Reader) (uint32, error) { + header := make([]byte, 12) + if _, err := io.ReadFull(pack, header); err != nil { + return 0, err + } + + if magic := 
string(header[:len(packMagic)]); magic != packMagic { + return 0, fmt.Errorf("bad pack header: %q", magic) + } + + return binary.BigEndian.Uint32(header[len(packMagic):]), nil +} + +const shaSize = 20 + +type shaSplitter struct { + buf []byte + w io.Writer +} + +func (sp *shaSplitter) Write(p []byte) (int, error) { + sp.buf = append(sp.buf, p...) + + chunkBoundary := len(sp.buf) - shaSize + if chunkBoundary <= 0 { + return len(p), nil + } + + if _, err := sp.w.Write(sp.buf[:chunkBoundary]); err != nil { + return 0, err + } + + copy(sp.buf, sp.buf[chunkBoundary:]) + sp.buf = sp.buf[:shaSize] + + return len(p), nil +} + +func (sp *shaSplitter) Sha() ([]byte, error) { + if n := len(sp.buf); n != shaSize { + return nil, fmt.Errorf("error: %d bytes left in buffer", n) + } + + return sp.buf, nil +} + +type packReader struct { + buf []byte + avail []byte + reader io.Reader + readErr error + sum hash.Hash + nObjects uint32 +} + +func NewPackReader(r io.Reader) (*packReader, error) { + pr := &packReader{ + buf: make([]byte, 4096), + reader: r, + sum: sha1.New(), + } + + header := make([]byte, 12) + if _, err := io.ReadFull(pr.reader, header); err != nil { + return nil, err + } + + if magic := string(header[:len(packMagic)]); magic != packMagic { + return nil, fmt.Errorf("bad pack header: %q", magic) + } + + pr.nObjects = binary.BigEndian.Uint32(header[len(packMagic):]) + + if _, err := pr.sum.Write(header); err != nil { + return nil, err + } + + return pr, nil +} + +func (pr *packReader) Read(p []byte) (int, error) { + if len(pr.avail) <= shaSize && pr.readErr == nil { + copy(pr.buf, pr.avail) + + var nRead int + nRead, pr.readErr = pr.reader.Read(pr.buf[len(pr.avail):]) + pr.avail = pr.buf[:len(pr.avail)+nRead] + + if nUncheckedBytes := len(pr.avail) - shaSize; nUncheckedBytes > 0 { + if _, err := pr.sum.Write(pr.avail[:nUncheckedBytes]); err != nil { + return 0, err + } + } + + if pr.readErr != nil && pr.readErr != io.EOF { + return 0, pr.readErr + } + } + + if 
len(pr.avail) <= shaSize { + if pr.readErr == io.EOF { + if len(pr.avail) != shaSize { + return 0, fmt.Errorf("short read: incomplete packfile checksum") + } + + if !bytes.Equal(pr.sum.Sum(nil), pr.avail) { + return 0, fmt.Errorf("packfile checksum mismatch") + } + } + + return 0, pr.readErr + } + + nYielded := copy(p, pr.avail[:len(pr.avail)-shaSize]) + pr.avail = pr.avail[nYielded:] + return nYielded, nil +} -- GitLab From 4a07f7f15ca1dc83c8286fde826b5a571b84b85c Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Mon, 4 Mar 2019 17:16:53 +0100 Subject: [PATCH 02/13] Use reader instead of writer --- glue-packs.go | 105 +++++++++++++++++--------------------------------- 1 file changed, 36 insertions(+), 69 deletions(-) diff --git a/glue-packs.go b/glue-packs.go index 0f8b080a15e..30259a88ef6 100644 --- a/glue-packs.go +++ b/glue-packs.go @@ -11,8 +11,6 @@ import ( "os" ) -const packMagic = "PACK\x00\x00\x00\x02" - func main() { if len(os.Args) != 2 { fmt.Println("Usage: glue-packs PACK1 < PACK2") @@ -31,16 +29,20 @@ func _main() error { } defer pack1.Close() - nPack1, err := numPackObjects(pack1) + pack1Reader, err := NewPackReader(pack1) if err != nil { return err } + + nPack1 := pack1Reader.NumObjects() log.Printf("%s: %d objects", os.Args[1], nPack1) - nPack2, err := numPackObjects(os.Stdin) + pack2Reader, err := NewPackReader(os.Stdin) if err != nil { return err } + nPack2 := pack2Reader.NumObjects() + log.Printf("stdin: %d objects", nPack2) summer := sha1.New() @@ -51,18 +53,16 @@ func _main() error { } size := make([]byte, 4) - binary.BigEndian.PutUint32(size, nPack1+nPack2) + binary.BigEndian.PutUint32(size, nPack1+nPack2) // TODO check for overflow if _, err := stdout.Write(size); err != nil { return err } - pack1Writer := &shaSplitter{w: stdout} - if _, err := io.Copy(pack1Writer, pack1); err != nil { + if _, err := io.Copy(stdout, pack1Reader); err != nil { return err } - pack2Writer := &shaSplitter{w: stdout} - if _, err := io.Copy(pack2Writer, 
os.Stdin); err != nil { + if _, err := io.Copy(stdout, pack2Reader); err != nil { return err } @@ -73,69 +73,32 @@ func _main() error { return nil } -func numPackObjects(pack io.Reader) (uint32, error) { - header := make([]byte, 12) - if _, err := io.ReadFull(pack, header); err != nil { - return 0, err - } - - if magic := string(header[:len(packMagic)]); magic != packMagic { - return 0, fmt.Errorf("bad pack header: %q", magic) - } - - return binary.BigEndian.Uint32(header[len(packMagic):]), nil -} - -const shaSize = 20 - -type shaSplitter struct { - buf []byte - w io.Writer -} - -func (sp *shaSplitter) Write(p []byte) (int, error) { - sp.buf = append(sp.buf, p...) - - chunkBoundary := len(sp.buf) - shaSize - if chunkBoundary <= 0 { - return len(p), nil - } - - if _, err := sp.w.Write(sp.buf[:chunkBoundary]); err != nil { - return 0, err - } - - copy(sp.buf, sp.buf[chunkBoundary:]) - sp.buf = sp.buf[:shaSize] - - return len(p), nil -} - -func (sp *shaSplitter) Sha() ([]byte, error) { - if n := len(sp.buf); n != shaSize { - return nil, fmt.Errorf("error: %d bytes left in buffer", n) - } - - return sp.buf, nil -} +const ( + sumSize = sha1.Size + packBufferSize = 4096 +) type packReader struct { - buf []byte - avail []byte - reader io.Reader - readErr error - sum hash.Hash - nObjects uint32 + buf [packBufferSize]byte + avail []byte + reader io.Reader + readErr error + sum hash.Hash + numObjects uint32 } +const ( + packMagic = "PACK\x00\x00\x00\x02" + packHeaderSize = 12 +) + func NewPackReader(r io.Reader) (*packReader, error) { pr := &packReader{ - buf: make([]byte, 4096), reader: r, sum: sha1.New(), } - header := make([]byte, 12) + header := make([]byte, packHeaderSize) if _, err := io.ReadFull(pr.reader, header); err != nil { return nil, err } @@ -144,7 +107,7 @@ func NewPackReader(r io.Reader) (*packReader, error) { return nil, fmt.Errorf("bad pack header: %q", magic) } - pr.nObjects = binary.BigEndian.Uint32(header[len(packMagic):]) + pr.numObjects = 
binary.BigEndian.Uint32(header[len(packMagic):]) if _, err := pr.sum.Write(header); err != nil { return nil, err @@ -153,15 +116,19 @@ func NewPackReader(r io.Reader) (*packReader, error) { return pr, nil } +func (pr *packReader) NumObjects() uint32 { + return pr.numObjects +} + func (pr *packReader) Read(p []byte) (int, error) { - if len(pr.avail) <= shaSize && pr.readErr == nil { - copy(pr.buf, pr.avail) + if len(pr.avail) <= sumSize && pr.readErr == nil { + copy(pr.buf[:], pr.avail) var nRead int nRead, pr.readErr = pr.reader.Read(pr.buf[len(pr.avail):]) pr.avail = pr.buf[:len(pr.avail)+nRead] - if nUncheckedBytes := len(pr.avail) - shaSize; nUncheckedBytes > 0 { + if nUncheckedBytes := len(pr.avail) - sumSize; nUncheckedBytes > 0 { if _, err := pr.sum.Write(pr.avail[:nUncheckedBytes]); err != nil { return 0, err } @@ -172,9 +139,9 @@ func (pr *packReader) Read(p []byte) (int, error) { } } - if len(pr.avail) <= shaSize { + if len(pr.avail) <= sumSize { if pr.readErr == io.EOF { - if len(pr.avail) != shaSize { + if len(pr.avail) != sumSize { return 0, fmt.Errorf("short read: incomplete packfile checksum") } @@ -186,7 +153,7 @@ func (pr *packReader) Read(p []byte) (int, error) { return 0, pr.readErr } - nYielded := copy(p, pr.avail[:len(pr.avail)-shaSize]) + nYielded := copy(p, pr.avail[:len(pr.avail)-sumSize]) pr.avail = pr.avail[nYielded:] return nYielded, nil } -- GitLab From 99d7b00e8690c63d6aa05e197c4dda22a38a5ba4 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 5 Mar 2019 11:53:15 +0100 Subject: [PATCH 03/13] Comments --- glue-packs.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/glue-packs.go b/glue-packs.go index 30259a88ef6..99b0e8f87cf 100644 --- a/glue-packs.go +++ b/glue-packs.go @@ -41,8 +41,8 @@ func _main() error { if err != nil { return err } - nPack2 := pack2Reader.NumObjects() + nPack2 := pack2Reader.NumObjects() log.Printf("stdin: %d objects", nPack2) summer := sha1.New() @@ -92,6 +92,7 @@ const ( 
packHeaderSize = 12 ) +// NewPackReader blocks until it has read the packfile header from r. func NewPackReader(r io.Reader) (*packReader, error) { pr := &packReader{ reader: r, @@ -121,6 +122,7 @@ func (pr *packReader) NumObjects() uint32 { } func (pr *packReader) Read(p []byte) (int, error) { + // No data available? Try to read from pr.reader. if len(pr.avail) <= sumSize && pr.readErr == nil { copy(pr.buf[:], pr.avail) @@ -139,20 +141,16 @@ func (pr *packReader) Read(p []byte) (int, error) { } } + // (Still) no data available? Early return. if len(pr.avail) <= sumSize { - if pr.readErr == io.EOF { - if len(pr.avail) != sumSize { - return 0, fmt.Errorf("short read: incomplete packfile checksum") - } - - if !bytes.Equal(pr.sum.Sum(nil), pr.avail) { - return 0, fmt.Errorf("packfile checksum mismatch") - } + if pr.readErr == io.EOF && !bytes.Equal(pr.sum.Sum(nil), pr.avail) { + return 0, fmt.Errorf("packfile checksum mismatch") } return 0, pr.readErr } + // Happy path: yield data from our buffer. nYielded := copy(p, pr.avail[:len(pr.avail)-sumSize]) pr.avail = pr.avail[nYielded:] return nYielded, nil -- GitLab From bd9cb5af5b1eb00ba28d8ae55173bb81dd396ed5 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 5 Mar 2019 12:36:41 +0100 Subject: [PATCH 04/13] Use variable --- glue-packs.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glue-packs.go b/glue-packs.go index 99b0e8f87cf..78e03c633c6 100644 --- a/glue-packs.go +++ b/glue-packs.go @@ -141,8 +141,9 @@ func (pr *packReader) Read(p []byte) (int, error) { } } - // (Still) no data available? Early return. - if len(pr.avail) <= sumSize { + nBytesAvailable := len(pr.avail) - sumSize + + if nBytesAvailable <= 0 { if pr.readErr == io.EOF && !bytes.Equal(pr.sum.Sum(nil), pr.avail) { return 0, fmt.Errorf("packfile checksum mismatch") } @@ -150,8 +151,7 @@ func (pr *packReader) Read(p []byte) (int, error) { return 0, pr.readErr } - // Happy path: yield data from our buffer. 
- nYielded := copy(p, pr.avail[:len(pr.avail)-sumSize]) + nYielded := copy(p, pr.avail[:nBytesAvailable]) pr.avail = pr.avail[nYielded:] return nYielded, nil } -- GitLab From ebeb57b0ffacde288444d9c560238f8a5d04fb95 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 7 Mar 2019 18:12:02 +0100 Subject: [PATCH 05/13] count in helper --- glue-packs.go | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/glue-packs.go b/glue-packs.go index 78e03c633c6..b9d7040c381 100644 --- a/glue-packs.go +++ b/glue-packs.go @@ -117,33 +117,30 @@ func NewPackReader(r io.Reader) (*packReader, error) { return pr, nil } -func (pr *packReader) NumObjects() uint32 { - return pr.numObjects -} +func (pr *packReader) NumObjects() uint32 { return pr.numObjects } + +func (pr *packReader) numBytesAvailable() int { return len(pr.avail) - sumSize } func (pr *packReader) Read(p []byte) (int, error) { - // No data available? Try to read from pr.reader. - if len(pr.avail) <= sumSize && pr.readErr == nil { + if pr.numBytesAvailable() <= 0 && pr.readErr == nil { copy(pr.buf[:], pr.avail) var nRead int nRead, pr.readErr = pr.reader.Read(pr.buf[len(pr.avail):]) + if pr.readErr != nil && pr.readErr != io.EOF { + return 0, pr.readErr + } + pr.avail = pr.buf[:len(pr.avail)+nRead] - if nUncheckedBytes := len(pr.avail) - sumSize; nUncheckedBytes > 0 { - if _, err := pr.sum.Write(pr.avail[:nUncheckedBytes]); err != nil { + if n := pr.numBytesAvailable(); n > 0 { + if _, err := pr.sum.Write(pr.avail[:n]); err != nil { return 0, err } } - - if pr.readErr != nil && pr.readErr != io.EOF { - return 0, pr.readErr - } } - nBytesAvailable := len(pr.avail) - sumSize - - if nBytesAvailable <= 0 { + if pr.numBytesAvailable() <= 0 { if pr.readErr == io.EOF && !bytes.Equal(pr.sum.Sum(nil), pr.avail) { return 0, fmt.Errorf("packfile checksum mismatch") } @@ -151,7 +148,7 @@ func (pr *packReader) Read(p []byte) (int, error) { return 0, pr.readErr } - nYielded := copy(p, 
pr.avail[:nBytesAvailable]) + nYielded := copy(p, pr.avail[:pr.numBytesAvailable()]) pr.avail = pr.avail[nYielded:] return nYielded, nil } -- GitLab From 8c70386e71013be933d9ac185b121d199f4cad44 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 4 Apr 2019 15:21:20 +0200 Subject: [PATCH 06/13] Add working example using bundle files --- glue-packs.go | 82 --------- pack-objects-clone-bundle/README.md | 8 + .../pack-objects-bundle.go | 171 ++++++++++++++++++ pack-objects-clone-bundle/packreader.go | 90 +++++++++ 4 files changed, 269 insertions(+), 82 deletions(-) create mode 100644 pack-objects-clone-bundle/README.md create mode 100644 pack-objects-clone-bundle/pack-objects-bundle.go create mode 100644 pack-objects-clone-bundle/packreader.go diff --git a/glue-packs.go b/glue-packs.go index b9d7040c381..5d6f896f343 100644 --- a/glue-packs.go +++ b/glue-packs.go @@ -1,11 +1,9 @@ package main import ( - "bytes" "crypto/sha1" "encoding/binary" "fmt" - "hash" "io" "log" "os" @@ -72,83 +70,3 @@ func _main() error { return nil } - -const ( - sumSize = sha1.Size - packBufferSize = 4096 -) - -type packReader struct { - buf [packBufferSize]byte - avail []byte - reader io.Reader - readErr error - sum hash.Hash - numObjects uint32 -} - -const ( - packMagic = "PACK\x00\x00\x00\x02" - packHeaderSize = 12 -) - -// NewPackReader blocks until it has read the packfile header from r. 
-func NewPackReader(r io.Reader) (*packReader, error) { - pr := &packReader{ - reader: r, - sum: sha1.New(), - } - - header := make([]byte, packHeaderSize) - if _, err := io.ReadFull(pr.reader, header); err != nil { - return nil, err - } - - if magic := string(header[:len(packMagic)]); magic != packMagic { - return nil, fmt.Errorf("bad pack header: %q", magic) - } - - pr.numObjects = binary.BigEndian.Uint32(header[len(packMagic):]) - - if _, err := pr.sum.Write(header); err != nil { - return nil, err - } - - return pr, nil -} - -func (pr *packReader) NumObjects() uint32 { return pr.numObjects } - -func (pr *packReader) numBytesAvailable() int { return len(pr.avail) - sumSize } - -func (pr *packReader) Read(p []byte) (int, error) { - if pr.numBytesAvailable() <= 0 && pr.readErr == nil { - copy(pr.buf[:], pr.avail) - - var nRead int - nRead, pr.readErr = pr.reader.Read(pr.buf[len(pr.avail):]) - if pr.readErr != nil && pr.readErr != io.EOF { - return 0, pr.readErr - } - - pr.avail = pr.buf[:len(pr.avail)+nRead] - - if n := pr.numBytesAvailable(); n > 0 { - if _, err := pr.sum.Write(pr.avail[:n]); err != nil { - return 0, err - } - } - } - - if pr.numBytesAvailable() <= 0 { - if pr.readErr == io.EOF && !bytes.Equal(pr.sum.Sum(nil), pr.avail) { - return 0, fmt.Errorf("packfile checksum mismatch") - } - - return 0, pr.readErr - } - - nYielded := copy(p, pr.avail[:pr.numBytesAvailable()]) - pr.avail = pr.avail[nYielded:] - return nYielded, nil -} diff --git a/pack-objects-clone-bundle/README.md b/pack-objects-clone-bundle/README.md new file mode 100644 index 00000000000..1d6d486fc16 --- /dev/null +++ b/pack-objects-clone-bundle/README.md @@ -0,0 +1,8 @@ +# Demonstration of concatenating pack files to speed up Git clone + +This directory contains code for an executable that can speed up a Git clone when installed on a server. The only type of clone we can speed up is a full clone. + +- compile the executable and install at some chosen path, e.g. 
`go build -o /tmp/pack-objects-clone-bundle` +- `git config --global uploadpack.packObjectsHook /tmp/pack-objects-bundle` (has to be global for some reason) +- in the bare repo you want to speed up, run `git bundle create clone.bundle --branches --tags` +- now do a full clone from that repo. If it is a local clone, use `git clone --no-local` to see the effect diff --git a/pack-objects-clone-bundle/pack-objects-bundle.go b/pack-objects-clone-bundle/pack-objects-bundle.go new file mode 100644 index 00000000000..aafc8ece976 --- /dev/null +++ b/pack-objects-clone-bundle/pack-objects-bundle.go @@ -0,0 +1,171 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "crypto/sha1" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "os/exec" + "regexp" + "strings" +) + +func main() { + if len(os.Args) < 2 { + log.Fatal("not enough argument to pack-objects hook") + } + + if err := _main(os.Args[1:]); err != nil { + log.Fatal(err) + } +} + +var shaRegex = regexp.MustCompile(`\A[0-9a-f]{40}\z`) + +func _main(packObjects []string) error { + request := &bytes.Buffer{} + scanner := bufio.NewScanner(io.TeeReader(os.Stdin, request)) + seenNot := false + isClone := true + for scanner.Scan() { + if !seenNot && scanner.Text() == "--not" { + seenNot = true + continue + } + + if seenNot && scanner.Text() != "" { + isClone = false + } + } + + if err := scanner.Err(); err != nil { + return err + } + + if !isClone { + return fallback(packObjects, request) + } + + bundleFile, err := os.Open("clone.bundle") + if err != nil { + return fallback(packObjects, request) + } + defer bundleFile.Close() + + bundle := bufio.NewReader(bundleFile) + bundleHeader, err := readLine(bundle) + if err != nil { + return err + } + if bundleHeader != "# v2 git bundle" { + return fmt.Errorf("unexpected bundle header: %q", bundleHeader) + } + + request = bytes.NewBuffer(bytes.TrimSpace(request.Bytes())) + if _, err := request.WriteString("\n"); err != nil { + return err + } + + for { + refLine, err := 
readLine(bundle) + if err != nil { + return err + } + + if refLine == "" { + break + } + + split := strings.SplitN(refLine, " ", 2) + if len(split) != 2 { + return fmt.Errorf("invalid ref line: %q", refLine) + } + id := split[0] + if !shaRegex.MatchString(id) { + return fmt.Errorf("invalid object ID: %q", id) + } + + if _, err := fmt.Fprintln(request, id); err != nil { + return err + } + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + cmd := exec.CommandContext(ctx, packObjects[0], packObjects[1:]...) + cmd.Stdin = request + cmd.Stderr = os.Stderr + + packObjectsOut, err := cmd.StdoutPipe() + if err != nil { + return err + } + + if err := cmd.Start(); err != nil { + return err + } + + packObjectsReader, err := NewPackReader(packObjectsOut) + if err != nil { + return err + } + + bundleReader, err := NewPackReader(bundle) + if err != nil { + return err + } + + summer := sha1.New() + stdout := io.MultiWriter(os.Stdout, summer) + + if _, err := fmt.Fprint(stdout, packMagic); err != nil { + return err + } + + size := make([]byte, 4) + binary.BigEndian.PutUint32(size, packObjectsReader.NumObjects()+bundleReader.NumObjects()) // TODO check for overflow + if _, err := stdout.Write(size); err != nil { + return err + } + + if _, err := io.Copy(stdout, packObjectsReader); err != nil { + return err + } + + if err := cmd.Wait(); err != nil { + return err + } + + if _, err := io.Copy(stdout, bundleReader); err != nil { + return err + } + + if _, err := stdout.Write(summer.Sum(nil)); err != nil { + return err + } + + return nil +} + +func fallback(packObjects []string, request io.Reader) error { + cmd := exec.Command(packObjects[0], packObjects[1:]...) 
+ cmd.Stdin = request + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +func readLine(r *bufio.Reader) (string, error) { + line, err := r.ReadBytes('\n') + if err != nil { + return "", err + } + + return string(line[:len(line)-1]), nil +} diff --git a/pack-objects-clone-bundle/packreader.go b/pack-objects-clone-bundle/packreader.go new file mode 100644 index 00000000000..b6154979a77 --- /dev/null +++ b/pack-objects-clone-bundle/packreader.go @@ -0,0 +1,90 @@ +package main + +import ( + "bytes" + "crypto/sha1" + "encoding/binary" + "fmt" + "hash" + "io" +) + +const ( + sumSize = sha1.Size + packBufferSize = 4096 +) + +type packReader struct { + buf [packBufferSize]byte + avail []byte + reader io.Reader + readErr error + sum hash.Hash + numObjects uint32 +} + +const ( + packMagic = "PACK\x00\x00\x00\x02" + packHeaderSize = 12 +) + +// NewPackReader blocks until it has read the packfile header from r. +func NewPackReader(r io.Reader) (*packReader, error) { + pr := &packReader{ + reader: r, + sum: sha1.New(), + } + + header := make([]byte, packHeaderSize) + if _, err := io.ReadFull(pr.reader, header); err != nil { + return nil, err + } + + if magic := string(header[:len(packMagic)]); magic != packMagic { + return nil, fmt.Errorf("bad pack header: %q", magic) + } + + pr.numObjects = binary.BigEndian.Uint32(header[len(packMagic):]) + + if _, err := pr.sum.Write(header); err != nil { + return nil, err + } + + return pr, nil +} + +func (pr *packReader) NumObjects() uint32 { return pr.numObjects } + +func (pr *packReader) numBytesAvailable() int { return len(pr.avail) - sumSize } + +func (pr *packReader) Read(p []byte) (int, error) { + if pr.numBytesAvailable() <= 0 && pr.readErr == nil { + copy(pr.buf[:], pr.avail) + + var nRead int + nRead, pr.readErr = pr.reader.Read(pr.buf[len(pr.avail):]) + if pr.readErr != nil && pr.readErr != io.EOF { + return 0, pr.readErr + } + + pr.avail = pr.buf[:len(pr.avail)+nRead] + + if n := pr.numBytesAvailable(); n 
> 0 { + if _, err := pr.sum.Write(pr.avail[:n]); err != nil { + return 0, err + } + } + } + + if pr.numBytesAvailable() <= 0 { + if pr.readErr == io.EOF && !bytes.Equal(pr.sum.Sum(nil), pr.avail) { + return 0, fmt.Errorf("packfile checksum mismatch") + } + + return 0, pr.readErr + } + + nYielded := copy(p, pr.avail[:pr.numBytesAvailable()]) + pr.avail = pr.avail[nYielded:] + return nYielded, nil +} -- GitLab From 861da09f31d05bba746ba8b8ff62136cc9a2b9f1 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 4 Apr 2019 15:59:04 +0200 Subject: [PATCH 07/13] Clean up a little --- .../pack-objects-bundle.go | 74 +++++++++++-------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/pack-objects-clone-bundle/pack-objects-bundle.go b/pack-objects-clone-bundle/pack-objects-bundle.go index aafc8ece976..ed712458e9f 100644 --- a/pack-objects-clone-bundle/pack-objects-bundle.go +++ b/pack-objects-clone-bundle/pack-objects-bundle.go @@ -15,6 +15,8 @@ import ( "strings" ) +const bundleFileName = "clone.bundle" + func main() { if len(os.Args) < 2 { log.Fatal("not enough argument to pack-objects hook") @@ -51,48 +53,21 @@ func _main(packObjects []string) error { return fallback(packObjects, request) } - bundleFile, err := os.Open("clone.bundle") + bundleFile, err := os.Open(bundleFileName) if err != nil { return fallback(packObjects, request) } defer bundleFile.Close() bundle := bufio.NewReader(bundleFile) - bundleHeader, err := readLine(bundle) - if err != nil { - return err - } - if bundleHeader != "# v2 git bundle" { - return fmt.Errorf("unexpected bundle header: %q", bundleHeader) - } request = bytes.NewBuffer(bytes.TrimSpace(request.Bytes())) if _, err := request.WriteString("\n"); err != nil { return err } - for { - refLine, err := readLine(bundle) - if err != nil { - return err - } - - if refLine == "" { - break - } - - split := strings.SplitN(refLine, " ", 2) - if len(split) != 2 { - return fmt.Errorf("invalid ref line: %q", refLine) - } - id := split[0] - if 
!shaRegex.MatchString(id) { - return fmt.Errorf("invalid object ID: %q", id) - } - - if _, err := fmt.Fprintln(request, id); err != nil { - return err - } + if err := addBundleRefsToRequest(request, bundle); err != nil { + return err } ctx, cancel := context.WithCancel(context.Background()) @@ -150,6 +125,8 @@ func _main(packObjects []string) error { return err } + fmt.Fprintf(os.Stderr, "re-used from %s: %d objects\n", bundleFileName, bundleReader.NumObjects()) + return nil } @@ -169,3 +146,40 @@ func readLine(r *bufio.Reader) (string, error) { return string(line[:len(line)-1]), nil } + +func addBundleRefsToRequest(request io.Writer, bundle *bufio.Reader) error { + bundleHeader, err := readLine(bundle) + if err != nil { + return err + } + if bundleHeader != "# v2 git bundle" { + return fmt.Errorf("unexpected bundle header: %q", bundleHeader) + } + + for { + refLine, err := readLine(bundle) + if err != nil { + return err + } + + if refLine == "" { + break + } + + split := strings.SplitN(refLine, " ", 2) + if len(split) != 2 { + return fmt.Errorf("invalid ref line: %q", refLine) + } + + id := split[0] + if !shaRegex.MatchString(id) { + return fmt.Errorf("invalid object ID: %q", id) + } + + if _, err := fmt.Fprintln(request, id); err != nil { + return err + } + } + + return nil +} -- GitLab From 22b24c1778e8971d04a53c3ea35c47100b24ef53 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 10 Apr 2019 16:56:04 +0200 Subject: [PATCH 08/13] Add unit tests for packfile read/write --- .../packreader.go => internal/git/packfile.go | 69 ++++++++++++-- internal/git/packfile_test.go | 91 +++++++++++++++++++ 2 files changed, 153 insertions(+), 7 deletions(-) rename pack-objects-clone-bundle/packreader.go => internal/git/packfile.go (50%) create mode 100644 internal/git/packfile_test.go diff --git a/pack-objects-clone-bundle/packreader.go b/internal/git/packfile.go similarity index 50% rename from pack-objects-clone-bundle/packreader.go rename to internal/git/packfile.go 
index b6154979a77..7d5551d984c 100644 --- a/pack-objects-clone-bundle/packreader.go +++ b/internal/git/packfile.go @@ -1,4 +1,4 @@ -package main +package git import ( "bytes" @@ -14,7 +14,7 @@ const ( packBufferSize = 4096 ) -type packReader struct { +type PackReader struct { buf [packBufferSize]byte avail []byte reader io.Reader @@ -29,8 +29,8 @@ const ( ) // NewPackReader blocks until it has read the packfile header from r. -func NewPackReader(r io.Reader) (*packReader, error) { - pr := &packReader{ +func NewPackReader(r io.Reader) (*PackReader, error) { + pr := &PackReader{ reader: r, sum: sha1.New(), } @@ -53,11 +53,11 @@ func NewPackReader(r io.Reader) (*packReader, error) { return pr, nil } -func (pr *packReader) NumObjects() uint32 { return pr.numObjects } +func (pr *PackReader) NumObjects() uint32 { return pr.numObjects } -func (pr *packReader) numBytesAvailable() int { return len(pr.avail) - sumSize } +func (pr *PackReader) numBytesAvailable() int { return len(pr.avail) - sumSize } -func (pr *packReader) Read(p []byte) (int, error) { +func (pr *PackReader) Read(p []byte) (int, error) { if pr.numBytesAvailable() <= 0 && pr.readErr == nil { copy(pr.buf[:], pr.avail) @@ -88,3 +88,58 @@ func (pr *packReader) Read(p []byte) (int, error) { pr.avail = pr.avail[nYielded:] return nYielded, nil } + +type PackWriter struct { + w io.Writer + summer hash.Hash + flushed bool +} + +// NewWriter creates a new PackWriter, writes its header, and returns the +// PackWriter. The caller must call Flush() when done, or else the +// packfile written to w will be invalid. 
+func NewPackWriter(w io.Writer, numObjects uint32) (*PackWriter, error) { + pw := &PackWriter{ + summer: sha1.New(), + } + pw.w = io.MultiWriter(w, pw.summer) + + if _, err := pw.w.Write([]byte(packMagic)); err != nil { + return nil, err + } + + size := make([]byte, 4) + binary.BigEndian.PutUint32(size, numObjects) + if _, err := pw.w.Write(size); err != nil { + return nil, err + } + + return pw, nil +} + +type alreadyFlushedError struct{} + +func (alreadyFlushedError) Error() string { return "PackWriter already flushed" } + +func (pw *PackWriter) Write(p []byte) (int, error) { + if pw.flushed { + return 0, alreadyFlushedError{} + } + + return pw.w.Write(p) +} + +// Flush finalizes the packfile by writing its trailing checksum. +func (pw *PackWriter) Flush() error { + if pw.flushed { + return alreadyFlushedError{} + } + pw.flushed = true + + sum := pw.summer.Sum(nil) + + // Feeding the checksum back into pw.w messes up the state of pw.summer + // but we will not use it again so it's OK. 
+ _, err := pw.w.Write(sum) + return err +} diff --git a/internal/git/packfile_test.go b/internal/git/packfile_test.go new file mode 100644 index 00000000000..b4b35dddede --- /dev/null +++ b/internal/git/packfile_test.go @@ -0,0 +1,91 @@ +package git + +import ( + "bytes" + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +type packExample struct { + n uint32 + raw []byte + content []byte +} + +var smallPackExample = &packExample{ + n: 3, + raw: []byte("PACK\x00\x00\x00\x02\x00\x00\x00\x03hello=?\x1A$\xB3\x8F\xCC\x96\xE0\xB0\xAC\xF0\x93\t\x85\xD8\x87K\xC5p"), + content: []byte("hello"), +} + +func TestPackReader(t *testing.T) { + testCases := []struct { + desc string + pack *packExample + }{ + { + desc: "packfile with 3 objects", + pack: smallPackExample, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + pr, err := NewPackReader(bytes.NewReader(tc.pack.raw)) + require.NoError(t, err) + + require.Equal(t, tc.pack.n, pr.NumObjects(), "number of objects in packfile") + + out, err := ioutil.ReadAll(pr) + require.NoError(t, err, "read all data") + + require.Equal(t, string(tc.pack.content), string(out), "packfile content") + }) + } +} + +// TODO add more PackReader tests: invalid header, length < 32, invalid checksum + +func TestPackWriter(t *testing.T) { + testCases := []struct { + desc string + pack *packExample + }{ + { + desc: "packfile with 3 objects", + pack: smallPackExample, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + out := &bytes.Buffer{} + pw, err := NewPackWriter(out, tc.pack.n) + require.NoError(t, err) + + in := tc.pack.content + nBytes, err := pw.Write(in) + require.NoError(t, err) + require.Equal(t, nBytes, len(in), "bytes written") + + require.NoError(t, pw.Flush(), "flush") + require.Equal(t, string(tc.pack.raw), out.String()) + }) + } +} + +func TestPackWriterFlush(t *testing.T) { + out := &bytes.Buffer{} + pw, err := NewPackWriter(out, 123) + 
require.NoError(t, err) + + require.NoError(t, pw.Flush()) + + n, err := pw.Write([]byte("hello")) + require.Equal(t, 0, n, "bytes written should be 0") + require.IsType(t, alreadyFlushedError{}, err, "write error should be 'already flushed'") + + require.IsType(t, alreadyFlushedError{}, pw.Flush(), "flush error should be 'already flushed'") +} -- GitLab From 84c9518249f494dd9bf1f7ccba3cd492ea42e44e Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 10 Apr 2019 16:57:16 +0200 Subject: [PATCH 09/13] Remove old file --- glue-packs.go | 72 --------------------------------------------------- 1 file changed, 72 deletions(-) delete mode 100644 glue-packs.go diff --git a/glue-packs.go b/glue-packs.go deleted file mode 100644 index 5d6f896f343..00000000000 --- a/glue-packs.go +++ /dev/null @@ -1,72 +0,0 @@ -package main - -import ( - "crypto/sha1" - "encoding/binary" - "fmt" - "io" - "log" - "os" -) - -func main() { - if len(os.Args) != 2 { - fmt.Println("Usage: glue-packs PACK1 < PACK2") - os.Exit(1) - } - - if err := _main(); err != nil { - log.Fatal(err) - } -} - -func _main() error { - pack1, err := os.Open(os.Args[1]) - if err != nil { - return err - } - defer pack1.Close() - - pack1Reader, err := NewPackReader(pack1) - if err != nil { - return err - } - - nPack1 := pack1Reader.NumObjects() - log.Printf("%s: %d objects", os.Args[1], nPack1) - - pack2Reader, err := NewPackReader(os.Stdin) - if err != nil { - return err - } - - nPack2 := pack2Reader.NumObjects() - log.Printf("stdin: %d objects", nPack2) - - summer := sha1.New() - stdout := io.MultiWriter(os.Stdout, summer) - - if _, err := fmt.Fprint(stdout, packMagic); err != nil { - return err - } - - size := make([]byte, 4) - binary.BigEndian.PutUint32(size, nPack1+nPack2) // TODO check for overflow - if _, err := stdout.Write(size); err != nil { - return err - } - - if _, err := io.Copy(stdout, pack1Reader); err != nil { - return err - } - - if _, err := io.Copy(stdout, pack2Reader); err != nil { - return err - 
} - - if _, err := stdout.Write(summer.Sum(nil)); err != nil { - return err - } - - return nil -} -- GitLab From f9ac26855fbdb46813aa71288e4590472c52b7a3 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 10 Apr 2019 17:22:41 +0200 Subject: [PATCH 10/13] Remove testcase layer --- internal/git/packfile_test.go | 48 ++++++++++++----------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/internal/git/packfile_test.go b/internal/git/packfile_test.go index b4b35dddede..68bcfa77379 100644 --- a/internal/git/packfile_test.go +++ b/internal/git/packfile_test.go @@ -9,39 +9,33 @@ import ( ) type packExample struct { + desc string n uint32 raw []byte content []byte } -var smallPackExample = &packExample{ - n: 3, - raw: []byte("PACK\x00\x00\x00\x02\x00\x00\x00\x03hello=?\x1A$\xB3\x8F\xCC\x96\xE0\xB0\xAC\xF0\x93\t\x85\xD8\x87K\xC5p"), - content: []byte("hello"), +var packExamples = []packExample{ + { + desc: "small, 3 objects", + n: 3, + raw: []byte("PACK\x00\x00\x00\x02\x00\x00\x00\x03hello=?\x1A$\xB3\x8F\xCC\x96\xE0\xB0\xAC\xF0\x93\t\x85\xD8\x87K\xC5p"), + content: []byte("hello"), + }, } func TestPackReader(t *testing.T) { - testCases := []struct { - desc string - pack *packExample - }{ - { - desc: "packfile with 3 objects", - pack: smallPackExample, - }, - } - - for _, tc := range testCases { + for _, tc := range packExamples { t.Run(tc.desc, func(t *testing.T) { - pr, err := NewPackReader(bytes.NewReader(tc.pack.raw)) + pr, err := NewPackReader(bytes.NewReader(tc.raw)) require.NoError(t, err) - require.Equal(t, tc.pack.n, pr.NumObjects(), "number of objects in packfile") + require.Equal(t, tc.n, pr.NumObjects(), "number of objects in packfile") out, err := ioutil.ReadAll(pr) require.NoError(t, err, "read all data") - require.Equal(t, string(tc.pack.content), string(out), "packfile content") + require.Equal(t, string(tc.content), string(out), "packfile content") }) } } @@ -49,29 +43,19 @@ func TestPackReader(t *testing.T) { // TODO add more 
PackReader tests: invalid header, length < 32, invalid checksum func TestPackWriter(t *testing.T) { - testCases := []struct { - desc string - pack *packExample - }{ - { - desc: "packfile with 3 objects", - pack: smallPackExample, - }, - } - - for _, tc := range testCases { + for _, tc := range packExamples { t.Run(tc.desc, func(t *testing.T) { out := &bytes.Buffer{} - pw, err := NewPackWriter(out, tc.pack.n) + pw, err := NewPackWriter(out, tc.n) require.NoError(t, err) - in := tc.pack.content + in := tc.content nBytes, err := pw.Write(in) require.NoError(t, err) require.Equal(t, nBytes, len(in), "bytes written") require.NoError(t, pw.Flush(), "flush") - require.Equal(t, string(tc.pack.raw), out.String()) + require.Equal(t, string(tc.raw), out.String()) }) } } -- GitLab From 287609b8ae7ef828fb3517ab7dd7c3f4880dc3ed Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 11 Apr 2019 11:29:15 +0200 Subject: [PATCH 11/13] Restructure pack-objects hook --- .gitignore | 1 + cmd/gitaly-pack-objects/README.md | 15 ++++ cmd/gitaly-pack-objects/main.go | 26 ++++++ .../packobjects}/pack-objects-bundle.go | 90 +++++++------------ pack-objects-clone-bundle/README.md | 8 -- 5 files changed, 75 insertions(+), 65 deletions(-) create mode 100644 cmd/gitaly-pack-objects/README.md create mode 100644 cmd/gitaly-pack-objects/main.go rename {pack-objects-clone-bundle => internal/packobjects}/pack-objects-bundle.go (52%) delete mode 100644 pack-objects-clone-bundle/README.md diff --git a/.gitignore b/.gitignore index 08ae671a8d7..daa66f77554 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ cmd/gitaly-ssh/gitaly-ssh git-env /gitaly-debug /praefect +/gitaly-pack-objects \ No newline at end of file diff --git a/cmd/gitaly-pack-objects/README.md b/cmd/gitaly-pack-objects/README.md new file mode 100644 index 00000000000..757ede509c4 --- /dev/null +++ b/cmd/gitaly-pack-objects/README.md @@ -0,0 +1,15 @@ +# gitaly-pack-objects + +This is a **beta** pack-objects hook that can speed up 
a Git clone when +installed on a server. The only type of clone we can speed up is a full +clone. + +Also see https://gitlab.com/groups/gitlab-org/-/epics/1117. + +- compile the executable and install at some chosen path +- `git config --global uploadpack.packObjectsHook /path/to/gitaly-pack-objects` + (config has to be global for some reason) +- in the bare repo you want to speed up, run + `mkdir -p gitaly && git bundle create gitaly/clone.bundle --branches --tags` +- now do a full clone from that repo. If it is a local clone, use + `git clone --no-local` to see the effect diff --git a/cmd/gitaly-pack-objects/main.go b/cmd/gitaly-pack-objects/main.go new file mode 100644 index 00000000000..d8e91407fab --- /dev/null +++ b/cmd/gitaly-pack-objects/main.go @@ -0,0 +1,26 @@ +package main + +import ( + "context" + "log" + "os" + + "gitlab.com/gitlab-org/gitaly/internal/packobjects" +) + +func main() { + if len(os.Args) < 2 { + log.Fatal("not enough argument to pack-objects hook") + } + + if err := _main(os.Args[1:]); err != nil { + log.Fatal(err) + } +} + +func _main(args []string) error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + return packobjects.PackObjects(ctx, os.Args[1:], os.Stdin, os.Stdout, os.Stderr) +} diff --git a/pack-objects-clone-bundle/pack-objects-bundle.go b/internal/packobjects/pack-objects-bundle.go similarity index 52% rename from pack-objects-clone-bundle/pack-objects-bundle.go rename to internal/packobjects/pack-objects-bundle.go index ed712458e9f..9a7af202723 100644 --- a/pack-objects-clone-bundle/pack-objects-bundle.go +++ b/internal/packobjects/pack-objects-bundle.go @@ -1,37 +1,27 @@ -package main +package packobjects import ( "bufio" "bytes" "context" - "crypto/sha1" - "encoding/binary" "fmt" "io" - "log" "os" "os/exec" "regexp" "strings" -) -const bundleFileName = "clone.bundle" + "gitlab.com/gitlab-org/gitaly/internal/command" + "gitlab.com/gitlab-org/gitaly/internal/git" +) -func main() { - if
len(os.Args) < 2 { - log.Fatal("not enough argument to pack-objects hook") - } - - if err := _main(os.Args[1:]); err != nil { - log.Fatal(err) - } -} +const bundleFileName = "gitaly/clone.bundle" var shaRegex = regexp.MustCompile(`\A[0-9a-f]{40}\z`) -func _main(packObjects []string) error { +func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, stderr io.Writer) error { request := &bytes.Buffer{} - scanner := bufio.NewScanner(io.TeeReader(os.Stdin, request)) + scanner := bufio.NewScanner(io.TeeReader(stdin, request)) seenNot := false isClone := true for scanner.Scan() { @@ -50,17 +40,22 @@ func _main(packObjects []string) error { } if !isClone { - return fallback(packObjects, request) + return fallback(ctx, args, request, stdout, stderr) } bundleFile, err := os.Open(bundleFileName) if err != nil { - return fallback(packObjects, request) + return fallback(ctx, args, request, stdout, stderr) } defer bundleFile.Close() bundle := bufio.NewReader(bundleFile) + bundleReader, err := git.NewPackReader(bundle) + if err != nil { + return fallback(ctx, args, request, stdout, stderr) + } + request = bytes.NewBuffer(bytes.TrimSpace(request.Bytes())) if _, err := request.WriteString("\n"); err != nil { return err @@ -70,46 +65,24 @@ func _main(packObjects []string) error { return err } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - cmd := exec.CommandContext(ctx, packObjects[0], packObjects[1:]...) 
- cmd.Stdin = request - cmd.Stderr = os.Stderr - - packObjectsOut, err := cmd.StdoutPipe() + cmd, err := command.New(ctx, exec.Command(args[0], args[1:]...), request, nil, stderr) if err != nil { return err } - if err := cmd.Start(); err != nil { - return err - } - - packObjectsReader, err := NewPackReader(packObjectsOut) + packObjectsReader, err := git.NewPackReader(cmd) if err != nil { return err } - bundleReader, err := NewPackReader(bundle) - if err != nil { - return err - } - - summer := sha1.New() - stdout := io.MultiWriter(os.Stdout, summer) - - if _, err := fmt.Fprint(stdout, packMagic); err != nil { - return err - } + totalObjects := packObjectsReader.NumObjects() + bundleReader.NumObjects() // TODO check for overflow - size := make([]byte, 4) - binary.BigEndian.PutUint32(size, packObjectsReader.NumObjects()+bundleReader.NumObjects()) // TODO check for overflow - if _, err := stdout.Write(size); err != nil { + w, err := git.NewPackWriter(stdout, totalObjects) + if err != nil { return err } - if _, err := io.Copy(stdout, packObjectsReader); err != nil { + if _, err := io.Copy(w, packObjectsReader); err != nil { return err } @@ -117,25 +90,26 @@ func _main(packObjects []string) error { return err } - if _, err := io.Copy(stdout, bundleReader); err != nil { + if _, err := io.Copy(w, bundleReader); err != nil { return err } - if _, err := stdout.Write(summer.Sum(nil)); err != nil { + if err := w.Flush(); err != nil { return err } - fmt.Fprintf(os.Stderr, "re-used from %s: %d objects\n", bundleFileName, bundleReader.NumObjects()) + fmt.Fprintf(stderr, "re-used from pre-computed packfile: %d objects\n", bundleReader.NumObjects()) return nil } -func fallback(packObjects []string, request io.Reader) error { - cmd := exec.Command(packObjects[0], packObjects[1:]...) 
- cmd.Stdin = request - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd.Run() +func fallback(ctx context.Context, args []string, request io.Reader, stdout, stderr io.Writer) error { + cmd, err := command.New(ctx, exec.Command(args[0], args[1:]...), request, stdout, stderr) + if err != nil { + return err + } + + return cmd.Wait() } func readLine(r *bufio.Reader) (string, error) { @@ -147,12 +121,14 @@ func readLine(r *bufio.Reader) (string, error) { return string(line[:len(line)-1]), nil } +const BundleHeader = "# v2 git bundle" + func addBundleRefsToRequest(request io.Writer, bundle *bufio.Reader) error { bundleHeader, err := readLine(bundle) if err != nil { return err } - if bundleHeader != "# v2 git bundle" { + if bundleHeader != BundleHeader { return fmt.Errorf("unexpected bundle header: %q", bundleHeader) } diff --git a/pack-objects-clone-bundle/README.md b/pack-objects-clone-bundle/README.md deleted file mode 100644 index 1d6d486fc16..00000000000 --- a/pack-objects-clone-bundle/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Demonstration of concatenating pack files to speed up Git clone - -This directory contains code for an executable that can speed up a Git clone when installed on a server. The only type of clone we can speed up is a full clone. - -- compile the executable and install at some chosen path, e.g. `go build -o /tmp/pack-objects-clone-bundle` -- `git config --global uploadpack.packObjectsHook /tmp/pack-objects-bundle` (has to be global for some reason) -- in the bare repo you want to speed up, run `git bundle create clone.bundle --branches --tags` -- now do a full clone from that repo. 
If it is a local clone, use `git clone --no-local` to see the effect -- GitLab From b066d7a9432bfc0366951322c71f7e44f85b324f Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 11 Apr 2019 11:30:44 +0200 Subject: [PATCH 12/13] simplify main --- cmd/gitaly-pack-objects/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/gitaly-pack-objects/main.go b/cmd/gitaly-pack-objects/main.go index d8e91407fab..5307a9e2d19 100644 --- a/cmd/gitaly-pack-objects/main.go +++ b/cmd/gitaly-pack-objects/main.go @@ -13,12 +13,12 @@ func main() { log.Fatal("not enough argument to pack-objects hook") } - if err := _main(os.Args[1:]); err != nil { + if err := _main(); err != nil { log.Fatal(err) } } -func _main(args []string) error { +func _main() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() -- GitLab From c2a627f7ce32784e8548bac75dea0a836884e62f Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 11 Apr 2019 11:59:16 +0200 Subject: [PATCH 13/13] Fix bundle read order --- cmd/gitaly-pack-objects/main.go | 7 +++++- internal/packobjects/pack-objects-bundle.go | 24 ++++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/cmd/gitaly-pack-objects/main.go b/cmd/gitaly-pack-objects/main.go index 5307a9e2d19..a33b5373778 100644 --- a/cmd/gitaly-pack-objects/main.go +++ b/cmd/gitaly-pack-objects/main.go @@ -22,5 +22,10 @@ func _main() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - return packobjects.PackObjects(ctx, os.Args[1:], os.Stdin, os.Stdout, os.Stderr) + wd, err := os.Getwd() + if err != nil { + return err + } + + return packobjects.PackObjects(ctx, wd, os.Args[1:], os.Stdin, os.Stdout, os.Stderr) } diff --git a/internal/packobjects/pack-objects-bundle.go b/internal/packobjects/pack-objects-bundle.go index 9a7af202723..5cfcd5467b8 100644 --- a/internal/packobjects/pack-objects-bundle.go +++ b/internal/packobjects/pack-objects-bundle.go @@ -8,6 +8,7 @@ 
import ( "io" "os" "os/exec" + "path/filepath" "regexp" "strings" @@ -19,7 +20,7 @@ const bundleFileName = "gitaly/clone.bundle" var shaRegex = regexp.MustCompile(`\A[0-9a-f]{40}\z`) -func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, stderr io.Writer) error { +func PackObjects(ctx context.Context, cwd string, args []string, stdin io.Reader, stdout, stderr io.Writer) error { request := &bytes.Buffer{} scanner := bufio.NewScanner(io.TeeReader(stdin, request)) seenNot := false @@ -39,11 +40,13 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st return err } + // TODO check args. If unexpected, return fallback. + if !isClone { return fallback(ctx, args, request, stdout, stderr) } - bundleFile, err := os.Open(bundleFileName) + bundleFile, err := os.Open(filepath.Join(cwd, bundleFileName)) if err != nil { return fallback(ctx, args, request, stdout, stderr) } @@ -51,11 +54,6 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st bundle := bufio.NewReader(bundleFile) - bundleReader, err := git.NewPackReader(bundle) - if err != nil { - return fallback(ctx, args, request, stdout, stderr) - } - request = bytes.NewBuffer(bytes.TrimSpace(request.Bytes())) if _, err := request.WriteString("\n"); err != nil { return err @@ -65,6 +63,11 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st return err } + bundleReader, err := git.NewPackReader(bundle) + if err != nil { + return err + } + cmd, err := command.New(ctx, exec.Command(args[0], args[1:]...), request, nil, stderr) if err != nil { return err @@ -75,7 +78,8 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st return err } - totalObjects := packObjectsReader.NumObjects() + bundleReader.NumObjects() // TODO check for overflow + // TODO check for overflow + totalObjects := packObjectsReader.NumObjects() + bundleReader.NumObjects() w, err := git.NewPackWriter(stdout, totalObjects) 
if err != nil { @@ -90,6 +94,8 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st return err } + fmt.Fprintf(stderr, "Pre-computed packfile: %d objects\n", bundleReader.NumObjects()) + if _, err := io.Copy(w, bundleReader); err != nil { return err } @@ -98,8 +104,6 @@ func PackObjects(ctx context.Context, args []string, stdin io.Reader, stdout, st return err } - fmt.Fprintf(stderr, "re-used from pre-computed packfile: %d objects\n", bundleReader.NumObjects()) - return nil } -- GitLab