From 7d14f039e80e3cdba03104ed8563cf711fea4ee1 Mon Sep 17 00:00:00 2001 From: Zeger-Jan van de Weg Date: Mon, 17 May 2021 13:41:29 +0200 Subject: [PATCH 1/2] git: Capture GIT_TRACE2_EVENTs Git allows a caller to receive metrics and events through `GIT_TRACE` and relatives; this has been around for a while. If needed, one could enable these before this change with `GIT_TRACE=1 ./gitaly` on boot. While this works for debugging, it logs a lot of events and thus is not enabled on production system. This change introduces a mechanism to enable the second iteration of the tracing from Git. Again Git exposes a lot of events and metrics, though now the there's improvements like setting a custom sink, exposing JSON over unstructured text, and better context among lines emitted. For now Gitaly creates a new open file descriptor when a Git command is spawned and copies the input from it to an io.Writer. This is not yet useful, but does sets up plumping to capture the events. Further, it would now be trivial to copy all events to stderr during testing if one wanted to. Later changes will introduce mechanisms to analyse the stream, and expose its data structured to be more useful in the GitLab architecture. Say through Prometheus, tracing, and/or logging. --- internal/git/command_factory.go | 17 +++++ internal/git/command_factory_test.go | 33 ++++++++++ internal/git/trace2/handler.go | 62 +++++++++++++++++++ internal/git/trace2/handler_test.go | 27 ++++++++ .../metadata/featureflag/feature_flags.go | 3 + 5 files changed, 142 insertions(+) create mode 100644 internal/git/trace2/handler.go create mode 100644 internal/git/trace2/handler_test.go diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index c97663ed3ea..183baef0a9b 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "os/exec" "github.com/prometheus/client_golang/prometheus" @@ -11,7 +12,9 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/command" "gitlab.com/gitlab-org/gitaly/v14/internal/git/alternates" "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/trace2" "gitlab.com/gitlab-org/gitaly/v14/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v14/internal/storage" ) @@ -49,6 +52,7 @@ type ExecCommandFactory struct { cfg config.Cfg cgroupsManager cgroups.Manager invalidCommandsMetric *prometheus.CounterVec + trace2Sink io.Writer } // NewExecCommandFactory returns a new instance of initialized ExecCommandFactory. @@ -98,6 +102,10 @@ func (cf *ExecCommandFactory) NewWithDir(ctx context.Context, dir string, sc Cmd return cf.newCommand(ctx, nil, dir, sc, opts...) } +func (cf *ExecCommandFactory) SetTrace2Sink(w io.Writer) { + cf.trace2Sink = w +} + func (cf *ExecCommandFactory) gitPath() string { return cf.cfg.Git.BinPath } @@ -134,6 +142,15 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo repository.Gi execCommand := exec.Command(cf.gitPath(), args...) execCommand.Dir = dir + if featureflag.IsEnabled(ctx, featureflag.GitTrace2) && cf.trace2Sink != nil { + envVars, err := trace2.CopyHandler(ctx, execCommand, cf.trace2Sink) + if err != nil { + return nil, err + } + + env = append(env, envVars...) + } + command, err := command.New(ctx, execCommand, config.stdin, config.stdout, config.stderr, env...) if err != nil { return nil, err diff --git a/internal/git/command_factory_test.go b/internal/git/command_factory_test.go index 566dbe6a1cd..cb4d3d2e234 100644 --- a/internal/git/command_factory_test.go +++ b/internal/git/command_factory_test.go @@ -14,6 +14,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/helper/text" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" + "gitlab.com/gitlab-org/labkit/correlation" ) func TestGitCommandProxy(t *testing.T) { @@ -98,3 +99,35 @@ func TestExecCommandFactory_NewWithDir(t *testing.T) { require.Contains(t, err.Error(), "no such file or directory") }) } + +func TestExecCommandFactory_Trace2EventCaptured(t *testing.T) { + cfg := testcfg.Build(t) + ctx, cancel := testhelper.Context() + defer cancel() + + const corrID = "correlation-id" + ctx = correlation.ContextWithCorrelation(ctx, corrID) + + buf := &bytes.Buffer{} + gitCmdFactory := git.NewExecCommandFactory(cfg) + gitCmdFactory.SetTrace2Sink(buf) + + cmd, err := gitCmdFactory.NewWithoutRepo(ctx, git.SubCmd{Name: "version"}) + require.NoError(t, err) + cmd.Wait() + + // The buffer should contain at least info about: + require.NotEmpty(t, buf) + + // Gits version + require.Contains(t, buf.String(), `"event":"version"`) + + // Start the command and report the name + require.Contains(t, buf.String(), `"event":"cmd_name"`) + + // report the exit + require.Contains(t, buf.String(), `"event":"exit"`) + + // Propegates the SID to child events + require.Contains(t, buf.String(), `"sid":"`+corrID) +} diff --git a/internal/git/trace2/handler.go b/internal/git/trace2/handler.go new file mode 100644 index 00000000000..d4a507d4acd --- /dev/null +++ b/internal/git/trace2/handler.go @@ -0,0 +1,62 @@ +// trace2 provides utilities to leverage Gits trace2 functionalities within +// Gitaly. +package trace2 + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + + "gitlab.com/gitlab-org/labkit/correlation" +) + +const ( + // By setting this environment variable for Git process, it will enable + // the trace 2. Furhter, the value it will be set to tells Git where + // to write the output to. Gitaly uses an open file descriptor. + trace2EventFDKey = "GIT_TRACE2_EVENT" + + // Git continiously shells out to child processes and will set the SID + // as environment variable. The child than appends their own unique ID. + // For Gitaly we set the SID to the correlation ID to allow for + // correlation throughout GitLabs components. + sessionIDKey = "GIT_TRACE2_PARENT_SID" +) + +// CopyHandler takes the trace2 stream, and copies it over to a passed io.Writer +// Note it still receives the stream on a fresh file descriptor, not 2. +// Returns environment variables which MUST be injected by the +// caller into the spawned Git process; else the sink will never be +// closed and a resource leak occurs. +func CopyHandler(ctx context.Context, cmd *exec.Cmd, sink io.Writer) ([]string, error) { + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + + cancelCtx, _ := context.WithCancel(ctx) + + go func() { + for { + select { + case <-cancelCtx.Done(): + w.Close() + r.Close() + break + default: + io.Copy(sink, r) + } + + } + }() + + cmd.ExtraFiles = append(cmd.ExtraFiles, w) + + return []string{ + // Plus 2 accounts for stdin (0), stdout (1), and stderr (2). + fmt.Sprintf("%s=%d", trace2EventFDKey, 2+len(cmd.ExtraFiles)), + fmt.Sprintf("%s=%s", sessionIDKey, correlation.ExtractFromContextOrGenerate(ctx)), + }, nil +} diff --git a/internal/git/trace2/handler_test.go b/internal/git/trace2/handler_test.go new file mode 100644 index 00000000000..df896cc62eb --- /dev/null +++ b/internal/git/trace2/handler_test.go @@ -0,0 +1,27 @@ +package trace2 + +import ( + "bytes" + "fmt" + "os/exec" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestCopyHandler(t *testing.T) { + ctx, cancel := testhelper.Context() + defer cancel() + + cmd := exec.Command("env") + + var sink bytes.Buffer + env, err := CopyHandler(ctx, cmd, &sink) + require.NoError(t, err) + + require.Len(t, cmd.ExtraFiles, 1) + require.Equal(t, env[0], fmt.Sprintf("%s=3", trace2EventFDKey)) + require.True(t, strings.HasPrefix(env[1], sessionIDKey)) +} diff --git a/internal/metadata/featureflag/feature_flags.go b/internal/metadata/featureflag/feature_flags.go index 88f3d69e1f4..7e429dc353d 100644 --- a/internal/metadata/featureflag/feature_flags.go +++ b/internal/metadata/featureflag/feature_flags.go @@ -23,6 +23,8 @@ var ( TxConfig = FeatureFlag{Name: "tx_config", OnByDefault: false} // TxRemote enables transactional voting for AddRemote and DeleteRemote. TxRemote = FeatureFlag{Name: "tx_remote", OnByDefault: false} + // GitTrace2 enables the GIT_TRACE2_* environment variables required for Gitaly to ingest the trace events + GitTrace2 = FeatureFlag{Name: "git_trace2", OnByDefault: false} ) // All includes all feature flags. @@ -34,4 +36,5 @@ var All = []FeatureFlag{ FetchInternalRemoteErrors, TxConfig, TxRemote, + GitTrace2, } -- GitLab From 848ea1040658f005d00665c68f30d9284b9b484b Mon Sep 17 00:00:00 2001 From: Zeger-Jan van de Weg Date: Thu, 20 May 2021 11:01:21 +0200 Subject: [PATCH 2/2] trace2: Expose child events to logs While trace2 emits a lot of messages, this change disregards almost all of them except Git shelling out to a child process. In that case we capture only the most important details like the SID, arguments, and exit code. The trace2.CopyHandler has been renamed, and tests have been updated to validate the messages in the logs. Changelog: added --- internal/git/command_factory.go | 10 +--- internal/git/command_factory_test.go | 42 ++++++++++------- internal/git/trace2/handler.go | 70 ++++++++++++++++++++++++---- internal/git/trace2/handler_test.go | 4 +- 4 files changed, 88 insertions(+), 38 deletions(-) diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index 183baef0a9b..0142d515faa 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io" "os/exec" "github.com/prometheus/client_golang/prometheus" @@ -52,7 +51,6 @@ type ExecCommandFactory struct { cfg config.Cfg cgroupsManager cgroups.Manager invalidCommandsMetric *prometheus.CounterVec - trace2Sink io.Writer } // NewExecCommandFactory returns a new instance of initialized ExecCommandFactory. @@ -102,10 +100,6 @@ func (cf *ExecCommandFactory) NewWithDir(ctx context.Context, dir string, sc Cmd return cf.newCommand(ctx, nil, dir, sc, opts...) } -func (cf *ExecCommandFactory) SetTrace2Sink(w io.Writer) { - cf.trace2Sink = w -} - func (cf *ExecCommandFactory) gitPath() string { return cf.cfg.Git.BinPath } @@ -142,8 +136,8 @@ func (cf *ExecCommandFactory) newCommand(ctx context.Context, repo repository.Gi execCommand := exec.Command(cf.gitPath(), args...) execCommand.Dir = dir - if featureflag.IsEnabled(ctx, featureflag.GitTrace2) && cf.trace2Sink != nil { - envVars, err := trace2.CopyHandler(ctx, execCommand, cf.trace2Sink) + if featureflag.IsEnabled(ctx, featureflag.GitTrace2) { + envVars, err := trace2.LogHandler(ctx, execCommand) if err != nil { return nil, err } diff --git a/internal/git/command_factory_test.go b/internal/git/command_factory_test.go index cb4d3d2e234..40810e281ba 100644 --- a/internal/git/command_factory_test.go +++ b/internal/git/command_factory_test.go @@ -8,6 +8,8 @@ import ( "os" "testing" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" @@ -108,26 +110,30 @@ func TestExecCommandFactory_Trace2EventCaptured(t *testing.T) { const corrID = "correlation-id" ctx = correlation.ContextWithCorrelation(ctx, corrID) - buf := &bytes.Buffer{} - gitCmdFactory := git.NewExecCommandFactory(cfg) - gitCmdFactory.SetTrace2Sink(buf) + logBuffer := &bytes.Buffer{} + log := &logrus.Logger{Out: logBuffer, Formatter: &logrus.JSONFormatter{}, Level: logrus.InfoLevel} + testCtx := ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + repo, _, cleanup := gittest.CloneRepoAtStorage(t, cfg.Storages[0], t.Name()) + t.Cleanup(cleanup) - cmd, err := gitCmdFactory.NewWithoutRepo(ctx, git.SubCmd{Name: "version"}) + gitCmdFactory := git.NewExecCommandFactory(cfg) + cmd, err := gitCmdFactory.New(testCtx, repo, git.SubCmd{ + Name: "repack", + Flags: []git.Option{git.Flag{Name: "--no-write-bitmap-index"}}}, + ) require.NoError(t, err) cmd.Wait() - // The buffer should contain at least info about: - require.NotEmpty(t, buf) - - // Gits version - require.Contains(t, buf.String(), `"event":"version"`) - - // Start the command and report the name - require.Contains(t, buf.String(), `"event":"cmd_name"`) - - // report the exit - require.Contains(t, buf.String(), `"event":"exit"`) - - // Propegates the SID to child events - require.Contains(t, buf.String(), `"sid":"`+corrID) + strBuf := logBuffer.String() + + for _, expected := range []string{ + `"SID":"correlation-id/`, + `"event":"child_start"`, + `"event":"child_exit"`, + `"exit_code":0`, + `"argv":["git","pack-objects"`, + } { + require.Contains(t, strBuf, expected) + } } diff --git a/internal/git/trace2/handler.go b/internal/git/trace2/handler.go index d4a507d4acd..effd92f63bf 100644 --- a/internal/git/trace2/handler.go +++ b/internal/git/trace2/handler.go @@ -3,13 +3,18 @@ package trace2 import ( + "bufio" "context" + "encoding/json" "fmt" "io" "os" "os/exec" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/labkit/correlation" + "gitlab.com/gitlab-org/labkit/log" ) const ( @@ -25,20 +30,19 @@ const ( sessionIDKey = "GIT_TRACE2_PARENT_SID" ) -// CopyHandler takes the trace2 stream, and copies it over to a passed io.Writer -// Note it still receives the stream on a fresh file descriptor, not 2. -// Returns environment variables which MUST be injected by the -// caller into the spawned Git process; else the sink will never be -// closed and a resource leak occurs. -func CopyHandler(ctx context.Context, cmd *exec.Cmd, sink io.Writer) ([]string, error) { +// LogHandler takes the trace2 events and logs child processes starting as well +// as their completion. +// Note the exec.Cmd is altered to include a FD for git to write the output to. +func LogHandler(ctx context.Context, cmd *exec.Cmd) ([]string, error) { r, w, err := os.Pipe() if err != nil { return nil, err } - cancelCtx, _ := context.WithCancel(ctx) - go func() { + cancelCtx, _ := context.WithCancel(ctx) + bufReader := bufio.NewReader(r) + for { select { case <-cancelCtx.Done(): @@ -46,7 +50,13 @@ func CopyHandler(ctx context.Context, cmd *exec.Cmd, sink io.Writer) ([]string, r.Close() break default: - io.Copy(sink, r) + err := logTraceEvents(cancelCtx, bufReader) + if err == io.EOF { + continue + } + if err != nil { + log.WithError(err).Error("failed to parse trace2 event") + } } } @@ -60,3 +70,45 @@ func CopyHandler(ctx context.Context, cmd *exec.Cmd, sink io.Writer) ([]string, fmt.Sprintf("%s=%s", sessionIDKey, correlation.ExtractFromContextOrGenerate(ctx)), }, nil } + +type trace2Event struct { + Event string `json:"event"` + Argv []string `json:"argv"` // Only present at child_start events + SID string `json:"sid"` + ExitCode int `json:"code"` +} + +const ( + childEventStart = "child_start" + childEventExit = "child_exit" +) + +func shouldLogEvent(e *trace2Event) bool { + return e.Event == childEventStart || e.Event == childEventExit +} + +func logTraceEvents(ctx context.Context, r *bufio.Reader) error { + line, err := r.ReadBytes('\n') + if err != nil { + return err + } + + event := &trace2Event{} + if err := json.Unmarshal(line, event); err != nil { + ctxlogrus.Extract(ctx).WithError(err).Warn("error decoding trace2 json") + + // This is not an error the caller can handle + return nil + } + + if shouldLogEvent(event) { + ctxlogrus.Extract(ctx).WithFields(logrus.Fields{ + "event": event.Event, + "SID": event.SID, + "argv": event.Argv, + "exit_code": event.ExitCode, + }).Info("trace2 event") + } + + return nil +} diff --git a/internal/git/trace2/handler_test.go b/internal/git/trace2/handler_test.go index df896cc62eb..90143e2dc37 100644 --- a/internal/git/trace2/handler_test.go +++ b/internal/git/trace2/handler_test.go @@ -1,7 +1,6 @@ package trace2 import ( - "bytes" "fmt" "os/exec" "strings" @@ -17,8 +16,7 @@ func TestCopyHandler(t *testing.T) { cmd := exec.Command("env") - var sink bytes.Buffer - env, err := CopyHandler(ctx, cmd, &sink) + env, err := LogHandler(ctx, cmd) require.NoError(t, err) require.Len(t, cmd.ExtraFiles, 1) -- GitLab