diff --git a/app.go b/app.go index 18513f0dc68dc642d40be0690e40f63f2da31fe4..458826c4e14088a79401643efdee3fa49e53f087 100644 --- a/app.go +++ b/app.go @@ -26,6 +26,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/auth" cfg "gitlab.com/gitlab-org/gitlab-pages/internal/config" "gitlab.com/gitlab-org/gitlab-pages/internal/config/tls" + internalCtx "gitlab.com/gitlab-org/gitlab-pages/internal/ctx" "gitlab.com/gitlab-org/gitlab-pages/internal/customheaders" "gitlab.com/gitlab-org/gitlab-pages/internal/domain" "gitlab.com/gitlab-org/gitlab-pages/internal/handlers" @@ -132,7 +133,7 @@ func (a *theApp) tryAuxiliaryHandlers(w http.ResponseWriter, r *http.Request, ht if _, err := domain.GetLookupPath(r); err != nil { if errors.Is(err, gitlab.ErrDiskDisabled) { - errortracking.Capture(err) + errortracking.Capture(err, errortracking.WithRequest(r)) httperrors.Serve500(w) return true } @@ -193,7 +194,18 @@ func (a *theApp) auxiliaryMiddleware(handler http.Handler) http.Handler { func (a *theApp) serveFileOrNotFoundHandler() http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() - defer metrics.ServingTime.Observe(time.Since(start).Seconds()) + r = r.WithContext(internalCtx.SetStartTime(r.Context(), start)) + + defer func(start time.Time) { + metrics.ServingTime.Observe(time.Since(start).Seconds()) + + if time.Since(start) > a.config.General.SlowServingThreshold { + metrics.ServingSlowCount.Inc() + logging.LogRequest(r). + WithField("slow_serving_threshold", a.config.General.SlowServingThreshold). + Warn("slow request") + } + }(start) domain := domain.FromRequest(r) fileServed := domain.ServeFileHTTP(w, r) diff --git a/internal/config/config.go b/internal/config/config.go index c6f91db0b78d2fc269ab65624b8d56b021480c15..ea78fa9c2df883f2fafa0cd261bbdcf91cba51be 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -45,15 +45,16 @@ type Config struct { // General groups settings that are general to GitLab Pages and can not // be categorized under other head. type General struct { - Domain string - MaxConns int - MaxURILength int - MetricsAddress string - RedirectHTTP bool - RootCertificate []byte - RootDir string - RootKey []byte - StatusPath string + Domain string + MaxConns int + MaxURILength int + SlowServingThreshold time.Duration + MetricsAddress string + RedirectHTTP bool + RootCertificate []byte + RootDir string + RootKey []byte + StatusPath string DisableCrossOriginRequests bool InsecureCiphers bool @@ -183,6 +184,7 @@ func loadConfig() (*Config, error) { Domain: strings.ToLower(*pagesDomain), MaxConns: *maxConns, MaxURILength: *maxURILength, + SlowServingThreshold: *slowServingThreshold, MetricsAddress: *metricsAddress, RedirectHTTP: *redirectHTTP, RootDir: *pagesRoot, @@ -292,6 +294,7 @@ func LogConfig(config *Config) { "listen-proxy": listenProxy, "listen-https-proxyv2": listenHTTPSProxyv2, "log-format": *logFormat, + "slow-serving-threshold": config.General.SlowServingThreshold, "metrics-address": *metricsAddress, "pages-domain": *pagesDomain, "pages-root": *pagesRoot, diff --git a/internal/config/flags.go b/internal/config/flags.go index 6c9bd4a62049541074cedcec9cc66b8d0667da60..5a9d4d6a08b11cfbf1ff9d50c30de99ef96ca7f5 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -30,6 +30,7 @@ var ( propagateCorrelationID = flag.Bool("propagate-correlation-id", false, "Reuse existing Correlation-ID from the incoming request header `X-Request-ID` if present") logFormat = flag.String("log-format", "json", "The log output format: 'text' or 'json'") logVerbose = flag.Bool("log-verbose", false, "Verbose logging") + slowServingThreshold = flag.Duration("slow-serving-threshold", time.Second, "Set duration to log serving information when request takes longer than this threshold") secret = flag.String("auth-secret", "", "Cookie store hash key, should be at least 32 bytes long") publicGitLabServer = flag.String("gitlab-server", "", "Public GitLab server, for example https://www.gitlab.com") internalGitLabServer = flag.String("internal-gitlab-server", "", "Internal GitLab server used for API requests, useful if you want to send that traffic over an internal load balancer, example value https://gitlab.example.internal (defaults to value of gitlab-server)") diff --git a/internal/ctx/context.go b/internal/ctx/context.go new file mode 100644 index 0000000000000000000000000000000000000000..599c4d3ff4dbea933ce35c5b782abac7495f1488 --- /dev/null +++ b/internal/ctx/context.go @@ -0,0 +1,30 @@ +package ctx + +import ( + "context" + "time" +) + +type internalCtx string + +const ( + startTimeCtxKey internalCtx = "start_time_ctx" + lookupPathCtxKey internalCtx = "lookup_path_ctx" +) + +// SetStartTime in context +func SetStartTime(ctx context.Context, start time.Time) context.Context { + return context.WithValue(ctx, startTimeCtxKey, start) +} + +// GetStartTime from context +func GetStartTime(ctx context.Context) time.Time { + value := ctx.Value(startTimeCtxKey) + + startTime, ok := value.(time.Time) + if !ok { + return time.Time{} + } + + return startTime +} diff --git a/internal/ctx/context_test.go b/internal/ctx/context_test.go new file mode 100644 index 0000000000000000000000000000000000000000..5b165003dad5cfbf52af4cef9e6ae7d740963609 --- /dev/null +++ b/internal/ctx/context_test.go @@ -0,0 +1,18 @@ +package ctx + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestSetAndGetStartTime(t *testing.T) { + start := time.Now() + ctx := context.Background() + ctx = SetStartTime(ctx, start) + + timeFromCtx := GetStartTime(ctx) + require.Equal(t, start, timeFromCtx) +} diff --git a/internal/httperrors/httperrors.go b/internal/httperrors/httperrors.go index a96e4d855da62c0035d0544ead8a7b1d518a9f99..a81432c0312ada857d6d586200f2c21ae7a7a83d 100644 --- a/internal/httperrors/httperrors.go +++ b/internal/httperrors/httperrors.go @@ -3,10 +3,13 @@ package httperrors import ( "fmt" "net/http" + "time" "gitlab.com/gitlab-org/labkit/correlation" "gitlab.com/gitlab-org/labkit/errortracking" "gitlab.com/gitlab-org/labkit/log" + + internalCtx "gitlab.com/gitlab-org/gitlab-pages/internal/ctx" ) type content struct { @@ -211,9 +214,11 @@ func Serve500(w http.ResponseWriter) { func Serve500WithRequest(w http.ResponseWriter, r *http.Request, reason string, err error) { log.WithFields(log.Fields{ "correlation_id": correlation.ExtractFromContext(r.Context()), + "duration_ms": time.Since(internalCtx.GetStartTime(r.Context())).Milliseconds(), "host": r.Host, "path": r.URL.Path, }).WithError(err).Error(reason) + errortracking.Capture(err, errortracking.WithRequest(r)) serveErrorPage(w, content500) } diff --git a/internal/logging/logging.go b/internal/logging/logging.go index edc670d63cac84ee24f230bbbbee7d66e1341437..8ddc90d01776b00b8b44b27936803b8f2979c7b5 100644 --- a/internal/logging/logging.go +++ b/internal/logging/logging.go @@ -2,11 +2,13 @@ package logging import ( "net/http" + "time" "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/labkit/correlation" "gitlab.com/gitlab-org/labkit/log" + internalCtx "gitlab.com/gitlab-org/gitlab-pages/internal/ctx" "gitlab.com/gitlab-org/gitlab-pages/internal/request" ) @@ -69,6 +71,7 @@ func enrichExtraFields(extraFields log.ExtraFieldsGeneratorFunc) log.ExtraFields return func(r *http.Request) log.Fields { enrichedFields := log.Fields{ "correlation_id": correlation.ExtractFromContext(r.Context()), + "duration_ms": time.Since(internalCtx.GetStartTime(r.Context())).Milliseconds(), "pages_https": request.IsHTTPS(r), "pages_host": r.Host, } @@ -87,6 +90,7 @@ func enrichExtraFields(extraFields log.ExtraFieldsGeneratorFunc) log.ExtraFields func LogRequest(r *http.Request) *logrus.Entry { return log.WithFields(log.Fields{ "correlation_id": correlation.ExtractFromContext(r.Context()), + "duration_ms": time.Since(internalCtx.GetStartTime(r.Context())).Milliseconds(), "host": r.Host, "path": r.URL.Path, }) diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index 5a6122df0becfaa6a2c5ed2266f11cd94074bcdf..526471a075838459d5e6b86b37f911950ce529dc 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -251,6 +251,11 @@ func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter // Open and serve content of file file, err := root.Open(ctx, fullPath) if err != nil { + // Handle context.Canceled error as not exist https://gitlab.com/gitlab-org/gitlab-pages/-/issues/669 + if errors.Is(err, context.Canceled) { + return fs.ErrNotExist + } + return err } defer file.Close() diff --git a/internal/source/gitlab/cache/retriever.go b/internal/source/gitlab/cache/retriever.go index 2f6c1f07452c16aade0c56c0edc42e1bbc9cf4a9..dc6d76bfdd7102de153d457f9e0686cbc8a73d8f 100644 --- a/internal/source/gitlab/cache/retriever.go +++ b/internal/source/gitlab/cache/retriever.go @@ -36,6 +36,7 @@ func NewRetriever(client api.Client, retrievalTimeout, maxRetrievalInterval time // backoff. It has its own context with timeout. func (r *Retriever) Retrieve(originalCtx context.Context, domain string) (lookup api.Lookup) { logMsg := "" + start := time.Now() // forward correlation_id from originalCtx to the new independent context correlationID := correlation.ExtractFromContext(originalCtx) @@ -52,13 +53,20 @@ func (r *Retriever) Retrieve(originalCtx context.Context, domain string) (lookup logMsg = "retrieval response sent" } - log.WithFields(log.Fields{ + l := log.WithFields(log.Fields{ "correlation_id": correlationID, "requested_domain": domain, "lookup_name": lookup.Name, "lookup_paths": lookup.Domain, "lookup_error": lookup.Error, - }).WithError(ctx.Err()).Debug(logMsg) + "duration_ms": time.Since(start).Milliseconds(), + }) + + if lookup.Error != nil { + l.WithError(ctx.Err()).Error(logMsg) + } else { + l.WithError(ctx.Err()).Debug(logMsg) + } return lookup } diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index a982519067095d687cf02876dd2cc8441cfb2a4a..5677b22818a5b345de0db431324643c58c26da53 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -179,13 +179,16 @@ func (zfs *zipVFS) Root(ctx context.Context, path string, cacheKey string) (vfs. // we do it in loop to not use any additional locks for { root, err := zfs.findOrOpenArchive(ctx, cacheKey, path) - if errors.Is(err, errAlreadyCached) { - continue - } - - // If archive is not found, return a known `vfs` error - if errors.Is(err, httprange.ErrNotFound) { + switch err { + case context.Canceled: + // treat user-initiated cancellations as not found and + // do not report a 500 https://gitlab.com/gitlab-org/gitlab-pages/-/issues/669 + fallthrough + case httprange.ErrNotFound: + // If archive is not found, return a known `vfs` error return nil, fs.ErrNotExist + case errAlreadyCached: + continue } return root, err diff --git a/metrics/metrics.go b/metrics/metrics.go index 680f7a0503a8d52482c510f3728ec25499795766..ed4d5559d2135115ef68f2a45287b8bf7f48c8a7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -63,6 +63,14 @@ var ( Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 60, 180}, }) + // ServingSlowCount metric for requests taking longer than the slow-serving-threshold + // configuration flag. + ServingSlowCount = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "gitlab_pages_serving_slow_count", + Help: "The number of requests taking longer than the slow-serving-threshold", + }, + ) + // VFSOperations metric for VFS operations (lstat, readlink, open) VFSOperations = prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "gitlab_pages_vfs_operations_total", @@ -233,6 +241,7 @@ func MustRegister() { DomainsSourceFailures, DiskServingFileSize, ServingTime, + ServingSlowCount, VFSOperations, HTTPRangeRequestsTotal, HTTPRangeRequestDuration, diff --git a/test/acceptance/metrics_test.go b/test/acceptance/metrics_test.go index cb4262fba96831567e09a97e090c794b56eb00e4..19a6ed01ed892373c487735598657e7f0fde8b83 100644 --- a/test/acceptance/metrics_test.go +++ b/test/acceptance/metrics_test.go @@ -38,6 +38,7 @@ func TestPrometheusMetricsCanBeScraped(t *testing.T) { require.Contains(t, string(body), "gitlab_pages_domains_source_failures_total") require.Contains(t, string(body), "gitlab_pages_disk_serving_file_size_bytes_sum") require.Contains(t, string(body), "gitlab_pages_serving_time_seconds_sum") + require.Contains(t, string(body), "gitlab_pages_serving_slow_count") require.Contains(t, string(body), `gitlab_pages_domains_source_api_requests_total{status_code="200"}`) require.Contains(t, string(body), `gitlab_pages_domains_source_api_call_duration_bucket`) require.Contains(t, string(body), `gitlab_pages_domains_source_api_trace_duration`)