From 8675794e0c5855fe1c67eea40c17265964ca27b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Mon, 17 Aug 2020 14:02:52 +0200 Subject: [PATCH 1/4] Rename `serving/disk` into `serving/files/disk` --- internal/domain/domain.go | 2 +- internal/domain/domain_test.go | 2 +- internal/serving/file/disk/serving.go | 16 ++++++++++++ internal/serving/{disk => file}/errors.go | 2 +- internal/serving/{disk => file}/helpers.go | 2 +- internal/serving/{disk => file}/reader.go | 4 +-- internal/serving/{disk => file}/serving.go | 25 ++++++++----------- .../serving/{disk => file}/serving_test.go | 2 +- .../serving/{disk => file}/symlink/LICENSE | 0 .../serving/{disk => file}/symlink/PATENTS | 0 .../serving/{disk => file}/symlink/README.md | 0 .../{disk => file}/symlink/path_test.go | 2 +- .../serving/{disk => file}/symlink/shims.go | 0 .../serving/{disk => file}/symlink/symlink.go | 0 internal/source/disk/custom.go | 2 +- internal/source/disk/group.go | 2 +- internal/source/gitlab/factory.go | 2 +- internal/source/gitlab/factory_test.go | 2 +- 18 files changed, 39 insertions(+), 26 deletions(-) create mode 100644 internal/serving/file/disk/serving.go rename internal/serving/{disk => file}/errors.go (96%) rename internal/serving/{disk => file}/helpers.go (99%) rename internal/serving/{disk => file}/reader.go (98%) rename internal/serving/{disk => file}/serving.go (63%) rename internal/serving/{disk => file}/serving_test.go (98%) rename internal/serving/{disk => file}/symlink/LICENSE (100%) rename internal/serving/{disk => file}/symlink/PATENTS (100%) rename internal/serving/{disk => file}/symlink/README.md (100%) rename internal/serving/{disk => file}/symlink/path_test.go (99%) rename internal/serving/{disk => file}/symlink/shims.go (100%) rename internal/serving/{disk => file}/symlink/symlink.go (100%) diff --git a/internal/domain/domain.go b/internal/domain/domain.go index 7c1639a33..235c4e4db 100644 --- a/internal/domain/domain.go +++ b/internal/domain/domain.go @@ -9,7 +9,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/httperrors" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" ) // Domain is a domain that gitlab-pages can serve. diff --git a/internal/domain/domain_test.go b/internal/domain/domain_test.go index 26a7735cd..61a2c39f5 100644 --- a/internal/domain/domain_test.go +++ b/internal/domain/domain_test.go @@ -11,7 +11,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/fixture" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" "gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers" ) diff --git a/internal/serving/file/disk/serving.go b/internal/serving/file/disk/serving.go new file mode 100644 index 000000000..b9a426331 --- /dev/null +++ b/internal/serving/file/disk/serving.go @@ -0,0 +1,16 @@ +package disk + +import ( + "gitlab.com/gitlab-org/gitlab-pages/internal/serving" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local" +) + +var disk = file.New(vfs.Instrumented(local.VFS{}, "disk")) + +// New returns a serving instance that is capable of reading files +// from the disk +func New() serving.Serving { + return disk +} diff --git a/internal/serving/disk/errors.go b/internal/serving/file/errors.go similarity index 96% rename from internal/serving/disk/errors.go rename to internal/serving/file/errors.go index 5e55220be..2dab0a1dc 100644 --- a/internal/serving/disk/errors.go +++ b/internal/serving/file/errors.go @@ -1,4 +1,4 @@ -package disk +package file type locationDirectoryError struct { FullPath string diff --git a/internal/serving/disk/helpers.go b/internal/serving/file/helpers.go similarity index 99% rename from internal/serving/disk/helpers.go rename to internal/serving/file/helpers.go index e6d3f8ab6..e2d5e778b 100644 --- a/internal/serving/disk/helpers.go +++ b/internal/serving/file/helpers.go @@ -1,4 +1,4 @@ -package disk +package file import ( "context" diff --git a/internal/serving/disk/reader.go b/internal/serving/file/reader.go similarity index 98% rename from internal/serving/disk/reader.go rename to internal/serving/file/reader.go index 8c7ee3bf3..180a90ae5 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/file/reader.go @@ -1,4 +1,4 @@ -package disk +package file import ( "context" @@ -13,7 +13,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk/symlink" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/symlink" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" ) diff --git a/internal/serving/disk/serving.go b/internal/serving/file/serving.go similarity index 63% rename from internal/serving/disk/serving.go rename to internal/serving/file/serving.go index b7501b802..1b4bb5836 100644 --- a/internal/serving/disk/serving.go +++ b/internal/serving/file/serving.go @@ -1,33 +1,25 @@ -package disk +package file import ( "gitlab.com/gitlab-org/gitlab-pages/internal/httperrors" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" - "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local" "gitlab.com/gitlab-org/gitlab-pages/metrics" ) -var disk = &Disk{ - reader: Reader{ - fileSizeMetric: metrics.DiskServingFileSize, - vfs: vfs.Instrumented(local.VFS{}, "disk"), - }, -} - // Disk describes a disk access serving -type Disk struct { +type Files struct { reader Reader } // ServeFileHTTP serves a file from disk and returns true. It returns false // when a file could not been found. -func (s *Disk) ServeFileHTTP(h serving.Handler) bool { +func (s *Files) ServeFileHTTP(h serving.Handler) bool { return s.reader.tryFile(h) == nil } // ServeNotFoundHTTP tries to read a custom 404 page -func (s *Disk) ServeNotFoundHTTP(h serving.Handler) { +func (s *Files) ServeNotFoundHTTP(h serving.Handler) { if s.reader.tryNotFound(h) == nil { return } @@ -38,6 +30,11 @@ func (s *Disk) ServeNotFoundHTTP(h serving.Handler) { // New returns a serving instance that is capable of reading files // from the disk -func New() serving.Serving { - return disk +func New(vfs vfs.VFS) serving.Serving { + return &Files{ + reader: Reader{ + fileSizeMetric: metrics.DiskServingFileSize, + vfs: vfs, + }, + } } diff --git a/internal/serving/disk/serving_test.go b/internal/serving/file/serving_test.go similarity index 98% rename from internal/serving/disk/serving_test.go rename to internal/serving/file/serving_test.go index 02b7fac74..9a614a003 100644 --- a/internal/serving/disk/serving_test.go +++ b/internal/serving/file/serving_test.go @@ -1,4 +1,4 @@ -package disk +package file import ( "io/ioutil" diff --git a/internal/serving/disk/symlink/LICENSE b/internal/serving/file/symlink/LICENSE similarity index 100% rename from internal/serving/disk/symlink/LICENSE rename to internal/serving/file/symlink/LICENSE diff --git a/internal/serving/disk/symlink/PATENTS b/internal/serving/file/symlink/PATENTS similarity index 100% rename from internal/serving/disk/symlink/PATENTS rename to internal/serving/file/symlink/PATENTS diff --git a/internal/serving/disk/symlink/README.md b/internal/serving/file/symlink/README.md similarity index 100% rename from internal/serving/disk/symlink/README.md rename to internal/serving/file/symlink/README.md diff --git a/internal/serving/disk/symlink/path_test.go b/internal/serving/file/symlink/path_test.go similarity index 99% rename from internal/serving/disk/symlink/path_test.go rename to internal/serving/file/symlink/path_test.go index 4d590db52..5ba5f5f27 100644 --- a/internal/serving/disk/symlink/path_test.go +++ b/internal/serving/file/symlink/path_test.go @@ -12,7 +12,7 @@ import ( "runtime" "testing" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk/symlink" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/symlink" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local" ) diff --git a/internal/serving/disk/symlink/shims.go b/internal/serving/file/symlink/shims.go similarity index 100% rename from internal/serving/disk/symlink/shims.go rename to internal/serving/file/symlink/shims.go diff --git a/internal/serving/disk/symlink/symlink.go b/internal/serving/file/symlink/symlink.go similarity index 100% rename from internal/serving/disk/symlink/symlink.go rename to internal/serving/file/symlink/symlink.go diff --git a/internal/source/disk/custom.go b/internal/source/disk/custom.go index 2668ed816..8ecc1b9f3 100644 --- a/internal/source/disk/custom.go +++ b/internal/source/disk/custom.go @@ -4,7 +4,7 @@ import ( "net/http" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" ) type customProjectResolver struct { diff --git a/internal/source/disk/group.go b/internal/source/disk/group.go index e0365bbdc..5b13e34e5 100644 --- a/internal/source/disk/group.go +++ b/internal/source/disk/group.go @@ -8,7 +8,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/host" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" ) const ( diff --git a/internal/source/gitlab/factory.go b/internal/source/gitlab/factory.go index d526994f8..954bd71d7 100644 --- a/internal/source/gitlab/factory.go +++ b/internal/source/gitlab/factory.go @@ -6,7 +6,7 @@ import ( log "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitlab-pages/internal/serving" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" "gitlab.com/gitlab-org/gitlab-pages/internal/serving/serverless" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" ) diff --git a/internal/source/gitlab/factory_test.go b/internal/source/gitlab/factory_test.go index 2f3e19940..cc273fbf5 100644 --- a/internal/source/gitlab/factory_test.go +++ b/internal/source/gitlab/factory_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitlab-pages/internal/fixture" - "gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" "gitlab.com/gitlab-org/gitlab-pages/internal/serving/serverless" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" ) -- GitLab From aecf4cbc265b5fe3b932fdd47013d60629ecb5f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Mon, 17 Aug 2020 14:22:30 +0200 Subject: [PATCH 2/4] Add VFS: Dir --- internal/serving/file/helpers.go | 9 ++-- internal/serving/file/reader.go | 63 ++++++++++++------------ internal/serving/file/symlink/shims.go | 2 +- internal/serving/file/symlink/symlink.go | 2 +- internal/vfs/local/vfs.go | 45 +++++++++++++++-- internal/vfs/vfs.go | 35 ++++++++++--- 6 files changed, 108 insertions(+), 48 deletions(-) diff --git a/internal/serving/file/helpers.go b/internal/serving/file/helpers.go index e2d5e778b..30968c536 100644 --- a/internal/serving/file/helpers.go +++ b/internal/serving/file/helpers.go @@ -10,6 +10,7 @@ import ( "strings" "gitlab.com/gitlab-org/gitlab-pages/internal/httputil" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" ) func endsWithSlash(path string) bool { @@ -23,13 +24,13 @@ func endsWithoutHTMLExtension(path string) bool { // Detect file's content-type either by extension or mime-sniffing. // Implementation is adapted from Golang's `http.serveContent()` // See https://github.com/golang/go/blob/902fc114272978a40d2e65c2510a18e870077559/src/net/http/fs.go#L194 -func (reader *Reader) detectContentType(ctx context.Context, path string) (string, error) { +func (reader *Reader) detectContentType(ctx context.Context, dir vfs.Dir, path string) (string, error) { contentType := mime.TypeByExtension(filepath.Ext(path)) if contentType == "" { var buf [512]byte - file, err := reader.vfs.Open(ctx, path) + file, err := dir.Open(ctx, path) if err != nil { return "", err } @@ -55,7 +56,7 @@ func acceptsGZip(r *http.Request) bool { return acceptedEncoding == "gzip" } -func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *http.Request, fullPath string) string { +func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *http.Request, dir vfs.Dir, fullPath string) string { if !acceptsGZip(r) { return fullPath } @@ -63,7 +64,7 @@ func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r * gzipPath := fullPath + ".gz" // Ensure the .gz file is not a symlink - fi, err := reader.vfs.Lstat(ctx, gzipPath) + fi, err := dir.Lstat(ctx, gzipPath) if err != nil || !fi.Mode().IsRegular() { return fullPath } diff --git a/internal/serving/file/reader.go b/internal/serving/file/reader.go index 180a90ae5..5e78a04ba 100644 --- a/internal/serving/file/reader.go +++ b/internal/serving/file/reader.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "net/http" - "path/filepath" "strconv" "strings" "time" @@ -25,7 +24,7 @@ type Reader struct { func (reader *Reader) tryFile(h serving.Handler) error { ctx := h.Request.Context() - fullPath, err := reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath) + dir, fullPath, err := reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath) request := h.Request host := request.Host @@ -33,7 +32,7 @@ func (reader *Reader) tryFile(h serving.Handler) error { if locationError, _ := err.(*locationDirectoryError); locationError != nil { if endsWithSlash(urlPath) { - fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath, "index.html") + dir, fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath, "index.html") } else { // TODO why are we doing that? In tests it redirects to HTTPS. This seems wrong, // issue about this: https://gitlab.com/gitlab-org/gitlab-pages/issues/273 @@ -50,24 +49,24 @@ func (reader *Reader) tryFile(h serving.Handler) error { } if locationError, _ := err.(*locationFileNoExtensionError); locationError != nil { - fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, strings.TrimSuffix(h.SubPath, "/")+".html") + dir, fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, strings.TrimSuffix(h.SubPath, "/")+".html") } if err != nil { return err } - return reader.serveFile(ctx, h.Writer, h.Request, fullPath, h.LookupPath.HasAccessControl) + return reader.serveFile(ctx, h.Writer, h.Request, dir, fullPath, h.LookupPath.HasAccessControl) } func (reader *Reader) tryNotFound(h serving.Handler) error { ctx := h.Request.Context() - page404, err := reader.resolvePath(ctx, h.LookupPath.Path, "404.html") + dir, page404, err := reader.resolvePath(ctx, h.LookupPath.Path, "404.html") if err != nil { return err } - err = reader.serveCustomFile(ctx, h.Writer, h.Request, http.StatusNotFound, page404) + err = reader.serveCustomFile(ctx, h.Writer, h.Request, http.StatusNotFound, dir, page404) if err != nil { return err } @@ -76,39 +75,39 @@ func (reader *Reader) tryNotFound(h serving.Handler) error { // Resolve the HTTP request to a path on disk, converting requests for // directories to requests for index.html inside the directory if appropriate. -func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPath ...string) (string, error) { +func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPath ...string) (vfs.Dir, string, error) { // Ensure that publicPath always ends with "/" publicPath = strings.TrimSuffix(publicPath, "/") + "/" + dir, err := reader.vfs.Dir(ctx, publicPath) + if err != nil { + return nil, "", err + } + // Don't use filepath.Join as cleans the path, // where we want to traverse full path as supplied by user // (including ..) - testPath := publicPath + strings.Join(subPath, "/") - fullPath, err := symlink.EvalSymlinks(ctx, reader.vfs, testPath) + testPath := strings.Join(subPath, "/") + fullPath, err := symlink.EvalSymlinks(ctx, dir, testPath) if err != nil { if endsWithoutHTMLExtension(testPath) { - return "", &locationFileNoExtensionError{ + return nil, "", &locationFileNoExtensionError{ FullPath: fullPath, } } - return "", err - } - - // The requested path resolved to somewhere outside of the public/ directory - if !strings.HasPrefix(fullPath, publicPath) && fullPath != filepath.Clean(publicPath) { - return "", fmt.Errorf("%q should be in %q", fullPath, publicPath) + return nil, "", err } - fi, err := reader.vfs.Lstat(ctx, fullPath) + fi, err := dir.Lstat(ctx, fullPath) if err != nil { - return "", err + return nil, "", err } // The requested path is a directory, so try index.html via recursion if fi.IsDir() { - return "", &locationDirectoryError{ + return nil, "", &locationDirectoryError{ FullPath: fullPath, RelativePath: strings.TrimPrefix(fullPath, publicPath), } @@ -117,23 +116,23 @@ func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPat // The file exists, but is not a supported type to serve. Perhaps a block // special device or something else that may be a security risk. if !fi.Mode().IsRegular() { - return "", fmt.Errorf("%s: is not a regular file", fullPath) + return nil, "", fmt.Errorf("%s: is not a regular file", fullPath) } - return fullPath, nil + return dir, fullPath, nil } -func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, origPath string, accessControl bool) error { - fullPath := reader.handleGZip(ctx, w, r, origPath) +func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, dir vfs.Dir, origPath string, accessControl bool) error { + fullPath := reader.handleGZip(ctx, w, r, dir, origPath) - file, err := reader.vfs.Open(ctx, fullPath) + file, err := dir.Open(ctx, fullPath) if err != nil { return err } defer file.Close() - fi, err := reader.vfs.Lstat(ctx, fullPath) + fi, err := dir.Lstat(ctx, fullPath) if err != nil { return err } @@ -144,7 +143,7 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h w.Header().Set("Expires", time.Now().Add(10*time.Minute).Format(time.RFC1123)) } - contentType, err := reader.detectContentType(ctx, origPath) + contentType, err := reader.detectContentType(ctx, dir, origPath) if err != nil { return err } @@ -157,22 +156,22 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h return nil } -func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, origPath string) error { - fullPath := reader.handleGZip(ctx, w, r, origPath) +func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, dir vfs.Dir, origPath string) error { + fullPath := reader.handleGZip(ctx, w, r, dir, origPath) // Open and serve content of file - file, err := reader.vfs.Open(ctx, fullPath) + file, err := dir.Open(ctx, fullPath) if err != nil { return err } defer file.Close() - fi, err := reader.vfs.Lstat(ctx, fullPath) + fi, err := dir.Lstat(ctx, fullPath) if err != nil { return err } - contentType, err := reader.detectContentType(ctx, origPath) + contentType, err := reader.detectContentType(ctx, dir, origPath) if err != nil { return err } diff --git a/internal/serving/file/symlink/shims.go b/internal/serving/file/symlink/shims.go index d383b96ba..64b0bc3ce 100644 --- a/internal/serving/file/symlink/shims.go +++ b/internal/serving/file/symlink/shims.go @@ -12,6 +12,6 @@ func volumeNameLen(s string) int { return 0 } func IsAbs(path string) bool { return filepath.IsAbs(path) } func Clean(path string) string { return filepath.Clean(path) } -func EvalSymlinks(ctx context.Context, fs vfs.VFS, path string) (string, error) { +func EvalSymlinks(ctx context.Context, fs vfs.Dir, path string) (string, error) { return walkSymlinks(ctx, fs, path) } diff --git a/internal/serving/file/symlink/symlink.go b/internal/serving/file/symlink/symlink.go index 507148112..e04223d38 100644 --- a/internal/serving/file/symlink/symlink.go +++ b/internal/serving/file/symlink/symlink.go @@ -14,7 +14,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" ) -func walkSymlinks(ctx context.Context, fs vfs.VFS, path string) (string, error) { +func walkSymlinks(ctx context.Context, fs vfs.Dir, path string) (string, error) { volLen := volumeNameLen(path) pathSeparator := string(os.PathSeparator) diff --git a/internal/vfs/local/vfs.go b/internal/vfs/local/vfs.go index 7c6f3ba6b..cedfefa24 100644 --- a/internal/vfs/local/vfs.go +++ b/internal/vfs/local/vfs.go @@ -2,7 +2,10 @@ package local import ( "context" + "fmt" "os" + "path/filepath" + "strings" "golang.org/x/sys/unix" @@ -11,9 +14,43 @@ import ( type VFS struct{} -func (fs VFS) Lstat(ctx context.Context, name string) (os.FileInfo, error) { return os.Lstat(name) } -func (fs VFS) Readlink(ctx context.Context, name string) (string, error) { return os.Readlink(name) } +func (fs VFS) Dir(ctx context.Context, path string) (vfs.Dir, error) { + return &Dir{path: filepath.Clean(path)}, nil +} + +type Dir struct { + path string +} + +func (dir *Dir) validatePath(fullPath string) error { + // The requested path resolved to somewhere outside of the public/ directory + if !strings.HasPrefix(fullPath, dir.path) && fullPath != dir.path { + return fmt.Errorf("%q should be in %q", fullPath, dir.path) + } + + return nil +} + +func (dir *Dir) Lstat(ctx context.Context, name string) (os.FileInfo, error) { + if err := dir.validatePath(name); err != nil { + return nil, err + } + + return os.Lstat(filepath.Join(dir.path, name)) +} + +func (dir *Dir) Readlink(ctx context.Context, name string) (string, error) { + if err := dir.validatePath(name); err != nil { + return "", err + } + + return os.Readlink(filepath.Join(dir.path, name)) +} + +func (dir *Dir) Open(ctx context.Context, name string) (vfs.File, error) { + if err := dir.validatePath(name); err != nil { + return nil, err + } -func (fs VFS) Open(ctx context.Context, name string) (vfs.File, error) { - return os.OpenFile(name, os.O_RDONLY|unix.O_NOFOLLOW, 0) + return os.OpenFile(filepath.Join(dir.path, name), os.O_RDONLY|unix.O_NOFOLLOW, 0) } diff --git a/internal/vfs/vfs.go b/internal/vfs/vfs.go index 07c99b776..83f08cdb6 100644 --- a/internal/vfs/vfs.go +++ b/internal/vfs/vfs.go @@ -11,6 +11,11 @@ import ( // VFS abstracts the things Pages needs to serve a static site from disk. type VFS interface { + Dir(ctx context.Context, path string) (Dir, error) +} + +// Dir abstracts the things Pages needs to serve a static site from a given path. +type Dir interface { Lstat(ctx context.Context, name string) (os.FileInfo, error) Readlink(ctx context.Context, name string) (string, error) Open(ctx context.Context, name string) (File, error) @@ -36,20 +41,38 @@ func (i *InstrumentedVFS) increment(operation string, err error) { metrics.VFSOperations.WithLabelValues(i.name, operation, strconv.FormatBool(err == nil)).Inc() } -func (i *InstrumentedVFS) Lstat(ctx context.Context, name string) (os.FileInfo, error) { - fi, err := i.fs.Lstat(ctx, name) +func (i *InstrumentedVFS) Dir(ctx context.Context, path string) (Dir, error) { + dir, err := i.fs.Dir(ctx, path) + i.increment("Lstat", err) + if dir != nil { + dir = &InstrumentedDir{dir: dir, name: i.name} + } + return dir, err +} + +type InstrumentedDir struct { + dir Dir + name string +} + +func (i *InstrumentedDir) increment(operation string, err error) { + metrics.VFSOperations.WithLabelValues(i.name, operation, strconv.FormatBool(err == nil)).Inc() +} + +func (i *InstrumentedDir) Lstat(ctx context.Context, name string) (os.FileInfo, error) { + fi, err := i.dir.Lstat(ctx, name) i.increment("Lstat", err) return fi, err } -func (i *InstrumentedVFS) Readlink(ctx context.Context, name string) (string, error) { - target, err := i.fs.Readlink(ctx, name) +func (i *InstrumentedDir) Readlink(ctx context.Context, name string) (string, error) { + target, err := i.dir.Readlink(ctx, name) i.increment("Readlink", err) return target, err } -func (i *InstrumentedVFS) Open(ctx context.Context, name string) (File, error) { - f, err := i.fs.Open(ctx, name) +func (i *InstrumentedDir) Open(ctx context.Context, name string) (File, error) { + f, err := i.dir.Open(ctx, name) i.increment("Open", err) return f, err } -- GitLab From 3957883e3d210105594fb1872147a47345d3d462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Mon, 17 Aug 2020 15:34:08 +0200 Subject: [PATCH 3/4] Add disk ZIP fs --- internal/serving/file/reader.go | 9 ++- internal/serving/file/zip/serving.go | 16 ++++ internal/source/gitlab/factory.go | 3 + internal/vfs/vfs.go | 3 +- internal/vfs/zip/archive.go | 113 +++++++++++++++++++++++++++ internal/vfs/zip/vfs.go | 44 +++++++++++ 6 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 internal/serving/file/zip/serving.go create mode 100644 internal/vfs/zip/archive.go create mode 100644 internal/vfs/zip/vfs.go diff --git a/internal/serving/file/reader.go b/internal/serving/file/reader.go index 5e78a04ba..960513444 100644 --- a/internal/serving/file/reader.go +++ b/internal/serving/file/reader.go @@ -151,7 +151,14 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h reader.fileSizeMetric.Observe(float64(fi.Size())) w.Header().Set("Content-Type", contentType) - http.ServeContent(w, r, origPath, fi.ModTime(), file) + + if rs, ok := file.(io.ReadSeeker); ok { + http.ServeContent(w, r, origPath, fi.ModTime(), rs) + } else { + // Support ReadSeeker if available + w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10)) + io.Copy(w, file) + } return nil } diff --git a/internal/serving/file/zip/serving.go b/internal/serving/file/zip/serving.go new file mode 100644 index 000000000..3acaea070 --- /dev/null +++ b/internal/serving/file/zip/serving.go @@ -0,0 +1,16 @@ +package zip + +import ( + "gitlab.com/gitlab-org/gitlab-pages/internal/serving" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local" +) + +var zip = file.New(vfs.Instrumented(local.VFS{}, "zip")) + +// New returns a serving instance that is capable of reading files +// from the disk +func New() serving.Serving { + return zip +} diff --git a/internal/source/gitlab/factory.go b/internal/source/gitlab/factory.go index 954bd71d7..1d994a07a 100644 --- a/internal/source/gitlab/factory.go +++ b/internal/source/gitlab/factory.go @@ -7,6 +7,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/serving" "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk" + "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/zip" "gitlab.com/gitlab-org/gitlab-pages/internal/serving/serverless" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" ) @@ -32,6 +33,8 @@ func fabricateServing(lookup api.LookupPath) serving.Serving { switch source.Type { case "file": return disk.New() + case "zip": + return zip.New() case "serverless": serving, err := serverless.NewFromAPISource(source.Serverless) if err != nil { diff --git a/internal/vfs/vfs.go b/internal/vfs/vfs.go index 83f08cdb6..b351efce6 100644 --- a/internal/vfs/vfs.go +++ b/internal/vfs/vfs.go @@ -24,7 +24,8 @@ type Dir interface { // File represents an open file, which will typically be the response body of a Pages request. type File interface { io.Reader - io.Seeker + // TODO: Zip does not support seeking + // io.Seeker io.Closer } diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go new file mode 100644 index 000000000..aeaf515aa --- /dev/null +++ b/internal/vfs/zip/archive.go @@ -0,0 +1,113 @@ +package zip + +import ( + "archive/zip" + "context" + "io" + "io/ioutil" + "os" + "strings" + "sync" + + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" +) + +const dirPrefix = "public/" +const maxSymlinkSize = 256 + +type zipArchive struct { + path string + once sync.Once + done chan struct{} + zip *zip.ReadCloser + files map[string]*zip.File + zipErr error +} + +func (a *zipArchive) Open(ctx context.Context) error { + a.once.Do(func() { + a.zip, a.zipErr = zip.OpenReader(a.path) + if a.zip != nil { + a.processZip() + } + close(a.done) + }) + + // wait for it to close + // or exit early + select { + case <-a.done: + case <-ctx.Done(): + } + return a.zipErr +} + +func (a *zipArchive) processZip() { + for _, file := range a.zip.File { + if !strings.HasPrefix(file.Name, dirPrefix) { + continue + } + + a.files[file.Name] = file + } + + // recycle memory + a.zip.File = nil +} + +func (a *zipArchive) Close() { + if a.zip != nil { + a.zip.Close() + a.zip = nil + } +} + +func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) { + file := a.files[name] + if file == nil { + return nil, os.ErrNotExist + } + + return file.FileInfo(), nil +} + +func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) { + file := a.files[name] + if file == nil { + return "", os.ErrNotExist + } + + if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink { + return "", os.ErrInvalid + } + + rc, err := file.Open() + if err != nil { + return "", err + } + + data, err := ioutil.ReadAll(&io.LimitedReader{R: rc, N: maxSymlinkSize}) + if err != nil { + return "", err + } + + return string(data), nil +} + +func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { + file := a.files[name] + if file == nil { + return nil, os.ErrNotExist + } + + rc, err := file.Open() + // TODO: We can support `io.Seeker` if file would not be compressed + return rc, err +} + +func newArchive(path string) zipArchive { + return &zipArchive{ + path: path, + done: make(chan struct{}), + } +} diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go new file mode 100644 index 000000000..b5ba720fc --- /dev/null +++ b/internal/vfs/zip/vfs.go @@ -0,0 +1,44 @@ +package zip + +import ( + "context" + "time" + + "github.com/patrickmn/go-cache" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" +) + +type zipVFS struct { + cache *cache.Cache +} + +func (fs *zipVFS) Dir(ctx context.Context, path string) (vfs.Dir, error) { + // we do it in loop to not use any additional locks + for { + dir, found := fs.cache.Get(path) + if !found { + dir = newArchive(path) + + // if it errors, it means that it is already added + // retry again to get it + if fs.cache.Add(path, dir, cache.DefaultExpiration) != nil { + continue + } + } + + err := dir.(*zipArchive).Open(ctx) + return dir, err + } +} + +func New() vfs.VFS { + vfs := &zipVFS{ + cache: cache.New(time.Minute, 2*time.Minute), + } + vfs.cache.OnEvicted(func(path string, object interface{}) { + if archive, ok := object.(*Archive); archive != nil && ok { + archive.Close() + } + }) + return vfs +} -- GitLab From 61873541514a10ada0e7b215dcd40a94e013ef2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Mon, 17 Aug 2020 16:38:54 +0200 Subject: [PATCH 4/4] Support remote ZIP --- internal/serving/file/zip/serving.go | 6 +- internal/vfs/zip/archive.go | 60 +++++++++--- internal/vfs/zip/deflate_reader.go | 27 ++++++ internal/vfs/zip/http_reader.go | 140 +++++++++++++++++++++++++++ internal/vfs/zip/vfs.go | 26 +++-- metrics/metrics.go | 12 +++ 6 files changed, 246 insertions(+), 25 deletions(-) create mode 100644 internal/vfs/zip/deflate_reader.go create mode 100644 internal/vfs/zip/http_reader.go diff --git a/internal/serving/file/zip/serving.go b/internal/serving/file/zip/serving.go index 3acaea070..d6ce70736 100644 --- a/internal/serving/file/zip/serving.go +++ b/internal/serving/file/zip/serving.go @@ -4,13 +4,13 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/serving" "gitlab.com/gitlab-org/gitlab-pages/internal/serving/file" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" - "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs/zip" ) -var zip = file.New(vfs.Instrumented(local.VFS{}, "zip")) +var zipServing = file.New(vfs.Instrumented(zip.New(), "zip")) // New returns a serving instance that is capable of reading files // from the disk func New() serving.Serving { - return zip + return zipServing } diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index aeaf515aa..4f7ef6992 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "os" "strings" + "fmt" "sync" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" @@ -16,17 +17,18 @@ const dirPrefix = "public/" const maxSymlinkSize = 256 type zipArchive struct { - path string - once sync.Once - done chan struct{} - zip *zip.ReadCloser - files map[string]*zip.File - zipErr error + path string + once sync.Once + done chan struct{} + zip *zip.Reader + zipCloser io.Closer + files map[string]*zip.File + zipErr error } -func (a *zipArchive) Open(ctx context.Context) error { +func (a *zipArchive) openArchive(ctx context.Context) error { a.once.Do(func() { - a.zip, a.zipErr = zip.OpenReader(a.path) + a.zip, a.zipCloser, a.zipErr = openZIPArchive(a.path) if a.zip != nil { a.processZip() } @@ -55,11 +57,12 @@ func (a *zipArchive) processZip() { a.zip.File = nil } -func (a *zipArchive) Close() { - if a.zip != nil { - a.zip.Close() - a.zip = nil +func (a *zipArchive) close() { + if a.zipCloser != nil { + a.zipCloser.Close() } + a.zipCloser = nil + a.zip = nil } func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) { @@ -100,12 +103,39 @@ func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { return nil, os.ErrNotExist } - rc, err := file.Open() + dataOffset, err := file.DataOffset() + if err != nil { + return nil, err + } + // TODO: We can support `io.Seeker` if file would not be compressed - return rc, err + + if !isHTTPArchive(a.path) { + return file.Open() + } + + var reader io.ReadCloser + reader = &httpReader{ + URL: a.path, + Off: dataOffset, + N: int64(file.UncompressedSize64), + } + + switch file.Method { + case zip.Deflate: + reader = newDeflateReader(reader) + + case zip.Store: + // no-op + + default: + return nil, fmt.Errorf("unsupported compression: %x", file.Method) + } + + return reader, nil } -func newArchive(path string) zipArchive { +func newArchive(path string) *zipArchive { return &zipArchive{ path: path, done: make(chan struct{}), diff --git a/internal/vfs/zip/deflate_reader.go b/internal/vfs/zip/deflate_reader.go new file mode 100644 index 000000000..2e55ee5a3 --- /dev/null +++ b/internal/vfs/zip/deflate_reader.go @@ -0,0 +1,27 @@ +package zip + +import ( + "compress/flate" + "io" +) + +type deflateReader struct { + R io.ReadCloser + D io.ReadCloser +} + +func (r *deflateReader) Read(p []byte) (n int, err error) { + return r.D.Read(p) +} + +func (r *deflateReader) Close() error { + r.R.Close() + return r.D.Close() +} + +func newDeflateReader(r io.ReadCloser) *deflateReader { + return &deflateReader{ + R: r, + D: flate.NewReader(r), + } +} diff --git a/internal/vfs/zip/http_reader.go b/internal/vfs/zip/http_reader.go new file mode 100644 index 000000000..99c94b119 --- /dev/null +++ b/internal/vfs/zip/http_reader.go @@ -0,0 +1,140 @@ +package zip + +import ( + "archive/zip" + "fmt" + "io" + "io/ioutil" + "net/http" + "strings" + "time" + + "gitlab.com/gitlab-org/gitlab-pages/internal/httptransport" + "gitlab.com/gitlab-org/gitlab-pages/metrics" +) + +type httpReader struct { + URL string + Off int64 + N int64 + res *http.Response +} + +var httpClient = &http.Client{ + // TODO: we need connect timeout + // The longest time the request can be executed + Timeout: 30 * time.Minute, + Transport: httptransport.NewTransportWithMetrics(metrics.ZIPHttpReaderReqDuration, metrics.ZIPHttpReaderReqTotal), +} + +func (h *httpReader) ensureRequest() error { + if h.res != nil { + return nil + } + + req, err := http.NewRequest("GET", h.URL, nil) + if err != nil { + return err + } + + req.Header.Set("Range", fmt.Sprintf("%d-%d", h.Off, h.Off+h.N-1)) + res, err := httpClient.Do(req) + if err != nil { + return err + } + if res.StatusCode != http.StatusOK { + res.Body.Close() + // TODO: sanitize URL + return fmt.Errorf("the %q failed with %d: %q", h.URL, res.StatusCode, res.Status) + } + + return nil +} + +func (h *httpReader) Read(p []byte) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + + if err := h.ensureRequest(); err != nil { + return 0, err + } + + return h.res.Body.Read(p) +} + +func (h *httpReader) Close() error { + if h.res != nil { + // TODO: should we read till end? + return h.res.Body.Close() + } + return nil +} + +type httpReadAt struct { + URL string +} + +func (h *httpReadAt) ReadAt(p []byte, off int64) (n int, err error) { + r := httpReader{URL: h.URL, Off: off, N: int64(len(p))} + defer r.Close() + + // TODO: + // Even if ReadAt returns n < len(p), it may use all of p as scratch space during the call. + // If some data is available but not len(p) bytes, ReadAt blocks until either all the data + // is available or an error occurs. In this respect ReadAt is different from Read. + return r.Read(p) +} + +func isHTTPArchive(path string) bool { + return strings.HasPrefix(path, "https://") +} + +func httpSize(path string) (int64, error) { + // the `h.URL` is likely presigned only for GET + req, err := http.NewRequest("GET", path, nil) + if err != nil { + return 0, err + } + + req.Header.Set("Range", fmt.Sprintf("%d-%d", 0, 0)) + res, err := httpClient.Do(req) + if err != nil { + return 0, err + } + defer io.Copy(ioutil.Discard, res.Body) + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + // TODO: sanitize URL + return 0, fmt.Errorf("the %q failed with %d: %q", path, res.StatusCode, res.Status) + } + + return res.ContentLength, nil +} + +func openZIPHTTPArchive(url string) (*zip.Reader, io.Closer, error) { + size, err := httpSize(url) + if err != nil { + return nil, nil, err + } + + r, err := zip.NewReader(&httpReadAt{URL: url}, size) + return r, nil, err +} + +func openZIPDiskArchive(path string) (*zip.Reader, io.Closer, error) { + r, err := zip.OpenReader(path) + if err != nil { + return nil, nil, err + } + return &r.Reader, r, nil +} + +func openZIPArchive(path string) (*zip.Reader, io.Closer, error) { + if isHTTPArchive(path) { + return openZIPHTTPArchive(path) + } + + return openZIPDiskArchive(path) +} diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index b5ba720fc..acdb8de38 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -8,6 +8,10 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" ) +const cacheExpirationInterval = time.Minute +const cacheRefreshInterval = time.Minute / 2 +const cacheEvictInterval = time.Minute + type zipVFS struct { cache *cache.Cache } @@ -15,8 +19,13 @@ type zipVFS struct { func (fs *zipVFS) Dir(ctx context.Context, path string) (vfs.Dir, error) { // we do it in loop to not use any additional locks for { - dir, found := fs.cache.Get(path) - if !found { + dir, till, found := fs.cache.GetWithExpiration(path) + if found { + if till.Sub(time.Now()) < cacheRefreshInterval { + // refresh item + fs.cache.Set(path, dir, cache.DefaultExpiration) + } + } else { dir = newArchive(path) // if it errors, it means that it is already added @@ -26,18 +35,21 @@ func (fs *zipVFS) Dir(ctx context.Context, path string) (vfs.Dir, error) { } } - err := dir.(*zipArchive).Open(ctx) - return dir, err + zipDir := dir.(*zipArchive) + + err := zipDir.openArchive(ctx) + return zipDir, err } } func New() vfs.VFS { vfs := &zipVFS{ - cache: cache.New(time.Minute, 2*time.Minute), + cache: cache.New(cacheExpirationInterval, cacheRefreshInterval), } + vfs.cache.OnEvicted(func(path string, object interface{}) { - if archive, ok := object.(*Archive); archive != nil && ok { - archive.Close() + if archive, ok := object.(*zipArchive); archive != nil && ok { + archive.close() } }) return vfs diff --git a/metrics/metrics.go b/metrics/metrics.go index 0792a41f4..f6f82014b 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -77,6 +77,18 @@ var ( Help: "The time (in seconds) it takes to get a response from the GitLab domains API", }, []string{"status_code"}) + // DomainsSourceAPIReqTotal is the number of calls made to the Object Storage that returned a 4XX error + ZIPHttpReaderReqTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "gitlab_pages_zip_reader_requests_total", + Help: "The number of Object Storage API calls with different status codes", + }, []string{"status_code"}) + + // DomainsSourceAPICallDuration is the time it takes to get a response from the Object Storage in seconds + ZIPHttpReaderReqDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "gitlab_pages_zip_reader_requests_duration", + Help: "The time (in seconds) it takes to get a response from the Object Storage", + }, []string{"status_code"}) + // DiskServingFileSize metric for file size serving. serving_types: disk and object_storage DiskServingFileSize = prometheus.NewHistogram(prometheus.HistogramOpts{ Name: "gitlab_pages_disk_serving_file_size_bytes", -- GitLab