diff --git a/internal/domain/domain.go b/internal/domain/domain.go
index 7c1639a3301ecb173d7c9aa4958b41700b0f4c97..235c4e4dbdfc9a4fb80ba41a1d85a521ae48400d 100644
--- a/internal/domain/domain.go
+++ b/internal/domain/domain.go
@@ -9,7 +9,7 @@ import (
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/httperrors"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
 )
 
 // Domain is a domain that gitlab-pages can serve.
diff --git a/internal/domain/domain_test.go b/internal/domain/domain_test.go
index 26a7735cd90c4fc54c892166716c33bef636c8d0..61a2c39f57c0e9475e358c0d3e43bd9f35e22124 100644
--- a/internal/domain/domain_test.go
+++ b/internal/domain/domain_test.go
@@ -11,7 +11,7 @@ import (
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/fixture"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers"
 )
 
diff --git a/internal/serving/file/disk/serving.go b/internal/serving/file/disk/serving.go
new file mode 100644
index 0000000000000000000000000000000000000000..b9a42633193834c12ceb50216a1a6ea26bc9492a
--- /dev/null
+++ b/internal/serving/file/disk/serving.go
@@ -0,0 +1,16 @@
+package disk
+
+import (
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local"
+)
+
+var disk = file.New(vfs.Instrumented(local.VFS{}, "disk"))
+
+// New returns a serving instance that is capable of reading files
+// from the disk
+func New() serving.Serving {
+	return disk
+}
diff --git a/internal/serving/disk/errors.go b/internal/serving/file/errors.go
similarity index 96%
rename from internal/serving/disk/errors.go
rename to internal/serving/file/errors.go
index 5e55220be0b60239e964e5ac914841b9845c8c6b..2dab0a1dcaa9105cdcf1f2d9524c23c989e41771 100644
--- a/internal/serving/disk/errors.go
+++ b/internal/serving/file/errors.go
@@ -1,4 +1,4 @@
-package disk
+package file
 
 type locationDirectoryError struct {
 	FullPath string
diff --git a/internal/serving/disk/helpers.go b/internal/serving/file/helpers.go
similarity index 84%
rename from internal/serving/disk/helpers.go
rename to internal/serving/file/helpers.go
index e6d3f8ab6361e4906684af78383ee00a94796f0c..30968c5365b63dbedfea5915b1f48902e2405691 100644
--- a/internal/serving/disk/helpers.go
+++ b/internal/serving/file/helpers.go
@@ -1,4 +1,4 @@
-package disk
+package file
 
 import (
 	"context"
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/httputil"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
 )
 
 func endsWithSlash(path string) bool {
@@ -23,13 +24,13 @@ func endsWithoutHTMLExtension(path string) bool {
 // Detect file's content-type either by extension or mime-sniffing.
 // Implementation is adapted from Golang's `http.serveContent()`
 // See https://github.com/golang/go/blob/902fc114272978a40d2e65c2510a18e870077559/src/net/http/fs.go#L194
-func (reader *Reader) detectContentType(ctx context.Context, path string) (string, error) {
+func (reader *Reader) detectContentType(ctx context.Context, dir vfs.Dir, path string) (string, error) {
 	contentType := mime.TypeByExtension(filepath.Ext(path))
 
 	if contentType == "" {
 		var buf [512]byte
 
-		file, err := reader.vfs.Open(ctx, path)
+		file, err := dir.Open(ctx, path)
 		if err != nil {
 			return "", err
 		}
@@ -55,7 +56,7 @@ func acceptsGZip(r *http.Request) bool {
 	return acceptedEncoding == "gzip"
 }
 
-func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *http.Request, fullPath string) string {
+func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *http.Request, dir vfs.Dir, fullPath string) string {
 	if !acceptsGZip(r) {
 		return fullPath
 	}
@@ -63,7 +64,7 @@ func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *
 	gzipPath := fullPath + ".gz"
 
 	// Ensure the .gz file is not a symlink
-	fi, err := reader.vfs.Lstat(ctx, gzipPath)
+	fi, err := dir.Lstat(ctx, gzipPath)
 	if err != nil || !fi.Mode().IsRegular() {
 		return fullPath
 	}
diff --git a/internal/serving/disk/reader.go b/internal/serving/file/reader.go
similarity index 64%
rename from internal/serving/disk/reader.go
rename to internal/serving/file/reader.go
index 8c7ee3bf3ffd47229114a37dcf9f0cf468c17d84..960513444f9f282fe13e4f217f4e6d1efc17341a 100644
--- a/internal/serving/disk/reader.go
+++ b/internal/serving/file/reader.go
@@ -1,11 +1,10 @@
-package disk
+package file
 
 import (
 	"context"
 	"fmt"
 	"io"
 	"net/http"
-	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
@@ -13,7 +12,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk/symlink"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/symlink"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
 )
 
@@ -25,7 +24,7 @@ type Reader struct {
 func (reader *Reader) tryFile(h serving.Handler) error {
 	ctx := h.Request.Context()
 
-	fullPath, err := reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath)
+	dir, fullPath, err := reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath)
 
 	request := h.Request
 	host := request.Host
@@ -33,7 +32,7 @@ func (reader *Reader) tryFile(h serving.Handler) error {
 
 	if locationError, _ := err.(*locationDirectoryError); locationError != nil {
 		if endsWithSlash(urlPath) {
-			fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath, "index.html")
+			dir, fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, h.SubPath, "index.html")
 		} else {
 			// TODO why are we doing that? In tests it redirects to HTTPS. This seems wrong,
 			// issue about this: https://gitlab.com/gitlab-org/gitlab-pages/issues/273
@@ -50,24 +49,24 @@ func (reader *Reader) tryFile(h serving.Handler) error {
 	}
 
 	if locationError, _ := err.(*locationFileNoExtensionError); locationError != nil {
-		fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, strings.TrimSuffix(h.SubPath, "/")+".html")
+		dir, fullPath, err = reader.resolvePath(ctx, h.LookupPath.Path, strings.TrimSuffix(h.SubPath, "/")+".html")
 	}
 
 	if err != nil {
 		return err
 	}
 
-	return reader.serveFile(ctx, h.Writer, h.Request, fullPath, h.LookupPath.HasAccessControl)
+	return reader.serveFile(ctx, h.Writer, h.Request, dir, fullPath, h.LookupPath.HasAccessControl)
 }
 
 func (reader *Reader) tryNotFound(h serving.Handler) error {
 	ctx := h.Request.Context()
 
-	page404, err := reader.resolvePath(ctx, h.LookupPath.Path, "404.html")
+	dir, page404, err := reader.resolvePath(ctx, h.LookupPath.Path, "404.html")
 	if err != nil {
 		return err
 	}
-	err = reader.serveCustomFile(ctx, h.Writer, h.Request, http.StatusNotFound, page404)
+	err = reader.serveCustomFile(ctx, h.Writer, h.Request, http.StatusNotFound, dir, page404)
 	if err != nil {
 		return err
 	}
@@ -76,39 +75,39 @@ func (reader *Reader) tryNotFound(h serving.Handler) error {
 
 // Resolve the HTTP request to a path on disk, converting requests for
 // directories to requests for index.html inside the directory if appropriate.
-func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPath ...string) (string, error) {
+func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPath ...string) (vfs.Dir, string, error) {
 	// Ensure that publicPath always ends with "/"
 	publicPath = strings.TrimSuffix(publicPath, "/") + "/"
 
+	dir, err := reader.vfs.Dir(ctx, publicPath)
+	if err != nil {
+		return nil, "", err
+	}
+
 	// Don't use filepath.Join as cleans the path,
 	// where we want to traverse full path as supplied by user
 	// (including ..)
-	testPath := publicPath + strings.Join(subPath, "/")
-	fullPath, err := symlink.EvalSymlinks(ctx, reader.vfs, testPath)
+	testPath := strings.Join(subPath, "/")
+	fullPath, err := symlink.EvalSymlinks(ctx, dir, testPath)
 
 	if err != nil {
 		if endsWithoutHTMLExtension(testPath) {
-			return "", &locationFileNoExtensionError{
+			return nil, "", &locationFileNoExtensionError{
 				FullPath: fullPath,
 			}
 		}
 
-		return "", err
+		return nil, "", err
 	}
 
-	// The requested path resolved to somewhere outside of the public/ directory
-	if !strings.HasPrefix(fullPath, publicPath) && fullPath != filepath.Clean(publicPath) {
-		return "", fmt.Errorf("%q should be in %q", fullPath, publicPath)
-	}
-
-	fi, err := reader.vfs.Lstat(ctx, fullPath)
+	fi, err := dir.Lstat(ctx, fullPath)
 	if err != nil {
-		return "", err
+		return nil, "", err
 	}
 
 	// The requested path is a directory, so try index.html via recursion
 	if fi.IsDir() {
-		return "", &locationDirectoryError{
+		return nil, "", &locationDirectoryError{
 			FullPath:     fullPath,
 			RelativePath: strings.TrimPrefix(fullPath, publicPath),
 		}
@@ -117,23 +116,23 @@ func (reader *Reader) resolvePath(ctx context.Context, publicPath string, subPat
 	// The file exists, but is not a supported type to serve. Perhaps a block
 	// special device or something else that may be a security risk.
 	if !fi.Mode().IsRegular() {
-		return "", fmt.Errorf("%s: is not a regular file", fullPath)
+		return nil, "", fmt.Errorf("%s: is not a regular file", fullPath)
 	}
 
-	return fullPath, nil
+	return dir, fullPath, nil
 }
 
-func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, origPath string, accessControl bool) error {
-	fullPath := reader.handleGZip(ctx, w, r, origPath)
+func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, dir vfs.Dir, origPath string, accessControl bool) error {
+	fullPath := reader.handleGZip(ctx, w, r, dir, origPath)
 
-	file, err := reader.vfs.Open(ctx, fullPath)
+	file, err := dir.Open(ctx, fullPath)
 	if err != nil {
 		return err
 	}
 
 	defer file.Close()
 
-	fi, err := reader.vfs.Lstat(ctx, fullPath)
+	fi, err := dir.Lstat(ctx, fullPath)
 	if err != nil {
 		return err
 	}
@@ -144,7 +143,7 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h
 		w.Header().Set("Expires", time.Now().Add(10*time.Minute).Format(time.RFC1123))
 	}
 
-	contentType, err := reader.detectContentType(ctx, origPath)
+	contentType, err := reader.detectContentType(ctx, dir, origPath)
 	if err != nil {
 		return err
 	}
@@ -152,27 +151,36 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h
 	reader.fileSizeMetric.Observe(float64(fi.Size()))
 
 	w.Header().Set("Content-Type", contentType)
-	http.ServeContent(w, r, origPath, fi.ModTime(), file)
+
+	// Use http.ServeContent when the file supports seeking; otherwise
+	// stream the body sequentially with an explicit Content-Length.
+	if rs, ok := file.(io.ReadSeeker); ok {
+		http.ServeContent(w, r, origPath, fi.ModTime(), rs)
+	} else {
+		w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10))
+		// Best effort, like http.ServeContent: a copy error is intentionally not reported.
+		_, _ = io.Copy(w, file)
+	}
 
 	return nil
 }
 
-func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, origPath string) error {
-	fullPath := reader.handleGZip(ctx, w, r, origPath)
+func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, dir vfs.Dir, origPath string) error {
+	fullPath := reader.handleGZip(ctx, w, r, dir, origPath)
 
 	// Open and serve content of file
-	file, err := reader.vfs.Open(ctx, fullPath)
+	file, err := dir.Open(ctx, fullPath)
 	if err != nil {
 		return err
 	}
 	defer file.Close()
 
-	fi, err := reader.vfs.Lstat(ctx, fullPath)
+	fi, err := dir.Lstat(ctx, fullPath)
 	if err != nil {
 		return err
 	}
 
-	contentType, err := reader.detectContentType(ctx, origPath)
+	contentType, err := reader.detectContentType(ctx, dir, origPath)
 	if err != nil {
 		return err
 	}
diff --git a/internal/serving/disk/serving.go b/internal/serving/file/serving.go
similarity index 63%
rename from internal/serving/disk/serving.go
rename to internal/serving/file/serving.go
index b7501b802aaee2599bbcc28acf7d599950b8a692..1b4bb5836113bafc227bbe4eccc20b4f0562fe60 100644
--- a/internal/serving/disk/serving.go
+++ b/internal/serving/file/serving.go
@@ -1,33 +1,25 @@
-package disk
+package file
 
 import (
 	"gitlab.com/gitlab-org/gitlab-pages/internal/httperrors"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local"
 	"gitlab.com/gitlab-org/gitlab-pages/metrics"
 )
 
-var disk = &Disk{
-	reader: Reader{
-		fileSizeMetric: metrics.DiskServingFileSize,
-		vfs:            vfs.Instrumented(local.VFS{}, "disk"),
-	},
-}
-
-// Disk describes a disk access serving
-type Disk struct {
+// Files describes a serving that reads files through a VFS
+type Files struct {
 	reader Reader
 }
 
 // ServeFileHTTP serves a file from disk and returns true. It returns false
 // when a file could not been found.
-func (s *Disk) ServeFileHTTP(h serving.Handler) bool {
+func (s *Files) ServeFileHTTP(h serving.Handler) bool {
 	return s.reader.tryFile(h) == nil
 }
 
 // ServeNotFoundHTTP tries to read a custom 404 page
-func (s *Disk) ServeNotFoundHTTP(h serving.Handler) {
+func (s *Files) ServeNotFoundHTTP(h serving.Handler) {
 	if s.reader.tryNotFound(h) == nil {
 		return
 	}
@@ -38,6 +30,11 @@ func (s *Disk) ServeNotFoundHTTP(h serving.Handler) {
 
 // New returns a serving instance that is capable of reading files
-// from the disk
-func New() serving.Serving {
-	return disk
+// from the given VFS
+func New(vfs vfs.VFS) serving.Serving {
+	return &Files{
+		reader: Reader{
+			fileSizeMetric: metrics.DiskServingFileSize,
+			vfs:            vfs,
+		},
+	}
 }
diff --git a/internal/serving/disk/serving_test.go b/internal/serving/file/serving_test.go
similarity index 98%
rename from internal/serving/disk/serving_test.go
rename to internal/serving/file/serving_test.go
index 02b7fac749c768b5e7f68d2069a7dea554daeb33..9a614a0032d514593cc798f71c94a3031ee254b3 100644
--- a/internal/serving/disk/serving_test.go
+++ b/internal/serving/file/serving_test.go
@@ -1,4 +1,4 @@
-package disk
+package file
 
 import (
 	"io/ioutil"
diff --git a/internal/serving/disk/symlink/LICENSE b/internal/serving/file/symlink/LICENSE
similarity index 100%
rename from internal/serving/disk/symlink/LICENSE
rename to internal/serving/file/symlink/LICENSE
diff --git a/internal/serving/disk/symlink/PATENTS b/internal/serving/file/symlink/PATENTS
similarity index 100%
rename from internal/serving/disk/symlink/PATENTS
rename to internal/serving/file/symlink/PATENTS
diff --git a/internal/serving/disk/symlink/README.md b/internal/serving/file/symlink/README.md
similarity index 100%
rename from internal/serving/disk/symlink/README.md
rename to internal/serving/file/symlink/README.md
diff --git a/internal/serving/disk/symlink/path_test.go b/internal/serving/file/symlink/path_test.go
similarity index 99%
rename from internal/serving/disk/symlink/path_test.go
rename to internal/serving/file/symlink/path_test.go
index 4d590db52e687acd41114931020a1184757ff481..5ba5f5f27994aeffd5c989bdce61c8bd61facb53 100644
--- a/internal/serving/disk/symlink/path_test.go
+++ b/internal/serving/file/symlink/path_test.go
@@ -12,7 +12,7 @@ import (
 	"runtime"
 	"testing"
 
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk/symlink"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/symlink"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs/local"
 )
 
diff --git a/internal/serving/disk/symlink/shims.go b/internal/serving/file/symlink/shims.go
similarity index 84%
rename from internal/serving/disk/symlink/shims.go
rename to internal/serving/file/symlink/shims.go
index d383b96bac2afc57bf52067633a92b6a31165e42..64b0bc3cee24bda0c2e5bad221cf69e5beac46cd 100644
--- a/internal/serving/disk/symlink/shims.go
+++ b/internal/serving/file/symlink/shims.go
@@ -12,6 +12,6 @@ func volumeNameLen(s string) int { return 0 }
 func IsAbs(path string) bool   { return filepath.IsAbs(path) }
 func Clean(path string) string { return filepath.Clean(path) }
 
-func EvalSymlinks(ctx context.Context, fs vfs.VFS, path string) (string, error) {
+func EvalSymlinks(ctx context.Context, fs vfs.Dir, path string) (string, error) {
 	return walkSymlinks(ctx, fs, path)
 }
diff --git a/internal/serving/disk/symlink/symlink.go b/internal/serving/file/symlink/symlink.go
similarity index 98%
rename from internal/serving/disk/symlink/symlink.go
rename to internal/serving/file/symlink/symlink.go
index 507148112360810f256378642955e23adfdeecab..e04223d38861ae69a65b94368fe13e16b2067c27 100644
--- a/internal/serving/disk/symlink/symlink.go
+++ b/internal/serving/file/symlink/symlink.go
@@ -14,7 +14,7 @@ import (
 	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
 )
 
-func walkSymlinks(ctx context.Context, fs vfs.VFS, path string) (string, error) {
+func walkSymlinks(ctx context.Context, fs vfs.Dir, path string) (string, error) {
 	volLen := volumeNameLen(path)
 	pathSeparator := string(os.PathSeparator)
 
diff --git a/internal/serving/file/zip/serving.go b/internal/serving/file/zip/serving.go
new file mode 100644
index 0000000000000000000000000000000000000000..d6ce70736be70487328e802a9b81864182b2d34a
--- /dev/null
+++ b/internal/serving/file/zip/serving.go
@@ -0,0 +1,16 @@
+package zip
+
+import (
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs/zip"
+)
+
+var zipServing = file.New(vfs.Instrumented(zip.New(), "zip"))
+
+// New returns a serving instance that is capable of reading files
+// from a ZIP archive
+func New() serving.Serving {
+	return zipServing
+}
diff --git a/internal/source/disk/custom.go b/internal/source/disk/custom.go
index 2668ed8165258c3febcbf984ef54b76b69e17616..8ecc1b9f3b4277f344ac00bf2f283d3415ca2052 100644
--- a/internal/source/disk/custom.go
+++ b/internal/source/disk/custom.go
@@ -4,7 +4,7 @@ import (
 	"net/http"
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
 )
 
 type customProjectResolver struct {
diff --git a/internal/source/disk/group.go b/internal/source/disk/group.go
index e0365bbdcd3b5743174910c61fe230873429ec96..5b13e34e5199255561f8ad17da08d14589a79023 100644
--- a/internal/source/disk/group.go
+++ b/internal/source/disk/group.go
@@ -8,7 +8,7 @@ import (
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/host"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
 )
 
 const (
diff --git a/internal/source/gitlab/factory.go b/internal/source/gitlab/factory.go
index d526994f8992edd9ebd581e756f060e973f6a2fe..1d994a07a69ee1fdedf24b16c9a0d0975c0afbac 100644
--- a/internal/source/gitlab/factory.go
+++ b/internal/source/gitlab/factory.go
@@ -6,7 +6,8 @@ import (
 	log "github.com/sirupsen/logrus"
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/zip"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/serverless"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api"
 )
@@ -32,6 +33,8 @@ func fabricateServing(lookup api.LookupPath) serving.Serving {
 	switch source.Type {
 	case "file":
 		return disk.New()
+	case "zip":
+		return zip.New()
 	case "serverless":
 		serving, err := serverless.NewFromAPISource(source.Serverless)
 		if err != nil {
diff --git a/internal/source/gitlab/factory_test.go b/internal/source/gitlab/factory_test.go
index 2f3e199407eea800a638f5c09e120d51d88b3693..cc273fbf57a4fe143b2b479609ea482f19140e1c 100644
--- a/internal/source/gitlab/factory_test.go
+++ b/internal/source/gitlab/factory_test.go
@@ -6,7 +6,7 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"gitlab.com/gitlab-org/gitlab-pages/internal/fixture"
-	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/disk"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/file/disk"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/serving/serverless"
 	"gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api"
 )
diff --git a/internal/vfs/local/vfs.go b/internal/vfs/local/vfs.go
index 7c6f3ba6b9b8e180f4f8026147ba884dd7371d3d..cedfefa24fa44279c5d7df1d1cd5b69cb6731ddd 100644
--- a/internal/vfs/local/vfs.go
+++ b/internal/vfs/local/vfs.go
@@ -2,7 +2,10 @@ package local
 
 import (
 	"context"
+	"fmt"
 	"os"
+	"path/filepath"
+	"strings"
 
 	"golang.org/x/sys/unix"
 
@@ -11,9 +14,56 @@ import (
 
 type VFS struct{}
 
-func (fs VFS) Lstat(ctx context.Context, name string) (os.FileInfo, error) { return os.Lstat(name) }
-func (fs VFS) Readlink(ctx context.Context, name string) (string, error)   { return os.Readlink(name) }
+// Dir returns a directory handle rooted at the cleaned path.
+func (fs VFS) Dir(ctx context.Context, path string) (vfs.Dir, error) {
+	return &Dir{path: filepath.Clean(path)}, nil
+}
+
+// Dir gives access to the files below a single root directory on local disk.
+type Dir struct {
+	path string
+}
+
+// resolve maps name, which is relative to the root of dir, to a path on disk.
+// It returns an error when the result is outside of the root.
+func (dir *Dir) resolve(name string) (string, error) {
+	fullPath := filepath.Join(dir.path, name)
+
+	// The requested path resolved to somewhere outside of the public/ directory
+	rel, err := filepath.Rel(dir.path, fullPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return "", fmt.Errorf("%q should be in %q", fullPath, dir.path)
+	}
+
+	return fullPath, nil
+}
+
+// Lstat returns the file info of name without following a final symlink.
+func (dir *Dir) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
+	fullPath, err := dir.resolve(name)
+	if err != nil {
+		return nil, err
+	}
+
+	return os.Lstat(fullPath)
+}
+
+// Readlink returns the target of the symlink name.
+func (dir *Dir) Readlink(ctx context.Context, name string) (string, error) {
+	fullPath, err := dir.resolve(name)
+	if err != nil {
+		return "", err
+	}
+
+	return os.Readlink(fullPath)
+}
+
+// Open opens name read-only; O_NOFOLLOW makes it fail when name is a symlink.
+func (dir *Dir) Open(ctx context.Context, name string) (vfs.File, error) {
+	fullPath, err := dir.resolve(name)
+	if err != nil {
+		return nil, err
+	}
 
-func (fs VFS) Open(ctx context.Context, name string) (vfs.File, error) {
-	return os.OpenFile(name, os.O_RDONLY|unix.O_NOFOLLOW, 0)
+	return os.OpenFile(fullPath, os.O_RDONLY|unix.O_NOFOLLOW, 0)
 }
diff --git a/internal/vfs/vfs.go b/internal/vfs/vfs.go
index 07c99b7764cda0a4345111f633b1396572a19e6e..b351efce646b76a6d11d9986a64ebc30a52381c3 100644
--- a/internal/vfs/vfs.go
+++ b/internal/vfs/vfs.go
@@ -11,6 +11,11 @@ import (
 
 // VFS abstracts the things Pages needs to serve a static site from disk.
 type VFS interface {
+	Dir(ctx context.Context, path string) (Dir, error)
+}
+
+// Dir abstracts the things Pages needs to serve a static site from a given path.
+type Dir interface {
 	Lstat(ctx context.Context, name string) (os.FileInfo, error)
 	Readlink(ctx context.Context, name string) (string, error)
 	Open(ctx context.Context, name string) (File, error)
@@ -19,7 +24,8 @@ type VFS interface {
 // File represents an open file, which will typically be the response body of a Pages request.
 type File interface {
 	io.Reader
-	io.Seeker
+	// TODO: Zip does not support seeking
+	// io.Seeker
 	io.Closer
 }
 
@@ -36,20 +42,38 @@ func (i *InstrumentedVFS) increment(operation string, err error) {
 	metrics.VFSOperations.WithLabelValues(i.name, operation, strconv.FormatBool(err == nil)).Inc()
 }
 
-func (i *InstrumentedVFS) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
-	fi, err := i.fs.Lstat(ctx, name)
+func (i *InstrumentedVFS) Dir(ctx context.Context, path string) (Dir, error) {
+	dir, err := i.fs.Dir(ctx, path)
+	i.increment("Dir", err)
+	if dir != nil {
+		dir = &InstrumentedDir{dir: dir, name: i.name}
+	}
+	return dir, err
+}
+
+type InstrumentedDir struct {
+	dir  Dir
+	name string
+}
+
+func (i *InstrumentedDir) increment(operation string, err error) {
+	metrics.VFSOperations.WithLabelValues(i.name, operation, strconv.FormatBool(err == nil)).Inc()
+}
+
+func (i *InstrumentedDir) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
+	fi, err := i.dir.Lstat(ctx, name)
 	i.increment("Lstat", err)
 	return fi, err
 }
 
-func (i *InstrumentedVFS) Readlink(ctx context.Context, name string) (string, error) {
-	target, err := i.fs.Readlink(ctx, name)
+func (i *InstrumentedDir) Readlink(ctx context.Context, name string) (string, error) {
+	target, err := i.dir.Readlink(ctx, name)
 	i.increment("Readlink", err)
 	return target, err
 }
 
-func (i *InstrumentedVFS) Open(ctx context.Context, name string) (File, error) {
-	f, err := i.fs.Open(ctx, name)
+func (i *InstrumentedDir) Open(ctx context.Context, name string) (File, error) {
+	f, err := i.dir.Open(ctx, name)
 	i.increment("Open", err)
 	return f, err
 }
diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go
new file mode 100644
index 0000000000000000000000000000000000000000..4f7ef69929ac891f550e952af8b25f56ae117375
--- /dev/null
+++ b/internal/vfs/zip/archive.go
@@ -0,0 +1,148 @@
+package zip
+
+import (
+	"archive/zip"
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strings"
+	"sync"
+
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+)
+
+const dirPrefix = "public/"
+const maxSymlinkSize = 256
+
+type zipArchive struct {
+	path      string
+	once      sync.Once
+	done      chan struct{}
+	zip       *zip.Reader
+	zipCloser io.Closer
+	files     map[string]*zip.File
+	zipErr    error
+}
+
+func (a *zipArchive) openArchive(ctx context.Context) error {
+	a.once.Do(func() {
+		a.zip, a.zipCloser, a.zipErr = openZIPArchive(a.path)
+		if a.zip != nil {
+			a.processZip()
+		}
+		close(a.done)
+	})
+
+	// wait for it to close
+	// or exit early
+	select {
+	case <-a.done:
+	case <-ctx.Done():
+	}
+	return a.zipErr
+}
+
+func (a *zipArchive) processZip() {
+	for _, file := range a.zip.File {
+		if !strings.HasPrefix(file.Name, dirPrefix) {
+			continue
+		}
+
+		// NOTE(review): entries stay keyed by their full archive name, including
+		// dirPrefix; confirm that callers look files up using that form.
+		a.files[file.Name] = file
+	}
+
+	// recycle memory
+	a.zip.File = nil
+}
+
+func (a *zipArchive) close() {
+	if a.zipCloser != nil {
+		a.zipCloser.Close()
+	}
+	a.zipCloser = nil
+	a.zip = nil
+}
+
+func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
+	file := a.files[name]
+	if file == nil {
+		return nil, os.ErrNotExist
+	}
+
+	return file.FileInfo(), nil
+}
+
+func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) {
+	file := a.files[name]
+	if file == nil {
+		return "", os.ErrNotExist
+	}
+
+	if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink {
+		return "", os.ErrInvalid
+	}
+
+	rc, err := file.Open()
+	if err != nil {
+		return "", err
+	}
+	defer rc.Close()
+
+	data, err := ioutil.ReadAll(&io.LimitedReader{R: rc, N: maxSymlinkSize})
+	if err != nil {
+		return "", err
+	}
+
+	return string(data), nil
+}
+
+func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) {
+	file := a.files[name]
+	if file == nil {
+		return nil, os.ErrNotExist
+	}
+
+	dataOffset, err := file.DataOffset()
+	if err != nil {
+		return nil, err
+	}
+
+	// TODO: We can support `io.Seeker` if file would not be compressed
+
+	if !isHTTPArchive(a.path) {
+		return file.Open()
+	}
+
+	var reader io.ReadCloser
+	// The range covers the bytes as stored in the archive, i.e. the compressed size
+	reader = &httpReader{
+		URL: a.path,
+		Off: dataOffset,
+		N:   int64(file.CompressedSize64),
+	}
+
+	switch file.Method {
+	case zip.Deflate:
+		reader = newDeflateReader(reader)
+
+	case zip.Store:
+		// no-op
+
+	default:
+		return nil, fmt.Errorf("unsupported compression: %x", file.Method)
+	}
+
+	return reader, nil
+}
+
+func newArchive(path string) *zipArchive {
+	return &zipArchive{
+		path:  path,
+		done:  make(chan struct{}),
+		files: make(map[string]*zip.File),
+	}
+}
diff --git a/internal/vfs/zip/deflate_reader.go b/internal/vfs/zip/deflate_reader.go
new file mode 100644
index 0000000000000000000000000000000000000000..2e55ee5a307a703d1bad1de71083c5be51b4edb0
--- /dev/null
+++ b/internal/vfs/zip/deflate_reader.go
@@ -0,0 +1,32 @@
+package zip
+
+import (
+	"compress/flate"
+	"io"
+)
+
+type deflateReader struct {
+	R io.ReadCloser
+	D io.ReadCloser
+}
+
+func (r *deflateReader) Read(p []byte) (n int, err error) {
+	return r.D.Read(p)
+}
+
+func (r *deflateReader) Close() error {
+	// Close the decompressor first, then the stream it reads from, and
+	// report the first error.
+	err := r.D.Close()
+	if cerr := r.R.Close(); err == nil {
+		err = cerr
+	}
+	return err
+}
+
+func newDeflateReader(r io.ReadCloser) *deflateReader {
+	return &deflateReader{
+		R: r,
+		D: flate.NewReader(r),
+	}
+}
diff --git a/internal/vfs/zip/http_reader.go b/internal/vfs/zip/http_reader.go
new file mode 100644
index 0000000000000000000000000000000000000000..99c94b1193a89aa6a0945d7c64a540a240af2f10
--- /dev/null
+++ b/internal/vfs/zip/http_reader.go
@@ -0,0 +1,166 @@
+package zip
+
+import (
+	"archive/zip"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+
+	"gitlab.com/gitlab-org/gitlab-pages/internal/httptransport"
+	"gitlab.com/gitlab-org/gitlab-pages/metrics"
+)
+
+type httpReader struct {
+	URL string
+	Off int64
+	N   int64
+	res *http.Response
+}
+
+var httpClient = &http.Client{
+	// TODO: we need connect timeout
+	// The longest time the request can be executed
+	Timeout:   30 * time.Minute,
+	Transport: httptransport.NewTransportWithMetrics(metrics.ZIPHttpReaderReqDuration, metrics.ZIPHttpReaderReqTotal),
+}
+
+func (h *httpReader) ensureRequest() error {
+	if h.res != nil {
+		return nil
+	}
+
+	req, err := http.NewRequest("GET", h.URL, nil)
+	if err != nil {
+		return err
+	}
+
+	// Range is inclusive on both ends and needs the "bytes=" unit
+	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", h.Off, h.Off+h.N-1))
+	res, err := httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	// 200 would mean that Range was ignored and the body starts at offset 0
+	if res.StatusCode != http.StatusPartialContent {
+		res.Body.Close()
+		// TODO: sanitize URL
+		return fmt.Errorf("the %q failed with %d: %q", h.URL, res.StatusCode, res.Status)
+	}
+
+	h.res = res
+	return nil
+}
+
+func (h *httpReader) Read(p []byte) (n int, err error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	// An empty range cannot be expressed in a Range header
+	if h.N <= 0 {
+		return 0, io.EOF
+	}
+
+	if err := h.ensureRequest(); err != nil {
+		return 0, err
+	}
+
+	return h.res.Body.Read(p)
+}
+
+func (h *httpReader) Close() error {
+	if h.res != nil {
+		// TODO: should we read till end?
+		return h.res.Body.Close()
+	}
+	return nil
+}
+
+type httpReadAt struct {
+	URL string
+}
+
+func (h *httpReadAt) ReadAt(p []byte, off int64) (n int, err error) {
+	r := httpReader{URL: h.URL, Off: off, N: int64(len(p))}
+	defer r.Close()
+
+	// io.ReaderAt requires a non-nil error whenever n < len(p), so keep
+	// reading until p is full instead of returning after the first Read.
+	n, err = io.ReadFull(&r, p)
+	if err == io.ErrUnexpectedEOF {
+		err = io.EOF
+	}
+	return n, err
+}
+
+func isHTTPArchive(path string) bool {
+	return strings.HasPrefix(path, "https://")
+}
+
+func httpSize(path string) (int64, error) {
+	// the `h.URL` is likely presigned only for GET
+	req, err := http.NewRequest("GET", path, nil)
+	if err != nil {
+		return 0, err
+	}
+
+	// Ask for a single byte; the total size is reported in Content-Range
+	req.Header.Set("Range", "bytes=0-0")
+	res, err := httpClient.Do(req)
+	if err != nil {
+		return 0, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusPartialContent {
+		// TODO: sanitize URL
+		return 0, fmt.Errorf("the %q failed with %d: %q", path, res.StatusCode, res.Status)
+	}
+
+	// Drain the single requested byte so that the connection can be reused
+	_, _ = io.Copy(ioutil.Discard, res.Body)
+
+	// Content-Range has the form "bytes 0-0/<total size>"
+	contentRange := res.Header.Get("Content-Range")
+	idx := strings.LastIndex(contentRange, "/")
+	if idx < 0 {
+		return 0, fmt.Errorf("the %q returned invalid Content-Range: %q", path, contentRange)
+	}
+
+	size, err := strconv.ParseInt(contentRange[idx+1:], 10, 64)
+	if err != nil {
+		return 0, fmt.Errorf("the %q returned invalid Content-Range: %q", path, contentRange)
+	}
+
+	return size, nil
+}
+
+func openZIPHTTPArchive(url string) (*zip.Reader, io.Closer, error) {
+	size, err := httpSize(url)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	r, err := zip.NewReader(&httpReadAt{URL: url}, size)
+	return r, nil, err
+}
+
+func openZIPDiskArchive(path string) (*zip.Reader, io.Closer, error) {
+	r, err := zip.OpenReader(path)
+	if err != nil {
+		return nil, nil, err
+	}
+	return &r.Reader, r, nil
+}
+
+func openZIPArchive(path string) (*zip.Reader, io.Closer, error) {
+	if isHTTPArchive(path) {
+		return openZIPHTTPArchive(path)
+	}
+
+	return openZIPDiskArchive(path)
+}
diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go
new file mode 100644
index 0000000000000000000000000000000000000000..acdb8de38c3168f868f8aa24f494a0268a26debc
--- /dev/null
+++ b/internal/vfs/zip/vfs.go
@@ -0,0 +1,56 @@
+package zip
+
+import (
+	"context"
+	"time"
+
+	"github.com/patrickmn/go-cache"
+	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+)
+
+const cacheExpirationInterval = time.Minute
+const cacheRefreshInterval = time.Minute / 2
+const cacheEvictInterval = time.Minute
+
+type zipVFS struct {
+	cache *cache.Cache
+}
+
+func (fs *zipVFS) Dir(ctx context.Context, path string) (vfs.Dir, error) {
+	// we do it in loop to not use any additional locks
+	for {
+		dir, till, found := fs.cache.GetWithExpiration(path)
+		if found {
+			if time.Until(till) < cacheRefreshInterval {
+				// refresh item
+				fs.cache.Set(path, dir, cache.DefaultExpiration)
+			}
+		} else {
+			dir = newArchive(path)
+
+			// if it errors, it means that it is already added
+			// retry again to get it
+			if fs.cache.Add(path, dir, cache.DefaultExpiration) != nil {
+				continue
+			}
+		}
+
+		zipDir := dir.(*zipArchive)
+
+		err := zipDir.openArchive(ctx)
+		return zipDir, err
+	}
+}
+
+func New() vfs.VFS {
+	fs := &zipVFS{
+		cache: cache.New(cacheExpirationInterval, cacheEvictInterval),
+	}
+
+	fs.cache.OnEvicted(func(path string, object interface{}) {
+		if archive, ok := object.(*zipArchive); archive != nil && ok {
+			archive.close()
+		}
+	})
+	return fs
+}
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 0792a41f415f623b2607cae554d1837c9bf8c86a..f6f82014b54a65222268ad68c25b1ea609c1b7dd 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -77,6 +77,18 @@ var (
 		Help: "The time (in seconds) it takes to get a response from the GitLab domains API",
 	}, []string{"status_code"})
 
+	// ZIPHttpReaderReqTotal is the number of calls made to the Object Storage, labelled by status code
+	ZIPHttpReaderReqTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "gitlab_pages_zip_reader_requests_total",
+		Help: "The number of Object Storage API calls with different status codes",
+	}, []string{"status_code"})
+
+	// ZIPHttpReaderReqDuration is the time it takes to get a response from the Object Storage in seconds
+	ZIPHttpReaderReqDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "gitlab_pages_zip_reader_requests_duration",
+		Help: "The time (in seconds) it takes to get a response from the Object Storage",
+	}, []string{"status_code"})
+
 	// DiskServingFileSize metric for file size serving. serving_types: disk and object_storage
 	DiskServingFileSize = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Name: "gitlab_pages_disk_serving_file_size_bytes",