From f680ec0aaff55816d9c29ee0b6b45049879337ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Tue, 13 Oct 2020 10:42:29 +0200 Subject: [PATCH] Store data-offset and symlink in a zip.File --- go.mod | 1 - go.sum | 6 --- internal/vfs/zip/archive.go | 68 +++++++----------------- internal/vfs/zip/file.go | 99 +++++++++++++++++++++++++++++++++++ internal/vfs/zip/lru_cache.go | 61 --------------------- internal/vfs/zip/vfs.go | 23 ++------ internal/vfs/zip/vfs_test.go | 5 +- metrics/metrics.go | 7 ++- 8 files changed, 127 insertions(+), 143 deletions(-) create mode 100644 internal/vfs/zip/file.go delete mode 100644 internal/vfs/zip/lru_cache.go diff --git a/go.mod b/go.mod index 74582a654..d9035beea 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/gorilla/securecookie v1.1.1 github.com/gorilla/sessions v1.2.0 github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 - github.com/karlseguin/ccache/v2 v2.0.6 github.com/karrick/godirwalk v1.10.12 github.com/kr/text v0.2.0 // indirect github.com/namsral/flag v1.7.4-pre diff --git a/go.sum b/go.sum index 9e9ef22e5..8e2a2a3c9 100644 --- a/go.sum +++ b/go.sum @@ -176,10 +176,6 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uiaSepXwyf3o52HaUYcV+Tu66S3F5GA= github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= -github.com/karlseguin/ccache/v2 v2.0.6 h1:jFCLz4bF4EPfuCcvESAgYNClkEb31LV3WzyOwLlFz7w= -github.com/karlseguin/ccache/v2 v2.0.6/go.mod h1:2BDThcfQMf/c0jnZowt16eW405XIqZPavt+HoYEtcxQ= -github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003 h1:vJ0Snvo+SLMY72r5J4sEfkuE7AFbixEP2qRbEcum/wA= -github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003/go.mod 
h1:zNBxMY8P21owkeogJELCLeHIt+voOSduHYTFUbwRAV8= github.com/karrick/godirwalk v1.10.12 h1:BqUm+LuJcXjGv1d2mj3gBiQyrQ57a0rYoAmhvJQ7RDU= github.com/karrick/godirwalk v1.10.12/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= github.com/kataras/golog v0.0.9/go.mod h1:12HJgwBIZFNGL0EJnMRhmvGA0PQGx8VFwrZtM4CqbAk= @@ -331,8 +327,6 @@ github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPU github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/wadey/gocovmerge v0.0.0-20160331181800-b5bfa59ec0ad h1:W0LEBv82YCGEtcmPA3uNZBI33/qF//HAAs3MawDjRa0= github.com/wadey/gocovmerge v0.0.0-20160331181800-b5bfa59ec0ad/go.mod h1:Hy8o65+MXnS6EwGElrSRjUzQDLXreJlzYLlWiHtt8hM= -github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ= -github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0/go.mod h1:IXCdmsXIht47RaVFLEdVnh1t+pgYtTAhQGj73kz+2DM= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index 548ba6518..3516cf065 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -5,13 +5,10 @@ import ( "context" "errors" "fmt" - "io" "os" "path/filepath" - "strconv" "strings" "sync" - "sync/atomic" "time" log "github.com/sirupsen/logrus" @@ -46,24 +43,22 @@ type zipArchive struct { done chan struct{} openTimeout time.Duration - cacheNamespace string - resource *httprange.Resource reader *httprange.RangedReader archive *zip.Reader err error - files map[string]*zip.File + files map[string]zipFile + fileCacheLock sync.RWMutex } func newArchive(fs *zipVFS, 
path string, openTimeout time.Duration) *zipArchive { return &zipArchive{ - fs: fs, - path: path, - done: make(chan struct{}), - files: make(map[string]*zip.File), - openTimeout: openTimeout, - cacheNamespace: strconv.FormatInt(atomic.AddInt64(&fs.archiveCount, 1), 10) + ":", + fs: fs, + path: path, + done: make(chan struct{}), + files: make(map[string]zipFile), + openTimeout: openTimeout, } } @@ -133,7 +128,9 @@ func (a *zipArchive) readArchive() { if !strings.HasPrefix(file.Name, dirPrefix) { continue } - a.files[file.Name] = file + zipFile := zipFile{File: file} + zipFile.recycle() + a.files[file.Name] = zipFile } // recycle memory @@ -145,15 +142,15 @@ func (a *zipArchive) readArchive() { metrics.ZipArchiveEntriesCached.Add(fileCount) } -func (a *zipArchive) findFile(name string) *zip.File { +func (a *zipArchive) findFile(name string) *zipFile { name = filepath.Join(dirPrefix, name) - if file := a.files[name]; file != nil { - return file + if file, ok := a.files[name]; ok { + return &file } - if dir := a.files[name+"/"]; dir != nil { - return dir + if dir, ok := a.files[name+"/"]; ok { + return &dir } return nil @@ -170,15 +167,13 @@ func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { return nil, errNotFile } - dataOffset, err := a.fs.dataOffsetCache.findOrFetch(a.cacheNamespace, name, func() (interface{}, error) { - return file.DataOffset() - }) + dataOffset, err := file.cachedDataOffset(&a.fileCacheLock) if err != nil { return nil, err } // only read from dataOffset up to the size of the compressed file - reader := a.reader.SectionReader(ctx, dataOffset.(int64), int64(file.CompressedSize64)) + reader := a.reader.SectionReader(ctx, dataOffset, int64(file.CompressedSize64)) switch file.Method { case zip.Deflate: @@ -207,40 +202,17 @@ func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) return "", os.ErrNotExist } - if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink { - return "", errNotSymlink - } - 
- symlinkValue, err := a.fs.readlinkCache.findOrFetch(a.cacheNamespace, name, func() (interface{}, error) { - rc, err := file.Open() - if err != nil { - return nil, err - } - defer rc.Close() - - var link [maxSymlinkSize + 1]byte - - // read up to len(symlink) bytes from the link file - n, err := io.ReadFull(rc, link[:]) - if err != nil && err != io.ErrUnexpectedEOF { - // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it - return nil, err - } - - return string(link[:n]), nil - }) + link, err := file.cachedReadlink(&a.fileCacheLock) if err != nil { return "", err } - symlink := symlinkValue.(string) - // return errSymlinkSize if the number of bytes read from the link is too big - if len(symlink) > maxSymlinkSize { + if len(link) > maxSymlinkSize { return "", errSymlinkSize } - return symlink, nil + return link, nil } // onEvicted called by the zipVFS.cache when an archive is removed from the cache diff --git a/internal/vfs/zip/file.go b/internal/vfs/zip/file.go new file mode 100644 index 000000000..ee93739c2 --- /dev/null +++ b/internal/vfs/zip/file.go @@ -0,0 +1,99 @@ +package zip + +import ( + "archive/zip" + "encoding/binary" + "io" + "os" + "sync" + + "gitlab.com/gitlab-org/gitlab-pages/metrics" +) + +type zipFile struct { + *zip.File +} + +// recycle removes unneeded metadata from `zip.File` to reduce memory pressure +func (f zipFile) recycle() { + f.Comment = "" + + // we clear `f.Extra` so that the `cached*` methods can reuse it as cache storage + f.Extra = nil +} + +// cachedDataOffset returns the cached data offset if present +// or computes and stores it +// it uses `zip.File.Extra` field to hold the data (abusing it) +// as we don't want to allocate more memory +func (f zipFile) cachedDataOffset(lock *sync.RWMutex) (int64, error) { + if !f.Mode().IsRegular() { + return 0, errNotFile + } + + lock.RLock() + data := f.Extra + lock.RUnlock() + + if data != nil { + metrics.ZipCacheRequests.WithLabelValues("data-offset", "hit").Inc() + 
return int64(binary.LittleEndian.Uint64(data)), nil + } + + dataOffset, err := f.DataOffset() + if err != nil { + metrics.ZipCacheRequests.WithLabelValues("data-offset", "error").Inc() + return 0, err + } + + encoded := make([]byte, 8) + binary.LittleEndian.PutUint64(encoded, uint64(dataOffset)) + lock.Lock() + f.Extra = encoded + lock.Unlock() + + metrics.ZipCacheRequests.WithLabelValues("data-offset", "miss").Inc() + + return dataOffset, err +} + +// cachedReadlink returns the cached symlink target if present +// or reads and stores it +// it uses `zip.File.Extra` field to hold the data (abusing it) +func (f zipFile) cachedReadlink(lock *sync.RWMutex) (string, error) { + if f.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink { + return "", errNotSymlink + } + + lock.RLock() + data := f.Extra + lock.RUnlock() + + if data != nil { + metrics.ZipCacheRequests.WithLabelValues("readlink", "hit").Inc() + return string(data), nil + } + + rc, err := f.Open() + if err != nil { + metrics.ZipCacheRequests.WithLabelValues("readlink", "error").Inc() + return "", err + } + defer rc.Close() + + // read up to len(symlink) bytes from the link file + var link [maxSymlinkSize + 1]byte + n, err := io.ReadFull(rc, link[:]) + if err != nil && err != io.ErrUnexpectedEOF { + // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it + metrics.ZipCacheRequests.WithLabelValues("readlink", "error").Inc() + return "", err + } + + lock.Lock() + f.Extra = link[:n] + lock.Unlock() + + metrics.ZipCacheRequests.WithLabelValues("readlink", "miss").Inc() + return string(link[:n]), nil +} diff --git a/internal/vfs/zip/lru_cache.go b/internal/vfs/zip/lru_cache.go deleted file mode 100644 index fed5c3602..000000000 --- a/internal/vfs/zip/lru_cache.go +++ /dev/null @@ -1,61 +0,0 @@ -package zip - -import ( - "time" - - "github.com/karlseguin/ccache/v2" - - "gitlab.com/gitlab-org/gitlab-pages/metrics" -) - -// lruCacheGetPerPromote is a value that makes the item to
be promoted -// it is taken arbitrally as a sane value indicating that the item -// was frequently picked -// promotion moves the item to the front of the LRU list -const lruCacheGetsPerPromote = 64 - -// lruCacheItemsToPruneDiv is a value that indicates how much items -// needs to be pruned on OOM, this prunes 1/16 of items -const lruCacheItemsToPruneDiv = 16 - -type lruCache struct { - op string - duration time.Duration - cache *ccache.Cache -} - -func newLruCache(op string, maxEntries uint32, duration time.Duration) *lruCache { - configuration := ccache.Configure() - configuration.MaxSize(int64(maxEntries)) - configuration.ItemsToPrune(maxEntries / lruCacheItemsToPruneDiv) - configuration.GetsPerPromote(lruCacheGetsPerPromote) // if item gets requested frequently promote it - configuration.OnDelete(func(*ccache.Item) { - metrics.ZipCachedEntries.WithLabelValues(op).Dec() - }) - - return &lruCache{ - cache: ccache.New(configuration), - duration: duration, - } -} - -func (c *lruCache) findOrFetch(cacheNamespace, key string, fetchFn func() (interface{}, error)) (interface{}, error) { - item := c.cache.Get(cacheNamespace + key) - - if item != nil && !item.Expired() { - metrics.ZipCacheRequests.WithLabelValues(c.op, "hit").Inc() - return item.Value(), nil - } - - value, err := fetchFn() - if err != nil { - metrics.ZipCacheRequests.WithLabelValues(c.op, "error").Inc() - return nil, err - } - - metrics.ZipCacheRequests.WithLabelValues(c.op, "miss").Inc() - metrics.ZipCachedEntries.WithLabelValues(c.op).Inc() - - c.cache.Set(cacheNamespace+key, value, c.duration) - return value, nil -} diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index 78a77e1cb..b911fced8 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -18,16 +18,6 @@ const ( defaultCacheExpirationInterval = time.Minute defaultCacheCleanupInterval = time.Minute / 2 defaultCacheRefreshInterval = time.Minute / 2 - - // we assume that each item costs around 100 bytes - // this gives 
around 5MB of raw memory needed without acceleration structures - defaultDataOffsetItems = 50000 - defaultDataOffsetExpirationInterval = time.Hour - - // we assume that each item costs around 200 bytes - // this gives around 2MB of raw memory needed without acceleration structures - defaultReadlinkItems = 10000 - defaultReadlinkExpirationInterval = time.Hour ) var ( @@ -38,23 +28,16 @@ var ( type zipVFS struct { cache *cache.Cache cacheLock sync.Mutex - - dataOffsetCache *lruCache - readlinkCache *lruCache - - archiveCount int64 } // New creates a zipVFS instance that can be used by a serving request func New() vfs.VFS { zipVFS := &zipVFS{ - cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval), - dataOffsetCache: newLruCache("data-offset", defaultDataOffsetItems, defaultDataOffsetExpirationInterval), - readlinkCache: newLruCache("readlink", defaultReadlinkItems, defaultReadlinkExpirationInterval), + cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval), } zipVFS.cache.OnEvicted(func(s string, i interface{}) { - metrics.ZipCachedEntries.WithLabelValues("archive").Dec() + metrics.ZipCachedArchives.Dec() i.(*zipArchive).onEvicted() }) @@ -125,7 +108,7 @@ func (fs *zipVFS) findOrCreateArchive(ctx context.Context, path string) (*zipArc } metrics.ZipCacheRequests.WithLabelValues("archive", "miss").Inc() - metrics.ZipCachedEntries.WithLabelValues("archive").Inc() + metrics.ZipCachedArchives.Inc() } return archive.(*zipArchive), nil diff --git a/internal/vfs/zip/vfs_test.go b/internal/vfs/zip/vfs_test.go index a795b214e..e82c83e5d 100644 --- a/internal/vfs/zip/vfs_test.go +++ b/internal/vfs/zip/vfs_test.go @@ -107,8 +107,7 @@ func TestVFSFindOrCreateArchiveCacheEvict(t *testing.T) { vfs := New().(*zipVFS) - archivesMetric := metrics.ZipCachedEntries.WithLabelValues("archive") - archivesCount := testutil.ToFloat64(archivesMetric) + archivesCount := testutil.ToFloat64(metrics.ZipCachedArchives) // create a new archive and 
increase counters archive, err := vfs.findOrOpenArchive(context.Background(), path) @@ -125,6 +124,6 @@ func TestVFSFindOrCreateArchiveCacheEvict(t *testing.T) { require.NotNil(t, archive2) require.NotEqual(t, archive, archive2, "a different archive is returned") - archivesCountEnd := testutil.ToFloat64(archivesMetric) + archivesCountEnd := testutil.ToFloat64(metrics.ZipCachedArchives) require.Equal(t, float64(1), archivesCountEnd-archivesCount, "all expired archives are evicted") } diff --git a/metrics/metrics.go b/metrics/metrics.go index db7cae9a8..6503cc704 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -173,13 +173,12 @@ var ( []string{"op", "cache"}, ) - // ZipCachedArchives is the number of entries in the cache - ZipCachedEntries = prometheus.NewGaugeVec( + // ZipCachedArchives is the number of zip archives currently in the cache + ZipCachedArchives = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "gitlab_pages_zip_cached_entries", Help: "The number of entries in the cache", }, - []string{"op"}, ) // ZipArchiveEntriesCached is the number of files per zip archive currently @@ -228,6 +227,6 @@ func MustRegister() { ZipOpenedEntriesCount, ZipCacheRequests, ZipArchiveEntriesCached, - ZipCachedEntries, + ZipCachedArchives, ) } -- GitLab