diff options
author | Jacob Vosmaer <jacob@gitlab.com> | 2022-06-21 15:50:33 +0300 |
---|---|---|
committer | Jacob Vosmaer <jacob@gitlab.com> | 2022-06-21 15:50:33 +0300 |
commit | f684ffa6fbe79845e3574b22987cb4b081f842ec (patch) | |
tree | 6e6fb151bdaa80debc94f804aadcdfe5801fac23 /internal | |
parent | badf997d1a195f75362dc75a268406b2aee10e68 (diff) |
Clear timezones from cached zip entries
In https://gitlab.com/gitlab-org/gitlab-pages/-/issues/702 we
discovered that by caching many `*zip.File` instances, we end up
storing many Go timezone objects on the heap: one per cached
`*zip.File`. This adds up to about 25% of the heap size.
In this commit we set the timestamp of `*zip.File` to UTC. Because Go
re-uses a single timezone object for UTC, this causes the original
timezone objects of the `*zip.File` instances to be garbage collected.
This reduces the heap size.
Here is an example program that demonstrates the effect:
```golang
package main
import (
"archive/zip"
"log"
"os"
"runtime/pprof"
)
// main loads the zip archive named by the first command-line argument via
// load, logs how many readers were retained, and writes a heap profile to
// standard output. Any failure is reported through log.Fatal.
func main() {
	// Guard against a missing argument; indexing os.Args[1] blindly would
	// panic with an unhelpful "index out of range".
	if len(os.Args) < 2 {
		log.Fatalf("usage: %s ARCHIVE.zip", os.Args[0])
	}
	if err := load(os.Args[1]); err != nil {
		log.Fatal(err)
	}
	log.Printf("load finished: %d archives", len(readers))
	// A failed write would leave a truncated profile on stdout, so surface
	// the error instead of discarding it.
	if err := pprof.WriteHeapProfile(os.Stdout); err != nil {
		log.Fatal(err)
	}
}
// readers keeps every archive opened by load reachable for the lifetime of
// the program.
var readers []*zip.ReadCloser

// load opens the zip archive at filename 1000 times and appends each opened
// reader to readers. When the FORCE_UTC environment variable equals "1", the
// Modified timestamp of every entry is converted to UTC before the reader is
// stored. It returns the error of the first open that fails; readers opened
// before the failure stay in readers.
func load(filename string) error {
	const copies = 1000

	for n := 0; n < copies; n++ {
		archive, err := zip.OpenReader(filename)
		if err != nil {
			return err
		}

		if forceUTC := os.Getenv("FORCE_UTC") == "1"; forceUTC {
			for idx := range archive.File {
				entry := archive.File[idx]
				entry.Modified = entry.Modified.UTC()
			}
		}

		readers = append(readers, archive)
	}

	return nil
}
```
Diffstat (limited to 'internal')
-rw-r--r-- | internal/vfs/zip/archive.go | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index 0a9ac0d7..49ae2719 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -148,6 +148,13 @@ func (a *zipArchive) readArchive(url string) { continue } + // Each Modified timestamp contains a pointer to a unique timezone + // object. This wastes a lot of memory. By setting the timezone to UTC on + // each timestamp, we allow the unique timezone objects to be + // garbage-collected. Also see + // https://gitlab.com/gitlab-org/gitlab-pages/-/issues/702. + file.Modified = file.Modified.UTC() + if file.Mode().IsDir() { a.directories[file.Name] = &file.FileHeader } else { |