From f7aeaa61291dd75f92901bcbeecc7fce07a28dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 8 Nov 2018 10:24:13 +0100 Subject: Add a consolidated file cache This commit reworks how file caching is performed in Hugo. Now there is only one way, and it can be configured. This is the default configuration: ```toml [caches] [caches.getjson] dir = ":cacheDir" maxAge = -1 [caches.getcsv] dir = ":cacheDir" maxAge = -1 [caches.images] dir = ":resourceDir/_gen" maxAge = -1 [caches.assets] dir = ":resourceDir/_gen" maxAge = -1 ``` You can override any of these cache settings in your own `config.toml`. The placeholders explained: `:cacheDir`: This is the value of the `cacheDir` config option if set (can also be set via OS env variable `HUGO_CACHEDIR`). It will fall back to `/opt/build/cache/hugo_cache/` on Netlify, or a `hugo_cache` directory below the OS temp dir for the others. `:resourceDir`: This is the value of the `resourceDir` config option. `maxAge` is the time in seconds before a cache entry will be evicted, -1 means forever and 0 effectively turns that particular cache off. This means that if you run your builds on Netlify, all caches configured with `:cacheDir` will be saved and restored on the next build. For other CI vendors, please read their documentation. For a CircleCI example, see https://github.com/bep/hugo-sass-test/blob/6c3960a8f4b90e8938228688bc49bdcdd6b2d99e/.circleci/config.yml Fixes #5404 --- cache/filecache/filecache.go | 442 ++++++++++++++++++++++++++++++++++++++ cache/filecache/filecache_test.go | 306 ++++++++++++++++++++++++++ 2 files changed, 748 insertions(+) create mode 100644 cache/filecache/filecache.go create mode 100644 cache/filecache/filecache_test.go (limited to 'cache') diff --git a/cache/filecache/filecache.go b/cache/filecache/filecache.go new file mode 100644 index 000000000..45359f574 --- /dev/null +++ b/cache/filecache/filecache.go @@ -0,0 +1,442 @@ +// Copyright 2018 The Hugo Authors. 
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filecache + +import ( + "bytes" + "io" + "io/ioutil" + "path" + "path/filepath" + "strings" + "time" + + "github.com/gohugoio/hugo/common/hugio" + + "github.com/gohugoio/hugo/helpers" + + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/pkg/errors" + + "github.com/BurntSushi/locker" + "github.com/bep/mapstructure" + "github.com/spf13/afero" +) + +const ( + cachesConfigKey = "caches" + + resourcesGenDir = ":resourceDir/_gen" +) + +var defaultCacheConfig = cacheConfig{ + MaxAge: -1, // Never expire + Dir: ":cacheDir", +} + +const ( + cacheKeyGetJSON = "getjson" + cacheKeyGetCSV = "getcsv" + cacheKeyImages = "images" + cacheKeyAssets = "assets" +) + +var defaultCacheConfigs = map[string]cacheConfig{ + cacheKeyGetJSON: defaultCacheConfig, + cacheKeyGetCSV: defaultCacheConfig, + cacheKeyImages: cacheConfig{ + MaxAge: -1, + Dir: resourcesGenDir, + }, + cacheKeyAssets: cacheConfig{ + MaxAge: -1, + Dir: resourcesGenDir, + }, +} + +type cachesConfig map[string]cacheConfig + +type cacheConfig struct { + // Maxe age of ache entries in this cache. Any items older than this will + // be removed and not returned from the cache. + // -1 means forever, 0 means cache is disabled. + MaxAge int + + // The directory where files are stored. + Dir string +} + +// Cache caches a set of files in a directory. 
This is usually a file on +// disk, but since this is backed by an Afero file system, it can be anything. +type Cache struct { + Fs afero.Fs + + // Max age in seconds. + maxAge int + + nlocker *locker.Locker +} + +// ItemInfo contains info about a cached file. +type ItemInfo struct { + // This is the file's name relative to the cache's filesystem. + Name string +} + +// NewCache creates a new file cache with the given filesystem and max age. +func NewCache(fs afero.Fs, maxAge int) *Cache { + return &Cache{ + Fs: fs, + nlocker: locker.NewLocker(), + maxAge: maxAge, + } +} + +// lockedFile is a file with a lock that is released on Close. +type lockedFile struct { + afero.File + unlock func() +} + +func (l *lockedFile) Close() error { + defer l.unlock() + return l.File.Close() +} + +// GetWriteCloser returns a transactional writer into the cache. +// It's important that it's closed when done. +func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) { + id = cleanID(id) + c.nlocker.Lock(id) + + info := ItemInfo{Name: id} + + f, err := helpers.OpenFileForWriting(c.Fs, id) + if err != nil { + c.nlocker.Unlock(id) + return info, nil, err + } + + return info, &lockedFile{ + File: f, + unlock: func() { c.nlocker.Unlock(id) }, + }, nil +} + +// ReadOrCreate tries to lookup the file in cache. +// If found, it is passed to read and then closed. +// If not found a new file is created and passed to create, which should close +// it when done. 
+func (c *Cache) ReadOrCreate(id string, + read func(info ItemInfo, r io.Reader) error, + create func(info ItemInfo, w io.WriteCloser) error) (info ItemInfo, err error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info = ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + err = read(info, r) + defer r.Close() + return + } + + f, err := helpers.OpenFileForWriting(c.Fs, id) + if err != nil { + return + } + + err = create(info, f) + + return + +} + +// GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will +// be invoked and the result cached. +// This method is protected by a named lock using the given id as identifier. +func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + return info, r, nil + } + + r, err := create() + if err != nil { + return info, nil, err + } + + if c.maxAge == 0 { + // No caching. + return info, hugio.ToReadCloser(r), nil + } + + var buff bytes.Buffer + return info, + hugio.ToReadCloser(&buff), + afero.WriteReader(c.Fs, id, io.TeeReader(r, &buff)) +} + +// GetOrCreateBytes is the same as GetOrCreate, but produces a byte slice. 
+func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, err := ioutil.ReadAll(r) + return info, b, err + } + + b, err := create() + if err != nil { + return info, nil, err + } + + if c.maxAge == 0 { + return info, b, nil + } + + if err := afero.WriteReader(c.Fs, id, bytes.NewReader(b)); err != nil { + return info, nil, err + } + return info, b, nil + +} + +// GetBytes gets the file content with the given id from the cahce, nil if none found. +func (c *Cache) GetBytes(id string) (ItemInfo, []byte, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, err := ioutil.ReadAll(r) + return info, b, err + } + + return info, nil, nil +} + +// Get gets the file with the given id from the cahce, nil if none found. +func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + info := ItemInfo{Name: id} + + r := c.getOrRemove(id) + + return info, r, nil +} + +// get gets the file with the given id. If it's expired, it will +// be removed. +func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { + if c.maxAge == 0 { + // No caching. 
+ return nil + } + + if c.maxAge > 0 { + fi, err := c.Fs.Stat(id) + if err != nil { + return nil + } + + expiry := time.Now().Add(-time.Duration(c.maxAge) * time.Second) + expired := fi.ModTime().Before(expiry) + if expired { + c.Fs.Remove(id) + return nil + } + } + + f, err := c.Fs.Open(id) + + if err != nil { + return nil + } + + return f +} + +// For testing +func (c *Cache) getString(id string) string { + id = cleanID(id) + + c.nlocker.Lock(id) + defer c.nlocker.Unlock(id) + + if r := c.getOrRemove(id); r != nil { + defer r.Close() + b, _ := ioutil.ReadAll(r) + return string(b) + } + + return "" + +} + +// Caches is a named set of caches. +type Caches map[string]*Cache + +// Get gets a named cache, nil if none found. +func (f Caches) Get(name string) *Cache { + return f[strings.ToLower(name)] +} + +// GetJSOnCache gets the file cache for getJSON. +func (f Caches) GetJSONCache() *Cache { + return f[cacheKeyGetJSON] +} + +// GetCSVCache gets the file cache for getCSV. +func (f Caches) GetCSVCache() *Cache { + return f[cacheKeyGetCSV] +} + +// ImageCache gets the file cache for processed images. +func (f Caches) ImageCache() *Cache { + return f[cacheKeyImages] +} + +// AssetsCache gets the file cache for assets (processed resources, SCSS etc.). +func (f Caches) AssetsCache() *Cache { + return f[cacheKeyAssets] +} + +// NewCachesFromPaths creates a new set of file caches from the given +// configuration. 
+func NewCachesFromPaths(p *paths.Paths) (Caches, error) { + dcfg, err := decodeConfig(p) + if err != nil { + return nil, err + } + + fs := p.Fs.Source + + m := make(Caches) + for k, v := range dcfg { + baseDir := filepath.Join(v.Dir, k) + if err = fs.MkdirAll(baseDir, 0777); err != nil { + return nil, err + } + bfs := afero.NewBasePathFs(fs, baseDir) + m[k] = NewCache(bfs, v.MaxAge) + } + + return m, nil +} + +func decodeConfig(p *paths.Paths) (cachesConfig, error) { + c := make(cachesConfig) + valid := make(map[string]bool) + // Add defaults + for k, v := range defaultCacheConfigs { + c[k] = v + valid[k] = true + } + + cfg := p.Cfg + + m := cfg.GetStringMap(cachesConfigKey) + + _, isOsFs := p.Fs.Source.(*afero.OsFs) + + for k, v := range m { + cc := defaultCacheConfig + + if err := mapstructure.WeakDecode(v, &cc); err != nil { + return nil, err + } + + if cc.Dir == "" { + return c, errors.New("must provide cache Dir") + } + + name := strings.ToLower(k) + if !valid[name] { + return nil, errors.Errorf("%q is not a valid cache name", name) + } + + c[name] = cc + } + + // This is a very old flag in Hugo, but we need to respect it. + disabled := cfg.GetBool("ignoreCache") + + for k, v := range c { + v.Dir = filepath.Clean(v.Dir) + dir := filepath.ToSlash(v.Dir) + parts := strings.Split(dir, "/") + first := parts[0] + + if strings.HasPrefix(first, ":") { + resolved, err := resolveDirPlaceholder(p, first) + if err != nil { + return c, err + } + resolved = filepath.ToSlash(resolved) + + v.Dir = filepath.FromSlash(path.Join((append([]string{resolved}, parts[1:]...))...)) + + } else if isOsFs && !path.IsAbs(dir) { + return c, errors.Errorf("%q must either start with a placeholder (e.g. :cacheDir, :resourceDir) or be absolute", v.Dir) + } + + if disabled { + v.MaxAge = 0 + } + + c[k] = v + } + + return c, nil +} + +// Resolves :resourceDir => /myproject/resources etc., :cacheDir => ... 
+func resolveDirPlaceholder(p *paths.Paths, placeholder string) (string, error) { + switch strings.ToLower(placeholder) { + case ":resourcedir": + return p.AbsResourcesDir, nil + case ":cachedir": + return helpers.GetCacheDir(p.Fs.Source, p.Cfg) + } + + return "", errors.Errorf("%q is not a valid placeholder (valid values are :cacheDir or :resourceDir)", placeholder) +} + +func cleanID(name string) string { + return filepath.Clean(name) +} diff --git a/cache/filecache/filecache_test.go b/cache/filecache/filecache_test.go new file mode 100644 index 000000000..d483fc1a7 --- /dev/null +++ b/cache/filecache/filecache_test.go @@ -0,0 +1,306 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package filecache + +import ( + "fmt" + "io" + "io/ioutil" + "path/filepath" + "regexp" + "runtime" + "strings" + "sync" + "testing" + "time" + + "github.com/gohugoio/hugo/common/hugio" + + "github.com/gohugoio/hugo/config" + "github.com/gohugoio/hugo/hugofs" + "github.com/gohugoio/hugo/hugolib/paths" + "github.com/spf13/afero" + "github.com/spf13/viper" + + "github.com/stretchr/testify/require" +) + +func TestFileCache(t *testing.T) { + t.Parallel() + assert := require.New(t) + + for _, cacheDir := range []string{"mycache", ""} { + + configStr := ` +cacheDir = "CACHEDIR" +[caches] +[caches.getJSON] +maxAge = 111 +dir = ":cacheDir/c" + +` + configStr = strings.Replace(configStr, "CACHEDIR", cacheDir, 1) + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := NewCachesFromPaths(p) + assert.NoError(err) + + c := caches.Get("GetJSON") + assert.NotNil(c) + assert.Equal(111, c.maxAge) + + bfs, ok := c.Fs.(*afero.BasePathFs) + assert.True(ok) + filename, err := bfs.RealPath("key") + assert.NoError(err) + if cacheDir != "" { + assert.Equal(filepath.FromSlash(cacheDir+"/c/getjson/key"), filename) + } else { + // Temp dir. 
+ assert.Regexp(regexp.MustCompile("hugo_cache.*key"), filename) + } + + rf := func(s string) func() (io.ReadCloser, error) { + return func() (io.ReadCloser, error) { + return struct { + io.ReadSeeker + io.Closer + }{ + strings.NewReader(s), + ioutil.NopCloser(nil), + }, nil + } + } + + bf := func() ([]byte, error) { + return []byte("bcd"), nil + } + + for i := 0; i < 2; i++ { + info, r, err := c.GetOrCreate("a", rf("abc")) + assert.NoError(err) + assert.NotNil(r) + assert.Equal("a", info.Name) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal("abc", string(b)) + + info, b, err = c.GetOrCreateBytes("b", bf) + assert.NoError(err) + assert.NotNil(r) + assert.Equal("b", info.Name) + assert.Equal("bcd", string(b)) + + _, b, err = c.GetOrCreateBytes("a", bf) + assert.NoError(err) + assert.Equal("abc", string(b)) + + _, r, err = c.GetOrCreate("a", rf("bcd")) + assert.NoError(err) + b, _ = ioutil.ReadAll(r) + r.Close() + assert.Equal("abc", string(b)) + } + + assert.NotNil(caches.Get("getJSON")) + + info, w, err := caches.ImageCache().WriteCloser("mykey") + assert.NoError(err) + assert.Equal("mykey", info.Name) + io.WriteString(w, "Hugo is great!") + w.Close() + assert.Equal("Hugo is great!", caches.ImageCache().getString("mykey")) + + info, r, err := caches.ImageCache().Get("mykey") + assert.NoError(err) + assert.NotNil(r) + assert.Equal("mykey", info.Name) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal("Hugo is great!", string(b)) + + info, b, err = caches.ImageCache().GetBytes("mykey") + assert.NoError(err) + assert.Equal("mykey", info.Name) + assert.Equal("Hugo is great!", string(b)) + + } + +} + +func TestFileCacheConcurrent(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +[caches] +[caches.getjson] +maxAge = 1 +dir = "/cache/c" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := 
NewCachesFromPaths(p) + assert.NoError(err) + + const cacheName = "getjson" + + filenameData := func(i int) (string, string) { + data := fmt.Sprintf("data: %d", i) + filename := fmt.Sprintf("file%d", i) + return filename, data + } + + var wg sync.WaitGroup + + for i := 0; i < 50; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + for j := 0; j < 20; j++ { + c := caches.Get(cacheName) + assert.NotNil(c) + filename, data := filenameData(i) + _, r, err := c.GetOrCreate(filename, func() (io.ReadCloser, error) { + return hugio.ToReadCloser(strings.NewReader(data)), nil + }) + assert.NoError(err) + b, _ := ioutil.ReadAll(r) + r.Close() + assert.Equal(data, string(b)) + // Trigger some expiration. + time.Sleep(50 * time.Millisecond) + } + }(i) + + } + wg.Wait() +} + +func TestDecodeConfig(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +[caches] +[caches.getJSON] +maxAge = 1234 +dir = "/path/to/c1" +[caches.getCSV] +maxAge = 3456 +dir = "/path/to/c2" +[caches.images] +dir = "/path/to/c3" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, err := decodeConfig(p) + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + c2 := decoded["getcsv"] + assert.Equal(3456, c2.MaxAge) + assert.Equal(filepath.FromSlash("/path/to/c2"), c2.Dir) + + c3 := decoded["images"] + assert.Equal(-1, c3.MaxAge) + assert.Equal(filepath.FromSlash("/path/to/c3"), c3.Dir) + +} + +func TestDecodeConfigIgnoreCache(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +ignoreCache = true +[caches] +[caches.getJSON] +maxAge = 1234 +dir = "/path/to/c1" +[caches.getCSV] +maxAge = 3456 +dir = "/path/to/c2" +[caches.images] +dir = "/path/to/c3" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, 
err := decodeConfig(p) + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + for _, v := range decoded { + assert.Equal(0, v.MaxAge) + } + +} + +func TestDecodeConfigDefault(t *testing.T) { + assert := require.New(t) + cfg := viper.New() + if runtime.GOOS == "windows" { + cfg.Set("resourceDir", "c:\\cache\\resources") + cfg.Set("cacheDir", "c:\\cache\\thecache") + + } else { + cfg.Set("resourceDir", "/cache/resources") + cfg.Set("cacheDir", "/cache/thecache") + } + + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + decoded, err := decodeConfig(p) + + assert.NoError(err) + + assert.Equal(4, len(decoded)) + + if runtime.GOOS == "windows" { + assert.Equal("c:\\cache\\resources\\_gen", decoded[cacheKeyImages].Dir) + } else { + assert.Equal("/cache/resources/_gen", decoded[cacheKeyImages].Dir) + } +} -- cgit v1.2.3