diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2018-11-08 12:24:13 +0300 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2018-11-13 16:19:42 +0300 |
commit | f7aeaa61291dd75f92901bcbeecc7fce07a28dec (patch) | |
tree | 409c03e259a38fce2beeab46655fd5108c84cd5c /tpl/data | |
parent | 7d78a2afd3c4a6c4af77a4ddcbd2a82f15986048 (diff) |
Add a consolidated file cache
This commit reworks how file caching is performed in Hugo. Now there is only one way, and it can be configured.
This is the default configuration:
```toml
[caches]
[caches.getjson]
dir = ":cacheDir"
maxAge = -1
[caches.getcsv]
dir = ":cacheDir"
maxAge = -1
[caches.images]
dir = ":resourceDir/_gen"
maxAge = -1
[caches.assets]
dir = ":resourceDir/_gen"
maxAge = -1
```
You can override any of these cache settings in your own `config.toml`.
The placeholders explained:
`:cacheDir`: This is the value of the `cacheDir` config option if set (can also be set via OS env variable `HUGO_CACHEDIR`). It will fall back to `/opt/build/cache/hugo_cache/` on Netlify, or a `hugo_cache` directory below the OS temp dir for the others.
`:resourceDir`: This is the value of the `resourceDir` config option.
`maxAge` is the time in seconds before a cache entry will be evicted, -1 means forever and 0 effectively turns that particular cache off.
This means that if you run your builds on Netlify, all caches configured with `:cacheDir` will be saved and restored on the next build. For other CI vendors, please read their documentation. For a CircleCI example, see https://github.com/bep/hugo-sass-test/blob/6c3960a8f4b90e8938228688bc49bdcdd6b2d99e/.circleci/config.yml
Fixes #5404
Diffstat (limited to 'tpl/data')
-rw-r--r-- | tpl/data/cache.go | 85 | ||||
-rw-r--r-- | tpl/data/cache_test.go | 63 | ||||
-rw-r--r-- | tpl/data/data.go | 98 | ||||
-rw-r--r-- | tpl/data/init_test.go | 7 | ||||
-rw-r--r-- | tpl/data/resources.go | 147 | ||||
-rw-r--r-- | tpl/data/resources_test.go | 63 |
6 files changed, 161 insertions, 302 deletions
diff --git a/tpl/data/cache.go b/tpl/data/cache.go deleted file mode 100644 index 6c4033160..000000000 --- a/tpl/data/cache.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2017 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package data - -import ( - "crypto/md5" - "encoding/hex" - "errors" - "sync" - - "github.com/gohugoio/hugo/config" - "github.com/gohugoio/hugo/helpers" - "github.com/spf13/afero" -) - -var cacheMu sync.RWMutex - -// getCacheFileID returns the cache ID for a string. -func getCacheFileID(cfg config.Provider, id string) string { - hash := md5.Sum([]byte(id)) - return cfg.GetString("cacheDir") + hex.EncodeToString(hash[:]) -} - -// getCache returns the content for an ID from the file cache or an error. -// If the ID is not found, return nil,nil. -func getCache(id string, fs afero.Fs, cfg config.Provider, ignoreCache bool) ([]byte, error) { - if ignoreCache { - return nil, nil - } - - cacheMu.RLock() - defer cacheMu.RUnlock() - - fID := getCacheFileID(cfg, id) - isExists, err := helpers.Exists(fID, fs) - if err != nil { - return nil, err - } - if !isExists { - return nil, nil - } - - return afero.ReadFile(fs, fID) -} - -// writeCache writes bytes associated with an ID into the file cache. 
-func writeCache(id string, c []byte, fs afero.Fs, cfg config.Provider, ignoreCache bool) error { - if ignoreCache { - return nil - } - - cacheMu.Lock() - defer cacheMu.Unlock() - - fID := getCacheFileID(cfg, id) - f, err := fs.Create(fID) - if err != nil { - return errors.New("Error: " + err.Error() + ". Failed to create file: " + fID) - } - defer f.Close() - - n, err := f.Write(c) - if err != nil { - return errors.New("Error: " + err.Error() + ". Failed to write to file: " + fID) - } - if n == 0 { - return errors.New("No bytes written to file: " + fID) - } - return nil -} - -func deleteCache(id string, fs afero.Fs, cfg config.Provider) error { - return fs.Remove(getCacheFileID(cfg, id)) -} diff --git a/tpl/data/cache_test.go b/tpl/data/cache_test.go deleted file mode 100644 index 6057f0321..000000000 --- a/tpl/data/cache_test.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2017 The Hugo Authors. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package data - -import ( - "fmt" - "testing" - - "github.com/spf13/afero" - "github.com/spf13/viper" - "github.com/stretchr/testify/assert" -) - -func TestCache(t *testing.T) { - t.Parallel() - - fs := new(afero.MemMapFs) - - for i, test := range []struct { - path string - content []byte - ignore bool - }{ - {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false}, - {"fOO,bar:foo%bAR", []byte(`T€st Content 123 fOO,bar:foo%bAR`), false}, - {"FOo/BaR.html", []byte(`FOo/BaR.html T€st Content 123`), false}, - {"трям/трям", []byte(`T€st трям/трям Content 123`), false}, - {"은행", []byte(`T€st C은행ontent 123`), false}, - {"Банковский кассир", []byte(`Банковский кассир T€st Content 123`), false}, - {"Банковский кассир", []byte(`Банковский кассир T€st Content 456`), true}, - } { - msg := fmt.Sprintf("Test #%d: %v", i, test) - - cfg := viper.New() - - c, err := getCache(test.path, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - assert.Nil(t, c, msg) - - err = writeCache(test.path, test.content, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - - c, err = getCache(test.path, fs, cfg, test.ignore) - assert.NoError(t, err, msg) - - if test.ignore { - assert.Nil(t, c, msg) - } else { - assert.Equal(t, string(test.content), string(c)) - } - } -} diff --git a/tpl/data/data.go b/tpl/data/data.go index cecce4b45..61de2f72e 100644 --- a/tpl/data/data.go +++ b/tpl/data/data.go @@ -20,17 +20,20 @@ import ( "errors" "net/http" "strings" - "time" + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/deps" _errors "github.com/pkg/errors" ) // New returns a new instance of the data-namespaced template functions. 
func New(deps *deps.Deps) *Namespace { + return &Namespace{ - deps: deps, - client: http.DefaultClient, + deps: deps, + cacheGetCSV: deps.FileCaches.GetCSVCache(), + cacheGetJSON: deps.FileCaches.GetJSONCache(), + client: http.DefaultClient, } } @@ -38,6 +41,9 @@ func New(deps *deps.Deps) *Namespace { type Namespace struct { deps *deps.Deps + cacheGetJSON *filecache.Cache + cacheGetCSV *filecache.Cache + client *http.Client } @@ -48,40 +54,34 @@ type Namespace struct { // GetCSV returns nil or a slice slice to use in a short code. func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err error) { url := strings.Join(urlParts, "") + cache := ns.cacheGetCSV - var clearCacheSleep = func(i int, u string) { - ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep) - time.Sleep(resSleep) - deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg) - } - - for i := 0; i <= resRetries; i++ { - var req *http.Request - req, err = http.NewRequest("GET", url, nil) - if err != nil { - return nil, _errors.Wrapf(err, "failed to create request for getCSV for resource %s", url) + unmarshal := func(b []byte) (error, bool) { + if !bytes.Contains(b, []byte(sep)) { + return _errors.Errorf("cannot find separator %s in CSV for %s", sep, url), false } - req.Header.Add("Accept", "text/csv") - req.Header.Add("Accept", "text/plain") + if d, err = parseCSV(b, sep); err != nil { + err = _errors.Wrapf(err, "failed to parse CSV file %s", url) - var c []byte - c, err = ns.getResource(req) - if err != nil { - return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url) + return err, true } - if !bytes.Contains(c, []byte(sep)) { - return nil, _errors.Errorf("cannot find separator %s in CSV for %s", sep, url) - } + return nil, false + } - if d, err = parseCSV(c, sep); err != nil { - err = _errors.Wrapf(err, "failed to parse CSV file %s", url) + var req *http.Request + req, err = http.NewRequest("GET", url, nil) + if err != nil { + return nil, 
_errors.Wrapf(err, "failed to create request for getCSV for resource %s", url) + } - clearCacheSleep(i, url) - continue - } - break + req.Header.Add("Accept", "text/csv") + req.Header.Add("Accept", "text/plain") + + err = ns.getResource(cache, unmarshal, req) + if err != nil { + return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url) } return @@ -90,38 +90,34 @@ func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err e // GetJSON expects one or n-parts of a URL to a resource which can either be a local or a remote one. // If you provide multiple parts they will be joined together to the final URL. // GetJSON returns nil or parsed JSON to use in a short code. -func (ns *Namespace) GetJSON(urlParts ...string) (v interface{}, err error) { +func (ns *Namespace) GetJSON(urlParts ...string) (interface{}, error) { + var v interface{} url := strings.Join(urlParts, "") + cache := ns.cacheGetJSON - for i := 0; i <= resRetries; i++ { - var req *http.Request - req, err = http.NewRequest("GET", url, nil) - if err != nil { - return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url) - } - - req.Header.Add("Accept", "application/json") + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url) + } - var c []byte - c, err = ns.getResource(req) + unmarshal := func(b []byte) (error, bool) { + err := json.Unmarshal(b, &v) if err != nil { - return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url) + return err, true } - err = json.Unmarshal(c, &v) - if err != nil { - ns.deps.Log.INFO.Printf("Cannot read JSON from resource %s: %s", url, err) - ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep) - time.Sleep(resSleep) - deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg) - continue - } - break + return nil, false } + req.Header.Add("Accept", "application/json") + + err = 
ns.getResource(cache, unmarshal, req) + if err != nil { return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url) } - return + + return v, nil + } // parseCSV parses bytes of CSV data into a slice slice string or an error diff --git a/tpl/data/init_test.go b/tpl/data/init_test.go index 6bb689a95..c4751e892 100644 --- a/tpl/data/init_test.go +++ b/tpl/data/init_test.go @@ -16,8 +16,8 @@ package data import ( "testing" - "github.com/gohugoio/hugo/deps" "github.com/gohugoio/hugo/tpl/internal" + "github.com/spf13/viper" "github.com/stretchr/testify/require" ) @@ -25,8 +25,11 @@ func TestInit(t *testing.T) { var found bool var ns *internal.TemplateFuncsNamespace + v := viper.New() + v.Set("contentDir", "content") + for _, nsf := range internal.TemplateFuncsNamespaceRegistry { - ns = nsf(&deps.Deps{}) + ns = nsf(newDeps(v)) if ns.Name == name { found = true break diff --git a/tpl/data/resources.go b/tpl/data/resources.go index 11c35f9d9..8b246a662 100644 --- a/tpl/data/resources.go +++ b/tpl/data/resources.go @@ -14,102 +14,81 @@ package data import ( - "fmt" "io/ioutil" "net/http" "path/filepath" - "sync" "time" + "github.com/pkg/errors" + + "github.com/gohugoio/hugo/cache/filecache" + "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/helpers" "github.com/spf13/afero" - jww "github.com/spf13/jwalterweatherman" ) var ( - remoteURLLock = &remoteLock{m: make(map[string]*sync.Mutex)} - resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying - resRetries = 1 // number of retries to load the JSON from URL or local file system + resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying + resRetries = 1 // number of retries to load the JSON from URL ) -type remoteLock struct { - sync.RWMutex - m map[string]*sync.Mutex -} - -// URLLock locks an URL during download -func (l *remoteLock) URLLock(url string) { - var ( - lock *sync.Mutex - ok bool - ) - l.Lock() - if lock, ok = l.m[url]; !ok { 
- lock = &sync.Mutex{} - l.m[url] = lock - } - l.Unlock() - lock.Lock() -} - -// URLUnlock unlocks an URL when the download has been finished. Use only in defer calls. -func (l *remoteLock) URLUnlock(url string) { - l.RLock() - defer l.RUnlock() - if um, ok := l.m[url]; ok { - um.Unlock() - } -} - // getRemote loads the content of a remote file. This method is thread safe. -func getRemote(req *http.Request, fs afero.Fs, cfg config.Provider, hc *http.Client) ([]byte, error) { +func (ns *Namespace) getRemote(cache *filecache.Cache, unmarshal func([]byte) (error, bool), req *http.Request) error { url := req.URL.String() + id := helpers.MD5String(url) + var handled bool + var retry bool + + _, b, err := cache.GetOrCreateBytes(id, func() ([]byte, error) { + var err error + handled = true + for i := 0; i <= resRetries; i++ { + ns.deps.Log.INFO.Printf("Downloading: %s ...", url) + var res *http.Response + res, err = ns.client.Do(req) + if err != nil { + return nil, err + } + + if isHTTPError(res) { + return nil, errors.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode)) + } + + var b []byte + b, err = ioutil.ReadAll(res.Body) + + if err != nil { + return nil, err + } + res.Body.Close() + + err, retry = unmarshal(b) + + if err == nil { + // Return it so it can be cached. 
+ return b, nil + } + + if !retry { + return nil, err + } + + ns.deps.Log.INFO.Printf("Cannot read remote resource %s: %s", url, err) + ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i+1, url, resSleep) + time.Sleep(resSleep) + } - c, err := getCache(url, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { return nil, err - } - if c != nil { - return c, nil - } - // avoid race condition with locks, block other goroutines if the current url is processing - remoteURLLock.URLLock(url) - defer func() { remoteURLLock.URLUnlock(url) }() + }) - // avoid multiple locks due to calling getCache twice - c, err = getCache(url, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { - return nil, err - } - if c != nil { - return c, nil + if !handled { + // This is cached content and should be correct. + err, _ = unmarshal(b) } - jww.INFO.Printf("Downloading: %s ...", url) - res, err := hc.Do(req) - if err != nil { - return nil, err - } - - if res.StatusCode < 200 || res.StatusCode > 299 { - return nil, fmt.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode)) - } - - c, err = ioutil.ReadAll(res.Body) - res.Body.Close() - if err != nil { - return nil, err - } - - err = writeCache(url, c, fs, cfg, cfg.GetBool("ignoreCache")) - if err != nil { - return nil, err - } - - jww.INFO.Printf("... and cached to: %s", getCacheFileID(cfg, url)) - return c, nil + return err } // getLocal loads the content of a local file @@ -123,12 +102,22 @@ func getLocal(url string, fs afero.Fs, cfg config.Provider) ([]byte, error) { } -// getResource loads the content of a local or remote file -func (ns *Namespace) getResource(req *http.Request) ([]byte, error) { +// getResource loads the content of a local or remote file and returns its content and the +// cache ID used, if relevant. 
+func (ns *Namespace) getResource(cache *filecache.Cache, unmarshal func(b []byte) (error, bool), req *http.Request) error { switch req.URL.Scheme { case "": - return getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg) + b, err := getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg) + if err != nil { + return err + } + err, _ = unmarshal(b) + return err default: - return getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, ns.client) + return ns.getRemote(cache, unmarshal, req) } } + +func isHTTPError(res *http.Response) bool { + return res.StatusCode < 200 || res.StatusCode > 299 +} diff --git a/tpl/data/resources_test.go b/tpl/data/resources_test.go index c1da36d05..54eb123ee 100644 --- a/tpl/data/resources_test.go +++ b/tpl/data/resources_test.go @@ -23,6 +23,9 @@ import ( "testing" "time" + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/deps" @@ -85,16 +88,16 @@ func getTestServer(handler func(w http.ResponseWriter, r *http.Request)) (*httpt func TestScpGetRemote(t *testing.T) { t.Parallel() fs := new(afero.MemMapFs) + cache := filecache.NewCache(fs, 100) tests := []struct { path string content []byte - ignore bool }{ - {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false}, - {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`), false}, - {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`), false}, - {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`), true}, + {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`)}, + {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`)}, + {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`)}, + {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`)}, } for _, test := range tests { @@ -108,53 +111,64 @@ func 
TestScpGetRemote(t *testing.T) { }) defer func() { srv.Close() }() - cfg := viper.New() + ns := newTestNs() + ns.client = cl - c, err := getRemote(req, fs, cfg, cl) - require.NoError(t, err, msg) - assert.Equal(t, string(test.content), string(c)) + var c []byte + f := func(b []byte) (error, bool) { + c = b + return nil, false + } - c, err = getCache(req.URL.String(), fs, cfg, test.ignore) + err = ns.getRemote(cache, f, req) require.NoError(t, err, msg) + assert.Equal(t, string(test.content), string(c)) - if test.ignore { - assert.Empty(t, c, msg) - } else { - assert.Equal(t, string(test.content), string(c)) + assert.Equal(t, string(test.content), string(c)) - } } } func TestScpGetRemoteParallel(t *testing.T) { t.Parallel() - ns := newTestNs() - content := []byte(`T€st Content 123`) srv, cl := getTestServer(func(w http.ResponseWriter, r *http.Request) { w.Write(content) }) + defer func() { srv.Close() }() url := "http://Foo.Bar/foo_Bar-Foo" req, err := http.NewRequest("GET", url, nil) require.NoError(t, err) - for _, ignoreCache := range []bool{false, true} { + for _, ignoreCache := range []bool{false} { cfg := viper.New() cfg.Set("ignoreCache", ignoreCache) + cfg.Set("contentDir", "content") + + ns := New(newDeps(cfg)) + ns.client = cl var wg sync.WaitGroup - for i := 0; i < 50; i++ { + for i := 0; i < 1; i++ { wg.Add(1) go func(gor int) { defer wg.Done() for j := 0; j < 10; j++ { - c, err := getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, cl) + var c []byte + f := func(b []byte) (error, bool) { + c = b + return nil, false + } + err := ns.getRemote(ns.cacheGetJSON, f, req) + assert.NoError(t, err) - assert.Equal(t, string(content), string(c)) + if string(content) != string(c) { + t.Fatalf("expected\n%q\ngot\n%q", content, c) + } time.Sleep(23 * time.Millisecond) } @@ -173,11 +187,16 @@ func newDeps(cfg config.Provider) *deps.Deps { panic(err) } + fs := hugofs.NewMem(l) logger := loggers.NewErrorLogger() + p, _ := paths.New(fs, cfg) + + fileCaches, _ := 
filecache.NewCachesFromPaths(p) return &deps.Deps{ Cfg: cfg, - Fs: hugofs.NewMem(l), + Fs: fs, + FileCaches: fileCaches, ContentSpec: cs, Log: logger, DistinctErrorLog: helpers.NewDistinctLogger(logger.ERROR), |