Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/gohugoio/hugo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tpl/data
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-11-08 12:24:13 +0300
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-11-13 16:19:42 +0300
commitf7aeaa61291dd75f92901bcbeecc7fce07a28dec (patch)
tree409c03e259a38fce2beeab46655fd5108c84cd5c /tpl/data
parent7d78a2afd3c4a6c4af77a4ddcbd2a82f15986048 (diff)
Add a consolidated file cache
This commit reworks how file caching is performed in Hugo. Now there is only one way, and it can be configured. This is the default configuration: ```toml [caches] [caches.getjson] dir = ":cacheDir" maxAge = -1 [caches.getcsv] dir = ":cacheDir" maxAge = -1 [caches.images] dir = ":resourceDir/_gen" maxAge = -1 [caches.assets] dir = ":resourceDir/_gen" maxAge = -1 ``` You can override any of these cache settings in your own `config.toml`. The placeholders explained: `:cacheDir`: This is the value of the `cacheDir` config option if set (can also be set via OS env variable `HUGO_CACHEDIR`). It will fall back to `/opt/build/cache/hugo_cache/` on Netlify, or a `hugo_cache` directory below the OS temp dir for the others. `:resourceDir`: This is the value of the `resourceDir` config option. `maxAge` is the time in seconds before a cache entry will be evicted, -1 means forever and 0 effectively turns that particular cache off. This means that if you run your builds on Netlify, all caches configured with `:cacheDir` will be saved and restored on the next build. For other CI vendors, please read their documentation. For a CircleCI example, see https://github.com/bep/hugo-sass-test/blob/6c3960a8f4b90e8938228688bc49bdcdd6b2d99e/.circleci/config.yml Fixes #5404
Diffstat (limited to 'tpl/data')
-rw-r--r--tpl/data/cache.go85
-rw-r--r--tpl/data/cache_test.go63
-rw-r--r--tpl/data/data.go98
-rw-r--r--tpl/data/init_test.go7
-rw-r--r--tpl/data/resources.go147
-rw-r--r--tpl/data/resources_test.go63
6 files changed, 161 insertions, 302 deletions
diff --git a/tpl/data/cache.go b/tpl/data/cache.go
deleted file mode 100644
index 6c4033160..000000000
--- a/tpl/data/cache.go
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2017 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package data
-
-import (
- "crypto/md5"
- "encoding/hex"
- "errors"
- "sync"
-
- "github.com/gohugoio/hugo/config"
- "github.com/gohugoio/hugo/helpers"
- "github.com/spf13/afero"
-)
-
-var cacheMu sync.RWMutex
-
-// getCacheFileID returns the cache ID for a string.
-func getCacheFileID(cfg config.Provider, id string) string {
- hash := md5.Sum([]byte(id))
- return cfg.GetString("cacheDir") + hex.EncodeToString(hash[:])
-}
-
-// getCache returns the content for an ID from the file cache or an error.
-// If the ID is not found, return nil,nil.
-func getCache(id string, fs afero.Fs, cfg config.Provider, ignoreCache bool) ([]byte, error) {
- if ignoreCache {
- return nil, nil
- }
-
- cacheMu.RLock()
- defer cacheMu.RUnlock()
-
- fID := getCacheFileID(cfg, id)
- isExists, err := helpers.Exists(fID, fs)
- if err != nil {
- return nil, err
- }
- if !isExists {
- return nil, nil
- }
-
- return afero.ReadFile(fs, fID)
-}
-
-// writeCache writes bytes associated with an ID into the file cache.
-func writeCache(id string, c []byte, fs afero.Fs, cfg config.Provider, ignoreCache bool) error {
- if ignoreCache {
- return nil
- }
-
- cacheMu.Lock()
- defer cacheMu.Unlock()
-
- fID := getCacheFileID(cfg, id)
- f, err := fs.Create(fID)
- if err != nil {
- return errors.New("Error: " + err.Error() + ". Failed to create file: " + fID)
- }
- defer f.Close()
-
- n, err := f.Write(c)
- if err != nil {
- return errors.New("Error: " + err.Error() + ". Failed to write to file: " + fID)
- }
- if n == 0 {
- return errors.New("No bytes written to file: " + fID)
- }
- return nil
-}
-
-func deleteCache(id string, fs afero.Fs, cfg config.Provider) error {
- return fs.Remove(getCacheFileID(cfg, id))
-}
diff --git a/tpl/data/cache_test.go b/tpl/data/cache_test.go
deleted file mode 100644
index 6057f0321..000000000
--- a/tpl/data/cache_test.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2017 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package data
-
-import (
- "fmt"
- "testing"
-
- "github.com/spf13/afero"
- "github.com/spf13/viper"
- "github.com/stretchr/testify/assert"
-)
-
-func TestCache(t *testing.T) {
- t.Parallel()
-
- fs := new(afero.MemMapFs)
-
- for i, test := range []struct {
- path string
- content []byte
- ignore bool
- }{
- {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false},
- {"fOO,bar:foo%bAR", []byte(`T€st Content 123 fOO,bar:foo%bAR`), false},
- {"FOo/BaR.html", []byte(`FOo/BaR.html T€st Content 123`), false},
- {"трям/трям", []byte(`T€st трям/трям Content 123`), false},
- {"은행", []byte(`T€st C은행ontent 123`), false},
- {"Банковский кассир", []byte(`Банковский кассир T€st Content 123`), false},
- {"Банковский кассир", []byte(`Банковский кассир T€st Content 456`), true},
- } {
- msg := fmt.Sprintf("Test #%d: %v", i, test)
-
- cfg := viper.New()
-
- c, err := getCache(test.path, fs, cfg, test.ignore)
- assert.NoError(t, err, msg)
- assert.Nil(t, c, msg)
-
- err = writeCache(test.path, test.content, fs, cfg, test.ignore)
- assert.NoError(t, err, msg)
-
- c, err = getCache(test.path, fs, cfg, test.ignore)
- assert.NoError(t, err, msg)
-
- if test.ignore {
- assert.Nil(t, c, msg)
- } else {
- assert.Equal(t, string(test.content), string(c))
- }
- }
-}
diff --git a/tpl/data/data.go b/tpl/data/data.go
index cecce4b45..61de2f72e 100644
--- a/tpl/data/data.go
+++ b/tpl/data/data.go
@@ -20,17 +20,20 @@ import (
"errors"
"net/http"
"strings"
- "time"
+ "github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/deps"
_errors "github.com/pkg/errors"
)
// New returns a new instance of the data-namespaced template functions.
func New(deps *deps.Deps) *Namespace {
+
return &Namespace{
- deps: deps,
- client: http.DefaultClient,
+ deps: deps,
+ cacheGetCSV: deps.FileCaches.GetCSVCache(),
+ cacheGetJSON: deps.FileCaches.GetJSONCache(),
+ client: http.DefaultClient,
}
}
@@ -38,6 +41,9 @@ func New(deps *deps.Deps) *Namespace {
type Namespace struct {
deps *deps.Deps
+ cacheGetJSON *filecache.Cache
+ cacheGetCSV *filecache.Cache
+
client *http.Client
}
@@ -48,40 +54,34 @@ type Namespace struct {
// GetCSV returns nil or a slice slice to use in a short code.
func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err error) {
url := strings.Join(urlParts, "")
+ cache := ns.cacheGetCSV
- var clearCacheSleep = func(i int, u string) {
- ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep)
- time.Sleep(resSleep)
- deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg)
- }
-
- for i := 0; i <= resRetries; i++ {
- var req *http.Request
- req, err = http.NewRequest("GET", url, nil)
- if err != nil {
- return nil, _errors.Wrapf(err, "failed to create request for getCSV for resource %s", url)
+ unmarshal := func(b []byte) (error, bool) {
+ if !bytes.Contains(b, []byte(sep)) {
+ return _errors.Errorf("cannot find separator %s in CSV for %s", sep, url), false
}
- req.Header.Add("Accept", "text/csv")
- req.Header.Add("Accept", "text/plain")
+ if d, err = parseCSV(b, sep); err != nil {
+ err = _errors.Wrapf(err, "failed to parse CSV file %s", url)
- var c []byte
- c, err = ns.getResource(req)
- if err != nil {
- return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url)
+ return err, true
}
- if !bytes.Contains(c, []byte(sep)) {
- return nil, _errors.Errorf("cannot find separator %s in CSV for %s", sep, url)
- }
+ return nil, false
+ }
- if d, err = parseCSV(c, sep); err != nil {
- err = _errors.Wrapf(err, "failed to parse CSV file %s", url)
+ var req *http.Request
+ req, err = http.NewRequest("GET", url, nil)
+ if err != nil {
+ return nil, _errors.Wrapf(err, "failed to create request for getCSV for resource %s", url)
+ }
- clearCacheSleep(i, url)
- continue
- }
- break
+ req.Header.Add("Accept", "text/csv")
+ req.Header.Add("Accept", "text/plain")
+
+ err = ns.getResource(cache, unmarshal, req)
+ if err != nil {
+ return nil, _errors.Wrapf(err, "failed to read CSV resource %q", url)
}
return
@@ -90,38 +90,34 @@ func (ns *Namespace) GetCSV(sep string, urlParts ...string) (d [][]string, err e
// GetJSON expects one or n-parts of a URL to a resource which can either be a local or a remote one.
// If you provide multiple parts they will be joined together to the final URL.
// GetJSON returns nil or parsed JSON to use in a short code.
-func (ns *Namespace) GetJSON(urlParts ...string) (v interface{}, err error) {
+func (ns *Namespace) GetJSON(urlParts ...string) (interface{}, error) {
+ var v interface{}
url := strings.Join(urlParts, "")
+ cache := ns.cacheGetJSON
- for i := 0; i <= resRetries; i++ {
- var req *http.Request
- req, err = http.NewRequest("GET", url, nil)
- if err != nil {
- return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url)
- }
-
- req.Header.Add("Accept", "application/json")
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url)
+ }
- var c []byte
- c, err = ns.getResource(req)
+ unmarshal := func(b []byte) (error, bool) {
+ err := json.Unmarshal(b, &v)
if err != nil {
- return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url)
+ return err, true
}
- err = json.Unmarshal(c, &v)
- if err != nil {
- ns.deps.Log.INFO.Printf("Cannot read JSON from resource %s: %s", url, err)
- ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i, url, resSleep)
- time.Sleep(resSleep)
- deleteCache(url, ns.deps.Fs.Source, ns.deps.Cfg)
- continue
- }
- break
+ return nil, false
}
+ req.Header.Add("Accept", "application/json")
+
+ err = ns.getResource(cache, unmarshal, req)
+
if err != nil {
return nil, _errors.Wrapf(err, "failed to get getJSON resource %q", url)
}
- return
+
+ return v, nil
+
}
// parseCSV parses bytes of CSV data into a slice slice string or an error
diff --git a/tpl/data/init_test.go b/tpl/data/init_test.go
index 6bb689a95..c4751e892 100644
--- a/tpl/data/init_test.go
+++ b/tpl/data/init_test.go
@@ -16,8 +16,8 @@ package data
import (
"testing"
- "github.com/gohugoio/hugo/deps"
"github.com/gohugoio/hugo/tpl/internal"
+ "github.com/spf13/viper"
"github.com/stretchr/testify/require"
)
@@ -25,8 +25,11 @@ func TestInit(t *testing.T) {
var found bool
var ns *internal.TemplateFuncsNamespace
+ v := viper.New()
+ v.Set("contentDir", "content")
+
for _, nsf := range internal.TemplateFuncsNamespaceRegistry {
- ns = nsf(&deps.Deps{})
+ ns = nsf(newDeps(v))
if ns.Name == name {
found = true
break
diff --git a/tpl/data/resources.go b/tpl/data/resources.go
index 11c35f9d9..8b246a662 100644
--- a/tpl/data/resources.go
+++ b/tpl/data/resources.go
@@ -14,102 +14,81 @@
package data
import (
- "fmt"
"io/ioutil"
"net/http"
"path/filepath"
- "sync"
"time"
+ "github.com/pkg/errors"
+
+ "github.com/gohugoio/hugo/cache/filecache"
+
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/helpers"
"github.com/spf13/afero"
- jww "github.com/spf13/jwalterweatherman"
)
var (
- remoteURLLock = &remoteLock{m: make(map[string]*sync.Mutex)}
- resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying
- resRetries = 1 // number of retries to load the JSON from URL or local file system
+ resSleep = time.Second * 2 // if JSON decoding failed sleep for n seconds before retrying
+ resRetries = 1 // number of retries to load the JSON from URL
)
-type remoteLock struct {
- sync.RWMutex
- m map[string]*sync.Mutex
-}
-
-// URLLock locks an URL during download
-func (l *remoteLock) URLLock(url string) {
- var (
- lock *sync.Mutex
- ok bool
- )
- l.Lock()
- if lock, ok = l.m[url]; !ok {
- lock = &sync.Mutex{}
- l.m[url] = lock
- }
- l.Unlock()
- lock.Lock()
-}
-
-// URLUnlock unlocks an URL when the download has been finished. Use only in defer calls.
-func (l *remoteLock) URLUnlock(url string) {
- l.RLock()
- defer l.RUnlock()
- if um, ok := l.m[url]; ok {
- um.Unlock()
- }
-}
-
// getRemote loads the content of a remote file. This method is thread safe.
-func getRemote(req *http.Request, fs afero.Fs, cfg config.Provider, hc *http.Client) ([]byte, error) {
+func (ns *Namespace) getRemote(cache *filecache.Cache, unmarshal func([]byte) (error, bool), req *http.Request) error {
url := req.URL.String()
+ id := helpers.MD5String(url)
+ var handled bool
+ var retry bool
+
+ _, b, err := cache.GetOrCreateBytes(id, func() ([]byte, error) {
+ var err error
+ handled = true
+ for i := 0; i <= resRetries; i++ {
+ ns.deps.Log.INFO.Printf("Downloading: %s ...", url)
+ var res *http.Response
+ res, err = ns.client.Do(req)
+ if err != nil {
+ return nil, err
+ }
+
+ if isHTTPError(res) {
+ return nil, errors.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode))
+ }
+
+ var b []byte
+ b, err = ioutil.ReadAll(res.Body)
+
+ if err != nil {
+ return nil, err
+ }
+ res.Body.Close()
+
+ err, retry = unmarshal(b)
+
+ if err == nil {
+ // Return it so it can be cached.
+ return b, nil
+ }
+
+ if !retry {
+ return nil, err
+ }
+
+ ns.deps.Log.INFO.Printf("Cannot read remote resource %s: %s", url, err)
+ ns.deps.Log.INFO.Printf("Retry #%d for %s and sleeping for %s", i+1, url, resSleep)
+ time.Sleep(resSleep)
+ }
- c, err := getCache(url, fs, cfg, cfg.GetBool("ignoreCache"))
- if err != nil {
return nil, err
- }
- if c != nil {
- return c, nil
- }
- // avoid race condition with locks, block other goroutines if the current url is processing
- remoteURLLock.URLLock(url)
- defer func() { remoteURLLock.URLUnlock(url) }()
+ })
- // avoid multiple locks due to calling getCache twice
- c, err = getCache(url, fs, cfg, cfg.GetBool("ignoreCache"))
- if err != nil {
- return nil, err
- }
- if c != nil {
- return c, nil
+ if !handled {
+ // This is cached content and should be correct.
+ err, _ = unmarshal(b)
}
- jww.INFO.Printf("Downloading: %s ...", url)
- res, err := hc.Do(req)
- if err != nil {
- return nil, err
- }
-
- if res.StatusCode < 200 || res.StatusCode > 299 {
- return nil, fmt.Errorf("Failed to retrieve remote file: %s", http.StatusText(res.StatusCode))
- }
-
- c, err = ioutil.ReadAll(res.Body)
- res.Body.Close()
- if err != nil {
- return nil, err
- }
-
- err = writeCache(url, c, fs, cfg, cfg.GetBool("ignoreCache"))
- if err != nil {
- return nil, err
- }
-
- jww.INFO.Printf("... and cached to: %s", getCacheFileID(cfg, url))
- return c, nil
+ return err
}
// getLocal loads the content of a local file
@@ -123,12 +102,22 @@ func getLocal(url string, fs afero.Fs, cfg config.Provider) ([]byte, error) {
}
-// getResource loads the content of a local or remote file
-func (ns *Namespace) getResource(req *http.Request) ([]byte, error) {
+// getResource loads the content of a local or remote file and returns its content and the
+// cache ID used, if relevant.
+func (ns *Namespace) getResource(cache *filecache.Cache, unmarshal func(b []byte) (error, bool), req *http.Request) error {
switch req.URL.Scheme {
case "":
- return getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg)
+ b, err := getLocal(req.URL.String(), ns.deps.Fs.Source, ns.deps.Cfg)
+ if err != nil {
+ return err
+ }
+ err, _ = unmarshal(b)
+ return err
default:
- return getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, ns.client)
+ return ns.getRemote(cache, unmarshal, req)
}
}
+
+func isHTTPError(res *http.Response) bool {
+ return res.StatusCode < 200 || res.StatusCode > 299
+}
diff --git a/tpl/data/resources_test.go b/tpl/data/resources_test.go
index c1da36d05..54eb123ee 100644
--- a/tpl/data/resources_test.go
+++ b/tpl/data/resources_test.go
@@ -23,6 +23,9 @@ import (
"testing"
"time"
+ "github.com/gohugoio/hugo/hugolib/paths"
+
+ "github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/common/loggers"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/deps"
@@ -85,16 +88,16 @@ func getTestServer(handler func(w http.ResponseWriter, r *http.Request)) (*httpt
func TestScpGetRemote(t *testing.T) {
t.Parallel()
fs := new(afero.MemMapFs)
+ cache := filecache.NewCache(fs, 100)
tests := []struct {
path string
content []byte
- ignore bool
}{
- {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`), false},
- {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`), false},
- {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`), false},
- {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`), true},
+ {"http://Foo.Bar/foo_Bar-Foo", []byte(`T€st Content 123`)},
+ {"http://Doppel.Gänger/foo_Bar-Foo", []byte(`T€st Cont€nt 123`)},
+ {"http://Doppel.Gänger/Fizz_Bazz-Foo", []byte(`T€st Банковский кассир Cont€nt 123`)},
+ {"http://Doppel.Gänger/Fizz_Bazz-Bar", []byte(`T€st Банковский кассир Cont€nt 456`)},
}
for _, test := range tests {
@@ -108,53 +111,64 @@ func TestScpGetRemote(t *testing.T) {
})
defer func() { srv.Close() }()
- cfg := viper.New()
+ ns := newTestNs()
+ ns.client = cl
- c, err := getRemote(req, fs, cfg, cl)
- require.NoError(t, err, msg)
- assert.Equal(t, string(test.content), string(c))
+ var c []byte
+ f := func(b []byte) (error, bool) {
+ c = b
+ return nil, false
+ }
- c, err = getCache(req.URL.String(), fs, cfg, test.ignore)
+ err = ns.getRemote(cache, f, req)
require.NoError(t, err, msg)
+ assert.Equal(t, string(test.content), string(c))
- if test.ignore {
- assert.Empty(t, c, msg)
- } else {
- assert.Equal(t, string(test.content), string(c))
+ assert.Equal(t, string(test.content), string(c))
- }
}
}
func TestScpGetRemoteParallel(t *testing.T) {
t.Parallel()
- ns := newTestNs()
-
content := []byte(`T€st Content 123`)
srv, cl := getTestServer(func(w http.ResponseWriter, r *http.Request) {
w.Write(content)
})
+
defer func() { srv.Close() }()
url := "http://Foo.Bar/foo_Bar-Foo"
req, err := http.NewRequest("GET", url, nil)
require.NoError(t, err)
- for _, ignoreCache := range []bool{false, true} {
+ for _, ignoreCache := range []bool{false} {
cfg := viper.New()
cfg.Set("ignoreCache", ignoreCache)
+ cfg.Set("contentDir", "content")
+
+ ns := New(newDeps(cfg))
+ ns.client = cl
var wg sync.WaitGroup
- for i := 0; i < 50; i++ {
+ for i := 0; i < 1; i++ {
wg.Add(1)
go func(gor int) {
defer wg.Done()
for j := 0; j < 10; j++ {
- c, err := getRemote(req, ns.deps.Fs.Source, ns.deps.Cfg, cl)
+ var c []byte
+ f := func(b []byte) (error, bool) {
+ c = b
+ return nil, false
+ }
+ err := ns.getRemote(ns.cacheGetJSON, f, req)
+
assert.NoError(t, err)
- assert.Equal(t, string(content), string(c))
+ if string(content) != string(c) {
+ t.Fatalf("expected\n%q\ngot\n%q", content, c)
+ }
time.Sleep(23 * time.Millisecond)
}
@@ -173,11 +187,16 @@ func newDeps(cfg config.Provider) *deps.Deps {
panic(err)
}
+ fs := hugofs.NewMem(l)
logger := loggers.NewErrorLogger()
+ p, _ := paths.New(fs, cfg)
+
+ fileCaches, _ := filecache.NewCachesFromPaths(p)
return &deps.Deps{
Cfg: cfg,
- Fs: hugofs.NewMem(l),
+ Fs: fs,
+ FileCaches: fileCaches,
ContentSpec: cs,
Log: logger,
DistinctErrorLog: helpers.NewDistinctLogger(logger.ERROR),