diff options
author | Paul Okstad <pokstad@gitlab.com> | 2019-08-16 14:03:28 +0300 |
---|---|---|
committer | Jacob Vosmaer <jacob@gitlab.com> | 2019-08-16 14:03:28 +0300 |
commit | db21dcb0bef6af6234b073dcfae545d3f1951daf (patch) | |
tree | 1ec42733826958d3889145f23c3ab4b85647e861 | |
parent | 37d86913ab56267e40d2adaf15605a828b13109a (diff) |
Disk cache object directory initial clear
-rw-r--r-- | changelogs/unreleased/po-cache-init-clear.yml | 5 | ||||
-rw-r--r-- | internal/cache/export_test.go | 3 | ||||
-rw-r--r-- | internal/cache/keyer.go | 9 | ||||
-rw-r--r-- | internal/cache/walker.go | 76 | ||||
-rw-r--r-- | internal/cache/walker_test.go | 72 | ||||
-rw-r--r-- | internal/tempdir/tempdir.go | 32 | ||||
-rw-r--r-- | internal/tempdir/tempdir_test.go | 4 |
7 files changed, 167 insertions, 34 deletions
diff --git a/changelogs/unreleased/po-cache-init-clear.yml b/changelogs/unreleased/po-cache-init-clear.yml new file mode 100644 index 000000000..f2f7d1005 --- /dev/null +++ b/changelogs/unreleased/po-cache-init-clear.yml @@ -0,0 +1,5 @@ +--- +title: Disk cache object directory initial clear +merge_request: 1424 +author: +type: performance diff --git a/internal/cache/export_test.go b/internal/cache/export_test.go index cb1f65b67..287072118 100644 --- a/internal/cache/export_test.go +++ b/internal/cache/export_test.go @@ -5,6 +5,9 @@ import "sync" var ( ExportMockRemovalCounter = new(mockCounter) ExportMockCheckCounter = new(mockCounter) + + ExportDisableMoveAndClear = &disableMoveAndClear + ExportDisableWalker = &disableWalker ) type mockCounter struct { diff --git a/internal/cache/keyer.go b/internal/cache/keyer.go index 20acbd414..b8d4609d0 100644 --- a/internal/cache/keyer.go +++ b/internal/cache/keyer.go @@ -213,14 +213,7 @@ func newPendingLease(repo *gitalypb.Repository) (string, error) { // cacheDir is $STORAGE/+gitaly/cache func cacheDir(repo *gitalypb.Repository) (string, error) { - storagePath, err := helper.GetStorageByName(repo.StorageName) - if err != nil { - return "", err - } - - absPath := filepath.Join(storagePath, tempdir.CachePrefix) - - return absPath, nil + return tempdir.CacheDir(repo.GetStorageName()) } func currentLeases(repo *gitalypb.Repository) ([]os.FileInfo, error) { diff --git a/internal/cache/walker.go b/internal/cache/walker.go index 4bbff532d..e70ee8846 100644 --- a/internal/cache/walker.go +++ b/internal/cache/walker.go @@ -6,6 +6,7 @@ package cache import ( + "io/ioutil" "os" "path/filepath" "time" @@ -15,8 +16,11 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/tempdir" ) -func cleanWalk(storagePath string) error { - cachePath := filepath.Join(storagePath, tempdir.CachePrefix) +func cleanWalk(storageName string) error { + cachePath, err := tempdir.CacheDir(storageName) + if err != nil { + return err + } err := filepath.Walk(cachePath, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -40,6 +44,7 @@ func cleanWalk(storagePath string) error { // have deleted the file already return nil } + return err } @@ -58,21 +63,86 @@ func cleanWalk(storagePath string) error { const cleanWalkFrequency = 10 * time.Minute func startCleanWalker(storage config.Storage) { + if disableWalker { + return + } + logrus.WithField("storage", storage.Name).Info("Starting disk cache object walker") walkTick := time.NewTicker(cleanWalkFrequency) go func() { for { - if err := cleanWalk(storage.Path); err != nil { + if err := cleanWalk(storage.Name); err != nil { logrus.WithField("storage", storage.Name).Error(err) } + <-walkTick.C } }() } +var ( + disableMoveAndClear bool // only used to disable move and clear in tests + disableWalker bool // only used to disable object walker in tests +) + +// moveAndClear will move the cache to the storage location's +// temporary folder, and then remove its contents asynchronously +func moveAndClear(storage config.Storage) error { + if disableMoveAndClear { + return nil + } + + logger := logrus.WithField("storage", storage.Name) + logger.Info("clearing disk cache object folder") + + cachePath, err := tempdir.CacheDir(storage.Name) + if err != nil { + return err + } + + tempPath, err := tempdir.TempDir(storage.Name) + if err != nil { + return err + } + + if err := os.MkdirAll(tempPath, 0755); err != nil { + return err + } + + tmpDir, err := ioutil.TempDir(tempPath, "diskcache") + if err != nil { + return err + } + + logger.Infof("moving disk cache object folder to %s", tmpDir) + if err := os.Rename(cachePath, filepath.Join(tmpDir, "moved")); err != nil { + if os.IsNotExist(err) { + logger.Info("disk cache object folder doesn't exist, no need to remove") + return nil + } + + return err + } + + go func() { + start := time.Now() + if err := os.RemoveAll(tmpDir); err != nil { + logger.Errorf("unable to remove disk cache objects: %q", err) + } + + logger.Infof("cleared all cache object files in %s after %s", tmpDir, time.Since(start)) + }() + + return nil +} + func init() { config.RegisterHook(func() error { for _, storage := range config.Config.Storages { + if err := moveAndClear(storage); err != nil { + return err + } + startCleanWalker(storage) } return nil diff --git a/internal/cache/walker_test.go b/internal/cache/walker_test.go index ef6f9b9e3..b3d3044d6 100644 --- a/internal/cache/walker_test.go +++ b/internal/cache/walker_test.go @@ -12,23 +12,12 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/cache" "gitlab.com/gitlab-org/gitaly/internal/config" "gitlab.com/gitlab-org/gitaly/internal/tempdir" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) func TestDiskCacheObjectWalker(t *testing.T) { - tmpPath, err := ioutil.TempDir("", t.Name()) - require.NoError(t, err) - defer func() { require.NoError(t, os.RemoveAll(tmpPath)) }() - - oldStorages := config.Config.Storages - config.Config.Storages = []config.Storage{ - { - Name: t.Name(), - Path: tmpPath, - }, - } - defer func() { config.Config.Storages = oldStorages }() - - satisfyConfigValidation(tmpPath) + cleanup := setupDiskCacheWalker(t) + defer cleanup() var shouldExist, shouldNotExist []string @@ -42,7 +31,10 @@ func TestDiskCacheObjectWalker(t *testing.T) { {"2b/ancient", 24 * time.Hour, true}, {"cd/baby", time.Second, false}, } { - path := filepath.Join(tmpPath, tempdir.CachePrefix, tt.name) + cacheDir, err := tempdir.CacheDir(t.Name()) // test name is storage name + require.NoError(t, err) + + path := filepath.Join(cacheDir, tt.name) require.NoError(t, os.MkdirAll(filepath.Dir(path), 0755)) f, err := os.Create(path) @@ -61,6 +53,11 @@ func TestDiskCacheObjectWalker(t *testing.T) { expectChecks := cache.ExportMockCheckCounter.Count() + 4 expectRemovals := cache.ExportMockRemovalCounter.Count() + 2 + // disable the initial move-and-clear function since we are only + // evaluating the walker + *cache.ExportDisableMoveAndClear = true + defer func() { *cache.ExportDisableMoveAndClear = false }() + require.NoError(t, config.Validate()) // triggers walker pollCountersUntil(t, expectChecks, expectRemovals) @@ -75,6 +72,51 @@ func TestDiskCacheObjectWalker(t *testing.T) { } } +func TestDiskCacheInitialClear(t *testing.T) { + cleanup := setupDiskCacheWalker(t) + defer cleanup() + + cacheDir, err := tempdir.CacheDir(t.Name()) // test name is storage name + require.NoError(t, err) + + canary := filepath.Join(cacheDir, "canary.txt") + require.NoError(t, os.MkdirAll(filepath.Dir(canary), 0755)) + require.NoError(t, ioutil.WriteFile(canary, []byte("chirp chirp"), 0755)) + + // disable the background walkers since we are only + // evaluating the initial move-and-clear function + *cache.ExportDisableWalker = true + defer func() { *cache.ExportDisableWalker = false }() + + // validation will run cache walker hook which synchronously + // runs the move-and-clear function + require.NoError(t, config.Validate()) + + testhelper.AssertFileNotExists(t, canary) +} + +func setupDiskCacheWalker(t testing.TB) func() { + tmpPath, err := ioutil.TempDir("", t.Name()) + require.NoError(t, err) + + oldStorages := config.Config.Storages + config.Config.Storages = []config.Storage{ + { + Name: t.Name(), + Path: tmpPath, + }, + } + + satisfyConfigValidation(tmpPath) + + cleanup := func() { + config.Config.Storages = oldStorages + require.NoError(t, os.RemoveAll(tmpPath)) + } + + return cleanup +} + // satisfyConfigValidation puts garbage values in the config file to satisfy // validation func satisfyConfigValidation(tmpPath string) { diff --git a/internal/tempdir/tempdir.go b/internal/tempdir/tempdir.go index 9431673b5..49ce183f1 100644 --- a/internal/tempdir/tempdir.go +++ b/internal/tempdir/tempdir.go @@ -23,13 +23,13 @@ const ( // character is not allowed in GitLab namespaces or repositories. GitalyDataPrefix = "+gitaly" - // TmpRootPrefix is the directory in which we store temporary + // tmpRootPrefix is the directory in which we store temporary // directories. - TmpRootPrefix = GitalyDataPrefix + "/tmp" + tmpRootPrefix = GitalyDataPrefix + "/tmp" - // CachePrefix is the directory where all cache data is stored on a + // cachePrefix is the directory where all cache data is stored on a // storage location. - CachePrefix = GitalyDataPrefix + "/cache" + cachePrefix = GitalyDataPrefix + "/cache" // MaxAge is used by ForDeleteAllRepositories. It is also a fallback // for the context-scoped temporary directories, to ensure they get @@ -37,6 +37,26 @@ const ( MaxAge = 7 * 24 * time.Hour ) +// CacheDir returns the path to the cache dir for a storage location +func CacheDir(storageName string) (string, error) { + storagePath, err := helper.GetStorageByName(storageName) + if err != nil { + return "", err + } + + return filepath.Join(storagePath, cachePrefix), nil +} + +// TempDir returns the path to the temp dir for a storage location +func TempDir(storageName string) (string, error) { + storagePath, err := helper.GetStorageByName(storageName) + if err != nil { + return "", err + } + + return filepath.Join(storagePath, tmpRootPrefix), nil +} + // ForDeleteAllRepositories returns a temporary directory for the given storage. It is not context-scoped but it will get removed eventuall (after MaxAge). func ForDeleteAllRepositories(storageName string) (string, error) { prefix := fmt.Sprintf("%s-repositories.old.%d.", storageName, time.Now().Unix()) @@ -90,7 +110,7 @@ func newAsRepository(ctx context.Context, storageName string, prefix string) (*g } func tmpRoot(storageRoot string) string { - return filepath.Join(storageRoot, TmpRootPrefix) + return filepath.Join(storageRoot, tmpRootPrefix) } // StartCleaning starts tempdir cleanup goroutines. @@ -119,7 +139,7 @@ type invalidCleanRoot string func clean(dir string) error { // If we start "cleaning up" the wrong directory we may delete user data // which is Really Bad. - if !strings.HasSuffix(dir, TmpRootPrefix) { + if !strings.HasSuffix(dir, tmpRootPrefix) { log.Print(dir) panic(invalidCleanRoot("invalid tempdir clean root: panicking to prevent data loss")) } diff --git a/internal/tempdir/tempdir_test.go b/internal/tempdir/tempdir_test.go index 67ef976e8..50a1f36e3 100644 --- a/internal/tempdir/tempdir_test.go +++ b/internal/tempdir/tempdir_test.go @@ -52,7 +52,7 @@ func TestNewAsRepositoryFailStorageUnknown(t *testing.T) { require.Error(t, err) } -var cleanRoot = path.Join("testdata/clean", TmpRootPrefix) +var cleanRoot = path.Join("testdata/clean", tmpRootPrefix) func TestCleanerSafety(t *testing.T) { defer func() { @@ -134,7 +134,7 @@ func makeDir(t *testing.T, dirPath string, mtime time.Time) { func TestCleanNoTmpExists(t *testing.T) { // This directory is valid because it ends in the special prefix - dir := path.Join("testdata", "does-not-exist", TmpRootPrefix) + dir := path.Join("testdata", "does-not-exist", tmpRootPrefix) require.NoError(t, clean(dir)) } |