Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2021-03-24 18:15:49 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2021-03-29 12:46:53 +0300
commitb0451fae5cf4583b871125c57768966abc6020e9 (patch)
treef6c7daf6162948e0ac7625334a72acceef9c23a0
parent96c315848fae9a9a2d3daacfeb5c3f92f7a33131 (diff)
maintenance: Introduce new random filesystem walker
In order to get a random distribution of optimized repositories, the optimization maintenance job does a random walk through all storage repositories. This random walk is implemented ad-hoc via a recursive function which is deeply married with the optimization calls. This makes it hard to extend, reason about and test. To improve the situation, this commit thus introduces a generic random filesystem walker implementation which has the same semantics as the current implementation: given a directory hierarchy, the walker will go down this hierarchy depth-first and, for each directory it encounters, process its entries in a randomized order. The new walker is not yet used.
-rw-r--r--internal/gitaly/maintenance/optimize.go4
-rw-r--r--internal/gitaly/maintenance/randomwalker.go95
-rw-r--r--internal/gitaly/maintenance/randomwalker_test.go225
3 files changed, 320 insertions, 4 deletions
diff --git a/internal/gitaly/maintenance/optimize.go b/internal/gitaly/maintenance/optimize.go
index 19550c57e..69121325c 100644
--- a/internal/gitaly/maintenance/optimize.go
+++ b/internal/gitaly/maintenance/optimize.go
@@ -35,10 +35,6 @@ func shuffledStoragesCopy(randSrc *rand.Rand, storages []config.Storage) []confi
return shuffled
}
-func shuffleFileInfos(randSrc *rand.Rand, s []os.FileInfo) {
- randSrc.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] })
-}
-
// Optimizer knows how to optimize a repository
type Optimizer interface {
OptimizeRepository(context.Context, *gitalypb.OptimizeRepositoryRequest, ...grpc.CallOption) (*gitalypb.OptimizeRepositoryResponse, error)
diff --git a/internal/gitaly/maintenance/randomwalker.go b/internal/gitaly/maintenance/randomwalker.go
new file mode 100644
index 000000000..8faefe659
--- /dev/null
+++ b/internal/gitaly/maintenance/randomwalker.go
@@ -0,0 +1,95 @@
+package maintenance
+
+import (
+ "errors"
+ "io/ioutil"
+ "math/rand"
+ "os"
+ "path/filepath"
+)
+
+var (
+ errIterOver = errors.New("random walker at end")
+)
+
+type stackFrame struct {
+ name string
+ entries []os.FileInfo
+}
+
+// randomWalker is a filesystem walker which traverses a directory hierarchy in depth-first order,
+// randomizing the order of each directory's entries.
+type randomWalker struct {
+ stack []stackFrame
+ root string
+ pendingDir string
+ rand *rand.Rand
+}
+
+// newRandomWalker creates a new random walker starting at `root`.
+func newRandomWalker(root string, r *rand.Rand) *randomWalker {
+ return &randomWalker{
+ pendingDir: root,
+ root: root,
+ rand: r,
+ }
+}
+
+// next returns the next file. Traversal happens in depth-first order, where each directory's
+// entities are traversed in random order. If there are no more files to iterate, `errIterOver` is
+// returned.
+func (r *randomWalker) next() (os.FileInfo, string, error) {
+ if r.pendingDir != "" {
+ // Reset pendingDir before returning the error such that the caller can continue if
+ // he doesn't care e.g. for the directory not existing.
+ pendingDir := r.pendingDir
+ r.pendingDir = ""
+
+ entries, err := ioutil.ReadDir(pendingDir)
+ if err != nil {
+ return nil, pendingDir, err
+ }
+
+ shuffleFileInfos(r.rand, entries)
+ r.stack = append(r.stack, stackFrame{
+ name: pendingDir,
+ entries: entries,
+ })
+ }
+
+ // Iterate over all stack frames depth-first and search for the first non-empty
+ // one. If there are none, then it means that we've finished the depth-first search
+ // and return `errIterOver`.
+ for {
+ if len(r.stack) == 0 {
+ return nil, "", errIterOver
+ }
+
+ // Retrieve the current bottom-most stack frame. If the stack frame is empty, we pop
+ // it and retry its parent frame.
+ stackFrame := &r.stack[len(r.stack)-1]
+ if len(stackFrame.entries) == 0 {
+ r.stack = r.stack[:len(r.stack)-1]
+ continue
+ }
+
+ fi := stackFrame.entries[0]
+ stackFrame.entries = stackFrame.entries[1:]
+
+ path := filepath.Join(stackFrame.name, fi.Name())
+ if fi.IsDir() {
+ r.pendingDir = path
+ }
+
+ return fi, path, nil
+ }
+}
+
+func shuffleFileInfos(randSrc *rand.Rand, s []os.FileInfo) {
+ randSrc.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] })
+}
+
+// skipDir marks the current directory such that it does not get descended into.
+func (r *randomWalker) skipDir() {
+ r.pendingDir = ""
+}
diff --git a/internal/gitaly/maintenance/randomwalker_test.go b/internal/gitaly/maintenance/randomwalker_test.go
new file mode 100644
index 000000000..9213197c9
--- /dev/null
+++ b/internal/gitaly/maintenance/randomwalker_test.go
@@ -0,0 +1,225 @@
+package maintenance
+
+import (
+ "io/ioutil"
+ "math/rand"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/internal/testhelper"
+)
+
+func TestRandomWalk(t *testing.T) {
+ for _, tc := range []struct {
+ desc string
+ dirs []string
+ files []string
+ skipPaths []string
+ expectedPaths []string
+ }{
+ {
+ desc: "single directory",
+ dirs: []string{
+ "foo",
+ },
+ expectedPaths: []string{
+ "foo",
+ },
+ },
+ {
+ desc: "multiple directories",
+ dirs: []string{
+ "foo/bar/baz",
+ "foo/bar/qux",
+ "foo/bar/qux/qax",
+ "intermittent",
+ "other/dir",
+ "last",
+ },
+ expectedPaths: []string{
+ "foo",
+ "foo/bar",
+ "foo/bar/qux",
+ "foo/bar/qux/qax",
+ "foo/bar/baz",
+ "intermittent",
+ "other",
+ "other/dir",
+ "last",
+ },
+ },
+ {
+ desc: "single file",
+ files: []string{
+ "file",
+ },
+ expectedPaths: []string{
+ "file",
+ },
+ },
+ {
+ desc: "mixed files and directories",
+ dirs: []string{
+ "foo/bar/qux",
+ },
+ files: []string{
+ "file1",
+ "file2",
+ "file3",
+ "foo/file1",
+ "foo/file2",
+ "foo/file3",
+ "foo/bar/qux/file1",
+ "foo/bar/qux/file2",
+ "foo/bar/qux/file3",
+ },
+ expectedPaths: []string{
+ "file1",
+ "file2",
+ "foo",
+ "foo/bar",
+ "foo/bar/qux",
+ "foo/bar/qux/file2",
+ "foo/bar/qux/file3",
+ "foo/bar/qux/file1",
+ "foo/file2",
+ "foo/file3",
+ "foo/file1",
+ "file3",
+ },
+ },
+ {
+ desc: "single skipped dir",
+ dirs: []string{
+ "foo",
+ },
+ skipPaths: []string{
+ "foo",
+ },
+ expectedPaths: []string{
+ "foo",
+ },
+ },
+ {
+ desc: "single skipped dir with nested contents",
+ dirs: []string{
+ "foo",
+ "foo/subdir",
+ },
+ files: []string{
+ "foo/file",
+ },
+ skipPaths: []string{
+ "foo",
+ },
+ expectedPaths: []string{
+ "foo",
+ },
+ },
+ {
+ desc: "mixed files and directories with skipping",
+ dirs: []string{
+ "dir1/subdir/subsubdir",
+ "dir2/foo/bar/qux",
+ "dir2/foo/baz",
+ "dir3",
+ },
+ files: []string{
+ "file",
+ "dir2/foo/file",
+ },
+ skipPaths: []string{
+ "dir1",
+ "dir2/foo/bar",
+ },
+ expectedPaths: []string{
+ "dir1",
+ "dir2",
+ "dir2/foo",
+ "dir2/foo/file",
+ "dir2/foo/bar",
+ "dir2/foo/baz",
+ "file",
+ "dir3",
+ },
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ root, cleanup := testhelper.TempDir(t)
+ defer cleanup()
+
+ for _, dir := range tc.dirs {
+ require.NoError(t, os.MkdirAll(filepath.Join(root, dir), 0777))
+ }
+
+ for _, file := range tc.files {
+ require.NoError(t, ioutil.WriteFile(filepath.Join(root, file), []byte{}, 0777))
+ }
+
+ walker := newRandomWalker(root, rand.New(rand.NewSource(1)))
+
+ skipPaths := make(map[string]bool)
+ for _, skipPath := range tc.skipPaths {
+ skipPaths[filepath.Join(root, skipPath)] = true
+ }
+
+ actualPaths := []string{}
+ for {
+ fi, path, err := walker.next()
+ if err == errIterOver {
+ break
+ }
+ require.NoError(t, err)
+
+ if skipPaths[path] {
+ walker.skipDir()
+ }
+
+ require.Equal(t, filepath.Base(path), fi.Name())
+ actualPaths = append(actualPaths, path)
+ }
+
+ expectedPaths := make([]string, len(tc.expectedPaths))
+ for i, expectedPath := range tc.expectedPaths {
+ expectedPaths[i] = filepath.Join(root, expectedPath)
+ }
+
+ require.Equal(t, expectedPaths, actualPaths)
+ })
+ }
+}
+
+func TestRandomWalk_withRemovedDirs(t *testing.T) {
+ root, cleanup := testhelper.TempDir(t)
+ defer cleanup()
+
+ for _, dir := range []string{"foo/bar", "foo/bar/deleteme", "foo/baz/qux", "foo/baz/other"} {
+ require.NoError(t, os.MkdirAll(filepath.Join(root, dir), 0777))
+ }
+
+ walker := newRandomWalker(root, rand.New(rand.NewSource(1)))
+
+ for _, expectedPath := range []string{"foo", "foo/bar"} {
+ _, path, err := walker.next()
+ require.NoError(t, err)
+ require.Equal(t, filepath.Join(root, expectedPath), path)
+ }
+
+ require.NoError(t, os.RemoveAll(filepath.Join(root, "foo/bar")))
+
+ _, path, err := walker.next()
+ require.Error(t, err, "expected ENOENT")
+ require.True(t, os.IsNotExist(err))
+ require.Equal(t, filepath.Join(root, "foo/bar"), path)
+
+ for _, expectedPath := range []string{"foo/baz", "foo/baz/other", "foo/baz/qux"} {
+ _, path, err := walker.next()
+ require.NoError(t, err)
+ require.Equal(t, filepath.Join(root, expectedPath), path)
+ }
+
+ _, _, err = walker.next()
+ require.Equal(t, err, errIterOver)
+}