diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-03-24 18:15:49 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-03-29 12:46:53 +0300 |
commit | b0451fae5cf4583b871125c57768966abc6020e9 (patch) | |
tree | f6c7daf6162948e0ac7625334a72acceef9c23a0 | |
parent | 96c315848fae9a9a2d3daacfeb5c3f92f7a33131 (diff) |
maintenance: Introduce new random filesystem walker
In order to get a random distribution of optimized repositories, the
optimization maintenance job does a random walk through all storage
repositories. This random walk is implemented ad-hoc via a recursive
function which is deeply married with the optimization calls. This makes
it hard to extend, reason about and test.
To improve the situation, this commit thus introduces a generic random
filesystem walker implementation which has the same semantics as the
current implementation: given a directory hierarchy, the walker will go
down this hierarchy depth-first and, for each directory it encounters,
process its entries in a randomized order.
The new walker is not yet used.
-rw-r--r-- | internal/gitaly/maintenance/optimize.go | 4 | ||||
-rw-r--r-- | internal/gitaly/maintenance/randomwalker.go | 95 | ||||
-rw-r--r-- | internal/gitaly/maintenance/randomwalker_test.go | 225 |
3 files changed, 320 insertions, 4 deletions
diff --git a/internal/gitaly/maintenance/optimize.go b/internal/gitaly/maintenance/optimize.go index 19550c57e..69121325c 100644 --- a/internal/gitaly/maintenance/optimize.go +++ b/internal/gitaly/maintenance/optimize.go @@ -35,10 +35,6 @@ func shuffledStoragesCopy(randSrc *rand.Rand, storages []config.Storage) []confi return shuffled } -func shuffleFileInfos(randSrc *rand.Rand, s []os.FileInfo) { - randSrc.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] }) -} - // Optimizer knows how to optimize a repository type Optimizer interface { OptimizeRepository(context.Context, *gitalypb.OptimizeRepositoryRequest, ...grpc.CallOption) (*gitalypb.OptimizeRepositoryResponse, error) diff --git a/internal/gitaly/maintenance/randomwalker.go b/internal/gitaly/maintenance/randomwalker.go new file mode 100644 index 000000000..8faefe659 --- /dev/null +++ b/internal/gitaly/maintenance/randomwalker.go @@ -0,0 +1,95 @@ +package maintenance + +import ( + "errors" + "io/ioutil" + "math/rand" + "os" + "path/filepath" +) + +var ( + errIterOver = errors.New("random walker at end") +) + +type stackFrame struct { + name string + entries []os.FileInfo +} + +// randomWalker is a filesystem walker which traverses a directory hierarchy in depth-first order, +// randomizing the order of each directory's entries. +type randomWalker struct { + stack []stackFrame + root string + pendingDir string + rand *rand.Rand +} + +// newRandomWalker creates a new random walker starting at `root`. +func newRandomWalker(root string, r *rand.Rand) *randomWalker { + return &randomWalker{ + pendingDir: root, + root: root, + rand: r, + } +} + +// next returns the next file. Traversal happens in depth-first order, where each directory's +// entities are traversed in random order. If there are no more files to iterate, `errIterOver` is +// returned. +func (r *randomWalker) next() (os.FileInfo, string, error) { + if r.pendingDir != "" { + // Reset pendingDir before returning the error such that the caller can continue if + // he doesn't care e.g. for the directory not existing. + pendingDir := r.pendingDir + r.pendingDir = "" + + entries, err := ioutil.ReadDir(pendingDir) + if err != nil { + return nil, pendingDir, err + } + + shuffleFileInfos(r.rand, entries) + r.stack = append(r.stack, stackFrame{ + name: pendingDir, + entries: entries, + }) + } + + // Iterate over all stack frames depth-first and search for the first non-empty + // one. If there are none, then it means that we've finished the depth-first search + // and return `errIterOver`. + for { + if len(r.stack) == 0 { + return nil, "", errIterOver + } + + // Retrieve the current bottom-most stack frame. If the stack frame is empty, we pop + // it and retry its parent frame. + stackFrame := &r.stack[len(r.stack)-1] + if len(stackFrame.entries) == 0 { + r.stack = r.stack[:len(r.stack)-1] + continue + } + + fi := stackFrame.entries[0] + stackFrame.entries = stackFrame.entries[1:] + + path := filepath.Join(stackFrame.name, fi.Name()) + if fi.IsDir() { + r.pendingDir = path + } + + return fi, path, nil + } +} + +func shuffleFileInfos(randSrc *rand.Rand, s []os.FileInfo) { + randSrc.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] }) +} + +// skipDir marks the current directory such that it does not get descended into. +func (r *randomWalker) skipDir() { + r.pendingDir = "" +} diff --git a/internal/gitaly/maintenance/randomwalker_test.go b/internal/gitaly/maintenance/randomwalker_test.go new file mode 100644 index 000000000..9213197c9 --- /dev/null +++ b/internal/gitaly/maintenance/randomwalker_test.go @@ -0,0 +1,225 @@ +package maintenance + +import ( + "io/ioutil" + "math/rand" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestRandomWalk(t *testing.T) { + for _, tc := range []struct { + desc string + dirs []string + files []string + skipPaths []string + expectedPaths []string + }{ + { + desc: "single directory", + dirs: []string{ + "foo", + }, + expectedPaths: []string{ + "foo", + }, + }, + { + desc: "multiple directories", + dirs: []string{ + "foo/bar/baz", + "foo/bar/qux", + "foo/bar/qux/qax", + "intermittent", + "other/dir", + "last", + }, + expectedPaths: []string{ + "foo", + "foo/bar", + "foo/bar/qux", + "foo/bar/qux/qax", + "foo/bar/baz", + "intermittent", + "other", + "other/dir", + "last", + }, + }, + { + desc: "single file", + files: []string{ + "file", + }, + expectedPaths: []string{ + "file", + }, + }, + { + desc: "mixed files and directories", + dirs: []string{ + "foo/bar/qux", + }, + files: []string{ + "file1", + "file2", + "file3", + "foo/file1", + "foo/file2", + "foo/file3", + "foo/bar/qux/file1", + "foo/bar/qux/file2", + "foo/bar/qux/file3", + }, + expectedPaths: []string{ + "file1", + "file2", + "foo", + "foo/bar", + "foo/bar/qux", + "foo/bar/qux/file2", + "foo/bar/qux/file3", + "foo/bar/qux/file1", + "foo/file2", + "foo/file3", + "foo/file1", + "file3", + }, + }, + { + desc: "single skipped dir", + dirs: []string{ + "foo", + }, + skipPaths: []string{ + "foo", + }, + expectedPaths: []string{ + "foo", + }, + }, + { + desc: "single skipped dir with nested contents", + dirs: []string{ + "foo", + "foo/subdir", + }, + files: []string{ + "foo/file", + }, + skipPaths: []string{ + "foo", + }, + expectedPaths: []string{ + "foo", + }, + }, + { + desc: "mixed files and directories with skipping", + dirs: []string{ + "dir1/subdir/subsubdir", + "dir2/foo/bar/qux", + "dir2/foo/baz", + "dir3", + }, + files: []string{ + "file", + "dir2/foo/file", + }, + skipPaths: []string{ + "dir1", + "dir2/foo/bar", + }, + expectedPaths: []string{ + "dir1", + "dir2", + "dir2/foo", + "dir2/foo/file", + "dir2/foo/bar", + "dir2/foo/baz", + "file", + "dir3", + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + root, cleanup := testhelper.TempDir(t) + defer cleanup() + + for _, dir := range tc.dirs { + require.NoError(t, os.MkdirAll(filepath.Join(root, dir), 0777)) + } + + for _, file := range tc.files { + require.NoError(t, ioutil.WriteFile(filepath.Join(root, file), []byte{}, 0777)) + } + + walker := newRandomWalker(root, rand.New(rand.NewSource(1))) + + skipPaths := make(map[string]bool) + for _, skipPath := range tc.skipPaths { + skipPaths[filepath.Join(root, skipPath)] = true + } + + actualPaths := []string{} + for { + fi, path, err := walker.next() + if err == errIterOver { + break + } + require.NoError(t, err) + + if skipPaths[path] { + walker.skipDir() + } + + require.Equal(t, filepath.Base(path), fi.Name()) + actualPaths = append(actualPaths, path) + } + + expectedPaths := make([]string, len(tc.expectedPaths)) + for i, expectedPath := range tc.expectedPaths { + expectedPaths[i] = filepath.Join(root, expectedPath) + } + + require.Equal(t, expectedPaths, actualPaths) + }) + } +} + +func TestRandomWalk_withRemovedDirs(t *testing.T) { + root, cleanup := testhelper.TempDir(t) + defer cleanup() + + for _, dir := range []string{"foo/bar", "foo/bar/deleteme", "foo/baz/qux", "foo/baz/other"} { + require.NoError(t, os.MkdirAll(filepath.Join(root, dir), 0777)) + } + + walker := newRandomWalker(root, rand.New(rand.NewSource(1))) + + for _, expectedPath := range []string{"foo", "foo/bar"} { + _, path, err := walker.next() + require.NoError(t, err) + require.Equal(t, filepath.Join(root, expectedPath), path) + } + + require.NoError(t, os.RemoveAll(filepath.Join(root, "foo/bar"))) + + _, path, err := walker.next() + require.Error(t, err, "expected ENOENT") + require.True(t, os.IsNotExist(err)) + require.Equal(t, filepath.Join(root, "foo/bar"), path) + + for _, expectedPath := range []string{"foo/baz", "foo/baz/other", "foo/baz/qux"} { + _, path, err := walker.next() + require.NoError(t, err) + require.Equal(t, filepath.Join(root, expectedPath), path) + } + + _, _, err = walker.next() + require.Equal(t, err, errIterOver) +} |