diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2021-06-16 10:10:11 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2021-06-16 10:10:11 +0300 |
commit | 8a6d0e26de9d584941267d2b68c94b37bc30e092 (patch) | |
tree | 2d0630aead2d0fc85f1c3140f1c6568a7c4d1f55 | |
parent | 16b767c0446a66eb87ca2fd12d4a481d221efa74 (diff) | |
parent | 98049d011468b1d38aec7a6637af239fee1e46e8 (diff) |
Merge branch 'ps-bloom-filter' into 'master'
Write commit graph using bloom filter
Closes #3545
See merge request gitlab-org/gitaly!3545
-rw-r--r-- | internal/gitaly/service/repository/commit_graph.go | 113 | ||||
-rw-r--r-- | internal/gitaly/service/repository/commit_graph_test.go | 86 |
2 files changed, 197 insertions, 2 deletions
diff --git a/internal/gitaly/service/repository/commit_graph.go b/internal/gitaly/service/repository/commit_graph.go index 8cb3bba43..a1f2828ca 100644 --- a/internal/gitaly/service/repository/commit_graph.go +++ b/internal/gitaly/service/repository/commit_graph.go @@ -1,7 +1,14 @@ package repository import ( + "bufio" + "bytes" "context" + "errors" + "fmt" + "io/ioutil" + "os" + "path/filepath" "gitlab.com/gitlab-org/gitaly/v14/internal/git" "gitlab.com/gitlab-org/gitaly/v14/internal/git/repository" @@ -32,34 +39,136 @@ func (s *server) writeCommitGraph( repo repository.GitRepo, splitStrategy gitalypb.WriteCommitGraphRequest_SplitStrategy, ) error { + repoPath, err := s.locator.GetRepoPath(repo) + if err != nil { + return err + } + + missingBloomFilters := true + if _, err := os.Stat(filepath.Join(repoPath, CommitGraphRelPath)); err != nil { + if !errors.Is(err, os.ErrNotExist) { + return helper.ErrInternal(fmt.Errorf("remove commit graph file: %w", err)) + } + + // objects/info/commit-graph file doesn't exists + // check if commit-graph chain exists and includes Bloom filters + if missingBloomFilters, err = isMissingBloomFilters(repoPath); err != nil { + return helper.ErrInternal(fmt.Errorf("should remove commit graph chain: %w", err)) + } + } + flags := []git.Option{ git.Flag{Name: "--reachable"}, + git.Flag{Name: "--changed-paths"}, // enables Bloom filters + } + + if missingBloomFilters { + // if commit graph doesn't use Bloom filters we instruct operation to replace + // existent commit graph with the new one + // https://git-scm.com/docs/git-commit-graph#Documentation/git-commit-graph.txt-emwriteem + flags = append(flags, git.Flag{Name: "--split=replace"}) + } else { + flags = append(flags, git.Flag{Name: "--split"}) } switch splitStrategy { case gitalypb.WriteCommitGraphRequest_SizeMultiple: flags = append(flags, - git.Flag{Name: "--split"}, + // this flag has no effect if '--split=replace' is used git.ValueFlag{Name: "--size-multiple", Value: "4"}, ) default: return helper.ErrInvalidArgumentf("unsupported split strategy: %v", splitStrategy) } + var stderr bytes.Buffer cmd, err := s.gitCmdFactory.New(ctx, repo, git.SubSubCmd{ Name: "commit-graph", Action: "write", Flags: flags, }, + git.WithStderr(&stderr), ) if err != nil { return helper.ErrInternal(err) } if err := cmd.Wait(); err != nil { - return helper.ErrInternal(err) + return helper.ErrInternalf("commit-graph write: %s: %v", err, stderr.String()) } return nil } + +// isMissingBloomFilters checks if the commit graph chain exists and has a Bloom filters indicators. +// If the chain contains multiple files, each one is checked until at least one is missing a Bloom filter +// indicator or until there are no more files to check. +// https://git-scm.com/docs/commit-graph#_file_layout +func isMissingBloomFilters(repoPath string) (bool, error) { + const chunkTableEntrySize = 12 + + commitGraphsPath := filepath.Join(repoPath, CommitGraphChainRelPath) + commitGraphsData, err := ioutil.ReadFile(commitGraphsPath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + return false, err + } + + ids := bytes.Split(bytes.TrimSpace(commitGraphsData), []byte{'\n'}) + for _, id := range ids { + graphFilePath := filepath.Join(repoPath, filepath.Dir(CommitGraphChainRelPath), fmt.Sprintf("graph-%s.graph", id)) + graphFile, err := os.Open(graphFilePath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + // concurrently modified + continue + } + return false, fmt.Errorf("read commit graph chain file: %w", err) + } + defer graphFile.Close() + + reader := bufio.NewReader(graphFile) + // https://github.com/git/git/blob/a43a2e6/Documentation/technical/commit-graph-format.txt#L123 + header := []byte{ + 0, 0, 0, 0, //4-byte signature: The signature is: {'C', 'G', 'P', 'H'} + 0, // 1-byte version number: Currently, the only valid version is 1. + 0, // 1-byte Hash Version + 0, // 1-byte number (C) of "chunks" + 0, // 1-byte number (B) of base commit-graphs + } + + if n, err := reader.Read(header); err != nil { + return false, fmt.Errorf("read commit graph file %q header: %w", graphFilePath, err) + } else if n != len(header) { + return false, fmt.Errorf("commit graph file %q is too small, no header", graphFilePath) + } + + if !bytes.Equal(header[:4], []byte("CGPH")) { + return false, fmt.Errorf("commit graph file %q doesn't have signature", graphFilePath) + } + if header[4] != 1 { + return false, fmt.Errorf("commit graph file %q has unsupported version number: %v", graphFilePath, header[4]) + } + + C := header[6] // number (C) of "chunks" + table := make([]byte, (C+1)*chunkTableEntrySize) + if n, err := reader.Read(table); err != nil { + return false, fmt.Errorf("read commit graph file %q table of contents for the chunks: %w", graphFilePath, err) + } else if n != len(table) { + return false, fmt.Errorf("commit graph file %q is too small, no table of contents", graphFilePath) + } + + if !bytes.Contains(table, []byte("BIDX")) && !bytes.Contains(table, []byte("BDAT")) { + return true, nil + } + + if err := graphFile.Close(); err != nil { + return false, fmt.Errorf("commit graph file %q close: %w", graphFilePath, err) + } + } + + return false, nil +} diff --git a/internal/gitaly/service/repository/commit_graph_test.go b/internal/gitaly/service/repository/commit_graph_test.go index 216f9c452..d7d95490c 100644 --- a/internal/gitaly/service/repository/commit_graph_test.go +++ b/internal/gitaly/service/repository/commit_graph_test.go @@ -1,6 +1,8 @@ package repository import ( + "bytes" + "fmt" "os" "path/filepath" "testing" @@ -10,6 +12,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testassert" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testserver" "gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb" "google.golang.org/grpc/codes" @@ -49,9 +52,47 @@ func TestWriteCommitGraph_withExistingCommitGraphCreatedWithDefaults(t *testing. require.NotNil(t, res) require.FileExists(t, chainPath, "commit graph chain should be created") + requireBloomFilterUsed(t, repoPath) require.NoFileExists(t, commitGraphPath, "commit-graph file should be replaced with commit graph chain") } +func TestWriteCommitGraph_withExistingCommitGraphCreatedWithSplit(t *testing.T) { + cfg, repo, repoPath, client := setupRepositoryService(t) + + commitGraphPath := filepath.Join(repoPath, CommitGraphRelPath) + require.NoFileExists(t, commitGraphPath, "sanity check no commit graph") + + chainPath := filepath.Join(repoPath, CommitGraphChainRelPath) + require.NoFileExists(t, chainPath, "sanity check no commit graph chain exists") + + // write commit graph chain + gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split") + require.FileExists(t, chainPath) + + treeEntry := gittest.TreeEntry{Mode: "100644", Path: "file.txt", Content: "something"} + gittest.WriteCommit( + t, + cfg, + repoPath, + gittest.WithBranch(t.Name()), + gittest.WithTreeEntries(treeEntry), + ) + + ctx, cancel := testhelper.Context() + defer cancel() + + res, err := client.WriteCommitGraph(ctx, &gitalypb.WriteCommitGraphRequest{ + Repository: repo, + SplitStrategy: gitalypb.WriteCommitGraphRequest_SizeMultiple, + }) + require.NoError(t, err) + require.NotNil(t, res) + + require.FileExists(t, chainPath, "commit graph chain should be created") + requireBloomFilterUsed(t, repoPath) + require.NoFileExists(t, commitGraphPath, "commit-graph file should not be created") +} + func TestWriteCommitGraph(t *testing.T) { _, repo, repoPath, client := setupRepositoryService(t) @@ -152,3 +193,48 @@ func TestUpdateCommitGraph(t *testing.T) { assertModTimeAfter(t, mt, chainPath) } + +func requireBloomFilterUsed(t testing.TB, repoPath string) { + t.Helper() + + commitGraphsPath := filepath.Join(repoPath, CommitGraphChainRelPath) + ids := bytes.Split(testhelper.MustReadFile(t, commitGraphsPath), []byte{'\n'}) + + for _, id := range ids { + if len(id) == 0 { + continue + } + graphFilePath := filepath.Join(repoPath, filepath.Dir(CommitGraphChainRelPath), fmt.Sprintf("graph-%s.graph", id)) + graphFileData := testhelper.MustReadFile(t, graphFilePath) + + require.True(t, bytes.HasPrefix(graphFileData, []byte("CGPH")), "4-byte signature of the commit graph file") + require.True(t, bytes.Contains(graphFileData, []byte("BIDX")), "Bloom Filter Index") + require.True(t, bytes.Contains(graphFileData, []byte("BDAT")), "Bloom Filter Data") + } +} + +func TestIsMissingBloomFilters(t *testing.T) { + for _, tc := range []struct { + desc string + enable bool + }{ + {desc: "no Bloom filter", enable: false}, + {desc: "with Bloom filter", enable: true}, + } { + t.Run(tc.desc, func(t *testing.T) { + cfg := testcfg.Build(t) + _, repoPath, cleanup := gittest.CloneRepoAtStorage(t, cfg, cfg.Storages[0], t.Name()) + t.Cleanup(cleanup) + + args := []string{"-C", repoPath, "commit-graph", "write", "--reachable", "--split"} + if tc.enable { + args = append(args, "--changed-paths") + } + gittest.Exec(t, cfg, args...) + + ok, err := isMissingBloomFilters(repoPath) + require.NoError(t, err) + require.Equal(t, tc.enable, !ok) + }) + } +} |