Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavlo Strokov <pstrokov@gitlab.com>2021-06-16 10:10:11 +0300
committerPavlo Strokov <pstrokov@gitlab.com>2021-06-16 10:10:11 +0300
commit8a6d0e26de9d584941267d2b68c94b37bc30e092 (patch)
tree2d0630aead2d0fc85f1c3140f1c6568a7c4d1f55
parent16b767c0446a66eb87ca2fd12d4a481d221efa74 (diff)
parent98049d011468b1d38aec7a6637af239fee1e46e8 (diff)
Merge branch 'ps-bloom-filter' into 'master'
Write commit graph using bloom filter Closes #3545 See merge request gitlab-org/gitaly!3545
-rw-r--r--internal/gitaly/service/repository/commit_graph.go113
-rw-r--r--internal/gitaly/service/repository/commit_graph_test.go86
2 files changed, 197 insertions, 2 deletions
diff --git a/internal/gitaly/service/repository/commit_graph.go b/internal/gitaly/service/repository/commit_graph.go
index 8cb3bba43..a1f2828ca 100644
--- a/internal/gitaly/service/repository/commit_graph.go
+++ b/internal/gitaly/service/repository/commit_graph.go
@@ -1,7 +1,14 @@
package repository
import (
+ "bufio"
+ "bytes"
"context"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
"gitlab.com/gitlab-org/gitaly/v14/internal/git"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/repository"
@@ -32,34 +39,136 @@ func (s *server) writeCommitGraph(
repo repository.GitRepo,
splitStrategy gitalypb.WriteCommitGraphRequest_SplitStrategy,
) error {
+ repoPath, err := s.locator.GetRepoPath(repo)
+ if err != nil {
+ return err
+ }
+
+ missingBloomFilters := true
+ if _, err := os.Stat(filepath.Join(repoPath, CommitGraphRelPath)); err != nil {
+ if !errors.Is(err, os.ErrNotExist) {
+ return helper.ErrInternal(fmt.Errorf("remove commit graph file: %w", err))
+ }
+
+ // objects/info/commit-graph file doesn't exists
+ // check if commit-graph chain exists and includes Bloom filters
+ if missingBloomFilters, err = isMissingBloomFilters(repoPath); err != nil {
+ return helper.ErrInternal(fmt.Errorf("should remove commit graph chain: %w", err))
+ }
+ }
+
flags := []git.Option{
git.Flag{Name: "--reachable"},
+ git.Flag{Name: "--changed-paths"}, // enables Bloom filters
+ }
+
+ if missingBloomFilters {
+ // if commit graph doesn't use Bloom filters we instruct operation to replace
+ // existent commit graph with the new one
+ // https://git-scm.com/docs/git-commit-graph#Documentation/git-commit-graph.txt-emwriteem
+ flags = append(flags, git.Flag{Name: "--split=replace"})
+ } else {
+ flags = append(flags, git.Flag{Name: "--split"})
}
switch splitStrategy {
case gitalypb.WriteCommitGraphRequest_SizeMultiple:
flags = append(flags,
- git.Flag{Name: "--split"},
+ // this flag has no effect if '--split=replace' is used
git.ValueFlag{Name: "--size-multiple", Value: "4"},
)
default:
return helper.ErrInvalidArgumentf("unsupported split strategy: %v", splitStrategy)
}
+ var stderr bytes.Buffer
cmd, err := s.gitCmdFactory.New(ctx, repo,
git.SubSubCmd{
Name: "commit-graph",
Action: "write",
Flags: flags,
},
+ git.WithStderr(&stderr),
)
if err != nil {
return helper.ErrInternal(err)
}
if err := cmd.Wait(); err != nil {
- return helper.ErrInternal(err)
+ return helper.ErrInternalf("commit-graph write: %s: %v", err, stderr.String())
}
return nil
}
+
+// isMissingBloomFilters checks if the commit graph chain exists and has a Bloom filters indicators.
+// If the chain contains multiple files, each one is checked until at least one is missing a Bloom filter
+// indicator or until there are no more files to check.
+// https://git-scm.com/docs/commit-graph#_file_layout
+func isMissingBloomFilters(repoPath string) (bool, error) {
+ const chunkTableEntrySize = 12
+
+ commitGraphsPath := filepath.Join(repoPath, CommitGraphChainRelPath)
+ commitGraphsData, err := ioutil.ReadFile(commitGraphsPath)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return false, nil
+ }
+ return false, err
+ }
+
+ ids := bytes.Split(bytes.TrimSpace(commitGraphsData), []byte{'\n'})
+ for _, id := range ids {
+ graphFilePath := filepath.Join(repoPath, filepath.Dir(CommitGraphChainRelPath), fmt.Sprintf("graph-%s.graph", id))
+ graphFile, err := os.Open(graphFilePath)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ // concurrently modified
+ continue
+ }
+ return false, fmt.Errorf("read commit graph chain file: %w", err)
+ }
+ defer graphFile.Close()
+
+ reader := bufio.NewReader(graphFile)
+ // https://github.com/git/git/blob/a43a2e6/Documentation/technical/commit-graph-format.txt#L123
+ header := []byte{
+ 0, 0, 0, 0, //4-byte signature: The signature is: {'C', 'G', 'P', 'H'}
+ 0, // 1-byte version number: Currently, the only valid version is 1.
+ 0, // 1-byte Hash Version
+ 0, // 1-byte number (C) of "chunks"
+ 0, // 1-byte number (B) of base commit-graphs
+ }
+
+ if n, err := reader.Read(header); err != nil {
+ return false, fmt.Errorf("read commit graph file %q header: %w", graphFilePath, err)
+ } else if n != len(header) {
+ return false, fmt.Errorf("commit graph file %q is too small, no header", graphFilePath)
+ }
+
+ if !bytes.Equal(header[:4], []byte("CGPH")) {
+ return false, fmt.Errorf("commit graph file %q doesn't have signature", graphFilePath)
+ }
+ if header[4] != 1 {
+ return false, fmt.Errorf("commit graph file %q has unsupported version number: %v", graphFilePath, header[4])
+ }
+
+ C := header[6] // number (C) of "chunks"
+ table := make([]byte, (C+1)*chunkTableEntrySize)
+ if n, err := reader.Read(table); err != nil {
+ return false, fmt.Errorf("read commit graph file %q table of contents for the chunks: %w", graphFilePath, err)
+ } else if n != len(table) {
+ return false, fmt.Errorf("commit graph file %q is too small, no table of contents", graphFilePath)
+ }
+
+ if !bytes.Contains(table, []byte("BIDX")) && !bytes.Contains(table, []byte("BDAT")) {
+ return true, nil
+ }
+
+ if err := graphFile.Close(); err != nil {
+ return false, fmt.Errorf("commit graph file %q close: %w", graphFilePath, err)
+ }
+ }
+
+ return false, nil
+}
diff --git a/internal/gitaly/service/repository/commit_graph_test.go b/internal/gitaly/service/repository/commit_graph_test.go
index 216f9c452..d7d95490c 100644
--- a/internal/gitaly/service/repository/commit_graph_test.go
+++ b/internal/gitaly/service/repository/commit_graph_test.go
@@ -1,6 +1,8 @@
package repository
import (
+ "bytes"
+ "fmt"
"os"
"path/filepath"
"testing"
@@ -10,6 +12,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testassert"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testserver"
"gitlab.com/gitlab-org/gitaly/v14/proto/go/gitalypb"
"google.golang.org/grpc/codes"
@@ -49,9 +52,47 @@ func TestWriteCommitGraph_withExistingCommitGraphCreatedWithDefaults(t *testing.
require.NotNil(t, res)
require.FileExists(t, chainPath, "commit graph chain should be created")
+ requireBloomFilterUsed(t, repoPath)
require.NoFileExists(t, commitGraphPath, "commit-graph file should be replaced with commit graph chain")
}
+func TestWriteCommitGraph_withExistingCommitGraphCreatedWithSplit(t *testing.T) {
+ cfg, repo, repoPath, client := setupRepositoryService(t)
+
+ commitGraphPath := filepath.Join(repoPath, CommitGraphRelPath)
+ require.NoFileExists(t, commitGraphPath, "sanity check no commit graph")
+
+ chainPath := filepath.Join(repoPath, CommitGraphChainRelPath)
+ require.NoFileExists(t, chainPath, "sanity check no commit graph chain exists")
+
+ // write commit graph chain
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split")
+ require.FileExists(t, chainPath)
+
+ treeEntry := gittest.TreeEntry{Mode: "100644", Path: "file.txt", Content: "something"}
+ gittest.WriteCommit(
+ t,
+ cfg,
+ repoPath,
+ gittest.WithBranch(t.Name()),
+ gittest.WithTreeEntries(treeEntry),
+ )
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ res, err := client.WriteCommitGraph(ctx, &gitalypb.WriteCommitGraphRequest{
+ Repository: repo,
+ SplitStrategy: gitalypb.WriteCommitGraphRequest_SizeMultiple,
+ })
+ require.NoError(t, err)
+ require.NotNil(t, res)
+
+ require.FileExists(t, chainPath, "commit graph chain should be created")
+ requireBloomFilterUsed(t, repoPath)
+ require.NoFileExists(t, commitGraphPath, "commit-graph file should not be created")
+}
+
func TestWriteCommitGraph(t *testing.T) {
_, repo, repoPath, client := setupRepositoryService(t)
@@ -152,3 +193,48 @@ func TestUpdateCommitGraph(t *testing.T) {
assertModTimeAfter(t, mt, chainPath)
}
+
+func requireBloomFilterUsed(t testing.TB, repoPath string) {
+ t.Helper()
+
+ commitGraphsPath := filepath.Join(repoPath, CommitGraphChainRelPath)
+ ids := bytes.Split(testhelper.MustReadFile(t, commitGraphsPath), []byte{'\n'})
+
+ for _, id := range ids {
+ if len(id) == 0 {
+ continue
+ }
+ graphFilePath := filepath.Join(repoPath, filepath.Dir(CommitGraphChainRelPath), fmt.Sprintf("graph-%s.graph", id))
+ graphFileData := testhelper.MustReadFile(t, graphFilePath)
+
+ require.True(t, bytes.HasPrefix(graphFileData, []byte("CGPH")), "4-byte signature of the commit graph file")
+ require.True(t, bytes.Contains(graphFileData, []byte("BIDX")), "Bloom Filter Index")
+ require.True(t, bytes.Contains(graphFileData, []byte("BDAT")), "Bloom Filter Data")
+ }
+}
+
+func TestIsMissingBloomFilters(t *testing.T) {
+ for _, tc := range []struct {
+ desc string
+ enable bool
+ }{
+ {desc: "no Bloom filter", enable: false},
+ {desc: "with Bloom filter", enable: true},
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ cfg := testcfg.Build(t)
+ _, repoPath, cleanup := gittest.CloneRepoAtStorage(t, cfg, cfg.Storages[0], t.Name())
+ t.Cleanup(cleanup)
+
+ args := []string{"-C", repoPath, "commit-graph", "write", "--reachable", "--split"}
+ if tc.enable {
+ args = append(args, "--changed-paths")
+ }
+ gittest.Exec(t, cfg, args...)
+
+ ok, err := isMissingBloomFilters(repoPath)
+ require.NoError(t, err)
+ require.Equal(t, tc.enable, !ok)
+ })
+ }
+}