Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Ramsay <james@jramsay.com.au>2020-06-03 07:05:32 +0300
committerJames Ramsay <james@jramsay.com.au>2020-06-09 03:47:23 +0300
commit22dcda65372742bc9ae5ffdfdb32ff8ebfecc738 (patch)
tree0843b9121bca39f64850932cdd5d68ea42faf414
parent0d917bb8957d9652f2e82e32678a4b2934079b9b (diff)
Add support for filter-repo commit map to cleaner
Git filter-repo is preferred to BFG, but it's output is not identical. Compatibility is added for filter-repo commit map to avoid the need to manually process the commit map before uploading. Differences between filter-repo and BFG output: - header included in output by filter-repo - unchanged commits included in output by filter-repo These lines can be detected and ignored.
-rw-r--r--changelogs/unreleased/git-filter-repo-compatibility.yml5
-rw-r--r--internal/service/cleanup/apply_bfg_object_map_stream_test.go23
-rw-r--r--internal/service/cleanup/internalrefs/cleaner.go39
3 files changed, 49 insertions, 18 deletions
diff --git a/changelogs/unreleased/git-filter-repo-compatibility.yml b/changelogs/unreleased/git-filter-repo-compatibility.yml
new file mode 100644
index 000000000..446f724db
--- /dev/null
+++ b/changelogs/unreleased/git-filter-repo-compatibility.yml
@@ -0,0 +1,5 @@
+---
+title: Add support for filter-repo commit map to cleaner
+merge_request: 2247
+author:
+type: added
diff --git a/internal/service/cleanup/apply_bfg_object_map_stream_test.go b/internal/service/cleanup/apply_bfg_object_map_stream_test.go
index 2a004b905..7146ee877 100644
--- a/internal/service/cleanup/apply_bfg_object_map_stream_test.go
+++ b/internal/service/cleanup/apply_bfg_object_map_stream_test.go
@@ -46,12 +46,22 @@ func TestApplyBfgObjectMapStreamSuccess(t *testing.T) {
testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "update-ref", ref, headCommit.Id)
}
+ // Create some refs pointing to ref/tags/v1.0.0, simulating an unmodified
+ // commit that predates bad data being added to the repository.
+ for _, ref := range []string{
+ "refs/environments/_keep", "refs/keep-around/_keep", "refs/merge-requests/_keep",
+ } {
+ testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "update-ref", ref, tagID)
+ }
+
+ const filterRepoCommitMapHeader = "old new\n"
objectMapData := fmt.Sprintf(
- strings.Repeat("%s %s\n", 4),
- headCommit.Id, headCommit.Id,
+ filterRepoCommitMapHeader+strings.Repeat("%s %s\n", 5),
+ headCommit.Id, git.NullSHA,
git.NullSHA, blobID,
git.NullSHA, tagID,
- git.NullSHA, git.NullSHA,
+ blobID, git.NullSHA,
+ tagID, tagID,
)
entries, err := doStreamingRequest(ctx, t, testRepo, client, objectMapData)
@@ -65,13 +75,16 @@ func TestApplyBfgObjectMapStreamSuccess(t *testing.T) {
assert.Contains(t, refs, "refs/heads/_keep")
assert.Contains(t, refs, "refs/tags/_keep")
assert.Contains(t, refs, "refs/notes/_keep")
+ assert.Contains(t, refs, "refs/environments/_keep")
+ assert.Contains(t, refs, "refs/keep-around/_keep")
+ assert.Contains(t, refs, "refs/merge-requests/_keep")
// Ensure that the returned entry is correct
require.Len(t, entries, 4, "wrong number of entries returned")
- requireEntry(t, entries[0], headCommit.Id, headCommit.Id, gitalypb.ObjectType_COMMIT)
+ requireEntry(t, entries[0], headCommit.Id, git.NullSHA, gitalypb.ObjectType_COMMIT)
requireEntry(t, entries[1], git.NullSHA, blobID, gitalypb.ObjectType_BLOB)
requireEntry(t, entries[2], git.NullSHA, tagID, gitalypb.ObjectType_TAG)
- requireEntry(t, entries[3], git.NullSHA, git.NullSHA, gitalypb.ObjectType_UNKNOWN)
+ requireEntry(t, entries[3], blobID, git.NullSHA, gitalypb.ObjectType_UNKNOWN)
}
func requireEntry(t *testing.T, entry *gitalypb.ApplyBfgObjectMapStreamResponse_Entry, oldOid, newOid string, objectType gitalypb.ObjectType) {
diff --git a/internal/service/cleanup/internalrefs/cleaner.go b/internal/service/cleanup/internalrefs/cleaner.go
index 5dd6889d4..d647bc3a4 100644
--- a/internal/service/cleanup/internalrefs/cleaner.go
+++ b/internal/service/cleanup/internalrefs/cleaner.go
@@ -21,14 +21,15 @@ var internalRefs = []string{
"refs/merge-requests/",
}
-// A ForEachFunc can be called for every entry in the BFG object map file that
-// the cleaner is processing. Returning an error will stop the cleaner before
-// it has processed the entry in question
+// A ForEachFunc can be called for every entry in the filter-repo or BFG object
+// map file that the cleaner is processing. Returning an error will stop the
+// cleaner before it has processed the entry in question
type ForEachFunc func(oldOID, newOID string, isInternalRef bool) error
// Cleaner is responsible for updating the internal references in a repository
-// as specified by a BFG object map. Currently, internal references pointing to
-// a commit that has been rewritten will simply be removed.
+// as specified by a filter-repo or BFG object map. Currently, internal
+// references pointing to a commit that has been rewritten will simply be
+// removed.
type Cleaner struct {
ctx context.Context
forEach ForEachFunc
@@ -42,8 +43,8 @@ type Cleaner struct {
// map file is in the wrong format
type ErrInvalidObjectMap error
-// NewCleaner builds a new instance of Cleaner, which is used to apply a BFG
-// object map to a repository.
+// NewCleaner builds a new instance of Cleaner, which is used to apply a
+// filter-repo or BFG object map to a repository.
func NewCleaner(ctx context.Context, repo *gitalypb.Repository, forEach ForEachFunc) (*Cleaner, error) {
table, err := buildLookupTable(ctx, repo)
if err != nil {
@@ -58,15 +59,20 @@ func NewCleaner(ctx context.Context, repo *gitalypb.Repository, forEach ForEachF
return &Cleaner{ctx: ctx, table: table, updater: updater, forEach: forEach}, nil
}
-// ApplyObjectMap processes a BFG object map file, removing any internal
-// references that point to a rewritten commit.
+// ApplyObjectMap processes an object map file generated by git filter-repo, or
+// BFG, removing any internal references that point to a rewritten commit.
func (c *Cleaner) ApplyObjectMap(reader io.Reader) error {
scanner := bufio.NewScanner(reader)
for i := int64(0); scanner.Scan(); i++ {
line := scanner.Text()
+ const filterRepoCommitMapHeader = "old new"
+ if line == filterRepoCommitMapHeader {
+ continue
+ }
+
// Each line consists of two SHAs: the SHA of the original object, and
- // the SHA of a replacement object in the new history built by BFG. For
+ // the SHA of a replacement object in the new repository history. For
// now, the new SHA is ignored, but it may be used to rewrite (rather
// than remove) some references in the future.
shas := strings.SplitN(line, " ", 2)
@@ -75,6 +81,13 @@ func (c *Cleaner) ApplyObjectMap(reader io.Reader) error {
return ErrInvalidObjectMap(fmt.Errorf("object map invalid at line %d", i))
}
+ // References to unchanged objects do not need to be removed. When the old
+ // SHA and new SHA are the same, this means the object was considered but
+ // not modified.
+ if shas[0] == shas[1] {
+ continue
+ }
+
if err := c.processEntry(shas[0], shas[1]); err != nil {
return err
}
@@ -115,9 +128,9 @@ func (c *Cleaner) processEntry(oldSHA, newSHA string) error {
// may point to the same SHA.
//
// The lookup table is necessary to efficiently check which references point to
-// an object that has been rewritten by the BFG (and so require action). It is
-// consulted once per line in the object map. Git is optimized for ref -> SHA
-// lookups, but we want the opposite!
+// an object that has been rewritten by the filter-repo or BFG (and so require
+// action). It is consulted once per line in the object map. Git is optimized
+// for ref -> SHA lookups, but we want the opposite!
func buildLookupTable(ctx context.Context, repo *gitalypb.Repository) (map[string][]string, error) {
cmd, err := git.SafeCmd(ctx, repo, nil, git.SubCmd{
Name: "for-each-ref",