diff options
author | James Ramsay <james@jramsay.com.au> | 2020-06-03 07:05:32 +0300 |
---|---|---|
committer | James Ramsay <james@jramsay.com.au> | 2020-06-09 03:47:23 +0300 |
commit | 22dcda65372742bc9ae5ffdfdb32ff8ebfecc738 (patch) | |
tree | 0843b9121bca39f64850932cdd5d68ea42faf414 | |
parent | 0d917bb8957d9652f2e82e32678a4b2934079b9b (diff) |
Add support for filter-repo commit map to cleaner
Git filter-repo is preferred to BFG, but it's output is not identical.
Compatibility is added for filter-repo commit map to avoid the need to
manually process the commit map before uploading.
Differences between filter-repo and BFG output:
- header included in output by filter-repo
- unchanged commits included in output by filter-repo
These lines can be detected and ignored.
3 files changed, 49 insertions, 18 deletions
diff --git a/changelogs/unreleased/git-filter-repo-compatibility.yml b/changelogs/unreleased/git-filter-repo-compatibility.yml new file mode 100644 index 000000000..446f724db --- /dev/null +++ b/changelogs/unreleased/git-filter-repo-compatibility.yml @@ -0,0 +1,5 @@ +--- +title: Add support for filter-repo commit map to cleaner +merge_request: 2247 +author: +type: added diff --git a/internal/service/cleanup/apply_bfg_object_map_stream_test.go b/internal/service/cleanup/apply_bfg_object_map_stream_test.go index 2a004b905..7146ee877 100644 --- a/internal/service/cleanup/apply_bfg_object_map_stream_test.go +++ b/internal/service/cleanup/apply_bfg_object_map_stream_test.go @@ -46,12 +46,22 @@ func TestApplyBfgObjectMapStreamSuccess(t *testing.T) { testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "update-ref", ref, headCommit.Id) } + // Create some refs pointing to ref/tags/v1.0.0, simulating an unmodified + // commit that predates bad data being added to the repository. + for _, ref := range []string{ + "refs/environments/_keep", "refs/keep-around/_keep", "refs/merge-requests/_keep", + } { + testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "update-ref", ref, tagID) + } + + const filterRepoCommitMapHeader = "old new\n" objectMapData := fmt.Sprintf( - strings.Repeat("%s %s\n", 4), - headCommit.Id, headCommit.Id, + filterRepoCommitMapHeader+strings.Repeat("%s %s\n", 5), + headCommit.Id, git.NullSHA, git.NullSHA, blobID, git.NullSHA, tagID, - git.NullSHA, git.NullSHA, + blobID, git.NullSHA, + tagID, tagID, ) entries, err := doStreamingRequest(ctx, t, testRepo, client, objectMapData) @@ -65,13 +75,16 @@ func TestApplyBfgObjectMapStreamSuccess(t *testing.T) { assert.Contains(t, refs, "refs/heads/_keep") assert.Contains(t, refs, "refs/tags/_keep") assert.Contains(t, refs, "refs/notes/_keep") + assert.Contains(t, refs, "refs/environments/_keep") + assert.Contains(t, refs, "refs/keep-around/_keep") + assert.Contains(t, refs, "refs/merge-requests/_keep") // Ensure that the returned entry is correct require.Len(t, entries, 4, "wrong number of entries returned") - requireEntry(t, entries[0], headCommit.Id, headCommit.Id, gitalypb.ObjectType_COMMIT) + requireEntry(t, entries[0], headCommit.Id, git.NullSHA, gitalypb.ObjectType_COMMIT) requireEntry(t, entries[1], git.NullSHA, blobID, gitalypb.ObjectType_BLOB) requireEntry(t, entries[2], git.NullSHA, tagID, gitalypb.ObjectType_TAG) - requireEntry(t, entries[3], git.NullSHA, git.NullSHA, gitalypb.ObjectType_UNKNOWN) + requireEntry(t, entries[3], blobID, git.NullSHA, gitalypb.ObjectType_UNKNOWN) } func requireEntry(t *testing.T, entry *gitalypb.ApplyBfgObjectMapStreamResponse_Entry, oldOid, newOid string, objectType gitalypb.ObjectType) { diff --git a/internal/service/cleanup/internalrefs/cleaner.go b/internal/service/cleanup/internalrefs/cleaner.go index 5dd6889d4..d647bc3a4 100644 --- a/internal/service/cleanup/internalrefs/cleaner.go +++ b/internal/service/cleanup/internalrefs/cleaner.go @@ -21,14 +21,15 @@ var internalRefs = []string{ "refs/merge-requests/", } -// A ForEachFunc can be called for every entry in the BFG object map file that -// the cleaner is processing. Returning an error will stop the cleaner before -// it has processed the entry in question +// A ForEachFunc can be called for every entry in the filter-repo or BFG object +// map file that the cleaner is processing. Returning an error will stop the +// cleaner before it has processed the entry in question type ForEachFunc func(oldOID, newOID string, isInternalRef bool) error // Cleaner is responsible for updating the internal references in a repository -// as specified by a BFG object map. Currently, internal references pointing to -// a commit that has been rewritten will simply be removed. +// as specified by a filter-repo or BFG object map. Currently, internal +// references pointing to a commit that has been rewritten will simply be +// removed. type Cleaner struct { ctx context.Context forEach ForEachFunc @@ -42,8 +43,8 @@ type Cleaner struct { // map file is in the wrong format type ErrInvalidObjectMap error -// NewCleaner builds a new instance of Cleaner, which is used to apply a BFG -// object map to a repository. +// NewCleaner builds a new instance of Cleaner, which is used to apply a +// filter-repo or BFG object map to a repository. func NewCleaner(ctx context.Context, repo *gitalypb.Repository, forEach ForEachFunc) (*Cleaner, error) { table, err := buildLookupTable(ctx, repo) if err != nil { @@ -58,15 +59,20 @@ func NewCleaner(ctx context.Context, repo *gitalypb.Repository, forEach ForEachF return &Cleaner{ctx: ctx, table: table, updater: updater, forEach: forEach}, nil } -// ApplyObjectMap processes a BFG object map file, removing any internal -// references that point to a rewritten commit. +// ApplyObjectMap processes an object map file generated by git filter-repo, or +// BFG, removing any internal references that point to a rewritten commit. func (c *Cleaner) ApplyObjectMap(reader io.Reader) error { scanner := bufio.NewScanner(reader) for i := int64(0); scanner.Scan(); i++ { line := scanner.Text() + const filterRepoCommitMapHeader = "old new" + if line == filterRepoCommitMapHeader { + continue + } + // Each line consists of two SHAs: the SHA of the original object, and - // the SHA of a replacement object in the new history built by BFG. For + // the SHA of a replacement object in the new repository history. For // now, the new SHA is ignored, but it may be used to rewrite (rather // than remove) some references in the future. shas := strings.SplitN(line, " ", 2) @@ -75,6 +81,13 @@ func (c *Cleaner) ApplyObjectMap(reader io.Reader) error { return ErrInvalidObjectMap(fmt.Errorf("object map invalid at line %d", i)) } + // References to unchanged objects do not need to be removed. When the old + // SHA and new SHA are the same, this means the object was considered but + // not modified. + if shas[0] == shas[1] { + continue + } + if err := c.processEntry(shas[0], shas[1]); err != nil { return err } @@ -115,9 +128,9 @@ func (c *Cleaner) processEntry(oldSHA, newSHA string) error { // may point to the same SHA. // // The lookup table is necessary to efficiently check which references point to -// an object that has been rewritten by the BFG (and so require action). It is -// consulted once per line in the object map. Git is optimized for ref -> SHA -// lookups, but we want the opposite! +// an object that has been rewritten by the filter-repo or BFG (and so require +// action). It is consulted once per line in the object map. Git is optimized +// for ref -> SHA lookups, but we want the opposite! func buildLookupTable(ctx context.Context, repo *gitalypb.Repository) (map[string][]string, error) { cmd, err := git.SafeCmd(ctx, repo, nil, git.SubCmd{ Name: "for-each-ref", |