diff options
author | John Cai <jcai@gitlab.com> | 2019-06-14 20:23:21 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2019-06-14 20:23:21 +0300 |
commit | a4a0481e938763304f8e3be58f84093a81c4e8fa (patch) | |
tree | 4c1acb60b67ee8efd4820ba429cebfa884be9911 | |
parent | 7cbe010f500a1fc4a337d3169ca9fdfdafffa0ac (diff) | |
parent | 640584ebeabc0fa15998559b92a3a5ab214658b4 (diff) |
Merge branch 'jv-rescue-dangling' into 'master'
Un-dangle dangling objects in object pools
Closes #1716
See merge request gitlab-org/gitaly!1297
-rw-r--r-- | changelogs/unreleased/jv-rescue-dangling.yml | 5 | ||||
-rw-r--r-- | internal/git/objectpool/fetch.go | 62 | ||||
-rw-r--r-- | internal/git/objectpool/fetch_test.go | 88 | ||||
-rw-r--r-- | internal/git/updateref/updateref.go | 6 |
4 files changed, 160 insertions, 1 deletions
diff --git a/changelogs/unreleased/jv-rescue-dangling.yml b/changelogs/unreleased/jv-rescue-dangling.yml new file mode 100644 index 000000000..b0c1e859e --- /dev/null +++ b/changelogs/unreleased/jv-rescue-dangling.yml @@ -0,0 +1,5 @@ +--- +title: Un-dangle dangling objects in object pools +merge_request: 1297 +author: +type: changed diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 2b967ad70..30df30ba1 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -3,10 +3,14 @@ package objectpool import ( "bufio" "context" + "fmt" + "strings" "gitlab.com/gitlab-org/gitaly-proto/go/gitalypb" "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" + "gitlab.com/gitlab-org/gitaly/internal/git/repository" + "gitlab.com/gitlab-org/gitaly/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/internal/helper" ) @@ -64,5 +68,63 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos if err := fetchCmd.Wait(); err != nil { return err } + + if err := rescueDanglingObjects(ctx, o); err != nil { + return err + } + + return nil +} + +const danglingObjectNamespace = "refs/dangling" + +// rescueDanglingObjects creates refs for all dangling objects if finds +// with `git fsck`, which converts those objects from "dangling" to +// "not-dangling". This guards against any object ever being deleted from +// a pool repository. This is a defense in depth against accidental use +// of `git prune`, which could remove Git objects that a pool member +// relies on. There is currently no way for us to reliably determine if +// an object is still used anywhere, so the only safe thing to do is to +// assume that every object _is_ used. +func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error { + fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling") + if err != nil { + return err + } + + updater, err := updateref.New(ctx, repo) + if err != nil { + return err + } + + scanner := bufio.NewScanner(fsck) + for scanner.Scan() { + split := strings.SplitN(scanner.Text(), " ", 3) + if len(split) != 3 { + continue + } + + if split[0] != "dangling" { + continue + } + + ref := danglingObjectNamespace + "/" + split[2] + if err := updater.Create(ref, split[2]); err != nil { + return err + } + } + + if err := scanner.Err(); err != nil { + return err + } + + if err := fsck.Wait(); err != nil { + return fmt.Errorf("git fsck: %v", err) + } + + if err := updater.Wait(); err != nil { + return err + } + return nil } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go new file mode 100644 index 000000000..85dfb144a --- /dev/null +++ b/internal/git/objectpool/fetch_test.go @@ -0,0 +1,88 @@ +package objectpool + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestFetchFromOriginDangling(t *testing.T) { + source, _, cleanup := testhelper.NewTestRepo(t) + defer cleanup() + + pool, err := NewObjectPool(source.StorageName, testhelper.NewTestObjectPoolName(t)) + require.NoError(t, err) + + ctx, cancel := testhelper.Context() + defer cancel() + + require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool") + + const ( + existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b" + existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa" + existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5" + ) + + // We want to have some objects that are guaranteed to be dangling. Use + // random data to make each object unique. + nonceBytes := make([]byte, 4) + _, err = io.ReadFull(rand.Reader, nonceBytes) + require.NoError(t, err) + nonce := hex.EncodeToString(nonceBytes) + + baseArgs := []string{"-C", pool.FullPath()} + + // A blob with random contents should be unique. + newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin") + newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...)) + + // A tree with a randomly named blob entry should be unique. + newTreeArgs := append(baseArgs, "mktree") + newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce)) + newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...)) + + // A commit with a random message should be unique. + newCommitArgs := append(baseArgs, "commit-tree", existingTree) + newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...)) + + // A tag with random hex characters in its name should be unique. + newTagName := "tag-" + nonce + newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit) + testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...) + newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...)) + + // `git tag` automatically creates a ref, so our new tag is not dangling. + // Deleting the ref should fix that. + testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...) + + fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...) + fsckBeforeLines := strings.Split(string(fsckBefore), "\n") + + for _, l := range []string{ + fmt.Sprintf("dangling blob %s", newBlob), + fmt.Sprintf("dangling tree %s", newTree), + fmt.Sprintf("dangling commit %s", newCommit), + fmt.Sprintf("dangling tag %s", newTag), + } { + require.Contains(t, fsckBeforeLines, l, "test setup sanity check") + } + + // We expect this second run to convert the dangling objects into + // non-dangling objects. + require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch") + + refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)") + refsAfter := testhelper.MustRunCommand(t, nil, "git", refsArgs...) + refsAfterLines := strings.Split(string(refsAfter), "\n") + for _, id := range []string{newBlob, newTree, newCommit, newTag} { + require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) + } +} diff --git a/internal/git/updateref/updateref.go b/internal/git/updateref/updateref.go index 01376be52..7df494b18 100644 --- a/internal/git/updateref/updateref.go +++ b/internal/git/updateref/updateref.go @@ -53,5 +53,9 @@ func (u *Updater) Delete(ref string) error { // Wait applies the commands specified in other calls to the Updater func (u *Updater) Wait() error { - return u.cmd.Wait() + if err := u.cmd.Wait(); err != nil { + return fmt.Errorf("git update-ref: %v", err) + } + + return nil } |