Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Vosmaer <jacob@gitlab.com>2019-06-14 20:23:21 +0300
committerJohn Cai <jcai@gitlab.com>2019-06-14 20:23:21 +0300
commit640584ebeabc0fa15998559b92a3a5ab214658b4 (patch)
tree4c1acb60b67ee8efd4820ba429cebfa884be9911
parent7cbe010f500a1fc4a337d3169ca9fdfdafffa0ac (diff)
Un-dangle dangling objects in object pools
-rw-r--r--changelogs/unreleased/jv-rescue-dangling.yml5
-rw-r--r--internal/git/objectpool/fetch.go62
-rw-r--r--internal/git/objectpool/fetch_test.go88
-rw-r--r--internal/git/updateref/updateref.go6
4 files changed, 160 insertions, 1 deletions
diff --git a/changelogs/unreleased/jv-rescue-dangling.yml b/changelogs/unreleased/jv-rescue-dangling.yml
new file mode 100644
index 000000000..b0c1e859e
--- /dev/null
+++ b/changelogs/unreleased/jv-rescue-dangling.yml
@@ -0,0 +1,5 @@
+---
+title: Un-dangle dangling objects in object pools
+merge_request: 1297
+author:
+type: changed
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go
index 2b967ad70..30df30ba1 100644
--- a/internal/git/objectpool/fetch.go
+++ b/internal/git/objectpool/fetch.go
@@ -3,10 +3,14 @@ package objectpool
import (
"bufio"
"context"
+ "fmt"
+ "strings"
"gitlab.com/gitlab-org/gitaly-proto/go/gitalypb"
"gitlab.com/gitlab-org/gitaly/internal/command"
"gitlab.com/gitlab-org/gitaly/internal/git"
+ "gitlab.com/gitlab-org/gitaly/internal/git/repository"
+ "gitlab.com/gitlab-org/gitaly/internal/git/updateref"
"gitlab.com/gitlab-org/gitaly/internal/helper"
)
@@ -64,5 +68,63 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos
if err := fetchCmd.Wait(); err != nil {
return err
}
+
+ if err := rescueDanglingObjects(ctx, o); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+const danglingObjectNamespace = "refs/dangling"
+
+// rescueDanglingObjects creates refs for all dangling objects if finds
+// with `git fsck`, which converts those objects from "dangling" to
+// "not-dangling". This guards against any object ever being deleted from
+// a pool repository. This is a defense in depth against accidental use
+// of `git prune`, which could remove Git objects that a pool member
+// relies on. There is currently no way for us to reliably determine if
+// an object is still used anywhere, so the only safe thing to do is to
+// assume that every object _is_ used.
+func rescueDanglingObjects(ctx context.Context, repo repository.GitRepo) error {
+ fsck, err := git.Command(ctx, repo, "fsck", "--connectivity-only", "--dangling")
+ if err != nil {
+ return err
+ }
+
+ updater, err := updateref.New(ctx, repo)
+ if err != nil {
+ return err
+ }
+
+ scanner := bufio.NewScanner(fsck)
+ for scanner.Scan() {
+ split := strings.SplitN(scanner.Text(), " ", 3)
+ if len(split) != 3 {
+ continue
+ }
+
+ if split[0] != "dangling" {
+ continue
+ }
+
+ ref := danglingObjectNamespace + "/" + split[2]
+ if err := updater.Create(ref, split[2]); err != nil {
+ return err
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ return err
+ }
+
+ if err := fsck.Wait(); err != nil {
+ return fmt.Errorf("git fsck: %v", err)
+ }
+
+ if err := updater.Wait(); err != nil {
+ return err
+ }
+
return nil
}
diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go
new file mode 100644
index 000000000..85dfb144a
--- /dev/null
+++ b/internal/git/objectpool/fetch_test.go
@@ -0,0 +1,88 @@
+package objectpool
+
+import (
+ "crypto/rand"
+ "encoding/hex"
+ "fmt"
+ "io"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/internal/testhelper"
+)
+
+func TestFetchFromOriginDangling(t *testing.T) {
+ source, _, cleanup := testhelper.NewTestRepo(t)
+ defer cleanup()
+
+ pool, err := NewObjectPool(source.StorageName, testhelper.NewTestObjectPoolName(t))
+ require.NoError(t, err)
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ require.NoError(t, pool.FetchFromOrigin(ctx, source), "seed pool")
+
+ const (
+ existingTree = "07f8147e8e73aab6c935c296e8cdc5194dee729b"
+ existingCommit = "7975be0116940bf2ad4321f79d02a55c5f7779aa"
+ existingBlob = "c60514b6d3d6bf4bec1030f70026e34dfbd69ad5"
+ )
+
+ // We want to have some objects that are guaranteed to be dangling. Use
+ // random data to make each object unique.
+ nonceBytes := make([]byte, 4)
+ _, err = io.ReadFull(rand.Reader, nonceBytes)
+ require.NoError(t, err)
+ nonce := hex.EncodeToString(nonceBytes)
+
+ baseArgs := []string{"-C", pool.FullPath()}
+
+ // A blob with random contents should be unique.
+ newBlobArgs := append(baseArgs, "hash-object", "-t", "blob", "-w", "--stdin")
+ newBlob := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newBlobArgs...))
+
+ // A tree with a randomly named blob entry should be unique.
+ newTreeArgs := append(baseArgs, "mktree")
+ newTreeStdin := strings.NewReader(fmt.Sprintf("100644 blob %s %s\n", existingBlob, nonce))
+ newTree := text.ChompBytes(testhelper.MustRunCommand(t, newTreeStdin, "git", newTreeArgs...))
+
+ // A commit with a random message should be unique.
+ newCommitArgs := append(baseArgs, "commit-tree", existingTree)
+ newCommit := text.ChompBytes(testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newCommitArgs...))
+
+ // A tag with random hex characters in its name should be unique.
+ newTagName := "tag-" + nonce
+ newTagArgs := append(baseArgs, "tag", "-m", "msg", "-a", newTagName, existingCommit)
+ testhelper.MustRunCommand(t, strings.NewReader(nonce), "git", newTagArgs...)
+ newTag := text.ChompBytes(testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "rev-parse", newTagName)...))
+
+ // `git tag` automatically creates a ref, so our new tag is not dangling.
+ // Deleting the ref should fix that.
+ testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "update-ref", "-d", "refs/tags/"+newTagName)...)
+
+ fsckBefore := testhelper.MustRunCommand(t, nil, "git", append(baseArgs, "fsck", "--connectivity-only", "--dangling")...)
+ fsckBeforeLines := strings.Split(string(fsckBefore), "\n")
+
+ for _, l := range []string{
+ fmt.Sprintf("dangling blob %s", newBlob),
+ fmt.Sprintf("dangling tree %s", newTree),
+ fmt.Sprintf("dangling commit %s", newCommit),
+ fmt.Sprintf("dangling tag %s", newTag),
+ } {
+ require.Contains(t, fsckBeforeLines, l, "test setup sanity check")
+ }
+
+ // We expect this second run to convert the dangling objects into
+ // non-dangling objects.
+ require.NoError(t, pool.FetchFromOrigin(ctx, source), "second fetch")
+
+ refsArgs := append(baseArgs, "for-each-ref", "--format=%(refname) %(objectname)")
+ refsAfter := testhelper.MustRunCommand(t, nil, "git", refsArgs...)
+ refsAfterLines := strings.Split(string(refsAfter), "\n")
+ for _, id := range []string{newBlob, newTree, newCommit, newTag} {
+ require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id))
+ }
+}
diff --git a/internal/git/updateref/updateref.go b/internal/git/updateref/updateref.go
index 01376be52..7df494b18 100644
--- a/internal/git/updateref/updateref.go
+++ b/internal/git/updateref/updateref.go
@@ -53,5 +53,9 @@ func (u *Updater) Delete(ref string) error {
// Wait applies the commands specified in other calls to the Updater
func (u *Updater) Wait() error {
- return u.cmd.Wait()
+ if err := u.cmd.Wait(); err != nil {
+ return fmt.Errorf("git update-ref: %v", err)
+ }
+
+ return nil
}