Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-12-27 10:56:55 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2023-01-06 14:01:35 +0300
commited2b02c589220d26850d3540a0cadb97614dfa95 (patch)
tree440ee0ae5bec7a960a26182e2d32b8f62749e0e2
parentc5c45cfacccea3fc99e562486489014125507684 (diff)
diff: Sort out behaviour of GetPatchID with binary diffs
When computing patch IDs in the GetPatchID RPC we pipe the output of `git diff $oldrev $newrev` into `git patch-id --stable`. As we don't pass the `--binary` flag to git-diff(1), the diff would thus look like the following for a binary diff: diff --git a/file b/file index 0858941..b52a4cb 100644 Binary files a/file and b/file differ Notably, the diff only contains a reference to the pre- and post-image blob IDs, but it does not contain the actual binary diff. And this is good as computing binary diffs can be quite expensive, so we ultimately don't want to pass `--binary`. So ideally, Git would just fall back to computing the patch ID based on the blob IDs. As it turns out though this wasn't the case until Git v2.39.0, which has only just been released two weeks ago at the point of writing this. In previous versions Git would have computed the same patch ID for all such binary diffs regardless of the pre- and post-image blob IDs. This is of course not what we want in the context of GetPatchID, which is going to be used to determine whether there are effective changes between two versions of the same merge request. This bug was fixed upstream via 0df19eb9d9 (builtin: patch-id: fix patch-id with binary diffs, 2022-10-24), which subsequently broke our CI as we now have different patch IDs depending on the Git version. While we cannot help the fact that Git before v2.39.0 is broken, we can at least document this in our codebase and double down on the expected new behaviour. This is done by explicitly adding the `--full-index` flag to git-diff(1) as a means to state that we intend to compute binary diffs via the pre- and post-image blobs instead of using the binary diffs. Changelog: fixed
-rw-r--r--internal/git/version.go9
-rw-r--r--internal/git/version_test.go18
-rw-r--r--internal/gitaly/service/diff/patch_id.go12
-rw-r--r--internal/gitaly/service/diff/patch_id_test.go90
4 files changed, 129 insertions, 0 deletions
diff --git a/internal/git/version.go b/internal/git/version.go
index 6dc533726..ee44ddd32 100644
--- a/internal/git/version.go
+++ b/internal/git/version.go
@@ -73,6 +73,15 @@ func (v Version) IsSupported() bool {
return !v.LessThan(minimumVersion)
}
+// PatchIDRespectsBinaries detects whether the given Git version correctly handles binary diffs when
+// computing a patch ID. Previous to Git v2.39.0, git-patch-id(1) just completely ignored any binary
+// diffs and thus would consider two diffs the same even if a binary changed.
+func (v Version) PatchIDRespectsBinaries() bool {
+ return !v.LessThan(Version{
+ major: 2, minor: 39, patch: 0,
+ })
+}
+
// LessThan determines whether the version is older than another version.
func (v Version) LessThan(other Version) bool {
switch {
diff --git a/internal/git/version_test.go b/internal/git/version_test.go
index 08f3e9a6e..ea0777696 100644
--- a/internal/git/version_test.go
+++ b/internal/git/version_test.go
@@ -124,3 +124,21 @@ func TestVersion_IsSupported(t *testing.T) {
})
}
}
+
+func TestVersion_PatchIDRespectsBinaries(t *testing.T) {
+ for _, tc := range []struct {
+ version string
+ expect bool
+ }{
+ {"1.0.0", false},
+ {"2.38.2", false},
+ {"2.39.0", true},
+ {"3.0.0", true},
+ } {
+ t.Run(tc.version, func(t *testing.T) {
+ version, err := parseVersion(tc.version)
+ require.NoError(t, err)
+ require.Equal(t, tc.expect, version.PatchIDRespectsBinaries())
+ })
+ }
+}
diff --git a/internal/gitaly/service/diff/patch_id.go b/internal/gitaly/service/diff/patch_id.go
index aa8e5f50a..4430e69ca 100644
--- a/internal/gitaly/service/diff/patch_id.go
+++ b/internal/gitaly/service/diff/patch_id.go
@@ -22,6 +22,18 @@ func (s *server) GetPatchID(ctx context.Context, in *gitalypb.GetPatchIDRequest)
git.Command{
Name: "diff",
Args: []string{string(in.GetOldRevision()), string(in.GetNewRevision())},
+ Flags: []git.Option{
+ // git-patch-id(1) will ignore binary diffs, and computing binary
+ // diffs would be expensive anyway for large blobs. This means that
+ // we must instead use the pre- and post-image blob IDs that
+ // git-diff(1) prints for binary diffs as input to git-patch-id(1),
+ // but unfortunately this is only honored in Git v2.39.0 and newer.
+ // We have no other choice than to accept this though, so we instead
+ // just ask git-diff(1) to print the full blob IDs for the pre- and
+ // post-image blobs instead of abbreviated ones so that we can avoid
+ // any kind of potential prefix collisions.
+ git.Flag{Name: "--full-index"},
+ },
},
git.WithStderr(&diffCmdStderr),
)
diff --git a/internal/gitaly/service/diff/patch_id_test.go b/internal/gitaly/service/diff/patch_id_test.go
index eb443cf06..ef12951c6 100644
--- a/internal/gitaly/service/diff/patch_id_test.go
+++ b/internal/gitaly/service/diff/patch_id_test.go
@@ -5,6 +5,7 @@ import (
"strings"
"testing"
+ "github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/structerr"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
@@ -17,6 +18,9 @@ func TestGetPatchID(t *testing.T) {
ctx := testhelper.Context(t)
cfg, client := setupDiffServiceWithoutRepo(t)
+ gitVersion, err := gittest.NewCommandFactory(t, cfg).GitVersion(ctx)
+ require.NoError(t, err)
+
testCases := []struct {
desc string
setup func(t *testing.T) *gitalypb.GetPatchIDRequest
@@ -104,6 +108,92 @@ func TestGetPatchID(t *testing.T) {
},
},
{
+ desc: "returns patch-id with binary file",
+ setup: func(t *testing.T) *gitalypb.GetPatchIDRequest {
+ repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg)
+
+ oldCommit := gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000old"},
+ ),
+ )
+ newCommit := gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000new"},
+ ),
+ )
+
+ return &gitalypb.GetPatchIDRequest{
+ Repository: repoProto,
+ OldRevision: []byte(oldCommit),
+ NewRevision: []byte(newCommit),
+ }
+ },
+ expectedResponse: &gitalypb.GetPatchIDResponse{
+ PatchId: func() string {
+ // Before Git v2.39.0, git-patch-id(1) would skip over any
+ // lines that have an "index " prefix. This causes issues
+ // with diffs of binaries though: we don't generate the diff
+ // with `--binary`, so the "index" line that contains the
+ // pre- and post-image blob IDs of the binary is the only
+ // bit of information we have that something changed. But
+ // because Git used to skip over it we wouldn't actually
+ // take into account the contents of the changed blob at
+ // all.
+ //
+ // This was fixed in Git v2.39.0 so that "index" lines will
+ // now be hashed to correctly account for binary changes. As
+ // a result, the patch ID has changed.
+ if gitVersion.PatchIDRespectsBinaries() {
+ return "13e4e9b9cd44ec511bac24fdbdeab9b74ba3000b"
+ }
+
+ return "715883c1b90a5b4450072e22fefec769ad346266"
+ }(),
+ },
+ },
+ {
+ // This test is essentially the same test set up as the preceding one, but
+ // with different binary contents. This is done to ensure that we indeed
+ // generate different patch IDs as expected.
+ desc: "different binary diff has different patch ID",
+ setup: func(t *testing.T) *gitalypb.GetPatchIDRequest {
+ repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg)
+
+ oldCommit := gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000old2"},
+ ),
+ )
+ newCommit := gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000new2"},
+ ),
+ )
+
+ return &gitalypb.GetPatchIDRequest{
+ Repository: repoProto,
+ OldRevision: []byte(oldCommit),
+ NewRevision: []byte(newCommit),
+ }
+ },
+ expectedResponse: &gitalypb.GetPatchIDResponse{
+ PatchId: func() string {
+ if gitVersion.PatchIDRespectsBinaries() {
+ // When respecting binary diffs we indeed have a
+ // different patch ID compared to the preceding
+ // testcase.
+ return "f678855867b112ac2c5466260b3b3a5e75fca875"
+ }
+
+ // But when git-patch-id(1) is not paying respect to binary
+ // diffs we incorrectly return the same patch ID. This is
+ // nothing we can easily fix though.
+ return "715883c1b90a5b4450072e22fefec769ad346266"
+ }(),
+ },
+ },
+ {
desc: "file didn't exist in the old revision",
setup: func(t *testing.T) *gitalypb.GetPatchIDRequest {
repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg)