diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-12-27 10:56:55 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-01-06 14:01:35 +0300 |
commit | ed2b02c589220d26850d3540a0cadb97614dfa95 (patch) | |
tree | 440ee0ae5bec7a960a26182e2d32b8f62749e0e2 | |
parent | c5c45cfacccea3fc99e562486489014125507684 (diff) |
diff: Sort out behaviour of GetPatchID with binary diffs
When computing patch IDs in the GetPatchID RPC we pipe the output of
`git diff $oldrev $newrev` into `git patch-id --stable`. As we don't
pass the `--binary` flag to git-diff(1), the diff would thus look like
the following for a binary diff:
diff --git a/file b/file
index 0858941..b52a4cb 100644
Binary files a/file and b/file differ
Notably, the diff only contains a reference to the pre- and post-image
blob IDs, but it does not contain the actual binary diff. And this is
good as computing binary diffs can be quite expensive, so we ultimately
don't want to pass `--binary`. So ideally, Git would just fall back to
computing the patch ID based on the blob IDs.
As it turns out though this wasn't the case until Git v2.39.0, which has
only just been released two weeks ago at the point of writing this. In
previous versions Git would have computed the same patch ID for all such
binary diffs regardless of the pre- and post-image blob IDs. This is of
course not what we want in the context of GetPatchID, which is going to
be used to determine whether there are effective changes between two
versions of the same merge request.
This bug was fixed upstream via 0df19eb9d9 (builtin: patch-id: fix
patch-id with binary diffs, 2022-10-24), which subsequently broke our CI
as we now have different patch IDs depending on the Git version.
While we cannot help the fact that Git before v2.39.0 is broken, we can
at least document this in our codebase and double down on the expected
new behaviour. This is done by explicitly adding the `--full-index` flag
to git-diff(1) as a means to state that we intend to compute binary
diffs via the pre- and post-image blobs instead of using the binary
diffs.
Changelog: fixed
-rw-r--r-- | internal/git/version.go | 9 | ||||
-rw-r--r-- | internal/git/version_test.go | 18 | ||||
-rw-r--r-- | internal/gitaly/service/diff/patch_id.go | 12 | ||||
-rw-r--r-- | internal/gitaly/service/diff/patch_id_test.go | 90 |
4 files changed, 129 insertions, 0 deletions
diff --git a/internal/git/version.go b/internal/git/version.go index 6dc533726..ee44ddd32 100644 --- a/internal/git/version.go +++ b/internal/git/version.go @@ -73,6 +73,15 @@ func (v Version) IsSupported() bool { return !v.LessThan(minimumVersion) } +// PatchIDRespectsBinaries detects whether the given Git version correctly handles binary diffs when +// computing a patch ID. Previous to Git v2.39.0, git-patch-id(1) just completely ignored any binary +// diffs and thus would consider two diffs the same even if a binary changed. +func (v Version) PatchIDRespectsBinaries() bool { + return !v.LessThan(Version{ + major: 2, minor: 39, patch: 0, + }) +} + // LessThan determines whether the version is older than another version. func (v Version) LessThan(other Version) bool { switch { diff --git a/internal/git/version_test.go b/internal/git/version_test.go index 08f3e9a6e..ea0777696 100644 --- a/internal/git/version_test.go +++ b/internal/git/version_test.go @@ -124,3 +124,21 @@ func TestVersion_IsSupported(t *testing.T) { }) } } + +func TestVersion_PatchIDRespectsBinaries(t *testing.T) { + for _, tc := range []struct { + version string + expect bool + }{ + {"1.0.0", false}, + {"2.38.2", false}, + {"2.39.0", true}, + {"3.0.0", true}, + } { + t.Run(tc.version, func(t *testing.T) { + version, err := parseVersion(tc.version) + require.NoError(t, err) + require.Equal(t, tc.expect, version.PatchIDRespectsBinaries()) + }) + } +} diff --git a/internal/gitaly/service/diff/patch_id.go b/internal/gitaly/service/diff/patch_id.go index aa8e5f50a..4430e69ca 100644 --- a/internal/gitaly/service/diff/patch_id.go +++ b/internal/gitaly/service/diff/patch_id.go @@ -22,6 +22,18 @@ func (s *server) GetPatchID(ctx context.Context, in *gitalypb.GetPatchIDRequest) git.Command{ Name: "diff", Args: []string{string(in.GetOldRevision()), string(in.GetNewRevision())}, + Flags: []git.Option{ + // git-patch-id(1) will ignore binary diffs, and computing binary + // diffs would be expensive anyway for large blobs. This means that + // we must instead use the pre- and post-image blob IDs that + // git-diff(1) prints for binary diffs as input to git-patch-id(1), + // but unfortunately this is only honored in Git v2.39.0 and newer. + // We have no other choice than to accept this though, so we instead + // just ask git-diff(1) to print the full blob IDs for the pre- and + // post-image blobs instead of abbreviated ones so that we can avoid + // any kind of potential prefix collisions. + git.Flag{Name: "--full-index"}, + }, }, git.WithStderr(&diffCmdStderr), ) diff --git a/internal/gitaly/service/diff/patch_id_test.go b/internal/gitaly/service/diff/patch_id_test.go index eb443cf06..ef12951c6 100644 --- a/internal/gitaly/service/diff/patch_id_test.go +++ b/internal/gitaly/service/diff/patch_id_test.go @@ -5,6 +5,7 @@ import ( "strings" "testing" + "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" @@ -17,6 +18,9 @@ func TestGetPatchID(t *testing.T) { ctx := testhelper.Context(t) cfg, client := setupDiffServiceWithoutRepo(t) + gitVersion, err := gittest.NewCommandFactory(t, cfg).GitVersion(ctx) + require.NoError(t, err) + testCases := []struct { desc string setup func(t *testing.T) *gitalypb.GetPatchIDRequest @@ -104,6 +108,92 @@ func TestGetPatchID(t *testing.T) { }, }, { + desc: "returns patch-id with binary file", + setup: func(t *testing.T) *gitalypb.GetPatchIDRequest { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + + oldCommit := gittest.WriteCommit(t, cfg, repoPath, + gittest.WithTreeEntries( + gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000old"}, + ), + ) + newCommit := gittest.WriteCommit(t, cfg, repoPath, + gittest.WithTreeEntries( + gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000new"}, + ), + ) + + return &gitalypb.GetPatchIDRequest{ + Repository: repoProto, + OldRevision: []byte(oldCommit), + NewRevision: []byte(newCommit), + } + }, + expectedResponse: &gitalypb.GetPatchIDResponse{ + PatchId: func() string { + // Before Git v2.39.0, git-patch-id(1) would skip over any + // lines that have an "index " prefix. This causes issues + // with diffs of binaries though: we don't generate the diff + // with `--binary`, so the "index" line that contains the + // pre- and post-image blob IDs of the binary is the only + // bit of information we have that something changed. But + // because Git used to skip over it we wouldn't actually + // take into account the contents of the changed blob at + // all. + // + // This was fixed in Git v2.39.0 so that "index" lines will + // now be hashed to correctly account for binary changes. As + // a result, the patch ID has changed. + if gitVersion.PatchIDRespectsBinaries() { + return "13e4e9b9cd44ec511bac24fdbdeab9b74ba3000b" + } + + return "715883c1b90a5b4450072e22fefec769ad346266" + }(), + }, + }, + { + // This test is essentially the same test set up as the preceding one, but + // with different binary contents. This is done to ensure that we indeed + // generate different patch IDs as expected. + desc: "different binary diff has different patch ID", + setup: func(t *testing.T) *gitalypb.GetPatchIDRequest { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) + + oldCommit := gittest.WriteCommit(t, cfg, repoPath, + gittest.WithTreeEntries( + gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000old2"}, + ), + ) + newCommit := gittest.WriteCommit(t, cfg, repoPath, + gittest.WithTreeEntries( + gittest.TreeEntry{Path: "binary", Mode: "100644", Content: "\000new2"}, + ), + ) + + return &gitalypb.GetPatchIDRequest{ + Repository: repoProto, + OldRevision: []byte(oldCommit), + NewRevision: []byte(newCommit), + } + }, + expectedResponse: &gitalypb.GetPatchIDResponse{ + PatchId: func() string { + if gitVersion.PatchIDRespectsBinaries() { + // When respecting binary diffs we indeed have a + // different patch ID compared to the preceding + // testcase. + return "f678855867b112ac2c5466260b3b3a5e75fca875" + } + + // But when git-patch-id(1) is not paying respect to binary + // diffs we incorrectly return the same patch ID. This is + // nothing we can easily fix though. + return "715883c1b90a5b4450072e22fefec769ad346266" + }(), + }, + }, + { desc: "file didn't exist in the old revision", setup: func(t *testing.T) *gitalypb.GetPatchIDRequest { repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg) |