diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-05-02 11:18:51 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-05-02 11:18:51 +0300 |
commit | d8107b6c1e949b499bf56833fd7931633c4652a1 (patch) | |
tree | 79a23827491759d2a93d6e03f9aee4e931c76c09 | |
parent | c47bb5beba81703b7c2bb2b3d1c138cb667fa80c (diff) | |
parent | c89c8002131dabee677cce9c0551801a8e272570 (diff) |
Merge branch 'pks-git-core.fsync-compatibility' into 'master'
git: Fix core.fsync incompatibility and fix corruption of loose references
Closes #4117 and #4125
See merge request gitlab-org/gitaly!4498
-rw-r--r-- | internal/git/command_factory.go | 87 | ||||
-rw-r--r-- | internal/git/command_factory_test.go | 8 | ||||
-rw-r--r-- | internal/git/gittest/intercepting_command_factory.go | 68 | ||||
-rw-r--r-- | internal/git/gittest/intercepting_command_factory_test.go | 64 | ||||
-rw-r--r-- | internal/git/version.go | 6 | ||||
-rw-r--r-- | internal/git/version_test.go | 24 | ||||
-rw-r--r-- | internal/middleware/commandstatshandler/commandstatshandler_test.go | 11 |
7 files changed, 232 insertions, 36 deletions
diff --git a/internal/git/command_factory.go b/internal/git/command_factory.go index 3a5c58bac..a9544a46c 100644 --- a/internal/git/command_factory.go +++ b/internal/git/command_factory.go @@ -20,29 +20,6 @@ import ( "gitlab.com/gitlab-org/gitaly/v14/internal/metadata/featureflag" ) -var globalOptions = []GlobalOption{ - // Synchronize object files to lessen the likelihood of - // repository corruption in case the server crashes. - ConfigPair{Key: "core.fsyncObjectFiles", Value: "true"}, - - // Disable automatic garbage collection as we handle scheduling - // of it ourselves. - ConfigPair{Key: "gc.auto", Value: "0"}, - - // CRLF line endings will get replaced with LF line endings - // when writing blobs to the object database. No conversion is - // done when reading blobs from the object database. This is - // required for the web editor. - ConfigPair{Key: "core.autocrlf", Value: "input"}, - - // Git allows the use of replace refs, where a given object ID can be replaced with a - // different one. The result is that Git commands would use the new object instead of the - // old one in almost all contexts. This is a security threat: an adversary may use this - // mechanism to replace malicious commits with seemingly benign ones. We thus globally - // disable this mechanism. - ConfigPair{Key: "core.useReplaceRefs", Value: "false"}, -} - // CommandFactory is designed to create and run git commands in a protected and fully managed manner. type CommandFactory interface { // New creates a new command for the repo repository. @@ -317,10 +294,12 @@ func (cf *ExecCommandFactory) GitVersion(ctx context.Context) (Version, error) { cf.cachedGitVersionLock.Lock() defer cf.cachedGitVersionLock.Unlock() + execEnv := cf.GetExecutionEnvironment(ctx) + // We cannot reuse the stat(3P) information from above given that it wasn't acquired under // the write-lock. As such, it may have been invalidated by a concurrent thread which has // already updated the Git version information. - stat, err = os.Stat(cf.GetExecutionEnvironment(ctx).BinaryPath) + stat, err = os.Stat(execEnv.BinaryPath) if err != nil { return Version{}, fmt.Errorf("cannot stat Git binary: %w", err) } @@ -330,9 +309,11 @@ func (cf *ExecCommandFactory) GitVersion(ctx context.Context) (Version, error) { // though: it can also happen after `GitVersion()` was called, so it doesn't really help to // retry version detection here. Instead, we just live with this raciness -- the next call // to `GitVersion()` would detect the version being out-of-date anyway and thus correct it. - cmd, err := cf.NewWithoutRepo(ctx, SubCmd{ - Name: "version", - }) + // + // Furthermore, note that we're not using `newCommand()` but instead hand-craft the command. + // This is required to avoid a cyclic dependency when we need to check the version in + // `newCommand()` itself. + cmd, err := command.New(ctx, exec.Command(execEnv.BinaryPath, "version"), nil, nil, nil, execEnv.EnvironmentVariables...) if err != nil { return Version{}, fmt.Errorf("spawning version command: %w", err) } @@ -342,6 +323,10 @@ func (cf *ExecCommandFactory) GitVersion(ctx context.Context) (Version, error) { return Version{}, err } + if err := cmd.Wait(); err != nil { + return Version{}, fmt.Errorf("waiting for version: %w", err) + } + cf.cachedGitVersionByBinary[gitBinary] = cachedGitVersion{ version: gitVersion, stat: stat, @@ -436,6 +421,14 @@ func (cf *ExecCommandFactory) combineArgs(ctx context.Context, gitConfig []confi return nil, fmt.Errorf("invalid sub command name %q: %w", sc.Subcommand(), ErrInvalidArg) } + // It's fine to ask for the Git version whenever we spawn a command: the value is cached + // nowadays, so this would typically only boil down to a single stat(3P) call to determine + // whether the cache is stale or not. + gitVersion, err := cf.GitVersion(ctx) + if err != nil { + return nil, fmt.Errorf("determining Git version: %w", err) + } + // As global options may cancel out each other, we have a clearly defined order in which // globals get applied. The order is similar to how git handles configuration options from // most general to most specific. This allows callsites to override options which would @@ -448,8 +441,44 @@ func (cf *ExecCommandFactory) combineArgs(ctx context.Context, gitConfig []confi // 3. Globals passed via command options, e.g. as set up by // `WithReftxHook()`. // 4. Configuration as provided by the admin in Gitaly's config.toml. - var combinedGlobals []GlobalOption - combinedGlobals = append(combinedGlobals, globalOptions...) + combinedGlobals := []GlobalOption{ + // Disable automatic garbage collection as we handle scheduling + // of it ourselves. + ConfigPair{Key: "gc.auto", Value: "0"}, + + // CRLF line endings will get replaced with LF line endings + // when writing blobs to the object database. No conversion is + // done when reading blobs from the object database. This is + // required for the web editor. + ConfigPair{Key: "core.autocrlf", Value: "input"}, + + // Git allows the use of replace refs, where a given object ID can be replaced with a + // different one. The result is that Git commands would use the new object instead of the + // old one in almost all contexts. This is a security threat: an adversary may use this + // mechanism to replace malicious commits with seemingly benign ones. We thus globally + // disable this mechanism. + ConfigPair{Key: "core.useReplaceRefs", Value: "false"}, + } + + // Git v2.36.0 introduced new fine-grained configuration for what data should be fsynced and + // how that should happen. + if gitVersion.HasGranularFsyncConfig() { + combinedGlobals = append( + combinedGlobals, + // This is the same as below, but in addition we're also syncing packed-refs + // and loose refs to disk. This fixes a long-standing issue we've had where + // hard reboots of a server could end up corrupting loose references. + ConfigPair{Key: "core.fsync", Value: "objects,derived-metadata,reference"}, + ConfigPair{Key: "core.fsyncMethod", Value: "fsync"}, + ) + } else { + // Synchronize object files to lessen the likelihood of + // repository corruption in case the server crashes. + combinedGlobals = append( + combinedGlobals, ConfigPair{Key: "core.fsyncObjectFiles", Value: "true"}, + ) + } + combinedGlobals = append(combinedGlobals, commandDescription.opts...) combinedGlobals = append(combinedGlobals, cc.globals...) for _, configPair := range gitConfig { diff --git a/internal/git/command_factory_test.go b/internal/git/command_factory_test.go index 438e23adc..e9eee0a40 100644 --- a/internal/git/command_factory_test.go +++ b/internal/git/command_factory_test.go @@ -359,7 +359,9 @@ func TestExecCommandFactory_GitVersion(t *testing.T) { } { t.Run(tc.desc, func(t *testing.T) { gitCmdFactory := gittest.NewInterceptingCommandFactory( - ctx, t, testcfg.Build(t), generateVersionScript(tc.versionString), git.WithSkipHooks(), + ctx, t, testcfg.Build(t), generateVersionScript(tc.versionString), + gittest.WithRealCommandFactoryOptions(git.WithSkipHooks()), + gittest.WithInterceptedVersion(), ) actualVersion, err := gitCmdFactory.GitVersion(ctx) @@ -374,7 +376,9 @@ func TestExecCommandFactory_GitVersion(t *testing.T) { t.Run("caching", func(t *testing.T) { gitCmdFactory := gittest.NewInterceptingCommandFactory( - ctx, t, testcfg.Build(t), generateVersionScript("git version 1.2.3"), git.WithSkipHooks(), + ctx, t, testcfg.Build(t), generateVersionScript("git version 1.2.3"), + gittest.WithRealCommandFactoryOptions(git.WithSkipHooks()), + gittest.WithInterceptedVersion(), ) gitPath := gitCmdFactory.GetExecutionEnvironment(ctx).BinaryPath diff --git a/internal/git/gittest/intercepting_command_factory.go b/internal/git/gittest/intercepting_command_factory.go index 46b06f721..f68a3bca4 100644 --- a/internal/git/gittest/intercepting_command_factory.go +++ b/internal/git/gittest/intercepting_command_factory.go @@ -2,6 +2,7 @@ package gittest import ( "context" + "fmt" "path/filepath" "testing" @@ -19,6 +20,33 @@ var _ git.CommandFactory = &InterceptingCommandFactory{} type InterceptingCommandFactory struct { realCommandFactory git.CommandFactory interceptingCommandFactory git.CommandFactory + interceptVersion bool +} + +type interceptingCommandFactoryConfig struct { + opts []git.ExecCommandFactoryOption + interceptVersion bool +} + +// InterceptingCommandFactoryOption is an option that can be passed to +// NewInterceptingCommandFactory. +type InterceptingCommandFactoryOption func(*interceptingCommandFactoryConfig) + +// WithRealCommandFactoryOptions is an option that allows the caller to pass options to the real +// git.ExecCommandFactory. +func WithRealCommandFactoryOptions(opts ...git.ExecCommandFactoryOption) InterceptingCommandFactoryOption { + return func(cfg *interceptingCommandFactoryConfig) { + cfg.opts = opts + } +} + +// WithInterceptedVersion will cause `GitVersion()` to use the intercepting Git command factory +// instead of the real Git command factory. This allows the caller to explicitly test version +// detection. +func WithInterceptedVersion() InterceptingCommandFactoryOption { + return func(cfg *interceptingCommandFactoryConfig) { + cfg.interceptVersion = true + } } // NewInterceptingCommandFactory creates a new command factory which intercepts Git commands. The @@ -30,21 +58,50 @@ func NewInterceptingCommandFactory( tb testing.TB, cfg config.Cfg, generateScript func(git.ExecutionEnvironment) string, - opts ...git.ExecCommandFactoryOption, + opts ...InterceptingCommandFactoryOption, ) *InterceptingCommandFactory { + var interceptingCommandFactoryCfg interceptingCommandFactoryConfig + for _, opt := range opts { + opt(&interceptingCommandFactoryCfg) + } + // We create two different command factories. The first one will set up the execution // environment such that we can deterministically resolve the Git binary path as well as // required environment variables for both bundled and non-bundled Git. The second one will // then use a separate config which overrides the Git binary path to point to a custom // script supplied by the user. - gitCmdFactory := NewCommandFactory(tb, cfg, opts...) + gitCmdFactory := NewCommandFactory(tb, cfg, interceptingCommandFactoryCfg.opts...) + execEnv := gitCmdFactory.GetExecutionEnvironment(ctx) scriptPath := filepath.Join(testhelper.TempDir(tb), "git") - testhelper.WriteExecutable(tb, scriptPath, []byte(generateScript(gitCmdFactory.GetExecutionEnvironment(ctx)))) + testhelper.WriteExecutable(tb, scriptPath, []byte(generateScript(execEnv))) + + // In case the user requested us to not intercept the Git version we need to write another + // wrapper script. This wrapper script detects whether git-version(1) is executed and, if + // so, instead executes the real Git binary. Otherwise, it executes the Git script as + // provided by the user. + // + // This is required in addition to the interception of `GitVersion()` itself so that calls + // to `interceptingCommandFactory.GitVersion()` also return the correct results whenever the + // factory calls its own `GitVersion()` function. + if !interceptingCommandFactoryCfg.interceptVersion { + wrapperScriptPath := filepath.Join(testhelper.TempDir(tb), "git") + testhelper.WriteExecutable(tb, wrapperScriptPath, []byte(fmt.Sprintf( + `#!/bin/bash + if test "$1" = "version" + then + exec %q "$@" + fi + exec %q "$@" + `, execEnv.BinaryPath, scriptPath))) + + scriptPath = wrapperScriptPath + } return &InterceptingCommandFactory{ realCommandFactory: gitCmdFactory, interceptingCommandFactory: NewCommandFactory(tb, cfg, git.WithGitBinaryPath(scriptPath)), + interceptVersion: interceptingCommandFactoryCfg.interceptVersion, } } @@ -86,5 +143,8 @@ func (f *InterceptingCommandFactory) HooksPath(ctx context.Context) string { // GitVersion returns the Git version as returned by the intercepted command. func (f *InterceptingCommandFactory) GitVersion(ctx context.Context) (git.Version, error) { - return f.interceptingCommandFactory.GitVersion(ctx) + if f.interceptVersion { + return f.interceptingCommandFactory.GitVersion(ctx) + } + return f.realCommandFactory.GitVersion(ctx) } diff --git a/internal/git/gittest/intercepting_command_factory_test.go b/internal/git/gittest/intercepting_command_factory_test.go index f3005f86b..d326393ea 100644 --- a/internal/git/gittest/intercepting_command_factory_test.go +++ b/internal/git/gittest/intercepting_command_factory_test.go @@ -58,3 +58,67 @@ func TestInterceptingCommandFactory(t *testing.T) { require.Equal(t, expectedString, stdout.String()) }) } + +func TestInterceptingCommandFactory_GitVersion(t *testing.T) { + cfg, _, _ := setup(t) + ctx := testhelper.Context(t) + + generateVersionScript := func(execEnv git.ExecutionEnvironment) string { + return `#!/usr/bin/env bash + echo "git version 1.2.3" + ` + } + + // Obtain the real Git version so that we can compare that it matches what we expect. + realFactory, cleanup, err := git.NewExecCommandFactory(cfg) + require.NoError(t, err) + defer cleanup() + + realVersion, err := realFactory.GitVersion(ctx) + require.NoError(t, err) + + // Furthermore, we need to obtain the intercepted version here because we cannot construct + // `git.Version` structs ourselves. + fakeVersion, err := NewInterceptingCommandFactory(ctx, t, cfg, generateVersionScript, WithInterceptedVersion()).GitVersion(ctx) + require.NoError(t, err) + require.Equal(t, "1.2.3", fakeVersion.String()) + + for _, tc := range []struct { + desc string + opts []InterceptingCommandFactoryOption + expectedVersion git.Version + }{ + { + desc: "without version interception", + expectedVersion: realVersion, + }, + { + desc: "with version interception", + opts: []InterceptingCommandFactoryOption{ + WithInterceptedVersion(), + }, + expectedVersion: fakeVersion, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + factory := NewInterceptingCommandFactory(ctx, t, cfg, generateVersionScript, tc.opts...) + + version, err := factory.GitVersion(ctx) + require.NoError(t, err) + require.Equal(t, tc.expectedVersion, version) + + // The real command factory should always return the real Git version. + version, err = factory.realCommandFactory.GitVersion(ctx) + require.NoError(t, err) + require.Equal(t, realVersion, version) + + // On the other hand, the intercepting command factory should return + // different versions depending on whether the version is intercepted or + // not. This is required such that it correctly handles version checks in + // case it calls `GitVersion()` on itself. + version, err = factory.interceptingCommandFactory.GitVersion(ctx) + require.NoError(t, err) + require.Equal(t, tc.expectedVersion, version) + }) + } +} diff --git a/internal/git/version.go b/internal/git/version.go index 981dce5e1..8f4966347 100644 --- a/internal/git/version.go +++ b/internal/git/version.go @@ -84,6 +84,12 @@ func (v Version) FlushesUpdaterefStatus() bool { }) } +// HasGranularFsyncConfig determines whether the given Git version supports the new granular fsync +// configuration via `core.fsync`. This new config has been introduced with Git v2.36.0. +func (v Version) HasGranularFsyncConfig() bool { + return !v.LessThan(Version{major: 2, minor: 36, rc: true}) +} + // LessThan determines whether the version is older than another version. func (v Version) LessThan(other Version) bool { switch { diff --git a/internal/git/version_test.go b/internal/git/version_test.go index 55bbc0ff3..6fe8c89b8 100644 --- a/internal/git/version_test.go +++ b/internal/git/version_test.go @@ -147,3 +147,27 @@ func TestVersion_FlushesUpdaterefStatus(t *testing.T) { }) } } + +func TestVersion_HasGranularFsyncConfig(t *testing.T) { + for _, tc := range []struct { + version string + expect bool + }{ + {"2.35.0", false}, + {"2.35.0-rc0", false}, + {"2.35.1", false}, + {"2.35.1.gl3", false}, + {"2.36.0-rc1", true}, + {"2.36.0", true}, + {"2.36.0.gl1", true}, + {"2.36.1", true}, + {"2.37.1", true}, + {"3.0.0", true}, + } { + t.Run(tc.version, func(t *testing.T) { + version, err := parseVersion(tc.version) + require.NoError(t, err) + require.Equal(t, tc.expect, version.HasGranularFsyncConfig()) + }) + } +} diff --git a/internal/middleware/commandstatshandler/commandstatshandler_test.go b/internal/middleware/commandstatshandler/commandstatshandler_test.go index 9289eed11..9369cbb8e 100644 --- a/internal/middleware/commandstatshandler/commandstatshandler_test.go +++ b/internal/middleware/commandstatshandler/commandstatshandler_test.go @@ -99,7 +99,16 @@ func TestInterceptor(t *testing.T) { require.NoError(t, err) }, expectedLogData: map[string]interface{}{ - "command.count": 1, + // Until we bump our minimum required Git version to v2.36.0 we are + // forced to carry a version check whenever we spawn Git commands. + // The command count here is thus 2 because of the additional + // git-version(1) command. Note that the next command count remains + // 1 though: the version is cached, and consequentially we don't + // re-run git-version(1). + // + // This test will break again as soon as we bump the minimum Git + // version and thus remove the version check. + "command.count": 2, }, }, { |