gitaly/config: Support generating configuration via external command

In the context of FIPS we need a mechanism that allows us to not store any credentials in plaintext format on-disk. Ideally, we would hook into a solution like AWS Secrets or Kubernetes Secrets that lets us retrieve the secrets at startup time so that they don't have to exist on disk in any form at all. But there exists a bunch of different solutions for secrets management, where many make sense in one context but don't in any other contexts. So implementing support for e.g. AWS Secrets directly would put us into a position where we now need to argue which solutions we want to support and which we don't. This shows that directly supporting any of them directly is not the right way to go, but that we should instead support a general schema that allows administrators to easily plug in whatever they are using without too much of a hassle. A simple and boring solution to this is to implement support for running an external command on startup that generates the secrets for us. Like this, an administrator can simply provide a script that runs e.g. `aws get-secret-value` and Gitaly would know to put the secrets into the correct spot in our configuration. But that again has another issue: there are several different fields that may contain secrets, and making sure they all can be adjusted might be tedious. To fix that, we simply generalize the idea: why only care about secrets, when we can instead provide a mechanism that allows the external command to generate the complete configuration which we then merge back into the initial configuration read from disk? The implementation of this is straight-forward: we read the initial configuration from disk, and when a specific key is set we use its value as the executable that is to be spawned. We expect the executable to generate JSON, which we then deserialize and thus merge into the config structure we have already parsed. Like this, the command is free to only generate configuration at runtime that it wants to actually change, and all the other configuration would stay as-is. A simple external executable generating secrets via the AWS command line client could thus look like this: #!/usr/bin/env ruby require 'json' JSON.generate({"gitlab": {"http_settings": {"password": `aws get-secret-value --secret-id ...`}}}) But theoretically-speaking, you can generate the complete configuration of Gitaly. Changelog: added
author: Patrick Steinhardt <psteinhardt@gitlab.com> 2023-03-17 10:44:35 +0300
committer: Patrick Steinhardt <psteinhardt@gitlab.com> 2023-03-20 14:46:17 +0300
commit: 0c21142ce287af817bf7d20d54378f25244fb7eb (patch)
tree: 0b319d049e93f4d1937072dde383c8a45580cba3
parent: 2175a03911136b364bf49c426cc5489e51822068 (diff)
2 files changed, 294 insertions, 0 deletions
diff --git a/internal/gitaly/config/config.go b/internal/gitaly/config/config.go
index 02659fd01..256e2ce6a 100644
--- a/internal/gitaly/config/config.go
+++ b/internal/gitaly/config/config.go
@@ -1,11 +1,13 @@
 package config
 
 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"reflect"
 	"regexp"
@@ -51,6 +53,12 @@ type DailyJob struct {
 
 // Cfg is a container for all config derived from config.toml.
 type Cfg struct {
+	// ConfigCommand specifies the path to an executable that Gitaly will run after loading the
+	// initial configuration from disk. The executable is expected to write JSON-formatted
+	// configuration to its standard output that we will then deserialize and merge back into
+	// the initially-loaded configuration again. This is an easy mechanism to generate parts of
+	// the configuration at runtime, like for example secrets.
+	ConfigCommand          string              `toml:"config_command,omitempty" json:"config_command"`
 	SocketPath             string              `toml:"socket_path,omitempty" json:"socket_path" split_words:"true"`
 	ListenAddr             string              `toml:"listen_addr,omitempty" json:"listen_addr" split_words:"true"`
 	TLSListenAddr          string              `toml:"tls_listen_addr,omitempty" json:"tls_listen_addr" split_words:"true"`
@@ -280,6 +288,22 @@ func Load(file io.Reader) (Cfg, error) {
 		return Cfg{}, fmt.Errorf("load toml: %w", err)
 	}
 
+	if cfg.ConfigCommand != "" {
+		output, err := exec.Command(cfg.ConfigCommand).Output()
+		if err != nil {
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				return Cfg{}, fmt.Errorf("running config command: %w, stderr: %q", err, string(exitErr.Stderr))
+			}
+
+			return Cfg{}, fmt.Errorf("running config command: %w", err)
+		}
+
+		if err := json.Unmarshal(output, &cfg); err != nil {
+			return Cfg{}, fmt.Errorf("unmarshalling generated config: %w", err)
+		}
+	}
+
 	if err := cfg.setDefaults(); err != nil {
 		return Cfg{}, err
 	}
diff --git a/internal/gitaly/config/config_test.go b/internal/gitaly/config/config_test.go
index cb0342bb9..eebe460aa 100644
--- a/internal/gitaly/config/config_test.go
+++ b/internal/gitaly/config/config_test.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"os"
@@ -179,6 +180,275 @@ func TestLoadListenAddr(t *testing.T) {
 	require.Equal(t, ":8080", cfg.ListenAddr)
 }
 
+func TestLoadConfigCommand(t *testing.T) {
+	t.Parallel()
+
+	modifyDefaultConfig := func(modify func(cfg *Cfg)) Cfg {
+		cfg := &Cfg{
+			Prometheus: prometheus.DefaultConfig(),
+		}
+		require.NoError(t, cfg.setDefaults())
+		modify(cfg)
+		return *cfg
+	}
+
+	writeScript := func(t *testing.T, script string) string {
+		return testhelper.WriteExecutable(t,
+			filepath.Join(testhelper.TempDir(t), "script"),
+			[]byte("#!/bin/sh\n"+script),
+		)
+	}
+
+	type setupData struct {
+		cfg         Cfg
+		expectedErr string
+		expectedCfg Cfg
+	}
+
+	for _, tc := range []struct {
+		desc  string
+		setup func(t *testing.T) setupData
+	}{
+		{
+			desc: "nonexistent executable",
+			setup: func(t *testing.T) setupData {
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: "/does/not/exist",
+					},
+					expectedErr: "running config command: fork/exec /does/not/exist: no such file or directory",
+				}
+			},
+		},
+		{
+			desc: "command points to non-executable file",
+			setup: func(t *testing.T) setupData {
+				cmd := filepath.Join(testhelper.TempDir(t), "script")
+				require.NoError(t, os.WriteFile(cmd, nil, perm.PrivateFile))
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedErr: fmt.Sprintf(
+						"running config command: fork/exec %s: permission denied", cmd,
+					),
+				}
+			},
+		},
+		{
+			desc: "executable returns error",
+			setup: func(t *testing.T) setupData {
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: writeScript(t, "echo error >&2 && exit 1"),
+					},
+					expectedErr: "running config command: exit status 1, stderr: \"error\\n\"",
+				}
+			},
+		},
+		{
+			desc: "invalid JSON",
+			setup: func(t *testing.T) setupData {
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: writeScript(t, "echo 'this is not json'"),
+					},
+					expectedErr: "unmarshalling generated config: invalid character 'h' in literal true (expecting 'r')",
+				}
+			},
+		},
+		{
+			desc: "mixed stdout and stderr",
+			setup: func(t *testing.T) setupData {
+				// We want to verify that we're able to correctly parse the output
+				// even if the process writes to both its stdout and stderr.
+				cmd := writeScript(t, "echo error >&2 && echo '{}'")
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+					}),
+				}
+			},
+		},
+		{
+			desc: "empty script",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, "echo '{}'")
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+					}),
+				}
+			},
+		},
+		{
+			desc: "unknown value",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"key_does_not_exist":"value"}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+					}),
+				}
+			},
+		},
+		{
+			desc: "generated value",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"socket_path": "value"}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.SocketPath = "value"
+					}),
+				}
+			},
+		},
+		{
+			desc: "overridden value",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"socket_path": "overridden_value"}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+						SocketPath:    "initial_value",
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.SocketPath = "overridden_value"
+					}),
+				}
+			},
+		},
+		{
+			desc: "mixed configuration",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"listen_addr": "listen_addr"}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+						SocketPath:    "socket_path",
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.SocketPath = "socket_path"
+						cfg.ListenAddr = "listen_addr"
+					}),
+				}
+			},
+		},
+		{
+			desc: "unset storages",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"storage": []}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.Storages = []Storage{}
+					}),
+				}
+			},
+		},
+		{
+			desc: "overridden storages",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `echo '{"storage": [
+					{"name": "overridden storage", "path": "/path/to/storage"}
+				]}'`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+						Storages: []Storage{
+							{Name: "initial storage"},
+						},
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.Storages = []Storage{
+							{Name: "overridden storage", Path: "/path/to/storage"},
+						}
+						cfg.DailyMaintenance.Storages = []string{
+							"overridden storage",
+						}
+					}),
+				}
+			},
+		},
+		{
+			desc: "subsections are being merged",
+			setup: func(t *testing.T) setupData {
+				cmd := writeScript(t, `cat <<-EOF
+					{
+						"git": {
+							"signing_key": "signing_key"
+						}
+					}
+					EOF
+				`)
+
+				return setupData{
+					cfg: Cfg{
+						ConfigCommand: cmd,
+						Git: Git{
+							BinPath: "foo/bar",
+						},
+					},
+					expectedCfg: modifyDefaultConfig(func(cfg *Cfg) {
+						cfg.ConfigCommand = cmd
+						cfg.Git.BinPath = "foo/bar"
+						cfg.Git.SigningKey = "signing_key"
+					}),
+				}
+			},
+		},
+	} {
+		tc := tc
+
+		t.Run(tc.desc, func(t *testing.T) {
+			t.Parallel()
+
+			setup := tc.setup(t)
+
+			var cfgBuffer bytes.Buffer
+			require.NoError(t, toml.NewEncoder(&cfgBuffer).Encode(setup.cfg))
+
+			cfg, err := Load(&cfgBuffer)
+			// We can't use `require.Equal()` for the error as it's basically impossible
+			// to reproduce the exact `exec.ExitError`.
+			if setup.expectedErr != "" {
+				require.EqualError(t, err, setup.expectedErr)
+			} else {
+				require.NoError(t, err)
+			}
+			require.Equal(t, setup.expectedCfg, cfg)
+		})
+	}
+}
+
 func TestConfig_marshallingTOMLRoundtrips(t *testing.T) {
 	// Without `omitempty` tags marshalling and de-marshalling as TOML would result in different
 	// configurations.
author	Patrick Steinhardt <psteinhardt@gitlab.com>	2023-03-17 10:44:35 +0300
committer	Patrick Steinhardt <psteinhardt@gitlab.com>	2023-03-20 14:46:17 +0300
commit	0c21142ce287af817bf7d20d54378f25244fb7eb (patch)
tree	0b319d049e93f4d1937072dde383c8a45580cba3
parent	2175a03911136b364bf49c426cc5489e51822068 (diff)