Merge branch 'pks-housekeeping-improve-stale-data-reporting' into 'master'

housekeeping: Improve reporting of cleaned stale data See merge request gitlab-org/gitaly!4446
author: James Fargher <proglottis@gmail.com> 2022-03-30 03:03:57 +0300
committer: James Fargher <proglottis@gmail.com> 2022-03-30 03:03:57 +0300
commit: d7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (patch)
tree: c69687425ef74dc8c40a324ec00ddf3f08a58aa1
parent: f319294b959b4cb8e9a35a330d3831c1216029f7 (diff)
parent: e7da5aae6439a99d4d7918f3b383fe85b561d75d (diff)
3 files changed, 93 insertions, 49 deletions
diff --git a/internal/git/gittest/repo.go b/internal/git/gittest/repo.go
index 45369493d..1d606fa7f 100644
--- a/internal/git/gittest/repo.go
+++ b/internal/git/gittest/repo.go
@@ -308,6 +308,7 @@ func CloneRepo(t testing.TB, cfg config.Cfg, storage config.Storage, opts ...Clo
 
 	absolutePath := filepath.Join(storage.Path, relativePath)
 	Exec(t, cfg, append(args, testRepositoryPath(t, sourceRepo), absolutePath)...)
+	Exec(t, cfg, "-C", absolutePath, "remote", "remove", "origin")
 
 	t.Cleanup(func() { require.NoError(t, os.RemoveAll(absolutePath)) })
 
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go
index 931a97553..ee1699151 100644
--- a/internal/git/housekeeping/clean_stale_data.go
+++ b/internal/git/housekeeping/clean_stale_data.go
@@ -49,10 +49,22 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
 		return fmt.Errorf("housekeeping failed to get repo path: %w", err)
 	}
 
-	logEntry := myLogger(ctx)
-	var filesToPrune []string
+	staleDataByType := map[string]int{}
+	defer func() {
+		if len(staleDataByType) == 0 {
+			return
+		}
+
+		logEntry := myLogger(ctx)
+		for staleDataType, count := range staleDataByType {
+			logEntry = logEntry.WithField(fmt.Sprintf("stale_data.%s", staleDataType), count)
+			m.prunedFilesTotal.WithLabelValues(staleDataType).Add(float64(count))
+		}
+		logEntry.Info("removed files")
+	}()
 
-	for field, staleFileFinder := range map[string]staleFileFinderFn{
+	var filesToPrune []string
+	for staleFileType, staleFileFinder := range map[string]staleFileFinderFn{
 		"objects":        findTemporaryObjects,
 		"locks":          findStaleLockfiles,
 		"refs":           findBrokenLooseReferences,
@@ -63,38 +75,28 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
 	} {
 		staleFiles, err := staleFileFinder(ctx, repoPath)
 		if err != nil {
-			return fmt.Errorf("housekeeping failed to find %s: %w", field, err)
+			return fmt.Errorf("housekeeping failed to find %s: %w", staleFileType, err)
 		}
 
 		filesToPrune = append(filesToPrune, staleFiles...)
-		logEntry = logEntry.WithField(field, len(staleFiles))
-
-		m.prunedFilesTotal.WithLabelValues(field).Add(float64(len(staleFiles)))
+		staleDataByType[staleFileType] = len(staleFiles)
 	}
 
-	unremovableFiles := 0
 	for _, path := range filesToPrune {
 		if err := os.Remove(path); err != nil {
 			if os.IsNotExist(err) {
 				continue
 			}
-			unremovableFiles++
-			// We cannot use `logEntry` here as it's already seeded
-			// with the statistics fields.
+			staleDataByType["failures"]++
 			myLogger(ctx).WithError(err).WithField("path", path).Warn("unable to remove stale file")
 		}
 	}
 
 	prunedRefDirs, err := removeRefEmptyDirs(ctx, repo)
-	m.prunedFilesTotal.WithLabelValues("refsemptydir").Add(float64(prunedRefDirs))
+	staleDataByType["refsemptydir"] = prunedRefDirs
 	if err != nil {
 		return fmt.Errorf("housekeeping could not remove empty refs: %w", err)
 	}
-	logEntry = logEntry.WithField("refsemptydir", prunedRefDirs)
-
-	if len(filesToPrune) > 0 || prunedRefDirs > 0 {
-		logEntry.WithField("failures", unremovableFiles).Info("removed files")
-	}
 
 	// TODO: https://gitlab.com/gitlab-org/gitaly/-/issues/3987
 	// This is a temporary code and needs to be removed once it will be run on all repositories at least once.
@@ -103,9 +105,19 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
 		if !errors.Is(err, git.ErrNotFound) {
 			return fmt.Errorf("housekeeping could not unset unnecessary config lines: %w", err)
 		}
-	}
-
-	if err := pruneEmptyConfigSections(ctx, repo); err != nil {
+		staleDataByType["configkeys"] = 0
+	} else {
+		// If we didn't get an error we know that we've deleted _something_. We just set
+		// this variable to `1` because we don't count how many keys we have deleted. It's
+		// probably good enough: we only want to know whether we're still pruning such old
+		// configuration or not, but typically don't care how many there are so that we know
+		// when to delete this cleanup of legacy data.
+		staleDataByType["configkeys"] = 1
+	}
+
+	skippedSections, err := pruneEmptyConfigSections(ctx, repo)
+	staleDataByType["configsections"] = skippedSections
+	if err != nil {
 		return fmt.Errorf("failed pruning empty sections: %w", err)
 	}
 
@@ -113,10 +125,10 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
 }
 
 // pruneEmptyConfigSections prunes all empty sections from the repo's config.
-func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (returnedErr error) {
+func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (_ int, returnedErr error) {
 	repoPath, err := repo.Path()
 	if err != nil {
-		return fmt.Errorf("getting repo path: %w", err)
+		return 0, fmt.Errorf("getting repo path: %w", err)
 	}
 	configPath := filepath.Join(repoPath, "config")
 
@@ -124,7 +136,7 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return
 	// values into it anymore. Slurping it into memory should thus be fine.
 	configContents, err := os.ReadFile(configPath)
 	if err != nil {
-		return fmt.Errorf("reading config: %w", err)
+		return 0, fmt.Errorf("reading config: %w", err)
 	}
 	configLines := strings.SplitAfter(string(configContents), "\n")
 	if configLines[len(configLines)-1] == "" {
@@ -132,6 +144,8 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return
 		configLines = configLines[:len(configLines)-1]
 	}
 
+	skippedSections := 0
+
 	// We now filter out any empty sections. A section is empty if the next line is a section
 	// header as well, or if it is the last line in the gitconfig. This isn't quite the whole
 	// story given that a section can also be empty if it just ain't got any keys, but only
@@ -141,24 +155,26 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return
 	for i := 0; i < len(configLines)-1; i++ {
 		// Skip if we have two consecutive section headers.
 		if isSectionHeader(configLines[i]) && isSectionHeader(configLines[i+1]) {
+			skippedSections++
 			continue
 		}
 		filteredLines = append(filteredLines, configLines[i])
 	}
 	// The final line is always stripped in case it is a section header.
 	if len(configLines) > 0 && !isSectionHeader(configLines[len(configLines)-1]) {
+		skippedSections++
 		filteredLines = append(filteredLines, configLines[len(configLines)-1])
 	}
 
 	// If we haven't filtered out anything then there is no need to update the target file.
 	if len(configLines) == len(filteredLines) {
-		return
+		return 0, nil
 	}
 
 	// Otherwise, we need to update the repository's configuration.
 	configWriter, err := safe.NewLockingFileWriter(configPath)
 	if err != nil {
-		return fmt.Errorf("creating config configWriter: %w", err)
+		return 0, fmt.Errorf("creating config configWriter: %w", err)
 	}
 	defer func() {
 		if err := configWriter.Close(); err != nil && returnedErr == nil {
@@ -168,7 +184,7 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return
 
 	for _, filteredLine := range filteredLines {
 		if _, err := configWriter.Write([]byte(filteredLine)); err != nil {
-			return fmt.Errorf("writing filtered config: %w", err)
+			return 0, fmt.Errorf("writing filtered config: %w", err)
 		}
 	}
 
@@ -187,25 +203,25 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return
 				git.Flag{Name: "-l"},
 			},
 		}, git.WithStdout(&configOutput)); err != nil {
-			return fmt.Errorf("listing config: %w", err)
+			return 0, fmt.Errorf("listing config: %w", err)
 		}
 
 		configOutputs = append(configOutputs, configOutput.String())
 	}
 	if configOutputs[0] != configOutputs[1] {
-		return fmt.Errorf("section pruning has caused config change")
+		return 0, fmt.Errorf("section pruning has caused config change")
 	}
 
 	// We don't use transactional voting but commit the file directly -- we have asserted that
 	// the change is idempotent anyway.
 	if err := configWriter.Lock(); err != nil {
-		return fmt.Errorf("failed locking config: %w", err)
+		return 0, fmt.Errorf("failed locking config: %w", err)
 	}
 	if err := configWriter.Commit(); err != nil {
-		return fmt.Errorf("failed committing pruned config: %w", err)
+		return 0, fmt.Errorf("failed committing pruned config: %w", err)
 	}
 
-	return nil
+	return skippedSections, nil
 }
 
 func isSectionHeader(line string) bool {
diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go
index e6a121c14..44a0edc1f 100644
--- a/internal/git/housekeeping/clean_stale_data_test.go
+++ b/internal/git/housekeeping/clean_stale_data_test.go
@@ -132,6 +132,8 @@ func d(name string, mode os.FileMode, age time.Duration, finalState entryFinalSt
 }
 
 type cleanStaleDataMetrics struct {
+	configkeys     int
+	configsections int
 	objects        int
 	locks          int
 	refs           int
@@ -153,6 +155,8 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl
 	require.NoError(t, err)
 
 	for metric, expectedValue := range map[string]int{
+		"configkeys":     metrics.configkeys,
+		"configsections": metrics.configsections,
 		"objects":        metrics.objects,
 		"locks":          metrics.locks,
 		"refs":           metrics.refs,
@@ -880,7 +884,9 @@ func TestRepositoryManager_CleanStaleData_unsetConfiguration(t *testing.T) {
 	unrelated = untouched
 `), 0o644))
 
-	require.NoError(t, NewManager(cfg.Prometheus, nil).CleanStaleData(ctx, repo))
+	mgr := NewManager(cfg.Prometheus, nil)
+
+	require.NoError(t, mgr.CleanStaleData(ctx, repo))
 	require.Equal(t,
 		`[core]
 	repositoryformatversion = 0
@@ -893,6 +899,10 @@ func TestRepositoryManager_CleanStaleData_unsetConfiguration(t *testing.T) {
 [totally]
 	unrelated = untouched
 `, string(testhelper.MustReadFile(t, configPath)))
+
+	requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{
+		configkeys: 1,
+	})
 }
 
 func TestRepositoryManager_CleanStaleData_unsetConfigurationTransactional(t *testing.T) {
@@ -957,7 +967,9 @@ func TestRepositoryManager_CleanStaleData_pruneEmptyConfigSections(t *testing.T)
 [remote "tmp-8c948ca94832c2725733e48cb2902287"]
 `), 0o644))
 
-	require.NoError(t, NewManager(cfg.Prometheus, nil).CleanStaleData(ctx, repo))
+	mgr := NewManager(cfg.Prometheus, nil)
+
+	require.NoError(t, mgr.CleanStaleData(ctx, repo))
 	require.Equal(t, `[core]
 	repositoryformatversion = 0
 	filemode = true
@@ -965,6 +977,11 @@ func TestRepositoryManager_CleanStaleData_pruneEmptyConfigSections(t *testing.T)
 [uploadpack]
 	allowAnySHA1InWant = true
 `, string(testhelper.MustReadFile(t, configPath)))
+
+	requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{
+		configkeys:     1,
+		configsections: 7,
+	})
 }
 
 func TestPruneEmptyConfigSections(t *testing.T) {
@@ -978,9 +995,10 @@ func TestPruneEmptyConfigSections(t *testing.T) {
 	configPath := filepath.Join(repoPath, "config")
 
 	for _, tc := range []struct {
-		desc         string
-		configData   string
-		expectedData string
+		desc                    string
+		configData              string
+		expectedData            string
+		expectedSkippedSections int
 	}{
 		{
 			desc:         "empty",
@@ -1021,9 +1039,10 @@ func TestPruneEmptyConfigSections(t *testing.T) {
 			// This is not correct, but we really don't want to start parsing
 			// the config format completely. So we err on the side of caution
 			// and just say this is fine.
-			desc:         "empty section with comment",
-			configData:   "[foo]\n# comment\n[bar]\n[baz]\n",
-			expectedData: "[foo]\n# comment\n",
+			desc:                    "empty section with comment",
+			configData:              "[foo]\n# comment\n[bar]\n[baz]\n",
+			expectedData:            "[foo]\n# comment\n",
+			expectedSkippedSections: 1,
 		},
 		{
 			desc:         "empty section",
@@ -1031,14 +1050,16 @@ func TestPruneEmptyConfigSections(t *testing.T) {
 			expectedData: "",
 		},
 		{
-			desc:         "empty sections",
-			configData:   "[foo]\n[bar]\n[baz]\n",
-			expectedData: "",
+			desc:                    "empty sections",
+			configData:              "[foo]\n[bar]\n[baz]\n",
+			expectedData:            "",
+			expectedSkippedSections: 2,
 		},
 		{
-			desc:         "empty sections with missing newline",
-			configData:   "[foo]\n[bar]\n[baz]",
-			expectedData: "",
+			desc:                    "empty sections with missing newline",
+			configData:              "[foo]\n[bar]\n[baz]",
+			expectedData:            "",
+			expectedSkippedSections: 2,
 		},
 		{
 			desc:         "trailing empty section",
@@ -1046,9 +1067,10 @@ func TestPruneEmptyConfigSections(t *testing.T) {
 			expectedData: "[foo]\nbar = baz\n",
 		},
 		{
-			desc:         "mixed keys and sections",
-			configData:   "[empty]\n[nonempty]\nbar = baz\nbar = baz\n[empty]\n",
-			expectedData: "[nonempty]\nbar = baz\nbar = baz\n",
+			desc:                    "mixed keys and sections",
+			configData:              "[empty]\n[nonempty]\nbar = baz\nbar = baz\n[empty]\n",
+			expectedData:            "[nonempty]\nbar = baz\nbar = baz\n",
+			expectedSkippedSections: 1,
 		},
 		{
 			desc: "real world example",
@@ -1085,11 +1107,16 @@ func TestPruneEmptyConfigSections(t *testing.T) {
 [remote "tmp-f7a91ec9415f984d3747cf608b0a7e9c"]
         prune = true
 `,
+			expectedSkippedSections: 15,
 		},
 	} {
 		t.Run(tc.desc, func(t *testing.T) {
 			require.NoError(t, os.WriteFile(configPath, []byte(tc.configData), 0o644))
-			require.NoError(t, pruneEmptyConfigSections(ctx, repo))
+
+			skippedSections, err := pruneEmptyConfigSections(ctx, repo)
+			require.NoError(t, err)
+			require.Equal(t, tc.expectedSkippedSections, skippedSections)
+
 			require.Equal(t, tc.expectedData, string(testhelper.MustReadFile(t, configPath)))
 		})
 	}
author	James Fargher <proglottis@gmail.com>	2022-03-30 03:03:57 +0300
committer	James Fargher <proglottis@gmail.com>	2022-03-30 03:03:57 +0300
commit	d7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (patch)
tree	c69687425ef74dc8c40a324ec00ddf3f08a58aa1
parent	f319294b959b4cb8e9a35a330d3831c1216029f7 (diff)
parent	e7da5aae6439a99d4d7918f3b383fe85b561d75d (diff)