diff options
author | James Fargher <proglottis@gmail.com> | 2022-03-30 03:03:57 +0300 |
---|---|---|
committer | James Fargher <proglottis@gmail.com> | 2022-03-30 03:03:57 +0300 |
commit | d7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (patch) | |
tree | c69687425ef74dc8c40a324ec00ddf3f08a58aa1 | |
parent | f319294b959b4cb8e9a35a330d3831c1216029f7 (diff) | |
parent | e7da5aae6439a99d4d7918f3b383fe85b561d75d (diff) |
Merge branch 'pks-housekeeping-improve-stale-data-reporting' into 'master'
housekeeping: Improve reporting of cleaned stale data
See merge request gitlab-org/gitaly!4446
-rw-r--r-- | internal/git/gittest/repo.go | 1 | ||||
-rw-r--r-- | internal/git/housekeeping/clean_stale_data.go | 78 | ||||
-rw-r--r-- | internal/git/housekeeping/clean_stale_data_test.go | 63 |
3 files changed, 93 insertions, 49 deletions
diff --git a/internal/git/gittest/repo.go b/internal/git/gittest/repo.go index 45369493d..1d606fa7f 100644 --- a/internal/git/gittest/repo.go +++ b/internal/git/gittest/repo.go @@ -308,6 +308,7 @@ func CloneRepo(t testing.TB, cfg config.Cfg, storage config.Storage, opts ...Clo absolutePath := filepath.Join(storage.Path, relativePath) Exec(t, cfg, append(args, testRepositoryPath(t, sourceRepo), absolutePath)...) + Exec(t, cfg, "-C", absolutePath, "remote", "remove", "origin") t.Cleanup(func() { require.NoError(t, os.RemoveAll(absolutePath)) }) diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go index 931a97553..ee1699151 100644 --- a/internal/git/housekeeping/clean_stale_data.go +++ b/internal/git/housekeeping/clean_stale_data.go @@ -49,10 +49,22 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo. return fmt.Errorf("housekeeping failed to get repo path: %w", err) } - logEntry := myLogger(ctx) - var filesToPrune []string + staleDataByType := map[string]int{} + defer func() { + if len(staleDataByType) == 0 { + return + } + + logEntry := myLogger(ctx) + for staleDataType, count := range staleDataByType { + logEntry = logEntry.WithField(fmt.Sprintf("stale_data.%s", staleDataType), count) + m.prunedFilesTotal.WithLabelValues(staleDataType).Add(float64(count)) + } + logEntry.Info("removed files") + }() - for field, staleFileFinder := range map[string]staleFileFinderFn{ + var filesToPrune []string + for staleFileType, staleFileFinder := range map[string]staleFileFinderFn{ "objects": findTemporaryObjects, "locks": findStaleLockfiles, "refs": findBrokenLooseReferences, @@ -63,38 +75,28 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo. } { staleFiles, err := staleFileFinder(ctx, repoPath) if err != nil { - return fmt.Errorf("housekeeping failed to find %s: %w", field, err) + return fmt.Errorf("housekeeping failed to find %s: %w", staleFileType, err) } filesToPrune = append(filesToPrune, staleFiles...) - logEntry = logEntry.WithField(field, len(staleFiles)) - - m.prunedFilesTotal.WithLabelValues(field).Add(float64(len(staleFiles))) + staleDataByType[staleFileType] = len(staleFiles) } - unremovableFiles := 0 for _, path := range filesToPrune { if err := os.Remove(path); err != nil { if os.IsNotExist(err) { continue } - unremovableFiles++ - // We cannot use `logEntry` here as it's already seeded - // with the statistics fields. + staleDataByType["failures"]++ myLogger(ctx).WithError(err).WithField("path", path).Warn("unable to remove stale file") } } prunedRefDirs, err := removeRefEmptyDirs(ctx, repo) - m.prunedFilesTotal.WithLabelValues("refsemptydir").Add(float64(prunedRefDirs)) + staleDataByType["refsemptydir"] = prunedRefDirs if err != nil { return fmt.Errorf("housekeeping could not remove empty refs: %w", err) } - logEntry = logEntry.WithField("refsemptydir", prunedRefDirs) - - if len(filesToPrune) > 0 || prunedRefDirs > 0 { - logEntry.WithField("failures", unremovableFiles).Info("removed files") - } // TODO: https://gitlab.com/gitlab-org/gitaly/-/issues/3987 // This is a temporary code and needs to be removed once it will be run on all repositories at least once. @@ -103,9 +105,19 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo. if !errors.Is(err, git.ErrNotFound) { return fmt.Errorf("housekeeping could not unset unnecessary config lines: %w", err) } - } - - if err := pruneEmptyConfigSections(ctx, repo); err != nil { + staleDataByType["configkeys"] = 0 + } else { + // If we didn't get an error we know that we've deleted _something_. We just set + // this variable to `1` because we don't count how many keys we have deleted. It's + // probably good enough: we only want to know whether we're still pruning such old + // configuration or not, but typically don't care how many there are so that we know + // when to delete this cleanup of legacy data. + staleDataByType["configkeys"] = 1 + } + + skippedSections, err := pruneEmptyConfigSections(ctx, repo) + staleDataByType["configsections"] = skippedSections + if err != nil { return fmt.Errorf("failed pruning empty sections: %w", err) } @@ -113,10 +125,10 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo. } // pruneEmptyConfigSections prunes all empty sections from the repo's config. -func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (returnedErr error) { +func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (_ int, returnedErr error) { repoPath, err := repo.Path() if err != nil { - return fmt.Errorf("getting repo path: %w", err) + return 0, fmt.Errorf("getting repo path: %w", err) } configPath := filepath.Join(repoPath, "config") @@ -124,7 +136,7 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return // values into it anymore. Slurping it into memory should thus be fine. configContents, err := os.ReadFile(configPath) if err != nil { - return fmt.Errorf("reading config: %w", err) + return 0, fmt.Errorf("reading config: %w", err) } configLines := strings.SplitAfter(string(configContents), "\n") if configLines[len(configLines)-1] == "" { @@ -132,6 +144,8 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return configLines = configLines[:len(configLines)-1] } + skippedSections := 0 + // We now filter out any empty sections. A section is empty if the next line is a section // header as well, or if it is the last line in the gitconfig. This isn't quite the whole // story given that a section can also be empty if it just ain't got any keys, but only @@ -141,24 +155,26 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return for i := 0; i < len(configLines)-1; i++ { // Skip if we have two consecutive section headers. if isSectionHeader(configLines[i]) && isSectionHeader(configLines[i+1]) { + skippedSections++ continue } filteredLines = append(filteredLines, configLines[i]) } // The final line is always stripped in case it is a section header. if len(configLines) > 0 && !isSectionHeader(configLines[len(configLines)-1]) { + skippedSections++ filteredLines = append(filteredLines, configLines[len(configLines)-1]) } // If we haven't filtered out anything then there is no need to update the target file. if len(configLines) == len(filteredLines) { - return + return 0, nil } // Otherwise, we need to update the repository's configuration. configWriter, err := safe.NewLockingFileWriter(configPath) if err != nil { - return fmt.Errorf("creating config configWriter: %w", err) + return 0, fmt.Errorf("creating config configWriter: %w", err) } defer func() { if err := configWriter.Close(); err != nil && returnedErr == nil { @@ -168,7 +184,7 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return for _, filteredLine := range filteredLines { if _, err := configWriter.Write([]byte(filteredLine)); err != nil { - return fmt.Errorf("writing filtered config: %w", err) + return 0, fmt.Errorf("writing filtered config: %w", err) } } @@ -187,25 +203,25 @@ func pruneEmptyConfigSections(ctx context.Context, repo *localrepo.Repo) (return git.Flag{Name: "-l"}, }, }, git.WithStdout(&configOutput)); err != nil { - return fmt.Errorf("listing config: %w", err) + return 0, fmt.Errorf("listing config: %w", err) } configOutputs = append(configOutputs, configOutput.String()) } if configOutputs[0] != configOutputs[1] { - return fmt.Errorf("section pruning has caused config change") + return 0, fmt.Errorf("section pruning has caused config change") } // We don't use transactional voting but commit the file directly -- we have asserted that // the change is idempotent anyway. if err := configWriter.Lock(); err != nil { - return fmt.Errorf("failed locking config: %w", err) + return 0, fmt.Errorf("failed locking config: %w", err) } if err := configWriter.Commit(); err != nil { - return fmt.Errorf("failed committing pruned config: %w", err) + return 0, fmt.Errorf("failed committing pruned config: %w", err) } - return nil + return skippedSections, nil } func isSectionHeader(line string) bool { diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go index e6a121c14..44a0edc1f 100644 --- a/internal/git/housekeeping/clean_stale_data_test.go +++ b/internal/git/housekeeping/clean_stale_data_test.go @@ -132,6 +132,8 @@ func d(name string, mode os.FileMode, age time.Duration, finalState entryFinalSt } type cleanStaleDataMetrics struct { + configkeys int + configsections int objects int locks int refs int @@ -153,6 +155,8 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl require.NoError(t, err) for metric, expectedValue := range map[string]int{ + "configkeys": metrics.configkeys, + "configsections": metrics.configsections, "objects": metrics.objects, "locks": metrics.locks, "refs": metrics.refs, @@ -880,7 +884,9 @@ func TestRepositoryManager_CleanStaleData_unsetConfiguration(t *testing.T) { unrelated = untouched `), 0o644)) - require.NoError(t, NewManager(cfg.Prometheus, nil).CleanStaleData(ctx, repo)) + mgr := NewManager(cfg.Prometheus, nil) + + require.NoError(t, mgr.CleanStaleData(ctx, repo)) require.Equal(t, `[core] repositoryformatversion = 0 @@ -893,6 +899,10 @@ func TestRepositoryManager_CleanStaleData_unsetConfiguration(t *testing.T) { [totally] unrelated = untouched `, string(testhelper.MustReadFile(t, configPath))) + + requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{ + configkeys: 1, + }) } func TestRepositoryManager_CleanStaleData_unsetConfigurationTransactional(t *testing.T) { @@ -957,7 +967,9 @@ func TestRepositoryManager_CleanStaleData_pruneEmptyConfigSections(t *testing.T) [remote "tmp-8c948ca94832c2725733e48cb2902287"] `), 0o644)) - require.NoError(t, NewManager(cfg.Prometheus, nil).CleanStaleData(ctx, repo)) + mgr := NewManager(cfg.Prometheus, nil) + + require.NoError(t, mgr.CleanStaleData(ctx, repo)) require.Equal(t, `[core] repositoryformatversion = 0 filemode = true @@ -965,6 +977,11 @@ func TestRepositoryManager_CleanStaleData_pruneEmptyConfigSections(t *testing.T) [uploadpack] allowAnySHA1InWant = true `, string(testhelper.MustReadFile(t, configPath))) + + requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{ + configkeys: 1, + configsections: 7, + }) } func TestPruneEmptyConfigSections(t *testing.T) { @@ -978,9 +995,10 @@ func TestPruneEmptyConfigSections(t *testing.T) { configPath := filepath.Join(repoPath, "config") for _, tc := range []struct { - desc string - configData string - expectedData string + desc string + configData string + expectedData string + expectedSkippedSections int }{ { desc: "empty", @@ -1021,9 +1039,10 @@ func TestPruneEmptyConfigSections(t *testing.T) { // This is not correct, but we really don't want to start parsing // the config format completely. So we err on the side of caution // and just say this is fine. - desc: "empty section with comment", - configData: "[foo]\n# comment\n[bar]\n[baz]\n", - expectedData: "[foo]\n# comment\n", + desc: "empty section with comment", + configData: "[foo]\n# comment\n[bar]\n[baz]\n", + expectedData: "[foo]\n# comment\n", + expectedSkippedSections: 1, }, { desc: "empty section", @@ -1031,14 +1050,16 @@ func TestPruneEmptyConfigSections(t *testing.T) { expectedData: "", }, { - desc: "empty sections", - configData: "[foo]\n[bar]\n[baz]\n", - expectedData: "", + desc: "empty sections", + configData: "[foo]\n[bar]\n[baz]\n", + expectedData: "", + expectedSkippedSections: 2, }, { - desc: "empty sections with missing newline", - configData: "[foo]\n[bar]\n[baz]", - expectedData: "", + desc: "empty sections with missing newline", + configData: "[foo]\n[bar]\n[baz]", + expectedData: "", + expectedSkippedSections: 2, }, { desc: "trailing empty section", @@ -1046,9 +1067,10 @@ func TestPruneEmptyConfigSections(t *testing.T) { expectedData: "[foo]\nbar = baz\n", }, { - desc: "mixed keys and sections", - configData: "[empty]\n[nonempty]\nbar = baz\nbar = baz\n[empty]\n", - expectedData: "[nonempty]\nbar = baz\nbar = baz\n", + desc: "mixed keys and sections", + configData: "[empty]\n[nonempty]\nbar = baz\nbar = baz\n[empty]\n", + expectedData: "[nonempty]\nbar = baz\nbar = baz\n", + expectedSkippedSections: 1, }, { desc: "real world example", @@ -1085,11 +1107,16 @@ func TestPruneEmptyConfigSections(t *testing.T) { [remote "tmp-f7a91ec9415f984d3747cf608b0a7e9c"] prune = true `, + expectedSkippedSections: 15, }, } { t.Run(tc.desc, func(t *testing.T) { require.NoError(t, os.WriteFile(configPath, []byte(tc.configData), 0o644)) - require.NoError(t, pruneEmptyConfigSections(ctx, repo)) + + skippedSections, err := pruneEmptyConfigSections(ctx, repo) + require.NoError(t, err) + require.Equal(t, tc.expectedSkippedSections, skippedSections) + require.Equal(t, tc.expectedData, string(testhelper.MustReadFile(t, configPath))) }) } |