Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Toon Claes <toon@gitlab.com> 2022-06-03 17:45:42 +0300
committer: Toon Claes <toon@gitlab.com> 2022-07-08 11:24:17 +0300
commit: 5709020ecbf5ff34658767230053225eae643603 (patch)
tree: 5b606ab9c2b52c4a9da9a3d300a92bc21e254c6f
parent: 228c7f63daf73362e2ec8daa18026d63b8161324 (diff)
linguist: Implement struct to store stats
We're about to introduce an implementation of getting the language statistics in Go. For this we need a way to collect and store the results in a cache. This cache will be used to incrementally calculate the stats between commits. This change introduces the linguist.languageStats struct which will deal with all this.
-rw-r--r-- internal/gitaly/linguist/language_stats.go 157
-rw-r--r-- internal/gitaly/linguist/language_stats_test.go 195
2 files changed, 352 insertions, 0 deletions
diff --git a/internal/gitaly/linguist/language_stats.go b/internal/gitaly/linguist/language_stats.go
new file mode 100644
index 000000000..9dad17c11
--- /dev/null
+++ b/internal/gitaly/linguist/language_stats.go
@@ -0,0 +1,157 @@
+package linguist
+
+import (
+ "compress/zlib"
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+)
+
+const (
+ // languageStatsFilename is the name of the file in the repo that stores
+ // a cached version of the language statistics. The name is
+ // intentionally different from what the linguist gem uses.
+ languageStatsFilename = "gitaly-language.stats"
+ // languageStatsVersion is the format version written into the cache by
+ // save; newLanguageStats rejects a cache whose version differs.
+ languageStatsVersion = "v1:gitaly"
+)
+
+// languageStats takes care of accumulating and caching language statistics for
+// a repository. It is encoded to and decoded from JSON (see the field tags)
+// when the cache is written to or read from disk.
+type languageStats struct {
+ // Version holds the file format version
+ Version string `json:"version"`
+ // CommitID holds the commit ID for the cached Totals
+ CommitID string `json:"commit_id"`
+
+ // m protects concurrent writes to the Totals & ByFile maps. It is
+ // unexported and therefore not part of the JSON representation.
+ m sync.Mutex
+
+ // Totals contains the total statistics for the CommitID
+ Totals ByteCountPerLanguage `json:"totals"`
+ // ByFile contains the statistics for a single file, where the filename
+ // is its key.
+ ByFile map[string]ByteCountPerLanguage `json:"by_file"`
+}
+
+// newLanguageStats creates a languageStats object and tries to load the
+// optionally available stats from the cache file inside the repository.
+//
+// When no cache file exists, empty stats and a nil error are returned. When
+// the cache cannot be opened, decompressed or decoded, or when it carries an
+// unexpected version, empty stats are returned together with the error. The
+// returned maps are never nil, so add and drop can always be called on the
+// result.
+func newLanguageStats(repo *localrepo.Repo) (*languageStats, error) {
+	stats := languageStats{
+		Totals: ByteCountPerLanguage{},
+		ByFile: make(map[string]ByteCountPerLanguage),
+	}
+
+	objPath, err := repo.Path()
+	if err != nil {
+		return &stats, fmt.Errorf("new language stats get repo path: %w", err)
+	}
+
+	file, err := os.Open(filepath.Join(objPath, languageStatsFilename))
+	if err != nil {
+		if os.IsNotExist(err) {
+			// No cache yet: start from scratch.
+			return &stats, nil
+		}
+		return &stats, fmt.Errorf("new language stats open: %w", err)
+	}
+	defer file.Close()
+
+	r, err := zlib.NewReader(file)
+	if err != nil {
+		return &stats, fmt.Errorf("new language stats zlib reader: %w", err)
+	}
+	// The caller of zlib.NewReader is responsible for closing the reader.
+	defer r.Close()
+
+	var loaded languageStats
+	if err = json.NewDecoder(r).Decode(&loaded); err != nil {
+		return &stats, fmt.Errorf("new language stats json decode: %w", err)
+	}
+
+	if loaded.Version != languageStatsVersion {
+		return &stats, fmt.Errorf("new language stats version mismatch %s vs %s", languageStatsVersion, loaded.Version)
+	}
+
+	// A cache holding JSON null for (or lacking) "totals"/"by_file" decodes
+	// into nil maps; add writes into both maps and would panic on nil.
+	if loaded.Totals == nil {
+		loaded.Totals = ByteCountPerLanguage{}
+	}
+	if loaded.ByFile == nil {
+		loaded.ByFile = make(map[string]ByteCountPerLanguage)
+	}
+
+	return &loaded, nil
+}
+
+// add records the statistics for the given filename: size bytes attributed
+// to language. Whatever was recorded for that file before is first taken out
+// of Totals, so calling add again for the same file replaces its previous
+// contribution instead of accumulating.
+func (c *languageStats) add(filename, language string, size uint64) {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	// Undo the previous contribution of this file, if any.
+	for lang, prev := range c.ByFile[filename] {
+		// The counters are unsigned: subtracting more than the current total
+		// would wrap around to a huge value rather than go negative, so an
+		// entry that would reach (or pass) zero is removed instead.
+		if prev >= c.Totals[lang] {
+			delete(c.Totals, lang)
+			continue
+		}
+		c.Totals[lang] -= prev
+	}
+
+	c.ByFile[filename] = ByteCountPerLanguage{language: size}
+	c.Totals[language] += size
+}
+
+// drop removes the statistics for the given files: each file's recorded
+// byte counts are taken out of Totals and its ByFile entry is deleted.
+// Filenames without recorded statistics are ignored.
+func (c *languageStats) drop(filenames ...string) {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	for _, f := range filenames {
+		for lang, prev := range c.ByFile[f] {
+			// The counters are unsigned: subtracting more than the current
+			// total would wrap around to a huge value rather than go
+			// negative, so an entry that would reach (or pass) zero is
+			// removed instead.
+			if prev >= c.Totals[lang] {
+				delete(c.Totals, lang)
+				continue
+			}
+			c.Totals[lang] -= prev
+		}
+		delete(c.ByFile, f)
+	}
+}
+
+// save stamps the stats with commitID and the current format version and
+// writes them, as zlib-compressed JSON, to the cache file in the repository.
+//
+// The data is first written to a temporary file in the storage's temp
+// directory, synced and closed, and only then renamed onto the final path.
+// The mutex is held for the whole operation so that a concurrent add or drop
+// cannot modify the maps while they are being encoded.
+func (c *languageStats) save(repo *localrepo.Repo, commitID string) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	c.CommitID = commitID
+	c.Version = languageStatsVersion
+
+	repoPath, err := repo.Path()
+	if err != nil {
+		return fmt.Errorf("languageStats save get repo path: %w", err)
+	}
+
+	tempPath, err := repo.StorageTempDir()
+	if err != nil {
+		return fmt.Errorf("languageStats locate temp dir: %w", err)
+	}
+
+	file, err := os.CreateTemp(tempPath, languageStatsFilename)
+	if err != nil {
+		return fmt.Errorf("languageStats create temp file: %w", err)
+	}
+	defer func() {
+		// Best-effort cleanup for the error paths. After a successful save
+		// the file is already closed and renamed away, so both calls fail
+		// harmlessly and their errors are deliberately ignored.
+		_ = file.Close()
+		_ = os.Remove(file.Name())
+	}()
+
+	// The zlib writer is closed explicitly below (its Close flushes the
+	// compressed stream and trailer); it owns no other resources, so no
+	// deferred Close is needed on the error paths.
+	w := zlib.NewWriter(file)
+
+	if err = json.NewEncoder(w).Encode(c); err != nil {
+		return fmt.Errorf("languageStats encode json: %w", err)
+	}
+
+	if err = w.Close(); err != nil {
+		return fmt.Errorf("languageStats zlib write: %w", err)
+	}
+	if err = file.Sync(); err != nil {
+		return fmt.Errorf("languageStats flush: %w", err)
+	}
+	if err = file.Close(); err != nil {
+		return fmt.Errorf("languageStats close: %w", err)
+	}
+
+	if err = os.Rename(file.Name(), filepath.Join(repoPath, languageStatsFilename)); err != nil {
+		return fmt.Errorf("languageStats rename: %w", err)
+	}
+
+	return nil
+}
diff --git a/internal/gitaly/linguist/language_stats_test.go b/internal/gitaly/linguist/language_stats_test.go
new file mode 100644
index 000000000..56f03bce7
--- /dev/null
+++ b/internal/gitaly/linguist/language_stats_test.go
@@ -0,0 +1,195 @@
+package linguist
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
+)
+
+// TestNewLanguageStats covers loading the cache through newLanguageStats:
+// without a cache file, with a cache previously written by save, and with a
+// cache file that is not valid zlib data. The subtests share one repository
+// and run in order; each later subtest overwrites the cache file.
+func TestNewLanguageStats(t *testing.T) {
+	t.Parallel()
+
+	cfg := testcfg.Build(t)
+	repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+	repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+	t.Run("non-existing cache", func(t *testing.T) {
+		s, err := newLanguageStats(repo)
+		require.NoError(t, err)
+		require.Empty(t, s.Totals)
+		require.Empty(t, s.ByFile)
+	})
+
+	t.Run("pre-existing cache", func(t *testing.T) {
+		s, err := newLanguageStats(repo)
+		require.NoError(t, err)
+
+		s.Totals["C"] = 555
+		require.NoError(t, s.save(repo, "badcafe"))
+
+		// Load the cache again: asserting on s itself would only check the
+		// in-memory value and never exercise reading the file back.
+		loaded, err := newLanguageStats(repo)
+		require.NoError(t, err)
+		require.Equal(t, "badcafe", loaded.CommitID)
+		require.Equal(t, ByteCountPerLanguage{"C": 555}, loaded.Totals)
+		require.Empty(t, loaded.ByFile)
+	})
+
+	t.Run("corrupt cache", func(t *testing.T) {
+		require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644))
+
+		s, err := newLanguageStats(repo)
+		// require.Errorf only asserts that err is non-nil and uses its string
+		// as the failure message; EqualError actually compares the text.
+		require.EqualError(t, err, "new language stats zlib reader: zlib: invalid header")
+		require.Empty(t, s.Totals)
+		require.Empty(t, s.ByFile)
+	})
+}
+
+// TestLanguageStats_add checks how add updates Totals and ByFile. Every
+// scenario starts from stats freshly obtained through newLanguageStats.
+func TestLanguageStats_add(t *testing.T) {
+	t.Parallel()
+
+	cfg := testcfg.Build(t)
+	repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+	repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+	scenarios := []struct {
+		desc     string
+		exercise func(*testing.T, *languageStats)
+	}{
+		{
+			desc: "adds to the total",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.add("main.go", "Go", 100)
+
+				require.Equal(t, uint64(100), stats.Totals["Go"])
+				require.Len(t, stats.ByFile, 1)
+				require.Equal(t, ByteCountPerLanguage{"Go": 100}, stats.ByFile["main.go"])
+			},
+		},
+		{
+			desc: "accumulates",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.add("main.go", "Go", 100)
+				stats.add("main_test.go", "Go", 80)
+
+				require.Equal(t, uint64(180), stats.Totals["Go"])
+				require.Len(t, stats.ByFile, 2)
+				require.Equal(t, ByteCountPerLanguage{"Go": 100}, stats.ByFile["main.go"])
+				require.Equal(t, ByteCountPerLanguage{"Go": 80}, stats.ByFile["main_test.go"])
+			},
+		},
+		{
+			desc: "languages don't interfere",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.add("main.go", "Go", 60)
+				stats.add("Makefile", "Make", 30)
+
+				require.Equal(t, uint64(60), stats.Totals["Go"])
+				require.Equal(t, uint64(30), stats.Totals["Make"])
+				require.Len(t, stats.ByFile, 2)
+				require.Equal(t, ByteCountPerLanguage{"Go": 60}, stats.ByFile["main.go"])
+				require.Equal(t, ByteCountPerLanguage{"Make": 30}, stats.ByFile["Makefile"])
+			},
+		},
+		{
+			desc: "updates the stat for a file",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.add("main.go", "Go", 60)
+				stats.add("main.go", "Go", 30)
+
+				require.Equal(t, uint64(30), stats.Totals["Go"])
+				require.Len(t, stats.ByFile, 1)
+				require.Equal(t, ByteCountPerLanguage{"Go": 30}, stats.ByFile["main.go"])
+			},
+		},
+	}
+
+	for _, scenario := range scenarios {
+		t.Run(scenario.desc, func(t *testing.T) {
+			stats, err := newLanguageStats(repo)
+			require.NoError(t, err)
+
+			scenario.exercise(t, stats)
+		})
+	}
+}
+
+// TestLanguageStats_drop checks how drop updates Totals and ByFile. Every
+// scenario starts from stats seeded with two Go files totalling 100 bytes.
+func TestLanguageStats_drop(t *testing.T) {
+	t.Parallel()
+
+	cfg := testcfg.Build(t)
+	repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+	repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+	scenarios := []struct {
+		desc     string
+		exercise func(*testing.T, *languageStats)
+	}{
+		{
+			desc: "existing file",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.drop("main.go")
+
+				require.Equal(t, uint64(20), stats.Totals["Go"])
+				require.Len(t, stats.ByFile, 1)
+				require.Equal(t, ByteCountPerLanguage{"Go": 20}, stats.ByFile["main_test.go"])
+			},
+		},
+		{
+			desc: "non-existing file",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.drop("foo.go")
+
+				require.Equal(t, uint64(100), stats.Totals["Go"])
+				require.Len(t, stats.ByFile, 2)
+				require.Equal(t, ByteCountPerLanguage{"Go": 80}, stats.ByFile["main.go"])
+				require.Equal(t, ByteCountPerLanguage{"Go": 20}, stats.ByFile["main_test.go"])
+			},
+		},
+		{
+			desc: "all files",
+			exercise: func(t *testing.T, stats *languageStats) {
+				stats.drop("main.go", "main_test.go")
+
+				require.Empty(t, stats.Totals)
+				require.Empty(t, stats.ByFile)
+			},
+		},
+	}
+
+	for _, scenario := range scenarios {
+		t.Run(scenario.desc, func(t *testing.T) {
+			stats, err := newLanguageStats(repo)
+			require.NoError(t, err)
+
+			// Seed the fixture shared by all scenarios.
+			stats.Totals["Go"] = 100
+			stats.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80}
+			stats.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20}
+
+			scenario.exercise(t, stats)
+		})
+	}
+}
+
+// TestLanguageStats_save writes seeded stats with save, checks that the cache
+// file appears in the repository, and verifies that loading it again yields
+// the same commit ID, version, totals and per-file data.
+func TestLanguageStats_save(t *testing.T) {
+	t.Parallel()
+
+	cfg := testcfg.Build(t)
+	repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+	repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+	stats, err := newLanguageStats(repo)
+	require.NoError(t, err)
+
+	stats.Totals["Go"] = 100
+	stats.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80}
+	stats.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20}
+
+	require.NoError(t, stats.save(repo, "buzz"))
+
+	cachePath := filepath.Join(repoPath, languageStatsFilename)
+	require.FileExists(t, cachePath)
+
+	reloaded, err := newLanguageStats(repo)
+	require.NoError(t, err)
+
+	require.Equal(t, "buzz", reloaded.CommitID)
+	require.Equal(t, languageStatsVersion, reloaded.Version)
+	require.Equal(t, stats.Totals, reloaded.Totals)
+	require.Equal(t, stats.ByFile, reloaded.ByFile)
+}