diff options
-rw-r--r-- | internal/source/disk/map.go | 308 | ||||
-rw-r--r-- | internal/source/disk/map_test.go | 253 |
2 files changed, 0 insertions, 561 deletions
diff --git a/internal/source/disk/map.go b/internal/source/disk/map.go deleted file mode 100644 index 05ab4c30..00000000 --- a/internal/source/disk/map.go +++ /dev/null @@ -1,308 +0,0 @@ -package disk - -import ( - "bytes" - "io/ioutil" - "os" - "path/filepath" - "strings" - "sync" - "time" - - "github.com/karrick/godirwalk" - "github.com/sirupsen/logrus" - "gitlab.com/gitlab-org/labkit/log" - - "gitlab.com/gitlab-org/gitlab-pages/internal/domain" - "gitlab.com/gitlab-org/gitlab-pages/metrics" -) - -// preventive measure to skip `@hashed` dir for new zip deployments when sourcing config from disk -// https://gitlab.com/gitlab-org/gitlab-pages/-/issues/468 -const skipHashedDir = "@hashed" - -// Map maps domain names to Domain instances. -type Map map[string]*domain.Domain - -type domainsUpdater func(Map) - -func (dm Map) updateDomainMap(domainName string, domain *domain.Domain) { - if _, ok := dm[domainName]; ok { - log.WithFields(log.Fields{ - "domain_name": domainName, - }).Error("Duplicate domain") - } - - dm[domainName] = domain -} - -func (dm Map) addDomain(rootDomain, groupName, projectName string, config *domainConfig) { - newDomain := domain.New( - strings.ToLower(config.Domain), - config.Certificate, - config.Key, - &customProjectResolver{ - config: config, - path: filepath.Join(groupName, projectName, "public"), - }, - ) - - dm.updateDomainMap(newDomain.Name, newDomain) -} - -func (dm Map) updateGroupDomain(rootDomain, groupName, projectPath string, httpsOnly bool, accessControl bool, id uint64) { - domainName := strings.ToLower(groupName + "." + rootDomain) - groupDomain := dm[domainName] - - if groupDomain == nil { - groupResolver := &Group{ - name: groupName, - projects: make(projects), - subgroups: make(subgroups), - } - - groupDomain = domain.New(domainName, "", "", groupResolver) - } - - split := strings.SplitN(strings.ToLower(projectPath), "/", maxProjectDepth) - projectName := split[len(split)-1] - g := groupDomain.Resolver.(*Group) - - for i := 0; i < len(split)-1; i++ { - subgroupName := split[i] - subgroup := g.subgroups[subgroupName] - if subgroup == nil { - subgroup = &Group{ - name: subgroupName, - projects: make(projects), - subgroups: make(subgroups), - } - g.subgroups[subgroupName] = subgroup - } - - g = subgroup - } - - g.projects[projectName] = &projectConfig{ - NamespaceProject: domainName == projectName, - HTTPSOnly: httpsOnly, - AccessControl: accessControl, - ID: id, - } - - dm[domainName] = groupDomain -} - -func (dm Map) readProjectConfig(rootDomain string, group, projectName string, config *multiDomainConfig) { - if config == nil { - // This is necessary to preserve the previous behaviour where a - // group domain is created even if no config.json files are - // loaded successfully. Is it safe to remove this? - dm.updateGroupDomain(rootDomain, group, projectName, false, false, 0) - return - } - - dm.updateGroupDomain(rootDomain, group, projectName, config.HTTPSOnly, config.AccessControl, config.ID) - - for _, domainConfig := range config.Domains { - config := domainConfig // domainConfig is reused for each loop iteration - if domainConfig.Valid(rootDomain) { - dm.addDomain(rootDomain, group, projectName, &config) - } - } -} - -func readProject(group, parent, projectName string, level int, fanIn chan<- jobResult) { - if strings.HasPrefix(projectName, ".") { - return - } - - // Ignore projects that have .deleted in name - if strings.HasSuffix(projectName, ".deleted") { - return - } - - projectPath := filepath.Join(parent, projectName) - if _, err := os.Lstat(filepath.Join(group, projectPath, "public")); err != nil { - // maybe it's a subgroup - if level <= subgroupScanLimit { - buf := make([]byte, 2*os.Getpagesize()) - readProjects(group, projectPath, level+1, buf, fanIn) - } - - return - } - - // We read the config.json file _before_ fanning in, because it does disk - // IO and it does not need access to the domains map. - config := &multiDomainConfig{} - if err := config.Read(group, projectPath); err != nil { - config = nil - } - - fanIn <- jobResult{group: group, project: projectPath, config: config} -} - -func readProjects(group, parent string, level int, buf []byte, fanIn chan<- jobResult) { - subgroup := filepath.Join(group, parent) - fis, err := godirwalk.ReadDirents(subgroup, buf) - if err != nil { - log.WithError(err).WithFields(log.Fields{ - "group": group, - "parent": parent, - }).Print("readdir failed") - return - } - - for _, project := range fis { - // Ignore non directories - if !project.IsDir() { - continue - } - - readProject(group, parent, project.Name(), level, fanIn) - } -} - -type jobResult struct { - group string - project string - config *multiDomainConfig -} - -// ReadGroups walks the pages directory and populates dm with all the domains it finds. -func (dm Map) ReadGroups(rootDomain string, fis godirwalk.Dirents) { - fanOutGroups := make(chan string) - fanIn := make(chan jobResult) - wg := &sync.WaitGroup{} - for i := 0; i < 4; i++ { - wg.Add(1) - - go func() { - buf := make([]byte, 2*os.Getpagesize()) - - for group := range fanOutGroups { - if group == skipHashedDir { - continue - } - - started := time.Now() - - readProjects(group, "", 0, buf, fanIn) - - log.WithFields(log.Fields{ - "group": group, - "duration": time.Since(started).Seconds(), - }).Debug("Loaded projects for group") - } - - wg.Done() - }() - } - - go func() { - wg.Wait() - close(fanIn) - }() - - done := make(chan struct{}) - go func() { - for result := range fanIn { - dm.readProjectConfig(rootDomain, result.group, result.project, result.config) - } - - close(done) - }() - - for _, group := range fis { - if !group.IsDir() { - continue - } - if strings.HasPrefix(group.Name(), ".") { - continue - } - fanOutGroups <- group.Name() - } - close(fanOutGroups) - - <-done -} - -const ( - updateFile = ".update" -) - -// Watch polls the filesystem and kicks off a new domain directory scan when needed. -func Watch(rootDomain string, updater domainsUpdater, interval time.Duration) { - lastUpdate := []byte("no-update") - - for { - // Read the update file - update, err := ioutil.ReadFile(updateFile) - if err != nil && !os.IsNotExist(err) { - log.WithError(err).Print("failed to read update timestamp") - time.Sleep(interval) - continue - } - - // If it's the same ignore - if bytes.Equal(lastUpdate, update) { - time.Sleep(interval) - continue - } - lastUpdate = update - - started := time.Now() - dm := make(Map) - - fis, err := godirwalk.ReadDirents(".", nil) - if err != nil { - log.WithError(err).Warn("domain scan failed") - metrics.DomainFailedUpdates.Inc() - continue - } - - dm.ReadGroups(rootDomain, fis) - duration := time.Since(started).Seconds() - - var hash string - if len(update) < 1 { - hash = "<empty>" - } else { - hash = strings.TrimSpace(string(update)) - } - - logConfiguredDomains(dm) - - log.WithFields(log.Fields{ - "count(domains)": len(dm), - "duration": duration, - "hash": hash, - }).Info("Updated all domains") - - if updater != nil { - updater(dm) - } - - // Update prometheus metrics - metrics.DomainLastUpdateTime.Set(float64(time.Now().UTC().Unix())) - metrics.DomainsServed.Set(float64(len(dm))) - metrics.DomainsConfigurationUpdateDuration.Set(duration) - metrics.DomainUpdates.Inc() - - time.Sleep(interval) - } -} - -func logConfiguredDomains(dm Map) { - if logrus.GetLevel() != logrus.DebugLevel { - return - } - - for h, d := range dm { - log.WithFields(log.Fields{ - "domain": d, - "host": h, - }).Debug("Configured domain") - } -} diff --git a/internal/source/disk/map_test.go b/internal/source/disk/map_test.go deleted file mode 100644 index 2a5fd828..00000000 --- a/internal/source/disk/map_test.go +++ /dev/null @@ -1,253 +0,0 @@ -package disk - -import ( - "crypto/rand" - "fmt" - "io/ioutil" - "os" - "strings" - "testing" - "time" - - "github.com/karrick/godirwalk" - "github.com/stretchr/testify/require" - - "gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers" -) - -func getEntries(t require.TestingT) godirwalk.Dirents { - fis, err := godirwalk.ReadDirents(".", nil) - - require.NoError(t, err) - - return fis -} - -func TestReadProjects(t *testing.T) { - cleanup := setUpTests(t) - defer cleanup() - - dm := make(Map) - dm.ReadGroups("test.io", getEntries(t)) - - var domains []string - for d := range dm { - domains = append(domains, d) - } - - expectedDomains := []string{ - "group.test.io", - "group.internal.test.io", - "test.domain.com", // from config.json - "other.domain.com", - "domain.404.com", - "group.404.test.io", - "group.https-only.test.io", - "test.my-domain.com", - "test2.my-domain.com", - "no.cert.com", - "private.domain.com", - "group.auth.test.io", - "group.acme.test.io", - "withacmechallenge.domain.com", - "capitalgroup.test.io", - "group.404.gitlab-example.com", - "group.redirects.test.io", - "redirects.custom-domain.com", - } - - for _, expected := range domains { - require.Contains(t, domains, expected) - } - - for _, actual := range domains { - require.Contains(t, expectedDomains, actual) - } - - // Check that multiple domains in the same project are recorded faithfully - require.Equal(t, "test.domain.com", dm["test.domain.com"].Name) - require.Equal(t, "other.domain.com", dm["other.domain.com"].Name) - require.Equal(t, "test", dm["other.domain.com"].CertificateCert) - require.Equal(t, "key", dm["other.domain.com"].CertificateKey) - - // check subgroups - domain, ok := dm["group.test.io"] - require.True(t, ok, "missing group.test.io domain") - subgroup, ok := domain.Resolver.(*Group).subgroups["subgroup"] - require.True(t, ok, "missing group.test.io subgroup") - _, ok = subgroup.projects["project"] - require.True(t, ok, "missing project for subgroup in group.test.io domain") -} - -func TestReadProjectsMaxDepth(t *testing.T) { - nGroups := 3 - levels := subgroupScanLimit + 5 - cleanup := buildFakeDomainsDirectory(t, nGroups, levels) - defer cleanup() - - defaultDomain := "test.io" - dm := make(Map) - dm.ReadGroups(defaultDomain, getEntries(t)) - - var domains []string - for d := range dm { - domains = append(domains, d) - } - - var expectedDomains []string - for i := 0; i < nGroups; i++ { - expectedDomains = append(expectedDomains, fmt.Sprintf("group-%d.%s", i, defaultDomain)) - } - - for _, expected := range domains { - require.Contains(t, domains, expected) - } - - for _, actual := range domains { - // we are not checking config.json domains here - if !strings.HasSuffix(actual, defaultDomain) { - continue - } - require.Contains(t, expectedDomains, actual) - } - - // check subgroups - domain, ok := dm["group-0.test.io"] - require.True(t, ok, "missing group-0.test.io domain") - subgroup := domain.Resolver.(*Group) - for i := 0; i < levels; i++ { - subgroup, ok = subgroup.subgroups["sub"] - if i <= subgroupScanLimit { - require.True(t, ok, "missing group-0.test.io subgroup at level %d", i) - _, ok = subgroup.projects["project-0"] - require.True(t, ok, "missing project for subgroup in group-0.test.io domain at level %d", i) - } else { - require.False(t, ok, "subgroup level %d. Maximum allowed nesting level is %d", i, subgroupScanLimit) - break - } - } -} - -// This write must be atomic, otherwise we cannot predict the state of the -// domain watcher goroutine. We cannot use ioutil.WriteFile because that -// has a race condition where the file is empty, which can get picked up -// by the domain watcher. -func writeRandomTimestamp(t *testing.T) { - b := make([]byte, 10) - n, _ := rand.Read(b) - require.True(t, n > 0, "read some random bytes") - - temp, err := ioutil.TempFile(".", "TestWatch") - require.NoError(t, err) - _, err = temp.Write(b) - require.NoError(t, err, "write to tempfile") - require.NoError(t, temp.Close(), "close tempfile") - - require.NoError(t, os.Rename(temp.Name(), updateFile), "rename tempfile") -} - -func TestWatch(t *testing.T) { - cleanup := setUpTests(t) - defer cleanup() - - require.NoError(t, os.RemoveAll(updateFile)) - - update := make(chan Map) - go Watch("gitlab.io", func(dm Map) { - update <- dm - }, time.Microsecond*50) - - defer os.Remove(updateFile) - - domains := recvTimeout(t, update) - require.NotNil(t, domains, "if the domains are fetched on start") - - writeRandomTimestamp(t) - domains = recvTimeout(t, update) - require.NotNil(t, domains, "if the domains are updated after the creation") - - writeRandomTimestamp(t) - domains = recvTimeout(t, update) - require.NotNil(t, domains, "if the domains are updated after the timestamp change") -} - -func recvTimeout(t *testing.T, ch <-chan Map) Map { - timeout := 5 * time.Second - - select { - case dm := <-ch: - return dm - case <-time.After(timeout): - t.Fatalf("timeout after %v waiting for domain update", timeout) - return nil - } -} - -func buildFakeDomainsDirectory(t testing.TB, nGroups, levels int) func() { - testRoot, err := ioutil.TempDir("", "gitlab-pages-test") - require.NoError(t, err) - - for i := 0; i < nGroups; i++ { - parent := fmt.Sprintf("%s/group-%d", testRoot, i) - domain := fmt.Sprintf("%d.example.io", i) - buildFakeProjectsDirectory(t, parent, domain) - for j := 0; j < levels; j++ { - parent = fmt.Sprintf("%s/sub", parent) - domain = fmt.Sprintf("%d.%s", j, domain) - buildFakeProjectsDirectory(t, parent, domain) - } - if testing.Verbose() && i%100 == 0 { - fmt.Print(".") - } - } - - cleanup := testhelpers.ChdirInPath(t, testRoot, &chdirSet) - - return func() { - defer cleanup() - - if testing.Verbose() { - fmt.Printf("cleaning up test directory %s\n", testRoot) - } - - os.RemoveAll(testRoot) - } -} - -func buildFakeProjectsDirectory(t require.TestingT, groupPath, domain string) { - for j := 0; j < 5; j++ { - dir := fmt.Sprintf("%s/project-%d", groupPath, j) - require.NoError(t, os.MkdirAll(dir+"/public", 0755)) - - fakeConfig := fmt.Sprintf(`{"Domains":[{"Domain":"foo.%d.%s","Certificate":"bar","Key":"baz"}]}`, j, domain) - require.NoError(t, ioutil.WriteFile(dir+"/config.json", []byte(fakeConfig), 0644)) - } -} - -// this is a safeguard against compiler optimizations -// we use this package variable to make sure the benchmarkReadGroups loop -// has side effects outside of the loop. -// Without this the compiler (with the optimizations enabled) may remove the whole loop -var result int - -func benchmarkReadGroups(b *testing.B, groups, levels int) { - cleanup := buildFakeDomainsDirectory(b, groups, levels) - defer cleanup() - - b.ResetTimer() - - domainsCnt := 0 - for i := 0; i < b.N; i++ { - dm := make(Map) - dm.ReadGroups("example.com", getEntries(b)) - domainsCnt = len(dm) - } - result = domainsCnt -} - -func BenchmarkReadGroups(b *testing.B) { - b.Run("10 groups 3 levels", func(b *testing.B) { benchmarkReadGroups(b, 10, 3) }) - b.Run("100 groups 3 levels", func(b *testing.B) { benchmarkReadGroups(b, 100, 3) }) - b.Run("1000 groups 3 levels", func(b *testing.B) { benchmarkReadGroups(b, 1000, 3) }) - b.Run("10000 groups 1 levels", func(b *testing.B) { benchmarkReadGroups(b, 10000, 1) }) -} |