From 0a144bc6055b41fac726fdc6eeaa7150f622bd20 Mon Sep 17 00:00:00 2001 From: "J. Shuster" Date: Fri, 8 Sep 2017 08:58:42 +0000 Subject: Add an artifacts proxy to GitLab Pages --- internal/artifact/artifact.go | 120 +++++++++++++++ internal/artifact/artifact_test.go | 303 +++++++++++++++++++++++++++++++++++++ 2 files changed, 423 insertions(+) create mode 100644 internal/artifact/artifact.go create mode 100644 internal/artifact/artifact_test.go (limited to 'internal/artifact') diff --git a/internal/artifact/artifact.go b/internal/artifact/artifact.go new file mode 100644 index 00000000..bcb525ac --- /dev/null +++ b/internal/artifact/artifact.go @@ -0,0 +1,120 @@ +package artifact + +import ( + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strconv" + "strings" + "time" + + "gitlab.com/gitlab-org/gitlab-pages/internal/httperrors" +) + +const ( + baseURL = "/projects/%s/jobs/%s/artifacts" + hostPatternTemplate = `(?i)\Aartifact~(\d+)~(\d+)\.%s\z` + minStatusCode = 200 + maxStatusCode = 299 +) + +// Artifact is a struct that is made up of a url.URL, http.Client, and +// regexp.Regexp that is used to proxy requests where applicable. +type Artifact struct { + server string + client *http.Client + pattern *regexp.Regexp +} + +// New when provided the arguments defined herein, returns a pointer to an +// Artifact that is used to proxy requests. +func New(s string, timeout int, pagesDomain string) *Artifact { + return &Artifact{ + server: s, + client: &http.Client{Timeout: time.Second * time.Duration(timeout)}, + pattern: hostPatternGen(pagesDomain), + } + +} + +// TryMakeRequest will attempt to proxy a request and write it to the argument +// http.ResponseWriter, ultimately returning a bool that indicates if the +// http.ResponseWriter has been written to in any capacity. +func (a *Artifact) TryMakeRequest(host string, w http.ResponseWriter, r *http.Request) bool { + if a == nil || a.server == "" { + return false + } + + reqURL, ok := a.buildURL(host, r.URL.Path) + if !ok { + return false + } + + resp, err := a.client.Get(reqURL.String()) + if err != nil { + httperrors.Serve502(w) + return true + } + + if resp.StatusCode == http.StatusNotFound { + httperrors.Serve404(w) + return true + } + + if resp.StatusCode == http.StatusInternalServerError { + httperrors.Serve500(w) + return true + } + + // we only cache responses within the 2xx series response codes + if (resp.StatusCode >= minStatusCode) && (resp.StatusCode <= maxStatusCode) { + w.Header().Set("Cache-Control", "max-age=3600") + } + + w.Header().Set("Content-Type", resp.Header.Get("Content-Type")) + w.Header().Set("Content-Length", strconv.FormatInt(resp.ContentLength, 10)) + w.WriteHeader(resp.StatusCode) + io.Copy(w, resp.Body) + return true +} + +// buildURL returns a pointer to a url.URL for where the request should be +// proxied to. The returned bool will indicate if there is some sort of issue +// with the url while it is being generated. +func (a *Artifact) buildURL(host, path string) (*url.URL, bool) { + ids := a.pattern.FindAllStringSubmatch(host, -1) + if len(ids) != 1 || len(ids[0]) != 3 { + return nil, false + } + + strippedIds := ids[0][1:3] + body := fmt.Sprintf(baseURL, strippedIds[0], strippedIds[1]) + ourPath := a.server + if strings.HasSuffix(ourPath, "/") { + ourPath = ourPath[0:len(ourPath)-1] + body + } else { + ourPath = ourPath + body + } + + if len(path) == 0 || strings.HasPrefix(path, "/") { + ourPath = ourPath + path + } else { + ourPath = ourPath + "/" + path + } + + u, err := url.Parse(ourPath) + if err != nil { + return nil, false + } + return u, true +} + +// hostPatternGen returns a pointer to a regexp.Regexp that is made up of +// the constant hostPatternTemplate and the argument which represents the pages domain. +// This is used to ensure that the requested page meets not only the hostPatternTemplate +// requirements, but is suffixed with the proper pagesDomain. +func hostPatternGen(pagesDomain string) *regexp.Regexp { + return regexp.MustCompile(fmt.Sprintf(hostPatternTemplate, regexp.QuoteMeta(pagesDomain))) +} diff --git a/internal/artifact/artifact_test.go b/internal/artifact/artifact_test.go new file mode 100644 index 00000000..ad2285f6 --- /dev/null +++ b/internal/artifact/artifact_test.go @@ -0,0 +1,303 @@ +package artifact + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "regexp" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestTryMakeRequest(t *testing.T) { + content := "Title of the document" + contentType := "text/html; charset=utf-8" + testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", contentType) + switch r.URL.Path { + case "/projects/1/jobs/2/artifacts/200.html": + w.WriteHeader(http.StatusOK) + case "/projects/1/jobs/2/artifacts/max-caching.html": + w.WriteHeader(http.StatusIMUsed) + case "/projects/1/jobs/2/artifacts/non-caching.html": + w.WriteHeader(http.StatusTeapot) + case "/projects/1/jobs/2/artifacts/500.html": + w.WriteHeader(http.StatusInternalServerError) + case "/projects/1/jobs/2/artifacts/404.html": + w.WriteHeader(http.StatusNotFound) + } + fmt.Fprint(w, content) + })) + defer testServer.Close() + + cases := []struct { + Path string + Status int + Content string + Length string + CacheControl string + ContentType string + Description string + }{ + { + "/200.html", + http.StatusOK, + content, + "90", + "max-age=3600", + "text/html; charset=utf-8", + "basic successful request", + }, + { + "/max-caching.html", + http.StatusIMUsed, + content, + "90", + "max-age=3600", + "text/html; charset=utf-8", + "max caching request", + }, + { + "/non-caching.html", + http.StatusTeapot, + content, + "90", + "", + "text/html; charset=utf-8", + "no caching request", + }, + } + + for _, c := range cases { + result := httptest.NewRecorder() + reqURL, err := url.Parse(c.Path) + assert.NoError(t, err) + r := &http.Request{URL: reqURL} + art := &Artifact{ + server: testServer.URL, + client: &http.Client{Timeout: time.Second * time.Duration(1)}, + pattern: regexp.MustCompile(fmt.Sprintf(hostPatternTemplate, "gitlab-example.io")), + } + + assert.True(t, art.TryMakeRequest("artifact~1~2.gitlab-example.io", result, r)) + assert.Equal(t, c.ContentType, result.Header().Get("Content-Type")) + assert.Equal(t, c.Length, result.Header().Get("Content-Length")) + assert.Equal(t, c.CacheControl, result.Header().Get("Cache-Control")) + assert.Equal(t, c.Content, string(result.Body.Bytes())) + assert.Equal(t, c.Status, result.Code) + } +} + +func TestBuildURL(t *testing.T) { + cases := []struct { + RawServer string + Host string + Path string + Expected string + PagesDomain string + Ok bool + Description string + }{ + { + "https://gitlab.com/api/v4", + "artifact~1~2.gitlab.io", + "/path/to/file.txt", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/path/to/file.txt", + "gitlab.io", + true, + "basic case", + }, + { + "https://gitlab.com/api/v4/", + "artifact~1~2.gitlab.io", + "/path/to/file.txt", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/path/to/file.txt", + "gitlab.io", + true, + "basic case 2", + }, + { + "https://gitlab.com/api/v4", + "artifact~1~2.gitlab.io", + "path/to/file.txt", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/path/to/file.txt", + "gitlab.io", + true, + "basic case 3", + }, + { + "https://gitlab.com/api/v4/", + "artifact~1~2.gitlab.io", + "path/to/file.txt", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/path/to/file.txt", + "gitlab.io", + true, + "basic case 4", + }, + { + "https://gitlab.com/api/v4", + "artifact~1~2.gitlab.io", + "", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts", + "gitlab.io", + true, + "basic case 5", + }, + { + "https://gitlab.com/api/v4/", + "artifact~1~2.gitlab.io", + "", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts", + "gitlab.io", + true, + "basic case 6", + }, + { + "https://gitlab.com/api/v4", + "artifact~1~2.gitlab.io", + "/", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/", + "gitlab.io", + true, + "basic case 7", + }, + { + "https://gitlab.com/api/v4/", + "artifact~1~2.gitlab.io", + "/", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/", + "gitlab.io", + true, + "basic case 8", + }, + { + "https://gitlab.com/api/v4", + "artifact~100000~200000.gitlab.io", + "/file.txt", + "https://gitlab.com/api/v4/projects/100000/jobs/200000/artifacts/file.txt", + "gitlab.io", + true, + "expanded case", + }, + { + "https://gitlab.com/api/v4/", + "artifact~1~2.gitlab.io", + "/file.txt", + "https://gitlab.com/api/v4/projects/1/jobs/2/artifacts/file.txt", + "gitlab.io", + true, + "server with tailing slash", + }, + { + "https://gitlab.com/api/v4", + "artifact~A~B.gitlab.io", + "/index.html", + "", + "example.com", + false, + "non matching domain and request", + }, + { + "", + "artifact~A~B.gitlab.io", + "", + "", + "", + false, + "un-parseable Host", + }, + } + + for _, c := range cases { + a := &Artifact{server: c.RawServer, pattern: regexp.MustCompile(fmt.Sprintf(hostPatternTemplate, c.PagesDomain))} + u, ok := a.buildURL(c.Host, c.Path) + assert.Equal(t, c.Ok, ok, c.Description) + if c.Ok { + assert.Equal(t, c.Expected, u.String(), c.Description) + } + } +} + +func TestMatchHostGen(t *testing.T) { + cases := []struct { + URLHost string + PagesDomain string + Expected bool + Description string + }{ + { + "artifact~1~2.gitlab.io", + "gitlab.io", + true, + "basic case", + }, + { + "ARTIFACT~1~2.gitlab.io", + "gitlab.io", + true, + "capital letters case", + }, + { + "ARTIFACT~11234~2908908.gitlab.io", + "gitlab.io", + true, + "additional capital letters case", + }, + { + "artifact~10000~20000.gitlab.io", + "gitlab.io", + true, + "expanded case", + }, + { + "artifact~86753095555~55550935768.gitlab.io", + "gitlab.io", + true, + "large number case", + }, + { + "artifact~one~two.gitlab.io", + "gitlab.io", + false, + "letters rather than numbers", + }, + { + "artifact~One111~tWo222.gitlab.io", + "gitlab.io", + false, + "Mixture of alphanumeric", + }, + { + "artifact~!@#$%~%$#@!.gitlab.io", + "gitlab.io", + false, + "special characters", + }, + { + "artifact~1.gitlab.io", + "gitlab.io", + false, + "not enough ids", + }, + { + "artifact~1~2~34444~1~4.gitlab.io", + "gitlab.io", + false, + "too many ids", + }, + { + "artifact~1~2.gitlab.io", + "otherhost.io", + false, + "different domain / suffix", + }, + } + + for _, c := range cases { + reg := hostPatternGen(c.PagesDomain) + assert.Equal(t, c.Expected, reg.MatchString(c.URLHost), c.Description) + } +} -- cgit v1.2.3