From 77b42fad727a8d8166d8d9a3aa6951f530a664ea Mon Sep 17 00:00:00 2001 From: feistel <6742251-feistel@users.noreply.gitlab.com> Date: Tue, 13 Oct 2020 10:52:50 +0000 Subject: Ensure the content encoding is correct before reading the body Avoid strange EOF error when the encoding is wrong --- acceptance_test.go | 38 ++++++ go.mod | 1 + go.sum | 2 + helpers_test.go | 12 ++ internal/serving/disk/helpers.go | 61 +++++++--- internal/serving/disk/reader.go | 4 +- internal/source/disk/domain_test.go | 130 ++++++++++++++++++--- .../group.gitlab-example.com/public/index.html.br | Bin 0 -> 8 bytes .../group.gitlab-example.com/public/index.html.gz | Bin 0 -> 34 bytes .../pages/group/group.test.io/public/gz-symlink.br | 1 + .../group/group.test.io/public/image.unknown.br | Bin 0 -> 19 bytes .../pages/group/group.test.io/public/index.html.br | Bin 0 -> 14 bytes .../pages/group/group.test.io/public/index2.html | 1 + .../group/group.test.io/public/index2.html.gz | Bin 0 -> 41 bytes .../group/group.test.io/public/text.unknown.br | Bin 0 -> 10 bytes 15 files changed, 215 insertions(+), 35 deletions(-) create mode 100644 shared/pages/group/group.gitlab-example.com/public/index.html.br create mode 100644 shared/pages/group/group.gitlab-example.com/public/index.html.gz create mode 120000 shared/pages/group/group.test.io/public/gz-symlink.br create mode 100644 shared/pages/group/group.test.io/public/image.unknown.br create mode 100644 shared/pages/group/group.test.io/public/index.html.br create mode 100644 shared/pages/group/group.test.io/public/index2.html create mode 100644 shared/pages/group/group.test.io/public/index2.html.gz create mode 100644 shared/pages/group/group.test.io/public/text.unknown.br diff --git a/acceptance_test.go b/acceptance_test.go index 83f09cdf..0b1442f0 100644 --- a/acceptance_test.go +++ b/acceptance_test.go @@ -659,6 +659,44 @@ func TestObscureMIMEType(t *testing.T) { require.Equal(t, "application/manifest+json", mt) } +func TestCompressedEncoding(t *testing.T) { + skipUnlessEnabled(t) + + tests := []struct { + name string + host string + path string + encoding string + }{ + { + "gzip encoding", + "group.gitlab-example.com", + "index.html", + "gzip", + }, + { + "brotli encoding", + "group.gitlab-example.com", + "index.html", + "br", + }, + } + + teardown := RunPagesProcess(t, *pagesBinary, listeners, "") + defer teardown() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rsp, err := GetCompressedPageFromListener(t, httpListener, "group.gitlab-example.com", "index.html", tt.encoding) + require.NoError(t, err) + defer rsp.Body.Close() + + require.Equal(t, http.StatusOK, rsp.StatusCode) + require.Equal(t, tt.encoding, rsp.Header.Get("Content-Encoding")) + }) + } +} + func TestArtifactProxyRequest(t *testing.T) { skipUnlessEnabled(t, "not-inplace-chroot") diff --git a/go.mod b/go.mod index b6a3e4c3..d9035bee 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module gitlab.com/gitlab-org/gitlab-pages go 1.13 require ( + github.com/andybalholm/brotli v1.0.1 github.com/cenkalti/backoff/v4 v4.0.2 github.com/dgrijalva/jwt-go v3.2.0+incompatible github.com/fzipp/gocyclo v0.0.0-20150627053110-6acd4345c835 diff --git a/go.sum b/go.sum index 8fad37a4..8e2a2a3c 100644 --- a/go.sum +++ b/go.sum @@ -31,6 +31,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= +github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= diff --git a/helpers_test.go b/helpers_test.go index 6ee11b86..eec3c94b 100644 --- a/helpers_test.go +++ b/helpers_test.go @@ -350,6 +350,18 @@ func GetPageFromListenerWithCookie(t *testing.T, spec ListenSpec, host, urlsuffi return DoPagesRequest(t, req) } +func GetCompressedPageFromListener(t *testing.T, spec ListenSpec, host, urlsuffix string, encoding string) (*http.Response, error) { + url := spec.URL(urlsuffix) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + req.Host = host + req.Header.Set("Accept-Encoding", encoding) + + return DoPagesRequest(t, req) +} + func GetProxiedPageFromListener(t *testing.T, spec ListenSpec, host, xForwardedHost, urlsuffix string) (*http.Response, error) { url := spec.URL(urlsuffix) req, err := http.NewRequest("GET", url, nil) diff --git a/internal/serving/disk/helpers.go b/internal/serving/disk/helpers.go index 1ff83683..5456724a 100644 --- a/internal/serving/disk/helpers.go +++ b/internal/serving/disk/helpers.go @@ -5,6 +5,7 @@ import ( "io" "mime" "net/http" + "os" "path/filepath" "strconv" "strings" @@ -13,6 +14,19 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" ) +var compressedEncodings = map[string]string{ + "br": ".br", + "gzip": ".gz", +} + +// Server side content encoding priority. +// Map iteration order is not deterministic in go, so we need this array to specify the priority +// when the client doesn't provide one +var compressedEncodingsPriority = []string{ + "br", + "gzip", +} + func endsWithSlash(path string) bool { return strings.HasSuffix(path, "/") } @@ -46,33 +60,42 @@ func (reader *Reader) detectContentType(ctx context.Context, root vfs.Root, path return contentType, nil } -func acceptsGZip(r *http.Request) bool { +func (reader *Reader) handleContentEncoding(ctx context.Context, w http.ResponseWriter, r *http.Request, root vfs.Root, fullPath string) string { + // don't accept range requests for compressed content if r.Header.Get("Range") != "" { - return false + return fullPath } - offers := []string{"gzip", "identity"} - acceptedEncoding := httputil.NegotiateContentEncoding(r, offers) - return acceptedEncoding == "gzip" -} + files := map[string]os.FileInfo{} -func (reader *Reader) handleGZip(ctx context.Context, w http.ResponseWriter, r *http.Request, root vfs.Root, fullPath string) string { - if !acceptsGZip(r) { - return fullPath - } + // finding compressed files + for encoding, extension := range compressedEncodings { + path := fullPath + extension - gzipPath := fullPath + ".gz" + // Ensure the file is not a symlink + if fi, err := root.Lstat(ctx, path); err == nil && fi.Mode().IsRegular() { + files[encoding] = fi + } + } - // Ensure the .gz file is not a symlink - fi, err := root.Lstat(ctx, gzipPath) - if err != nil || !fi.Mode().IsRegular() { - return fullPath + offers := make([]string, 0, len(files)+1) + for _, encoding := range compressedEncodingsPriority { + if _, ok := files[encoding]; ok { + offers = append(offers, encoding) + } } + offers = append(offers, "identity") - w.Header().Set("Content-Encoding", "gzip") + acceptedEncoding := httputil.NegotiateContentEncoding(r, offers) + + if fi, ok := files[acceptedEncoding]; ok { + w.Header().Set("Content-Encoding", acceptedEncoding) - // http.ServeContent doesn't set Content-Length if Content-Encoding is set - w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10)) + // http.ServeContent doesn't set Content-Length if Content-Encoding is set + w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10)) + + return fullPath + compressedEncodings[acceptedEncoding] + } - return gzipPath + return fullPath } diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index 6afe7947..e7f15a1e 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -169,7 +169,7 @@ func (reader *Reader) resolvePath(ctx context.Context, root vfs.Root, subPath .. } func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, root vfs.Root, origPath string, accessControl bool) error { - fullPath := reader.handleGZip(ctx, w, r, root, origPath) + fullPath := reader.handleContentEncoding(ctx, w, r, root, origPath) file, err := root.Open(ctx, fullPath) if err != nil { @@ -212,7 +212,7 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h } func (reader *Reader) serveCustomFile(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, root vfs.Root, origPath string) error { - fullPath := reader.handleGZip(ctx, w, r, root, origPath) + fullPath := reader.handleContentEncoding(ctx, w, r, root, origPath) // Open and serve content of file file, err := root.Open(ctx, fullPath) diff --git a/internal/source/disk/domain_test.go b/internal/source/disk/domain_test.go index 5c7bdbea..1c81ba22 100644 --- a/internal/source/disk/domain_test.go +++ b/internal/source/disk/domain_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/andybalholm/brotli" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitlab-pages/internal/domain" @@ -176,7 +177,7 @@ func TestIsHTTPSOnly(t *testing.T) { } } -func testHTTPGzip(t *testing.T, handler http.HandlerFunc, mode, url string, values url.Values, acceptEncoding string, str interface{}, contentType string, ungzip bool) { +func testHTTPGzip(t *testing.T, handler http.HandlerFunc, mode, url string, values url.Values, acceptEncoding string, str interface{}, contentType string, expectCompressed bool) { w := httptest.NewRecorder() req, err := http.NewRequest(mode, url+"?"+values.Encode(), nil) require.NoError(t, err) @@ -185,17 +186,17 @@ func testHTTPGzip(t *testing.T, handler http.HandlerFunc, mode, url string, valu } handler(w, req) - if ungzip { + if expectCompressed { contentLength := w.Header().Get("Content-Length") require.Equal(t, strconv.Itoa(w.Body.Len()), contentLength, "Content-Length") + contentEncoding := w.Header().Get("Content-Encoding") + require.Equal(t, "gzip", contentEncoding, "Content-Encoding") + reader, err := gzip.NewReader(w.Body) require.NoError(t, err) defer reader.Close() - contentEncoding := w.Header().Get("Content-Encoding") - require.Equal(t, "gzip", contentEncoding, "Content-Encoding") - bytes, err := ioutil.ReadAll(reader) require.NoError(t, err) require.Contains(t, string(bytes), str) @@ -223,19 +224,18 @@ func TestGroupServeHTTPGzip(t *testing.T) { } testSet := []struct { - mode string // HTTP mode - url string // Test URL - acceptEncoding string // Accept encoding header - body interface{} // Expected body at above URL - contentType string // Expected content-type - ungzip bool // Expect the response to be gzipped? + mode string // HTTP mode + url string // Test URL + acceptEncoding string // Accept encoding header + body interface{} // Expected body at above URL + contentType string // Expected content-type + expectCompressed bool // Expect the response to be gzipped? }{ // No gzip encoding requested {"GET", "/index.html", "", "main-dir", "text/html; charset=utf-8", false}, {"GET", "/index.html", "identity", "main-dir", "text/html; charset=utf-8", false}, {"GET", "/index.html", "gzip; q=0", "main-dir", "text/html; charset=utf-8", false}, // gzip encoding requested, - {"GET", "/index.html", "*", "main-dir", "text/html; charset=utf-8", true}, {"GET", "/index.html", "identity, gzip", "main-dir", "text/html; charset=utf-8", true}, {"GET", "/index.html", "gzip", "main-dir", "text/html; charset=utf-8", true}, {"GET", "/index.html", "gzip; q=1", "main-dir", "text/html; charset=utf-8", true}, @@ -243,6 +243,10 @@ func TestGroupServeHTTPGzip(t *testing.T) { {"GET", "/index.html", "gzip, deflate", "main-dir", "text/html; charset=utf-8", true}, {"GET", "/index.html", "gzip; q=1, deflate", "main-dir", "text/html; charset=utf-8", true}, {"GET", "/index.html", "gzip; q=0.9, deflate", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=0.9, gzip; q=1", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=0, gzip; q=1", "main-dir", "text/html; charset=utf-8", true}, + // fallback to gzip because .br is missing + {"GET", "/index2.html", "*", "main-dir", "text/html; charset=utf-8", true}, // gzip encoding requested, but url does not have compressed content on disk {"GET", "/project2/index.html", "*", "project2-main", "text/html; charset=utf-8", false}, {"GET", "/project2/index.html", "identity, gzip", "project2-main", "text/html; charset=utf-8", false}, @@ -259,6 +263,102 @@ func TestGroupServeHTTPGzip(t *testing.T) { // Symlinked .gz files are not supported {"GET", "/gz-symlink", "*", "data", "text/plain; charset=utf-8", false}, // Unknown file-extension, with text content + {"GET", "/text.unknown", "gzip", "hello", "text/plain; charset=utf-8", true}, + {"GET", "/text-nogzip.unknown", "*", "hello", "text/plain; charset=utf-8", false}, + // Unknown file-extension, with PNG content + {"GET", "/image.unknown", "gzip", "GIF89a", "image/gif", true}, + {"GET", "/image-nogzip.unknown", "*", "GIF89a", "image/gif", false}, + } + + for _, tt := range testSet { + t.Run(tt.url+" acceptEncoding: "+tt.acceptEncoding, func(t *testing.T) { + URL := "http://group.test.io" + tt.url + testHTTPGzip(t, serveFileOrNotFound(testGroup), tt.mode, URL, nil, tt.acceptEncoding, tt.body, tt.contentType, tt.expectCompressed) + }) + } +} + +func testHTTPBrotli(t *testing.T, handler http.HandlerFunc, mode, url string, values url.Values, acceptEncoding string, str interface{}, contentType string, expectCompressed bool) { + w := httptest.NewRecorder() + req, err := http.NewRequest(mode, url+"?"+values.Encode(), nil) + require.NoError(t, err) + if acceptEncoding != "" { + req.Header.Add("Accept-Encoding", acceptEncoding) + } + handler(w, req) + + if expectCompressed { + contentLength := w.Header().Get("Content-Length") + require.Equal(t, strconv.Itoa(w.Body.Len()), contentLength, "Content-Length") + + contentEncoding := w.Header().Get("Content-Encoding") + require.Equal(t, "br", contentEncoding, "Content-Encoding") + + reader := brotli.NewReader(w.Body) + bytes, err := ioutil.ReadAll(reader) + require.NoError(t, err) + require.Contains(t, string(bytes), str) + } else { + require.Contains(t, w.Body.String(), str) + } + + require.Equal(t, contentType, w.Header().Get("Content-Type")) +} + +func TestGroupServeHTTPBrotli(t *testing.T) { + cleanup := setUpTests(t) + defer cleanup() + + testGroup := &domain.Domain{ + Resolver: &Group{ + name: "group", + projects: map[string]*projectConfig{ + "group.test.io": &projectConfig{}, + "group.gitlab-example.com": &projectConfig{}, + "project": &projectConfig{}, + "project2": &projectConfig{}, + }, + }, + } + + testSet := []struct { + mode string // HTTP mode + url string // Test URL + acceptEncoding string // Accept encoding header + body interface{} // Expected body at above URL + contentType string // Expected content-type + expectCompressed bool // Expect the response to be br compressed? + }{ + // No br encoding requested + {"GET", "/index.html", "", "main-dir", "text/html; charset=utf-8", false}, + {"GET", "/index.html", "identity", "main-dir", "text/html; charset=utf-8", false}, + {"GET", "/index.html", "br; q=0", "main-dir", "text/html; charset=utf-8", false}, + // br encoding requested, + {"GET", "/index.html", "*", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "identity, br", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=1", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=0.9", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br, deflate", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=1, deflate", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "br; q=0.9, deflate", "main-dir", "text/html; charset=utf-8", true}, + {"GET", "/index.html", "gzip; q=0.5, br; q=1", "main-dir", "text/html; charset=utf-8", true}, + // br encoding requested, but url does not have compressed content on disk + {"GET", "/project2/index.html", "*", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "identity, br", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br; q=1", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br; q=0.9", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br, deflate", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br; q=1, deflate", "project2-main", "text/html; charset=utf-8", false}, + {"GET", "/project2/index.html", "br; q=0.9, deflate", "project2-main", "text/html; charset=utf-8", false}, + // malformed headers + {"GET", "/index.html", ";; br", "main-dir", "text/html; charset=utf-8", false}, + {"GET", "/index.html", "middle-out", "main-dir", "text/html; charset=utf-8", false}, + {"GET", "/index.html", "br; quality=1", "main-dir", "text/html; charset=utf-8", false}, + // Symlinked .br files are not supported + {"GET", "/gz-symlink", "*", "data", "text/plain; charset=utf-8", false}, + // Unknown file-extension, with text content {"GET", "/text.unknown", "*", "hello", "text/plain; charset=utf-8", true}, {"GET", "/text-nogzip.unknown", "*", "hello", "text/plain; charset=utf-8", false}, // Unknown file-extension, with PNG content @@ -267,8 +367,10 @@ func TestGroupServeHTTPGzip(t *testing.T) { } for _, tt := range testSet { - URL := "http://group.test.io" + tt.url - testHTTPGzip(t, serveFileOrNotFound(testGroup), tt.mode, URL, nil, tt.acceptEncoding, tt.body, tt.contentType, tt.ungzip) + t.Run(tt.url+" acceptEncoding: "+tt.acceptEncoding, func(t *testing.T) { + URL := "http://group.test.io" + tt.url + testHTTPBrotli(t, serveFileOrNotFound(testGroup), tt.mode, URL, nil, tt.acceptEncoding, tt.body, tt.contentType, tt.expectCompressed) + }) } } diff --git a/shared/pages/group/group.gitlab-example.com/public/index.html.br b/shared/pages/group/group.gitlab-example.com/public/index.html.br new file mode 100644 index 00000000..cf5a9650 Binary files /dev/null and b/shared/pages/group/group.gitlab-example.com/public/index.html.br differ diff --git a/shared/pages/group/group.gitlab-example.com/public/index.html.gz b/shared/pages/group/group.gitlab-example.com/public/index.html.gz new file mode 100644 index 00000000..fabfb92a Binary files /dev/null and b/shared/pages/group/group.gitlab-example.com/public/index.html.gz differ diff --git a/shared/pages/group/group.test.io/public/gz-symlink.br b/shared/pages/group/group.test.io/public/gz-symlink.br new file mode 120000 index 00000000..28e14853 --- /dev/null +++ b/shared/pages/group/group.test.io/public/gz-symlink.br @@ -0,0 +1 @@ +../config.json \ No newline at end of file diff --git a/shared/pages/group/group.test.io/public/image.unknown.br b/shared/pages/group/group.test.io/public/image.unknown.br new file mode 100644 index 00000000..2286b383 Binary files /dev/null and b/shared/pages/group/group.test.io/public/image.unknown.br differ diff --git a/shared/pages/group/group.test.io/public/index.html.br b/shared/pages/group/group.test.io/public/index.html.br new file mode 100644 index 00000000..31c092e5 Binary files /dev/null and b/shared/pages/group/group.test.io/public/index.html.br differ diff --git a/shared/pages/group/group.test.io/public/index2.html b/shared/pages/group/group.test.io/public/index2.html new file mode 100644 index 00000000..8da4d196 --- /dev/null +++ b/shared/pages/group/group.test.io/public/index2.html @@ -0,0 +1 @@ +main-dir diff --git a/shared/pages/group/group.test.io/public/index2.html.gz b/shared/pages/group/group.test.io/public/index2.html.gz new file mode 100644 index 00000000..3975516e Binary files /dev/null and b/shared/pages/group/group.test.io/public/index2.html.gz differ diff --git a/shared/pages/group/group.test.io/public/text.unknown.br b/shared/pages/group/group.test.io/public/text.unknown.br new file mode 100644 index 00000000..99eaf2eb Binary files /dev/null and b/shared/pages/group/group.test.io/public/text.unknown.br differ -- cgit v1.2.3