diff options
author | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-07-03 15:07:51 +0300 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-07-03 15:07:51 +0300 |
commit | c0eac09055fa811a01b3bd2f8281a95b2777f894 (patch) | |
tree | 0f63d967937b435e1565fa4d60e706a2f4bcdc1c | |
parent | 0bdf39da275bf6cd1095c55d8e70eaab5847754e (diff) | |
parent | f31a9d7ea250b7fdf99d6884ca3afe3c6645c5f5 (diff) |
Merge branch 'jv-clone-analyzer' into 'master'
Add HTTP clone analyzer
See merge request gitlab-org/gitaly!1338
-rw-r--r-- | changelogs/unreleased/jv-clone-analyzer.yml | 5 | ||||
-rw-r--r-- | cmd/gitaly-debug/analyzehttp.go | 230 | ||||
-rw-r--r-- | cmd/gitaly-debug/main.go | 84 | ||||
-rw-r--r-- | cmd/gitaly-debug/simulatehttp.go | 73 |
4 files changed, 322 insertions, 70 deletions
diff --git a/changelogs/unreleased/jv-clone-analyzer.yml b/changelogs/unreleased/jv-clone-analyzer.yml new file mode 100644 index 000000000..d83d0a870 --- /dev/null +++ b/changelogs/unreleased/jv-clone-analyzer.yml @@ -0,0 +1,5 @@ +--- +title: Add HTTP clone analyzer +merge_request: 1338 +author: +type: other diff --git a/cmd/gitaly-debug/analyzehttp.go b/cmd/gitaly-debug/analyzehttp.go new file mode 100644 index 000000000..c645b7ef3 --- /dev/null +++ b/cmd/gitaly-debug/analyzehttp.go @@ -0,0 +1,230 @@ +package main + +import ( + "bytes" + "compress/gzip" + "fmt" + "net/http" + "strings" + "time" + + "gitlab.com/gitlab-org/gitaly/internal/git/pktline" +) + +func analyzeHTTPClone(cloneURL string) { + wants := doBenchGet(cloneURL) + doBenchPost(cloneURL, wants) +} + +func doBenchGet(cloneURL string) []string { + req, err := http.NewRequest("GET", cloneURL+"/info/refs?service=git-upload-pack", nil) + noError(err) + + for k, v := range map[string]string{ + "User-Agent": "gitaly-debug", + "Accept": "*/*", + "Accept-Encoding": "deflate, gzip", + "Pragma": "no-cache", + } { + req.Header.Set(k, v) + } + + start := time.Now() + msg("---") + msg("--- GET %v", req.URL) + msg("---") + resp, err := http.DefaultClient.Do(req) + noError(err) + + msg("response after %v", time.Since(start)) + msg("response header: %v", resp.Header) + msg("HTTP status code %d", resp.StatusCode) + defer resp.Body.Close() + + // Expected response: + // - "# service=git-upload-pack\n" + // - FLUSH + // - "<OID> <ref> <capabilities>\n" + // - "<OID> <ref>\n" + // - ... + // - FLUSH + // + var wants []string + var size int64 + seenFlush := false + scanner := pktline.NewScanner(resp.Body) + packets := 0 + refs := 0 + for ; scanner.Scan(); packets++ { + if seenFlush { + fatal("received packet after flush") + } + + data := string(pktline.Data(scanner.Bytes())) + size += int64(len(data)) + switch packets { + case 0: + msg("first packet %v", time.Since(start)) + if data != "# service=git-upload-pack\n" { + fatal(fmt.Errorf("unexpected header %q", data)) + } + case 1: + if !pktline.IsFlush(scanner.Bytes()) { + fatal("missing flush after service announcement") + } + default: + if packets == 2 && !strings.Contains(data, " side-band-64k") { + fatal(fmt.Errorf("missing side-band-64k capability in %q", data)) + } + + if pktline.IsFlush(scanner.Bytes()) { + seenFlush = true + continue + } + + split := strings.SplitN(data, " ", 2) + if len(split) != 2 { + continue + } + refs++ + + if strings.HasPrefix(split[1], "refs/heads/") || strings.HasPrefix(split[1], "refs/tags/") { + wants = append(wants, split[0]) + } + } + } + noError(scanner.Err()) + if !seenFlush { + fatal("missing flush in response") + } + + msg("received %d packets", packets) + msg("done in %v", time.Since(start)) + msg("payload data: %d bytes", size) + msg("received %d refs, selected %d wants", refs, len(wants)) + + return wants +} + +func doBenchPost(cloneURL string, wants []string) { + reqBodyRaw := &bytes.Buffer{} + reqBodyGzip := gzip.NewWriter(reqBodyRaw) + for i, oid := range wants { + if i == 0 { + oid += " multi_ack_detailed no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not agent=git/2.21.0" + } + _, err := pktline.WriteString(reqBodyGzip, "want "+oid+"\n") + noError(err) + } + noError(pktline.WriteFlush(reqBodyGzip)) + _, err := pktline.WriteString(reqBodyGzip, "done\n") + noError(err) + noError(reqBodyGzip.Close()) + + req, err := http.NewRequest("POST", cloneURL+"/git-upload-pack", reqBodyRaw) + noError(err) + + for k, v := range map[string]string{ + "User-Agent": "gitaly-debug", + "Content-Type": "application/x-git-upload-pack-request", + "Accept": "application/x-git-upload-pack-result", + "Content-Encoding": "gzip", + } { + req.Header.Set(k, v) + } + + start := time.Now() + msg("---") + msg("--- POST %v", req.URL) + msg("---") + resp, err := http.DefaultClient.Do(req) + noError(err) + + msg("response after %v", time.Since(start)) + msg("response header: %v", resp.Header) + msg("HTTP status code %d", resp.StatusCode) + defer resp.Body.Close() + + // Expected response: + // - "NAK\n" + // - "<side band byte><pack or progress or error data> + // - ... + // - FLUSH + // + packets := 0 + scanner := pktline.NewScanner(resp.Body) + totalSize := make(map[byte]int64) + payloadSizeHistogram := make(map[int]int) + sideBandHistogram := make(map[byte]int) + seenFlush := false + for ; scanner.Scan(); packets++ { + if seenFlush { + fatal("received extra packet after flush") + } + + data := pktline.Data(scanner.Bytes()) + + if packets == 0 { + if !bytes.Equal([]byte("NAK\n"), data) { + fatal(fmt.Errorf("expected NAK, got %q", data)) + } + msg("received NAK after %v", time.Since(start)) + continue + } + + if pktline.IsFlush(scanner.Bytes()) { + seenFlush = true + continue + } + + if len(data) == 0 { + fatal("empty packet in PACK data") + } + + band := data[0] + if band < 1 || band > 3 { + fatal(fmt.Errorf("invalid sideband: %d", band)) + } + if sideBandHistogram[band] == 0 { + msg("received first %s packet after %v", bandToHuman(band), time.Since(start)) + } + + sideBandHistogram[band]++ + + n := len(data[1:]) + totalSize[band] += int64(n) + payloadSizeHistogram[n]++ + + if packets%100 == 0 && packets > 0 && band == 1 { + fmt.Printf(".") + } + } + + fmt.Println("") // Trailing newline for progress dots. + + noError(scanner.Err()) + if !seenFlush { + fatal("POST response did not end in flush") + } + + msg("received %d packets", packets) + msg("done in %v", time.Since(start)) + for i := byte(1); i <= 3; i++ { + msg("%8s band: %10d payload bytes, %6d packets", bandToHuman(i), totalSize[i], sideBandHistogram[i]) + } + msg("packet payload size histogram: %v", payloadSizeHistogram) +} + +func bandToHuman(b byte) string { + switch b { + case 1: + return "pack" + case 2: + return "progress" + case 3: + return "error" + default: + fatal(fmt.Errorf("invalid band %d", b)) + return "" // never reached + } +} diff --git a/cmd/gitaly-debug/main.go b/cmd/gitaly-debug/main.go index 57d16e45d..96361caa6 100644 --- a/cmd/gitaly-debug/main.go +++ b/cmd/gitaly-debug/main.go @@ -1,15 +1,8 @@ package main import ( - "bufio" - "bytes" "fmt" - "io" - "io/ioutil" "os" - "os/exec" - "regexp" - "time" ) const ( @@ -22,81 +15,32 @@ simulate-http-clone GIT_DIR HTTP. The clone data is written to /dev/null. Note that in real life the workload also depends on the transport capabilities requested by the client; this tool uses a fixed set of capabilities. + +analyze-http-clone HTTP_URL + Clones a Git repository from a public HTTP URL into /dev/null. ` ) func main() { - if len(os.Args) != 3 { + if len(os.Args) < 2 { fatal(usage) } - gitDir := os.Args[2] + extraArgs := os.Args[2:] switch os.Args[1] { case "simulate-http-clone": - testHTTPCloneSpeed(gitDir) - default: - fatal(usage) - } -} - -func testHTTPCloneSpeed(gitDir string) { - msg("Generating server response for HTTP clone. Data goes to /dev/null.") - infoRefs := exec.Command("git", "upload-pack", "--stateless-rpc", "--advertise-refs", gitDir) - infoRefs.Stderr = os.Stderr - out, err := infoRefs.StdoutPipe() - noError(err) - - start := time.Now() - noError(infoRefs.Start()) - - infoScanner := bufio.NewScanner(out) - var infoLines []string - for infoScanner.Scan() { - infoLines = append(infoLines, infoScanner.Text()) - } - noError(infoScanner.Err()) - - noError(infoRefs.Wait()) - - msg("simulated GET \"/info/refs?service=git-upload-pack\" returned %d lines, took %v", len(infoLines), time.Since(start)) - - if len(infoLines) == 0 { - fatal("no refs were advertised") - } - - request := &bytes.Buffer{} - refsHeads := regexp.MustCompile(`^[a-f0-9]{44} refs/heads/`) - firstLine := true - for _, line := range infoLines { - if !refsHeads.MatchString(line) { - continue + if len(extraArgs) != 1 { + fatal(usage) } - - commitID := line[4:44] - - if firstLine { - firstLine = false - fmt.Fprintf(request, "0098want %s multi_ack_detailed no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not agent=git/2.19.1\n", commitID) - continue + simulateHTTPClone(extraArgs[0]) + case "analyze-http-clone": + if len(extraArgs) != 1 { + fatal(usage) } - - fmt.Fprintf(request, "0032want %s\n", commitID) + analyzeHTTPClone(extraArgs[0]) + default: + fatal(usage) } - fmt.Fprint(request, "00000009done\n") - - uploadPack := exec.Command("git", "upload-pack", "--stateless-rpc", gitDir) - uploadPack.Stdin = request - uploadPack.Stderr = os.Stderr - out, err = uploadPack.StdoutPipe() - noError(err) - - start = time.Now() - noError(uploadPack.Start()) - - n, err := io.Copy(ioutil.Discard, out) - noError(err) - - msg("simulated POST \"/git-upload-pack\" returned %s, took %v", humanBytes(n), time.Since(start)) } func noError(err error) { diff --git a/cmd/gitaly-debug/simulatehttp.go b/cmd/gitaly-debug/simulatehttp.go new file mode 100644 index 000000000..31b3766fc --- /dev/null +++ b/cmd/gitaly-debug/simulatehttp.go @@ -0,0 +1,73 @@ +package main + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "regexp" + "time" +) + +func simulateHTTPClone(gitDir string) { + msg("Generating server response for HTTP clone. Data goes to /dev/null.") + infoRefs := exec.Command("git", "upload-pack", "--stateless-rpc", "--advertise-refs", gitDir) + infoRefs.Stderr = os.Stderr + out, err := infoRefs.StdoutPipe() + noError(err) + + start := time.Now() + noError(infoRefs.Start()) + + infoScanner := bufio.NewScanner(out) + var infoLines []string + for infoScanner.Scan() { + infoLines = append(infoLines, infoScanner.Text()) + } + noError(infoScanner.Err()) + + noError(infoRefs.Wait()) + + msg("simulated GET \"/info/refs?service=git-upload-pack\" returned %d lines, took %v", len(infoLines), time.Since(start)) + + if len(infoLines) == 0 { + fatal("no refs were advertised") + } + + request := &bytes.Buffer{} + refsHeads := regexp.MustCompile(`^[a-f0-9]{44} refs/heads/`) + firstLine := true + for _, line := range infoLines { + if !refsHeads.MatchString(line) { + continue + } + + commitID := line[4:44] + + if firstLine { + firstLine = false + fmt.Fprintf(request, "0098want %s multi_ack_detailed no-done side-band-64k thin-pack ofs-delta deepen-since deepen-not agent=git/2.19.1\n", commitID) + continue + } + + fmt.Fprintf(request, "0032want %s\n", commitID) + } + fmt.Fprint(request, "00000009done\n") + + uploadPack := exec.Command("git", "upload-pack", "--stateless-rpc", gitDir) + uploadPack.Stdin = request + uploadPack.Stderr = os.Stderr + out, err = uploadPack.StdoutPipe() + noError(err) + + start = time.Now() + noError(uploadPack.Start()) + + n, err := io.Copy(ioutil.Discard, out) + noError(err) + + msg("simulated POST \"/git-upload-pack\" returned %s, took %v", humanBytes(n), time.Since(start)) +} |