diff options
author | Ahmad Sherif <me@ahmadsherif.com> | 2017-04-19 23:26:07 +0300 |
---|---|---|
committer | Ahmad Sherif <me@ahmadsherif.com> | 2017-05-01 19:17:35 +0300 |
commit | 83edc50f663745ea40ed0212ff71ae32b21017cf (patch) | |
tree | de9293c87e5a9688ac0700e98f662f84dccc88d8 | |
parent | b09bede9550c219595bd6a27796270fbecdd20e2 (diff) |
Use raw data mainly for diff parsing
-rw-r--r-- | internal/diff/diff.go | 183 | ||||
-rw-r--r-- | internal/helper/byte.go | 10 | ||||
-rw-r--r-- | internal/service/diff/commit.go | 14 | ||||
-rw-r--r-- | internal/service/diff/commit_test.go | 12 |
4 files changed, 151 insertions, 68 deletions
diff --git a/internal/diff/diff.go b/internal/diff/diff.go index c61d5660e..084dbaf20 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -25,38 +25,57 @@ type Diff struct { // Parser holds necessary state for parsing a diff stream type Parser struct { - reader *bufio.Reader - currentDiff *Diff - finished bool - err error + patchReader *bufio.Reader + rawLines [][]byte + currentDiff *Diff + nextPatchFromPath []byte + finished bool + err error } var ( - diffHeaderRegexp = regexp.MustCompile(`(?m)^diff --git "?a/(.*?)"? "?b/(.*?)"?$`) - indexHeaderRegexp = regexp.MustCompile(`(?m)^index ([[:xdigit:]]{40})..([[:xdigit:]]{40})(?:\s([[:digit:]]+))?$`) - renameCopyHeaderRegexp = regexp.MustCompile(`(?m)^(copy|rename) (from|to) "?(.*?)"?$`) - modeHeaderRegexp = regexp.MustCompile(`(?m)^(old|new|(?:deleted|new) file) mode (\d+)$`) + rawLineRegexp = regexp.MustCompile(`(?m)^:(\d+) (\d+) ([[:xdigit:]]{40}) ([[:xdigit:]]{40}) ([ADTUXMRC]\d*)\t(.*?)(?:\t(.*?))?$`) + diffHeaderRegexp = regexp.MustCompile(`(?m)^diff --git "?a/(.*?)"? "?b/(.*?)"?$`) ) // NewDiffParser returns a new Parser func NewDiffParser(src io.Reader) *Parser { + parser := &Parser{} reader := bufio.NewReader(src) - return &Parser{reader: reader} + + parser.cacheRawLines(reader) + parser.patchReader = reader + + return parser } // Parse parses a single diff. It returns true if successful, false if it finished // parsing all diffs or when it encounters an error, in which case use Parser.Err() // to get the error. func (parser *Parser) Parse() bool { - if parser.finished { + if parser.finished || len(parser.rawLines) == 0 { + return false + } + + if err := parser.initializeCurrentDiff(); err != nil { + return false + } + + if err := parser.findNextPatchFromPath(); err != nil { return false } - parsingDiff := false + if !bytes.Equal(parser.nextPatchFromPath, parser.currentDiff.FromPath) { + // The current diff has an empty patch + return true + } + + // We are consuming this patch so it is no longer 'next' + parser.nextPatchFromPath = nil for { // We cannot use bufio.Scanner because the line may be very long. - line, err := parser.reader.Peek(10) + line, err := parser.patchReader.Peek(10) if err == io.EOF { parser.finished = true @@ -65,7 +84,7 @@ func (parser *Parser) Parse() bool { } if len(line) > 0 && len(line) < 10 { - consumeChunkLine(parser.reader, parser.currentDiff) + consumeChunkLine(parser.patchReader, parser.currentDiff) } return true @@ -75,26 +94,19 @@ func (parser *Parser) Parse() bool { } if bytes.HasPrefix(line, []byte("diff --git")) { - if parsingDiff { - return true - } - - parser.currentDiff = &Diff{} - parsingDiff = true - - parser.err = parseHeader(parser.reader, parser.currentDiff) + return true } else if bytes.HasPrefix(line, []byte("Binary")) { - parser.err = consumeBinaryNotice(parser.reader, parser.currentDiff) + parser.err = consumeBinaryNotice(parser.patchReader, parser.currentDiff) } else if bytes.HasPrefix(line, []byte("@@")) { parser.currentDiff.RawChunks = append(parser.currentDiff.RawChunks, nil) - parser.err = consumeChunkLine(parser.reader, parser.currentDiff) + parser.err = consumeChunkLine(parser.patchReader, parser.currentDiff) } else if helper.ByteSliceHasAnyPrefix(line, "---", "+++") { - parser.err = consumeLine(parser.reader) + parser.err = consumeLine(parser.patchReader) } else if helper.ByteSliceHasAnyPrefix(line, "-", "+", " ", "\\") { - parser.err = consumeChunkLine(parser.reader, parser.currentDiff) + parser.err = consumeChunkLine(parser.patchReader, parser.currentDiff) } else { - parser.err = parseHeader(parser.reader, parser.currentDiff) + parser.err = consumeLine(parser.patchReader) } if parser.err != nil { @@ -117,52 +129,101 @@ func (parser *Parser) Err() error { return parser.err } -func parseHeader(reader *bufio.Reader, diff *Diff) error { - line, err := reader.ReadBytes('\n') +func (parser *Parser) cacheRawLines(reader *bufio.Reader) { + for { + line, err := reader.ReadBytes('\n') + if err != nil { + if err != io.EOF { + parser.err = err + parser.finished = true + } + // According to the documentation of bufio.Reader.ReadBytes we cannot get + // both an error and a line ending in '\n'. So the current line cannot be + // valid and we want to discard it. + return + } + + if !bytes.HasPrefix(line, []byte(":")) { + // Discard the current line and stop reading: we expect a blank line + // between raw lines and patch data. + return + } + + parser.rawLines = append(parser.rawLines, line) + } +} + +func (parser *Parser) nextRawLine() []byte { + if len(parser.rawLines) == 0 { + return nil + } + + line := parser.rawLines[0] + parser.rawLines = parser.rawLines[1:] + + return line +} + +func (parser *Parser) initializeCurrentDiff() error { + // Raw and regular diff formats don't necessarily have the same files, since some flags (e.g. --ignore-space-change) + // can suppress certain kinds of diffs from showing in regular format, but raw format will always have all the files. + parser.currentDiff = &Diff{} + if err := parseRawLine(parser.nextRawLine(), parser.currentDiff); err != nil { + parser.err = err + return err + } + + return nil +} + +func (parser *Parser) findNextPatchFromPath() error { + // Since we can only go forward when reading from a bufio.Reader, we save the matched FromPath we parsed until we + // reach its counterpart in raw diff. + if parser.nextPatchFromPath != nil { + return nil + } + + line, err := parser.patchReader.ReadBytes('\n') if err != nil && err != io.EOF { - return fmt.Errorf("read diff header line: %v", err) + parser.err = fmt.Errorf("read diff header line: %v", err) + return parser.err } - if matches := diffHeaderRegexp.FindSubmatch(line); len(matches) > 0 { // diff --git a/Makefile b/Makefile - diff.FromPath = unescapeOctalBytes(matches[1]) - diff.ToPath = unescapeOctalBytes(matches[1]) + if matches := diffHeaderRegexp.FindSubmatch(line); len(matches) > 0 { + parser.nextPatchFromPath = unescapeOctalBytes(matches[1]) + return nil } - if matches := indexHeaderRegexp.FindStringSubmatch(string(line)); len(matches) > 0 { // index a8b75d25da09b92b9f8b02151b001217ec24e0ea..3ecb2f9d50ed85f781569431df9f110bff6cb607 100644 - diff.FromID = matches[1] - diff.ToID = matches[2] - if matches[3] != "" { // mode does not exist for deleted/new files on this line - mode, err := strconv.ParseInt(matches[3], 8, 0) - if err != nil { - return fmt.Errorf("index header: %v", err) - } + parser.err = fmt.Errorf("diff header regexp mismatch") + return parser.err +} - diff.OldMode = int32(mode) - diff.NewMode = int32(mode) - } +func parseRawLine(line []byte, diff *Diff) error { + matches := rawLineRegexp.FindSubmatch(line) + if len(matches) == 0 { + return fmt.Errorf("raw line regexp mismatch") } - if matches := renameCopyHeaderRegexp.FindSubmatch(line); len(matches) > 0 { // rename from cmd/gitaly-client/main.go - switch string(matches[2]) { - case "from": - diff.FromPath = unescapeOctalBytes(matches[3]) - case "to": - diff.ToPath = unescapeOctalBytes(matches[3]) - } + mode, err := strconv.ParseInt(string(matches[1]), 8, 0) + if err != nil { + return fmt.Errorf("raw old mode: %v", err) } + diff.OldMode = int32(mode) - if matches := modeHeaderRegexp.FindStringSubmatch(string(line)); len(matches) > 0 { // deleted file mode 100644 - mode, err := strconv.ParseInt(matches[2], 8, 0) - if err != nil { - return fmt.Errorf("mode header: %v", err) - } + mode, err = strconv.ParseInt(string(matches[2]), 8, 0) + if err != nil { + return fmt.Errorf("raw new mode: %v", err) + } + diff.NewMode = int32(mode) - switch matches[1] { - case "old", "deleted file": - diff.OldMode = int32(mode) - case "new", "new file": - diff.NewMode = int32(mode) - } + diff.FromID = string(matches[3]) + diff.ToID = string(matches[4]) + + diff.FromPath = unescapeOctalBytes(helper.UnquoteBytes(matches[6])) + if matches[5][0] == 'C' || matches[5][0] == 'R' { + diff.ToPath = unescapeOctalBytes(helper.UnquoteBytes(matches[7])) + } else { + diff.ToPath = diff.FromPath } return nil diff --git a/internal/helper/byte.go b/internal/helper/byte.go index a346db940..5c1f75463 100644 --- a/internal/helper/byte.go +++ b/internal/helper/byte.go @@ -24,3 +24,13 @@ func IsNumber(s []byte) bool { } return true } + +// UnquoteBytes removes surrounding double-quotes from a byte slice returning +// a new slice if they exist, otherwise it returns the same byte slice passed. +func UnquoteBytes(s []byte) []byte { + if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' { + return s[1 : len(s)-1] + } + + return s +} diff --git a/internal/service/diff/commit.go b/internal/service/diff/commit.go index 7ad900f99..e7dec223f 100644 --- a/internal/service/diff/commit.go +++ b/internal/service/diff/commit.go @@ -25,7 +25,19 @@ func (s *server) CommitDiff(in *pb.CommitDiffRequest, stream pb.Diff_CommitDiffS log.Printf("CommitDiff: RepoPath=%q LeftCommitId=%q RightCommitId=%q", repoPath, leftSha, rightSha) - cmd, err := helper.GitCommandReader("--git-dir", repoPath, "diff", "--full-index", "--find-renames", leftSha, rightSha) + cmdArgs := []string{ + "--git-dir", repoPath, + "diff", + "--patch", + "--raw", + "--abbrev=40", + "--full-index", + "--find-renames", + leftSha, + rightSha, + } + + cmd, err := helper.GitCommandReader(cmdArgs...) if err != nil { return grpc.Errorf(codes.Internal, "CommitDiff: cmd: %v", err) } diff --git a/internal/service/diff/commit_test.go b/internal/service/diff/commit_test.go index 9b7a7b764..3724d0555 100644 --- a/internal/service/diff/commit_test.go +++ b/internal/service/diff/commit_test.go @@ -76,8 +76,8 @@ func TestSuccessfulCommitDiffRequest(t *testing.T) { }, { Diff: diff.Diff{ - FromID: "", - ToID: "", + FromID: "ead5a0eee1391308803cfebd8a2a8530495645eb", + ToID: "ead5a0eee1391308803cfebd8a2a8530495645eb", OldMode: 0100644, NewMode: 0100755, FromPath: []byte("gitaly/mode-file"), @@ -123,10 +123,10 @@ func TestSuccessfulCommitDiffRequest(t *testing.T) { }, { Diff: diff.Diff{ - FromID: "", - ToID: "", - OldMode: 0, - NewMode: 0, + FromID: "4e76e90b3c7e52390de9311a23c0a77575aed8a8", + ToID: "4e76e90b3c7e52390de9311a23c0a77575aed8a8", + OldMode: 0100644, + NewMode: 0100644, FromPath: []byte("gitaly/named-file"), ToPath: []byte("gitaly/renamed-file"), Binary: false, |