Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAhmad Sherif <me@ahmadsherif.com>2017-04-19 23:26:07 +0300
committerAhmad Sherif <me@ahmadsherif.com>2017-05-01 19:17:35 +0300
commit83edc50f663745ea40ed0212ff71ae32b21017cf (patch)
treede9293c87e5a9688ac0700e98f662f84dccc88d8
parentb09bede9550c219595bd6a27796270fbecdd20e2 (diff)
Use raw data mainly for diff parsing
-rw-r--r--internal/diff/diff.go183
-rw-r--r--internal/helper/byte.go10
-rw-r--r--internal/service/diff/commit.go14
-rw-r--r--internal/service/diff/commit_test.go12
4 files changed, 151 insertions, 68 deletions
diff --git a/internal/diff/diff.go b/internal/diff/diff.go
index c61d5660e..084dbaf20 100644
--- a/internal/diff/diff.go
+++ b/internal/diff/diff.go
@@ -25,38 +25,57 @@ type Diff struct {
// Parser holds necessary state for parsing a diff stream
type Parser struct {
- reader *bufio.Reader
- currentDiff *Diff
- finished bool
- err error
+ patchReader *bufio.Reader
+ rawLines [][]byte
+ currentDiff *Diff
+ nextPatchFromPath []byte
+ finished bool
+ err error
}
var (
- diffHeaderRegexp = regexp.MustCompile(`(?m)^diff --git "?a/(.*?)"? "?b/(.*?)"?$`)
- indexHeaderRegexp = regexp.MustCompile(`(?m)^index ([[:xdigit:]]{40})..([[:xdigit:]]{40})(?:\s([[:digit:]]+))?$`)
- renameCopyHeaderRegexp = regexp.MustCompile(`(?m)^(copy|rename) (from|to) "?(.*?)"?$`)
- modeHeaderRegexp = regexp.MustCompile(`(?m)^(old|new|(?:deleted|new) file) mode (\d+)$`)
+ rawLineRegexp = regexp.MustCompile(`(?m)^:(\d+) (\d+) ([[:xdigit:]]{40}) ([[:xdigit:]]{40}) ([ADTUXMRC]\d*)\t(.*?)(?:\t(.*?))?$`)
+ diffHeaderRegexp = regexp.MustCompile(`(?m)^diff --git "?a/(.*?)"? "?b/(.*?)"?$`)
)
// NewDiffParser returns a new Parser
func NewDiffParser(src io.Reader) *Parser {
+ parser := &Parser{}
reader := bufio.NewReader(src)
- return &Parser{reader: reader}
+
+ parser.cacheRawLines(reader)
+ parser.patchReader = reader
+
+ return parser
}
// Parse parses a single diff. It returns true if successful, false if it finished
// parsing all diffs or when it encounters an error, in which case use Parser.Err()
// to get the error.
func (parser *Parser) Parse() bool {
- if parser.finished {
+ if parser.finished || len(parser.rawLines) == 0 {
+ return false
+ }
+
+ if err := parser.initializeCurrentDiff(); err != nil {
+ return false
+ }
+
+ if err := parser.findNextPatchFromPath(); err != nil {
return false
}
- parsingDiff := false
+ if !bytes.Equal(parser.nextPatchFromPath, parser.currentDiff.FromPath) {
+ // The current diff has an empty patch
+ return true
+ }
+
+ // We are consuming this patch so it is no longer 'next'
+ parser.nextPatchFromPath = nil
for {
// We cannot use bufio.Scanner because the line may be very long.
- line, err := parser.reader.Peek(10)
+ line, err := parser.patchReader.Peek(10)
if err == io.EOF {
parser.finished = true
@@ -65,7 +84,7 @@ func (parser *Parser) Parse() bool {
}
if len(line) > 0 && len(line) < 10 {
- consumeChunkLine(parser.reader, parser.currentDiff)
+ consumeChunkLine(parser.patchReader, parser.currentDiff)
}
return true
@@ -75,26 +94,19 @@ func (parser *Parser) Parse() bool {
}
if bytes.HasPrefix(line, []byte("diff --git")) {
- if parsingDiff {
- return true
- }
-
- parser.currentDiff = &Diff{}
- parsingDiff = true
-
- parser.err = parseHeader(parser.reader, parser.currentDiff)
+ return true
} else if bytes.HasPrefix(line, []byte("Binary")) {
- parser.err = consumeBinaryNotice(parser.reader, parser.currentDiff)
+ parser.err = consumeBinaryNotice(parser.patchReader, parser.currentDiff)
} else if bytes.HasPrefix(line, []byte("@@")) {
parser.currentDiff.RawChunks = append(parser.currentDiff.RawChunks, nil)
- parser.err = consumeChunkLine(parser.reader, parser.currentDiff)
+ parser.err = consumeChunkLine(parser.patchReader, parser.currentDiff)
} else if helper.ByteSliceHasAnyPrefix(line, "---", "+++") {
- parser.err = consumeLine(parser.reader)
+ parser.err = consumeLine(parser.patchReader)
} else if helper.ByteSliceHasAnyPrefix(line, "-", "+", " ", "\\") {
- parser.err = consumeChunkLine(parser.reader, parser.currentDiff)
+ parser.err = consumeChunkLine(parser.patchReader, parser.currentDiff)
} else {
- parser.err = parseHeader(parser.reader, parser.currentDiff)
+ parser.err = consumeLine(parser.patchReader)
}
if parser.err != nil {
@@ -117,52 +129,101 @@ func (parser *Parser) Err() error {
return parser.err
}
-func parseHeader(reader *bufio.Reader, diff *Diff) error {
- line, err := reader.ReadBytes('\n')
+func (parser *Parser) cacheRawLines(reader *bufio.Reader) {
+ for {
+ line, err := reader.ReadBytes('\n')
+ if err != nil {
+ if err != io.EOF {
+ parser.err = err
+ parser.finished = true
+ }
+ // According to the documentation of bufio.Reader.ReadBytes we cannot get
+ // both an error and a line ending in '\n'. So the current line cannot be
+ // valid and we want to discard it.
+ return
+ }
+
+ if !bytes.HasPrefix(line, []byte(":")) {
+ // Discard the current line and stop reading: we expect a blank line
+ // between raw lines and patch data.
+ return
+ }
+
+ parser.rawLines = append(parser.rawLines, line)
+ }
+}
+
+func (parser *Parser) nextRawLine() []byte {
+ if len(parser.rawLines) == 0 {
+ return nil
+ }
+
+ line := parser.rawLines[0]
+ parser.rawLines = parser.rawLines[1:]
+
+ return line
+}
+
+func (parser *Parser) initializeCurrentDiff() error {
+ // Raw and regular diff formats don't necessarily have the same files, since some flags (e.g. --ignore-space-change)
+ // can suppress certain kinds of diffs from showing in regular format, but raw format will always have all the files.
+ parser.currentDiff = &Diff{}
+ if err := parseRawLine(parser.nextRawLine(), parser.currentDiff); err != nil {
+ parser.err = err
+ return err
+ }
+
+ return nil
+}
+
+func (parser *Parser) findNextPatchFromPath() error {
+ // Since we can only go forward when reading from a bufio.Reader, we save the matched FromPath we parsed until we
+ // reach its counterpart in raw diff.
+ if parser.nextPatchFromPath != nil {
+ return nil
+ }
+
+ line, err := parser.patchReader.ReadBytes('\n')
if err != nil && err != io.EOF {
- return fmt.Errorf("read diff header line: %v", err)
+ parser.err = fmt.Errorf("read diff header line: %v", err)
+ return parser.err
}
- if matches := diffHeaderRegexp.FindSubmatch(line); len(matches) > 0 { // diff --git a/Makefile b/Makefile
- diff.FromPath = unescapeOctalBytes(matches[1])
- diff.ToPath = unescapeOctalBytes(matches[1])
+ if matches := diffHeaderRegexp.FindSubmatch(line); len(matches) > 0 {
+ parser.nextPatchFromPath = unescapeOctalBytes(matches[1])
+ return nil
}
- if matches := indexHeaderRegexp.FindStringSubmatch(string(line)); len(matches) > 0 { // index a8b75d25da09b92b9f8b02151b001217ec24e0ea..3ecb2f9d50ed85f781569431df9f110bff6cb607 100644
- diff.FromID = matches[1]
- diff.ToID = matches[2]
- if matches[3] != "" { // mode does not exist for deleted/new files on this line
- mode, err := strconv.ParseInt(matches[3], 8, 0)
- if err != nil {
- return fmt.Errorf("index header: %v", err)
- }
+ parser.err = fmt.Errorf("diff header regexp mismatch")
+ return parser.err
+}
- diff.OldMode = int32(mode)
- diff.NewMode = int32(mode)
- }
+func parseRawLine(line []byte, diff *Diff) error {
+ matches := rawLineRegexp.FindSubmatch(line)
+ if len(matches) == 0 {
+ return fmt.Errorf("raw line regexp mismatch")
}
- if matches := renameCopyHeaderRegexp.FindSubmatch(line); len(matches) > 0 { // rename from cmd/gitaly-client/main.go
- switch string(matches[2]) {
- case "from":
- diff.FromPath = unescapeOctalBytes(matches[3])
- case "to":
- diff.ToPath = unescapeOctalBytes(matches[3])
- }
+ mode, err := strconv.ParseInt(string(matches[1]), 8, 0)
+ if err != nil {
+ return fmt.Errorf("raw old mode: %v", err)
}
+ diff.OldMode = int32(mode)
- if matches := modeHeaderRegexp.FindStringSubmatch(string(line)); len(matches) > 0 { // deleted file mode 100644
- mode, err := strconv.ParseInt(matches[2], 8, 0)
- if err != nil {
- return fmt.Errorf("mode header: %v", err)
- }
+ mode, err = strconv.ParseInt(string(matches[2]), 8, 0)
+ if err != nil {
+ return fmt.Errorf("raw new mode: %v", err)
+ }
+ diff.NewMode = int32(mode)
- switch matches[1] {
- case "old", "deleted file":
- diff.OldMode = int32(mode)
- case "new", "new file":
- diff.NewMode = int32(mode)
- }
+ diff.FromID = string(matches[3])
+ diff.ToID = string(matches[4])
+
+ diff.FromPath = unescapeOctalBytes(helper.UnquoteBytes(matches[6]))
+ if matches[5][0] == 'C' || matches[5][0] == 'R' {
+ diff.ToPath = unescapeOctalBytes(helper.UnquoteBytes(matches[7]))
+ } else {
+ diff.ToPath = diff.FromPath
}
return nil
diff --git a/internal/helper/byte.go b/internal/helper/byte.go
index a346db940..5c1f75463 100644
--- a/internal/helper/byte.go
+++ b/internal/helper/byte.go
@@ -24,3 +24,13 @@ func IsNumber(s []byte) bool {
}
return true
}
+
+// UnquoteBytes removes surrounding double-quotes from a byte slice returning
+// a new slice if they exist, otherwise it returns the same byte slice passed.
+func UnquoteBytes(s []byte) []byte {
+ if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' {
+ return s[1 : len(s)-1]
+ }
+
+ return s
+}
diff --git a/internal/service/diff/commit.go b/internal/service/diff/commit.go
index 7ad900f99..e7dec223f 100644
--- a/internal/service/diff/commit.go
+++ b/internal/service/diff/commit.go
@@ -25,7 +25,19 @@ func (s *server) CommitDiff(in *pb.CommitDiffRequest, stream pb.Diff_CommitDiffS
log.Printf("CommitDiff: RepoPath=%q LeftCommitId=%q RightCommitId=%q", repoPath, leftSha, rightSha)
- cmd, err := helper.GitCommandReader("--git-dir", repoPath, "diff", "--full-index", "--find-renames", leftSha, rightSha)
+ cmdArgs := []string{
+ "--git-dir", repoPath,
+ "diff",
+ "--patch",
+ "--raw",
+ "--abbrev=40",
+ "--full-index",
+ "--find-renames",
+ leftSha,
+ rightSha,
+ }
+
+ cmd, err := helper.GitCommandReader(cmdArgs...)
if err != nil {
return grpc.Errorf(codes.Internal, "CommitDiff: cmd: %v", err)
}
diff --git a/internal/service/diff/commit_test.go b/internal/service/diff/commit_test.go
index 9b7a7b764..3724d0555 100644
--- a/internal/service/diff/commit_test.go
+++ b/internal/service/diff/commit_test.go
@@ -76,8 +76,8 @@ func TestSuccessfulCommitDiffRequest(t *testing.T) {
},
{
Diff: diff.Diff{
- FromID: "",
- ToID: "",
+ FromID: "ead5a0eee1391308803cfebd8a2a8530495645eb",
+ ToID: "ead5a0eee1391308803cfebd8a2a8530495645eb",
OldMode: 0100644,
NewMode: 0100755,
FromPath: []byte("gitaly/mode-file"),
@@ -123,10 +123,10 @@ func TestSuccessfulCommitDiffRequest(t *testing.T) {
},
{
Diff: diff.Diff{
- FromID: "",
- ToID: "",
- OldMode: 0,
- NewMode: 0,
+ FromID: "4e76e90b3c7e52390de9311a23c0a77575aed8a8",
+ ToID: "4e76e90b3c7e52390de9311a23c0a77575aed8a8",
+ OldMode: 0100644,
+ NewMode: 0100644,
FromPath: []byte("gitaly/named-file"),
ToPath: []byte("gitaly/renamed-file"),
Binary: false,