Merge branch 'wc-decode-line-by-line' into 'master'

praefect: Read line-by-line in track-repositories

See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4855

Merged-by: John Cai <jcai@gitlab.com>
Approved-by: Justin Tobler <jtobler@gitlab.com>
Approved-by: John Cai <jcai@gitlab.com>
Co-authored-by: Will Chandler <wchandler@gitlab.com>
author: John Cai <jcai@gitlab.com> 2022-09-16 16:18:02 +0300
committer: John Cai <jcai@gitlab.com> 2022-09-16 16:18:02 +0300
commit: bf8c76c905d145faa4a2a22b875020115f62b56c (patch)
tree: e63b0a96eb810ee76385ef769615c76fd491aee4
parent: 82ad225ea207e669379c39d804271e19b0f88c67 (diff)
parent: 9dee665338d4205d0d196f053d15706dcc660059 (diff)
2 files changed, 20 insertions, 20 deletions
diff --git a/cmd/praefect/subcmd_track_repositories.go b/cmd/praefect/subcmd_track_repositories.go
index 44bdb8c71..2ba8a88b1 100644
--- a/cmd/praefect/subcmd_track_repositories.go
+++ b/cmd/praefect/subcmd_track_repositories.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"bufio"
 	"context"
 	"encoding/json"
 	"flag"
@@ -22,9 +23,9 @@ const (
 )
 
 type invalidRequest struct {
-	reqNum int
-	path   string
-	errs   []error
+	line int
+	path string
+	errs []error
 }
 
 type dupPathError struct {
@@ -33,7 +34,7 @@ type dupPathError struct {
 }
 
 func (d *dupPathError) Error() string {
-	return fmt.Sprintf("duplicate entries for relative_path, item #: %v", d.reqNums)
+	return fmt.Sprintf("duplicate entries for relative_path, line %v", d.reqNums)
 }
 
 type trackRepositories struct {
@@ -55,8 +56,8 @@ func (cmd *trackRepositories) FlagSet() *flag.FlagSet {
 		printfErr("Description:\n" +
 			"	This command allows bulk requests for repositories to be tracked by Praefect.\n" +
 			"	The -input-path flag must be the path of a file containing the details of the repositories\n" +
-			"	to track as a list of newline-delimited JSON objects. Each entry must contain the\n" +
-			"	following keys:\n\n" +
+			"	to track as a list of newline-delimited JSON objects. Each line must contain the details for\n" +
+			"   one and only one repository. Each item must contain the following keys:\n\n" +
 			"		relative_path - The relative path of the repository on-disk.\n" +
 			"		virtual_storage - The Praefect virtual storage name.\n" +
 			"		authoritative_storage - Which storage to consider as the canonical copy of the repository.\n\n" +
@@ -92,26 +93,25 @@ func (cmd trackRepositories) Exec(flags *flag.FlagSet, cfg config.Config) error
 	}
 	defer f.Close()
 
-	d := json.NewDecoder(f)
-	d.DisallowUnknownFields()
+	scanner := bufio.NewScanner(f)
 
 	fmt.Fprintf(cmd.w, "Validating repository information in %q\n", cmd.inputPath)
 
 	var requests []trackRepositoryRequest
-	var repoNum int
+	var line int
 	var repoErrs []invalidRequest
 	pathLines := make(map[string][]int)
 
 	// Read in and validate all requests from input file before executing. This prevents us from
 	// partially executing a file, which makes it difficult to tell which repos were actually
 	// tracked.
-	for d.More() {
-		repoNum++
+	for scanner.Scan() {
+		line++
 
 		request := trackRepositoryRequest{}
-		badReq := invalidRequest{reqNum: repoNum}
+		badReq := invalidRequest{line: line}
 
-		if err := d.Decode(&request); err != nil {
+		if err := json.Unmarshal(scanner.Bytes(), &request); err != nil {
 			badReq.errs = append(badReq.errs, err)
 			repoErrs = append(repoErrs, badReq)
 
@@ -144,13 +144,13 @@ func (cmd trackRepositories) Exec(flags *flag.FlagSet, cfg config.Config) error
 			badReq.errs = append(badReq.errs, &dupPathError{path: request.RelativePath})
 			repoErrs = append(repoErrs, badReq)
 
-			prevLines = append(prevLines, repoNum)
+			prevLines = append(prevLines, line)
 			pathLines[request.RelativePath] = prevLines
 
 			// We've already checked this path, no need to run further checks.
 			continue
 		}
-		pathLines[request.RelativePath] = []int{repoNum}
+		pathLines[request.RelativePath] = []int{line}
 
 		repoInDB, err := store.RepositoryExists(ctx, request.VirtualStorage, request.RelativePath)
 		if err != nil {
@@ -187,7 +187,7 @@ func (cmd trackRepositories) Exec(flags *flag.FlagSet, cfg config.Config) error
 	}
 
 	fmt.Fprintf(cmd.w, "All repository details are correctly formatted\n")
-	fmt.Fprintf(cmd.w, "Tracking %v repositories in Praefect DB...\n", repoNum)
+	fmt.Fprintf(cmd.w, "Tracking %v repositories in Praefect DB...\n", line)
 	for _, request := range requests {
 		if err := request.execRequest(ctx, db, cfg, cmd.w, logger, cmd.replicateImmediately); err != nil {
 			return fmt.Errorf("tracking repository %q: %w", request.RelativePath, err)
@@ -201,7 +201,7 @@ func printInvalidRequests(w io.Writer, repoErrs []invalidRequest, pathLines map[
 	fmt.Fprintf(w, "Found %v invalid request(s) in %q:\n", len(repoErrs), inputPath)
 
 	for _, l := range repoErrs {
-		fmt.Fprintf(w, "  item #: %v, relative_path: %q\n", l.reqNum, l.path)
+		fmt.Fprintf(w, "  line %v, relative_path: %q\n", l.line, l.path)
 		for _, err := range l.errs {
 			if dup, ok := err.(*dupPathError); ok {
 				// The complete set of duplicate reqNums won't be known until input is
diff --git a/cmd/praefect/subcmd_track_repositories_test.go b/cmd/praefect/subcmd_track_repositories_test.go
index cd8e97a1b..c644f88d2 100644
--- a/cmd/praefect/subcmd_track_repositories_test.go
+++ b/cmd/praefect/subcmd_track_repositories_test.go
@@ -109,9 +109,9 @@ func TestAddRepositories_Exec_invalidInput(t *testing.T) {
 			expectedError: "no repository information found",
 		},
 		{
-			input:          `{"foo":"bar"}`,
-			desc:           "unexpected key in JSON",
-			expectedOutput: `json: unknown field "foo"`,
+			input:          "@hashed/01/23/01234567890123456789.git",
+			desc:           "invalid JSON",
+			expectedOutput: "invalid character '@' looking for beginning of value",
 			expectedError:  invalidEntryErr,
 		},
 		{
author	John Cai <jcai@gitlab.com>	2022-09-16 16:18:02 +0300
committer	John Cai <jcai@gitlab.com>	2022-09-16 16:18:02 +0300
commit	bf8c76c905d145faa4a2a22b875020115f62b56c (patch)
tree	e63b0a96eb810ee76385ef769615c76fd491aee4
parent	82ad225ea207e669379c39d804271e19b0f88c67 (diff)
parent	9dee665338d4205d0d196f053d15706dcc660059 (diff)