From 7a75e661c5cef9fcd7c84fe0fb22672a57d6373e Mon Sep 17 00:00:00 2001 From: David Barr Date: Sat, 19 Mar 2011 18:20:54 +1100 Subject: vcs-svn: implement text-delta handling Handle input in Subversion's dumpfile format, version 3. This is the format produced by "svnrdump dump" and "svnadmin dump --deltas", and the main difference between v3 dumpfiles and the dumpfiles already handled is that these can include nodes whose properties and text are expressed relative to some other node. To handle such nodes, we find which node the text and properties are based on, handle its property changes, use the cat-blob command to request the basis blob from the fast-import backend, use the svndiff0_apply() helper to apply the text delta on the fly, writing output to a temporary file, and then measure that postimage file's length and write its content to the fast-import stream. The temporary postimage file is shared between delta-using nodes to avoid some file system overhead. The svn-fe interface needs to be more complicated to accommodate the backward flow of information from the fast-import backend to svn-fe. The backflow fd is not needed when parsing streams without deltas, though, so existing scripts using svn-fe on v2 dumps should continue to work. 
NEEDSWORK: generalize interface so caller sets the backflow fd, close temporary file before exiting Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- contrib/svn-fe/svn-fe.txt | 5 +-- t/t9010-svn-fe.sh | 108 ++++++++++++++++++++++++++++++++++++++++++++- vcs-svn/fast_export.c | 109 +++++++++++++++++++++++++++++++++++++++++++++- vcs-svn/fast_export.h | 3 ++ vcs-svn/svndump.c | 13 ++++-- 5 files changed, 227 insertions(+), 11 deletions(-) diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index 85f7b83028..2dd27ceb0e 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] mkfifo backchannel && -svnadmin dump --incremental REPO | +svnadmin dump --deltas REPO | svn-fe [url] 3backchannel @@ -32,9 +32,6 @@ Subversion's repository dump format is documented in full in Files in this format can be generated using the 'svnadmin dump' or 'svk admin dump' command. -Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3) -are not supported. 
- OUTPUT FORMAT ------------- The fast-import format is documented by the git-fast-import(1) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index 003395c5f6..f24f004fd5 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -674,7 +674,7 @@ test_expect_success PIPE 'change file mode and reiterate content' ' test_cmp hello actual.target ' -test_expect_success PIPE 'deltas not supported' ' +test_expect_success PIPE 'deltas supported' ' reinit_git && { # (old) h + (inline) ello + (old) \n @@ -735,7 +735,7 @@ test_expect_success PIPE 'deltas not supported' ' echo PROPS-END && cat delta } >delta.dump && - test_must_fail try_dump delta.dump + try_dump delta.dump ' test_expect_success PIPE 'property deltas supported' ' @@ -942,6 +942,110 @@ test_expect_success PIPE 'deltas for typechange' ' test_cmp expect actual ' +test_expect_success PIPE 'deltas need not consume the whole preimage' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T postimage + OBJID + :100644 120000 OBJID OBJID T postimage + OBJID + :000000 100644 OBJID OBJID A postimage + EOF + echo "first preimage" >expect.1 && + printf target >expect.2 && + printf lnk >expect.3 && + { + printf "SVNQ%b%b%b" "QQ\017\001\017" "\0217" "first preimage\n" | + q_to_nul + } >delta.1 && + { + properties svn:special "*" && + echo PROPS-END + } >symlink.props && + { + printf "SVNQ%b%b%b" "Q\002\013\004\012" "\0201\001\001\0211" "lnk target" | + q_to_nul + } >delta.2 && + { + printf "SVNQ%b%b" "Q\004\003\004Q" "\001Q\002\002" | + q_to_nul + } >delta.3 && + { + cat <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: postimage + Node-kind: file + Node-action: add + Text-delta: true + Prop-content-length: 10 + EOF + echo Text-content-length: $(wc -c deltapartial.dump && + try_dump deltapartial.dump && + { + git rev-list HEAD | + git diff-tree --root --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + 
test_cmp expect actual && + git show HEAD:postimage >actual.3 && + git show HEAD^:postimage >actual.2 && + git show HEAD^^:postimage >actual.1 && + test_cmp expect.1 actual.1 && + test_cmp expect.2 actual.2 && + test_cmp expect.3 actual.3 +' test_expect_success 'set up svn repo' ' svnconf=$PWD/svnconf && diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 33e853d9cd..005674d8c1 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -7,15 +7,38 @@ #include "strbuf.h" #include "quote.h" #include "fast_export.h" -#include "line_buffer.h" #include "repo_tree.h" #include "strbuf.h" +#include "svndiff.h" +#include "sliding_window.h" +#include "line_buffer.h" #define MAX_GITSVN_LINE_LEN 4096 +#define REPORT_FILENO 3 static uint32_t first_commit_done; +static struct line_buffer postimage = LINE_BUFFER_INIT; static struct line_buffer report_buffer = LINE_BUFFER_INIT; +/* NEEDSWORK: move to fast_export_init() */ +static int init_postimage(void) +{ + static int postimage_initialized; + if (postimage_initialized) + return 0; + postimage_initialized = 1; + return buffer_tmpfile_init(&postimage); +} + +static int init_report_buffer(int fd) +{ + static int report_buffer_initialized; + if (report_buffer_initialized) + return 0; + report_buffer_initialized = 1; + return buffer_fdinit(&report_buffer, fd); +} + void fast_export_init(int fd) { if (buffer_fdinit(&report_buffer, fd)) @@ -132,6 +155,73 @@ static void die_short_read(struct line_buffer *input) die("invalid dump: unexpected end of file"); } +static int ends_with(const char *s, size_t len, const char *suffix) +{ + const size_t suffixlen = strlen(suffix); + if (len < suffixlen) + return 0; + return !memcmp(s + len - suffixlen, suffix, suffixlen); +} + +static int parse_cat_response_line(const char *header, off_t *len) +{ + size_t headerlen = strlen(header); + const char *type; + const char *end; + + if (ends_with(header, headerlen, " missing")) + return error("cat-blob reports missing blob: %s", 
header); + type = memmem(header, headerlen, " blob ", strlen(" blob ")); + if (!type) + return error("cat-blob header has wrong object type: %s", header); + *len = strtoumax(type + strlen(" blob "), (char **) &end, 10); + if (end == type + strlen(" blob ")) + return error("cat-blob header does not contain length: %s", header); + if (*end) + return error("cat-blob header contains garbage after length: %s", header); + return 0; +} + +static long apply_delta(off_t len, struct line_buffer *input, + const char *old_data, uint32_t old_mode) +{ + long ret; + off_t preimage_len = 0; + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer); + FILE *out; + + if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) + die("cannot open temporary file for blob retrieval"); + if (init_report_buffer(REPORT_FILENO)) + die("cannot open fd 3 for feedback from fast-import"); + if (old_data) { + const char *response; + printf("cat-blob %s\n", old_data); + fflush(stdout); + response = get_response_line(); + if (parse_cat_response_line(response, &preimage_len)) + die("invalid cat-blob response: %s", response); + } + if (old_mode == REPO_MODE_LNK) { + strbuf_addstr(&preimage.buf, "link "); + preimage_len += strlen("link "); + } + if (svndiff0_apply(input, len, &preimage, out)) + die("cannot apply delta"); + if (old_data) { + /* Read the remainder of preimage and trailing newline. 
*/ + if (move_window(&preimage, preimage_len, 1)) + die("cannot seek to end of input"); + if (preimage.buf.buf[0] != '\n') + die("missing newline after cat-blob response"); + } + ret = buffer_tmpfile_prepare_to_read(&postimage); + if (ret < 0) + die("cannot read temporary file for blob retrieval"); + strbuf_release(&preimage.buf); + return ret; +} + void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { @@ -199,3 +289,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) ls_from_active_commit(path); return parse_ls_response(get_response_line(), mode, dataref); } + +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input) +{ + long postimage_len; + if (len > maximum_signed_value_of_type(off_t)) + die("enormous delta"); + postimage_len = apply_delta((off_t) len, input, old_data, old_mode); + if (mode == REPO_MODE_LNK) { + buffer_skip_bytes(&postimage, strlen("link ")); + postimage_len -= strlen("link "); + } + printf("data %ld\n", postimage_len); + buffer_copy_bytes(&postimage, postimage_len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2d392e370d..43d05b65ef 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author, const char *url, unsigned long timestamp); void fast_export_end_commit(uint32_t revision); void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input); /* If there is no such file at that rev, returns -1, errno == ENOENT. 
*/ int fast_export_ls_rev(uint32_t rev, const char *path, diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 11c59f18bf..b1f4161068 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -217,9 +217,7 @@ static void handle_node(void) */ static const char *const empty_blob = "::empty::"; const char *old_data = NULL; - - if (node_ctx.text_delta) - die("text deltas not supported"); + uint32_t old_mode = REPO_MODE_BLB; if (node_ctx.action == NODEACT_DELETE) { if (have_text || have_props || node_ctx.srcRev) @@ -255,6 +253,7 @@ static void handle_node(void) if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; + old_mode = mode; } else if (node_ctx.action == NODEACT_ADD) { if (type == REPO_MODE_DIR) old_data = NULL; @@ -289,8 +288,14 @@ static void handle_node(void) fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); return; } + if (!node_ctx.text_delta) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.textLength, &input); + return; + } fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); - fast_export_data(node_ctx.type, node_ctx.textLength, &input); + fast_export_blob_delta(node_ctx.type, old_mode, old_data, + node_ctx.textLength, &input); } static void begin_revision(void) -- cgit v1.2.3 From abe27c0cbd97bf6a693004ddb411392ed596a853 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 05:18:33 -0500 Subject: vcs-svn: guard against overflow when computing preimage length Signed integer overflow produces undefined behavior in C and off_t is a signed type. For predictable behavior, add some checks to protect in advance against overflow. On 32-bit systems ftell as called by buffer_tmpfile_prepare_to_read is likely to fail with EOVERFLOW when reading the corresponding postimage, and this patch does not fix that. So it's more of a futureproofing measure than a complete fix. 
Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index edc658d4fe..96a75d51d1 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -166,6 +166,7 @@ static int ends_with(const char *s, size_t len, const char *suffix) static int parse_cat_response_line(const char *header, off_t *len) { size_t headerlen = strlen(header); + uintmax_t n; const char *type; const char *end; @@ -174,14 +175,25 @@ static int parse_cat_response_line(const char *header, off_t *len) type = memmem(header, headerlen, " blob ", strlen(" blob ")); if (!type) return error("cat-blob header has wrong object type: %s", header); - *len = strtoumax(type + strlen(" blob "), (char **) &end, 10); + n = strtoumax(type + strlen(" blob "), (char **) &end, 10); if (end == type + strlen(" blob ")) return error("cat-blob header does not contain length: %s", header); + if (memchr(type + strlen(" blob "), '-', end - type - strlen(" blob "))) + return error("cat-blob header contains negative length: %s", header); + if (n == UINTMAX_MAX || n > maximum_signed_value_of_type(off_t)) + return error("blob too large for current definition of off_t"); + *len = n; if (*end) return error("cat-blob header contains garbage after length: %s", header); return 0; } +static void check_preimage_overflow(off_t a, off_t b) +{ + if (signed_add_overflows(a, b)) + die("blob too large for current definition of off_t"); +} + static long apply_delta(off_t len, struct line_buffer *input, const char *old_data, uint32_t old_mode) { @@ -204,6 +216,7 @@ static long apply_delta(off_t len, struct line_buffer *input, } if (old_mode == REPO_MODE_LNK) { strbuf_addstr(&preimage.buf, "link "); + check_preimage_overflow(preimage_len, strlen("link ")); preimage_len += strlen("link "); } if (svndiff0_apply(input, len, &preimage, out)) -- cgit v1.2.3 From 3ac10b2e3fd6d858621f796160d251ad34affc20 Mon Sep 17 
00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 05:44:27 -0500 Subject: vcs-svn: avoid hangs from corrupt deltas A corrupt Subversion-format delta can request reads past the end of the preimage. Set sliding_view::max_off so such corruption is caught when it appears rather than blocking in an impossible-to-fulfill read() when input is coming from a socket or pipe. Inspired-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder --- t/t9010-svn-fe.sh | 40 +++++++++++++++++++++++++++++++++++++--- vcs-svn/fast_export.c | 15 +++++++++------ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index f24f004fd5..b7eed2489f 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -18,12 +18,13 @@ reinit_git () { try_dump () { input=$1 && - maybe_fail=${2:+test_$2} && + maybe_fail_svnfe=${2:+test_$2} && + maybe_fail_fi=${3:+test_$3} && { - $maybe_fail test-svn-fe "$input" >stream 3stream 3backflow && + $maybe_fail_fi git fast-import --cat-blob-fd=3 backflow && wait $! 
} @@ -1047,6 +1048,39 @@ test_expect_success PIPE 'deltas need not consume the whole preimage' ' test_cmp expect.3 actual.3 ' +test_expect_success PIPE 'no hang for delta trying to read past end of preimage' ' + reinit_git && + { + # COPY 1 + printf "SVNQ%b%b" "Q\001\001\002Q" "\001Q" | + q_to_nul + } >greedy.delta && + { + cat <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: bootstrap + Node-kind: file + Node-action: add + Text-delta: true + Prop-content-length: 10 + EOF + echo Text-content-length: $(wc -c greedydelta.dump && + try_dump greedydelta.dump must_fail might_fail +' + test_expect_success 'set up svn repo' ' svnconf=$PWD/svnconf && mkdir -p "$svnconf" && diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 96a75d51d1..97f5fdf489 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -198,8 +198,7 @@ static long apply_delta(off_t len, struct line_buffer *input, const char *old_data, uint32_t old_mode) { long ret; - off_t preimage_len = 0; - struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, -1); + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, 0); FILE *out; if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) @@ -211,19 +210,23 @@ static long apply_delta(off_t len, struct line_buffer *input, printf("cat-blob %s\n", old_data); fflush(stdout); response = get_response_line(); - if (parse_cat_response_line(response, &preimage_len)) + if (parse_cat_response_line(response, &preimage.max_off)) die("invalid cat-blob response: %s", response); + check_preimage_overflow(preimage.max_off, 1); } if (old_mode == REPO_MODE_LNK) { strbuf_addstr(&preimage.buf, "link "); - check_preimage_overflow(preimage_len, strlen("link ")); - preimage_len += strlen("link "); + check_preimage_overflow(preimage.max_off, strlen("link ")); + preimage.max_off += strlen("link "); + check_preimage_overflow(preimage.max_off, 1); } 
if (svndiff0_apply(input, len, &preimage, out)) die("cannot apply delta"); if (old_data) { /* Read the remainder of preimage and trailing newline. */ - if (move_window(&preimage, preimage_len, 1)) + assert(!signed_add_overflows(preimage.max_off, 1)); + preimage.max_off++; /* room for newline */ + if (move_window(&preimage, preimage.max_off - 1, 1)) die("cannot seek to end of input"); if (preimage.buf.buf[0] != '\n') die("missing newline after cat-blob response"); -- cgit v1.2.3