From 41529bbce407fbf1a925cfbc7c1aa30064f66ae7 Mon Sep 17 00:00:00 2001 From: David Barr Date: Sat, 5 Mar 2011 13:30:23 +1100 Subject: vcs-svn: set up channel to read fast-import cat-blob response Set up some plumbing: teach the svndump lib to pass a file descriptor number to the fast_export lib, representing where cat-blob/ls responses can be read from, and add a get_response_line helper function to the fast_export lib to read a line from that file. Unfortunately this means that svn-fe needs file descriptor 3 to be redirected from somewhere (preferably the cat-blob stream of a fast-import backend); otherwise it will fail: $ svndump | svn-fe fatal: cannot read from file descriptor 3: Bad file descriptor For the moment, "svn-fe 3</dev/null" can be used as a workaround. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 28 ++++++++++++++++++++++++++++ vcs-svn/fast_export.h | 4 ++++ vcs-svn/svndump.c | 5 +++++ 3 files changed, 37 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 260cf50e77..70bd9597e7 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -12,6 +12,24 @@ #define MAX_GITSVN_LINE_LEN 4096 static uint32_t first_commit_done; +static struct line_buffer report_buffer = LINE_BUFFER_INIT; + +void fast_export_init(int fd) +{ + if (buffer_fdinit(&report_buffer, fd)) + die_errno("cannot read from file descriptor %d", fd); +} + +void fast_export_deinit(void) +{ + if (buffer_deinit(&report_buffer)) + die_errno("error closing fast-import feedback stream"); +} + +void fast_export_reset(void) +{ + buffer_reset(&report_buffer); +} void fast_export_delete(uint32_t depth, uint32_t *path) { @@ -63,6 +81,16 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } +static const char *get_response_line(void) +{ + const char *line = buffer_read_line(&report_buffer); + if (line) + return line; + if (buffer_ferror(&report_buffer)) +
die_errno("error reading from fast-import"); + die("unexpected end of fast-import feedback"); +} + void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 054e7d5eb1..fed30c14e6 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -3,6 +3,10 @@ #include "line_buffer.h" +void fast_export_init(int fd); +void fast_export_deinit(void); +void fast_export_reset(void); + void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index e6d84bada5..e05a99d51f 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -14,6 +14,8 @@ #include "obj_pool.h" #include "string_pool.h" +#define REPORT_FILENO 3 + #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 #define NODEACT_ADD 2 @@ -367,6 +369,7 @@ int svndump_init(const char *filename) if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); + fast_export_init(REPORT_FILENO); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); @@ -377,6 +380,7 @@ int svndump_init(const char *filename) void svndump_deinit(void) { log_reset(); + fast_export_deinit(); repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); @@ -390,6 +394,7 @@ void svndump_deinit(void) void svndump_reset(void) { log_reset(); + fast_export_reset(); buffer_reset(&input); repo_reset(); reset_dump_ctx(~0); -- cgit v1.2.3 From d38f84484f21e7e509ff009d3a17167c9c09f893 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 10 Dec 2010 04:21:35 -0600 Subject: vcs-svn: use higher mark numbers for blobs Prepare to use mark :5 for the commit corresponding to r5 (and so on). 1 billion seems sufficiently high for blob marks to avoid conflicting with rev marks, while still leaving room for 3 billion blobs. 
Such high mark numbers cause trouble with ancient fast-import versions, but this topic cannot support git fast-import versions before 1.7.4 (which introduces the cat-blob command) anyway. Signed-off-by: Jonathan Nieder --- vcs-svn/repo_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 14bcc192b6..036a6866b9 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -292,7 +292,7 @@ void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, static void mark_init(void) { uint32_t i; - mark = 0; + mark = 1024 * 1024 * 1024; for (i = 0; i < dent_pool.size; i++) if (!repo_dirent_is_dir(dent_pointer(i)) && dent_pointer(i)->content_offset > mark) -- cgit v1.2.3 From 78e1a3ff236af3afaf1ba9db92985df42141cb0e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 9 Dec 2010 18:57:13 -0600 Subject: vcs-svn: save marks for imported commits This way, a person can use svnadmin dump $path | svn-fe | git fast-import --relative-marks --export-marks=svn-revs to get a list of what commit corresponds to each svn revision (plus some irrelevant blob names) in .git/info/fast-import/svn-revs. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 1 + 1 file changed, 1 insertion(+) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 70bd9597e7..0ad5382bfb 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -63,6 +63,7 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); + printf("mark :%"PRIu32"\n", revision); printf("committer %s <%s@%s> %ld +0000\n", ~author ? pool_fetch(author) : "nobody", ~author ? 
pool_fetch(author) : "nobody", -- cgit v1.2.3 From 7e11902c995715836dec140eb55cfef1d24334bb Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 4 Jan 2011 21:53:33 -0600 Subject: vcs-svn: add a comment before each commit Current svn-fe produces output like this: blob mark :7382321 data 5 hello blob mark :7382322 data 5 Hello commit mark :3 [...] M 100644 :7382321 hello.c M 100644 :7382322 hello2.c This means svn-fe has to keep track of the paths modified in each commit and the corresponding marks, instead of dealing with each file as it arrives in input and then forgetting about it. A better strategy would be to use inline blobs: commit mark :3 [...] M 100644 inline hello.c data 5 hello [...] As a first step towards that, teach svn-fe to notice when the collection of blobs for each commit starts and write a comment ("# commit 3.") there. Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 5 +++++ vcs-svn/fast_export.h | 1 + vcs-svn/svndump.c | 29 ++++++++++++++++++++++------- 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 0ad5382bfb..8786ed234a 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -48,6 +48,11 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, putchar('\n'); } +void fast_export_begin_commit(uint32_t revision) +{ + printf("# commit %"PRIu32".\n", revision); +} + static char gitsvnline[MAX_GITSVN_LINE_LEN]; void fast_export_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index fed30c14e6..09b2033772 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -10,6 +10,7 @@ void fast_export_reset(void); void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); +void fast_export_begin_commit(uint32_t revision); void 
fast_export_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp); void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index e05a99d51f..3cc4135892 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -22,9 +22,11 @@ #define NODEACT_CHANGE 1 #define NODEACT_UNKNOWN 0 -#define DUMP_CTX 0 -#define REV_CTX 1 -#define NODE_CTX 2 +/* States: */ +#define DUMP_CTX 0 /* dump metadata */ +#define REV_CTX 1 /* revision metadata */ +#define NODE_CTX 2 /* node metadata */ +#define INTERNODE_CTX 3 /* between nodes */ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 @@ -269,7 +271,14 @@ static void handle_node(void) node_ctx.textLength, &input); } -static void handle_revision(void) +static void begin_revision(void) +{ + if (!rev_ctx.revision) /* revision 0 gets no git commit. */ + return; + fast_export_begin_commit(rev_ctx.revision); +} + +static void end_revision(void) { if (rev_ctx.revision) repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, @@ -303,13 +312,17 @@ void svndump_read(const char *url) } else if (key == keys.revision_number) { if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); } else if (key == keys.node_path) { if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); active_ctx = NODE_CTX; reset_node_ctx(val); } else if (key == keys.node_kind) { @@ -351,7 +364,7 @@ void svndump_read(const char *url) read_props(); } else if (active_ctx == NODE_CTX) { handle_node(); - active_ctx = REV_CTX; + active_ctx = INTERNODE_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); buffer_skip_bytes(&input, len); @@ -360,8 +373,10 @@ void svndump_read(const char *url) } if (active_ctx == NODE_CTX) handle_node(); + if 
(active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); } int svndump_init(const char *filename) -- cgit v1.2.3 From 723b7a2789d66c1365390cc9b9213e34ab8513d7 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 10 Dec 2010 04:00:55 -0600 Subject: vcs-svn: eliminate repo_tree structure Rely on fast-import for information about previous revs. This requires always setting up backward flow of information, even for v2 dumps. On the plus side, it simplifies the code by quite a bit and opens the door to further simplifications. [db: adjusted to support final version of the cat-blob patch] [jn: avoiding hard-coding git's name for the empty tree for portability to other backends] Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 108 +++++++++++++--- vcs-svn/fast_export.h | 22 ++-- vcs-svn/repo_tree.c | 335 ++++++-------------------------------------------- vcs-svn/repo_tree.h | 2 +- vcs-svn/string_pool.c | 2 +- vcs-svn/string_pool.h | 2 +- vcs-svn/svndump.c | 53 +++++--- 7 files changed, 184 insertions(+), 340 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 8786ed234a..a8ce5c64b2 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -8,6 +8,7 @@ #include "line_buffer.h" #include "repo_tree.h" #include "string_pool.h" +#include "strbuf.h" #define MAX_GITSVN_LINE_LEN 4096 @@ -31,7 +32,7 @@ void fast_export_reset(void) buffer_reset(&report_buffer); } -void fast_export_delete(uint32_t depth, uint32_t *path) +void fast_export_delete(uint32_t depth, const uint32_t *path) { putchar('D'); putchar(' '); @@ -39,22 +40,27 @@ void fast_export_delete(uint32_t depth, uint32_t *path) putchar('\n'); } -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark) +static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode) { - /* Mode must be 100644, 
100755, 120000, or 160000. */ - printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + fast_export_modify(depth, path, mode, "inline"); + printf("data 0\n\n"); } -void fast_export_begin_commit(uint32_t revision) +void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode, + const char *dataref) { - printf("# commit %"PRIu32".\n", revision); + /* Mode must be 100644, 100755, 120000, or 160000. */ + if (!dataref) { + fast_export_truncate(depth, path, mode); + return; + } + printf("M %06"PRIo32" %s ", mode, dataref); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, uint32_t author, char *log, +void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp) { @@ -81,12 +87,31 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("from refs/heads/master^0\n"); first_commit_done = 1; } - repo_diff(revision - 1, revision); - fputc('\n', stdout); +} +void fast_export_end_commit(uint32_t revision) +{ printf("progress Imported commit %"PRIu32".\n\n", revision); } +static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path) +{ + /* ls :5 path/to/old/file */ + printf("ls :%"PRIu32" ", rev); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); + fflush(stdout); +} + +static void ls_from_active_commit(uint32_t depth, const uint32_t *path) +{ + /* ls "path/to/file" */ + printf("ls \""); + pool_print_seq(depth, path, '/', stdout); + printf("\"\n"); + fflush(stdout); +} + static const char *get_response_line(void) { const char *line = buffer_read_line(&report_buffer); @@ -97,14 +122,69 @@ static const char *get_response_line(void) die("unexpected end of fast-import feedback"); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) +void 
fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ buffer_skip_bytes(input, 5); len -= 5; } - printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); + printf("data %"PRIu32"\n", len); buffer_copy_bytes(input, len); fputc('\n', stdout); } + +static int parse_ls_response(const char *response, uint32_t *mode, + struct strbuf *dataref) +{ + const char *tab; + const char *response_end; + + assert(response); + response_end = response + strlen(response); + + if (*response == 'm') { /* Missing. */ + errno = ENOENT; + return -1; + } + + /* Mode. */ + if (response_end - response < strlen("100644") || + response[strlen("100644")] != ' ') + die("invalid ls response: missing mode: %s", response); + *mode = 0; + for (; *response != ' '; response++) { + char ch = *response; + if (ch < '0' || ch > '7') + die("invalid ls response: mode is not octal: %s", response); + *mode *= 8; + *mode += ch - '0'; + } + + /* ' blob ' or ' tree ' */ + if (response_end - response < strlen(" blob ") || + (response[1] != 'b' && response[1] != 't')) + die("unexpected ls response: not a tree or blob: %s", response); + response += strlen(" blob "); + + /* Dataref. 
*/ + tab = memchr(response, '\t', response_end - response); + if (!tab) + die("invalid ls response: missing tab: %s", response); + strbuf_add(dataref, response, tab - response); + return 0; +} + +int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_rev(rev, depth, path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +int fast_export_ls(uint32_t depth, const uint32_t *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_active_commit(depth, path); + return parse_ls_response(get_response_line(), mode, dataref); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 09b2033772..633d21944e 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,19 +1,25 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ -#include "line_buffer.h" +struct strbuf; +struct line_buffer; void fast_export_init(int fd); void fast_export_deinit(void); void fast_export_reset(void); -void fast_export_delete(uint32_t depth, uint32_t *path); -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark); -void fast_export_begin_commit(uint32_t revision); -void fast_export_commit(uint32_t revision, uint32_t author, char *log, +void fast_export_delete(uint32_t depth, const uint32_t *path); +void fast_export_modify(uint32_t depth, const uint32_t *path, + uint32_t mode, const char *dataref); +void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, - struct line_buffer *input); +void fast_export_end_commit(uint32_t revision); +void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); + +/* If there is no such file at that rev, returns -1, errno == ENOENT. 
*/ +int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path, + uint32_t *mode_out, struct strbuf *dataref_out); +int fast_export_ls(uint32_t depth, const uint32_t *path, + uint32_t *mode_out, struct strbuf *dataref_out); #endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 036a6866b9..e75f58087c 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -4,322 +4,61 @@ */ #include "git-compat-util.h" - -#include "string_pool.h" +#include "strbuf.h" #include "repo_tree.h" -#include "obj_pool.h" #include "fast_export.h" -#include "trp.h" - -struct repo_dirent { - uint32_t name_offset; - struct trp_node children; - uint32_t mode; - uint32_t content_offset; -}; - -struct repo_dir { - struct trp_root entries; -}; - -struct repo_commit { - uint32_t root_dir_offset; -}; - -/* Memory pools for commit, dir and dirent */ -obj_pool_gen(commit, struct repo_commit, 4096) -obj_pool_gen(dir, struct repo_dir, 4096) -obj_pool_gen(dent, struct repo_dirent, 4096) - -static uint32_t active_commit; -static uint32_t mark; - -static int repo_dirent_name_cmp(const void *a, const void *b); - -/* Treap for directory entries */ -trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); - -uint32_t next_blob_mark(void) +const char *repo_read_path(const uint32_t *path) { - return mark++; -} + int err; + uint32_t dummy; + static struct strbuf buf = STRBUF_INIT; -static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) -{ - return dir_pointer(commit->root_dir_offset); -} - -static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) -{ - return dent_first(&dir->entries); -} - -static int repo_dirent_name_cmp(const void *a, const void *b) -{ - const struct repo_dirent *dent1 = a, *dent2 = b; - uint32_t a_offset = dent1->name_offset; - uint32_t b_offset = dent2->name_offset; - return (a_offset > b_offset) - (a_offset < b_offset); -} - -static int repo_dirent_is_dir(struct repo_dirent *dent) -{ - return dent != NULL 
&& dent->mode == REPO_MODE_DIR; -} - -static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) -{ - if (!repo_dirent_is_dir(dent)) + strbuf_reset(&buf); + err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &dummy, &buf); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls error"); return NULL; - return dir_pointer(dent->content_offset); -} - -static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) -{ - uint32_t orig_o, new_o; - orig_o = dir_offset(orig_dir); - if (orig_o >= dir_pool.committed) - return orig_dir; - new_o = dir_alloc(1); - orig_dir = dir_pointer(orig_o); - *dir_pointer(new_o) = *orig_dir; - return dir_pointer(new_o); -} - -static struct repo_dirent *repo_read_dirent(uint32_t revision, - const uint32_t *path) -{ - uint32_t name = 0; - struct repo_dirent *key = dent_pointer(dent_alloc(1)); - struct repo_dir *dir = NULL; - struct repo_dirent *dent = NULL; - dir = repo_commit_root_dir(commit_pointer(revision)); - while (~(name = *path++)) { - key->name_offset = name; - dent = dent_search(&dir->entries, key); - if (dent == NULL || !repo_dirent_is_dir(dent)) - break; - dir = repo_dir_from_dirent(dent); - } - dent_free(1); - return dent; -} - -static void repo_write_dirent(const uint32_t *path, uint32_t mode, - uint32_t content_offset, uint32_t del) -{ - uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; - struct repo_dir *dir; - struct repo_dirent *key; - struct repo_dirent *dent = NULL; - revision = active_commit; - dir = repo_commit_root_dir(commit_pointer(revision)); - dir = repo_clone_dir(dir); - commit_pointer(revision)->root_dir_offset = dir_offset(dir); - while (~(name = *path++)) { - parent_dir_o = dir_offset(dir); - - key = dent_pointer(dent_alloc(1)); - key->name_offset = name; - - dent = dent_search(&dir->entries, key); - if (dent == NULL) - dent = key; - else - dent_free(1); - - if (dent == key) { - dent->mode = REPO_MODE_DIR; - dent->content_offset = 0; - dent = dent_insert(&dir->entries, 
dent); - } - - if (dent_offset(dent) < dent_pool.committed) { - dir_o = repo_dirent_is_dir(dent) ? - dent->content_offset : ~0; - dent_remove(&dir->entries, dent); - dent = dent_pointer(dent_alloc(1)); - dent->name_offset = name; - dent->mode = REPO_MODE_DIR; - dent->content_offset = dir_o; - dent = dent_insert(&dir->entries, dent); - } - - dir = repo_dir_from_dirent(dent); - dir = repo_clone_dir(dir); - dent->content_offset = dir_offset(dir); } - if (dent == NULL) - return; - dent->mode = mode; - dent->content_offset = content_offset; - if (del && ~parent_dir_o) - dent_remove(&dir_pointer(parent_dir_o)->entries, dent); -} - -uint32_t repo_read_path(const uint32_t *path) -{ - uint32_t content_offset = 0; - struct repo_dirent *dent = repo_read_dirent(active_commit, path); - if (dent != NULL) - content_offset = dent->content_offset; - return content_offset; + return buf.buf; } uint32_t repo_read_mode(const uint32_t *path) { - struct repo_dirent *dent = repo_read_dirent(active_commit, path); - if (dent == NULL) - die("invalid dump: path to be modified is missing"); - return dent->mode; + int err; + uint32_t result; + static struct strbuf dummy = STRBUF_INIT; + + strbuf_reset(&dummy); + err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &result, &dummy); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls error"); + /* Treat missing paths as directories. 
*/ + return REPO_MODE_DIR; + } + return result; } void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) { - uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(revision, src); - if (src_dent != NULL) { - mode = src_dent->mode; - content_offset = src_dent->content_offset; - repo_write_dirent(dst, mode, content_offset, 0); + int err; + uint32_t mode; + static struct strbuf data = STRBUF_INIT; + + strbuf_reset(&data); + err = fast_export_ls_rev(revision, REPO_MAX_PATH_DEPTH, src, &mode, &data); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls_rev error"); + fast_export_delete(REPO_MAX_PATH_DEPTH, dst); + return; } -} - -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - repo_write_dirent(path, mode, blob_mark, 0); + fast_export_modify(REPO_MAX_PATH_DEPTH, dst, mode, data.buf); } void repo_delete(uint32_t *path) { - repo_write_dirent(path, 0, 0, 1); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); - -static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) -{ - if (repo_dirent_is_dir(dent)) - repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); - else - fast_export_modify(depth, path, - dent->mode, dent->content_offset); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) -{ - struct repo_dirent *de = repo_first_dirent(dir); - while (de) { - path[depth] = de->name_offset; - repo_git_add(depth + 1, path, de); - de = dent_next(&dir->entries, de); - } -} - -static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, - struct repo_dir *dir2) -{ - struct repo_dirent *de1, *de2; - de1 = repo_first_dirent(dir1); - de2 = repo_first_dirent(dir2); - - while (de1 && de2) { - if (de1->name_offset < de2->name_offset) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - continue; 
- } - if (de1->name_offset > de2->name_offset) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - continue; - } - path[depth] = de1->name_offset; - - if (de1->mode == de2->mode && - de1->content_offset == de2->content_offset) { - ; /* No change. */ - } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { - repo_diff_r(depth + 1, path, - repo_dir_from_dirent(de1), - repo_dir_from_dirent(de2)); - } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { - repo_git_add(depth + 1, path, de2); - } else { - fast_export_delete(depth + 1, path); - repo_git_add(depth + 1, path, de2); - } - de1 = dent_next(&dir1->entries, de1); - de2 = dent_next(&dir2->entries, de2); - } - while (de1) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - } - while (de2) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - } -} - -static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; - -void repo_diff(uint32_t r1, uint32_t r2) -{ - repo_diff_r(0, - path_stack, - repo_commit_root_dir(commit_pointer(r1)), - repo_commit_root_dir(commit_pointer(r2))); -} - -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, unsigned long timestamp) -{ - fast_export_commit(revision, author, log, uuid, url, timestamp); - dent_commit(); - dir_commit(); - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; -} - -static void mark_init(void) -{ - uint32_t i; - mark = 1024 * 1024 * 1024; - for (i = 0; i < dent_pool.size; i++) - if (!repo_dirent_is_dir(dent_pointer(i)) && - dent_pointer(i)->content_offset > mark) - mark = dent_pointer(i)->content_offset; - mark++; -} - -void repo_init(void) -{ - mark_init(); - if (commit_pool.size == 0) { - /* Create empty tree for commit 0. 
*/ - commit_alloc(1); - commit_pointer(0)->root_dir_offset = dir_alloc(1); - dir_pointer(0)->entries.trp_root = ~0; - dir_commit(); - } - /* Preallocate next commit, ready for changes. */ - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; -} - -void repo_reset(void) -{ - pool_reset(); - commit_reset(); - dir_reset(); - dent_reset(); + fast_export_delete(REPO_MAX_PATH_DEPTH, path); } diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 11d48c2444..d690784fbb 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -14,7 +14,7 @@ uint32_t next_blob_mark(void); void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_read_path(const uint32_t *path); +const char *repo_read_path(const uint32_t *path); uint32_t repo_read_mode(const uint32_t *path); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c index f5b1da836e..c08abac71d 100644 --- a/vcs-svn/string_pool.c +++ b/vcs-svn/string_pool.c @@ -65,7 +65,7 @@ uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) return token ? 
pool_intern(token) : ~0; } -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream) { uint32_t i; for (i = 0; i < len && ~seq[i]; i++) { diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h index 222fb66e68..3720cf8164 100644 --- a/vcs-svn/string_pool.h +++ b/vcs-svn/string_pool.h @@ -4,7 +4,7 @@ uint32_t pool_intern(const char *key); const char *pool_fetch(uint32_t entry); uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream); uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); void pool_reset(void); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 3cc4135892..7ecb227a6d 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -36,6 +36,8 @@ obj_pool_gen(log, char, 4096) static struct line_buffer input = LINE_BUFFER_INIT; +#define REPORT_FILENO 3 + static char *log_copy(uint32_t length, const char *log) { char *buffer; @@ -202,15 +204,21 @@ static void read_props(void) static void handle_node(void) { - uint32_t mark = 0; const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + /* + * Old text for this node: + * NULL - directory or bug + * empty_blob - empty + * "<dataref>" - data retrievable from fast-import + */ + static const char *const empty_blob = "::empty::"; + const char *old_data = NULL; if (node_ctx.text_delta) die("text deltas not supported"); - if (have_text) - mark = next_blob_mark(); + if (node_ctx.action == NODEACT_DELETE) { if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " @@ -230,15 +238,15 @@ static void handle_node(void) die("invalid dump: directories cannot have text attached"); /* - * Decide on the new
content (mark) and mode (node_ctx.type). + * Find old content (old_data) and decide on the new mode. */ if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { if (type != REPO_MODE_DIR) die("invalid dump: root of tree is not a regular file"); + old_data = NULL; } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode; - if (!have_text) - mark = repo_read_path(node_ctx.dst); + old_data = repo_read_path(node_ctx.dst); mode = repo_read_mode(node_ctx.dst); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); @@ -246,7 +254,11 @@ static void handle_node(void) die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (!have_text && type != REPO_MODE_DIR) + if (type == REPO_MODE_DIR) + old_data = NULL; + else if (have_text) + old_data = empty_blob; + else die("invalid dump: adds node without text"); } else { die("invalid dump: Node-path block lacks Node-action"); @@ -265,24 +277,34 @@ static void handle_node(void) /* * Save the result. */ - repo_add(node_ctx.dst, node_ctx.type, mark); - if (have_text) - fast_export_blob(node_ctx.type, mark, - node_ctx.textLength, &input); + if (type == REPO_MODE_DIR) /* directories are not tracked. */ + return; + assert(old_data); + if (old_data == empty_blob) + /* For the fast_export_* functions, NULL means empty. */ + old_data = NULL; + if (!have_text) { + fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, + node_ctx.type, old_data); + return; + } + fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, + node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.textLength, &input); } static void begin_revision(void) { if (!rev_ctx.revision) /* revision 0 gets no git commit. 
*/ return; - fast_export_begin_commit(rev_ctx.revision); + fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); } static void end_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + fast_export_end_commit(rev_ctx.revision); } void svndump_read(const char *url) @@ -383,7 +405,6 @@ int svndump_init(const char *filename) { if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); - repo_init(); fast_export_init(REPORT_FILENO); reset_dump_ctx(~0); reset_rev_ctx(0); @@ -396,7 +417,6 @@ void svndump_deinit(void) { log_reset(); fast_export_deinit(); - repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); @@ -411,7 +431,6 @@ void svndump_reset(void) log_reset(); fast_export_reset(); buffer_reset(&input); - repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); -- cgit v1.2.3 From e43581120843f6f55f411af470faf806e052ad9d Mon Sep 17 00:00:00 2001 From: David Barr Date: Sun, 12 Dec 2010 03:59:31 +1100 Subject: vcs-svn: quote paths correctly for ls command This bug was found while importing rev 601865 of ASF. 
[jn: with test] Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 2 +- vcs-svn/string_pool.c | 11 +++++++++++ vcs-svn/string_pool.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index a8ce5c64b2..4d57efabd5 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -107,7 +107,7 @@ static void ls_from_active_commit(uint32_t depth, const uint32_t *path) { /* ls "path/to/file" */ printf("ls \""); - pool_print_seq(depth, path, '/', stdout); + pool_print_seq_q(depth, path, '/', stdout); printf("\"\n"); fflush(stdout); } diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c index c08abac71d..be43598d5b 100644 --- a/vcs-svn/string_pool.c +++ b/vcs-svn/string_pool.c @@ -4,6 +4,7 @@ */ #include "git-compat-util.h" +#include "quote.h" #include "trp.h" #include "obj_pool.h" #include "string_pool.h" @@ -75,6 +76,16 @@ void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream) } } +void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + quote_c_style(pool_fetch(seq[i]), NULL, stream, 1); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) { char *context = NULL; diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h index 3720cf8164..96e501dc53 100644 --- a/vcs-svn/string_pool.h +++ b/vcs-svn/string_pool.h @@ -5,6 +5,7 @@ uint32_t pool_intern(const char *key); const char *pool_fetch(uint32_t entry); uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream); +void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream); uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); void pool_reset(void); -- 
cgit v1.2.3 From 1ae469b06c50aade4781931ca1587453082f57eb Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 11 Dec 2010 17:08:51 -0600 Subject: vcs-svn: handle filenames with dq correctly Quote paths passed to fast-import so filenames with double quotes are not misinterpreted. One might imagine this could help with filenames with newlines, too, but svn does not allow those. Helped-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 4d57efabd5..9c03f3e16d 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -34,10 +34,9 @@ void fast_export_reset(void) void fast_export_delete(uint32_t depth, const uint32_t *path) { - putchar('D'); - putchar(' '); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + printf("D \""); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); } static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode) @@ -54,9 +53,9 @@ void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode, fast_export_truncate(depth, path, mode); return; } - printf("M %06"PRIo32" %s ", mode, dataref); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + printf("M %06"PRIo32" %s \"", mode, dataref); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; @@ -97,9 +96,9 @@ void fast_export_end_commit(uint32_t revision) static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path) { /* ls :5 path/to/old/file */ - printf("ls :%"PRIu32" ", rev); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + printf("ls :%"PRIu32" \"", rev); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); fflush(stdout); } -- cgit v1.2.3 From dd3f42ad793b5334d506a451addcefd0054c27bb 
Mon Sep 17 00:00:00 2001 From: David Barr Date: Sun, 12 Dec 2010 13:41:38 +1100 Subject: vcs-svn: use mark from previous import for parent commit With this patch, overlapping incremental imports work. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 9c03f3e16d..f19db9ae82 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -83,7 +83,7 @@ void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log, log, gitsvnline); if (!first_commit_done) { if (revision > 1) - printf("from refs/heads/master^0\n"); + printf("from :%"PRIu32"\n", revision - 1); first_commit_done = 1; } } -- cgit v1.2.3 From 030879718f696b67fe1c958ab0a238971773ac96 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 16:41:12 +1100 Subject: vcs-svn: pass paths through to fast-import Now that there is no internal representation of the repo, it is not necessary to tokenise paths. Use strbuf instead and bypass string_pool. This means svn-fe can handle arbitrarily long paths (as long as a strbuf can fit them), with arbitrarily many path components. While at it, since we now treat paths in their entirety, only quote when necessary. 
Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 48 ++++++++++++++++++++++++------------------------ vcs-svn/fast_export.h | 9 ++++----- vcs-svn/repo_tree.c | 20 ++++++++++---------- vcs-svn/repo_tree.h | 13 +++++-------- vcs-svn/svndump.c | 34 +++++++++++++++++++--------------- 5 files changed, 62 insertions(+), 62 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index a64a3c5633..ec323e9b39 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -4,10 +4,11 @@ */ #include "git-compat-util.h" +#include "strbuf.h" +#include "quote.h" #include "fast_export.h" #include "line_buffer.h" #include "repo_tree.h" -#include "string_pool.h" #include "strbuf.h" #define MAX_GITSVN_LINE_LEN 4096 @@ -32,30 +33,30 @@ void fast_export_reset(void) buffer_reset(&report_buffer); } -void fast_export_delete(uint32_t depth, const uint32_t *path) +void fast_export_delete(const char *path) { - printf("D \""); - pool_print_seq_q(depth, path, '/', stdout); - printf("\"\n"); + putchar('D'); + putchar(' '); + quote_c_style(path, NULL, stdout, 0); + putchar('\n'); } -static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode) +static void fast_export_truncate(const char *path, uint32_t mode) { - fast_export_modify(depth, path, mode, "inline"); + fast_export_modify(path, mode, "inline"); printf("data 0\n\n"); } -void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode, - const char *dataref) +void fast_export_modify(const char *path, uint32_t mode, const char *dataref) { /* Mode must be 100644, 100755, 120000, or 160000. 
*/ if (!dataref) { - fast_export_truncate(depth, path, mode); + fast_export_truncate(path, mode); return; } - printf("M %06"PRIo32" %s \"", mode, dataref); - pool_print_seq_q(depth, path, '/', stdout); - printf("\"\n"); + printf("M %06"PRIo32" %s ", mode, dataref); + quote_c_style(path, NULL, stdout, 0); + putchar('\n'); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; @@ -93,20 +94,20 @@ void fast_export_end_commit(uint32_t revision) printf("progress Imported commit %"PRIu32".\n\n", revision); } -static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path) +static void ls_from_rev(uint32_t rev, const char *path) { /* ls :5 path/to/old/file */ - printf("ls :%"PRIu32" \"", rev); - pool_print_seq_q(depth, path, '/', stdout); - printf("\"\n"); + printf("ls :%"PRIu32" ", rev); + quote_c_style(path, NULL, stdout, 0); + putchar('\n'); fflush(stdout); } -static void ls_from_active_commit(uint32_t depth, const uint32_t *path) +static void ls_from_active_commit(const char *path) { /* ls "path/to/file" */ printf("ls \""); - pool_print_seq_q(depth, path, '/', stdout); + quote_c_style(path, NULL, stdout, 1); printf("\"\n"); fflush(stdout); } @@ -183,16 +184,15 @@ static int parse_ls_response(const char *response, uint32_t *mode, return 0; } -int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path, +int fast_export_ls_rev(uint32_t rev, const char *path, uint32_t *mode, struct strbuf *dataref) { - ls_from_rev(rev, depth, path); + ls_from_rev(rev, path); return parse_ls_response(get_response_line(), mode, dataref); } -int fast_export_ls(uint32_t depth, const uint32_t *path, - uint32_t *mode, struct strbuf *dataref) +int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) { - ls_from_active_commit(depth, path); + ls_from_active_commit(path); return parse_ls_response(get_response_line(), mode, dataref); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index fc14242420..12b0bbb419 100644 --- a/vcs-svn/fast_export.h +++ 
b/vcs-svn/fast_export.h @@ -8,9 +8,8 @@ void fast_export_init(int fd); void fast_export_deinit(void); void fast_export_reset(void); -void fast_export_delete(uint32_t depth, const uint32_t *path); -void fast_export_modify(uint32_t depth, const uint32_t *path, - uint32_t mode, const char *dataref); +void fast_export_delete(const char *path); +void fast_export_modify(const char *path, uint32_t mode, const char *dataref); void fast_export_begin_commit(uint32_t revision, const char *author, char *log, const char *uuid, const char *url, unsigned long timestamp); @@ -18,9 +17,9 @@ void fast_export_end_commit(uint32_t revision); void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); /* If there is no such file at that rev, returns -1, errno == ENOENT. */ -int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path, +int fast_export_ls_rev(uint32_t rev, const char *path, uint32_t *mode_out, struct strbuf *dataref_out); -int fast_export_ls(uint32_t depth, const uint32_t *path, +int fast_export_ls(const char *path, uint32_t *mode_out, struct strbuf *dataref_out); #endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e75f58087c..f2466bc634 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -8,14 +8,14 @@ #include "repo_tree.h" #include "fast_export.h" -const char *repo_read_path(const uint32_t *path) +const char *repo_read_path(const char *path) { int err; uint32_t dummy; static struct strbuf buf = STRBUF_INIT; strbuf_reset(&buf); - err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &dummy, &buf); + err = fast_export_ls(path, &dummy, &buf); if (err) { if (errno != ENOENT) die_errno("BUG: unexpected fast_export_ls error"); @@ -24,14 +24,14 @@ const char *repo_read_path(const uint32_t *path) return buf.buf; } -uint32_t repo_read_mode(const uint32_t *path) +uint32_t repo_read_mode(const char *path) { int err; uint32_t result; static struct strbuf dummy = STRBUF_INIT; strbuf_reset(&dummy); - err = 
fast_export_ls(REPO_MAX_PATH_DEPTH, path, &result, &dummy); + err = fast_export_ls(path, &result, &dummy); if (err) { if (errno != ENOENT) die_errno("BUG: unexpected fast_export_ls error"); @@ -41,24 +41,24 @@ uint32_t repo_read_mode(const uint32_t *path) return result; } -void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) +void repo_copy(uint32_t revision, const char *src, const char *dst) { int err; uint32_t mode; static struct strbuf data = STRBUF_INIT; strbuf_reset(&data); - err = fast_export_ls_rev(revision, REPO_MAX_PATH_DEPTH, src, &mode, &data); + err = fast_export_ls_rev(revision, src, &mode, &data); if (err) { if (errno != ENOENT) die_errno("BUG: unexpected fast_export_ls_rev error"); - fast_export_delete(REPO_MAX_PATH_DEPTH, dst); + fast_export_delete(dst); return; } - fast_export_modify(REPO_MAX_PATH_DEPTH, dst, mode, data.buf); + fast_export_modify(dst, mode, data.buf); } -void repo_delete(uint32_t *path) +void repo_delete(const char *path) { - fast_export_delete(REPO_MAX_PATH_DEPTH, path); + fast_export_delete(path); } diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 29887f9765..44e6e8fabc 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -8,15 +8,12 @@ #define REPO_MODE_EXE 0100755 #define REPO_MODE_LNK 0120000 -#define REPO_MAX_PATH_LEN 4096 -#define REPO_MAX_PATH_DEPTH 1000 - uint32_t next_blob_mark(void); -void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -const char *repo_read_path(const uint32_t *path); -uint32_t repo_read_mode(const uint32_t *path); -void repo_delete(uint32_t *path); +void repo_copy(uint32_t revision, const char *src, const char *dst); +void repo_add(const char *path, uint32_t mode, uint32_t blob_mark); +const char *repo_read_path(const char *path); +uint32_t repo_read_mode(const char *path); +void repo_delete(const char *path); void repo_commit(uint32_t revision, const char *author, 
char *log, const char *uuid, const char *url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index f5de49cbeb..363503d4ea 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -11,7 +11,6 @@ #include "repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "string_pool.h" #include "strbuf.h" #define REPORT_FILENO 3 @@ -41,7 +40,7 @@ static struct line_buffer input = LINE_BUFFER_INIT; static struct { uint32_t action, propLength, textLength, srcRev, type; - uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + struct strbuf src, dst; uint32_t text_delta, prop_delta; } node_ctx; @@ -62,9 +61,11 @@ static void reset_node_ctx(char *fname) node_ctx.action = NODEACT_UNKNOWN; node_ctx.propLength = LENGTH_UNKNOWN; node_ctx.textLength = LENGTH_UNKNOWN; - node_ctx.src[0] = ~0; + strbuf_reset(&node_ctx.src); node_ctx.srcRev = 0; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + strbuf_reset(&node_ctx.dst); + if (fname) + strbuf_addstr(&node_ctx.dst, fname); node_ctx.text_delta = 0; node_ctx.prop_delta = 0; } @@ -228,14 +229,14 @@ static void handle_node(void) if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); - return repo_delete(node_ctx.dst); + return repo_delete(node_ctx.dst.buf); } if (node_ctx.action == NODEACT_REPLACE) { - repo_delete(node_ctx.dst); + repo_delete(node_ctx.dst.buf); node_ctx.action = NODEACT_ADD; } if (node_ctx.srcRev) { - repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf); if (node_ctx.action == NODEACT_ADD) node_ctx.action = NODEACT_CHANGE; } @@ -245,14 +246,14 @@ static void handle_node(void) /* * Find old content (old_data) and decide on the new mode. 
*/ - if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) { if (type != REPO_MODE_DIR) die("invalid dump: root of tree is not a regular file"); old_data = NULL; } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode; - old_data = repo_read_path(node_ctx.dst); - mode = repo_read_mode(node_ctx.dst); + old_data = repo_read_path(node_ctx.dst.buf); + mode = repo_read_mode(node_ctx.dst.buf); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) @@ -289,12 +290,10 @@ static void handle_node(void) /* For the fast_export_* functions, NULL means empty. */ old_data = NULL; if (!have_text) { - fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, - node_ctx.type, old_data); + fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); return; } - fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, - node_ctx.type, "inline"); + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); fast_export_data(node_ctx.type, node_ctx.textLength, &input); } @@ -395,7 +394,8 @@ void svndump_read(const char *url) case sizeof("Node-copyfrom-path"): if (constcmp(t, "Node-copyfrom-path")) continue; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + strbuf_reset(&node_ctx.src); + strbuf_addstr(&node_ctx.src, val); break; case sizeof("Node-copyfrom-rev"): if (constcmp(t, "Node-copyfrom-rev")) @@ -460,6 +460,8 @@ int svndump_init(const char *filename) strbuf_init(&dump_ctx.url, 4096); strbuf_init(&rev_ctx.log, 4096); strbuf_init(&rev_ctx.author, 4096); + strbuf_init(&node_ctx.src, 4096); + strbuf_init(&node_ctx.dst, 4096); reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); @@ -473,6 +475,8 @@ void svndump_deinit(void) reset_rev_ctx(0); reset_node_ctx(NULL); strbuf_release(&rev_ctx.log); + strbuf_release(&node_ctx.src); + strbuf_release(&node_ctx.dst); if (buffer_deinit(&input)) 
fprintf(stderr, "Input error\n"); if (ferror(stdout)) -- cgit v1.2.3 From 28c5d9ed2a2bc562bc8c50092f52f58b3aa08039 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 21:17:36 +1100 Subject: vcs-svn: drop string_pool This reverts commit 1d73b52f5ba4184de6acf474f14668001304a10c (Add string-specific memory pool, 2010-08-09). Now that svn-fe does not need to maintain a growing collection of strings (paths) over a long period of time, the string_pool is not needed. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/string_pool.c | 113 ------------------------------------------------ vcs-svn/string_pool.h | 12 ----- vcs-svn/string_pool.txt | 43 ------------------ 3 files changed, 168 deletions(-) delete mode 100644 vcs-svn/string_pool.c delete mode 100644 vcs-svn/string_pool.h delete mode 100644 vcs-svn/string_pool.txt (limited to 'vcs-svn') diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c deleted file mode 100644 index be43598d5b..0000000000 --- a/vcs-svn/string_pool.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#include "git-compat-util.h" -#include "quote.h" -#include "trp.h" -#include "obj_pool.h" -#include "string_pool.h" - -static struct trp_root tree = { ~0 }; - -struct node { - uint32_t offset; - struct trp_node children; -}; - -/* Two memory pools: one for struct node, and another for strings */ -obj_pool_gen(node, struct node, 4096) -obj_pool_gen(string, char, 4096) - -static char *node_value(struct node *node) -{ - return node ? 
string_pointer(node->offset) : NULL; -} - -static int node_cmp(struct node *a, struct node *b) -{ - return strcmp(node_value(a), node_value(b)); -} - -/* Build a Treap from the node structure (a trp_node w/ offset) */ -trp_gen(static, tree_, struct node, children, node, node_cmp); - -const char *pool_fetch(uint32_t entry) -{ - return node_value(node_pointer(entry)); -} - -uint32_t pool_intern(const char *key) -{ - /* Canonicalize key */ - struct node *match = NULL, *node; - uint32_t key_len; - if (key == NULL) - return ~0; - key_len = strlen(key) + 1; - node = node_pointer(node_alloc(1)); - node->offset = string_alloc(key_len); - strcpy(node_value(node), key); - match = tree_search(&tree, node); - if (!match) { - tree_insert(&tree, node); - } else { - node_free(1); - string_free(key_len); - node = match; - } - return node_offset(node); -} - -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) -{ - char *token = strtok_r(str, delim, saveptr); - return token ? pool_intern(token) : ~0; -} - -void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream) -{ - uint32_t i; - for (i = 0; i < len && ~seq[i]; i++) { - fputs(pool_fetch(seq[i]), stream); - if (i < len - 1 && ~seq[i + 1]) - fputc(delim, stream); - } -} - -void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream) -{ - uint32_t i; - for (i = 0; i < len && ~seq[i]; i++) { - quote_c_style(pool_fetch(seq[i]), NULL, stream, 1); - if (i < len - 1 && ~seq[i + 1]) - fputc(delim, stream); - } -} - -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) -{ - char *context = NULL; - uint32_t token = ~0; - uint32_t length; - - if (sz == 0) - return ~0; - if (str) - token = pool_tok_r(str, delim, &context); - for (length = 0; length < sz; length++) { - seq[length] = token; - if (token == ~0) - return length; - token = pool_tok_r(NULL, delim, &context); - } - seq[sz - 1] = ~0; - return sz; -} - -void pool_reset(void) -{ - node_reset(); - 
string_reset(); -} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h deleted file mode 100644 index 96e501dc53..0000000000 --- a/vcs-svn/string_pool.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef STRING_POOL_H_ -#define STRING_POOL_H_ - -uint32_t pool_intern(const char *key); -const char *pool_fetch(uint32_t entry); -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); -void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream); -void pool_print_seq_q(uint32_t len, const uint32_t *seq, char delim, FILE *stream); -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); -void pool_reset(void); - -#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt deleted file mode 100644 index 1b41f15628..0000000000 --- a/vcs-svn/string_pool.txt +++ /dev/null @@ -1,43 +0,0 @@ -string_pool API -=============== - -The string_pool API provides facilities for replacing strings -with integer keys that can be more easily compared and stored. -The facilities are designed so that one could teach Git without -too much trouble to store the information needed for these keys to -remain valid over multiple executions. - -Functions ---------- - -pool_intern:: - Include a string in the string pool and get its key. - If that string is already in the pool, retrieves its - existing key. - -pool_fetch:: - Retrieve the string associated to a given key. - -pool_tok_r:: - Extract the key of the next token from a string. - Interface mimics strtok_r. - -pool_print_seq:: - Print a sequence of strings named by key to a file, using the - specified delimiter to separate them. - - If NULL (key ~0) appears in the sequence, the sequence ends - early. - -pool_tok_seq:: - Split a string into tokens, storing the keys of segments - into a caller-provided array. - - Unless sz is 0, the array will always be ~0-terminated. 
- If there is not enough room for all the tokens, the - array holds as many tokens as fit in the entries before - the terminating ~0. Return value is the index after the - last token, or sz if the tokens did not fit. - -pool_reset:: - Deallocate storage for the string pool. -- cgit v1.2.3 From 5db348dbd51cdeac711521d1fa7258785e72d202 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 21:23:17 +1100 Subject: vcs-svn: drop treap This reverts commit 951f316470acc7c785c460a4e40735b22822349f (Add treap implementation, 2010-08-09). The string_pool was trp.h's last user. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/LICENSE | 3 - vcs-svn/trp.h | 237 -------------------------------------------------------- vcs-svn/trp.txt | 109 -------------------------- 3 files changed, 349 deletions(-) delete mode 100644 vcs-svn/trp.h delete mode 100644 vcs-svn/trp.txt (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c43a0..533f585ebf 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,9 +1,6 @@ Copyright (C) 2010 David Barr . All rights reserved. -Copyright (C) 2008 Jason Evans . -All rights reserved. - Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, Frankfurt/Main, Germany and others, see http://svn2cc.sarovar.org diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h deleted file mode 100644 index c32b9184e9..0000000000 --- a/vcs-svn/trp.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * C macro implementation of treaps. - * - * Usage: - * #include - * #include "trp.h" - * trp_gen(...) - * - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#ifndef TRP_H_ -#define TRP_H_ - -#define MAYBE_UNUSED __attribute__((__unused__)) - -/* Node structure. */ -struct trp_node { - uint32_t trpn_left; - uint32_t trpn_right; -}; - -/* Root structure. */ -struct trp_root { - uint32_t trp_root; -}; - -/* Pointer/Offset conversion. 
*/ -#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) -#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) -#define trpn_modify(a_base, a_offset) \ - do { \ - if ((a_offset) < a_base##_pool.committed) { \ - uint32_t old_offset = (a_offset);\ - (a_offset) = a_base##_alloc(1); \ - *trpn_pointer(a_base, a_offset) = \ - *trpn_pointer(a_base, old_offset); \ - } \ - } while (0) - -/* Left accessors. */ -#define trp_left_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_left) -#define trp_left_set(a_base, a_field, a_node, a_left) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_left_get(a_base, a_field, a_node) = (a_left); \ - } while (0) - -/* Right accessors. */ -#define trp_right_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_right) -#define trp_right_set(a_base, a_field, a_node, a_right) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_right_get(a_base, a_field, a_node) = (a_right); \ - } while (0) - -/* - * Fibonacci hash function. - * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). - * See Knuth §6.4: volume 3, 3rd ed, p518. - */ -#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) - -/* Priority accessors. */ -#define trp_prio_get(a_node) trpn_hash(a_node) - -/* Node initializer. */ -#define trp_node_new(a_base, a_field, a_node) \ - do { \ - trp_left_set(a_base, a_field, (a_node), ~0); \ - trp_right_set(a_base, a_field, (a_node), ~0); \ - } while (0) - -/* Internal utility macros. 
*/ -#define trpn_first(a_base, a_field, a_root, r_node) \ - do { \ - (r_node) = (a_root); \ - if ((r_node) == ~0) \ - return NULL; \ - while (~trp_left_get(a_base, a_field, (r_node))) \ - (r_node) = trp_left_get(a_base, a_field, (r_node)); \ - } while (0) - -#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_right_get(a_base, a_field, (a_node)); \ - trp_right_set(a_base, a_field, (a_node), \ - trp_left_get(a_base, a_field, (r_node))); \ - trp_left_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_left_get(a_base, a_field, (a_node)); \ - trp_left_set(a_base, a_field, (a_node), \ - trp_right_get(a_base, a_field, (r_node))); \ - trp_right_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ -a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ -{ \ - uint32_t ret; \ - trpn_first(a_base, a_field, treap->trp_root, ret); \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t ret; \ - uint32_t offset = trpn_offset(a_base, node); \ - if (~trp_right_get(a_base, a_field, offset)) { \ - trpn_first(a_base, a_field, \ - trp_right_get(a_base, a_field, offset), ret); \ - } else { \ - uint32_t tnode = treap->trp_root; \ - ret = ~0; \ - while (1) { \ - int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ - trpn_pointer(a_base, tnode)); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = trp_left_get(a_base, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = trp_right_get(a_base, a_field, tnode); \ - } else { \ - break; \ - } \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ 
- if (cmp < 0) { \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ - if (cmp < 0) { \ - if (!~trp_left_get(a_base, a_field, ret)) \ - break; \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ -{ \ - if (cur_node == ~0) { \ - return ins_node; \ - } else { \ - uint32_t ret; \ - int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ - trpn_pointer(a_base, cur_node)); \ - if (cmp < 0) { \ - uint32_t left = a_pre##insert_recurse( \ - trp_left_get(a_base, a_field, cur_node), ins_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - if (trp_prio_get(left) < trp_prio_get(cur_node)) \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } else { \ - uint32_t right = a_pre##insert_recurse( \ - trp_right_get(a_base, a_field, cur_node), ins_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - if (trp_prio_get(right) < trp_prio_get(cur_node)) \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } \ - return ret; \ - } \ -} \ -a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t offset = trpn_offset(a_base, node); \ - trp_node_new(a_base, a_field, offset); \ - treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ - return trpn_pointer(a_base, offset); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ -{ \ - int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ - 
trpn_pointer(a_base, cur_node)); \ - if (cmp == 0) { \ - uint32_t ret; \ - uint32_t left = trp_left_get(a_base, a_field, cur_node); \ - uint32_t right = trp_right_get(a_base, a_field, cur_node); \ - if (left == ~0) { \ - if (right == ~0) \ - return ~0; \ - } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - right = a_pre##remove_recurse(cur_node, rem_node); \ - trp_right_set(a_base, a_field, ret, right); \ - return ret; \ - } \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - left = a_pre##remove_recurse(cur_node, rem_node); \ - trp_left_set(a_base, a_field, ret, left); \ - return ret; \ - } else if (cmp < 0) { \ - uint32_t left = a_pre##remove_recurse( \ - trp_left_get(a_base, a_field, cur_node), rem_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - return cur_node; \ - } else { \ - uint32_t right = a_pre##remove_recurse( \ - trp_right_get(a_base, a_field, cur_node), rem_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - return cur_node; \ - } \ -} \ -a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ -{ \ - treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ - trpn_offset(a_base, node)); \ -} \ - -#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt deleted file mode 100644 index 5ca6b42edb..0000000000 --- a/vcs-svn/trp.txt +++ /dev/null @@ -1,109 +0,0 @@ -Motivation -========== - -Treaps provide a memory-efficient binary search tree structure. -Insertion/deletion/search are about as about as fast in the average -case as red-black trees and the chances of worst-case behavior are -vanishingly small, thanks to (pseudo-)randomness. The bad worst-case -behavior is a small price to pay, given that treaps are much simpler -to implement. - -API -=== - -The trp API generates a data structure and functions to handle a -large growing set of objects stored in a pool. - -The caller: - -. 
Specifies parameters for the generated functions with the - trp_gen(static, foo_, ...) macro. - -. Allocates a `struct trp_root` variable and sets it to {~0}. - -. Adds new nodes to the set using `foo_insert`. Any pointers - to existing nodes cannot be relied upon any more, so the caller - might retrieve them anew with `foo_pointer`. - -. Can find a specific item in the set using `foo_search`. - -. Can iterate over items in the set using `foo_first` and `foo_next`. - -. Can remove an item from the set using `foo_remove`. - -Example: - ----- -struct ex_node { - const char *s; - struct trp_node ex_link; -}; -static struct trp_root ex_base = {~0}; -obj_pool_gen(ex, struct ex_node, 4096); -trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) -struct ex_node *item; - -item = ex_pointer(ex_alloc(1)); -item->s = "hello"; -ex_insert(&ex_base, item); -item = ex_pointer(ex_alloc(1)); -item->s = "goodbye"; -ex_insert(&ex_base, item); -for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) - printf("%s\n", item->s); ----- - -Functions ---------- - -trp_gen(attr, foo_, node_type, link_field, pool, cmp):: - - Generate a type-specific treap implementation. -+ -. The storage class for generated functions will be 'attr' (e.g., `static`). -. Generated function names are prefixed with 'foo_' (e.g., `treap_`). -. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). - This type must be a struct with at least one `struct trp_node` field - to point to its children. -. The field used to access child nodes will be 'link_field'. -. All treap nodes must lie in the 'pool' object pool. -. Treap nodes must be totally ordered by the 'cmp' relation, with the - following prototype: -+ -int (*cmp)(node_type \*a, node_type \*b) -+ -and returning a value less than, equal to, or greater than zero -according to the result of comparison. - -node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node):: - - Insert node into treap. 
If inserted multiple times, - a node will appear in the treap multiple times. -+ -The return value is the address of the node within the treap, -which might differ from `node` if `pool_alloc` had to call -`realloc` to expand the pool. - -void foo_remove(struct trp_root *treap, node_type \*node):: - - Remove node from treap. Caller must ensure node is - present in treap before using this function. - -node_type *foo_search(struct trp_root \*treap, node_type \*key):: - - Search for a node that matches key. If no match is found, - result is NULL. - -node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: - - Like `foo_search`, but if if the key is missing return what - would be key's successor, were key in treap (NULL if no - successor). - -node_type *foo_first(struct trp_root \*treap):: - - Find the first item from the treap, in sorted order. - -node_type *foo_next(struct trp_root \*treap, node_type \*node):: - - Find the next item. -- cgit v1.2.3 From cba3546a43c64e2078664dbb6469aadf6bc473d3 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 21:26:43 +1100 Subject: vcs-svn: drop obj_pool This reverts commit 4709455db3891f6cad9a96a574296b4926f70cbe (Add memory pool library, 2010-08-09). svn-fe uses strbufs to avoid memory allocation overhead nowadays. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/obj_pool.h | 61 ------------------------------------------------------ 1 file changed, 61 deletions(-) delete mode 100644 vcs-svn/obj_pool.h (limited to 'vcs-svn') diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h deleted file mode 100644 index deb6eb8135..0000000000 --- a/vcs-svn/obj_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. 
- */ - -#ifndef OBJ_POOL_H_ -#define OBJ_POOL_H_ - -#include "git-compat-util.h" - -#define MAYBE_UNUSED __attribute__((__unused__)) - -#define obj_pool_gen(pre, obj_t, initial_capacity) \ -static struct { \ - uint32_t committed; \ - uint32_t size; \ - uint32_t capacity; \ - obj_t *base; \ -} pre##_pool = {0, 0, 0, NULL}; \ -static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ -{ \ - uint32_t offset; \ - if (pre##_pool.size + count > pre##_pool.capacity) { \ - while (pre##_pool.size + count > pre##_pool.capacity) \ - if (pre##_pool.capacity) \ - pre##_pool.capacity *= 2; \ - else \ - pre##_pool.capacity = initial_capacity; \ - pre##_pool.base = realloc(pre##_pool.base, \ - pre##_pool.capacity * sizeof(obj_t)); \ - } \ - offset = pre##_pool.size; \ - pre##_pool.size += count; \ - return offset; \ -} \ -static MAYBE_UNUSED void pre##_free(uint32_t count) \ -{ \ - pre##_pool.size -= count; \ -} \ -static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ -{ \ - return obj == NULL ? ~0 : obj - pre##_pool.base; \ -} \ -static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ -{ \ - return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ -} \ -static MAYBE_UNUSED void pre##_commit(void) \ -{ \ - pre##_pool.committed = pre##_pool.size; \ -} \ -static MAYBE_UNUSED void pre##_reset(void) \ -{ \ - free(pre##_pool.base); \ - pre##_pool.base = NULL; \ - pre##_pool.size = 0; \ - pre##_pool.capacity = 0; \ - pre##_pool.committed = 0; \ -} - -#endif -- cgit v1.2.3 From 43155cfe1415f5547791613a5de6399112ba3560 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 17:09:31 +1100 Subject: vcs-svn: avoid using ls command twice Currently there are two functions to retrieve the mode and content at a path: const char *repo_read_path(const uint32_t *path); uint32_t repo_read_mode(const uint32_t *path) Replace them with a single function with two return values. 
This means we can use one round-trip to get the same information from fast-import that previously took two. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/repo_tree.c | 24 ++++-------------------- vcs-svn/repo_tree.h | 3 +-- vcs-svn/svndump.c | 3 +-- 3 files changed, 6 insertions(+), 24 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e75f58087c..1681b654d1 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -8,39 +8,23 @@ #include "repo_tree.h" #include "fast_export.h" -const char *repo_read_path(const uint32_t *path) +const char *repo_read_path(const uint32_t *path, uint32_t *mode_out) { int err; - uint32_t dummy; static struct strbuf buf = STRBUF_INIT; strbuf_reset(&buf); - err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &dummy, &buf); + err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, mode_out, &buf); if (err) { if (errno != ENOENT) die_errno("BUG: unexpected fast_export_ls error"); + /* Treat missing paths as directories. */ + *mode_out = REPO_MODE_DIR; return NULL; } return buf.buf; } -uint32_t repo_read_mode(const uint32_t *path) -{ - int err; - uint32_t result; - static struct strbuf dummy = STRBUF_INIT; - - strbuf_reset(&dummy); - err = fast_export_ls(REPO_MAX_PATH_DEPTH, path, &result, &dummy); - if (err) { - if (errno != ENOENT) - die_errno("BUG: unexpected fast_export_ls error"); - /* Treat missing paths as directories. 
*/ - return REPO_MODE_DIR; - } - return result; -} - void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) { int err; diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index d690784fbb..f506352dc2 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -14,8 +14,7 @@ uint32_t next_blob_mark(void); void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -const char *repo_read_path(const uint32_t *path); -uint32_t repo_read_mode(const uint32_t *path); +const char *repo_read_path(const uint32_t *path, uint32_t *mode_out); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 7ecb227a6d..99a5ba0d10 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -246,8 +246,7 @@ static void handle_node(void) old_data = NULL; } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode; - old_data = repo_read_path(node_ctx.dst); - mode = repo_read_mode(node_ctx.dst); + old_data = repo_read_path(node_ctx.dst, &mode); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) -- cgit v1.2.3 From 9d2f5ddfe56fcc228a36dd079f0897e0f474eb4e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:54:58 -0600 Subject: vcs-svn: learn to maintain a sliding view of a file Each section of a Subversion-format delta only requires examining (and keeping in random-access memory) a small portion of the preimage. At any moment, this portion starts at a certain file offset and has a well-defined length, and as the delta is applied, the portion advances from the beginning to the end of the preimage. Add a move_window function to keep track of this view into the preimage. 
You can use it like this: buffer_init(f, NULL); struct sliding_view window = SLIDING_VIEW_INIT(f); move_window(&window, 3, 7); /* (1) */ move_window(&window, 5, 5); /* (2) */ move_window(&window, 12, 2); /* (3) */ strbuf_release(&window.buf); buffer_deinit(f); The data structure is called sliding_view instead of _window to prevent confusion with svndiff0 Windows. In this example, (1) reads 10 bytes and discards the first 3; (2) discards the first 2, which are not needed any more; and (3) skips 2 bytes and reads 2 new bytes to work with. When move_window returns, the file position indicator is at position window->off + window->width and the data from positions window->off to the current file position are stored in window->buf. This function performs only sequential access from the input file and never seeks, so it can be safely used on pipes and sockets. On end-of-file, move_window silently reads less than the caller requested. On other errors, it prints a message and returns -1. Helped-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/LICENSE | 2 ++ vcs-svn/sliding_window.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/sliding_window.h | 17 +++++++++++ 3 files changed, 96 insertions(+) create mode 100644 vcs-svn/sliding_window.c create mode 100644 vcs-svn/sliding_window.h (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c43a0..805882c838 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,6 +1,8 @@ Copyright (C) 2010 David Barr . All rights reserved. +Copyright (C) 2010 Jonathan Nieder . + Copyright (C) 2008 Jason Evans . All rights reserved. diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c new file mode 100644 index 0000000000..1b8d9875ed --- /dev/null +++ b/vcs-svn/sliding_window.c @@ -0,0 +1,77 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "strbuf.h" + +static int input_error(struct line_buffer *file) +{ + if (!buffer_ferror(file)) + return error("delta preimage ends early"); + return error("cannot read delta preimage: %s", strerror(errno)); +} + +static int skip_or_whine(struct line_buffer *file, off_t gap) +{ + if (buffer_skip_bytes(file, gap) != gap) + return input_error(file); + return 0; +} + +static int read_to_fill_or_whine(struct line_buffer *file, + struct strbuf *buf, size_t width) +{ + buffer_read_binary(file, buf, width - buf->len); + if (buf->len != width) + return input_error(file); + return 0; +} + +static int check_overflow(off_t a, size_t b) +{ + if (b > maximum_signed_value_of_type(off_t)) + return error("unrepresentable length in delta: " + "%"PRIuMAX" > OFF_MAX", (uintmax_t) b); + if (signed_add_overflows(a, (off_t) b)) + return error("unrepresentable offset in delta: " + "%"PRIuMAX" + %"PRIuMAX" > OFF_MAX", + (uintmax_t) a, (uintmax_t) b); + return 0; +} + +int move_window(struct sliding_view *view, off_t off, size_t width) +{ + off_t file_offset; + assert(view); + assert(view->width <= view->buf.len); + assert(!check_overflow(view->off, view->buf.len)); + + if (check_overflow(off, width)) + return -1; + if (off < view->off || off + width < view->off + view->width) + return error("invalid delta: window slides left"); + + file_offset = view->off + view->buf.len; + if (off < file_offset) { + /* Move the overlapping region into place. */ + strbuf_remove(&view->buf, 0, off - view->off); + } else { + /* Seek ahead to skip the gap. */ + if (skip_or_whine(view->file, off - file_offset)) + return -1; + strbuf_setlen(&view->buf, 0); + } + + if (view->buf.len > width) + ; /* Already read. 
*/ + else if (read_to_fill_or_whine(view->file, &view->buf, width)) + return -1; + + view->off = off; + view->width = width; + return 0; +} diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h new file mode 100644 index 0000000000..ed0bfdd65c --- /dev/null +++ b/vcs-svn/sliding_window.h @@ -0,0 +1,17 @@ +#ifndef SLIDING_WINDOW_H_ +#define SLIDING_WINDOW_H_ + +#include "strbuf.h" + +struct sliding_view { + struct line_buffer *file; + off_t off; + size_t width; + struct strbuf buf; +}; + +#define SLIDING_VIEW_INIT(input) { (input), 0, 0, STRBUF_INIT } + +extern int move_window(struct sliding_view *view, off_t off, size_t width); + +#endif -- cgit v1.2.3 From 896e4bfcec4f6b489aba2197f60a59bc7f45a8ac Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:37:36 -0600 Subject: vcs-svn: make buffer_read_binary API more convenient buffer_read_binary is a thin wrapper around fread, but its signature is wrong: - fread can fill an arbitrary in-memory buffer. buffer_read_binary is limited to buffers whose size is representable by a 32-bit integer. - The result from fread is the number of bytes actually read. buffer_read_binary only reports the number of bytes read by incrementing sb->len by that amount and returns void. Fix both: let buffer_read_binary accept a size_t instead of uint32_t for the number of bytes to read and as a convenience return the number of bytes actually read. 
Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 6 +++--- vcs-svn/line_buffer.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index c39038723e..01fcb842f1 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -91,10 +91,10 @@ char *buffer_read_line(struct line_buffer *buf) return buf->line_buffer; } -void buffer_read_binary(struct line_buffer *buf, - struct strbuf *sb, uint32_t size) +size_t buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, size_t size) { - strbuf_fread(sb, size, buf->infile); + return strbuf_fread(sb, size, buf->infile); } off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index d0b22dda76..8901f214ba 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -23,7 +23,7 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); int buffer_read_char(struct line_buffer *buf); -void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len); /* Returns number of bytes read (not necessarily written). */ off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); -- cgit v1.2.3 From ddcc8c5b469d2564dbacd629a873e7703f2dbd83 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 25 Dec 2010 05:11:32 -0600 Subject: vcs-svn: skeleton of an svn delta parser A delta in the subversion delta (svndiff0) format consists of the magic bytes SVN\0 followed by a sequence of windows of a certain well specified format (starting with five integers). 
Add an svndiff0_apply function and test-svn-fe -d commandline tool to parse such a delta in the special case of not including any windows. Later patches will add features to turn this into a fully functional delta applier for svn-fe to use to parse the streams produced by "svnrdump dump" and "svnadmin dump --deltas". The content of symlinks starts with the word "link " in Subversion's worldview, so we need to be able to prepend that text to input for the sake of delta application. So initialization of the input state of the delta preimage is left to the calling program, giving callers a chance to seed the buffer with text of their choice. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndiff.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/svndiff.h | 10 ++++++++++ 2 files changed, 62 insertions(+) create mode 100644 vcs-svn/svndiff.c create mode 100644 vcs-svn/svndiff.h (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c new file mode 100644 index 0000000000..591603669c --- /dev/null +++ b/vcs-svn/svndiff.c @@ -0,0 +1,52 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "svndiff.h" + +/* + * svndiff0 applier + * + * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. 
+ * + * svndiff0 ::= 'SVN\0' window* + */ + +static int error_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + return error("error reading delta: %s", strerror(errno)); + return error("invalid delta: unexpected end of file"); +} + +static int read_magic(struct line_buffer *in, off_t *len) +{ + static const char magic[] = {'S', 'V', 'N', '\0'}; + struct strbuf sb = STRBUF_INIT; + + if (*len < sizeof(magic) || + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) + return error_short_read(in); + + if (memcmp(sb.buf, magic, sizeof(magic))) + return error("invalid delta: unrecognized file type"); + + *len -= sizeof(magic); + strbuf_release(&sb); + return 0; +} + +int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage) +{ + assert(delta && preimage && postimage); + + if (read_magic(delta, &delta_len)) + return -1; + if (delta_len) + return error("What do you think I am? A delta applier?"); + return 0; +} diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h new file mode 100644 index 0000000000..74eb464bab --- /dev/null +++ b/vcs-svn/svndiff.h @@ -0,0 +1,10 @@ +#ifndef SVNDIFF_H_ +#define SVNDIFF_H_ + +struct line_buffer; +struct sliding_view; + +extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage); + +#endif -- cgit v1.2.3 From 252712111fad127db365e3dd764309fe5658679a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:21:43 -0500 Subject: vcs-svn: parse svndiff0 window header Each window in a subversion delta (svndiff0-format file) starts with a window header, consisting of five integers with variable-length representation: source view offset source view length output length instructions length auxiliary data length Parse it. The result is not usable for deltas with nonempty postimage yet; in fact, this only adds support for deltas without any instructions or auxiliary data. 
This is a good place to stop, though, since that little support lets us add some simple passing tests concerning error handling to the test suite. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndiff.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 591603669c..249efb6eed 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -13,8 +13,16 @@ * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. * * svndiff0 ::= 'SVN\0' window* + * window ::= int int int int int instructions inline_data; + * int ::= highdigit* lowdigit; + * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; + * lowdigit ::= # 7 bit value; */ +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -28,17 +36,84 @@ static int read_magic(struct line_buffer *in, off_t *len) struct strbuf sb = STRBUF_INIT; if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) - return error_short_read(in); + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { + error_short_read(in); + strbuf_release(&sb); + return -1; + } - if (memcmp(sb.buf, magic, sizeof(magic))) + if (memcmp(sb.buf, magic, sizeof(magic))) { + strbuf_release(&sb); return error("invalid delta: unrecognized file type"); + } *len -= sizeof(magic); strbuf_release(&sb); return 0; } +static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) +{ + uintmax_t rv = 0; + off_t sz; + for (sz = *len; sz; sz--) { + const int ch = buffer_read_char(in); + if (ch == EOF) + break; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *len = sz - 1; + return 0; + } + return error_short_read(in); +} + +static 
int read_offset(struct line_buffer *in, off_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > maximum_signed_value_of_type(off_t)) + return error("unrepresentable offset in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int read_length(struct line_buffer *in, size_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > SIZE_MAX) + return error("unrepresentable length in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +{ + size_t out_len; + size_t instructions_len; + size_t data_len; + assert(delta_len); + + /* "source view" offset and length already handled; */ + if (read_length(delta, &out_len, delta_len) || + read_length(delta, &instructions_len, delta_len) || + read_length(delta, &data_len, delta_len)) + return -1; + if (instructions_len) + return error("What do you think I am? A delta applier?"); + if (data_len) + return error("No support for inline data yet"); + return 0; +} + int svndiff0_apply(struct line_buffer *delta, off_t delta_len, struct sliding_view *preimage, FILE *postimage) { @@ -46,7 +121,14 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_magic(delta, &delta_len)) return -1; - if (delta_len) - return error("What do you think I am? A delta applier?"); + while (delta_len) { /* For each window: */ + off_t pre_off; + size_t pre_len; + + if (read_offset(delta, &pre_off, &delta_len) || + read_length(delta, &pre_len, &delta_len) || + apply_one_window(delta, &delta_len)) + return -1; + } return 0; } -- cgit v1.2.3 From bcd254621f9a98794cdc32906db10af7135824c4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:30:37 -0500 Subject: vcs-svn: read the preimage when applying deltas The source view offset heading each svndiff0 window represents a number of bytes past the beginning of the preimage. 
Together with the source view length, it dictates to the delta applier what portion of the preimage instructions will refer to. Read that portion right away using the sliding window code. Maybe some day we will use mmap to read data more lazily. Subversion's implementation tolerates source view offsets pointing past the end of the preimage file but we do not, for simplicity. This does not teach the delta applier to read instructions or copy data from the source view. Deltas that could produce nonempty output will still be rejected. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 249efb6eed..b7c2c8bf53 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -4,6 +4,7 @@ */ #include "git-compat-util.h" +#include "sliding_window.h" #include "line_buffer.h" #include "svndiff.h" @@ -127,6 +128,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || + move_window(preimage, pre_off, pre_len) || apply_one_window(delta, &delta_len)) return -1; } -- cgit v1.2.3 From fc4ae43b2cbd53da6ac2a0047fb4e53175921696 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:35:59 -0500 Subject: vcs-svn: read inline data from deltas Each window of an svndiff0-format delta includes a section for novel text to be copied to the postimage (in the order it appears in the window, possibly interspersed with other data). Slurp in this data when encountering it. It is not actually necessary to do so --- it would be just as easy to copy from delta to output as part of interpreting the relevant instructions --- but this way, the code that interprets svndiff0 instructions can proceed very quickly because it does not require I/O. 
Subversion's svndiff0 parser rejects deltas that do not consume all the novel text that was provided. Omit that check for now so we can test the new functionality right away, rather than waiting to learn instructions that consume data. Do check for truncated data sections. Subversion's parser rejects deltas that end in the middle of a declared novel-text section, so it should be safe for us to reject them, too. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index b7c2c8bf53..175168f599 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,17 @@ #define VLI_DIGIT_MASK 0x7f #define VLI_BITS_PER_DIGIT 7 +struct window { + struct strbuf data; +}; + +#define WINDOW_INIT { STRBUF_INIT } + +static void window_release(struct window *ctx) +{ + strbuf_release(&ctx->data); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -31,24 +42,30 @@ static int error_short_read(struct line_buffer *input) return error("invalid delta: unexpected end of file"); } +static int read_chunk(struct line_buffer *delta, off_t *delta_len, + struct strbuf *buf, size_t len) +{ + strbuf_reset(buf); + if (len > *delta_len || + buffer_read_binary(delta, buf, len) != len) + return error_short_read(delta); + *delta_len -= buf->len; + return 0; +} + static int read_magic(struct line_buffer *in, off_t *len) { static const char magic[] = {'S', 'V', 'N', '\0'}; struct strbuf sb = STRBUF_INIT; - if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { - error_short_read(in); + if (read_chunk(in, len, &sb, sizeof(magic))) { strbuf_release(&sb); return -1; } - if (memcmp(sb.buf, magic, sizeof(magic))) { strbuf_release(&sb); return error("invalid delta: 
unrecognized file type"); } - - *len -= sizeof(magic); strbuf_release(&sb); return 0; } @@ -98,6 +115,7 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) static int apply_one_window(struct line_buffer *delta, off_t *delta_len) { + struct window ctx = WINDOW_INIT; size_t out_len; size_t instructions_len; size_t data_len; @@ -107,12 +125,18 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len)) - return -1; - if (instructions_len) - return error("What do you think I am? A delta applier?"); - if (data_len) - return error("No support for inline data yet"); + goto error_out; + if (instructions_len) { + error("What do you think I am? A delta applier?"); + goto error_out; + } + if (read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + window_release(&ctx); return 0; +error_out: + window_release(&ctx); + return -1; } int svndiff0_apply(struct line_buffer *delta, off_t delta_len, -- cgit v1.2.3 From ef2ac77e9f8f4819f75cf52721567463e60a805c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:38:01 -0500 Subject: vcs-svn: read instructions from deltas Buffer the instruction section upon encountering it for later interpretation. An alternative design would involve parsing the instructions at this point and buffering them in some processed form. Using the unprocessed form is simpler. 
Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 175168f599..8968fdb4eb 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -25,13 +25,15 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT } +#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { + strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } @@ -124,7 +126,8 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) /* "source view" offset and length already handled; */ if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || - read_length(delta, &data_len, delta_len)) + read_length(delta, &data_len, delta_len) || + read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) goto error_out; if (instructions_len) { error("What do you think I am? A delta applier?"); -- cgit v1.2.3 From ec71aa2e1f229b90092e6678ac7c2dca3d15b5f3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:39:44 -0500 Subject: vcs-svn: implement copyfrom_data delta instruction The copyfrom_data instruction copies a few bytes verbatim from the novel text section of a window to the postimage. 
[jn: with memory leak fix from David] Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 7 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 8968fdb4eb..ed1d4a08be 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -15,28 +15,49 @@ * * svndiff0 ::= 'SVN\0' window* * window ::= int int int int int instructions inline_data; + * instructions ::= instruction*; + * instruction ::= view_selector int int + * | copyfrom_data int + * | packed_view_selector int + * | packed_copyfrom_data + * ; + * copyfrom_data ::= # binary 10 000000; + * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; * int ::= highdigit* lowdigit; * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; * lowdigit ::= # 7 bit value; */ +#define INSN_MASK 0xc0 +#define INSN_COPYFROM_DATA 0x80 +#define OPERAND_MASK 0x3f + #define VLI_CONTINUE 0x80 #define VLI_DIGIT_MASK 0x7f #define VLI_BITS_PER_DIGIT 7 struct window { + struct strbuf out; struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT } +#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { + strbuf_release(&ctx->out); strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } +static int write_strbuf(struct strbuf *sb, FILE *out) +{ + if (fwrite(sb->buf, 1, sb->len, out) == sb->len) /* Success. 
*/ + return 0; + return error("cannot write delta postimage: %s", strerror(errno)); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -93,6 +114,25 @@ static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) return error_short_read(in); } +static int parse_int(const char **buf, size_t *result, const char *end) +{ + size_t rv = 0; + const char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = *pos; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return error("invalid delta: unexpected end of instructions section"); +} + static int read_offset(struct line_buffer *in, off_t *result, off_t *len) { uintmax_t val; @@ -115,7 +155,64 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } -static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) +{ + const size_t pos = *data_pos; + if (unsigned_add_overflows(pos, nbytes) || + pos + nbytes > ctx->data.len) + return error("invalid delta: copies unavailable inline data"); + strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes); + *data_pos += nbytes; + return 0; +} + +static int parse_first_operand(const char **buf, size_t *out, const char *end) +{ + size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK; + if (result) { /* immediate operand */ + *out = result; + return 0; + } + return parse_int(buf, out, end); +} + +static int execute_one_instruction(struct window *ctx, + const char **instructions, size_t *data_pos) +{ + unsigned int instruction; + const char *insns_end = ctx->instructions.buf + ctx->instructions.len; + size_t nbytes; + assert(ctx); + assert(instructions && *instructions); + assert(data_pos); + + instruction = (unsigned char) **instructions; + if (parse_first_operand(instructions, &nbytes, insns_end)) + 
return -1; + if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + return error("Unknown instruction %x", instruction); + return copyfrom_data(ctx, data_pos, nbytes); +} + +static int apply_window_in_core(struct window *ctx) +{ + const char *instructions; + size_t data_pos = 0; + + /* + * Fill ctx->out.buf using data from the source, target, + * and inline data views. + */ + for (instructions = ctx->instructions.buf; + instructions != ctx->instructions.buf + ctx->instructions.len; + ) + if (execute_one_instruction(ctx, &instructions, &data_pos)) + return -1; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len, + FILE *out) { struct window ctx = WINDOW_INIT; size_t out_len; @@ -127,13 +224,17 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len) || - read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) + read_chunk(delta, delta_len, &ctx.instructions, instructions_len) || + read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + strbuf_grow(&ctx.out, out_len); + if (apply_window_in_core(&ctx)) goto error_out; - if (instructions_len) { - error("What do you think I am? 
A delta applier?"); + if (ctx.out.len != out_len) { + error("invalid delta: incorrect postimage length"); goto error_out; } - if (read_chunk(delta, delta_len, &ctx.data, data_len)) + if (write_strbuf(&ctx.out, out)) goto error_out; window_release(&ctx); return 0; @@ -156,7 +257,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len)) + apply_one_window(delta, &delta_len, postimage)) return -1; } return 0; -- cgit v1.2.3 From 4c9b93ed7644a7a7c72bdd8105d88a9ebb8e3e74 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:48:07 -0500 Subject: vcs-svn: verify that deltas consume all inline data By constraining the format of deltas, we can more easily detect corruption and other breakage. Requiring deltas not to provide unconsumed data also opens the possibility of ignoring the declared amount of novel data and simply streaming the data as needed to fulfill copyfrom_data requests. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index ed1d4a08be..fb7dc22f92 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -208,6 +208,8 @@ static int apply_window_in_core(struct window *ctx) ) if (execute_one_instruction(ctx, &instructions, &data_pos)) return -1; + if (data_pos != ctx->data.len) + return error("invalid delta: does not copy all inline data"); return 0; } -- cgit v1.2.3 From d3f131b57ec0e69a37bca882fa6bf39aa4c1c387 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:50:07 -0500 Subject: vcs-svn: let deltas use data from postimage The copyfrom_target instruction appends data that is already present in the current output view to the end of output.
(The offset argument is relative to the beginning of output produced in the current window.) The region copied is allowed to run past the end of the existing output. To support that case, copy one character at a time rather than calling memcpy or memmove. This allows copyfrom_target to be used once to repeat a string many times. For example: COPYFROM_DATA 2 COPYFROM_OUTPUT 10, 0 DATA "ab" would produce the output "abababababab". Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index fb7dc22f92..a02eee0410 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -21,7 +21,12 @@ * | packed_view_selector int * | packed_copyfrom_data * ; + * view_selector ::= copyfrom_source + * | copyfrom_target + * ; + * copyfrom_target ::= # binary 01 000000; * copyfrom_data ::= # binary 10 000000; + * packed_view_selector ::= # view_selector OR-ed with 6 bit value; * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; * int ::= highdigit* lowdigit; * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; @@ -29,6 +34,7 @@ */ #define INSN_MASK 0xc0 +#define INSN_COPYFROM_TARGET 0x40 #define INSN_COPYFROM_DATA 0x80 #define OPERAND_MASK 0x3f @@ -155,6 +161,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } +static int copyfrom_target(struct window *ctx, const char **instructions, + size_t nbytes, const char *instructions_end) +{ + size_t offset; + if (parse_int(instructions, &offset, instructions_end)) + return -1; + if (offset >= ctx->out.len) + return error("invalid delta: copies from the future"); + for (; nbytes > 0; nbytes--) + strbuf_addch(&ctx->out, ctx->out.buf[offset++]); + return 0; +} + static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) { const size_t pos = *data_pos; @@ -189,9 +208,14 @@
static int execute_one_instruction(struct window *ctx, instruction = (unsigned char) **instructions; if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; - if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + switch (instruction & INSN_MASK) { + case INSN_COPYFROM_TARGET: + return copyfrom_target(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_DATA: + return copyfrom_data(ctx, data_pos, nbytes); + default: return error("Unknown instruction %x", instruction); - return copyfrom_data(ctx, data_pos, nbytes); + } } static int apply_window_in_core(struct window *ctx) -- cgit v1.2.3 From c846e4107876936bed7177a811559bd74a72dcd8 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:58:30 -0500 Subject: vcs-svn: let deltas use data from preimage The copyfrom_source instruction appends data from the preimage buffer to the end of output. Its arguments are a length and an offset relative to the beginning of the source view. With this change, the delta applier is able to reproduce all 5,636,613 blobs in the early history of the ASF repository. Tested with mkfifo backflow svn-fe backflow with svn-asf-public-r0:940166 produced by whatever version of Subversion the dumps in /dump/ on svn.apache.org use (presumably 1.6.something). 
Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index a02eee0410..9ee41bbc90 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,7 @@ * view_selector ::= copyfrom_source * | copyfrom_target * ; + * copyfrom_source ::= # binary 00 000000; * copyfrom_target ::= # binary 01 000000; * copyfrom_data ::= # binary 10 000000; * packed_view_selector ::= # view_selector OR-ed with 6 bit value; @@ -34,6 +35,7 @@ */ #define INSN_MASK 0xc0 +#define INSN_COPYFROM_SOURCE 0x00 #define INSN_COPYFROM_TARGET 0x40 #define INSN_COPYFROM_DATA 0x80 #define OPERAND_MASK 0x3f @@ -43,12 +45,13 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct sliding_view *in; struct strbuf out; struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } +#define WINDOW_INIT(w) { (w), STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { @@ -161,6 +164,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } +static int copyfrom_source(struct window *ctx, const char **instructions, + size_t nbytes, const char *insns_end) +{ + size_t offset; + if (parse_int(instructions, &offset, insns_end)) + return -1; + if (unsigned_add_overflows(offset, nbytes) || + offset + nbytes > ctx->in->width) + return error("invalid delta: copies source data outside view"); + strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes); + return 0; +} + static int copyfrom_target(struct window *ctx, const char **instructions, size_t nbytes, const char *instructions_end) { @@ -209,12 +225,14 @@ static int execute_one_instruction(struct window *ctx, if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; switch (instruction & 
INSN_MASK) { + case INSN_COPYFROM_SOURCE: + return copyfrom_source(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_TARGET: return copyfrom_target(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_DATA: return copyfrom_data(ctx, data_pos, nbytes); default: - return error("Unknown instruction %x", instruction); + return error("invalid delta: unrecognized instruction"); } } @@ -238,9 +256,9 @@ static int apply_window_in_core(struct window *ctx) } static int apply_one_window(struct line_buffer *delta, off_t *delta_len, - FILE *out) + struct sliding_view *preimage, FILE *out) { - struct window ctx = WINDOW_INIT; + struct window ctx = WINDOW_INIT(preimage); size_t out_len; size_t instructions_len; size_t data_len; @@ -283,7 +301,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len, postimage)) + apply_one_window(delta, &delta_len, preimage, postimage)) return -1; } return 0; -- cgit v1.2.3 From 7a75e661c5cef9fcd7c84fe0fb22672a57d6373e Mon Sep 17 00:00:00 2001 From: David Barr Date: Sat, 19 Mar 2011 18:20:54 +1100 Subject: vcs-svn: implement text-delta handling Handle input in Subversion's dumpfile format, version 3. This is the format produced by "svnrdump dump" and "svnadmin dump --deltas", and the main difference between v3 dumpfiles and the dumpfiles already handled is that these can include nodes whose properties and text are expressed relative to some other node. To handle such nodes, we find which node the text and properties are based on, handle its property changes, use the cat-blob command to request the basis blob from the fast-import backend, use the svndiff0_apply() helper to apply the text delta on the fly, writing output to a temporary file, and then measure that postimage file's length and write its content to the fast-import stream. 
The temporary postimage file is shared between delta-using nodes to avoid some file system overhead. The svn-fe interface needs to be more complicated to accommodate the backward flow of information from the fast-import backend to svn-fe. The backflow fd is not needed when parsing streams without deltas, though, so existing scripts using svn-fe on v2 dumps should continue to work. NEEDSWORK: generalize interface so caller sets the backflow fd, close temporary file before exiting Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++- vcs-svn/fast_export.h | 3 ++ vcs-svn/svndump.c | 13 ++++-- 3 files changed, 120 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 33e853d9cd..005674d8c1 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -7,15 +7,38 @@ #include "strbuf.h" #include "quote.h" #include "fast_export.h" -#include "line_buffer.h" #include "repo_tree.h" #include "strbuf.h" +#include "svndiff.h" +#include "sliding_window.h" +#include "line_buffer.h" #define MAX_GITSVN_LINE_LEN 4096 +#define REPORT_FILENO 3 static uint32_t first_commit_done; +static struct line_buffer postimage = LINE_BUFFER_INIT; static struct line_buffer report_buffer = LINE_BUFFER_INIT; +/* NEEDSWORK: move to fast_export_init() */ +static int init_postimage(void) +{ + static int postimage_initialized; + if (postimage_initialized) + return 0; + postimage_initialized = 1; + return buffer_tmpfile_init(&postimage); +} + +static int init_report_buffer(int fd) +{ + static int report_buffer_initialized; + if (report_buffer_initialized) + return 0; + report_buffer_initialized = 1; + return buffer_fdinit(&report_buffer, fd); +} + void fast_export_init(int fd) { if (buffer_fdinit(&report_buffer, fd)) @@ -132,6 +155,73 @@ static void die_short_read(struct line_buffer *input)
die("invalid dump: unexpected end of file"); } +static int ends_with(const char *s, size_t len, const char *suffix) +{ + const size_t suffixlen = strlen(suffix); + if (len < suffixlen) + return 0; + return !memcmp(s + len - suffixlen, suffix, suffixlen); +} + +static int parse_cat_response_line(const char *header, off_t *len) +{ + size_t headerlen = strlen(header); + const char *type; + const char *end; + + if (ends_with(header, headerlen, " missing")) + return error("cat-blob reports missing blob: %s", header); + type = memmem(header, headerlen, " blob ", strlen(" blob ")); + if (!type) + return error("cat-blob header has wrong object type: %s", header); + *len = strtoumax(type + strlen(" blob "), (char **) &end, 10); + if (end == type + strlen(" blob ")) + return error("cat-blob header does not contain length: %s", header); + if (*end) + return error("cat-blob header contains garbage after length: %s", header); + return 0; +} + +static long apply_delta(off_t len, struct line_buffer *input, + const char *old_data, uint32_t old_mode) +{ + long ret; + off_t preimage_len = 0; + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer); + FILE *out; + + if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) + die("cannot open temporary file for blob retrieval"); + if (init_report_buffer(REPORT_FILENO)) + die("cannot open fd 3 for feedback from fast-import"); + if (old_data) { + const char *response; + printf("cat-blob %s\n", old_data); + fflush(stdout); + response = get_response_line(); + if (parse_cat_response_line(response, &preimage_len)) + die("invalid cat-blob response: %s", response); + } + if (old_mode == REPO_MODE_LNK) { + strbuf_addstr(&preimage.buf, "link "); + preimage_len += strlen("link "); + } + if (svndiff0_apply(input, len, &preimage, out)) + die("cannot apply delta"); + if (old_data) { + /* Read the remainder of preimage and trailing newline. 
*/ + if (move_window(&preimage, preimage_len, 1)) + die("cannot seek to end of input"); + if (preimage.buf.buf[0] != '\n') + die("missing newline after cat-blob response"); + } + ret = buffer_tmpfile_prepare_to_read(&postimage); + if (ret < 0) + die("cannot read temporary file for blob retrieval"); + strbuf_release(&preimage.buf); + return ret; +} + void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { @@ -199,3 +289,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) ls_from_active_commit(path); return parse_ls_response(get_response_line(), mode, dataref); } + +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input) +{ + long postimage_len; + if (len > maximum_signed_value_of_type(off_t)) + die("enormous delta"); + postimage_len = apply_delta((off_t) len, input, old_data, old_mode); + if (mode == REPO_MODE_LNK) { + buffer_skip_bytes(&postimage, strlen("link ")); + postimage_len -= strlen("link "); + } + printf("data %ld\n", postimage_len); + buffer_copy_bytes(&postimage, postimage_len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2d392e370d..43d05b65ef 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author, const char *url, unsigned long timestamp); void fast_export_end_commit(uint32_t revision); void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input); /* If there is no such file at that rev, returns -1, errno == ENOENT. 
*/ int fast_export_ls_rev(uint32_t rev, const char *path, diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 11c59f18bf..b1f4161068 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -217,9 +217,7 @@ static void handle_node(void) */ static const char *const empty_blob = "::empty::"; const char *old_data = NULL; - - if (node_ctx.text_delta) - die("text deltas not supported"); + uint32_t old_mode = REPO_MODE_BLB; if (node_ctx.action == NODEACT_DELETE) { if (have_text || have_props || node_ctx.srcRev) @@ -255,6 +253,7 @@ static void handle_node(void) if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; + old_mode = mode; } else if (node_ctx.action == NODEACT_ADD) { if (type == REPO_MODE_DIR) old_data = NULL; @@ -289,8 +288,14 @@ static void handle_node(void) fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); return; } + if (!node_ctx.text_delta) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.textLength, &input); + return; + } fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); - fast_export_data(node_ctx.type, node_ctx.textLength, &input); + fast_export_blob_delta(node_ctx.type, old_mode, old_data, + node_ctx.textLength, &input); } static void begin_revision(void) -- cgit v1.2.3 From fbdd4f6fb477885e4bf81658e02c3542a861c695 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 04:07:44 -0500 Subject: vcs-svn: cap number of bytes read from sliding view Introduce a "max_off" field in struct sliding_view, roughly representing a maximum number of bytes that can be read from "file". If it is set to a nonnegative integer, a call to move_window() attempting to put the right endpoint beyond that offset will return an error instead. The idea is to use this when applying Subversion-format deltas to prevent reads past the end of the preimage (which has known length). 
Without such a check, corrupt deltas would cause svn-fe to block indefinitely when data in the input pipe is exhausted. Inspired-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder --- vcs-svn/sliding_window.c | 2 ++ vcs-svn/sliding_window.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c index 1b8d9875ed..1bac7a4c7f 100644 --- a/vcs-svn/sliding_window.c +++ b/vcs-svn/sliding_window.c @@ -54,6 +54,8 @@ int move_window(struct sliding_view *view, off_t off, size_t width) return -1; if (off < view->off || off + width < view->off + view->width) return error("invalid delta: window slides left"); + if (view->max_off >= 0 && view->max_off < off + width) + return error("delta preimage ends early"); file_offset = view->off + view->buf.len; if (off < file_offset) { diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h index ed0bfdd65c..b43a825cba 100644 --- a/vcs-svn/sliding_window.h +++ b/vcs-svn/sliding_window.h @@ -7,10 +7,11 @@ struct sliding_view { struct line_buffer *file; off_t off; size_t width; + off_t max_off; /* -1 means unlimited */ struct strbuf buf; }; -#define SLIDING_VIEW_INIT(input) { (input), 0, 0, STRBUF_INIT } +#define SLIDING_VIEW_INIT(input, len) { (input), 0, 0, (len), STRBUF_INIT } extern int move_window(struct sliding_view *view, off_t off, size_t width); -- cgit v1.2.3 From abe27c0cbd97bf6a693004ddb411392ed596a853 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 05:18:33 -0500 Subject: vcs-svn: guard against overflow when computing preimage length Signed integer overflow produces undefined behavior in C and off_t is a signed type. For predictable behavior, add some checks to protect in advance against overflow. On 32-bit systems ftell as called by buffer_tmpfile_prepare_to_read is likely to fail with EOVERFLOW when reading the corresponding postimage, and this patch does not fix that. 
So it's more of a futureproofing measure than a complete fix. Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index edc658d4fe..96a75d51d1 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -166,6 +166,7 @@ static int ends_with(const char *s, size_t len, const char *suffix) static int parse_cat_response_line(const char *header, off_t *len) { size_t headerlen = strlen(header); + uintmax_t n; const char *type; const char *end; @@ -174,14 +175,25 @@ static int parse_cat_response_line(const char *header, off_t *len) type = memmem(header, headerlen, " blob ", strlen(" blob ")); if (!type) return error("cat-blob header has wrong object type: %s", header); - *len = strtoumax(type + strlen(" blob "), (char **) &end, 10); + n = strtoumax(type + strlen(" blob "), (char **) &end, 10); if (end == type + strlen(" blob ")) return error("cat-blob header does not contain length: %s", header); + if (memchr(type + strlen(" blob "), '-', end - type - strlen(" blob "))) + return error("cat-blob header contains negative length: %s", header); + if (n == UINTMAX_MAX || n > maximum_signed_value_of_type(off_t)) + return error("blob too large for current definition of off_t"); + *len = n; if (*end) return error("cat-blob header contains garbage after length: %s", header); return 0; } +static void check_preimage_overflow(off_t a, off_t b) +{ + if (signed_add_overflows(a, b)) + die("blob too large for current definition of off_t"); +} + static long apply_delta(off_t len, struct line_buffer *input, const char *old_data, uint32_t old_mode) { @@ -204,6 +216,7 @@ static long apply_delta(off_t len, struct line_buffer *input, } if (old_mode == REPO_MODE_LNK) { strbuf_addstr(&preimage.buf, "link "); + check_preimage_overflow(preimage_len, strlen("link ")); preimage_len += strlen("link "); } if (svndiff0_apply(input, len, 
&preimage, out)) -- cgit v1.2.3 From 3ac10b2e3fd6d858621f796160d251ad34affc20 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 05:44:27 -0500 Subject: vcs-svn: avoid hangs from corrupt deltas A corrupt Subversion-format delta can request reads past the end of the preimage. Set sliding_view::max_off so such corruption is caught when it appears rather than blocking in an impossible-to-fulfill read() when input is coming from a socket or pipe. Inspired-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 96a75d51d1..97f5fdf489 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -198,8 +198,7 @@ static long apply_delta(off_t len, struct line_buffer *input, const char *old_data, uint32_t old_mode) { long ret; - off_t preimage_len = 0; - struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, -1); + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, 0); FILE *out; if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) @@ -211,19 +210,23 @@ static long apply_delta(off_t len, struct line_buffer *input, printf("cat-blob %s\n", old_data); fflush(stdout); response = get_response_line(); - if (parse_cat_response_line(response, &preimage_len)) + if (parse_cat_response_line(response, &preimage.max_off)) die("invalid cat-blob response: %s", response); + check_preimage_overflow(preimage.max_off, 1); } if (old_mode == REPO_MODE_LNK) { strbuf_addstr(&preimage.buf, "link "); - check_preimage_overflow(preimage_len, strlen("link ")); - preimage_len += strlen("link "); + check_preimage_overflow(preimage.max_off, strlen("link ")); + preimage.max_off += strlen("link "); + check_preimage_overflow(preimage.max_off, 1); } if (svndiff0_apply(input, len, &preimage, out)) die("cannot apply delta"); if (old_data) { /* Read the remainder of 
preimage and trailing newline. */ - if (move_window(&preimage, preimage_len, 1)) + assert(!signed_add_overflows(preimage.max_off, 1)); + preimage.max_off++; /* room for newline */ + if (move_window(&preimage, preimage.max_off - 1, 1)) die("cannot seek to end of input"); if (preimage.buf.buf[0] != '\n') die("missing newline after cat-blob response"); -- cgit v1.2.3 From c5f1fbe7bc6b29f3343a168461ee70816ddebec2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivankov Date: Mon, 20 Jun 2011 14:22:47 +0600 Subject: vcs-svn: do not initialize report_buffer twice When importing from a dump with deltas, first fast_export_init calls buffer_fdinit, and then init_report_buffer calls fdopen once again when processing the first delta. The second initialization is redundant and leaks a FILE *. Remove the redundant on-demand initialization to fix this. Initializing directly in fast_export_init is simpler and lets the caller pass an int specifying which fd to use instead of hard-coding REPORT_FILENO. Signed-off-by: Dmitry Ivankov Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 97f5fdf489..3efde20a0c 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -14,7 +14,6 @@ #include "line_buffer.h" #define MAX_GITSVN_LINE_LEN 4096 -#define REPORT_FILENO 3 static uint32_t first_commit_done; static struct line_buffer postimage = LINE_BUFFER_INIT; @@ -30,15 +29,6 @@ static int init_postimage(void) return buffer_tmpfile_init(&postimage); } -static int init_report_buffer(int fd) -{ - static int report_buffer_initialized; - if (report_buffer_initialized) - return 0; - report_buffer_initialized = 1; - return buffer_fdinit(&report_buffer, fd); -} - void fast_export_init(int fd) { if (buffer_fdinit(&report_buffer, fd)) @@ -203,8 +193,6 @@ static long apply_delta(off_t len, struct line_buffer *input, if (init_postimage() || !(out = 
buffer_tmpfile_rewind(&postimage))) die("cannot open temporary file for blob retrieval"); - if (init_report_buffer(REPORT_FILENO)) - die("cannot open fd 3 for feedback from fast-import"); if (old_data) { const char *response; printf("cat-blob %s\n", old_data); -- cgit v1.2.3 From c5bcbcdcfa1e2a1977497cb3a342c0365c8d78d6 Mon Sep 17 00:00:00 2001 From: Dmitry Ivankov Date: Thu, 23 Jun 2011 17:33:58 +0600 Subject: vcs-svn: reset first_commit_done in fast_export_init first_commit_done has zero as a default value, but it is not reset back to zero in fast_export_init. Reset it back to zero so that each export will have proper initial state. Signed-off-by: Dmitry Ivankov Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 1 + 1 file changed, 1 insertion(+) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 3efde20a0c..19d7c34c25 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -31,6 +31,7 @@ static int init_postimage(void) void fast_export_init(int fd) { + first_commit_done = 0; if (buffer_fdinit(&report_buffer, fd)) die_errno("cannot read from file descriptor %d", fd); } -- cgit v1.2.3