From abfef3bbf5f637c86032763632393ce1ffd23ccc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 16 Apr 2014 23:54:05 -0700 Subject: git-svn: only look at the new parts of svn:mergeinfo In a Subversion repository where many feature branches are merged into a trunk, the svn:mergeinfo property can grow very large. This severely slows down git-svn's make_log_entry() because it is checking all mergeinfo entries every time the property changes. In most cases, the additions to svn:mergeinfo since the last commit are pretty small, and there is nothing to gain by checking merges that were already checked for the last commit in the branch. Add a mergeinfo_changes() function which computes the set of interesting changes to svn:mergeinfo since the last commit. Filter out merged branches whose ranges haven't changed, and remove a common prefix of ranges from other merged branches. This speeds up "git svn fetch" by several orders of magnitude on a large repository where thousands of feature branches have been merged. Signed-off-by: Jakob Stoklund Olesen Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 84 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 09cff135ef..abd51eadd2 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1178,7 +1178,7 @@ sub find_parent_branch { or die "SVN connection failed somewhere...\n"; } print STDERR "Successfully followed parent\n" unless $::_q > 1; - return $self->make_log_entry($rev, [$parent], $ed); + return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from); } return undef; } @@ -1210,7 +1210,7 @@ sub do_fetch { unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) { die "SVN connection failed somewhere...\n"; } - $self->make_log_entry($rev, \@parents, $ed); + $self->make_log_entry($rev, \@parents, $ed, $last_rev); } sub mkemptydirs { @@ -1478,9 +1478,9 @@ sub find_extra_svk_parents { sub lookup_svn_merge { my $uuid = shift; my $url = shift; - my $merge = shift; + my $source = shift; + my $revs = shift; - my ($source, $revs) = split ":", $merge; my $path = $source; $path =~ s{^/}{}; my $gs = Git::SVN->find_by_url($url.$source, $url, $path); @@ -1702,6 +1702,62 @@ sub parents_exclude { return @excluded; } +# Compute what's new in svn:mergeinfo. +sub mergeinfo_changes { + my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_; + my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop; + my $old_minfo = {}; + + # Initialize cache on the first call. + unless (defined $self->{cached_mergeinfo_rev}) { + $self->{cached_mergeinfo_rev} = {}; + $self->{cached_mergeinfo} = {}; + } + + my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path}; + if (defined $cached_rev && $cached_rev == $old_rev) { + $old_minfo = $self->{cached_mergeinfo}{$old_path}; + } else { + my $ra = $self->ra; + # Give up if $old_path isn't in the repo. + # This is probably a merge on a subtree. + if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) { + warn "W: ignoring svn:mergeinfo on $old_path, ", + "directory didn't exist in r$old_rev\n"; + return {}; + } + my (undef, undef, $props) = + $self->ra->get_dir($old_path, $old_rev); + if (defined $props->{"svn:mergeinfo"}) { + my %omi = map {split ":", $_ } split "\n", + $props->{"svn:mergeinfo"}; + $old_minfo = \%omi; + } + $self->{cached_mergeinfo}{$old_path} = $old_minfo; + $self->{cached_mergeinfo_rev}{$old_path} = $old_rev; + } + + # Cache the new mergeinfo. + $self->{cached_mergeinfo}{$path} = \%minfo; + $self->{cached_mergeinfo_rev}{$path} = $rev; + + my %changes = (); + foreach my $p (keys %minfo) { + my $a = $old_minfo->{$p} || ""; + my $b = $minfo{$p}; + # Omit merged branches whose ranges lists are unchanged. + next if $a eq $b; + # Remove any common range list prefix. + ($a ^ $b) =~ /^[\0]*/; + my $common_prefix = rindex $b, ",", $+[0] - 1; + $changes{$p} = substr $b, $common_prefix + 1; + } + print STDERR "Checking svn:mergeinfo changes since r$old_rev: ", + scalar(keys %minfo), " sources, ", + scalar(keys %changes), " changed\n"; + + return \%changes; +} # note: this function should only be called if the various dirprops # have actually changed @@ -1715,14 +1771,15 @@ sub find_extra_svn_parents { # history. Then, we figure out which git revisions are in # that tip, but not this revision. If all of those revisions # are now marked as merge, we can add the tip as a parent. - my @merges = split "\n", $mergeinfo; + my @merges = sort keys %$mergeinfo; my @merge_tips; my $url = $self->url; my $uuid = $self->ra_uuid; my @all_ranges; for my $merge ( @merges ) { my ($tip_commit, @ranges) = - lookup_svn_merge( $uuid, $url, $merge ); + lookup_svn_merge( $uuid, $url, + $merge, $mergeinfo->{$merge} ); unless (!$tip_commit or grep { $_ eq $tip_commit } @$parents ) { push @merge_tips, $tip_commit; @@ -1738,8 +1795,9 @@ sub find_extra_svn_parents { # check merge tips for new parents my @new_parents; for my $merge_tip ( @merge_tips ) { - my $spec = shift @merges; + my $merge = shift @merges; next unless $merge_tip and $excluded{$merge_tip}; + my $spec = "$merge:$mergeinfo->{$merge}"; # check out 'new' tips my $merge_base; @@ -1770,7 +1828,7 @@ sub find_extra_svn_parents { .@incomplete." commit(s) (eg $incomplete[0])\n"; } else { warn - "Found merge parent (svn:mergeinfo prop): ", + "Found merge parent ($spec): ", $merge_tip, "\n"; push @new_parents, $merge_tip; } @@ -1797,7 +1855,7 @@ sub find_extra_svn_parents { } sub make_log_entry { - my ($self, $rev, $parents, $ed) = @_; + my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_; my $untracked = $self->get_untracked($ed); my @parents = @$parents; @@ -1809,10 +1867,12 @@ sub make_log_entry { ($ed, $props->{"svk:merge"}, \@parents); } if ( $props->{"svn:mergeinfo"} ) { + my $mi_changes = $self->mergeinfo_changes + ($parent_path || $path, $parent_rev, + $path, $rev, + $props->{"svn:mergeinfo"}); $self->find_extra_svn_parents - ($ed, - $props->{"svn:mergeinfo"}, - \@parents); + ($ed, $mi_changes, \@parents); } } -- cgit v1.2.3 From 9ee13a934e21380ff6082d98a86cab5e890418dc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 16 Apr 2014 23:54:06 -0700 Subject: git-svn: only look at the root path for svn:mergeinfo Subversion can put mergeinfo on any sub-directory to track cherry-picks. Since cherry-picks are not represented explicitly in git, git-svn should just ignore it. Signed-off-by: Jakob Stoklund Olesen Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index abd51eadd2..b1a84d03cb 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1210,7 +1210,7 @@ sub do_fetch { unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) { die "SVN connection failed somewhere...\n"; } - $self->make_log_entry($rev, \@parents, $ed, $last_rev); + $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path); } sub mkemptydirs { @@ -1859,21 +1859,18 @@ sub make_log_entry { my $untracked = $self->get_untracked($ed); my @parents = @$parents; - my $ps = $ed->{path_strip} || ""; - for my $path ( grep { m/$ps/ } %{$ed->{dir_prop}} ) { - my $props = $ed->{dir_prop}{$path}; - if ( $props->{"svk:merge"} ) { - $self->find_extra_svk_parents - ($ed, $props->{"svk:merge"}, \@parents); - } - if ( $props->{"svn:mergeinfo"} ) { - my $mi_changes = $self->mergeinfo_changes - ($parent_path || $path, $parent_rev, - $path, $rev, - $props->{"svn:mergeinfo"}); - $self->find_extra_svn_parents - ($ed, $mi_changes, \@parents); - } + my $props = $ed->{dir_prop}{$self->path}; + if ( $props->{"svk:merge"} ) { + $self->find_extra_svk_parents + ($ed, $props->{"svk:merge"}, \@parents); + } + if ( $props->{"svn:mergeinfo"} ) { + my $mi_changes = $self->mergeinfo_changes + ($parent_path, $parent_rev, + $self->path, $rev, + $props->{"svn:mergeinfo"}); + $self->find_extra_svn_parents + ($ed, $mi_changes, \@parents); } open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!; -- cgit v1.2.3 From d0b34f241dc59fe2352cb1a724d0c5e8f0d2ff82 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 19 Oct 2014 04:08:31 +0000 Subject: git-svn: reduce check_cherry_pick cache overhead We do not need to store entire lists of commits, only the number of incomplete and the first commit for reference. This reduces the amount of data we need to store in memory and on disk stores. Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index b1a84d03cb..171af37f4a 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1537,7 +1537,7 @@ sub _rev_list { @rv; } -sub check_cherry_pick { +sub check_cherry_pick2 { my $base = shift; my $tip = shift; my $parents = shift; @@ -1552,7 +1552,8 @@ sub check_cherry_pick { delete $commits{$commit}; } } - return (keys %commits); + my @k = (keys %commits); + return (scalar @k, $k[0]); } sub has_no_changes { @@ -1597,7 +1598,7 @@ sub tie_for_persistent_memoization { mkpath([$cache_path]) unless -d $cache_path; my %lookup_svn_merge_cache; - my %check_cherry_pick_cache; + my %check_cherry_pick2_cache; my %has_no_changes_cache; my %_rev_list_cache; @@ -1608,11 +1609,11 @@ sub tie_for_persistent_memoization { LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache], ; - tie_for_persistent_memoization(\%check_cherry_pick_cache, - "$cache_path/check_cherry_pick"); - memoize 'check_cherry_pick', + tie_for_persistent_memoization(\%check_cherry_pick2_cache, + "$cache_path/check_cherry_pick2"); + memoize 'check_cherry_pick2', SCALAR_CACHE => 'FAULT', - LIST_CACHE => ['HASH' => \%check_cherry_pick_cache], + LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache], ; tie_for_persistent_memoization(\%has_no_changes_cache, @@ -1636,7 +1637,7 @@ sub tie_for_persistent_memoization { $memoized = 0; Memoize::unmemoize 'lookup_svn_merge'; - Memoize::unmemoize 'check_cherry_pick'; + Memoize::unmemoize 'check_cherry_pick2'; Memoize::unmemoize 'has_no_changes'; Memoize::unmemoize '_rev_list'; } @@ -1648,7 +1649,8 @@ sub tie_for_persistent_memoization { return unless -d $cache_path; for my $cache_file (("$cache_path/lookup_svn_merge", - "$cache_path/check_cherry_pick", + "$cache_path/check_cherry_pick", # old + "$cache_path/check_cherry_pick2", "$cache_path/has_no_changes")) { for my $suffix (qw(yaml db)) { my $file = "$cache_file.$suffix"; @@ -1817,15 +1819,15 @@ sub find_extra_svn_parents { } # double check that there are no missing non-merge commits - my (@incomplete) = check_cherry_pick( + my ($ninc, $ifirst) = check_cherry_pick2( $merge_base, $merge_tip, $parents, @all_ranges, ); - if ( @incomplete ) { - warn "W:svn cherry-pick ignored ($spec) - missing " - .@incomplete." commit(s) (eg $incomplete[0])\n"; + if ($ninc) { + warn "W:svn cherry-pick ignored ($spec) - missing " . + "$ninc commit(s) (eg $ifirst)\n"; } else { warn "Found merge parent ($spec): ", -- cgit v1.2.3 From 54b95346c1322bb122e12aba0f03652f241a918b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 20 Oct 2014 01:02:53 +0000 Subject: git-svn: cache only mergeinfo revisions This should reduce excessive memory usage from the new mergeinfo caches without hurting performance too much, assuming reasonable latency to the SVN server. Cc: Hin-Tak Leung Suggested-by: Jakob Stoklund Olesen Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 171af37f4a..f8a75b190c 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1713,13 +1713,10 @@ sub mergeinfo_changes { # Initialize cache on the first call. unless (defined $self->{cached_mergeinfo_rev}) { $self->{cached_mergeinfo_rev} = {}; - $self->{cached_mergeinfo} = {}; } my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path}; - if (defined $cached_rev && $cached_rev == $old_rev) { - $old_minfo = $self->{cached_mergeinfo}{$old_path}; - } else { + unless (defined $cached_rev && $cached_rev == $old_rev) { my $ra = $self->ra; # Give up if $old_path isn't in the repo. # This is probably a merge on a subtree. @@ -1728,19 +1725,16 @@ sub mergeinfo_changes { "directory didn't exist in r$old_rev\n"; return {}; } - my (undef, undef, $props) = - $self->ra->get_dir($old_path, $old_rev); - if (defined $props->{"svn:mergeinfo"}) { - my %omi = map {split ":", $_ } split "\n", - $props->{"svn:mergeinfo"}; - $old_minfo = \%omi; - } - $self->{cached_mergeinfo}{$old_path} = $old_minfo; - $self->{cached_mergeinfo_rev}{$old_path} = $old_rev; } + my (undef, undef, $props) = $self->ra->get_dir($old_path, $old_rev); + if (defined $props->{"svn:mergeinfo"}) { + my %omi = map {split ":", $_ } split "\n", + $props->{"svn:mergeinfo"}; + $old_minfo = \%omi; + } + $self->{cached_mergeinfo_rev}{$old_path} = $old_rev; # Cache the new mergeinfo. - $self->{cached_mergeinfo}{$path} = \%minfo; $self->{cached_mergeinfo_rev}{$path} = $rev; my %changes = (); -- cgit v1.2.3 From 2b6c613f1a873555475050bd8f5a22828f0d03a3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Oct 2014 06:23:22 +0000 Subject: git-svn: remove mergeinfo rev caching This should further reduce memory usage from the new mergeinfo speedups without hurting performance too much, assuming reasonable latency to the SVN server. Cc: Hin-Tak Leung Suggested-by: Jakob Stoklund Olesen Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index f8a75b190c..4364506ca2 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1710,32 +1710,20 @@ sub mergeinfo_changes { my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop; my $old_minfo = {}; - # Initialize cache on the first call. - unless (defined $self->{cached_mergeinfo_rev}) { - $self->{cached_mergeinfo_rev} = {}; - } - - my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path}; - unless (defined $cached_rev && $cached_rev == $old_rev) { - my $ra = $self->ra; - # Give up if $old_path isn't in the repo. - # This is probably a merge on a subtree. - if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) { - warn "W: ignoring svn:mergeinfo on $old_path, ", - "directory didn't exist in r$old_rev\n"; - return {}; - } - } - my (undef, undef, $props) = $self->ra->get_dir($old_path, $old_rev); + my $ra = $self->ra; + # Give up if $old_path isn't in the repo. + # This is probably a merge on a subtree. + if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) { + warn "W: ignoring svn:mergeinfo on $old_path, ", + "directory didn't exist in r$old_rev\n"; + return {}; + } + my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev); if (defined $props->{"svn:mergeinfo"}) { my %omi = map {split ":", $_ } split "\n", $props->{"svn:mergeinfo"}; $old_minfo = \%omi; } - $self->{cached_mergeinfo_rev}{$old_path} = $old_rev; - - # Cache the new mergeinfo. - $self->{cached_mergeinfo_rev}{$path} = $rev; my %changes = (); foreach my $p (keys %minfo) { -- cgit v1.2.3 From f947ae4b655b5354ca2a7c2456e04d6567921160 Mon Sep 17 00:00:00 2001 From: Sveinung Kvilhaugsvik Date: Fri, 10 Oct 2014 00:49:59 +0200 Subject: git-svn.txt: advertise pushurl with dcommit Advertise that the svn-remote..pushurl config key allows specifying the commit URL for the entire SVN repository in the documentation of the git svn dcommit command. Signed-off-by: Sveinung Kvilhaugsvik Signed-off-by: Eric Wong --- Documentation/git-svn.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index ef8ef1c545..af660f96a0 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -252,6 +252,10 @@ Use of 'dcommit' is preferred to 'set-tree' (below). config key: svn-remote..commiturl config key: svn.commiturl (overwrites all svn-remote..commiturl options) + +Note that the SVN URL of the commiturl config key includes the SVN branch. +If you rather want to set the commit URL for an entire SVN repository use +svn-remote..pushurl instead. ++ Using this option for any other purpose (don't ask) is very strongly discouraged. -- cgit v1.2.3 From dfa72fdb96befbd790f623bb2909a347176753c2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 24 Oct 2014 22:53:52 +0000 Subject: git-svn: reload RA every log-window-size Despite attempting to use local memory pools everywhere we can, (including our call to SVN::Ra::do_update and all subsequent reporter calls), there does not appear to be a way to force the Git::SVN::Fetcher callbacks to use a pool other than the per-SVN::Ra pool. Git::SVN::Fetcher ends up using the main RA pool which grows monotonically in size for the lifetime of the RA object. Thus the only way to free that memory appears to be to destroy and recreate the RA connection for at every --log-window-size interval. This reduces memory usage over the course of fetching 10K revisions using a test repository created with the script at the end of this commit message. As reported by time(1) on my x86-64 system: before: 54024k after: 28680k Unfortunately, there remains some yet-to-be-tracked-down slow memory growth which would be evident as the `nr' parameter increases in the repository generation script: -----------------------------8<------------------------------ set -e tmp=$(mktemp -d svntestrepo-XXXXXXXX) svnadmin create "$tmp" repo=file://"$(cd $tmp && pwd)" svn co "$repo" "$tmp/wd" cd "$tmp/wd" if ! test -f a then > a svn add a svn commit -m 'A' fi nr=10000 while test $nr -gt 0 do echo $nr > a svn commit -q -m A nr=$((nr - 1)) done echo "repository created in $repo" -----------------------------8<------------------------------ Signed-off-by: Eric Wong --- perl/Git/SVN/Ra.pm | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index a7b0119ee5..1828519b4a 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -376,10 +376,19 @@ sub longest_common_path { sub gs_fetch_loop_common { my ($self, $base, $head, $gsv, $globs) = @_; return if ($base > $head); + my $gpool = SVN::Pool->new_default; + my $ra_url = $self->url; + my $reload_ra = sub { + $_[0] = undef; + $self = undef; + $RA = undef; + $gpool->clear; + $self = Git::SVN::Ra->new($ra_url); + $ra_invalid = undef; + }; my $inc = $_log_window_size; my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc); my $longest_path = longest_common_path($gsv, $globs); - my $ra_url = $self->url; my $find_trailing_edge; while (1) { my %revs; @@ -449,13 +458,7 @@ sub gs_fetch_loop_common { "$g->{t}-maxRev"; Git::SVN::tmp_config($k, $r); } - if ($ra_invalid) { - $_[0] = undef; - $self = undef; - $RA = undef; - $self = Git::SVN::Ra->new($ra_url); - $ra_invalid = undef; - } + $reload_ra->() if $ra_invalid; } # pre-fill the .rev_db since it'll eventually get filled in # with '0' x40 if something new gets committed @@ -472,6 +475,8 @@ sub gs_fetch_loop_common { $min = $max + 1; $max += $inc; $max = $head if ($max > $head); + + $reload_ra->(); } Git::SVN::gc(); } -- cgit v1.2.3 From 6725ecaba7af16c69591e5a180acbc521e2bba63 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 25 Oct 2014 07:56:11 +0000 Subject: git-svn: remove unnecessary DESTROY override This override was probably never necessary, but most likely a no-op as it does not appear to do anything in SVN::Ra itself. Signed-off-by: Eric Wong --- perl/Git/SVN/Ra.pm | 4 ---- 1 file changed, 4 deletions(-) diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index 1828519b4a..e326849c30 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -177,10 +177,6 @@ sub get_dir { wantarray ? (\%dirents, $r, $props) : \%dirents; } -sub DESTROY { - # do not call the real DESTROY since we store ourselves in $RA -} - # get_log(paths, start, end, limit, # discover_changed_paths, strict_node_history, receiver) sub get_log { -- cgit v1.2.3 From aee7d04c126b48a9871309ec65cecf88781b1d32 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 25 Oct 2014 07:56:12 +0000 Subject: git-svn: save a little memory as fetch progresses There is no reason to keep entries in the %revs hash after we're done processing a revision, so allow entries become freed as processing continues. Signed-off-by: Eric Wong --- perl/Git/SVN/Ra.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index e326849c30..5bc5b4e594 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -431,7 +431,7 @@ sub gs_fetch_loop_common { my %exists = map { $_->path => $_ } @$gsv; foreach my $r (sort {$a <=> $b} keys %revs) { - my ($paths, $logged) = @{$revs{$r}}; + my ($paths, $logged) = @{delete $revs{$r}}; foreach my $gs ($self->match_globs(\%exists, $paths, $globs, $r)) { -- cgit v1.2.3 From 7676aff70973e617c3f58a8633db6d0e3ee99e45 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 27 Oct 2014 01:39:39 +0000 Subject: git-svn: disable _rev_list memoization This memoization appears unneeded as the check_cherry_pick2 cache is in front of it does enough. With this change applied, importing from local svn+ssh and http copies of the R repo[1] takes only 2:00 (2 hours) on my system and the git-svn process never uses more than 60MB RSS on my x86-64 GNU/Linux system[2]. This 60M measurement is only for the git-svn Perl process itself and does not include memory used by git subprocesses accessing large packs (subprocess memory usage _is_ measured by my time(1) tool). Before this change, an import took longer (2:20) on svn+ssh:// but git-svn used around 240MB during the imports. Worse yet, git-svn ballooned to over 400M when writing out the cache to the filesystem. I also tried removing memoization for `has_no_changes', too, but a local copy of the R repository(*) was not close to finishing within 10 hours on my system. [1] http://svn.r-project.org/R [2] file:// repos causes libsvn to use more memory internally Signed-off-by: Eric Wong Cc: Hin-Tak Leung --- perl/Git/SVN.pm | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 4364506ca2..5f9d4690bb 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1600,7 +1600,6 @@ sub tie_for_persistent_memoization { my %lookup_svn_merge_cache; my %check_cherry_pick2_cache; my %has_no_changes_cache; - my %_rev_list_cache; tie_for_persistent_memoization(\%lookup_svn_merge_cache, "$cache_path/lookup_svn_merge"); @@ -1622,14 +1621,6 @@ sub tie_for_persistent_memoization { SCALAR_CACHE => ['HASH' => \%has_no_changes_cache], LIST_CACHE => 'FAULT', ; - - tie_for_persistent_memoization(\%_rev_list_cache, - "$cache_path/_rev_list"); - memoize '_rev_list', - SCALAR_CACHE => 'FAULT', - LIST_CACHE => ['HASH' => \%_rev_list_cache], - ; - } sub unmemoize_svn_mergeinfo_functions { @@ -1639,7 +1630,6 @@ sub tie_for_persistent_memoization { Memoize::unmemoize 'lookup_svn_merge'; Memoize::unmemoize 'check_cherry_pick2'; Memoize::unmemoize 'has_no_changes'; - Memoize::unmemoize '_rev_list'; } sub clear_memoized_mergeinfo_caches { -- cgit v1.2.3 From 822aaf0f0865299d6cd3a6866e8047638e3f898c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 29 Oct 2014 19:31:55 +0000 Subject: Git.pm: add specified name to tempfile template This should help me track down errors in git-svn more easily: write .git/Git_XXXXXX: Bad file descriptor at /usr/lib/perl5/SVN/Ra.pm line 623 Signed-off-by: Eric Wong --- perl/Git.pm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/perl/Git.pm b/perl/Git.pm index 204fdc6737..b5905ee1ad 100644 --- a/perl/Git.pm +++ b/perl/Git.pm @@ -1294,8 +1294,11 @@ sub _temp_cache { $tmpdir = $self->repo_path(); } + my $n = $name; + $n =~ s/\W/_/g; # no strange chars + ($$temp_fd, $fname) = File::Temp::tempfile( - 'Git_XXXXXX', UNLINK => 1, DIR => $tmpdir, + "Git_${n}_XXXXXX", UNLINK => 1, DIR => $tmpdir, ) or throw Error::Simple("couldn't open new temp file"); $$temp_fd->autoflush; -- cgit v1.2.3 From 835e3ddeff2ae75b1fe71388686d44c536f2ed55 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 29 Oct 2014 19:55:02 +0000 Subject: git-svn: prepare SVN::Ra config pieces once Memoizing these initialization functions saves some memory for long fetches which require scanning many unwanted revisions before any wanted revisions happen. Signed-off-by: Eric Wong --- perl/Git/SVN/Ra.pm | 63 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index 5bc5b4e594..75cdac90cf 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -2,6 +2,7 @@ package Git::SVN::Ra; use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/; use strict; use warnings; +use Memoize; use SVN::Client; use Git::SVN::Utils qw( canonicalize_url @@ -76,6 +77,40 @@ sub _auth_providers () { \@rv; } +sub prepare_config_once { + SVN::_Core::svn_config_ensure($config_dir, undef); + my ($baton, $callbacks) = SVN::Core::auth_open_helper(_auth_providers); + my $config = SVN::Core::config_get_config($config_dir); + my $dont_store_passwords = 1; + my $conf_t = $config->{'config'}; + + no warnings 'once'; + # The usage of $SVN::_Core::SVN_CONFIG_* variables + # produces warnings that variables are used only once. + # I had not found the better way to shut them up, so + # the warnings of type 'once' are disabled in this block. + if (SVN::_Core::svn_config_get_bool($conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_AUTH, + $SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS, + 1) == 0) { + SVN::_Core::svn_auth_set_parameter($baton, + $SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS, + bless (\$dont_store_passwords, "_p_void")); + } + if (SVN::_Core::svn_config_get_bool($conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_AUTH, + $SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS, + 1) == 0) { + $Git::SVN::Prompt::_no_auth_cache = 1; + } + + return ($config, $baton, $callbacks); +} # no warnings 'once' + +INIT { + Memoize::memoize '_auth_providers'; + Memoize::memoize 'prepare_config_once'; +} sub new { my ($class, $url) = @_; @@ -84,34 +119,8 @@ sub new { ::_req_svn(); - SVN::_Core::svn_config_ensure($config_dir, undef); - my ($baton, $callbacks) = SVN::Core::auth_open_helper(_auth_providers); - my $config = SVN::Core::config_get_config($config_dir); $RA = undef; - my $dont_store_passwords = 1; - my $conf_t = ${$config}{'config'}; - { - no warnings 'once'; - # The usage of $SVN::_Core::SVN_CONFIG_* variables - # produces warnings that variables are used only once. - # I had not found the better way to shut them up, so - # the warnings of type 'once' are disabled in this block. - if (SVN::_Core::svn_config_get_bool($conf_t, - $SVN::_Core::SVN_CONFIG_SECTION_AUTH, - $SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS, - 1) == 0) { - SVN::_Core::svn_auth_set_parameter($baton, - $SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS, - bless (\$dont_store_passwords, "_p_void")); - } - if (SVN::_Core::svn_config_get_bool($conf_t, - $SVN::_Core::SVN_CONFIG_SECTION_AUTH, - $SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS, - 1) == 0) { - $Git::SVN::Prompt::_no_auth_cache = 1; - } - } # no warnings 'once' - + my ($config, $baton, $callbacks) = prepare_config_once(); my $self = SVN::Ra->new(url => $url, auth => $baton, config => $config, pool => SVN::Pool->new, -- cgit v1.2.3 From 4ae9a7b966e61d25605b575163964f8375e37c9a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 29 Oct 2014 20:10:29 +0000 Subject: git-svn: (cleanup) remove editor param passing Neither find_extra_svk_parents or find_extra_svn_parents ever used the `$ed' parameter. Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 5f9d4690bb..893b9a8bb5 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1433,7 +1433,7 @@ sub check_author { } sub find_extra_svk_parents { - my ($self, $ed, $tickets, $parents) = @_; + my ($self, $tickets, $parents) = @_; # aha! svk:merge property changed... my @tickets = split "\n", $tickets; my @known_parents; @@ -1736,7 +1736,7 @@ sub mergeinfo_changes { # note: this function should only be called if the various dirprops # have actually changed sub find_extra_svn_parents { - my ($self, $ed, $mergeinfo, $parents) = @_; + my ($self, $mergeinfo, $parents) = @_; # aha! svk:merge property changed... memoize_svn_mergeinfo_functions(); @@ -1835,16 +1835,14 @@ sub make_log_entry { my @parents = @$parents; my $props = $ed->{dir_prop}{$self->path}; if ( $props->{"svk:merge"} ) { - $self->find_extra_svk_parents - ($ed, $props->{"svk:merge"}, \@parents); + $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents); } if ( $props->{"svn:mergeinfo"} ) { my $mi_changes = $self->mergeinfo_changes ($parent_path, $parent_rev, $self->path, $rev, $props->{"svn:mergeinfo"}); - $self->find_extra_svn_parents - ($ed, $mi_changes, \@parents); + $self->find_extra_svn_parents($mi_changes, \@parents); } open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!; -- cgit v1.2.3 From da0bc948ac2e01652a150fd4a57cebad6143242c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 30 Oct 2014 08:31:28 +0000 Subject: git-svn: add space after "W:" prefix in warning And minor reformatting while we're in the area. Signed-off-by: Eric Wong --- perl/Git/SVN.pm | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 893b9a8bb5..d9a52a52df 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1798,12 +1798,10 @@ sub find_extra_svn_parents { ); if ($ninc) { - warn "W:svn cherry-pick ignored ($spec) - missing " . + warn "W: svn cherry-pick ignored ($spec) - missing " . "$ninc commit(s) (eg $ifirst)\n"; } else { - warn - "Found merge parent ($spec): ", - $merge_tip, "\n"; + warn "Found merge parent ($spec): ", $merge_tip, "\n"; push @new_parents, $merge_tip; } } -- cgit v1.2.3 From 7ffa35b0479dac547659f06b8a6ea7d31c57cc05 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 31 Oct 2014 10:34:03 +0000 Subject: git-svn: use SVN::Ra::get_dir2 when possible This avoids the following failure with normal "get_dir" on newer versions of SVN (tested with SVN 1.8.8-1ubuntu3.1): Incorrect parameters given: Could not convert '%ld' into a number get_dir2 also has the potential to be more efficient by requesting less data. ref: <1414636504.45506.YahooMailBasic@web172304.mail.ir2.yahoo.com> ref: <1414722617.89476.YahooMailBasic@web172305.mail.ir2.yahoo.com> Signed-off-by: Eric Wong Cc: Hin-Tak Leung --- perl/Git/SVN/Ra.pm | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index 75cdac90cf..622535e217 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -175,7 +175,17 @@ sub get_dir { } } my $pool = SVN::Pool->new; - my ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool); + my ($d, undef, $props); + + if (::compare_svn_version('1.4.0') >= 0) { + # n.b. in addition to being potentially more efficient, + # this works around what appears to be a bug in some + # SVN 1.8 versions + my $kind = 1; # SVN_DIRENT_KIND + ($d, undef, $props) = $self->get_dir2($dir, $r, $kind, $pool); + } else { + ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool); + } my %dirents = map { $_ => { kind => $d->{$_}->kind } } keys %$d; $pool->clear; if ($r != $cache->{r}) { -- cgit v1.2.3