diff options
author | Jacob Vosmaer <jacob@gitlab.com> | 2018-10-16 19:44:41 +0300 |
---|---|---|
committer | Jacob Vosmaer <jacob@gitlab.com> | 2018-10-16 19:44:41 +0300 |
commit | 0fa8caf1a1d393bb4dcfb1ef82483e0c2985a544 (patch) | |
tree | 002559732cfb44705b9502f29793d7322ffd20e8 | |
parent | 0eb740be6489c8b881820f99b8468705c2581a50 (diff) |
Add example for "leave pool" scenario
-rw-r--r-- | helper.rb | 28 | ||||
-rwxr-xr-x | test-dedup | 98 |
2 files changed, 69 insertions, 57 deletions
diff --git a/helper.rb b/helper.rb new file mode 100644 index 000000000..5700ede38 --- /dev/null +++ b/helper.rb @@ -0,0 +1,28 @@ +def run_pipeline(pipeline, dir) + warn "#{File.basename(dir)}$ #{pipeline.map { |c| c.join(' ') }.join(' | ')}" + + statuses = Open3.pipeline(*pipeline, chdir: dir) + + statuses.all? { |s| s && s.success? } +end + +def run_pipeline!(pipeline, dir) + abort "failed" unless run_pipeline(pipeline, dir) +end + +# Note: tricks with the 'dir' argument and File.basename are there only +# to make the script output prettier. +def run!(cmd, dir=nil) + abort "failed" unless run(cmd, dir) +end + +def run(cmd, dir=nil) + dir ||= Dir.pwd + cmd_s = cmd.join(' ') + warn "#{File.basename(dir)}$ #{cmd_s}" + start = Time.now + status = system(*cmd, chdir: dir) + delta = Time.now - start + warn sprintf("time: %.3fs\n", delta) if delta > 1.0 + status +end diff --git a/test-dedup b/test-dedup index 6f63bd460..15962a115 100755 --- a/test-dedup +++ b/test-dedup @@ -3,6 +3,8 @@ require 'open3' require 'tempfile' require 'fileutils' +require_relative 'helper' + TEST_REPO = ENV.fetch('TEST_REPO') TMP_ROOT = File.absolute_path(Dir.mktmpdir) @@ -15,38 +17,39 @@ def main children = {} %w[repo1 repo2 repo3].each { |name| children[name] = create_child(name) } + # Create a pool based on repo1 + # pool = File.join(TMP_ROOT, 'pool.git') - source_name, source_path = children.first - - # This creates a bunch of refs in the top-level namespace we don't want. - # However, they speed up the first fetch, so we keep them for now. + source_name, source_path = 'repo1', children['repo1'] create_pool_repository(source_name, source_path, pool) - show_sizes([pool]) - link_repository_to_pool(pool, source_name, source_path) + show_sizes([pool, children['repo1']]) - children.each do |name, path| - next if name == source_name - - link_repository_to_pool(pool, name, path) - end - - run!(%w[git remote -v], pool) - show_sizes([pool] + children.values) - - children.each do |_, path| - # -l is important. Not sure about -A vs -a - run!(%w[git repack -Ald --quiet], path) - end - - show_sizes([pool] + children.values) - + # Clone a new repo, repo4, from repo1 (fork parent) within the pool + # target_name, target_path = 'repo4', child_path('repo4') children[target_name] = target_path - prepare_clone_in_pool(pool, children['repo2'], target_name, target_path) + prepare_clone_in_pool(pool, source_path, target_name, target_path) link_repository_to_pool(pool, target_name, target_path) + show_sizes([pool, source_path, target_path]) - show_sizes([pool] + children.values) + # Repo4 leaves pool (e.g. fork switches to private) + # + repo_name, repo_path = 'repo4', child_path('repo4') + # To make things interesting, ensure repo4 is deduplicated + run!(%w[git repack --quiet -Ald], repo_path) + run!(%w[find objects -type f], repo_path) + show_sizes([repo_path]) + + # Enter critical section where repo may not receive pushes (???) + prepare_leave_pool(repo_path) + unlink_repository_from_pool(pool, repo_name, repo_path) + # Exit critical section + + # Sanity checks + run!(%w[find objects -type f], repo_path) + run!(%w[git fsck --connectivity-only], repo_path) + show_sizes([repo_path]) end def create_pool_repository(source_name, source_path, pool) @@ -99,19 +102,6 @@ def delete_top_level_refs(repo) ], repo) end -def run_pipeline(pipeline, dir) - warn "#{File.basename(dir)}$ #{pipeline.map { |c| c.join(' ') }.join(' | ')}" - - statuses = Open3.pipeline(*pipeline, chdir: dir) - - statuses.all? { |s| s && s.success? } -end - -def run_pipeline!(pipeline, dir) - abort "failed" unless run_pipeline(pipeline, dir) -end - - def show_sizes(paths) paths.each do |p| run!(%W[du -sh #{File.basename(p)}], File.dirname(p)) @@ -137,33 +127,27 @@ end def prepare_clone_in_pool(pool, source_path, target_name, target_path) puts '--- PrepareCloneInPool' - # The --reference options is the secret sauce that prevents copying - # objects that exist in the pool. After this clone, - # objects/info/alternates is already set up but it points to an absolute - # path. This is not what we want. This gets fixed later by - # link_repository_to_pool. - # - run!(%W[git clone --quiet --bare --reference #{File.basename(pool)} #{File.basename(source_path)} #{File.basename(target_path)}], File.dirname(target_path)) + # After this clone, objects/info/alternates is already set up but it + # points to an absolute path. This is not what we want. This gets fixed + # later by link_repository_to_pool. + # + run!(%W[git clone --quiet --bare --local #{File.basename(source_path)} #{File.basename(target_path)}], File.dirname(target_path)) clean_child(target_path) puts '---' end -# Note: tricks with the 'dir' argument and File.basename are there only -# to make the script output prettier. -def run!(cmd, dir=nil) - abort "failed" unless run(cmd, dir) +def prepare_leave_pool(repo_path) + puts '--- PrepareLeavePool' + run!(%w[git repack --quiet -a], repo_path) + puts '---' end -def run(cmd, dir=nil) - dir ||= Dir.pwd - cmd_s = cmd.join(' ') - warn "#{File.basename(dir)}$ #{cmd_s}" - start = Time.now - status = system(*cmd, chdir: dir) - delta = Time.now - start - warn sprintf("time: %.3fs\n", delta) if delta > 1.0 - status +def unlink_repository_from_pool(pool, repo_name, repo_path) + puts '--- UnlinkRepositoryFromPool' + run!(%w[rm objects/info/alternates], repo_path) + run!(%W[git remote remove #{repo_name}], pool) + puts '---' end main |