diff options
author | Brandon Holtsclaw <me@brandonholtsclaw.com> | 2007-12-01 01:39:12 +0300 |
---|---|---|
committer | Brandon Holtsclaw <me@brandonholtsclaw.com> | 2007-12-01 01:39:12 +0300 |
commit | dc3d065f4c4c8da49abba61ff1f3bc2f9db6ae7d (patch) | |
tree | 6166b37b1e9a27283b705da819fe70afb396ac54 |
initial import0.4.5
-rw-r--r-- | CHANGELOG | 12 | ||||
-rw-r--r-- | TODO | 2 | ||||
-rwxr-xr-x | apt-mirror | 583 | ||||
-rw-r--r-- | mirror.list | 69 |
4 files changed, 666 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..b497af1 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,12 @@ +0.4.5 + ++added mipsel, armel, and lpia arch support ++fixed the tilde replace code so it works with 2 or more tilde + in the package names ++default _tilde option set to 0 ++minor updates to the manpage wordings ++new _stat function to cache and speed up parsing ++download size now in human readable numbers ( KiB,MiB,GiB,etc ) + +0.4.4 +changes documented in the debian changelog @@ -0,0 +1,2 @@ +*future plans* + diff --git a/apt-mirror b/apt-mirror new file mode 100755 index 0000000..da8545c --- /dev/null +++ b/apt-mirror @@ -0,0 +1,583 @@ +#!/usr/bin/perl -w + +=pod + +=head1 NAME + +apt-mirror - apt sources mirroring tool + +=head1 SYNOPSIS + +apt-mirror [configfile] + +=head1 DESCRIPTION + +A small and efficient tool that lets you mirror a part of or +the whole Debian GNU/Linux distribution or any other apt sources. + +Main features: + * It uses a config similar to apts F<sources.list> + * It's fully pool comply + * It supports multithreaded downloading + * It supports multiple architectures at the same time + * It can automatically remove unneeded files + * It works well on overloaded channel to internet + * It never produces an inconsistent mirror including while mirroring + * It works on all POSIX complied systems with perl and wget + +=head1 COMMENTS + +apt-mirror uses F</etc/apt/mirror.list> as a configuration file. +By default it is tuned to official debian mirror in Finland. Change +it for your needs. + +After you setup the configuration file you may run as root: + + # su - apt-mirror -c apt-mirror + +Or uncomment line in F</etc/cron.d/apt-mirror> to enable daily mirror upgrades. + +=head1 FILES + +F</etc/apt/mirror.list> + Main configuration file + +F</etc/cron.d/apt-mirror> + Cron configuration template + +F</var/spool/apt-mirror/mirror> + Mirror places here + +F</var/spool/apt-mirror/skel> + Place for temporarily downloaded indexes + +F</var/spool/apt-mirror/var> + Log files placed here. URLs and MD5 summs also here. + +=head1 KNOWN BUGS + +Do not specify default http port (80) explicitly. (i.e. deb http://server:80/ sid ...) + +=head1 AUTHOR + +Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt> + +=cut + +use strict; +use File::Copy; +use File::Path; +use File::Basename; + +my $config_file; + +my %config_variables = ( + "defaultarch" => `dpkg --print-installation-architecture 2>/dev/null` || 'i386', + "nthreads" => 20, + "base_path" => '/var/spool/apt-mirror', + "mirror_path" => '$base_path/mirror', + "skel_path" => '$base_path/skel', + "var_path" => '$base_path/var', + "cleanscript" => '$var_path/clean.sh', + "_contents" => 1, + "_autoclean" => 0, + "_tilde" => 0 +); + +my @config_binaries = (); +my @config_sources = (); + +my @index_urls; +my @childrens = (); +my %skipclean = (); +my %clean_directory = (); + + +###################################################################################### +## Setting up $config_file variable + +$config_file = "/etc/apt/mirror.list"; # Default value +if($_ = shift) { + die("apt-mirror: invalid config file specified") unless -f $_; + $config_file = $_; +} + +chomp $config_variables{"defaultarch"}; + +###################################################################################### +## Common subroutines + +sub round_number +{ + my $n = shift; + my $minus = $n < 0 ? '-' : ''; + $n = abs($n); + $n = int(($n + .05) * 10) / 10; + $n .= '.0' unless $n =~ /\./; + $n .= '0' if substr($n,(length($n) - 1),1) eq '.'; + chop $n if $n =~ /\.\d\d0$/; + return "$minus$n"; +} + +sub format_bytes { + my $bytes = shift; + my $bytes_out = '0'; + my $size_name = 'bytes'; + my $KiB = 1024; + my $MiB = 1024 * 1024; + my $GiB = 1024 * 1024 * 1024; + + if ($bytes >= $KiB) { + $bytes_out = $bytes / $KiB; + $size_name = 'KiB'; + if ($bytes >= $MiB) { + $bytes_out = $bytes / $MiB; + $size_name = 'MiB'; + if ($bytes >= $GiB) { + $bytes_out = $bytes / $GiB; + $size_name = 'GiB'; + } + } + } else { + $bytes_out = $bytes; + $size_name = 'bytes'; + } + + $bytes_out = round_number($bytes_out); + return "$bytes_out $size_name"; +} + +sub get_variable { + my $value = $config_variables{shift @_}; + my $count = 16; + while($value =~ s/\$(\w+)/$config_variables{$1}/xg) { + die("apt-mirror: too many substitution while evaluating variable") if ($count --) < 0; + } + return $value; +} + +sub download_urls { + my $stage = shift; + my @urls; + my $i = 0; + my $pid; + my $nthreads = get_variable("nthreads"); + local $| = 1; + + @urls = @_; + $nthreads = @urls if @urls < $nthreads; + + print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n"; + + while(scalar @urls) { + my @part = splice(@urls, 0, int(@urls / $nthreads)); + open URLS, ">" . get_variable("var_path") . "/$stage-urls.$i" or die("apt-mirror: can't write to intermediate file ($stage-urls.$i)"); + foreach (@part) { print URLS "$_\n"; } + close URLS or die("apt-mirror: can't close intermediate file ($stage-urls.$i)"); + + $pid = fork(); + + die("apt-mirror: can't do fork in download_urls") if $pid < 0; + + if($pid == 0) { +# print " * Running wget $i...\n"; + exec 'wget', '-t', '0', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$i", '-i', get_variable("var_path") . "/$stage-urls.$i"; +# print " * Can't exec $i...\n"; +# unlink get_variable("var_path") . "/URLS.$i"; +# unlink get_variable("var_path") . "/wget-log.$i"; +# exit 1; + } + + push @childrens, $pid; + $i++; $nthreads--; + } + + print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... "; + while(scalar @childrens) { + my $dead = wait(); + @childrens = grep { $_ != $dead } @childrens; + print "[" . scalar(@childrens) . "]... "; + } + print "\nEnd time: " . localtime() . "\n\n"; +} + + +###################################################################################### +## Parse config + +open CONFIG, "<$config_file" or die("apt-mirror: can't open config file ($config_file)"); +while(<CONFIG>) { + next if /^\s*#/; + next unless /\S/; + my @config_line = split; + my $config_line = shift @config_line; + + if($config_line eq "set") { + $config_variables{$config_line[0]} = $config_line[1]; + next; + } + + if($config_line eq "deb") { + push @config_binaries, [get_variable("defaultarch"), @config_line]; + next; + } + + if($config_line =~ /deb-(alpha|amd64|armel|arm|hppa|hurd-i386|i386|ia64|lpia|m68k|mipsel|mips|powerpc|s390|sh|sparc)/) { + push @config_binaries, [$1, @config_line]; + next; + } + + if($config_line eq "deb-src") { + push @config_sources, [@config_line]; + next; + } + + if($config_line eq "skip-clean") { + $config_line[0] =~ s[^(\w+)://][]; + $config_line[0] =~ s[/$][]; + $config_line[0] =~ s[~][%7E]g if get_variable("_tilde"); + $skipclean{$config_line[0]} = 1; + next; + } + + if($config_line eq "clean") { + $config_line[0] =~ s[^(\w+)://][]; + $config_line[0] =~ s[/$][]; + $config_line[0] =~ s[~][%7E]g if get_variable("_tilde"); + $clean_directory{$config_line[0]} = 1; + next; + } + + die("apt-mirror: invalid line in config file ($.: $config_line ...)"); +} +close CONFIG; + +die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch"); + + +###################################################################################### +## Skel download + +my %urls_to_download = (); +my ($url, $arch); + +sub remove_double_slashes { + local $_ = shift; + while(s[/\./][/]g) {} + while(s[(?<!:)//][/]g) {} + while(s[(?<!:/)/[^/]+/\.\./][/]g) {} + s/~/\%7E/g if get_variable("_tilde"); + return $_; +} + +sub add_url_to_download { + my $url = remove_double_slashes(shift); + $urls_to_download{$url} = shift; +} + +foreach (@config_sources) { + my ($uri, $distribution, @components) = @{$_}; + + if(@components) { + $url = $uri . "/dists/" . $distribution . "/"; + + add_url_to_download($url . "Release"); + add_url_to_download($url . "Release.gpg"); + foreach (@components) { + add_url_to_download($url . $_ . "/source/Release"); + add_url_to_download($url . $_ . "/source/Sources.gz"); + } + } else { + add_url_to_download($uri . "/$distribution/Sources.gz"); + } +} + +foreach (@config_binaries) { + my ($arch, $uri, $distribution, @components) = @{$_}; + + if(@components) { + $url = $uri . "/dists/" . $distribution . "/"; + + add_url_to_download($url . "Release"); + add_url_to_download($url . "Release.gpg"); + add_url_to_download($url . "Contents-" . $arch . ".gz") if get_variable("_contents"); + foreach (@components) { + add_url_to_download($url . $_ . "/binary-" . $arch . "/Release"); + add_url_to_download($url . $_ . "/binary-" . $arch . "/Packages.gz"); + } + } else { + add_url_to_download($uri . "/$distribution/Packages.gz"); + } +} + +chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); +@index_urls = sort keys %urls_to_download; +download_urls("index", @index_urls); + +foreach (keys %urls_to_download) { + s[^(\w+)://][]; + s[~][%7E]g if get_variable("_tilde"); + $skipclean{$_} = 1; + $skipclean{$_} = 1 if s[\.gz$][]; +} + +###################################################################################### +## Main download prepair + +%urls_to_download = (); + +open FILES_ALL, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)"); +open FILES_NEW, ">" . get_variable("var_path") . "/NEW" or die("apt-mirror: can't write to intermediate file (NEW)"); +open FILES_MD5, ">" . get_variable("var_path") . "/MD5" or die("apt-mirror: can't write to intermediate file (MD5)"); + +my %stat_cache = (); + +sub _stat { + my ($filename) = shift; + return @{$stat_cache{$filename}} if exists $stat_cache{$filename}; + my @res = stat($filename); + $stat_cache{$filename} = \@res; + return @res; +} + +sub clear_stat_cache { + %stat_cache = (); +} + +sub need_update { + my $filename = shift; + my $size_on_server = shift; + + my (undef, undef, undef, undef, undef, undef, undef, $size) = _stat($filename); + + return 1 unless($size); + return 0 if $size_on_server == $size; + return 1; +} + +sub remove_spaces($) { + my $hashref = shift; + foreach (keys %{$hashref}) { + while(substr($hashref->{$_}, 0, 1) eq ' ') { + substr($hashref->{$_}, 0, 1) = ''; + } + } +} + +sub proceed_index_gz { + my $uri = shift; + my $index = shift; + my ($path, $package, $mirror, $files) = ''; + + $path = $uri; + $path =~ s[^(\w+)://][]; + $path =~ s/~/\%7E/g if get_variable("_tilde"); + local $/ = "\n\n"; + $mirror = get_variable("mirror_path") . "/" . $path; + + if($index =~ s/\.gz$//) { + system("gunzip < $path/$index.gz > $path/$index"); + } + + open STREAM, "<$path/$index" or die("apt-mirror: can't open index in proceed_index_gz"); + + while($package = <STREAM>) { + local $/ = "\n"; + chomp $package; + my (undef, %lines) = split(/^([\w\-]+:)/m, $package); + + chomp(%lines); + remove_spaces(\%lines); + + if(exists $lines{"Filename:"}) { # Packages index + $skipclean{remove_double_slashes($path . "/" . $lines{"Filename:"})} = 1; + print FILES_ALL remove_double_slashes($path . "/" . $lines{"Filename:"}) . "\n"; + print FILES_MD5 $lines{"MD5sum:"} . " " . remove_double_slashes($path . "/" . $lines{"Filename:"}) . "\n"; + if(need_update($mirror . "/" . $lines{"Filename:"}, $lines{"Size:"})) { + print FILES_NEW remove_double_slashes($uri . "/" . $lines{"Filename:"}) . "\n"; + add_url_to_download($uri . "/" . $lines{"Filename:"}, $lines{"Size:"}); + } + } else { # Sources index + foreach (split(/\n/, $lines{"Files:"})) { + next if $_ eq ''; + my @file = split; + die("apt-mirror: invalid Sources format") if @file != 3; + $skipclean{remove_double_slashes($path . "/" . $lines{"Directory:"} . "/" . $file[2])} = 1; + print FILES_ALL remove_double_slashes($path . "/" . $lines{"Directory:"} . "/" . $file[2]) . "\n"; + print FILES_MD5 $file[0] . " " . remove_double_slashes($path . "/" . $lines{"Directory:"} . "/" . $file[2]) . "\n"; + if(need_update($mirror . "/" . $lines{"Directory:"} . "/" . $file[2], $file[1])) { + print FILES_NEW remove_double_slashes($uri . "/" . $lines{"Directory:"} . "/" . $file[2]) . "\n"; + add_url_to_download($uri . "/" . $lines{"Directory:"} . "/" . $file[2], $file[1]); + } + } + } + } + + close STREAM; +} + +print "Proceed indexes: ["; + +foreach (@config_sources) { + my ($uri, $distribution, @components) = @{$_}; + print "S"; + if(@components) { + my $component; + foreach $component (@components) { + proceed_index_gz($uri, "/dists/$distribution/$component/source/Sources.gz"); + } + } else { + proceed_index_gz($uri, "/$distribution/Sources.gz"); + } +} + +foreach (@config_binaries) { + my ($arch, $uri, $distribution, @components) = @{$_}; + print "P"; + if(@components) { + my $component; + foreach $component (@components) { + proceed_index_gz($uri, "/dists/$distribution/$component/binary-$arch/Packages.gz"); + } + } else { + proceed_index_gz($uri, "/$distribution/Packages.gz"); + } +} + +clear_stat_cache(); + +print "]\n\n"; + +close FILES_ALL; +close FILES_NEW; +close FILES_MD5; + + +###################################################################################### +## Main download + +chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror"); + +my $need_bytes = 0; +foreach (values %urls_to_download) { + $need_bytes += $_; +} + +my $size_output = format_bytes($need_bytes); + +print "$size_output will be downloaded into archive.\n"; + +download_urls("archive", sort keys %urls_to_download); + + +###################################################################################### +## Copy skel to main archive + +sub copy_file { + my ($from, $to) = @_; + my $dir = dirname($to); + return unless -f $from; + mkpath($dir) unless -d $dir; + unless(copy($from, $to)) { + warn("apt-mirror: can't copy $from to $to"); + return; + } + my ($atime,$mtime) = ( stat($from) ) [8, 9]; + utime($atime, $mtime, $to) or die("apt-mirror: can't utime $to"); +} + +foreach (@index_urls) { + die("apt-mirror: invalid url in index_urls") unless s[^(\w+)://][]; + copy_file(get_variable("skel_path") . "/$_", get_variable("mirror_path") . "/$_"); + copy_file(get_variable("skel_path") . "/$_", get_variable("mirror_path") . "/$_") if(s/\.gz$//); +} + + +###################################################################################### +## Make cleaning script + +my (@rm_dirs, @rm_files) = (); +my $unnecessary_bytes = 0; + +sub process_symlink { + return 1; # symlinks are always needed +} + +sub process_file { + my $file = shift; + $file =~ s[~][%7E]g if get_variable("_tilde"); + return 1 if $skipclean{$file}; + $file =~ s[%7E][~] if get_variable("_tilde"); + push @rm_files, $file; + my (undef, undef, undef, undef, undef, undef, undef, $size, undef, undef, undef, undef, $blocks) = stat($file); + $unnecessary_bytes += $blocks * 512; + return 0; +} + +sub process_directory { + my $dir = shift; + my $is_needed = 0; + return 1 if $skipclean{$dir}; + opendir(DIR, $dir) or die "apt-mirror: can't opendir $dir: $!"; + foreach (grep { !/^\.$/ && !/^\.\.$/ } readdir(DIR)) { + my $item = $dir . "/". $_; + $is_needed |= process_directory($item) if -d $item && ! -l $item; + $is_needed |= process_file($item) if -f $item; + $is_needed |= process_symlink($item) if -l $item; + } + closedir DIR; + push @rm_dirs, $dir unless $is_needed; + return $is_needed; +} + +chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror"); + +foreach (keys %clean_directory) { + process_directory($_) if -d $_ && ! -l $_; +} + +open CLEAN, ">" . get_variable("cleanscript") or die("apt-mirror: can't open clean script file"); + +my ($i, $total) = (0, scalar @rm_files); + +if(get_variable("_autoclean")) { + + my $size_output = format_bytes($unnecessary_bytes); + print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed..."; + + chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror"); + + foreach (@rm_files) { unlink $_; } + foreach (@rm_dirs) { rmdir $_; } + +} else { + + my $size_output = format_bytes($unnecessary_bytes); + print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n"; + print "Run " . get_variable("cleanscript") . " for this purpose.\n\n"; + + print CLEAN "cd " . get_variable("mirror_path") . " || exit 1\n\n"; + print CLEAN "echo 'Removing $total unnecessary files [$unnecessary_bytes bytes]...'\n"; + foreach (@rm_files) { + print CLEAN "rm -f '$_'\n"; + print CLEAN "echo -n '[" . int(100 * $i/$total) . "\%]'\n" unless $i % 500; + print CLEAN "echo -n .\n" unless $i % 10; + $i ++; + } + print CLEAN "echo 'done.'\n"; + print CLEAN "echo\n\n"; + + $i = 0; $total = scalar @rm_dirs; + print CLEAN "echo 'Removing $total unnecessary directories...'\n"; + foreach (@rm_dirs) { + print CLEAN "rmdir '$_'\n"; + print CLEAN "echo -n '[" . int(100 * $i/$total) . "\%]'\n" unless $i % 50; + print CLEAN "echo -n .\n"; + $i ++; + } + print CLEAN "echo 'done.'\n"; + print CLEAN "echo\n"; + + close CLEAN; + +} diff --git a/mirror.list b/mirror.list new file mode 100644 index 0000000..e5c3b7f --- /dev/null +++ b/mirror.list @@ -0,0 +1,69 @@ +# apt-mirror configuration file + +## +## The default configuration options (uncomment and change to override) +## +# +# set base_path /var/spool/apt-mirror +# set mirror_path $base_path/mirror +# set skel_path $base_path/skel +# set var_path $base_path/var +# +# set defaultarch <running host architecture> +# set nthreads 20 +# + + +## +## Example sources +## + +# sarge's section +deb http://ftp.fi.debian.org/debian sarge main contrib non-free +deb-src http://ftp.fi.debian.org/debian sarge main contrib non-free + +deb http://security.debian.org/debian-security sarge/updates main contrib non-free +deb-src http://security.debian.org/debian-security sarge/updates main contrib non-free + +deb http://ftp.fi.debian.org/debian sarge main/debian-installer + + +# sarge-proposed-updates's section +deb http://ftp.fi.debian.org/debian sarge-proposed-updates main contrib non-free +deb-src http://ftp.fi.debian.org/debian sarge-proposed-updates main contrib non-free + + +# sid's section +deb http://ftp.fi.debian.org/debian sid main contrib non-free +deb-src http://ftp.fi.debian.org/debian sid main contrib non-free + +deb http://ftp.fi.debian.org/debian sid main/debian-installer + + +# experimental section +deb http://ftp.fi.debian.org/debian ../project/experimental main contrib non-free +deb-src http://ftp.fi.debian.org/debian ../project/experimental main contrib non-free + + +## +## Cleaner configuration example +## +# +# set cleanscript $var_path/clean.sh +# + +# Cleaning section +clean http://security.debian.org/ +clean http://ftp.fi.debian.org/ + + +skip-clean http://ftp.fi.debian.org/doc/ +skip-clean http://ftp.fi.debian.org/tools/ +skip-clean http://ftp.fi.debian.org/debian-cd/ +skip-clean http://ftp.fi.debian.org/debian-minicd/ +skip-clean http://ftp.fi.debian.org/debian/dists/sarge/main/installer-i386/ +skip-clean http://ftp.fi.debian.org/debian/dists/sid/main/installer-i386/ +skip-clean http://ftp.fi.debian.org/debian/doc/ +skip-clean http://ftp.fi.debian.org/debian/tools/ +skip-clean http://ftp.fi.debian.org/debian/project/ +skip-clean http://ftp.fi.debian.org/debian-non-US/project/ |