diff options
author | Georgi Georgiev <310867+chutzimir@users.noreply.github.com> | 2022-07-12 13:19:49 +0300 |
---|---|---|
committer | Georgi Georgiev <310867+chutzimir@users.noreply.github.com> | 2022-07-25 03:23:59 +0300 |
commit | 6ebdf5fc3920a08931aa7ce0e9359dea57990249 (patch) | |
tree | 10ee1abbd026d353fc4f38aea208da690bda218f | |
parent | 88649412fdfbd6648fe87cf5e91a2196d37d488e (diff) |
Paranoid mode: Abort if we downloaded a corrupted file
-rwxr-xr-x | apt-mirror | 201 | ||||
-rw-r--r-- | mirror.list | 2 |
2 files changed, 156 insertions, 47 deletions
@@ -114,6 +114,7 @@ my %config_variables = ( "auth_no_challenge" => 0, "no_check_certificate" => 0, "unlink" => 0, + "paranoid" => 0, "postmirror_script" => '$var_path/postmirror.sh', "use_proxy" => 'off', "http_proxy" => '', @@ -131,12 +132,37 @@ my @childrens = (); my %skipclean = (); my %clean_directory = (); my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum); +my %packages_hashes = ( + SHA512 => "SHA512", + SHA256 => "SHA256", + SHA1 => "SHA1", + MD5Sum => "MD5sum", +); +my %sources_hashes = ( + SHA512 => "Checksums-Sha512", + SHA256 => "Checksums-Sha256", + SHA1 => "Checksums-Sha1", + MD5Sum => "Files", +); +my %verify_commands = ( + SHA512 => "sha512sum", + SHA256 => "sha256sum", + SHA1 => "sha1sum", + MD5Sum => "md5sum", +); +my %checksum_filenames = ( + SHA512 => "SHA512", + SHA256 => "SHA256", + SHA1 => "SHA1", + MD5Sum => "MD5", +); # Mapping of files downloaded from a by-hash directory to their canonical locations. my %hashsum_to_files = (); # Mapping of all the checksums for a given canonical filename. my %file_to_hashsums; +my %urls_checksums = (); ###################################################################################### ## Setting up $config_file variable @@ -233,6 +259,33 @@ sub unlock_aptmirror unlink( get_variable("var_path") . "/apt-mirror.lock" ); } +sub delete_corrupted_files +{ + my $stage = shift; + my $found = 0; + foreach my $hash (@hash_strength) + { + my $file = get_variable("var_path") . 
"/${stage}-${hash}"; + if (-s $file) + { + my $pipe; + open $pipe, "-|", qq(env LC_ALL=C ${verify_commands{$hash}} --check --quiet ${file} 2>/dev/null) or die "Cannot run ${verify_commands{$hash}}"; + while (<$pipe>) + { + my ($filename) = /^(.*): FAILED/; + if (-f $filename) + { + $found++; + print "$filename is corrupted, deleting....\n"; + unlink $filename or die "Cannot delete $filename."; + } + } + close $pipe; + } + } + return $found; +} + sub download_urls { my $stage = shift; @@ -259,6 +312,29 @@ sub download_urls } print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n"; + if (get_variable("paranoid")) + { + my %fh = (); + foreach my $hash (@hash_strength) + { + open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die ("apt-mirror: Cannot write to ${stage}-${hash}"); + } + + foreach (@urls) + { + if ($urls_checksums{$_}) + { + my ($hash, $hashsum) = @{$urls_checksums{$_}}; + my $fh = $fh{$hash}; + print $fh $hashsum . " " . sanitise_uri($_) . "\n"; + } + } + foreach my $hash (@hash_strength) + { + close $fh{$hash}; + } + } + my @url_fds; for ($i=0; $i<$nthreads; $i++) { @@ -302,6 +378,14 @@ sub download_urls } print "\nEnd time: " . localtime() . 
"\n\n"; + if (get_variable("paranoid")) + { + if (delete_corrupted_files($stage) > 0) + { + die "Some files were corrupted while downloading, aborting..."; + } + } + if (scalar keys %hashsum_to_files > 0) { foreach my $hashsum_filename (keys %hashsum_to_files) @@ -431,11 +515,12 @@ sub add_url_to_download my $strongest_hash = shift; my $hash = shift; my $hashsum = shift; + my $acquire_by_hash = shift; my $canonical_filename = sanitise_uri($url); $skipclean{$canonical_filename} = 1; - if ($hashsum) + if ($acquire_by_hash) { # If the optional hashsum was passed as an argument # - download the strongest hash only @@ -454,6 +539,8 @@ sub add_url_to_download $hashsum_to_files{$hashsum_filename} ||= []; push @{$hashsum_to_files{$hashsum_filename}}, $canonical_filename; $urls_to_download{$url} = $size; + $urls_checksums{$url} = [ $hash, $hashsum ]; + } else { # We are not going to download using this checksum, but we still # need to know where to put the checksum. @@ -463,6 +550,10 @@ sub add_url_to_download } else { # Not using by-hash, so download the file only. $urls_to_download{$url} = $size; + if ($strongest_hash and ($hash eq $strongest_hash)) + { + $urls_checksums{$url} = [ $hash, $hashsum ]; + } } } @@ -674,11 +765,11 @@ sub find_metadata_in_release my ( $hashsum, $size, $filename, $hash ) = @{$_}; if ($acquire_by_hash) { - add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum ); + add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 1 ); } else { - add_url_to_download( $dist_uri . $filename, $size ); + add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum, 0 ); } } return 1; @@ -762,11 +853,14 @@ download_urls( "index", @index_urls ); %urls_to_download = (); -open FILES_ALL, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)"); -open FILES_NEW, ">" . get_variable("var_path") . 
"/NEW" or die("apt-mirror: can't write to intermediate file (NEW)"); -open FILES_MD5, ">" . get_variable("var_path") . "/MD5" or die("apt-mirror: can't write to intermediate file (MD5)"); -open FILES_SHA1, ">" . get_variable("var_path") . "/SHA1" or die("apt-mirror: can't write to intermediate file (SHA1)"); -open FILES_SHA256, ">" . get_variable("var_path") . "/SHA256" or die("apt-mirror: can't write to intermediate file (SHA256)"); +my %files_fh; + +open $files_fh{ALL}, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)"); +open $files_fh{NEW}, ">" . get_variable("var_path") . "/NEW" or die("apt-mirror: can't write to intermediate file (NEW)"); +foreach my $hash (@hash_strength) +{ + open $files_fh{$hash}, ">" . get_variable("var_path") . "/" . ${checksum_filenames{$hash}} or die("apt-mirror: can't write to intermediate file (${hash})"); +} my %stat_cache = (); @@ -800,18 +894,6 @@ sub need_update return 1; } -sub remove_spaces($) -{ - my $hashref = shift; - foreach ( keys %{$hashref} ) - { - while ( substr( $hashref->{$_}, 0, 1 ) eq ' ' ) - { - substr( $hashref->{$_}, 0, 1 ) = ''; - } - } -} - sub process_index { my $uri = shift; @@ -854,39 +936,65 @@ sub process_index { local $/ = "\n"; chomp $package; - my ( undef, %lines ) = split( /^([\w\-]+:)/m, $package ); + my ( undef, %lines ) = split( /^([\w\-]+): */m, $package ); - $lines{"Directory:"} = "" unless defined $lines{"Directory:"}; chomp(%lines); - remove_spaces( \%lines ); - if ( exists $lines{"Filename:"} ) + if ( exists $lines{"Filename"} ) { # Packages index - $skipclean{ remove_double_slashes( $path . "/" . $lines{"Filename:"} ) } = 1; - print FILES_ALL remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n"; - print FILES_MD5 $lines{"MD5sum:"} . " " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n" if defined $lines{"MD5sum:"}; - print FILES_SHA1 $lines{"SHA1:"} . " " . remove_double_slashes( $path . "/" . 
$lines{"Filename:"} ) . "\n" if defined $lines{"SHA1:"}; - print FILES_SHA256 $lines{"SHA256:"} . " " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n" if defined $lines{"SHA256:"}; - if ( need_update( $mirror . "/" . $lines{"Filename:"}, $lines{"Size:"} ) ) + my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"}); + $skipclean{ $filename } = 1; + print { $files_fh{ALL} } $filename . "\n"; + foreach my $hash (@hash_strength) + { + my $index_hash = $packages_hashes{$hash}; + print { $files_fh{$hash} } $lines{$index_hash} . " " . $filename . "\n" if $lines{$index_hash}; + } + if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) ) { - print FILES_NEW remove_double_slashes( $uri . "/" . $lines{"Filename:"} ) . "\n"; - add_url_to_download( $uri . "/" . $lines{"Filename:"}, $lines{"Size:"} ); + my $hashsum = undef; + my $hash = undef; + foreach $hash (@hash_strength) + { + my $index_hash = $packages_hashes{$hash}; + if ($lines{$index_hash}) + { + $hashsum = ${lines{$index_hash}}; + last; + } + } + print { $files_fh{NEW} } $filename. "\n"; + add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $hash, $hash, $hashsum, 0 ); } } else { # Sources index - foreach ( split( /\n/, $lines{"Files:"} ) ) + $lines{"Directory"} = "" unless defined $lines{"Directory"}; + foreach my $hash (@hash_strength) { - next if $_ eq ''; - my @file = split; - die("apt-mirror: invalid Sources format") if @file != 3; - $skipclean{ remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) } = 1; - print FILES_ALL remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n"; - print FILES_MD5 $file[0] . " " . remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n"; - if ( need_update( $mirror . "/" . $lines{"Directory:"} . "/" . 
$file[2], $file[1] ) ) + my $index_hash = $sources_hashes{$hash}; + if ($lines{$index_hash}) { - print FILES_NEW remove_double_slashes( $uri . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n"; - add_url_to_download( $uri . "/" . $lines{"Directory:"} . "/" . $file[2], $file[1] ); + foreach ( split( /\n/, $lines{$index_hash} ) ) + { + next if $_ eq ''; + my @file = split; + die("apt-mirror: invalid Sources format") if @file != 3; + my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2]; + my $filename = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] ); + print { $files_fh{$hash} } $file[0] . " " . ${filename} . "\n"; + + unless ($skipclean{ $filename }) + { + $skipclean{ $filename } = 1; + print { $files_fh{ALL} } ${filename} . "\n"; + if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) ) + { + print { $files_fh{NEW} } ${download_url} . "\n"; + add_url_to_download( $uri . "/" . $lines{"Directory"} . "/" . $file[2], $file[1], $hash, $hash, $file[0], 0 ); + } + } + } } } } @@ -938,11 +1046,10 @@ clear_stat_cache(); print "]\n\n"; -close FILES_ALL; -close FILES_NEW; -close FILES_MD5; -close FILES_SHA1; -close FILES_SHA256; +foreach my $fh (values %files_fh) +{ + close $fh; +} ###################################################################################### ## Main download diff --git a/mirror.list b/mirror.list index dbd4179..6f37b3e 100644 --- a/mirror.list +++ b/mirror.list @@ -10,6 +10,8 @@ set limit_rate 100m set _tilde 0 # Use --unlink with wget (for use with hardlinked directories) set unlink 1 +# Verify downloaded files checksums and abort if detected corruption +set paranoid 1 set use_proxy off set http_proxy 127.0.0.1:3128 set proxy_user user |