diff options
author | Jeremy T. Bouse <jbouse@users.noreply.github.com> | 2022-10-17 16:47:56 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-17 16:47:56 +0300 |
commit | 207f8a51c0c7ae7f2fe0ad23242b6976950347df (patch) | |
tree | 5984ae1d8729b8ae9bdafb1a0c4c0c749d601a97 | |
parent | 2258cc160b3e4d1398f94b11e7ab2802795fe970 (diff) | |
parent | 0a7160213af172284c4090ed29f4aec45d77e428 (diff) |
Merge pull request #156 from chutzimir/download-by-hash
Respect the "Acquire-By-Hash" field if present (download `by-hash`)
-rwxr-xr-x | apt-mirror | 574 |
1 files changed, 266 insertions, 308 deletions
@@ -124,10 +124,18 @@ my %config_variables = ( my @config_binaries = (); my @config_sources = (); +my @release_urls; my @index_urls; my @childrens = (); my %skipclean = (); my %clean_directory = (); +my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum); + +# Mapping of files downloaded from a by-hash directory to their canonical locations. +my %hashsum_to_files = (); + +# Mapping of all the checksums for a given canonical filename. +my %file_to_hashsums; ###################################################################################### ## Setting up $config_file variable @@ -282,6 +290,18 @@ sub download_urls print "[" . scalar(@childrens) . "]... "; } print "\nEnd time: " . localtime() . "\n\n"; + + if (scalar keys %hashsum_to_files > 0) + { + foreach my $hashsum_filename (keys %hashsum_to_files) + { + foreach my $filename (@{$hashsum_to_files{$hashsum_filename}}) + { + copy_file( $hashsum_filename, $filename ); + } + } + } + } ## Parse config @@ -387,7 +407,45 @@ sub remove_double_slashes sub add_url_to_download { my $url = remove_double_slashes(shift); - $urls_to_download{$url} = shift; + my $size = shift; + my $strongest_hash = shift; + my $hash = shift; + my $hashsum = shift; + + my $canonical_filename = $url; + $canonical_filename =~ s[^(\w+)://][]; + $canonical_filename =~ s[~][%7E]g if get_variable("_tilde"); + $skipclean{$canonical_filename} = 1; + + if ($hashsum) + { + # If the optional hashsum was passed as an argument + # - download the strongest hash only + # - make a copy to the canonical location + # - make a copy for the other known hash versions + + $url = dirname($url) . "/by-hash/${hash}/${hashsum}"; + + my $hashsum_filename = dirname($canonical_filename) . "/by-hash/${hash}/${hashsum}"; + $skipclean{$hashsum_filename} = 1; + + if ($hash eq $strongest_hash) + { + # This is the strongest hash, which is the one to download. + # Also need to remember to which canonical location it should be linked. + $hashsum_to_files{$hashsum_filename} ||= []; + push @{$hashsum_to_files{$hashsum_filename}}, $canonical_filename; + $urls_to_download{$url} = $size; + } else { + # We are not going to download using this checksum, but we still + # need to know where to put the checksum. + $file_to_hashsums{$canonical_filename} ||= []; + push @{$file_to_hashsums{$canonical_filename}}, $hashsum_filename; + } + } else { + # Not using by-hash, so download the file only. + $urls_to_download{$url} = $size; + } } foreach (@config_sources) @@ -397,26 +455,16 @@ foreach (@config_sources) if (@components) { $url = $uri . "/dists/" . $distribution . "/"; - - add_url_to_download( $url . "InRelease" ); - add_url_to_download( $url . "Release" ); - add_url_to_download( $url . "Release.gpg" ); - foreach (@components) - { - add_url_to_download( $url . $_ . "/source/Release" ); - add_url_to_download( $url . $_ . "/source/Sources.gz" ); - add_url_to_download( $url . $_ . "/source/Sources.bz2" ); - add_url_to_download( $url . $_ . "/source/Sources.xz" ); - } } else { - add_url_to_download( $uri . "/$distribution/Release" ); - add_url_to_download( $uri . "/$distribution/Release.gpg" ); - add_url_to_download( $uri . "/$distribution/Sources.gz" ); - add_url_to_download( $uri . "/$distribution/Sources.bz2" ); - add_url_to_download( $uri . "/$distribution/Sources.xz" ); + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + $url = $uri . "/" . $distribution . "/"; } + + add_url_to_download( $url . "InRelease" ); + add_url_to_download( $url . "Release" ); + add_url_to_download( $url . "Release.gpg" ); } foreach (@config_binaries) @@ -427,56 +475,25 @@ foreach (@config_binaries) { $url = $uri . "/dists/" . $distribution . "/"; - add_url_to_download( $url . "InRelease" ); - add_url_to_download( $url . "Release" ); - add_url_to_download( $url . "Release.gpg" ); - if ( get_variable("_contents") ) - { - add_url_to_download( $url . "Contents-" . $arch . ".gz" ); - add_url_to_download( $url . "Contents-" . $arch . ".bz2" ); - add_url_to_download( $url . "Contents-" . $arch . ".xz" ); - } - foreach (@components) - { - if ( get_variable("_contents") ) - { - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".gz" ); - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".bz2" ); - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".xz" ); - } - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Release" ); - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.gz" ); - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.bz2" ); - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.xz" ); - add_url_to_download( $url . $_ . "/i18n/Index" ); - } } else { - add_url_to_download( $uri . "/$distribution/Release" ); - add_url_to_download( $uri . "/$distribution/Release.gpg" ); - add_url_to_download( $uri . "/$distribution/Packages.gz" ); - add_url_to_download( $uri . "/$distribution/Packages.bz2" ); - add_url_to_download( $uri . "/$distribution/Packages.xz" ); + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + $url = $uri . "/" . $distribution . "/"; } -} -chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); -@index_urls = sort keys %urls_to_download; -download_urls( "index", @index_urls ); + add_url_to_download( $url . "InRelease" ); + add_url_to_download( $url . "Release" ); + add_url_to_download( $url . "Release.gpg" ); -foreach ( keys %urls_to_download ) -{ - s[^(\w+)://][]; - s[~][%7E]g if get_variable("_tilde"); - $skipclean{$_} = 1; - $skipclean{$_} = 1 if s[\.gz$][]; - $skipclean{$_} = 1 if s[\.bz2$][]; - $skipclean{$_} = 1 if s[\.xz$][]; } +chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); +@release_urls = sort keys %urls_to_download; +download_urls( "release", @release_urls ); + ###################################################################################### -## Translation index download +## Download all relevant metadata %urls_to_download = (); @@ -489,39 +506,115 @@ sub sanitise_uri return $uri; } -sub find_translation_files_in_release +sub find_metadata_in_release { - # Look in the dists/$DIST/Release file for the translation files that belong - # to the given component. + # Look in the Release file for any files we need to download + my ( $arch, $uri, $distribution, @components ) = @_; - my $dist_uri = shift; - my $component = shift; my ( $release_uri, $release_path, $line ) = ''; + my $component_regex = undef; + my $arch_regex = "(?:${arch}|all)"; + my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz))$'; + my $dist_uri; + my $hash_type_regex = "(?:" . join("|", @hash_strength) . ")"; + + if (@components) + { + $dist_uri = remove_double_slashes($uri . "/dists/" . $distribution . "/"); + $component_regex = "(?:" . join("|", @components) . ")"; + } + else { + $dist_uri = remove_double_slashes($uri . "/" . $distribution . "/"); + } - $release_uri = $dist_uri . "Release"; - $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); + my $stream; + foreach my $release_filename ("InRelease", "Release") + { + $release_uri = $dist_uri . $release_filename; + $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); - unless ( open STREAM, "<$release_path" ) + last if ( open $stream, "<", $release_path); + $stream = undef; + } + + unless ( $stream ) { - warn( "Failed to open Release file from " . $release_uri ); - return; + warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) ); + return 0; } - my $checksums = 0; - while ( $line = <STREAM> ) + + my $hash = undef; + my %avaiable_hashes = (); + my $acquire_by_hash = 0; + my @parts_to_download = (); + while ( $line = <$stream> ) { chomp $line; - if ($checksums) + if ($hash) { if ( $line =~ /^ +(.*)$/ ) { my @parts = split( / +/, $1 ); if ( @parts == 3 ) { - my ( $sha1, $size, $filename ) = @parts; - if ( $filename =~ m{^$component/i18n/Translation-[^./]*\.(bz2|xz)$} ) + my ( $hashsum, $size, $filename ) = @parts; + push @parts, $hash; + if ($arch eq "source") { - add_url_to_download( $dist_uri . $filename, $size ); + if ($component_regex) + { + # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format + if ( + ( + $filename =~ m{^${component_regex}/source/Sources${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/Contents-source${compressed_extension_regex}} + ) + ) + { + push @parts_to_download, \@parts; + } + } else { + # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + if ($filename =~ m{^Sources${compressed_extension_regex}} + ) { + push @parts_to_download, \@parts; + } + } + } else { + if ($component_regex) + { + # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format + if ( + ( + $filename =~ m{^Contents-${arch_regex}${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/binary-${arch_regex}/Release$} + ) or ( + $filename =~ m{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/dep11/icons-.*${compressed_extension_regex}} + ) or ( + $filename =~ m{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}} + ) + ) + { + push @parts_to_download, \@parts; + } + } else { + # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + if ($filename =~ m{^Packages${compressed_extension_regex}}) + { + push @parts_to_download, \@parts; + } + } } } else @@ -531,286 +624,129 @@ sub find_translation_files_in_release } else { - $checksums = 0; + $hash = undef; } } - if ( not $checksums ) + if ( not $hash ) { - if ( $line eq "SHA256:" ) + if ( $line =~ /^(${hash_type_regex}):$/ ) { - $checksums = 1; + $hash = $1; + $avaiable_hashes{$hash} = 1; + } + elsif ( $line eq "Acquire-By-Hash: yes" ) + { + $acquire_by_hash = 1; } } } -} - -sub process_translation_index -{ - # Extract all translation files from the dists/$DIST/$COMPONENT/i18n/Index - # file. Fall back to parsing dists/$DIST/Release if i18n/Index is not found. - - my $dist_uri = remove_double_slashes(shift); - my $component = shift; - my ( $base_uri, $index_uri, $index_path, $line ) = ''; + close $stream; - $base_uri = $dist_uri . $component . "/i18n/"; - $index_uri = $base_uri . "Index"; - $index_path = get_variable("skel_path") . "/" . sanitise_uri($index_uri); - - unless ( open STREAM, "<$index_path" ) + my $strongest_hash; + if ($acquire_by_hash) { - find_translation_files_in_release( $dist_uri, $component ); - return; - } - - my $checksums = 0; - while ( $line = <STREAM> ) - { - chomp $line; - if ($checksums) + foreach (@hash_strength) { - if ( $line =~ /^ +(.*)$/ ) - { - my @parts = split( / +/, $1 ); - if ( @parts == 3 ) - { - my ( $sha1, $size, $filename ) = @parts; - add_url_to_download( $base_uri . $filename, $size ); - } - else - { - warn("Malformed checksum line \"$1\" in $index_uri"); - } - } - else + if ($avaiable_hashes{$_}) { - $checksums = 0; + $strongest_hash = $_; + last; } } - if ( not $checksums ) + unless ($strongest_hash) { - if ( $line eq "SHA256:" or $line eq "SHA1:" or $line eq "MD5Sum:" ) - { - $checksums = 1; - } + warn("Cannot find a supported hash in $release_uri, will download from canonical locations."); + $acquire_by_hash = 0; } } - close STREAM; -} - -print "Processing translation indexes: ["; - -foreach (@config_binaries) -{ - my ( $arch, $uri, $distribution, @components ) = @{$_}; - print "T"; - if (@components) + foreach (@parts_to_download) { - $url = $uri . "/dists/" . $distribution . "/"; - - my $component; - foreach $component (@components) + my ( $hashsum, $size, $filename, $hash ) = @{$_}; + if ($acquire_by_hash) { - process_translation_index( $url, $component ); + add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum ); } - } -} - -print "]\n\n"; - -push( @index_urls, sort keys %urls_to_download ); -download_urls( "translation", sort keys %urls_to_download ); - -foreach ( keys %urls_to_download ) -{ - s[^(\w+)://][]; - s[~][%7E]g if get_variable("_tilde"); - $skipclean{$_} = 1; -} - -###################################################################################### -## DEP-11 index download - -%urls_to_download = (); - -sub find_dep11_files_in_release -{ - # Look in the dists/$DIST/Release file for the DEP-11 files that belong - # to the given component and architecture. - - my $dist_uri = shift; - my $component = shift; - my $arch = shift; - my ( $release_uri, $release_path, $line ) = ''; - - $release_uri = $dist_uri . "Release"; - $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); - - unless ( open STREAM, "<$release_path" ) - { - warn( "Failed to open Release file from " . $release_uri ); - return; - } - - my $checksums = 0; - while ( $line = <STREAM> ) - { - chomp $line; - if ($checksums) - { - if ( $line =~ /^ +(.*)$/ ) - { - my @parts = split( / +/, $1 ); - if ( @parts == 3 ) - { - my ( $sha1, $size, $filename ) = @parts; - if ( $filename =~ m{^$component/dep11/(Components-${arch}\.yml|icons-[^./]+\.tar)\.(gz|bz2|xz)$} ) - { - add_url_to_download( $dist_uri . $filename, $size ); - } - } - else - { - warn("Malformed checksum line \"$1\" in $release_uri"); - } - } - else - { - $checksums = 0; - } - } - if ( not $checksums ) + else { - if ( $line eq "SHA256:" ) - { - $checksums = 1; - } + add_url_to_download( $dist_uri . $filename, $size ); } } + return 1; } -print "Processing DEP-11 indexes: ["; - +print "Processing metadata files from releases ["; foreach (@config_binaries) { my ( $arch, $uri, $distribution, @components ) = @{$_}; - print "D"; - if (@components) - { - $url = $uri . "/dists/" . $distribution . "/"; - - my $component; - foreach $component (@components) - { - find_dep11_files_in_release( $url, $component, $arch ); - } - } -} - -print "]\n\n"; - -push( @index_urls, sort keys %urls_to_download ); -download_urls( "dep11", sort keys %urls_to_download ); - -foreach ( keys %urls_to_download ) -{ - s[^(\w+)://][]; - s[~][%7E]g if get_variable("_tilde"); - $skipclean{$_} = 1; -} - -###################################################################################### -## by-hash SHA256 files download - -%urls_to_download = (); - -sub find_by_hash_sha256_files_in_release -{ - # Look in the dists/$DIST/Release file for the by-hash SHA256 files that belong - # to the given component and architecture. - - my $dist_uri = shift; - my $component = shift; - my $arch = shift; - my ( $release_uri, $release_path, $line ) = ''; - - $release_uri = $dist_uri . "Release"; - $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); - - unless ( open STREAM, "<$release_path" ) + print "M"; + unless (find_metadata_in_release( $arch, $uri, $distribution, @components)) { - warn( "Failed to open Release file from " . $release_uri ); - return; - } - - my $checksums = 0; - while ( $line = <STREAM> ) - { - chomp $line; - if ($checksums) + # Insecure repo with no release file - try to get the well known indices + foreach my $file_extension (".gz", ".xz", ".bz2", "") { - if ( $line =~ /^ +(.*)$/ ) + if (@components) { - my @parts = split( / +/, $1 ); - if ( @parts == 3 ) + # Debian repo + foreach my $component (@components) { - my ( $sha256, $size, $filename ) = @parts; - my $dirname = dirname($filename); - my $sha256_filename = '/'.$dirname.'/by-hash/SHA256/'.$sha256; + foreach my $path ( + "/dists/${distribution}/${component}/binary-${arch}/Packages", + "/dists/${distribution}/${component}/binary-all/Packages", + "/dists/${distribution}/${component}/Contents-${arch}", + "/dists/${distribution}/${component}/Contents-all", + "/dists/${distribution}/Contents-${arch}", + "/dists/${distribution}/Contents-all", + ) { - add_url_to_download( $dist_uri . $sha256_filename ); + add_url_to_download( "${uri}/${path}${file_extension}" ); } } - else + } else { + # Flat repo + foreach my $path ( + "${distribution}/Packages", + "${distribution}/Contents-${arch}", + "${distribution}/Contents-all", + ) { - warn("Malformed checksum line \"$1\" in $release_uri"); + add_url_to_download( "${uri}/${path}${file_extension}" ); } } - else - { - $checksums = 0; - } - } - if ( not $checksums ) - { - if ( $line eq "SHA256:" ) - { - $checksums = 1; - } } } } -print "Processing SHA256 by-hash files ["; - -foreach (@config_binaries) +foreach (@config_sources) { - my ( $arch, $uri, $distribution, @components ) = @{$_}; - print "D"; - if (@components) + my ( $uri, $distribution, @components ) = @{$_}; + print "M"; + unless (find_metadata_in_release( "source", $uri, $distribution, @components)) { - $url = $uri . "/dists/" . $distribution . "/"; - - my $component; - foreach $component (@components) + # Insecure repo with no release file - try to get the well known indices + foreach my $file_extension (".gz", ".xz", ".bz2", "") { - find_by_hash_sha256_files_in_release( $url, $component, $arch ); + if (@components) + { + # Debian repo + foreach my $path ( + "${distribution}/source/Sources", + "${distribution}/Contents-source", + ) + { + add_url_to_download( "${uri}/${path}${file_extension}" ); + } + } else { + # Flat repo + add_url_to_download( "${uri}/${distribution}/Sources${file_extension}" ); + } } } } - print "]\n\n"; -push( @index_urls, sort keys %urls_to_download ); -download_urls( "by-hash-SHA256", sort keys %urls_to_download ); - -foreach ( keys %urls_to_download ) -{ - s[^(\w+)://][]; - s[~][%7E]g if get_variable("_tilde"); - $skipclean{$_} = 1; -} +@index_urls = sort keys %urls_to_download; +download_urls( "index", @index_urls ); ###################################################################################### ## Main download preparations @@ -867,6 +803,7 @@ sub process_index { my $uri = shift; my $index = shift; + my $optional = shift; my ( $path, $package, $mirror, $files ) = ''; $path = sanitise_uri($uri); @@ -888,6 +825,10 @@ sub process_index unless ( open STREAM, "<$path/$index" ) { + if ($optional) + { + return; + } warn("apt-mirror: can't open index $path/$index in process_index"); return; } @@ -967,6 +908,7 @@ foreach (@config_binaries) foreach $component (@components) { process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" ); + process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 ); } } else @@ -1024,13 +966,29 @@ sub copy_file utime( $atime, $mtime, $to ) or die("apt-mirror: can't utime $to"); } -foreach (@index_urls) +foreach (@release_urls, @index_urls) { die("apt-mirror: invalid url in index_urls") unless s[^(\w+)://][]; copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ); - copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.gz$//); - copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.bz2$//); - copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.xz$//); + + my $sanitized_uri = sanitise_uri($_); + + # If we downloaded any files from a checksum location, now is the time to + # populate the canonical filename. + if ($hashsum_to_files{$sanitized_uri}) + { + foreach my $filename (@{$hashsum_to_files{$sanitized_uri}}) + { + copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $filename ); + if ($file_to_hashsums{$filename}) + { + foreach my $hashsum_filename (@{$file_to_hashsums{$filename}}) + { + copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $hashsum_filename ); + } + } + } + } } ###################################################################################### |