Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/apt-mirror/apt-mirror.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy T. Bouse <jbouse@users.noreply.github.com>2022-10-17 16:47:56 +0300
committerGitHub <noreply@github.com>2022-10-17 16:47:56 +0300
commit207f8a51c0c7ae7f2fe0ad23242b6976950347df (patch)
tree5984ae1d8729b8ae9bdafb1a0c4c0c749d601a97
parent2258cc160b3e4d1398f94b11e7ab2802795fe970 (diff)
parent0a7160213af172284c4090ed29f4aec45d77e428 (diff)
Merge pull request #156 from chutzimir/download-by-hash
Respect the "Acquire-By-Hash" field if present (download `by-hash`)
-rwxr-xr-xapt-mirror574
1 files changed, 266 insertions, 308 deletions
diff --git a/apt-mirror b/apt-mirror
index 095664c..eff86cb 100755
--- a/apt-mirror
+++ b/apt-mirror
@@ -124,10 +124,18 @@ my %config_variables = (
my @config_binaries = ();
my @config_sources = ();
+my @release_urls;
my @index_urls;
my @childrens = ();
my %skipclean = ();
my %clean_directory = ();
+my @hash_strength = qw(SHA512 SHA256 SHA1 MD5Sum);
+
+# Mapping of files downloaded from a by-hash directory to their canonical locations.
+my %hashsum_to_files = ();
+
+# Mapping of all the checksums for a given canonical filename.
+my %file_to_hashsums;
######################################################################################
## Setting up $config_file variable
@@ -282,6 +290,18 @@ sub download_urls
print "[" . scalar(@childrens) . "]... ";
}
print "\nEnd time: " . localtime() . "\n\n";
+
+ if (scalar keys %hashsum_to_files > 0)
+ {
+ foreach my $hashsum_filename (keys %hashsum_to_files)
+ {
+ foreach my $filename (@{$hashsum_to_files{$hashsum_filename}})
+ {
+ copy_file( $hashsum_filename, $filename );
+ }
+ }
+ }
+
}
## Parse config
@@ -387,7 +407,45 @@ sub remove_double_slashes
sub add_url_to_download
{
my $url = remove_double_slashes(shift);
- $urls_to_download{$url} = shift;
+ my $size = shift;
+ my $strongest_hash = shift;
+ my $hash = shift;
+ my $hashsum = shift;
+
+ my $canonical_filename = $url;
+ $canonical_filename =~ s[^(\w+)://][];
+ $canonical_filename =~ s[~][%7E]g if get_variable("_tilde");
+ $skipclean{$canonical_filename} = 1;
+
+ if ($hashsum)
+ {
+ # If the optional hashsum was passed as an argument
+ # - download the strongest hash only
+ # - make a copy to the canonical location
+ # - make a copy for the other known hash versions
+
+ $url = dirname($url) . "/by-hash/${hash}/${hashsum}";
+
+ my $hashsum_filename = dirname($canonical_filename) . "/by-hash/${hash}/${hashsum}";
+ $skipclean{$hashsum_filename} = 1;
+
+ if ($hash eq $strongest_hash)
+ {
+ # This is the strongest hash, which is the one to download.
+ # Also need to remember to which canonical location it should be linked.
+ $hashsum_to_files{$hashsum_filename} ||= [];
+ push @{$hashsum_to_files{$hashsum_filename}}, $canonical_filename;
+ $urls_to_download{$url} = $size;
+ } else {
+ # We are not going to download using this checksum, but we still
+ # need to know where to put the checksum.
+ $file_to_hashsums{$canonical_filename} ||= [];
+ push @{$file_to_hashsums{$canonical_filename}}, $hashsum_filename;
+ }
+ } else {
+ # Not using by-hash, so download the file only.
+ $urls_to_download{$url} = $size;
+ }
}
foreach (@config_sources)
@@ -397,26 +455,16 @@ foreach (@config_sources)
if (@components)
{
$url = $uri . "/dists/" . $distribution . "/";
-
- add_url_to_download( $url . "InRelease" );
- add_url_to_download( $url . "Release" );
- add_url_to_download( $url . "Release.gpg" );
- foreach (@components)
- {
- add_url_to_download( $url . $_ . "/source/Release" );
- add_url_to_download( $url . $_ . "/source/Sources.gz" );
- add_url_to_download( $url . $_ . "/source/Sources.bz2" );
- add_url_to_download( $url . $_ . "/source/Sources.xz" );
- }
}
else
{
- add_url_to_download( $uri . "/$distribution/Release" );
- add_url_to_download( $uri . "/$distribution/Release.gpg" );
- add_url_to_download( $uri . "/$distribution/Sources.gz" );
- add_url_to_download( $uri . "/$distribution/Sources.bz2" );
- add_url_to_download( $uri . "/$distribution/Sources.xz" );
+ # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
+ $url = $uri . "/" . $distribution . "/";
}
+
+ add_url_to_download( $url . "InRelease" );
+ add_url_to_download( $url . "Release" );
+ add_url_to_download( $url . "Release.gpg" );
}
foreach (@config_binaries)
@@ -427,56 +475,25 @@ foreach (@config_binaries)
{
$url = $uri . "/dists/" . $distribution . "/";
- add_url_to_download( $url . "InRelease" );
- add_url_to_download( $url . "Release" );
- add_url_to_download( $url . "Release.gpg" );
- if ( get_variable("_contents") )
- {
- add_url_to_download( $url . "Contents-" . $arch . ".gz" );
- add_url_to_download( $url . "Contents-" . $arch . ".bz2" );
- add_url_to_download( $url . "Contents-" . $arch . ".xz" );
- }
- foreach (@components)
- {
- if ( get_variable("_contents") )
- {
- add_url_to_download( $url . $_ . "/Contents-" . $arch . ".gz" );
- add_url_to_download( $url . $_ . "/Contents-" . $arch . ".bz2" );
- add_url_to_download( $url . $_ . "/Contents-" . $arch . ".xz" );
- }
- add_url_to_download( $url . $_ . "/binary-" . $arch . "/Release" );
- add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.gz" );
- add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.bz2" );
- add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.xz" );
- add_url_to_download( $url . $_ . "/i18n/Index" );
- }
}
else
{
- add_url_to_download( $uri . "/$distribution/Release" );
- add_url_to_download( $uri . "/$distribution/Release.gpg" );
- add_url_to_download( $uri . "/$distribution/Packages.gz" );
- add_url_to_download( $uri . "/$distribution/Packages.bz2" );
- add_url_to_download( $uri . "/$distribution/Packages.xz" );
+ # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
+ $url = $uri . "/" . $distribution . "/";
}
-}
-chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
-@index_urls = sort keys %urls_to_download;
-download_urls( "index", @index_urls );
+ add_url_to_download( $url . "InRelease" );
+ add_url_to_download( $url . "Release" );
+ add_url_to_download( $url . "Release.gpg" );
-foreach ( keys %urls_to_download )
-{
- s[^(\w+)://][];
- s[~][%7E]g if get_variable("_tilde");
- $skipclean{$_} = 1;
- $skipclean{$_} = 1 if s[\.gz$][];
- $skipclean{$_} = 1 if s[\.bz2$][];
- $skipclean{$_} = 1 if s[\.xz$][];
}
+chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
+@release_urls = sort keys %urls_to_download;
+download_urls( "release", @release_urls );
+
######################################################################################
-## Translation index download
+## Download all relevant metadata
%urls_to_download = ();
@@ -489,39 +506,115 @@ sub sanitise_uri
return $uri;
}
-sub find_translation_files_in_release
+sub find_metadata_in_release
{
- # Look in the dists/$DIST/Release file for the translation files that belong
- # to the given component.
+ # Look in the Release file for any files we need to download
+ my ( $arch, $uri, $distribution, @components ) = @_;
- my $dist_uri = shift;
- my $component = shift;
my ( $release_uri, $release_path, $line ) = '';
+ my $component_regex = undef;
+ my $arch_regex = "(?:${arch}|all)";
+ my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz))$';
+ my $dist_uri;
+ my $hash_type_regex = "(?:" . join("|", @hash_strength) . ")";
+
+ if (@components)
+ {
+ $dist_uri = remove_double_slashes($uri . "/dists/" . $distribution . "/");
+ $component_regex = "(?:" . join("|", @components) . ")";
+ }
+ else {
+ $dist_uri = remove_double_slashes($uri . "/" . $distribution . "/");
+ }
- $release_uri = $dist_uri . "Release";
- $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);
+ my $stream;
+ foreach my $release_filename ("InRelease", "Release")
+ {
+ $release_uri = $dist_uri . $release_filename;
+ $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);
- unless ( open STREAM, "<$release_path" )
+ last if ( open $stream, "<", $release_path);
+ $stream = undef;
+ }
+
+ unless ( $stream )
{
- warn( "Failed to open Release file from " . $release_uri );
- return;
+ warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
+ return 0;
}
- my $checksums = 0;
- while ( $line = <STREAM> )
+
+ my $hash = undef;
+ my %avaiable_hashes = ();
+ my $acquire_by_hash = 0;
+ my @parts_to_download = ();
+ while ( $line = <$stream> )
{
chomp $line;
- if ($checksums)
+ if ($hash)
{
if ( $line =~ /^ +(.*)$/ )
{
my @parts = split( / +/, $1 );
if ( @parts == 3 )
{
- my ( $sha1, $size, $filename ) = @parts;
- if ( $filename =~ m{^$component/i18n/Translation-[^./]*\.(bz2|xz)$} )
+ my ( $hashsum, $size, $filename ) = @parts;
+ push @parts, $hash;
+ if ($arch eq "source")
{
- add_url_to_download( $dist_uri . $filename, $size );
+ if ($component_regex)
+ {
+ # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
+ if (
+ (
+ $filename =~ m{^${component_regex}/source/Sources${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/Contents-source${compressed_extension_regex}}
+ )
+ )
+ {
+ push @parts_to_download, \@parts;
+ }
+ } else {
+ # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
+ if ($filename =~ m{^Sources${compressed_extension_regex}}
+ ) {
+ push @parts_to_download, \@parts;
+ }
+ }
+ } else {
+ if ($component_regex)
+ {
+ # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
+ if (
+ (
+ $filename =~ m{^Contents-${arch_regex}${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/binary-${arch_regex}/Release$}
+ ) or (
+ $filename =~ m{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/dep11/icons-.*${compressed_extension_regex}}
+ ) or (
+ $filename =~ m{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}}
+ )
+ )
+ {
+ push @parts_to_download, \@parts;
+ }
+ } else {
+ # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
+ if ($filename =~ m{^Packages${compressed_extension_regex}})
+ {
+ push @parts_to_download, \@parts;
+ }
+ }
}
}
else
@@ -531,286 +624,129 @@ sub find_translation_files_in_release
}
else
{
- $checksums = 0;
+ $hash = undef;
}
}
- if ( not $checksums )
+ if ( not $hash )
{
- if ( $line eq "SHA256:" )
+ if ( $line =~ /^(${hash_type_regex}):$/ )
{
- $checksums = 1;
+ $hash = $1;
+ $avaiable_hashes{$hash} = 1;
+ }
+ elsif ( $line eq "Acquire-By-Hash: yes" )
+ {
+ $acquire_by_hash = 1;
}
}
}
-}
-
-sub process_translation_index
-{
- # Extract all translation files from the dists/$DIST/$COMPONENT/i18n/Index
- # file. Fall back to parsing dists/$DIST/Release if i18n/Index is not found.
-
- my $dist_uri = remove_double_slashes(shift);
- my $component = shift;
- my ( $base_uri, $index_uri, $index_path, $line ) = '';
+ close $stream;
- $base_uri = $dist_uri . $component . "/i18n/";
- $index_uri = $base_uri . "Index";
- $index_path = get_variable("skel_path") . "/" . sanitise_uri($index_uri);
-
- unless ( open STREAM, "<$index_path" )
+ my $strongest_hash;
+ if ($acquire_by_hash)
{
- find_translation_files_in_release( $dist_uri, $component );
- return;
- }
-
- my $checksums = 0;
- while ( $line = <STREAM> )
- {
- chomp $line;
- if ($checksums)
+ foreach (@hash_strength)
{
- if ( $line =~ /^ +(.*)$/ )
- {
- my @parts = split( / +/, $1 );
- if ( @parts == 3 )
- {
- my ( $sha1, $size, $filename ) = @parts;
- add_url_to_download( $base_uri . $filename, $size );
- }
- else
- {
- warn("Malformed checksum line \"$1\" in $index_uri");
- }
- }
- else
+ if ($avaiable_hashes{$_})
{
- $checksums = 0;
+ $strongest_hash = $_;
+ last;
}
}
- if ( not $checksums )
+ unless ($strongest_hash)
{
- if ( $line eq "SHA256:" or $line eq "SHA1:" or $line eq "MD5Sum:" )
- {
- $checksums = 1;
- }
+ warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
+ $acquire_by_hash = 0;
}
}
- close STREAM;
-}
-
-print "Processing translation indexes: [";
-
-foreach (@config_binaries)
-{
- my ( $arch, $uri, $distribution, @components ) = @{$_};
- print "T";
- if (@components)
+ foreach (@parts_to_download)
{
- $url = $uri . "/dists/" . $distribution . "/";
-
- my $component;
- foreach $component (@components)
+ my ( $hashsum, $size, $filename, $hash ) = @{$_};
+ if ($acquire_by_hash)
{
- process_translation_index( $url, $component );
+ add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $hash, $hashsum );
}
- }
-}
-
-print "]\n\n";
-
-push( @index_urls, sort keys %urls_to_download );
-download_urls( "translation", sort keys %urls_to_download );
-
-foreach ( keys %urls_to_download )
-{
- s[^(\w+)://][];
- s[~][%7E]g if get_variable("_tilde");
- $skipclean{$_} = 1;
-}
-
-######################################################################################
-## DEP-11 index download
-
-%urls_to_download = ();
-
-sub find_dep11_files_in_release
-{
- # Look in the dists/$DIST/Release file for the DEP-11 files that belong
- # to the given component and architecture.
-
- my $dist_uri = shift;
- my $component = shift;
- my $arch = shift;
- my ( $release_uri, $release_path, $line ) = '';
-
- $release_uri = $dist_uri . "Release";
- $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);
-
- unless ( open STREAM, "<$release_path" )
- {
- warn( "Failed to open Release file from " . $release_uri );
- return;
- }
-
- my $checksums = 0;
- while ( $line = <STREAM> )
- {
- chomp $line;
- if ($checksums)
- {
- if ( $line =~ /^ +(.*)$/ )
- {
- my @parts = split( / +/, $1 );
- if ( @parts == 3 )
- {
- my ( $sha1, $size, $filename ) = @parts;
- if ( $filename =~ m{^$component/dep11/(Components-${arch}\.yml|icons-[^./]+\.tar)\.(gz|bz2|xz)$} )
- {
- add_url_to_download( $dist_uri . $filename, $size );
- }
- }
- else
- {
- warn("Malformed checksum line \"$1\" in $release_uri");
- }
- }
- else
- {
- $checksums = 0;
- }
- }
- if ( not $checksums )
+ else
{
- if ( $line eq "SHA256:" )
- {
- $checksums = 1;
- }
+ add_url_to_download( $dist_uri . $filename, $size );
}
}
+ return 1;
}
-print "Processing DEP-11 indexes: [";
-
+print "Processing metadata files from releases [";
foreach (@config_binaries)
{
my ( $arch, $uri, $distribution, @components ) = @{$_};
- print "D";
- if (@components)
- {
- $url = $uri . "/dists/" . $distribution . "/";
-
- my $component;
- foreach $component (@components)
- {
- find_dep11_files_in_release( $url, $component, $arch );
- }
- }
-}
-
-print "]\n\n";
-
-push( @index_urls, sort keys %urls_to_download );
-download_urls( "dep11", sort keys %urls_to_download );
-
-foreach ( keys %urls_to_download )
-{
- s[^(\w+)://][];
- s[~][%7E]g if get_variable("_tilde");
- $skipclean{$_} = 1;
-}
-
-######################################################################################
-## by-hash SHA256 files download
-
-%urls_to_download = ();
-
-sub find_by_hash_sha256_files_in_release
-{
- # Look in the dists/$DIST/Release file for the by-hash SHA256 files that belong
- # to the given component and architecture.
-
- my $dist_uri = shift;
- my $component = shift;
- my $arch = shift;
- my ( $release_uri, $release_path, $line ) = '';
-
- $release_uri = $dist_uri . "Release";
- $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);
-
- unless ( open STREAM, "<$release_path" )
+ print "M";
+ unless (find_metadata_in_release( $arch, $uri, $distribution, @components))
{
- warn( "Failed to open Release file from " . $release_uri );
- return;
- }
-
- my $checksums = 0;
- while ( $line = <STREAM> )
- {
- chomp $line;
- if ($checksums)
+ # Insecure repo with no release file - try to get the well known indices
+ foreach my $file_extension (".gz", ".xz", ".bz2", "")
{
- if ( $line =~ /^ +(.*)$/ )
+ if (@components)
{
- my @parts = split( / +/, $1 );
- if ( @parts == 3 )
+ # Debian repo
+ foreach my $component (@components)
{
- my ( $sha256, $size, $filename ) = @parts;
- my $dirname = dirname($filename);
- my $sha256_filename = '/'.$dirname.'/by-hash/SHA256/'.$sha256;
+ foreach my $path (
+ "/dists/${distribution}/${component}/binary-${arch}/Packages",
+ "/dists/${distribution}/${component}/binary-all/Packages",
+ "/dists/${distribution}/${component}/Contents-${arch}",
+ "/dists/${distribution}/${component}/Contents-all",
+ "/dists/${distribution}/Contents-${arch}",
+ "/dists/${distribution}/Contents-all",
+ )
{
- add_url_to_download( $dist_uri . $sha256_filename );
+ add_url_to_download( "${uri}/${path}${file_extension}" );
}
}
- else
+ } else {
+ # Flat repo
+ foreach my $path (
+ "${distribution}/Packages",
+ "${distribution}/Contents-${arch}",
+ "${distribution}/Contents-all",
+ )
{
- warn("Malformed checksum line \"$1\" in $release_uri");
+ add_url_to_download( "${uri}/${path}${file_extension}" );
}
}
- else
- {
- $checksums = 0;
- }
- }
- if ( not $checksums )
- {
- if ( $line eq "SHA256:" )
- {
- $checksums = 1;
- }
}
}
}
-print "Processing SHA256 by-hash files [";
-
-foreach (@config_binaries)
+foreach (@config_sources)
{
- my ( $arch, $uri, $distribution, @components ) = @{$_};
- print "D";
- if (@components)
+ my ( $uri, $distribution, @components ) = @{$_};
+ print "M";
+ unless (find_metadata_in_release( "source", $uri, $distribution, @components))
{
- $url = $uri . "/dists/" . $distribution . "/";
-
- my $component;
- foreach $component (@components)
+ # Insecure repo with no release file - try to get the well known indices
+ foreach my $file_extension (".gz", ".xz", ".bz2", "")
{
- find_by_hash_sha256_files_in_release( $url, $component, $arch );
+ if (@components)
+ {
+ # Debian repo
+ foreach my $path (
+ "${distribution}/source/Sources",
+ "${distribution}/Contents-source",
+ )
+ {
+ add_url_to_download( "${uri}/${path}${file_extension}" );
+ }
+ } else {
+ # Flat repo
+ add_url_to_download( "${uri}/${distribution}/Sources${file_extension}" );
+ }
}
}
}
-
print "]\n\n";
-push( @index_urls, sort keys %urls_to_download );
-download_urls( "by-hash-SHA256", sort keys %urls_to_download );
-
-foreach ( keys %urls_to_download )
-{
- s[^(\w+)://][];
- s[~][%7E]g if get_variable("_tilde");
- $skipclean{$_} = 1;
-}
+@index_urls = sort keys %urls_to_download;
+download_urls( "index", @index_urls );
######################################################################################
## Main download preparations
@@ -867,6 +803,7 @@ sub process_index
{
my $uri = shift;
my $index = shift;
+ my $optional = shift;
my ( $path, $package, $mirror, $files ) = '';
$path = sanitise_uri($uri);
@@ -888,6 +825,10 @@ sub process_index
unless ( open STREAM, "<$path/$index" )
{
+ if ($optional)
+ {
+ return;
+ }
warn("apt-mirror: can't open index $path/$index in process_index");
return;
}
@@ -967,6 +908,7 @@ foreach (@config_binaries)
foreach $component (@components)
{
process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );
+ process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
}
}
else
@@ -1024,13 +966,29 @@ sub copy_file
utime( $atime, $mtime, $to ) or die("apt-mirror: can't utime $to");
}
-foreach (@index_urls)
+foreach (@release_urls, @index_urls)
{
die("apt-mirror: invalid url in index_urls") unless s[^(\w+)://][];
copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") );
- copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.gz$//);
- copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.bz2$//);
- copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.xz$//);
+
+ my $sanitized_uri = sanitise_uri($_);
+
+ # If we downloaded any files from a checksum location, now is the time to
+ # populate the canonical filename.
+ if ($hashsum_to_files{$sanitized_uri})
+ {
+ foreach my $filename (@{$hashsum_to_files{$sanitized_uri}})
+ {
+ copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $filename );
+ if ($file_to_hashsums{$filename})
+ {
+ foreach my $hashsum_filename (@{$file_to_hashsums{$filename}})
+ {
+ copy_file( get_variable("mirror_path") . "/" . $sanitized_uri, get_variable("mirror_path") . "/" . $hashsum_filename );
+ }
+ }
+ }
+ }
}
######################################################################################