diff options
author | Georgi Georgiev <chutz@gg3.net> | 2022-05-17 08:06:13 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-17 08:06:13 +0300 |
commit | f4c9f79e8d37a89abe9a9baea6616d8ec1e0f73f (patch) | |
tree | 462022c03adf04dbdf6479d2a2095102f8490db2 | |
parent | da97e701d5188316cc5d2cc7f4ffada47a880aba (diff) |
Refactor metadata parsing to reduce copy-paste (#20)
* Parse the Release files for all relevant metadata in one shot
There is no need to parse the same Release file separately for each kind of
metadata. Parse it once, and pick only the files we need to mirror.
* The "all" architecture is always required
Even when we mirror only a specific arch, the packages for the "all"
architecture are still required for that arch.
Documented at https://wiki.debian.org/DebianRepository/Format#Architectures
Real example: https://apt.puppetlabs.com/dists/focal/Release
deb [arch=amd64] https://apt.puppetlabs.com/ focal puppet
fixes Stifler6996/apt-mirror#14
-rwxr-xr-x | apt-mirror | 245 |
1 files changed, 97 insertions, 148 deletions
@@ -414,26 +414,17 @@ foreach (@config_sources) if (@components) { $url = $uri . "/dists/" . $distribution . "/"; - - add_url_to_download( $url . "InRelease" ); - add_url_to_download( $url . "Release" ); - add_url_to_download( $url . "Release.gpg" ); - foreach (@components) - { - add_url_to_download( $url . $_ . "/source/Release" ); - add_url_to_download( $url . $_ . "/source/Sources.gz" ); - add_url_to_download( $url . $_ . "/source/Sources.bz2" ); - add_url_to_download( $url . $_ . "/source/Sources.xz" ); - } } else { - add_url_to_download( $uri . "/$distribution/Release" ); - add_url_to_download( $uri . "/$distribution/Release.gpg" ); - add_url_to_download( $uri . "/$distribution/Sources.gz" ); - add_url_to_download( $uri . "/$distribution/Sources.bz2" ); - add_url_to_download( $uri . "/$distribution/Sources.xz" ); + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + $url = $uri . "/" . $distribution . "/"; + add_url_to_download( $url . "Sources" ); } + + add_url_to_download( $url . "InRelease" ); + add_url_to_download( $url . "Release" ); + add_url_to_download( $url . "Release.gpg" ); } foreach (@config_binaries) @@ -444,47 +435,18 @@ foreach (@config_binaries) { $url = $uri . "/dists/" . $distribution . "/"; - add_url_to_download( $url . "InRelease" ); - add_url_to_download( $url . "Release" ); - add_url_to_download( $url . "Release.gpg" ); - if ( get_variable("_contents") ) - { - add_url_to_download( $url . "Contents-" . $arch . ".gz" ); - add_url_to_download( $url . "Contents-" . $arch . ".bz2" ); - add_url_to_download( $url . "Contents-" . $arch . ".xz" ); - } - foreach (@components) - { - if ( get_variable("_contents") ) - { - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".gz" ); - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".bz2" ); - add_url_to_download( $url . $_ . "/Contents-" . $arch . ".xz" ); - } - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Release" ); - add_url_to_download( $url . $_ . 
"/binary-" . $arch . "/Packages.gz" ); - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.bz2" ); - add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.xz" ); - add_url_to_download( $url . $_ . "/i18n/Index" ); - add_url_to_download( $url . $_ . "/cnf/Commands-" . $arch . ".xz" ); - } - } - elsif ($distribution) - { - add_url_to_download( $uri . "/$distribution/Release" ); - add_url_to_download( $uri . "/$distribution/Release.gpg" ); - add_url_to_download( $uri . "/$distribution/Packages.gz" ); - add_url_to_download( $uri . "/$distribution/Packages.bz2" ); - add_url_to_download( $uri . "/$distribution/Packages.xz" ); } else { - add_url_to_download( $uri . "/Release" ); - add_url_to_download( $uri . "/Release.gpg" ); - add_url_to_download( $uri . "/Packages.gz" ); - add_url_to_download( $uri . "/Packages.bz2" ); - add_url_to_download( $uri . "/Packages.xz" ); + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + $url = $uri . "/" . $distribution . "/"; + add_url_to_download( $url . "Packages" ); } + + add_url_to_download( $url . "InRelease" ); + add_url_to_download( $url . "Release" ); + add_url_to_download( $url . "Release.gpg" ); + } chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); @@ -492,7 +454,7 @@ chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel"); download_urls( "index", @index_urls ); ###################################################################################### -## Translation index download +## Download all relevant metadata %urls_to_download = (); @@ -508,15 +470,25 @@ sub sanitise_uri return $uri; } -sub find_translation_files_in_release +sub find_metadata_in_release { - # Look in the dists/$DIST/Release file for the translation files that belong - # to the given component. 
+ # Look in the Release file for any files we need to download + my ( $arch, $uri, $distribution, @components ) = @_; - my $dist_uri = shift; - my $component = shift; my ( $release_uri, $release_path, $line ) = ''; + my $component_regex = undef; + my $arch_regex = "(?:${arch}|all)"; + my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz))?$'; + my $dist_uri; + if (@components) + { + $dist_uri = remove_double_slashes($uri . "/dists/" . $distribution . "/"); + $component_regex = "(?:" . join("|", @components) . ")"; + } + else { + $dist_uri = remove_double_slashes($uri . "/" . $distribution . "/"); + } $release_uri = $dist_uri . "Release"; $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); @@ -537,10 +509,50 @@ sub find_translation_files_in_release my @parts = split( / +/, $1 ); if ( @parts == 3 ) { - my ( $sha1, $size, $filename ) = @parts; - if ( $filename =~ m{^$component/i18n/Translation-[^./]*\.(bz2|xz)$} ) + my ( $sha256, $size, $filename ) = @parts; + if ($arch eq "source") { - add_url_to_download( $dist_uri . $filename, $size ); + if ($component_regex) + { + if ( + ( + $filename =~ m{^${component_regex}/source/} + ) or ( + $filename =~ m{^${component_regex}/Contents-source/} + ) + ) + { + add_url_to_download( $dist_uri . $filename, $size ); + } + } else { + if ($filename =~ m{^Sources${compressed_extension_regex}} + ) { + add_url_to_download( $dist_uri . 
$filename, $size ); + } + } + } else { + if ( + ( + $filename =~ m{^Contents-${arch_regex}${compressed_extension_regex}$} + ) or ( + $filename =~ m{^Packages${compressed_extension_regex}$} + ) or ( + $filename =~ m{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}$} + ) or ( + $filename =~ m{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}$} + ) or ( + $filename =~ m{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}$} + ) or ( + $filename =~ m{^${component_regex}/dep11/Components-${arch_regex}} + ) or ( + $filename =~ m{^${component_regex}/dep11/icons-} + ) or ( + $filename =~ m{^${component_regex}/i18n/Translation-} + ) + ) + { + add_url_to_download( $dist_uri . $filename, $size ); + } } } else @@ -563,6 +575,21 @@ sub find_translation_files_in_release } } +print "Processing metadata files from releases ["; +foreach (@config_binaries) +{ + my ( $arch, $uri, $distribution, @components ) = @{$_}; + print "M"; + find_metadata_in_release( $arch, $uri, $distribution, @components); +} +foreach (@config_sources) +{ + my ( $uri, $distribution, @components ) = @{$_}; + print "M"; + find_metadata_in_release( "source", $uri, $distribution, @components); +} +print "]\n\n"; + sub process_translation_index { # Extract all translation files from the dists/$DIST/$COMPONENT/i18n/Index @@ -578,7 +605,7 @@ sub process_translation_index unless ( open STREAM, "<$index_path" ) { - find_translation_files_in_release( $dist_uri, $component ); + # No Index, nothing to process. 
return; } @@ -639,91 +666,7 @@ foreach (@config_binaries) print "]\n\n"; push( @index_urls, sort keys %urls_to_download ); -download_urls( "translation", sort keys %urls_to_download ); - -###################################################################################### -## DEP-11 index download - -%urls_to_download = (); - -sub find_dep11_files_in_release -{ - # Look in the dists/$DIST/Release file for the DEP-11 files that belong - # to the given component and architecture. - - my $dist_uri = shift; - my $component = shift; - my $arch = shift; - my ( $release_uri, $release_path, $line ) = ''; - - $release_uri = $dist_uri . "Release"; - $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri); - - unless ( open STREAM, "<$release_path" ) - { - warn( "Failed to open Release file from " . $release_uri ); - return; - } - - my $checksums = 0; - while ( $line = <STREAM> ) - { - chomp $line; - if ($checksums) - { - if ( $line =~ /^ +(.*)$/ ) - { - my @parts = split( / +/, $1 ); - if ( @parts == 3 ) - { - my ( $sha1, $size, $filename ) = @parts; - if ( $filename =~ m{^$component/dep11/(Components-${arch}\.yml|icons-[^./]+\.tar)\.(gz|bz2|xz)$} ) - { - add_url_to_download( $dist_uri . $filename, $size ); - } - } - else - { - warn("Malformed checksum line \"$1\" in $release_uri"); - } - } - else - { - $checksums = 0; - } - } - if ( not $checksums ) - { - if ( $line eq "SHA256:" ) - { - $checksums = 1; - } - } - } -} - -print "Processing DEP-11 indexes: ["; - -foreach (@config_binaries) -{ - my ( $arch, $uri, $distribution, @components ) = @{$_}; - print "D"; - if (@components) - { - $url = $uri . "/dists/" . $distribution . 
"/"; - - my $component; - foreach $component (@components) - { - find_dep11_files_in_release( $url, $component, $arch ); - } - } -} - -print "]\n\n"; - -push( @index_urls, sort keys %urls_to_download ); -download_urls( "dep11", sort keys %urls_to_download ); +download_urls( "metadata", sort keys %urls_to_download ); ###################################################################################### ## Main download preparations @@ -780,6 +723,7 @@ sub process_index { my $uri = shift; my $index = shift; + my $optional = shift; my ( $path, $package, $mirror, $files ) = ''; $path = sanitise_uri($uri); @@ -801,6 +745,10 @@ sub process_index unless ( open STREAM, "<$path/$index" ) { + if ($optional) + { + return; + } warn("apt-mirror: can't open index $path/$index in process_index"); return; } @@ -880,6 +828,7 @@ foreach (@config_binaries) foreach $component (@components) { process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" ); + process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 ); } } elsif ($distribution) |