#! /usr/bin/perl
#
# script to purge the Akamai cache.
#
# Notes:
#
# - we limit the purging to recently modified files (within the last two
#   days, per the find -mtime -2 call below)
# - there must be a file ~openssl/.edgerc with our Akamai credentials
# - the Akamai supplied program 'akamai-purge' must be installed in
# /usr/local/bin
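# - the environment variable DEBUG, when set, makes the script show on
#   STDERR what it's doing
# - the environment variable DRYRUN, when set, stops the script from
#   actually invoking akamai-purge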
use strict;
use warnings;
use Cwd qw(:DEFAULT abs_path);
use File::Basename;
# Find all .html files that include a .inc file, and create a map
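# from each included file to the .html files that include it.  When an
# included file changes, it's the including pages that must be purged,
# since those are the URLs that Akamai serves.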
my %inc2html = ();
my $debug = $ENV{DEBUG};
my $dryrun = $ENV{DRYRUN};
if (scalar @ARGV != 2 || $ARGV[0] eq '-h') {
    my $prog = basename($0);
    print STDERR <<_____;
usage: $prog BASEDIR BASEURL
where BASEDIR is the base directory where original web files are scanned for,
and BASEURL is the corresponding base URL.
_____
    # Guard against an undefined $ARGV[0] when no arguments were given
    exit(defined $ARGV[0] && $ARGV[0] eq '-h' ? 0 : 1);
}
my $basedir = shift @ARGV; # For example, /var/www/openssl
my $baseurl = shift @ARGV; # For example, https://www.openssl.org
# This ensures that we have an absolute base directory, and that it's
# normalised (i.e. doesn't have duplicated or ending slashes)
my $tmp = abs_path($basedir) or die "$basedir: $!\n";
$basedir = $tmp;
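# Note: $basedir is interpolated into shell commands below, so it's
# assumed to be free of spaces and shell metacharacters.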
foreach ( `find $basedir -type f -name '*.html'` ) {
    chomp;
    my $file = $_;
    # Split the path into directory name and file name
    my ($dn, $fn) = $file =~ m/^(?:(.*)\/)?([^\/]*)$/;
    my @incs = ();
    open my $html, '<', $file or die "$file: $!\n";
    while ( my $line = <$html> ) {
        if ($line =~ /<!--\s*#include\s+virtual="([^"]*)"\s*-->/) {
            # A virtual path with a leading slash is relative to $basedir,
            # anything else is relative to the including file's directory
            my $vf = $1;
            $vf = ($vf =~ m|^/|) ? "$basedir$vf" : "$dn/$vf";
            push @incs, $vf;
        }
    }
    close $html;
    foreach ( @incs ) {
        push @{$inc2html{$_}}, $file;
    }
}
if ($debug) {
    for ( sort keys %inc2html ) {
        print STDERR "DEBUG: $_ => ", join(", ", @{$inc2html{$_}}), "\n";
    }
}
# Find all files modified recently (within the last two days, as per the
# find -mtime argument below)
# Discard those in .git/ and bin/
# Discard any .ht*
# For any virtually included file, use the corresponding .html files instead
# For all remaining files, turn each into a valid URL
# For any valid index file, also register two URLs without the file name,
# one with an ending slash and one without.
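# %files is used as a set; its keys are the URLs to be purged.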
my %files = ();
foreach ( `find $basedir -type f -mtime -2` ) {
    chomp;
    next if /^\Q$basedir\E\/(\.git|bin)/;
    next if /\/\.ht\w+$/;
    my $x = $_;
    # An include file maps to the .html files that use it; any other file
    # maps to itself
    my @files = defined $inc2html{$x} ? @{$inc2html{$x}} : ( $x );
    foreach ( @files ) {
        s/^\Q$basedir\E\//$baseurl\//;
        $files{$_} = 1;
        if ( /^(.*)\/index\.(html|cgi|pl|php|xhtml|htm)$/ ) {
            $files{"$1/"} = $files{"$1"} = 1;
        }
    }
}
# Finally, feed the result to the akamai-purge program, in batches of at
# most 500 URLs per invocation
my @files = sort keys %files;
while ( @files ) {
    my $count = 500;            # Try not to overwhelm Akamai
    if ( $dryrun || open PURGE, '| /usr/local/bin/akamai-purge invalidate' ) {
        print STDERR
            "DEBUG: Invoking '/usr/local/bin/akamai-purge invalidate' with:\n"
            if $debug;
        while ( @files && $count-- > 0 ) {
            my $file = pop @files;
            print STDERR "  ", $file, "\n" if $debug;
            print PURGE $file, "\n" unless $dryrun;
        }
        close PURGE or warn "akamai-purge exited with status $?\n"
            unless $dryrun;
    } else {
        # Without this, a failed open would leave @files untouched and
        # loop forever
        die "Cannot start akamai-purge: $!\n";
    }
}