Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Sunshine <sunshine@sunshineco.com> 2022-09-01 03:29:41 +0300
committer: Junio C Hamano <gitster@pobox.com> 2022-09-01 20:07:40 +0300
commit: 6594554119811a01888b44112a7daec6fa0312b2 (patch)
tree: 8e8cc81dd951dca7a449c82aa954af681ae2b353 /t/chainlint.pl
parent: 7d4804731ed642b92b516908fb93397b08e986bf (diff)
chainlint.pl: add POSIX shell parser
Continue fleshing out chainlint.pl by adding a general purpose recursive descent parser for the POSIX shell command language. Although never invoked directly, upcoming parser subclasses will extend its functionality for specific purposes, such as plucking test definitions from input scripts and applying domain-specific knowledge to perform test validation. Signed-off-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't/chainlint.pl')
-rwxr-xr-x t/chainlint.pl 243
1 files changed, 243 insertions, 0 deletions
diff --git a/t/chainlint.pl b/t/chainlint.pl
index 81ffbf28bf..cdf136896b 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -198,6 +198,249 @@ RESTART:
return length($token) ? $token : undef;
}
+# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
+# is a recursive descent parser very roughly modeled after section 2.10 "Shell
+# Grammar" of POSIX chapter 2 "Shell Command Language".
+package ShellParser;
+
+# Construct a parser for the shell text $s. The object starts with three
+# empty lists -- {buff} (pushed-back tokens), {stop} (stack of stop patterns)
+# and {output} (diagnostics) -- and owns a Lexer that is handed both the
+# parser itself and the text.
+sub new {
+    my ($class, $s) = @_;
+    my %state = (
+        buff   => [],
+        stop   => [],
+        output => [],
+    );
+    my $self = bless \%state, $class;
+    $self->{lexer} = Lexer->new($self, $s);
+    return $self;
+}
+
+# Return the next token: the most recently pushed-back one when {buff} is
+# non-empty, otherwise whatever the lexer's scan_token() produces.
+sub next_token {
+    my ($self) = @_;
+    my $pending = $self->{buff};
+    return @$pending ? pop(@$pending) : $self->{lexer}->scan_token();
+}
+
+# Push the given token(s) back onto {buff} so that next_token() hands them
+# out again before consulting the lexer; the last one pushed comes out first.
+sub untoken {
+    my ($self, @tokens) = @_;
+    push(@{$self->{buff}}, @tokens);
+}
+
+# Look at the upcoming token without consuming it: fetch it, push it straight
+# back, and return it. Returns undef when no token is available.
+sub peek {
+    my ($self) = @_;
+    my $ahead = $self->next_token();
+    if (defined($ahead)) {
+        $self->untoken($ahead);
+        return $ahead;
+    }
+    return undef;
+}
+
+# Decide whether parsing should halt at $token. An undefined token (end of
+# input) always halts. Otherwise the token is matched against the innermost
+# stop pattern, i.e. the last entry of the {stop} stack; an empty stack or an
+# undefined top entry never halts.
+sub stop_at {
+    my ($self, $token) = @_;
+    return 1 unless defined($token);
+    # Declare unconditionally: perlsyn documents "my $x = ... if COND" as
+    # having undefined behavior, so the emptiness test lives in the expression.
+    my $stack = $self->{stop};
+    my $stop = @$stack ? $stack->[-1] : undef;
+    return defined($stop) && $token =~ $stop;
+}
+
+# Consume the next token and return it when it is exactly $expect. On a
+# mismatch, or at end of input, append an "?!ERR?!" diagnostic to {output},
+# push any mismatched token back, and return the empty list.
+sub expect {
+    my ($self, $expect) = @_;
+    my $got = $self->next_token();
+    my $have = defined($got);
+    return $got if $have && $got eq $expect;
+    my $found = $have ? $got : "<end-of-input>";
+    push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . $found . "'\n");
+    $self->untoken($got) if $have;
+    return ();
+}
+
+# Consume and return every consecutive "\n" token at the current position;
+# returns the empty list when the upcoming token is not a newline.
+sub optional_newlines {
+    my ($self) = @_;
+    my @newlines;
+    for (;;) {
+        my $ahead = $self->peek();
+        last unless $ahead && $ahead eq "\n";
+        push(@newlines, $self->next_token());
+    }
+    return @newlines;
+}
+
+# Parse the remainder of a "{ ... }" group (parse_cmd() has already consumed
+# the opening brace): the commands up to '}' followed by the '}' itself.
+sub parse_group {
+    my ($self) = @_;
+    return (
+        $self->parse(qr/^}$/),
+        $self->expect('}'),
+    );
+}
+
+# Parse the remainder of a "( ... )" subshell (parse_cmd() has already
+# consumed the opening parenthesis): the commands up to ')' followed by the
+# ')' itself.
+sub parse_subshell {
+    my ($self) = @_;
+    return (
+        $self->parse(qr/^\)$/),
+        $self->expect(')'),
+    );
+}
+
+# Collect the tokens of one case-item pattern: everything up to and including
+# the first ')' token, or up to end of input if no ')' is seen.
+sub parse_case_pattern {
+    my ($self) = @_;
+    my @pattern;
+    for (;;) {
+        my $piece = $self->next_token();
+        last unless defined($piece);
+        push(@pattern, $piece);
+        last if $piece eq ')';
+    }
+    return @pattern;
+}
+
+# Parse the remainder of a "case" command (the "case" word itself has been
+# consumed by parse_cmd()): the subject, "in", then zero or more items each
+# made of a pattern and a body, separated by ";;", and finally "esac".
+# Returns all tokens seen, in order.
+sub parse_case {
+    my ($self) = @_;
+    my @parts;
+    push(@parts, $self->next_token()); # subject
+    push(@parts, $self->optional_newlines());
+    push(@parts, $self->expect('in'));
+    push(@parts, $self->optional_newlines());
+    for (;;) {
+        my $ahead = $self->peek();
+        last if !defined($ahead) || $ahead eq 'esac';
+        push(@parts, $self->parse_case_pattern());
+        push(@parts, $self->optional_newlines());
+        push(@parts, $self->parse(qr/^(?:;;|esac)$/)); # item body
+        # the final item may run straight into "esac" with no ";;"
+        $ahead = $self->peek();
+        last if !defined($ahead) || $ahead eq 'esac';
+        push(@parts, $self->expect(';;'));
+        push(@parts, $self->optional_newlines());
+    }
+    push(@parts, $self->expect('esac'));
+    return @parts;
+}
+
+# Parse the remainder of a "for" loop (the "for" word itself has been
+# consumed by parse_cmd()): the loop variable, an optional "in", the item
+# list up to "do", then the body and the closing "done". Returns all tokens
+# seen, in order.
+sub parse_for {
+    my ($self) = @_;
+    my @parts;
+    push(@parts, $self->next_token()); # variable
+    push(@parts, $self->optional_newlines());
+    my $ahead = $self->peek();
+    if (defined($ahead) && $ahead eq 'in') {
+        push(@parts, $self->expect('in'));
+        push(@parts, $self->optional_newlines());
+    }
+    push(@parts, $self->parse(qr/^do$/)); # items
+    push(@parts, $self->expect('do'));
+    push(@parts, $self->optional_newlines());
+    push(@parts, $self->parse_loop_body());
+    push(@parts, $self->expect('done'));
+    return @parts;
+}
+
+# Parse the remainder of an "if" command (the "if" word itself has been
+# consumed by parse_cmd()): one condition/"then"/body round per if/elif
+# branch, an optional "else" body, and the closing "fi". Returns all tokens
+# seen, in order.
+sub parse_if {
+    my ($self) = @_;
+    my @parts;
+    my $ahead;
+    for (;;) {
+        push(@parts, $self->parse(qr/^then$/)); # if/elif condition
+        push(@parts, $self->expect('then'));
+        push(@parts, $self->optional_newlines());
+        push(@parts, $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
+        $ahead = $self->peek();
+        last unless defined($ahead) && $ahead eq 'elif';
+        push(@parts, $self->expect('elif'));
+    }
+    $ahead = $self->peek();
+    if (defined($ahead) && $ahead eq 'else') {
+        push(@parts, $self->expect('else'));
+        push(@parts, $self->optional_newlines());
+        push(@parts, $self->parse(qr/^fi$/)); # else body
+    }
+    push(@parts, $self->expect('fi'));
+    return @parts;
+}
+
+# Parse the body of a for/while/until loop: every command up to, but not
+# including, the "done" token. Both parse_for() and parse_loop() reach the
+# body through this method.
+sub parse_loop_body {
+    my ($self) = @_;
+    my $until_done = qr/^done$/;
+    return $self->parse($until_done);
+}
+
+# Parse the remainder of a "while" or "until" loop (the keyword itself has
+# been consumed by parse_cmd()): the condition up to "do", the "do" token,
+# optional newlines, the loop body, and the closing "done".
+sub parse_loop {
+    my ($self) = @_;
+    return (
+        $self->parse(qr/^do$/), # condition
+        $self->expect('do'),
+        $self->optional_newlines(),
+        $self->parse_loop_body(),
+        $self->expect('done'),
+    );
+}
+
+# Parse what follows a function name in a definition "name() body": the '('
+# and ')' tokens, optional newlines, and then the body as a single command.
+sub parse_func {
+    my ($self) = @_;
+    return (
+        $self->expect('('),
+        $self->expect(')'),
+        $self->optional_newlines(),
+        $self->parse_cmd(), # body
+    );
+}
+
+# Collect the tokens of a Bash array initializer "( ... )": the opening '('
+# (via expect) and then every token up to and including the first ')', or up
+# to end of input if no ')' is seen.
+sub parse_bash_array_assignment {
+    my ($self) = @_;
+    my @pieces = $self->expect('(');
+    for (;;) {
+        my $piece = $self->next_token();
+        last unless defined($piece);
+        push(@pieces, $piece);
+        last if $piece eq ')';
+    }
+    return @pieces;
+}
+
+# Dispatch table: maps the token that opens a compound command to the code
+# reference which parses the rest of that command. parse_cmd() looks the
+# leading token up here and invokes the handler as a method on the parser.
+my %compound = (
+    '{'    => \&parse_group,
+    '('    => \&parse_subshell,
+    'case' => \&parse_case,
+    'for'  => \&parse_for,
+    'if'   => \&parse_if,
+    # "while" and "until" share a single handler
+    (map { $_ => \&parse_loop } qw(while until)),
+);
+
+# Parse a single command and return its tokens. Returns the empty list at end
+# of input and a lone "\n" for an empty line. A leading '!' is followed by a
+# recursively parsed command; a token found in %compound is handed to its
+# handler; a token followed by '(' is either a function definition or, when
+# the token ends in "<word-char>=", a Bash array assignment whose text is
+# glued onto the command token. Remaining tokens are then gathered up to and
+# including a command terminator, or up to (not including) a stop token.
+sub parse_cmd {
+    my ($self) = @_;
+    my $cmd = $self->next_token();
+    return () unless defined($cmd);
+    return $cmd if $cmd eq "\n";
+
+    my @words = ($cmd);
+    my $ahead;
+    if ($cmd eq '!') {
+        push(@words, $self->parse_cmd());
+        return @words;
+    }
+    if (my $handler = $compound{$cmd}) {
+        push(@words, $self->$handler());
+    }
+    elsif (defined($ahead = $self->peek()) && $ahead eq '(') {
+        if ($cmd =~ /\w=$/) {
+            $words[-1] .= join(' ', $self->parse_bash_array_assignment());
+        }
+        else {
+            push(@words, $self->parse_func());
+            return @words;
+        }
+    }
+
+    for (;;) {
+        my $word = $self->next_token();
+        last unless defined($word);
+        if ($self->stop_at($word)) {
+            # leave the stop token for the caller to consume
+            $self->untoken($word);
+            last;
+        }
+        push(@words, $word);
+        # terminators: ';', '&', newline, '|', '&&', '||'
+        last if $word =~ /^(?:[;&\n|]|&&|\|\|)$/;
+    }
+    # absorb a newline that directly follows the command
+    if ($words[-1] ne "\n") {
+        $ahead = $self->peek();
+        push(@words, $self->next_token()) if defined($ahead) && $ahead eq "\n";
+    }
+    return @words;
+}
+
+# Append the tokens of one parsed command (array ref $cmd) to the running
+# token list (array ref $tokens). parse() routes every command through this
+# method -- presumably so that subclasses can intercept it; confirm against
+# the subclasses.
+sub accumulate {
+    my ($self, $tokens, $cmd) = @_;
+    push(@{$tokens}, @{$cmd});
+}
+
+# Parse a sequence of commands and return all of their tokens. $stop is a
+# compiled pattern (or undef for none) that is pushed onto the {stop} stack
+# for the duration of the call; parsing ends, without consuming the token,
+# as soon as the upcoming token matches it, and also ends at end of input.
+# The stop pattern is popped again before returning.
+sub parse {
+    my ($self, $stop) = @_;
+    # Declared before any early exit, so the returned array is always a
+    # properly introduced lexical rather than one skipped over by a goto.
+    my @tokens;
+    push(@{$self->{stop}}, $stop);
+    unless ($self->stop_at($self->peek())) {
+        while (my @cmd = $self->parse_cmd()) {
+            $self->accumulate(\@tokens, \@cmd);
+            last if $self->stop_at($self->peek());
+        }
+    }
+    pop(@{$self->{stop}});
+    return @tokens;
+}
+
package ScriptParser;
sub new {