diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2021-06-17 15:10:02 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2021-06-17 15:10:02 +0300 |
commit | 11cb5f046dddc630abd416593e176d65f6ba2b69 (patch) | |
tree | 01019213f9ea4a50fa5b7c7593e98570f6fa5c69 /lib/gitlab/template_parser | |
parent | 612bb6f624ea7fdf5fd20e3332d543191603db88 (diff) |
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib/gitlab/template_parser')
-rw-r--r-- | lib/gitlab/template_parser/ast.rb | 157 | ||||
-rw-r--r-- | lib/gitlab/template_parser/error.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/template_parser/eval_state.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/template_parser/parser.rb | 176 |
4 files changed, 367 insertions, 0 deletions
# lib/gitlab/template_parser/ast.rb (new file, mode 100644, index 00000000000..89318ee0d68)
# frozen_string_literal: true

module Gitlab
  module TemplateParser
    # AST nodes to evaluate when rendering a template.
    #
    # Evaluating an AST is done by walking over the nodes and calling
    # `evaluate`. This method takes two arguments:
    #
    # 1. An instance of `EvalState`, used for tracking data such as the number
    #    of nested loops.
    # 2. An object used as the data for the current scope. This can be an Array,
    #    Hash, String, or something else. It's up to the AST node to determine
    #    what to do with it.
    #
    # While tree walking interpreters (such as implemented here) aren't usually
    # the fastest type of interpreter, they are:
    #
    # 1. Fast enough for our use case
    # 2. Easy to implement and maintain
    #
    # In addition, our AST interpreter doesn't allow for arbitrary code
    # execution, unlike existing template engines such as Mustache
    # (https://github.com/mustache/mustache/issues/244) or ERB.
    #
    # Our interpreter also takes care of limiting the number of nested loops.
    # And unlike Liquid, our interpreter is much smaller and thus has a smaller
    # attack surface. Liquid isn't without its share of issues, such as
    # https://github.com/Shopify/liquid/pull/1071.
    #
    # We also evaluated using Handlebars using the project
    # https://github.com/SmartBear/ruby-handlebars. Sadly, this implementation
    # of Handlebars doesn't support control of whitespace
    # (https://github.com/SmartBear/ruby-handlebars/issues/37), and the project
    # didn't appear to be maintained that much.
    #
    # This doesn't mean these template engines aren't good, instead it means
    # they won't work for our use case. For more information, refer to the
    # comment https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50063#note_469293322.
    module AST
      # An identifier in a selector.
      #
      # The special name `it` evaluates to the current scope itself. Any other
      # name is looked up as a key when the scope is a Hash; for every other
      # kind of scope the result is `nil`.
      Identifier = Struct.new(:name) do
        def evaluate(state, data)
          return data if name == 'it'

          data[name] if data.is_a?(Hash)
        end
      end

      # An integer used in a selector.
      #
      # Used as an index into the current scope when it is an Array; for every
      # other kind of scope the result is `nil`.
      Integer = Struct.new(:value) do
        def evaluate(state, data)
          data[value] if data.is_a?(Array)
        end
      end

      # A selector used for loading a value.
      #
      # Each step is evaluated against the result of the previous step,
      # starting with the current scope.
      Selector = Struct.new(:steps) do
        def evaluate(state, data)
          steps.reduce(data) do |current, step|
            # A bare `break` makes `reduce` return nil, so a missing
            # intermediate value short-circuits the remaining steps.
            break if current.nil?

            step.evaluate(state, current)
          end
        end
      end

      # A tag used for displaying a value in the output.
      #
      # The selected value is converted using `to_s`, so a missing value (nil)
      # is rendered as an empty String.
      Variable = Struct.new(:selector) do
        def evaluate(state, data)
          selector.evaluate(state, data).to_s
        end
      end

      # A collection of zero or more expressions.
      #
      # Evaluates to the concatenated output of all its nodes.
      Expressions = Struct.new(:nodes) do
        def evaluate(state, data)
          nodes.map { |node| node.evaluate(state, data) }.join('')
        end
      end

      # A single text node.
      #
      # Evaluates to its text as-is; the state and scope are ignored.
      Text = Struct.new(:text) do
        def evaluate(*)
          text
        end
      end

      # An `if` expression, with an optional `else` clause.
      If = Struct.new(:condition, :true_body, :false_body) do
        def evaluate(state, data)
          result =
            if truthy?(condition.evaluate(state, data))
              true_body.evaluate(state, data)
            elsif false_body
              false_body.evaluate(state, data)
            end

          # When the condition is falsy and there is no `else` clause, `result`
          # is nil and the expression renders as an empty String.
          result.to_s
        end

        def truthy?(value)
          # We treat empty collections and such as false, removing the need for
          # some sort of `if length(x) > 0` expression.
          value.respond_to?(:empty?) ? !value.empty? : !!value
        end
      end

      # An `each` expression.
      #
      # The body is evaluated once per value in the collection, with that value
      # as the scope. Values that don't respond to `each` render as an empty
      # String.
      Each = Struct.new(:collection, :body) do
        def evaluate(state, data)
          values = collection.evaluate(state, data)

          # NOTE(review): the guard checks for `each` while `map` is called
          # below; this assumes the value is Enumerable-like (e.g. an Array or
          # Hash).
          return '' unless values.respond_to?(:each)

          # While unlikely to happen, it's possible users attempt to nest many
          # loops in order to negatively impact the GitLab instance. To make
          # this more difficult, we limit the number of nested loops a user can
          # create.
          state.enter_loop do
            values.map { |value| body.evaluate(state, value) }.join('')
          end
        end
      end

      # A class for transforming a raw Parslet AST into a more structured/easier
      # to work with AST.
      #
      # For more information about Parslet transformations, refer to the
      # documentation at http://kschiess.github.io/parslet/transform.html.
      class Transformer < Parslet::Transform
        rule(ident: simple(:name)) { Identifier.new(name.to_s) }
        rule(int: simple(:name)) { Integer.new(name.to_i) }
        rule(text: simple(:text)) { Text.new(text.to_s) }
        rule(exprs: subtree(:nodes)) { Expressions.new(nodes) }
        rule(selector: sequence(:steps)) { Selector.new(steps) }
        rule(selector: simple(:step)) { Selector.new([step]) }
        rule(variable: simple(:selector)) { Variable.new(selector) }
        rule(each: simple(:values), body: simple(:body)) do
          Each.new(values, body)
        end

        rule(if: simple(:cond), true_body: simple(:true_body)) do
          If.new(cond, true_body)
        end

        rule(
          if: simple(:cond),
          true_body: simple(:true_body),
          false_body: simple(:false_body)
        ) do
          If.new(cond, true_body, false_body)
        end
      end
    end
  end
end

# lib/gitlab/template_parser/error.rb (new file, mode 100644, index 00000000000..1dcde448749)
# frozen_string_literal: true

module Gitlab
  module TemplateParser
    # An error raised when a template couldn't be rendered.
    Error = Class.new(StandardError)
  end
end

# lib/gitlab/template_parser/eval_state.rb (new file, mode 100644, index 00000000000..7cf2ab21f50)
# frozen_string_literal: true

module Gitlab
  module TemplateParser
    # A class for tracking state when evaluating a template
    class EvalState
      # The maximum number of loops that may be nested inside each other.
      MAX_LOOPS = 4

      def initialize
        @loops = 0
      end

      # Runs the given block as the body of a (possibly nested) loop and
      # returns the block's return value.
      #
      # Raises `Error` when entering the loop would exceed `MAX_LOOPS` levels
      # of nesting.
      def enter_loop
        if @loops >= MAX_LOOPS
          raise Error, "You can only nest up to #{MAX_LOOPS} loops"
        end

        @loops += 1

        begin
          yield
        ensure
          # Always restore the nesting depth, including when the block raises
          # (e.g. because an inner loop hit the nesting limit). Without this
          # the counter would stay incremented if the state is used again
          # after an error.
          @loops -= 1
        end
      end
    end
  end
end

# lib/gitlab/template_parser/parser.rb (new file, mode 100644, index 00000000000..157339414c4)
# frozen_string_literal: true

module Gitlab
  module TemplateParser
    # A parser for a simple template syntax, used for example to generate changelogs.
    #
    # As a quick primer on the template syntax, a basic template looks like
    # this:
    #
    #     {% each users %}
    #     Name: {{name}}
    #     Age: {{age}}
    #
    #     {% if birthday %}
    #     This user is celebrating their birthday today! Yay!
    #     {% end %}
    #     {% end %}
    #
    # For more information, refer to the Parslet documentation found at
    # http://kschiess.github.io/parslet/.
    class Parser < Parslet::Parser
      root(:exprs)

      rule(:exprs) do
        (
          variable | if_expr | each_expr | escaped | text | newline
        ).repeat.as(:exprs)
      end

      rule(:space) { match('[ \\t]') }
      rule(:whitespace) { match('\s').repeat }
      rule(:lf) { str("\n") }
      rule(:newline) { lf.as(:text) }

      # Escaped newlines are ignored, allowing the user to control the
      # whitespace in the output. All other escape sequences are treated as
      # literal text.
      #
      # For example, this:
      #
      #     foo \
      #     bar
      #
      # Is parsed into this:
      #
      #     foo bar
      rule(:escaped) do
        backslash = str('\\')

        (backslash >> lf).ignore | (backslash >> chars).as(:text)
      end

      # A sequence of regular characters, with the exception of newlines and
      # escaped newlines.
      rule(:chars) do
        char = match("[^{\\\\\n]")

        # The rules here are such that we do treat single curly braces or
        # non-opening tags (e.g. `{foo}`) as text, but not opening tags
        # themselves (e.g. `{{`).
        (
          char.repeat(1) | curly_open >> (curly_open | percent).absent?
        ).repeat(1)
      end

      rule(:text) { chars.as(:text) }

      # An integer, limited to 10 digits (= a 32 bits integer).
      #
      # The size is limited to prevent users from creating integers that are
      # too large, as this may result in runtime errors.
      rule(:integer) { match('\d').repeat(1, 10).as(:int) }

      # An identifier to look up in a data structure.
      #
      # We only support simple ASCII identifiers as we simply don't have a need
      # for more complex identifiers (e.g. those containing multibyte
      # characters).
      rule(:ident) { match('[a-zA-Z_]').repeat(1).as(:ident) }

      # A selector is used for reading a value, consisting of one or more
      # "steps".
      #
      # Examples:
      #
      #     name
      #     users.0.name
      #     0
      #     it
      rule(:selector) do
        step = ident | integer

        whitespace >>
          (step >> (str('.') >> step).repeat).as(:selector) >>
          whitespace
      end

      rule(:curly_open) { str('{') }
      rule(:curly_close) { str('}') }
      rule(:percent) { str('%') }

      # A variable tag.
      #
      # Examples:
      #
      #     {{name}}
      #     {{users.0.name}}
      rule(:variable) do
        curly_open.repeat(2) >> selector.as(:variable) >> curly_close.repeat(2)
      end

      rule(:expr_open) { curly_open >> percent >> whitespace }
      rule(:expr_close) do
        # Since whitespace control is important (as Markdown is whitespace
        # sensitive), we default to stripping a newline that follows a %} tag.
        # This is less annoying compared to having to opt-in to this behaviour.
        whitespace >> percent >> curly_close >> lf.maybe.ignore
      end

      rule(:end_tag) { expr_open >> str('end') >> expr_close }

      # An `if` expression, with an optional `else` clause.
      #
      # Examples:
      #
      #     {% if foo %}
      #     yes
      #     {% end %}
      #
      #     {% if foo %}
      #     yes
      #     {% else %}
      #     no
      #     {% end %}
      rule(:if_expr) do
        else_tag =
          expr_open >> str('else') >> expr_close >> exprs.as(:false_body)

        expr_open >>
          str('if') >>
          space.repeat(1) >>
          selector.as(:if) >>
          expr_close >>
          exprs.as(:true_body) >>
          else_tag.maybe >>
          end_tag
      end

      # An `each` expression, used for iterating over collections.
      #
      # Example:
      #
      #     {% each users %}
      #     * {{name}}
      #     {% end %}
      rule(:each_expr) do
        expr_open >>
          str('each') >>
          space.repeat(1) >>
          selector.as(:each) >>
          expr_close >>
          exprs.as(:body) >>
          end_tag
      end

      # Parses the template source `input` and transforms the raw Parslet
      # tree into the AST nodes defined in `AST`.
      #
      # Raises `Error` when the template can't be parsed.
      def parse_and_transform(input)
        AST::Transformer.new.apply(parse(input))
      rescue Parslet::ParseFailed => ex
        # We raise a custom error so it's easier to catch different parser
        # related errors. In addition, this ensures the caller of this method
        # doesn't depend on a Parslet specific error class.
        raise Error, "Failed to parse the template: #{ex.message}"
      end
    end
  end
end