1 files changed, 473 insertions, 0 deletions
diff --git a/deps/v8/src/torque/earley-parser.h b/deps/v8/src/torque/earley-parser.h
new file mode 100644
index 00000000000..1e77734ab66
--- /dev/null
+++ b/deps/v8/src/torque/earley-parser.h
@@ -0,0 +1,473 @@
+// Copyright 2018 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_TORQUE_EARLEY_PARSER_H_
+#define V8_TORQUE_EARLEY_PARSER_H_
+
+#include <map>
+#include <vector>
+
+#include "src/base/optional.h"
+#include "src/torque/contextual.h"
+#include "src/torque/source-positions.h"
+#include "src/torque/utils.h"
+
+namespace v8 {
+namespace internal {
+namespace torque {
+
+class Symbol;
+class Item;
+
+class ParseResultHolderBase {
+ public:
+  enum class TypeId;
+  virtual ~ParseResultHolderBase() = default;
+  template <class T>
+  T& Cast();
+  template <class T>
+  const T& Cast() const;
+
+ protected:
+  explicit ParseResultHolderBase(TypeId type_id) : type_id_(type_id) {
+    // MSVC wrongly complains about type_id_ being an unused private field.
+    USE(type_id_);
+  }
+
+ private:
+  const TypeId type_id_;
+};
+
+using ParseResultTypeId = ParseResultHolderBase::TypeId;
+
+template <class T>
+class ParseResultHolder : public ParseResultHolderBase {
+ public:
+  explicit ParseResultHolder(T value)
+      : ParseResultHolderBase(id), value_(std::move(value)) {}
+
+ private:
+  V8_EXPORT_PRIVATE static const TypeId id;
+  friend class ParseResultHolderBase;
+  T value_;
+};
+
+template <class T>
+T& ParseResultHolderBase::Cast() {
+  CHECK_EQ(ParseResultHolder<T>::id, type_id_);
+  return static_cast<ParseResultHolder<T>*>(this)->value_;
+}
+
+template <class T>
+const T& ParseResultHolderBase::Cast() const {
+  CHECK_EQ(ParseResultHolder<T>::id, type_id_);
+  return static_cast<const ParseResultHolder<T>*>(this)->value_;
+}
+
+class ParseResult {
+ public:
+  template <class T>
+  explicit ParseResult(T x) : value_(new ParseResultHolder<T>(std::move(x))) {}
+
+  template <class T>
+  const T& Cast() const {
+    return value_->Cast<T>();
+  }
+  template <class T>
+  T& Cast() {
+    return value_->Cast<T>();
+  }
+
+ private:
+  std::unique_ptr<ParseResultHolderBase> value_;
+};
+
+using InputPosition = const char*;
+
+struct MatchedInput {
+  MatchedInput(InputPosition begin, InputPosition end, SourcePosition pos)
+      : begin(begin), end(end), pos(pos) {}
+  InputPosition begin;
+  InputPosition end;
+  SourcePosition pos;
+  std::string ToString() const { return {begin, end}; }
+};
+
+class ParseResultIterator {
+ public:
+  explicit ParseResultIterator(std::vector<ParseResult> results,
+                               MatchedInput matched_input)
+      : results_(std::move(results)), matched_input_(matched_input) {}
+  ~ParseResultIterator() {
+    // Check that all parse results have been used.
+    CHECK_EQ(results_.size(), i_);
+  }
+
+  ParseResult Next() {
+    CHECK_LT(i_, results_.size());
+    return std::move(results_[i_++]);
+  }
+  template <class T>
+  T NextAs() {
+    return std::move(Next().Cast<T>());
+  }
+  bool HasNext() const { return i_ < results_.size(); }
+
+  const MatchedInput& matched_input() const { return matched_input_; }
+
+ private:
+  std::vector<ParseResult> results_;
+  size_t i_ = 0;
+  MatchedInput matched_input_;
+
+  DISALLOW_COPY_AND_MOVE_AND_ASSIGN(ParseResultIterator);
+};
+
+struct LexerResult {
+  std::vector<Symbol*> token_symbols;
+  std::vector<MatchedInput> token_contents;
+};
+
+using Action =
+    base::Optional<ParseResult> (*)(ParseResultIterator* child_results);
+
+inline base::Optional<ParseResult> DefaultAction(
+    ParseResultIterator* child_results) {
+  if (!child_results->HasNext()) return base::nullopt;
+  return child_results->Next();
+}
+
+// A rule of the context-free grammar. Each rule can have an action attached to
+// it, which is executed after the parsing is finished.
+class Rule final {
+ public:
+  explicit Rule(std::vector<Symbol*> right_hand_side,
+                Action action = DefaultAction)
+      : right_hand_side_(std::move(right_hand_side)), action_(action) {}
+
+  Symbol* left() const {
+    DCHECK_NOT_NULL(left_hand_side_);
+    return left_hand_side_;
+  }
+  const std::vector<Symbol*>& right() const { return right_hand_side_; }
+
+  void SetLeftHandSide(Symbol* left_hand_side) {
+    DCHECK_NULL(left_hand_side_);
+    left_hand_side_ = left_hand_side;
+  }
+
+  V8_EXPORT_PRIVATE base::Optional<ParseResult> RunAction(
+      const Item* completed_item, const LexerResult& tokens) const;
+
+ private:
+  Symbol* left_hand_side_ = nullptr;
+  std::vector<Symbol*> right_hand_side_;
+  Action action_;
+};
+
+// A Symbol represents a terminal or a non-terminal of the grammar.
+// It stores the list of rules, which have this symbol as the
+// left-hand side.
+// Terminals have an empty list of rules, they are created by the Lexer
+// instead of from rules.
+// Symbols need to reside at stable memory addresses, because the addresses are
+// used in the parser.
+class Symbol {
+ public:
+  Symbol() : Symbol({}) {}
+  Symbol(std::initializer_list<Rule> rules) { *this = rules; }
+
+  V8_EXPORT_PRIVATE Symbol& operator=(std::initializer_list<Rule> rules);
+
+  bool IsTerminal() const { return rules_.empty(); }
+  Rule* rule(size_t index) const { return rules_[index].get(); }
+  size_t rule_number() const { return rules_.size(); }
+
+  void AddRule(const Rule& rule) {
+    rules_.push_back(base::make_unique<Rule>(rule));
+    rules_.back()->SetLeftHandSide(this);
+  }
+
+  V8_EXPORT_PRIVATE base::Optional<ParseResult> RunAction(
+      const Item* item, const LexerResult& tokens);
+
+ private:
+  std::vector<std::unique_ptr<Rule>> rules_;
+
+  // Disallow copying and moving to ensure Symbol has a stable address.
+  DISALLOW_COPY_AND_MOVE_AND_ASSIGN(Symbol);
+};
+
+// Items are the core datastructure of Earley's algorithm.
+// They consist of a (partially) matched rule, a marked position inside of the
+// right-hand side of the rule (traditionally written as a dot) and an input
+// range from {start} to {pos} that matches the symbols of the right-hand side
+// that are left of the mark. In addition, they store a child and a left-sibling
+// pointer to reconstruct the AST in the end.
+class Item {
+ public:
+  Item(const Rule* rule, size_t mark, size_t start, size_t pos)
+      : rule_(rule), mark_(mark), start_(start), pos_(pos) {
+    DCHECK_LE(mark_, right().size());
+  }
+
+  // A complete item has the mark at the right end, which means the input range
+  // matches the complete rule.
+  bool IsComplete() const {
+    DCHECK_LE(mark_, right().size());
+    return mark_ == right().size();
+  }
+
+  // The symbol right after the mark is expected at {pos} for this item to
+  // advance.
+  Symbol* NextSymbol() const {
+    DCHECK(!IsComplete());
+    DCHECK_LT(mark_, right().size());
+    return right()[mark_];
+  }
+
+  // We successfully parsed NextSymbol() between {pos} and {new_pos}.
+  // If NextSymbol() was a non-terminal, then {child} is a pointer to a
+  // completed item for this parse.
+  // We create a new item, which moves the mark one forward.
+  Item Advance(size_t new_pos, const Item* child = nullptr) const {
+    if (child) {
+      DCHECK(child->IsComplete());
+      DCHECK_EQ(pos(), child->start());
+      DCHECK_EQ(new_pos, child->pos());
+      DCHECK_EQ(NextSymbol(), child->left());
+    }
+    Item result(rule_, mark_ + 1, start_, new_pos);
+    result.prev_ = this;
+    result.child_ = child;
+    return result;
+  }
+
+  // Collect the items representing the AST children of this completed item.
+  std::vector<const Item*> Children() const;
+  // The matched input separated according to the next branching AST level.
+  std::string SplitByChildren(const LexerResult& tokens) const;
+  // Check if {other} results in the same AST as this Item.
+  void CheckAmbiguity(const Item& other, const LexerResult& tokens) const;
+
+  MatchedInput GetMatchedInput(const LexerResult& tokens) const {
+    return {tokens.token_contents[start_].begin,
+            start_ == pos_ ? tokens.token_contents[start_].begin
+                           : tokens.token_contents[pos_ - 1].end,
+            tokens.token_contents[start_].pos};
+  }
+
+  // We exclude {prev_} and {child_} from equality and hash computations,
+  // because they are just globally unique data associated with an item.
+  bool operator==(const Item& other) const {
+    return rule_ == other.rule_ && mark_ == other.mark_ &&
+           start_ == other.start_ && pos_ == other.pos_;
+  }
+
+  friend size_t hash_value(const Item& i) {
+    return base::hash_combine(i.rule_, i.mark_, i.start_, i.pos_);
+  }
+
+  const Rule* rule() const { return rule_; }
+  Symbol* left() const { return rule_->left(); }
+  const std::vector<Symbol*>& right() const { return rule_->right(); }
+  size_t pos() const { return pos_; }
+  size_t start() const { return start_; }
+
+ private:
+  const Rule* rule_;
+  size_t mark_;
+  size_t start_;
+  size_t pos_;
+
+  const Item* prev_ = nullptr;
+  const Item* child_ = nullptr;
+};
+
+inline base::Optional<ParseResult> Symbol::RunAction(
+    const Item* item, const LexerResult& tokens) {
+  DCHECK(item->IsComplete());
+  DCHECK_EQ(item->left(), this);
+  return item->rule()->RunAction(item, tokens);
+}
+
+V8_EXPORT_PRIVATE const Item* RunEarleyAlgorithm(
+    Symbol* start, const LexerResult& tokens,
+    std::unordered_set<Item, base::hash<Item>>* processed);
+
+inline base::Optional<ParseResult> ParseTokens(Symbol* start,
+                                               const LexerResult& tokens) {
+  std::unordered_set<Item, base::hash<Item>> table;
+  const Item* final_item = RunEarleyAlgorithm(start, tokens, &table);
+  return start->RunAction(final_item, tokens);
+}
+
+// The lexical syntax is dynamically defined while building the grammar by
+// adding patterns and keywords to the Lexer.
+// The term keyword here can stand for any fixed character sequence, including
+// operators and parentheses.
+// Each pattern or keyword automatically gets a terminal symbol associated with
+// it. These symbols form the result of the lexing.
+// Patterns and keywords are matched using the longest match principle. If the
+// longest matching pattern coincides with a keyword, the keyword symbol is
+// chosen instead of the pattern.
+// In addition, there is a single whitespace pattern which is consumed but does
+// not become part of the token list.
+class Lexer {
+ public:
+  // Functions to define patterns. They try to match starting from {pos}. If
+  // successful, they return true and advance {pos}. Otherwise, {pos} stays
+  // unchanged.
+  using PatternFunction = bool (*)(InputPosition* pos);
+
+  void SetWhitespace(PatternFunction whitespace) {
+    match_whitespace_ = whitespace;
+  }
+
+  Symbol* Pattern(PatternFunction pattern) { return &patterns_[pattern]; }
+  Symbol* Token(const std::string& keyword) { return &keywords_[keyword]; }
+  V8_EXPORT_PRIVATE LexerResult RunLexer(const std::string& input);
+
+ private:
+  PatternFunction match_whitespace_ = [](InputPosition*) { return false; };
+  std::map<PatternFunction, Symbol> patterns_;
+  std::map<std::string, Symbol> keywords_;
+  Symbol* MatchToken(InputPosition* pos, InputPosition end);
+};
+
+// A grammar can have a result, which is the results of the start symbol.
+// Grammar is intended to be subclassed, with Symbol members forming the
+// mutually recursive rules of the grammar.
+class Grammar {
+ public:
+  using PatternFunction = Lexer::PatternFunction;
+
+  explicit Grammar(Symbol* start) : start_(start) {}
+
+  base::Optional<ParseResult> Parse(const std::string& input) {
+    LexerResult tokens = lexer().RunLexer(input);
+    return ParseTokens(start_, tokens);
+  }
+
+ protected:
+  Symbol* Token(const std::string& s) { return lexer_.Token(s); }
+  Symbol* Pattern(PatternFunction pattern) { return lexer_.Pattern(pattern); }
+  void SetWhitespace(PatternFunction ws) { lexer_.SetWhitespace(ws); }
+
+  // NewSymbol() allocates a fresh symbol and stores it in the current grammar.
+  // This is necessary to define helpers that create new symbols.
+  Symbol* NewSymbol(std::initializer_list<Rule> rules = {}) {
+    Symbol* result = new Symbol(rules);
+    generated_symbols_.push_back(std::unique_ptr<Symbol>(result));
+    return result;
+  }
+
+  // Helper functions to define lexer patterns. If they match, they return true
+  // and advance {pos}. Otherwise, {pos} is unchanged.
+  V8_EXPORT_PRIVATE static bool MatchChar(int (*char_class)(int),
+                                          InputPosition* pos);
+  V8_EXPORT_PRIVATE static bool MatchChar(bool (*char_class)(char),
+                                          InputPosition* pos);
+  V8_EXPORT_PRIVATE static bool MatchAnyChar(InputPosition* pos);
+  V8_EXPORT_PRIVATE static bool MatchString(const char* s, InputPosition* pos);
+
+  // The action MatchInput() produces the input matched by the rule as
+  // result.
+  static base::Optional<ParseResult> YieldMatchedInput(
+      ParseResultIterator* child_results) {
+    return ParseResult{child_results->matched_input().ToString()};
+  }
+
+  // Create a new symbol to parse the given sequence of symbols.
+  // At most one of the symbols can return a result.
+  Symbol* Sequence(std::vector<Symbol*> symbols) {
+    return NewSymbol({Rule(std::move(symbols))});
+  }
+
+  template <class T, T value>
+  static base::Optional<ParseResult> YieldIntegralConstant(
+      ParseResultIterator* child_results) {
+    return ParseResult{value};
+  }
+
+  template <class T>
+  static base::Optional<ParseResult> YieldDefaultValue(
+      ParseResultIterator* child_results) {
+    return ParseResult{T{}};
+  }
+
+  template <class From, class To>
+  static base::Optional<ParseResult> CastParseResult(
+      ParseResultIterator* child_results) {
+    To result = std::move(child_results->NextAs<From>());
+    return ParseResult{std::move(result)};
+  }
+
+  // Try to parse {s} and return the result of type {Result} casted to {T}.
+  // Otherwise, the result is a default-constructed {T}.
+  template <class T, class Result = T>
+  Symbol* TryOrDefault(Symbol* s) {
+    return NewSymbol({Rule({s}, CastParseResult<Result, T>),
+                      Rule({}, YieldDefaultValue<T>)});
+  }
+
+  template <class T>
+  static base::Optional<ParseResult> MakeSingletonVector(
+      ParseResultIterator* child_results) {
+    T x = child_results->NextAs<T>();
+    std::vector<T> result;
+    result.push_back(std::move(x));
+    return ParseResult{std::move(result)};
+  }
+
+  template <class T>
+  static base::Optional<ParseResult> MakeExtendedVector(
+      ParseResultIterator* child_results) {
+    std::vector<T> l = child_results->NextAs<std::vector<T>>();
+    T x = child_results->NextAs<T>();
+    l.push_back(std::move(x));
+    return ParseResult{std::move(l)};
+  }
+
+  // For example, NonemptyList(Token("A"), Token(",")) parses any of
+  // A or A,A or A,A,A and so on.
+  template <class T>
+  Symbol* NonemptyList(Symbol* element,
+                       base::Optional<Symbol*> separator = {}) {
+    Symbol* list = NewSymbol();
+    *list = {Rule({element}, MakeSingletonVector<T>),
+             separator
+                 ? Rule({list, *separator, element}, MakeExtendedVector<T>)
+                 : Rule({list, element}, MakeExtendedVector<T>)};
+    return list;
+  }
+
+  template <class T>
+  Symbol* List(Symbol* element, base::Optional<Symbol*> separator = {}) {
+    return TryOrDefault<std::vector<T>>(NonemptyList<T>(element, separator));
+  }
+
+  template <class T>
+  Symbol* Optional(Symbol* x) {
+    return TryOrDefault<base::Optional<T>, T>(x);
+  }
+
+  Symbol* CheckIf(Symbol* x) {
+    return NewSymbol({Rule({x}, YieldIntegralConstant<bool, true>),
+                      Rule({}, YieldIntegralConstant<bool, false>)});
+  }
+
+  Lexer& lexer() { return lexer_; }
+
+ private:
+  Lexer lexer_;
+  std::vector<std::unique_ptr<Symbol>> generated_symbols_;
+  Symbol* start_;
+};
+
+}  // namespace torque
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_TORQUE_EARLEY_PARSER_H_