#pragma once #include "util/string_piece.hh" namespace Moses { namespace ScoreStsg { enum TreeFragmentTokenType { TreeFragmentToken_EOS, TreeFragmentToken_LSB, TreeFragmentToken_RSB, TreeFragmentToken_WORD }; struct TreeFragmentToken { public: TreeFragmentToken(TreeFragmentTokenType, StringPiece, std::size_t); TreeFragmentTokenType type; StringPiece value; std::size_t pos; }; // Tokenizes tree fragment strings in Moses format. // // For example, the string "[NP [NP [NN a]] [NP]]" is tokenized to the sequence: // // 1 LSB "[" // 2 WORD "NP" // 3 LSB "[" // 4 WORD "NP" // 5 LSB "[" // 6 WORD "NN" // 7 WORD "a" // 8 RSB "]" // 9 RSB "]" // 10 LSB "[" // 11 WORD "NP" // 12 RSB "]" // 13 RSB "]" // 14 EOS undefined // class TreeFragmentTokenizer { public: TreeFragmentTokenizer(); TreeFragmentTokenizer(const StringPiece &); const TreeFragmentToken &operator*() const { return value_; } const TreeFragmentToken *operator->() const { return &value_; } TreeFragmentTokenizer &operator++(); TreeFragmentTokenizer operator++(int); friend bool operator==(const TreeFragmentTokenizer &, const TreeFragmentTokenizer &); friend bool operator!=(const TreeFragmentTokenizer &, const TreeFragmentTokenizer &); private: StringPiece str_; TreeFragmentToken value_; StringPiece::const_iterator iter_; StringPiece::const_iterator end_; std::size_t pos_; }; } // namespace ScoreStsg } // namespace Moses