Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/Syntax/F2S/HyperTreeLoader.cpp')
-rw-r--r-- moses/Syntax/F2S/HyperTreeLoader.cpp 148
1 files changed, 148 insertions, 0 deletions
diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp
new file mode 100644
index 000000000..8dcadef55
--- /dev/null
+++ b/moses/Syntax/F2S/HyperTreeLoader.cpp
@@ -0,0 +1,148 @@
+#include "HyperTreeLoader.h"
+
+#include <sys/stat.h>
+#include <stdlib.h>
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <iostream>
+
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/WordsRange.h"
+#include "moses/ChartTranslationOptionList.h"
+#include "moses/FactorCollection.h"
+#include "moses/Syntax/RuleTableFF.h"
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+#include "util/double-conversion/double-conversion.h"
+#include "util/exception.hh"
+
+#include "HyperPath.h"
+#include "HyperPathLoader.h"
+#include "HyperTree.h"
+
+namespace Moses
+{
+namespace Syntax
+{
+namespace F2S
+{
+
+/**
+ * Load a rule table from inFile into a HyperTree.
+ *
+ * Each line of the file holds one rule whose fields are separated by "|||":
+ *
+ *   source ||| target ||| scores ||| alignment ||| counts ||| sparse ||| props
+ *
+ * The first three fields are mandatory; the remaining ones are read only if
+ * present.  The counts field is skipped.
+ *
+ * For every rule the source field is parsed into a HyperPath and the target
+ * field into a TargetPhrase, which is then added to the
+ * TargetPhraseCollection obtained from the trie for that source fragment.
+ * After the last line has been read, SortAndPrune() is applied if
+ * ff.GetTableLimit() is non-zero.
+ *
+ * \param input   factor types used when parsing the source side
+ * \param output  factor types used when parsing the target side
+ * \param inFile  path of the rule table, opened with util::FilePiece
+ * \param ff      rule table feature function; supplies the expected number
+ *                of scores, the table limit and the features passed to
+ *                EvaluateInIsolation()
+ * \param trie    HyperTree that receives the rules
+ * \return true whenever the function returns; malformed input is reported
+ *         by an exception raised through UTIL_THROW2 / UTIL_THROW_IF2
+ */
+bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
+                           const std::vector<FactorType> &output,
+                           const std::string &inFile,
+                           const RuleTableFF &ff,
+                           HyperTree &trie)
+{
+  PrintUserTime(std::string("Start loading HyperTree"));
+
+  // 1-based number of the line currently being processed; used in error
+  // messages only.
+  std::size_t lineNum = 0;
+
+  // FilePiece gets std::cerr as its progress stream only under IFVERBOSE(1).
+  std::ostream *progress = NULL;
+  IFVERBOSE(1) progress = &std::cerr;
+  util::FilePiece in(inFile.c_str(), progress);
+
+  // reused variables
+  std::vector<float> scoreVector;
+  StringPiece line;
+
+  // Both the empty-string and the junk-string result are NAN, so any token
+  // that is not a valid number is caught by the isnan() check below.
+  double_conversion::StringToDoubleConverter converter(
+    double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf",
+    "nan");
+
+  HyperPathLoader hyperPathLoader(Input, input);
+
+  // Placeholder source phrase handed to EvaluateInIsolation() for every rule.
+  Phrase dummySourcePhrase;
+  {
+    Word *lhs = NULL;
+    dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs);
+    delete lhs;
+  }
+
+  const std::size_t numScoreComponents = ff.GetNumScoreComponents();
+
+  while (true) {
+    try {
+      line = in.ReadLine();
+    } catch (const util::EndOfFileException &) {
+      break;
+    }
+    ++lineNum;
+
+    // Mandatory fields: source, target, scores.
+    util::TokenIter<util::MultiCharacter> pipes(line, "|||");
+    StringPiece sourceString(*pipes);
+    ++pipes;
+    UTIL_THROW_IF2(!pipes, "Missing target field on line " << lineNum);
+    StringPiece targetString(*pipes);
+    ++pipes;
+    UTIL_THROW_IF2(!pipes, "Missing score field on line " << lineNum);
+    StringPiece scoreString(*pipes);
+
+    // Optional alignment field.
+    StringPiece alignString;
+    if (++pipes) {
+      alignString = *pipes;
+    }
+
+    // Optional counts field: stepped over, its content is not used here.
+    ++pipes;
+
+    scoreVector.clear();
+    for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
+      int processed;
+      float score = converter.StringToFloat(s->data(), s->length(), &processed);
+      UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << lineNum);
+      scoreVector.push_back(FloorScore(TransformScore(score)));
+    }
+    if (scoreVector.size() != numScoreComponents) {
+      UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
+                  << numScoreComponents << ") of score components on line " << lineNum);
+    }
+
+    // Source-side
+    HyperPath sourceFragment;
+    hyperPathLoader.Load(sourceString, sourceFragment);
+
+    // Target-side.  Nothing else refers to the phrase until Add() below, so
+    // it must be released here if any of the intermediate steps throws.
+    TargetPhrase *targetPhrase = new TargetPhrase(&ff);
+    TargetPhraseCollection *phraseColl = NULL;
+    try {
+      Word *targetLHS = NULL;
+      targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
+      targetPhrase->SetTargetLHS(targetLHS);
+      targetPhrase->SetAlignmentInfo(alignString);
+
+      // Optional sparse feature field.
+      if (++pipes) {
+        StringPiece sparseString(*pipes);
+        targetPhrase->SetSparseScore(&ff, sparseString);
+      }
+
+      // Optional properties field.
+      if (++pipes) {
+        StringPiece propertiesString(*pipes);
+        targetPhrase->SetProperties(propertiesString);
+      }
+
+      targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
+      targetPhrase->EvaluateInIsolation(dummySourcePhrase,
+                                        ff.GetFeaturesToApply());
+
+      phraseColl = &GetOrCreateTargetPhraseCollection(trie, sourceFragment);
+    } catch (...) {
+      delete targetPhrase;
+      throw;
+    }
+
+    // Add rule to trie.
+    phraseColl->Add(targetPhrase);
+  }
+
+  // sort and prune each target phrase collection
+  if (ff.GetTableLimit()) {
+    SortAndPrune(trie, ff.GetTableLimit());
+  }
+
+  return true;
+}
+
+} // namespace F2S
+} // namespace Syntax
+} // namespace Moses