diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2013-09-06 00:22:53 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2013-09-06 00:22:53 +0400 |
commit | 56c4a412310402488c2612f034215ff881b7649d (patch) | |
tree | 7125cd1408a24af6676f45538c3cfb770cbf8b1d /moses/PCNTools.cpp | |
parent | 9894941294f51e0b5aba88b308aea3e6f42e95ff (diff) |
lattice decoding with sparse features
Diffstat (limited to 'moses/PCNTools.cpp')
-rw-r--r-- | moses/PCNTools.cpp | 64 |
1 file changed, 51 insertions, 13 deletions
diff --git a/moses/PCNTools.cpp b/moses/PCNTools.cpp index fac386444..1b0a7da6c 100644 --- a/moses/PCNTools.cpp +++ b/moses/PCNTools.cpp @@ -2,6 +2,9 @@ #include <iostream> #include <cstdlib> +#include "Util.h" + +using namespace std; namespace PCN { @@ -25,6 +28,17 @@ inline void eatws(const std::string& in, int& c) } } +std::string getString(const std::string& in, int &c) +{ + std::string ret; + eatws(in,c); + while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',') { + ret += get(in,c++); + } + eatws(in,c); + return ret; +} + // from 'foo' return foo std::string getEscapedString(const std::string& in, int &c) { @@ -82,23 +96,47 @@ CNAlt getCNAlt(const std::string& in, int &c) return CNAlt(); } size_t cnNext = 1; - std::vector<float> probs; - probs.push_back(getFloat(in,c)); + + // read all tokens after the 1st + std::vector<string> toks; + toks.push_back(getString(in,c)); while (get(in,c) == ',') { c++; - float val = getFloat(in,c); - probs.push_back(val); + string tok = getString(in,c); + toks.push_back(tok); } - //if we read more than one prob, this was a lattice, last item was column increment - if (probs.size()>1) { - cnNext = static_cast<size_t>(probs.back()); - probs.pop_back(); - if (cnNext < 1) { - ; //throw "bad link length" - std::cerr << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; - return CNAlt(); - } + + std::vector<float> probs; + + // dense scores + size_t ind; + for (ind = 0; ind < toks.size() - 1; ++ind) { + const string &tok = toks[ind]; + + if (tok.find('=') == tok.npos) { + // not sparse feature + float val = Moses::Scan<float>(tok); + probs.push_back(val); + } + else { + // sparse feature + break; + } } + + // sparse features + std::map<string, float> denseFeatures; + for (; ind < toks.size() - 1; ++ind) { + const string &tok = toks[ind]; + vector<string> keyValue = Moses::Tokenize(tok, "="); + CHECK(keyValue.size() == 2); + float prob = Moses::Scan<float>(keyValue[1]); + 
denseFeatures[ keyValue[0] ] = prob; + } + + //last item is column increment + cnNext = Moses::Scan<size_t>(toks.back()); + if (get(in,c++) != ')') { std::cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; // throw "expected )"; return CNAlt(); |