Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2013-09-06 00:22:53 +0400
committer Hieu Hoang <hieuhoang@gmail.com> 2013-09-06 00:22:53 +0400
commit 56c4a412310402488c2612f034215ff881b7649d (patch)
tree 7125cd1408a24af6676f45538c3cfb770cbf8b1d /moses/PCNTools.cpp
parent 9894941294f51e0b5aba88b308aea3e6f42e95ff (diff)
lattice decoding with sparse features
Diffstat (limited to 'moses/PCNTools.cpp')
-rw-r--r-- moses/PCNTools.cpp 64
1 files changed, 51 insertions, 13 deletions
diff --git a/moses/PCNTools.cpp b/moses/PCNTools.cpp
index fac386444..1b0a7da6c 100644
--- a/moses/PCNTools.cpp
+++ b/moses/PCNTools.cpp
@@ -2,6 +2,9 @@
#include <iostream>
#include <cstdlib>
+#include "Util.h"
+
+using namespace std;
namespace PCN
{
@@ -25,6 +28,17 @@ inline void eatws(const std::string& in, int& c)
}
}
+std::string getString(const std::string& in, int &c) // Reads one unquoted token from `in` starting at offset `c`; `c` is advanced in place.
+{
+ std::string ret; // characters of the token collected so far
+ eatws(in,c); // presumably skips leading whitespace (eatws body is not shown in this hunk)
+ while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',') { // stop at end of input, a space, ')' or ','
+ ret += get(in,c++); // append the current character, then step past it
+ }
+ eatws(in,c); // run eatws again so `c` is left after anything it consumes following the token
+ return ret; // empty when no character was consumed, e.g. the loop condition fails immediately
+}
+
// from 'foo' return foo
std::string getEscapedString(const std::string& in, int &c)
{
@@ -82,23 +96,47 @@ CNAlt getCNAlt(const std::string& in, int &c)
return CNAlt();
}
size_t cnNext = 1;
- std::vector<float> probs;
- probs.push_back(getFloat(in,c));
+
+ // read all tokens after the 1st
+ std::vector<string> toks;
+ toks.push_back(getString(in,c));
while (get(in,c) == ',') {
c++;
- float val = getFloat(in,c);
- probs.push_back(val);
+ string tok = getString(in,c);
+ toks.push_back(tok);
}
- //if we read more than one prob, this was a lattice, last item was column increment
- if (probs.size()>1) {
- cnNext = static_cast<size_t>(probs.back());
- probs.pop_back();
- if (cnNext < 1) {
- ; //throw "bad link length"
- std::cerr << "PCN/PLF parse error: bad link length at last element of cn alt block\n";
- return CNAlt();
- }
+
+ std::vector<float> probs;
+
+ // dense scores
+ size_t ind;
+ for (ind = 0; ind < toks.size() - 1; ++ind) {
+ const string &tok = toks[ind];
+
+ if (tok.find('=') == tok.npos) {
+ // not sparse feature
+ float val = Moses::Scan<float>(tok);
+ probs.push_back(val);
+ }
+ else {
+ // sparse feature
+ break;
+ }
}
+
+ // sparse features
+ std::map<string, float> denseFeatures;
+ for (; ind < toks.size() - 1; ++ind) {
+ const string &tok = toks[ind];
+ vector<string> keyValue = Moses::Tokenize(tok, "=");
+ CHECK(keyValue.size() == 2);
+ float prob = Moses::Scan<float>(keyValue[1]);
+ denseFeatures[ keyValue[0] ] = prob;
+ }
+
+ //last item is column increment
+ cnNext = Moses::Scan<size_t>(toks.back());
+
if (get(in,c++) != ')') {
std::cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; // throw "expected )";
return CNAlt();