Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2012-11-12 23:56:18 +0400
committer Hieu Hoang <hieuhoang@gmail.com> 2012-11-12 23:56:18 +0400
commit 5e3ef23cef6101d2c098eb3445f562e8f595655b (patch)
tree b8c332b6fa82bae84ea4910967a10ba1b08a7107 /moses/PCNTools.cpp
parent 8c785cff2b1be3cccd76ea9026f71b649762dfc3 (diff)
move moses/src/* to moses/
Diffstat (limited to 'moses/PCNTools.cpp')
-rw-r--r-- moses/PCNTools.cpp 170
1 files changed, 170 insertions, 0 deletions
diff --git a/moses/PCNTools.cpp b/moses/PCNTools.cpp
new file mode 100644
index 000000000..fac386444
--- /dev/null
+++ b/moses/PCNTools.cpp
@@ -0,0 +1,170 @@
+#include "PCNTools.h"
+
+#include <iostream>
+#include <cstdlib>
+
+namespace PCN
+{
+
+const std::string chars = "'\\"; // the two special characters: a single quote followed by a backslash
+const char& quote = chars[0]; // refers to the single-quote character stored in chars
+const char& slash = chars[1]; // refers to the backslash character stored in chars
+
+// Bounds-checked character lookup: yields in[c] when c is a valid index
+// into in, and the NUL character (0) for any index outside the string.
+inline char get(const std::string& in, int c)
+{
+  const bool inside = (c >= 0) && (c < static_cast<int>(in.size()));
+  return inside ? in[static_cast<size_t>(c)] : '\0';
+}
+
+// Advance c past the run of space characters that starts at position c.
+// Only ' ' is skipped; any other character (or the end of the input)
+// stops the scan and c is left pointing at it.
+inline void eatws(const std::string& in, int& c)
+{
+  for (; get(in, c) == ' '; ++c) {
+    // nothing to do here: the loop header performs the skipping
+  }
+}
+
+// Read a single-quoted string starting at position c and return its
+// contents with the quotes removed: from 'foo' return foo.
+//
+// Leading spaces are skipped first. Inside the quotes a backslash escapes
+// the character that follows it, so \' yields a literal quote and \\ a
+// literal backslash. On return c sits just past the closing quote and any
+// spaces that follow it.
+//
+// If the first non-space character is not a quote, the literal string
+// "ERROR" is returned (c has still been advanced past that character).
+// If the closing quote is missing, everything up to the end of the input
+// is returned and c ends up beyond the end of the input.
+std::string getEscapedString(const std::string& in, int &c)
+{
+  eatws(in,c);
+  if (get(in,c++) != quote) return "ERROR";
+  const int len = (int)in.size();
+  std::string res;
+  // Peek before consuming, so that an empty string '' stops at its own
+  // closing quote instead of swallowing it and reading on to the next one.
+  while (c < len && get(in,c) != quote) {
+    const char cur = get(in,c++);
+    if (cur == slash) {
+      // Escaped character: copy it verbatim. A backslash that is the very
+      // last character of the input has nothing to escape, so nothing is
+      // appended in that case.
+      if (c < len) res += get(in,c);
+      c++;
+    } else {
+      res += cur;
+    }
+  }
+  c++; // step over the closing quote
+  eatws(in,c);
+  return res;
+}
+
+// basically atof: read one token starting at c and convert it with atof.
+// The token ends at a space, ')' or ',' (or at the end of the input);
+// spaces on both sides of the token are consumed.
+float getFloat(const std::string& in, int &c)
+{
+  eatws(in,c);
+  const int len = static_cast<int>(in.size());
+  std::string token;
+  for (; c < len; ++c) {
+    const char ch = get(in,c);
+    if (ch == ' ' || ch == ')' || ch == ',') break;
+    token += ch;
+  }
+  eatws(in,c);
+  return atof(token.c_str());
+}
+
+// basically atoi: read one token starting at c and convert it with atoi.
+// The token ends at a space, ')' or ',' (or at the end of the input);
+// spaces on both sides of the token are consumed.
+int getInt(const std::string& in, int &c)
+{
+  eatws(in,c);
+  std::string digits;
+  const int end = static_cast<int>(in.size());
+  while (c < end) {
+    const char ch = get(in,c);
+    const bool delimiter = (ch == ' ') || (ch == ')') || (ch == ',');
+    if (delimiter) break;
+    digits += ch;
+    ++c;
+  }
+  eatws(in,c);
+  return atoi(digits.c_str());
+}
+
+// parse ('foo', 0.23)
+//
+// Reads one alternative: an opening '(', a quoted word, a ',' and then one
+// or more comma-separated numbers, closed by ')'. When more than one number
+// is present the block came from a lattice and the last number is the
+// column increment (link length) rather than a score; it must be at least 1.
+// With a single number the column increment defaults to 1.
+//
+// On success c is left after the closing ')' and any following spaces, and
+// the result is built from the (word, scores) pair and the column increment.
+// On any syntax error a message is written to std::cerr and a
+// default-constructed CNAlt is returned.
+CNAlt getCNAlt(const std::string& in, int &c)
+{
+  if (get(in,c++) != '(') {
+    std::cerr << "PCN/PLF parse error: expected ( at start of cn alt block\n"; // throw "expected (";
+    return CNAlt();
+  }
+  std::string word = getEscapedString(in,c);
+  if (get(in,c++) != ',') {
+    std::cerr << "PCN/PLF parse error: expected , after string\n"; // throw "expected , after string";
+    return CNAlt();
+  }
+  size_t cnNext = 1;
+  std::vector<float> probs;
+  probs.push_back(getFloat(in,c));
+  while (get(in,c) == ',') {
+    c++;
+    probs.push_back(getFloat(in,c));
+  }
+  //if we read more than one prob, this was a lattice, last item was column increment
+  if (probs.size()>1) {
+    const float link = probs.back();
+    probs.pop_back();
+    // Validate while the value is still a float: converting a negative or
+    // NaN float to size_t is undefined behaviour and would typically wrap
+    // to a huge value that slips past a "< 1" test on the converted number.
+    if (!(link >= 1.0f)) {
+      //throw "bad link length"
+      std::cerr << "PCN/PLF parse error: bad link length at last element of cn alt block\n";
+      return CNAlt();
+    }
+    cnNext = static_cast<size_t>(link);
+  }
+  if (get(in,c++) != ')') {
+    std::cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; // throw "expected )";
+    return CNAlt();
+  }
+  eatws(in,c);
+  return CNAlt(std::pair<std::string, std::vector<float> >(word,probs), cnNext);
+}
+
+// parse (('foo', 0.23), ('bar', 0.77))
+//
+// Reads one column: an opening '(', then alternatives separated by ','
+// until a closing ')' (a trailing ",)" is accepted as well). Each
+// alternative is parsed by getCNAlt and appended to the result. If the
+// opening '(' is missing an empty column is returned; c is advanced by
+// one character in that case too.
+CNCol getCNCol(const std::string& in, int &c)
+{
+  CNCol res;
+  const char opener = get(in,c);
+  ++c;
+  if (opener != '(') return res; // error
+  eatws(in,c);
+  const int len = static_cast<int>(in.size());
+  // Runs while c is at most len, i.e. one more attempt is made when c
+  // sits exactly at the end of the input.
+  while (c <= len) {
+    const char here = get(in,c);
+    if (here == ')') {
+      // end of the column
+      ++c;
+      eatws(in,c);
+      break;
+    }
+    if (here == ',') {
+      if (get(in,c+1) == ')') {
+        // trailing comma directly before the closing parenthesis
+        c += 2;
+        eatws(in,c);
+        break;
+      }
+      // separator between two alternatives
+      ++c;
+      eatws(in,c);
+    }
+    res.push_back(getCNAlt(in, c));
+  }
+  return res;
+}
+
+// parse ((('foo', 0.23), ('bar', 0.77)), (('a', 0.3), ('c', 0.7)))
+//
+// Entry point: the input must start with '(' at its very first character
+// (no leading spaces are skipped), otherwise an empty result is returned.
+// Columns are then read with getCNCol, separated by ',', until the closing
+// ')' (a trailing ",)" is accepted as well).
+CN parsePCN(const std::string& in)
+{
+  CN res;
+  int c = 0;
+  const char opener = in[c];
+  ++c;
+  if (opener != '(') return res; // error
+  const int len = static_cast<int>(in.size());
+  // Runs while c is at most len, i.e. one more attempt is made when c
+  // sits exactly at the end of the input.
+  while (c <= len) {
+    const char here = get(in,c);
+    if (here == ')') {
+      // end of the whole network
+      ++c;
+      eatws(in,c);
+      break;
+    }
+    if (here == ',') {
+      if (get(in,c+1) == ')') {
+        // trailing comma directly before the closing parenthesis
+        c += 2;
+        eatws(in,c);
+        break;
+      }
+      // separator between two columns
+      ++c;
+      eatws(in,c);
+    }
+    res.push_back(getCNCol(in, c));
+  }
+  return res;
+}
+
+
+}
+