Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hieu Hoang <fishandfrolick@gmail.com> 2012-05-25 16:41:06 +0400
committer Hieu Hoang <fishandfrolick@gmail.com> 2012-05-25 16:41:06 +0400
commit 8844be6f25bd87ae20ee19eda6e1aface0f27cc5 (patch)
tree fa98ff0b28471724b1e886cc1b95318884ea0225 /OnDiskPt
parent 480b2f2fcddd2f08bad1cb577bfae4517475991a (diff)
Move queryOnDiskPt from contrib to OnDiskPt folder. Easier to amend Jam file
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- OnDiskPt/Jamfile 3
-rw-r--r-- OnDiskPt/queryOnDiskPt.cpp 174
2 files changed, 177 insertions, 0 deletions
diff --git a/OnDiskPt/Jamfile b/OnDiskPt/Jamfile
index f9811c05b..9aa00fcae 100644
--- a/OnDiskPt/Jamfile
+++ b/OnDiskPt/Jamfile
@@ -1,2 +1,5 @@
lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../moses/src//headers ;
+
exe CreateOnDisk : Main.cpp ../moses/src//moses OnDiskPt ;
+exe queryOnDiskPt : queryOnDiskPt.cpp ../moses/src//moses OnDiskPt ;
+
diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
new file mode 100644
index 000000000..f7435649a
--- /dev/null
+++ b/OnDiskPt/queryOnDiskPt.cpp
@@ -0,0 +1,174 @@
+// Query binary phrase tables.
+// Christian Hardmeier, 16 May 2010
+
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "util.h"
+#include "OnDiskWrapper.h"
+#include "SourcePhrase.h"
+
+using namespace std;
+using namespace OnDiskPt;
+
+void usage(); // forward declaration: prints the command-line help to stderr and exits with status 1 (defined at the end of this file)
+
+typedef unsigned int uint; // shorthand alias; not referenced by the code visible in this diff
+
+// Builds a Word from wordStr using the wrapper's vocabulary and passes it to
+// phrase.AddWord().
+// NOTE(review): the Word is allocated with new and never deleted here; presumably
+// the phrase takes ownership -- confirm against Phrase::AddWord.
+static void AddWordFromString(OnDiskPt::Phrase &phrase, const std::string &wordStr
+                              , OnDiskPt::OnDiskWrapper &onDiskWrapper)
+{
+  Word *created = new Word();
+  created->CreateFromString(wordStr, onDiskWrapper.GetVocab());
+  phrase.AddWord(created);
+}
+
+// Converts one token into the word(s) it contributes to phrase.
+//
+// A token that starts with '[' and ends with ']' is treated as a non-terminal:
+//  - no further '[' at index 2 or later (e.g. "[X]"): the whole token is added
+//    as a single word, regardless of the two flags;
+//  - otherwise (e.g. "[X][Y]"): the token is split at that '['; the first part
+//    is added only when addSourceNonTerm is true, the second part only when
+//    addTargetNonTerm is true.
+// Any other token is added unchanged as a terminal word.
+void Tokenize(OnDiskPt::Phrase &phrase
+              , const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
+              , OnDiskPt::OnDiskWrapper &onDiskWrapper)
+{
+  const size_t tokenLen = token.size();
+  const bool isNonTerm = tokenLen > 0
+                         && token[0] == '['
+                         && token[tokenLen - 1] == ']';
+
+  if (!isNonTerm) {
+    // term
+    AddWordFromString(phrase, token, onDiskWrapper);
+    return;
+  }
+
+  // non-term: search from index 2 so the opening bracket is skipped
+  const size_t secondBracket = token.find('[', 2);
+
+  if (secondBracket == string::npos) {
+    // lhs - only 1 word
+    AddWordFromString(phrase, token, onDiskWrapper);
+    return;
+  }
+
+  // source & target non-terms
+  if (addSourceNonTerm) {
+    AddWordFromString(phrase, token.substr(0, secondBracket), onDiskWrapper);
+  }
+
+  if (addTargetNonTerm) {
+    AddWordFromString(phrase, token.substr(secondBracket), onDiskWrapper);
+  }
+}
+
+
+// Interactive query tool for an on-disk phrase table.
+//
+// Command line:
+//   -t <ttable>       phrase table to load (required)
+//   -tlimit <limit>   value passed to GetTargetPhraseCollection (default: 20)
+// Any other argument, a missing option value, or a missing -t prints the usage
+// text via usage(), which does not return.
+//
+// Each line read from stdin is split on single spaces and turned into a source
+// phrase; the last token is handled as the LHS non-terminal. The phrase is then
+// followed word by word from the root source node. On success "Found <n>" is
+// written to stdout and every target phrase is dumped to stderr; otherwise
+// "Not found" is written to stdout. Progress/debug output goes to stderr.
+int main(int argc, char **argv)
+{
+  int tableLimit = 20;
+  std::string ttable = "";
+
+  for(int i = 1; i < argc; i++) {
+    if(!strcmp(argv[i], "-tlimit")) {
+      if(i + 1 == argc)
+        usage();
+      // Reject values that are not entirely numeric instead of silently
+      // falling back to 0.
+      const char *limitArg = argv[++i];
+      char *parseEnd = NULL;
+      const long parsedLimit = strtol(limitArg, &parseEnd, 10);
+      if(parseEnd == limitArg || *parseEnd != '\0')
+        usage();
+      tableLimit = static_cast<int>(parsedLimit);
+    } else if(!strcmp(argv[i], "-t")) {
+      if(i + 1 == argc)
+        usage();
+      ttable = argv[++i];
+    }
+    else
+      usage();
+  }
+
+  if(ttable == "")
+    usage();
+
+  OnDiskWrapper onDiskWrapper;
+  bool retDb = onDiskWrapper.BeginLoad(ttable);
+  CHECK(retDb);
+
+  cerr << "Ready..." << endl;
+
+  std::string line;
+  while(getline(std::cin, line)) {
+    std::vector<std::string> tokens;
+    tokens = Moses::Tokenize(line, " ");
+
+    cerr << "line: " << line << endl;
+
+    // create source phrase
+    SourcePhrase sourcePhrase;
+
+    for (size_t pos = 0; pos < tokens.size(); ++pos)
+    {
+      const string &tok = tokens[pos];
+
+      // last position. LHS non-term: its source-side label is not added
+      const bool isLastToken = (pos == tokens.size() - 1);
+      Tokenize(sourcePhrase, tok, !isLastToken, true, onDiskWrapper);
+    }
+
+    // NOTE(review): nodes returned by GetChild() are never released in this
+    // loop -- confirm who owns them.
+    const PhraseNode *node = &onDiskWrapper.GetRootSourceNode();
+    cerr << "node=" << node << endl;
+    assert(node);
+
+    for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos)
+    {
+      const Word &word = sourcePhrase.GetWord(pos);
+      cerr << word << " ";
+      node = node->GetChild(word, onDiskWrapper);
+      cerr << "node=" << node << endl;
+
+      if (node == NULL)
+      {
+        // no entry for this prefix; stop descending
+        break;
+      }
+    }
+
+    if (node)
+    { // source phrase points to a bunch of rules
+      // NOTE(review): the returned collection is never released here --
+      // confirm whether the caller is expected to delete it.
+      const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
+      cout << "Found " << coll->GetSize() << endl;
+
+      for (size_t ind = 0; ind < coll->GetSize(); ++ind)
+      {
+        const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind);
+        cerr << " ";
+        targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab());
+        cerr << endl;
+      }
+    }
+    else
+    {
+      cout << "Not found" << endl;
+    }
+
+    // blank line terminates the answer for this query
+    std::cout << '\n';
+    std::cout.flush();
+  }
+
+  cerr << "Finished." << endl;
+
+}
+
+// Prints the command-line help to stderr and terminates the process with
+// exit status 1. Only the options that main() actually parses are listed.
+void usage()
+{
+  std::cerr << "Usage: queryOnDiskPt [-tlimit <table limit>] -t <ttable>\n"
+            "-tlimit <table limit> max number of rules per source phrase (default: 20)\n"
+            "-t <ttable> phrase table\n";
+  exit(1);
+}