Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- misc/CreateProbingPT.cpp 16
-rw-r--r-- moses/TranslationModel/ProbingPT/huffmanish.cpp 26
-rw-r--r-- moses/TranslationModel/ProbingPT/huffmanish.hh 4
-rw-r--r-- moses/TranslationModel/ProbingPT/quering.cpp 24
-rw-r--r-- moses/TranslationModel/ProbingPT/quering.hh 4
-rw-r--r-- moses/TranslationModel/ProbingPT/storing.cpp 6
-rw-r--r-- moses/TranslationModel/ProbingPT/storing.hh 4
7 files changed, 68 insertions, 16 deletions
diff --git a/misc/CreateProbingPT.cpp b/misc/CreateProbingPT.cpp
index 3ea369a96..2b0e8cd8a 100644
--- a/misc/CreateProbingPT.cpp
+++ b/misc/CreateProbingPT.cpp
@@ -5,14 +5,22 @@
int main(int argc, char* argv[]){
- if (argc != 3) {
+ const char * is_reordering = "false";
+
+ if (!(argc == 5 || argc == 4)) {
// Tell the user how to run the program
- std::cerr << "Provided " << argc << " arguments, needed 3." << std::endl;
- std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir" << std::endl;
+ std::cerr << "Provided " << argc << " arguments, needed 4 or 5." << std::endl;
+ std::cerr << "Usage: " << argv[0] << " path_to_phrasetable output_dir num_scores is_reordering" << std::endl;
+ std::cerr << "is_reordering should be either true or false, but it is currently a stub feature." << std::endl;
+ //std::cerr << "Usage: " << argv[0] << " path_to_phrasetable number_of_uniq_lines output_bin_file output_hash_table output_vocab_id" << std::endl;
return 1;
}
- createProbingPT(argv[1], argv[2]);
+ if (argc == 5) {
+ is_reordering = argv[4];
+ }
+
+ createProbingPT(argv[1], argv[2], argv[3], is_reordering);
util::PrintUsage(std::cout);
return 0;
diff --git a/moses/TranslationModel/ProbingPT/huffmanish.cpp b/moses/TranslationModel/ProbingPT/huffmanish.cpp
index eea0a7c53..bf3989c0e 100644
--- a/moses/TranslationModel/ProbingPT/huffmanish.cpp
+++ b/moses/TranslationModel/ProbingPT/huffmanish.cpp
@@ -210,7 +210,7 @@ HuffmanDecoder::HuffmanDecoder (std::map<unsigned int, std::string> * lookup_tar
lookup_word_all1 = *lookup_word1;
}
-std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines){
+std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned char> lines, int num_scores){
std::vector<target_text> retvector; //All target phrases
std::vector<unsigned int> decoded_lines = vbyte_decode_line(lines); //All decoded lines
std::vector<unsigned int>::iterator it = decoded_lines.begin(); //Iterator for them
@@ -218,9 +218,19 @@ std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned
short zero_count = 0; //Count how many zeroes we have met so far. Every 3 zeroes mean a new target phrase.
while(it != decoded_lines.end()){
+ if (zero_count == 1) {
+ //We are extracting scores. we know how many scores there are so we can push them
+ //to the vector. This is done in case any of the scores is 0, because it would mess
+ //up the state machine.
+ for (int i = 0; i < num_scores; i++){
+ current_target_phrase.push_back(*it);
+ it++;
+ }
+ }
+
if (zero_count == 3) {
//We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase));
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
current_target_phrase.clear(); //Clear the current target phrase and the zero_count
zero_count = 0; //So that we can reuse them for the next target phrase
}
@@ -234,7 +244,7 @@ std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned
//Don't forget the last remaining line!
if (zero_count == 3) {
//We have finished with this entry, decode it, and add it to the retvector.
- retvector.push_back(decode_line(current_target_phrase));
+ retvector.push_back(decode_line(current_target_phrase, num_scores));
current_target_phrase.clear(); //Clear the current target phrase and the zero_count
zero_count = 0; //So that we can reuse them for the next target phrase
}
@@ -243,7 +253,7 @@ std::vector<target_text> HuffmanDecoder::full_decode_line (std::vector<unsigned
}
-target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input){
+target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input, int num_scores){
//demo decoder
target_text ret;
//Split everything
@@ -261,7 +271,13 @@ target_text HuffmanDecoder::decode_line (std::vector<unsigned int> input){
} else if (num_zeroes == 0){
target_phrase.push_back(num);
} else if (num_zeroes == 1){
- probs.push_back(num);
+ //Push exactly num_scores scores
+ for (int i = 0; i < num_scores; i++){
+ probs.push_back(num);
+ counter++;
+ num = input[counter];
+ }
+ continue;
} else if (num_zeroes == 2){
wAll = num;
}
diff --git a/moses/TranslationModel/ProbingPT/huffmanish.hh b/moses/TranslationModel/ProbingPT/huffmanish.hh
index 3116484e9..46b7dbeea 100644
--- a/moses/TranslationModel/ProbingPT/huffmanish.hh
+++ b/moses/TranslationModel/ProbingPT/huffmanish.hh
@@ -90,10 +90,10 @@ public:
std::string getTargetWordsFromIDs(std::vector<unsigned int> ids);
- target_text decode_line (std::vector<unsigned int> input);
+ target_text decode_line (std::vector<unsigned int> input, int num_scores);
//Variable-byte decodes all target phrases contained here and then passes them to decode_line
- std::vector<target_text> full_decode_line (std::vector<unsigned char> lines);
+ std::vector<target_text> full_decode_line (std::vector<unsigned char> lines, int num_scores);
};
std::string getTargetWordsFromIDs(std::vector<unsigned int> ids, std::map<unsigned int, std::string> * lookup_target_phrase);
diff --git a/moses/TranslationModel/ProbingPT/quering.cpp b/moses/TranslationModel/ProbingPT/quering.cpp
index 5d5dc4247..f8cdc7763 100644
--- a/moses/TranslationModel/ProbingPT/quering.cpp
+++ b/moses/TranslationModel/ProbingPT/quering.cpp
@@ -39,8 +39,26 @@ QueryEngine::QueryEngine(const char * filepath) : decoder(filepath){
//Read config file
std::string line;
std::ifstream config ((basepath + "/config").c_str());
+ //Check API version:
getline(config, line);
- int tablesize = atoi(line.c_str()); //Get tablesize.
+ if (atoi(line.c_str()) != API_VERSION) {
+ std::cerr << "The ProbingPT API has changed, please rebinarize your phrase tables." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ //Get tablesize.
+ getline(config, line);
+ int tablesize = atoi(line.c_str());
+ //Number of scores
+ getline(config, line);
+ num_scores = atoi(line.c_str());
+ //do we have a reordering table
+ getline(config, line);
+ std::transform(line.begin(), line.end(), line.begin(), ::tolower); //Get the boolean in lowercase
+ is_reordering = false;
+ if (line == "true") {
+ is_reordering = true;
+ std::cerr << "WARNING. REORDERING TABLES NOT SUPPORTED YET." << std::endl;
+ }
config.close();
//Mmap binary table
@@ -94,7 +112,7 @@ std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64
}
//Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text);
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
}
@@ -137,7 +155,7 @@ std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source
}
//Get only the translation entries necessary
- translation_entries = decoder.full_decode_line(encoded_text);
+ translation_entries = decoder.full_decode_line(encoded_text, num_scores);
}
diff --git a/moses/TranslationModel/ProbingPT/quering.hh b/moses/TranslationModel/ProbingPT/quering.hh
index 133f484ce..b6266f7c7 100644
--- a/moses/TranslationModel/ProbingPT/quering.hh
+++ b/moses/TranslationModel/ProbingPT/quering.hh
@@ -5,6 +5,8 @@
#include "hash.hh" //Includes line splitter
#include <sys/stat.h> //For finding size of file
#include "vocabid.hh"
+#include <algorithm> //toLower
+#define API_VERSION 3
char * read_binary_file(char * filename);
@@ -21,6 +23,8 @@ class QueryEngine {
size_t binary_filesize;
size_t table_filesize;
+ int num_scores;
+ bool is_reordering;
public:
QueryEngine (const char *);
~QueryEngine();
diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/moses/TranslationModel/ProbingPT/storing.cpp
index 7ce23ad66..6315b7b3d 100644
--- a/moses/TranslationModel/ProbingPT/storing.cpp
+++ b/moses/TranslationModel/ProbingPT/storing.cpp
@@ -34,7 +34,8 @@ BinaryFileWriter::~BinaryFileWriter (){
binfile.clear();
}
-void createProbingPT(const char * phrasetable_path, const char * target_path){
+void createProbingPT(const char * phrasetable_path, const char * target_path,
+ const char * num_scores, const char * is_reordering){
//Get basepath and create directory if missing
std::string basepath(target_path);
mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
@@ -147,6 +148,9 @@ void createProbingPT(const char * phrasetable_path, const char * target_path){
//Write configfile
std::ofstream configfile;
configfile.open((basepath + "/config").c_str());
+ configfile << API_VERSION << '\n';
configfile << uniq_entries << '\n';
+ configfile << num_scores << '\n';
+ configfile << is_reordering << '\n';
configfile.close();
}
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh
index dfcdbcc41..821fd14ca 100644
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ b/moses/TranslationModel/ProbingPT/storing.hh
@@ -12,8 +12,10 @@
#include "util/file_piece.hh"
#include "util/file.hh"
#include "vocabid.hh"
+#define API_VERSION 3
-void createProbingPT(const char * phrasetable_path, const char * target_path);
+void createProbingPT(const char * phrasetable_path, const char * target_path,
+ const char * num_scores, const char * is_reordering);
class BinaryFileWriter {
std::vector<unsigned char> binfile;