Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <marcinj@virtua.(none)> 2013-07-08 11:58:02 +0400
committer: Marcin Junczys-Dowmunt <marcinj@virtua.(none)> 2013-07-08 11:58:02 +0400
commit: d3b4c11be24d8c2b0c38ad9463a8e4def1a4e114 (patch)
tree: 2b18f57976ea81120c690f157c8ffc197e9f53ea
parent: 69b7bd3336b687935dc23460d90937ff4c1bba4d (diff)
Fixed queryPhraseTableMin, added warnings for compacting phrase tables without alignment
-rw-r--r-- misc/queryPhraseTableMin.cpp | 4
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDecoder.cpp | 9
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDecoder.h | 5
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 4
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseTableCreator.cpp | 36
5 files changed, 36 insertions, 22 deletions
diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp
index f68117336..0b4324020 100644
--- a/misc/queryPhraseTableMin.cpp
+++ b/misc/queryPhraseTableMin.cpp
@@ -51,8 +51,8 @@ int main(int argc, char **argv)
const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
- const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
- const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
+ //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
+ //const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
StaticData::InstanceNonConst().LoadData(parameter);
diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
index 085a7337c..c0767dad9 100644
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp
@@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source)
return source + m_separator;
}
-TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
+TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval)
{
// Not using TargetPhraseCollection avoiding "new" operator
@@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
// Decompress and decode target phrase collection
TargetPhraseVectorPtr decodedPhraseColl =
- DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
+ DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval);
return decodedPhraseColl;
} else
@@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
- const Phrase &sourcePhrase, bool topLevel)
+ const Phrase &sourcePhrase, bool topLevel, bool eval)
{
bool extending = tpv->size();
@@ -397,7 +397,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
if(scores.size() == m_numScoreComponent) {
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
- targetPhrase->Evaluate(sourcePhrase);
+ if(eval)
+ targetPhrase->Evaluate(sourcePhrase);
if(m_containsAlignmentInfo)
state = Alignment;
diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.h b/moses/TranslationModel/CompactPT/PhraseDecoder.h
index 85e9334da..413918314 100644
--- a/moses/TranslationModel/CompactPT/PhraseDecoder.h
+++ b/moses/TranslationModel/CompactPT/PhraseDecoder.h
@@ -131,12 +131,13 @@ public:
size_t Load(std::FILE* in);
TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
- bool topLevel = false);
+ bool topLevel = false, bool eval = true);
TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
BitWrapper<> &encodedBitStream,
const Phrase &sourcePhrase,
- bool topLevel);
+ bool topLevel,
+ bool eval);
void PruneCache();
};
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 51ff4c299..8d0f9ff2f 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
// Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl
- = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+ = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
@@ -151,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase
return TargetPhraseVectorPtr();
// Retrieve target phrase collection from phrase table
- return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+ return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
}
PhraseDictionaryCompact::~PhraseDictionaryCompact()
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
index fc3b056c6..f2192ee36 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@@ -38,7 +38,7 @@ bool operator<(const PackedItem &pi1, const PackedItem &pi2)
}
std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
-std::string PhraseTableCreator::m_separator = " ||| ";
+std::string PhraseTableCreator::m_separator = "|||";
PhraseTableCreator::PhraseTableCreator(std::string inPath,
std::string outPath,
@@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash()
inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
{
- return source + m_separator;
+ return source + " " + m_separator + " ";
}
inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
{
- return source + m_separator + target + m_separator;
+ return source + " " + m_separator + " " + target + " " + m_separator + " ";
}
void PhraseTableCreator::EncodeTargetPhrases()
@@ -1034,17 +1034,24 @@ void RankingTask::operator()()
for(size_t i = 0; i < lines.size(); i++) {
std::vector<std::string> tokens;
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
-
- if(tokens.size() < 3) {
+
+ for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+ *it = Moses::Trim(*it);
+
+ if(tokens.size() < 4) {
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
abort();
}
- if(tokens.size() == 3 && m_creator.m_warnMe) {
- std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
- std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
- std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl;
+
+ if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
+ std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
+ std::cerr << "but you are using ";
+ std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ std::cerr << " encoding which makes use of alignment data. " << std::endl;
+ std::cerr << "Use -encoding None" << std::endl;
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+ abort();
}
std::vector<float> scores = Tokenize<float>(tokens[2]);
@@ -1125,18 +1132,23 @@ void EncodingTask::operator()()
std::vector<std::string> tokens;
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
+ for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
+ *it = Moses::Trim(*it);
+
if(tokens.size() < 3) {
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
abort();
}
- if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
- std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
+
+ if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
+ std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
std::cerr << "but you are using ";
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
std::cerr << " encoding which makes use of alignment data. " << std::endl;
- std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
+ std::cerr << "Use -encoding None" << std::endl;
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+ abort();
}
size_t ownRank = 0;