Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2015-05-03 10:50:31 +0300
committerHieu Hoang <hieuhoang@gmail.com>2015-05-03 10:50:31 +0300
commite5f76ee99e1da3f9ef32338243ef046154f282d9 (patch)
treec8dc0eb33e8301a8d24c658161a737407dfe20f0
parent73ae7d7e209b78c40a4106d94c62d35ff98bf83b (diff)
parent402b958d9069ff11df3e603f473ade9487bac481 (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
-rw-r--r--.gitignore1
-rw-r--r--contrib/moses-speedtest/runtests.py53
-rw-r--r--mert/BleuDocScorer.cpp2
-rw-r--r--mert/BleuDocScorer.h4
-rw-r--r--mert/BleuScorer.cpp127
-rw-r--r--mert/BleuScorer.h32
-rw-r--r--mert/ForestRescoreTest.cpp6
-rw-r--r--mert/HopeFearDecoder.cpp2
-rw-r--r--mert/HopeFearDecoder.h5
-rw-r--r--mert/Jamfile4
-rw-r--r--mert/MiraFeatureVectorTest.cpp3
-rw-r--r--mert/Ngram.h4
-rw-r--r--mert/Reference.h5
-rw-r--r--mert/Scorer.cpp4
-rw-r--r--mert/Scorer.h8
-rw-r--r--mert/mert.cpp2
-rw-r--r--mert/sentence-bleu-nbest.cpp66
-rw-r--r--mert/sentence-bleu.cpp43
-rw-r--r--moses-cmd/LatticeMBRGrid.cpp63
-rw-r--r--moses-cmd/MainVW.cpp27
-rw-r--r--moses/BaseManager.cpp4
-rw-r--r--moses/ChartCellCollection.h4
-rw-r--r--moses/ConfusionNet.cpp2
-rw-r--r--moses/ContextScope.h125
-rw-r--r--moses/DecodeStepTranslation.cpp59
-rw-r--r--moses/ExportInterface.cpp207
-rw-r--r--moses/ExportInterface.h4
-rw-r--r--moses/FF/Factory.cpp44
-rw-r--r--moses/FF/FeatureFunction.cpp14
-rw-r--r--moses/FF/FeatureFunction.h8
-rw-r--r--moses/FF/InternalTree.cpp3
-rw-r--r--moses/FF/InternalTree.h9
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.cpp8
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp8
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.h25
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.cpp8
-rw-r--r--moses/FF/Model1Feature.cpp55
-rw-r--r--moses/FF/Model1Feature.h6
-rw-r--r--moses/FF/PhraseOrientationFeature.cpp2
-rw-r--r--moses/FF/PhraseOrientationFeature.h18
-rw-r--r--moses/FF/RulePairUnlexicalizedSource.cpp15
-rw-r--r--moses/FF/RuleScope.cpp48
-rw-r--r--moses/FF/TreeStructureFeature.cpp2
-rw-r--r--moses/FF/VW/VW.h8
-rw-r--r--moses/Hypothesis.cpp1077
-rw-r--r--moses/Hypothesis.h2
-rw-r--r--moses/IOWrapper.h2
-rw-r--r--moses/LM/RDLM.cpp377
-rw-r--r--moses/LM/RDLM.h159
-rw-r--r--moses/Manager.cpp8
-rw-r--r--moses/OutputCollector.h2
-rw-r--r--moses/Parameter.cpp38
-rw-r--r--moses/Parameter.h24
-rw-r--r--moses/ScoreComponentCollection.cpp28
-rw-r--r--moses/ScoreComponentCollection.h4
-rw-r--r--moses/Sentence.cpp190
-rw-r--r--moses/Sentence.h157
-rw-r--r--moses/StaticData.cpp34
-rw-r--r--moses/StaticData.h12
-rw-r--r--moses/Syntax/F2S/HyperTreeLoader.cpp2
-rw-r--r--moses/Syntax/F2S/HyperTreeLoader.h2
-rw-r--r--moses/Syntax/F2S/Manager-inl.h2
-rw-r--r--moses/Syntax/F2S/Manager.h2
-rw-r--r--moses/Syntax/InputWeightFF.cpp12
-rw-r--r--moses/Syntax/InputWeightFF.h2
-rw-r--r--moses/TargetPhrase.cpp31
-rw-r--r--moses/TargetPhrase.h8
-rw-r--r--moses/TrainingTask.h10
-rw-r--r--moses/TranslationModel/CompactPT/BlockHashIndex.h2
-rw-r--r--moses/TranslationModel/CompactPT/MmapAllocator.h4
-rw-r--r--moses/TranslationModel/PhraseDictionary.h6
-rw-r--r--moses/TranslationOption.cpp2
-rw-r--r--moses/TranslationOption.h2
-rw-r--r--moses/TranslationOptionCollection.cpp17
-rw-r--r--moses/TranslationOptionCollection.h5
-rw-r--r--moses/TranslationOptionCollectionConfusionNet.cpp2
-rw-r--r--moses/TranslationOptionCollectionLattice.cpp2
-rw-r--r--moses/TranslationTask.cpp72
-rw-r--r--moses/TranslationTask.h23
-rw-r--r--moses/TreeInput.h2
-rw-r--r--moses/TypeDef.h66
-rw-r--r--moses/Util.cpp7
-rw-r--r--moses/Util.h17
-rw-r--r--moses/server/Optimizer.cpp105
-rw-r--r--moses/server/Optimizer.h14
-rw-r--r--moses/server/TranslationRequest.cpp647
-rw-r--r--moses/server/TranslationRequest.h146
-rw-r--r--moses/server/Translator.cpp54
-rw-r--r--moses/server/Translator.h18
-rw-r--r--moses/server/Updater.cpp92
-rw-r--r--moses/server/Updater.h26
-rw-r--r--moses/thread_safe_container.h176
-rw-r--r--phrase-extract/ExtractionPhrasePair.h2
-rw-r--r--phrase-extract/PropertiesConsolidator.cpp24
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.cpp12
-rw-r--r--phrase-extract/filter-rule-table/TreeCfgFilter.cpp2
-rw-r--r--phrase-extract/filter-rule-table/TreeCfgFilter.h5
-rw-r--r--phrase-extract/postprocess-egret-forests/Forest.h4
-rw-r--r--phrase-extract/postprocess-egret-forests/ForestParser.cpp17
-rw-r--r--phrase-extract/postprocess-egret-forests/ForestParser.h17
-rw-r--r--phrase-extract/postprocess-egret-forests/ForestWriter.cpp6
-rw-r--r--phrase-extract/postprocess-egret-forests/ForestWriter.h4
-rw-r--r--phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp10
-rw-r--r--phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp15
-rw-r--r--phrase-extract/postprocess-egret-forests/SplitPointFileParser.h15
-rw-r--r--phrase-extract/postprocess-egret-forests/Symbol.h4
-rw-r--r--phrase-extract/postprocess-egret-forests/TopologicalSorter.h4
-rw-r--r--phrase-extract/score-main.cpp5
-rw-r--r--scripts/ems/experiment.meta28
-rwxr-xr-xscripts/ems/experiment.perl62
-rwxr-xr-xscripts/ems/support/build-sparse-features.perl16
-rwxr-xr-xscripts/ems/support/fast-align-in-parts.perl91
-rwxr-xr-xscripts/ems/support/generic-parallelizer.perl4
-rwxr-xr-xscripts/ems/support/lmplz-wrapper.perl10
-rwxr-xr-xscripts/training/mert-moses.pl38
-rwxr-xr-xscripts/training/wrappers/make-factor-brown-cluster-mkcls.perl12
-rwxr-xr-xscripts/training/wrappers/make-factor-de-lemma.perl33
-rwxr-xr-xscripts/training/wrappers/make-factor-de-morph.perl62
-rwxr-xr-xscripts/training/wrappers/make-factor-en-porter.perl10
-rw-r--r--symal/symal.cpp16
120 files changed, 2819 insertions, 2548 deletions
diff --git a/.gitignore b/.gitignore
index 9c82eb9f2..edb3260f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,6 +68,7 @@ contrib/other-builds/*.xcodeproj/xcuserdata/
*/*.xcodeproj/xcuserdata
mert/sentence-bleu
+mert/sentence-bleu-nbest
._*
.DS_Store
*.pbxuser
diff --git a/contrib/moses-speedtest/runtests.py b/contrib/moses-speedtest/runtests.py
index 0978c8ef2..b82587dce 100644
--- a/contrib/moses-speedtest/runtests.py
+++ b/contrib/moses-speedtest/runtests.py
@@ -26,11 +26,18 @@ def parse_cmd():
arguments = parser.parse_args()
return arguments
-def repoinit(testconfig):
- """Determines revision and sets up the repo."""
+def repoinit(testconfig, profiler=True):
+ """Determines revision and sets up the repo. If given the profiler optional
+ argument, wil init the profiler repo instead of the default one."""
revision = ''
#Update the repo
- os.chdir(testconfig.repo)
+ if profiler:
+ if testconfig.repo_prof is not None:
+ os.chdir(testconfig.repo_prof)
+ else:
+ raise ValueError('Profiling repo is not defined')
+ else:
+ os.chdir(testconfig.repo)
#Checkout specific branch, else maintain main branch
if testconfig.branch != 'master':
subprocess.call(['git', 'checkout', testconfig.branch])
@@ -49,13 +56,14 @@ def repoinit(testconfig):
rev, _ = subprocess.Popen(['git rev-parse HEAD'], stdout=subprocess.PIPE,\
stderr=subprocess.PIPE, shell=True).communicate()
revision = str(rev).replace("\\n'", '').replace("b'", '')
-
+
return revision
class Configuration:
"""A simple class to hold all of the configuration constatns"""
- def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev):
+ def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None):
self.repo = repo
+ self.repo_prof = repo_prof
self.drop_caches = drop_caches
self.tests = tests
self.testlogs = testlogs
@@ -80,15 +88,16 @@ class Configuration:
class Test:
"""A simple class to contain all information about tests"""
- def __init__(self, name, command, ldopts, permutations):
+ def __init__(self, name, command, ldopts, permutations, prof_command=None):
self.name = name
self.command = command
+ self.prof_command = prof_command
self.ldopts = ldopts.replace(' ', '').split(',') #Not tested yet
self.permutations = permutations
-def parse_configfile(conffile, testdir, moses_repo):
+def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
"""Parses the config file"""
- command, ldopts = '', ''
+ command, ldopts, prof_command = '', '', None
permutations = []
fileopen = open(conffile, 'r')
for line in fileopen:
@@ -99,6 +108,8 @@ def parse_configfile(conffile, testdir, moses_repo):
if opt == 'Command:':
command = args.replace('\n', '')
+ if moses_prof is not None: # Get optional command for profiling
+ prof_command = moses_prof_repo + '/bin/' + command
command = moses_repo + '/bin/' + command
elif opt == 'LDPRE:':
ldopts = args.replace('\n', '')
@@ -107,14 +118,14 @@ def parse_configfile(conffile, testdir, moses_repo):
else:
raise ValueError('Unrecognized option ' + opt)
#We use the testdir as the name.
- testcase = Test(testdir, command, ldopts, permutations)
+ testcase = Test(testdir, command, ldopts, permutations, prof_command)
fileopen.close()
return testcase
def parse_testconfig(conffile):
"""Parses the config file for the whole testsuite."""
repo_path, drop_caches, tests_dir, testlog_dir = '', '', '', ''
- basebranch, baserev = '', ''
+ basebranch, baserev, repo_prof_path = '', '', None
fileopen = open(conffile, 'r')
for line in fileopen:
line = line.split('#')[0] # Discard comments
@@ -133,10 +144,12 @@ def parse_testconfig(conffile):
basebranch = args.replace('\n', '')
elif opt == 'BASEREV:':
baserev = args.replace('\n', '')
+ elif opt == 'MOSES_PROFILER_PATH:': # Optional
+ repo_prof_path = args.replace('\n', '')
else:
raise ValueError('Unrecognized option ' + opt)
config = Configuration(repo_path, drop_caches, tests_dir, testlog_dir,\
- basebranch, baserev)
+ basebranch, baserev, repo_prof_path)
fileopen.close()
return config
@@ -146,6 +159,8 @@ def get_config():
config = parse_testconfig(args.configfile)
config.additional_args(args.singletestdir, args.revision, args.branch)
revision = repoinit(config)
+ if config.repo_prof is not None:
+ repoinit(config, True)
config.set_revision(revision)
return config
@@ -221,6 +236,10 @@ def execute_tests(testcase, cur_directory, config):
stderr=None, shell=True).communicate()
write_log('/tmp/time_moses_tests', testcase.name + '_ldpre_' +opt +'_cached', config)
+ #if 'profile' in testcase.permutations:
+ #TODO Separate the above into functions so we can execute them with profiling moses.
+ #Fix the logic in the main
+
# Go through all the test directories and executes tests
if __name__ == '__main__':
CONFIG = get_config()
@@ -260,7 +279,7 @@ if __name__ == '__main__':
#Create a new configuration for base version tests:
BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
- CONFIG.baserev)
+ CONFIG.baserev, CONFIG.repo_prof)
BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
#Set up the repository and get its revision:
REVISION = repoinit(BASECONFIG)
@@ -268,6 +287,11 @@ if __name__ == '__main__':
#Build
os.chdir(BASECONFIG.repo)
subprocess.call(['./previous.sh'], shell=True)
+ #If profiler configuration exists also init it
+ if BASECONFIG.repo_prof is not None:
+ repoinit(BASECONFIG, True)
+ os.chdir(BASECONFIG.repo_prof)
+ subprocess.call(['./previous.sh'], shell=True)
#Perform tests
for directory in FIRSTTIME:
@@ -277,10 +301,15 @@ if __name__ == '__main__':
#Reset back the repository to the normal configuration
repoinit(CONFIG)
+ if BASECONFIG.repo_prof is not None:
+ repoinit(CONFIG, True)
#Builds moses
os.chdir(CONFIG.repo)
subprocess.call(['./previous.sh'], shell=True)
+ if CONFIG.repo_prof is not None:
+ os.chdir(CONFIG.repo_prof)
+ subprocess.call(['./previous.sh'], shell=True)
if CONFIG.singletest:
TESTCASE = parse_configfile(CONFIG.tests + '/' +\
diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp
index 48c17ee96..d71c5171d 100644
--- a/mert/BleuDocScorer.cpp
+++ b/mert/BleuDocScorer.cpp
@@ -174,7 +174,7 @@ statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
UTIL_THROW_IF(comps.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
float logbleu = 0.0;
- for (int i = 0; i < kBleuNgramOrder; ++i) {
+ for (size_t i = 0; i < kBleuNgramOrder; ++i) {
if (comps[2*i] == 0) {
return 0.0;
}
diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h
index 9677410f8..d27088254 100644
--- a/mert/BleuDocScorer.h
+++ b/mert/BleuDocScorer.h
@@ -1,5 +1,4 @@
-#ifndef MERT_BLEU_DOC_SCORER_H_
-#define MERT_BLEU_DOC_SCORER_H_
+#pragma once
#include <ostream>
#include <string>
@@ -64,4 +63,3 @@ private:
}
-#endif // MERT_BLEU_DOC_SCORER_H_
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index dab1b3ccf..8ab749f3b 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -45,14 +45,14 @@ BleuScorer::BleuScorer(const string& config)
} else if (reflen == REFLEN_CLOSEST) {
m_ref_length_type = CLOSEST;
} else {
- throw runtime_error("Unknown reference length strategy: " + reflen);
+ UTIL_THROW2("Unknown reference length strategy: " + reflen);
}
}
BleuScorer::~BleuScorer() {}
size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
- unsigned int n, bool is_testing)
+ unsigned int n, bool is_testing) const
{
assert(n > 0);
vector<int> encoded_tokens;
@@ -97,22 +97,13 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
for (size_t i = 0; i < referenceFiles.size(); ++i) {
TRACE_ERR("Loading reference from " << referenceFiles[i] << endl);
- if (!OpenReference(referenceFiles[i].c_str(), i)) {
- throw runtime_error("Unable to open " + referenceFiles[i]);
+ ifstream ifs(referenceFiles[i].c_str());
+ if (!OpenReferenceStream(&ifs, i)) {
+ UTIL_THROW2("Cannot open " + referenceFiles[i]);
}
}
}
-bool BleuScorer::OpenReference(const char* filename, size_t file_id)
-{
- ifstream ifs(filename);
- if (!ifs) {
- cerr << "Cannot open " << filename << endl;
- return false;
- }
- return OpenReferenceStream(&ifs, file_id);
-}
-
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
{
if (is == NULL) return false;
@@ -120,31 +111,17 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
string line;
size_t sid = 0;
while (getline(*is, line)) {
+ // TODO: rather than loading the whole reference corpus into memory, can we stream it line by line?
+ // (loading the whole reference corpus can take gigabytes of RAM if done with millions of sentences)
line = preprocessSentence(line);
if (file_id == 0) {
Reference* ref = new Reference;
m_references.push_back(ref); // Take ownership of the Reference object.
}
- if (m_references.size() <= sid) {
- cerr << "Reference " << file_id << "has too many sentences." << endl;
- return false;
- }
- NgramCounts counts;
- size_t length = CountNgrams(line, counts, kBleuNgramOrder);
-
- //for any counts larger than those already there, merge them in
- for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
- const NgramCounts::Key& ngram = ci->first;
- const NgramCounts::Value newcount = ci->second;
-
- NgramCounts::Value oldcount = 0;
- m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
- if (newcount > oldcount) {
- m_references[sid]->get_counts()->operator[](ngram) = newcount;
- }
- }
- //add in the length
- m_references[sid]->push_back(length);
+ UTIL_THROW_IF2(m_references.size() <= sid, "Reference " << file_id << "has too many sentences.");
+
+ ProcessReferenceLine(line, m_references[sid]);
+
if (sid > 0 && sid % 100 == 0) {
TRACE_ERR(".");
}
@@ -153,20 +130,53 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
return true;
}
-void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const
+{
+ NgramCounts counts;
+ size_t length = CountNgrams(line, counts, kBleuNgramOrder);
+
+ //for any counts larger than those already there, merge them in
+ for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
+ const NgramCounts::Key& ngram = ci->first;
+ const NgramCounts::Value newcount = ci->second;
+
+ NgramCounts::Value oldcount = 0;
+ ref->get_counts()->Lookup(ngram, &oldcount);
+ if (newcount > oldcount) {
+ ref->get_counts()->operator[](ngram) = newcount;
+ }
+ }
+ //add in the length
+ ref->push_back(length);
+}
+
+bool BleuScorer::GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const
{
- if (sid >= m_references.size()) {
- stringstream msg;
- msg << "Sentence id (" << sid << ") not found in reference set";
- throw runtime_error(msg.str());
+ for (vector<boost::shared_ptr<ifstream> >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) {
+ if (!(*ifs)) return false;
+ string line;
+ if (!getline(**ifs, line)) return false;
+ line = preprocessSentence(line);
+ ProcessReferenceLine(line, &ref);
}
+ return true;
+}
+
+void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+{
+ UTIL_THROW_IF2(sid >= m_references.size(), "Sentence id (" << sid << ") not found in reference set");
+ CalcBleuStats(*(m_references[sid]), text, entry);
+}
+
+void BleuScorer::CalcBleuStats(const Reference& ref, const std::string& text, ScoreStats& entry) const
+{
NgramCounts testcounts;
// stats for this line
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
string sentence = preprocessSentence(text);
const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder, true);
- const int reference_len = CalcReferenceLength(sid, length);
+ const int reference_len = CalcReferenceLength(ref, length);
stats.push_back(reference_len);
//precision on each ngram type
@@ -177,7 +187,7 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
NgramCounts::Value correct = 0;
NgramCounts::Value v = 0;
- if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
+ if (ref.get_counts()->Lookup(testcounts_it->first, &v)) {
correct = min(v, guess);
}
stats[len * 2 - 2] += correct;
@@ -207,21 +217,20 @@ statscore_t BleuScorer::calculateScore(const vector<ScoreStatsType>& comps) cons
return exp(logbleu);
}
-int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length)
+int BleuScorer::CalcReferenceLength(const Reference& ref, std::size_t length) const
{
switch (m_ref_length_type) {
case AVERAGE:
- return m_references[sentence_id]->CalcAverage();
+ return ref.CalcAverage();
break;
case CLOSEST:
- return m_references[sentence_id]->CalcClosest(length);
+ return ref.CalcClosest(length);
break;
case SHORTEST:
- return m_references[sentence_id]->CalcShortest();
+ return ref.CalcShortest();
break;
default:
- cerr << "unknown reference types." << endl;
- exit(1);
+ UTIL_THROW2("Unknown reference types");
}
}
@@ -304,23 +313,15 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
}
vector<pair<size_t,size_t> > hypotheses;
- if (featureDataIters[0] == FeatureDataIterator::end()) {
- cerr << "Error: at the end of feature data iterator" << endl;
- exit(1);
- }
+ UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(),
+ "At the end of feature data iterator");
for (size_t i = 0; i < featureFiles.size(); ++i) {
- if (featureDataIters[i] == FeatureDataIterator::end()) {
- cerr << "Error: Feature file " << i << " ended prematurely" << endl;
- exit(1);
- }
- if (scoreDataIters[i] == ScoreDataIterator::end()) {
- cerr << "Error: Score file " << i << " ended prematurely" << endl;
- exit(1);
- }
- if (featureDataIters[i]->size() != scoreDataIters[i]->size()) {
- cerr << "Error: features and scores have different size" << endl;
- exit(1);
- }
+ UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
+ "Feature file " << i << " ended prematurely");
+ UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(),
+ "Score file " << i << " ended prematurely");
+ UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(),
+ "Features and scores have different size");
for (size_t j = 0; j < featureDataIters[i]->size(); ++j) {
hypotheses.push_back(pair<size_t,size_t>(i,j));
}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 0594c8dca..d7ee8e4e7 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -1,23 +1,23 @@
-#ifndef MERT_BLEU_SCORER_H_
-#define MERT_BLEU_SCORER_H_
+#pragma once
-#include <ostream>
+#include <fstream>
#include <string>
#include <vector>
-#include "Types.h"
+#include <boost/shared_ptr.hpp>
+
+#include "Ngram.h"
+#include "Reference.h"
+#include "ScopedVector.h"
#include "ScoreData.h"
#include "StatisticsBasedScorer.h"
-#include "ScopedVector.h"
+#include "Types.h"
namespace MosesTuning
{
const size_t kBleuNgramOrder = 4;
-class NgramCounts;
-class Reference;
-
/**
* Bleu scoring
*/
@@ -42,11 +42,14 @@ public:
return 2 * kBleuNgramOrder + 1;
}
- int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
+ void CalcBleuStats(const Reference& ref, const std::string& text, ScoreStats& entry) const;
+
+ int CalcReferenceLength(const Reference& ref, std::size_t length) const;
ReferenceLengthType GetReferenceLengthType() const {
return m_ref_length_type;
}
+
void SetReferenceLengthType(ReferenceLengthType type) {
m_ref_length_type = type;
}
@@ -62,14 +65,16 @@ public:
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
- std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false);
+ size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const;
void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
- bool OpenReference(const char* filename, std::size_t file_id);
-
// NOTE: this function is used for unit testing.
- virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
+ bool OpenReferenceStream(std::istream* is, std::size_t file_id);
+
+ void ProcessReferenceLine(const std::string& line, Reference* ref) const;
+
+ bool GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const;
//private:
protected:
@@ -99,4 +104,3 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec
}
-#endif // MERT_BLEU_SCORER_H_
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
index f1a1c8423..91c4fe4f3 100644
--- a/mert/ForestRescoreTest.cpp
+++ b/mert/ForestRescoreTest.cpp
@@ -13,7 +13,8 @@
using namespace std;
using namespace MosesTuning;
-BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) {
+BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
+{
Vocab vocab;
WordVec words;
string wordStrings[] =
@@ -244,7 +245,8 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
}
-BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) {
+BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph)
+{
Vocab vocab;
//References
ReferenceSet references;
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index 5288116d6..be9d8f2c9 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -98,7 +98,7 @@ void NbestHopeFearDecoder::HopeFear(
size_t hope_index=0, fear_index=0, model_index=0;
ValType hope_score=0, fear_score=0, model_score=0;
for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
- ValType hope_bleu, hope_model;
+ ValType hope_bleu=0, hope_model=0;
for(size_t i=0; i< train_->cur_size(); i++) {
const MiraFeatureVector& vec=train_->featuresAt(i);
ValType score = wv.score(vec);
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
index 53c0e935d..73f0e97d9 100644
--- a/mert/HopeFearDecoder.h
+++ b/mert/HopeFearDecoder.h
@@ -16,8 +16,7 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-#ifndef MERT_HOPEFEARDECODER_H
-#define MERT_HOPEFEARDECODER_H
+#pragma once
#include <vector>
@@ -160,5 +159,3 @@ private:
};
-#endif
-
diff --git a/mert/Jamfile b/mert/Jamfile
index aff2c78be..51736dace 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -66,11 +66,13 @@ exe evaluator : evaluator.cpp mert_lib ;
exe sentence-bleu : sentence-bleu.cpp mert_lib ;
+exe sentence-bleu-nbest : sentence-bleu-nbest.cpp mert_lib ;
+
exe pro : pro.cpp mert_lib ..//boost_program_options ;
exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
-alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
+alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest ;
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVectorTest.cpp b/mert/MiraFeatureVectorTest.cpp
index d64ba79a5..999b8512a 100644
--- a/mert/MiraFeatureVectorTest.cpp
+++ b/mert/MiraFeatureVectorTest.cpp
@@ -11,7 +11,8 @@ how many of the features are really "dense". This is because in hg mira
all features (sparse and dense) are to get rolled in to SparseVector
*/
-BOOST_AUTO_TEST_CASE(from_sparse) {
+BOOST_AUTO_TEST_CASE(from_sparse)
+{
SparseVector sp;
sp.set("dense0", 0.2);
sp.set("dense1", 0.3);
diff --git a/mert/Ngram.h b/mert/Ngram.h
index 521dc4928..de2703605 100644
--- a/mert/Ngram.h
+++ b/mert/Ngram.h
@@ -1,5 +1,4 @@
-#ifndef MERT_NGRAM_H_
-#define MERT_NGRAM_H_
+#pragma once
#include <vector>
#include <string>
@@ -121,4 +120,3 @@ private:
}
-#endif // MERT_NGRAM_H_
diff --git a/mert/Reference.h b/mert/Reference.h
index 2c12f2ed7..a7878f3e7 100644
--- a/mert/Reference.h
+++ b/mert/Reference.h
@@ -59,6 +59,11 @@ public:
int CalcClosest(std::size_t length) const;
int CalcShortest() const;
+ void clear() {
+ m_length.clear();
+ m_counts->clear();
+ }
+
private:
NgramCounts* m_counts;
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index ffaf03be4..34484ee30 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -64,7 +64,7 @@ void Scorer::InitConfig(const string& config)
}
}
-void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded)
+void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) const
{
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
it; ++it) {
@@ -81,7 +81,7 @@ void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded)
}
}
-void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded)
+void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded) const
{
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
it; ++it) {
diff --git a/mert/Scorer.h b/mert/Scorer.h
index 8c468aff2..a08fc436d 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -1,5 +1,4 @@
-#ifndef MERT_SCORER_H_
-#define MERT_SCORER_H_
+#pragma once
#include <iostream>
#include <sstream>
@@ -187,12 +186,12 @@ protected:
* Tokenise line and encode.
* Note: We assume that all tokens are separated by whitespaces.
*/
- void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded);
+ void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded) const;
/*
* Tokenize functions for testing only.
*/
- void TokenizeAndEncodeTesting(const std::string& line, std::vector<int>& encoded);
+ void TokenizeAndEncodeTesting(const std::string& line, std::vector<int>& encoded) const;
/**
* Every inherited scorer should call this function for each sentence
@@ -236,4 +235,3 @@ inline float score_average(const statscores_t& scores, size_t start, size_t end)
}
-#endif // MERT_SCORER_H_
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 82b4cc34d..aa6e2a08e 100644
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -474,7 +474,7 @@ int main(int argc, char **argv)
// A task for each start point
for (size_t j = 0; j < startingPoints.size(); ++j) {
boost::shared_ptr<OptimizationTask>
- task(new OptimizationTask(optimizer, startingPoints[j]));
+ task(new OptimizationTask(optimizer, startingPoints[j]));
tasks.push_back(task);
#ifdef WITH_THREADS
pool.Submit(task);
diff --git a/mert/sentence-bleu-nbest.cpp b/mert/sentence-bleu-nbest.cpp
new file mode 100644
index 000000000..599230511
--- /dev/null
+++ b/mert/sentence-bleu-nbest.cpp
@@ -0,0 +1,66 @@
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+
+#include <boost/shared_ptr.hpp>
+
+#include "BleuScorer.h"
+#include "Reference.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
+
+using namespace MosesTuning;
+
+int main(int argc, char **argv)
+{
+ if (argc == 1) {
+ std::cerr << "Usage: ./sentence-bleu-nbest ref1 [ref2 ...] < plain-nbest > bleu-scores" << std::endl;
+ return 1;
+ }
+
+ std::vector<std::string> refFiles(argv + 1, argv + argc);
+
+ // TODO all of these are empty for now
+ std::string config;
+ std::string factors;
+ std::string filter;
+
+ BleuScorer scorer(config);
+ scorer.setFactors(factors);
+ scorer.setFilter(filter);
+
+ // initialize reference streams
+ std::vector<boost::shared_ptr<std::ifstream> > refStreams;
+ for (std::vector<std::string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
+ TRACE_ERR("Loading reference from " << *refFile << std::endl);
+ boost::shared_ptr<std::ifstream> ifs(new std::ifstream(refFile->c_str()));
+ UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
+ refStreams.push_back(ifs);
+ }
+
+ // load sentences, preparing statistics, score
+ std::string nbestLine;
+ int sid = -1;
+ Reference ref;
+ while ( getline(std::cin, nbestLine) ) {
+ std::vector<std::string> items;
+ Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| ");
+ int sidCurrent = Moses::Scan<int>(items[0]);
+
+ if (sidCurrent != sid) {
+ ref.clear();
+ if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) {
+ UTIL_THROW2("Missing references");
+ }
+ sid = sidCurrent;
+ }
+ ScoreStats scoreStats;
+ scorer.CalcBleuStats(ref, items[1], scoreStats);
+ std::vector<float> stats(scoreStats.getArray(), scoreStats.getArray() + scoreStats.size());
+ std::cout << smoothedSentenceBleu(stats) << std::endl;
+ }
+
+ return 0;
+}
+
diff --git a/mert/sentence-bleu.cpp b/mert/sentence-bleu.cpp
index 5269d37cd..3f886ffeb 100644
--- a/mert/sentence-bleu.cpp
+++ b/mert/sentence-bleu.cpp
@@ -1,18 +1,26 @@
+#include <fstream>
#include <iostream>
#include <vector>
#include <string>
+#include <boost/shared_ptr.hpp>
+
#include "BleuScorer.h"
+#include "Reference.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
using namespace std;
using namespace MosesTuning;
+
int main(int argc, char **argv)
{
if (argc == 1) {
cerr << "Usage: ./sentence-bleu ref1 [ref2 ...] < candidate > bleu-scores" << endl;
return 1;
}
+
vector<string> refFiles(argv + 1, argv + argc);
// TODO all of these are empty for now
@@ -23,22 +31,31 @@ int main(int argc, char **argv)
BleuScorer scorer(config);
scorer.setFactors(factors);
scorer.setFilter(filter);
- scorer.setReferenceFiles(refFiles);
- vector<ScoreStats> entries;
-
- // Loading sentences and preparing statistics
- ScoreStats scoreentry;
- string line;
- while (getline(cin, line)) {
- scorer.prepareStats(entries.size(), line, scoreentry);
- entries.push_back(scoreentry);
+ // initialize reference streams
+ vector<boost::shared_ptr<ifstream> > refStreams;
+ for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
+ TRACE_ERR("Loading reference from " << *refFile << endl);
+ boost::shared_ptr<ifstream> ifs(new ifstream(refFile->c_str()));
+ UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
+ refStreams.push_back(ifs);
}
- vector<ScoreStats>::const_iterator sentIt;
- for (sentIt = entries.begin(); sentIt != entries.end(); sentIt++) {
- vector<float> stats(sentIt->getArray(), sentIt->getArray() + sentIt->size());
- cout << smoothedSentenceBleu(stats) << "\n";
+ // load sentences, preparing statistics, score
+ string hypothesisLine;
+ size_t sid = 0;
+ while (getline(std::cin, hypothesisLine)) {
+ Reference ref;
+ if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) {
+ UTIL_THROW2("Missing references");
+ }
+ ScoreStats scoreStats;
+ scorer.CalcBleuStats(ref, hypothesisLine, scoreStats);
+ vector<float> stats(scoreStats.getArray(), scoreStats.getArray() + scoreStats.size());
+ std::cout << smoothedSentenceBleu(stats) << std::endl;
+ ++sid;
}
+
return 0;
}
+
diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp
index f842b1136..0447a16fa 100644
--- a/moses-cmd/LatticeMBRGrid.cpp
+++ b/moses-cmd/LatticeMBRGrid.cpp
@@ -177,39 +177,34 @@ int main(int argc, char* argv[])
const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
boost::shared_ptr<InputType> source;
- while((source = ioWrapper->ReadInput()) != NULL)
- {
- // set up task of translating one sentence
- boost::shared_ptr<TranslationTask> ttask;
- ttask = TranslationTask::create(source, ioWrapper);
- Manager manager(ttask);
- manager.Decode();
- TrellisPathList nBestList;
- manager.CalcNBest(nBestSize, nBestList,true);
- //grid search
- BOOST_FOREACH(float const& p, pgrid)
- {
- SD.SetLatticeMBRPrecision(p);
- BOOST_FOREACH(float const& r, rgrid)
- {
- SD.SetLatticeMBRPRatio(r);
- BOOST_FOREACH(size_t const prune_i, prune_grid)
- {
- SD.SetLatticeMBRPruningFactor(size_t(prune_i));
- BOOST_FOREACH(float const& scale_i, scale_grid)
- {
- SD.SetMBRScale(scale_i);
- size_t lineCount = source->GetTranslationId();
- cout << lineCount << " ||| " << p << " "
- << r << " " << size_t(prune_i) << " " << scale_i
- << " ||| ";
- vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
- manager.OutputBestHypo(mbrBestHypo, lineCount,
- SD.GetReportSegmentation(),
- SD.GetReportAllFactors(),cout);
- }
- }
- }
- }
+ while((source = ioWrapper->ReadInput()) != NULL) {
+ // set up task of translating one sentence
+ boost::shared_ptr<TranslationTask> ttask;
+ ttask = TranslationTask::create(source, ioWrapper);
+ Manager manager(ttask);
+ manager.Decode();
+ TrellisPathList nBestList;
+ manager.CalcNBest(nBestSize, nBestList,true);
+ //grid search
+ BOOST_FOREACH(float const& p, pgrid) {
+ SD.SetLatticeMBRPrecision(p);
+ BOOST_FOREACH(float const& r, rgrid) {
+ SD.SetLatticeMBRPRatio(r);
+ BOOST_FOREACH(size_t const prune_i, prune_grid) {
+ SD.SetLatticeMBRPruningFactor(size_t(prune_i));
+ BOOST_FOREACH(float const& scale_i, scale_grid) {
+ SD.SetMBRScale(scale_i);
+ size_t lineCount = source->GetTranslationId();
+ cout << lineCount << " ||| " << p << " "
+ << r << " " << size_t(prune_i) << " " << scale_i
+ << " ||| ";
+ vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
+ manager.OutputBestHypo(mbrBestHypo, lineCount,
+ SD.GetReportSegmentation(),
+ SD.GetReportAllFactors(),cout);
+ }
+ }
+ }
}
+ }
}
diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp
index 302866733..ac54c1ed6 100644
--- a/moses-cmd/MainVW.cpp
+++ b/moses-cmd/MainVW.cpp
@@ -144,26 +144,27 @@ int main(int argc, char** argv)
#endif
// main loop over set of input sentences
-
+
boost::shared_ptr<InputType> source;
- while ((source = ioWrapper->ReadInput()) != NULL)
- {
- IFVERBOSE(1) { ResetUserTime(); }
+ while ((source = ioWrapper->ReadInput()) != NULL) {
+ IFVERBOSE(1) {
+ ResetUserTime();
+ }
- InputType* foo = source.get();
- FeatureFunction::CallChangeSource(foo);
+ InputType* foo = source.get();
+ FeatureFunction::CallChangeSource(foo);
- // set up task of training one sentence
- boost::shared_ptr<TrainingTask> task;
- task = TrainingTask::create(source, ioWrapper);
+ // set up task of training one sentence
+ boost::shared_ptr<TrainingTask> task;
+ task = TrainingTask::create(source, ioWrapper);
- // execute task
+ // execute task
#ifdef WITH_THREADS
- pool.Submit(task);
+ pool.Submit(task);
#else
- task->Run();
+ task->Run();
#endif
- }
+ }
// we are done, finishing up
#ifdef WITH_THREADS
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
index a89bb848a..83d48e6e4 100644
--- a/moses/BaseManager.cpp
+++ b/moses/BaseManager.cpp
@@ -17,7 +17,9 @@ BaseManager::BaseManager(ttasksptr const& ttask)
const InputType&
BaseManager::GetSource() const
-{ return m_source; }
+{
+ return m_source;
+}
diff --git a/moses/ChartCellCollection.h b/moses/ChartCellCollection.h
index 5945ce12a..ac8e0fd38 100644
--- a/moses/ChartCellCollection.h
+++ b/moses/ChartCellCollection.h
@@ -36,8 +36,8 @@ class ChartCellCollectionBase
{
public:
template <class Factory> ChartCellCollectionBase(const InputType &input,
- const Factory &factory,
- const ChartParser &parser)
+ const Factory &factory,
+ const ChartParser &parser)
:m_cells(input.GetSize()) {
size_t size = input.GetSize();
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index e305a4147..0c355fd94 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -299,7 +299,7 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
= StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet
- (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
+ (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv);
return rv;
}
diff --git a/moses/ContextScope.h b/moses/ContextScope.h
index ed9f854ff..e9edf7b15 100644
--- a/moses/ContextScope.h
+++ b/moses/ContextScope.h
@@ -18,80 +18,75 @@
namespace Moses
{
- class ContextScope
- {
- protected:
- typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
- typedef scratchpad_t::iterator iter_t;
- typedef scratchpad_t::value_type entry_t;
- typedef scratchpad_t::const_iterator const_iter_t;
- scratchpad_t m_scratchpad;
- mutable boost::shared_mutex m_lock;
- public:
- // class write_access
- // {
- // boost::unique_lock<boost::shared_mutex> m_lock;
- // public:
+class ContextScope
+{
+protected:
+ typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
+ typedef scratchpad_t::iterator iter_t;
+ typedef scratchpad_t::value_type entry_t;
+ typedef scratchpad_t::const_iterator const_iter_t;
+ scratchpad_t m_scratchpad;
+ mutable boost::shared_mutex m_lock;
+public:
+ // class write_access
+ // {
+ // boost::unique_lock<boost::shared_mutex> m_lock;
+ // public:
- // write_access(boost::shared_mutex& lock)
- // : m_lock(lock)
- // { }
+ // write_access(boost::shared_mutex& lock)
+ // : m_lock(lock)
+ // { }
- // write_access(write_access& other)
- // {
- // swap(m_lock, other.m_lock);
- // }
- // };
+ // write_access(write_access& other)
+ // {
+ // swap(m_lock, other.m_lock);
+ // }
+ // };
- // write_access lock() const
- // {
- // return write_access(m_lock);
- // }
+ // write_access lock() const
+ // {
+ // return write_access(m_lock);
+ // }
- template<typename T>
- boost::shared_ptr<void> const&
- set(void const* const key, boost::shared_ptr<T> const& val)
- {
- boost::unique_lock<boost::shared_mutex> lock(m_lock);
- return (m_scratchpad[key] = val);
- }
+ template<typename T>
+ boost::shared_ptr<void> const&
+ set(void const* const key, boost::shared_ptr<T> const& val) {
+ boost::unique_lock<boost::shared_mutex> lock(m_lock);
+ return (m_scratchpad[key] = val);
+ }
- template<typename T>
- boost::shared_ptr<T> const
- get(void const* key, bool CreateNewIfNecessary=false)
- {
- using boost::shared_mutex;
- using boost::upgrade_lock;
- // T const* key = reinterpret_cast<T const*>(xkey);
- upgrade_lock<shared_mutex> lock(m_lock);
- iter_t m = m_scratchpad.find(key);
- boost::shared_ptr< T > ret;
- if (m != m_scratchpad.end())
- {
- if (m->second == NULL && CreateNewIfNecessary)
- {
- boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
- m->second.reset(new T);
- }
- ret = boost::static_pointer_cast< T >(m->second);
- return ret;
- }
- if (!CreateNewIfNecessary) return ret;
- boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
- ret.reset(new T);
- m_scratchpad[key] = ret;
+ template<typename T>
+ boost::shared_ptr<T> const
+ get(void const* key, bool CreateNewIfNecessary=false) {
+ using boost::shared_mutex;
+ using boost::upgrade_lock;
+ // T const* key = reinterpret_cast<T const*>(xkey);
+ upgrade_lock<shared_mutex> lock(m_lock);
+ iter_t m = m_scratchpad.find(key);
+ boost::shared_ptr< T > ret;
+ if (m != m_scratchpad.end()) {
+ if (m->second == NULL && CreateNewIfNecessary) {
+ boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
+ m->second.reset(new T);
+ }
+ ret = boost::static_pointer_cast< T >(m->second);
return ret;
}
+ if (!CreateNewIfNecessary) return ret;
+ boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
+ ret.reset(new T);
+ m_scratchpad[key] = ret;
+ return ret;
+ }
- ContextScope() { }
+ ContextScope() { }
- ContextScope(ContextScope const& other)
- {
- boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
- boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
- m_scratchpad = other.m_scratchpad;
- }
+ ContextScope(ContextScope const& other) {
+ boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
+ boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
+ m_scratchpad = other.m_scratchpad;
+ }
- };
+};
};
diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp
index 7ea26f8a5..034c06fc2 100644
--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@@ -218,17 +218,16 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
void
DecodeStepTranslation::
ProcessLEGACY(TranslationOption const& in,
- DecodeStep const& decodeStep,
- PartialTranslOptColl &out,
- TranslationOptionCollection *toc,
- bool adhereTableLimit) const
+ DecodeStep const& decodeStep,
+ PartialTranslOptColl &out,
+ TranslationOptionCollection *toc,
+ bool adhereTableLimit) const
{
- if (in.GetTargetPhrase().GetSize() == 0)
- {
- // word deletion
- out.Add(new TranslationOption(in));
- return;
- }
+ if (in.GetTargetPhrase().GetSize() == 0) {
+ // word deletion
+ out.Add(new TranslationOption(in));
+ return;
+ }
// normal trans step
WordsRange const& srcRange = in.GetSourceWordsRange();
@@ -241,34 +240,32 @@ ProcessLEGACY(TranslationOption const& in,
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
- if (phraseColl != NULL)
- {
- TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
- iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
- ? phraseColl->begin() + tableLimit : phraseColl->end());
+ if (phraseColl != NULL) {
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
+ iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
+ ? phraseColl->begin() + tableLimit : phraseColl->end());
- for (iterTargetPhrase = phraseColl->begin();
- iterTargetPhrase != iterEnd;
- ++iterTargetPhrase)
- {
- TargetPhrase const& targetPhrase = **iterTargetPhrase;
- if (targetPhrase.GetSize() != currSize ||
- (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
- continue;
+ for (iterTargetPhrase = phraseColl->begin();
+ iterTargetPhrase != iterEnd;
+ ++iterTargetPhrase) {
+ TargetPhrase const& targetPhrase = **iterTargetPhrase;
+ if (targetPhrase.GetSize() != currSize ||
+ (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
+ continue;
- TargetPhrase outPhrase(inPhrase);
- outPhrase.Merge(targetPhrase, m_newOutputFactors);
- outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
+ TargetPhrase outPhrase(inPhrase);
+ outPhrase.Merge(targetPhrase, m_newOutputFactors);
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
- TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
- assert(newTransOpt != NULL);
+ TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
+ assert(newTransOpt != NULL);
- newTransOpt->SetInputPath(inputPath);
+ newTransOpt->SetInputPath(inputPath);
- out.Add(newTransOpt);
+ out.Add(newTransOpt);
- }
}
+ }
}
}
diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp
index 27f757b5c..342e6dc7a 100644
--- a/moses/ExportInterface.cpp
+++ b/moses/ExportInterface.cpp
@@ -83,16 +83,16 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni): m_staticData(StaticData::Instance())
{
- if (!m_params.LoadParam(mosesIni)) {
- cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
- exit(1);
- }
- if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
- cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
- exit(1);
- }
+ if (!m_params.LoadParam(mosesIni)) {
+ cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
+ exit(1);
+ }
+ if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
+ cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
+ exit(1);
+ }
- util::rand_init();
+ util::rand_init();
}
@@ -114,13 +114,15 @@ string SimpleTranslationInterface::translate(const string &inputString)
boost::shared_ptr<InputType> source = ioWrapper->ReadInput();
if (!source) return "Error: Source==null!!!";
- IFVERBOSE(1) { ResetUserTime(); }
+ IFVERBOSE(1) {
+ ResetUserTime();
+ }
FeatureFunction::CallChangeSource(&*source);
// set up task of translating one sentence
boost::shared_ptr<TranslationTask> task
- = TranslationTask::create(source, ioWrapper);
+ = TranslationTask::create(source, ioWrapper);
task->Run();
string output = outputStream.str();
@@ -147,10 +149,14 @@ int
run_as_server()
{
#ifdef HAVE_XMLRPC_C
- int port; params.SetParameter(port, "server-port", 8080);
- bool isSerial; params.SetParameter(isSerial, "serial", false);
- string logfile; params.SetParameter(logfile, "server-log", string(""));
- size_t num_threads; params.SetParameter(num_threads, "threads", size_t(10));
+ int port;
+ params.SetParameter(port, "server-port", 8080);
+ bool isSerial;
+ params.SetParameter(isSerial, "serial", false);
+ string logfile;
+ params.SetParameter(logfile, "server-log", string(""));
+ size_t num_threads;
+ params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
xmlrpc_c::registry myRegistry;
@@ -166,8 +172,9 @@ run_as_server()
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
XVERBOSE(1,"Listening on port " << port << endl);
- if (isSerial) { while(1) myAbyssServer.runOnce(); }
- else myAbyssServer.run();
+ if (isSerial) {
+ while(1) myAbyssServer.runOnce();
+ } else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
@@ -193,16 +200,15 @@ batch_run()
// set up read/writing class:
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object"
- << " [" << HERE << "]");
+ << " [" << HERE << "]");
// check on weights
const ScoreComponentCollection& weights = staticData.GetAllWeights();
- IFVERBOSE(2)
- {
- TRACE_ERR("The global weight vector looks like this: ");
- TRACE_ERR(weights);
- TRACE_ERR("\n");
- }
+ IFVERBOSE(2) {
+ TRACE_ERR("The global weight vector looks like this: ");
+ TRACE_ERR(weights);
+ TRACE_ERR("\n");
+ }
#ifdef WITH_THREADS
ThreadPool pool(staticData.ThreadCount());
@@ -214,57 +220,53 @@ batch_run()
// main loop over set of input sentences
boost::shared_ptr<InputType> source;
- while ((source = ioWrapper->ReadInput()) != NULL)
- {
- IFVERBOSE(1) ResetUserTime();
+ while ((source = ioWrapper->ReadInput()) != NULL) {
+ IFVERBOSE(1) ResetUserTime();
- FeatureFunction::CallChangeSource(source.get());
+ FeatureFunction::CallChangeSource(source.get());
- // set up task of translating one sentence
- boost::shared_ptr<TranslationTask>
- task = TranslationTask::create(source, ioWrapper);
- task->SetContextString(context_string);
+ // set up task of translating one sentence
+ boost::shared_ptr<TranslationTask>
+ task = TranslationTask::create(source, ioWrapper);
+ task->SetContextString(context_string);
- // Allow for (sentence-)context-specific processing prior to
- // decoding. This can be used, for example, for context-sensitive
- // phrase lookup.
- FeatureFunction::SetupAll(*task);
+ // Allow for (sentence-)context-specific processing prior to
+ // decoding. This can be used, for example, for context-sensitive
+ // phrase lookup.
+ FeatureFunction::SetupAll(*task);
- // execute task
+ // execute task
#ifdef WITH_THREADS
#ifdef PT_UG
- // simulated post-editing requires threads (within the dynamic phrase tables)
- // but runs all sentences serially, to allow updating of the bitext.
- bool spe = params.isParamSpecified("spe-src");
- if (spe)
- {
- // simulated post-editing: always run single-threaded!
- task->Run();
- string src,trg,aln;
- UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
- {
- Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
- if (sapt) sapt->add(src,trg,aln);
- VERBOSE(1,"[" << HERE << " added src] " << src << endl);
- VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
- VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
- }
- }
- else pool.Submit(task);
+ // simulated post-editing requires threads (within the dynamic phrase tables)
+ // but runs all sentences serially, to allow updating of the bitext.
+ bool spe = params.isParamSpecified("spe-src");
+ if (spe) {
+ // simulated post-editing: always run single-threaded!
+ task->Run();
+ string src,trg,aln;
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
+ << "missing update data for simulated post-editing.");
+ BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
+ Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
+ if (sapt) sapt->add(src,trg,aln);
+ VERBOSE(1,"[" << HERE << " added src] " << src << endl);
+ VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
+ VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
+ }
+ } else pool.Submit(task);
#else
- pool.Submit(task);
+ pool.Submit(task);
#endif
#else
- task->Run();
+ task->Run();
#endif
- }
+ }
// we are done, finishing up
#ifdef WITH_THREADS
@@ -289,52 +291,49 @@ int decoder_main(int argc, char** argv)
#ifdef NDEBUG
try
#endif
- {
+ {
#ifdef HAVE_PROTOBUF
- GOOGLE_PROTOBUF_VERIFY_VERSION;
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
- // echo command line, if verbose
- IFVERBOSE(1)
- {
- TRACE_ERR("command: ");
- for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
- TRACE_ERR(endl);
- }
-
- // set number of significant decimals in output
- FixPrecision(cout);
- FixPrecision(cerr);
-
- // load all the settings into the Parameter class
- // (stores them as strings, or array of strings)
- if (!params.LoadParam(argc,argv))
- exit(1);
-
- // initialize all "global" variables, which are stored in StaticData
- // note: this also loads models such as the language model, etc.
- if (!StaticData::LoadDataStatic(&params, argv[0]))
- exit(1);
-
- // setting "-show-weights" -> just dump out weights and exit
- if (params.isParamSpecified("show-weights"))
- {
- ShowWeights();
- exit(0);
- }
-
- if (params.GetParam("server"))
- return run_as_server();
- else
- return batch_run();
+ // echo command line, if verbose
+ IFVERBOSE(1) {
+ TRACE_ERR("command: ");
+ for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+ TRACE_ERR(endl);
+ }
+
+ // set number of significant decimals in output
+ FixPrecision(cout);
+ FixPrecision(cerr);
+ // load all the settings into the Parameter class
+ // (stores them as strings, or array of strings)
+ if (!params.LoadParam(argc,argv))
+ exit(1);
+
+ // initialize all "global" variables, which are stored in StaticData
+ // note: this also loads models such as the language model, etc.
+ if (!StaticData::LoadDataStatic(&params, argv[0]))
+ exit(1);
+
+ // setting "-show-weights" -> just dump out weights and exit
+ if (params.isParamSpecified("show-weights")) {
+ ShowWeights();
+ exit(0);
}
+
+ if (params.GetParam("server"))
+ return run_as_server();
+ else
+ return batch_run();
+
+ }
#ifdef NDEBUG
- catch (const std::exception &e)
- {
- std::cerr << "Exception: " << e.what() << std::endl;
- return EXIT_FAILURE;
- }
+ catch (const std::exception &e) {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
#endif
}
diff --git a/moses/ExportInterface.h b/moses/ExportInterface.h
index 56e37c7e1..03a8b1f1c 100644
--- a/moses/ExportInterface.h
+++ b/moses/ExportInterface.h
@@ -45,7 +45,9 @@ public:
~SimpleTranslationInterface();
std::string translate(const std::string &input);
Moses::StaticData& getStaticData();
- Moses::Parameter& getParameters(){ return m_params; }
+ Moses::Parameter& getParameters() {
+ return m_params;
+ }
private:
SimpleTranslationInterface();
Moses::Parameter m_params;
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 81c6bdeb9..c797381ff 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -157,32 +157,26 @@ FeatureFactory
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
- if (feature->GetNumScoreComponents())
- {
- if (weights.size() == 0)
- {
- weights = feature->DefaultWeights();
- if (weights.size() == 0)
- {
- TRACE_ERR("WARNING: No weights specified in config file for FF "
- << featureName << ". This FF does not supply default values.\n"
- << "WARNING: Auto-initializing all weights for this FF to 1.0");
- weights.assign(feature->GetNumScoreComponents(),1.0);
- }
- else
- {
- TRACE_ERR("WARNING: No weights specified in config file for FF "
- << featureName << ". Using default values supplied by FF.");
- }
- }
- UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
- "FATAL ERROR: Mismatch in number of features and number "
- << "of weights for Feature Function " << featureName
- << " (features: " << feature->GetNumScoreComponents()
- << " vs. weights: " << weights.size() << ")");
- static_data.SetWeights(feature, weights);
+ if (feature->GetNumScoreComponents()) {
+ if (weights.size() == 0) {
+ weights = feature->DefaultWeights();
+ if (weights.size() == 0) {
+ TRACE_ERR("WARNING: No weights specified in config file for FF "
+ << featureName << ". This FF does not supply default values.\n"
+ << "WARNING: Auto-initializing all weights for this FF to 1.0");
+ weights.assign(feature->GetNumScoreComponents(),1.0);
+ } else {
+ TRACE_ERR("WARNING: No weights specified in config file for FF "
+ << featureName << ". Using default values supplied by FF.");
+ }
}
- else if (feature->IsTuneable())
+ UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
+ "FATAL ERROR: Mismatch in number of features and number "
+ << "of weights for Feature Function " << featureName
+ << " (features: " << feature->GetNumScoreComponents()
+ << " vs. weights: " << weights.size() << ")");
+ static_data.SetWeights(feature, weights);
+ } else if (feature->IsTuneable())
static_data.SetWeights(feature, weights);
}
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 298a9e65c..baa2b5563 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -55,7 +55,7 @@ void FeatureFunction::CallChangeSource(InputType * const&input)
void FeatureFunction::SetupAll(TranslationTask const& ttask)
{
BOOST_FOREACH(FeatureFunction* ff, s_staticColl)
- ff->Setup(ttask);
+ ff->Setup(ttask);
}
FeatureFunction::
@@ -193,17 +193,23 @@ void FeatureFunction::SetTuneableComponents(const std::string& value)
void
FeatureFunction
::InitializeForInput(ttasksptr const& ttask)
-{ InitializeForInput(*(ttask->GetSource().get())); }
+{
+ InitializeForInput(*(ttask->GetSource().get()));
+}
void
FeatureFunction
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
-{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
+{
+ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get()));
+}
size_t
FeatureFunction
::GetIndex() const
-{ return m_index; }
+{
+ return m_index;
+}
/// set index
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index a8f189f0b..56f6cdff0 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -136,7 +136,9 @@ public:
CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
const std::string &
- GetArgLine() const { return m_argLine; }
+ GetArgLine() const {
+ return m_argLine;
+ }
// given a target phrase containing only factors specified in mask
// return true if the feature function can be evaluated
@@ -153,8 +155,8 @@ public:
// source from the input sentence
virtual void
EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
- ScoreComponentCollection& scoreBreakdown,
- ScoreComponentCollection& estimatedFutureScore) const = 0;
+ ScoreComponentCollection& scoreBreakdown,
+ ScoreComponentCollection& estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding
virtual void ChangeSource(InputType * const&input) const { }
diff --git a/moses/FF/InternalTree.cpp b/moses/FF/InternalTree.cpp
index 95730f018..4a01ea1b2 100644
--- a/moses/FF/InternalTree.cpp
+++ b/moses/FF/InternalTree.cpp
@@ -147,8 +147,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret);
- }
- else {
+ } else {
ret.push_back(*itx);
}
}
diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h
index f9a8ba5d8..8f982c6aa 100644
--- a/moses/FF/InternalTree.h
+++ b/moses/FF/InternalTree.h
@@ -96,8 +96,7 @@ public:
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call
- $generator(leafNT)
- {
+ $generator(leafNT) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {}
@@ -116,8 +115,7 @@ public:
// Python-like generator that yields the parent of the next nonterminal leaf on every call
- $generator(leafNTParent)
- {
+ $generator(leafNTParent) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {}
@@ -135,8 +133,7 @@ public:
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
- $generator(leafNTPath)
- {
+ $generator(leafNTPath) {
std::vector<TreePointer>::iterator it;
InternalTree* tree;
std::vector<InternalTree*> * path;
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index c67a16076..9a8fa0f08 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -66,9 +66,9 @@ LexicalReordering(const std::string &line)
// sanity check: number of default scores
size_t numScores
- = m_numScoreComponents
+ = m_numScoreComponents
= m_numTuneableComponents
- = m_configuration->GetNumScoreComponents();
+ = m_configuration->GetNumScoreComponents();
UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
"wrong number of default scores (" << m_defaultScores.size()
<< ") for lexicalized reordering model (expected "
@@ -89,7 +89,7 @@ Load()
typedef LexicalReorderingTable LRTable;
if (m_filePath.size())
m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
- m_factorsE, std::vector<FactorType>()));
+ m_factorsE, std::vector<FactorType>()));
}
Scores
@@ -158,7 +158,7 @@ LexicalReordering::
SetCache(TranslationOptionList& tol) const
{
BOOST_FOREACH(TranslationOption* to, tol)
- this->SetCache(*to);
+ this->SetCache(*to);
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index 48fd577f1..90de3ad9c 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -101,7 +101,7 @@ GetOrientation(int const reoDistance) const
// this one is for HierarchicalReorderingBackwardState
return ((m_modelType == LeftRight)
? (reoDistance >= 1) ? R : L
- : (reoDistance == 1) ? M
+ : (reoDistance == 1) ? M
: (m_modelType == Monotonic) ? NM
: (reoDistance == -1) ? S
: (m_modelType == MSD) ? D
@@ -115,7 +115,7 @@ GetOrientation(WordsRange const& prev, WordsRange const& cur,
{
return ((m_modelType == LeftRight)
? cur.GetStartPos() > prev.GetEndPos() ? R : L
- : IsMonotonicStep(prev,cur,cov) ? M
+ : IsMonotonicStep(prev,cur,cov) ? M
: (m_modelType == Monotonic) ? NM
: IsSwap(prev,cur,cov) ? S
: (m_modelType == MSD) ? D
@@ -263,7 +263,7 @@ CopyScores(ScoreComponentCollection* accum,
const SparseReordering* sparse = m_configuration.GetSparseReordering();
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
- m_direction, accum);
+ m_direction, accum);
}
@@ -342,7 +342,7 @@ Expand(const TranslationOption& topt, const InputType& input,
LRModel const& lrmodel = m_configuration;
WordsRange const cur = topt.GetSourceWordsRange();
LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
- : lrmodel.GetOrientation(m_prevRange,cur));
+ : lrmodel.GetOrientation(m_prevRange,cur));
CopyScores(scores, topt, input, reoType);
}
return new PhraseBasedReorderingState(this, topt);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 1e488fc41..19904ae32 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -44,19 +44,18 @@ public:
static const ReorderingType L = 1; // left
static const ReorderingType MAX = 3; // largest possible
#else
- enum ReorderingType
- {
- M = 0, // monotonic
- NM = 1, // non-monotonic
- S = 1, // swap
- D = 2, // discontinuous
- DL = 2, // discontinuous, left
- DR = 3, // discontinuous, right
- R = 0, // right
- L = 1, // left
- MAX = 3, // largest possible
- NONE = 4 // largest possible
- };
+ enum ReorderingType {
+ M = 0, // monotonic
+ NM = 1, // non-monotonic
+ S = 1, // swap
+ D = 2, // discontinuous
+ DL = 2, // discontinuous, left
+ DR = 3, // discontinuous, right
+ R = 0, // right
+ L = 1, // left
+ MAX = 3, // largest possible
+ NONE = 4 // largest possible
+ };
#endif
// determine orientation, depending on model:
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 5397dcb10..6c81ca414 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -114,10 +114,10 @@ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id,
position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) {
SparseReorderingFeatureKey
- key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
- factor, isCluster,
- static_cast<SparseReorderingFeatureKey::Position>(position),
- side, static_cast<LRModel::ReorderingType>(reoType));
+ key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
+ factor, isCluster,
+ static_cast<SparseReorderingFeatureKey::Position>(position),
+ side, static_cast<LRModel::ReorderingType>(reoType));
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
}
}
diff --git a/moses/FF/Model1Feature.cpp b/moses/FF/Model1Feature.cpp
index 6f6552461..09cfd47ab 100644
--- a/moses/FF/Model1Feature.cpp
+++ b/moses/FF/Model1Feature.cpp
@@ -71,21 +71,18 @@ void Model1Vocabulary::Load(const std::string& fileName)
std::string line;
unsigned i = 0;
- if ( getline(inFile, line) ) // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this
- {
+ if ( getline(inFile, line) ) { // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
- if (! ( (id == 1) && (tokens[1] == "UNK") ))
- {
+ if (! ( (id == 1) && (tokens[1] == "UNK") )) {
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
}
}
- while ( getline(inFile, line) )
- {
+ while ( getline(inFile, line) ) {
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
@@ -104,8 +101,7 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
std::string line;
unsigned i = 0;
- while ( getline(inFile, line) )
- {
+ while ( getline(inFile, line) ) {
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
@@ -183,35 +179,31 @@ void Model1Feature::Load()
}
void Model1Feature::EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore) const
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
{
const Sentence& sentence = static_cast<const Sentence&>(input);
float score = 0.0;
float norm = TransformScore(1+sentence.GetSize());
- for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
- {
+ for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
const Word &wordT = targetPhrase.GetWord(posT);
- if ( !wordT.IsNonTerminal() )
- {
+ if ( !wordT.IsNonTerminal() ) {
float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word
// cache lookup
bool foundInCache = false;
{
- #ifdef WITH_THREADS
+#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
- #endif
+#endif
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input);
- if (sentenceCache != m_cache.end())
- {
+ if (sentenceCache != m_cache.end()) {
boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]);
- if (cacheHit != sentenceCache->second.end())
- {
+ if (cacheHit != sentenceCache->second.end()) {
foundInCache = true;
score += cacheHit->second;
FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl);
@@ -219,10 +211,8 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
}
}
- if (!foundInCache)
- {
- for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) // ignore <s> and </s>
- {
+ if (!foundInCache) {
+ for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s>
const Word &wordS = sentence.GetWord(posS);
float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);
@@ -231,10 +221,10 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
float thisWordScore = TransformScore(thisWordProb) - norm;
FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
{
- #ifdef WITH_THREADS
+#ifdef WITH_THREADS
// need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
- #endif
+#endif
m_cache[&input][wordT[0]] = thisWordScore;
}
score += thisWordScore;
@@ -247,14 +237,13 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source)
{
- #ifdef WITH_THREADS
+#ifdef WITH_THREADS
// need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
- #endif
+#endif
// clear cache
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::iterator sentenceCache = m_cache.find(&source);
- if (sentenceCache != m_cache.end())
- {
+ if (sentenceCache != m_cache.end()) {
sentenceCache->second.clear();
m_cache.erase(sentenceCache);
}
diff --git a/moses/FF/Model1Feature.h b/moses/FF/Model1Feature.h
index 9c380e3ae..610a39808 100644
--- a/moses/FF/Model1Feature.h
+++ b/moses/FF/Model1Feature.h
@@ -17,7 +17,7 @@ class Model1Vocabulary
{
public:
- #define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX
+#define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX
static const std::string GIZANULL;
Model1Vocabulary();
@@ -103,10 +103,10 @@ private:
// cache
mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache;
- #ifdef WITH_THREADS
+#ifdef WITH_THREADS
// reader-writer lock
mutable boost::shared_mutex m_accessLock;
- #endif
+#endif
};
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index 2a59340ea..1c9a3f738 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -288,7 +288,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
if (currTarPhr.GetAlignNonTerm().GetSize() != 0) {
const boost::shared_ptr<void> data = currTarPhr.GetData("Orientation");
UTIL_THROW_IF2(!data, GetScoreProducerDescription()
- << ": Orientation data not set in target phrase. ");
+ << ": Orientation data not set in target phrase. ");
reoClassData = static_cast<const PhraseOrientationFeature::ReoClassData*>( data.get() );
}
diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h
index aaee79a15..4460a1ea7 100644
--- a/moses/FF/PhraseOrientationFeature.h
+++ b/moses/FF/PhraseOrientationFeature.h
@@ -301,15 +301,15 @@ class PhraseOrientationFeature : public StatefulFeatureFunction
public:
struct ReoClassData {
- public:
- std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
- std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
- bool firstNonTerminalIsBoundary;
- bool firstNonTerminalPreviousSourceSpanIsAligned;
- bool firstNonTerminalFollowingSourceSpanIsAligned;
- bool lastNonTerminalIsBoundary;
- bool lastNonTerminalPreviousSourceSpanIsAligned;
- bool lastNonTerminalFollowingSourceSpanIsAligned;
+ public:
+ std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
+ std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
+ bool firstNonTerminalIsBoundary;
+ bool firstNonTerminalPreviousSourceSpanIsAligned;
+ bool firstNonTerminalFollowingSourceSpanIsAligned;
+ bool lastNonTerminalIsBoundary;
+ bool lastNonTerminalPreviousSourceSpanIsAligned;
+ bool lastNonTerminalFollowingSourceSpanIsAligned;
};
PhraseOrientationFeature(const std::string &line);
diff --git a/moses/FF/RulePairUnlexicalizedSource.cpp b/moses/FF/RulePairUnlexicalizedSource.cpp
index 148d54052..f490a2b1a 100644
--- a/moses/FF/RulePairUnlexicalizedSource.cpp
+++ b/moses/FF/RulePairUnlexicalizedSource.cpp
@@ -39,9 +39,9 @@ void RulePairUnlexicalizedSource::SetParameter(const std::string& key, const std
void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
{
const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
@@ -51,8 +51,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
return;
}
- for (size_t posS=0; posS<source.GetSize(); ++posS)
- {
+ for (size_t posS=0; posS<source.GetSize(); ++posS) {
const Word &wordS = source.GetWord(posS);
if ( !wordS.IsNonTerminal() ) {
return;
@@ -61,8 +60,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
ostringstream namestr;
- for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
- {
+ for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
const Word &wordT = targetPhrase.GetWord(posT);
const Factor* factorT = wordT[0];
if ( wordT.IsNonTerminal() ) {
@@ -78,8 +76,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
namestr << targetPhraseLHS->GetString() << "|";
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
- it!=targetPhrase.GetAlignNonTerm().end(); ++it)
- {
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
namestr << "|" << it->first << "-" << it->second;
}
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index 08987537d..c894a2b20 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -26,16 +26,16 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
, ScoreComponentCollection &estimatedFutureScore) const
{
if (IsGlueRule(source)) {
- return;
+ return;
}
float score = 0;
if (source.GetSize() > 0 && source.Front().IsNonTerminal()) {
- ++score;
+ ++score;
}
if (source.GetSize() > 1 && source.Back().IsNonTerminal()) {
- ++score;
+ ++score;
}
/*
@@ -61,23 +61,20 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
*/
if (m_perScope) {
- UTIL_THROW_IF2(m_numScoreComponents <= score,
- "Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score);
- vector<float> scores(m_numScoreComponents, 0);
- scores[score] = 1;
+ UTIL_THROW_IF2(m_numScoreComponents <= score,
+ "Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score);
+ vector<float> scores(m_numScoreComponents, 0);
+ scores[score] = 1;
- if (m_futureCostOnly) {
- estimatedFutureScore.PlusEquals(this, scores);
- }
- else {
- scoreBreakdown.PlusEquals(this, scores);
- }
- }
- else if (m_futureCostOnly) {
- estimatedFutureScore.PlusEquals(this, score);
- }
- else {
- scoreBreakdown.PlusEquals(this, score);
+ if (m_futureCostOnly) {
+ estimatedFutureScore.PlusEquals(this, scores);
+ } else {
+ scoreBreakdown.PlusEquals(this, scores);
+ }
+ } else if (m_futureCostOnly) {
+ estimatedFutureScore.PlusEquals(this, score);
+ } else {
+ scoreBreakdown.PlusEquals(this, score);
}
}
@@ -85,14 +82,11 @@ void RuleScope::SetParameter(const std::string& key, const std::string& value)
{
if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value);
- }
- else if (key == "per-scope") {
- m_perScope = Scan<bool>(value);
- }
- else if ("future-cost-only") {
- m_futureCostOnly = Scan<bool>(value);
- }
- else {
+ } else if (key == "per-scope") {
+ m_perScope = Scan<bool>(value);
+ } else if ("future-cost-only") {
+ m_futureCostOnly = Scan<bool>(value);
+ } else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp
index f2988f2b9..fc1fcdc5b 100644
--- a/moses/FF/TreeStructureFeature.cpp
+++ b/moses/FF/TreeStructureFeature.cpp
@@ -72,7 +72,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
if (m_binarized && full_sentence) {
- mytree->Unbinarize();
+ mytree->Unbinarize();
}
return new TreeState(mytree);
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index c94791c32..bd59a41a4 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -183,8 +183,8 @@ public:
// optionally update translation options using leave-one-out
std::vector<bool> keep = (m_leaveOneOut.size() > 0)
- ? LeaveOneOut(translationOptionList, correct)
- : std::vector<bool>(translationOptionList.size(), true);
+ ? LeaveOneOut(translationOptionList, correct)
+ : std::vector<bool>(translationOptionList.size(), true);
// check whether we (still) have some correct translation
int firstCorrect = -1;
@@ -312,11 +312,11 @@ public:
return;
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
- "This feature function requires the TabbedSentence input type");
+ "This feature function requires the TabbedSentence input type");
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
- "TabbedSentence must contain target<tab>alignment");
+ "TabbedSentence must contain target<tab>alignment");
// target sentence represented as a phrase
Phrase *target = new Phrase();
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index b792d11f8..bc466664a 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -45,630 +45,633 @@ namespace Moses
{
#ifdef USE_HYPO_POOL
- ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
+ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
- Hypothesis::
- Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt)
- : m_prevHypo(NULL)
- , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted)
- , m_sourceInput(source)
- , m_currSourceWordsRange(
- m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
- m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
- , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
- , m_wordDeleted(false)
- , m_totalScore(0.0f)
- , m_futureScore(0.0f)
- , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
- , m_arcList(NULL)
- , m_transOpt(initialTransOpt)
- , m_manager(manager)
- , m_id(m_manager.GetNextHypoId())
- {
- // used for initial seeding of trans process
- // initialize scores
- //_hash_computed = false;
- //s_HypothesesCreated = 1;
- const vector<const StatefulFeatureFunction*>& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for (unsigned i = 0; i < ffs.size(); ++i)
- m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
- m_manager.GetSentenceStats().AddCreated();
- }
+Hypothesis::
+Hypothesis(Manager& manager, InputType const& source, const TranslationOption &initialTransOpt)
+ : m_prevHypo(NULL)
+ , m_sourceCompleted(source.GetSize(), manager.GetSource().m_sourceCompleted)
+ , m_sourceInput(source)
+ , m_currSourceWordsRange(
+ m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
+ m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
+ , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
+ , m_wordDeleted(false)
+ , m_totalScore(0.0f)
+ , m_futureScore(0.0f)
+ , m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
+ , m_arcList(NULL)
+ , m_transOpt(initialTransOpt)
+ , m_manager(manager)
+ , m_id(m_manager.GetNextHypoId())
+{
+ // used for initial seeding of trans process
+ // initialize scores
+ //_hash_computed = false;
+ //s_HypothesesCreated = 1;
+ const vector<const StatefulFeatureFunction*>& ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ for (unsigned i = 0; i < ffs.size(); ++i)
+ m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
+ m_manager.GetSentenceStats().AddCreated();
+}
- /***
- * continue prevHypo by appending the phrases in transOpt
- */
- Hypothesis::
- Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
- : m_prevHypo(&prevHypo)
- , m_sourceCompleted(prevHypo.m_sourceCompleted )
- , m_sourceInput(prevHypo.m_sourceInput)
- , m_currSourceWordsRange(transOpt.GetSourceWordsRange())
- , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
- prevHypo.m_currTargetWordsRange.GetEndPos()
- + transOpt.GetTargetPhrase().GetSize())
- , m_wordDeleted(false)
- , m_totalScore(0.0f)
- , m_futureScore(0.0f)
- , m_ffStates(prevHypo.m_ffStates.size())
- , m_arcList(NULL)
- , m_transOpt(transOpt)
- , m_manager(prevHypo.GetManager())
- , m_id(m_manager.GetNextHypoId())
- {
- m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
-
- // assert that we are not extending our hypothesis by retranslating something
- // that this hypothesis has already translated!
- assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
-
- //_hash_computed = false;
- m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
- m_wordDeleted = transOpt.IsDeletionOption();
- m_manager.GetSentenceStats().AddCreated();
- }
+/***
+ * continue prevHypo by appending the phrases in transOpt
+ */
+Hypothesis::
+Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
+ : m_prevHypo(&prevHypo)
+ , m_sourceCompleted(prevHypo.m_sourceCompleted )
+ , m_sourceInput(prevHypo.m_sourceInput)
+ , m_currSourceWordsRange(transOpt.GetSourceWordsRange())
+ , m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
+ prevHypo.m_currTargetWordsRange.GetEndPos()
+ + transOpt.GetTargetPhrase().GetSize())
+ , m_wordDeleted(false)
+ , m_totalScore(0.0f)
+ , m_futureScore(0.0f)
+ , m_ffStates(prevHypo.m_ffStates.size())
+ , m_arcList(NULL)
+ , m_transOpt(transOpt)
+ , m_manager(prevHypo.GetManager())
+ , m_id(m_manager.GetNextHypoId())
+{
+ m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
- Hypothesis::
- ~Hypothesis()
- {
- for (unsigned i = 0; i < m_ffStates.size(); ++i)
- delete m_ffStates[i];
+ // assert that we are not extending our hypothesis by retranslating something
+ // that this hypothesis has already translated!
+ assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
- if (m_arcList) {
- ArcList::iterator iter;
- for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
- FREEHYPO(*iter);
- }
- m_arcList->clear();
+ //_hash_computed = false;
+ m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
+ m_wordDeleted = transOpt.IsDeletionOption();
+ m_manager.GetSentenceStats().AddCreated();
+}
+
+Hypothesis::
+~Hypothesis()
+{
+ for (unsigned i = 0; i < m_ffStates.size(); ++i)
+ delete m_ffStates[i];
- delete m_arcList;
- m_arcList = NULL;
+ if (m_arcList) {
+ ArcList::iterator iter;
+ for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
+ FREEHYPO(*iter);
}
+ m_arcList->clear();
+
+ delete m_arcList;
+ m_arcList = NULL;
}
+}
- void
- Hypothesis::
- AddArc(Hypothesis *loserHypo)
- {
- if (!m_arcList) {
- if (loserHypo->m_arcList) { // we don't have an arcList, but loser does
- this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
- loserHypo->m_arcList = 0; // prevent a double deletion
- } else {
- this->m_arcList = new ArcList();
- }
+void
+Hypothesis::
+AddArc(Hypothesis *loserHypo)
+{
+ if (!m_arcList) {
+ if (loserHypo->m_arcList) { // we don't have an arcList, but loser does
+ this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
+ loserHypo->m_arcList = 0; // prevent a double deletion
} else {
- if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
- size_t my_size = m_arcList->size();
- size_t add_size = loserHypo->m_arcList->size();
- this->m_arcList->resize(my_size + add_size, 0);
- std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
- delete loserHypo->m_arcList;
- loserHypo->m_arcList = 0;
- } else { // loserHypo doesn't have any arcs
- // DO NOTHING
- }
+ this->m_arcList = new ArcList();
+ }
+ } else {
+ if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser
+ size_t my_size = m_arcList->size();
+ size_t add_size = loserHypo->m_arcList->size();
+ this->m_arcList->resize(my_size + add_size, 0);
+ std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
+ delete loserHypo->m_arcList;
+ loserHypo->m_arcList = 0;
+ } else { // loserHypo doesn't have any arcs
+ // DO NOTHING
}
- m_arcList->push_back(loserHypo);
}
+ m_arcList->push_back(loserHypo);
+}
- /***
- * return the subclass of Hypothesis most appropriate to the given translation option
- */
- Hypothesis*
- Hypothesis::
- CreateNext(const TranslationOption &transOpt) const
- {
- return Create(*this, transOpt);
- }
+/***
+ * return the subclass of Hypothesis most appropriate to the given translation option
+ */
+Hypothesis*
+Hypothesis::
+CreateNext(const TranslationOption &transOpt) const
+{
+ return Create(*this, transOpt);
+}
- /***
- * return the subclass of Hypothesis most appropriate to the given translation option
- */
- Hypothesis*
- Hypothesis::
- Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
- {
+/***
+ * return the subclass of Hypothesis most appropriate to the given translation option
+ */
+Hypothesis*
+Hypothesis::
+Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
+{
#ifdef USE_HYPO_POOL
- Hypothesis *ptr = s_objectPool.getPtr();
- return new(ptr) Hypothesis(prevHypo, transOpt);
+ Hypothesis *ptr = s_objectPool.getPtr();
+ return new(ptr) Hypothesis(prevHypo, transOpt);
#else
- return new Hypothesis(prevHypo, transOpt);
+ return new Hypothesis(prevHypo, transOpt);
#endif
- }
- /***
- * return the subclass of Hypothesis most appropriate to the given target phrase
- */
-
- Hypothesis*
- Hypothesis::
- Create(Manager& manager, InputType const& m_source,
- const TranslationOption &initialTransOpt)
- {
+}
+/***
+ * return the subclass of Hypothesis most appropriate to the given target phrase
+ */
+
+Hypothesis*
+Hypothesis::
+Create(Manager& manager, InputType const& m_source,
+ const TranslationOption &initialTransOpt)
+{
#ifdef USE_HYPO_POOL
- Hypothesis *ptr = s_objectPool.getPtr();
- return new(ptr) Hypothesis(manager, m_source, initialTransOpt);
+ Hypothesis *ptr = s_objectPool.getPtr();
+ return new(ptr) Hypothesis(manager, m_source, initialTransOpt);
#else
- return new Hypothesis(manager, m_source, initialTransOpt);
+ return new Hypothesis(manager, m_source, initialTransOpt);
#endif
- }
+}
- /** check, if two hypothesis can be recombined.
- this is actually a sorting function that allows us to
- keep an ordered list of hypotheses. This makes recombination
- much quicker.
- */
- int
- Hypothesis::
- RecombineCompare(const Hypothesis &compare) const
- {
- // -1 = this < compare
- // +1 = this > compare
- // 0 = this ==compare
- int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
- if (comp != 0)
- return comp;
-
- for (unsigned i = 0; i < m_ffStates.size(); ++i) {
- if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
- comp = m_ffStates[i] - compare.m_ffStates[i];
- } else {
- comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
- }
- if (comp != 0) return comp;
+/** check, if two hypothesis can be recombined.
+ this is actually a sorting function that allows us to
+ keep an ordered list of hypotheses. This makes recombination
+ much quicker.
+*/
+int
+Hypothesis::
+RecombineCompare(const Hypothesis &compare) const
+{
+ // -1 = this < compare
+ // +1 = this > compare
+ // 0 = this ==compare
+ int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
+ if (comp != 0)
+ return comp;
+
+ for (unsigned i = 0; i < m_ffStates.size(); ++i) {
+ if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
+ comp = m_ffStates[i] - compare.m_ffStates[i];
+ } else {
+ comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
}
+ if (comp != 0) return comp;
+ }
+
+ return 0;
+}
- return 0;
+void
+Hypothesis::
+EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
+ int state_idx)
+{
+ const StaticData &staticData = StaticData::Instance();
+ if (! staticData.IsFeatureFunctionIgnored( sfff )) {
+ m_ffStates[state_idx]
+ = sfff.EvaluateWhenApplied
+ (*this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
+ &m_currScoreBreakdown);
}
+}
- void
- Hypothesis::
- EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
- int state_idx)
- {
- const StaticData &staticData = StaticData::Instance();
- if (! staticData.IsFeatureFunctionIgnored( sfff ))
- {
- m_ffStates[state_idx]
- = sfff.EvaluateWhenApplied
- (*this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
- &m_currScoreBreakdown);
- }
+void
+Hypothesis::
+EvaluateWhenApplied(const StatelessFeatureFunction& slff)
+{
+ const StaticData &staticData = StaticData::Instance();
+ if (! staticData.IsFeatureFunctionIgnored( slff )) {
+ slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
}
+}
- void
- Hypothesis::
- EvaluateWhenApplied(const StatelessFeatureFunction& slff)
- {
+/***
+ * calculate the logarithm of our total translation score (sum up components)
+ */
+void
+Hypothesis::
+EvaluateWhenApplied(const SquareMatrix &futureScore)
+{
+ IFVERBOSE(2) {
+ m_manager.GetSentenceStats().StartTimeOtherScore();
+ }
+ // some stateless score producers cache their values in the translation
+ // option: add these here
+ // language model scores for n-grams completely contained within a target
+ // phrase are also included here
+
+ // compute values of stateless feature functions that were not
+ // cached in the translation option
+ const vector<const StatelessFeatureFunction*>& sfs =
+ StatelessFeatureFunction::GetStatelessFeatureFunctions();
+ for (unsigned i = 0; i < sfs.size(); ++i) {
+ const StatelessFeatureFunction &ff = *sfs[i];
+ EvaluateWhenApplied(ff);
+ }
+
+ const vector<const StatefulFeatureFunction*>& ffs =
+ StatefulFeatureFunction::GetStatefulFeatureFunctions();
+ for (unsigned i = 0; i < ffs.size(); ++i) {
+ const StatefulFeatureFunction &ff = *ffs[i];
const StaticData &staticData = StaticData::Instance();
- if (! staticData.IsFeatureFunctionIgnored( slff )) {
- slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
+ if (! staticData.IsFeatureFunctionIgnored(ff)) {
+ m_ffStates[i] = ff.EvaluateWhenApplied(*this,
+ m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
+ &m_currScoreBreakdown);
}
}
- /***
- * calculate the logarithm of our total translation score (sum up components)
- */
- void
- Hypothesis::
- EvaluateWhenApplied(const SquareMatrix &futureScore)
- {
- IFVERBOSE(2) {
- m_manager.GetSentenceStats().StartTimeOtherScore();
- }
- // some stateless score producers cache their values in the translation
- // option: add these here
- // language model scores for n-grams completely contained within a target
- // phrase are also included here
-
- // compute values of stateless feature functions that were not
- // cached in the translation option
- const vector<const StatelessFeatureFunction*>& sfs =
- StatelessFeatureFunction::GetStatelessFeatureFunctions();
- for (unsigned i = 0; i < sfs.size(); ++i) {
- const StatelessFeatureFunction &ff = *sfs[i];
- EvaluateWhenApplied(ff);
- }
+ IFVERBOSE(2) {
+ m_manager.GetSentenceStats().StopTimeOtherScore();
+ m_manager.GetSentenceStats().StartTimeEstimateScore();
+ }
- const vector<const StatefulFeatureFunction*>& ffs =
- StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for (unsigned i = 0; i < ffs.size(); ++i) {
- const StatefulFeatureFunction &ff = *ffs[i];
- const StaticData &staticData = StaticData::Instance();
- if (! staticData.IsFeatureFunctionIgnored(ff)) {
- m_ffStates[i] = ff.EvaluateWhenApplied(*this,
- m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
- &m_currScoreBreakdown);
- }
- }
+ // FUTURE COST
+ m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
- IFVERBOSE(2) {
- m_manager.GetSentenceStats().StopTimeOtherScore();
- m_manager.GetSentenceStats().StartTimeEstimateScore();
- }
+ // TOTAL
+ m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
+ if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
- // FUTURE COST
- m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
+ IFVERBOSE(2) {
+ m_manager.GetSentenceStats().StopTimeEstimateScore();
+ }
+}
- // TOTAL
- m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
- if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
+const Hypothesis* Hypothesis::GetPrevHypo()const
+{
+ return m_prevHypo;
+}
- IFVERBOSE(2) {
- m_manager.GetSentenceStats().StopTimeEstimateScore();
- }
+/**
+ * print hypothesis information for pharaoh-style logging
+ */
+void
+Hypothesis::
+PrintHypothesis() const
+{
+ if (!m_prevHypo) {
+ TRACE_ERR(endl << "NULL hypo" << endl);
+ return;
}
-
- const Hypothesis* Hypothesis::GetPrevHypo()const
- {
- return m_prevHypo;
+ TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
+ int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
+ int start = end-1;
+ if ( start < 0 ) start = 0;
+ if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
+ TRACE_ERR( "<s> ");
+ } else {
+ TRACE_ERR( "... ");
}
-
- /**
- * print hypothesis information for pharaoh-style logging
- */
- void
- Hypothesis::
- PrintHypothesis() const
- {
- if (!m_prevHypo) {
- TRACE_ERR(endl << "NULL hypo" << endl);
- return;
- }
- TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
- int end = (int)(m_prevHypo->GetCurrTargetPhrase().GetSize()-1);
- int start = end-1;
- if ( start < 0 ) start = 0;
- if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
- TRACE_ERR( "<s> ");
- } else {
- TRACE_ERR( "... ");
- }
- if (end>=0) {
- WordsRange range(start, end);
- TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
- }
- TRACE_ERR( ")"<<endl);
- TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
- TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
- <<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
-
- TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
-
- if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
- // TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
- // TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
- // TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
- TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
- TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
- //PrintLMScores();
+ if (end>=0) {
+ WordsRange range(start, end);
+ TRACE_ERR( m_prevHypo->GetCurrTargetPhrase().GetSubString(range) << " ");
}
+ TRACE_ERR( ")"<<endl);
+ TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
+ TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()
+ <<": " << m_transOpt.GetInputPath().GetPhrase() << endl);
+
+ TRACE_ERR( "\ttranslated as: "<<(Phrase&) GetCurrTargetPhrase()<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
+
+ if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
+ // TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
+ // TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
+ // TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
+ TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
+ TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
+ //PrintLMScores();
+}
- void
- Hypothesis::
- CleanupArcList()
- {
- // point this hypo's main hypo to itself
- SetWinningHypo(this);
-
- if (!m_arcList) return;
+void
+Hypothesis::
+CleanupArcList()
+{
+ // point this hypo's main hypo to itself
+ SetWinningHypo(this);
- /* keep only number of arcs we need to create all n-best paths.
- * However, may not be enough if only unique candidates are needed,
- * so we'll keep all of arc list if nedd distinct n-best list
- */
- const StaticData &staticData = StaticData::Instance();
- size_t nBestSize = staticData.GetNBestSize();
- bool distinctNBest = (staticData.GetDistinctNBest() ||
- staticData.GetLatticeSamplesSize() ||
- staticData.UseMBR() ||
- staticData.GetOutputSearchGraph() ||
- staticData.GetOutputSearchGraphSLF() ||
- staticData.GetOutputSearchGraphHypergraph() ||
- staticData.UseLatticeMBR());
-
- if (!distinctNBest && m_arcList->size() > nBestSize * 5)
- {
- // prune arc list only if there too many arcs
- NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
- m_arcList->end(), CompareHypothesisTotalScore());
-
- // delete bad ones
- ArcList::iterator iter;
- for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
- FREEHYPO(*iter);
- m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
- }
+ if (!m_arcList) return;
- // set all arc's main hypo variable to this hypo
- ArcList::iterator iter = m_arcList->begin();
- for (; iter != m_arcList->end() ; ++iter) {
- Hypothesis *arc = *iter;
- arc->SetWinningHypo(this);
- }
+ /* keep only number of arcs we need to create all n-best paths.
+ * However, may not be enough if only unique candidates are needed,
+ * so we'll keep all of arc list if nedd distinct n-best list
+ */
+ const StaticData &staticData = StaticData::Instance();
+ size_t nBestSize = staticData.GetNBestSize();
+ bool distinctNBest = (staticData.GetDistinctNBest() ||
+ staticData.GetLatticeSamplesSize() ||
+ staticData.UseMBR() ||
+ staticData.GetOutputSearchGraph() ||
+ staticData.GetOutputSearchGraphSLF() ||
+ staticData.GetOutputSearchGraphHypergraph() ||
+ staticData.UseLatticeMBR());
+
+ if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
+ // prune arc list only if there too many arcs
+ NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
+ m_arcList->end(), CompareHypothesisTotalScore());
+
+ // delete bad ones
+ ArcList::iterator iter;
+ for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
+ FREEHYPO(*iter);
+ m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
- TargetPhrase const&
- Hypothesis::
- GetCurrTargetPhrase() const
- { return m_transOpt.GetTargetPhrase(); }
-
- void
- Hypothesis::
- GetOutputPhrase(Phrase &out) const
- {
- if (m_prevHypo != NULL)
- m_prevHypo->GetOutputPhrase(out);
- out.Append(GetCurrTargetPhrase());
+ // set all arc's main hypo variable to this hypo
+ ArcList::iterator iter = m_arcList->begin();
+ for (; iter != m_arcList->end() ; ++iter) {
+ Hypothesis *arc = *iter;
+ arc->SetWinningHypo(this);
}
+}
- TO_STRING_BODY(Hypothesis)
+TargetPhrase const&
+Hypothesis::
+GetCurrTargetPhrase() const
+{
+ return m_transOpt.GetTargetPhrase();
+}
- // friend
- ostream& operator<<(ostream& out, const Hypothesis& hypo)
- {
- hypo.ToStream(out);
- // words bitmap
- out << "[" << hypo.m_sourceCompleted << "] ";
+void
+Hypothesis::
+GetOutputPhrase(Phrase &out) const
+{
+ if (m_prevHypo != NULL)
+ m_prevHypo->GetOutputPhrase(out);
+ out.Append(GetCurrTargetPhrase());
+}
- // scores
- out << " [total=" << hypo.GetTotalScore() << "]";
- out << " " << hypo.GetScoreBreakdown();
+TO_STRING_BODY(Hypothesis)
- // alignment
- out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();
+// friend
+ostream& operator<<(ostream& out, const Hypothesis& hypo)
+{
+ hypo.ToStream(out);
+ // words bitmap
+ out << "[" << hypo.m_sourceCompleted << "] ";
- return out;
- }
+ // scores
+ out << " [total=" << hypo.GetTotalScore() << "]";
+ out << " " << hypo.GetScoreBreakdown();
+ // alignment
+ out << " " << hypo.GetCurrTargetPhrase().GetAlignNonTerm();
- std::string
- Hypothesis::
- GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
- { return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint); }
-
- std::string
- Hypothesis::
- GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
- { return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : ""); }
-
- std::string
- Hypothesis::
- GetSourcePhraseStringRep() const
- {
- vector<FactorType> allFactors(MAX_NUM_FACTORS);
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
- allFactors[i] = i;
- return GetSourcePhraseStringRep(allFactors);
- }
+ return out;
+}
- std::string
- Hypothesis::
- GetTargetPhraseStringRep() const
- {
- vector<FactorType> allFactors(MAX_NUM_FACTORS);
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
- allFactors[i] = i;
- return GetTargetPhraseStringRep(allFactors);
- }
- void
- Hypothesis::
- OutputAlignment(std::ostream &out) const
- {
- std::vector<const Hypothesis *> edges;
- const Hypothesis *currentHypo = this;
- while (currentHypo) {
- edges.push_back(currentHypo);
- currentHypo = currentHypo->GetPrevHypo();
- }
+std::string
+Hypothesis::
+GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
+{
+ return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint);
+}
- OutputAlignment(out, edges);
+std::string
+Hypothesis::
+GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
+{
+ return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : "");
+}
+
+std::string
+Hypothesis::
+GetSourcePhraseStringRep() const
+{
+ vector<FactorType> allFactors(MAX_NUM_FACTORS);
+ for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ allFactors[i] = i;
+ return GetSourcePhraseStringRep(allFactors);
+}
+
+std::string
+Hypothesis::
+GetTargetPhraseStringRep() const
+{
+ vector<FactorType> allFactors(MAX_NUM_FACTORS);
+ for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ allFactors[i] = i;
+ return GetTargetPhraseStringRep(allFactors);
+}
+void
+Hypothesis::
+OutputAlignment(std::ostream &out) const
+{
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = this;
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
}
- void
- Hypothesis::
- OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
- {
- size_t targetOffset = 0;
+ OutputAlignment(out, edges);
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const TargetPhrase &tp = edge.GetCurrTargetPhrase();
- size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+}
- OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+void
+Hypothesis::
+OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
+{
+ size_t targetOffset = 0;
- targetOffset += tp.GetSize();
- }
- // Used by --print-alignment-info, so no endl
- }
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const TargetPhrase &tp = edge.GetCurrTargetPhrase();
+ size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
- void
- Hypothesis::
- OutputAlignment(ostream &out, const AlignmentInfo &ai,
- size_t sourceOffset, size_t targetOffset)
- {
- typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
- AlignVec alignments = ai.GetSortedAlignments();
-
- AlignVec::const_iterator it;
- for (it = alignments.begin(); it != alignments.end(); ++it) {
- const std::pair<size_t,size_t> &alignment = **it;
- out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
- }
+ OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+ targetOffset += tp.GetSize();
}
+ // Used by --print-alignment-info, so no endl
+}
- void
- Hypothesis::
- OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
- {
- if (!hypo->GetPrevHypo()) return;
- OutputInput(map, hypo->GetPrevHypo());
- map[hypo->GetCurrSourceWordsRange().GetStartPos()]
- = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
- }
+void
+Hypothesis::
+OutputAlignment(ostream &out, const AlignmentInfo &ai,
+ size_t sourceOffset, size_t targetOffset)
+{
+ typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+ AlignVec alignments = ai.GetSortedAlignments();
- void
- Hypothesis::
- OutputInput(std::ostream& os) const
- {
- size_t len = this->GetInput().GetSize();
- std::vector<const Phrase*> inp_phrases(len, 0);
- OutputInput(inp_phrases, this);
- for (size_t i=0; i<len; ++i)
- if (inp_phrases[i]) os << *inp_phrases[i];
+ AlignVec::const_iterator it;
+ for (it = alignments.begin(); it != alignments.end(); ++it) {
+ const std::pair<size_t,size_t> &alignment = **it;
+ out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
}
- void
- Hypothesis::
- OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors) const
- {
- if (m_prevHypo)
- { // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
- m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
- }
- OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
+}
+
+void
+Hypothesis::
+OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
+{
+ if (!hypo->GetPrevHypo()) return;
+ OutputInput(map, hypo->GetPrevHypo());
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()]
+ = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
+}
+
+void
+Hypothesis::
+OutputInput(std::ostream& os) const
+{
+ size_t len = this->GetInput().GetSize();
+ std::vector<const Phrase*> inp_phrases(len, 0);
+ OutputInput(inp_phrases, this);
+ for (size_t i=0; i<len; ++i)
+ if (inp_phrases[i]) os << *inp_phrases[i];
+}
+
+void
+Hypothesis::
+OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ if (m_prevHypo) {
+ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
+ m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
}
+ OutputSurface(out, *this, outputFactorOrder, reportSegmentation, reportAllFactors);
+}
- //////////////////////////////////////////////////////////////////////////
- /***
- * print surface factor only for the given phrase
- */
- void
- Hypothesis::
- OutputSurface(std::ostream &out, const Hypothesis &edge,
- const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors) const
- {
- UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Must specific at least 1 output factor");
- const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
- bool markUnknown = StaticData::Instance().GetMarkUnknown();
- if (reportAllFactors == true) {
- out << phrase;
- } else {
- FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+//////////////////////////////////////////////////////////////////////////
+/***
+ * print surface factor only for the given phrase
+ */
+void
+Hypothesis::
+OutputSurface(std::ostream &out, const Hypothesis &edge,
+ const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Must specific at least 1 output factor");
+ const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // creates map of target position -> factor for placeholders
+ placeholders = GetPlaceholders(edge, placeholderFactor);
+ }
+
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
- std::map<size_t, const Factor*> placeholders;
- if (placeholderFactor != NOT_FOUND) {
- // creates map of target position -> factor for placeholders
- placeholders = GetPlaceholders(edge, placeholderFactor);
+ if (placeholders.size()) {
+ // do placeholders
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+ if (iter != placeholders.end()) {
+ factor = iter->second;
+ }
}
- size_t size = phrase.GetSize();
- for (size_t pos = 0 ; pos < size ; pos++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
-
- if (placeholders.size()) {
- // do placeholders
- std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
- if (iter != placeholders.end()) {
- factor = iter->second;
- }
- }
-
- UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << pos);
-
- //preface surface form with UNK if marking unknowns
- const Word &word = phrase.GetWord(pos);
- if(markUnknown && word.IsOOV()) {
- out << "UNK" << *factor;
- } else {
- out << *factor;
- }
-
- for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
- UTIL_THROW_IF2(factor == NULL,
- "No factor " << i << " at position " << pos);
-
- out << "|" << *factor;
- }
- out << " ";
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << pos);
+
+ //preface surface form with UNK if marking unknowns
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ } else {
+ out << *factor;
}
- }
- // trace ("report segmentation") option "-t" / "-tt"
- if (reportSegmentation > 0 && phrase.GetSize() > 0) {
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- const int sourceStart = sourceRange.GetStartPos();
- const int sourceEnd = sourceRange.GetEndPos();
- out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
- if (reportSegmentation == 2) {
- out << ",wa=";
- const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
- Hypothesis::OutputAlignment(out, ai, 0, 0);
- out << ",total=";
- out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
- out << ",";
- ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
- scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
- scoreBreakdown.OutputAllFeatureScores(out);
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
}
- out << "| ";
+ out << " ";
}
}
- std::map<size_t, const Factor*>
- Hypothesis::
- GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
- {
- const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
- const Phrase &inputPhrase = inputPath.GetPhrase();
-
- std::map<size_t, const Factor*> ret;
-
- for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
- const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
- if (factor) {
- std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
- UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word");
- ret[*targetPos.begin()] = factor;
- }
+ // trace ("report segmentation") option "-t" / "-tt"
+ if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ const int sourceStart = sourceRange.GetStartPos();
+ const int sourceEnd = sourceRange.GetEndPos();
+ out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
+ if (reportSegmentation == 2) {
+ out << ",wa=";
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+ Hypothesis::OutputAlignment(out, ai, 0, 0);
+ out << ",total=";
+ out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+ out << ",";
+ ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+ scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+ scoreBreakdown.OutputAllFeatureScores(out);
}
+ out << "| ";
+ }
+}
- return ret;
+std::map<size_t, const Factor*>
+Hypothesis::
+GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{
+ const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+ const Phrase &inputPhrase = inputPath.GetPhrase();
+
+ std::map<size_t, const Factor*> ret;
+
+ for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
+ const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
+ if (factor) {
+ std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
+ UTIL_THROW_IF2(targetPos.size() != 1,
+ "Placeholder should be aligned to 1, and only 1, word");
+ ret[*targetPos.begin()] = factor;
+ }
}
+ return ret;
+}
+
#ifdef HAVE_XMLRPC_C
- void
- Hypothesis::
- OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
- {
- using namespace std;
- WordsRange const& src = this->GetCurrSourceWordsRange();
- WordsRange const& trg = this->GetCurrTargetWordsRange();
-
- vector<pair<size_t,size_t> const* > a
- = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
- typedef pair<size_t,size_t> item;
- map<string, xmlrpc_c::value> M;
- BOOST_FOREACH(item const* p, a)
- {
- M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first);
- M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second);
- dest.push_back(xmlrpc_c::value_struct(M));
- }
+void
+Hypothesis::
+OutputLocalWordAlignment(vector<xmlrpc_c::value>& dest) const
+{
+ using namespace std;
+ WordsRange const& src = this->GetCurrSourceWordsRange();
+ WordsRange const& trg = this->GetCurrTargetWordsRange();
+
+ vector<pair<size_t,size_t> const* > a
+ = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
+ typedef pair<size_t,size_t> item;
+ map<string, xmlrpc_c::value> M;
+ BOOST_FOREACH(item const* p, a) {
+ M["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first);
+ M["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second);
+ dest.push_back(xmlrpc_c::value_struct(M));
}
+}
- void
- Hypothesis::
- OutputWordAlignment(vector<xmlrpc_c::value>& out) const
- {
- vector<Hypothesis const*> tmp;
- for (Hypothesis const* h = this; h; h = h->GetPrevHypo())
- tmp.push_back(h);
- for (size_t i = tmp.size(); i-- > 0;)
- tmp[i]->OutputLocalWordAlignment(out);
- }
+void
+Hypothesis::
+OutputWordAlignment(vector<xmlrpc_c::value>& out) const
+{
+ vector<Hypothesis const*> tmp;
+ for (Hypothesis const* h = this; h; h = h->GetPrevHypo())
+ tmp.push_back(h);
+ for (size_t i = tmp.size(); i-- > 0;)
+ tmp[i]->OutputLocalWordAlignment(out);
+}
#endif
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index 0ce75b83c..e1e95fbf3 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore {
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
pool.freeObject(hypo); \
} \
-
+
#else
#define FREEHYPO(hypo) delete hypo
#endif
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index 8ed9a02e5..c58c82dfa 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -161,7 +161,7 @@ public:
return m_detailTreeFragmentsOutputCollector.get();
}
- void SetInputStreamFromString(std::istringstream &input){
+ void SetInputStreamFromString(std::istringstream &input) {
m_inputStream = &input;
}
diff --git a/moses/LM/RDLM.cpp b/moses/LM/RDLM.cpp
index 179b67095..70fabbc6e 100644
--- a/moses/LM/RDLM.cpp
+++ b/moses/LM/RDLM.cpp
@@ -13,12 +13,14 @@ namespace Moses
typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap;
-RDLM::~RDLM() {
+RDLM::~RDLM()
+{
delete lm_head_base_instance_;
delete lm_label_base_instance_;
}
-void RDLM::Load() {
+void RDLM::Load()
+{
lm_head_base_instance_ = new nplm::neuralTM();
lm_head_base_instance_->read(m_path_head_lm);
@@ -87,8 +89,8 @@ void RDLM::Load() {
// just score provided file, then exit.
if (!m_debugPath.empty()) {
- ScoreFile(m_debugPath);
- exit(1);
+ ScoreFile(m_debugPath);
+ exit(1);
}
// {
@@ -202,8 +204,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// ignore glue rules
if (root->GetLabel() == m_glueSymbol) {
// recursion
- for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
- {
+ for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
}
return;
@@ -213,11 +214,11 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) {
// recursion
if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) {
- root = back_pointers.find(root)->second.get();
- rescoring_levels = m_context_up-1;
+ root = back_pointers.find(root)->second.get();
+ rescoring_levels = m_context_up-1;
}
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
- Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
+ Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
}
return;
}
@@ -239,35 +240,34 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) {
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
- std::vector<int> ngram_head_null (static_head_null);
- ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
- if (m_isPretermBackoff && ngram_head_null.back() == 0) {
- ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
- }
- if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
- std::vector<int>::iterator it = ngram_head_null.begin();
- std::fill_n(it, m_context_left, static_start_head);
- it += m_context_left;
- std::fill_n(it, m_context_left, static_start_label);
- it += m_context_left;
- std::fill_n(it, m_context_right, static_stop_head);
- it += m_context_right;
- std::fill_n(it, m_context_right, static_stop_label);
- it += m_context_right;
- size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
- it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
- it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
- }
- if (ancestor_labels.size() >= m_context_up && !num_virtual) {
- score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
- }
- else {
- boost::hash_combine(boundary_hash, ngram_head_null.back());
- score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
- }
+ std::vector<int> ngram_head_null (static_head_null);
+ ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
+ if (m_isPretermBackoff && ngram_head_null.back() == 0) {
+ ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
+ }
+ if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
+ std::vector<int>::iterator it = ngram_head_null.begin();
+ std::fill_n(it, m_context_left, static_start_head);
+ it += m_context_left;
+ std::fill_n(it, m_context_left, static_start_label);
+ it += m_context_left;
+ std::fill_n(it, m_context_right, static_stop_head);
+ it += m_context_right;
+ std::fill_n(it, m_context_right, static_stop_label);
+ it += m_context_right;
+ size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
+ it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
+ it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
+ }
+ if (ancestor_labels.size() >= m_context_up && !num_virtual) {
+ score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
+ } else {
+ boost::hash_combine(boundary_hash, ngram_head_null.back());
+ score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
+ }
}
return;
- // we only need to re-visit previous hypotheses if we have more context available.
+ // we only need to re-visit previous hypotheses if we have more context available.
} else if (root->IsLeafNT()) {
if (m_context_up > 1 && ancestor_heads.size()) {
root = back_pointers.find(root)->second.get();
@@ -276,8 +276,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
return;
}
rescoring_levels = m_context_up-1;
- }
- else {
+ } else {
return;
}
}
@@ -302,19 +301,17 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
int reached_end = 0;
int label_idx, label_idx_out;
if (m_binarized && head_label[0] == '^') {
- virtual_head = true;
- if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) {
- reached_end = 1; //indicate that we've seen the first symbol of the RHS
- }
- else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) {
- reached_end = 2; // indicate that we've seen the last symbol of the RHS
- }
- // with 'full' binarization, direction is encoded in 2nd char
- std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
- label_idx = lm_label->lookup_input_word(clipped_label);
- label_idx_out = lm_label->lookup_output_word(clipped_label);
- }
- else {
+ virtual_head = true;
+ if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) {
+ reached_end = 1; //indicate that we've seen the first symbol of the RHS
+ } else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) {
+ reached_end = 2; // indicate that we've seen the last symbol of the RHS
+ }
+ // with 'full' binarization, direction is encoded in 2nd char
+ std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
+ label_idx = lm_label->lookup_input_word(clipped_label);
+ label_idx_out = lm_label->lookup_output_word(clipped_label);
+ } else {
reached_end = 3; // indicate that we've seen first and last symbol of the RHS
label_idx = lm_label->lookup_input_word(head_label);
label_idx_out = lm_label->lookup_output_word(head_label);
@@ -324,49 +321,47 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
- if (head_idx != static_dummy_head && head_idx != static_head_head) {
- std::vector<int> ngram_head_null (static_head_null);
- *(ngram_head_null.end()-2) = label_idx;
- ngram_head_null.back() = head_ids.second;
- if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
- std::vector<int>::iterator it = ngram_head_null.begin();
- std::fill_n(it, m_context_left, static_start_head);
- it += m_context_left;
- std::fill_n(it, m_context_left, static_start_label);
- it += m_context_left;
- std::fill_n(it, m_context_right, static_stop_head);
- it += m_context_right;
- std::fill_n(it, m_context_right, static_stop_label);
- it += m_context_right;
- it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
- it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
- score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
- }
- else {
- boost::hash_combine(boundary_hash, ngram_head_null.back());
- score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
- }
- }
- std::vector<int> ngram_label_null (static_label_null);
- ngram_label_null.back() = label_idx_out;
+ if (head_idx != static_dummy_head && head_idx != static_head_head) {
+ std::vector<int> ngram_head_null (static_head_null);
+ *(ngram_head_null.end()-2) = label_idx;
+ ngram_head_null.back() = head_ids.second;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
- std::vector<int>::iterator it = ngram_label_null.begin();
- std::fill_n(it, m_context_left, static_start_head);
- it += m_context_left;
- std::fill_n(it, m_context_left, static_start_label);
- it += m_context_left;
- std::fill_n(it, m_context_right, static_stop_head);
- it += m_context_right;
- std::fill_n(it, m_context_right, static_stop_label);
- it += m_context_right;
- it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
- it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
- score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
- }
- else {
- boost::hash_combine(boundary_hash, ngram_label_null.back());
- score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
+ std::vector<int>::iterator it = ngram_head_null.begin();
+ std::fill_n(it, m_context_left, static_start_head);
+ it += m_context_left;
+ std::fill_n(it, m_context_left, static_start_label);
+ it += m_context_left;
+ std::fill_n(it, m_context_right, static_stop_head);
+ it += m_context_right;
+ std::fill_n(it, m_context_right, static_stop_label);
+ it += m_context_right;
+ it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
+ it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
+ score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
+ } else {
+ boost::hash_combine(boundary_hash, ngram_head_null.back());
+ score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
}
+ }
+ std::vector<int> ngram_label_null (static_label_null);
+ ngram_label_null.back() = label_idx_out;
+ if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
+ std::vector<int>::iterator it = ngram_label_null.begin();
+ std::fill_n(it, m_context_left, static_start_head);
+ it += m_context_left;
+ std::fill_n(it, m_context_left, static_start_label);
+ it += m_context_left;
+ std::fill_n(it, m_context_right, static_stop_head);
+ it += m_context_right;
+ std::fill_n(it, m_context_right, static_stop_label);
+ it += m_context_right;
+ it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
+ it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
+ score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
+ } else {
+ boost::hash_combine(boundary_hash, ngram_label_null.back());
+ score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
+ }
}
ancestor_heads.push_back(head_idx);
@@ -374,15 +369,14 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (virtual_head) {
num_virtual = m_context_up;
- }
- else if (num_virtual) {
- --num_virtual;
+ } else if (num_virtual) {
+ --num_virtual;
}
// fill ancestor context (same for all children)
if (context_up_nonempty < m_context_up) {
- ++context_up_nonempty;
+ ++context_up_nonempty;
}
size_t up_padding = m_context_up - context_up_nonempty;
@@ -439,13 +433,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
std::vector<int>::iterator it = ngram.begin();
if (left_padding > 0) {
- it += left_padding;
+ it += left_padding;
}
it = std::copy(heads.begin()+left_offset, heads.begin()+i, it);
if (left_padding > 0) {
- it += left_padding;
+ it += left_padding;
}
it = std::copy(labels.begin()+left_offset, labels.begin()+i, it);
@@ -453,33 +447,30 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it = std::copy(heads.begin()+i+1, heads.begin()+right_offset, it);
if (right_padding > 0) {
- if (reached_end == 2 || reached_end == 3) {
- std::fill_n(it, right_padding, static_stop_head);
- it += right_padding;
- }
- else {
- std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it);
- }
+ if (reached_end == 2 || reached_end == 3) {
+ std::fill_n(it, right_padding, static_stop_head);
+ it += right_padding;
+ } else {
+ std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it);
+ }
}
it = std::copy(labels.begin()+i+1, labels.begin()+right_offset, it);
if (right_padding > 0) {
- if (reached_end == 2 || reached_end == 3) {
- std::fill_n(it, right_padding, static_stop_label);
- it += right_padding;
- }
- else {
- std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it);
- }
+ if (reached_end == 2 || reached_end == 3) {
+ std::fill_n(it, right_padding, static_stop_label);
+ it += right_padding;
+ } else {
+ std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it);
+ }
}
ngram.back() = labels_output[i];
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
- }
- else {
+ } else {
boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
@@ -492,8 +483,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
- }
- else {
+ } else {
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
@@ -502,25 +492,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// next time, we need to add less start symbol padding
if (left_padding)
- left_padding--;
+ left_padding--;
else
- left_offset++;
+ left_offset++;
if (right_offset < heads.size())
- right_offset++;
+ right_offset++;
else
- right_padding++;
+ right_padding++;
}
if (rescoring_levels == 1) {
- ancestor_heads.pop_back();
- ancestor_labels.pop_back();
- return;
+ ancestor_heads.pop_back();
+ ancestor_labels.pop_back();
+ return;
}
// recursion
- for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
- {
+ for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1);
}
ancestor_heads.pop_back();
@@ -531,19 +520,17 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
{
InternalTree *tree;
- for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it)
- {
+ for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
if ((*it)->IsLeafNT()) {
tree = back_pointers.find(it->get())->second.get();
- }
- else {
+ } else {
tree = it->get();
}
if (m_binarized && tree->GetLabel()[0] == '^') {
- head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
- if (head_ptr != NULL && !m_isPTKVZ) {
- return head_ptr;
+ head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
+ if (head_ptr != NULL && !m_isPTKVZ) {
+ return head_ptr;
}
}
@@ -563,8 +550,7 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) {
if ((*it2)->IsLeafNT()) {
tree2 = back_pointers.find(it2->get())->second.get();
- }
- else {
+ } else {
tree2 = it2->get();
}
if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) {
@@ -602,18 +588,18 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
// extract head words / labels
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) {
if ((*itx)->IsTerminal()) {
- std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
- std::cerr << "children: ";
- for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) {
- std::cerr << (*itx2)->GetLabel() << " ";
- }
- std::cerr << std::endl;
- // resize vectors (should we throw exception instead?)
- heads.pop_back();
- labels.pop_back();
- heads_output.pop_back();
- labels_output.pop_back();
- continue;
+ std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
+ std::cerr << "children: ";
+ for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) {
+ std::cerr << (*itx2)->GetLabel() << " ";
+ }
+ std::cerr << std::endl;
+ // resize vectors (should we throw exception instead?)
+ heads.pop_back();
+ labels.pop_back();
+ heads_output.pop_back();
+ labels_output.pop_back();
+ continue;
}
InternalTree* child = itx->get();
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
@@ -659,8 +645,7 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
}
if (m_sharedVocab) {
IDs.second = IDs.first;
- }
- else {
+ } else {
IDs.second = lm_head_base_instance_->lookup_output_word(head);
if (m_isPretermBackoff && IDs.second == 0) {
IDs.second = lm_head_base_instance_->lookup_output_word(preterminal);
@@ -672,12 +657,12 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const
{
for (size_t i = 0; i < ngram.size()-1; i++) {
- std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " ";
+ std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " ";
}
std::cerr << lm->get_output_vocabulary().words()[ngram.back()] << " ";
for (size_t i = 0; i < ngram.size(); i++) {
- std::cerr << ngram[i] << " ";
+ std::cerr << ngram[i] << " ";
}
std::cerr << "score: " << lm->lookup_ngram(ngram) << std::endl;
}
@@ -691,32 +676,31 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto
bool found = false;
InternalTree::leafNT next_leafNT(root);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
- found = next_leafNT(it);
- if (found) {
- ret[it->get()] = *it_prev;
- }
- else {
- std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
- }
+ found = next_leafNT(it);
+ if (found) {
+ ret[it->get()] = *it_prev;
+ } else {
+ std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
+ }
}
return ret;
}
void RDLM::ScoreFile(std::string &path)
{
- InputFileStream inStream(path);
- std::string line, null;
- std::vector<int> ancestor_heads(m_context_up, static_root_head);
- std::vector<int> ancestor_labels(m_context_up, static_root_label);
- while(getline(inStream, line)) {
- TreePointerMap back_pointers;
- boost::array<float, 4> score;
- score.fill(0);
- InternalTree* mytree (new InternalTree(line));
- size_t boundary_hash = 0;
- Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
- std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
- }
+ InputFileStream inStream(path);
+ std::string line, null;
+ std::vector<int> ancestor_heads(m_context_up, static_root_head);
+ std::vector<int> ancestor_labels(m_context_up, static_root_label);
+ while(getline(inStream, line)) {
+ TreePointerMap back_pointers;
+ boost::array<float, 4> score;
+ score.fill(0);
+ InternalTree* mytree (new InternalTree(line));
+ size_t boundary_hash = 0;
+ Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
+ std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
+ }
}
@@ -727,42 +711,42 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore
} else if (key == "path_head_lm") {
- m_path_head_lm = value;
+ m_path_head_lm = value;
} else if (key == "path_label_lm") {
- m_path_label_lm = value;
+ m_path_label_lm = value;
} else if (key == "ptkvz") {
- m_isPTKVZ = Scan<bool>(value);
+ m_isPTKVZ = Scan<bool>(value);
} else if (key == "backoff") {
- m_isPretermBackoff = Scan<bool>(value);
+ m_isPretermBackoff = Scan<bool>(value);
} else if (key == "context_up") {
- m_context_up = Scan<size_t>(value);
+ m_context_up = Scan<size_t>(value);
} else if (key == "context_left") {
- m_context_left = Scan<size_t>(value);
+ m_context_left = Scan<size_t>(value);
} else if (key == "context_right") {
- m_context_right = Scan<size_t>(value);
+ m_context_right = Scan<size_t>(value);
} else if (key == "debug_path") {
- m_debugPath = value;
+ m_debugPath = value;
} else if (key == "premultiply") {
- m_premultiply = Scan<bool>(value);
+ m_premultiply = Scan<bool>(value);
} else if (key == "rerank") {
- m_rerank = Scan<bool>(value);
+ m_rerank = Scan<bool>(value);
} else if (key == "normalize_head_lm") {
- m_normalizeHeadLM = Scan<bool>(value);
+ m_normalizeHeadLM = Scan<bool>(value);
} else if (key == "normalize_label_lm") {
- m_normalizeLabelLM = Scan<bool>(value);
+ m_normalizeLabelLM = Scan<bool>(value);
} else if (key == "binarized") {
- if (value == "left")
- m_binarized = 1;
- else if (value == "right")
- m_binarized = 2;
- else if (value == "full")
- m_binarized = 3;
- else
- UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
+ if (value == "left")
+ m_binarized = 1;
+ else if (value == "right")
+ m_binarized = 2;
+ else if (value == "full")
+ m_binarized = 3;
+ else
+ UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
} else if (key == "glue_symbol") {
- m_glueSymbol = value;
+ m_glueSymbol = value;
} else if (key == "cache_size") {
- m_cacheSize = Scan<int>(value);
+ m_cacheSize = Scan<int>(value);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@@ -808,8 +792,8 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
size_t boundary_hash = 0;
if (!m_rerank) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
- accumulator->PlusEquals(ff_idx, score[0] + score[1]);
- accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
+ accumulator->PlusEquals(ff_idx, score[0] + score[1]);
+ accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}
mytree->Combine(previous_trees);
if (m_rerank && full_sentence) {
@@ -818,12 +802,11 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}
if (m_binarized && full_sentence) {
- mytree->Unbinarize();
+ mytree->Unbinarize();
}
return new RDLMState(mytree, score[1], score[3], boundary_hash);
- }
- else {
+ } else {
UTIL_THROW2("Error: RDLM active, but no internal tree structure found");
}
diff --git a/moses/LM/RDLM.h b/moses/LM/RDLM.h
index 8ae49ce76..1b92ed7c9 100644
--- a/moses/LM/RDLM.h
+++ b/moses/LM/RDLM.h
@@ -11,8 +11,9 @@
// Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics.
// see 'scripts/training/rdlm' for training scripts
-namespace nplm {
- class neuralTM;
+namespace nplm
+{
+class neuralTM;
}
namespace Moses
@@ -32,21 +33,21 @@ public:
{}
float GetApproximateScoreHead() const {
- return m_approx_head;
+ return m_approx_head;
}
float GetApproximateScoreLabel() const {
- return m_approx_label;
+ return m_approx_label;
}
size_t GetHash() const {
- return m_hash;
+ return m_hash;
}
int Compare(const FFState& other) const {
- if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
- else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
- else return -1;
+ if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
+ else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
+ else return -1;
}
};
@@ -121,10 +122,9 @@ public:
, m_normalizeLabelLM(false)
, m_sharedVocab(false)
, m_binarized(0)
- , m_cacheSize(1000000)
- {
- ReadParameters();
- }
+ , m_cacheSize(1000000) {
+ ReadParameters();
+ }
~RDLM();
@@ -147,21 +147,23 @@ public:
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const {};
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input
- , const InputPath &inputPath
- , const TargetPhrase &targetPhrase
- , const StackVec *stackVec
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {};
FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
+ ScoreComponentCollection* accumulator) const {
+ UTIL_THROW(util::Exception, "Not implemented");
+ };
FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
@@ -173,71 +175,72 @@ public:
class UnbinarizedChildren
{
private:
- std::vector<TreePointer>::const_iterator iter;
- std::vector<TreePointer>::const_iterator _begin;
- std::vector<TreePointer>::const_iterator _end;
- InternalTree* current;
- const TreePointerMap & back_pointers;
- bool binarized;
- std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
+ std::vector<TreePointer>::const_iterator iter;
+ std::vector<TreePointer>::const_iterator _begin;
+ std::vector<TreePointer>::const_iterator _end;
+ InternalTree* current;
+ const TreePointerMap & back_pointers;
+ bool binarized;
+ std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
public:
- UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
- current(root),
- back_pointers(pointers),
- binarized(binary)
- {
- stack.reserve(10);
- _end = current->GetChildren().end();
- iter = current->GetChildren().begin();
- // expand virtual node
- while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
- stack.push_back(std::make_pair(current, iter));
- // also go through trees or previous hypotheses to rescore nodes for which more context has become available
- if ((*iter)->IsLeafNT()) {
- current = back_pointers.find(iter->get())->second.get();
- }
- else {
- current = iter->get();
- }
- iter = current->GetChildren().begin();
- }
- _begin = iter;
+ UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
+ current(root),
+ back_pointers(pointers),
+ binarized(binary) {
+ stack.reserve(10);
+ _end = current->GetChildren().end();
+ iter = current->GetChildren().begin();
+ // expand virtual node
+ while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
+ stack.push_back(std::make_pair(current, iter));
+ // also go through trees or previous hypotheses to rescore nodes for which more context has become available
+ if ((*iter)->IsLeafNT()) {
+ current = back_pointers.find(iter->get())->second.get();
+ } else {
+ current = iter->get();
}
+ iter = current->GetChildren().begin();
+ }
+ _begin = iter;
+ }
- std::vector<TreePointer>::const_iterator begin() const { return _begin; }
- std::vector<TreePointer>::const_iterator end() const { return _end; }
-
- std::vector<TreePointer>::const_iterator operator++() {
- iter++;
- if (iter == current->GetChildren().end()) {
- while (!stack.empty()) {
- std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
- current = active.first;
- iter = ++active.second;
- stack.pop_back();
- if (iter != current->GetChildren().end()) {
- break;
- }
- }
- if (iter == _end) {
- return iter;
+ std::vector<TreePointer>::const_iterator begin() const {
+ return _begin;
+ }
+ std::vector<TreePointer>::const_iterator end() const {
+ return _end;
+ }
+
+ std::vector<TreePointer>::const_iterator operator++() {
+ iter++;
+ if (iter == current->GetChildren().end()) {
+ while (!stack.empty()) {
+ std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
+ current = active.first;
+ iter = ++active.second;
+ stack.pop_back();
+ if (iter != current->GetChildren().end()) {
+ break;
}
}
- // expand virtual node
- while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
- stack.push_back(std::make_pair(current, iter));
- // also go through trees or previous hypotheses to rescore nodes for which more context has become available
- if ((*iter)->IsLeafNT()) {
- current = back_pointers.find(iter->get())->second.get();
- }
- else {
- current = iter->get();
- }
- iter = current->GetChildren().begin();
+ if (iter == _end) {
+ return iter;
+ }
+ }
+ // expand virtual node
+ while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
+ stack.push_back(std::make_pair(current, iter));
+ // also go through trees or previous hypotheses to rescore nodes for which more context has become available
+ if ((*iter)->IsLeafNT()) {
+ current = back_pointers.find(iter->get())->second.get();
+ } else {
+ current = iter->get();
}
- return iter;
+ iter = current->GetChildren().begin();
}
+ return iter;
+ }
};
};
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 8daaa6c8e..bb27e368b 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -73,7 +73,7 @@ Manager::Manager(ttasksptr const& ttask)
const StaticData &staticData = StaticData::Instance();
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
- *m_transOptColl);
+ *m_transOptColl);
StaticData::Instance().InitializeForInput(ttask);
}
@@ -87,7 +87,9 @@ Manager::~Manager()
const InputType&
Manager::GetSource() const
-{ return m_source ; }
+{
+ return m_source ;
+}
/**
* Main decoder loop that translates a sentence by expanding
@@ -130,7 +132,7 @@ void Manager::Decode()
searchTime.start();
m_search->Decode();
VERBOSE(1, "Line " << m_source.GetTranslationId()
- << ": Search took " << searchTime << " seconds" << endl);
+ << ": Search took " << searchTime << " seconds" << endl);
IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal();
TRACE_ERR(GetSentenceStats());
diff --git a/moses/OutputCollector.h b/moses/OutputCollector.h
index 647b81c3e..4ca0f5ac1 100644
--- a/moses/OutputCollector.h
+++ b/moses/OutputCollector.h
@@ -110,7 +110,7 @@ private:
#endif
public:
- void SetOutputStream(std::ostream* outStream){
+ void SetOutputStream(std::ostream* outStream) {
m_outStream = outStream;
}
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 5b5d76828..d47aca040 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -203,7 +203,7 @@ Parameter::Parameter()
AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
AddParam(nbest_opts,"print-alignment-info-in-n-best",
- "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
+ "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
///////////////////////////////////////////////////////////////////////////////////////
// server options
@@ -215,7 +215,7 @@ Parameter::Parameter()
po::options_description irstlm_opts("IRSTLM Options");
AddParam(irstlm_opts,"clean-lm-cache",
- "clean language model caches after N translations (default N=1)");
+ "clean language model caches after N translations (default N=1)");
po::options_description chart_opts("Chart Decoding Options");
AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
@@ -346,8 +346,8 @@ const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
void
Parameter::
AddParam(po::options_description& optgroup,
- string const& paramName,
- string const& description)
+ string const& paramName,
+ string const& description)
{
m_valid[paramName] = true;
m_description[paramName] = description;
@@ -358,9 +358,9 @@ AddParam(po::options_description& optgroup,
void
Parameter::
AddParam(po::options_description& optgroup,
- string const& paramName,
- string const& abbrevName,
- string const& description)
+ string const& paramName,
+ string const& abbrevName,
+ string const& description)
{
m_valid[paramName] = true;
m_valid[abbrevName] = true;
@@ -368,11 +368,10 @@ AddParam(po::options_description& optgroup,
m_fullname[abbrevName] = paramName;
m_description[paramName] = description;
string optname = paramName;
- if (abbrevName.size() == 1)
- {
- optname += string(",")+abbrevName;
- // m_confusable[abbrevName[0]].insert(paramName);
- }
+ if (abbrevName.size() == 1) {
+ optname += string(",")+abbrevName;
+ // m_confusable[abbrevName[0]].insert(paramName);
+ }
optgroup.add_options()(optname.c_str(),description.c_str());
}
@@ -429,12 +428,11 @@ LoadParam(int argc, char* xargv[])
// legacy parameter handling: all parameters are expected
// to start with a single dash
char* argv[argc+1];
- for (int i = 0; i < argc; ++i)
- {
- argv[i] = xargv[i];
- if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
- ++argv[i];
- }
+ for (int i = 0; i < argc; ++i) {
+ argv[i] = xargv[i];
+ if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
+ ++argv[i];
+ }
// config file (-f) arg mandatory
string configPath;
@@ -1260,7 +1258,7 @@ Validate()
bool
Parameter::
FilesExist(const string &paramName, int fieldNo,
- std::vector<std::string> const& extensions)
+ std::vector<std::string> const& extensions)
{
typedef std::vector<std::string> StringVec;
StringVec::const_iterator iter;
@@ -1589,7 +1587,7 @@ template<>
void
Parameter::
SetParameter<bool>(bool &parameter, std::string const& parameterName,
- bool const& defaultValue) const
+ bool const& defaultValue) const
{
const PARAM_VEC *params = GetParam(parameterName);
diff --git a/moses/Parameter.h b/moses/Parameter.h
index 90b18c427..f6e20efc2 100644
--- a/moses/Parameter.h
+++ b/moses/Parameter.h
@@ -66,27 +66,27 @@ protected:
void
AddParam(options_description& optgroup,
- value_semantic const* optvalue,
- std::string const& paramName,
- std::string const& description);
+ value_semantic const* optvalue,
+ std::string const& paramName,
+ std::string const& description);
void
AddParam(options_description& optgroup,
- std::string const &paramName,
- std::string const &description);
+ std::string const &paramName,
+ std::string const &description);
void
AddParam(options_description& optgroup,
- value_semantic const* optvalue,
- std::string const& paramName,
- std::string const& abbrevName,
- std::string const& description);
+ value_semantic const* optvalue,
+ std::string const& paramName,
+ std::string const& abbrevName,
+ std::string const& description);
void
AddParam(options_description& optgroup,
- std::string const& paramName,
- std::string const& abbrevName,
- std::string const& description);
+ std::string const& paramName,
+ std::string const& abbrevName,
+ std::string const& description);
void PrintCredit();
void PrintFF() const;
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index d07fb5f00..31de139ea 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -67,7 +67,7 @@ RegisterScoreProducer(FeatureFunction* scoreProducer)
VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription()
<< " start: " << start
- << " end: " << (s_denseVectorSize-1) << endl);
+ << " end: " << (s_denseVectorSize-1) << endl);
}
@@ -194,21 +194,19 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
}
std::vector<FeatureFunction*> const& all_ff
- = FeatureFunction::GetFeatureFunctions();
- BOOST_FOREACH(FeatureFunction const* ff, all_ff)
- {
- string name = ff->GetScoreProducerDescription();
- size_t i = ff->GetIndex();
- if (ff->GetNumScoreComponents() == 1)
- out << name << sep << m_scores[i] << linesep;
- else
- {
- size_t stop = i + ff->GetNumScoreComponents();
- boost::format fmt("%s_%d");
- for (size_t k = 1; i < stop; ++i, ++k)
- out << fmt % name % k << sep << m_scores[i] << linesep;
- }
+ = FeatureFunction::GetFeatureFunctions();
+ BOOST_FOREACH(FeatureFunction const* ff, all_ff) {
+ string name = ff->GetScoreProducerDescription();
+ size_t i = ff->GetIndex();
+ if (ff->GetNumScoreComponents() == 1)
+ out << name << sep << m_scores[i] << linesep;
+ else {
+ size_t stop = i + ff->GetNumScoreComponents();
+ boost::format fmt("%s_%d");
+ for (size_t k = 1; i < stop; ++i, ++k)
+ out << fmt % name % k << sep << m_scores[i] << linesep;
}
+ }
// write sparse features
m_scores.write(out,sep,linesep);
}
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index 0dbdb366c..696658c80 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -231,10 +231,10 @@ public:
//! produced by sp
void
PlusEquals(const FeatureFunction* sp,
- const ScoreComponentCollection& scores) {
+ const ScoreComponentCollection& scores) {
size_t i = sp->GetIndex();
size_t stop = i + sp->GetNumScoreComponents();
- for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
+ for (; i < stop; ++i) m_scores[i] += scores.m_scores[i];
}
//! Add scores from a single FeatureFunction only
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index cf866f933..e4dab8547 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -60,28 +60,23 @@ aux_init_partial_translation(string& line)
string sourceCompletedStr;
int loc1 = line.find( "|||", 0 );
int loc2 = line.find( "|||", loc1 + 3 );
- if (loc1 > -1 && loc2 > -1)
- {
- m_initialTargetPhrase = Trim(line.substr(0, loc1));
- string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
- line = line.substr(loc2 + 3);
-
- m_sourceCompleted.resize(scov.size());
- int contiguous = 1;
- for (size_t i = 0; i < scov.size(); ++i)
- {
- if (sourceCompletedStr.at(i) == '1')
- {
- m_sourceCompleted[i] = true;
- if (contiguous) m_frontSpanCoveredLength++;
- }
- else
- {
- m_sourceCompleted[i] = false;
- contiguous = 0;
- }
- }
+ if (loc1 > -1 && loc2 > -1) {
+ m_initialTargetPhrase = Trim(line.substr(0, loc1));
+ string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
+ line = line.substr(loc2 + 3);
+
+ m_sourceCompleted.resize(scov.size());
+ int contiguous = 1;
+ for (size_t i = 0; i < scov.size(); ++i) {
+ if (sourceCompletedStr.at(i) == '1') {
+ m_sourceCompleted[i] = true;
+ if (contiguous) m_frontSpanCoveredLength++;
+ } else {
+ m_sourceCompleted[i] = false;
+ contiguous = 0;
+ }
}
+ }
}
void
@@ -94,38 +89,31 @@ aux_interpret_sgml_markup(string& line)
metamap::const_iterator i;
if ((i = meta.find("id")) != meta.end())
this->SetTranslationId(atol(i->second.c_str()));
- if ((i = meta.find("docid")) != meta.end())
- {
- this->SetDocumentId(atol(i->second.c_str()));
- this->SetUseTopicId(false);
+ if ((i = meta.find("docid")) != meta.end()) {
+ this->SetDocumentId(atol(i->second.c_str()));
+ this->SetUseTopicId(false);
+ this->SetUseTopicIdAndProb(false);
+ }
+ if ((i = meta.find("topic")) != meta.end()) {
+ vector<string> topic_params;
+ boost::split(topic_params, i->second, boost::is_any_of("\t "));
+ if (topic_params.size() == 1) {
+ this->SetTopicId(atol(topic_params[0].c_str()));
+ this->SetUseTopicId(true);
this->SetUseTopicIdAndProb(false);
+ } else {
+ this->SetTopicIdAndProb(topic_params);
+ this->SetUseTopicId(false);
+ this->SetUseTopicIdAndProb(true);
}
- if ((i = meta.find("topic")) != meta.end())
- {
- vector<string> topic_params;
- boost::split(topic_params, i->second, boost::is_any_of("\t "));
- if (topic_params.size() == 1)
- {
- this->SetTopicId(atol(topic_params[0].c_str()));
- this->SetUseTopicId(true);
- this->SetUseTopicIdAndProb(false);
- }
- else
- {
- this->SetTopicIdAndProb(topic_params);
- this->SetUseTopicId(false);
- this->SetUseTopicIdAndProb(true);
- }
- }
- if ((i = meta.find("weight-setting")) != meta.end())
- {
- this->SetWeightSetting(i->second);
- this->SetSpecifiesWeightSetting(true);
- StaticData::Instance().SetWeightSetting(i->second);
- // oh this is so horrible! Why does this have to be propagated globally?
- // --- UG
- }
- else this->SetSpecifiesWeightSetting(false);
+ }
+ if ((i = meta.find("weight-setting")) != meta.end()) {
+ this->SetWeightSetting(i->second);
+ this->SetSpecifiesWeightSetting(true);
+ StaticData::Instance().SetWeightSetting(i->second);
+ // oh this is so horrible! Why does this have to be propagated globally?
+ // --- UG
+ } else this->SetSpecifiesWeightSetting(false);
}
void
@@ -135,48 +123,44 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
using namespace std;
typedef map<string, string> str2str_map;
vector<str2str_map> meta = ProcessAndStripDLT(line);
- BOOST_FOREACH(str2str_map const& M, meta)
- {
- str2str_map::const_iterator i,j;
- if ((i = M.find("type")) != M.end())
- {
- j = M.find("id");
- string id = j == M.end() ? "default" : j->second;
- if (i->second == "cbtm")
- {
- PhraseDictionaryDynamicCacheBased* cbtm;
- cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
- if (cbtm) cbtm->ExecuteDlt(M);
- }
- if (i->second == "cblm")
- {
- DynamicCacheBasedLanguageModel* cblm;
- cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
- if (cblm) cblm->ExecuteDlt(M);
- }
- }
+ BOOST_FOREACH(str2str_map const& M, meta) {
+ str2str_map::const_iterator i,j;
+ if ((i = M.find("type")) != M.end()) {
+ j = M.find("id");
+ string id = j == M.end() ? "default" : j->second;
+ if (i->second == "cbtm") {
+ PhraseDictionaryDynamicCacheBased* cbtm;
+ cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
+ if (cbtm) cbtm->ExecuteDlt(M);
+ }
+ if (i->second == "cblm") {
+ DynamicCacheBasedLanguageModel* cblm;
+ cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
+ if (cblm) cblm->ExecuteDlt(M);
+ }
}
+ }
}
void
Sentence::
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
- std::vector<std::pair<size_t, std::string> >& placeholders)
-{ // parse XML markup in translation line
+ std::vector<std::pair<size_t, std::string> >& placeholders)
+{
+ // parse XML markup in translation line
const StaticData &SD = StaticData::Instance();
using namespace std;
- if (SD.GetXmlInputType() != XmlPassThrough)
- {
- int offset = SD.IsSyntax() ? 1 : 0;
- bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
- m_reorderingConstraint,
- xmlWalls, placeholders, offset,
- SD.GetXmlBrackets().first,
- SD.GetXmlBrackets().second);
- UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
- }
+ if (SD.GetXmlInputType() != XmlPassThrough) {
+ int offset = SD.IsSyntax() ? 1 : 0;
+ bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
+ m_reorderingConstraint,
+ xmlWalls, placeholders, offset,
+ SD.GetXmlBrackets().first,
+ SD.GetXmlBrackets().second);
+ UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
+ }
}
void
@@ -197,11 +181,10 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>"
- if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled())
- {
- string pthru = PassthroughSGML(line,"passthrough");
- this->SetPassthroughInformation(pthru);
- }
+ if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) {
+ string pthru = PassthroughSGML(line,"passthrough");
+ this->SetPassthroughInformation(pthru);
+ }
vector<size_t> xmlWalls;
vector<pair<size_t, string> >placeholders;
@@ -218,26 +201,23 @@ init(string line, std::vector<FactorType> const& factorOrder)
// our XmlOptions and create TranslationOptions
// only fill the vector if we are parsing XML
- if (SD.GetXmlInputType() != XmlPassThrough)
- {
- m_xmlCoverageMap.assign(GetSize(), false);
- BOOST_FOREACH(XmlOption* o, m_xmlOptions)
- {
- WordsRange const& r = o->range;
- for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
- m_xmlCoverageMap[j]=true;
- }
+ if (SD.GetXmlInputType() != XmlPassThrough) {
+ m_xmlCoverageMap.assign(GetSize(), false);
+ BOOST_FOREACH(XmlOption* o, m_xmlOptions) {
+ WordsRange const& r = o->range;
+ for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
+ m_xmlCoverageMap[j]=true;
}
+ }
// reordering walls and zones
m_reorderingConstraint.InitializeWalls(GetSize());
// set reordering walls, if "-monotone-at-punction" is set
- if (SD.UseReorderingConstraint() && GetSize())
- {
- WordsRange r(0, GetSize()-1);
- m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
- }
+ if (SD.UseReorderingConstraint() && GetSize()) {
+ WordsRange r(0, GetSize()-1);
+ m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
+ }
// set walls obtained from xml
for(size_t i=0; i<xmlWalls.size(); i++)
@@ -283,8 +263,8 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv
- = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
- transOptThreshold);
+ = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
+ transOptThreshold);
assert(rv);
return rv;
}
@@ -386,7 +366,7 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
Sentence::
Sentence(size_t const transId, string const& stext,
- vector<FactorType> const* IFO)
+ vector<FactorType> const* IFO)
: InputType(transId)
{
if (IFO) init(stext, *IFO);
diff --git a/moses/Sentence.h b/moses/Sentence.h
index 8a870f76b..661280711 100644
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@@ -32,109 +32,110 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
- class WordsRange;
- class PhraseDictionary;
- class TranslationOption;
- class TranslationOptionCollection;
- class ChartTranslationOptions;
- class TranslationTask;
- struct XmlOption;
-
+class WordsRange;
+class PhraseDictionary;
+class TranslationOption;
+class TranslationOptionCollection;
+class ChartTranslationOptions;
+class TranslationTask;
+struct XmlOption;
+
+
+/**
+ * A Phrase class with an ID. Used specifically as source input so contains functionality to read
+ * from IODevice and create trans opt
+ */
+class Sentence : public Phrase, public InputType
+{
+protected:
/**
- * A Phrase class with an ID. Used specifically as source input so contains functionality to read
- * from IODevice and create trans opt
+ * Utility method that takes in a string representing an XML tag and the name of the attribute,
+ * and returns the value of that tag if present, empty string otherwise
*/
- class Sentence : public Phrase, public InputType
- {
- protected:
-
- /**
- * Utility method that takes in a string representing an XML tag and the name of the attribute,
- * and returns the value of that tag if present, empty string otherwise
- */
- std::vector<XmlOption*> m_xmlOptions;
- std::vector <bool> m_xmlCoverageMap;
+ std::vector<XmlOption*> m_xmlOptions;
+ std::vector <bool> m_xmlCoverageMap;
- NonTerminalSet m_defaultLabelSet;
+ NonTerminalSet m_defaultLabelSet;
- void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
+ void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
- public:
- Sentence();
- Sentence(size_t const transId, std::string const& stext,
- std::vector<FactorType> const* IFO = NULL);
- // Sentence(size_t const transId, std::string const& stext);
- ~Sentence();
+public:
+ Sentence();
+ Sentence(size_t const transId, std::string const& stext,
+ std::vector<FactorType> const* IFO = NULL);
+ // Sentence(size_t const transId, std::string const& stext);
+ ~Sentence();
- InputTypeEnum GetType() const {
- return SentenceInput;
- }
+ InputTypeEnum GetType() const {
+ return SentenceInput;
+ }
- //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
- Phrase GetSubString(const WordsRange& r) const {
- return Phrase::GetSubString(r);
- }
+ //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
+ Phrase GetSubString(const WordsRange& r) const {
+ return Phrase::GetSubString(r);
+ }
- //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
- const Word& GetWord(size_t pos) const {
- return Phrase::GetWord(pos);
- }
+ //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
+ const Word& GetWord(size_t pos) const {
+ return Phrase::GetWord(pos);
+ }
- //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
- size_t GetSize() const {
- return Phrase::GetSize();
- }
+ //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
+ size_t GetSize() const {
+ return Phrase::GetSize();
+ }
- //! Returns true if there were any XML tags parsed that at least partially covered the range passed
- bool XmlOverlap(size_t startPos, size_t endPos) const;
+ //! Returns true if there were any XML tags parsed that at least partially covered the range passed
+ bool XmlOverlap(size_t startPos, size_t endPos) const;
- //! populates vector argument with XML force translation options for the specific range passed
- void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
- void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
- std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
+ //! populates vector argument with XML force translation options for the specific range passed
+ void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
+ void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
+ std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
- virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
- void Print(std::ostream& out) const;
+ virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
+ void Print(std::ostream& out) const;
- TranslationOptionCollection*
- CreateTranslationOptionCollection(ttasksptr const& ttask) const;
+ TranslationOptionCollection*
+ CreateTranslationOptionCollection(ttasksptr const& ttask) const;
- virtual void
- CreateFromString(std::vector<FactorType> const &factorOrder,
- std::string const& phraseString);
+ virtual void
+ CreateFromString(std::vector<FactorType> const &factorOrder,
+ std::string const& phraseString);
- const NonTerminalSet&
- GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const
- { return m_defaultLabelSet; }
+ const NonTerminalSet&
+ GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
+ return m_defaultLabelSet;
+ }
- void
- init(std::string line, std::vector<FactorType> const& factorOrder);
+ void
+ init(std::string line, std::vector<FactorType> const& factorOrder);
- private:
- // auxliliary functions for Sentence initialization
- // void aux_interpret_sgml_markup(std::string& line);
- // void aux_interpret_dlt(std::string& line);
- // void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
- // std::vector<std::pair<size_t, std::string> >& placeholders);
+private:
+ // auxliliary functions for Sentence initialization
+ // void aux_interpret_sgml_markup(std::string& line);
+ // void aux_interpret_dlt(std::string& line);
+ // void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
+ // std::vector<std::pair<size_t, std::string> >& placeholders);
- void
- aux_interpret_sgml_markup(std::string& line);
+ void
+ aux_interpret_sgml_markup(std::string& line);
- void
- aux_interpret_dlt(std::string& line);
+ void
+ aux_interpret_dlt(std::string& line);
- void
- aux_interpret_xml
- (std::string& line, std::vector<size_t> & xmlWalls,
- std::vector<std::pair<size_t, std::string> >& placeholders);
+ void
+ aux_interpret_xml
+ (std::string& line, std::vector<size_t> & xmlWalls,
+ std::vector<std::pair<size_t, std::string> >& placeholders);
- void
- aux_init_partial_translation(std::string& line);
+ void
+ aux_init_partial_translation(std::string& line);
- };
+};
}
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 420ad7a20..c3d55f5c7 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -118,7 +118,7 @@ StaticData
string &feature = toks[0];
std::map<std::string, std::string>::const_iterator iter
- = featureNameOverride.find(feature);
+ = featureNameOverride.find(feature);
if (iter == featureNameOverride.end()) {
// feature name not override
m_registry.Construct(feature, line);
@@ -146,7 +146,7 @@ StaticData
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
m_parameter->SetParameter(m_continuePartialTranslation,
- "continue-partial-translation", false );
+ "continue-partial-translation", false );
std::string s_it = "text input";
if (m_inputType == 1) {
@@ -160,7 +160,7 @@ StaticData
}
VERBOSE(2,"input type is: "<<s_it<<"\n");
- // use of xml in input
+ // use of xml in input
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
// specify XML tags opening and closing brackets for XML option
@@ -178,7 +178,7 @@ StaticData
}
m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
- "default-non-term-for-empty-range-only", false );
+ "default-non-term-for-empty-range-only", false );
}
@@ -347,18 +347,18 @@ StaticData
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
- "print-alignment-info-in-n-best", false );
+ "print-alignment-info-in-n-best", false );
// include feature names in the n-best list
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list
m_parameter->SetParameter(m_nBestIncludesSegmentation,
- "include-segmentation-in-n-best", false );
+ "include-segmentation-in-n-best", false );
// print all factors of output translations
m_parameter->SetParameter(m_reportAllFactorsNBest,
- "report-all-factors-in-n-best", false );
+ "report-all-factors-in-n-best", false );
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
return true;
@@ -412,7 +412,7 @@ StaticData
#ifndef WITH_THREADS
if (m_threadCount > 1) {
std::cerr << "Error: Thread count of " << params->at(0)
- << " but moses not built with thread support";
+ << " but moses not built with thread support";
return false;
}
#endif
@@ -426,11 +426,11 @@ StaticData
::ini_cube_pruning_options()
{
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
- DEFAULT_CUBE_PRUNING_POP_LIMIT);
+ DEFAULT_CUBE_PRUNING_POP_LIMIT);
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
- DEFAULT_CUBE_PRUNING_DIVERSITY);
+ DEFAULT_CUBE_PRUNING_DIVERSITY);
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
- false);
+ false);
}
void
@@ -468,7 +468,7 @@ void
StaticData
::ini_oov_options()
{
- // unknown word processing
+ // unknown word processing
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
@@ -647,7 +647,7 @@ bool StaticData::LoadData(Parameter *parameter)
// S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
- RecursiveCYKPlus);
+ RecursiveCYKPlus);
ini_zombie_options(); // probably dead, or maybe not
@@ -1016,7 +1016,7 @@ StaticData
::InitializeForInput(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
- = FeatureFunction::GetFeatureFunctions();
+ = FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {
@@ -1024,7 +1024,7 @@ StaticData
iTime.start();
ff.InitializeForInput(ttask);
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )"
- << "= " << iTime << endl);
+ << "= " << iTime << endl);
}
}
}
@@ -1034,7 +1034,7 @@ StaticData
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
{
const std::vector<FeatureFunction*> &producers
- = FeatureFunction::GetFeatureFunctions();
+ = FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) {
@@ -1111,7 +1111,7 @@ bool StaticData::CheckWeights() const
if (!weightNames.empty()) {
cerr << "The following weights have no feature function. "
- << "Maybe incorrectly spelt weights: ";
+ << "Maybe incorrectly spelt weights: ";
set<string>::iterator iter;
for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
cerr << *iter << ",";
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 438ac0633..2b46d1ef0 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -476,18 +476,18 @@ public:
// m_searchAlgorithm == SyntaxF2S;
// }
- bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const
- {
+ bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
if (algo == DefaultSearchAlgorithm)
algo = m_searchAlgorithm;
return (algo == CYKPlus || algo == ChartIncremental ||
- algo == SyntaxS2T || algo == SyntaxT2S ||
- algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
+ algo == SyntaxS2T || algo == SyntaxT2S ||
+ algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
}
const ScoreComponentCollection&
- GetAllWeights() const
- { return m_allWeights; }
+ GetAllWeights() const {
+ return m_allWeights;
+ }
void SetAllWeights(const ScoreComponentCollection& weights) {
m_allWeights = weights;
diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp
index bd19cbace..21d5b0447 100644
--- a/moses/Syntax/F2S/HyperTreeLoader.cpp
+++ b/moses/Syntax/F2S/HyperTreeLoader.cpp
@@ -146,7 +146,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
}
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
- const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
+ const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
{
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
p != hp.nodeSeqs.end(); ++p) {
diff --git a/moses/Syntax/F2S/HyperTreeLoader.h b/moses/Syntax/F2S/HyperTreeLoader.h
index 088c7eaf5..eebf1185a 100644
--- a/moses/Syntax/F2S/HyperTreeLoader.h
+++ b/moses/Syntax/F2S/HyperTreeLoader.h
@@ -31,7 +31,7 @@ public:
private:
void ExtractSourceTerminalSetFromHyperPath(
- const HyperPath &, boost::unordered_set<std::size_t> &);
+ const HyperPath &, boost::unordered_set<std::size_t> &);
};
} // namespace F2S
diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h
index 3aedc640e..55f85e888 100644
--- a/moses/Syntax/F2S/Manager-inl.h
+++ b/moses/Syntax/F2S/Manager-inl.h
@@ -39,7 +39,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask)
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&m_source)) {
m_forest = p->GetForest();
m_rootVertex = p->GetRootVertex();
- m_sentenceLength = p->GetSize();
+ m_sentenceLength = p->GetSize();
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) {
T2S::InputTreeBuilder builder;
T2S::InputTree tmpTree;
diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h
index 1dcab4f5e..bcf1ff2bd 100644
--- a/moses/Syntax/F2S/Manager.h
+++ b/moses/Syntax/F2S/Manager.h
@@ -39,7 +39,7 @@ public:
typedef std::vector<boost::shared_ptr<KBestExtractor::Derivation> > kBestList_t;
void ExtractKBest(std::size_t k, kBestList_t& kBestList,
- bool onlyDistinct=false) const;
+ bool onlyDistinct=false) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
diff --git a/moses/Syntax/InputWeightFF.cpp b/moses/Syntax/InputWeightFF.cpp
index af44e31ec..8bb88c6ac 100644
--- a/moses/Syntax/InputWeightFF.cpp
+++ b/moses/Syntax/InputWeightFF.cpp
@@ -11,34 +11,34 @@ namespace Syntax
{
InputWeightFF::InputWeightFF(const std::string &line)
- : StatelessFeatureFunction(1, line)
+ : StatelessFeatureFunction(1, line)
{
ReadParameters();
}
void InputWeightFF::EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{
// TODO Throw exception.
assert(false);
}
void InputWeightFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const
+ ScoreComponentCollection* accumulator) const
{
// TODO Throw exception.
assert(false);
}
void InputWeightFF::EvaluateWhenApplied(
- const Syntax::SHyperedge &hyperedge,
- ScoreComponentCollection* accumulator) const
+ const Syntax::SHyperedge &hyperedge,
+ ScoreComponentCollection* accumulator) const
{
accumulator->PlusEquals(this, hyperedge.label.inputWeight);
}
void InputWeightFF::SetParameter(const std::string& key,
- const std::string& value)
+ const std::string& value)
{
StatelessFeatureFunction::SetParameter(key, value);
}
diff --git a/moses/Syntax/InputWeightFF.h b/moses/Syntax/InputWeightFF.h
index bdda1d922..127834e64 100644
--- a/moses/Syntax/InputWeightFF.h
+++ b/moses/Syntax/InputWeightFF.h
@@ -42,7 +42,7 @@ public:
ScoreComponentCollection *) const {}
void EvaluateTranslationOptionListWithSourceContext(
- const InputType &, const TranslationOptionList &) const {}
+ const InputType &, const TranslationOptionList &) const {}
};
} // Syntax
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 5a26e44cc..4976375e9 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -225,21 +225,19 @@ void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProduce
boost::shared_ptr<Scores>
mergescores(boost::shared_ptr<Scores> const& a,
- boost::shared_ptr<Scores> const& b)
+ boost::shared_ptr<Scores> const& b)
{
boost::shared_ptr<Scores> ret;
if (!a) return b ? b : ret;
if (!b) return a;
if (a->size() != b->size()) return ret;
ret.reset(new Scores(*a));
- for (size_t i = 0; i < a->size(); ++i)
- {
- if ((*a)[i] == 0) (*a)[i] = (*b)[i];
- else if ((*b)[i])
- {
- UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
- }
+ for (size_t i = 0; i < a->size(); ++i) {
+ if ((*a)[i] == 0) (*a)[i] = (*b)[i];
+ else if ((*b)[i]) {
+ UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
}
+ }
return ret;
}
@@ -253,12 +251,11 @@ Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
m_fullScore += copy.m_fullScore;
typedef ScoreCache_t::iterator iter;
typedef ScoreCache_t::value_type item;
- BOOST_FOREACH(item const& s, copy.m_cached_scores)
- {
- pair<iter,bool> foo = m_cached_scores.insert(s);
- if (foo.second == false)
- foo.first->second = mergescores(foo.first->second, s.second);
- }
+ BOOST_FOREACH(item const& s, copy.m_cached_scores) {
+ pair<iter,bool> foo = m_cached_scores.insert(s);
+ if (foo.second == false)
+ foo.first->second = mergescores(foo.first->second, s.second);
+ }
}
TargetPhrase::ScoreCache_t const&
@@ -279,8 +276,10 @@ GetExtraScores(FeatureFunction const* ff) const
void
TargetPhrase::
SetExtraScores(FeatureFunction const* ff,
- boost::shared_ptr<Scores> const& s)
-{ m_cached_scores[ff] = s; }
+ boost::shared_ptr<Scores> const& s)
+{
+ m_cached_scores[ff] = s;
+}
void TargetPhrase::SetProperties(const StringPiece &str)
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 1f5960121..35b06c1c7 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -51,15 +51,15 @@ class PhraseDictionary;
*/
class TargetPhrase: public Phrase
{
- public:
+public:
typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> >
- ScoreCache_t;
+ ScoreCache_t;
ScoreCache_t const& GetExtraScores() const;
Scores const* GetExtraScores(FeatureFunction const* ff) const;
void SetExtraScores(FeatureFunction const* ff,
- boost::shared_ptr<Scores> const& scores);
+ boost::shared_ptr<Scores> const& scores);
- private:
+private:
ScoreCache_t m_cached_scores;
private:
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
index 6166b4d42..4d2152920 100644
--- a/moses/TrainingTask.h
+++ b/moses/TrainingTask.h
@@ -18,7 +18,7 @@ class TrainingTask : public Moses::TranslationTask
protected:
TrainingTask(boost::shared_ptr<Moses::InputType> const source,
- boost::shared_ptr<Moses::IOWrapper> const ioWrapper)
+ boost::shared_ptr<Moses::IOWrapper> const ioWrapper)
: TranslationTask(source, ioWrapper)
{ }
@@ -26,8 +26,7 @@ public:
// factory function
static boost::shared_ptr<TrainingTask>
- create(boost::shared_ptr<InputType> const& source)
- {
+ create(boost::shared_ptr<InputType> const& source) {
boost::shared_ptr<IOWrapper> nix;
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, nix));
ret->m_self = ret;
@@ -37,8 +36,7 @@ public:
// factory function
static boost::shared_ptr<TrainingTask>
create(boost::shared_ptr<InputType> const& source,
- boost::shared_ptr<IOWrapper> const& ioWrapper)
- {
+ boost::shared_ptr<IOWrapper> const& ioWrapper) {
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
ret->m_self = ret;
return ret;
@@ -53,7 +51,7 @@ public:
std::cerr << *m_source << std::endl;
TranslationOptionCollection *transOptColl
- = m_source->CreateTranslationOptionCollection(this->self());
+ = m_source->CreateTranslationOptionCollection(this->self());
transOptColl->CreateTranslationOptions();
delete transOptColl;
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h
index 130dd89fc..0f20fa1b2 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h
@@ -163,7 +163,7 @@ public:
#ifdef WITH_THREADS
boost::shared_ptr<HashTask<Keys> >
- ht(new HashTask<Keys>(current, *this, keys));
+ ht(new HashTask<Keys>(current, *this, keys));
m_threadPool.Submit(ht);
#else
CalcHash(current, keys);
diff --git a/moses/TranslationModel/CompactPT/MmapAllocator.h b/moses/TranslationModel/CompactPT/MmapAllocator.h
index 389b60359..5c1d5b58e 100644
--- a/moses/TranslationModel/CompactPT/MmapAllocator.h
+++ b/moses/TranslationModel/CompactPT/MmapAllocator.h
@@ -133,7 +133,7 @@ public:
size_t read = 0;
read += ftruncate(m_file_desc, m_map_size);
m_data_ptr = (char *)util::MapOrThrow(
- m_map_size, true, map_shared, false, m_file_desc, 0);
+ m_map_size, true, map_shared, false, m_file_desc, 0);
return (pointer)m_data_ptr;
} else {
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
@@ -142,7 +142,7 @@ public:
size_t map_size = m_map_size + relative_offset;
m_data_ptr = (char *)util::MapOrThrow(
- m_map_size, false, map_shared, false, m_file_desc, map_offset);
+ m_map_size, false, map_shared, false, m_file_desc, map_offset);
return (pointer)(m_data_ptr + relative_offset);
}
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index 2c1f1f39e..6c30d5acd 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -117,8 +117,7 @@ public:
virtual
TargetPhraseCollection const *
- GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src)
- {
+ GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) {
return GetTargetPhraseCollectionLEGACY(src);
}
@@ -129,8 +128,7 @@ public:
virtual
void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
- const InputPathList &inputPathQueue) const
- {
+ const InputPathList &inputPathQueue) const {
GetTargetPhraseCollectionBatch(inputPathQueue);
}
diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp
index 52bf49fb2..484692ad9 100644
--- a/moses/TranslationOption.cpp
+++ b/moses/TranslationOption.cpp
@@ -109,7 +109,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
return out;
}
- /** returns cached scores */
+/** returns cached scores */
const Scores*
TranslationOption::
GetLexReorderingScores(LexicalReordering const* scoreProducer) const
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index 4bf545f7d..87a3c9c5d 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -164,7 +164,7 @@ public:
// }
void CacheLexReorderingScores(const LexicalReordering &scoreProducer,
- const Scores &score);
+ const Scores &score);
TO_STRING();
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index 1e3ef9045..07544b88d 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -57,7 +57,7 @@ namespace Moses
* called by inherited classe */
TranslationOptionCollection::
TranslationOptionCollection(ttasksptr const& ttask,
- InputType const& src,
+ InputType const& src,
size_t maxNoTransOptPerCoverage,
float translationOptionThreshold)
: m_ttask(ttask)
@@ -626,14 +626,13 @@ CacheLexReordering()
{
size_t const stop = m_source.GetSize();
typedef StatefulFeatureFunction sfFF;
- BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions())
- {
- if (typeid(*ff) != typeid(LexicalReordering)) continue;
- LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
- for (size_t s = 0 ; s < stop ; s++)
- BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
- lr.SetCache(tol);
- }
+ BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) {
+ if (typeid(*ff) != typeid(LexicalReordering)) continue;
+ LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
+ for (size_t s = 0 ; s < stop ; s++)
+ BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
+ lr.SetCache(tol);
+ }
}
//! list of trans opt for a particular span
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index 4c0a6bdc6..2712441ed 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -75,7 +75,7 @@ protected:
InputPathList m_inputPathQueue;
TranslationOptionCollection(ttasksptr const& ttask,
- InputType const& src, size_t maxNoTransOptPerCoverage,
+ InputType const& src, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold);
void CalcFutureScore();
@@ -177,8 +177,7 @@ public:
return m_inputPathQueue;
}
- ttasksptr GetTranslationTask() const
- {
+ ttasksptr GetTranslationTask() const {
return m_ttask.lock();
}
TO_STRING();
diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp
index 387821102..6ee83d969 100644
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@@ -21,7 +21,7 @@ namespace Moses
/** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet::
TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
- const ConfusionNet &input,
+ const ConfusionNet &input,
size_t maxNoTransOptPerCoverage,
float translationOptionThreshold)
: TranslationOptionCollection(ttask,input, maxNoTransOptPerCoverage,
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index e2d9e996a..fde40e538 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -23,7 +23,7 @@ TranslationOptionCollectionLattice
( ttasksptr const& ttask, const WordLattice &input,
size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage,
- translationOptionThreshold)
+ translationOptionThreshold)
{
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
"Not for models using the legqacy binary phrase table");
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 764ca998a..3794d35e7 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -53,7 +53,7 @@ TranslationTask
boost::shared_ptr<TranslationTask>
TranslationTask
::create(boost::shared_ptr<InputType> const& source,
- boost::shared_ptr<IOWrapper> const& ioWrapper)
+ boost::shared_ptr<IOWrapper> const& ioWrapper)
{
boost::shared_ptr<TranslationTask> ret(new TranslationTask(source, ioWrapper));
ret->m_self = ret;
@@ -63,7 +63,7 @@ TranslationTask
TranslationTask
::TranslationTask(boost::shared_ptr<InputType> const& source,
- boost::shared_ptr<IOWrapper> const& ioWrapper)
+ boost::shared_ptr<IOWrapper> const& ioWrapper)
: m_source(source) , m_ioWrapper(ioWrapper)
{ }
@@ -82,37 +82,33 @@ TranslationTask
if (!staticData.IsSyntax(algo))
manager.reset(new Manager(this->self())); // phrase-based
- else if (algo == SyntaxF2S || algo == SyntaxT2S)
- { // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
- typedef Syntax::F2S::RuleMatcherCallback Callback;
- typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
- manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
- }
-
- else if (algo == SyntaxS2T)
- { // new-style string-to-tree decoding (ask Phil Williams)
- S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
- if (algorithm == RecursiveCYKPlus)
- {
- typedef Syntax::S2T::EagerParserCallback Callback;
- typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
- manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
- }
- else if (algorithm == Scope3)
- {
- typedef Syntax::S2T::StandardParserCallback Callback;
- typedef Syntax::S2T::Scope3Parser<Callback> Parser;
- manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
- }
- else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
- }
-
- else if (algo == SyntaxT2S_SCFG)
- { // SCFG-based tree-to-string decoding (ask Phil Williams)
- typedef Syntax::F2S::RuleMatcherCallback Callback;
- typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
- manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
- }
+ else if (algo == SyntaxF2S || algo == SyntaxT2S) {
+ // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
+ typedef Syntax::F2S::RuleMatcherCallback Callback;
+ typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
+ manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
+ }
+
+ else if (algo == SyntaxS2T) {
+ // new-style string-to-tree decoding (ask Phil Williams)
+ S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
+ if (algorithm == RecursiveCYKPlus) {
+ typedef Syntax::S2T::EagerParserCallback Callback;
+ typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
+ manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
+ } else if (algorithm == Scope3) {
+ typedef Syntax::S2T::StandardParserCallback Callback;
+ typedef Syntax::S2T::Scope3Parser<Callback> Parser;
+ manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
+ } else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
+ }
+
+ else if (algo == SyntaxT2S_SCFG) {
+ // SCFG-based tree-to-string decoding (ask Phil Williams)
+ typedef Syntax::F2S::RuleMatcherCallback Callback;
+ typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
+ manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
+ }
else if (algo == ChartIncremental) // Ken's incremental decoding
manager.reset(new Incremental::Manager(this->self()));
@@ -126,8 +122,8 @@ TranslationTask
void TranslationTask::Run()
{
UTIL_THROW_IF2(!m_source || !m_ioWrapper,
- "Base Instances of TranslationTask must be initialized with"
- << " input and iowrapper.");
+ "Base Instances of TranslationTask must be initialized with"
+ << " input and iowrapper.");
// shorthand for "global data"
@@ -152,7 +148,7 @@ void TranslationTask::Run()
boost::shared_ptr<BaseManager> manager = SetupManager();
VERBOSE(1, "Line " << translationId << ": Initialize search took "
- << initTime << " seconds total" << endl);
+ << initTime << " seconds total" << endl);
manager->Decode();
@@ -209,9 +205,9 @@ void TranslationTask::Run()
// report additional statistics
manager->CalcDecoderStatistics();
VERBOSE(1, "Line " << translationId << ": Additional reporting took "
- << additionalReportingTime << " seconds total" << endl);
+ << additionalReportingTime << " seconds total" << endl);
VERBOSE(1, "Line " << translationId << ": Translation took "
- << translationTime << " seconds total" << endl);
+ << translationTime << " seconds total" << endl);
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
}
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index df1cf9f48..2b75c47d5 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -40,7 +40,9 @@ class TranslationTask : public Moses::Task
TranslationTask(TranslationTask const& other) { }
TranslationTask const&
- operator=(TranslationTask const& other) { return *this; }
+ operator=(TranslationTask const& other) {
+ return *this;
+ }
protected:
boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself
@@ -48,7 +50,7 @@ protected:
// pointer to ContextScope, which stores context-specific information
TranslationTask() { } ;
TranslationTask(boost::shared_ptr<Moses::InputType> const& source,
- boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
+ boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
// Yes, the constructor is protected.
//
// TranslationTasks can only be created through the creator
@@ -68,11 +70,15 @@ protected:
public:
boost::shared_ptr<TranslationTask>
- self() { return m_self.lock(); }
+ self() {
+ return m_self.lock();
+ }
virtual
boost::shared_ptr<TranslationTask const>
- self() const { return m_self.lock(); }
+ self() const {
+ return m_self.lock();
+ }
// creator functions
static boost::shared_ptr<TranslationTask> create();
@@ -84,7 +90,7 @@ public:
static
boost::shared_ptr<TranslationTask>
create(boost::shared_ptr<Moses::InputType> const& source,
- boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
+ boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
~TranslationTask();
/** Translate one sentence
@@ -92,15 +98,16 @@ public:
virtual void Run();
boost::shared_ptr<Moses::InputType>
- GetSource() const { return m_source; }
+ GetSource() const {
+ return m_source;
+ }
boost::shared_ptr<BaseManager>
SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm);
boost::shared_ptr<ContextScope> const&
- GetScope() const
- {
+ GetScope() const {
UTIL_THROW_IF2(m_scope == NULL, "No context scope!");
return m_scope;
}
diff --git a/moses/TreeInput.h b/moses/TreeInput.h
index 7b76ce303..fc7387b0d 100644
--- a/moses/TreeInput.h
+++ b/moses/TreeInput.h
@@ -8,7 +8,7 @@
namespace Moses
{
- class TranslationTask;
+class TranslationTask;
//! @todo what is this?
class XMLParseOutput
{
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index 66536909f..366a9dc77 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -89,18 +89,18 @@ enum FactorDirection {
};
enum DecodeType {
- Translate
- ,Generate
+ Translate,
+ Generate
};
namespace LexReorderType
{
enum LexReorderType { // explain values
- Backward
- ,Forward
- ,Bidirectional
- ,Fe
- ,F
+ Backward,
+ Forward,
+ Bidirectional,
+ Fe,
+ F
};
}
@@ -113,13 +113,13 @@ enum DistortionOrientationOptions {
}
enum InputTypeEnum {
- SentenceInput = 0
- ,ConfusionNetworkInput = 1
- ,WordLatticeInput = 2
- ,TreeInputType = 3
- //,WordLatticeInput2 = 4
- , TabbedSentenceInput = 5
- ,ForestInputType = 6
+ SentenceInput = 0,
+ ConfusionNetworkInput = 1,
+ WordLatticeInput = 2,
+ TreeInputType = 3,
+ //,WordLatticeInput2 = 4,
+ TabbedSentenceInput = 5,
+ ForestInputType = 6
};
enum XmlInputType {
@@ -131,41 +131,41 @@ enum XmlInputType {
};
enum DictionaryFind {
- Best = 0
- ,All = 1
+ Best = 0,
+ All = 1
};
// Note: StaticData uses SearchAlgorithm to determine whether the translation
// model is phrase-based or syntax-based. If you add a syntax-based search
// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
- Normal = 0
- ,CubePruning = 1
+ Normal = 0,
+ CubePruning = 1,
//,CubeGrowing = 2
- ,CYKPlus = 3
- ,NormalBatch = 4
- ,ChartIncremental = 5
- ,SyntaxS2T = 6
- ,SyntaxT2S = 7
- ,SyntaxT2S_SCFG = 8
- ,SyntaxF2S = 9
- ,DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
+ CYKPlus = 3,
+ NormalBatch = 4,
+ ChartIncremental = 5,
+ SyntaxS2T = 6,
+ SyntaxT2S = 7,
+ SyntaxT2S_SCFG = 8,
+ SyntaxF2S = 9,
+ DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};
enum SourceLabelOverlap {
- SourceLabelOverlapAdd = 0
- ,SourceLabelOverlapReplace = 1
- ,SourceLabelOverlapDiscard = 2
+ SourceLabelOverlapAdd = 0,
+ SourceLabelOverlapReplace = 1,
+ SourceLabelOverlapDiscard = 2
};
enum WordAlignmentSort {
- NoSort = 0
- ,TargetOrder = 1
+ NoSort = 0,
+ TargetOrder = 1
};
enum FormatType {
- MosesFormat
- ,HieroFormat
+ MosesFormat,
+ HieroFormat
};
enum S2TParsingAlgorithm {
diff --git a/moses/Util.cpp b/moses/Util.cpp
index a8175b58d..fd0538efd 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -90,13 +90,6 @@ bool FileExists(const std::string& filePath)
return !ifs.fail();
}
-const std::string Trim(const std::string& str, const std::string dropChars)
-{
- std::string res = str;
- res.erase(str.find_last_not_of(dropChars)+1);
- return res.erase(0, res.find_first_not_of(dropChars));
-}
-
void ResetUserTime()
{
g_timer.start();
diff --git a/moses/Util.h b/moses/Util.h
index 5c9b493f2..43443eb02 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -19,8 +19,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-#ifndef moses_Util_h
-#define moses_Util_h
+#pragma once
#include <iostream>
#include <fstream>
@@ -89,10 +88,17 @@ namespace Moses
#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer)
#endif
-//! delete white spaces at beginning and end of string
-const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
+
const std::string ToLower(const std::string& str);
+//! delete white spaces at beginning and end of string
+inline std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r")
+{
+ std::string res = str;
+ res.erase(str.find_last_not_of(dropChars)+1);
+ return res.erase(0, res.find_first_not_of(dropChars));
+}
+
//! get string representation of any object/variable, as long as it can pipe to a stream
template<typename T>
inline std::string SPrint(const T &input)
@@ -421,7 +427,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
out << *this; \
return out.str(); \
} \
-
+
//! delete and remove every element of a collection object such as set, list etc
template<class COLL>
void RemoveAllInColl(COLL &coll)
@@ -533,4 +539,3 @@ void ShowWeights();
} // namespace
-#endif
diff --git a/moses/server/Optimizer.cpp b/moses/server/Optimizer.cpp
index d28d7f085..8e5babfc7 100644
--- a/moses/server/Optimizer.cpp
+++ b/moses/server/Optimizer.cpp
@@ -3,70 +3,67 @@
namespace MosesServer
{
- using namespace std;
+using namespace std;
- Optimizer::
- Optimizer()
- {
- // signature and help strings are documentation -- the client
- // can query this information with a system.methodSignature and
- // system.methodHelp RPC.
- this->_signature = "S:S";
- this->_help = "Optimizes multi-model translation model";
- }
+Optimizer::
+Optimizer()
+{
+ // signature and help strings are documentation -- the client
+ // can query this information with a system.methodSignature and
+ // system.methodHelp RPC.
+ this->_signature = "S:S";
+ this->_help = "Optimizes multi-model translation model";
+}
- void
- Optimizer::
- execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
- {
+void
+Optimizer::
+execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP)
+{
#ifdef WITH_DLIB
- const params_t params = paramList.getStruct(0);
- params_t::const_iterator si;
- if ((si = params.find("model_name")) == params.end())
- {
- string msg = "Missing name of model to be optimized";
- msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
- throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
- }
- const string model_name = xmlrpc_c::value_string(si->second);
+ const params_t params = paramList.getStruct(0);
+ params_t::const_iterator si;
+ if ((si = params.find("model_name")) == params.end()) {
+ string msg = "Missing name of model to be optimized";
+ msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
+ throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+ }
+ const string model_name = xmlrpc_c::value_string(si->second);
- if ((si = params.find("phrase_pairs")) == params.end())
- {
- throw xmlrpc_c::fault("Missing list of phrase pairs",
- xmlrpc_c::fault::CODE_PARSE);
- }
+ if ((si = params.find("phrase_pairs")) == params.end()) {
+ throw xmlrpc_c::fault("Missing list of phrase pairs",
+ xmlrpc_c::fault::CODE_PARSE);
+ }
- vector<pair<string, string> > phrase_pairs;
+ vector<pair<string, string> > phrase_pairs;
- xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
- vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
- for (size_t i = 0; i < ppValVec.size(); ++i)
- {
- xmlrpc_c::value_array pp_array
- = xmlrpc_c::value_array(ppValVec[i]);
- vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
- string L1 = xmlrpc_c::value_string(pp[0]);
- string L2 = xmlrpc_c::value_string(pp[1]);
- phrase_pairs.push_back(make_pair(L1,L2));
- }
+ xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
+ vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
+ for (size_t i = 0; i < ppValVec.size(); ++i) {
+ xmlrpc_c::value_array pp_array
+ = xmlrpc_c::value_array(ppValVec[i]);
+ vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
+ string L1 = xmlrpc_c::value_string(pp[0]);
+ string L2 = xmlrpc_c::value_string(pp[1]);
+ phrase_pairs.push_back(make_pair(L1,L2));
+ }
- // PhraseDictionaryMultiModel* pdmm
- // = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
- PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
- vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
+ // PhraseDictionaryMultiModel* pdmm
+ // = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
+ PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
+ vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
- vector<xmlrpc_c::value> weight_vector_ret;
- for (size_t i=0;i < weight_vector.size();i++)
- weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
+ vector<xmlrpc_c::value> weight_vector_ret;
+ for (size_t i=0; i < weight_vector.size(); i++)
+ weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
- *retvalP = xmlrpc_c::value_array(weight_vector_ret);
+ *retvalP = xmlrpc_c::value_array(weight_vector_ret);
#else
- string errmsg = "Error: Perplexity minimization requires dlib ";
- errmsg += "(compilation option --with-dlib)";
- std::cerr << errmsg << std::endl;
- *retvalP = xmlrpc_c::value_string(errmsg);
+ string errmsg = "Error: Perplexity minimization requires dlib ";
+ errmsg += "(compilation option --with-dlib)";
+ std::cerr << errmsg << std::endl;
+ *retvalP = xmlrpc_c::value_string(errmsg);
#endif
- }
+}
}
diff --git a/moses/server/Optimizer.h b/moses/server/Optimizer.h
index 8911b089f..da84df023 100644
--- a/moses/server/Optimizer.h
+++ b/moses/server/Optimizer.h
@@ -6,12 +6,12 @@
namespace MosesServer
{
- class
+class
Optimizer : public xmlrpc_c::method
- {
- public:
- Optimizer();
- void execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP);
- };
+{
+public:
+ Optimizer();
+ void execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP);
+};
}
diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp
index 62e3031fa..5c87eb1a7 100644
--- a/moses/server/TranslationRequest.cpp
+++ b/moses/server/TranslationRequest.cpp
@@ -3,372 +3,363 @@
namespace MosesServer
{
- using namespace std;
- using Moses::Hypothesis;
- using Moses::StaticData;
- using Moses::WordsRange;
- using Moses::ChartHypothesis;
- using Moses::Phrase;
- using Moses::Manager;
- using Moses::SearchGraphNode;
- using Moses::TrellisPathList;
- using Moses::TranslationOptionCollection;
- using Moses::TranslationOptionList;
- using Moses::TranslationOption;
- using Moses::TargetPhrase;
- using Moses::FValue;
- using Moses::PhraseDictionaryMultiModel;
- using Moses::FindPhraseDictionary;
- using Moses::Sentence;
-
- boost::shared_ptr<TranslationRequest>
- TranslationRequest::
- create(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut)
- {
- boost::shared_ptr<TranslationRequest> ret;
- ret.reset(new TranslationRequest(paramList,cond, mut));
- ret->m_self = ret;
- return ret;
- }
-
- void
- TranslationRequest::
- Run()
- {
- parse_request(m_paramList.getStruct(0));
-
- Moses::StaticData const& SD = Moses::StaticData::Instance();
+using namespace std;
+using Moses::Hypothesis;
+using Moses::StaticData;
+using Moses::WordsRange;
+using Moses::ChartHypothesis;
+using Moses::Phrase;
+using Moses::Manager;
+using Moses::SearchGraphNode;
+using Moses::TrellisPathList;
+using Moses::TranslationOptionCollection;
+using Moses::TranslationOptionList;
+using Moses::TranslationOption;
+using Moses::TargetPhrase;
+using Moses::FValue;
+using Moses::PhraseDictionaryMultiModel;
+using Moses::FindPhraseDictionary;
+using Moses::Sentence;
+
+boost::shared_ptr<TranslationRequest>
+TranslationRequest::
+create(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
+ boost::mutex& mut)
+{
+ boost::shared_ptr<TranslationRequest> ret;
+ ret.reset(new TranslationRequest(paramList,cond, mut));
+ ret->m_self = ret;
+ return ret;
+}
- //Make sure alternative paths are retained, if necessary
- if (m_withGraphInfo || m_nbestSize>0)
- // why on earth is this a global variable? Is this even thread-safe???? UG
- (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
+void
+TranslationRequest::
+Run()
+{
+ parse_request(m_paramList.getStruct(0));
- std::stringstream out, graphInfo, transCollOpts;
+ Moses::StaticData const& SD = Moses::StaticData::Instance();
- if (SD.IsSyntax())
- run_chart_decoder();
- else
- run_phrase_decoder();
+ //Make sure alternative paths are retained, if necessary
+ if (m_withGraphInfo || m_nbestSize>0)
+ // why on earth is this a global variable? Is this even thread-safe???? UG
+ (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
- XVERBOSE(1,"Output: " << out.str() << endl);
- {
- boost::lock_guard<boost::mutex> lock(m_mutex);
- m_done = true;
- }
- m_cond.notify_one();
+ std::stringstream out, graphInfo, transCollOpts;
- }
+ if (SD.IsSyntax())
+ run_chart_decoder();
+ else
+ run_phrase_decoder();
- /// add phrase alignment information from a Hypothesis
- void
- TranslationRequest::
- add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
+ XVERBOSE(1,"Output: " << out.str() << endl);
{
- if (!m_withAlignInfo) return;
- WordsRange const& trg = h.GetCurrTargetWordsRange();
- WordsRange const& src = h.GetCurrSourceWordsRange();
-
- std::map<std::string, xmlrpc_c::value> pAlnInfo;
- pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
- pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
- pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
- aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
+ boost::lock_guard<boost::mutex> lock(m_mutex);
+ m_done = true;
}
+ m_cond.notify_one();
- void
- TranslationRequest::
- outputChartHypo(ostream& out, const ChartHypothesis* hypo)
- {
- Phrase outPhrase(20);
- hypo->GetOutputPhrase(outPhrase);
-
- // delete 1st & last
- assert(outPhrase.GetSize() >= 2);
- outPhrase.RemoveWord(0);
- outPhrase.RemoveWord(outPhrase.GetSize() - 1);
- for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
- out << *outPhrase.GetFactor(pos, 0) << " ";
- }
+}
- bool
- TranslationRequest::
- compareSearchGraphNode(const Moses::SearchGraphNode& a,
- const Moses::SearchGraphNode& b)
- { return a.hypo->GetId() < b.hypo->GetId(); }
+/// add phrase alignment information from a Hypothesis
+void
+TranslationRequest::
+add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
+{
+ if (!m_withAlignInfo) return;
+ WordsRange const& trg = h.GetCurrTargetWordsRange();
+ WordsRange const& src = h.GetCurrSourceWordsRange();
+
+ std::map<std::string, xmlrpc_c::value> pAlnInfo;
+ pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
+ pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
+ pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
+ aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
+}
- void
- TranslationRequest::
- insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
- {
- using xmlrpc_c::value_int;
- using xmlrpc_c::value_double;
- using xmlrpc_c::value_struct;
- using xmlrpc_c::value_string;
- vector<xmlrpc_c::value> searchGraphXml;
- vector<SearchGraphNode> searchGraph;
- manager.GetSearchGraph(searchGraph);
- std::sort(searchGraph.begin(), searchGraph.end());
- BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph)
- {
- map<string, xmlrpc_c::value> x; // search graph xml node
- x["forward"] = value_double(n.forward);
- x["fscore"] = value_double(n.fscore);
- const Hypothesis* hypo = n.hypo;
- x["hyp"] = value_int(hypo->GetId());
- x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
- if (hypo->GetId() != 0)
- {
- const Hypothesis *prevHypo = hypo->GetPrevHypo();
- x["back"] = value_int(prevHypo->GetId());
- x["score"] = value_double(hypo->GetScore());
- x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
- if (n.recombinationHypo)
- x["recombined"] = value_int(n.recombinationHypo->GetId());
- x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
- x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
- x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
- }
- searchGraphXml.push_back(value_struct(x));
- }
- retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
- }
+void
+TranslationRequest::
+outputChartHypo(ostream& out, const ChartHypothesis* hypo)
+{
+ Phrase outPhrase(20);
+ hypo->GetOutputPhrase(outPhrase);
+
+ // delete 1st & last
+ assert(outPhrase.GetSize() >= 2);
+ outPhrase.RemoveWord(0);
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
+ for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
+ out << *outPhrase.GetFactor(pos, 0) << " ";
+}
- void
- TranslationRequest::
- output_phrase(ostream& out, Phrase const& phrase) const
- {
- if (!m_reportAllFactors)
- {
- for (size_t i = 0 ; i < phrase.GetSize(); ++i)
- out << *phrase.GetFactor(i, 0) << " ";
- }
- else out << phrase;
+bool
+TranslationRequest::
+compareSearchGraphNode(const Moses::SearchGraphNode& a,
+ const Moses::SearchGraphNode& b)
+{
+ return a.hypo->GetId() < b.hypo->GetId();
+}
+
+void
+TranslationRequest::
+insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
+{
+ using xmlrpc_c::value_int;
+ using xmlrpc_c::value_double;
+ using xmlrpc_c::value_struct;
+ using xmlrpc_c::value_string;
+ vector<xmlrpc_c::value> searchGraphXml;
+ vector<SearchGraphNode> searchGraph;
+ manager.GetSearchGraph(searchGraph);
+ std::sort(searchGraph.begin(), searchGraph.end());
+ BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph) {
+ map<string, xmlrpc_c::value> x; // search graph xml node
+ x["forward"] = value_double(n.forward);
+ x["fscore"] = value_double(n.fscore);
+ const Hypothesis* hypo = n.hypo;
+ x["hyp"] = value_int(hypo->GetId());
+ x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
+ if (hypo->GetId() != 0) {
+ const Hypothesis *prevHypo = hypo->GetPrevHypo();
+ x["back"] = value_int(prevHypo->GetId());
+ x["score"] = value_double(hypo->GetScore());
+ x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
+ if (n.recombinationHypo)
+ x["recombined"] = value_int(n.recombinationHypo->GetId());
+ x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
+ x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
+ x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
+ }
+ searchGraphXml.push_back(value_struct(x));
}
+ retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
+}
- void
- TranslationRequest::
- outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
- {
- TrellisPathList nBestList;
- vector<xmlrpc_c::value> nBestXml;
- manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
-
- BOOST_FOREACH(Moses::TrellisPath const* path, nBestList)
- {
- vector<const Hypothesis *> const& E = path->GetEdges();
- if (!E.size()) continue;
- std::map<std::string, xmlrpc_c::value> nBestXmlItem;
- pack_hypothesis(E, "hyp", nBestXmlItem);
- if (m_withScoreBreakdown)
- {
- // should the score breakdown be reported in a more structured manner?
- ostringstream buf;
- path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
- nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
- }
-
- // weighted score
- nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
- nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
- }
- retData["nbest"] = xmlrpc_c::value_array(nBestXml);
+void
+TranslationRequest::
+output_phrase(ostream& out, Phrase const& phrase) const
+{
+ if (!m_reportAllFactors) {
+ for (size_t i = 0 ; i < phrase.GetSize(); ++i)
+ out << *phrase.GetFactor(i, 0) << " ";
+ } else out << phrase;
+}
+
+void
+TranslationRequest::
+outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
+{
+ TrellisPathList nBestList;
+ vector<xmlrpc_c::value> nBestXml;
+ manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
+
+ BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
+ vector<const Hypothesis *> const& E = path->GetEdges();
+ if (!E.size()) continue;
+ std::map<std::string, xmlrpc_c::value> nBestXmlItem;
+ pack_hypothesis(E, "hyp", nBestXmlItem);
+ if (m_withScoreBreakdown) {
+ // should the score breakdown be reported in a more structured manner?
+ ostringstream buf;
+ path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
+ nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
+ }
+
+ // weighted score
+ nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
+ nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
}
+ retData["nbest"] = xmlrpc_c::value_array(nBestXml);
+}
- void
- TranslationRequest::
- insertTranslationOptions(Moses::Manager& manager,
- std::map<std::string, xmlrpc_c::value>& retData)
- {
- const TranslationOptionCollection* toptsColl
- = manager.getSntTranslationOptions();
- vector<xmlrpc_c::value> toptsXml;
- size_t const stop = toptsColl->GetSource().GetSize();
- TranslationOptionList const* tol;
- for (size_t s = 0 ; s < stop ; ++s)
- {
- for (size_t e = s;
- (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
- ++e)
- {
- BOOST_FOREACH(TranslationOption const* topt, *tol)
- {
- std::map<std::string, xmlrpc_c::value> toptXml;
- TargetPhrase const& tp = topt->GetTargetPhrase();
- StaticData const& GLOBAL = StaticData::Instance();
- std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder());
- toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
- toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
- toptXml["start"] = xmlrpc_c::value_int(s);
- toptXml["end"] = xmlrpc_c::value_int(e);
- vector<xmlrpc_c::value> scoresXml;
- const std::valarray<FValue> &scores
- = topt->GetScoreBreakdown().getCoreFeatures();
- for (size_t j = 0; j < scores.size(); ++j)
- scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
-
- toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
- toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
- }
- }
+void
+TranslationRequest::
+insertTranslationOptions(Moses::Manager& manager,
+ std::map<std::string, xmlrpc_c::value>& retData)
+{
+ const TranslationOptionCollection* toptsColl
+ = manager.getSntTranslationOptions();
+ vector<xmlrpc_c::value> toptsXml;
+ size_t const stop = toptsColl->GetSource().GetSize();
+ TranslationOptionList const* tol;
+ for (size_t s = 0 ; s < stop ; ++s) {
+ for (size_t e = s;
+ (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
+ ++e) {
+ BOOST_FOREACH(TranslationOption const* topt, *tol) {
+ std::map<std::string, xmlrpc_c::value> toptXml;
+ TargetPhrase const& tp = topt->GetTargetPhrase();
+ StaticData const& GLOBAL = StaticData::Instance();
+ std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder());
+ toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
+ toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
+ toptXml["start"] = xmlrpc_c::value_int(s);
+ toptXml["end"] = xmlrpc_c::value_int(e);
+ vector<xmlrpc_c::value> scoresXml;
+ const std::valarray<FValue> &scores
+ = topt->GetScoreBreakdown().getCoreFeatures();
+ for (size_t j = 0; j < scores.size(); ++j)
+ scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
+
+ toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
+ toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
}
- retData["topt"] = xmlrpc_c::value_array(toptsXml);
+ }
}
+ retData["topt"] = xmlrpc_c::value_array(toptsXml);
+}
- bool
- check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
- {
- std::map<std::string, xmlrpc_c::value>::const_iterator m;
- return (params.find(key) != params.end());
- }
+bool
+check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
+{
+ std::map<std::string, xmlrpc_c::value>::const_iterator m;
+ return (params.find(key) != params.end());
+}
- TranslationRequest::
- TranslationRequest(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond, boost::mutex& mut)
- : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
- { }
-
- void
- TranslationRequest::
- parse_request(std::map<std::string, xmlrpc_c::value> const& params)
- { // parse XMLRPC request
- // params_t const params = m_paramList.getStruct(0);
- m_paramList.verifyEnd(1); // ??? UG
-
- // source text must be given, or we don't know what to translate
- typedef std::map<std::string, xmlrpc_c::value> params_t;
- params_t::const_iterator si = params.find("text");
- if (si == params.end())
- throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
- m_source_string = xmlrpc_c::value_string(si->second);
- XVERBOSE(1,"Input: " << m_source_string << endl);
-
- m_withAlignInfo = check(params, "align");
- m_withWordAlignInfo = check(params, "word-align");
- m_withGraphInfo = check(params, "sg");
- m_withTopts = check(params, "topt");
- m_reportAllFactors = check(params, "report-all-factors");
- m_nbestDistinct = check(params, "nbest-distinct");
- m_withScoreBreakdown = check(params, "add-score-breakdown");
- m_source.reset(new Sentence(0,m_source_string));
- si = params.find("lambda");
- if (si != params.end())
- {
- // muMo = multiModel
- xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
- vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
- vector<float> w(muMoValVec.size());
- for (size_t i = 0; i < muMoValVec.size(); ++i)
- w[i] = xmlrpc_c::value_double(muMoValVec[i]);
- if (w.size() && (si = params.find("model_name")) != params.end())
- {
- string const model_name = xmlrpc_c::value_string(si->second);
- PhraseDictionaryMultiModel* pdmm
- = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
- // Moses::PhraseDictionaryMultiModel* pdmm
- // = FindPhraseDictionary(model_name);
- pdmm->SetTemporaryMultiModelWeightsVector(w);
- }
- }
+TranslationRequest::
+TranslationRequest(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond, boost::mutex& mut)
+ : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
+{ }
- // // biased sampling for suffix-array-based sampling phrase table?
- // if ((si = params.find("bias")) != params.end())
- // {
- // std::vector<xmlrpc_c::value> tmp
- // = xmlrpc_c::value_array(si->second).cvalue();
- // for (size_t i = 1; i < tmp.size(); i += 2)
- // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
- // }
- } // end of Translationtask::parse_request()
+void
+TranslationRequest::
+parse_request(std::map<std::string, xmlrpc_c::value> const& params)
+{
+ // parse XMLRPC request
+ // params_t const params = m_paramList.getStruct(0);
+ m_paramList.verifyEnd(1); // ??? UG
+
+ // source text must be given, or we don't know what to translate
+ typedef std::map<std::string, xmlrpc_c::value> params_t;
+ params_t::const_iterator si = params.find("text");
+ if (si == params.end())
+ throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
+ m_source_string = xmlrpc_c::value_string(si->second);
+ XVERBOSE(1,"Input: " << m_source_string << endl);
+
+ m_withAlignInfo = check(params, "align");
+ m_withWordAlignInfo = check(params, "word-align");
+ m_withGraphInfo = check(params, "sg");
+ m_withTopts = check(params, "topt");
+ m_reportAllFactors = check(params, "report-all-factors");
+ m_nbestDistinct = check(params, "nbest-distinct");
+ m_withScoreBreakdown = check(params, "add-score-breakdown");
+ m_source.reset(new Sentence(0,m_source_string));
+ si = params.find("lambda");
+ if (si != params.end()) {
+ // muMo = multiModel
+ xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
+ vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
+ vector<float> w(muMoValVec.size());
+ for (size_t i = 0; i < muMoValVec.size(); ++i)
+ w[i] = xmlrpc_c::value_double(muMoValVec[i]);
+ if (w.size() && (si = params.find("model_name")) != params.end()) {
+ string const model_name = xmlrpc_c::value_string(si->second);
+ PhraseDictionaryMultiModel* pdmm
+ = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
+ // Moses::PhraseDictionaryMultiModel* pdmm
+ // = FindPhraseDictionary(model_name);
+ pdmm->SetTemporaryMultiModelWeightsVector(w);
+ }
+ }
+ // // biased sampling for suffix-array-based sampling phrase table?
+ // if ((si = params.find("bias")) != params.end())
+ // {
+ // std::vector<xmlrpc_c::value> tmp
+ // = xmlrpc_c::value_array(si->second).cvalue();
+ // for (size_t i = 1; i < tmp.size(); i += 2)
+ // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
+ // }
+} // end of Translationtask::parse_request()
- void
- TranslationRequest::
- run_chart_decoder()
- {
- Moses::TreeInput tinput;
- istringstream buf(m_source_string + "\n");
- tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
- Moses::ChartManager manager(this->self());
- manager.Decode();
+void
+TranslationRequest::
+run_chart_decoder()
+{
+ Moses::TreeInput tinput;
+ istringstream buf(m_source_string + "\n");
+ tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
- const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
- ostringstream out;
- outputChartHypo(out,hypo);
+ Moses::ChartManager manager(this->self());
+ manager.Decode();
- m_target_string = out.str();
- m_retData["text"] = xmlrpc_c::value_string(m_target_string);
+ const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
+ ostringstream out;
+ outputChartHypo(out,hypo);
- if (m_withGraphInfo)
- {
- std::ostringstream sgstream;
- manager.OutputSearchGraphMoses(sgstream);
- m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
- }
- } // end of TranslationRequest::run_chart_decoder()
+ m_target_string = out.str();
+ m_retData["text"] = xmlrpc_c::value_string(m_target_string);
- void
- TranslationRequest::
- pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
- map<string, xmlrpc_c::value> & dest) const
- {
- // target string
- ostringstream target;
- BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
- output_phrase(target, e->GetCurrTargetPhrase());
- dest[key] = xmlrpc_c::value_string(target.str());
+ if (m_withGraphInfo) {
+ std::ostringstream sgstream;
+ manager.OutputSearchGraphMoses(sgstream);
+ m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
+ }
+} // end of TranslationRequest::run_chart_decoder()
- if (m_withAlignInfo)
- { // phrase alignment, if requested
+void
+TranslationRequest::
+pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
+ map<string, xmlrpc_c::value> & dest) const
+{
+ // target string
+ ostringstream target;
+ BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
+ output_phrase(target, e->GetCurrTargetPhrase());
+ dest[key] = xmlrpc_c::value_string(target.str());
- vector<xmlrpc_c::value> p_aln;
- BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
- add_phrase_aln_info(*e, p_aln);
- dest["align"] = xmlrpc_c::value_array(p_aln);
- }
+ if (m_withAlignInfo) {
+ // phrase alignment, if requested
- if (m_withWordAlignInfo)
- { // word alignment, if requested
- vector<xmlrpc_c::value> w_aln;
- BOOST_FOREACH(Hypothesis const* e, edges)
- e->OutputLocalWordAlignment(w_aln);
- dest["word-align"] = xmlrpc_c::value_array(w_aln);
- }
+ vector<xmlrpc_c::value> p_aln;
+ BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
+ add_phrase_aln_info(*e, p_aln);
+ dest["align"] = xmlrpc_c::value_array(p_aln);
}
- void
- TranslationRequest::
- pack_hypothesis(Hypothesis const* h, string const& key,
- map<string, xmlrpc_c::value>& dest) const
- {
- using namespace std;
- vector<Hypothesis const*> edges;
- for (;h; h = h->GetPrevHypo())
- edges.push_back(h);
- pack_hypothesis(edges, key, dest);
+ if (m_withWordAlignInfo) {
+ // word alignment, if requested
+ vector<xmlrpc_c::value> w_aln;
+ BOOST_FOREACH(Hypothesis const* e, edges)
+ e->OutputLocalWordAlignment(w_aln);
+ dest["word-align"] = xmlrpc_c::value_array(w_aln);
}
+}
+void
+TranslationRequest::
+pack_hypothesis(Hypothesis const* h, string const& key,
+ map<string, xmlrpc_c::value>& dest) const
+{
+ using namespace std;
+ vector<Hypothesis const*> edges;
+ for (; h; h = h->GetPrevHypo())
+ edges.push_back(h);
+ pack_hypothesis(edges, key, dest);
+}
- void
- TranslationRequest::
- run_phrase_decoder()
- {
- Manager manager(this->self());
- // if (m_bias.size()) manager.SetBias(&m_bias);
- manager.Decode();
- pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
+void
+TranslationRequest::
+run_phrase_decoder()
+{
+ Manager manager(this->self());
+ // if (m_bias.size()) manager.SetBias(&m_bias);
+ manager.Decode();
- if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
- if (m_withTopts) insertTranslationOptions(manager,m_retData);
- if (m_nbestSize) outputNBest(manager, m_retData);
+ pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
- (const_cast<StaticData&>(Moses::StaticData::Instance()))
- .SetOutputSearchGraph(false);
- // WTF? one more reason not to have this as global variable! --- UG
+ if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
+ if (m_withTopts) insertTranslationOptions(manager,m_retData);
+ if (m_nbestSize) outputNBest(manager, m_retData);
- }
+ (const_cast<StaticData&>(Moses::StaticData::Instance()))
+ .SetOutputSearchGraph(false);
+ // WTF? one more reason not to have this as global variable! --- UG
+
+}
}
diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h
index 6c7cd7275..d67e55e03 100644
--- a/moses/server/TranslationRequest.h
+++ b/moses/server/TranslationRequest.h
@@ -23,100 +23,106 @@
#include <xmlrpc-c/base.hpp>
namespace MosesServer
{
- class
+class
TranslationRequest : public virtual Moses::TranslationTask
- {
- boost::condition_variable& m_cond;
- boost::mutex& m_mutex;
- bool m_done;
+{
+ boost::condition_variable& m_cond;
+ boost::mutex& m_mutex;
+ bool m_done;
- xmlrpc_c::paramList const& m_paramList;
- std::map<std::string, xmlrpc_c::value> m_retData;
- std::map<uint32_t,float> m_bias; // for biased sampling
+ xmlrpc_c::paramList const& m_paramList;
+ std::map<std::string, xmlrpc_c::value> m_retData;
+ std::map<uint32_t,float> m_bias; // for biased sampling
- std::string m_source_string, m_target_string;
- bool m_withAlignInfo;
- bool m_withWordAlignInfo;
- bool m_withGraphInfo;
- bool m_withTopts;
- bool m_reportAllFactors;
- bool m_nbestDistinct;
- bool m_withScoreBreakdown;
- size_t m_nbestSize;
+ std::string m_source_string, m_target_string;
+ bool m_withAlignInfo;
+ bool m_withWordAlignInfo;
+ bool m_withGraphInfo;
+ bool m_withTopts;
+ bool m_reportAllFactors;
+ bool m_nbestDistinct;
+ bool m_withScoreBreakdown;
+ size_t m_nbestSize;
- void
- parse_request();
+ void
+ parse_request();
- void
- parse_request(std::map<std::string, xmlrpc_c::value> const& req);
+ void
+ parse_request(std::map<std::string, xmlrpc_c::value> const& req);
- virtual void
- run_chart_decoder();
+ virtual void
+ run_chart_decoder();
- virtual void
- run_phrase_decoder();
+ virtual void
+ run_phrase_decoder();
- void
- pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
- std::string const& key,
- std::map<std::string, xmlrpc_c::value> & dest) const;
+ void
+ pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
+ std::string const& key,
+ std::map<std::string, xmlrpc_c::value> & dest) const;
- void
- pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
- std::map<std::string, xmlrpc_c::value> & dest) const;
+ void
+ pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
+ std::map<std::string, xmlrpc_c::value> & dest) const;
- void
- output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
+ void
+ output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
- void
- add_phrase_aln_info(Moses::Hypothesis const& h,
- std::vector<xmlrpc_c::value>& aInfo) const;
+ void
+ add_phrase_aln_info(Moses::Hypothesis const& h,
+ std::vector<xmlrpc_c::value>& aInfo) const;
- void
- outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
+ void
+ outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
- bool
- compareSearchGraphNode(const Moses::SearchGraphNode& a,
- const Moses::SearchGraphNode& b);
+ bool
+ compareSearchGraphNode(const Moses::SearchGraphNode& a,
+ const Moses::SearchGraphNode& b);
- void
- insertGraphInfo(Moses::Manager& manager,
- std::map<std::string, xmlrpc_c::value>& retData);
- void
- outputNBest(Moses::Manager const& manager,
- std::map<std::string, xmlrpc_c::value>& retData);
+ void
+ insertGraphInfo(Moses::Manager& manager,
+ std::map<std::string, xmlrpc_c::value>& retData);
+ void
+ outputNBest(Moses::Manager const& manager,
+ std::map<std::string, xmlrpc_c::value>& retData);
- void
- insertTranslationOptions(Moses::Manager& manager,
- std::map<std::string, xmlrpc_c::value>& retData);
- protected:
- TranslationRequest(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut);
+ void
+ insertTranslationOptions(Moses::Manager& manager,
+ std::map<std::string, xmlrpc_c::value>& retData);
+protected:
+ TranslationRequest(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
+ boost::mutex& mut);
- public:
+public:
- static
- boost::shared_ptr<TranslationRequest>
- create(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
- boost::mutex& mut);
+ static
+ boost::shared_ptr<TranslationRequest>
+ create(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
+ boost::mutex& mut);
- virtual bool
- DeleteAfterExecution() { return false; }
+ virtual bool
+ DeleteAfterExecution() {
+ return false;
+ }
- bool
- IsDone() const { return m_done; }
+ bool
+ IsDone() const {
+ return m_done;
+ }
- std::map<std::string, xmlrpc_c::value> const&
- GetRetData() { return m_retData; }
+ std::map<std::string, xmlrpc_c::value> const&
+ GetRetData() {
+ return m_retData;
+ }
- void
- Run();
+ void
+ Run();
- };
+};
}
diff --git a/moses/server/Translator.cpp b/moses/server/Translator.cpp
index d4cff99df..be8920abd 100644
--- a/moses/server/Translator.cpp
+++ b/moses/server/Translator.cpp
@@ -4,34 +4,34 @@
namespace MosesServer
{
- using namespace std;
- using namespace Moses;
+using namespace std;
+using namespace Moses;
- Translator::
- Translator(size_t numThreads)
- : m_threadPool(numThreads)
- {
- // signature and help strings are documentation -- the client
- // can query this information with a system.methodSignature and
- // system.methodHelp RPC.
- this->_signature = "S:S";
- this->_help = "Does translation";
- }
+Translator::
+Translator(size_t numThreads)
+ : m_threadPool(numThreads)
+{
+ // signature and help strings are documentation -- the client
+ // can query this information with a system.methodSignature and
+ // system.methodHelp RPC.
+ this->_signature = "S:S";
+ this->_help = "Does translation";
+}
- void
- Translator::
- execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
- {
- boost::condition_variable cond;
- boost::mutex mut;
- boost::shared_ptr<TranslationRequest> task
- = TranslationRequest::create(paramList,cond,mut);
- m_threadPool.Submit(task);
- boost::unique_lock<boost::mutex> lock(mut);
- while (!task->IsDone())
- cond.wait(lock);
- *retvalP = xmlrpc_c::value_struct(task->GetRetData());
- }
+void
+Translator::
+execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP)
+{
+ boost::condition_variable cond;
+ boost::mutex mut;
+ boost::shared_ptr<TranslationRequest> task
+ = TranslationRequest::create(paramList,cond,mut);
+ m_threadPool.Submit(task);
+ boost::unique_lock<boost::mutex> lock(mut);
+ while (!task->IsDone())
+ cond.wait(lock);
+ *retvalP = xmlrpc_c::value_struct(task->GetRetData());
+}
}
diff --git a/moses/server/Translator.h b/moses/server/Translator.h
index e3117c290..4a6f889e8 100644
--- a/moses/server/Translator.h
+++ b/moses/server/Translator.h
@@ -10,17 +10,17 @@
#endif
namespace MosesServer
{
- class
+class
// MosesServer::
Translator : public xmlrpc_c::method
- {
- public:
- Translator(size_t numThreads = 10);
+{
+public:
+ Translator(size_t numThreads = 10);
- void execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP);
- private:
- Moses::ThreadPool m_threadPool;
- };
+ void execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP);
+private:
+ Moses::ThreadPool m_threadPool;
+};
}
diff --git a/moses/server/Updater.cpp b/moses/server/Updater.cpp
index 818f374a5..bf129bf49 100644
--- a/moses/server/Updater.cpp
+++ b/moses/server/Updater.cpp
@@ -2,56 +2,56 @@
namespace MosesServer
{
- using namespace Moses;
- using namespace std;
+using namespace Moses;
+using namespace std;
- Updater::
- Updater()
- {
- // signature and help strings are documentation -- the client
- // can query this information with a system.methodSignature and
- // system.methodHelp RPC.
- this->_signature = "S:S";
- this->_help = "Updates stuff";
- }
+Updater::
+Updater()
+{
+ // signature and help strings are documentation -- the client
+ // can query this information with a system.methodSignature and
+ // system.methodHelp RPC.
+ this->_signature = "S:S";
+ this->_help = "Updates stuff";
+}
- void
- Updater::
- execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
- {
+void
+Updater::
+execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP)
+{
#if PT_UG
- const params_t params = paramList.getStruct(0);
- breakOutParams(params);
- Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
- pdsa->add(m_src, m_trg, m_aln);
- XVERBOSE(1,"Done inserting\n");
- *retvalP = xmlrpc_c::value_string("Phrase table updated");
+ const params_t params = paramList.getStruct(0);
+ breakOutParams(params);
+ Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
+ pdsa->add(m_src, m_trg, m_aln);
+ XVERBOSE(1,"Done inserting\n");
+ *retvalP = xmlrpc_c::value_string("Phrase table updated");
#endif
- };
+};
- void
- Updater::
- breakOutParams(const params_t& params)
- {
- params_t::const_iterator si = params.find("source");
- if(si == params.end())
- throw xmlrpc_c::fault("Missing source sentence",
- xmlrpc_c::fault::CODE_PARSE);
- m_src = xmlrpc_c::value_string(si->second);
- XVERBOSE(1,"source = " << m_src << endl);
- si = params.find("target");
- if(si == params.end())
- throw xmlrpc_c::fault("Missing target sentence",
- xmlrpc_c::fault::CODE_PARSE);
- m_trg = xmlrpc_c::value_string(si->second);
- XVERBOSE(1,"target = " << m_trg << endl);
- if((si = params.find("alignment")) == params.end())
- throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
- m_aln = xmlrpc_c::value_string(si->second);
- XVERBOSE(1,"alignment = " << m_aln << endl);
- m_bounded = ((si = params.find("bounded")) != params.end());
- m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
- };
+void
+Updater::
+breakOutParams(const params_t& params)
+{
+ params_t::const_iterator si = params.find("source");
+ if(si == params.end())
+ throw xmlrpc_c::fault("Missing source sentence",
+ xmlrpc_c::fault::CODE_PARSE);
+ m_src = xmlrpc_c::value_string(si->second);
+ XVERBOSE(1,"source = " << m_src << endl);
+ si = params.find("target");
+ if(si == params.end())
+ throw xmlrpc_c::fault("Missing target sentence",
+ xmlrpc_c::fault::CODE_PARSE);
+ m_trg = xmlrpc_c::value_string(si->second);
+ XVERBOSE(1,"target = " << m_trg << endl);
+ if((si = params.find("alignment")) == params.end())
+ throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
+ m_aln = xmlrpc_c::value_string(si->second);
+ XVERBOSE(1,"alignment = " << m_aln << endl);
+ m_bounded = ((si = params.find("bounded")) != params.end());
+ m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
+};
}
diff --git a/moses/server/Updater.h b/moses/server/Updater.h
index 9bb20b775..e3eba52ef 100644
--- a/moses/server/Updater.h
+++ b/moses/server/Updater.h
@@ -19,26 +19,26 @@
namespace MosesServer
{
- class
+class
Updater: public xmlrpc_c::method
- {
+{
- typedef std::map<std::string, xmlrpc_c::value> params_t;
+ typedef std::map<std::string, xmlrpc_c::value> params_t;
- std::string m_src, m_trg, m_aln;
- bool m_bounded, m_add2ORLM;
+ std::string m_src, m_trg, m_aln;
+ bool m_bounded, m_add2ORLM;
- public:
- Updater();
+public:
+ Updater();
- void
- execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP);
+ void
+ execute(xmlrpc_c::paramList const& paramList,
+ xmlrpc_c::value * const retvalP);
- void
- breakOutParams(const params_t& params);
+ void
+ breakOutParams(const params_t& params);
- };
+};
}
diff --git a/moses/thread_safe_container.h b/moses/thread_safe_container.h
index 1983d7234..6a977185b 100644
--- a/moses/thread_safe_container.h
+++ b/moses/thread_safe_container.h
@@ -18,108 +18,104 @@
namespace Moses
{
- // todo: replace this with thread lock-free containers, if a stable library can
- // be found somewhere
+// todo: replace this with thread lock-free containers, if a stable library can
+// be found somewhere
- template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
- class
+template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
+class
ThreadSafeContainer
+{
+protected:
+ mutable boost::shared_mutex m_lock;
+ CONTAINER m_container;
+ typedef typename CONTAINER::iterator iter_t;
+ typedef typename CONTAINER::const_iterator const_iter_t;
+ typedef typename CONTAINER::value_type entry_t;
+public:
+
+ class locking_iterator
{
- protected:
- mutable boost::shared_mutex m_lock;
- CONTAINER m_container;
- typedef typename CONTAINER::iterator iter_t;
- typedef typename CONTAINER::const_iterator const_iter_t;
- typedef typename CONTAINER::value_type entry_t;
- public:
+ boost::unique_lock<boost::shared_mutex> m_lock;
+ CONTAINER const* m_container;
+ const_iter_t m_iter;
- class locking_iterator
- {
- boost::unique_lock<boost::shared_mutex> m_lock;
- CONTAINER const* m_container;
- const_iter_t m_iter;
-
- locking_iterator(locking_iterator const& other); // no copies!
- public:
- locking_iterator() : m_container(NULL) { }
-
- locking_iterator(boost::shared_mutex& lock,
- CONTAINER const* container,
- const_iter_t const& iter)
- : m_lock(lock), m_container(container), m_iter(iter)
- { }
-
- entry_t const& operator->()
- {
- UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
- << "or has not been assigned.");
- return m_iter.operator->();
- }
-
- // locking operators transfer the lock upon assignment and become invalid
- locking_iterator const&
- operator=(locking_iterator& other)
- {
- m_lock.swap(other.m_lock);
- m_iter = other.m_iter;
- other.m_iter = other.m_container.end();
- }
-
- bool
- operator==(const_iter_t const& other)
- {
- return m_iter == other;
- }
-
- locking_iterator const&
- operator++() { ++m_iter; return *this; }
-
- // DO NOT DEFINE THE POST-INCREMENT OPERATOR!
- // locking_operators are non-copyable,
- // so we can't simply make a copy before incrementing and return
- // the copy after incrementing
- locking_iterator const&
- operator++(int);
- };
-
- const_iter_t const& end() const
- { return m_container.end(); }
-
- locking_iterator begin() const
- {
- return locking_iterator(m_lock, this, m_container.begin());
+ locking_iterator(locking_iterator const& other); // no copies!
+ public:
+ locking_iterator() : m_container(NULL) { }
+
+ locking_iterator(boost::shared_mutex& lock,
+ CONTAINER const* container,
+ const_iter_t const& iter)
+ : m_lock(lock), m_container(container), m_iter(iter)
+ { }
+
+ entry_t const& operator->() {
+ UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
+ << "or has not been assigned.");
+ return m_iter.operator->();
}
- VAL const& set(KEY const& key, VAL const& val)
- {
- boost::unique_lock< boost::shared_mutex > lock(m_lock);
- entry_t entry(key,val);
- iter_t foo = m_container.insert(entry).first;
- foo->second = val;
- return foo->second;
+ // locking operators transfer the lock upon assignment and become invalid
+ locking_iterator const&
+ operator=(locking_iterator& other) {
+ m_lock.swap(other.m_lock);
+ m_iter = other.m_iter;
+ other.m_iter = other.m_container.end();
}
- VAL const* get(KEY const& key, VAL const& default_val)
- {
- boost::shared_lock< boost::shared_mutex > lock(m_lock);
- entry_t entry(key, default_val);
- iter_t foo = m_container.insert(entry).first;
- return &(foo->second);
+ bool
+ operator==(const_iter_t const& other) {
+ return m_iter == other;
}
- VAL const* get(KEY const& key) const
- {
- boost::shared_lock< boost::shared_mutex > lock(m_lock);
- const_iter_t m = m_container.find(key);
- if (m == m_container.end()) return NULL;
- return &m->second;
+ locking_iterator const&
+ operator++() {
+ ++m_iter;
+ return *this;
}
- size_t erase(KEY const& key)
- {
- boost::unique_lock< boost::shared_mutex > lock(m_lock);
- return m_container.erase(key);
- }
+ // DO NOT DEFINE THE POST-INCREMENT OPERATOR!
+ // locking_operators are non-copyable,
+ // so we can't simply make a copy before incrementing and return
+ // the copy after incrementing
+ locking_iterator const&
+ operator++(int);
};
+
+ const_iter_t const& end() const {
+ return m_container.end();
+ }
+
+ locking_iterator begin() const {
+ return locking_iterator(m_lock, this, m_container.begin());
+ }
+
+ VAL const& set(KEY const& key, VAL const& val) {
+ boost::unique_lock< boost::shared_mutex > lock(m_lock);
+ entry_t entry(key,val);
+ iter_t foo = m_container.insert(entry).first;
+ foo->second = val;
+ return foo->second;
+ }
+
+ VAL const* get(KEY const& key, VAL const& default_val) {
+ boost::shared_lock< boost::shared_mutex > lock(m_lock);
+ entry_t entry(key, default_val);
+ iter_t foo = m_container.insert(entry).first;
+ return &(foo->second);
+ }
+
+ VAL const* get(KEY const& key) const {
+ boost::shared_lock< boost::shared_mutex > lock(m_lock);
+ const_iter_t m = m_container.find(key);
+ if (m == m_container.end()) return NULL;
+ return &m->second;
+ }
+
+ size_t erase(KEY const& key) {
+ boost::unique_lock< boost::shared_mutex > lock(m_lock);
+ return m_container.erase(key);
+ }
+};
}
#endif
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index 3fa380d4f..58935a727 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -146,7 +146,7 @@ public:
void AddProperty(const std::string &key, const std::string &value, float count) {
std::map<std::string,
- std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
+ std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
if ( iter == m_properties.end() ) {
// key not found: insert property key and value
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
diff --git a/phrase-extract/PropertiesConsolidator.cpp b/phrase-extract/PropertiesConsolidator.cpp
index 59c56b54b..94b6ea13a 100644
--- a/phrase-extract/PropertiesConsolidator.cpp
+++ b/phrase-extract/PropertiesConsolidator.cpp
@@ -116,18 +116,18 @@ void PropertiesConsolidator::ProcessPropertiesString(const std::string &properti
} else if ( !keyValue[0].compare("POS") ) {
-/* DO NOTHING (property is not registered in the decoder at the moment)
- if ( m_partsOfSpeechFlag ) {
-
- // POS property: replace strings with vocabulary indices
- out << " {{" << keyValue[0];
- ProcessPOSPropertyValue(keyValue[1], out);
- out << "}}";
-
- } else { // don't process POS property
- out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
- }
-*/
+ /* DO NOTHING (property is not registered in the decoder at the moment)
+ if ( m_partsOfSpeechFlag ) {
+
+ // POS property: replace strings with vocabulary indices
+ out << " {{" << keyValue[0];
+ ProcessPOSPropertyValue(keyValue[1], out);
+ out << "}}";
+
+ } else { // don't process POS property
+ out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
+ }
+ */
} else {
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 7e9a3ec0a..6468b7473 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -699,12 +699,12 @@ void ExtractGHKM::WriteGlueGrammar(
// const size_t partOfSpeechSentenceStart = 0;
// const size_t partOfSpeechSentenceEnd = 1;
- #ifndef BOS_
- #define BOS_ "<s>" //Beginning of sentence symbol
- #endif
- #ifndef EOS_
- #define EOS_ "</s>" //End of sentence symbol
- #endif
+#ifndef BOS_
+#define BOS_ "<s>" //Beginning of sentence symbol
+#endif
+#ifndef EOS_
+#define EOS_ "</s>" //End of sentence symbol
+#endif
std::string sentenceStartSource = BOS_;
std::string sentenceEndSource = EOS_;
diff --git a/phrase-extract/filter-rule-table/TreeCfgFilter.cpp b/phrase-extract/filter-rule-table/TreeCfgFilter.cpp
index 153c706f3..cb04dc94e 100644
--- a/phrase-extract/filter-rule-table/TreeCfgFilter.cpp
+++ b/phrase-extract/filter-rule-table/TreeCfgFilter.cpp
@@ -12,7 +12,7 @@ namespace FilterRuleTable
{
TreeCfgFilter::TreeCfgFilter(
- const std::vector<boost::shared_ptr<StringTree> > &sentences)
+ const std::vector<boost::shared_ptr<StringTree> > &sentences)
{
}
diff --git a/phrase-extract/filter-rule-table/TreeCfgFilter.h b/phrase-extract/filter-rule-table/TreeCfgFilter.h
index 5812a6dcc..7dd0fa072 100644
--- a/phrase-extract/filter-rule-table/TreeCfgFilter.h
+++ b/phrase-extract/filter-rule-table/TreeCfgFilter.h
@@ -25,8 +25,9 @@ namespace FilterRuleTable
// Filters a rule table, discarding rules that cannot be applied to a given
// test set. The rule table must have a TSG source-side and the test sentences
// must be parse trees.
-class TreeCfgFilter : public CfgFilter {
- public:
+class TreeCfgFilter : public CfgFilter
+{
+public:
// Initialize the filter for a given set of test sentences.
TreeCfgFilter(const std::vector<boost::shared_ptr<StringTree> > &);
diff --git a/phrase-extract/postprocess-egret-forests/Forest.h b/phrase-extract/postprocess-egret-forests/Forest.h
index 88344e0d7..7f00ecb88 100644
--- a/phrase-extract/postprocess-egret-forests/Forest.h
+++ b/phrase-extract/postprocess-egret-forests/Forest.h
@@ -15,7 +15,7 @@ namespace PostprocessEgretForests
class Forest
{
- public:
+public:
struct Vertex;
struct Hyperedge {
@@ -35,7 +35,7 @@ class Forest
std::vector<boost::shared_ptr<Vertex> > vertices;
- private:
+private:
// Copying is not allowed.
Forest(const Forest &);
Forest &operator=(const Forest &);
diff --git a/phrase-extract/postprocess-egret-forests/ForestParser.cpp b/phrase-extract/postprocess-egret-forests/ForestParser.cpp
index 565117ace..21e479ca6 100644
--- a/phrase-extract/postprocess-egret-forests/ForestParser.cpp
+++ b/phrase-extract/postprocess-egret-forests/ForestParser.cpp
@@ -17,15 +17,18 @@ namespace PostprocessEgretForests
{
ForestParser::ForestParser()
- : m_input(0) {
+ : m_input(0)
+{
}
ForestParser::ForestParser(std::istream &input)
- : m_input(&input) {
+ : m_input(&input)
+{
++(*this);
}
-ForestParser &ForestParser::operator++() {
+ForestParser &ForestParser::operator++()
+{
if (!m_input) {
return *this;
}
@@ -106,7 +109,7 @@ void ForestParser::ParseHyperedgeLine(const std::string &line, Forest &forest)
}
boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
- const StringPiece &s)
+ const StringPiece &s)
{
VertexSP v = boost::make_shared<Forest::Vertex>();
std::size_t pos = s.rfind('[');
@@ -132,12 +135,14 @@ boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
return v;
}
-bool operator==(const ForestParser &lhs, const ForestParser &rhs) {
+bool operator==(const ForestParser &lhs, const ForestParser &rhs)
+{
// TODO Is this right? Compare values of istreams if non-zero?
return lhs.m_input == rhs.m_input;
}
-bool operator!=(const ForestParser &lhs, const ForestParser &rhs) {
+bool operator!=(const ForestParser &lhs, const ForestParser &rhs)
+{
return !(lhs == rhs);
}
diff --git a/phrase-extract/postprocess-egret-forests/ForestParser.h b/phrase-extract/postprocess-egret-forests/ForestParser.h
index ee9ced56a..7f0b6f297 100644
--- a/phrase-extract/postprocess-egret-forests/ForestParser.h
+++ b/phrase-extract/postprocess-egret-forests/ForestParser.h
@@ -20,8 +20,9 @@ namespace Syntax
namespace PostprocessEgretForests
{
-class ForestParser {
- public:
+class ForestParser
+{
+public:
struct Entry {
std::size_t sentNum;
std::string sentence;
@@ -31,15 +32,19 @@ class ForestParser {
ForestParser();
ForestParser(std::istream &);
- Entry &operator*() { return m_entry; }
- Entry *operator->() { return &m_entry; }
+ Entry &operator*() {
+ return m_entry;
+ }
+ Entry *operator->() {
+ return &m_entry;
+ }
ForestParser &operator++();
friend bool operator==(const ForestParser &, const ForestParser &);
friend bool operator!=(const ForestParser &, const ForestParser &);
- private:
+private:
typedef boost::shared_ptr<Forest::Vertex> VertexSP;
typedef boost::shared_ptr<Forest::Hyperedge> HyperedgeSP;
@@ -60,7 +65,7 @@ class ForestParser {
};
typedef boost::unordered_set<VertexSP, VertexSetHash,
- VertexSetPred> VertexSet;
+ VertexSetPred> VertexSet;
// Copying is not allowed
ForestParser(const ForestParser &);
diff --git a/phrase-extract/postprocess-egret-forests/ForestWriter.cpp b/phrase-extract/postprocess-egret-forests/ForestWriter.cpp
index 7d8360098..54a2cbed9 100644
--- a/phrase-extract/postprocess-egret-forests/ForestWriter.cpp
+++ b/phrase-extract/postprocess-egret-forests/ForestWriter.cpp
@@ -61,7 +61,8 @@ void ForestWriter::WriteVertex(const Forest::Vertex &v)
}
}
-std::string ForestWriter::PossiblyEscape(const std::string &s) const {
+std::string ForestWriter::PossiblyEscape(const std::string &s) const
+{
if (m_options.escape) {
return Escape(s);
} else {
@@ -70,7 +71,8 @@ std::string ForestWriter::PossiblyEscape(const std::string &s) const {
}
// Escapes XML special characters.
-std::string ForestWriter::Escape(const std::string &s) const {
+std::string ForestWriter::Escape(const std::string &s) const
+{
std::string t;
std::size_t len = s.size();
t.reserve(len);
diff --git a/phrase-extract/postprocess-egret-forests/ForestWriter.h b/phrase-extract/postprocess-egret-forests/ForestWriter.h
index ae3cf028d..10c1fe05c 100644
--- a/phrase-extract/postprocess-egret-forests/ForestWriter.h
+++ b/phrase-extract/postprocess-egret-forests/ForestWriter.h
@@ -15,13 +15,13 @@ namespace PostprocessEgretForests
class ForestWriter
{
- public:
+public:
ForestWriter(const Options &options, std::ostream &out)
: m_options(options), m_out(out) {}
void Write(const std::string &, const Forest &, std::size_t);
- private:
+private:
std::string Escape(const std::string &) const;
std::string PossiblyEscape(const std::string &) const;
void WriteHyperedgeLine(const Forest::Hyperedge &);
diff --git a/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp b/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp
index 92a9d195f..d87e082dc 100644
--- a/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp
+++ b/phrase-extract/postprocess-egret-forests/PostprocessEgretForests.cpp
@@ -47,8 +47,8 @@ int PostprocessEgretForests::Main(int argc, char *argv[])
}
void PostprocessEgretForests::ProcessForest(
- std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
- const Options &options)
+ std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
+ const Options &options)
{
std::size_t sentNum = 0;
ForestWriter writer(options, out);
@@ -77,7 +77,7 @@ void PostprocessEgretForests::ProcessForest(
}
void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
- std::ifstream &stream)
+ std::ifstream &stream)
{
stream.open(filename.c_str());
if (!stream) {
@@ -88,7 +88,7 @@ void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
}
void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
- Options &options) const
+ Options &options) const
{
namespace po = boost::program_options;
namespace cls = boost::program_options::command_line_style;
@@ -119,7 +119,7 @@ void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
// (these are used as positional options).
po::options_description hidden("Hidden options");
hidden.add_options()
- // None
+ // None
;
// Compose the full set of command-line options.
diff --git a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp
index 5b3ce7a54..4bf3c4792 100644
--- a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp
+++ b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.cpp
@@ -16,15 +16,18 @@ namespace PostprocessEgretForests
{
SplitPointFileParser::SplitPointFileParser()
- : m_input(0) {
+ : m_input(0)
+{
}
SplitPointFileParser::SplitPointFileParser(std::istream &input)
- : m_input(&input) {
+ : m_input(&input)
+{
++(*this);
}
-SplitPointFileParser &SplitPointFileParser::operator++() {
+SplitPointFileParser &SplitPointFileParser::operator++()
+{
if (!m_input) {
return *this;
}
@@ -66,13 +69,15 @@ void SplitPointFileParser::ParseLine(const std::string &line,
}
bool operator==(const SplitPointFileParser &lhs,
- const SplitPointFileParser &rhs) {
+ const SplitPointFileParser &rhs)
+{
// TODO Is this right? Compare values of istreams if non-zero?
return lhs.m_input == rhs.m_input;
}
bool operator!=(const SplitPointFileParser &lhs,
- const SplitPointFileParser &rhs) {
+ const SplitPointFileParser &rhs)
+{
return !(lhs == rhs);
}
diff --git a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h
index a2d700971..35fdb3ad2 100644
--- a/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h
+++ b/phrase-extract/postprocess-egret-forests/SplitPointFileParser.h
@@ -13,8 +13,9 @@ namespace Syntax
namespace PostprocessEgretForests
{
-class SplitPointFileParser {
- public:
+class SplitPointFileParser
+{
+public:
struct Entry {
std::vector<SplitPoint> splitPoints;
};
@@ -22,8 +23,12 @@ class SplitPointFileParser {
SplitPointFileParser();
SplitPointFileParser(std::istream &);
- const Entry &operator*() const { return m_entry; }
- const Entry *operator->() const { return &m_entry; }
+ const Entry &operator*() const {
+ return m_entry;
+ }
+ const Entry *operator->() const {
+ return &m_entry;
+ }
SplitPointFileParser &operator++();
@@ -33,7 +38,7 @@ class SplitPointFileParser {
friend bool operator!=(const SplitPointFileParser &,
const SplitPointFileParser &);
- private:
+private:
void ParseLine(const std::string &, std::vector<SplitPoint> &);
Entry m_entry;
diff --git a/phrase-extract/postprocess-egret-forests/Symbol.h b/phrase-extract/postprocess-egret-forests/Symbol.h
index f00f642d3..1b8929f49 100644
--- a/phrase-extract/postprocess-egret-forests/Symbol.h
+++ b/phrase-extract/postprocess-egret-forests/Symbol.h
@@ -30,14 +30,14 @@ inline bool operator==(const Symbol &s, const Symbol &t)
}
struct SymbolHasher {
- public:
+public:
std::size_t operator()(const Symbol &s) const {
return hash_value(s);
}
};
struct SymbolEqualityPred {
- public:
+public:
bool operator()(const Symbol &s, const Symbol &t) const {
return s.value == t.value && s.isNonTerminal == t.isNonTerminal;
}
diff --git a/phrase-extract/postprocess-egret-forests/TopologicalSorter.h b/phrase-extract/postprocess-egret-forests/TopologicalSorter.h
index 96f19a8e9..7ed667369 100644
--- a/phrase-extract/postprocess-egret-forests/TopologicalSorter.h
+++ b/phrase-extract/postprocess-egret-forests/TopologicalSorter.h
@@ -16,10 +16,10 @@ namespace PostprocessEgretForests
class TopologicalSorter
{
- public:
+public:
void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
- private:
+private:
typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
void BuildPredSets(const Forest &);
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index d56b8f379..b65dce4ba 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -315,7 +315,6 @@ int main(int argc, char* argv[])
// loop through all extracted phrase translations
std::string line, lastLine;
- lastLine[0] = '\0';
ExtractionPhrasePair *phrasePair = NULL;
std::vector< ExtractionPhrasePair* > phrasePairsWithSameSource;
std::vector< ExtractionPhrasePair* > phrasePairsWithSameSourceAndTarget; // required for hierarchical rules only, as non-terminal alignments might make the phrases incompatible
@@ -901,7 +900,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
}
if (nonTermContext && !inverseFlag) {
- std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
+ std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
if (!propValue.empty() && propValue.size() < 50000) {
size_t nNTs = NumNonTerminal(phraseSource);
phraseTableFile << " {{NonTermContext " << nNTs << " " << propValue << "}}";
@@ -909,7 +908,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
}
if (nonTermContextTarget && !inverseFlag) {
- std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget");
+ std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget");
if (!propValue.empty() && propValue.size() < 50000) {
size_t nNTs = NumNonTerminal(phraseSource);
phraseTableFile << " {{NonTermContextTarget " << nNTs << " " << propValue << "}}";
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 57ef4f9d6..d1448ef44 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -233,6 +233,8 @@ train
template: $lm-training -order $order $settings -text IN -lm OUT
error: cannot execute binary file
error: unrecognised option
+ not-error: BadDiscountException
+ not-error: To override this error
randomize
in: lm
out: rlm
@@ -309,8 +311,14 @@ split-tuning
default-name: lm/interpolate-tuning.split
pass-unless: output-splitter
template: $output-splitter -model IN1.$output-extension < IN > OUT
+strip-tuning
+ in: split-tuning
+ out: stripped-tuning
+ default-name: lm/interpolate-tuning.stripped
+ pass-unless: mock-output-parser-lm
+ template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
interpolate
- in: script split-tuning LM:lm
+ in: script stripped-tuning LM:lm
rerun-on-change: srilm-dir group weights
out: lm
default-name: lm/interpolated-lm
@@ -466,14 +474,32 @@ fast-align
in: prepared-data-fast-align
out: fast-alignment
rerun-on-change: fast-align-settings
+ ignore-if: fast-align-max-lines
template: $external-bin-dir/fast_align -i IN $fast-align-settings > OUT
default-name: fast-align
fast-align-inverse
in: prepared-data-fast-align
out: fast-alignment-inverse
rerun-on-change: fast-align-settings
+ ignore-if: fast-align-max-lines
template: $external-bin-dir/fast_align -i IN -r $fast-align-settings > OUT
default-name: fast-align-inverse
+fast-align-in-parts
+ in: prepared-data-fast-align
+ out: fast-alignment
+ rerun-on-change: fast-align-settings fast-align-max-lines
+ ignore-unless: fast-align-max-lines
+ tmp-name: training/tmp.fast-align
+ template: $moses-script-dir/ems/support/fast-align-in-parts.perl -bin $external-bin-dir/fast_align -i IN -max-lines $fast-align-max-lines -tmp TMP -settings '$fast-align-settings' > OUT
+ default-name: fast-align
+fast-align-in-parts-inverse
+ in: prepared-data-fast-align
+ out: fast-alignment-inverse
+ rerun-on-change: fast-align-settings fast-align-max-lines
+ ignore-unless: fast-align-max-lines
+ tmp-name: training/tmp.fast-align-inverse
+ template: $moses-script-dir/ems/support/fast-align-in-parts.perl -bin $external-bin-dir/fast_align -i IN -r -max-lines $fast-align-max-lines -tmp TMP -settings '$fast-align-settings' > OUT
+ default-name: fast-align
symmetrize-fast-align
in: fast-alignment fast-alignment-inverse corpus-mml-prefilter=OR=corpus
out: word-alignment
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 7070a7c9e..8e23b7b18 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -312,10 +312,10 @@ sub read_meta {
$ONLY_FACTOR_0{"$module:$step"}++;
}
elsif ($1 eq "error") {
- @{$ERROR{"$module:$step"}} = split(/,/,$2);
+ push @{$ERROR{"$module:$step"}}, $2;
}
elsif ($1 eq "not-error") {
- @{$NOT_ERROR{"$module:$step"}} = split(/,/,$2);
+ push @{$NOT_ERROR{"$module:$step"}}, $2;
}
else {
die("META ERROR unknown parameter: $1");
@@ -1282,10 +1282,10 @@ sub execute_steps {
&write_info($i);
# cluster job submission
- if ($CLUSTER && ! &is_qsub_script($i)) {
+ if ($CLUSTER && (!&is_qsub_script($i) || (&backoff_and_get($DO_STEP[$i].":jobs") && (&backoff_and_get($DO_STEP[$i].":jobs")==1)))) {
$DO{$i}++;
my $qsub_args = &get_qsub_args($DO_STEP[$i]);
- print "\texecuting $step via qsub ($active active)\n";
+ print "\texecuting $step via qsub $qsub_args ($active active)\n";
my $qsub_command="qsub $qsub_args -S /bin/bash -e $step.STDERR -o $step.STDOUT $step";
print "\t$qsub_command\n" if $VERBOSE;
`$qsub_command`;
@@ -1338,15 +1338,15 @@ sub execute_steps {
sub get_qsub_args {
my ($step) = @_;
- my $qsub_args = &get("$step:qsub-settings");
- $qsub_args = &get("GENERAL:qsub-settings") unless defined($qsub_args);
+ my $qsub_args = &backoff_and_get("$step:qsub-settings");
$qsub_args = "" unless defined($qsub_args);
my $memory = &get("$step:qsub-memory");
$qsub_args .= " -pe memory $memory" if defined($memory);
my $hours = &get("$step:qsub-hours");
$qsub_args .= " -l h_rt=$hours:0:0" if defined($hours);
my $project = &backoff_and_get("$step:qsub-project");
- $qsub_args = "-P $project" if defined($project);
+ $qsub_args .= " -P $project" if defined($project);
+ $qsub_args =~ s/^ //;
print "qsub args: $qsub_args\n" if $VERBOSE;
return $qsub_args;
}
@@ -1880,7 +1880,7 @@ sub define_tuning_tune {
my $decoder_settings = &backoff_and_get("TUNING:decoder-settings");
$decoder_settings = "" unless $decoder_settings;
- $decoder_settings .= " -v 0 " unless $CLUSTER && $jobs;
+ $decoder_settings .= " -v 0 " unless $CLUSTER && $jobs && $jobs>1;
my $tuning_settings = &backoff_and_get("TUNING:tuning-settings");
$tuning_settings = "" unless $tuning_settings;
@@ -1891,9 +1891,9 @@ sub define_tuning_tune {
$cmd .= " --skip-decoder" if $skip_decoder;
$cmd .= " --inputtype $tune_inputtype" if defined($tune_inputtype);
- my $qsub_args = &get_qsub_args("TUNING");
+ my $qsub_args = &get_qsub_args($DO_STEP[$step_id]);
$cmd .= " --queue-flags=\"$qsub_args\"" if ($CLUSTER && $qsub_args);
- $cmd .= " --jobs $jobs" if $CLUSTER && $jobs;
+ $cmd .= " --jobs $jobs" if $CLUSTER && $jobs && $jobs>1;
my $tuning_dir = $tuned_config;
$tuning_dir =~ s/\/[^\/]+$//;
$cmd .= "\nmkdir -p $tuning_dir";
@@ -2576,6 +2576,7 @@ sub define_training_create_config {
my $set = shift @LM_SETS;
next if defined($INTERPOLATED_AWAY{$set});
my $order = &check_backoff_and_get("LM:$set:order");
+
my $lm_file = "$lm";
my $type = 0; # default: SRILM
@@ -2591,6 +2592,13 @@ sub define_training_create_config {
# manually set type
$type = &backoff_and_get("LM:$set:type") if (&backoff_and_get("LM:$set:type"));
+ # binarized by INTERPOLATED-LM
+ if (&get("INTERPOLATED-LM:lm-binarizer")) {
+ $lm_file =~ s/\.lm/\.binlm/;
+ $type = 1;
+ $type = &get("INTERPOLATED-LM:type") if &get("INTERPOLATED-LM:type");
+ }
+
# which factor is the model trained on?
my $factor = 0;
if (&backoff_and_get("TRAINING:output-factors") &&
@@ -2696,7 +2704,7 @@ sub define_interpolated_lm_interpolate {
sub define_interpolated_lm_process {
my ($step_id) = @_;
- my ($processed_lm, $interpolatd_lm) = &get_output_and_input($step_id);
+ my ($processed_lm, $interpolated_lm) = &get_output_and_input($step_id);
my ($module,$set,$stepname) = &deconstruct_name($DO_STEP[$step_id]);
my $tool = &check_backoff_and_get("INTERPOLATED-LM:lm-${stepname}r");
my $FACTOR = &backoff_and_get_array("TRAINING:output-factors");
@@ -2706,11 +2714,23 @@ sub define_interpolated_lm_process {
my $cmd = "";
foreach my $factor (keys %{$ILM_SETS}) {
foreach my $order (keys %{$$ILM_SETS{$factor}}) {
- next unless scalar(@{$$ILM_SETS{$factor}{$order}}) > 1;
- my $suffix = "";
- $suffix = ".$$FACTOR[$factor]" if $icount > 1 && defined($FACTOR);
- $suffix .= ".order$order" if $icount > 1;
- $cmd .= "$tool $interpolatd_lm$suffix $processed_lm$suffix\n";
+ my ($name,$name_processed);
+ if (scalar(@{$$ILM_SETS{$factor}{$order}}) == 1) {
+ # not interpolated -> get name from LM version of these steps
+ my($id,$set) = split(/ /,$$ILM_SETS{$factor}{$order}[0]);
+ $name = &get_default_file("LM",$set,"train"); # well... works for now;
+ $name_processed = $STEP_OUTNAME{"LM:$stepname"};
+ $name_processed =~ s/^(.+\/)([^\/]+)$/$1$set.$2/;
+ $name_processed = &versionize(&long_file_name($name_processed,"lm",""));
+ }
+ else {
+ my $suffix = "";
+ $suffix = ".$$FACTOR[$factor]" if $icount > 1 && defined($FACTOR);
+ $suffix .= ".order$order" if $icount > 1;
+ $name = "$interpolated_lm$suffix";
+ $name_processed = "$processed_lm$suffix";
+ }
+ $cmd .= "$tool $name $name_processed\n";
}
}
@@ -3072,7 +3092,7 @@ sub define_evaluation_decode {
my $nbest_size;
$nbest_size = $nbest if $nbest;
$nbest_size =~ s/[^\d]//g if $nbest;
- if ($jobs && $CLUSTER) {
+ if ($jobs && $jobs>1 && $CLUSTER) {
$cmd .= "mkdir -p $dir/evaluation/tmp.$set.$VERSION\n";
$cmd .= "cd $dir/evaluation/tmp.$set.$VERSION\n";
if (defined $moses_parallel) {
@@ -3496,9 +3516,15 @@ sub check_backoff_and_get_array {
return $CONFIG{$parameter} if defined($CONFIG{$parameter});
# remove set -> find setting for module
- $parameter =~ s/:.*:/:/;
+ $parameter =~ s/:[^:]+:/:/;
return $CONFIG{$parameter} if defined($CONFIG{$parameter});
+ # remove step (if exists)
+ if ($parameter =~ /:[^:]+:/) {
+ $parameter =~ s/:[^:]+:/:/;
+ return $CONFIG{$parameter} if defined($CONFIG{$parameter});
+ }
+
# remove model -> find global setting
$parameter =~ s/^[^:]+:/GENERAL:/;
return $CONFIG{$parameter} if defined($CONFIG{$parameter});
diff --git a/scripts/ems/support/build-sparse-features.perl b/scripts/ems/support/build-sparse-features.perl
index 5d9b786ad..3f4b505d5 100755
--- a/scripts/ems/support/build-sparse-features.perl
+++ b/scripts/ems/support/build-sparse-features.perl
@@ -12,15 +12,17 @@ use strict;
my ($corpus,$input_extension,$output_extension,$outfile_prefix,$specification) = @ARGV;
my $ini = "[feature]\n";
my %ALREADY;
+my %ID;
foreach my $feature_spec (split(/,\s*/,$specification)) {
my @SPEC = split(/\s+/,$feature_spec);
my $factor = ($SPEC[0] eq 'word-translation') ? "0-0" : "0";
$factor = $1 if $feature_spec =~ / factor ([\d\-]+)/;
+ $feature_spec =~ s/ factor ([\d\-]+)//;
if ($SPEC[0] eq 'target-word-insertion') {
- $ini .= "TargetWordInsertionFeature name=TWI factor=$factor";
+ $ini .= "TargetWordInsertionFeature name=TWI".&get_id($SPEC[0])." factor=$factor";
if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/) {
my $file = &create_top_words($output_extension, $SPEC[2]);
@@ -34,7 +36,7 @@ foreach my $feature_spec (split(/,\s*/,$specification)) {
$ini .= "\n";
}
elsif ($SPEC[0] eq 'source-word-deletion') {
- $ini .= "SourceWordDeletionFeature name=SWD factor=$factor";
+ $ini .= "SourceWordDeletionFeature name=SWD".&get_id($SPEC[0])." factor=$factor";
if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/) {
my $file = &create_top_words($input_extension, $SPEC[2]);
$ini .= " path=$file";
@@ -60,7 +62,7 @@ foreach my $feature_spec (split(/,\s*/,$specification)) {
die("ERROR: Unknown parameter specification in '$SPEC[1]'\n");
}
my ($input_factor,$output_factor) = split(/\-/,$factor);
- $ini .= "WordTranslationFeature name=WT input-factor=$input_factor output-factor=$output_factor simple=1 source-context=0 target-context=0$extra_ini\n";
+ $ini .= "WordTranslationFeature name=WT".&get_id($SPEC[0])." input-factor=$input_factor output-factor=$output_factor simple=1 source-context=0 target-context=0$extra_ini\n";
}
elsif ($SPEC[0] eq 'phrase-length') {
$ini .= "PhraseLengthFeature name=PL\n";
@@ -111,3 +113,11 @@ sub create_top_words {
return $file;
}
+
+sub get_id {
+ my ($name) = @_;
+ $ID{$name}++;
+ return "" if $ID{$name} == 1;
+ return $ID{$name};
+}
+
diff --git a/scripts/ems/support/fast-align-in-parts.perl b/scripts/ems/support/fast-align-in-parts.perl
new file mode 100755
index 000000000..fa501b454
--- /dev/null
+++ b/scripts/ems/support/fast-align-in-parts.perl
@@ -0,0 +1,91 @@
+#!/usr/bin/env perl
+
+#######################
+# Revision history
+#
+# 28 Apr 2015 first version
+
+use warnings;
+use strict;
+use Getopt::Long qw(:config pass_through no_ignore_case permute);
+
+my ($BIN,$IN,$MAX_LINES,$SETTINGS,$REVERSE,$TMP);
+
+GetOptions('bin=s' => \$BIN,
+ 'i=s' => \$IN,
+ 'max-lines=i' => \$MAX_LINES,
+ 'settings=s' => \$SETTINGS,
+ 'r' => \$REVERSE,
+ 'tmp=s' => \$TMP,
+ ) or exit(1);
+
+die("ERROR - usage: fast-align-in-parts.perl -bin FAST_ALIGN_BIN -i PARALLEL_CORPUS -max-lines COUNT -settings CONFIG [-r] -tmp TMPDIR")
+ unless defined($BIN) && defined($IN) && defined($SETTINGS) && defined($TMP) && defined($MAX_LINES)
+ && $MAX_LINES > 0;
+die("ERROR - input file does not exist: $IN") unless -e $IN;
+die("ERROR - fast_align binary does not exist: $BIN") unless -e $BIN;
+
+chomp(my $line_count = `cat $IN | wc -l`);
+
+# not more than maximal number of lines -> just run it regularly
+if ($MAX_LINES > $line_count) {
+ my $cmd = "$BIN -i $IN $SETTINGS";
+ $cmd .= " -r" if defined($REVERSE);
+ safesystem($cmd) or die;
+ exit(0);
+}
+
+my $cmd = "mkdir -p $TMP";
+safesystem($cmd) or die;
+
+# split input
+$cmd = "split -a 2 -l $MAX_LINES $IN $TMP/prepared-";
+safesystem($cmd) or die;
+
+# process
+my @INPUT_FILES = `ls $TMP/prepared-*`;
+chop(@INPUT_FILES);
+foreach my $input_file (@INPUT_FILES) {
+ # create output file name
+ die("ERROR") unless $input_file =~ /prepared-(..)$/;
+ my $output_file = "$TMP/aligned-$1";
+
+ # process part
+ my $cmd = "$BIN -i $input_file $SETTINGS";
+ $cmd .= " -r" if defined($REVERSE);
+ $cmd .= " >$output_file";
+ safesystem($cmd) or die;
+ die("ERROR: no output produced from command $cmd") unless -e $output_file;
+
+ # check line count
+ chomp(my $input_line_count = `cat $input_file | wc -l`);
+ chomp(my $output_line_count = `cat $output_file | wc -l`);
+ die("ERROR: mismatched number of lines in part $1\n\t$input_line_count\t$input_file\n\t$output_line_count\t$output_file\n") unless $input_line_count == $output_line_count;
+}
+
+# join output
+$cmd = "cat $TMP/aligned-*";
+safesystem($cmd) or die;
+
+$cmd = "rm -r $TMP/* ; rmdir $TMP";
+safesystem($cmd);
+
+sub safesystem {
+ print STDERR "Executing: @_\n";
+ system(@_);
+ if ($? == -1) {
+ print STDERR "Failed to execute: @_\n $!\n";
+ exit(1);
+ }
+ elsif ($? & 127) {
+ printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
+ ($? & 127), ($? & 128) ? 'with' : 'without';
+ exit 1;
+ }
+ else {
+ my $exitcode = $? >> 8;
+ print STDERR "Exit code: $exitcode\n" if $exitcode;
+ return ! $exitcode;
+ }
+}
+
diff --git a/scripts/ems/support/generic-parallelizer.perl b/scripts/ems/support/generic-parallelizer.perl
index 0b248be7e..fd7fb2552 100755
--- a/scripts/ems/support/generic-parallelizer.perl
+++ b/scripts/ems/support/generic-parallelizer.perl
@@ -4,7 +4,7 @@ use warnings;
use strict;
my $jobs = 20;
-my ($infile,$outfile,$cmd,$tmpdir);
+my ($infile,$outfile,$cmd,$tmpdir,$qflags);
use Getopt::Long qw(:config pass_through no_ignore_case);
GetOptions('jobs=i' => \$jobs,
@@ -12,7 +12,7 @@ GetOptions('jobs=i' => \$jobs,
'in=s' => \$infile,
'out=s' => \$outfile,
'cmd=s' => \$cmd,
- 'queue-flags=s' => \$qflags,
+ 'queue-flags=s' => \$qflags,
) or exit(1);
die("ERROR: specify infile with -in") unless $infile;
diff --git a/scripts/ems/support/lmplz-wrapper.perl b/scripts/ems/support/lmplz-wrapper.perl
index eadca6263..0f1e03d15 100755
--- a/scripts/ems/support/lmplz-wrapper.perl
+++ b/scripts/ems/support/lmplz-wrapper.perl
@@ -7,11 +7,12 @@ use Getopt::Long "GetOptions";
Getopt::Long::config("no_auto_abbrev");
Getopt::Long::config("pass_through");
-
-my ($TEXT,$ORDER,$BIN,$LM);
+my ($TEXT,$ORDER,$BIN,$LM,$MEMORY,$TMPDIR);
&GetOptions('text=s' => \$TEXT,
'lm=s' => \$LM,
+ 'S=s' => \$MEMORY,
+ 'T=s' => \$TMPDIR,
'bin=s' => \$BIN,
'order=i' => \$ORDER);
@@ -19,8 +20,9 @@ die("ERROR: specify at least --bin BIN --text CORPUS --lm LM and --order N!")
unless defined($BIN) && defined($TEXT) && defined($LM) && defined($ORDER);
my $settings = join(' ', @ARGV);
-#print STDERR "settngs=$settings \n";
-
my $cmd = "$BIN --text $TEXT --order $ORDER --arpa $LM $settings";
+$cmd .= " -T $TMPDIR" if defined($TMPDIR);
+$cmd .= " -S $MEMORY" if defined($MEMORY);
+$cmd .= " " . join(' ', @ARGV) if scalar(@ARGV); # Pass remaining args through.
print "exec: $cmd\n";
`$cmd`;
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 86084abbf..a7263d4bd 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -76,7 +76,7 @@ my $___N_BEST_LIST_SIZE = 100;
my $___LATTICE_SAMPLES = 0;
my $queue_flags = "-hard"; # extra parameters for parallelizer
# the -l ws0ssmt was relevant only to JHU 2006 workshop
-my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial)
+my $___JOBS = undef; # if parallel, number of jobs to use (undef or <= 0 -> serial)
my $___DECODER_FLAGS = ""; # additional parametrs to pass to the decoder
my $continue = 0; # should we try to continue from the last saved step?
my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
@@ -544,7 +544,7 @@ if ($__PROMIX_TRAINING) {
my $___FILTER_F = $___DEV_F;
$___FILTER_F = $filterfile if (defined $filterfile);
my $cmd = "$filtercmd ./$filtered_path $filtered_config $___FILTER_F";
- &submit_or_exec($cmd, "filterphrases_$i.out", "filterphrases_$i.err");
+ &submit_or_exec($cmd, "filterphrases_$i.out", "filterphrases_$i.err", 1);
push (@_PROMIX_TABLES_BIN,"$filtered_path/phrase-table.0-0.1.1");
}
}
@@ -559,7 +559,7 @@ if ($___FILTER_PHRASE_TABLE) {
my $___FILTER_F = $___DEV_F;
$___FILTER_F = $filterfile if (defined $filterfile);
my $cmd = "$filtercmd ./$outdir $___CONFIG $___FILTER_F";
- &submit_or_exec($cmd, "filterphrases.out", "filterphrases.err");
+ &submit_or_exec($cmd, "filterphrases.out", "filterphrases.err", 1);
}
# make a backup copy of startup ini filepath
@@ -829,7 +829,7 @@ while (1) {
# remove segmentation
$cmd .= " -l $__REMOVE_SEGMENTATION" if $__PROMIX_TRAINING;
$cmd = &create_extractor_script($cmd, $___WORKING_DIR);
- &submit_or_exec($cmd, "extract.out","extract.err");
+ &submit_or_exec($cmd, "extract.out","extract.err", 1);
}
# Create the initial weights file for mert: init.opt
@@ -919,11 +919,11 @@ while (1) {
my $pro_optimizer_cmd = "$pro_optimizer $megam_default_options run$run.pro.data";
if ($___PAIRWISE_RANKED_OPTIMIZER) { # pro optimization
$cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer_cmd";
- &submit_or_exec($cmd, $mert_outfile, $mert_logfile);
+ &submit_or_exec($cmd, $mert_outfile, $mert_logfile, 1);
} elsif ($___PRO_STARTING_POINT) { # First, run pro, then mert
# run pro...
my $pro_cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer_cmd";
- &submit_or_exec($pro_cmd, "run$run.pro.out", "run$run.pro.err");
+ &submit_or_exec($pro_cmd, "run$run.pro.out", "run$run.pro.err", 1);
# ... get results ...
($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%sparse_weights, \@promix_weights);
# Get the pro outputs ready for mert. Add the weight ranges,
@@ -951,11 +951,11 @@ while (1) {
# ... and run mert
$cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
- &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
+ &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile, ($__THREADS ? $__THREADS : 1) );
} elsif ($___BATCH_MIRA) { # batch MIRA optimization
safesystem("echo 'not used' > $weights_out_file") or die;
$cmd = "$mert_mira_cmd $mira_settings $seed_settings $pro_file_settings -o $mert_outfile";
- &submit_or_exec($cmd, "run$run.mira.out", $mert_logfile);
+ &submit_or_exec($cmd, "run$run.mira.out", $mert_logfile, 1);
} elsif ($___HG_MIRA) {
safesystem("echo 'not used' > $weights_out_file") or die;
$mira_settings .= " --type hypergraph ";
@@ -963,7 +963,7 @@ while (1) {
$mira_settings .= " --hgdir $hypergraph_dir ";
#$mira_settings .= "--verbose ";
$cmd = "$mert_mira_cmd $mira_settings $seed_settings -o $mert_outfile";
- &submit_or_exec($cmd, "run$run.mira.out", $mert_logfile);
+ &submit_or_exec($cmd, "run$run.mira.out", $mert_logfile, 1);
} elsif ($__PROMIX_TRAINING) {
# PRO trained mixture model
safesystem("echo 'not used' > $weights_out_file") or die;
@@ -972,10 +972,10 @@ while (1) {
$cmd .= join(" ", map {"-p $_"} @_PROMIX_TABLES_BIN);
$cmd .= " -i $___DEV_F";
print "Starting promix optimisation at " . `date`;
- &submit_or_exec($cmd, "$mert_outfile", $mert_logfile);
+ &submit_or_exec($cmd, "$mert_outfile", $mert_logfile, 1);
print "Finished promix optimisation at " . `date`;
} else { # just mert
- &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
+ &submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile, ($__THREADS ? $__THREADS : 1) );
}
die "Optimization failed, file $weights_out_file does not exist or is empty"
@@ -1283,7 +1283,7 @@ sub run_decoder {
$lsamp_cmd = " -lattice-samples $lsamp_filename $___LATTICE_SAMPLES ";
}
- if (defined $___JOBS && $___JOBS > 0) {
+ if (defined $___JOBS && $___JOBS > 1) {
die "Hypergraph mira not supported by moses-parallel" if $___HG_MIRA;
$decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG";
$decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
@@ -1378,9 +1378,9 @@ sub get_featlist_from_moses {
print STDERR "Asking moses for feature names and values from $___CONFIG\n";
my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn";
$cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
- $cmd .= " -show-weights > $featlistfn";
+ $cmd .= " -show-weights";
print STDERR "Executing: $cmd\n";
- safesystem($cmd) or die "Failed to run moses with the config $configfn";
+ &submit_or_exec($cmd, $featlistfn, "/dev/null", 1);
}
return get_featlist_from_file($featlistfn);
}
@@ -1706,10 +1706,14 @@ sub ensure_full_path {
}
sub submit_or_exec {
- my ($cmd, $stdout, $stderr) = @_;
+ my ($cmd, $stdout, $stderr, $threads) = @_;
print STDERR "exec: $cmd\n";
- if (defined $___JOBS && $___JOBS > 0) {
- safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=$stdout -stderr=$stderr" )
+ if (defined $___JOBS && $___JOBS > 1) {
+ # request fewer CPU slots, if not needed
+ my $queue_flags_for_this_command = $queue_flags;
+ $threads = 1 unless defined($threads);
+ $queue_flags_for_this_command =~ s/(\-pe smp) \d+/$1 $threads/;
+ safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags_for_this_command\" -stdout=$stdout -stderr=$stderr" )
or die "ERROR: Failed to submit '$cmd' (via $qsubwrapper)";
} else {
safesystem("$cmd > $stdout 2> $stderr") or die "ERROR: Failed to run '$cmd'.";
diff --git a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
index 88d16b3f6..35714271c 100755
--- a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
+++ b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
@@ -3,11 +3,18 @@
use warnings;
use strict;
-my ($lowercase, $cluster_file,$in,$out,$tmp) = @ARGV;
+my ($lowercase,$cluster_file,$in,$out,$tmp) = @ARGV;
my $CLUSTER = &read_cluster_from_mkcls($cluster_file);
-open(IN,$in) || die("ERROR: could not open input");
+# is $lowercase a script?
+if ($lowercase =~ /\//) {
+ open(IN,"$lowercase < $in|") || die("ERROR: could not open input");
+ $lowercase = 0;
+}
+else {
+ open(IN,$in) || die("ERROR: could not open input");
+}
binmode(IN, ":utf8");
open(OUT,">$out");
binmode(OUT, ":utf8");
@@ -18,6 +25,7 @@ while(<IN>) {
s/ $//;
my $first = 1;
foreach my $word (split) {
+ # if lowercase is a flag
if ($lowercase) {
$word = lc($word);
}
diff --git a/scripts/training/wrappers/make-factor-de-lemma.perl b/scripts/training/wrappers/make-factor-de-lemma.perl
new file mode 100755
index 000000000..db978317e
--- /dev/null
+++ b/scripts/training/wrappers/make-factor-de-lemma.perl
@@ -0,0 +1,33 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Encode;
+use FindBin qw($RealBin);
+
+my ($in,$out,$tmpdir) = @ARGV;
+
+`mkdir -p $tmpdir`;
+`$RealBin/../../tokenizer/deescape-special-chars.perl < $in | /home/pkoehn/statmt/bin/unicode2latin1.perl > $tmpdir/tok.$$`;
+`/home/pkoehn/statmt/bin/run-lopar-tagger.perl $tmpdir/tok.$$ $tmpdir/lopar.$$`;
+
+open(LOPAR,"$tmpdir/lopar.$$");
+open(OUT,"|$RealBin/../../tokenizer/escape-special-chars.perl > $out");
+while(<LOPAR>) {
+ chomp;
+ s/ +/ /g;
+ s/^ //;
+ s/ $//;
+ my $first = 1;
+ foreach (split) {
+ die("ERROR: choked on token '$_'") unless /^(.+)_([^_]+)_(.+)$/;
+ my ($word,$pos,$lemma) = ($1,$2,$3);
+ print OUT " " unless $first;
+ $first = 0;
+ $lemma =~ s/\|.+$//;
+ $lemma = $word if $lemma =~ /^\<.+\>$/;
+ print OUT encode('utf8', decode('iso-8859-1', $lemma));
+ }
+ print OUT "\n";
+}
+close(LOPAR);
+close(OUT);
diff --git a/scripts/training/wrappers/make-factor-de-morph.perl b/scripts/training/wrappers/make-factor-de-morph.perl
index 1cc917bce..366a5a76d 100755
--- a/scripts/training/wrappers/make-factor-de-morph.perl
+++ b/scripts/training/wrappers/make-factor-de-morph.perl
@@ -1,31 +1,31 @@
-#!/usr/bin/env perl
-
-use warnings;
-use strict;
-use Encode;
-use FindBin qw($RealBin);
-my ($in,$out,$tmpdir) = @ARGV;
-
-`mkdir -p $tmpdir`;
-`$RealBin/../../tokenizer/deescape-special-chars.perl < $in | /home/pkoehn/statmt/bin/unicode2latin1.perl > $tmpdir/tok.$$`;
-`/home/pkoehn/statmt/bin/run-lopar-tagger.perl $tmpdir/tok.$$ $tmpdir/lopar.$$`;
-
-open(LOPAR,"$tmpdir/lopar.$$");
-open(OUT,"|$RealBin/../../tokenizer/escape-special-chars.perl >$out");
-while(<LOPAR>) {
- chomp;
- s/ +/ /g;
- s/^ //;
- s/ $//;
- my $first = 1;
- foreach (split) {
- die("ERROR: choked on token '$_'") unless /^(.+)_([^_]+)_(.+)$/;
- my ($word,$morph,$lemma) = ($1,$2,$3);
- print OUT " " unless $first;
- $first = 0;
- print OUT encode('utf8', decode('iso-8859-1', $morph));
- }
- print OUT "\n";
-}
-close(LOPAR);
-close(OUT);
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use Encode;
+use FindBin qw($RealBin);
+my ($in,$out,$tmpdir) = @ARGV;
+
+`mkdir -p $tmpdir`;
+`$RealBin/../../tokenizer/deescape-special-chars.perl < $in | /home/pkoehn/statmt/bin/unicode2latin1.perl > $tmpdir/tok.$$`;
+`/home/pkoehn/statmt/bin/run-lopar-tagger.perl $tmpdir/tok.$$ $tmpdir/lopar.$$`;
+
+open(LOPAR,"$tmpdir/lopar.$$");
+open(OUT,"|$RealBin/../../tokenizer/escape-special-chars.perl >$out");
+while(<LOPAR>) {
+ chomp;
+ s/ +/ /g;
+ s/^ //;
+ s/ $//;
+ my $first = 1;
+ foreach (split) {
+ die("ERROR: choked on token '$_'") unless /^(.+)_([^_]+)_(.+)$/;
+ my ($word,$morph,$lemma) = ($1,$2,$3);
+ print OUT " " unless $first;
+ $first = 0;
+ print OUT encode('utf8', decode('iso-8859-1', $morph));
+ }
+ print OUT "\n";
+}
+close(LOPAR);
+close(OUT);
diff --git a/scripts/training/wrappers/make-factor-en-porter.perl b/scripts/training/wrappers/make-factor-en-porter.perl
new file mode 100755
index 000000000..749dc1318
--- /dev/null
+++ b/scripts/training/wrappers/make-factor-en-porter.perl
@@ -0,0 +1,10 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+
+my ($in,$out,$tmpdir) = @ARGV;
+
+my $porter_in = "$tmpdir/porter-in.$$";
+`$RealBin/../../tokenizer/deescape-special-chars.perl < $in > $porter_in`;
+`/home/pkoehn/statmt/bin/porter-stemmer $porter_in | $RealBin/../../tokenizer/escape-special-chars.perl > $out`;
diff --git a/symal/symal.cpp b/symal/symal.cpp
index 7f3e22866..927676393 100644
--- a/symal/symal.cpp
+++ b/symal/symal.cpp
@@ -427,7 +427,7 @@ int main(int argc, char** argv)
ostream *out = &std::cout;
if (input) {
- fstream *fin = new fstream(input,ios::in);
+ fstream *fin = new fstream(input,ios::in);
if (!fin->is_open()) {
cerr << "cannot open " << input << "\n";
exit(1);
@@ -436,7 +436,7 @@ int main(int argc, char** argv)
}
if (output) {
- fstream *fout = new fstream(output,ios::out);
+ fstream *fout = new fstream(output,ios::out);
if (!fout->is_open()) {
cerr << "cannot open " << output << "\n";
exit(1);
@@ -506,12 +506,12 @@ int main(int argc, char** argv)
for (int i=1; i<=MAX_N; i++) delete [] A[i];
delete [] A;
- if (inp != &std::cin) {
- delete inp;
- }
- if (out != &std::cout) {
- delete inp;
- }
+ if (inp != &std::cin) {
+ delete inp;
+ }
+ if (out != &std::cout) {
+ delete inp;
+ }
exit(0);
}