Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Barry Haddow <barry.haddow@gmail.com>, 2011-10-12 20:14:23 +0400
committer: Barry Haddow <barry.haddow@gmail.com>, 2011-10-12 20:14:23 +0400
commit: c83166087e4d7093c7be605f30f92e183d0845eb (patch)
tree: 22f927112078fc818a1d9f66a230d716e962506d /moses-cmd
parent: ff15f28d9cda0c4c413fa6862f81eadbfe1ae900 (diff)
parent: 210f87bebddc2ca3a4cae66f365cd78126a3dff0 (diff)
Merge branch 'master' into miramerge
Conflicts: moses/src/LanguageModel.cpp moses/src/TargetPhrase.h moses/src/TrellisPath.h moses/src/Util.h scripts/training/train-model.perl
Diffstat (limited to 'moses-cmd')
-rw-r--r-- moses-cmd/moses-cmd.xcodeproj/project.pbxproj | 3
-rw-r--r-- moses-cmd/src/IOWrapper.cpp | 17
-rw-r--r-- moses-cmd/src/LatticeMBR.cpp | 1
-rw-r--r-- moses-cmd/src/LatticeMBR.h | 12
-rw-r--r-- moses-cmd/src/Main.cpp | 73
5 files changed, 62 insertions, 44 deletions
diff --git a/moses-cmd/moses-cmd.xcodeproj/project.pbxproj b/moses-cmd/moses-cmd.xcodeproj/project.pbxproj
index 263979a9e..e1931906c 100644
--- a/moses-cmd/moses-cmd.xcodeproj/project.pbxproj
+++ b/moses-cmd/moses-cmd.xcodeproj/project.pbxproj
@@ -410,6 +410,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
+ HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
@@ -420,6 +421,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
+ HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
@@ -430,6 +432,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
+ HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
diff --git a/moses-cmd/src/IOWrapper.cpp b/moses-cmd/src/IOWrapper.cpp
index 30e98558e..12c6d1936 100644
--- a/moses-cmd/src/IOWrapper.cpp
+++ b/moses-cmd/src/IOWrapper.cpp
@@ -312,8 +312,8 @@ void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
- if (i>0) out << " ";
- out << *factor;
+ if (i>0) out << " " << *factor;
+ else out << *factor;
}
out << endl;
}
@@ -529,22 +529,21 @@ void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>&
{
for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
out << translationId;
- out << " ||| ";
+ out << " |||";
const vector<Word> mbrHypo = si->GetWords();
for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
- if (i>0) out << " ";
- out << *factor;
+ if (i>0) out << " " << *factor;
+ else out << *factor;
}
- out << " ||| ";
- out << "map: " << si->GetMapScore();
+ out << " |||";
+ out << " map: " << si->GetMapScore();
out << " w: " << mbrHypo.size();
const vector<float>& ngramScores = si->GetNgramScores();
for (size_t i = 0; i < ngramScores.size(); ++i) {
out << " " << ngramScores[i];
}
- out << " ||| ";
- out << si->GetScore();
+ out << " ||| " << si->GetScore();
out << endl;
}
diff --git a/moses-cmd/src/LatticeMBR.cpp b/moses-cmd/src/LatticeMBR.cpp
index 24b9fd6d6..ef7a5f71a 100644
--- a/moses-cmd/src/LatticeMBR.cpp
+++ b/moses-cmd/src/LatticeMBR.cpp
@@ -237,6 +237,7 @@ void pruneLatticeFB(Lattice & connectedHyp, map < const Hypothesis*, set <const
const ArcList *arcList = succHyp->GetArcList();
if (arcList != NULL) {
ArcList::const_iterator iterArcList;
+ //QUESTION: What happens if there's more than one loserPrevHypo?
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
const Hypothesis *loserHypo = *iterArcList;
const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
diff --git a/moses-cmd/src/LatticeMBR.h b/moses-cmd/src/LatticeMBR.h
index 5a5c15dff..8b54e6c51 100644
--- a/moses-cmd/src/LatticeMBR.h
+++ b/moses-cmd/src/LatticeMBR.h
@@ -19,17 +19,7 @@
using namespace Moses;
-template<class T>
-T log_sum (T log_a, T log_b)
-{
- T v;
- if (log_a < log_b) {
- v = log_b+log ( 1 + exp ( log_a-log_b ));
- } else {
- v = log_a+log ( 1 + exp ( log_b-log_a ));
- }
- return ( v );
-}
+
class Edge;
diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp
index e8949ddb9..846f27b7f 100644
--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@@ -72,11 +72,13 @@ public:
TranslationTask(size_t lineNumber,
InputType* source, OutputCollector* outputCollector, OutputCollector* nbestCollector,
+ OutputCollector* latticeSamplesCollector,
OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector,
OutputCollector* detailedTranslationCollector,
OutputCollector* alignmentInfoCollector ) :
m_source(source), m_lineNumber(lineNumber),
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
+ m_latticeSamplesCollector(latticeSamplesCollector),
m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector),
m_detailedTranslationCollector(detailedTranslationCollector),
m_alignmentInfoCollector(alignmentInfoCollector) {}
@@ -240,6 +242,15 @@ public:
m_nbestCollector->Write(m_lineNumber, out.str());
}
+ //lattice samples
+ if (m_latticeSamplesCollector) {
+ TrellisPathList latticeSamples;
+ ostringstream out;
+ manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+ OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
+ m_latticeSamplesCollector->Write(m_lineNumber, out.str());
+ }
+
// detailed translation reporting
if (m_detailedTranslationCollector) {
ostringstream out;
@@ -264,6 +275,7 @@ private:
size_t m_lineNumber;
OutputCollector* m_outputCollector;
OutputCollector* m_nbestCollector;
+ OutputCollector* m_latticeSamplesCollector;
OutputCollector* m_wordGraphCollector;
OutputCollector* m_searchGraphCollector;
OutputCollector* m_detailedTranslationCollector;
@@ -342,25 +354,6 @@ int main(int argc, char** argv)
}
- // create threadpool, if using multi-threaded decoding
- // note: multi-threading is done on sentence-level,
- // each thread translates one sentence
- int threadcount = (params->GetParam("threads").size() > 0) ?
- Scan<size_t>(params->GetParam("threads")[0]) : 1;
-
-#ifdef WITH_THREADS
- if (threadcount < 1) {
- cerr << "Error: Need to specify a positive number of threads" << endl;
- exit(1);
- }
- ThreadPool pool(threadcount);
-#else
- if (threadcount > 1) {
- cerr << "Error: Thread count of " << threadcount << " but moses not built with thread support" << endl;
- exit(1);
- }
-#endif
-
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(params)) {
@@ -376,6 +369,10 @@ int main(int argc, char** argv)
// shorthand for accessing information in StaticData
const StaticData& staticData = StaticData::Instance();
+
+ //initialise random numbers
+ srand(time(NULL));
+
// set up read/writing class
IOWrapper* ioWrapper = GetIODevice(staticData);
if (!ioWrapper) {
@@ -396,21 +393,43 @@ int main(int argc, char** argv)
// because multithreading may return sentences in shuffled order
auto_ptr<OutputCollector> outputCollector; // for translations
auto_ptr<OutputCollector> nbestCollector; // for n-best lists
+ auto_ptr<OutputCollector> latticeSamplesCollector; //for lattice samples
auto_ptr<ofstream> nbestOut;
+ auto_ptr<ofstream> latticeSamplesOut;
size_t nbestSize = staticData.GetNBestSize();
string nbestFile = staticData.GetNBestFilePath();
+ bool output1best = true;
if (nbestSize) {
if (nbestFile == "-" || nbestFile == "/dev/stdout") {
// nbest to stdout, no 1-best
nbestCollector.reset(new OutputCollector());
+ output1best = false;
} else {
// nbest to file, 1-best to stdout
nbestOut.reset(new ofstream(nbestFile.c_str()));
- assert(nbestOut->good());
+ if (!nbestOut->good()) {
+ TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl);
+ exit(1);
+ }
nbestCollector.reset(new OutputCollector(nbestOut.get()));
- outputCollector.reset(new OutputCollector());
}
- } else {
+ }
+ size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
+ string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
+ if (latticeSamplesSize) {
+ if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
+ latticeSamplesCollector.reset(new OutputCollector());
+ output1best = false;
+ } else {
+ latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str()));
+ if (!latticeSamplesOut->good()) {
+ TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
+ exit(1);
+ }
+ latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get()));
+ }
+ }
+ if (output1best) {
outputCollector.reset(new OutputCollector());
}
@@ -439,6 +458,10 @@ int main(int argc, char** argv)
alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
}
+#ifdef WITH_THREADS
+ ThreadPool pool(staticData.ThreadCount());
+#endif
+
// main loop over set of input sentences
InputType* source = NULL;
size_t lineCount = 0;
@@ -449,13 +472,15 @@ int main(int argc, char** argv)
// set up task of translating one sentence
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector.get(),
- nbestCollector.get(), wordGraphCollector.get(),
+ nbestCollector.get(),
+ latticeSamplesCollector.get(),
+ wordGraphCollector.get(),
searchGraphCollector.get(),
detailedTranslationCollector.get(),
alignmentInfoCollector.get() );
// execute task
#ifdef WITH_THREADS
- pool.Submit(task);
+ pool.Submit(task);
#else
task->Run();
#endif