
github.com/moses-smt/mosesdecoder.git
author     phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>    2011-03-02 22:02:07 +0300
committer  phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>    2011-03-02 22:02:07 +0300
commit     4ee7e5f6736ae8a8c90475ed1736d43bd17d52f5 (patch)
tree       ff157e602082099184553150c47dd1ca27dc8700 /moses-cmd
parent     106c4e0fc24618785cccbd846e88a9178e372cc0 (diff)
added some comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3911 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses-cmd')
-rw-r--r--    moses-cmd/src/Main.cpp    | 110
1 file changed, 81 insertions(+), 29 deletions(-)
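
The diff below adds explanatory comments to moses-cmd/src/Main.cpp that walk through the per-sentence pipeline: run the search (Manager), apply a decision rule (MAP, or one of the MBR variants: n-best MBR, lattice MBR, consensus), and hand the chosen translation to an output collector. The following self-contained C++ sketch summarizes that decision-rule dispatch; the types and function bodies are hypothetical stand-ins for illustration, not the Moses classes themselves.

// Hedged sketch of the decision-rule dispatch described by the new comments.
// All types and functions here are illustrative stand-ins, not the Moses API.
#include <iostream>
#include <string>

struct Options { bool useMBR; bool useLatticeMBR; bool useConsensus; };
struct SearchResult { std::string bestPath; };   // what the search (Manager) produces

// Each decision rule extracts one translation from the search result.
std::string MapDecode(const SearchResult& r)        { return r.bestPath; }  // best hypothesis
std::string NBestMbrDecode(const SearchResult& r)   { return r.bestPath; }  // cf. doMBR()
std::string LatticeMbrDecode(const SearchResult& r) { return r.bestPath; }
std::string ConsensusDecode(const SearchResult& r)  { return r.bestPath; }  // cf. doConsensusDecoding()

// Mirrors the branching in TranslationTask::Run(): search first, decision rule second.
std::string ApplyDecisionRule(const SearchResult& result, const Options& opt) {
  if (!opt.useMBR)        return MapDecode(result);        // MAP decoding
  if (opt.useLatticeMBR)  return LatticeMbrDecode(result); // lattice MBR
  if (opt.useConsensus)   return ConsensusDecode(result);  // consensus decoding
  return NBestMbrDecode(result);                           // n-best MBR
}

int main() {
  SearchResult result{"this is a house"};
  Options opt{false, false, false};          // plain MAP decoding
  std::cout << ApplyDecisionRule(result, opt) << std::endl;
  return 0;
}
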
diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp
index 8c6c4d637..7a0ab7c7b 100644
--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@@ -50,6 +50,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
using namespace Moses;
+// output floats with three significant digits
static const size_t PRECISION = 3;
/** Enforce rounding */
@@ -59,8 +60,10 @@ void fix(std::ostream& stream, size_t size)
stream.precision(size);
}
-/**
- * Translates a sentence.
+/** Translates a sentence.
+ * - calls the search (Manager)
+ * - applies the decision rule
+ * - outputs best translation and additional reporting
**/
class TranslationTask : public Task
{
@@ -78,17 +81,29 @@ public:
m_detailedTranslationCollector(detailedTranslationCollector),
m_alignmentInfoCollector(alignmentInfoCollector) {}
+ /** Translate one sentence
+ * gets called by the main function implemented at the end of this source file */
void Run() {
+
+ // report thread number
#ifdef BOOST_HAS_PTHREADS
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
#endif
+
+ // shorthand for "global data"
const StaticData &staticData = StaticData::Instance();
+ // input sentence
Sentence sentence(Input);
+ // set translation system
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
+
+ // execute the translation
+ // note: this executes the search, resulting in a search graph
+ // we still need to apply the decision rule (MAP, MBR, ...)
Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
- //Word Graph
+ // output word graph
if (m_wordGraphCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -96,7 +111,7 @@ public:
m_wordGraphCollector->Write(m_lineNumber, out.str());
}
- //Search Graph
+ // output search graph
if (m_searchGraphCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -113,23 +128,23 @@ public:
manager.SerializeSearchGraphPB(m_lineNumber, output);
}
#endif
- }
-
-
+ }
+ // apply decision rule and output best translation(s)
if (m_outputCollector) {
ostringstream out;
ostringstream debug;
fix(debug,PRECISION);
- //All derivations - send them to debug stream
+ // all derivations - send them to debug stream
if (staticData.PrintAllDerivations()) {
manager.PrintAllDerivations(m_lineNumber, debug);
}
- //Best hypothesis
+ // MAP decoding: best hypothesis
const Hypothesis* bestHypo = NULL;
- if (!staticData.UseMBR()) {
+ if (!staticData.UseMBR())
+ {
bestHypo = manager.GetBestHypothesis();
if (bestHypo) {
if (staticData.IsPathRecoveryEnabled()) {
@@ -148,7 +163,12 @@ public:
}
}
out << endl;
- } else {
+ }
+
+ // MBR decoding (n-best MBR, lattice MBR, consensus)
+ else
+ {
+ // we first need the n-best translations
size_t nBestSize = staticData.GetMBRSize();
if (nBestSize <= 0) {
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
@@ -161,6 +181,7 @@ public:
PrintUserTime("calculated n-best list for (L)MBR decoding");
}
+ // lattice MBR
if (staticData.UseLatticeMBR()) {
if (m_nbestCollector) {
//lattice mbr nbest
@@ -179,7 +200,10 @@ public:
PrintUserTime("finished Lattice MBR decoding");
}
}
- } else if (staticData.UseConsensusDecoding()) {
+ }
+
+ // consensus decoding
+ else if (staticData.UseConsensusDecoding()) {
const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
OutputBestHypo(conBestHypo, m_lineNumber,
staticData.GetReportSegmentation(),
@@ -188,8 +212,10 @@ public:
IFVERBOSE(2) {
PrintUserTime("finished Consensus decoding");
}
- } else {
- //MBR decoding
+ }
+
+ // n-best MBR decoding
+ else {
const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, m_lineNumber,
staticData.GetReportSegmentation(),
@@ -198,11 +224,14 @@ public:
IFVERBOSE(2) {
PrintUserTime("finished MBR decoding");
}
-
}
}
+
+ // report best translation to output collector
m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
}
+
+ // output n-best list
if (m_nbestCollector && !staticData.UseLatticeMBR()) {
TrellisPathList nBestList;
ostringstream out;
@@ -211,7 +240,7 @@ public:
m_nbestCollector->Write(m_lineNumber, out.str());
}
- //detailed translation reporting
+ // detailed translation reporting
if (m_detailedTranslationCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -219,10 +248,10 @@ public:
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
}
+ // report additional statistics
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
}
-
manager.CalcDecoderStatistics();
}
@@ -279,29 +308,36 @@ static void ShowWeights()
}
}
+/** main function of the command line version of the decoder **/
int main(int argc, char** argv)
{
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
+
+ // echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
TRACE_ERR(endl);
}
+ // set number of significant decimals in output
fix(cout,PRECISION);
fix(cerr,PRECISION);
-
+ // load all the settings into the Parameter class
+ // (stores them as strings or arrays of strings)
Parameter* params = new Parameter();
if (!params->LoadParam(argc,argv)) {
params->Explain();
exit(1);
}
- //create threadpool, if necessary
+ // create threadpool, if using multi-threaded decoding
+ // note: multi-threading is done at the sentence level;
+ // each thread translates one sentence
int threadcount = (params->GetParam("threads").size() > 0) ?
Scan<size_t>(params->GetParam("threads")[0]) : 1;
@@ -318,20 +354,23 @@ int main(int argc, char** argv)
}
#endif
-
+ // initialize all "global" variables, which are stored in StaticData
+ // note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(params)) {
exit(1);
}
+ // setting "-show-weights" -> just dump out weights and exit
if (params->isParamSpecified("show-weights")) {
ShowWeights();
exit(0);
}
+ // shorthand for accessing information in StaticData
const StaticData& staticData = StaticData::Instance();
+
// set up read/writing class
IOWrapper* ioWrapper = GetIODevice(staticData);
-
if (!ioWrapper) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
@@ -353,20 +392,20 @@ int main(int argc, char** argv)
exit(1);
}
-
- InputType* source = NULL;
- size_t lineCount = 0;
- auto_ptr<OutputCollector> outputCollector;//for translations
- auto_ptr<OutputCollector> nbestCollector;
+ // initialize output streams
+ // note: we can't just write to STDOUT or files
+ // because multithreading may return sentences in shuffled order
+ auto_ptr<OutputCollector> outputCollector; // for translations
+ auto_ptr<OutputCollector> nbestCollector; // for n-best lists
auto_ptr<ofstream> nbestOut;
size_t nbestSize = staticData.GetNBestSize();
string nbestFile = staticData.GetNBestFilePath();
if (nbestSize) {
if (nbestFile == "-" || nbestFile == "/dev/stdout") {
- //nbest to stdout, no 1-best
+ // nbest to stdout, no 1-best
nbestCollector.reset(new OutputCollector());
} else {
- //nbest to file, 1-best to stdout
+ // nbest to file, 1-best to stdout
nbestOut.reset(new ofstream(nbestFile.c_str()));
assert(nbestOut->good());
nbestCollector.reset(new OutputCollector(nbestOut.get()));
@@ -376,44 +415,57 @@ int main(int argc, char** argv)
outputCollector.reset(new OutputCollector());
}
+ // initialize stream for word graph (aka: output lattice)
auto_ptr<OutputCollector> wordGraphCollector;
if (staticData.GetOutputWordGraph()) {
wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
}
+ // initialize stream for search graph
+ // note: this is essentially the same as above, but in a different format
auto_ptr<OutputCollector> searchGraphCollector;
if (staticData.GetOutputSearchGraph()) {
searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
}
+ // initialize stream for details about the decoder run
auto_ptr<OutputCollector> detailedTranslationCollector;
if (staticData.IsDetailedTranslationReportingEnabled()) {
detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
}
+
+ // initialize stream for word alignment between input and output
auto_ptr<OutputCollector> alignmentInfoCollector;
if (!staticData.GetAlignmentOutputFile().empty()) {
alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
}
+ // main loop over set of input sentences
+ InputType* source = NULL;
+ size_t lineCount = 0;
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
IFVERBOSE(1) {
ResetUserTime();
}
+ // set up task of translating one sentence
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector.get(),
nbestCollector.get(), wordGraphCollector.get(),
searchGraphCollector.get(),
detailedTranslationCollector.get(),
alignmentInfoCollector.get() );
+ // execute task
#ifdef WITH_THREADS
pool.Submit(task);
-
#else
task->Run();
#endif
+
source = NULL; //make sure it doesn't get deleted
++lineCount;
}
+
+ // we are done, finishing up
#ifdef WITH_THREADS
pool.Stop(true); //flush remaining jobs
#endif
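
One of the added comments notes that translations cannot be written straight to STDOUT or to files under multi-threaded decoding, because threads may finish sentences in shuffled order; the OutputCollector instances exist to restore the original order. The following minimal, self-contained sketch illustrates that idea; the class is an illustrative stand-in (and uses C++11 std::thread rather than the Boost threading that Moses itself relies on), not the actual Moses OutputCollector.

// Minimal order-restoring writer: results arrive tagged with their line number
// and are flushed strictly in input order. Illustrative stand-in only.
#include <cstddef>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

class OrderedWriter {
public:
  explicit OrderedWriter(std::ostream& out) : m_out(out), m_next(0) {}

  // Called from worker threads: buffer the translation until it is its turn.
  void Write(std::size_t lineNumber, const std::string& translation) {
    std::lock_guard<std::mutex> lock(m_mutex);
    m_pending[lineNumber] = translation;
    // Flush everything that is now contiguous with what was already written.
    while (!m_pending.empty() && m_pending.begin()->first == m_next) {
      m_out << m_pending.begin()->second << std::endl;
      m_pending.erase(m_pending.begin());
      ++m_next;
    }
  }

private:
  std::ostream& m_out;
  std::size_t m_next;                            // next line number to emit
  std::map<std::size_t, std::string> m_pending;  // finished but not yet emitted
  std::mutex m_mutex;
};

int main() {
  OrderedWriter writer(std::cout);
  std::vector<std::thread> workers;
  // Threads finish in reverse order, but the output still reads lines 0, 1, 2, 3.
  for (std::size_t i = 0; i < 4; ++i) {
    workers.emplace_back([&writer, i] {
      writer.Write(3 - i, "translation of line " + std::to_string(3 - i));
    });
  }
  for (std::thread& t : workers) t.join();
  return 0;
}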