Replace Moses::Scan with std::atoi/std::atoll/std::atof in phrase-extract (Moses::Scan is too inefficient)

author: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 22:43:29 +0300
committer: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 22:43:29 +0300
commit: 472529ade857a69e01f81cac6675fa7eeb9c2ba9 (patch)
tree: 1ab9562f09a1a71b42ef2514b01c2f02591419f4 /phrase-extract
parent: 9e31bced9afa395bc470de2c9958044e357574ad (diff)
2 files changed, 16 insertions, 14 deletions
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index 732185eb3..c9496f988 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -17,6 +17,7 @@
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  ***********************************************************************/
 
+#include <cstdlib>
 #include <vector>
 #include <string>
 
@@ -123,7 +124,7 @@ int main(int argc, char* argv[])
       std::cerr << "include "<< (sparseCountBinFeatureFlag ? "sparse " : "") << "count bin feature:";
       int prev = 0;
       while(i+1<argc && argv[i+1][0]>='0' && argv[i+1][0]<='9') {
-        int binCount = Moses::Scan<int>(argv[++i]);
+        int binCount = std::atoi( argv[++i] );
         countBin.push_back( binCount );
         if (prev+1 == binCount) {
           std::cerr << " " << binCount;
@@ -164,8 +165,8 @@ int main(int argc, char* argv[])
         }
         pos = single_setting.find(":");
         UTIL_THROW_IF2(pos == std::string::npos, "faulty MinScore setting '" << single_setting << "' in '" << argv[i] << "'");
-        unsigned int field = Moses::Scan<unsigned int>( single_setting.substr(0,pos) );
-        float threshold = Moses::Scan<float>( single_setting.substr(pos+1) );
+        unsigned int field = std::atoll( single_setting.substr(0,pos).c_str() );
+        float threshold = std::atof( single_setting.substr(pos+1).c_str() );
         if (field == 0) {
           minScore0 = threshold;
           std::cerr << "setting minScore0 to " << threshold << std::endl;
@@ -195,9 +196,9 @@ void loadCountOfCounts( const std::string& fileNameCountOfCounts )
   std::string line;
   while (getline(fileCountOfCounts, line)) {
     if (totalCount < 0)
-      totalCount = Moses::Scan<float>(line); // total number of distinct phrase pairs
+      totalCount = std::atof( line.c_str() ); // total number of distinct phrase pairs
     else
-      countOfCounts.push_back( Moses::Scan<float>(line) );
+      countOfCounts.push_back( std::atof( line.c_str() ) );
   }
   fileCountOfCounts.Close();
 
@@ -286,13 +287,13 @@ void processFiles( const std::string& fileNameDirect,
     Moses::Tokenize( directCounts, itemDirect[4] );
     std::vector<std::string> indirectCounts;
     Moses::Tokenize( indirectCounts, itemIndirect[4] );
-    float countF = Moses::Scan<float>(directCounts[0]);
-    float countE = Moses::Scan<float>(indirectCounts[0]);
-    float countEF = Moses::Scan<float>(indirectCounts[1]);
+    float countF  = std::atof( directCounts[0].c_str() );
+    float countE  = std::atof( indirectCounts[0].c_str() );
+    float countEF = std::atof( indirectCounts[1].c_str() );
     float n1_F, n1_E;
     if (kneserNeyFlag) {
-      n1_F = Moses::Scan<float>(directCounts[2]);
-      n1_E = Moses::Scan<float>(indirectCounts[2]);
+      n1_F = std::atof( directCounts[2].c_str() );
+      n1_E = std::atof( indirectCounts[2].c_str() );
     }
 
     // Good Turing discounting
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index a09bd0543..09cec8fbe 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -19,6 +19,7 @@
 
 #include <sstream>
 #include <assert.h>
+#include <cstdlib>
 #include <cstring>
 #include <list>
 #include <map>
@@ -252,11 +253,11 @@ int main(int argc, char* argv[])
       negLogProb = -1;
       std::cerr << "using negative log-probabilities" << std::endl;
     } else if (strcmp(argv[i],"--MinCount") == 0) {
-      minCount = Moses::Scan<float>( argv[++i] );
+      minCount = std::atof( argv[++i] );
       std::cerr << "dropping all phrase pairs occurring less than " << minCount << " times" << std::endl;
       minCount -= 0.00001; // account for rounding
     } else if (strcmp(argv[i],"--MinCountHierarchical") == 0) {
-      minCountHierarchical = Moses::Scan<float>( argv[++i] );
+      minCountHierarchical = std::atof( argv[++i] );
       std::cerr << "dropping all hierarchical phrase pairs occurring less than " << minCountHierarchical << " times" << std::endl;
       minCountHierarchical -= 0.00001; // account for rounding
     } else if (strcmp(argv[i],"--CrossedNonTerm") == 0) {
@@ -570,7 +571,7 @@ void processLine( std::string line,
     } else if (item + (includeSentenceIdFlag?-1:0) == 4) { // count
       sscanf(token[j].c_str(), "%f", &count);
     } else if (item + (includeSentenceIdFlag?-1:0) == 5) { // target syntax PCFG score
-      float pcfgScore = Moses::Scan<float>( token[j] );
+      float pcfgScore = std::atof( token[j].c_str() );
       pcfgSum = pcfgScore * count;
     }
   }
@@ -1196,7 +1197,7 @@ void LexicalTable::load( const std::string &fileName )
       continue;
     }
 
-    double prob = Moses::Scan<double>( token[2] );
+    double prob = std::atof( token[2].c_str() );
     WORD_ID wordT = vcbT.storeIfNew( token[0] );
     WORD_ID wordS = vcbS.storeIfNew( token[1] );
     ltable[ wordS ][ wordT ] = prob;
author	Matthias Huck <mhuck@inf.ed.ac.uk>	2015-07-24 22:43:29 +0300
committer	Matthias Huck <mhuck@inf.ed.ac.uk>	2015-07-24 22:43:29 +0300
commit	472529ade857a69e01f81cac6675fa7eeb9c2ba9 (patch)
tree	1ab9562f09a1a71b42ef2514b01c2f02591419f4 /phrase-extract
parent	9e31bced9afa395bc470de2c9958044e357574ad (diff)