diff options
author | Hieu Hoang <fishandfrolick@gmail.com> | 2012-06-30 18:43:47 +0400 |
---|---|---|
committer | Hieu Hoang <fishandfrolick@gmail.com> | 2012-06-30 18:43:47 +0400 |
commit | ef9db932aa4ce4e57ead87e965a374f580a7cac2 (patch) | |
tree | dd6a06157328b410ff08dafaec5fb98a1acd122f /phrase-extract | |
parent | b1ce27d01f2629749deb9cb48f218246be6eca28 (diff) |
add namespace to phrase-extract
Diffstat (limited to 'phrase-extract')
28 files changed, 183 insertions, 35 deletions
diff --git a/phrase-extract/AlignmentPhrase.cpp b/phrase-extract/AlignmentPhrase.cpp index 2d4439567..d51aadd01 100644 --- a/phrase-extract/AlignmentPhrase.cpp +++ b/phrase-extract/AlignmentPhrase.cpp @@ -24,6 +24,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA using namespace std; +namespace MosesTraining +{ + void AlignmentElement::Merge(size_t align) { m_elements.insert(align); @@ -40,3 +43,7 @@ void AlignmentPhrase::Merge(const std::vector< std::vector<size_t> > &source) } } } + +} // namespace + + diff --git a/phrase-extract/AlignmentPhrase.h b/phrase-extract/AlignmentPhrase.h index f77b44f36..ec6431f18 100644 --- a/phrase-extract/AlignmentPhrase.h +++ b/phrase-extract/AlignmentPhrase.h @@ -23,9 +23,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include <vector> #include <set> +namespace MosesTraining +{ + class WordsRange; - class AlignmentElement { protected: @@ -68,5 +70,5 @@ public: } }; - +} // namespace diff --git a/phrase-extract/ExtractedRule.cpp b/phrase-extract/ExtractedRule.cpp index c566e842a..985f2f093 100644 --- a/phrase-extract/ExtractedRule.cpp +++ b/phrase-extract/ExtractedRule.cpp @@ -10,6 +10,9 @@ using namespace std; +namespace MosesTraining +{ + void ExtractedRule::OutputNTLengths(std::ostream &out) const { ostringstream outString; @@ -39,3 +42,4 @@ std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj) return out; } +} // namespace diff --git a/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h index be6e30836..992a807b3 100644 --- a/phrase-extract/ExtractedRule.h +++ b/phrase-extract/ExtractedRule.h @@ -26,6 +26,9 @@ #include <sstream> #include <map> +namespace MosesTraining +{ + // sentence-level collection of rules class ExtractedRule { @@ -71,4 +74,6 @@ public: void OutputNTLengths(std::ostringstream &out) const; }; +} + #endif diff --git a/phrase-extract/Hole.h b/phrase-extract/Hole.h index 31e928f6e..c570ec7a1 100644 --- a/phrase-extract/Hole.h +++ b/phrase-extract/Hole.h @@ -26,6 +26,9 @@ #include <string> #include <vector> +namespace MosesTraining +{ + class Hole { protected: @@ -108,4 +111,6 @@ public: } }; +} + #endif diff --git a/phrase-extract/HoleCollection.cpp b/phrase-extract/HoleCollection.cpp index 4cffab7fd..fd79d74b1 100644 --- a/phrase-extract/HoleCollection.cpp +++ b/phrase-extract/HoleCollection.cpp @@ -21,6 +21,9 @@ #include <algorithm> +namespace MosesTraining +{ + void HoleCollection::SortSourceHoles() { assert(m_sortedSourceHoles.size() == 0); @@ -60,3 +63,5 @@ int HoleCollection::Scope(const Hole &proposedHole) const } return scope; } + +} diff --git a/phrase-extract/HoleCollection.h b/phrase-extract/HoleCollection.h index 355e825fb..2894101bd 100644 --- a/phrase-extract/HoleCollection.h +++ b/phrase-extract/HoleCollection.h @@ -26,6 +26,9 @@ #include "Hole.h" +namespace MosesTraining +{ + class HoleCollection { protected: @@ -94,4 +97,6 @@ public: }; +} + #endif diff --git a/phrase-extract/PhraseAlignment.cpp b/phrase-extract/PhraseAlignment.cpp index ceb74f04c..e432294b9 100644 --- a/phrase-extract/PhraseAlignment.cpp +++ b/phrase-extract/PhraseAlignment.cpp @@ -17,6 +17,9 @@ using namespace std; +namespace MosesTraining +{ + extern Vocabulary vcbT; extern Vocabulary vcbS; @@ -236,4 +239,5 @@ int PhraseAlignment::Compare(const PhraseAlignment &other) const } +} diff --git a/phrase-extract/PhraseAlignment.h b/phrase-extract/PhraseAlignment.h index 8bd83503d..9763b7a52 100644 --- a/phrase-extract/PhraseAlignment.h +++ b/phrase-extract/PhraseAlignment.h @@ -13,6 +13,9 @@ #include <set> #include <map> +namespace MosesTraining +{ + // data structure for a single phrase pair class PhraseAlignment { @@ -52,3 +55,6 @@ public: { return m_ntLengths; } }; + +} + diff --git a/phrase-extract/RuleExist.h b/phrase-extract/RuleExist.h index cf7fae3cd..94ea4b98e 100644 --- a/phrase-extract/RuleExist.h +++ b/phrase-extract/RuleExist.h @@ -25,6 +25,9 @@ #include "Hole.h" +namespace MosesTraining +{ + // reposity of extracted phrase pairs // which are potential holes in larger phrase pairs class RuleExist @@ -56,4 +59,7 @@ public: }; +} + + #endif diff --git a/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h index 272af2c76..bb2d97580 100644 --- a/phrase-extract/RuleExtractionOptions.h +++ b/phrase-extract/RuleExtractionOptions.h @@ -21,6 +21,9 @@ #ifndef RULEEXTRACTIONOPTIONS_H_INCLUDED_ #define RULEEXTRACTIONOPTIONS_H_INCLUDED_ +namespace MosesTraining +{ + struct RuleExtractionOptions { public: int maxSpan; @@ -85,4 +88,6 @@ public: {} }; +} + #endif diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp index 8b513cfb4..8e44bddc4 100644 --- a/phrase-extract/SentenceAlignment.cpp +++ b/phrase-extract/SentenceAlignment.cpp @@ -25,6 +25,9 @@ #include "tables-core.h" +namespace MosesTraining +{ + SentenceAlignment::~SentenceAlignment() {} bool SentenceAlignment::processTargetSentence(const char * targetString, int) @@ -89,3 +92,6 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a } return true; } + +} + diff --git a/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h index df3987198..b1fb5933a 100644 --- a/phrase-extract/SentenceAlignment.h +++ b/phrase-extract/SentenceAlignment.h @@ -24,6 +24,9 @@ #include <string> #include <vector> +namespace MosesTraining +{ + class SentenceAlignment { public: @@ -43,4 +46,7 @@ public: char alignmentString[], int sentenceID); }; +} + + #endif diff --git a/phrase-extract/SentenceAlignmentWithSyntax.cpp b/phrase-extract/SentenceAlignmentWithSyntax.cpp index 06dc3919f..83a048757 100644 --- a/phrase-extract/SentenceAlignmentWithSyntax.cpp +++ b/phrase-extract/SentenceAlignmentWithSyntax.cpp @@ -29,6 +29,9 @@ using namespace std; +namespace MosesTraining +{ + bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID) { if (!m_options.targetSyntax) { @@ -68,3 +71,8 @@ bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceStrin source = tokenize(sourceStringCPP.c_str()); return true; } + +} // namespace + + + diff --git a/phrase-extract/SentenceAlignmentWithSyntax.h b/phrase-extract/SentenceAlignmentWithSyntax.h index a2c164655..38fa77907 100644 --- a/phrase-extract/SentenceAlignmentWithSyntax.h +++ b/phrase-extract/SentenceAlignmentWithSyntax.h @@ -30,6 +30,9 @@ #include "SentenceAlignment.h" #include "SyntaxTree.h" +namespace MosesTraining +{ + class SentenceAlignmentWithSyntax : public SentenceAlignment { public: @@ -62,4 +65,6 @@ public: processSourceSentence(const char *, int); }; +} + #endif diff --git a/phrase-extract/SyntaxTree.cpp b/phrase-extract/SyntaxTree.cpp index f2783ffd2..0b99f0d22 100644 --- a/phrase-extract/SyntaxTree.cpp +++ b/phrase-extract/SyntaxTree.cpp @@ -26,6 +26,9 @@ #include <cassert> #include <iostream> +namespace MosesTraining +{ + SyntaxTree::~SyntaxTree() { Clear(); @@ -178,3 +181,6 @@ std::ostream& operator<<(std::ostream& os, const SyntaxTree& t) } return os; } + +} + diff --git a/phrase-extract/SyntaxTree.h b/phrase-extract/SyntaxTree.h index 17c106b49..dac20f9b2 100644 --- a/phrase-extract/SyntaxTree.h +++ b/phrase-extract/SyntaxTree.h @@ -27,6 +27,9 @@ #include <map> #include <sstream> +namespace MosesTraining +{ + class SyntaxNode { protected: @@ -119,3 +122,5 @@ public: std::ostream& operator<<(std::ostream&, const SyntaxTree&); +} + diff --git a/phrase-extract/XmlTree.cpp b/phrase-extract/XmlTree.cpp index 29c0d94aa..e826263f9 100644 --- a/phrase-extract/XmlTree.cpp +++ b/phrase-extract/XmlTree.cpp @@ -32,6 +32,8 @@ using namespace std; +namespace MosesTraining +{ inline std::vector<std::string> Tokenize(const std::string& str, const std::string& delimiters = " \t") @@ -390,3 +392,5 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label line = cleanLine; return true; } + +} diff --git a/phrase-extract/XmlTree.h b/phrase-extract/XmlTree.h index 7e6bbecea..ebf235b64 100644 --- a/phrase-extract/XmlTree.h +++ b/phrase-extract/XmlTree.h @@ -27,6 +27,9 @@ #include <map>
#include "SyntaxTree.h"
+namespace MosesTraining
+{
+
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
std::string TrimXml(const std::string& str);
@@ -34,3 +37,7 @@ bool isXmlTag(const std::string& tag); std::vector<std::string> TokenizeXml(const std::string& str);
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );
std::string unescape(const std::string &str);
+
+
+} // namespace
+
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h index e2225ecbc..d272cf6ff 100644 --- a/phrase-extract/extract-lex.h +++ b/phrase-extract/extract-lex.h @@ -6,6 +6,9 @@ #include <fstream> #include <iostream> +namespace MosesTraining +{ + //! convert string to variable of type T. Used to reading floats, int etc from files template<typename T> @@ -115,3 +118,4 @@ public: }; +} // namespace diff --git a/phrase-extract/extract-rules.cpp b/phrase-extract/extract-rules.cpp index 762327681..c333040f6 100644 --- a/phrase-extract/extract-rules.cpp +++ b/phrase-extract/extract-rules.cpp @@ -52,6 +52,7 @@ #define LINE_MAX_LENGTH 500000 using namespace std; +using namespace MosesTraining; typedef vector< int > LabelIndex; typedef map< int, int > WordIndex; diff --git a/phrase-extract/extract.cpp b/phrase-extract/extract.cpp index 16b413da9..6583fd077 100644 --- a/phrase-extract/extract.cpp +++ b/phrase-extract/extract.cpp @@ -25,9 +25,13 @@ #include "OutputFileStream.h" using namespace std; +using namespace MosesTraining; #define LINE_MAX_LENGTH 500000 +namespace MosesTraining +{ + // HPhraseVertex represents a point in the alignment matrix typedef pair <int, int> HPhraseVertex; @@ -94,6 +98,8 @@ bool sentenceIdFlag = false; //create extract file with sentence id bool onlyOutputSpanInfo = false; bool gzOutput = false; +} + int main(int argc, char* argv[]) { cerr << "PhraseExtract v1.4, written by Philipp Koehn\n" @@ -253,6 +259,9 @@ int main(int argc, char* argv[]) } } +namespace MosesTraining +{ + void extract(SentenceAlignment &sentence) { int countE = sentence.target.size(); @@ -697,3 +706,6 @@ void extractBase( SentenceAlignment &sentence ) } } } + +} + diff --git a/phrase-extract/extract.xcodeproj/project.pbxproj b/phrase-extract/extract.xcodeproj/project.pbxproj index 1e02493cb..e0223f4d2 100644 --- a/phrase-extract/extract.xcodeproj/project.pbxproj +++ b/phrase-extract/extract.xcodeproj/project.pbxproj @@ -106,6 +106,8 @@ 1E7C2CFC11F1146300213451 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; }; 1E7C2CFE11F1146300213451 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; }; 1E7C2D0011F1146300213451 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; }; + 1E9B03A4159E70A100E91032 /* consolidate-reverse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "consolidate-reverse.cpp"; sourceTree = "<group>"; }; + 1E9B03A5159E70A100E91032 /* XmlException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XmlException.h; sourceTree = "<group>"; }; 1EB1C8301200D5C00079FCBB /* PhraseAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PhraseAlignment.h; sourceTree = "<group>"; }; 1EB1C8311200D5C00079FCBB /* PhraseAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PhraseAlignment.cpp; sourceTree = "<group>"; }; 1EB1C8491200D77E0079FCBB /* score.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = score.h; sourceTree = "<group>"; }; @@ -174,6 +176,8 @@ 08FB7795FE84155DC02AAC07 /* Source */ = { isa = PBXGroup; children = ( + 1E9B03A4159E70A100E91032 /* consolidate-reverse.cpp */, + 1E9B03A5159E70A100E91032 /* XmlException.h */, 1E671A7E155C22C500119DD9 /* OutputFileStream.cpp */, 1E671A7F155C22C500119DD9 /* OutputFileStream.h */, 1E3EF29E13DBEAF300C1D54A /* extract-lex.cpp */, diff --git a/phrase-extract/hierarchical.h b/phrase-extract/hierarchical.h index 61c899013..dd9c77a25 100644 --- a/phrase-extract/hierarchical.h +++ b/phrase-extract/hierarchical.h @@ -14,6 +14,9 @@ #include <set> #include <vector> +namespace MosesTraining +{ + // HPhraseVertex represents a point in the alignment matrix typedef std::pair <int, int> HPhraseVertex; @@ -29,5 +32,6 @@ typedef std::vector < HPhrase > HPhraseVector; // The key of the std::map is the English index and the value is a std::set of the foreign ones typedef std::map <int, std::set<int> > HSenteceVertices; +} // namespace #endif /* HIERARCHICAL_H_ */ diff --git a/phrase-extract/score.cpp b/phrase-extract/score.cpp index 5e0ade627..bfc95de09 100644 --- a/phrase-extract/score.cpp +++ b/phrase-extract/score.cpp @@ -35,43 +35,12 @@ #include "OutputFileStream.h" using namespace std; +using namespace MosesTraining; #define LINE_MAX_LENGTH 100000 -Vocabulary vcbT; -Vocabulary vcbS; - -class LexicalTable +namespace MosesTraining { -public: - map< WORD_ID, map< WORD_ID, double > > ltable; - void load( char[] ); - double permissiveLookup( WORD_ID wordS, WORD_ID wordT ) { - // cout << endl << vcbS.getWord( wordS ) << "-" << vcbT.getWord( wordT ) << ":"; - if (ltable.find( wordS ) == ltable.end()) return 1.0; - if (ltable[ wordS ].find( wordT ) == ltable[ wordS ].end()) return 1.0; - // cout << ltable[ wordS ][ wordT ]; - return ltable[ wordS ][ wordT ]; - } -}; - -vector<string> tokenize( const char [] ); - -void writeCountOfCounts( const string &fileNameCountOfCounts ); -void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile); -PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair ); -void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile ); -double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * ); -double computeUnalignedPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * ); -set<string> functionWordList; -void loadFunctionWords( const char* fileNameFunctionWords ); -double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * ); -void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs - , map<size_t, map<size_t, float> > &sourceProb - , map<size_t, map<size_t, float> > &targetProb); -void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &); -void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &); - LexicalTable lexTable; bool inverseFlag = false; bool hierarchicalFlag = false; @@ -92,6 +61,28 @@ int countOfCounts[COC_MAX+1]; int totalDistinct = 0; float minCountHierarchical = 0; +Vocabulary vcbT; +Vocabulary vcbS; + +} // namespace + +vector<string> tokenize( const char [] ); + +void writeCountOfCounts( const string &fileNameCountOfCounts ); +void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile); +PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair ); +void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile ); +double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * ); +double computeUnalignedPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * ); +set<string> functionWordList; +void loadFunctionWords( const char* fileNameFunctionWords ); +double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * ); +void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs + , map<size_t, map<size_t, float> > &sourceProb + , map<size_t, map<size_t, float> > &targetProb); +void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &); +void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &); + int main(int argc, char* argv[]) { cerr << "Score v2.0 written by Philipp Koehn\n" diff --git a/phrase-extract/score.h b/phrase-extract/score.h index 9faa144c5..ed9adc18c 100644 --- a/phrase-extract/score.h +++ b/phrase-extract/score.h @@ -10,6 +10,9 @@ #include <string> #include <vector> +namespace MosesTraining +{ + class PhraseAlignment; typedef std::vector<PhraseAlignment*> PhraseAlignmentCollection; @@ -58,8 +61,26 @@ private: }; +class LexicalTable +{ +public: + std::map< WORD_ID, std::map< WORD_ID, double > > ltable; + void load( char[] ); + double permissiveLookup( WORD_ID wordS, WORD_ID wordT ) { + // cout << endl << vcbS.getWord( wordS ) << "-" << vcbT.getWord( wordT ) << ":"; + if (ltable.find( wordS ) == ltable.end()) return 1.0; + if (ltable[ wordS ].find( wordT ) == ltable[ wordS ].end()) return 1.0; + // cout << ltable[ wordS ][ wordT ]; + return ltable[ wordS ][ wordT ]; + } +}; + // other functions ********************************************* inline bool isNonTerminal( const std::string &word ) { return (word.length()>=3 && word[0] == '[' && word[word.length()-1] == ']'); } + + +} + diff --git a/phrase-extract/tables-core.cpp b/phrase-extract/tables-core.cpp index 93ad8b6a1..6b35f371b 100644 --- a/phrase-extract/tables-core.cpp +++ b/phrase-extract/tables-core.cpp @@ -30,6 +30,9 @@ vector<string> tokenize( const char* input ) return token; } +namespace MosesTraining +{ + bool isNonTerminal( const WORD &symbol ) { return symbol.substr(0, 1) == "[" && symbol.substr(symbol.size()-1, 1) == "]"; } @@ -122,3 +125,5 @@ double DTable::get( int distortion ) return dtable[ distortion ]; } +} + diff --git a/phrase-extract/tables-core.h b/phrase-extract/tables-core.h index 1899b4d77..e239e5900 100644 --- a/phrase-extract/tables-core.h +++ b/phrase-extract/tables-core.h @@ -14,6 +14,9 @@ extern std::vector<std::string> tokenize( const char*); +namespace MosesTraining +{ + typedef std::string WORD; typedef unsigned int WORD_ID; @@ -63,4 +66,6 @@ public: double get( int ); }; +} + #endif |