Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieu@hoang.co.uk>2013-05-29 21:16:15 +0400
committerHieu Hoang <hieu@hoang.co.uk>2013-05-29 21:16:15 +0400
commit6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /phrase-extract
parent59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)
beautify
Diffstat (limited to 'phrase-extract')
-rw-r--r--phrase-extract/AlignmentPhrase.h2
-rw-r--r--phrase-extract/ExtractedRule.cpp9
-rw-r--r--phrase-extract/ExtractedRule.h9
-rw-r--r--phrase-extract/Hole.h2
-rw-r--r--phrase-extract/HoleCollection.cpp2
-rw-r--r--phrase-extract/OutputFileStream.cpp8
-rw-r--r--phrase-extract/PhraseAlignment.cpp51
-rw-r--r--phrase-extract/PhraseAlignment.h33
-rw-r--r--phrase-extract/PhraseExtractionOptions.h203
-rw-r--r--phrase-extract/RuleExtractionOptions.h2
-rw-r--r--phrase-extract/ScoreFeature.cpp129
-rw-r--r--phrase-extract/ScoreFeature.h110
-rw-r--r--phrase-extract/ScoreFeatureTest.cpp30
-rw-r--r--phrase-extract/SentenceAlignment.cpp12
-rw-r--r--phrase-extract/SentenceAlignment.h2
-rw-r--r--phrase-extract/XmlTree.cpp2
-rw-r--r--phrase-extract/consolidate-direct-main.cpp41
-rw-r--r--phrase-extract/consolidate-main.cpp71
-rw-r--r--phrase-extract/consolidate-reverse-main.cpp5
-rw-r--r--phrase-extract/domain.cpp60
-rw-r--r--phrase-extract/domain.h134
-rw-r--r--phrase-extract/extract-ghkm/Alignment.cpp12
-rw-r--r--phrase-extract/extract-ghkm/Alignment.h12
-rw-r--r--phrase-extract/extract-ghkm/AlignmentGraph.cpp18
-rw-r--r--phrase-extract/extract-ghkm/AlignmentGraph.h28
-rw-r--r--phrase-extract/extract-ghkm/ComposedRule.cpp34
-rw-r--r--phrase-extract/extract-ghkm/ComposedRule.h16
-rw-r--r--phrase-extract/extract-ghkm/Exception.h20
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.cpp144
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.h22
-rw-r--r--phrase-extract/extract-ghkm/Main.cpp6
-rw-r--r--phrase-extract/extract-ghkm/Node.cpp12
-rw-r--r--phrase-extract/extract-ghkm/Node.h96
-rw-r--r--phrase-extract/extract-ghkm/Options.h40
-rw-r--r--phrase-extract/extract-ghkm/ParseTree.cpp12
-rw-r--r--phrase-extract/extract-ghkm/ParseTree.h42
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.cpp18
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.h55
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.cpp20
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.h22
-rw-r--r--phrase-extract/extract-ghkm/Span.cpp12
-rw-r--r--phrase-extract/extract-ghkm/Span.h12
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.cpp12
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.h67
-rw-r--r--phrase-extract/extract-ghkm/XmlTreeParser.cpp20
-rw-r--r--phrase-extract/extract-ghkm/XmlTreeParser.h19
-rw-r--r--phrase-extract/extract-lex-main.cpp74
-rw-r--r--phrase-extract/extract-lex.h46
-rw-r--r--phrase-extract/extract-main.cpp122
-rw-r--r--phrase-extract/extract-rules-main.cpp84
-rw-r--r--phrase-extract/lexical-reordering/reordering_classes.cpp2
-rw-r--r--phrase-extract/lexical-reordering/score.cpp21
-rw-r--r--phrase-extract/pcfg-common/exception.h23
-rw-r--r--phrase-extract/pcfg-common/numbered_set.h55
-rw-r--r--phrase-extract/pcfg-common/pcfg.h35
-rw-r--r--phrase-extract/pcfg-common/pcfg_tree.h37
-rw-r--r--phrase-extract/pcfg-common/syntax_tree.h81
-rw-r--r--phrase-extract/pcfg-common/tool.h25
-rw-r--r--phrase-extract/pcfg-common/typedef.h12
-rw-r--r--phrase-extract/pcfg-common/xml_tree_parser.h19
-rw-r--r--phrase-extract/pcfg-common/xml_tree_writer.h30
-rw-r--r--phrase-extract/pcfg-extract/options.h12
-rw-r--r--phrase-extract/pcfg-extract/pcfg_extract.h19
-rw-r--r--phrase-extract/pcfg-extract/rule_collection.h35
-rw-r--r--phrase-extract/pcfg-extract/rule_extractor.h19
-rw-r--r--phrase-extract/pcfg-score/options.h12
-rw-r--r--phrase-extract/pcfg-score/pcfg_score.h19
-rw-r--r--phrase-extract/pcfg-score/tree_scorer.h19
-rw-r--r--phrase-extract/score-main.cpp251
-rw-r--r--phrase-extract/score.h2
-rw-r--r--phrase-extract/tables-core.cpp7
71 files changed, 1485 insertions, 1264 deletions
diff --git a/phrase-extract/AlignmentPhrase.h b/phrase-extract/AlignmentPhrase.h
index ec6431f18..52d9c85ea 100644
--- a/phrase-extract/AlignmentPhrase.h
+++ b/phrase-extract/AlignmentPhrase.h
@@ -25,7 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace MosesTraining
{
-
+
class WordsRange;
class AlignmentElement
diff --git a/phrase-extract/ExtractedRule.cpp b/phrase-extract/ExtractedRule.cpp
index 985f2f093..50d9085e6 100644
--- a/phrase-extract/ExtractedRule.cpp
+++ b/phrase-extract/ExtractedRule.cpp
@@ -23,20 +23,19 @@ void ExtractedRule::OutputNTLengths(std::ostream &out) const
void ExtractedRule::OutputNTLengths(std::ostringstream &outString) const
{
std::map<size_t, std::pair<size_t, size_t> >::const_iterator iter;
- for (iter = m_ntLengths.begin(); iter != m_ntLengths.end(); ++iter)
- {
+ for (iter = m_ntLengths.begin(); iter != m_ntLengths.end(); ++iter) {
size_t sourcePos = iter->first;
const std::pair<size_t, size_t> &spanLengths = iter->second;
- outString << sourcePos << "=" << spanLengths.first << "," <<spanLengths.second << " ";
+ outString << sourcePos << "=" << spanLengths.first << "," <<spanLengths.second << " ";
}
}
std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj)
{
- out << obj.source << " ||| " << obj.target << " ||| "
+ out << obj.source << " ||| " << obj.target << " ||| "
<< obj.alignment << " ||| "
<< obj.alignmentInv << " ||| ";
-
+
obj.OutputNTLengths(out);
return out;
diff --git a/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h
index 992a807b3..c26de37ca 100644
--- a/phrase-extract/ExtractedRule.h
+++ b/phrase-extract/ExtractedRule.h
@@ -49,7 +49,7 @@ public:
double pcfgScore;
std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
-
+
ExtractedRule(int sT, int eT, int sS, int eS)
: source()
, target()
@@ -64,12 +64,11 @@ public:
, count(0)
, pcfgScore(0.0)
{}
-
- void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength)
- {
+
+ void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength) {
m_ntLengths[sourcePos] = std::pair<size_t, size_t>(sourceLength, targetLength);
}
-
+
void OutputNTLengths(std::ostream &out) const;
void OutputNTLengths(std::ostringstream &out) const;
};
diff --git a/phrase-extract/Hole.h b/phrase-extract/Hole.h
index c570ec7a1..efedf2f53 100644
--- a/phrase-extract/Hole.h
+++ b/phrase-extract/Hole.h
@@ -72,7 +72,7 @@ public:
int GetSize(size_t direction) const {
return m_end[direction] - m_start[direction] + 1;
}
-
+
void SetPos(int pos, size_t direction) {
m_pos[direction] = pos;
}
diff --git a/phrase-extract/HoleCollection.cpp b/phrase-extract/HoleCollection.cpp
index fba295993..e63e2eacc 100644
--- a/phrase-extract/HoleCollection.cpp
+++ b/phrase-extract/HoleCollection.cpp
@@ -64,7 +64,7 @@ int HoleCollection::Scope(const Hole &proposedHole) const
const int holeEnd = proposedHole.GetEnd(0);
int scope = m_scope.back();
if (holeStart == m_sourcePhraseStart.back() ||
- find(m_sourceHoleEndPoints.begin(), m_sourceHoleEndPoints.end(), holeStart-1) != m_sourceHoleEndPoints.end()) {
+ find(m_sourceHoleEndPoints.begin(), m_sourceHoleEndPoints.end(), holeStart-1) != m_sourceHoleEndPoints.end()) {
++scope; // Adding hole would introduce choice point at start of hole.
}
if (holeEnd == m_sourcePhraseEnd.back() ||
diff --git a/phrase-extract/OutputFileStream.cpp b/phrase-extract/OutputFileStream.cpp
index 2cad33bb9..a61ce1ab1 100644
--- a/phrase-extract/OutputFileStream.cpp
+++ b/phrase-extract/OutputFileStream.cpp
@@ -46,11 +46,11 @@ OutputFileStream::~OutputFileStream()
bool OutputFileStream::Open(const std::string &filePath)
{
- m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
+ m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
if (m_outFile->fail()) {
return false;
}
-
+
if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
this->push(boost::iostreams::gzip_compressor());
}
@@ -64,10 +64,10 @@ void OutputFileStream::Close()
if (m_outFile == NULL) {
return;
}
-
+
this->flush();
this->pop(); // file
-
+
m_outFile->close();
delete m_outFile;
m_outFile = NULL;
diff --git a/phrase-extract/PhraseAlignment.cpp b/phrase-extract/PhraseAlignment.cpp
index bdfead082..f830e411f 100644
--- a/phrase-extract/PhraseAlignment.cpp
+++ b/phrase-extract/PhraseAlignment.cpp
@@ -29,10 +29,10 @@ extern bool hierarchicalFlag;
template<typename T>
inline T Scan(const std::string &input)
{
- std::stringstream stream(input);
- T ret;
- stream >> ret;
- return ret;
+ std::stringstream stream(input);
+ T ret;
+ stream >> ret;
+ return ret;
}
@@ -40,11 +40,10 @@ inline T Scan(const std::string &input)
template<typename T>
inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
{
- output.resize(input.size());
- for (size_t i = 0 ; i < input.size() ; i++)
- {
- output[i] = Scan<T>( input[i] );
- }
+ output.resize(input.size());
+ for (size_t i = 0 ; i < input.size() ; i++) {
+ output[i] = Scan<T>( input[i] );
+ }
}
@@ -56,7 +55,7 @@ inline void Tokenize(std::vector<std::string> &output
std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
// Find first "non-delimiter".
std::string::size_type pos = str.find_first_of(delimiters, lastPos);
-
+
while (std::string::npos != pos || std::string::npos != lastPos) {
// Found a token, add it to the vector.
output.push_back(str.substr(lastPos, pos - lastPos));
@@ -70,12 +69,12 @@ inline void Tokenize(std::vector<std::string> &output
// speeded up version of above
template<typename T>
inline void Tokenize( std::vector<T> &output
- , const std::string &input
- , const std::string& delimiters = " \t")
+ , const std::string &input
+ , const std::string& delimiters = " \t")
{
- std::vector<std::string> stringVector;
- Tokenize(stringVector, input, delimiters);
- return Scan<T>(output, stringVector );
+ std::vector<std::string> stringVector;
+ Tokenize(stringVector, input, delimiters);
+ return Scan<T>(output, stringVector );
}
// read in a phrase pair and store it
@@ -94,8 +93,7 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
else if (item == 2) { // target phrase
phraseT.push_back( vcbT.storeIfNew( token[j] ) );
- }
- else if (item == 3) { // alignment
+ } else if (item == 3) { // alignment
int s,t;
sscanf(token[j].c_str(), "%d-%d", &s, &t);
if ((size_t)t >= phraseT.size() || (size_t)s >= phraseS.size()) {
@@ -135,17 +133,17 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
void PhraseAlignment::addNTLength(const std::string &tok)
{
vector< string > tokens;
-
+
Tokenize(tokens, tok, "=");
assert(tokens.size() == 2);
-
+
size_t sourcePos = Scan<size_t>(tokens[0]);
assert(sourcePos < phraseS.size());
-
+
vector< size_t > ntLengths;
Tokenize<size_t>(ntLengths, tokens[1], ",");
assert(ntLengths.size() == 2);
-
+
m_ntLengths[sourcePos] = std::pair<size_t, size_t>(ntLengths[0], ntLengths[1]);
}
@@ -211,13 +209,13 @@ int PhraseAlignment::Compare(const PhraseAlignment &other) const
if (this == &other) // comparing with itself
return 0;
- if (GetTarget() != other.GetTarget())
+ if (GetTarget() != other.GetTarget())
return ( GetTarget() < other.GetTarget() ) ? -1 : +1;
if (GetSource() != other.GetSource())
- return ( GetSource() < other.GetSource() ) ? -1 : +1;
+ return ( GetSource() < other.GetSource() ) ? -1 : +1;
- if (!hierarchicalFlag)
+ if (!hierarchicalFlag)
return 0;
// loop over all words (note: 0 = left hand side of rule)
@@ -228,15 +226,14 @@ int PhraseAlignment::Compare(const PhraseAlignment &other) const
if (alignedToT[i].size() != 1 ||
other.alignedToT[i].size() != 1 ||
- thisAlign != otherAlign)
- {
+ thisAlign != otherAlign) {
int ret = (thisAlign < otherAlign) ? -1 : +1;
return ret;
}
}
}
return 0;
-
+
}
}
diff --git a/phrase-extract/PhraseAlignment.h b/phrase-extract/PhraseAlignment.h
index c0df2aa37..06d9cfad0 100644
--- a/phrase-extract/PhraseAlignment.h
+++ b/phrase-extract/PhraseAlignment.h
@@ -24,7 +24,7 @@ protected:
PHRASE phraseT;
std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
-
+
void createAlignVec(size_t sourceSize, size_t targetSize);
void addNTLength(const std::string &tok);
public:
@@ -41,11 +41,10 @@ public:
bool equals( const PhraseAlignment& );
bool match( const PhraseAlignment& );
- int Compare(const PhraseAlignment &compare) const;
- inline bool operator<(const PhraseAlignment &compare) const
- {
- return Compare(compare) < 0;
- }
+ int Compare(const PhraseAlignment &compare) const;
+ inline bool operator<(const PhraseAlignment &compare) const {
+ return Compare(compare) < 0;
+ }
const PHRASE &GetSource() const {
return phraseS;
@@ -53,9 +52,10 @@ public:
const PHRASE &GetTarget() const {
return phraseT;
}
-
- const std::map<size_t, std::pair<size_t, size_t> > &GetNTLengths() const
- { return m_ntLengths; }
+
+ const std::map<size_t, std::pair<size_t, size_t> > &GetNTLengths() const {
+ return m_ntLengths;
+ }
};
@@ -67,8 +67,7 @@ typedef std::vector<PhraseAlignment*> PhraseAlignmentCollection;
class PhraseAlignmentCollectionOrderer
{
public:
- bool operator()(const PhraseAlignmentCollection &collA, const PhraseAlignmentCollection &collB) const
- {
+ bool operator()(const PhraseAlignmentCollection &collA, const PhraseAlignmentCollection &collB) const {
assert(collA.size() > 0);
assert(collB.size() > 0);
@@ -77,7 +76,7 @@ public:
bool ret = objA < objB;
return ret;
- }
+ }
};
@@ -97,10 +96,12 @@ public:
std::pair<Coll::iterator,bool> insert ( const PhraseAlignmentCollection& obj );
- const SortedColl &GetSortedColl() const
- { return m_sortedColl; }
- size_t GetSize() const
- { return m_coll.size(); }
+ const SortedColl &GetSortedColl() const {
+ return m_sortedColl;
+ }
+ size_t GetSize() const {
+ return m_coll.size();
+ }
private:
SortedColl m_sortedColl;
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 2daeaf0ca..60e56b08c 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -29,11 +29,12 @@ enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
-class PhraseExtractionOptions {
-
- public:
- const int maxPhraseLength;
- private:
+class PhraseExtractionOptions
+{
+
+public:
+ const int maxPhraseLength;
+private:
bool allModelsOutputFlag;
bool wordModel;
REO_MODEL_TYPE wordType;
@@ -48,103 +49,103 @@ class PhraseExtractionOptions {
bool gzOutput;
std::string instanceWeightsFile; //weights for each sentence
-public:
+public:
PhraseExtractionOptions(const int initmaxPhraseLength):
- maxPhraseLength(initmaxPhraseLength),
- allModelsOutputFlag(false),
- wordModel(false),
- wordType(REO_MSD),
- phraseModel(false),
- phraseType(REO_MSD),
- hierModel(false),
- hierType(REO_MSD),
- orientationFlag(false),
- translationFlag(true),
- includeSentenceIdFlag(false),
- onlyOutputSpanInfo(false),
- gzOutput(false){}
-
- //functions for initialization of options
- void initAllModelsOutputFlag(const bool initallModelsOutputFlag){
- allModelsOutputFlag=initallModelsOutputFlag;
- }
- void initWordModel(const bool initwordModel){
- wordModel=initwordModel;
- }
- void initWordType(REO_MODEL_TYPE initwordType ){
- wordType=initwordType;
- }
- void initPhraseModel(const bool initphraseModel ){
- phraseModel=initphraseModel;
- }
- void initPhraseType(REO_MODEL_TYPE initphraseType){
- phraseType=initphraseType;
- }
- void initHierModel(const bool inithierModel){
- hierModel=inithierModel;
- }
- void initHierType(REO_MODEL_TYPE inithierType){
- hierType=inithierType;
- }
- void initOrientationFlag(const bool initorientationFlag){
- orientationFlag=initorientationFlag;
- }
- void initTranslationFlag(const bool inittranslationFlag){
- translationFlag=inittranslationFlag;
- }
- void initIncludeSentenceIdFlag(const bool initincludeSentenceIdFlag){
- includeSentenceIdFlag=initincludeSentenceIdFlag;
- }
- void initOnlyOutputSpanInfo(const bool initonlyOutputSpanInfo){
- onlyOutputSpanInfo= initonlyOutputSpanInfo;
- }
- void initGzOutput (const bool initgzOutput){
- gzOutput= initgzOutput;
- }
- void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
- instanceWeightsFile = std::string(initInstanceWeightsFile);
- }
-
- // functions for getting values
- bool isAllModelsOutputFlag() const {
- return allModelsOutputFlag;
- }
- bool isWordModel() const {
- return wordModel;
- }
- REO_MODEL_TYPE isWordType() const {
- return wordType;
- }
- bool isPhraseModel() const {
- return phraseModel;
- }
- REO_MODEL_TYPE isPhraseType() const {
- return phraseType;
- }
- bool isHierModel() const {
- return hierModel;
- }
- REO_MODEL_TYPE isHierType() const {
- return hierType;
- }
- bool isOrientationFlag() const {
- return orientationFlag;
- }
- bool isTranslationFlag() const {
- return translationFlag;
- }
- bool isIncludeSentenceIdFlag() const {
- return includeSentenceIdFlag;
- }
- bool isOnlyOutputSpanInfo() const {
- return onlyOutputSpanInfo;
- }
- bool isGzOutput () const {
- return gzOutput;
- }
- std::string getInstanceWeightsFile() const {
- return instanceWeightsFile;
- }
+ maxPhraseLength(initmaxPhraseLength),
+ allModelsOutputFlag(false),
+ wordModel(false),
+ wordType(REO_MSD),
+ phraseModel(false),
+ phraseType(REO_MSD),
+ hierModel(false),
+ hierType(REO_MSD),
+ orientationFlag(false),
+ translationFlag(true),
+ includeSentenceIdFlag(false),
+ onlyOutputSpanInfo(false),
+ gzOutput(false) {}
+
+ //functions for initialization of options
+ void initAllModelsOutputFlag(const bool initallModelsOutputFlag) {
+ allModelsOutputFlag=initallModelsOutputFlag;
+ }
+ void initWordModel(const bool initwordModel) {
+ wordModel=initwordModel;
+ }
+ void initWordType(REO_MODEL_TYPE initwordType ) {
+ wordType=initwordType;
+ }
+ void initPhraseModel(const bool initphraseModel ) {
+ phraseModel=initphraseModel;
+ }
+ void initPhraseType(REO_MODEL_TYPE initphraseType) {
+ phraseType=initphraseType;
+ }
+ void initHierModel(const bool inithierModel) {
+ hierModel=inithierModel;
+ }
+ void initHierType(REO_MODEL_TYPE inithierType) {
+ hierType=inithierType;
+ }
+ void initOrientationFlag(const bool initorientationFlag) {
+ orientationFlag=initorientationFlag;
+ }
+ void initTranslationFlag(const bool inittranslationFlag) {
+ translationFlag=inittranslationFlag;
+ }
+ void initIncludeSentenceIdFlag(const bool initincludeSentenceIdFlag) {
+ includeSentenceIdFlag=initincludeSentenceIdFlag;
+ }
+ void initOnlyOutputSpanInfo(const bool initonlyOutputSpanInfo) {
+ onlyOutputSpanInfo= initonlyOutputSpanInfo;
+ }
+ void initGzOutput (const bool initgzOutput) {
+ gzOutput= initgzOutput;
+ }
+ void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
+ instanceWeightsFile = std::string(initInstanceWeightsFile);
+ }
+
+ // functions for getting values
+ bool isAllModelsOutputFlag() const {
+ return allModelsOutputFlag;
+ }
+ bool isWordModel() const {
+ return wordModel;
+ }
+ REO_MODEL_TYPE isWordType() const {
+ return wordType;
+ }
+ bool isPhraseModel() const {
+ return phraseModel;
+ }
+ REO_MODEL_TYPE isPhraseType() const {
+ return phraseType;
+ }
+ bool isHierModel() const {
+ return hierModel;
+ }
+ REO_MODEL_TYPE isHierType() const {
+ return hierType;
+ }
+ bool isOrientationFlag() const {
+ return orientationFlag;
+ }
+ bool isTranslationFlag() const {
+ return translationFlag;
+ }
+ bool isIncludeSentenceIdFlag() const {
+ return includeSentenceIdFlag;
+ }
+ bool isOnlyOutputSpanInfo() const {
+ return onlyOutputSpanInfo;
+ }
+ bool isGzOutput () const {
+ return gzOutput;
+ }
+ std::string getInstanceWeightsFile() const {
+ return instanceWeightsFile;
+ }
};
}
diff --git a/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h
index 431be58b0..772d803a4 100644
--- a/phrase-extract/RuleExtractionOptions.h
+++ b/phrase-extract/RuleExtractionOptions.h
@@ -54,7 +54,7 @@ public:
bool unpairedExtractFormat;
bool conditionOnTargetLhs;
bool boundaryRules;
-
+
RuleExtractionOptions()
: maxSpan(10)
, minHoleSource(2)
diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp
index 5998c528c..25e497df2 100644
--- a/phrase-extract/ScoreFeature.cpp
+++ b/phrase-extract/ScoreFeature.cpp
@@ -22,82 +22,81 @@
using namespace std;
-namespace MosesTraining
+namespace MosesTraining
{
- const string& ScoreFeatureManager::usage() const
- {
- const static string& usage = "[--[Sparse]Domain[Indicator|Ratio|Subset|Bin] domain-file [bins]]" ;
- return usage;
- }
+const string& ScoreFeatureManager::usage() const
+{
+ const static string& usage = "[--[Sparse]Domain[Indicator|Ratio|Subset|Bin] domain-file [bins]]" ;
+ return usage;
+}
- void ScoreFeatureManager::configure(const std::vector<std::string> args)
- {
- bool domainAdded = false;
- bool sparseDomainAdded = false;
- for (size_t i = 0; i < args.size(); ++i) {
- if (args[i] == "--IgnoreSentenceId") {
- m_includeSentenceId = true;
+void ScoreFeatureManager::configure(const std::vector<std::string> args)
+{
+ bool domainAdded = false;
+ bool sparseDomainAdded = false;
+ for (size_t i = 0; i < args.size(); ++i) {
+ if (args[i] == "--IgnoreSentenceId") {
+ m_includeSentenceId = true;
+ } else if (args[i].substr(0,8) == "--Domain") {
+ string type = args[i].substr(8);
+ ++i;
+ UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
+ string domainFile = args[i];
+ UTIL_THROW_IF(domainAdded, ScoreFeatureArgumentException,
+ "Only allowed one domain feature");
+ if (type == "Subset") {
+ m_features.push_back(ScoreFeaturePtr(new SubsetDomainFeature(domainFile)));
+ } else if (type == "Ratio") {
+ m_features.push_back(ScoreFeaturePtr(new RatioDomainFeature(domainFile)));
+ } else if (type == "Indicator") {
+ m_features.push_back(ScoreFeaturePtr(new IndicatorDomainFeature(domainFile)));
+ } else {
+ UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
}
- else if (args[i].substr(0,8) == "--Domain") {
- string type = args[i].substr(8);
- ++i;
- UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
- string domainFile = args[i];
- UTIL_THROW_IF(domainAdded, ScoreFeatureArgumentException,
- "Only allowed one domain feature");
- if (type == "Subset") {
- m_features.push_back(ScoreFeaturePtr(new SubsetDomainFeature(domainFile)));
- } else if (type == "Ratio") {
- m_features.push_back(ScoreFeaturePtr(new RatioDomainFeature(domainFile)));
- } else if (type == "Indicator") {
- m_features.push_back(ScoreFeaturePtr(new IndicatorDomainFeature(domainFile)));
- } else {
- UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
- }
- domainAdded = true;
- m_includeSentenceId = true;
- } else if (args[i].substr(0,14) == "--SparseDomain") {
- string type = args[i].substr(14);
- ++i;
- UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
- string domainFile = args[i];
- UTIL_THROW_IF(sparseDomainAdded, ScoreFeatureArgumentException,
- "Only allowed one sparse domain feature");
- if (type == "Subset") {
- m_features.push_back(ScoreFeaturePtr(new SparseSubsetDomainFeature(domainFile)));
- } else if (type == "Ratio") {
- m_features.push_back(ScoreFeaturePtr(new SparseRatioDomainFeature(domainFile)));
- } else if (type == "Indicator") {
- m_features.push_back(ScoreFeaturePtr(new SparseIndicatorDomainFeature(domainFile)));
- } else {
- UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
- }
- sparseDomainAdded = true;
- m_includeSentenceId = true;
+ domainAdded = true;
+ m_includeSentenceId = true;
+ } else if (args[i].substr(0,14) == "--SparseDomain") {
+ string type = args[i].substr(14);
+ ++i;
+ UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
+ string domainFile = args[i];
+ UTIL_THROW_IF(sparseDomainAdded, ScoreFeatureArgumentException,
+ "Only allowed one sparse domain feature");
+ if (type == "Subset") {
+ m_features.push_back(ScoreFeaturePtr(new SparseSubsetDomainFeature(domainFile)));
+ } else if (type == "Ratio") {
+ m_features.push_back(ScoreFeaturePtr(new SparseRatioDomainFeature(domainFile)));
+ } else if (type == "Indicator") {
+ m_features.push_back(ScoreFeaturePtr(new SparseIndicatorDomainFeature(domainFile)));
} else {
- UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
+ UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
}
+ sparseDomainAdded = true;
+ m_includeSentenceId = true;
+ } else {
+ UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
}
-
}
- bool ScoreFeatureManager::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const
- {
- for (size_t i = 0; i < m_features.size(); ++i) {
- if (!m_features[i]->equals(lhs,rhs)) return false;
- }
- return true;
+}
+
+bool ScoreFeatureManager::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const
+{
+ for (size_t i = 0; i < m_features.size(); ++i) {
+ if (!m_features[i]->equals(lhs,rhs)) return false;
}
+ return true;
+}
- void ScoreFeatureManager::addFeatures(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
- {
- for (size_t i = 0; i < m_features.size(); ++i) {
- m_features[i]->add(context, denseValues, sparseValues);
- }
- }
+void ScoreFeatureManager::addFeatures(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
+{
+ for (size_t i = 0; i < m_features.size(); ++i) {
+ m_features[i]->add(context, denseValues, sparseValues);
+ }
+}
}
diff --git a/phrase-extract/ScoreFeature.h b/phrase-extract/ScoreFeature.h
index c7d856bcf..76939436f 100644
--- a/phrase-extract/ScoreFeature.h
+++ b/phrase-extract/ScoreFeature.h
@@ -20,7 +20,7 @@
/**
* This contains extra features that can be added to the scorer. To add a new feature:
* 1. Implement a subclass of ScoreFeature
- * 2. Updated ScoreFeatureManager.configure() to configure your feature, and usage() to
+ * 2. Updated ScoreFeatureManager.configure() to configure your feature, and usage() to
* display usage info.
* 3. Write unit tests (see ScoreFeatureTest.cpp) and regression tests
**/
@@ -37,35 +37,37 @@
#include "PhraseAlignment.h"
-namespace MosesTraining
+namespace MosesTraining
{
-struct MaybeLog{
+struct MaybeLog {
MaybeLog(bool useLog, float negativeLog):
m_useLog(useLog), m_negativeLog(negativeLog) {}
-
- inline float operator() (float a) const
- { return m_useLog ? m_negativeLog*log(a) : a; }
+
+ inline float operator() (float a) const {
+ return m_useLog ? m_negativeLog*log(a) : a;
+ }
float m_useLog;
float m_negativeLog;
};
-class ScoreFeatureArgumentException : public util::Exception
+class ScoreFeatureArgumentException : public util::Exception
{
- public:
- ScoreFeatureArgumentException() throw() {*this << "Unable to configure features: ";}
- ~ScoreFeatureArgumentException() throw() {}
+public:
+ ScoreFeatureArgumentException() throw() {
+ *this << "Unable to configure features: ";
+ }
+ ~ScoreFeatureArgumentException() throw() {}
};
/** Passed to each feature to be used to calculate its values */
-struct ScoreFeatureContext
-{
+struct ScoreFeatureContext {
ScoreFeatureContext(
const PhraseAlignmentCollection &thePhrasePair,
float theCount, /* Total counts of all phrase pairs*/
const MaybeLog& theMaybeLog
- ) :
+ ) :
phrasePair(thePhrasePair),
count(theCount),
maybeLog(theMaybeLog)
@@ -82,53 +84,57 @@ struct ScoreFeatureContext
**/
class ScoreFeature
{
- public:
- /** Add the values for this feature function. */
- virtual void add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const = 0;
+public:
+ /** Add the values for this feature function. */
+ virtual void add(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const = 0;
- /** Return true if the two phrase pairs are equal from the point of this feature. Assume
- that they already compare true according to PhraseAlignment.equals()
- **/
- virtual bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const = 0;
+ /** Return true if the two phrase pairs are equal from the point of this feature. Assume
+ that they already compare true according to PhraseAlignment.equals()
+ **/
+ virtual bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const = 0;
- virtual ~ScoreFeature() {}
+ virtual ~ScoreFeature() {}
};
typedef boost::shared_ptr<ScoreFeature> ScoreFeaturePtr;
class ScoreFeatureManager
{
- public:
- ScoreFeatureManager():
- m_includeSentenceId(false) {}
-
- /** To be appended to the score usage message */
- const std::string& usage() const;
-
- /** Pass the unused command-line arguments to configure the extra features */
- void configure(const std::vector<std::string> args);
-
- /** Add all the features */
- void addFeatures(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
-
- /**
- * Used to tell if the PhraseAlignment should be considered the same by all
- * extended features.
- **/
- bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
-
- const std::vector<ScoreFeaturePtr>& getFeatures() const {return m_features;}
-
- /** Do we need to include sentence ids in phrase pairs? */
- bool includeSentenceId() const {return m_includeSentenceId;}
-
- private:
- std::vector<ScoreFeaturePtr> m_features;
- bool m_includeSentenceId;
+public:
+ ScoreFeatureManager():
+ m_includeSentenceId(false) {}
+
+ /** To be appended to the score usage message */
+ const std::string& usage() const;
+
+ /** Pass the unused command-line arguments to configure the extra features */
+ void configure(const std::vector<std::string> args);
+
+ /** Add all the features */
+ void addFeatures(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
+
+ /**
+ * Used to tell if the PhraseAlignment should be considered the same by all
+ * extended features.
+ **/
+ bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
+
+ const std::vector<ScoreFeaturePtr>& getFeatures() const {
+ return m_features;
+ }
+
+ /** Do we need to include sentence ids in phrase pairs? */
+ bool includeSentenceId() const {
+ return m_includeSentenceId;
+ }
+
+private:
+ std::vector<ScoreFeaturePtr> m_features;
+ bool m_includeSentenceId;
};
}
diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp
index fecde015a..f4570fe30 100644
--- a/phrase-extract/ScoreFeatureTest.cpp
+++ b/phrase-extract/ScoreFeatureTest.cpp
@@ -31,14 +31,16 @@ using namespace MosesTraining;
using namespace std;
//pesky global variables
-namespace MosesTraining {
- bool hierarchicalFlag = false;
- Vocabulary vcbT;
- Vocabulary vcbS;
+namespace MosesTraining
+{
+bool hierarchicalFlag = false;
+Vocabulary vcbT;
+Vocabulary vcbS;
}
-const char *DomainFileLocation() {
+const char *DomainFileLocation()
+{
if (boost::unit_test::framework::master_test_suite().argc < 2) {
return "test.domain";
}
@@ -62,7 +64,7 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
template <class Expected>
static void checkDomainConfigured(
- const vector<string>& args)
+ const vector<string>& args)
{
ScoreFeatureManager manager;
manager.configure(args);
@@ -76,17 +78,17 @@ static void checkDomainConfigured(
BOOST_AUTO_TEST_CASE(manager_config_domain)
{
checkDomainConfigured<RatioDomainFeature>
- (boost::assign::list_of ("--DomainRatio")("/dev/null"));
+ (boost::assign::list_of ("--DomainRatio")("/dev/null"));
checkDomainConfigured<IndicatorDomainFeature>
- (boost::assign::list_of("--DomainIndicator")("/dev/null"));
+ (boost::assign::list_of("--DomainIndicator")("/dev/null"));
checkDomainConfigured<SubsetDomainFeature>
- (boost::assign::list_of("--DomainSubset")("/dev/null"));
+ (boost::assign::list_of("--DomainSubset")("/dev/null"));
checkDomainConfigured<SparseRatioDomainFeature>
- (boost::assign::list_of("--SparseDomainRatio")("/dev/null"));
+ (boost::assign::list_of("--SparseDomainRatio")("/dev/null"));
checkDomainConfigured<SparseIndicatorDomainFeature>
- (boost::assign::list_of("--SparseDomainIndicator")("/dev/null"));
+ (boost::assign::list_of("--SparseDomainIndicator")("/dev/null"));
checkDomainConfigured<SparseSubsetDomainFeature>
- (boost::assign::list_of("--SparseDomainSubset")("/dev/null"));
+ (boost::assign::list_of("--SparseDomainSubset")("/dev/null"));
}
@@ -98,8 +100,8 @@ BOOST_AUTO_TEST_CASE(domain_equals)
char buf2[] = "a ||| b ||| 0-0 ||| 2";
char buf3[] = "a ||| b ||| 0-0 ||| 3";
a1.create(buf1, 0, true); //domain a
- a2.create(buf2, 1, true); //domain c
- a3.create(buf3, 2, true); //domain c
+ a2.create(buf2, 1, true); //domain c
+ a3.create(buf3, 2, true); //domain c
BOOST_CHECK(feature.equals(a2,a3));
BOOST_CHECK(!feature.equals(a1,a3));
BOOST_CHECK(!feature.equals(a1,a3));
diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp
index 96ef02865..b2d5520aa 100644
--- a/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@@ -94,12 +94,12 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
return false;
}
-
+
if (boundaryRules) {
++s;
++t;
}
-
+
// cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << s << ", " << t << endl;
if ((size_t)t >= target.size() || (size_t)s >= source.size()) {
cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
@@ -109,16 +109,16 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
alignedToT[t].push_back( s );
alignedCountS[s]++;
}
-
+
if (boundaryRules) {
alignedToT[0].push_back(0);
alignedCountS[0]++;
-
+
alignedToT.back().push_back(alignedCountS.size() - 1);
alignedCountS.back()++;
-
+
}
-
+
return true;
}
diff --git a/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h
index 76cf950d4..e215f5fef 100644
--- a/phrase-extract/SentenceAlignment.h
+++ b/phrase-extract/SentenceAlignment.h
@@ -45,7 +45,7 @@ public:
bool create(char targetString[], char sourceString[],
char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
-
+
};
}
diff --git a/phrase-extract/XmlTree.cpp b/phrase-extract/XmlTree.cpp
index 05bcefe20..eedb3b260 100644
--- a/phrase-extract/XmlTree.cpp
+++ b/phrase-extract/XmlTree.cpp
@@ -364,7 +364,7 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
string pcfgString = ParseXmlTagAttribute(tagContent,"pcfg");
float pcfgScore = pcfgString == "" ? 0.0f
- : std::atof(pcfgString.c_str());
+ : std::atof(pcfgString.c_str());
// report what we have processed so far
if (0) {
diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp
index e7e68e977..3b38f741c 100644
--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@@ -89,21 +89,20 @@ int main(int argc, char* argv[])
char* &fileNameConsolidated = argv[2];
ostream *fileConsolidated;
-
- if (strcmp(fileNameConsolidated, "-") == 0) {
- fileConsolidated = &cout;
- }
- else {
+
+ if (strcmp(fileNameConsolidated, "-") == 0) {
+ fileConsolidated = &cout;
+ } else {
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
- bool success = outputFile->Open(fileNameConsolidated);
- if (!success) {
- cerr << "ERROR: could not open file phrase table file "
- << fileNameConsolidated << endl;
- exit(1);
- }
- fileConsolidated = outputFile;
- }
-
+ bool success = outputFile->Open(fileNameConsolidated);
+ if (!success) {
+ cerr << "ERROR: could not open file phrase table file "
+ << fileNameConsolidated << endl;
+ exit(1);
+ }
+ fileConsolidated = outputFile;
+ }
+
int i=0;
while(true) {
i++;
@@ -119,8 +118,8 @@ int main(int argc, char* argv[])
// output alignment and probabilities
(*fileConsolidated) << itemDirect[2] // prob direct
- << " 2.718" // phrase count feature
- << " ||| " << itemDirect[3]; // alignment
+ << " 2.718" // phrase count feature
+ << " ||| " << itemDirect[3]; // alignment
// counts
(*fileConsolidated) << "||| 0 " << itemDirect[4]; // indirect
@@ -128,11 +127,11 @@ int main(int argc, char* argv[])
}
- fileConsolidated->flush();
- if (fileConsolidated != &cout) {
- delete fileConsolidated;
- }
-
+ fileConsolidated->flush();
+ if (fileConsolidated != &cout) {
+ delete fileConsolidated;
+ }
+
cerr << "Finished" << endl;
}
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index fd33907de..67a097910 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -42,7 +42,10 @@ bool goodTuringFlag = false;
bool kneserNeyFlag = false;
bool logProbFlag = false;
bool outputNTLengths = false;
-inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; }
+inline float maybeLogProb( float a )
+{
+ return logProbFlag ? log(a) : a;
+}
char line[LINE_MAX_LENGTH];
void processFiles( char*, char*, char*, char* );
@@ -79,7 +82,7 @@ int main(int argc, char* argv[])
cerr << "not including the phrase count feature\n";
} else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
- if (i+1==argc) {
+ if (i+1==argc) {
cerr << "ERROR: specify count of count files for Good Turing discounting!\n";
exit(1);
}
@@ -87,7 +90,7 @@ int main(int argc, char* argv[])
cerr << "adjusting phrase translation probabilities with Good Turing discounting\n";
} else if (strcmp(argv[i],"--KneserNey") == 0) {
kneserNeyFlag = true;
- if (i+1==argc) {
+ if (i+1==argc) {
cerr << "ERROR: specify count of count files for Kneser Ney discounting!\n";
exit(1);
}
@@ -105,8 +108,11 @@ int main(int argc, char* argv[])
while(i+1<argc && argv[i+1][0]>='0' && argv[i+1][0]<='9') {
int binCount = atoi(argv[++i]);
countBin.push_back( binCount );
- if (prev+1 == binCount) { cerr << " " << binCount; }
- else { cerr << " " << (prev+1) << "-" << binCount; }
+ if (prev+1 == binCount) {
+ cerr << " " << binCount;
+ } else {
+ cerr << " " << (prev+1) << "-" << binCount;
+ }
prev = binCount;
}
cerr << " " << (prev+1) << "+\n";
@@ -152,7 +158,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
if (goodTuringFlag) {
goodTuringDiscount.push_back(0.01); // floor value
for( size_t i=1; i<countOfCounts.size()-1; i++ ) {
- goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1)));
+ goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1)));
if (goodTuringDiscount[i]>1)
goodTuringDiscount[i] = 1;
if (goodTuringDiscount[i]<goodTuringDiscount[i-1])
@@ -253,21 +259,21 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
float adjustedCountEF_indirect = adjustedCountEF;
// Kneser Ney discounting [Foster et al, 2006]
- if (kneserNeyFlag) {
- float D = kneserNey_D3;
- if (countEF < 2) D = kneserNey_D1;
- else if (countEF < 3) D = kneserNey_D2;
- if (D > countEF) D = countEF - 0.01; // sanity constraint
-
- float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
- float alpha_F = D * n1_F / countF; // available mass
- adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
-
- // for indirect
- float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
- float alpha_E = D * n1_E / countE; // available mass
- adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
- }
+ if (kneserNeyFlag) {
+ float D = kneserNey_D3;
+ if (countEF < 2) D = kneserNey_D1;
+ else if (countEF < 3) D = kneserNey_D2;
+ if (D > countEF) D = countEF - 0.01; // sanity constraint
+
+ float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
+ float alpha_F = D * n1_F / countF; // available mass
+ adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
+
+ // for indirect
+ float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
+ float alpha_E = D * n1_E / countE; // available mass
+ adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
+ }
// prob indirect
if (!onlyDirectFlag) {
@@ -296,30 +302,27 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
if (!foundBin && countEF <= countBin[i]) {
fileConsolidated << " " << maybeLogProb(2.718);
foundBin = true;
- }
- else {
+ } else {
fileConsolidated << " " << maybeLogProb(1);
}
}
- fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
+ fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
}
// alignment
fileConsolidated << " ||| " << itemDirect[3];
// counts, for debugging
- fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
+ fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
- if (outputNTLengths)
- {
+ if (outputNTLengths) {
fileConsolidated << " ||| " << itemDirect[5];
}
-
+
// count bin feature (as a sparse feature)
- if (sparseCountBinFeatureFlag ||
- directSparseScores.compare("") != 0 ||
- indirectSparseScores.compare("") != 0)
- {
+ if (sparseCountBinFeatureFlag ||
+ directSparseScores.compare("") != 0 ||
+ indirectSparseScores.compare("") != 0) {
fileConsolidated << " |||";
if (directSparseScores.compare("") != 0)
fileConsolidated << " " << directSparseScores;
@@ -351,13 +354,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated.Close();
}
-void breakdownCoreAndSparse( string combined, string &core, string &sparse )
+void breakdownCoreAndSparse( string combined, string &core, string &sparse )
{
core = "";
sparse = "";
vector<string> score = tokenize( combined.c_str() );
for(size_t i=0; i<score.size(); i++) {
- if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
+ if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
core += " " + score[i];
else {
sparse += " " + score[i];
diff --git a/phrase-extract/consolidate-reverse-main.cpp b/phrase-extract/consolidate-reverse-main.cpp
index c86d870c8..6843bf3aa 100644
--- a/phrase-extract/consolidate-reverse-main.cpp
+++ b/phrase-extract/consolidate-reverse-main.cpp
@@ -229,13 +229,12 @@ string reverseAlignment(const string &alignments)
vector<string> alignToks = tokenize(alignments.c_str());
- for (size_t i = 0; i < alignToks.size(); ++i)
- {
+ for (size_t i = 0; i < alignToks.size(); ++i) {
string &alignPair = alignToks[i];
vector<string> alignPoints;
Tokenize(alignPoints, alignPair, "-");
assert(alignPoints.size() == 2);
-
+
ret << alignPoints[1] << "-" << alignPoints[0] << " ";
}
diff --git a/phrase-extract/domain.cpp b/phrase-extract/domain.cpp
index 29ba8ee64..67b4a13c3 100644
--- a/phrase-extract/domain.cpp
+++ b/phrase-extract/domain.cpp
@@ -13,7 +13,8 @@ namespace MosesTraining
{
// handling of domain names: load database with sentence-id / domain name info
-void Domain::load( const std::string &domainFileName ) {
+void Domain::load( const std::string &domainFileName )
+{
Moses::InputFileStream fileS( domainFileName );
istream *fileP = &fileS;
while(true) {
@@ -39,7 +40,8 @@ void Domain::load( const std::string &domainFileName ) {
}
// get domain name based on sentence number
-string Domain::getDomainOfSentence( int sentenceId ) const {
+string Domain::getDomainOfSentence( int sentenceId ) const
+{
for(size_t i=0; i<spec.size(); i++) {
if (sentenceId <= spec[i].first) {
return spec[i].second;
@@ -54,9 +56,9 @@ DomainFeature::DomainFeature(const string& domainFile)
m_domain.load(domainFile);
}
-void DomainFeature::add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+void DomainFeature::add(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
map< string, float > domainCount;
for(size_t i=0; i<context.phrasePair.size(); i++) {
@@ -71,13 +73,13 @@ void DomainFeature::add(const ScoreFeatureContext& context,
}
void SubsetDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
- if (m_domain.list.size() > 6) {
+ if (m_domain.list.size() > 6) {
UTIL_THROW_IF(m_domain.list.size() > 6, ScoreFeatureArgumentException,
- "too many domains for core domain subset features");
+ "too many domains for core domain subset features");
}
size_t bitmap = 0;
for(size_t bit = 0; bit < m_domain.list.size(); bit++) {
@@ -87,13 +89,13 @@ void SubsetDomainFeature::add(const map<string,float>& domainCount,float count,
}
for(size_t i = 1; i < (1 << m_domain.list.size()); i++) {
denseValues.push_back(maybeLog( (bitmap == i) ? 2.718 : 1 ));
- }
+ }
}
void SparseSubsetDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
typedef vector<string>::const_iterator I;
ostringstream key;
@@ -108,9 +110,9 @@ void SparseSubsetDomainFeature::add(const map<string,float>& domainCount,float c
void RatioDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
typedef vector< string >::const_iterator I;
for (I i = m_domain.list.begin(); i != m_domain.list.end(); i++ ) {
@@ -125,9 +127,9 @@ void RatioDomainFeature::add(const map<string,float>& domainCount,float count,
void SparseRatioDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
typedef map< string, float >::const_iterator I;
for (I i=domainCount.begin(); i != domainCount.end(); i++) {
@@ -137,9 +139,9 @@ void SparseRatioDomainFeature::add(const map<string,float>& domainCount,float co
void IndicatorDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
typedef vector< string >::const_iterator I;
for (I i = m_domain.list.begin(); i != m_domain.list.end(); i++ ) {
@@ -154,20 +156,20 @@ void IndicatorDomainFeature::add(const map<string,float>& domainCount,float coun
}
void SparseIndicatorDomainFeature::add(const map<string,float>& domainCount,float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
{
typedef map< string, float >::const_iterator I;
for (I i=domainCount.begin(); i != domainCount.end(); i++) {
- sparseValues["dom_" + i->first] = 1;
+ sparseValues["dom_" + i->first] = 1;
}
}
-bool DomainFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const
+bool DomainFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const
{
return m_domain.getDomainOfSentence(lhs.sentenceId) ==
- m_domain.getDomainOfSentence( rhs.sentenceId);
+ m_domain.getDomainOfSentence( rhs.sentenceId);
}
diff --git a/phrase-extract/domain.h b/phrase-extract/domain.h
index f3e1e92a3..279496e01 100644
--- a/phrase-extract/domain.h
+++ b/phrase-extract/domain.h
@@ -31,106 +31,106 @@ public:
class DomainFeature : public ScoreFeature
{
- public:
+public:
+
+ DomainFeature(const std::string& domainFile);
+ bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
+ void add(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
- DomainFeature(const std::string& domainFile);
- bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
- void add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+protected:
+ /** Overriden in subclass */
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const = 0;
- protected:
- /** Overriden in subclass */
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const = 0;
-
- Domain m_domain;
+ Domain m_domain;
};
class SubsetDomainFeature : public DomainFeature
{
- public:
- SubsetDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
-
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+public:
+ SubsetDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
+
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class SparseSubsetDomainFeature : public DomainFeature
{
- public:
- SparseSubsetDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
+public:
+ SparseSubsetDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class IndicatorDomainFeature : public DomainFeature
{
- public:
- IndicatorDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
-
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+public:
+ IndicatorDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
+
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class SparseIndicatorDomainFeature : public DomainFeature
{
- public:
- SparseIndicatorDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
-
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+public:
+ SparseIndicatorDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
+
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class RatioDomainFeature : public DomainFeature
{
- public:
- RatioDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
-
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+public:
+ RatioDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
+
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class SparseRatioDomainFeature : public DomainFeature
{
- public:
- SparseRatioDomainFeature(const std::string& domainFile) :
- DomainFeature(domainFile) {}
-
- protected:
- virtual void add(const std::map<std::string,float>& domainCounts, float count,
- const MaybeLog& maybeLog,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+public:
+ SparseRatioDomainFeature(const std::string& domainFile) :
+ DomainFeature(domainFile) {}
+
+protected:
+ virtual void add(const std::map<std::string,float>& domainCounts, float count,
+ const MaybeLog& maybeLog,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
diff --git a/phrase-extract/extract-ghkm/Alignment.cpp b/phrase-extract/extract-ghkm/Alignment.cpp
index fcd5e14e1..744b4b1a2 100644
--- a/phrase-extract/extract-ghkm/Alignment.cpp
+++ b/phrase-extract/extract-ghkm/Alignment.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -24,8 +24,10 @@
#include <cassert>
#include <cstdlib>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
Alignment ReadAlignment(const std::string &s)
{
diff --git a/phrase-extract/extract-ghkm/Alignment.h b/phrase-extract/extract-ghkm/Alignment.h
index bc42191e1..051d5ca92 100644
--- a/phrase-extract/extract-ghkm/Alignment.h
+++ b/phrase-extract/extract-ghkm/Alignment.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -25,8 +25,10 @@
#include <utility>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
typedef std::vector<std::pair<int, int> > Alignment;
diff --git a/phrase-extract/extract-ghkm/AlignmentGraph.cpp b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
index 6bd32a13b..974188dbd 100644
--- a/phrase-extract/extract-ghkm/AlignmentGraph.cpp
+++ b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,8 +30,10 @@
#include <memory>
#include <stack>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
AlignmentGraph::AlignmentGraph(const ParseTree *t,
const std::vector<std::string> &s,
@@ -84,8 +86,8 @@ AlignmentGraph::~AlignmentGraph()
}
Subgraph AlignmentGraph::ComputeMinimalFrontierGraphFragment(
- Node *root,
- const std::set<Node *> &frontierSet)
+ Node *root,
+ const std::set<Node *> &frontierSet)
{
std::stack<Node *> expandableNodes;
std::set<const Node *> expandedNodes;
@@ -302,7 +304,7 @@ void AlignmentGraph::CalcComplementSpans(Node *root)
}
void AlignmentGraph::GetTargetTreeLeaves(Node *root,
- std::vector<Node *> &leaves)
+ std::vector<Node *> &leaves)
{
if (root->IsSink()) {
leaves.push_back(root);
diff --git a/phrase-extract/extract-ghkm/AlignmentGraph.h b/phrase-extract/extract-ghkm/AlignmentGraph.h
index 94948758a..cf26b8c27 100644
--- a/phrase-extract/extract-ghkm/AlignmentGraph.h
+++ b/phrase-extract/extract-ghkm/AlignmentGraph.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -28,8 +28,10 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Node;
class ParseTree;
@@ -37,20 +39,24 @@ class Subgraph;
class AlignmentGraph
{
- public:
+public:
AlignmentGraph(const ParseTree *,
const std::vector<std::string> &,
const Alignment &);
~AlignmentGraph();
- Node *GetRoot() { return m_root; }
- const std::vector<Node *> &GetTargetNodes() { return m_targetNodes; }
+ Node *GetRoot() {
+ return m_root;
+ }
+ const std::vector<Node *> &GetTargetNodes() {
+ return m_targetNodes;
+ }
void ExtractMinimalRules(const Options &);
void ExtractComposedRules(const Options &);
- private:
+private:
// Disallow copying
AlignmentGraph(const AlignmentGraph &);
AlignmentGraph &operator=(const AlignmentGraph &);
@@ -58,11 +64,11 @@ class AlignmentGraph
Node *CopyParseTree(const ParseTree *);
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
void CalcComplementSpans(Node *);
- void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
+ void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
void AttachUnalignedSourceWords();
Node *DetermineAttachmentPoint(int);
Subgraph ComputeMinimalFrontierGraphFragment(Node *,
- const std::set<Node *> &);
+ const std::set<Node *> &);
void ExtractComposedRules(Node *, const Options &);
Node *m_root;
diff --git a/phrase-extract/extract-ghkm/ComposedRule.cpp b/phrase-extract/extract-ghkm/ComposedRule.cpp
index 8bf3cfc72..e9fc826b7 100644
--- a/phrase-extract/extract-ghkm/ComposedRule.cpp
+++ b/phrase-extract/extract-ghkm/ComposedRule.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -27,14 +27,16 @@
#include <vector>
#include <queue>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
ComposedRule::ComposedRule(const Subgraph &baseRule)
- : m_baseRule(baseRule)
- , m_depth(baseRule.GetDepth())
- , m_size(baseRule.GetSize())
- , m_nodeCount(baseRule.GetNodeCount())
+ : m_baseRule(baseRule)
+ , m_depth(baseRule.GetDepth())
+ , m_size(baseRule.GetSize())
+ , m_nodeCount(baseRule.GetNodeCount())
{
const std::set<const Node *> &leaves = baseRule.GetLeaves();
for (std::set<const Node *>::const_iterator p = leaves.begin();
@@ -47,12 +49,12 @@ ComposedRule::ComposedRule(const Subgraph &baseRule)
ComposedRule::ComposedRule(const ComposedRule &other, const Subgraph &rule,
int depth)
- : m_baseRule(other.m_baseRule)
- , m_attachedRules(other.m_attachedRules)
- , m_openAttachmentPoints(other.m_openAttachmentPoints)
- , m_depth(depth)
- , m_size(other.m_size+rule.GetSize())
- , m_nodeCount(other.m_nodeCount+rule.GetNodeCount()-1)
+ : m_baseRule(other.m_baseRule)
+ , m_attachedRules(other.m_attachedRules)
+ , m_openAttachmentPoints(other.m_openAttachmentPoints)
+ , m_depth(depth)
+ , m_size(other.m_size+rule.GetSize())
+ , m_nodeCount(other.m_nodeCount+rule.GetNodeCount()-1)
{
m_attachedRules.push_back(&rule);
m_openAttachmentPoints.pop();
@@ -71,7 +73,7 @@ void ComposedRule::CloseAttachmentPoint()
}
ComposedRule *ComposedRule::AttemptComposition(const Subgraph &rule,
- const Options &options) const
+ const Options &options) const
{
// The smallest possible rule fragment should be rooted at a tree node.
// Note that this differs from the original GHKM definition.
diff --git a/phrase-extract/extract-ghkm/ComposedRule.h b/phrase-extract/extract-ghkm/ComposedRule.h
index 65ce9ac70..b5f72a492 100644
--- a/phrase-extract/extract-ghkm/ComposedRule.h
+++ b/phrase-extract/extract-ghkm/ComposedRule.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -26,15 +26,17 @@
#include <vector>
#include <queue>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Node;
struct Options;
class ComposedRule
{
- public:
+public:
// Form a 'trivial' ComposedRule from a single existing rule.
ComposedRule(const Subgraph &baseRule);
@@ -53,7 +55,7 @@ class ComposedRule
// Constructs a Subgraph object corresponding to the composed rule.
Subgraph CreateSubgraph();
- private:
+private:
ComposedRule(const ComposedRule &, const Subgraph &, int);
const Subgraph &m_baseRule;
diff --git a/phrase-extract/extract-ghkm/Exception.h b/phrase-extract/extract-ghkm/Exception.h
index 9928785f0..a1e623cd1 100644
--- a/phrase-extract/extract-ghkm/Exception.h
+++ b/phrase-extract/extract-ghkm/Exception.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,16 +23,20 @@
#include <string>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Exception
{
- public:
+public:
Exception(const char *msg) : m_msg(msg) {}
Exception(const std::string &msg) : m_msg(msg) {}
- const std::string &GetMsg() const { return m_msg; }
- private:
+ const std::string &GetMsg() const {
+ return m_msg;
+ }
+private:
std::string m_msg;
};
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index e3b52943c..80568ccd5 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -43,8 +43,10 @@
#include <sstream>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
int ExtractGHKM::Main(int argc, char *argv[])
{
@@ -107,7 +109,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
++lineNum;
// Parse target tree.
- if (targetLine.size() == 0) {
+ if (targetLine.size() == 0) {
std::cerr << "skipping line " << lineNum << " with empty target tree\n";
continue;
}
@@ -263,64 +265,64 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
// Declare the command line options that are visible to the user.
po::options_description visible(usageTop.str());
visible.add_options()
- //("help", "print this help message and exit")
- ("AllowUnary",
- "allow fully non-lexical unary rules")
- ("ConditionOnTargetLHS",
- "write target LHS instead of \"X\" as source LHS")
- ("GlueGrammar",
- po::value(&options.glueGrammarFile),
- "write glue grammar to named file")
- ("GZOutput",
- "write gzipped extract files")
- ("MaxNodes",
- po::value(&options.maxNodes)->default_value(options.maxNodes),
- "set maximum number of tree nodes for composed rules")
- ("MaxRuleDepth",
- po::value(&options.maxRuleDepth)->default_value(options.maxRuleDepth),
- "set maximum depth for composed rules")
- ("MaxRuleSize",
- po::value(&options.maxRuleSize)->default_value(options.maxRuleSize),
- "set maximum size for composed rules")
- ("MaxScope",
- po::value(&options.maxScope)->default_value(options.maxScope),
- "set maximum allowed scope")
- ("Minimal",
- "extract minimal rules only")
- ("PCFG",
- "include score based on PCFG scores in target corpus")
- ("SentenceOffset",
- po::value(&options.sentenceOffset)->default_value(options.sentenceOffset),
- "set sentence number offset if processing split corpus")
- ("UnknownWordLabel",
- po::value(&options.unknownWordFile),
- "write unknown word labels to named file")
- ("UnknownWordMinRelFreq",
- po::value(&options.unknownWordMinRelFreq)->default_value(
- options.unknownWordMinRelFreq),
- "set minimum relative frequency for unknown word labels")
- ("UnknownWordUniform",
- "write uniform weights to unknown word label file")
- ("UnpairedExtractFormat",
- "do not pair non-terminals in extract files")
+ //("help", "print this help message and exit")
+ ("AllowUnary",
+ "allow fully non-lexical unary rules")
+ ("ConditionOnTargetLHS",
+ "write target LHS instead of \"X\" as source LHS")
+ ("GlueGrammar",
+ po::value(&options.glueGrammarFile),
+ "write glue grammar to named file")
+ ("GZOutput",
+ "write gzipped extract files")
+ ("MaxNodes",
+ po::value(&options.maxNodes)->default_value(options.maxNodes),
+ "set maximum number of tree nodes for composed rules")
+ ("MaxRuleDepth",
+ po::value(&options.maxRuleDepth)->default_value(options.maxRuleDepth),
+ "set maximum depth for composed rules")
+ ("MaxRuleSize",
+ po::value(&options.maxRuleSize)->default_value(options.maxRuleSize),
+ "set maximum size for composed rules")
+ ("MaxScope",
+ po::value(&options.maxScope)->default_value(options.maxScope),
+ "set maximum allowed scope")
+ ("Minimal",
+ "extract minimal rules only")
+ ("PCFG",
+ "include score based on PCFG scores in target corpus")
+ ("SentenceOffset",
+ po::value(&options.sentenceOffset)->default_value(options.sentenceOffset),
+ "set sentence number offset if processing split corpus")
+ ("UnknownWordLabel",
+ po::value(&options.unknownWordFile),
+ "write unknown word labels to named file")
+ ("UnknownWordMinRelFreq",
+ po::value(&options.unknownWordMinRelFreq)->default_value(
+ options.unknownWordMinRelFreq),
+ "set minimum relative frequency for unknown word labels")
+ ("UnknownWordUniform",
+ "write uniform weights to unknown word label file")
+ ("UnpairedExtractFormat",
+ "do not pair non-terminals in extract files")
;
// Declare the command line options that are hidden from the user
// (these are used as positional options).
po::options_description hidden("Hidden options");
hidden.add_options()
- ("TargetFile",
- po::value(&options.targetFile),
- "target file")
- ("SourceFile",
- po::value(&options.sourceFile),
- "source file")
- ("AlignmentFile",
- po::value(&options.alignmentFile),
- "alignment file")
- ("ExtractFile",
- po::value(&options.extractFile),
- "extract file")
+ ("TargetFile",
+ po::value(&options.targetFile),
+ "target file")
+ ("SourceFile",
+ po::value(&options.sourceFile),
+ "source file")
+ ("AlignmentFile",
+ po::value(&options.alignmentFile),
+ "alignment file")
+ ("ExtractFile",
+ po::value(&options.extractFile),
+ "extract file")
;
// Compose the full set of command-line options.
@@ -337,8 +339,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
// Process the command-line.
po::variables_map vm;
const int optionStyle = cls::allow_long
- | cls::long_allow_adjacent
- | cls::long_allow_next;
+ | cls::long_allow_adjacent
+ | cls::long_allow_next;
try {
po::store(po::command_line_parser(argc, argv).style(optionStyle).
options(cmdLineOptions).positional(p).run(), vm);
@@ -424,9 +426,9 @@ std::vector<std::string> ExtractGHKM::ReadTokens(const std::string &s)
}
void ExtractGHKM::WriteGlueGrammar(
- const std::set<std::string> &labelSet,
- const std::map<std::string, int> &topLabelSet,
- std::ostream &out)
+ const std::set<std::string> &labelSet,
+ const std::map<std::string, int> &topLabelSet,
+ std::ostream &out)
{
// chose a top label that is not already a label
std::string topLabel = "QQQQQQ";
@@ -457,10 +459,10 @@ void ExtractGHKM::WriteGlueGrammar(
}
void ExtractGHKM::CollectWordLabelCounts(
- ParseTree &root,
- const Options &options,
- std::map<std::string, int> &wordCount,
- std::map<std::string, std::string> &wordLabel)
+ ParseTree &root,
+ const Options &options,
+ std::map<std::string, int> &wordCount,
+ std::map<std::string, std::string> &wordLabel)
{
std::vector<const ParseTree*> leaves;
root.GetLeaves(std::back_inserter(leaves));
@@ -486,10 +488,10 @@ void ExtractGHKM::CollectWordLabelCounts(
}
void ExtractGHKM::WriteUnknownWordLabel(
- const std::map<std::string, int> &wordCount,
- const std::map<std::string, std::string> &wordLabel,
- const Options &options,
- std::ostream &out)
+ const std::map<std::string, int> &wordCount,
+ const std::map<std::string, std::string> &wordLabel,
+ const Options &options,
+ std::ostream &out)
{
std::map<std::string, int> labelCount;
int total = 0;
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.h b/phrase-extract/extract-ghkm/ExtractGHKM.h
index 6519bf675..c78aea109 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.h
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -27,22 +27,26 @@
#include <string>
#include <vector>
-namespace Moses {
+namespace Moses
+{
class OutputFileStream;
-namespace GHKM {
+namespace GHKM
+{
struct Options;
class ParseTree;
class ExtractGHKM
{
- public:
+public:
ExtractGHKM() : m_name("extract-ghkm") {}
- const std::string &GetName() const { return m_name; }
+ const std::string &GetName() const {
+ return m_name;
+ }
int Main(int argc, char *argv[]);
- private:
+private:
void Error(const std::string &) const;
void OpenInputFileOrDie(const std::string &, std::ifstream &);
void OpenOutputFileOrDie(const std::string &, std::ofstream &);
@@ -60,7 +64,7 @@ class ExtractGHKM
const std::map<std::string, int> &,
std::ostream &);
std::vector<std::string> ReadTokens(const std::string &);
-
+
void ProcessOptions(int, char *[], Options &) const;
std::string m_name;
diff --git a/phrase-extract/extract-ghkm/Main.cpp b/phrase-extract/extract-ghkm/Main.cpp
index faf3230a6..14064406b 100644
--- a/phrase-extract/extract-ghkm/Main.cpp
+++ b/phrase-extract/extract-ghkm/Main.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/extract-ghkm/Node.cpp b/phrase-extract/extract-ghkm/Node.cpp
index beb7470b8..e14d8c050 100644
--- a/phrase-extract/extract-ghkm/Node.cpp
+++ b/phrase-extract/extract-ghkm/Node.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -21,8 +21,10 @@
#include "Subgraph.h"
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
Node::~Node()
{
diff --git a/phrase-extract/extract-ghkm/Node.h b/phrase-extract/extract-ghkm/Node.h
index 775473362..2eed01311 100644
--- a/phrase-extract/extract-ghkm/Node.h
+++ b/phrase-extract/extract-ghkm/Node.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -28,8 +28,10 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Subgraph;
@@ -37,34 +39,68 @@ enum NodeType { SOURCE, TARGET, TREE };
class Node
{
- public:
+public:
Node(const std::string &label, NodeType type)
- : m_label(label)
- , m_type(type)
- , m_pcfgScore(0.0f) {}
+ : m_label(label)
+ , m_type(type)
+ , m_pcfgScore(0.0f) {}
~Node();
- const std::string &GetLabel() const { return m_label; }
- NodeType GetType() const { return m_type; }
- const std::vector<Node*> &GetChildren() const { return m_children; }
- const std::vector<Node*> &GetParents() const { return m_parents; }
- float GetPcfgScore() const { return m_pcfgScore; }
- const Span &GetSpan() const { return m_span; }
- const Span &GetComplementSpan() const { return m_complementSpan; }
- const std::vector<const Subgraph*> &GetRules() const { return m_rules; }
-
- void SetChildren(const std::vector<Node*> &c) { m_children = c; }
- void SetParents(const std::vector<Node*> &p) { m_parents = p; }
- void SetPcfgScore(float s) { m_pcfgScore = s; }
- void SetSpan(const Span &s) { m_span = s; }
- void SetComplementSpan(const Span &cs) { m_complementSpan = cs; }
-
- void AddChild(Node *c) { m_children.push_back(c); }
- void AddParent(Node *p) { m_parents.push_back(p); }
- void AddRule(const Subgraph *s) { m_rules.push_back(s); }
-
- bool IsSink() const { return m_children.empty(); }
+ const std::string &GetLabel() const {
+ return m_label;
+ }
+ NodeType GetType() const {
+ return m_type;
+ }
+ const std::vector<Node*> &GetChildren() const {
+ return m_children;
+ }
+ const std::vector<Node*> &GetParents() const {
+ return m_parents;
+ }
+ float GetPcfgScore() const {
+ return m_pcfgScore;
+ }
+ const Span &GetSpan() const {
+ return m_span;
+ }
+ const Span &GetComplementSpan() const {
+ return m_complementSpan;
+ }
+ const std::vector<const Subgraph*> &GetRules() const {
+ return m_rules;
+ }
+
+ void SetChildren(const std::vector<Node*> &c) {
+ m_children = c;
+ }
+ void SetParents(const std::vector<Node*> &p) {
+ m_parents = p;
+ }
+ void SetPcfgScore(float s) {
+ m_pcfgScore = s;
+ }
+ void SetSpan(const Span &s) {
+ m_span = s;
+ }
+ void SetComplementSpan(const Span &cs) {
+ m_complementSpan = cs;
+ }
+
+ void AddChild(Node *c) {
+ m_children.push_back(c);
+ }
+ void AddParent(Node *p) {
+ m_parents.push_back(p);
+ }
+ void AddRule(const Subgraph *s) {
+ m_rules.push_back(s);
+ }
+
+ bool IsSink() const {
+ return m_children.empty();
+ }
bool IsPreterminal() const;
void PropagateIndex(int);
@@ -82,7 +118,7 @@ class Node
template<typename InputIterator>
static Node *LowestCommonAncestor(InputIterator first, InputIterator last);
- private:
+private:
// Disallow copying
Node(const Node &);
Node &operator=(const Node &);
diff --git a/phrase-extract/extract-ghkm/Options.h b/phrase-extract/extract-ghkm/Options.h
index d348a57d8..e54a9ddae 100644
--- a/phrase-extract/extract-ghkm/Options.h
+++ b/phrase-extract/extract-ghkm/Options.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,25 +23,27 @@
#include <string>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
struct Options {
- public:
+public:
Options()
- : allowUnary(false)
- , conditionOnTargetLhs(false)
- , gzOutput(false)
- , maxNodes(15)
- , maxRuleDepth(3)
- , maxRuleSize(3)
- , maxScope(3)
- , minimal(false)
- , pcfg(false)
- , sentenceOffset(0)
- , unpairedExtractFormat(false)
- , unknownWordMinRelFreq(0.03f)
- , unknownWordUniform(false) {}
+ : allowUnary(false)
+ , conditionOnTargetLhs(false)
+ , gzOutput(false)
+ , maxNodes(15)
+ , maxRuleDepth(3)
+ , maxRuleSize(3)
+ , maxScope(3)
+ , minimal(false)
+ , pcfg(false)
+ , sentenceOffset(0)
+ , unpairedExtractFormat(false)
+ , unknownWordMinRelFreq(0.03f)
+ , unknownWordUniform(false) {}
// Positional options
std::string targetFile;
diff --git a/phrase-extract/extract-ghkm/ParseTree.cpp b/phrase-extract/extract-ghkm/ParseTree.cpp
index 052b8dee1..f86486487 100644
--- a/phrase-extract/extract-ghkm/ParseTree.cpp
+++ b/phrase-extract/extract-ghkm/ParseTree.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -19,8 +19,10 @@
#include "ParseTree.h"
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
ParseTree::~ParseTree()
{
diff --git a/phrase-extract/extract-ghkm/ParseTree.h b/phrase-extract/extract-ghkm/ParseTree.h
index 273e2e04e..03da17735 100644
--- a/phrase-extract/extract-ghkm/ParseTree.h
+++ b/phrase-extract/extract-ghkm/ParseTree.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -24,27 +24,39 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class ParseTree
{
- public:
+public:
ParseTree(const std::string &label)
- : m_label(label)
- , m_parent(0)
- , m_pcfgScore(0.0) {}
+ : m_label(label)
+ , m_parent(0)
+ , m_pcfgScore(0.0) {}
~ParseTree();
- const std::string &GetLabel() const { return m_label; }
- const std::vector<ParseTree*> &GetChildren() const { return m_children; }
- const ParseTree *GetParent() const { return m_parent; }
- float GetPcfgScore() const { return m_pcfgScore; }
+ const std::string &GetLabel() const {
+ return m_label;
+ }
+ const std::vector<ParseTree*> &GetChildren() const {
+ return m_children;
+ }
+ const ParseTree *GetParent() const {
+ return m_parent;
+ }
+ float GetPcfgScore() const {
+ return m_pcfgScore;
+ }
void SetParent(ParseTree *);
void SetChildren(const std::vector<ParseTree*> &);
- void SetPcfgScore(float score) { m_pcfgScore = score; }
+ void SetPcfgScore(float score) {
+ m_pcfgScore = score;
+ }
void AddChild(ParseTree *);
@@ -53,7 +65,7 @@ class ParseTree
template<typename OutputIterator>
void GetLeaves(OutputIterator);
- private:
+private:
// Disallow copying
ParseTree(const ParseTree &);
ParseTree &operator=(const ParseTree &);
diff --git a/phrase-extract/extract-ghkm/ScfgRule.cpp b/phrase-extract/extract-ghkm/ScfgRule.cpp
index 5dc70052c..2c901413d 100644
--- a/phrase-extract/extract-ghkm/ScfgRule.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRule.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -24,13 +24,15 @@
#include <algorithm>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
ScfgRule::ScfgRule(const Subgraph &fragment)
- : m_sourceLHS("X", NonTerminal)
- , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
- , m_pcfgScore(fragment.GetPcfgScore())
+ : m_sourceLHS("X", NonTerminal)
+ , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
+ , m_pcfgScore(fragment.GetPcfgScore())
{
// Source RHS
diff --git a/phrase-extract/extract-ghkm/ScfgRule.h b/phrase-extract/extract-ghkm/ScfgRule.h
index 2405d8fa3..21a9e9900 100644
--- a/phrase-extract/extract-ghkm/ScfgRule.h
+++ b/phrase-extract/extract-ghkm/ScfgRule.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -26,42 +26,59 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Node;
class Subgraph;
enum SymbolType { Terminal, NonTerminal };
-struct Symbol
-{
- public:
+struct Symbol {
+public:
Symbol(const std::string &v, SymbolType t) : m_value(v) , m_type(t) {}
- const std::string &GetValue() const { return m_value; }
- SymbolType GetType() const { return m_type; }
+ const std::string &GetValue() const {
+ return m_value;
+ }
+ SymbolType GetType() const {
+ return m_type;
+ }
- private:
+private:
std::string m_value;
SymbolType m_type;
};
class ScfgRule
{
- public:
+public:
ScfgRule(const Subgraph &fragment);
- const Symbol &GetSourceLHS() const { return m_sourceLHS; }
- const Symbol &GetTargetLHS() const { return m_targetLHS; }
- const std::vector<Symbol> &GetSourceRHS() const { return m_sourceRHS; }
- const std::vector<Symbol> &GetTargetRHS() const { return m_targetRHS; }
- const Alignment &GetAlignment() const { return m_alignment; }
- float GetPcfgScore() const { return m_pcfgScore; }
+ const Symbol &GetSourceLHS() const {
+ return m_sourceLHS;
+ }
+ const Symbol &GetTargetLHS() const {
+ return m_targetLHS;
+ }
+ const std::vector<Symbol> &GetSourceRHS() const {
+ return m_sourceRHS;
+ }
+ const std::vector<Symbol> &GetTargetRHS() const {
+ return m_targetRHS;
+ }
+ const Alignment &GetAlignment() const {
+ return m_alignment;
+ }
+ float GetPcfgScore() const {
+ return m_pcfgScore;
+ }
int Scope() const;
- private:
+private:
static bool PartitionOrderComp(const Node *, const Node *);
Symbol m_sourceLHS;
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index cd993d6e8..54b3978d1 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,8 +30,10 @@
#include <sstream>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
void ScfgRuleWriter::Write(const ScfgRule &rule)
{
@@ -70,8 +72,8 @@ void ScfgRuleWriter::Write(const ScfgRule &rule)
}
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
- std::ostream &sourceSS,
- std::ostream &targetSS)
+ std::ostream &sourceSS,
+ std::ostream &targetSS)
{
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
@@ -122,8 +124,8 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
}
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
- std::ostream &sourceSS,
- std::ostream &targetSS)
+ std::ostream &sourceSS,
+ std::ostream &targetSS)
{
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index b92a432a1..ee29e49e5 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,8 +23,10 @@
#include <ostream>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
struct Options;
class ScfgRule;
@@ -32,15 +34,15 @@ struct Symbol;
class ScfgRuleWriter
{
- public:
+public:
ScfgRuleWriter(std::ostream &fwd, std::ostream &inv, const Options &options)
- : m_fwd(fwd)
- , m_inv(inv)
- , m_options(options) {}
+ : m_fwd(fwd)
+ , m_inv(inv)
+ , m_options(options) {}
void Write(const ScfgRule &);
- private:
+private:
// Disallow copying
ScfgRuleWriter(const ScfgRuleWriter &);
ScfgRuleWriter &operator=(const ScfgRuleWriter &);
diff --git a/phrase-extract/extract-ghkm/Span.cpp b/phrase-extract/extract-ghkm/Span.cpp
index f0eccbdf2..d637ec3d2 100644
--- a/phrase-extract/extract-ghkm/Span.cpp
+++ b/phrase-extract/extract-ghkm/Span.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -19,8 +19,10 @@
#include "Span.h"
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
bool SpansIntersect(const Span &a, const ContiguousSpan &b)
{
diff --git a/phrase-extract/extract-ghkm/Span.h b/phrase-extract/extract-ghkm/Span.h
index 003d1ef84..c4d146c4e 100644
--- a/phrase-extract/extract-ghkm/Span.h
+++ b/phrase-extract/extract-ghkm/Span.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -24,8 +24,10 @@
#include <map>
#include <set>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
typedef std::set<int> Span;
typedef std::pair<int, int> ContiguousSpan;
diff --git a/phrase-extract/extract-ghkm/Subgraph.cpp b/phrase-extract/extract-ghkm/Subgraph.cpp
index e048f2c55..3c0503010 100644
--- a/phrase-extract/extract-ghkm/Subgraph.cpp
+++ b/phrase-extract/extract-ghkm/Subgraph.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -21,8 +21,10 @@
#include "Node.h"
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
void Subgraph::GetTargetLeaves(std::vector<const Node *> &result) const
{
diff --git a/phrase-extract/extract-ghkm/Subgraph.h b/phrase-extract/extract-ghkm/Subgraph.h
index ede1233e9..f4d1e0c8d 100644
--- a/phrase-extract/extract-ghkm/Subgraph.h
+++ b/phrase-extract/extract-ghkm/Subgraph.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -26,47 +26,62 @@
#include <set>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class Node;
class Subgraph
{
- public:
+public:
Subgraph(const Node *root)
- : m_root(root)
- , m_depth(0)
- , m_size(root->GetType() == TREE ? 1 : 0)
- , m_nodeCount(1)
- , m_pcfgScore(0.0f) {}
+ : m_root(root)
+ , m_depth(0)
+ , m_size(root->GetType() == TREE ? 1 : 0)
+ , m_nodeCount(1)
+ , m_pcfgScore(0.0f) {}
Subgraph(const Node *root, const std::set<const Node *> &leaves)
- : m_root(root)
- , m_leaves(leaves)
- , m_depth(-1)
- , m_size(-1)
- , m_nodeCount(-1)
- , m_pcfgScore(0.0f)
- {
+ : m_root(root)
+ , m_leaves(leaves)
+ , m_depth(-1)
+ , m_size(-1)
+ , m_nodeCount(-1)
+ , m_pcfgScore(0.0f) {
m_depth = CalcDepth(m_root);
m_size = CalcSize(m_root);
m_nodeCount = CountNodes(m_root);
m_pcfgScore = CalcPcfgScore();
}
- const Node *GetRoot() const { return m_root; }
- const std::set<const Node *> &GetLeaves() const { return m_leaves; }
- int GetDepth() const { return m_depth; }
- int GetSize() const { return m_size; }
- int GetNodeCount() const { return m_nodeCount; }
- float GetPcfgScore() const { return m_pcfgScore; }
+ const Node *GetRoot() const {
+ return m_root;
+ }
+ const std::set<const Node *> &GetLeaves() const {
+ return m_leaves;
+ }
+ int GetDepth() const {
+ return m_depth;
+ }
+ int GetSize() const {
+ return m_size;
+ }
+ int GetNodeCount() const {
+ return m_nodeCount;
+ }
+ float GetPcfgScore() const {
+ return m_pcfgScore;
+ }
- bool IsTrivial() const { return m_leaves.empty(); }
+ bool IsTrivial() const {
+ return m_leaves.empty();
+ }
void GetTargetLeaves(std::vector<const Node *> &) const;
- private:
+private:
void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
int CalcDepth(const Node *) const;
int CalcSize(const Node *) const;
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.cpp b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
index 66024ff01..2f28c3244 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.cpp
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,13 +29,15 @@
using namespace MosesTraining;
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
std::map<std::string, int> &topLabelSet)
- : m_labelSet(labelSet)
- , m_topLabelSet(topLabelSet)
+ : m_labelSet(labelSet)
+ , m_topLabelSet(topLabelSet)
{
}
@@ -60,8 +62,8 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree.
std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
- const SyntaxNode &tree,
- const std::vector<std::string> &words)
+ const SyntaxNode &tree,
+ const std::vector<std::string> &words)
{
std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel()));
root->SetPcfgScore(tree.GetPcfgScore());
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h
index 7b63ae1e4..d00fd7d9f 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.h
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -31,18 +31,21 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace GHKM {
+namespace Moses
+{
+namespace GHKM
+{
class ParseTree;
// Parses a string in Moses' XML parse tree format and returns a ParseTree
// object.
-class XmlTreeParser {
- public:
+class XmlTreeParser
+{
+public:
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
std::auto_ptr<ParseTree> Parse(const std::string &);
- private:
+private:
std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp
index a59450da8..f63015a6a 100644
--- a/phrase-extract/extract-lex-main.cpp
+++ b/phrase-extract/extract-lex-main.cpp
@@ -10,16 +10,16 @@ using namespace MosesTraining;
float COUNT_INCR = 1;
-void fix(std::ostream& stream)
+void fix(std::ostream& stream)
{
- stream.setf(std::ios::fixed);
- stream.precision(7);
+ stream.setf(std::ios::fixed);
+ stream.precision(7);
}
int main(int argc, char* argv[])
{
cerr << "Starting...\n";
-
+
assert(argc == 6);
char* &filePathTarget = argv[1];
char* &filePathSource = argv[2];
@@ -43,8 +43,7 @@ int main(int argc, char* argv[])
size_t lineCount = 0;
string lineTarget, lineSource, lineAlign;
- while (getline(streamTarget, lineTarget))
- {
+ while (getline(streamTarget, lineTarget)) {
if (lineCount % 10000 == 0)
cerr << lineCount << " ";
@@ -52,7 +51,7 @@ int main(int argc, char* argv[])
assert(isSource);
istream &isAlign = getline(streamAlign, lineAlign);
assert(isAlign);
-
+
vector<string> toksTarget, toksSource, toksAlign;
Tokenize(toksTarget, lineTarget);
Tokenize(toksSource, lineSource);
@@ -61,13 +60,13 @@ int main(int argc, char* argv[])
/*
cerr << endl
<< toksTarget.size() << " " << lineTarget << endl
- << toksSource.size() << " " << lineSource << endl
+ << toksSource.size() << " " << lineSource << endl
<< toksAlign.size() << " " << lineAlign << endl;
*/
extractSingleton.Process(toksTarget, toksSource, toksAlign, lineCount);
-
- ++lineCount;
+
+ ++lineCount;
}
extractSingleton.Output(streamLexS2T, streamLexT2S);
@@ -86,35 +85,32 @@ namespace MosesTraining
const std::string *Vocab::GetOrAdd(const std::string &word)
{
- const string *ret = &(*m_coll.insert(word).first);
+ const string *ret = &(*m_coll.insert(word).first);
return ret;
}
void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource, vector<string> &toksAlign, size_t lineCount)
{
std::vector<bool> m_sourceAligned(toksSource.size(), false)
- , m_targetAligned(toksTarget.size(), false);
+ , m_targetAligned(toksTarget.size(), false);
vector<string>::const_iterator iterAlign;
- for (iterAlign = toksAlign.begin(); iterAlign != toksAlign.end(); ++iterAlign)
- {
+ for (iterAlign = toksAlign.begin(); iterAlign != toksAlign.end(); ++iterAlign) {
const string &alignTok = *iterAlign;
-
+
vector<size_t> alignPos;
Tokenize(alignPos, alignTok, "-");
assert(alignPos.size() == 2);
- if (alignPos[0] >= toksSource.size())
- {
- cerr << "ERROR: alignment over source length. Alignment " << alignPos[0] << " at line " << lineCount << endl;
- continue;
- }
- if (alignPos[1] >= toksTarget.size())
- {
- cerr << "ERROR: alignment over target length. Alignment " << alignPos[1] << " at line " << lineCount << endl;
- continue;
- }
-
+ if (alignPos[0] >= toksSource.size()) {
+ cerr << "ERROR: alignment over source length. Alignment " << alignPos[0] << " at line " << lineCount << endl;
+ continue;
+ }
+ if (alignPos[1] >= toksTarget.size()) {
+ cerr << "ERROR: alignment over target length. Alignment " << alignPos[1] << " at line " << lineCount << endl;
+ continue;
+ }
+
assert(alignPos[0] < toksSource.size());
assert(alignPos[1] < toksTarget.size());
@@ -123,12 +119,12 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
const string &tmpSource = toksSource[ alignPos[0] ];
const string &tmpTarget = toksTarget[ alignPos[1] ];
-
+
const string *source = m_vocab.GetOrAdd(tmpSource);
const string *target = m_vocab.GetOrAdd(tmpTarget);
Process(target, source);
-
+
}
ProcessUnaligned(toksTarget, toksSource, m_sourceAligned, m_targetAligned);
@@ -154,15 +150,13 @@ void ExtractLex::Process(WordCount &wcIn, const std::string *out)
}
void ExtractLex::ProcessUnaligned(vector<string> &toksTarget, vector<string> &toksSource
- , const std::vector<bool> &m_sourceAligned, const std::vector<bool> &m_targetAligned)
+ , const std::vector<bool> &m_sourceAligned, const std::vector<bool> &m_targetAligned)
{
- const string *nullWord = m_vocab.GetOrAdd("NULL");
+ const string *nullWord = m_vocab.GetOrAdd("NULL");
- for (size_t pos = 0; pos < m_sourceAligned.size(); ++pos)
- {
+ for (size_t pos = 0; pos < m_sourceAligned.size(); ++pos) {
bool isAlignedCurr = m_sourceAligned[pos];
- if (!isAlignedCurr)
- {
+ if (!isAlignedCurr) {
const string &tmpWord = toksSource[pos];
const string *sourceWord = m_vocab.GetOrAdd(tmpWord);
@@ -170,11 +164,9 @@ void ExtractLex::ProcessUnaligned(vector<string> &toksTarget, vector<string> &to
}
}
- for (size_t pos = 0; pos < m_targetAligned.size(); ++pos)
- {
+ for (size_t pos = 0; pos < m_targetAligned.size(); ++pos) {
bool isAlignedCurr = m_targetAligned[pos];
- if (!isAlignedCurr)
- {
+ if (!isAlignedCurr) {
const string &tmpWord = toksTarget[pos];
const string *targetWord = m_vocab.GetOrAdd(tmpWord);
@@ -193,16 +185,14 @@ void ExtractLex::Output(std::ofstream &streamLexS2T, std::ofstream &streamLexT2S
void ExtractLex::Output(const std::map<const std::string*, WordCount> &coll, std::ofstream &outStream)
{
std::map<const std::string*, WordCount>::const_iterator iterOuter;
- for (iterOuter = coll.begin(); iterOuter != coll.end(); ++iterOuter)
- {
+ for (iterOuter = coll.begin(); iterOuter != coll.end(); ++iterOuter) {
const string &inStr = *iterOuter->first;
const WordCount &inWC = iterOuter->second;
const std::map<const std::string*, WordCount> &outColl = inWC.GetColl();
std::map<const std::string*, WordCount>::const_iterator iterInner;
- for (iterInner = outColl.begin(); iterInner != outColl.end(); ++iterInner)
- {
+ for (iterInner = outColl.begin(); iterInner != outColl.end(); ++iterInner) {
const string &outStr = *iterInner->first;
const WordCount &outWC = iterInner->second;
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index d272cf6ff..d79038fc6 100644
--- a/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -14,10 +14,10 @@ namespace MosesTraining
template<typename T>
inline T Scan(const std::string &input)
{
- std::stringstream stream(input);
- T ret;
- stream >> ret;
- return ret;
+ std::stringstream stream(input);
+ T ret;
+ stream >> ret;
+ return ret;
}
@@ -25,13 +25,12 @@ inline T Scan(const std::string &input)
template<typename T>
inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
{
- output.resize(input.size());
- for (size_t i = 0 ; i < input.size() ; i++)
- {
- output[i] = Scan<T>( input[i] );
- }
+ output.resize(input.size());
+ for (size_t i = 0 ; i < input.size() ; i++) {
+ output[i] = Scan<T>( input[i] );
+ }
}
-
+
inline void Tokenize(std::vector<std::string> &output
, const std::string& str
@@ -55,17 +54,17 @@ inline void Tokenize(std::vector<std::string> &output
// speeded up version of above
template<typename T>
inline void Tokenize( std::vector<T> &output
- , const std::string &input
- , const std::string& delimiters = " \t")
+ , const std::string &input
+ , const std::string& delimiters = " \t")
{
- std::vector<std::string> stringVector;
- Tokenize(stringVector, input, delimiters);
- return Scan<T>(output, stringVector );
+ std::vector<std::string> stringVector;
+ Tokenize(stringVector, input, delimiters);
+ return Scan<T>(output, stringVector );
}
class WordCount
{
- friend std::ostream& operator<<(std::ostream&, const WordCount&);
+ friend std::ostream& operator<<(std::ostream&, const WordCount&);
public:
float m_count;
@@ -83,13 +82,16 @@ public:
void AddCount(float incr);
- std::map<const std::string*, WordCount> &GetColl()
- { return m_coll; }
- const std::map<const std::string*, WordCount> &GetColl() const
- { return m_coll; }
+ std::map<const std::string*, WordCount> &GetColl() {
+ return m_coll;
+ }
+ const std::map<const std::string*, WordCount> &GetColl() const {
+ return m_coll;
+ }
- const float GetCount() const
- { return m_count; }
+ const float GetCount() const {
+ return m_count;
+ }
};
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index cab91e92d..a8edb298a 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -29,7 +29,8 @@
using namespace std;
using namespace MosesTraining;
-namespace MosesTraining {
+namespace MosesTraining
+{
const long int LINE_MAX_LENGTH = 500000 ;
@@ -49,37 +50,38 @@ typedef vector < HPhrase > HPhraseVector;
// The key of the map is the English index and the value is a set of the source ones
typedef map <int, set<int> > HSentenceVertices;
- REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int));
- REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &);
- REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &,
const HSentenceVertices &, const HSentenceVertices &,
REO_POS);
- void insertVertex(HSentenceVertices &, int, int);
- void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
+void insertVertex(HSentenceVertices &, int, int);
+void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
int, int, int, int);
- string getOrientString(REO_POS, REO_MODEL_TYPE);
+string getOrientString(REO_POS, REO_MODEL_TYPE);
- bool ge(int, int);
- bool le(int, int);
- bool lt(int, int);
+bool ge(int, int);
+bool le(int, int);
+bool lt(int, int);
- bool isAligned (SentenceAlignment &, int, int);
- int sentenceOffset = 0;
+bool isAligned (SentenceAlignment &, int, int);
+int sentenceOffset = 0;
}
-namespace MosesTraining{
+namespace MosesTraining
+{
-class ExtractTask
+class ExtractTask
{
public:
ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFile, Moses::OutputFileStream &extractFileInv,Moses::OutputFileStream &extractFileOrientation):
@@ -87,8 +89,8 @@ public:
m_options(initoptions),
m_extractFile(extractFile),
m_extractFileInv(extractFileInv),
- m_extractFileOrientation(extractFileOrientation){}
-void Run();
+ m_extractFileOrientation(extractFileOrientation) {}
+ void Run();
private:
vector< string > m_extractedPhrases;
vector< string > m_extractedPhrasesInv;
@@ -98,7 +100,7 @@ private:
void extract(SentenceAlignment &);
void addPhrase(SentenceAlignment &, int, int, int, int, string &);
void writePhrasesToFile();
-
+
SentenceAlignment &m_sentence;
const PhraseExtractionOptions &m_options;
Moses::OutputFileStream &m_extractFile;
@@ -112,7 +114,7 @@ int main(int argc, char* argv[])
cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
<< "phrase extraction from an aligned parallel corpus\n";
- if (argc < 6) {
+ if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
exit(1);
@@ -135,7 +137,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--NoTTable") == 0) {
options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
- options.initIncludeSentenceIdFlag(true);
+ options.initIncludeSentenceIdFlag(true);
} else if (strcmp(argv[i], "--SentenceOffset") == 0) {
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
@@ -143,7 +145,7 @@ int main(int argc, char* argv[])
}
sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i], "--GZOutput") == 0) {
- options.initGzOutput(true);
+ options.initGzOutput(true);
} else if (strcmp(argv[i], "--InstanceWeights") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
@@ -260,7 +262,7 @@ int main(int argc, char* argv[])
SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
}
SentenceAlignment sentence;
- // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
+ // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alingment line
if (options.isOnlyOutputSpanInfo()) {
cout << "LOG: SRC: " << foreignString << endl;
@@ -268,8 +270,8 @@ int main(int argc, char* argv[])
cout << "LOG: ALT: " << alignmentString << endl;
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
- ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
+ if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
+ ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
task->Run();
delete task;
@@ -286,17 +288,18 @@ int main(int argc, char* argv[])
if (options.isTranslationFlag()) {
extractFile.Close();
extractFileInv.Close();
-
+
+ }
+ if (options.isOrientationFlag()) {
+ extractFileOrientation.Close();
}
- if (options.isOrientationFlag()){
- extractFileOrientation.Close();
- }
}
}
namespace MosesTraining
{
-void ExtractTask::Run() {
+void ExtractTask::Run()
+{
extract(m_sentence);
writePhrasesToFile();
m_extractedPhrases.clear();
@@ -665,16 +668,16 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
{
// source
// // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
- ostringstream outextractstr;
- ostringstream outextractstrInv;
- ostringstream outextractstrOrientation;
+ ostringstream outextractstr;
+ ostringstream outextractstrInv;
+ ostringstream outextractstrOrientation;
if (m_options.isOnlyOutputSpanInfo()) {
cout << startF << " " << endF << " " << startE << " " << endE << endl;
return;
}
-for(int fi=startF; fi<=endF; fi++) {
+ for(int fi=startF; fi<=endF; fi++) {
if (m_options.isTranslationFlag()) outextractstr << sentence.source[fi] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.source[fi] << " ";
}
@@ -693,13 +696,13 @@ for(int fi=startF; fi<=endF; fi++) {
// source (for inverse)
- if (m_options.isTranslationFlag()) {
+ if (m_options.isTranslationFlag()) {
for(int fi=startF; fi<=endF; fi++)
outextractstrInv << sentence.source[fi] << " ";
outextractstrInv << "|||";
}
// alignment
- if (m_options.isTranslationFlag()) {
+ if (m_options.isTranslationFlag()) {
for(int ei=startE; ei<=endE; ei++) {
for(unsigned int i=0; i<sentence.alignedToT[ei].size(); i++) {
int fi = sentence.alignedToT[ei][i];
@@ -732,39 +735,40 @@ for(int fi=startF; fi<=endF; fi++) {
if (m_options.isOrientationFlag()) outextractstrOrientation << "\n";
- m_extractedPhrases.push_back(outextractstr.str());
- m_extractedPhrasesInv.push_back(outextractstrInv.str());
- m_extractedPhrasesOri.push_back(outextractstrOrientation.str());
+ m_extractedPhrases.push_back(outextractstr.str());
+ m_extractedPhrasesInv.push_back(outextractstrInv.str());
+ m_extractedPhrasesOri.push_back(outextractstrOrientation.str());
}
-void ExtractTask::writePhrasesToFile(){
+void ExtractTask::writePhrasesToFile()
+{
- ostringstream outextractFile;
- ostringstream outextractFileInv;
- ostringstream outextractFileOrientation;
+ ostringstream outextractFile;
+ ostringstream outextractFileInv;
+ ostringstream outextractFileOrientation;
- for(vector<string>::const_iterator phrase=m_extractedPhrases.begin();phrase!=m_extractedPhrases.end();phrase++){
- outextractFile<<phrase->data();
- }
- for(vector<string>::const_iterator phrase=m_extractedPhrasesInv.begin();phrase!=m_extractedPhrasesInv.end();phrase++){
- outextractFileInv<<phrase->data();
- }
- for(vector<string>::const_iterator phrase=m_extractedPhrasesOri.begin();phrase!=m_extractedPhrasesOri.end();phrase++){
- outextractFileOrientation<<phrase->data();
- }
+ for(vector<string>::const_iterator phrase=m_extractedPhrases.begin(); phrase!=m_extractedPhrases.end(); phrase++) {
+ outextractFile<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesInv.begin(); phrase!=m_extractedPhrasesInv.end(); phrase++) {
+ outextractFileInv<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesOri.begin(); phrase!=m_extractedPhrasesOri.end(); phrase++) {
+ outextractFileOrientation<<phrase->data();
+ }
- m_extractFile << outextractFile.str();
- m_extractFileInv << outextractFileInv.str();
- m_extractFileOrientation << outextractFileOrientation.str();
+ m_extractFile << outextractFile.str();
+ m_extractFileInv << outextractFileInv.str();
+ m_extractFileOrientation << outextractFileOrientation.str();
}
// if proper conditioning, we need the number of times a source phrase occured
void ExtractTask::extractBase( SentenceAlignment &sentence )
{
- ostringstream outextractFile;
- ostringstream outextractFileInv;
+ ostringstream outextractFile;
+ ostringstream outextractFileInv;
int countF = sentence.source.size();
for(int startF=0; startF<countF; startF++) {
@@ -772,8 +776,8 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
(endF<countF && endF<startF+m_options.maxPhraseLength);
endF++) {
for(int fi=startF; fi<=endF; fi++) {
- outextractFile << sentence.source[fi] << " ";
- }
+ outextractFile << sentence.source[fi] << " ";
+ }
outextractFile << "|||" << endl;
}
}
@@ -789,8 +793,8 @@ void ExtractTask::extractBase( SentenceAlignment &sentence )
outextractFileInv << "|||" << endl;
}
}
- m_extractFile << outextractFile.str();
- m_extractFileInv << outextractFileInv.str();
+ m_extractFile << outextractFile.str();
+ m_extractFileInv << outextractFileInv.str();
}
diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp
index 368aae1f5..f8e315e2c 100644
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@@ -55,7 +55,7 @@ using namespace MosesTraining;
typedef vector< int > LabelIndex;
typedef map< int, int > WordIndex;
-class ExtractTask
+class ExtractTask
{
private:
SentenceAlignmentWithSyntax &m_sentence;
@@ -64,31 +64,30 @@ private:
Moses::OutputFileStream& m_extractFileInv;
vector< ExtractedRule > m_extractedRules;
-
+
// main functions
void extractRules();
void addRuleToCollection(ExtractedRule &rule);
void consolidateRules();
void writeRulesToFile();
-
+
// subs
void addRule( int, int, int, int, int, RuleExist &ruleExist);
void addHieroRule( int startT, int endT, int startS, int endS
- , RuleExist &ruleExist, HoleCollection &holeColl, int numHoles, int initStartF, int wordCountT, int wordCountS);
+ , RuleExist &ruleExist, HoleCollection &holeColl, int numHoles, int initStartF, int wordCountT, int wordCountS);
void saveHieroPhrase( int startT, int endT, int startS, int endS
, HoleCollection &holeColl, LabelIndex &labelIndex, int countS);
string saveTargetHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore, int countS);
+ , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore, int countS);
string saveSourceHieroPhrase( int startT, int endT, int startS, int endS
, HoleCollection &holeColl, const LabelIndex &labelIndex);
void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex);
+ , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex);
void saveHieroAlignment( int startT, int endT, int startS, int endS
- , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule);
+ , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule);
void saveAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl, int countS);
-
- inline string IntToString( int i )
- {
+
+ inline string IntToString( int i ) {
stringstream out;
out << i;
return out.str();
@@ -123,7 +122,7 @@ int main(int argc, char* argv[])
if (argc < 5) {
cerr << "syntax: extract-rules corpus.target corpus.source corpus.align extract ["
- << " --GlueGrammar FILE"
+ << " --GlueGrammar FILE"
<< " | --UnknownWordLabel FILE"
<< " | --OnlyDirect"
<< " | --OutputNTLengths"
@@ -139,8 +138,8 @@ int main(int argc, char* argv[])
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting"
<< " | --UnpairedExtractFormat"
<< " | --ConditionOnTargetLHS ]"
- << " | --BoundaryRules[" << options.boundaryRules << "]";
-
+ << " | --BoundaryRules[" << options.boundaryRules << "]";
+
exit(1);
}
char* &fileNameT = argv[1];
@@ -212,10 +211,9 @@ int main(int argc, char* argv[])
cerr << "extract error: --MaxScope should be at least 0" << endl;
exit(1);
}
+ } else if (strcmp(argv[i], "--GZOutput") == 0) {
+ options.gzOutput = true;
}
- else if (strcmp(argv[i], "--GZOutput") == 0) {
- options.gzOutput = true;
- }
// allow consecutive non-terminals (X Y | X Y)
else if (strcmp(argv[i],"--TargetSyntax") == 0) {
options.targetSyntax = true;
@@ -265,7 +263,7 @@ int main(int argc, char* argv[])
options.unpairedExtractFormat = true;
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
options.conditionOnTargetLhs = true;
- } else if (strcmp(argv[i],"-threads") == 0 ||
+ } else if (strcmp(argv[i],"-threads") == 0 ||
strcmp(argv[i],"--threads") == 0 ||
strcmp(argv[i],"--Threads") == 0) {
#ifdef WITH_THREADS
@@ -327,8 +325,8 @@ int main(int argc, char* argv[])
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
SentenceAlignmentWithSyntax sentence
- (targetLabelCollection, sourceLabelCollection,
- targetTopLabelCollection, sourceTopLabelCollection, options);
+ (targetLabelCollection, sourceLabelCollection,
+ targetTopLabelCollection, sourceTopLabelCollection, options);
//az: output src, tgt, and alingment line
if (options.onlyOutputSpanInfo) {
cout << "LOG: SRC: " << sourceString << endl;
@@ -364,7 +362,8 @@ int main(int argc, char* argv[])
writeUnknownWordLabel(fileNameUnknownWordLabel);
}
-void ExtractTask::Run() {
+void ExtractTask::Run()
+{
extractRules();
consolidateRules();
writeRulesToFile();
@@ -471,7 +470,7 @@ void ExtractTask::extractRules()
}
void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex)
+ , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex)
{
vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
assert(iterHoleList != holeColl.GetSortedSourceHoles().end());
@@ -509,8 +508,8 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
}
string ExtractTask::saveTargetHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore
- , int countS)
+ , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore
+ , int countS)
{
HoleList::iterator iterHoleList = holeColl.GetHoles().begin();
assert(iterHoleList != holeColl.GetHoles().end());
@@ -536,11 +535,11 @@ string ExtractTask::saveTargetHieroPhrase( int startT, int endT, int startS, int
if (m_options.targetSyntax) {
targetLabel = m_sentence.targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetLabel();
} else if (m_options.boundaryRules && (startS == 0 || endS == countS - 1)) {
- targetLabel = "S";
+ targetLabel = "S";
} else {
targetLabel = "X";
}
-
+
hole.SetLabel(targetLabel, 1);
if (m_options.unpairedExtractFormat) {
@@ -571,7 +570,7 @@ string ExtractTask::saveTargetHieroPhrase( int startT, int endT, int startS, int
}
string ExtractTask::saveSourceHieroPhrase( int startT, int endT, int startS, int endS
- , HoleCollection &holeColl, const LabelIndex &labelIndex)
+ , HoleCollection &holeColl, const LabelIndex &labelIndex)
{
vector<Hole*>::iterator iterHoleList = holeColl.GetSortedSourceHoles().begin();
assert(iterHoleList != holeColl.GetSortedSourceHoles().end());
@@ -615,7 +614,7 @@ string ExtractTask::saveSourceHieroPhrase( int startT, int endT, int startS, int
}
void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
- , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule)
+ , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule)
{
// print alignment of words
for(int ti=startT; ti<=endT; ti++) {
@@ -636,13 +635,13 @@ void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
HoleList::const_iterator iterHole;
for (iterHole = holeColl.GetHoles().begin(); iterHole != holeColl.GetHoles().end(); ++iterHole) {
const Hole &hole = *iterHole;
-
+
std::string sourceSymbolIndex = IntToString(hole.GetPos(0));
std::string targetSymbolIndex = IntToString(hole.GetPos(1));
rule.alignment += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
if (!m_options.onlyDirectFlag)
rule.alignmentInv += targetSymbolIndex + "-" + sourceSymbolIndex + " ";
-
+
rule.SetSpanLength(hole.GetPos(0), hole.GetSize(0), hole.GetSize(1) ) ;
}
@@ -654,7 +653,7 @@ void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
}
void ExtractTask::saveHieroPhrase( int startT, int endT, int startS, int endS
- , HoleCollection &holeColl, LabelIndex &labelIndex, int countS)
+ , HoleCollection &holeColl, LabelIndex &labelIndex, int countS)
{
WordIndex indexS, indexT; // to keep track of word positions in rule
@@ -680,12 +679,12 @@ void ExtractTask::saveHieroPhrase( int startT, int endT, int startS, int endS
if (m_options.pcfgScore) {
double logPCFGScore = m_sentence.targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
rule.target = saveTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore, countS)
- + " [" + targetLabel + "]";
+ + " [" + targetLabel + "]";
rule.pcfgScore = std::exp(logPCFGScore);
} else {
double logPCFGScore = 0.0f;
rule.target = saveTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore, countS)
- + " [" + targetLabel + "]";
+ + " [" + targetLabel + "]";
}
// source
@@ -754,8 +753,8 @@ void ExtractTask::saveAllHieroPhrases( int startT, int endT, int startS, int end
// this function is called recursively
// it pokes a new hole into the phrase pair, and then calls itself for more holes
void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
- , RuleExist &ruleExist, HoleCollection &holeColl
- , int numHoles, int initStartT, int wordCountT, int wordCountS)
+ , RuleExist &ruleExist, HoleCollection &holeColl
+ , int numHoles, int initStartT, int wordCountT, int wordCountS)
{
// done, if already the maximum number of non-terminals in phrase pair
if (numHoles >= m_options.maxNonTerm)
@@ -862,7 +861,7 @@ void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
allowablePhrase = false;
// passed all checks...
- if (allowablePhrase)
+ if (allowablePhrase)
saveAllHieroPhrases(startT, endT, startS, endS, holeColl, wordCountS);
// recursively search for next hole
@@ -880,12 +879,12 @@ void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
void ExtractTask::addRule( int startT, int endT, int startS, int endS, int countS, RuleExist &ruleExist)
{
// contains only <s> or </s>. Don't output
- if (m_options.boundaryRules
- && ( (startS == 0 && endS == 0)
- || (startS == countS-1 && endS == countS-1))) {
+ if (m_options.boundaryRules
+ && ( (startS == 0 && endS == 0)
+ || (startS == countS-1 && endS == countS-1))) {
return;
}
-
+
if (m_options.onlyOutputSpanInfo) {
cout << startS << " " << endS << " " << startT << " " << endT << endl;
return;
@@ -897,11 +896,10 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, int count
string targetLabel,sourceLabel;
if (m_options.targetSyntax && m_options.conditionOnTargetLhs) {
sourceLabel = targetLabel = m_sentence.targetTree.GetNodes(startT,endT)[0]->GetLabel();
- }
- else {
+ } else {
sourceLabel = m_options.sourceSyntax ?
m_sentence.sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
-
+
if (m_options.targetSyntax) {
targetLabel = m_sentence.targetTree.GetNodes(startT,endT)[0]->GetLabel();
} else if (m_options.boundaryRules && (startS == 0 || endS == countS - 1)) {
@@ -1008,7 +1006,7 @@ void ExtractTask::writeRulesToFile()
<< rule->alignment << " ||| "
<< rule->count << " ||| ";
if (m_options.outputNTLengths) {
- rule->OutputNTLengths(out);
+ rule->OutputNTLengths(out);
}
if (m_options.pcfgScore) {
out << " ||| " << rule->pcfgScore;
diff --git a/phrase-extract/lexical-reordering/reordering_classes.cpp b/phrase-extract/lexical-reordering/reordering_classes.cpp
index e5b3fe7cd..8c5163f9b 100644
--- a/phrase-extract/lexical-reordering/reordering_classes.cpp
+++ b/phrase-extract/lexical-reordering/reordering_classes.cpp
@@ -57,7 +57,7 @@ void ModelScore::reset_f()
}
void ModelScore::add_example
- (const StringPiece& previous, const StringPiece& next, float weight)
+(const StringPiece& previous, const StringPiece& next, float weight)
{
count_fe_prev[getType(previous)]+=weight;
count_f_prev[getType(previous)]+=weight;
diff --git a/phrase-extract/lexical-reordering/score.cpp b/phrase-extract/lexical-reordering/score.cpp
index 545abf303..d404822b8 100644
--- a/phrase-extract/lexical-reordering/score.cpp
+++ b/phrase-extract/lexical-reordering/score.cpp
@@ -29,11 +29,11 @@ void get_orientations(const StringPiece& pair, StringPiece& previous, StringPiec
class FileFormatException : public util::Exception
{
- public:
- FileFormatException() throw() {
- *this << "Invalid extract file format: ";
- }
- ~FileFormatException() throw() {}
+public:
+ FileFormatException() throw() {
+ *this << "Invalid extract file format: ";
+ }
+ ~FileFormatException() throw() {}
};
int main(int argc, char* argv[])
@@ -214,9 +214,10 @@ int main(int argc, char* argv[])
}
template <class It> StringPiece
-GrabOrDie(It &it, const StringPiece& line) {
- UTIL_THROW_IF(!it, FileFormatException, line.as_string());
- return *it++;
+GrabOrDie(It &it, const StringPiece& line)
+{
+ UTIL_THROW_IF(!it, FileFormatException, line.as_string());
+ return *it++;
}
@@ -236,12 +237,12 @@ void split_line(
| phrase | hier
| phrase | hier ||| weight
*/
-
+
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
foreign = GrabOrDie(pipes,line);
english = GrabOrDie(pipes,line);
StringPiece next = GrabOrDie(pipes,line);
-
+
util::TokenIter<util::MultiCharacter> singlePipe(next, util::MultiCharacter(" | "));
wbe = GrabOrDie(singlePipe,line);
if (singlePipe) {
diff --git a/phrase-extract/pcfg-common/exception.h b/phrase-extract/pcfg-common/exception.h
index 3dbd59d0e..d9266ca36 100644
--- a/phrase-extract/pcfg-common/exception.h
+++ b/phrase-extract/pcfg-common/exception.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,15 +23,20 @@
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
-class Exception {
- public:
+class Exception
+{
+public:
Exception(const char *msg) : msg_(msg) {}
Exception(const std::string &msg) : msg_(msg) {}
- const std::string &msg() const { return msg_; }
- private:
+ const std::string &msg() const {
+ return msg_;
+ }
+private:
std::string msg_;
};
diff --git a/phrase-extract/pcfg-common/numbered_set.h b/phrase-extract/pcfg-common/numbered_set.h
index 15e768b4c..66e960404 100644
--- a/phrase-extract/pcfg-common/numbered_set.h
+++ b/phrase-extract/pcfg-common/numbered_set.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,32 +29,45 @@
#include <sstream>
#include <vector>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
// Stores a set of elements of type T, each of which is allocated an integral
// ID of type I. IDs are contiguous starting at 0. Individual elements cannot
// be removed once inserted (but the whole set can be cleared).
template<typename T, typename I=std::size_t>
-class NumberedSet {
- private:
+class NumberedSet
+{
+private:
typedef boost::unordered_map<T, I> ElementToIdMap;
typedef std::vector<const T *> IdToElementMap;
- public:
+public:
typedef I IdType;
typedef typename IdToElementMap::const_iterator const_iterator;
NumberedSet() {}
- const_iterator begin() const { return id_to_element_.begin(); }
- const_iterator end() const { return id_to_element_.end(); }
+ const_iterator begin() const {
+ return id_to_element_.begin();
+ }
+ const_iterator end() const {
+ return id_to_element_.end();
+ }
// Static value
- static I NullId() { return std::numeric_limits<I>::max(); }
+ static I NullId() {
+ return std::numeric_limits<I>::max();
+ }
- bool Empty() const { return id_to_element_.empty(); }
- std::size_t Size() const { return id_to_element_.size(); }
+ bool Empty() const {
+ return id_to_element_.empty();
+ }
+ std::size_t Size() const {
+ return id_to_element_.size();
+ }
// Insert the given object and return its ID.
I Insert(const T &);
@@ -64,19 +77,21 @@ class NumberedSet {
void Clear();
- private:
+private:
ElementToIdMap element_to_id_;
IdToElementMap id_to_element_;
};
template<typename T, typename I>
-I NumberedSet<T, I>::Lookup(const T &s) const {
+I NumberedSet<T, I>::Lookup(const T &s) const
+{
typename ElementToIdMap::const_iterator p = element_to_id_.find(s);
return (p == element_to_id_.end()) ? NullId() : p->second;
}
template<typename T, typename I>
-const T &NumberedSet<T, I>::Lookup(I id) const {
+const T &NumberedSet<T, I>::Lookup(I id) const
+{
if (id < 0 || id >= id_to_element_.size()) {
std::ostringstream msg;
msg << "Value not found: " << id;
@@ -86,10 +101,11 @@ const T &NumberedSet<T, I>::Lookup(I id) const {
}
template<typename T, typename I>
-I NumberedSet<T, I>::Insert(const T &x) {
+I NumberedSet<T, I>::Insert(const T &x)
+{
std::pair<T, I> value(x, id_to_element_.size());
std::pair<typename ElementToIdMap::iterator, bool> result =
- element_to_id_.insert(value);
+ element_to_id_.insert(value);
if (result.second) {
// x is a new element.
id_to_element_.push_back(&result.first->first);
@@ -98,7 +114,8 @@ I NumberedSet<T, I>::Insert(const T &x) {
}
template<typename T, typename I>
-void NumberedSet<T, I>::Clear() {
+void NumberedSet<T, I>::Clear()
+{
element_to_id_.clear();
id_to_element_.clear();
}
diff --git a/phrase-extract/pcfg-common/pcfg.h b/phrase-extract/pcfg-common/pcfg.h
index b87336584..5398cd97e 100644
--- a/phrase-extract/pcfg-common/pcfg.h
+++ b/phrase-extract/pcfg-common/pcfg.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -28,11 +28,14 @@
#include <ostream>
#include <vector>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
-class Pcfg {
- public:
+class Pcfg
+{
+public:
typedef std::vector<std::size_t> Key;
typedef std::map<Key, double> Map;
typedef Map::iterator iterator;
@@ -40,18 +43,26 @@ class Pcfg {
Pcfg() {}
- iterator begin() { return rules_.begin(); }
- const_iterator begin() const { return rules_.begin(); }
+ iterator begin() {
+ return rules_.begin();
+ }
+ const_iterator begin() const {
+ return rules_.begin();
+ }
- iterator end() { return rules_.end(); }
- const_iterator end() const { return rules_.end(); }
+ iterator end() {
+ return rules_.end();
+ }
+ const_iterator end() const {
+ return rules_.end();
+ }
void Add(const Key &, double);
bool Lookup(const Key &, double &) const;
void Read(std::istream &, Vocabulary &);
void Write(const Vocabulary &, std::ostream &) const;
- private:
+private:
Map rules_;
};
diff --git a/phrase-extract/pcfg-common/pcfg_tree.h b/phrase-extract/pcfg-common/pcfg_tree.h
index bdac64dfc..d125cad16 100644
--- a/phrase-extract/pcfg-common/pcfg_tree.h
+++ b/phrase-extract/pcfg-common/pcfg_tree.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -26,34 +26,43 @@
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
template<typename DerivedType>
-class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
- public:
+class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType>
+{
+public:
typedef std::string LabelType;
typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;
PcfgTreeBase(const LabelType &label) : BaseType(label), score_(0.0) {}
- double score() const { return score_; }
- void set_score(double s) { score_ = s; }
+ double score() const {
+ return score_;
+ }
+ void set_score(double s) {
+ score_ = s;
+ }
- private:
+private:
double score_;
};
-class PcfgTree : public PcfgTreeBase<PcfgTree> {
- public:
+class PcfgTree : public PcfgTreeBase<PcfgTree>
+{
+public:
typedef PcfgTreeBase<PcfgTree> BaseType;
PcfgTree(const BaseType::LabelType &label) : BaseType(label) {}
};
// Specialise XmlOutputHandler for PcfgTree.
template<>
-class XmlOutputHandler<PcfgTree> {
- public:
+class XmlOutputHandler<PcfgTree>
+{
+public:
typedef std::map<std::string, std::string> AttributeMap;
void GetLabel(const PcfgTree &tree, std::string &label) const {
diff --git a/phrase-extract/pcfg-common/syntax_tree.h b/phrase-extract/pcfg-common/syntax_tree.h
index 89c6ec0c3..93d9dbec9 100644
--- a/phrase-extract/pcfg-common/syntax_tree.h
+++ b/phrase-extract/pcfg-common/syntax_tree.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -24,62 +24,87 @@
#include <cassert>
#include <vector>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
// Base class for SyntaxTree, AgreementTree, and friends.
template<typename T, typename DerivedType>
-class SyntaxTreeBase {
- public:
+class SyntaxTreeBase
+{
+public:
// Constructors
SyntaxTreeBase(const T &label)
- : label_(label)
- , children_()
- , parent_(0) {}
+ : label_(label)
+ , children_()
+ , parent_(0) {}
SyntaxTreeBase(const T &label, const std::vector<DerivedType *> &children)
- : label_(label)
- , children_(children)
- , parent_(0) {}
+ : label_(label)
+ , children_(children)
+ , parent_(0) {}
// Destructor
virtual ~SyntaxTreeBase();
- const T &label() const { return label_; }
- const DerivedType *parent() const { return parent_; }
- DerivedType *parent() { return parent_; }
- const std::vector<DerivedType *> &children() const { return children_; }
- std::vector<DerivedType *> &children() { return children_; }
+ const T &label() const {
+ return label_;
+ }
+ const DerivedType *parent() const {
+ return parent_;
+ }
+ DerivedType *parent() {
+ return parent_;
+ }
+ const std::vector<DerivedType *> &children() const {
+ return children_;
+ }
+ std::vector<DerivedType *> &children() {
+ return children_;
+ }
- void set_label(const T &label) { label_ = label; }
- void set_parent(DerivedType *parent) { parent_ = parent; }
- void set_children(const std::vector<DerivedType *> &c) { children_ = c; }
+ void set_label(const T &label) {
+ label_ = label;
+ }
+ void set_parent(DerivedType *parent) {
+ parent_ = parent;
+ }
+ void set_children(const std::vector<DerivedType *> &c) {
+ children_ = c;
+ }
- bool IsLeaf() const { return children_.empty(); }
+ bool IsLeaf() const {
+ return children_.empty();
+ }
bool IsPreterminal() const {
return children_.size() == 1 && children_[0]->IsLeaf();
}
- void AddChild(DerivedType *child) { children_.push_back(child); }
+ void AddChild(DerivedType *child) {
+ children_.push_back(child);
+ }
- private:
+private:
T label_;
std::vector<DerivedType *> children_;
DerivedType *parent_;
};
template<typename T>
-class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
- public:
+class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> >
+{
+public:
typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;
SyntaxTree(const T &label) : BaseType(label) {}
SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
- : BaseType(label, children) {}
+ : BaseType(label, children) {}
};
template<typename T, typename DerivedType>
-SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
+SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase()
+{
for (std::size_t i = 0; i < children_.size(); ++i) {
delete children_[i];
}
diff --git a/phrase-extract/pcfg-common/tool.h b/phrase-extract/pcfg-common/tool.h
index 0af342569..aada036e3 100644
--- a/phrase-extract/pcfg-common/tool.h
+++ b/phrase-extract/pcfg-common/tool.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -28,18 +28,23 @@
#include <iostream>
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
-class Tool {
- public:
+class Tool
+{
+public:
virtual ~Tool() {}
- const std::string &name() const { return name_; }
+ const std::string &name() const {
+ return name_;
+ }
virtual int Main(int argc, char *argv[]) = 0;
- protected:
+protected:
Tool(const std::string &name) : name_(name) {}
// Returns the boost::program_options style that should be used by all tools.
@@ -77,7 +82,7 @@ class Tool {
// the file cannot be opened for writing.
void OpenNamedOutputOrDie(const std::string &, std::ofstream &);
- private:
+private:
std::string name_;
std::istream *input_ptr_;
std::ifstream input_file_stream_;
diff --git a/phrase-extract/pcfg-common/typedef.h b/phrase-extract/pcfg-common/typedef.h
index 49a12d681..ce3e0423b 100644
--- a/phrase-extract/pcfg-common/typedef.h
+++ b/phrase-extract/pcfg-common/typedef.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -26,8 +26,10 @@
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
typedef NumberedSet<std::string> Vocabulary;
diff --git a/phrase-extract/pcfg-common/xml_tree_parser.h b/phrase-extract/pcfg-common/xml_tree_parser.h
index 7d01b0684..7eec14033 100644
--- a/phrase-extract/pcfg-common/xml_tree_parser.h
+++ b/phrase-extract/pcfg-common/xml_tree_parser.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,16 +30,19 @@
#include <string>
#include <vector>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
// Parses a string in Moses' XML parse tree format and returns a PcfgTree
// object.
-class XmlTreeParser {
- public:
+class XmlTreeParser
+{
+public:
XmlTreeParser();
std::auto_ptr<PcfgTree> Parse(const std::string &);
- private:
+private:
std::auto_ptr<PcfgTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
diff --git a/phrase-extract/pcfg-common/xml_tree_writer.h b/phrase-extract/pcfg-common/xml_tree_writer.h
index 6a9a3de05..426efec17 100644
--- a/phrase-extract/pcfg-common/xml_tree_writer.h
+++ b/phrase-extract/pcfg-common/xml_tree_writer.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -32,12 +32,15 @@
#include <vector>
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
template<typename InputTree>
-class XmlOutputHandler {
- public:
+class XmlOutputHandler
+{
+public:
typedef std::map<std::string, std::string> AttributeMap;
void GetLabel(const InputTree &, std::string &) const;
@@ -45,17 +48,19 @@ class XmlOutputHandler {
};
template<typename InputTree>
-class XmlTreeWriter : public XmlOutputHandler<InputTree> {
- public:
+class XmlTreeWriter : public XmlOutputHandler<InputTree>
+{
+public:
typedef XmlOutputHandler<InputTree> Base;
void Write(const InputTree &, std::ostream &) const;
- private:
+private:
std::string Escape(const std::string &) const;
};
template<typename InputTree>
void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
- std::ostream &out) const {
+ std::ostream &out) const
+{
assert(!tree.IsLeaf());
// Opening tag
@@ -99,7 +104,8 @@ void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
// Escapes XML special characters.
template<typename InputTree>
-std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
+std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const
+{
std::string t;
std::size_t len = s.size();
t.reserve(len);
diff --git a/phrase-extract/pcfg-extract/options.h b/phrase-extract/pcfg-extract/options.h
index 3acb31b58..2633f025a 100644
--- a/phrase-extract/pcfg-extract/options.h
+++ b/phrase-extract/pcfg-extract/options.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,8 +23,10 @@
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
struct Options {
std::string corpus_file;
diff --git a/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h
index 1af6cb4fe..e8c306876 100644
--- a/phrase-extract/pcfg-extract/pcfg_extract.h
+++ b/phrase-extract/pcfg-extract/pcfg_extract.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,16 +23,19 @@
#include "pcfg-common/tool.h"
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
class Options;
-class PcfgExtract : public Tool {
- public:
+class PcfgExtract : public Tool
+{
+public:
PcfgExtract() : Tool("pcfg-extract") {}
virtual int Main(int, char *[]);
- private:
+private:
void ProcessOptions(int, char *[], Options &) const;
};
diff --git a/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h
index 452fa0e97..32cb2dc05 100644
--- a/phrase-extract/pcfg-extract/rule_collection.h
+++ b/phrase-extract/pcfg-extract/rule_collection.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -27,12 +27,15 @@
#include <vector>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
// Contains PCFG rules and their counts.
-class RuleCollection {
- public:
+class RuleCollection
+{
+public:
typedef boost::unordered_map<std::vector<std::size_t>, std::size_t> RhsCountMap;
typedef boost::unordered_map<std::size_t, RhsCountMap> Map;
typedef Map::iterator iterator;
@@ -40,16 +43,24 @@ class RuleCollection {
RuleCollection() {}
- iterator begin() { return collection_.begin(); }
- const_iterator begin() const { return collection_.begin(); }
+ iterator begin() {
+ return collection_.begin();
+ }
+ const_iterator begin() const {
+ return collection_.begin();
+ }
- iterator end() { return collection_.end(); }
- const_iterator end() const { return collection_.end(); }
+ iterator end() {
+ return collection_.end();
+ }
+ const_iterator end() const {
+ return collection_.end();
+ }
void Add(std::size_t, const std::vector<std::size_t> &);
void CreatePcfg(Pcfg &);
- private:
+private:
Map collection_;
};
diff --git a/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h
index 6bcffbc61..e4b411c01 100644
--- a/phrase-extract/pcfg-extract/rule_extractor.h
+++ b/phrase-extract/pcfg-extract/rule_extractor.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -25,17 +25,20 @@
#include "pcfg-common/typedef.h"
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
class PcfgTree;
// Extracts PCFG rules from syntax trees and adds them to a RuleCollection.
-class RuleExtractor {
- public:
+class RuleExtractor
+{
+public:
RuleExtractor(Vocabulary &);
void Extract(const PcfgTree &, RuleCollection &) const;
- private:
+private:
Vocabulary &non_term_vocab_;
};
diff --git a/phrase-extract/pcfg-score/options.h b/phrase-extract/pcfg-score/options.h
index e54b2a0b9..fd54b4b6b 100644
--- a/phrase-extract/pcfg-score/options.h
+++ b/phrase-extract/pcfg-score/options.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,8 +23,10 @@
#include <string>
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
struct Options {
std::string pcfg_file;
diff --git a/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h
index 5e506c39d..f49c9a0be 100644
--- a/phrase-extract/pcfg-score/pcfg_score.h
+++ b/phrase-extract/pcfg-score/pcfg_score.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -23,16 +23,19 @@
#include "pcfg-common/tool.h"
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
class Options;
-class PcfgScore : public Tool {
- public:
+class PcfgScore : public Tool
+{
+public:
PcfgScore() : Tool("pcfg-score") {}
virtual int Main(int, char *[]);
- private:
+private:
void ProcessOptions(int, char *[], Options &) const;
};
diff --git a/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h
index 36f4e1e99..8cb59c0c2 100644
--- a/phrase-extract/pcfg-score/tree_scorer.h
+++ b/phrase-extract/pcfg-score/tree_scorer.h
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -25,18 +25,21 @@
#include "pcfg-common/pcfg_tree.h"
#include "pcfg-common/typedef.h"
-namespace Moses {
-namespace PCFG {
+namespace Moses
+{
+namespace PCFG
+{
-class TreeScorer {
- public:
+class TreeScorer
+{
+public:
TreeScorer(const Pcfg &, const Vocabulary &);
// Score tree according to PCFG. Returns false if unsuccessful (due to
// missing rule).
bool Score(PcfgTree &) const;
- private:
+private:
const Pcfg &pcfg_;
const Vocabulary &non_term_vocab_;
};
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index 0e4ad57f4..3042cbe3e 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -68,7 +68,7 @@ float minCountHierarchical = 0;
Vocabulary vcbT;
Vocabulary vcbS;
-
+
} // namespace
vector<string> tokenize( const char [] );
@@ -130,18 +130,18 @@ int main(int argc, char* argv[])
cerr << "not computing lexical translation score\n";
} else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
- fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
+ fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
cerr << "adjusting phrase translation probabilities with Good Turing discounting\n";
} else if (strcmp(argv[i],"--KneserNey") == 0) {
kneserNeyFlag = true;
- fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
+ fileNameCountOfCounts = string(fileNamePhraseTable) + ".coc";
cerr << "adjusting phrase translation probabilities with Kneser Ney discounting\n";
} else if (strcmp(argv[i],"--UnalignedPenalty") == 0) {
unalignedFlag = true;
cerr << "using unaligned word penalty\n";
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
- if (i+1==argc) {
+ if (i+1==argc) {
cerr << "ERROR: specify count of count files for Kneser Ney discounting!\n";
exit(1);
}
@@ -204,22 +204,21 @@ int main(int argc, char* argv[])
istream &extractFileP = extractFile;
// output file: phrase translation table
- ostream *phraseTableFile;
-
- if (fileNamePhraseTable == "-") {
- phraseTableFile = &cout;
- }
- else {
- Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
- bool success = outputFile->Open(fileNamePhraseTable);
- if (!success) {
- cerr << "ERROR: could not open file phrase table file "
- << fileNamePhraseTable << endl;
- exit(1);
- }
- phraseTableFile = outputFile;
- }
-
+ ostream *phraseTableFile;
+
+ if (fileNamePhraseTable == "-") {
+ phraseTableFile = &cout;
+ } else {
+ Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
+ bool success = outputFile->Open(fileNamePhraseTable);
+ if (!success) {
+ cerr << "ERROR: could not open file phrase table file "
+ << fileNamePhraseTable << endl;
+ exit(1);
+ }
+ phraseTableFile = outputFile;
+ }
+
// loop through all extracted phrase translations
float lastCount = 0.0f;
float lastPcfgSum = 0.0f;
@@ -250,25 +249,23 @@ int main(int argc, char* argv[])
lastPcfgSum = phrasePair.pcfgSum;
// only differs in count? just add count
- if (lastPhrasePair != NULL
- && lastPhrasePair->equals( phrasePair )
- && featureManager.equals(*lastPhrasePair, phrasePair)) {
+ if (lastPhrasePair != NULL
+ && lastPhrasePair->equals( phrasePair )
+ && featureManager.equals(*lastPhrasePair, phrasePair)) {
lastPhrasePair->count += phrasePair.count;
lastPhrasePair->pcfgSum += phrasePair.pcfgSum;
continue;
}
-
+
// if new source phrase, process last batch
if (lastPhrasePair != NULL &&
lastPhrasePair->GetSource() != phrasePair.GetSource()) {
processPhrasePairs( phrasePairsWithSameF, *phraseTableFile, isSingleton, featureManager, maybeLogProb );
-
+
phrasePairsWithSameF.clear();
isSingleton = false;
lastPhrasePair = NULL;
- }
- else
- {
+ } else {
isSingleton = true;
}
@@ -277,11 +274,11 @@ int main(int argc, char* argv[])
lastPhrasePair = &phrasePairsWithSameF.back();
}
processPhrasePairs( phrasePairsWithSameF, *phraseTableFile, isSingleton, featureManager, maybeLogProb );
-
- phraseTableFile->flush();
- if (phraseTableFile != &cout) {
- delete phraseTableFile;
- }
+
+ phraseTableFile->flush();
+ if (phraseTableFile != &cout) {
+ delete phraseTableFile;
+ }
// output count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
@@ -292,13 +289,13 @@ int main(int argc, char* argv[])
void writeCountOfCounts( const string &fileNameCountOfCounts )
{
// open file
- Moses::OutputFileStream countOfCountsFile;
- bool success = countOfCountsFile.Open(fileNameCountOfCounts.c_str());
- if (!success) {
- cerr << "ERROR: could not open count-of-counts file "
- << fileNameCountOfCounts << endl;
+ Moses::OutputFileStream countOfCountsFile;
+ bool success = countOfCountsFile.Open(fileNameCountOfCounts.c_str());
+ if (!success) {
+ cerr << "ERROR: could not open count-of-counts file "
+ << fileNameCountOfCounts << endl;
return;
- }
+ }
// Kneser-Ney needs the total number of phrase pairs
countOfCountsFile << totalDistinct << endl;
@@ -307,7 +304,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
for(int i=1; i<=COC_MAX; i++) {
countOfCountsFile << countOfCounts[ i ] << endl;
}
- countOfCountsFile.Close();
+ countOfCountsFile.Close();
}
void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
@@ -317,65 +314,63 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseT
// group phrase pairs based on alignments that matter
// (i.e. that re-arrange non-terminals)
PhrasePairGroup phrasePairGroup;
-
+
float totalSource = 0;
//cerr << "phrasePair.size() = " << phrasePair.size() << endl;
-
+
// loop through phrase pairs
for(size_t i=0; i<phrasePair.size(); i++) {
// add to total count
PhraseAlignment &currPhrasePair = phrasePair[i];
-
+
totalSource += phrasePair[i].count;
-
+
// check for matches
//cerr << "phrasePairGroup.size() = " << phrasePairGroup.size() << endl;
-
+
PhraseAlignmentCollection phraseAlignColl;
phraseAlignColl.push_back(&currPhrasePair);
pair<PhrasePairGroup::iterator, bool> retInsert;
retInsert = phrasePairGroup.insert(phraseAlignColl);
- if (!retInsert.second)
- { // already exist. Add to that collection instead
+ if (!retInsert.second) {
+ // already exist. Add to that collection instead
PhraseAlignmentCollection &existingColl = const_cast<PhraseAlignmentCollection&>(*retInsert.first);
existingColl.push_back(&currPhrasePair);
}
-
+
}
// output the distinct phrase pairs, one at a time
const PhrasePairGroup::SortedColl &sortedColl = phrasePairGroup.GetSortedColl();
PhrasePairGroup::SortedColl::const_iterator iter;
- for(iter = sortedColl.begin(); iter != sortedColl.end(); ++iter)
- {
+ for(iter = sortedColl.begin(); iter != sortedColl.end(); ++iter) {
const PhraseAlignmentCollection &group = **iter;
outputPhrasePair( group, totalSource, phrasePairGroup.GetSize(), phraseTableFile, isSingleton, featureManager, maybeLogProb );
}
-
+
}
const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrasePair )
{
float bestAlignmentCount = -1;
PhraseAlignment* bestAlignment = NULL;
-
+
for(size_t i=0; i<phrasePair.size(); i++) {
size_t alignInd;
- if (inverseFlag)
- { // count backwards, so that alignments for ties will be the same for both normal & inverse scores
+ if (inverseFlag) {
+ // count backwards, so that alignments for ties will be the same for both normal & inverse scores
alignInd = phrasePair.size() - i - 1;
- }
- else {
+ } else {
alignInd = i;
}
-
+
if (phrasePair[alignInd]->count > bestAlignmentCount) {
bestAlignmentCount = phrasePair[alignInd]->count;
bestAlignment = phrasePair[alignInd];
}
- }
+ }
return *bestAlignment;
}
@@ -386,14 +381,12 @@ void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths
, map<size_t, map<size_t, float> > &probs)
{
map<size_t, map<size_t, size_t> >::const_iterator iterOuter;
- for (iterOuter = lengths.begin(); iterOuter != lengths.end(); ++iterOuter)
- {
+ for (iterOuter = lengths.begin(); iterOuter != lengths.end(); ++iterOuter) {
size_t sourcePos = iterOuter->first;
const map<size_t, size_t> &inner = iterOuter->second;
-
+
map<size_t, size_t>::const_iterator iterInner;
- for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner)
- {
+ for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
size_t length = iterInner->first;
size_t count = iterInner->second;
float prob = (float) count / (float) total;
@@ -411,54 +404,49 @@ void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
map<size_t, size_t> totals;
// 1st = position in source phrase, 2nd = total counts
// each source pos should have same count?
-
+
vector< PhraseAlignment* >::const_iterator iterOuter;
- for (iterOuter = phrasePairs.begin(); iterOuter != phrasePairs.end(); ++iterOuter)
- {
+ for (iterOuter = phrasePairs.begin(); iterOuter != phrasePairs.end(); ++iterOuter) {
const PhraseAlignment &phrasePair = **iterOuter;
const std::map<size_t, std::pair<size_t, size_t> > &ntLengths = phrasePair.GetNTLengths();
-
+
std::map<size_t, std::pair<size_t, size_t> >::const_iterator iterInner;
- for (iterInner = ntLengths.begin(); iterInner != ntLengths.end(); ++iterInner)
- {
+ for (iterInner = ntLengths.begin(); iterInner != ntLengths.end(); ++iterInner) {
size_t sourcePos = iterInner->first;
size_t sourceLength = iterInner->second.first;
size_t targetLength = iterInner->second.second;
-
+
sourceLengths[sourcePos][sourceLength]++;
targetLengths[sourcePos][targetLength]++;
totals[sourcePos]++;
}
}
-
- if (totals.size() == 0)
- { // no non-term. Don't bother
+
+ if (totals.size() == 0) {
+ // no non-term. Don't bother
return;
}
size_t total = totals.begin()->second;
- if (totals.size() > 1)
- {
+ if (totals.size() > 1) {
assert(total == (++totals.begin())->second );
}
-
+
calcNTLengthProb(sourceLengths, total, sourceProb);
calcNTLengthProb(targetLengths, total, targetProb);
-
+
}
void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t, float> > &probs, const string &prefix)
{
map<size_t, map<size_t, float> >::const_iterator iterOuter;
- for (iterOuter = probs.begin(); iterOuter != probs.end(); ++iterOuter)
- {
+ for (iterOuter = probs.begin(); iterOuter != probs.end(); ++iterOuter) {
size_t sourcePos = iterOuter->first;
const map<size_t, float> &inner = iterOuter->second;
-
+
map<size_t, float>::const_iterator iterInner;
- for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner)
- {
+ for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
size_t length = iterInner->first;
float prob = iterInner->second;
@@ -470,47 +458,40 @@ void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t,
bool calcCrossedNonTerm(size_t sourcePos, size_t targetPos, const std::vector< std::set<size_t> > &alignedToS)
{
- for (size_t currSource = 0; currSource < alignedToS.size(); ++currSource)
- {
- if (currSource == sourcePos)
- { // skip
- }
- else
- {
+ for (size_t currSource = 0; currSource < alignedToS.size(); ++currSource) {
+ if (currSource == sourcePos) {
+ // skip
+ } else {
const std::set<size_t> &targetSet = alignedToS[currSource];
std::set<size_t>::const_iterator iter;
- for (iter = targetSet.begin(); iter != targetSet.end(); ++iter)
- {
+ for (iter = targetSet.begin(); iter != targetSet.end(); ++iter) {
size_t currTarget = *iter;
-
+
if ((currSource < sourcePos && currTarget > targetPos)
|| (currSource > sourcePos && currTarget < targetPos)
- )
- {
+ ) {
return true;
}
}
-
+
}
}
-
+
return false;
}
int calcCrossedNonTerm(const PHRASE &phraseS, const PhraseAlignment &bestAlignment)
{
const std::vector< std::set<size_t> > &alignedToS = bestAlignment.alignedToS;
-
- for (size_t sourcePos = 0; sourcePos < alignedToS.size(); ++sourcePos)
- {
+
+ for (size_t sourcePos = 0; sourcePos < alignedToS.size(); ++sourcePos) {
const std::set<size_t> &targetSet = alignedToS[sourcePos];
-
+
WORD_ID wordId = phraseS[sourcePos];
const WORD &word = vcbS.getWord(wordId);
bool isNonTerm = isNonTerminal(word);
-
- if (isNonTerm)
- {
+
+ if (isNonTerm) {
assert(targetSet.size() == 1);
size_t targetPos = *targetSet.begin();
bool ret = calcCrossedNonTerm(sourcePos, targetPos, alignedToS);
@@ -518,17 +499,17 @@ int calcCrossedNonTerm(const PHRASE &phraseS, const PhraseAlignment &bestAlignme
return 1;
}
}
-
+
return 0;
}
void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager,
- const MaybeLog& maybeLogProb )
+ const MaybeLog& maybeLogProb )
{
if (phrasePair.size() == 0) return;
const PhraseAlignment &bestAlignment = findBestAlignment( phrasePair );
-
+
// compute count
float count = 0;
for(size_t i=0; i<phrasePair.size(); i++) {
@@ -550,7 +531,7 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
if (pcfgFlag && !inverseFlag) {
float pcfgSum = 0;
for(size_t i=0; i<phrasePair.size(); ++i) {
- pcfgSum += phrasePair[i]->pcfgSum;
+ pcfgSum += phrasePair[i]->pcfgSum;
}
pcfgScore = pcfgSum / count;
}
@@ -604,11 +585,11 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
if (singletonFeature) {
phraseTableFile << " " << (isSingleton ? 1 : 0);
}
-
+
if (crossedNonTerm && !inverseFlag) {
phraseTableFile << " " << calcCrossedNonTerm(phraseS, bestAlignment);
}
-
+
// target-side PCFG score
if (pcfgFlag && !inverseFlag) {
phraseTableFile << " " << maybeLogProb(pcfgScore );
@@ -624,7 +605,7 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
}
for (map<string,float>::const_iterator i = extraSparse.begin();
- i != extraSparse.end(); ++i) {
+ i != extraSparse.end(); ++i) {
phraseTableFile << " " << i->first << " " << i->second;
}
@@ -633,8 +614,8 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
// alignment info for non-terminals
if (! inverseFlag) {
if (hierarchicalFlag) {
- // always output alignment if hiero style, but only for non-terms
- // (eh: output all alignments, needed for some feature functions)
+ // always output alignment if hiero style, but only for non-terms
+ // (eh: output all alignments, needed for some feature functions)
assert(phraseT.size() == bestAlignment.alignedToT.size() + 1);
std::vector<std::string> alignment;
for(size_t j = 0; j < phraseT.size() - 1; j++) {
@@ -657,15 +638,15 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
- }
- }
- }
- // now print all alignments, sorted by source index
- sort(alignment.begin(), alignment.end());
- for (size_t i = 0; i < alignment.size(); ++i) {
- phraseTableFile << alignment[i] << " ";
- }
- } else if (wordAlignmentFlag) {
+ }
+ }
+ }
+ // now print all alignments, sorted by source index
+ sort(alignment.begin(), alignment.end());
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ phraseTableFile << alignment[i] << " ";
+ }
+ } else if (wordAlignmentFlag) {
// alignment info in pb model
for(size_t j=0; j<bestAlignment.alignedToT.size(); j++) {
const set< size_t > &aligned = bestAlignment.alignedToT[j];
@@ -678,28 +659,26 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
// counts
-
+
phraseTableFile << " ||| " << totalCount << " " << count;
- if (kneserNeyFlag)
+ if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
-
- // nt lengths
- if (outputNTLengths)
- {
+
+ // nt lengths
+ if (outputNTLengths) {
phraseTableFile << " ||| ";
- if (!inverseFlag)
- {
+ if (!inverseFlag) {
map<size_t, map<size_t, float> > sourceProb, targetProb;
// 1st sourcePos, 2nd = length, 3rd = prob
calcNTLengthProb(phrasePair, sourceProb, targetProb);
-
+
outputNTLengthProbs(phraseTableFile, sourceProb, "S");
outputNTLengthProbs(phraseTableFile, targetProb, "T");
- }
+ }
}
-
+
phraseTableFile << endl;
}
@@ -878,13 +857,13 @@ void printTargetPhrase(const PHRASE &phraseS, const PHRASE &phraseT,
std::pair<PhrasePairGroup::Coll::iterator,bool> PhrasePairGroup::insert ( const PhraseAlignmentCollection& obj )
{
std::pair<iterator,bool> ret = m_coll.insert(obj);
-
- if (ret.second)
- { // obj inserted. Also add to sorted vector
+
+ if (ret.second) {
+ // obj inserted. Also add to sorted vector
const PhraseAlignmentCollection &insertedObj = *ret.first;
m_sortedColl.push_back(&insertedObj);
}
-
+
return ret;
}
diff --git a/phrase-extract/score.h b/phrase-extract/score.h
index 59d2cf58f..6a10536c1 100644
--- a/phrase-extract/score.h
+++ b/phrase-extract/score.h
@@ -32,6 +32,6 @@ inline bool isNonTerminal( const std::string &word )
return (word.length()>=3 && word[0] == '[' && word[word.length()-1] == ']');
}
-
+
}
diff --git a/phrase-extract/tables-core.cpp b/phrase-extract/tables-core.cpp
index 6b35f371b..30c1544e9 100644
--- a/phrase-extract/tables-core.cpp
+++ b/phrase-extract/tables-core.cpp
@@ -33,8 +33,9 @@ vector<string> tokenize( const char* input )
namespace MosesTraining
{
-bool isNonTerminal( const WORD &symbol ) {
- return symbol.substr(0, 1) == "[" && symbol.substr(symbol.size()-1, 1) == "]";
+bool isNonTerminal( const WORD &symbol )
+{
+ return symbol.substr(0, 1) == "[" && symbol.substr(symbol.size()-1, 1) == "]";
}
WORD_ID Vocabulary::storeIfNew( const WORD& word )
@@ -105,7 +106,7 @@ void DTable::load( const string& fileName )
std::cerr << "Error reading from " << fileName << std::endl;
abort();
}
-
+
vector<string> token = tokenize(line.c_str());
if (token.size() < 2) {
cerr << "line " << i << " in " << fileName << " too short, skipping\n";