Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicola Bertoldi <bertoldi@fbk.eu>2014-05-19 17:35:08 +0400
committerNicola Bertoldi <bertoldi@fbk.eu>2014-05-19 17:35:08 +0400
commit20b3e8929e93c40e0f1ec61b9268330fe59607f9 (patch)
treeae6102023df2adfd6bfb287cbc6d89caad5afeac /phrase-extract
parent2f3cd5e2fe54f0352eee4657ea91e0039073a95a (diff)
beautify
Diffstat (limited to 'phrase-extract')
-rw-r--r--phrase-extract/DomainFeature.cpp14
-rw-r--r--phrase-extract/DomainFeature.h4
-rw-r--r--phrase-extract/ExtractionPhrasePair.cpp52
-rw-r--r--phrase-extract/ExtractionPhrasePair.h27
-rw-r--r--phrase-extract/InternalStructFeature.cpp29
-rw-r--r--phrase-extract/InternalStructFeature.h38
-rw-r--r--phrase-extract/ScoreFeature.cpp18
-rw-r--r--phrase-extract/ScoreFeature.h12
-rw-r--r--phrase-extract/extract-ordering-main.cpp38
-rw-r--r--phrase-extract/score-main.cpp133
10 files changed, 186 insertions, 179 deletions
diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp
index 2f99a8709..0526d058b 100644
--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@@ -55,9 +55,9 @@ DomainFeature::DomainFeature(const string& domainFile) : m_propertyKey("domain")
m_domain.load(domainFile);
}
-void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
- float count,
- int sentenceId) const
+void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+ float count,
+ int sentenceId) const
{
std::string value = m_domain.getDomainOfSentence(sentenceId);
phrasePair.AddProperty(m_propertyKey, value, count);
@@ -69,13 +69,13 @@ void DomainFeature::add(const ScoreFeatureContext& context,
{
const map<string,float> *domainCount = context.phrasePair.GetProperty(m_propertyKey);
assert( domainCount != NULL );
- add(*domainCount,
- context.phrasePair.GetCount(),
- context.maybeLog,
+ add(*domainCount,
+ context.phrasePair.GetCount(),
+ context.maybeLog,
denseValues, sparseValues);
}
-void SubsetDomainFeature::add(const map<string,float>& domainCount,
+void SubsetDomainFeature::add(const map<string,float>& domainCount,
float count,
const MaybeLog& maybeLog,
std::vector<float>& denseValues,
diff --git a/phrase-extract/DomainFeature.h b/phrase-extract/DomainFeature.h
index 8ebc599e2..bcb2e63a2 100644
--- a/phrase-extract/DomainFeature.h
+++ b/phrase-extract/DomainFeature.h
@@ -35,8 +35,8 @@ public:
DomainFeature(const std::string& domainFile);
- void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
- float count,
+ void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+ float count,
int sentenceId) const;
void add(const ScoreFeatureContext& context,
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index a975b4126..102537ca1 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -29,7 +29,8 @@
using namespace std;
-namespace MosesTraining {
+namespace MosesTraining
+{
extern Vocabulary vcbT;
@@ -38,23 +39,23 @@ extern Vocabulary vcbS;
extern bool hierarchicalFlag;
-ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
- const PHRASE *phraseTarget,
- ALIGNMENT *targetToSourceAlignment,
- float count, float pcfgSum ) :
- m_phraseSource(phraseSource),
- m_phraseTarget(phraseTarget),
- m_count(count),
- m_pcfgSum(pcfgSum)
+ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
+ const PHRASE *phraseTarget,
+ ALIGNMENT *targetToSourceAlignment,
+ float count, float pcfgSum ) :
+ m_phraseSource(phraseSource),
+ m_phraseTarget(phraseTarget),
+ m_count(count),
+ m_pcfgSum(pcfgSum)
{
assert(phraseSource->empty());
assert(phraseTarget->empty());
m_count = count;
m_pcfgSum = pcfgSum;
-
+
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
- m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
+ m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
m_lastTargetToSourceAlignment = insertedAlignment.first;
m_lastCount = m_count;
@@ -64,29 +65,30 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
}
-ExtractionPhrasePair::~ExtractionPhrasePair( ) {
+ExtractionPhrasePair::~ExtractionPhrasePair( )
+{
Clear();
}
// return value: true if the given alignment was seen for the first time and thus will be stored,
// false if it was present already (the pointer may thus be deleted(
-bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
- float count, float pcfgSum )
+bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
+ float count, float pcfgSum )
{
m_count += count;
m_pcfgSum += pcfgSum;
m_lastCount = count;
m_lastPcfgSum = pcfgSum;
-
+
std::map<ALIGNMENT*,float>::iterator iter = m_lastTargetToSourceAlignment;
if ( *(iter->first) == *targetToSourceAlignment ) {
iter->second += count;
return false;
} else {
std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
- m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
+ m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
if ( !insertedAlignment.second ) {
// the alignment already exists: increment count
insertedAlignment.first->second += count;
@@ -105,7 +107,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
m_pcfgSum += pcfgSum;
m_lastTargetToSourceAlignment->second += count;
// properties
- for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
+ for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
iter !=m_properties.end(); ++iter ) {
LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second;
(*lastPropertyValue)->second += count;
@@ -116,7 +118,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
}
-// Check for lexical match
+// Check for lexical match
// and in case of SCFG rules for equal non-terminal alignment.
bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
const PHRASE *otherPhraseTarget,
@@ -132,9 +134,9 @@ bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
return MatchesAlignment( otherTargetToSourceAlignment );
}
-// Check for lexical match
+// Check for lexical match
// and in case of SCFG rules for equal non-terminal alignment.
-// Set boolean indicators.
+// Set boolean indicators.
// (Note that we check in the order: target - source - alignment
// and do not touch the subsequent boolean indicators once a previous one has been set to false.)
bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
@@ -194,7 +196,7 @@ bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlign
return true;
}
-void ExtractionPhrasePair::Clear()
+void ExtractionPhrasePair::Clear()
{
delete m_phraseSource;
delete m_phraseTarget;
@@ -218,7 +220,7 @@ void ExtractionPhrasePair::Clear()
m_lastCount = 0.0f;
m_lastPcfgSum = 0.0f;
m_lastTargetToSourceAlignment = m_targetToSourceAlignments.begin();
-
+
m_isValid = false;
}
@@ -252,7 +254,7 @@ const ALIGNMENT *ExtractionPhrasePair::FindBestAlignmentTargetToSource() const
std::map<ALIGNMENT*,float>::const_iterator bestAlignment = m_targetToSourceAlignments.end();
- for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
+ for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
iter!=m_targetToSourceAlignments.end(); ++iter) {
if ( (iter->second > bestAlignmentCount) ||
( (iter->second == bestAlignmentCount) &&
@@ -281,7 +283,7 @@ const std::string *ExtractionPhrasePair::FindBestPropertyValue(const std::string
PROPERTY_VALUES::const_iterator bestPropertyValue = allPropertyValues->end();
- for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+ for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
if ( (iter->second > bestPropertyCount) ||
( (iter->second == bestPropertyCount) &&
@@ -308,7 +310,7 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
}
std::ostringstream oss;
- for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+ for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
iter!=allPropertyValues->end(); ++iter) {
if (iter!=allPropertyValues->begin()) {
oss << " ";
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index f04984391..e9f643d2c 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -24,20 +24,22 @@
#include <set>
#include <map>
-namespace MosesTraining {
+namespace MosesTraining
+{
typedef std::vector< std::set<size_t> > ALIGNMENT;
-class ExtractionPhrasePair {
+class ExtractionPhrasePair
+{
protected:
typedef std::map<std::string,float> PROPERTY_VALUES;
typedef std::map<std::string,float>::iterator LAST_PROPERTY_VALUE;
-
+
bool m_isValid;
const PHRASE *m_phraseSource;
@@ -47,8 +49,8 @@ protected:
float m_pcfgSum;
std::map<ALIGNMENT*,float> m_targetToSourceAlignments;
- std::map<std::string,
- std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
+ std::map<std::string,
+ std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
float m_lastCount;
float m_lastPcfgSum;
@@ -56,14 +58,14 @@ protected:
public:
- ExtractionPhrasePair( const PHRASE *phraseSource,
- const PHRASE *phraseTarget,
- ALIGNMENT *targetToSourceAlignment,
+ ExtractionPhrasePair( const PHRASE *phraseSource,
+ const PHRASE *phraseTarget,
+ ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum );
~ExtractionPhrasePair();
- bool Add( ALIGNMENT *targetToSourceAlignment,
+ bool Add( ALIGNMENT *targetToSourceAlignment,
float count, float pcfgSum );
void IncrementPrevious( float count, float pcfgSum );
@@ -91,7 +93,7 @@ public:
const PHRASE *GetSource() const {
return m_phraseSource;
}
-
+
const PHRASE *GetTarget() const {
return m_phraseTarget;
}
@@ -126,10 +128,9 @@ public:
void AddProperties( const std::string &str, float count );
- void AddProperty( const std::string &key, const std::string &value, float count )
- {
+ void AddProperty( const std::string &key, const std::string &value, float count ) {
std::map<std::string,
- std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
+ std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
if ( iter == m_properties.end() ) {
// key not found: insert property key and value
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp
index 3757b0e43..a2369a80c 100644
--- a/phrase-extract/InternalStructFeature.cpp
+++ b/phrase-extract/InternalStructFeature.cpp
@@ -8,7 +8,8 @@ namespace MosesTraining
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const {
+ std::map<std::string,float>& sparseValues) const
+{
const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
iter!=allTrees->end(); ++iter ) {
@@ -19,24 +20,26 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
void InternalStructFeatureDense::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const {
- //cout<<"Dense: "<<*internalStruct<<endl;
- size_t start=0;
- int countNP=0;
- while((start = treeFragment->find("NP", start)) != string::npos) {
- countNP += count;
- start+=2; //length of "NP"
- }
- //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
- //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
- denseValues.push_back(exp(countNP));
+ std::map<std::string,float>& sparseValues) const
+{
+ //cout<<"Dense: "<<*internalStruct<<endl;
+ size_t start=0;
+ int countNP=0;
+ while((start = treeFragment->find("NP", start)) != string::npos) {
+ countNP += count;
+ start+=2; //length of "NP"
+ }
+ //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
+ //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
+ denseValues.push_back(exp(countNP));
}
void InternalStructFeatureSparse::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const {
+ std::map<std::string,float>& sparseValues) const
+{
//cout<<"Sparse: "<<*internalStruct<<endl;
if(treeFragment->find("VBZ")!=std::string::npos)
sparseValues["NTVBZ"] += count;
diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h
index 7969dc8a8..bd44f61fb 100644
--- a/phrase-extract/InternalStructFeature.h
+++ b/phrase-extract/InternalStructFeature.h
@@ -21,20 +21,20 @@ namespace MosesTraining
class InternalStructFeature : public ScoreFeature
{
public:
- InternalStructFeature() : m_type(0) {};
- /** Add the values for this feature function. */
- void add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ InternalStructFeature() : m_type(0) {};
+ /** Add the values for this feature function. */
+ void add(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
protected:
- /** Overridden in subclass */
- virtual void add(const std::string *treeFragment,
- float count,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const = 0;
- int m_type;
+ /** Overridden in subclass */
+ virtual void add(const std::string *treeFragment,
+ float count,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const = 0;
+ int m_type;
};
class InternalStructFeatureDense : public InternalStructFeature
@@ -45,10 +45,10 @@ public:
m_type=1;
} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
- virtual void add(const std::string *treeFragment,
- float count,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ virtual void add(const std::string *treeFragment,
+ float count,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class InternalStructFeatureSparse : public InternalStructFeature
@@ -59,10 +59,10 @@ public:
m_type=2;
}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
- virtual void add(const std::string *treeFragment,
- float count,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ virtual void add(const std::string *treeFragment,
+ float count,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
}
diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp
index 7db57b38e..c037ab584 100644
--- a/phrase-extract/ScoreFeature.cpp
+++ b/phrase-extract/ScoreFeature.cpp
@@ -77,12 +77,12 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
- } else if(args[i] == "--TreeFeatureSparse"){
- //MARIA
- m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
- } else if(args[i] == "--TreeFeatureDense"){
- //MARIA
- m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
+ } else if(args[i] == "--TreeFeatureSparse") {
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
+ } else if(args[i] == "--TreeFeatureDense") {
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
}
@@ -91,9 +91,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
-void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
- float count,
- int sentenceId) const
+void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+ float count,
+ int sentenceId) const
{
for (size_t i = 0; i < m_features.size(); ++i) {
m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);
diff --git a/phrase-extract/ScoreFeature.h b/phrase-extract/ScoreFeature.h
index 926397e71..1f697c989 100644
--- a/phrase-extract/ScoreFeature.h
+++ b/phrase-extract/ScoreFeature.h
@@ -84,10 +84,10 @@ class ScoreFeature
public:
/** Some features might need to store properties in ExtractionPhrasePair,
- * e.g. to pass along external information loaded by a feature
+ * e.g. to pass along external information loaded by a feature
* which may distinguish several phrase occurrences based on sentence ID */
- virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
- float count,
+ virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+ float count,
int sentenceId) const {};
/** Add the values for this feature function. */
@@ -113,10 +113,10 @@ public:
void configure(const std::vector<std::string> args);
/** Some features might need to store properties in ExtractionPhrasePair,
- * e.g. to pass along external information loaded by a feature
+ * e.g. to pass along external information loaded by a feature
* which may distinguish several phrase occurrences based on sentence ID */
- void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
- float count,
+ void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+ float count,
int sentenceId) const;
/** Add all the features */
diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp
index 104457b01..b37309d47 100644
--- a/phrase-extract/extract-ordering-main.cpp
+++ b/phrase-extract/extract-ordering-main.cpp
@@ -92,9 +92,9 @@ class ExtractTask
public:
ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation)
:m_sentence(sentence),
- m_options(initoptions),
- m_extractFileOrientation(extractFileOrientation)
- {}
+ m_options(initoptions),
+ m_extractFileOrientation(extractFileOrientation)
+ {}
void Run();
private:
void extract(SentenceAlignment &);
@@ -151,11 +151,11 @@ int main(int argc, char* argv[])
}
options.initInstanceWeightsFile(argv[++i]);
} else if (strcmp(argv[i], "--Debug") == 0) {
- options.debug = true;
+ options.debug = true;
} else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
- options.minPhraseLength = atoi(argv[++i]);
+ options.minPhraseLength = atoi(argv[++i]);
} else if (strcmp(argv[i], "--Separator") == 0) {
- options.separator = argv[++i];
+ options.separator = argv[++i];
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@@ -605,16 +605,14 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
int getClass(const std::string &str)
{
- size_t pos = str.find("swap");
- if (pos == str.npos) {
- return 0;
- }
- else if (pos == 0) {
- return 1;
- }
- else {
- return 2;
- }
+ size_t pos = str.find("swap");
+ if (pos == str.npos) {
+ return 0;
+ } else if (pos == 0) {
+ return 1;
+ } else {
+ return 2;
+ }
}
void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
@@ -635,19 +633,19 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// start
m_extractFileOrientation << "<s> ";
for(int fi=0; fi<startF; fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
+ m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// middle
for(int fi=startF; fi<=endF; fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
+ m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << sep << " ";
// end
for(int fi=endF+1; fi<sentence.source.size(); fi++) {
- m_extractFileOrientation << sentence.source[fi] << " ";
+ m_extractFileOrientation << sentence.source[fi] << " ";
}
m_extractFileOrientation << "</s> ";
@@ -655,7 +653,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
// target
/*
for(int ei=startE; ei<=endE; ei++) {
- m_extractFileOrientation << sentence.target[ei] << " ";
+ m_extractFileOrientation << sentence.target[ei] << " ";
}
*/
m_extractFileOrientation << endl;
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index cd8f9ddaa..72c4c1476 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -68,7 +68,7 @@ std::map<std::string,float> sourceLHSCounts;
std::map<std::string, std::map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
std::set<std::string> sourceLabelSet;
-std::map<std::string,size_t> sourceLabels;
+std::map<std::string,size_t> sourceLabels;
std::vector<std::string> sourceLabelsByIndex;
Vocabulary vcbT;
@@ -79,12 +79,12 @@ Vocabulary vcbS;
std::vector<std::string> tokenize( const char [] );
void processLine( std::string line,
- int lineID, bool includeSentenceIdFlag, int &sentenceId,
+ int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum );
void writeCountOfCounts( const std::string &fileNameCountOfCounts );
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb );
void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog );
double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@@ -100,7 +100,7 @@ void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, co
int main(int argc, char* argv[])
{
- std::cerr << "Score v2.1 -- "
+ std::cerr << "Score v2.1 -- "
<< "scoring methods for extracted rules" << std::endl;
ScoreFeatureManager featureManager;
@@ -155,7 +155,7 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
unalignedFWFlag = true;
if (i+1==argc) {
- std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
+ std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
exit(1);
}
fileNameFunctionWords = argv[++i];
@@ -224,8 +224,8 @@ int main(int argc, char* argv[])
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
- std::cerr << "ERROR: could not open file phrase table file "
- << fileNamePhraseTable << std::endl;
+ std::cerr << "ERROR: could not open file phrase table file "
+ << fileNamePhraseTable << std::endl;
exit(1);
}
phraseTableFile = outputFile;
@@ -251,12 +251,12 @@ int main(int argc, char* argv[])
tmpPhraseSource = new PHRASE();
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
- processLine( std::string(line),
+ processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
- tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
+ tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
tmpCount, tmpPcfgSum);
- phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
+ phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
@@ -288,14 +288,16 @@ int main(int argc, char* argv[])
tmpPhraseTarget = new PHRASE();
tmpTargetToSourceAlignment = new ALIGNMENT();
tmpAdditionalPropertiesString.clear();
- processLine( std::string(line),
+ processLine( std::string(line),
i, featureManager.includeSentenceId(), tmpSentenceId,
- tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
+ tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
tmpAdditionalPropertiesString,
- tmpCount, tmpPcfgSum);
+ tmpCount, tmpPcfgSum);
bool matchesPrevious = false;
- bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these,
+ bool sourceMatch = true;
+ bool targetMatch = true;
+ bool alignmentMatch = true; // be careful with these,
// ExtractionPhrasePair::Matches() checks them in order and does not continue with the others
// once the first of them has been found to have to be set to false
@@ -330,7 +332,7 @@ int main(int argc, char* argv[])
if ( !phrasePairsWithSameSource.empty() &&
!sourceMatch ) {
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@@ -347,8 +349,8 @@ int main(int argc, char* argv[])
}
}
- phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
- tmpTargetToSourceAlignment,
+ phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
+ tmpTargetToSourceAlignment,
tmpCount, tmpPcfgSum );
phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
@@ -364,7 +366,7 @@ int main(int argc, char* argv[])
}
processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
delete *iter;
}
@@ -384,7 +386,7 @@ int main(int argc, char* argv[])
void processLine( std::string line,
- int lineID, bool includeSentenceIdFlag, int &sentenceId,
+ int lineID, bool includeSentenceIdFlag, int &sentenceId,
PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
std::string &additionalPropertiesString,
float &count, float &pcfgSum )
@@ -474,7 +476,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
}
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
{
if (phrasePairsWithSameSource.size() == 0) {
@@ -486,23 +488,23 @@ void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSa
//std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl;
// loop through phrase pairs
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
totalSource += (*iter)->GetCount();
}
// output the distinct phrase pairs, one at a time
- for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
+ for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
iter!=phrasePairsWithSameSource.end(); ++iter) {
// add to total count
outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb );
}
}
-void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
- float totalCount, int distinctCount,
- ostream &phraseTableFile,
+void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
+ float totalCount, int distinctCount,
+ ostream &phraseTableFile,
const ScoreFeatureManager& featureManager,
const MaybeLog& maybeLogProb )
{
@@ -557,45 +559,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
// alignment
if ( hierarchicalFlag ) {
- // always output alignment if hiero style
- assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
- std::vector<std::string> alignment;
- for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
- if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
- if ( bestAlignmentT2S->at(j).size() != 1 ) {
- std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
- phraseTableFile.flush();
- assert(bestAlignmentT2S->at(j).size() == 1);
- }
- size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
- //phraseTableFile << sourcePos << "-" << j << " ";
+ // always output alignment if hiero style
+ assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
+ std::vector<std::string> alignment;
+ for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
+ if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
+ if ( bestAlignmentT2S->at(j).size() != 1 ) {
+ std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
+ phraseTableFile.flush();
+ assert(bestAlignmentT2S->at(j).size() == 1);
+ }
+ size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
+ //phraseTableFile << sourcePos << "-" << j << " ";
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
+ } else {
+ for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+ setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+ size_t sourcePos = *setIter;
std::stringstream point;
point << sourcePos << "-" << j;
alignment.push_back(point.str());
- } else {
- for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
- setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
- size_t sourcePos = *setIter;
- std::stringstream point;
- point << sourcePos << "-" << j;
- alignment.push_back(point.str());
- }
}
}
- // now print all alignments, sorted by source index
- sort(alignment.begin(), alignment.end());
- for (size_t i = 0; i < alignment.size(); ++i) {
- phraseTableFile << alignment[i] << " ";
- }
+ }
+ // now print all alignments, sorted by source index
+ sort(alignment.begin(), alignment.end());
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ phraseTableFile << alignment[i] << " ";
+ }
} else if ( !inverseFlag && wordAlignmentFlag) {
- // alignment info in pb model
- for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
- for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
- setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
- size_t sourcePos = *setIter;
- phraseTableFile << sourcePos << "-" << j << " ";
- }
+ // alignment info in pb model
+ for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
+ for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+ setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+ size_t sourcePos = *setIter;
+ phraseTableFile << sourcePos << "-" << j << " ";
}
+ }
}
phraseTableFile << " ||| ";
@@ -646,7 +648,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
- if ((treeFragmentsFlag) &&
+ if ((treeFragmentsFlag) &&
!inverseFlag) {
phraseTableFile << " |||";
}
@@ -671,7 +673,7 @@ bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *al
// skip
} else {
const std::set<size_t> &sourceSet = alignmentTargetToSource->at(currTarget);
- for (std::set<size_t>::const_iterator iter = sourceSet.begin();
+ for (std::set<size_t>::const_iterator iter = sourceSet.begin();
iter != sourceSet.end(); ++iter) {
size_t currSource = *iter;
@@ -808,9 +810,9 @@ void LexicalTable::load( const string &fileName )
std::vector<string> token = tokenize( line );
if (token.size() != 3) {
- std::cerr << "line " << i << " in " << fileName
- << " has wrong number of tokens, skipping:" << std::endl
- << token.size() << " " << token[0] << " " << line << std::endl;
+ std::cerr << "line " << i << " in " << fileName
+ << " has wrong number of tokens, skipping:" << std::endl
+ << token.size() << " " << token[0] << " " << line << std::endl;
continue;
}
@@ -889,15 +891,16 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
- const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
-// typedef std::vector< std::set<size_t> > ALIGNMENT;
+ const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
+{
+// typedef std::vector< std::set<size_t> > ALIGNMENT;
outSourceToTargetAlignment->clear();
size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size());
outSourceToTargetAlignment->resize(numberOfSourceSymbols);
// add alignment point
for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) {
- for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
+ for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) {
size_t sourcePosition = *setIter;
outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);