Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2013-09-13 19:10:21 +0400
committer: Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2013-09-13 19:10:21 +0400
commit: c39bed60c054ba5cc36a0032714b00978b713494 (patch)
tree: ed2301ea4081696e3e4303a6be279bf0dd2cceeb
parent: bff123635ea9773d50f8536885ba409d24175749 (diff)
Tree fragments in GHKM glue rules;
output of LHS tag in tree fragments for UNKs; GHKMParse info is now denoted as Tree info
-rw-r--r-- moses-chart-cmd/IOWrapper.cpp | 30
-rw-r--r-- moses-chart-cmd/IOWrapper.h | 8
-rw-r--r-- moses-chart-cmd/Main.cpp | 4
-rw-r--r-- moses/ChartParser.cpp | 4
-rw-r--r-- moses/Parameter.cpp | 2
-rw-r--r-- moses/StaticData.cpp | 10
-rw-r--r-- moses/StaticData.h | 10
-rw-r--r-- phrase-extract/InternalStructFeature.cpp | 2
-rw-r--r-- phrase-extract/PhraseAlignment.cpp | 8
-rw-r--r-- phrase-extract/PhraseAlignment.h | 2
-rw-r--r-- phrase-extract/extract-ghkm/ExtractGHKM.cpp | 10
-rw-r--r-- phrase-extract/extract-ghkm/ScfgRuleWriter.cpp | 2
-rw-r--r-- phrase-extract/score-main.cpp | 43
13 files changed, 67 insertions, 68 deletions
diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp
index 5c38d86be..69ec9ece4 100644
--- a/moses-chart-cmd/IOWrapper.cpp
+++ b/moses-chart-cmd/IOWrapper.cpp
@@ -71,7 +71,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
,m_alignmentInfoStream(NULL)
,m_inputFilePath(inputFilePath)
,m_detailOutputCollector(NULL)
- ,m_detailGhkmOutputCollector(NULL)
+ ,m_detailTreeFragmentsOutputCollector(NULL)
,m_nBestOutputCollector(NULL)
,m_searchGraphOutputCollector(NULL)
,m_singleBestOutputCollector(NULL)
@@ -117,10 +117,10 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
}
- if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
- const std::string &path = staticData.GetDetailedGhkmTranslationReportingFilePath();
- m_detailedGhkmTranslationReportingStream = new std::ofstream(path.c_str());
- m_detailGhkmOutputCollector = new Moses::OutputCollector(m_detailedGhkmTranslationReportingStream);
+ if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
+ const std::string &path = staticData.GetDetailedTreeFragmentsTranslationReportingFilePath();
+ m_detailedTreeFragmentsTranslationReportingStream = new std::ofstream(path.c_str());
+ m_detailTreeFragmentsOutputCollector = new Moses::OutputCollector(m_detailedTreeFragmentsTranslationReportingStream);
}
if (!staticData.GetAlignmentOutputFile().empty()) {
@@ -137,7 +137,7 @@ IOWrapper::~IOWrapper()
}
delete m_outputSearchGraphStream;
delete m_detailedTranslationReportingStream;
- delete m_detailedGhkmTranslationReportingStream;
+ delete m_detailedTreeFragmentsTranslationReportingStream;
delete m_alignmentInfoStream;
delete m_detailOutputCollector;
delete m_nBestOutputCollector;
@@ -321,11 +321,11 @@ void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &
}
}
-void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
+void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
{
// recursive
if (hypo != NULL) {
- const std::string key = "GHKMParse";
+ const std::string key = "Tree";
std::string value;
bool hasprop;
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
@@ -340,11 +340,11 @@ void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationConte
<< "-> " << hypo->GetCurrTargetPhrase()
<< " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown();
- out << std::endl;
+ out << " ||| ";
if (hasprop)
out << " " << value;
else
- out << " " << "noGHKMParseInfo";
+ out << " " << "noTreeInfo";
out << std::endl;
}
@@ -353,7 +353,7 @@ void IOWrapper::OutputGhkmTranslationOptions(std::ostream &out, ApplicationConte
std::vector<const ChartHypothesis*>::const_iterator iter;
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
const ChartHypothesis *prevHypo = *iter;
- OutputGhkmTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
}
}
@@ -373,7 +373,7 @@ void IOWrapper::OutputDetailedTranslationReport(
m_detailOutputCollector->Write(translationId, out.str());
}
-void IOWrapper::OutputDetailedGhkmTranslationReport(
+void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
const ChartHypothesis *hypo,
const Sentence &sentence,
long translationId)
@@ -384,9 +384,9 @@ void IOWrapper::OutputDetailedGhkmTranslationReport(
std::ostringstream out;
ApplicationContext applicationContext;
- OutputGhkmTranslationOptions(out, applicationContext, hypo, sentence, translationId);
- CHECK(m_detailGhkmOutputCollector);
- m_detailGhkmOutputCollector->Write(translationId, out.str());
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
+ CHECK(m_detailTreeFragmentsOutputCollector);
+ m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
}
void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
diff --git a/moses-chart-cmd/IOWrapper.h b/moses-chart-cmd/IOWrapper.h
index 4ac88da7c..ec1aee9de 100644
--- a/moses-chart-cmd/IOWrapper.h
+++ b/moses-chart-cmd/IOWrapper.h
@@ -70,12 +70,12 @@ protected:
const Moses::FactorMask &m_inputFactorUsed;
std::ostream *m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
- std::ostream *m_detailedGhkmTranslationReportingStream;
+ std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
std::ostream *m_alignmentInfoStream;
std::string m_inputFilePath;
std::istream *m_inputStream;
Moses::OutputCollector *m_detailOutputCollector;
- Moses::OutputCollector *m_detailGhkmOutputCollector;
+ Moses::OutputCollector *m_detailTreeFragmentsOutputCollector;
Moses::OutputCollector *m_nBestOutputCollector;
Moses::OutputCollector *m_searchGraphOutputCollector;
Moses::OutputCollector *m_singleBestOutputCollector;
@@ -86,7 +86,7 @@ protected:
size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputGhkmTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void ReconstructApplicationContext(const Moses::ChartHypothesis &hypo,
const Moses::Sentence &sentence,
ApplicationContext &context);
@@ -117,7 +117,7 @@ public:
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, long translationId);
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputDetailedGhkmTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
+ void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void Backtrack(const Moses::ChartHypothesis *hypo);
void ResetTranslationId();
diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp
index eb06ce6d6..4ceae1f72 100644
--- a/moses-chart-cmd/Main.cpp
+++ b/moses-chart-cmd/Main.cpp
@@ -127,9 +127,9 @@ public:
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, translationId);
}
- if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
+ if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedGhkmTranslationReport(bestHypo, sentence, translationId);
+ m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(bestHypo, sentence, translationId);
}
// n-best
diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp
index db7cb90aa..37733228c 100644
--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@@ -96,8 +96,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
- if (staticData.IsDetailedGhkmTranslationReportingEnabled()) {
- targetPhrase->SetProperty("GHKMParse","( UNK "+sourceWord[0]->GetString().as_string()+" )");
+ if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
+ targetPhrase->SetProperty("Tree","( " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" )");
}
// chart rule
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index e0bdbd3b1..012ca3622 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -67,7 +67,7 @@ Parameter::Parameter()
AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
AddParam("translation-details", "T", "for each best hypothesis, report translation details to the given file");
- AddParam("ghkm-translation-details", "Tghkm", "for each hypothesis, report removed internal nodes to given file");
+ AddParam("tree-translation-details", "Ttree", "for each hypothesis, report translation details with tree fragment info to given file");
AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
AddParam("verbose", "v", "verbosity level of the logging");
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index d7728b030..7b4d208a4 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -60,7 +60,7 @@ StaticData::StaticData()
,m_unknownWordPenaltyProducer(NULL)
,m_inputFeature(NULL)
,m_detailedTranslationReportingFilePath()
- ,m_detailedGhkmTranslationReportingFilePath()
+ ,m_detailedTreeFragmentsTranslationReportingFilePath()
,m_onlyDistinctNBest(false)
,m_needAlignmentInfo(false)
,m_factorDelimiter("|") // default delimiter between factors
@@ -308,12 +308,12 @@ bool StaticData::LoadData(Parameter *parameter)
return false;
}
}
- if (m_parameter->isParamSpecified("ghkm-translation-details")) {
- const vector<string> &args = m_parameter->GetParam("ghkm-translation-details");
+ if (m_parameter->isParamSpecified("tree-translation-details")) {
+ const vector<string> &args = m_parameter->GetParam("tree-translation-details");
if (args.size() == 1) {
- m_detailedGhkmTranslationReportingFilePath = args[0];
+ m_detailedTreeFragmentsTranslationReportingFilePath = args[0];
} else {
- UserMessage::Add(string("the ghkm-translation-details option requires exactly one filename argument"));
+ UserMessage::Add(string("the tree-translation-details option requires exactly one filename argument"));
return false;
}
}
diff --git a/moses/StaticData.h b/moses/StaticData.h
index a23dfee09..8f8e7a1f3 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -137,7 +137,7 @@ protected:
bool m_reportAllFactors;
bool m_reportAllFactorsNBest;
std::string m_detailedTranslationReportingFilePath;
- std::string m_detailedGhkmTranslationReportingFilePath;
+ std::string m_detailedTreeFragmentsTranslationReportingFilePath;
bool m_onlyDistinctNBest;
bool m_PrintAlignmentInfo;
bool m_needAlignmentInfo;
@@ -368,11 +368,11 @@ public:
const std::string &GetDetailedTranslationReportingFilePath() const {
return m_detailedTranslationReportingFilePath;
}
- bool IsDetailedGhkmTranslationReportingEnabled() const {
- return !m_detailedGhkmTranslationReportingFilePath.empty();
+ bool IsDetailedTreeFragmentsTranslationReportingEnabled() const {
+ return !m_detailedTreeFragmentsTranslationReportingFilePath.empty();
}
- const std::string &GetDetailedGhkmTranslationReportingFilePath() const {
- return m_detailedGhkmTranslationReportingFilePath;
+ const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const {
+ return m_detailedTreeFragmentsTranslationReportingFilePath;
}
bool IsLabeledNBestList() const {
return m_labeledNBestList;
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp
index 5f558f4d7..67cc186db 100644
--- a/phrase-extract/InternalStructFeature.cpp
+++ b/phrase-extract/InternalStructFeature.cpp
@@ -26,7 +26,7 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
for(size_t i=0; i<context.phrasePair.size(); i++) {
- add(&context.phrasePair[i]->ghkmParse, denseValues, sparseValues);
+ add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues);
}
}
diff --git a/phrase-extract/PhraseAlignment.cpp b/phrase-extract/PhraseAlignment.cpp
index 3f6103f33..0bb636c97 100644
--- a/phrase-extract/PhraseAlignment.cpp
+++ b/phrase-extract/PhraseAlignment.cpp
@@ -82,7 +82,7 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
{
assert(phraseS.empty());
assert(phraseT.empty());
- ghkmParse.clear();
+ treeFragment.clear();
vector< string > token = tokenize( line );
int item = 1;
@@ -109,11 +109,11 @@ void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFla
alignedToT[t].insert( s );
alignedToS[s].insert( t );
}
- } else if ( (item >= 4) && (token[j] == "GHKMParse") ) { // check for information with a key field
+ } else if ( (item >= 4) && (token[j] == "Tree") ) { // check for information with a key field
++j;
while ( (j < token.size() ) && (token[j] != "|||") ) {
- ghkmParse.append(" ");
- ghkmParse.append(token[j]);
+ treeFragment.append(" ");
+ treeFragment.append(token[j]);
++j;
}
--j;
diff --git a/phrase-extract/PhraseAlignment.h b/phrase-extract/PhraseAlignment.h
index ce2d0a1eb..9e9abc0f8 100644
--- a/phrase-extract/PhraseAlignment.h
+++ b/phrase-extract/PhraseAlignment.h
@@ -32,7 +32,7 @@ public:
float count;
int sentenceId;
std::string domain;
- std::string ghkmParse;
+ std::string treeFragment;
std::vector< std::set<size_t> > alignedToT;
std::vector< std::set<size_t> > alignedToS;
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 778f2492e..e85280a78 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -446,22 +446,22 @@ void ExtractGHKM::WriteGlueGrammar(
}
// basic rules
- out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| " << std::endl;
- out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 " << std::endl;
+ out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| ||| ||| ||| {{Tree ( " << topLabel << " ( SSTART <s> ) )}}" << std::endl;
+ out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| ||| {{Tree ( " << topLabel << " ( SEND </s> ) )}}" << std::endl;
// top rules
for (std::map<std::string, int>::const_iterator i = topLabelSet.begin();
i != topLabelSet.end(); ++i) {
- out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1" << std::endl;
+ out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( SSTART <s> ) ( " << i->first << " ) ( SEND </s> ) )}}" << std::endl;
}
// glue rules
for(std::set<std::string>::const_iterator i = labelSet.begin();
i != labelSet.end(); i++ ) {
- out << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << std::endl;
+ out << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( "<< topLabel << " ) ( " << *i << " ) )}}" << std::endl;
}
// glue rule for unknown word...
- out << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 " << std::endl;
+ out << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 ||| ||| ||| {{Tree ( " << topLabel << " ( X ) )}}" << std::endl;
}
void ExtractGHKM::CollectWordLabelCounts(
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index 5ba813679..c3b2688e3 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -168,7 +168,7 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
{
Write(rule,false);
- m_fwd << " GHKMParse ";
+ m_fwd << " Tree ";
g.PrintTree(m_fwd);
m_fwd << std::endl;
m_inv << std::endl;
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index 3caa8026f..d23d9e216 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -49,7 +49,7 @@ LexicalTable lexTable;
bool inverseFlag = false;
bool hierarchicalFlag = false;
bool pcfgFlag = false;
-bool ghkmParseFlag = false;
+bool treeFragmentsFlag = false;
bool unpairedExtractFormatFlag = false;
bool conditionOnTargetLhsFlag = false;
bool wordAlignmentFlag = true;
@@ -78,7 +78,7 @@ vector<string> tokenize( const char [] );
void writeCountOfCounts( const string &fileNameCountOfCounts );
void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLog);
const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrasePair );
-const std::string &findBestGHKMParse(const PhraseAlignmentCollection &phrasePair );
+const std::string &findBestTreeFragment(const PhraseAlignmentCollection &phrasePair );
void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile, bool isSingleton, const ScoreFeatureManager& featureManager, const MaybeLog& maybeLog );
double computeLexicalTranslation( const PHRASE &, const PHRASE &, const PhraseAlignment & );
double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
@@ -98,7 +98,7 @@ int main(int argc, char* argv[])
ScoreFeatureManager featureManager;
if (argc < 4) {
- cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--GHKMParse] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
+ cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--TreeFragments] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
cerr << featureManager.usage() << endl;
exit(1);
}
@@ -119,9 +119,9 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--PCFG") == 0) {
pcfgFlag = true;
cerr << "including PCFG scores\n";
- } else if (strcmp(argv[i],"--GHKMParse") == 0) {
- ghkmParseFlag = true;
- cerr << "including GHKM parse\n";
+ } else if (strcmp(argv[i],"--TreeFragments") == 0) {
+ treeFragmentsFlag = true;
+ cerr << "including tree fragments from syntactic parse\n";
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
unpairedExtractFormatFlag = true;
cerr << "processing unpaired extract format\n";
@@ -381,27 +381,27 @@ const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrase
return *bestAlignment;
}
-const std::string &findBestGHKMParse(const PhraseAlignmentCollection &phrasePair )
+const std::string &findBestTreeFragment(const PhraseAlignmentCollection &phrasePair )
{
- float bestGHKMParseCount = -1;
- PhraseAlignment *bestGHKMParse = NULL;
+ float bestTreeFragmentCount = -1;
+ PhraseAlignment *bestTreeFragment = NULL;
for(size_t i=0; i<phrasePair.size(); i++) {
- size_t ghkmParseInd;
+ size_t treeFragmentInd;
if (inverseFlag) {
// count backwards, so that alignments for ties will be the same for both normal & inverse scores
- ghkmParseInd = phrasePair.size() - i - 1;
+ treeFragmentInd = phrasePair.size() - i - 1;
} else {
- ghkmParseInd = i;
+ treeFragmentInd = i;
}
- if (phrasePair[ghkmParseInd]->count > bestGHKMParseCount) {
- bestGHKMParseCount = phrasePair[ghkmParseInd]->count;
- bestGHKMParse = phrasePair[ghkmParseInd];
+ if (phrasePair[treeFragmentInd]->count > bestTreeFragmentCount) {
+ bestTreeFragmentCount = phrasePair[treeFragmentInd]->count;
+ bestTreeFragment = phrasePair[treeFragmentInd];
}
}
- return bestGHKMParse->ghkmParse;
+ return bestTreeFragment->treeFragment;
}
@@ -708,12 +708,11 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
}
}
-
- // GHKM parse
- if (ghkmParseFlag && !inverseFlag) {
- const std::string &bestGHKMParse = findBestGHKMParse( phrasePair );
- if ( !bestGHKMParse.empty() )
- phraseTableFile << " ||| {{GHKMParse" << bestGHKMParse << "}}";
+ // tree fragments
+ if (treeFragmentsFlag && !inverseFlag) {
+ const std::string &bestTreeFragment = findBestTreeFragment( phrasePair );
+ if ( !bestTreeFragment.empty() )
+ phraseTableFile << " ||| {{Tree " << bestTreeFragment << "}}";
}