Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Phil Williams <philip.williams@mac.com> 2013-09-14 01:16:42 +0400
committer: Phil Williams <philip.williams@mac.com> 2013-09-14 01:16:42 +0400
commit: cdd9df19d26723645454f8ddef467489d1ed341b (patch)
tree: 1fd033efb62e16fc5fbb37077c7723cf7255bde3 /phrase-extract
parent: 3b03d803d98194772e4b0ac6df7041c8c98c0119 (diff)
Remove --OutputNTLengths from extract-rules, etc.
The option isn't used in master and the output is incompatible with the current rule table format. If anyone wants this in master, it should probably be fixed in the span-length branch and then merged.
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/ExtractedRule.cpp 44
-rw-r--r-- phrase-extract/ExtractedRule.h 11
-rw-r--r-- phrase-extract/RuleExtractionOptions.h 2
-rw-r--r-- phrase-extract/consolidate-main.cpp 9
-rw-r--r-- phrase-extract/extract-rules-main.cpp 9
-rw-r--r-- phrase-extract/score-main.cpp 104
6 files changed, 2 insertions, 177 deletions
diff --git a/phrase-extract/ExtractedRule.cpp b/phrase-extract/ExtractedRule.cpp
deleted file mode 100644
index 50d9085e6..000000000
--- a/phrase-extract/ExtractedRule.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//
-// ExtractedRule.cpp
-// extract
-//
-// Created by Hieu Hoang on 13/09/2011.
-// Copyright 2011 __MyCompanyName__. All rights reserved.
-//
-
-#include "ExtractedRule.h"
-
-using namespace std;
-
-namespace MosesTraining
-{
-
-void ExtractedRule::OutputNTLengths(std::ostream &out) const
-{
- ostringstream outString;
- OutputNTLengths(outString);
- out << outString;
-}
-
-void ExtractedRule::OutputNTLengths(std::ostringstream &outString) const
-{
- std::map<size_t, std::pair<size_t, size_t> >::const_iterator iter;
- for (iter = m_ntLengths.begin(); iter != m_ntLengths.end(); ++iter) {
- size_t sourcePos = iter->first;
- const std::pair<size_t, size_t> &spanLengths = iter->second;
- outString << sourcePos << "=" << spanLengths.first << "," <<spanLengths.second << " ";
- }
-}
-
-std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj)
-{
- out << obj.source << " ||| " << obj.target << " ||| "
- << obj.alignment << " ||| "
- << obj.alignmentInv << " ||| ";
-
- obj.OutputNTLengths(out);
-
- return out;
-}
-
-} // namespace
diff --git a/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h
index a6cd5074d..cb2f2261d 100644
--- a/phrase-extract/ExtractedRule.h
+++ b/phrase-extract/ExtractedRule.h
@@ -32,8 +32,6 @@ namespace MosesTraining
// sentence-level collection of rules
class ExtractedRule
{
- friend std::ostream& operator<<(std::ostream &, const ExtractedRule &);
-
public:
std::string source;
std::string target;
@@ -54,8 +52,6 @@ public:
float count;
double pcfgScore;
- std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
-
ExtractedRule(int sT, int eT, int sS, int eS)
: source()
, target()
@@ -76,13 +72,6 @@ public:
, count(0)
, pcfgScore(0.0) {
}
-
- void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength) {
- m_ntLengths[sourcePos] = std::pair<size_t, size_t>(sourceLength, targetLength);
- }
-
- void OutputNTLengths(std::ostream &out) const;
- void OutputNTLengths(std::ostringstream &out) const;
};
}
diff --git a/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h
index a9b0ce9e6..b38258470 100644
--- a/phrase-extract/RuleExtractionOptions.h
+++ b/phrase-extract/RuleExtractionOptions.h
@@ -49,7 +49,6 @@ public:
bool duplicateRules;
bool fractionalCounting;
bool pcfgScore;
- bool outputNTLengths;
bool gzOutput;
bool unpairedExtractFormat;
bool conditionOnTargetLhs;
@@ -83,7 +82,6 @@ public:
, duplicateRules(true)
, fractionalCounting(true)
, pcfgScore(false)
- , outputNTLengths(false)
, gzOutput(false)
, unpairedExtractFormat(false)
, conditionOnTargetLhs(false)
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index 221b7048c..b0e2c8594 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -41,7 +41,6 @@ bool lowCountFlag = false;
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
bool logProbFlag = false;
-bool outputNTLengths = false;
inline float maybeLogProb( float a )
{
return logProbFlag ? log(a) : a;
@@ -62,7 +61,7 @@ int main(int argc, char* argv[])
<< "consolidating direct and indirect rule tables\n";
if (argc < 4) {
- cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n";
+ cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] \n";
exit(1);
}
char* &fileNameDirect = argv[1];
@@ -119,8 +118,6 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
- } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
- outputNTLengths = true;
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
@@ -315,10 +312,6 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
- if (outputNTLengths) {
- fileConsolidated << " ||| " << itemDirect[5];
- }
-
// count bin feature (as a sparse feature)
if (sparseCountBinFeatureFlag ||
directSparseScores.compare("") != 0 ||
diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp
index 97a593085..c625c3582 100644
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@@ -129,7 +129,6 @@ int main(int argc, char* argv[])
<< " --GlueGrammar FILE"
<< " | --UnknownWordLabel FILE"
<< " | --OnlyDirect"
- << " | --OutputNTLengths"
<< " | --MaxSpan[" << options.maxSpan << "]"
<< " | --MinHoleTarget[" << options.minHoleTarget << "]"
<< " | --MinHoleSource[" << options.minHoleSource << "]"
@@ -262,8 +261,6 @@ int main(int argc, char* argv[])
options.fractionalCounting = false;
} else if (strcmp(argv[i],"--PCFG") == 0) {
options.pcfgScore = true;
- } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
- options.outputNTLengths = true;
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
options.unpairedExtractFormat = true;
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
@@ -663,9 +660,6 @@ void ExtractTask::saveHieroAlignment( int startT, int endT, int startS, int endS
rule.alignment += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
if (!m_options.onlyDirectFlag)
rule.alignmentInv += targetSymbolIndex + "-" + sourceSymbolIndex + " ";
-
- rule.SetSpanLength(hole.GetPos(0), hole.GetSize(0), hole.GetSize(1) ) ;
-
}
rule.alignment.erase(rule.alignment.size()-1);
@@ -1077,9 +1071,6 @@ void ExtractTask::writeRulesToFile()
<< rule->target << " ||| "
<< rule->alignment << " ||| "
<< rule->count << " ||| ";
- if (m_options.outputNTLengths) {
- rule->OutputNTLengths(out);
- }
if (m_options.pcfgScore) {
out << " ||| " << rule->pcfgScore;
}
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index 3042cbe3e..b7ccea0fc 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -59,7 +59,6 @@ int negLogProb = 1;
bool lexFlag = true;
bool unalignedFlag = false;
bool unalignedFWFlag = false;
-bool outputNTLengths = false;
bool singletonFeature = false;
bool crossedNonTerm = false;
int countOfCounts[COC_MAX+1];
@@ -82,9 +81,6 @@ double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlig
set<string> functionWordList;
void loadFunctionWords( const string &fileNameFunctionWords );
double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
-void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
- , map<size_t, map<size_t, float> > &sourceProb
- , map<size_t, map<size_t, float> > &targetProb);
void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
@@ -95,7 +91,7 @@ int main(int argc, char* argv[])
ScoreFeatureManager featureManager;
if (argc < 4) {
- cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
+ cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--Singleton] [--CrossedNonTerm] \n";
cerr << featureManager.usage() << endl;
exit(1);
}
@@ -158,8 +154,6 @@ int main(int argc, char* argv[])
minCountHierarchical = atof(argv[++i]);
cerr << "dropping all phrase pairs occurring less than " << minCountHierarchical << " times\n";
minCountHierarchical -= 0.00001; // account for rounding
- } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
- outputNTLengths = true;
} else if (strcmp(argv[i],"--Singleton") == 0) {
singletonFeature = true;
cerr << "binary singleton feature\n";
@@ -375,87 +369,6 @@ const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrase
return *bestAlignment;
}
-
-void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths
- , size_t total
- , map<size_t, map<size_t, float> > &probs)
-{
- map<size_t, map<size_t, size_t> >::const_iterator iterOuter;
- for (iterOuter = lengths.begin(); iterOuter != lengths.end(); ++iterOuter) {
- size_t sourcePos = iterOuter->first;
- const map<size_t, size_t> &inner = iterOuter->second;
-
- map<size_t, size_t>::const_iterator iterInner;
- for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
- size_t length = iterInner->first;
- size_t count = iterInner->second;
- float prob = (float) count / (float) total;
- probs[sourcePos][length] = prob;
- }
- }
-}
-
-void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
- , map<size_t, map<size_t, float> > &sourceProb
- , map<size_t, map<size_t, float> > &targetProb)
-{
- map<size_t, map<size_t, size_t> > sourceLengths, targetLengths;
- // 1st = position in source phrase, 2nd = length, 3rd = count
- map<size_t, size_t> totals;
- // 1st = position in source phrase, 2nd = total counts
- // each source pos should have same count?
-
- vector< PhraseAlignment* >::const_iterator iterOuter;
- for (iterOuter = phrasePairs.begin(); iterOuter != phrasePairs.end(); ++iterOuter) {
- const PhraseAlignment &phrasePair = **iterOuter;
- const std::map<size_t, std::pair<size_t, size_t> > &ntLengths = phrasePair.GetNTLengths();
-
- std::map<size_t, std::pair<size_t, size_t> >::const_iterator iterInner;
- for (iterInner = ntLengths.begin(); iterInner != ntLengths.end(); ++iterInner) {
- size_t sourcePos = iterInner->first;
- size_t sourceLength = iterInner->second.first;
- size_t targetLength = iterInner->second.second;
-
- sourceLengths[sourcePos][sourceLength]++;
- targetLengths[sourcePos][targetLength]++;
-
- totals[sourcePos]++;
- }
- }
-
- if (totals.size() == 0) {
- // no non-term. Don't bother
- return;
- }
-
- size_t total = totals.begin()->second;
- if (totals.size() > 1) {
- assert(total == (++totals.begin())->second );
- }
-
- calcNTLengthProb(sourceLengths, total, sourceProb);
- calcNTLengthProb(targetLengths, total, targetProb);
-
-}
-
-void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t, float> > &probs, const string &prefix)
-{
- map<size_t, map<size_t, float> >::const_iterator iterOuter;
- for (iterOuter = probs.begin(); iterOuter != probs.end(); ++iterOuter) {
- size_t sourcePos = iterOuter->first;
- const map<size_t, float> &inner = iterOuter->second;
-
- map<size_t, float>::const_iterator iterInner;
- for (iterInner = inner.begin(); iterInner != inner.end(); ++iterInner) {
- size_t length = iterInner->first;
- float prob = iterInner->second;
-
- phraseTableFile << sourcePos << "|" << prefix << "|" << length << "=" << prob << " ";
- }
- }
-
-}
-
bool calcCrossedNonTerm(size_t sourcePos, size_t targetPos, const std::vector< std::set<size_t> > &alignedToS)
{
for (size_t currSource = 0; currSource < alignedToS.size(); ++currSource) {
@@ -664,21 +577,6 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
if (kneserNeyFlag)
phraseTableFile << " " << distinctCount;
- // nt lengths
- if (outputNTLengths) {
- phraseTableFile << " ||| ";
-
- if (!inverseFlag) {
- map<size_t, map<size_t, float> > sourceProb, targetProb;
- // 1st sourcePos, 2nd = length, 3rd = prob
-
- calcNTLengthProb(phrasePair, sourceProb, targetProb);
-
- outputNTLengthProbs(phraseTableFile, sourceProb, "S");
- outputNTLengthProbs(phraseTableFile, targetProb, "T");
- }
- }
-
phraseTableFile << endl;
}