Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2011-02-24 16:57:11 +0300
committer hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2011-02-24 16:57:11 +0300
commit a3d97584a907511b00b9f0c7e99b8d7d5b93abb1 (patch)
tree f4c7f33cf90e48b83147467269f2d2bb0d45aef3 /misc
parent a0b6abdfd3599e7fbdc6aac76fcd2cb4483d63ce (diff)
run beautify.perl. Consistent formatting for .h & .cpp files
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'misc')
-rw-r--r-- misc/GenerateTuples.cpp 429
-rw-r--r-- misc/GenerateTuples.h 6
-rw-r--r-- misc/processLexicalTable.cpp 40
-rw-r--r-- misc/processPhraseTable.cpp 321
-rw-r--r-- misc/queryLexicalTable.cpp 101
-rw-r--r-- misc/queryPhraseTable.cpp 130
6 files changed, 524 insertions, 503 deletions
diff --git a/misc/GenerateTuples.cpp b/misc/GenerateTuples.cpp
index 123f8a18e..e46e87e06 100644
--- a/misc/GenerateTuples.cpp
+++ b/misc/GenerateTuples.cpp
@@ -18,7 +18,7 @@ using namespace Moses;
#if 0
// Generates all tuples from n indexes with ranges 0 to card[j]-1, respectively..
-// Input: number of indexes and ranges: ranges[0] ... ranges[num_idx-1]
+// Input: number of indexes and ranges: ranges[0] ... ranges[num_idx-1]
// Output: number of tuples and monodimensional array of tuples.
// Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2)
@@ -27,24 +27,26 @@ size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples)
unsigned* single_tuple= new unsigned[num_idx+1];
unsigned num_tuples=1;
- for (unsigned k=0;k<num_idx;++k)
- {
- num_tuples *= ranges[k];
- single_tuple[k]=0;
- }
+ for (unsigned k=0; k<num_idx; ++k) {
+ num_tuples *= ranges[k];
+ single_tuple[k]=0;
+ }
tuples=new unsigned[num_idx * num_tuples];
// we need this additional element for the last iteration
- single_tuple[num_idx]=0;
+ single_tuple[num_idx]=0;
unsigned j=0;
- for (unsigned n=0;n<num_tuples;++n){
+ for (unsigned n=0; n<num_tuples; ++n) {
memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(unsigned));
j=0;
- while (single_tuple[j]==ranges[j]-1){single_tuple[j]=0; ++j;}
+ while (single_tuple[j]==ranges[j]-1) {
+ single_tuple[j]=0;
+ ++j;
+ }
++single_tuple[j];
}
- delete [] single_tuple;
+ delete [] single_tuple;
return num_tuples;
}
@@ -53,73 +55,88 @@ typedef PhraseDictionaryTree::PrefixPtr PPtr;
typedef std::vector<PPtr> vPPtr;
typedef std::vector<std::vector<Factor const*> > mPhrase;
-std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
- for(size_t i=0;i<p.size();++i) {
- out<<i<<" - ";
- for(size_t j=0;j<p[i].size();++j)
- out<<p[i][j]->ToString()<<" ";
- out<<"|";
- }
+std::ostream& operator<<(std::ostream& out,const mPhrase& p)
+{
+ for(size_t i=0; i<p.size(); ++i) {
+ out<<i<<" - ";
+ for(size_t j=0; j<p[i].size(); ++j)
+ out<<p[i][j]->ToString()<<" ";
+ out<<"|";
+ }
- return out;
+ return out;
}
struct State {
- vPPtr ptrs;
- WordsRange range;
- float score;
+ vPPtr ptrs;
+ WordsRange range;
+ float score;
- State() : range(0,0),score(0.0) {}
- State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {}
-
- size_t begin() const {return range.GetStartPos();}
- size_t end() const {return range.GetEndPos();}
- float GetScore() const {return score;}
+ State() : range(0,0),score(0.0) {}
+ State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {}
+
+ size_t begin() const {
+ return range.GetStartPos();
+ }
+ size_t end() const {
+ return range.GetEndPos();
+ }
+ float GetScore() const {
+ return score;
+ }
};
-std::ostream& operator<<(std::ostream& out,const State& s) {
- out<<"["<<s.ptrs.size()<<" ("<<s.begin()<<","<<s.end()<<") "<<s.GetScore()<<"]";
+std::ostream& operator<<(std::ostream& out,const State& s)
+{
+ out<<"["<<s.ptrs.size()<<" ("<<s.begin()<<","<<s.end()<<") "<<s.GetScore()<<"]";
- return out;
+ return out;
}
typedef std::map<mPhrase,float> E2Costs;
struct GCData {
- const std::vector<PhraseDictionaryTree const*>& pdicts;
- const std::vector<std::vector<float> >& weights;
- std::vector<FactorType> inF,outF;
- size_t distinctOutputFactors;
- vPPtr root;
- size_t totalTuples,distinctTuples;
-
-
- GCData(const std::vector<PhraseDictionaryTree const*>& a,
- const std::vector<std::vector<float> >& b)
- : pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
-
- assert(pdicts.size()==weights.size());
- std::set<FactorType> distinctOutFset;
- inF.resize(pdicts.size());
- outF.resize(pdicts.size());
- root.resize(pdicts.size());
- for(size_t i=0;i<pdicts.size();++i)
- {
- root[i]=pdicts[i]->GetRoot();
- inF[i]=pdicts[i]->GetInputFactorType();
- outF[i]=pdicts[i]->GetOutputFactorType();
- distinctOutFset.insert(pdicts[i]->GetOutputFactorType());
- }
- distinctOutputFactors=distinctOutFset.size();
- }
-
- FactorType OutFT(size_t i) const {return outF[i];}
- FactorType InFT(size_t i) const {return inF[i];}
- size_t DistinctOutFactors() const {return distinctOutputFactors;}
-
- const vPPtr& GetRoot() const {return root;}
+ const std::vector<PhraseDictionaryTree const*>& pdicts;
+ const std::vector<std::vector<float> >& weights;
+ std::vector<FactorType> inF,outF;
+ size_t distinctOutputFactors;
+ vPPtr root;
+ size_t totalTuples,distinctTuples;
+
+
+ GCData(const std::vector<PhraseDictionaryTree const*>& a,
+ const std::vector<std::vector<float> >& b)
+ : pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
+
+ assert(pdicts.size()==weights.size());
+ std::set<FactorType> distinctOutFset;
+ inF.resize(pdicts.size());
+ outF.resize(pdicts.size());
+ root.resize(pdicts.size());
+ for(size_t i=0; i<pdicts.size(); ++i) {
+ root[i]=pdicts[i]->GetRoot();
+ inF[i]=pdicts[i]->GetInputFactorType();
+ outF[i]=pdicts[i]->GetOutputFactorType();
+ distinctOutFset.insert(pdicts[i]->GetOutputFactorType());
+ }
+ distinctOutputFactors=distinctOutFset.size();
+ }
+
+ FactorType OutFT(size_t i) const {
+ return outF[i];
+ }
+ FactorType InFT(size_t i) const {
+ return inF[i];
+ }
+ size_t DistinctOutFactors() const {
+ return distinctOutputFactors;
+ }
+
+ const vPPtr& GetRoot() const {
+ return root;
+ }
};
@@ -131,167 +148,167 @@ typedef std::vector<OutputFactor2TgtCandList*> Len2Cands;
void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands)
{
- std::vector<FactorTgtCand> cands;
- data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
-
- for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin();cand!=cands.end();++cand) {
- assert(data.weights[factorType].size()==cand->second.size());
- float costs=std::inner_product(data.weights[factorType].begin(),
- data.weights[factorType].end(),
- cand->second.begin(),
- 0.0);
-
- size_t len=cand->first.size();
- if(len>=len2cands.size()) len2cands.resize(len+1,0);
- if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
- OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
-
- outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
- }
+ std::vector<FactorTgtCand> cands;
+ data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
+
+ for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin(); cand!=cands.end(); ++cand) {
+ assert(data.weights[factorType].size()==cand->second.size());
+ float costs=std::inner_product(data.weights[factorType].begin(),
+ data.weights[factorType].end(),
+ cand->second.begin(),
+ 0.0);
+
+ size_t len=cand->first.size();
+ if(len>=len2cands.size()) len2cands.resize(len+1,0);
+ if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
+ OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
+
+ outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
+ }
}
-void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data)
+void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data)
{
- // check if candidates are non-empty
- bool gotCands=1;
- for(size_t j=0;gotCands && j<tCand.size();++j)
- gotCands &= !tCand[j].empty();
-
- if(gotCands) {
- // enumerate tuples
- assert(data.DistinctOutFactors()==tCand.size());
- std::vector<unsigned> radix(data.DistinctOutFactors());
- for(size_t i=0;i<tCand.size();++i) radix[i]=tCand[i].size();
-
- unsigned *tuples=0;
- size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
-
- data.totalTuples+=numTuples;
-
- for(size_t i=0;i<numTuples;++i)
- {
- mPhrase e(radix.size());float costs=0.0;
- for(size_t j=0;j<radix.size();++j)
- {
- assert(tuples[radix.size()*i+j]<tCand[j].size());
- std::pair<float,vFactor> const& mycand=tCand[j][tuples[radix.size()*i+j]];
- e[j]=mycand.second;
- costs+=mycand.first;
- }
+ // check if candidates are non-empty
+ bool gotCands=1;
+ for(size_t j=0; gotCands && j<tCand.size(); ++j)
+ gotCands &= !tCand[j].empty();
+
+ if(gotCands) {
+ // enumerate tuples
+ assert(data.DistinctOutFactors()==tCand.size());
+ std::vector<unsigned> radix(data.DistinctOutFactors());
+ for(size_t i=0; i<tCand.size(); ++i) radix[i]=tCand[i].size();
+
+ unsigned *tuples=0;
+ size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
+
+ data.totalTuples+=numTuples;
+
+ for(size_t i=0; i<numTuples; ++i) {
+ mPhrase e(radix.size());
+ float costs=0.0;
+ for(size_t j=0; j<radix.size(); ++j) {
+ assert(tuples[radix.size()*i+j]<tCand[j].size());
+ std::pair<float,vFactor> const& mycand=tCand[j][tuples[radix.size()*i+j]];
+ e[j]=mycand.second;
+ costs+=mycand.first;
+ }
#ifdef DEBUG
- bool mismatch=0;
- for(size_t j=1;!mismatch && j<e.size();++j)
- if(e[j].size()!=e[j-1].size()) mismatch=1;
- assert(mismatch==0);
+ bool mismatch=0;
+ for(size_t j=1; !mismatch && j<e.size(); ++j)
+ if(e[j].size()!=e[j-1].size()) mismatch=1;
+ assert(mismatch==0);
#endif
- std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
- if(p.second) ++data.distinctTuples;
- else {
- // entry known, take min of costs, alternative: sum probs
- if(costs<p.first->second) p.first->second=costs;
- }
- }
- delete [] tuples;
- }
+ std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
+ if(p.second) ++data.distinctTuples;
+ else {
+ // entry known, take min of costs, alternative: sum probs
+ if(costs<p.first->second) p.first->second=costs;
+ }
+ }
+ delete [] tuples;
+ }
}
-void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data)
+void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data)
{
- Len2Cands len2cands;
- // generate candidates for each element of nextP:
- for(size_t factorType=0;factorType<nextP.size();++factorType)
- if(nextP[factorType])
- GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
-
- // for each length: enumerate tuples, compute score, and insert in e2costs
- for(size_t len=0;len<len2cands.size();++len) if(len2cands[len])
- GenerateTupleTgtCands(*len2cands[len],e2costs,data);
+ Len2Cands len2cands;
+ // generate candidates for each element of nextP:
+ for(size_t factorType=0; factorType<nextP.size(); ++factorType)
+ if(nextP[factorType])
+ GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
+
+ // for each length: enumerate tuples, compute score, and insert in e2costs
+ for(size_t len=0; len<len2cands.size(); ++len) if(len2cands[len])
+ GenerateTupleTgtCands(*len2cands[len],e2costs,data);
}
void GenerateCandidates(const ConfusionNet& src,
- const std::vector<PhraseDictionaryTree const*>& pdicts,
- const std::vector<std::vector<float> >& weights,
- int verbose) {
- GCData data(pdicts,weights);
-
- std::vector<State> stack;
- for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
-
- std::map<WordsRange,E2Costs> cov2E;
-
- // std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
-
- while(!stack.empty())
- {
- State curr(stack.back());
- stack.pop_back();
-
- //std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
-
- assert(curr.end()<src.GetSize());
- const ConfusionNet::Column &currCol=src[curr.end()];
- for(size_t colidx=0;colidx<currCol.size();++colidx)
- {
- const Word& w=currCol[colidx].first;
- vPPtr nextP(curr.ptrs);
- for(size_t j=0;j<nextP.size();++j)
- nextP[j]=pdicts[j]->Extend(nextP[j],
- w.GetFactor(data.InFT(j))->GetString());
-
- bool valid=1;
- for(size_t j=0;j<nextP.size();++j) if(!nextP[j]) {valid=0;break;}
-
- if(valid)
- {
- if(curr.end()+1<src.GetSize())
- stack.push_back(State(curr.begin(),curr.end()+1,nextP,
- curr.GetScore()+currCol[colidx].second));
-
- E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
- GenerateCandidates_(e2costs,nextP,data);
- }
- }
-
- // check if there are translations of one-word phrases ...
- //if(curr.begin()==curr.end() && tCand.empty()) {}
-
- } // end while(!stack.empty())
-
- if(verbose) {
- // print statistics for debugging purposes
- std::cerr<<"tuple stats: total: "<<data.totalTuples
- <<" distinct: "<<data.distinctTuples<<" ("
- <<(data.distinctTuples/(0.01*data.totalTuples))
- <<"%)\n";
- std::cerr<<"per coverage set:\n";
- for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
- i!=cov2E.end();++i) {
- std::cerr<<i->first<<" -- distinct cands: "
- <<i->second.size()<<"\n";
- }
- std::cerr<<"\n\n";
- }
-
- if(verbose>10) {
- std::cerr<<"full list:\n";
- for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
- i!=cov2E.end();++i) {
- std::cerr<<i->first<<" -- distinct cands: "
- <<i->second.size()<<"\n";
- for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
- std::cerr<<j->first<<" -- "<<j->second<<"\n";
- }
- }
+ const std::vector<PhraseDictionaryTree const*>& pdicts,
+ const std::vector<std::vector<float> >& weights,
+ int verbose)
+{
+ GCData data(pdicts,weights);
+
+ std::vector<State> stack;
+ for(size_t i=0; i<src.GetSize(); ++i) stack.push_back(State(i,i,data.GetRoot()));
+
+ std::map<WordsRange,E2Costs> cov2E;
+
+ // std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
+
+ while(!stack.empty()) {
+ State curr(stack.back());
+ stack.pop_back();
+
+ //std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
+
+ assert(curr.end()<src.GetSize());
+ const ConfusionNet::Column &currCol=src[curr.end()];
+ for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
+ const Word& w=currCol[colidx].first;
+ vPPtr nextP(curr.ptrs);
+ for(size_t j=0; j<nextP.size(); ++j)
+ nextP[j]=pdicts[j]->Extend(nextP[j],
+ w.GetFactor(data.InFT(j))->GetString());
+
+ bool valid=1;
+ for(size_t j=0; j<nextP.size(); ++j) if(!nextP[j]) {
+ valid=0;
+ break;
+ }
+
+ if(valid) {
+ if(curr.end()+1<src.GetSize())
+ stack.push_back(State(curr.begin(),curr.end()+1,nextP,
+ curr.GetScore()+currCol[colidx].second));
+
+ E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
+ GenerateCandidates_(e2costs,nextP,data);
+ }
+ }
+
+ // check if there are translations of one-word phrases ...
+ //if(curr.begin()==curr.end() && tCand.empty()) {}
+
+ } // end while(!stack.empty())
+
+ if(verbose) {
+ // print statistics for debugging purposes
+ std::cerr<<"tuple stats: total: "<<data.totalTuples
+ <<" distinct: "<<data.distinctTuples<<" ("
+ <<(data.distinctTuples/(0.01*data.totalTuples))
+ <<"%)\n";
+ std::cerr<<"per coverage set:\n";
+ for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+ i!=cov2E.end(); ++i) {
+ std::cerr<<i->first<<" -- distinct cands: "
+ <<i->second.size()<<"\n";
+ }
+ std::cerr<<"\n\n";
+ }
+
+ if(verbose>10) {
+ std::cerr<<"full list:\n";
+ for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+ i!=cov2E.end(); ++i) {
+ std::cerr<<i->first<<" -- distinct cands: "
+ <<i->second.size()<<"\n";
+ for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j)
+ std::cerr<<j->first<<" -- "<<j->second<<"\n";
+ }
+ }
}
#else
void GenerateCandidates(const ConfusionNet&,
- const std::vector<PhraseDictionaryTree const*>&,
- const std::vector<std::vector<float> >&,
- int)
+ const std::vector<PhraseDictionaryTree const*>&,
+ const std::vector<std::vector<float> >&,
+ int)
{
- std::cerr<<"ERROR: GenerateCandidates is currently broken\n";
+ std::cerr<<"ERROR: GenerateCandidates is currently broken\n";
}
#endif
diff --git a/misc/GenerateTuples.h b/misc/GenerateTuples.h
index 362c1534f..728f28823 100644
--- a/misc/GenerateTuples.h
+++ b/misc/GenerateTuples.h
@@ -6,7 +6,7 @@
class ConfusionNet;
void GenerateCandidates(const ConfusionNet& src,
- const std::vector<PhraseDictionaryTree const*>& pdicts,
- const std::vector<std::vector<float> >& weights,
- int verbose=0) ;
+ const std::vector<PhraseDictionaryTree const*>& pdicts,
+ const std::vector<std::vector<float> >& weights,
+ int verbose=0) ;
#endif
diff --git a/misc/processLexicalTable.cpp b/misc/processLexicalTable.cpp
index e62211256..e396e95e5 100644
--- a/misc/processLexicalTable.cpp
+++ b/misc/processLexicalTable.cpp
@@ -9,43 +9,45 @@ using namespace Moses;
Timer timer;
-void printHelp(){
+void printHelp()
+{
std::cerr << "Usage:\n"
- "options: \n"
- "\t-in string -- input table file name\n"
- "\t-out string -- prefix of binary table files\n"
- "If -in is not specified reads from stdin\n"
- "\n";
+ "options: \n"
+ "\t-in string -- input table file name\n"
+ "\t-out string -- prefix of binary table files\n"
+ "If -in is not specified reads from stdin\n"
+ "\n";
}
-int main(int argc, char** argv){
+int main(int argc, char** argv)
+{
std::cerr << "processLexicalTable v0.1 by Konrad Rawlik\n";
std::string inFilePath;
std::string outFilePath("out");
- if(1 >= argc){
- printHelp();
- return 1;
+ if(1 >= argc) {
+ printHelp();
+ return 1;
}
- for(int i = 1; i < argc; ++i){
+ for(int i = 1; i < argc; ++i) {
std::string arg(argv[i]);
- if("-in" == arg && i+1 < argc){
+ if("-in" == arg && i+1 < argc) {
++i;
inFilePath = argv[i];
- } else if("-out" == arg && i+1 < argc){
+ } else if("-out" == arg && i+1 < argc) {
++i;
outFilePath = argv[i];
} else {
//somethings wrong... print help
- printHelp();
+ printHelp();
return 1;
}
}
-
- if(inFilePath.empty()){
- std::cerr << "processing stdin to " << outFilePath << ".*\n";
- return LexicalReorderingTableTree::Create(std::cin, outFilePath);
+
+ if(inFilePath.empty()) {
+ std::cerr << "processing stdin to " << outFilePath << ".*\n";
+ return LexicalReorderingTableTree::Create(std::cin, outFilePath);
} else {
- std::cerr << "processing " << inFilePath<< " to " << outFilePath << ".*\n";
+ std::cerr << "processing " << inFilePath<< " to " << outFilePath << ".*\n";
InputFileStream file(inFilePath);
return LexicalReorderingTableTree::Create(file, outFilePath);
}
diff --git a/misc/processPhraseTable.cpp b/misc/processPhraseTable.cpp
index 89d462b0e..7eca79349 100644
--- a/misc/processPhraseTable.cpp
+++ b/misc/processPhraseTable.cpp
@@ -22,179 +22,176 @@ Timer timer;
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
{
- out<<x.size()<<" ";
- typename std::vector<T>::const_iterator iend=x.end();
- for(typename std::vector<T>::const_iterator i=x.begin();i!=iend;++i)
- out<<*i<<' ';
- return out;
+ out<<x.size()<<" ";
+ typename std::vector<T>::const_iterator iend=x.end();
+ for(typename std::vector<T>::const_iterator i=x.begin(); i!=iend; ++i)
+ out<<*i<<' ';
+ return out;
}
-inline bool existsFile(const char* filename) {
+inline bool existsFile(const char* filename)
+{
struct stat mystat;
return (stat(filename,&mystat)==0);
}
-inline bool existsFile(const std::string& filename) {
+inline bool existsFile(const std::string& filename)
+{
return existsFile(filename.c_str());
}
-int main(int argc,char **argv) {
- std::string fto;size_t noScoreComponent=5;int cn=0;
- bool aligninfo=false;
- std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
- int verb=0;
- for(int i=1;i<argc;++i) {
- std::string s(argv[i]);
- if(s=="-ttable") {
- std::pair<char*,char*> p;
- p.first=argv[++i];
- p.second=argv[++i];
- ftts.push_back(std::make_pair(std::string(argv[++i]),p));
- }
- else if(s=="-nscores") noScoreComponent=atoi(argv[++i]);
- else if(s=="-out") fto=std::string(argv[++i]);
- else if(s=="-cn") cn=1;
- else if(s=="-irst") cn=2;
- else if(s=="-alignment-info") aligninfo=true;
- else if(s=="-v") verb=atoi(argv[++i]);
- else if(s=="-h")
- {
- std::cerr<<"usage "<<argv[0]<<" :\n\n"
- "options:\n"
- "\t-ttable int int string -- translation table file, use '-' for stdin\n"
- "\t-out string -- output file name prefix for binary ttable\n"
- "\t-nscores int -- number of scores in ttable\n"
- "\t-alignment-info -- include alignment info in the binary ttable (suffix \".wa\")\n"
- "\nfunctions:\n"
- "\t - convert ascii ttable in binary format\n"
- "\t - if ttable is not read from stdin:\n"
- "\t treat each line as source phrase an print tgt candidates\n"
- "\n";
- return 1;
- }
- else
- {
- std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
- return 1;
- }
- }
-
-
- if(ftts.size()) {
-
- if(ftts.size()==1){
- std::cerr<<"processing ptree for ";
- PhraseDictionaryTree pdt(noScoreComponent);
-
- pdt.PrintWordAlignment(aligninfo);
-
- if (ftts[0].first=="-") {
- std::cerr<< "stdin\n";
- pdt.Create(std::cin,fto);
- }
- else{
- std::cerr<< ftts[0].first << "\n";
- InputFileStream in(ftts[0].first);
- pdt.Create(in,fto);
- }
- }
- else
- {
+int main(int argc,char **argv)
+{
+ std::string fto;
+ size_t noScoreComponent=5;
+ int cn=0;
+ bool aligninfo=false;
+ std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
+ int verb=0;
+ for(int i=1; i<argc; ++i) {
+ std::string s(argv[i]);
+ if(s=="-ttable") {
+ std::pair<char*,char*> p;
+ p.first=argv[++i];
+ p.second=argv[++i];
+ ftts.push_back(std::make_pair(std::string(argv[++i]),p));
+ } else if(s=="-nscores") noScoreComponent=atoi(argv[++i]);
+ else if(s=="-out") fto=std::string(argv[++i]);
+ else if(s=="-cn") cn=1;
+ else if(s=="-irst") cn=2;
+ else if(s=="-alignment-info") aligninfo=true;
+ else if(s=="-v") verb=atoi(argv[++i]);
+ else if(s=="-h") {
+ std::cerr<<"usage "<<argv[0]<<" :\n\n"
+ "options:\n"
+ "\t-ttable int int string -- translation table file, use '-' for stdin\n"
+ "\t-out string -- output file name prefix for binary ttable\n"
+ "\t-nscores int -- number of scores in ttable\n"
+ "\t-alignment-info -- include alignment info in the binary ttable (suffix \".wa\")\n"
+ "\nfunctions:\n"
+ "\t - convert ascii ttable in binary format\n"
+ "\t - if ttable is not read from stdin:\n"
+ "\t treat each line as source phrase an print tgt candidates\n"
+ "\n";
+ return 1;
+ } else {
+ std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
+ return 1;
+ }
+ }
+
+
+ if(ftts.size()) {
+
+ if(ftts.size()==1) {
+ std::cerr<<"processing ptree for ";
+ PhraseDictionaryTree pdt(noScoreComponent);
+
+ pdt.PrintWordAlignment(aligninfo);
+
+ if (ftts[0].first=="-") {
+ std::cerr<< "stdin\n";
+ pdt.Create(std::cin,fto);
+ } else {
+ std::cerr<< ftts[0].first << "\n";
+ InputFileStream in(ftts[0].first);
+ pdt.Create(in,fto);
+ }
+ } else {
#if 0
- std::vector<PhraseDictionaryTree const*> pdicts;
- std::vector<FactorType> factorOrder;
- for(size_t i=0;i<ftts.size();++i) {
-
- PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
- &factorCollection,
- getFactorType(atoi(ftts[i].second.first)),
- getFactorType(atoi(ftts[i].second.second))
- );
- factorOrder.push_back(pdtptr->GetInputFactorType());
- PhraseDictionaryTree &pdt=*pdtptr;
- pdicts.push_back(pdtptr);
-
- std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
- std::string prefix=ftts[i].first+facStr;
- if(!existsFile(prefix+".binphr.idx")) {
- std::cerr<<"bin ttable does not exist -> create it\n";
- InputFileStream in(prefix);
- pdt.Create(in,prefix);
- }
- std::cerr<<"reading bin ttable\n";
- pdt.Read(prefix);
-
- }
-
- std::cerr<<"processing stdin\n";
- if(!cn) {
- std::string line;
- while(getline(std::cin,line)) {
- std::istringstream is(line);
+ std::vector<PhraseDictionaryTree const*> pdicts;
+ std::vector<FactorType> factorOrder;
+ for(size_t i=0; i<ftts.size(); ++i) {
+
+ PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
+ &factorCollection,
+ getFactorType(atoi(ftts[i].second.first)),
+ getFactorType(atoi(ftts[i].second.second))
+ );
+ factorOrder.push_back(pdtptr->GetInputFactorType());
+ PhraseDictionaryTree &pdt=*pdtptr;
+ pdicts.push_back(pdtptr);
+
+ std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
+ std::string prefix=ftts[i].first+facStr;
+ if(!existsFile(prefix+".binphr.idx")) {
+ std::cerr<<"bin ttable does not exist -> create it\n";
+ InputFileStream in(prefix);
+ pdt.Create(in,prefix);
+ }
+ std::cerr<<"reading bin ttable\n";
+ pdt.Read(prefix);
+
+ }
+
+ std::cerr<<"processing stdin\n";
+ if(!cn) {
+ std::string line;
+ while(getline(std::cin,line)) {
+ std::istringstream is(line);
#if 0
- std::vector<std::string> f;
- std::copy(std::istream_iterator<std::string>(is),
- std::istream_iterator<std::string>(),
- std::back_inserter(f));
+ std::vector<std::string> f;
+ std::copy(std::istream_iterator<std::string>(is),
+ std::istream_iterator<std::string>(),
+ std::back_inserter(f));
#endif
- std::cerr<<"got source phrase '"<<line<<"'\n";
-
- Phrase F(Input);
- F.CreateFromString(factorOrder,line,factorCollection);
-
- for(size_t k=0;k<pdicts.size();++k) {
- PhraseDictionaryTree const& pdt=*pdicts[k];
-
- std::vector<std::string> f(F.GetSize());
- for(size_t i=0;i<F.GetSize();++i)
- f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();
-
- std::stringstream iostA,iostB;
- std::cerr<<"full phrase processing "<<f<<"\n";
- pdt.PrintTargetCandidates(f,iostA);
-
- std::cerr<<"processing with prefix ptr\n";
- PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());
-
- for(size_t i=0;i<f.size() && p;++i) {
- std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
- p=pdt.Extend(p,f[i]);
- std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
- }
- if(p) {
- std::cerr<<"retrieving candidates from prefix ptr\n";
- pdt.PrintTargetCandidates(p,iostB);}
- else {
- std::cerr<<"final ptr is invalid\n";
- iostB<<"there are 0 target candidates\n";
- }
- if(iostA.str() != iostB.str())
- std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";
-
- std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
- pdt.FreeMemory();
-
- }
-
- }
- }
- else {
- // process confusion net input
- ConfusionNet net(&factorCollection);
- std::vector<std::vector<float> > weights;
- for(size_t i=0;i<pdicts.size();++i)
- weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));
-
- while(net.ReadF(std::cin,factorOrder,cn-1)) {
- net.Print(std::cerr);
- GenerateCandidates(net,pdicts,weights,verb);
- }
-
- }
+ std::cerr<<"got source phrase '"<<line<<"'\n";
+
+ Phrase F(Input);
+ F.CreateFromString(factorOrder,line,factorCollection);
+
+ for(size_t k=0; k<pdicts.size(); ++k) {
+ PhraseDictionaryTree const& pdt=*pdicts[k];
+
+ std::vector<std::string> f(F.GetSize());
+ for(size_t i=0; i<F.GetSize(); ++i)
+ f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();
+
+ std::stringstream iostA,iostB;
+ std::cerr<<"full phrase processing "<<f<<"\n";
+ pdt.PrintTargetCandidates(f,iostA);
+
+ std::cerr<<"processing with prefix ptr\n";
+ PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());
+
+ for(size_t i=0; i<f.size() && p; ++i) {
+ std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
+ p=pdt.Extend(p,f[i]);
+ std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
+ }
+ if(p) {
+ std::cerr<<"retrieving candidates from prefix ptr\n";
+ pdt.PrintTargetCandidates(p,iostB);
+ } else {
+ std::cerr<<"final ptr is invalid\n";
+ iostB<<"there are 0 target candidates\n";
+ }
+ if(iostA.str() != iostB.str())
+ std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";
+
+ std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
+ pdt.FreeMemory();
+
+ }
+
+ }
+ } else {
+ // process confusion net input
+ ConfusionNet net(&factorCollection);
+ std::vector<std::vector<float> > weights;
+ for(size_t i=0; i<pdicts.size(); ++i)
+ weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));
+
+ while(net.ReadF(std::cin,factorOrder,cn-1)) {
+ net.Print(std::cerr);
+ GenerateCandidates(net,pdicts,weights,verb);
+ }
+
+ }
#else
- std::cerr<<"ERROR: these functions are currently broken...\n";
- exit(1);
+ std::cerr<<"ERROR: these functions are currently broken...\n";
+ exit(1);
#endif
- }
- }
-
+ }
+ }
+
}
diff --git a/misc/queryLexicalTable.cpp b/misc/queryLexicalTable.cpp
index 97e6d29bf..388336947 100644
--- a/misc/queryLexicalTable.cpp
+++ b/misc/queryLexicalTable.cpp
@@ -11,25 +11,28 @@ using namespace Moses;
Timer timer;
-void printHelp(){
+void printHelp()
+{
std::cerr << "Usage:\n"
- "options: \n"
- "\t-table file -- input table file name\n"
- "\t-f string -- f query phrase\n"
- "\t-e string -- e query phrase\n"
- "\t-c string -- context query phrase\n"
- "\n";
+ "options: \n"
+ "\t-table file -- input table file name\n"
+ "\t-f string -- f query phrase\n"
+ "\t-e string -- e query phrase\n"
+ "\t-c string -- context query phrase\n"
+ "\n";
}
-std::ostream& operator<<(std::ostream& o, Scores s){
- for(int i = 0; i < s.size(); ++i){
- o << s[i] << " ";
+std::ostream& operator<<(std::ostream& o, Scores s)
+{
+ for(int i = 0; i < s.size(); ++i) {
+ o << s[i] << " ";
}
//o << std::endln;
return o;
};
-int main(int argc, char** argv){
+int main(int argc, char** argv)
+{
std::cerr << "queryLexicalTable v0.2 by Konrad Rawlik\n";
std::string inFilePath;
std::string outFilePath("out");
@@ -37,38 +40,38 @@ int main(int argc, char** argv){
std::string query_e, query_f, query_c;
bool use_context = false;
bool use_e = false;
- if(1 >= argc){
- printHelp();
- return 1;
+ if(1 >= argc) {
+ printHelp();
+ return 1;
}
- for(int i = 1; i < argc; ++i){
+ for(int i = 1; i < argc; ++i) {
std::string arg(argv[i]);
- if("-table" == arg && i+1 < argc){
- //std::cerr << "Table is " << argv[i];
+ if("-table" == arg && i+1 < argc) {
+ //std::cerr << "Table is " << argv[i];
++i;
inFilePath = argv[i];
- } else if("-f" == arg && i+1 < argc){
- ++i;
- //std::cerr << "F is " << argv[i];
- query_f = argv[i];
- } else if("-e" == arg && i+1 < argc){
- ++i;
- query_e = argv[i];
- use_e = true;
- } else if("-c" == arg){
- if(i+1 < argc && '-' != argv[i+1][0]){
- ++i;
- query_c = argv[i];
- use_context = true;
- } else {
- use_context = false;
- }
- } else if("-cache" == arg){
- ++i;
- cache = true;
+ } else if("-f" == arg && i+1 < argc) {
+ ++i;
+ //std::cerr << "F is " << argv[i];
+ query_f = argv[i];
+ } else if("-e" == arg && i+1 < argc) {
+ ++i;
+ query_e = argv[i];
+ use_e = true;
+ } else if("-c" == arg) {
+ if(i+1 < argc && '-' != argv[i+1][0]) {
+ ++i;
+ query_c = argv[i];
+ use_context = true;
+ } else {
+ use_context = false;
+ }
+ } else if("-cache" == arg) {
+ ++i;
+ cache = true;
} else {
//somethings wrong... print help
- printHelp();
+ printHelp();
return 1;
}
}
@@ -77,28 +80,28 @@ int main(int argc, char** argv){
FactorList e_mask;
FactorList c_mask;
f_mask.push_back(0);
- if(use_e){
- e_mask.push_back(0);
+ if(use_e) {
+ e_mask.push_back(0);
}
- if(use_context){
- c_mask.push_back(0);
+ if(use_context) {
+ c_mask.push_back(0);
}
Phrase e(Output),f(Input),c(Output);
e.CreateFromString(e_mask, query_e, "|");
f.CreateFromString(f_mask, query_f, "|");
c.CreateFromString(c_mask, query_c,"|");
LexicalReorderingTable* table;
- if(FileExists(inFilePath+".binlexr.idx")){
- std::cerr << "Loading binary table...\n";
- table = new LexicalReorderingTableTree(inFilePath, f_mask, e_mask, c_mask);
+ if(FileExists(inFilePath+".binlexr.idx")) {
+ std::cerr << "Loading binary table...\n";
+ table = new LexicalReorderingTableTree(inFilePath, f_mask, e_mask, c_mask);
} else {
- std::cerr << "Loading ordinary table...\n";
- table = new LexicalReorderingTableMemory(inFilePath, f_mask, e_mask, c_mask);
+ std::cerr << "Loading ordinary table...\n";
+ table = new LexicalReorderingTableMemory(inFilePath, f_mask, e_mask, c_mask);
}
//table->DbgDump(&std::cerr);
- if(cache){
- std::cerr << "Caching for f\n";
- table->InitializeForInputPhrase(f);
+ if(cache) {
+ std::cerr << "Caching for f\n";
+ table->InitializeForInputPhrase(f);
}
std::cerr << "Querying: f='" << f.GetStringRep(f_mask) << "' e='" << e.GetStringRep(e_mask) << "' c='" << c.GetStringRep(c_mask) << "'\n";
std::cerr << table->GetScore(f,e,c) << "\n";
diff --git a/misc/queryPhraseTable.cpp b/misc/queryPhraseTable.cpp
index ef95a1fba..d056e83a5 100644
--- a/misc/queryPhraseTable.cpp
+++ b/misc/queryPhraseTable.cpp
@@ -13,70 +13,72 @@ void usage();
typedef unsigned int uint;
-int main(int argc, char **argv) {
- int nscores = 5;
- std::string ttable = "";
- bool useAlignments = false;
-
- for(int i = 1; i < argc; i++) {
- if(!strcmp(argv[i], "-n")) {
- if(i + 1 == argc)
- usage();
- nscores = atoi(argv[++i]);
- } else if(!strcmp(argv[i], "-t")) {
- if(i + 1 == argc)
- usage();
- ttable = argv[++i];
- } else if(!strcmp(argv[i], "-a"))
- useAlignments = true;
- else
- usage();
- }
-
- if(ttable == "")
- usage();
-
- Moses::PhraseDictionaryTree ptree(nscores);
- ptree.UseWordAlignment(useAlignments);
- ptree.Read(ttable);
-
- std::string line;
- while(getline(std::cin, line)) {
- std::vector<std::string> srcphrase;
- srcphrase = Moses::Tokenize<std::string>(line);
-
- std::vector<Moses::StringTgtCand> tgtcands;
- std::vector<std::string> wordAlignment;
-
- if(useAlignments)
- ptree.GetTargetCandidates(srcphrase, tgtcands, wordAlignment);
- else
- ptree.GetTargetCandidates(srcphrase, tgtcands);
-
- for(uint i = 0; i < tgtcands.size(); i++) {
- std::cout << line << " |||";
- for(uint j = 0; j < tgtcands[i].first.size(); j++)
- std::cout << ' ' << *tgtcands[i].first[j];
- std::cout << " |||";
-
- if(useAlignments) {
- std::cout << " " << wordAlignment[i] << " |||";
- }
-
- for(uint j = 0; j < tgtcands[i].second.size(); j++)
- std::cout << ' ' << tgtcands[i].second[j];
- std::cout << '\n';
- }
-
- std::cout << '\n';
- std::cout.flush();
- }
+int main(int argc, char **argv)
+{
+ int nscores = 5;
+ std::string ttable = "";
+ bool useAlignments = false;
+
+ for(int i = 1; i < argc; i++) {
+ if(!strcmp(argv[i], "-n")) {
+ if(i + 1 == argc)
+ usage();
+ nscores = atoi(argv[++i]);
+ } else if(!strcmp(argv[i], "-t")) {
+ if(i + 1 == argc)
+ usage();
+ ttable = argv[++i];
+ } else if(!strcmp(argv[i], "-a"))
+ useAlignments = true;
+ else
+ usage();
+ }
+
+ if(ttable == "")
+ usage();
+
+ Moses::PhraseDictionaryTree ptree(nscores);
+ ptree.UseWordAlignment(useAlignments);
+ ptree.Read(ttable);
+
+ std::string line;
+ while(getline(std::cin, line)) {
+ std::vector<std::string> srcphrase;
+ srcphrase = Moses::Tokenize<std::string>(line);
+
+ std::vector<Moses::StringTgtCand> tgtcands;
+ std::vector<std::string> wordAlignment;
+
+ if(useAlignments)
+ ptree.GetTargetCandidates(srcphrase, tgtcands, wordAlignment);
+ else
+ ptree.GetTargetCandidates(srcphrase, tgtcands);
+
+ for(uint i = 0; i < tgtcands.size(); i++) {
+ std::cout << line << " |||";
+ for(uint j = 0; j < tgtcands[i].first.size(); j++)
+ std::cout << ' ' << *tgtcands[i].first[j];
+ std::cout << " |||";
+
+ if(useAlignments) {
+ std::cout << " " << wordAlignment[i] << " |||";
+ }
+
+ for(uint j = 0; j < tgtcands[i].second.size(); j++)
+ std::cout << ' ' << tgtcands[i].second[j];
+ std::cout << '\n';
+ }
+
+ std::cout << '\n';
+ std::cout.flush();
+ }
}
-void usage() {
- std::cerr << "Usage: queryPhraseTable [-n <nscores>] [-a] -t <ttable>\n"
- "-n <nscores> number of scores in phrase table (default: 5)\n"
- "-a binary phrase table contains alignments\n"
- "-t <ttable> phrase table\n";
- exit(1);
+void usage()
+{
+ std::cerr << "Usage: queryPhraseTable [-n <nscores>] [-a] -t <ttable>\n"
+ "-n <nscores> number of scores in phrase table (default: 5)\n"
+ "-a binary phrase table contains alignments\n"
+ "-t <ttable> phrase table\n";
+ exit(1);
}