1 files changed, 0 insertions, 294 deletions
diff --git a/misc/GenerateTuples.cpp b/misc/GenerateTuples.cpp
deleted file mode 100644
index 97b0bdd96..000000000
--- a/misc/GenerateTuples.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-
-////////////////////////////////////////////////////////////
-//
-// generate set of target candidates for confusion net
-//
-////////////////////////////////////////////////////////////
-
-
-
-#include <numeric>
-#include "Word.h"
-#include "Phrase.h"
-#include "ConfusionNet.h"
-#include "WordsRange.h"
-#include "PhraseDictionaryTree.h"
-#if 0
-// Generates all tuples from  n indexes with ranges 0 to card[j]-1, respectively..
-// Input: number of indexes and  ranges: ranges[0] ... ranges[num_idx-1] 
-// Output: number of tuples and monodimensional array of tuples.
-// Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2)
-
-size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples)
-{
-  unsigned* single_tuple= new unsigned[num_idx+1];
-  unsigned num_tuples=1;
-
-  for (unsigned k=0;k<num_idx;++k)
-    {
-      num_tuples *= ranges[k];
-      single_tuple[k]=0;
-    }
-
-  tuples=new unsigned[num_idx * num_tuples];
-
-  // we need this additional element for the last iteration
-  single_tuple[num_idx]=0; 
-  unsigned j=0;
-  for (unsigned n=0;n<num_tuples;++n){
-    memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(unsigned));
-    j=0;
-    while (single_tuple[j]==ranges[j]-1){single_tuple[j]=0; ++j;}
-    ++single_tuple[j];
-  }
-	delete [] single_tuple;
-  return num_tuples;
-}
-
-
-typedef PhraseDictionaryTree::PrefixPtr PPtr;
-typedef std::vector<PPtr> vPPtr;
-typedef std::vector<std::vector<Factor const*> > mPhrase;
-
-std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
-	for(size_t i=0;i<p.size();++i) {
-		out<<i<<" - ";
-		for(size_t j=0;j<p[i].size();++j)
-			out<<p[i][j]->ToString()<<" ";
-		out<<"|";
-	}
-
-	return out;
-}
-
-struct State {
-	vPPtr ptrs;
-	WordsRange range;
-	float score;
-
-	State() : range(0,0),score(0.0) {}
-	State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {}
-	
-	size_t begin() const {return range.GetStartPos();}
-	size_t end() const {return range.GetEndPos();}
-	float GetScore() const {return score;}
-
-};
-
-std::ostream& operator<<(std::ostream& out,const State& s) {
-	out<<"["<<s.ptrs.size()<<" ("<<s.begin()<<","<<s.end()<<") "<<s.GetScore()<<"]";
-
-	return out;
-}
-
-typedef std::map<mPhrase,float> E2Costs;
-
-
-struct GCData {
-	const std::vector<PhraseDictionaryTree const*>& pdicts;
-	const std::vector<std::vector<float> >& weights;
-	std::vector<FactorType> inF,outF;
-	size_t distinctOutputFactors;
-	vPPtr root;
-	size_t totalTuples,distinctTuples;
-
-
-	GCData(const std::vector<PhraseDictionaryTree const*>& a,
-				 const std::vector<std::vector<float> >& b) 
-		: pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
-
-		assert(pdicts.size()==weights.size());
-		std::set<FactorType> distinctOutFset;
-		inF.resize(pdicts.size());
-		outF.resize(pdicts.size());
-		root.resize(pdicts.size());
-		for(size_t i=0;i<pdicts.size();++i) 
-			{
-				root[i]=pdicts[i]->GetRoot();
-				inF[i]=pdicts[i]->GetInputFactorType();
-				outF[i]=pdicts[i]->GetOutputFactorType();
-				distinctOutFset.insert(pdicts[i]->GetOutputFactorType());
-			}
-		distinctOutputFactors=distinctOutFset.size();
-	}
-
-	FactorType OutFT(size_t i) const {return outF[i];}
-	FactorType InFT(size_t i) const {return inF[i];}
-	size_t DistinctOutFactors() const {return distinctOutputFactors;}
-
-	const vPPtr& GetRoot() const {return root;}
-
-};
-
-typedef std::vector<Factor const*> vFactor;
-typedef std::vector<std::pair<float,vFactor> > TgtCandList;
-
-typedef std::vector<TgtCandList> OutputFactor2TgtCandList;
-typedef std::vector<OutputFactor2TgtCandList*> Len2Cands;
-
-void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands)
-{
-	std::vector<FactorTgtCand> cands;
-	data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
-
-	for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin();cand!=cands.end();++cand) {
-		assert(data.weights[factorType].size()==cand->second.size());
-		float costs=std::inner_product(data.weights[factorType].begin(),
-																	 data.weights[factorType].end(),
-																	 cand->second.begin(),
-																	 0.0);
-
-		size_t len=cand->first.size();
-		if(len>=len2cands.size()) len2cands.resize(len+1,0);
-		if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
-		OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
-
-		outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
-	}
-}
-
-void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data) 
-{
-	// check if candidates are non-empty
-	bool gotCands=1;
-	for(size_t j=0;gotCands && j<tCand.size();++j)
-		gotCands &= !tCand[j].empty();
-				
-	if(gotCands) {
-		// enumerate tuples
-		assert(data.DistinctOutFactors()==tCand.size());
-		std::vector<unsigned> radix(data.DistinctOutFactors());
-		for(size_t i=0;i<tCand.size();++i) radix[i]=tCand[i].size();
-
-		unsigned *tuples=0;
-		size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
-
-		data.totalTuples+=numTuples;
-
-		for(size_t i=0;i<numTuples;++i)
-			{
-				mPhrase e(radix.size());float costs=0.0;
-				for(size_t j=0;j<radix.size();++j)
-					{
-						assert(tuples[radix.size()*i+j]<tCand[j].size());
-						std::pair<float,vFactor> const& mycand=tCand[j][tuples[radix.size()*i+j]];
-						e[j]=mycand.second;
-						costs+=mycand.first;
-					}
-#ifdef DEBUG
-				bool mismatch=0;
-				for(size_t j=1;!mismatch && j<e.size();++j)
-					if(e[j].size()!=e[j-1].size()) mismatch=1;
-				assert(mismatch==0);
-#endif
-				std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
-				if(p.second) ++data.distinctTuples;
-				else {
-					// entry known, take min of costs, alternative: sum probs
-					if(costs<p.first->second) p.first->second=costs;
-				}
-			}
-		delete [] tuples;
-	}
-}
-
-void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data) 
-{
-	Len2Cands len2cands;
-	// generate candidates for each element of nextP:
-	for(size_t factorType=0;factorType<nextP.size();++factorType) 
-		if(nextP[factorType]) 
-			GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
-
-	// for each length: enumerate tuples, compute score, and insert in e2costs
-	for(size_t len=0;len<len2cands.size();++len) if(len2cands[len]) 
-		GenerateTupleTgtCands(*len2cands[len],e2costs,data);
-}
-
-void GenerateCandidates(const ConfusionNet& src,
-												const std::vector<PhraseDictionaryTree const*>& pdicts,
-												const std::vector<std::vector<float> >& weights,
-												int verbose) {
-	GCData data(pdicts,weights);
-
-	std::vector<State> stack;
-	for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
-
-	std::map<WordsRange,E2Costs> cov2E;
-
-	//	std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
-
-	while(!stack.empty()) 
-	{
-		State curr(stack.back());
-		stack.pop_back();
-		
-		//std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
-
-		assert(curr.end()<src.GetSize());
-		const ConfusionNet::Column &currCol=src[curr.end()];
-		for(size_t colidx=0;colidx<currCol.size();++colidx) 
-		{
-			const Word& w=currCol[colidx].first;
-			vPPtr nextP(curr.ptrs);
-			for(size_t j=0;j<nextP.size();++j)
-				nextP[j]=pdicts[j]->Extend(nextP[j],
-																	 w.GetFactor(data.InFT(j))->GetString());
-	
-			bool valid=1;
-			for(size_t j=0;j<nextP.size();++j) if(!nextP[j]) {valid=0;break;}
-
-			if(valid) 
-			{
-				if(curr.end()+1<src.GetSize())
-					stack.push_back(State(curr.begin(),curr.end()+1,nextP,
-																curr.GetScore()+currCol[colidx].second));
-
-				E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
-				GenerateCandidates_(e2costs,nextP,data);
-			}
-		}
-			
-		// check if there are translations of one-word phrases ...
-		//if(curr.begin()==curr.end() && tCand.empty()) {}		
-
-	} // end while(!stack.empty()) 
-
-	if(verbose) {
-		// print statistics for debugging purposes
-		std::cerr<<"tuple stats:  total: "<<data.totalTuples
-						 <<" distinct: "<<data.distinctTuples<<" ("
-						 <<(data.distinctTuples/(0.01*data.totalTuples))
-						 <<"%)\n";
-		std::cerr<<"per coverage set:\n";
-		for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
-				i!=cov2E.end();++i) {
-			std::cerr<<i->first<<" -- distinct cands: "
-							 <<i->second.size()<<"\n";
-		}
-		std::cerr<<"\n\n";
-	}
-
-	if(verbose>10) {
-		std::cerr<<"full list:\n";
-		for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
-				i!=cov2E.end();++i) {
-			std::cerr<<i->first<<" -- distinct cands: "
-							 <<i->second.size()<<"\n";
-			for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
-				std::cerr<<j->first<<" -- "<<j->second<<"\n";
-		}
-	}
-}
-
-#else
-
-void GenerateCandidates(const ConfusionNet&,
-												const std::vector<PhraseDictionaryTree const*>&,
-												const std::vector<std::vector<float> >&,
-												int) 
-{
-	std::cerr<<"ERROR: GenerateCandidates is currently broken\n";
-}
-
-#endif