Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lane Schwartz <dowobeha@gmail.com>, 2013-08-28 21:27:12 +0400
committer: Lane Schwartz <dowobeha@gmail.com>, 2013-08-28 21:27:12 +0400
commit: 7675825eb7e2fa979200cb06d8af8b38a38b1310 (patch)
tree: 86838835b5a686445e3c1e08431ca908aabfd0ab
parent: 8a726a9de4d56574083602424c753935c27290a8 (diff)
parent: d79d97c0e683eccb2c94a50e971616c5514d91bd (diff)
Merge branch 'master' of www:/repos/git/Decoders/mosesdecoder
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.cpp 13
-rw-r--r-- moses/FF/OSM-Feature/osmHyp.cpp 4
-rw-r--r-- moses/LM/IRST.cpp 15
-rw-r--r-- moses/LM/Implementation.cpp 16
-rw-r--r-- moses/LM/Implementation.h 7
-rw-r--r-- moses/LM/SRI.cpp 15
-rw-r--r-- moses/LM/SingleFactor.cpp 9
-rw-r--r-- moses/LM/SingleFactor.h 1
-rw-r--r-- moses/TargetPhrase.cpp 17
-rw-r--r-- moses/TargetPhrase.h 2
-rw-r--r-- scripts/OSM/generateSequences.cpp 970
11 files changed, 504 insertions, 565 deletions
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index a99148e10..f4eac9673 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -224,7 +224,7 @@ std::vector<float> OpSequenceModel::GetFutureScores(const Phrase &source, const
vector<float> scores(numFeatures, 0);
scores[0] = unkOpProb;
return scores;
- } else{
+ } else {
const vector<float> &scores = iter->second;
return scores;
}
@@ -235,16 +235,15 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& va
if (key == "path") {
m_lmPath = value;
- }else if (key == "numFeatures"){
+ } else if (key == "numFeatures") {
numFeatures = Scan<int>(value);
- }else if (key == "order") {
+ } else if (key == "order") {
lmOrder = Scan<int>(value);
- }else if (key == "sFactor"){
+ } else if (key == "sFactor") {
sFactor = Scan<int>(value);
- }else if (key == "tFactor"){
+ } else if (key == "tFactor") {
tFactor = Scan<int>(value);
- }
- else {
+ } else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp
index 238f538dc..eb5b1af39 100644
--- a/moses/FF/OSM-Feature/osmHyp.cpp
+++ b/moses/FF/OSM-Feature/osmHyp.cpp
@@ -583,10 +583,10 @@ void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int
void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
{
scores.clear();
- scores.push_back(opProb);
+ scores.push_back(opProb);
if (numFeatures == 1)
- return;
+ return;
scores.push_back(gapWidth);
scores.push_back(gapCount);
diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp
index c19b1ab43..bde78e1b8 100644
--- a/moses/LM/IRST.cpp
+++ b/moses/LM/IRST.cpp
@@ -48,19 +48,8 @@ LanguageModelIRST::LanguageModelIRST(const std::string &line)
throw runtime_error("Error: " + SPrint(threadCount) + " number of threads specified but IRST LM is not threadsafe.");
}
- for (size_t i = 0; i < m_args.size(); ++i) {
- const vector<string> &args = m_args[i];
-
- if (args[0] == "factor") {
- m_factorType = Scan<FactorType>(args[1]);
- } else if (args[0] == "order") {
- m_nGramOrder = Scan<size_t>(args[1]);
- } else if (args[0] == "path") {
- m_filePath = args[1];
- } else {
- throw "Unknown argument " + args[0];
- }
- }
+ ReadParameters();
+
}
LanguageModelIRST::~LanguageModelIRST()
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index e9c651089..3f94fece8 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -41,6 +41,22 @@ using namespace std;
namespace Moses
{
+LanguageModelImplementation::LanguageModelImplementation(const std::string& description, const std::string &line)
+ :LanguageModel(description, line)
+{
+}
+
+void LanguageModelImplementation::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "order") {
+ m_nGramOrder = Scan<size_t>(value);
+ } else if (key == "path") {
+ m_filePath = value;
+ } else {
+ LanguageModel::SetParameter(key, value);
+ }
+
+}
void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextFactor, const Word &word) const
{
diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h
index 750fff013..a8768dfb7 100644
--- a/moses/LM/Implementation.h
+++ b/moses/LM/Implementation.h
@@ -61,13 +61,14 @@ protected:
Word m_sentenceStartWord, m_sentenceEndWord; //! Contains factors which represents the beging and end words for this LM.
//! Usually <s> and </s>
- LanguageModelImplementation(const std::string& description, const std::string &line)
- :LanguageModel(description, line) {
- }
+ LanguageModelImplementation(const std::string& description, const std::string &line);
+
public:
virtual ~LanguageModelImplementation() {}
+ void SetParameter(const std::string& key, const std::string& value);
+
/* get score of n-gram. n-gram should not be bigger than m_nGramOrder
* Specific implementation can return State and len data to be used in hypothesis pruning
* \param contextFactor n-gram to be scored
diff --git a/moses/LM/SRI.cpp b/moses/LM/SRI.cpp
index 71fd6dcb9..712c93c39 100644
--- a/moses/LM/SRI.cpp
+++ b/moses/LM/SRI.cpp
@@ -58,20 +58,7 @@ LanguageModelSRI::LanguageModelSRI(const std::string &line)
,m_srilmVocab(0)
,m_srilmModel(0)
{
- for (size_t i = 0; i < m_args.size(); ++i) {
- const vector<string> &args = m_args[i];
-
- if (args[0] == "factor") {
- m_factorType = Scan<FactorType>(args[1]);
- } else if (args[0] == "order") {
- m_nGramOrder = Scan<size_t>(args[1]);
- } else if (args[0] == "path") {
- m_filePath = args[1];
- } else {
- throw "Unknown argument " + args[0];
- }
- }
-
+ ReadParameters();
}
LanguageModelSRI::~LanguageModelSRI()
diff --git a/moses/LM/SingleFactor.cpp b/moses/LM/SingleFactor.cpp
index 6247648f3..54097d6fb 100644
--- a/moses/LM/SingleFactor.cpp
+++ b/moses/LM/SingleFactor.cpp
@@ -72,6 +72,15 @@ bool LanguageModelSingleFactor::IsUseable(const FactorMask &mask) const
return ret;
}
+void LanguageModelSingleFactor::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "factor") {
+ m_factorType = Scan<FactorType>(value);
+ } else {
+ LanguageModelImplementation::SetParameter(key, value);
+ }
+}
+
}
diff --git a/moses/LM/SingleFactor.h b/moses/LM/SingleFactor.h
index 9ebd52817..af0edd4c6 100644
--- a/moses/LM/SingleFactor.h
+++ b/moses/LM/SingleFactor.h
@@ -48,6 +48,7 @@ protected:
public:
virtual ~LanguageModelSingleFactor();
bool IsUseable(const FactorMask &mask) const;
+ void SetParameter(const std::string& key, const std::string& value);
const Factor *GetSentenceStart() const {
return m_sentenceStart;
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 24bc8e362..8cf42310c 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -203,15 +203,14 @@ void TargetPhrase::Merge(const TargetPhrase &copy, const std::vector<FactorType>
void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const
{
- std::map<std::string, std::string>::const_iterator iter;
- iter = m_properties.find(key);
- if (iter == m_properties.end()) {
- found = false;
- }
- else {
- found = true;
- value = iter->second;
- }
+ std::map<std::string, std::string>::const_iterator iter;
+ iter = m_properties.find(key);
+ if (iter == m_properties.end()) {
+ found = false;
+ } else {
+ found = true;
+ value = iter->second;
+ }
}
void swap(TargetPhrase &first, TargetPhrase &second)
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 0eae6ea3a..f5c3e5601 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -124,7 +124,7 @@ public:
}
void SetProperty(const std::string &key, const std::string &value) {
- m_properties[key] = value;
+ m_properties[key] = value;
}
void GetProperty(const std::string &key, std::string &value, bool &found) const;
diff --git a/scripts/OSM/generateSequences.cpp b/scripts/OSM/generateSequences.cpp
index f80267ff8..58398e536 100644
--- a/scripts/OSM/generateSequences.cpp
+++ b/scripts/OSM/generateSequences.cpp
@@ -12,71 +12,63 @@ using namespace std;
int stringToInteger(string s)
{
- istringstream buffer(s);
- int some_int;
- buffer >> some_int;
- return some_int;
+ istringstream buffer(s);
+ int some_int;
+ buffer >> some_int;
+ return some_int;
}
void loadInput(const char * fileName, vector <string> & input)
{
- ifstream sr (fileName);
- char* tmp;
-
- if(sr.is_open())
- {
- while(! sr.eof() )
- {
-
- tmp= new char[5000];
- sr.getline (tmp,5000);
- input.push_back(tmp);
- //cout<<tmp<<input.size()<<endl;
- delete [] tmp;
- }
-
- sr.close();
- }
- else
- {
- cout<<"Unable to read "<<fileName<<endl;
- exit(1);
- }
+ ifstream sr (fileName);
+ char* tmp;
+
+ if(sr.is_open()) {
+ while(! sr.eof() ) {
+
+ tmp= new char[5000];
+ sr.getline (tmp,5000);
+ input.push_back(tmp);
+ //cout<<tmp<<input.size()<<endl;
+ delete [] tmp;
+ }
+
+ sr.close();
+ } else {
+ cout<<"Unable to read "<<fileName<<endl;
+ exit(1);
+ }
}
void getWords(string inp, vector <string> & currInput)
{
- currInput.clear();
-
- int a=0;
- a = inp.find(' ', inp.length()-1);
-
- if( a == -1)
- inp.append(" ");
-
- a=0;
- int b=0;
-
- for (int j=0; j<inp.length(); j++)
- {
-
- a=inp.find(' ',b);
-
- if(a != -1)
- {
- currInput.push_back(inp.substr(b,a-b));
-
- b=a+1;
- j=b;
- }
- else
- {
- j=inp.length();
- }
-
- }
+ currInput.clear();
+
+ int a=0;
+ a = inp.find(' ', inp.length()-1);
+
+ if( a == -1)
+ inp.append(" ");
+
+ a=0;
+ int b=0;
+
+ for (int j=0; j<inp.length(); j++) {
+
+ a=inp.find(' ',b);
+
+ if(a != -1) {
+ currInput.push_back(inp.substr(b,a-b));
+
+ b=a+1;
+ j=b;
+ } else {
+ j=inp.length();
+ }
+
+ }
}
@@ -84,29 +76,26 @@ void getWords(string inp, vector <string> & currInput)
string getTranslation(int index, vector < pair <string , vector <int> > > & gCepts , vector <string> & currF , map <string,int> & singletons)
{
- string translation = "";
-
- vector <int> fSide = gCepts[index].second;
- vector <int> :: iterator iter;
-
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
- if (iter != fSide.begin())
- translation += "^_^";
-
- translation+= currF[*iter];
- }
-
- if(singletons.find(translation)==singletons.end())
- {
- return "_TRANS_" + gCepts[index].first + "_TO_" + translation + " ";
- }
-
- else
- {
-
- return "_TRANS_SLF_ ";
- }
+ string translation = "";
+
+ vector <int> fSide = gCepts[index].second;
+ vector <int> :: iterator iter;
+
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+ if (iter != fSide.begin())
+ translation += "^_^";
+
+ translation+= currF[*iter];
+ }
+
+ if(singletons.find(translation)==singletons.end()) {
+ return "_TRANS_" + gCepts[index].first + "_TO_" + translation + " ";
+ }
+
+ else {
+
+ return "_TRANS_SLF_ ";
+ }
}
@@ -114,184 +103,162 @@ string getTranslation(int index, vector < pair <string , vector <int> > > & gCep
int closestGap(map <int,string> gap,int j1, int & gp)
{
- int dist=1172;
- int value=-1;
- int temp=0;
- gp=0;
- int opGap=0;
-
- map <int,string> :: iterator iter;
-
- iter=gap.end();
-
- do
- {
- iter--;
- //cout<<"Trapped "<<iter->first<<endl;
-
- if(iter->first==j1 and iter->second=="Unfilled")
- {
- opGap++;
- gp = opGap;
- return j1;
- }
-
- if(iter->second =="Unfilled")
- {
- opGap++;
- temp = iter->first - j1;
-
- if(temp<0)
- temp=temp * -1;
-
- if(dist>temp && iter->first < j1)
- {
- dist=temp;
- value=iter->first;
- gp=opGap;
- }
- }
-
-
- }
- while(iter!=gap.begin());
-
- //cout<<"Out"<<endl;
- return value;
+ int dist=1172;
+ int value=-1;
+ int temp=0;
+ gp=0;
+ int opGap=0;
+
+ map <int,string> :: iterator iter;
+
+ iter=gap.end();
+
+ do {
+ iter--;
+ //cout<<"Trapped "<<iter->first<<endl;
+
+ if(iter->first==j1 and iter->second=="Unfilled") {
+ opGap++;
+ gp = opGap;
+ return j1;
+ }
+
+ if(iter->second =="Unfilled") {
+ opGap++;
+ temp = iter->first - j1;
+
+ if(temp<0)
+ temp=temp * -1;
+
+ if(dist>temp && iter->first < j1) {
+ dist=temp;
+ value=iter->first;
+ gp=opGap;
+ }
+ }
+
+
+ } while(iter!=gap.begin());
+
+ //cout<<"Out"<<endl;
+ return value;
}
void generateStory(vector <pair <string , vector <int> > > & gCepts, set <int> & targetNullWords, vector<string> & currF, map <string,int> & singletons)
{
- int fl = 0;
- int i = 0; // Current English string position
- int j = 0; // Current French Position
- int N = gCepts.size(); // Total number of English words
- int k = 0; // Number of already generate French words
- int E = 0; // Position after most rightward French word generate so far
- int j1 = 0; // Next french translation;
- int Li =0; // Links of word i
- int Lj=0; // Links of word j
- map <int,int > generated;
- map <int,string> gap;
- map <int,int> :: iterator iter;
- int gp=0;
- //vector <string> iterator :: iterF;
-
- while (targetNullWords.find(j) != targetNullWords.end())
- {
- cout<<"_INS_"<<currF[j]<<" ";
- generated[j]=-1; // This word is generated -1 means unlinked ...
- j=j+1;
- }
-
- while (i < gCepts.size() && gCepts[i].second.size() == 0)
- {
- cout<<"_DEL_"<<gCepts[i].first<<" ";
- i=i+1;
- }
-
- E=j; // Update the position of most rightward French word
-
- while (i<N)
- {
-
- //cout<<"I am sending to the link "<<i<<" with 0 "<<endl;
- //j1 = getLink(i,0,Li,k);
-
- Li = gCepts[i].second.size();
- j1 = gCepts[i].second[k];
-
- //cout<<"i = "<<i<<" j1 = "<<j1<<" j = "<<j<<" E = "<<E<<endl;
-
- if(j<j1) // reordering needed ...
- {
- iter = generated.find(j);
- if( iter == generated.end()) // fj is not generated ...
- {
- cout<<"_INS_GAP_ ";
- gap[j] = "Unfilled";
- }
-
- if (j==E)
- {
- j=j1;
- }
- else
- {
- cout<<"_JMP_FWD_ ";
- j=E;
- }
-
- }
-
- if(j1<j)
- {
- iter = generated.find(j);
- if(j<E && iter == generated.end()) // fj is not generated ...
- {
-
- cout<<"_INS_GAP_ ";
- gap[j]="Unfilled";
- }
-
- j=closestGap(gap,j1,gp);
- //cout<<j<<endl;
- cout<<"_JMP_BCK_"<<gp<<" ";
-
- if(j==j1)
- gap[j]="Filled";
-
- }
-
- if(j<j1)
- {
- cout<<"_INS_GAP_ ";
- gap[j] = "Unfilled";
- j=j1;
- }
-
- if(k==0)
- {
- cout<<getTranslation(i, gCepts,currF,singletons);
- }
- else
- {
- cout<<"_CONT_CEPT_ ";
- }
- generated[j]=i;
- j=j+1;
- k=k+1;
-
- while(targetNullWords.find(j) != targetNullWords.end()) // fj is unlinked word ...
- {
- //cout<<"Came here"<<j<<k<<endl;
- cout<<"_INS_"<<currF[j]<<" ";
- generated[j]=-1; // This word is generated -1 means unlinked ...
- j=j+1;
- }
-
- if(E<j)
- E=j;
- //cout<<" Li "<<Li<<endl;
- if(k==Li)
- {
- i=i+1;
- k=0;
-
- while(i < gCepts.size() && gCepts[i].second.size() == 0) // ei is unliked word ...
- {
- cout<<"_DEL_"<<gCepts[i].first<<" ";
- i=i+1;
-
- }
-
- }
-
- }
-
- cout<<endl;
+ int fl = 0;
+ int i = 0; // Current English string position
+ int j = 0; // Current French Position
+ int N = gCepts.size(); // Total number of English words
+ int k = 0; // Number of already generate French words
+ int E = 0; // Position after most rightward French word generate so far
+ int j1 = 0; // Next french translation;
+ int Li =0; // Links of word i
+ int Lj=0; // Links of word j
+ map <int,int > generated;
+ map <int,string> gap;
+ map <int,int> :: iterator iter;
+ int gp=0;
+ //vector <string> iterator :: iterF;
+
+ while (targetNullWords.find(j) != targetNullWords.end()) {
+ cout<<"_INS_"<<currF[j]<<" ";
+ generated[j]=-1; // This word is generated -1 means unlinked ...
+ j=j+1;
+ }
+
+ while (i < gCepts.size() && gCepts[i].second.size() == 0) {
+ cout<<"_DEL_"<<gCepts[i].first<<" ";
+ i=i+1;
+ }
+
+ E=j; // Update the position of most rightward French word
+
+ while (i<N) {
+
+ //cout<<"I am sending to the link "<<i<<" with 0 "<<endl;
+ //j1 = getLink(i,0,Li,k);
+
+ Li = gCepts[i].second.size();
+ j1 = gCepts[i].second[k];
+
+ //cout<<"i = "<<i<<" j1 = "<<j1<<" j = "<<j<<" E = "<<E<<endl;
+
+ if(j<j1) { // reordering needed ...
+ iter = generated.find(j);
+ if( iter == generated.end()) { // fj is not generated ...
+ cout<<"_INS_GAP_ ";
+ gap[j] = "Unfilled";
+ }
+
+ if (j==E) {
+ j=j1;
+ } else {
+ cout<<"_JMP_FWD_ ";
+ j=E;
+ }
+
+ }
+
+ if(j1<j) {
+ iter = generated.find(j);
+ if(j<E && iter == generated.end()) { // fj is not generated ...
+
+ cout<<"_INS_GAP_ ";
+ gap[j]="Unfilled";
+ }
+
+ j=closestGap(gap,j1,gp);
+ //cout<<j<<endl;
+ cout<<"_JMP_BCK_"<<gp<<" ";
+
+ if(j==j1)
+ gap[j]="Filled";
+
+ }
+
+ if(j<j1) {
+ cout<<"_INS_GAP_ ";
+ gap[j] = "Unfilled";
+ j=j1;
+ }
+
+ if(k==0) {
+ cout<<getTranslation(i, gCepts,currF,singletons);
+ } else {
+ cout<<"_CONT_CEPT_ ";
+ }
+ generated[j]=i;
+ j=j+1;
+ k=k+1;
+
+ while(targetNullWords.find(j) != targetNullWords.end()) { // fj is unlinked word ...
+ //cout<<"Came here"<<j<<k<<endl;
+ cout<<"_INS_"<<currF[j]<<" ";
+ generated[j]=-1; // This word is generated -1 means unlinked ...
+ j=j+1;
+ }
+
+ if(E<j)
+ E=j;
+ //cout<<" Li "<<Li<<endl;
+ if(k==Li) {
+ i=i+1;
+ k=0;
+
+ while(i < gCepts.size() && gCepts[i].second.size() == 0) { // ei is unliked word ...
+ cout<<"_DEL_"<<gCepts[i].first<<" ";
+ i=i+1;
+
+ }
+
+ }
+
+ }
+
+ cout<<endl;
}
@@ -299,299 +266,270 @@ void generateStory(vector <pair <string , vector <int> > > & gCepts, set <int> &
void ceptsInGenerativeStoryFormat(vector < pair < set <int> , set <int> > > & ceptsInPhrase , vector < pair < string , vector <int> > > & gCepts , set <int> & sourceNullWords, vector <string> & currE)
{
- gCepts.clear();
- set <int> eSide;
- set <int> fSide;
- std::set <int> :: iterator iter;
- string english;
- vector <int> germanIndex;
- int engIndex = 0;
- int prev;
- int curr;
- set <int> engDone;
-
-
- for (int i = 0; i< ceptsInPhrase.size(); i++)
- {
- english = "";
- germanIndex.clear();
- fSide = ceptsInPhrase[i].first;
- eSide = ceptsInPhrase[i].second;
-
-
- while(engIndex < *eSide.begin())
- {
- // cout<<engIndex<<" "<<*eSide.begin()<<endl;
-
- while(engDone.find(engIndex) != engDone.end())
- engIndex++;
-
- while(sourceNullWords.find(engIndex) != sourceNullWords.end())
- {
- english = currE[engIndex];
- engIndex++;
- gCepts.push_back(make_pair (english , germanIndex));
- english = "";
- }
- }
-
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- curr = *iter;
-
- if(iter != eSide.begin())
- {
- english += "^_^";
-
- if (prev == curr-1)
- {
- prev++;
- engIndex++;
- }
- else
- engDone.insert(curr);
- }
- else
- {
- prev = curr;
- //engIndex++;
- engIndex = prev+1;
- }
- english +=currE[curr];
-
- }
-
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
- germanIndex.push_back(*iter);
- }
-
- gCepts.push_back(make_pair (english , germanIndex));
- // cout<<engIndex<<endl;
-
- }
-
- english = "";
- germanIndex.clear();
-
- //for (int i = 0; i< currE.size(); i++)
- // cout<<i<<" "<<currE[i]<<endl;
-
- while(engIndex < currE.size())
- {
- // cout<<engIndex<<" "<<currE.size()-1<<endl;
- while(engDone.find(engIndex) != engDone.end())
- engIndex++;
-
- while(sourceNullWords.find(engIndex) != sourceNullWords.end())
- {
- english = currE[engIndex];
- //cout<<"Here "<<engIndex<<english<<" "<<germanIndex.size()<<endl;
- engIndex++;
- gCepts.push_back(make_pair (english , germanIndex));
- english = "";
- }
- }
-
+ gCepts.clear();
+ set <int> eSide;
+ set <int> fSide;
+ std::set <int> :: iterator iter;
+ string english;
+ vector <int> germanIndex;
+ int engIndex = 0;
+ int prev;
+ int curr;
+ set <int> engDone;
+
+
+ for (int i = 0; i< ceptsInPhrase.size(); i++) {
+ english = "";
+ germanIndex.clear();
+ fSide = ceptsInPhrase[i].first;
+ eSide = ceptsInPhrase[i].second;
+
+
+ while(engIndex < *eSide.begin()) {
+ // cout<<engIndex<<" "<<*eSide.begin()<<endl;
+
+ while(engDone.find(engIndex) != engDone.end())
+ engIndex++;
+
+ while(sourceNullWords.find(engIndex) != sourceNullWords.end()) {
+ english = currE[engIndex];
+ engIndex++;
+ gCepts.push_back(make_pair (english , germanIndex));
+ english = "";
+ }
+ }
+
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+ curr = *iter;
+
+ if(iter != eSide.begin()) {
+ english += "^_^";
+
+ if (prev == curr-1) {
+ prev++;
+ engIndex++;
+ } else
+ engDone.insert(curr);
+ } else {
+ prev = curr;
+ //engIndex++;
+ engIndex = prev+1;
+ }
+ english +=currE[curr];
+
+ }
+
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+ germanIndex.push_back(*iter);
+ }
+
+ gCepts.push_back(make_pair (english , germanIndex));
+ // cout<<engIndex<<endl;
+
+ }
+
+ english = "";
+ germanIndex.clear();
+
+ //for (int i = 0; i< currE.size(); i++)
+ // cout<<i<<" "<<currE[i]<<endl;
+
+ while(engIndex < currE.size()) {
+ // cout<<engIndex<<" "<<currE.size()-1<<endl;
+ while(engDone.find(engIndex) != engDone.end())
+ engIndex++;
+
+ while(sourceNullWords.find(engIndex) != sourceNullWords.end()) {
+ english = currE[engIndex];
+ //cout<<"Here "<<engIndex<<english<<" "<<germanIndex.size()<<endl;
+ engIndex++;
+ gCepts.push_back(make_pair (english , germanIndex));
+ english = "";
+ }
+ }
+
}
void printCepts(vector < pair < string , vector <int> > > & gCepts , vector <string> & currF)
{
- string eSide;
- vector <int> fSide;
-
- for (int i = 0; i < gCepts.size(); i++)
- {
-
- fSide = gCepts[i].second;
- eSide = gCepts[i].first;
-
- cout<<eSide;
- cout<<" <---> ";
-
- for (int j = 0; j < fSide.size(); j++)
- {
- cout<<currF[fSide[j]]<<" ";
- }
-
- cout<<endl;
- }
-
+ string eSide;
+ vector <int> fSide;
+
+ for (int i = 0; i < gCepts.size(); i++) {
+
+ fSide = gCepts[i].second;
+ eSide = gCepts[i].first;
+
+ cout<<eSide;
+ cout<<" <---> ";
+
+ for (int j = 0; j < fSide.size(); j++) {
+ cout<<currF[fSide[j]]<<" ";
+ }
+
+ cout<<endl;
+ }
+
}
void getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
{
- set <int> :: iterator iter;
+ set <int> :: iterator iter;
- int sz = eSide.size();
- vector <int> t;
+ int sz = eSide.size();
+ vector <int> t;
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- t = tS[*iter];
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+ t = tS[*iter];
- for (int i = 0; i < t.size(); i++)
- {
- fSide.insert(t[i]);
- }
+ for (int i = 0; i < t.size(); i++) {
+ fSide.insert(t[i]);
+ }
- }
+ }
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
- t = sT[*iter];
+ t = sT[*iter];
- for (int i = 0 ; i<t.size(); i++)
- {
- eSide.insert(t[i]);
- }
+ for (int i = 0 ; i<t.size(); i++) {
+ eSide.insert(t[i]);
+ }
- }
+ }
- if (eSide.size () > sz)
- {
- getMeCepts(eSide,fSide,tS,sT);
- }
+ if (eSide.size () > sz) {
+ getMeCepts(eSide,fSide,tS,sT);
+ }
}
void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, set <int> & sourceNullWords, set <int> & targetNullWords, vector <string> & alignment, int eSize, int fSize)
{
-
- ceptsInPhrase.clear();
- sourceNullWords.clear();
- targetNullWords.clear();
-
- vector <int> align;
-
- std::map <int , vector <int> > sT;
- std::map <int , vector <int> > tS;
- std::set <int> eSide;
- std::set <int> fSide;
- std::set <int> :: iterator iter;
- std :: map <int , vector <int> > :: iterator iter2;
- std :: pair < set <int> , set <int> > cept;
- int src;
- int tgt;
- ceptsInPhrase.clear();
-
- for (int j=0; j<alignment.size(); j+=2)
- {
- align.push_back(stringToInteger(alignment[j+1]));
- align.push_back(stringToInteger(alignment[j]));
- }
-
- for (int i = 0; i < align.size(); i+=2)
- {
- src = align[i];
- tgt = align[i+1];
- tS[tgt].push_back(src);
- sT[src].push_back(tgt);
- }
-
- for (int i = 0; i< fSize; i++)
- {
- if (sT.find(i) == sT.end())
- {
- targetNullWords.insert(i);
- }
- }
-
- for (int i = 0; i< eSize; i++)
- {
- if (tS.find(i) == tS.end())
- {
- sourceNullWords.insert(i);
- }
- }
-
-
- while (tS.size() != 0 && sT.size() != 0)
- {
-
- iter2 = tS.begin();
-
- eSide.clear();
- fSide.clear();
- eSide.insert (iter2->first);
-
- getMeCepts(eSide, fSide, tS , sT);
-
- for (iter = eSide.begin(); iter != eSide.end(); iter++)
- {
- iter2 = tS.find(*iter);
- tS.erase(iter2);
- }
-
- for (iter = fSide.begin(); iter != fSide.end(); iter++)
- {
- iter2 = sT.find(*iter);
- sT.erase(iter2);
- }
-
- cept = make_pair (fSide , eSide);
- ceptsInPhrase.push_back(cept);
- }
+
+ ceptsInPhrase.clear();
+ sourceNullWords.clear();
+ targetNullWords.clear();
+
+ vector <int> align;
+
+ std::map <int , vector <int> > sT;
+ std::map <int , vector <int> > tS;
+ std::set <int> eSide;
+ std::set <int> fSide;
+ std::set <int> :: iterator iter;
+ std :: map <int , vector <int> > :: iterator iter2;
+ std :: pair < set <int> , set <int> > cept;
+ int src;
+ int tgt;
+ ceptsInPhrase.clear();
+
+ for (int j=0; j<alignment.size(); j+=2) {
+ align.push_back(stringToInteger(alignment[j+1]));
+ align.push_back(stringToInteger(alignment[j]));
+ }
+
+ for (int i = 0; i < align.size(); i+=2) {
+ src = align[i];
+ tgt = align[i+1];
+ tS[tgt].push_back(src);
+ sT[src].push_back(tgt);
+ }
+
+ for (int i = 0; i< fSize; i++) {
+ if (sT.find(i) == sT.end()) {
+ targetNullWords.insert(i);
+ }
+ }
+
+ for (int i = 0; i< eSize; i++) {
+ if (tS.find(i) == tS.end()) {
+ sourceNullWords.insert(i);
+ }
+ }
+
+
+ while (tS.size() != 0 && sT.size() != 0) {
+
+ iter2 = tS.begin();
+
+ eSide.clear();
+ fSide.clear();
+ eSide.insert (iter2->first);
+
+ getMeCepts(eSide, fSide, tS , sT);
+
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+ iter2 = tS.find(*iter);
+ tS.erase(iter2);
+ }
+
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+ iter2 = sT.find(*iter);
+ sT.erase(iter2);
+ }
+
+ cept = make_pair (fSide , eSide);
+ ceptsInPhrase.push_back(cept);
+ }
}
int main(int argc, char * argv[])
{
- vector <string> e;
- vector <string> f;
- vector <string> a;
- vector <string> singletons;
- map <string,int> sTons;
- vector < pair < set <int> , set <int> > > ceptsInPhrase;
- vector < pair < string , vector <int> > > gCepts;
-
- set <int> sourceNullWords;
- set <int> targetNullWords;
-
- vector <string> currE;
- vector <string> currF;
- vector <string> currA;
-
- loadInput(argv[4],singletons);
-
- for(int i=0; i<singletons.size(); i++)
- sTons[singletons[i]]=i;
-
- loadInput(argv[1],e);
- loadInput(argv[2],f);
- loadInput(argv[3],a);
-
-
- for (int i=0; i<a.size()-1; i++)
- {
-
-
- getWords(e[i],currE);
- getWords(f[i],currF);
- getWords(a[i],currA);
-
- constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size());
- //cout<<"CC done"<<endl;
- ceptsInGenerativeStoryFormat(ceptsInPhrase , gCepts , sourceNullWords, currE);
- //cout<<"format done"<<endl;
- // printCepts(gCepts, currF);
- generateStory(gCepts, targetNullWords ,currF,sTons);
-
-
- /*
- cout<<"________________________________________"<<endl;
-
- cout<<"Press any integer to continue ..."<<endl;
- int xx;
- cin>>xx;
- */
-
- }
-
-
- return 0;
-
+ vector <string> e;
+ vector <string> f;
+ vector <string> a;
+ vector <string> singletons;
+ map <string,int> sTons;
+ vector < pair < set <int> , set <int> > > ceptsInPhrase;
+ vector < pair < string , vector <int> > > gCepts;
+
+ set <int> sourceNullWords;
+ set <int> targetNullWords;
+
+ vector <string> currE;
+ vector <string> currF;
+ vector <string> currA;
+
+ loadInput(argv[4],singletons);
+
+ for(int i=0; i<singletons.size(); i++)
+ sTons[singletons[i]]=i;
+
+ loadInput(argv[1],e);
+ loadInput(argv[2],f);
+ loadInput(argv[3],a);
+
+
+ for (int i=0; i<a.size()-1; i++) {
+
+
+ getWords(e[i],currE);
+ getWords(f[i],currF);
+ getWords(a[i],currA);
+
+ constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size());
+ //cout<<"CC done"<<endl;
+ ceptsInGenerativeStoryFormat(ceptsInPhrase , gCepts , sourceNullWords, currE);
+ //cout<<"format done"<<endl;
+ // printCepts(gCepts, currF);
+ generateStory(gCepts, targetNullWords ,currF,sTons);
+
+
+ /*
+ cout<<"________________________________________"<<endl;
+
+ cout<<"Press any integer to continue ..."<<endl;
+ int xx;
+ cin>>xx;
+ */
+
+ }
+
+
+ return 0;
+
}