diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2012-10-25 19:30:16 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2012-10-25 19:30:16 +0400 |
commit | 3bb4c3994dbed41e324151f0b46d6e725334cd56 (patch) | |
tree | e9254ec0ba01305e255cf11243b32ad2f8f84928 /scripts/fuzzy-match/create_xml.cpp | |
parent | 33063e8d4b4766c03ce35d4f76ebe416dcd69790 (diff) |
perl to cpp
Diffstat (limited to 'scripts/fuzzy-match/create_xml.cpp')
-rw-r--r-- | scripts/fuzzy-match/create_xml.cpp | 165 |
1 files changed, 86 insertions, 79 deletions
diff --git a/scripts/fuzzy-match/create_xml.cpp b/scripts/fuzzy-match/create_xml.cpp index e95e52039..885dd7d2c 100644 --- a/scripts/fuzzy-match/create_xml.cpp +++ b/scripts/fuzzy-match/create_xml.cpp @@ -221,98 +221,105 @@ void createXML(const string &source, const string &input, const string &target, inputBitmap.push_back(0); } } // else if ( !currently_matching + } // for ( size_t p = 0 + + cerr << target << endl; + for (size_t i = 0; i < targetBitmap.size(); ++i) + cerr << targetBitmap[i]; + cerr << endl; + + for (map<int, string>::const_iterator iter = frameInput.begin(); iter != frameInput.end(); ++iter) { + cerr << iter->first << ":" <<iter->second << endl; + } - cerr << target << endl; - for (size_t i = 0; i < targetBitmap.size(); ++i) - cerr << targetBitmap[i]; - cerr << endl; + // STEP 2: BUILD RULE AND FRAME - for (map<int, string>::const_iterator iter = frameInput.begin(); iter != frameInput.end(); ++iter) { - cerr << iter->first << ":" <<iter->second << endl; + // hierarchical rule + string rule_s = ""; + int rule_pos_s = 0; + map<int, int> ruleAlignS; + + for (size_t i = 0 ; i < inputBitmap.size() ; ++i ) { + if ( inputBitmap[i] ) { + rule_s += inputToks[i] + " "; + ruleAlignS[ alignI2S[i] ] = rule_pos_s++; } - // STEP 2: BUILD RULE AND FRAME + for (size_t j = 0; j < nonTerms.size(); ++j) { + map<string, int> &nt = nonTerms[j]; + if (i == nt["start_i"]) { + rule_s += "[X][X]"; + nt["rule_pos_s"] = rule_pos_s++; + } + } + } - // hierarchical rule - string rule_s = ""; - int rule_pos_s = 0; - map<int, int> ruleAlignS; + string rule_t = ""; + int rule_pos_t = 0; + map<int, int> ruleAlignT; - for (size_t i = 0 ; i < inputBitmap.size() ; ++i ) { - if ( inputBitmap[i] ) { - rule_s += inputToks[i] + " "; - ruleAlignS[ alignI2S[i] ] = rule_pos_s++; - } + for (size_t t = -1 ; t < targetBitmap.size(); t++ ) { + if (t >= 0 && targetBitmap[t]) { + rule_t += targetsToks[t] + " "; + ruleAlignT[t] = rule_pos_t++; + } - for (size_t j = 0; j < nonTerms.size(); ++j) { - map<string, int> &nt = nonTerms[j]; - if (i == nt["start_i"]) { - rule_s += "[X][X]"; - nt["rule_pos_s"] = rule_pos_s++; - } - } + for (size_t i = 0; i < nonTerms.size(); ++i) { + map<string, int> &nt = nonTerms[i]; + + if (t == nt["start_t"]) { + rule_t += "[X][X] "; + nt["rule_pos_t"] = rule_pos_t++; + } } + } - string rule_t = ""; - int rule_pos_t = 0; - map<int, int> ruleAlignT; - - for (size_t t = -1 ; t < targetBitmap.size(); t++ ) { - if (t >= 0 && targetBitmap[t]) { - rule_t += targetsToks[t] + " "; - ruleAlignT[t] = rule_pos_t++; - } - - for (size_t i = 0; i < nonTerms.size(); ++i) { - map<string, int> &nt = nonTerms[i]; - - if (t == nt["start_t"]) { - rule_t += "[X][X] "; - nt["rule_pos_t"] = rule_pos_t++; - } - } - } - - string ruleAlignment; - - for (map<int, int>::const_iterator iter = ruleAlignT.begin(); iter != ruleAlignT.end(); ++iter) { - int s = iter->first; - std::map<int, int> &targets = alignments.m_alignS2T[s]; - - std::map<int, int>::const_iterator iter; - for (iter = targets.begin(); iter != targets.end(); ++iter) { - int t =iter->first; - if (ruleAlignT.find(s) == ruleAlignT.end()) - continue; - ruleAlignment += ruleAlignS[s] + "-" + SPrint(ruleAlignT[t]) + " "; - } - } - - for (size_t i = 0; i < nonTerms.size(); ++i) { - map<string, int> &nt = nonTerms[i]; - ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " "; - } - - /* TODO - rule_s = Trim(rule_s); - rule_t = Trim(rule_t); - ruleAlignment = Trim(ruleAlignment); - */ - - string rule_alignment_inv; - vector<string> ruleAlignmentToks = Tokenize(ruleAlignment, "-"); - for (size_t i = 0; i < ruleAlignmentToks.size(); ++i) { - const string &alignPoint = ruleAlignmentToks[i]; - vector<string> toks = Tokenize(alignPoint); - assert(toks.size() == 2); - rule_alignment_inv += toks[1] + "-" +toks[0]; - } - //rule_alignment_inv = Trim(rule_alignment_inv); TODO + string ruleAlignment; + for (map<int, int>::const_iterator iter = ruleAlignT.begin(); iter != ruleAlignT.end(); ++iter) { + int s = iter->first; + std::map<int, int> &targets = alignments.m_alignS2T[s]; + std::map<int, int>::const_iterator iter; + for (iter = targets.begin(); iter != targets.end(); ++iter) { + int t =iter->first; + if (ruleAlignT.find(s) == ruleAlignT.end()) + continue; + ruleAlignment += ruleAlignS[s] + "-" + SPrint(ruleAlignT[t]) + " "; + } + } + + for (size_t i = 0; i < nonTerms.size(); ++i) { + map<string, int> &nt = nonTerms[i]; + ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " "; + } + + /* TODO + rule_s = Trim(rule_s); + rule_t = Trim(rule_t); + ruleAlignment = Trim(ruleAlignment); + */ + + string rule_alignment_inv; + vector<string> ruleAlignmentToks = Tokenize(ruleAlignment, "-"); + for (size_t i = 0; i < ruleAlignmentToks.size(); ++i) { + const string &alignPoint = ruleAlignmentToks[i]; + vector<string> toks = Tokenize(alignPoint); + assert(toks.size() == 2); + rule_alignment_inv += toks[1] + "-" +toks[0]; + } + //rule_alignment_inv = Trim(rule_alignment_inv); TODO + + // frame + string frame; + if (frameInput.find(-1) == frameInput.end()) + frame = frameInput[-1]; + + int currently_included = 0; + int start_t = -1; + targetBitmap.push_back(0); - } // for ( size_t p = 0 } |