Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2012-11-10 22:38:49 +0400
committer Hieu Hoang <hieuhoang@gmail.com> 2012-11-10 22:38:49 +0400
commit b75e26c686f4dd65bd16a48c066d7882aeed7836 (patch)
tree af1cc352147ca9f50a44cdb8a9e54e6a9ec6877e /scripts/fuzzy-match
parent 27a6cf2ebc2ac8b60890f3f06f2a2973ca466375 (diff)
fuzzy match bug. Everything matches except alignments
Diffstat (limited to 'scripts/fuzzy-match')
-rw-r--r-- scripts/fuzzy-match/create_xml.cpp 67
1 files changed, 36 insertions, 31 deletions
diff --git a/scripts/fuzzy-match/create_xml.cpp b/scripts/fuzzy-match/create_xml.cpp
index 48d957f2d..a83e6d581 100644
--- a/scripts/fuzzy-match/create_xml.cpp
+++ b/scripts/fuzzy-match/create_xml.cpp
@@ -10,6 +10,13 @@
using namespace std;
using namespace Moses;
+inline const std::string TrimInternal(const std::string& str, const std::string dropChars = " \t\n\r") // Returns a copy of str with leading and trailing characters found in dropChars removed; dropChars is taken by value, so it is copied on each call.
+{
+ std::string res = str; // work on a copy; the caller's string is never modified
+ res.erase(str.find_last_not_of(dropChars)+1); // drop the trailing run; when no character survives, npos + 1 wraps to 0 and the whole copy is erased
+ return res.erase(0, res.find_first_not_of(dropChars)); // drop the leading run; erase() returns a reference to res, which is copied into the returned value
+}
+
class CreateXMLRetValues
{
public:
@@ -172,38 +179,38 @@ CreateXMLRetValues createXML(const string &source, const string &input, const st
start_t = tt;
}
}
+ }
- // end of sentence? add to end
- if ( start_t == 1000 && i > inputToks.size() - 1 ) {
- start_t = targetsToks.size() - 1;
- }
+ // end of sentence? add to end
+ if ( start_t == 1000 && i > inputToks.size() - 1 ) {
+ start_t = targetsToks.size() - 1;
+ }
- // backtrack to previous words if unaligned
- if ( start_t == 1000 ) {
- start_t = -1;
- for ( int ss = s - 1 ; start_t == -1 && ss >= 0 ; ss-- ) {
- const std::map<int, int> &targets = alignments.m_alignS2T[ss];
-
- std::map<int, int>::const_iterator iter;
- for (iter = targets.begin(); iter != targets.end(); ++iter) {
- size_t tt = iter->first;
- if (tt > start_t) {
- start_t = tt;
- }
+ // backtrack to previous words if unaligned
+ if ( start_t == 1000 ) {
+ start_t = -1;
+ for ( int ss = s - 1 ; start_t == -1 && ss >= 0 ; ss-- ) {
+ const std::map<int, int> &targets = alignments.m_alignS2T[ss];
+
+ std::map<int, int>::const_iterator iter;
+ for (iter = targets.begin(); iter != targets.end(); ++iter) {
+ size_t tt = iter->first;
+ if (tt > start_t) {
+ start_t = tt;
}
}
- } // if ( start_t == 1000 ) {
-
- frameInput[start_t] += insertion;
- map<string, int> nt;
- nt["start_t"] = start_t;
- nt["start_i"] = start_i;
- nonTerms.push_back(nt);
- }
+ }
+ } // if ( start_t == 1000 ) {
- currently_matching = 1;
+ frameInput[start_t] += insertion;
+ map<string, int> nt;
+ nt["start_t"] = start_t;
+ nt["start_i"] = start_i;
+ nonTerms.push_back(nt);
} // if (start_i < i ) {
+
+ currently_matching = 1;
} // else if ( !currently_matching
cerr << action << " " << s << " " << i
@@ -314,11 +321,9 @@ CreateXMLRetValues createXML(const string &source, const string &input, const st
ret.ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " ";
}
- /* TODO
- ruleS = Trim(ruleS);
- ruleT = Trim(ruleT);
- ruleAlignment = Trim(ruleAlignment);
- */
+ ret.ruleS = TrimInternal(ret.ruleS);
+ ret.ruleT = TrimInternal(ret.ruleT);
+ ret.ruleAlignment = TrimInternal(ret.ruleAlignment);
vector<string> ruleAlignmentToks = Tokenize(ret.ruleAlignment);
for (size_t i = 0; i < ruleAlignmentToks.size(); ++i) {
@@ -327,7 +332,7 @@ CreateXMLRetValues createXML(const string &source, const string &input, const st
assert(toks.size() == 2);
ret.ruleAlignmentInv += toks[1] + "-" +toks[0];
}
- //ruleAlignmentInv = Trim(ruleAlignmentInv); TODO
+ ret.ruleAlignmentInv = TrimInternal(ret.ruleAlignmentInv);
// frame
ret.frame;