Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rico Sennrich <rico.sennrich@gmx.ch> 2012-06-16 10:36:51 +0400
committer Rico Sennrich <rico.sennrich@gmx.ch> 2012-06-16 10:38:43 +0400
commit e5bec4a48b1e520d9f5538a9215737ad1be760df (patch)
tree 48b4daee5401302e6a0d88c16fb9fb5b8623b2cd /contrib
parent 454ef13442463838581f9bc00f3dd42c408d9e4b (diff)
minor fix for hierarchical sigtest filter
(rules that end with two nonterminals were all thrown out)
Diffstat (limited to 'contrib')
-rw-r--r-- contrib/sigtest-filter/filter-pt.cpp 15
1 files changed, 5 insertions, 10 deletions
diff --git a/contrib/sigtest-filter/filter-pt.cpp b/contrib/sigtest-filter/filter-pt.cpp
index 5aa7766d0..5ee5ee5b5 100644
--- a/contrib/sigtest-filter/filter-pt.cpp
+++ b/contrib/sigtest-filter/filter-pt.cpp
@@ -281,7 +281,7 @@ SentIdSet find_occurrences(const std::string& rule, C_SuffixArraySearchApplicati
int endPos = 0;
vector<std::string> phrases;
- while (rule.find("[X][X] ", pos) < rule.size()-10) {
+ while (rule.find("[X][X] ", pos) < rule.size()) {
endPos = rule.find("[X][X] ",pos) - 1; // -1 to cut space before NT
if (endPos < pos) { // no space: NT at start of rule (or two consecutive NTs)
pos += 7;
@@ -291,16 +291,11 @@ SentIdSet find_occurrences(const std::string& rule, C_SuffixArraySearchApplicati
pos = endPos + 8;
}
- // NT at end of rule
- if (rule.find(" [X][X] [X]", pos) < rule.size()) {
- endPos = rule.size()-11;
- }
- // rule doesn't end with NT: cut LHS of rule
- else {
- endPos = rule.size()-4;
+ // cut LHS of rule
+ endPos = rule.size()-4;
+ if (endPos > pos) {
+ phrases.push_back(rule.substr(pos,endPos-pos));
}
-
- phrases.push_back(rule.substr(pos,endPos-pos));
sa_set = lookup_multiple_phrases(phrases, my_sa, rule, cache);
}
else {