Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@Hieus-MacBook.local> 2011-12-12 17:48:42 +0400
committer Hieu Hoang <hieuhoang@Hieus-MacBook.local> 2011-12-12 17:48:42 +0400
commit 21009b5d1e44b70a7e5b1f0d039e83a961a41776 (patch)
tree 14933af527fa33b63be235cc6c0c99b02e5bcd28 /mert/Data.cpp
parent 9b58880a32d19366ddce203a4d0079dbcf60f7cf (diff)
revert
Diffstat (limited to 'mert/Data.cpp')
-rw-r--r-- mert/Data.cpp 93
1 files changed, 93 insertions, 0 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 23fdc6d82..94f5287a8 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -47,6 +47,99 @@ Data::~Data() {
}
}
+//ADDED BY TS
+// Removes duplicate entries from every sentence in featdata/scoredata.
+//
+// Two entries of one sentence are duplicates when both their feature stats and
+// their score stats compare equal. The entry examined first is kept; a later
+// duplicate is swapped behind the surviving range and cut off by the final
+// resize, so the order of the surviving entries can change.
+//
+// Reports the number of removed entries per sentence on std::cerr.
+void Data::remove_duplicates() {
+
+  uint nSentences = featdata->size();
+  assert(scoredata->size() == nSentences);
+
+  for (uint s = 0; s < nSentences; s++) {
+
+    FeatureArray& feat_array = featdata->get(s);
+    ScoreArray& score_array = scoredata->get(s);
+
+    assert(feat_array.size() == score_array.size());
+
+    // Cheap pre-filter: buckets of already accepted indices, keyed by the sum
+    // of their feature values. Entries with equal features produce the same
+    // sum, so only entries sharing a bucket need the full comparison.
+    std::map<double, std::vector<uint> > lookup;
+
+    // Entries [0, nKept) are still alive. Counting survivors instead of
+    // tracking the last index (size() - 1) keeps an empty array from wrapping
+    // around to a huge unsigned value and being indexed out of bounds.
+    uint nKept = feat_array.size();
+    uint nRemoved = 0;
+
+    uint k = 0;
+    while (k < nKept) {
+
+      const FeatureStats& cur_feats = feat_array.get(k);
+
+      // Bucket key: sum of all feature values of entry k.
+      double sum = 0.0;
+      for (uint l = 0; l < cur_feats.size(); l++)
+        sum += cur_feats.get(l);
+
+      // Single map lookup; creates the bucket on first use of this sum.
+      std::vector<uint>& bucket = lookup[sum];
+
+      // Full comparison against every accepted entry with the same sum.
+      bool duplicate = false;
+      for (uint l = 0; l < bucket.size(); l++) {
+
+        uint j = bucket[l];
+
+        if (cur_feats == feat_array.get(j)
+            && score_array.get(k) == score_array.get(j)) {
+
+          duplicate = true;
+          break;
+        }
+      }
+
+      // New entry: remember its index and move on.
+      if (!duplicate) {
+        bucket.push_back(k);
+        k++;
+        continue;
+      }
+
+      // Move the duplicate behind the surviving range. k is not advanced:
+      // slot k now holds an entry that has not been examined yet (or k equals
+      // nKept after the decrement and the loop ends).
+      uint last = nKept - 1;
+      if (k < last) {
+
+        feat_array.swap(k, last);
+        score_array.swap(k, last);
+      }
+
+      nKept--;
+      nRemoved++;
+    }
+
+    // Report against the original size; the arrays are shrunk only afterwards.
+    std::cerr << "removed " << nRemoved << "/" << feat_array.size() << std::endl;
+
+    if (nRemoved > 0) {
+
+      feat_array.resize(nKept);
+      score_array.resize(nKept);
+    }
+  }
+}
+//END_ADDED
+
+
void Data::loadnbest(const std::string &file)
{
TRACE_ERR("loading nbest from " << file << std::endl);