Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ulrich Germann <Ulrich.Germann@gmail.com> 2016-01-22 20:52:40 +0300
committer: Ulrich Germann <Ulrich.Germann@gmail.com> 2016-01-22 20:52:40 +0300
commit: 63b293390201dc1c582a850c10ed8641f88e6282 (patch)
tree: 7caa6628e5329aea0b5bf2e5086799a26b07bca0 /moses/TranslationModel
parent: 56e13d152a99821f5d67873f79ce5163fc0211bf (diff)
Improvements to check-coverage3.cc.
Diffstat (limited to 'moses/TranslationModel')
-rw-r--r-- moses/TranslationModel/UG/check-coverage3.cc 57
1 file changed, 41 insertions, 16 deletions
diff --git a/moses/TranslationModel/UG/check-coverage3.cc b/moses/TranslationModel/UG/check-coverage3.cc
index 8c9dc873c..34860dd89 100644
--- a/moses/TranslationModel/UG/check-coverage3.cc
+++ b/moses/TranslationModel/UG/check-coverage3.cc
@@ -13,6 +13,8 @@
#include <sstream>
#include "mm/ug_bitext_sampler.h"
+#include <boost/program_options.hpp>
+namespace po=boost::program_options;
using namespace Moses;
using namespace sapt;
using namespace std;
@@ -40,26 +42,54 @@ basename(string const path, string const suffix)
return path.substr(p+1, suffix == &path[k] ? k-p-1 : path.size() - p);
}
+string docname;
+
+void
+print_evidence_list(bitext_t const& B, std::map<uint32_t, uint32_t> const& indoc)
+{
+ typedef std::map<uint32_t, uint32_t>::const_iterator iter;
+ typedef pair<size_t,string> item;
+ vector<item> where;
+ where.reserve(indoc.size());
+
+ for (iter d = indoc.begin(); d != indoc.end(); ++d)
+ where.push_back(item(d->second, B.docid2name(d->first)));
+ sort(where.begin(),where.end(),greater<item>());
+ BOOST_FOREACH(item const& doc, where)
+ if (docname == doc.second)
+ cout << (boost::format("\t\t%4d ! %s") % doc.first % doc.second) << endl;
+ else
+ cout << (boost::format("\t\t%4d %s") % doc.first % doc.second) << endl;
+}
+
+
int main(int argc, char* argv[])
{
boost::shared_ptr<bitext_t> B(new bitext_t);
B->open(argv[1],argv[2],argv[3]);
- string line;
+ string line, refline;
string ifile = argv[4];
- string docname = basename(ifile, string(".") + argv[2] + ".gz");
+ string rfile = argc > 5 ? argv[5] : "";
+ docname = basename(ifile, string(".") + argv[2] + ".gz");
id_type docid = B->docname2docid(docname);
- boost::iostreams::filtering_istream in;
+ boost::iostreams::filtering_istream in, ref;
ugdiss::open_input_stream(ifile,in);
+ if (rfile.size()) ugdiss::open_input_stream(rfile,ref);
while(getline(in,line))
{
- cout << line << " [" << docname << "]" << endl;
+ if (rfile.size()) getline(ref,refline);
+ cout << string(80,'-') << endl;
+ cout << " [" << docname << "]" << endl;
+ cout << line << endl;
+ if (refline.size()) cout << refline << endl;
+ cout << string(80,'-') << endl;
vector<id_type> snt;
B->V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B->I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k);
- for (size_t num_occurrences = m.ca(); m.size(); m.up())
+ for (size_t num_occurrences = 0; m.size(); m.up())
{
if (size_t(m.ca()) == num_occurrences) continue;
num_occurrences = m.ca();
@@ -72,10 +102,12 @@ int main(int argc, char* argv[])
sapt::pstats::indoc_map_t::const_iterator d
= s.stats()->indoc.find(docid);
size_t indoccnt = d != s.stats()->indoc.end() ? d->second : 0;
- cout << m.size() << " : " << m.str(B->V1.get()) << " ("
+ cout // << m.size() << " : "
+ << m.str(B->V1.get()) << " ("
<< s.stats()->trg.size() << " entries; "
<< indoccnt << "/" << s.stats()->good
- << " samples in domain)" << endl;
+ << " samples in domain; " << num_occurrences
+ << " occ.)" << endl;
vector<PhrasePair<Token> > ppairs;
PhrasePair<Token>::SortDescendingByJointCount sorter;
expand(m,*B,*s.stats(),ppairs,NULL);
@@ -87,21 +119,14 @@ int main(int argc, char* argv[])
ppair.good2 = ppair.raw2 * float(ppair.good1)/ppair.raw1;
ppair.good2 = max(ppair.good2, ppair.joint);
-#if 0
+#if 1
cout << "\t"
<< (fmt % ppair.joint % ppair.good1 % ppair.good2
% B->T2->pid2str(B->V2.get(),ppair.p2)
% (float(ppair.joint)/ppair.good1)
% (float(ppair.joint)/ppair.good2)
) << "\n";
- typedef std::map<uint32_t, uint32_t>::const_iterator iter;
- for (iter d = ppair.indoc.begin(); d != ppair.indoc.end(); ++d)
- {
- // if (d != ppair.indoc.begin()) cout << "; ";
- cout << (boost::format("\t\t%4d %s") % d->second
- % B->docid2name(d->first))
- << endl;
- }
+ print_evidence_list(*B, ppair.indoc);
cout << endl;
#else
cout << "\t"