Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Germann <Ulrich.Germann@gmail.com>2015-03-30 01:43:50 +0300
committerUlrich Germann <Ulrich.Germann@gmail.com>2015-03-30 01:43:50 +0300
commit529a766da7c5ccff67b80ebe18bd2f09a5b4fed0 (patch)
tree5bc37bf554c750211cca605a408fa6c49100108f
parent31d55d7936e600572296a7ff49d6b9884cc5d4a5 (diff)
Initial check-in.
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.cc164
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.h83
2 files changed, 247 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
new file mode 100644
index 000000000..d0f03852f
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
@@ -0,0 +1,164 @@
+#include "ug_sampling_bias.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+#include <boost/foreach.hpp>
+
+#ifdef HAVE_CURLPP
+#include <curlpp/Options.hpp>
+#include <curlpp/cURLpp.hpp>
+#include <curlpp/Easy.hpp>
+#endif
+
+namespace Moses
+{
+ namespace bitext
+ {
+ using ugdiss::id_type;
+
+ std::string
+ query_bias_server(std::string const& url, std::string const& text)
+ {
+ // communicate with the bias server; resuts will be in ...
+ std::ostringstream os;
+ curlpp::Easy myRequest;
+ std::string query = url+curlpp::escape(text);
+ myRequest.setOpt(new curlpp::options::Url(query));
+ curlpp::options::WriteStream ws(&os);
+ myRequest.setOpt(ws); // Give it to your request
+ myRequest.perform(); // This will output to os
+ return os.str();
+ }
+
+    // Builds a document-level bias by asking the bias server at SERVER_URL
+    // about TEXT and parsing its response.
+    //
+    // SID2DOC is stored by reference in m_sid2docid; the bias vector gets one
+    // zero-initialized slot per entry of DOCNAME2DOCID before the server
+    // response is applied.
+    DocumentBias::DocumentBias(std::vector<id_type> const& sid2doc,
+                               std::map<std::string,id_type> const& docname2docid,
+                               std::string const& server_url,
+                               std::string const& text)
+      : m_sid2docid(sid2doc),
+        m_bias(docname2docid.size(), 0)
+    {
+      init_from_json(query_bias_server(server_url, text), docname2docid);
+    }
+
+    // Poor man's special-purpose JSON parser for responses from the MMT bias
+    // server.
+    //
+    // Reads JSON as a flat sequence of "name": number pairs (for example
+    // {"docA": 0.25, "docB": 0.75}), normalizes the numbers so they sum to 1
+    // (skipped when the sum is 0), logs each pair to stderr and passes the
+    // resulting map on to init().
+    //
+    // Limitations: keys are taken verbatim between two double quotes (no
+    // escape handling); values run up to the next ',' or '}' and are converted
+    // with atof(), so quoted or nested values are not supported.
+    //
+    // A response that is cut off inside a key, or after a key with no ':'
+    // following it, ends parsing at that point; pairs read before that are
+    // kept.
+    //
+    // This hand-written scanner exists because using xmlrpc_parse_json didn't
+    // always work (parser errors).
+    void
+    DocumentBias
+    ::init_from_json
+    ( std::string const& json, std::map<std::string,id_type> const& docname2docid )
+    {
+      std::map<std::string,float> bias;
+      float total = 0;
+      size_t const len = json.size();
+
+      size_t i = 0;
+      while (i < len && json[i] != '"') ++i;  // opening quote of the first key
+      while (++i < len)                       // step past the opening quote
+        {
+          size_t k = i;
+          while (i < len && json[i] != '"') ++i;  // closing quote of the key
+          if (i >= len) break;                    // unterminated key
+          std::string const key = json.substr(k, i-k);
+
+          while (++i < len && json[i] != ':') { }
+          // No ':' after the key: the input is truncated. Stop here so the
+          // substr() below is never called with a start beyond the end.
+          if (i >= len) break;
+
+          k = ++i;  // first character after the ':'
+          while (++i < len && json[i] != ',' && json[i] != '}') { }
+          float const value = atof(json.substr(k, i-k).c_str());
+          bias[key] = value;
+          total += value;
+
+          ++i;
+          while (i < len && json[i] != '"') ++i;  // opening quote of next key
+        }
+
+      // BOOST_FOREACH cannot take a type containing a comma, hence the typedef.
+      typedef std::pair<std::string const,float> item;
+      if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
+      BOOST_FOREACH(item& x, bias)
+        std::cerr << "CONTEXT SERVER RESPONSE "
+                  << x.first << " " << x.second << std::endl;
+
+      init(bias, docname2docid);
+    }
+
+
+    // Fills m_bias from BIASMAP (document name -> weight).
+    //
+    // For every document in DOCNAME2DOCID that also appears in BIASMAP, the
+    // weight is stored at that document's id. Documents missing from BIASMAP
+    // keep their current value. Afterwards all entries of m_bias are divided
+    // by the sum of the weights just stored (skipped when that sum is 0), and
+    // the resulting bias of every known document is logged to stderr.
+    void
+    DocumentBias
+    ::init(std::map<std::string,float> const& biasmap,
+           std::map<std::string,id_type> const& docname2docid)
+    {
+      typedef std::map<std::string, id_type>::const_iterator doc_iter;
+      typedef std::map<std::string, float>::const_iterator bias_iter;
+
+      float sum = 0;
+      for (doc_iter doc = docname2docid.begin(); doc != docname2docid.end(); ++doc)
+        {
+          bias_iter const hit = biasmap.find(doc->first);
+          if (hit == biasmap.end()) continue;
+          m_bias[doc->second] = hit->second;
+          sum += m_bias[doc->second];
+        }
+
+      if (sum != 0)
+        {
+          for (size_t n = 0; n < m_bias.size(); ++n)
+            m_bias[n] /= sum;
+        }
+
+      for (doc_iter doc = docname2docid.begin(); doc != docname2docid.end(); ++doc)
+        {
+          std::cerr << "BIAS " << doc->first << " "
+                    << m_bias[doc->second] << std::endl;
+        }
+    }
+
+    // Returns the document id that m_sid2docid records for item IDX.
+    // Uses the bounds-checked at(), so an invalid IDX throws
+    // std::out_of_range.
+    id_type DocumentBias::GetClass(id_type const idx) const
+    {
+      id_type const docid = m_sid2docid.at(idx);
+      return docid;
+    }
+
+ // Returns the bias of the document that m_sid2docid assigns to item IDX.
+ // IDX is checked against the size of m_sid2docid; the document id found
+ // there is used to index m_bias without a further range check.
+ float
+ DocumentBias
+ ::operator[](id_type const idx) const
+ {
+ // reject item ids that are not covered by the sentence-to-document map
+ UTIL_THROW_IF2(idx >= m_sid2docid.size(), "Out of bounds");
+ return m_bias[m_sid2docid[idx]];
+ }
+
+    // Returns the number of entries in m_sid2docid, i.e. the number of items
+    // that can be passed to operator[] and GetClass().
+    // NOTE(review): SamplingBias describes size() as "number of classes",
+    // but this is the length of the item-to-document map -- confirm which
+    // one callers expect.
+    size_t DocumentBias::size() const
+    {
+      return m_sid2docid.size();
+    }
+
+
+
+    // Creates a sentence-level bias holding a copy of BIAS.
+    SentenceBias::SentenceBias(std::vector<float> const& bias)
+      : m_bias(bias)
+    {
+    }
+
+    // Creates a sentence-level bias with S value-initialized (0) entries.
+    SentenceBias::SentenceBias(size_t const s)
+      : m_bias(s)
+    {
+    }
+
+    // The class of an item is the item id itself; IDX is returned unchanged
+    // and is not range-checked.
+    id_type SentenceBias::GetClass(id_type idx) const
+    {
+      return idx;
+    }
+
+ // Mutable access: returns a reference to the bias stored for item IDX,
+ // so callers can assign to it.
+ float&
+ SentenceBias
+ ::operator[](id_type const idx)
+ {
+ // reject ids outside the bias vector
+ UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
+ return m_bias[idx];
+ }
+
+ // Read-only access: returns the bias stored for item IDX by value.
+ float
+ SentenceBias
+ ::operator[](id_type const idx) const
+ {
+ // reject ids outside the bias vector
+ UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
+ return m_bias[idx];
+ }
+
+    // Returns the number of bias values held in m_bias.
+    size_t SentenceBias::size() const
+    {
+      return m_bias.size();
+    }
+
+ }
+}
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.h b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
new file mode 100644
index 000000000..1fa1b3aa9
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
@@ -0,0 +1,83 @@
+// -*- c++ -*-
+#pragma once
+
+#include <map>
+#include<vector>
+#include <string>
+#include "moses/Util.h"
+#include "ug_typedefs.h"
+
+namespace Moses
+{
+ namespace bitext
+ {
+ using ugdiss::id_type;
+
+ std::string query_bias_server(std::string const& url, std::string const& text);
+
+    // Abstract interface for a sampling bias: items (identified by an id)
+    // belong to classes, and each class carries a bias value.
+    class SamplingBias
+    {
+    public:
+
+      // Virtual destructor so that objects of derived classes can be
+      // destroyed safely through a SamplingBias pointer.
+      virtual ~SamplingBias() {}
+
+      // returns the (unnormalized) bias for the class of item ID
+      virtual float
+      operator[](id_type const ID) const = 0;
+
+      // number of classes
+      virtual size_t size() const = 0;
+
+      // returns class of item ID
+      virtual id_type
+      GetClass(id_type const ID) const = 0;
+    };
+
+    // Sampling bias at document level: every item id is mapped to a document
+    // id, and the bias value is stored per document.
+    class DocumentBias : public SamplingBias
+    {
+    private:
+      // Maps item id -> document id. Held by reference: the vector given to
+      // the constructor must outlive this object.
+      std::vector<id_type> const& m_sid2docid;
+
+      // Bias value per document id.
+      std::vector<float> m_bias;
+
+    public:
+      // Queries the bias server at server_url with text and initializes the
+      // bias from its response.
+      DocumentBias(std::vector<id_type> const& sid2doc,
+                   std::map<std::string,id_type> const& docname2docid,
+                   std::string const& server_url,
+                   std::string const& text);
+
+      // Parses a bias server response and initializes the bias from it.
+      void init_from_json(std::string const& json,
+                          std::map<std::string,id_type> const& docname2docid);
+
+      // Initializes the bias from a map of document name -> weight.
+      void init(std::map<std::string,float> const& biasmap,
+                std::map<std::string,id_type> const& docname2docid);
+
+      // Document id of item idx.
+      id_type GetClass(id_type const idx) const;
+
+      // Bias of the document that item idx belongs to.
+      float operator[](id_type const idx) const;
+
+      // Number of entries in the item-to-document map.
+      size_t size() const;
+    };
+
+    // Sampling bias at sentence level: every item is its own class and has
+    // its own bias value.
+    class SentenceBias : public SamplingBias
+    {
+    private:
+      // Bias value per item id.
+      std::vector<float> m_bias;
+
+    public:
+      // Copies the given bias values.
+      // NOTE(review): both constructors take a single argument and are not
+      // explicit, so they also act as implicit conversions.
+      SentenceBias(std::vector<float> const& bias);
+
+      // Creates s bias values, value-initialized.
+      SentenceBias(size_t const s);
+
+      // Returns idx itself.
+      id_type GetClass(id_type idx) const;
+
+      // Mutable and read-only access to the bias of item idx.
+      float& operator[](id_type const idx);
+      float operator[](id_type const idx) const;
+
+      // Number of bias values.
+      size_t size() const;
+    };
+
+ }
+}