diff options
author | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-03-30 01:43:50 +0300 |
---|---|---|
committer | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-03-30 01:43:50 +0300 |
commit | 529a766da7c5ccff67b80ebe18bd2f09a5b4fed0 (patch) | |
tree | 5bc37bf554c750211cca605a408fa6c49100108f | |
parent | 31d55d7936e600572296a7ff49d6b9884cc5d4a5 (diff) |
Initial check-in.
-rw-r--r-- | moses/TranslationModel/UG/mm/ug_sampling_bias.cc | 190 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mm/ug_sampling_bias.h | 96 |
2 files changed, 286 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
new file mode 100644
index 000000000..d0f03852f
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
@@ -0,0 +1,190 @@
+#include "ug_sampling_bias.h"
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <boost/foreach.hpp>
+
+#ifdef HAVE_CURLPP
+#include <curlpp/Options.hpp>
+#include <curlpp/cURLpp.hpp>
+#include <curlpp/Easy.hpp>
+#endif
+
+namespace Moses
+{
+  namespace bitext
+  {
+    using ugdiss::id_type;
+
+    // Sends TEXT, URL-escaped and appended to URL, to the bias server and
+    // returns the raw response body. Throws if this file was compiled
+    // without HAVE_CURLPP, since no HTTP client is available in that case.
+    std::string
+    query_bias_server(std::string const& url, std::string const& text)
+    {
+#ifdef HAVE_CURLPP
+      std::ostringstream os; // receives the response body
+      curlpp::Easy myRequest;
+      std::string query = url+curlpp::escape(text);
+      myRequest.setOpt(new curlpp::options::Url(query));
+      curlpp::options::WriteStream ws(&os);
+      myRequest.setOpt(ws); // Give it to your request
+      myRequest.perform(); // This will output to os
+      return os.str();
+#else
+      (void) text; // only used when curlpp is available
+      UTIL_THROW2("Cannot query bias server at '" << url
+                  << "': compiled without curlpp (HAVE_CURLPP).");
+#endif
+    }
+
+    // Queries the bias server at SERVER_URL with TEXT and initializes the
+    // per-document bias from the response. SID2DOC is kept by reference
+    // (m_sid2docid), so it must outlive this object.
+    DocumentBias
+    ::DocumentBias
+    ( std::vector<id_type> const& sid2doc,
+      std::map<std::string,id_type> const& docname2docid,
+      std::string const& server_url, std::string const& text)
+      : m_sid2docid(sid2doc)
+      , m_bias(docname2docid.size(), 0)
+    {
+      std::string json = query_bias_server(server_url, text);
+      init_from_json(json, docname2docid);
+    }
+
+    // Poor man's special purpose json parser for responses from the MMT
+    // bias server. It reads a flat object of the form
+    //   {"docname": weight, "docname": weight, ...}
+    // and does not handle nesting, escaped quotes, or string values that
+    // contain ',' or '}'. Parsing stops at the first incomplete entry, so
+    // a truncated or malformed response contributes only the entries read
+    // up to that point. The weights are divided by their sum (if non-zero),
+    // logged to std::cerr, and passed on to init().
+    // (Using xmlrpc_parse_json didn't always work: parser errors.)
+    void
+    DocumentBias
+    ::init_from_json
+    ( std::string const& json, std::map<std::string,id_type> const& docname2docid )
+    {
+      std::map<std::string,float> bias;
+      float total = 0;
+      size_t quote = json.find('"'); // opening quote of the next key
+      while (quote != std::string::npos)
+        {
+          size_t const kbeg = quote + 1;
+          size_t const kend = json.find('"', kbeg);
+          if (kend == std::string::npos) break; // unterminated key
+          size_t const colon = json.find(':', kend + 1);
+          if (colon == std::string::npos) break; // key without a value
+          size_t const vbeg = colon + 1;
+          size_t vend = json.find_first_of(",}", vbeg);
+          if (vend == std::string::npos) vend = json.size();
+          float const w = std::atof(json.substr(vbeg, vend - vbeg).c_str());
+          bias[json.substr(kbeg, kend - kbeg)] = w;
+          total += w;
+          quote = json.find('"', vend); // npos once the input is used up
+        }
+
+      typedef std::pair<std::string const,float> item;
+      if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
+      BOOST_FOREACH(item& x, bias)
+        std::cerr << "CONTEXT SERVER RESPONSE "
+                  << x.first << " " << x.second << std::endl;
+
+      init(bias, docname2docid);
+    }
+
+    // Copies the weight of every document listed in both BIASMAP and
+    // DOCNAME2DOCID into m_bias (indexed by document id), then divides all
+    // entries of m_bias by the sum of the copied weights (if non-zero) and
+    // logs the result to std::cerr. Throws if a document id does not fit
+    // into m_bias.
+    void
+    DocumentBias
+    ::init(std::map<std::string,float> const& biasmap,
+           std::map<std::string,id_type> const& docname2docid)
+    {
+      typedef std::map<std::string, id_type>::value_type doc_record;
+      typedef std::map<std::string, float>::const_iterator bias_iter;
+      float total = 0;
+      BOOST_FOREACH(doc_record const& d, docname2docid)
+        {
+          UTIL_THROW_IF2(d.second >= m_bias.size(),
+                         "Document id out of bounds: " << d.first);
+          bias_iter m = biasmap.find(d.first);
+          if (m != biasmap.end()) total += (m_bias[d.second] = m->second);
+        }
+      if (total) { BOOST_FOREACH(float& f, m_bias) f /= total; }
+      BOOST_FOREACH(doc_record const& d, docname2docid)
+        std::cerr << "BIAS " << d.first << " " << m_bias[d.second] << std::endl;
+    }
+
+    // Returns the class (document id) of item IDX; range-checked by at().
+    id_type
+    DocumentBias
+    ::GetClass(id_type const idx) const
+    {
+      return m_sid2docid.at(idx);
+    }
+
+    // Returns the bias of the document that item IDX belongs to.
+    float
+    DocumentBias
+    ::operator[](id_type const idx) const
+    {
+      UTIL_THROW_IF2(idx >= m_sid2docid.size(), "Out of bounds");
+      id_type const docid = m_sid2docid[idx];
+      // m_sid2docid is supplied by the caller; don't trust its contents
+      UTIL_THROW_IF2(docid >= m_bias.size(), "Out of bounds");
+      return m_bias[docid];
+    }
+
+    // Returns the number of entries in m_sid2docid.
+    size_t
+    DocumentBias
+    ::size() const
+    { return m_sid2docid.size(); }
+
+
+    // Stores a copy of BIAS as the per-item weights.
+    SentenceBias
+    ::SentenceBias(std::vector<float> const& bias)
+      : m_bias(bias) { }
+
+    // Creates S weights, all value-initialized to 0.
+    SentenceBias
+    ::SentenceBias(size_t const s) : m_bias(s) { }
+
+    // Each item is its own class.
+    id_type
+    SentenceBias
+    ::GetClass(id_type idx) const { return idx; }
+
+    // Read/write access to the weight of item IDX; throws if IDX is
+    // out of bounds.
+    float&
+    SentenceBias
+    ::operator[](id_type const idx)
+    {
+      UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
+      return m_bias[idx];
+    }
+
+    // Read-only access to the weight of item IDX; throws if IDX is
+    // out of bounds.
+    float
+    SentenceBias
+    ::operator[](id_type const idx) const
+    {
+      UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
+      return m_bias[idx];
+    }
+
+    // Returns the number of weights.
+    size_t
+    SentenceBias
+    ::size() const { return m_bias.size(); }
+
+  }
+}
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.h b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
new file mode 100644
index 000000000..1fa1b3aa9
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
@@ -0,0 +1,96 @@
+// -*- c++ -*-
+#pragma once
+
+#include <map>
+#include <vector>
+#include <string>
+#include "moses/Util.h"
+#include "ug_typedefs.h"
+
+namespace Moses
+{
+  namespace bitext
+  {
+    using ugdiss::id_type;
+
+    // Sends TEXT to the bias server at URL and returns the raw response.
+    std::string query_bias_server(std::string const& url, std::string const& text);
+
+    // Interface for sampling biases: maps an item ID to a class and to the
+    // bias of that class.
+    class SamplingBias
+    {
+    public:
+
+      // Virtual, so that implementations can be destroyed safely through
+      // a pointer to SamplingBias.
+      virtual ~SamplingBias() {}
+
+      virtual float
+      operator[](id_type const ID) const = 0;
+      // returns (unnormalized bias) for the class of item ID
+
+      virtual size_t size() const = 0;
+      // number of classes
+      // NOTE(review): DocumentBias returns the size of its item-to-class
+      // map here, not the number of documents -- confirm what is intended.
+
+      virtual id_type
+      GetClass(id_type const ID) const = 0;
+      // returns class of item ID
+    };
+
+    // Bias per document: every item inherits the bias of its document.
+    class
+    DocumentBias : public SamplingBias
+    {
+      // Held by reference, not copied: the vector passed to the
+      // constructor must outlive this object.
+      std::vector<id_type> const& m_sid2docid;
+      std::vector<float> m_bias; // bias per document id
+
+    public:
+
+      DocumentBias(std::vector<id_type> const& sid2doc,
+                   std::map<std::string,id_type> const& docname2docid,
+                   std::string const& server_url, std::string const& text);
+
+      void
+      init_from_json
+      ( std::string const& json,
+        std::map<std::string,id_type> const& docname2docid );
+
+      void
+      init
+      ( std::map<std::string,float> const& biasmap,
+        std::map<std::string,id_type> const& docname2docid);
+
+      id_type
+      GetClass(id_type const idx) const;
+
+      float
+      operator[](id_type const idx) const;
+
+      size_t
+      size() const;
+    };
+
+    // Bias per item: every item is its own class.
+    class
+    SentenceBias : public SamplingBias
+    {
+      std::vector<float> m_bias;
+    public:
+      SentenceBias(std::vector<float> const& bias);
+      SentenceBias(size_t const s);
+
+      id_type GetClass(id_type idx) const;
+
+      float& operator[](id_type const idx);
+      float operator[](id_type const idx) const;
+      size_t size() const;
+
+    };
+
+  }
+}