Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'vowpalwabbit/gd_mf.cc')
-rw-r--r--vowpalwabbit/gd_mf.cc161
1 file changed, 84 insertions, 77 deletions
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index 1a9a6335..4e91aeba 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -26,10 +26,12 @@ using namespace LEARNER;
namespace GDMF {
struct gdmf {
vw* all;
+ uint32_t rank;
};
-void mf_print_offset_features(vw& all, example& ec, size_t offset)
+void mf_print_offset_features(gdmf& d, example& ec, size_t offset)
{
+ vw& all = *d.all;
weight* weights = all.reg.weight_vector;
size_t mask = all.reg.weight_mask;
for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
@@ -53,7 +55,7 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
{
/* print out nsk^feature:hash:value:weight:nsk^feature^:hash:value:weight:prod_weights */
- for (size_t k = 1; k <= all.rank; k++)
+ for (size_t k = 1; k <= d.rank; k++)
{
for (audit_data* f = ec.audit_features[(int)(*i)[0]].begin; f!= ec.audit_features[(int)(*i)[0]].end; f++)
for (audit_data* f2 = ec.audit_features[(int)(*i)[1]].begin; f2!= ec.audit_features[(int)(*i)[1]].end; f2++)
@@ -62,11 +64,11 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
<<"(" << ((f->weight_index + offset +k) & mask) << ")" << ':' << f->x;
cout << ':' << weights[(f->weight_index + offset + k) & mask];
- cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+all.rank)&mask)
- <<"(" << ((f2->weight_index + offset +k+all.rank) & mask) << ")" << ':' << f2->x;
- cout << ':' << weights[(f2->weight_index + offset + k+all.rank) & mask];
+ cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+d.rank)&mask)
+ <<"(" << ((f2->weight_index + offset +k+d.rank) & mask) << ")" << ':' << f2->x;
+ cout << ':' << weights[(f2->weight_index + offset + k+d.rank) & mask];
- cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + all.rank) & mask];
+ cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + d.rank) & mask];
}
}
}
@@ -77,17 +79,25 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
cout << endl;
}
-void mf_print_audit_features(vw& all, example& ec, size_t offset)
+void mf_print_audit_features(gdmf& d, example& ec, size_t offset)
{
- print_result(all.stdout_fileno,ec.pred.scalar,-1,ec.tag);
- mf_print_offset_features(all, ec, offset);
+ print_result(d.all->stdout_fileno,ec.pred.scalar,-1,ec.tag);
+ mf_print_offset_features(d, ec, offset);
}
-float mf_predict(vw& all, example& ec)
+float mf_predict(gdmf& d, example& ec)
{
+ vw& all = *d.all;
label_data& ld = ec.l.simple;
float prediction = ld.initial;
+ for (vector<string>::iterator i = d.all->pairs.begin(); i != d.all->pairs.end();i++)
+ {
+ ec.num_features -= ec.atomics[(int)(*i)[0]].size() * ec.atomics[(int)(*i)[1]].size();
+ ec.num_features += ec.atomics[(int)(*i)[0]].size() * d.rank;
+ ec.num_features += ec.atomics[(int)(*i)[1]].size() * d.rank;
+ }
+
// clear stored predictions
ec.topic_predictions.erase();
@@ -107,18 +117,18 @@ float mf_predict(vw& all, example& ec)
{
if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
{
- for (uint32_t k = 1; k <= all.rank; k++)
+ for (uint32_t k = 1; k <= d.rank; k++)
{
// x_l * l^k
- // l^k is from index+1 to index+all.rank
+ // l^k is from index+1 to index+d.rank
//float x_dot_l = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k);
float x_dot_l = 0.;
GD::foreach_feature<float, GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, x_dot_l, k);
// x_r * r^k
- // r^k is from index+all.rank+1 to index+2*all.rank
- //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank);
+ // r^k is from index+d.rank+1 to index+2*d.rank
+ //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank);
float x_dot_r = 0.;
- GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+all.rank);
+ GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+d.rank);
prediction += x_dot_l * x_dot_r;
@@ -146,7 +156,7 @@ float mf_predict(vw& all, example& ec)
ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ld.weight;
if (all.audit)
- mf_print_audit_features(all, ec, 0);
+ mf_print_audit_features(d, ec, 0);
return ec.pred.scalar;
}
@@ -158,55 +168,55 @@ void sd_offset_update(weight* weights, size_t mask, feature* begin, feature* end
weights[(f->weight_index + offset) & mask] += update * f->x - regularization * weights[(f->weight_index + offset) & mask];
}
-void mf_train(vw& all, example& ec)
-{
- weight* weights = all.reg.weight_vector;
- size_t mask = all.reg.weight_mask;
- label_data& ld = ec.l.simple;
-
- // use final prediction to get update size
+ void mf_train(gdmf& d, example& ec)
+ {
+ vw& all = *d.all;
+ weight* weights = all.reg.weight_vector;
+ size_t mask = all.reg.weight_mask;
+ label_data& ld = ec.l.simple;
+
+ // use final prediction to get update size
// update = eta_t*(y-y_hat) where eta_t = eta/(3*t^p) * importance weight
- float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight;
- float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq);
-
- float regularization = eta_t * all.l2_lambda;
-
- // linear update
- for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
- sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization);
-
- // quadratic update
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- {
- if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
- {
-
- // update l^k weights
- for (size_t k = 1; k <= all.rank; k++)
- {
- // r^k \cdot x_r
- float r_dot_x = ec.topic_predictions[2*k];
- // l^k <- l^k + update * (r^k \cdot x_r) * x_l
- sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization);
- }
-
- // update r^k weights
- for (size_t k = 1; k <= all.rank; k++)
- {
- // l^k \cdot x_l
- float l_dot_x = ec.topic_predictions[2*k-1];
- // r^k <- r^k + update * (l^k \cdot x_l) * x_r
- sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank, update*l_dot_x, regularization);
- }
-
- }
- }
- if (all.triples.begin() != all.triples.end()) {
- cerr << "cannot use triples in matrix factorization" << endl;
- throw exception();
- }
-}
-
+ float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight;
+ float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq);
+
+ float regularization = eta_t * all.l2_lambda;
+
+ // linear update
+ for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
+ sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization);
+
+ // quadratic update
+ for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
+ {
+ if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
+ {
+
+ // update l^k weights
+ for (size_t k = 1; k <= d.rank; k++)
+ {
+ // r^k \cdot x_r
+ float r_dot_x = ec.topic_predictions[2*k];
+ // l^k <- l^k + update * (r^k \cdot x_r) * x_l
+ sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization);
+ }
+ // update r^k weights
+ for (size_t k = 1; k <= d.rank; k++)
+ {
+ // l^k \cdot x_l
+ float l_dot_x = ec.topic_predictions[2*k-1];
+ // r^k <- r^k + update * (l^k \cdot x_l) * x_r
+ sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank, update*l_dot_x, regularization);
+ }
+
+ }
+ }
+ if (all.triples.begin() != all.triples.end()) {
+ cerr << "cannot use triples in matrix factorization" << endl;
+ throw exception();
+ }
+ }
+
void save_load(gdmf& d, io_buf& model_file, bool read, bool text)
{
vw* all = d.all;
@@ -231,7 +241,7 @@ void mf_train(vw& all, example& ec)
do
{
brw = 0;
- size_t K = all->rank*2+1;
+ size_t K = d.rank*2+1;
text_len = sprintf(buff, "%d ", i);
brw += bin_text_read_write_fixed(model_file,(char *)&i, sizeof (i),
@@ -273,19 +283,16 @@ void mf_train(vw& all, example& ec)
}
void predict(gdmf& d, base_learner& base, example& ec)
- {
- vw* all = d.all;
-
- mf_predict(*all,ec);
+ { mf_predict(d,ec);
}
void learn(gdmf& d, base_learner& base, example& ec)
{
- vw* all = d.all;
+ vw& all = *d.all;
- predict(d, base, ec);
- if (all->training && ec.l.simple.label != FLT_MAX)
- mf_train(*all, ec);
+ mf_predict(d, ec);
+ if (all.training && ec.l.simple.label != FLT_MAX)
+ mf_train(d, ec);
}
base_learner* setup(vw& all, po::variables_map& vm)
@@ -294,16 +301,16 @@ void mf_train(vw& all, example& ec)
opts.add_options()
("rank", po::value<uint32_t>(), "rank for matrix factorization.");
vm = add_options(all, opts);
- if(!vm.count("gdmf"))
+ if(!vm.count("rank"))
return NULL;
- else
- all.rank = vm["gdmf"].as<uint32_t>();
gdmf& data = calloc_or_die<gdmf>();
data.all = &all;
+ data.rank = vm["rank"].as<uint32_t>();
+ *all.file_options << " --rank " << data.rank;
// store linear + 2*rank weights per index, round up to power of two
- float temp = ceilf(logf((float)(all.rank*2+1)) / logf (2.f));
+ float temp = ceilf(logf((float)(data.rank*2+1)) / logf (2.f));
all.reg.stride_shift = (size_t) temp;
all.random_weights = true;