diff options
Diffstat (limited to 'vowpalwabbit/gd_mf.cc')
-rw-r--r-- | vowpalwabbit/gd_mf.cc | 161 |
1 files changed, 84 insertions, 77 deletions
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc index 1a9a6335..4e91aeba 100644 --- a/vowpalwabbit/gd_mf.cc +++ b/vowpalwabbit/gd_mf.cc @@ -26,10 +26,12 @@ using namespace LEARNER; namespace GDMF { struct gdmf { vw* all; + uint32_t rank; }; -void mf_print_offset_features(vw& all, example& ec, size_t offset) +void mf_print_offset_features(gdmf& d, example& ec, size_t offset) { + vw& all = *d.all; weight* weights = all.reg.weight_vector; size_t mask = all.reg.weight_mask; for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) @@ -53,7 +55,7 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset) if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) { /* print out nsk^feature:hash:value:weight:nsk^feature^:hash:value:weight:prod_weights */ - for (size_t k = 1; k <= all.rank; k++) + for (size_t k = 1; k <= d.rank; k++) { for (audit_data* f = ec.audit_features[(int)(*i)[0]].begin; f!= ec.audit_features[(int)(*i)[0]].end; f++) for (audit_data* f2 = ec.audit_features[(int)(*i)[1]].begin; f2!= ec.audit_features[(int)(*i)[1]].end; f2++) @@ -62,11 +64,11 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset) <<"(" << ((f->weight_index + offset +k) & mask) << ")" << ':' << f->x; cout << ':' << weights[(f->weight_index + offset + k) & mask]; - cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+all.rank)&mask) - <<"(" << ((f2->weight_index + offset +k+all.rank) & mask) << ")" << ':' << f2->x; - cout << ':' << weights[(f2->weight_index + offset + k+all.rank) & mask]; + cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+d.rank)&mask) + <<"(" << ((f2->weight_index + offset +k+d.rank) & mask) << ")" << ':' << f2->x; + cout << ':' << weights[(f2->weight_index + offset + k+d.rank) & mask]; - cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + all.rank) & mask]; + cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + d.rank) & mask]; } } } @@ -77,17 +79,25 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset) cout << endl; } -void mf_print_audit_features(vw& all, example& ec, size_t offset) +void mf_print_audit_features(gdmf& d, example& ec, size_t offset) { - print_result(all.stdout_fileno,ec.pred.scalar,-1,ec.tag); - mf_print_offset_features(all, ec, offset); + print_result(d.all->stdout_fileno,ec.pred.scalar,-1,ec.tag); + mf_print_offset_features(d, ec, offset); } -float mf_predict(vw& all, example& ec) +float mf_predict(gdmf& d, example& ec) { + vw& all = *d.all; label_data& ld = ec.l.simple; float prediction = ld.initial; + for (vector<string>::iterator i = d.all->pairs.begin(); i != d.all->pairs.end();i++) + { + ec.num_features -= ec.atomics[(int)(*i)[0]].size() * ec.atomics[(int)(*i)[1]].size(); + ec.num_features += ec.atomics[(int)(*i)[0]].size() * d.rank; + ec.num_features += ec.atomics[(int)(*i)[1]].size() * d.rank; + } + // clear stored predictions ec.topic_predictions.erase(); @@ -107,18 +117,18 @@ float mf_predict(vw& all, example& ec) { if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) { - for (uint32_t k = 1; k <= all.rank; k++) + for (uint32_t k = 1; k <= d.rank; k++) { // x_l * l^k - // l^k is from index+1 to index+all.rank + // l^k is from index+1 to index+d.rank //float x_dot_l = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k); float x_dot_l = 0.; GD::foreach_feature<float, GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, x_dot_l, k); // x_r * r^k - // r^k is from index+all.rank+1 to index+2*all.rank - //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank); + // r^k is from index+d.rank+1 to index+2*d.rank + //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank); float x_dot_r = 0.; - GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+all.rank); + GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+d.rank); prediction += x_dot_l * x_dot_r; @@ -146,7 +156,7 @@ float mf_predict(vw& all, example& ec) ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ld.weight; if (all.audit) - mf_print_audit_features(all, ec, 0); + mf_print_audit_features(d, ec, 0); return ec.pred.scalar; } @@ -158,55 +168,55 @@ void sd_offset_update(weight* weights, size_t mask, feature* begin, feature* end weights[(f->weight_index + offset) & mask] += update * f->x - regularization * weights[(f->weight_index + offset) & mask]; } -void mf_train(vw& all, example& ec) -{ - weight* weights = all.reg.weight_vector; - size_t mask = all.reg.weight_mask; - label_data& ld = ec.l.simple; - - // use final prediction to get update size + void mf_train(gdmf& d, example& ec) + { + vw& all = *d.all; + weight* weights = all.reg.weight_vector; + size_t mask = all.reg.weight_mask; + label_data& ld = ec.l.simple; + + // use final prediction to get update size // update = eta_t*(y-y_hat) where eta_t = eta/(3*t^p) * importance weight - float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight; - float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq); - - float regularization = eta_t * all.l2_lambda; - - // linear update - for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) - sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization); - - // quadratic update - for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) - { - if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) - { - - // update l^k weights - for (size_t k = 1; k <= all.rank; k++) - { - // r^k \cdot x_r - float r_dot_x = ec.topic_predictions[2*k]; - // l^k <- l^k + update * (r^k \cdot x_r) * x_l - sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization); - } - - // update r^k weights - for (size_t k = 1; k <= all.rank; k++) - { - // l^k \cdot x_l - float l_dot_x = ec.topic_predictions[2*k-1]; - // r^k <- r^k + update * (l^k \cdot x_l) * x_r - sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank, update*l_dot_x, regularization); - } - - } - } - if (all.triples.begin() != all.triples.end()) { - cerr << "cannot use triples in matrix factorization" << endl; - throw exception(); - } -} - + float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight; + float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq); + + float regularization = eta_t * all.l2_lambda; + + // linear update + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization); + + // quadratic update + for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) + { + if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) + { + + // update l^k weights + for (size_t k = 1; k <= d.rank; k++) + { + // r^k \cdot x_r + float r_dot_x = ec.topic_predictions[2*k]; + // l^k <- l^k + update * (r^k \cdot x_r) * x_l + sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization); + } + // update r^k weights + for (size_t k = 1; k <= d.rank; k++) + { + // l^k \cdot x_l + float l_dot_x = ec.topic_predictions[2*k-1]; + // r^k <- r^k + update * (l^k \cdot x_l) * x_r + sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank, update*l_dot_x, regularization); + } + + } + } + if (all.triples.begin() != all.triples.end()) { + cerr << "cannot use triples in matrix factorization" << endl; + throw exception(); + } + } + void save_load(gdmf& d, io_buf& model_file, bool read, bool text) { vw* all = d.all; @@ -231,7 +241,7 @@ void mf_train(vw& all, example& ec) do { brw = 0; - size_t K = all->rank*2+1; + size_t K = d.rank*2+1; text_len = sprintf(buff, "%d ", i); brw += bin_text_read_write_fixed(model_file,(char *)&i, sizeof (i), @@ -273,19 +283,16 @@ void mf_train(vw& all, example& ec) } void predict(gdmf& d, base_learner& base, example& ec) - { - vw* all = d.all; - - mf_predict(*all,ec); + { mf_predict(d,ec); } void learn(gdmf& d, base_learner& base, example& ec) { - vw* all = d.all; + vw& all = *d.all; - predict(d, base, ec); - if (all->training && ec.l.simple.label != FLT_MAX) - mf_train(*all, ec); + mf_predict(d, ec); + if (all.training && ec.l.simple.label != FLT_MAX) + mf_train(d, ec); } base_learner* setup(vw& all, po::variables_map& vm) @@ -294,16 +301,16 @@ void mf_train(vw& all, example& ec) opts.add_options() ("rank", po::value<uint32_t>(), "rank for matrix factorization."); vm = add_options(all, opts); - if(!vm.count("gdmf")) + if(!vm.count("rank")) return NULL; - else - all.rank = vm["gdmf"].as<uint32_t>(); gdmf& data = calloc_or_die<gdmf>(); data.all = &all; + data.rank = vm["rank"].as<uint32_t>(); + *all.file_options << " --rank " << data.rank; // store linear + 2*rank weights per index, round up to power of two - float temp = ceilf(logf((float)(all.rank*2+1)) / logf (2.f)); + float temp = ceilf(logf((float)(data.rank*2+1)) / logf (2.f)); all.reg.stride_shift = (size_t) temp; all.random_weights = true; |