diff options
author | Paulius Klyvis <paulius.klyvis@adform.com> | 2014-12-10 16:21:54 +0300 |
---|---|---|
committer | Paulius Klyvis <paulius.klyvis@adform.com> | 2014-12-10 16:21:54 +0300 |
commit | b1d6c08355d3cf7331ad849cbe4740e4b9d13fb2 (patch) | |
tree | 6a376e43532c39bcfd034a27b2955b7b6a757c08 /vowpalwabbit | |
parent | 643abe4893c22ef7bae535a60a53a9b87856ee56 (diff) |
use foreach template
Diffstat (limited to 'vowpalwabbit')
-rw-r--r-- | vowpalwabbit/ftrl_proximal.cc | 146 |
1 file changed, 26 insertions, 120 deletions
diff --git a/vowpalwabbit/ftrl_proximal.cc b/vowpalwabbit/ftrl_proximal.cc index f6d6fa60..36d90daa 100644 --- a/vowpalwabbit/ftrl_proximal.cc +++ b/vowpalwabbit/ftrl_proximal.cc @@ -46,44 +46,32 @@ namespace FTRL { vw* all; // set by initializer - double ftrl_alpha; - double ftrl_beta; + float ftrl_alpha; + float ftrl_beta; // evaluation file pointer FILE* fo; bool progressive_validation; }; - void update_accumulated_state(weight* w, ftrl &b) { + void update_accumulated_state(weight* w, float ftrl_alpha) { double ng2 = w[W_G2] + w[W_GT]*w[W_GT]; - double sigma = (sqrt(ng2) - sqrt(w[W_G2]))/ b.ftrl_alpha; + double sigma = (sqrt(ng2) - sqrt(w[W_G2]))/ ftrl_alpha; w[W_ZT] += w[W_GT] - sigma * w[W_XT]; w[W_G2] = ng2; } - // use in gradient prediction - void quad_grad_update(weight* weights, feature& page_feature, - v_array<feature> &offer_features, size_t mask, float g, ftrl &b) { - size_t halfhash = quadratic_constant * page_feature.weight_index; - float update = g * page_feature.x; - for (feature* ele = offer_features.begin; ele != offer_features.end; ele++) - { - weight* w=&weights[(halfhash + ele->weight_index) & mask]; - w[W_GT] = update * ele->x; - update_accumulated_state(w, b); - } - } - - void cubic_grad_update(weight* weights, feature& f0, feature& f1, - v_array<feature> &cross_features, size_t mask, float g, ftrl &b) { - size_t halfhash = cubic_constant2 * (cubic_constant * f0.weight_index + f1.weight_index); - float update = g * f0.x * f1.x; - for (feature* ele = cross_features.begin; ele != cross_features.end; ele++) { - weight* w=&weights[(halfhash + ele->weight_index) & mask]; - w[W_GT] = update * ele->x; - update_accumulated_state(w, b); - } - } + struct update_data { + float update; + float ftrl_alpha; + }; + + //void update_grad(weight* weights, size_t mask, float loss_grad) + void update_grad(update_data& d, float x, float& wref) { + float* w = &wref; + w[W_GT] = d.update * x; + update_accumulated_state(w, d.ftrl_alpha); + } float ftrl_predict(vw& 
all, example& ec) { ec.partial_prediction = GD::inline_predict(all, ec); @@ -97,42 +85,13 @@ namespace FTRL { label_data& ld = ec.l.simple; all.set_minmax(all.sd, ld.label); - float loss_grad = all.loss->first_derivative(all.sd, fp, ld.label) * ld.weight; - - size_t mask = all.reg.weight_mask; - weight* weights = all.reg.weight_vector; - for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) { - feature *f = ec.atomics[*i].begin; - for (; f != ec.atomics[*i].end; f++) { - weight* w = &weights[f->weight_index & mask]; - w[W_GT] = loss_grad * f->x; // += -> = - update_accumulated_state(w, b); - } - } - - // bi-gram feature - for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end(); i++) { - if (ec.atomics[(int)(*i)[0]].size() > 0) { - v_array<feature> temp = ec.atomics[(int)(*i)[0]]; - for (; temp.begin != temp.end; temp.begin++) - quad_grad_update(weights, *temp.begin, ec.atomics[(int)(*i)[1]], mask, loss_grad, b); - } - } + struct update_data data; + + data.update = all.loss->first_derivative(all.sd, fp, ld.label) * ld.weight; + data.ftrl_alpha = b.ftrl_alpha; + + GD::foreach_feature<update_data,update_grad>(all, ec, data); - // tri-gram feature - for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) { - if ((ec.atomics[(int)(*i)[0]].size() == 0) - || (ec.atomics[(int)(*i)[1]].size() == 0) - || (ec.atomics[(int)(*i)[2]].size() == 0)) { - continue; - } - v_array<feature> temp1 = ec.atomics[(int)(*i)[0]]; - for (; temp1.begin != temp1.end; temp1.begin++) { - v_array<feature> temp2 = ec.atomics[(int)(*i)[1]]; - for (; temp2.begin != temp2.end; temp2.begin++) - cubic_grad_update(weights, *temp1.begin, *temp2.begin, ec.atomics[(int)(*i)[2]], mask, loss_grad, b); - } - } return fp; } @@ -179,59 +138,6 @@ namespace FTRL { update_weight(*all, a, ec); } - /*void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text) { - char buff[512]; - - int text_len = sprintf(buff, "sum_loss %f\n", 
all.sd->sum_loss); - bin_text_read_write_fixed(model_file,(char*)&all.sd->sum_loss, sizeof(all.sd->sum_loss), "", read, buff, text_len, text); - - text_len = sprintf(buff, "weighted_examples %f\n", all.sd->weighted_examples); - bin_text_read_write_fixed(model_file,(char*)&all.sd->weighted_examples, sizeof(all.sd->weighted_examples), "", read, buff, text_len, text); - - text_len = sprintf(buff, "weighted_labels %f\n", all.sd->weighted_labels); - bin_text_read_write_fixed(model_file,(char*)&all.sd->weighted_labels, sizeof(all.sd->weighted_labels), "", read, buff, text_len, text); - - text_len = sprintf(buff, "example_number %u\n", (uint32_t)all.sd->example_number); - bin_text_read_write_fixed(model_file,(char*)&all.sd->example_number, sizeof(all.sd->example_number), "", read, buff, text_len, text); - - text_len = sprintf(buff, "total_features %u\n", (uint32_t)all.sd->total_features); - bin_text_read_write_fixed(model_file,(char*)&all.sd->total_features, sizeof(all.sd->total_features), "", read, buff, text_len, text); - - uint32_t length = 1 << all.num_bits; - uint32_t stride = all.reg.stride_shift; - uint32_t i = 0; - size_t brw = 1; - do - { - brw = 1; - weight* v; - if (read) { // read binary - brw = bin_read_fixed(model_file, (char*)&i, sizeof(i),""); - if (brw > 0) { - assert (i< length); - v = &(all.reg.weight_vector[stride*i]); - brw += bin_read_fixed(model_file, (char*)v, 4*sizeof(*v), ""); - } - } - else { // write binary or text - // save w[W_XT], w[W_ZT], w[W_G2] if any of them is not zero - v = &(all.reg.weight_vector[stride*i]); - if (v[W_XT] !=0. || v[W_ZT] !=0. || v[W_G2] !=0.) 
{ - text_len = sprintf(buff, "%d", i); - brw = bin_text_write_fixed(model_file,(char *)&i, sizeof (i), - buff, text_len, text); - - text_len = sprintf(buff, ":%f %f %f %f\n", *v, *(v+1), *(v+2), *(v+3)); - brw += bin_text_write_fixed(model_file, (char *)v, 4*sizeof (*v), - buff, text_len, text); - } // end if - - } // end else - - if (!read) { i++; } - } while ((!read && i < length) || (read && brw >0)); - }*/ - void save_load(ftrl& b, io_buf& model_file, bool read, bool text) { vw* all = b.all; if (read) { @@ -261,7 +167,7 @@ namespace FTRL { //ec.l.simple.prediction = ftrl_predict(*all,ec); ec.pred.scalar = ftrl_predict(*all,ec); } - + learner* setup(vw& all, po::variables_map& vm) { ftrl* b = (ftrl*)calloc_or_die(1, sizeof(ftrl)); @@ -272,18 +178,18 @@ namespace FTRL { po::options_description ftrl_opts("FTRL options"); ftrl_opts.add_options() - ("ftrl_alpha", po::value<double>(&(b->ftrl_alpha)), "Learning rate for FTRL-proximal optimization") - ("ftrl_beta", po::value<double>(&(b->ftrl_beta)), "FTRL beta") + ("ftrl_alpha", po::value<float>(&(b->ftrl_alpha)), "Learning rate for FTRL-proximal optimization") + ("ftrl_beta", po::value<float>(&(b->ftrl_beta)), "FTRL beta") ("progressive_validation", po::value<string>()->default_value("ftrl.evl"), "File to record progressive validation for ftrl-proximal"); vm = add_options(all, ftrl_opts); if (vm.count("ftrl_alpha")) { - b->ftrl_alpha = vm["ftrl_alpha"].as<double>(); + b->ftrl_alpha = vm["ftrl_alpha"].as<float>(); } if (vm.count("ftrl_beta")) { - b->ftrl_beta = vm["ftrl_beta"].as<double>(); + b->ftrl_beta = vm["ftrl_beta"].as<float>(); } all.reg.stride_shift = 2; // NOTE: for more parameter storage |