Diffstat (limited to 'vowpalwabbit/gd.cc')
-rw-r--r--   vowpalwabbit/gd.cc   170
1 file changed, 85 insertions(+), 85 deletions(-)
diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc
index 816ac171..021c8876 100644
--- a/vowpalwabbit/gd.cc
+++ b/vowpalwabbit/gd.cc
@@ -43,7 +43,7 @@ namespace GD
   size_t no_win_counter;
   size_t early_stop_thres;
   float initial_constant;
-  void (*predict)(gd*, learner&, example*);
+  void (*predict)(gd&, learner&, example&);
   vw* all;
 };
@@ -57,12 +57,12 @@ namespace GD
 };
 
 template <void (*T)(train_data&, float, float&)>
-void generic_train(vw& all, example* &ec, float update, bool sqrt_norm)
+void generic_train(vw& all, example& ec, float update, bool sqrt_norm)
 {
   if (fabs(update) == 0.)
     return;
 
-  float total_weight = ec->example_t;
+  float total_weight = ec.example_t;
   if(!all.holdout_set_off)
     total_weight -= (float)all.sd->weighted_holdout_examples; //exclude weights from test_only examples
@@ -124,9 +124,9 @@ float InvSqrt(float x){
   }
 }
 
-void end_pass(gd* g)
+void end_pass(gd& g)
 {
-  vw* all = g->all;
+  vw* all = g.all;
   sync_weights(*all);
   if(all->span_server != "") {
@@ -144,9 +144,9 @@ float InvSqrt(float x){
   if(!all->holdout_set_off)
   {
-    if(summarize_holdout_set(*all, g->no_win_counter))
+    if(summarize_holdout_set(*all, g.no_win_counter))
       finalize_regressor(*all, all->final_regressor_name);
-    if((g->early_stop_thres == g->no_win_counter) &&
+    if((g.early_stop_thres == g.no_win_counter) &&
       ((all->check_holdout_every_n_passes <= 1) ||
       ((all->current_pass % all->check_holdout_every_n_passes) == 0)))
       all->early_terminate = true;
@@ -258,17 +258,17 @@ void audit_triple(vw& all, feature& f0, audit_data* f0_audit, feature& f1, audit
   audit_features(all, right_features, audit_right, results, prepend, ns_pre, halfhash + offset);
 }
 
-void print_features(vw& all, example* &ec)
+void print_features(vw& all, example& ec)
 {
   weight* weights = all.reg.weight_vector;
 
   if (all.lda > 0)
   {
     size_t count = 0;
-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
-      count += ec->audit_features[*i].size() + ec->atomics[*i].size();
-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
-      for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
+      count += ec.audit_features[*i].size() + ec.atomics[*i].size();
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
+      for (audit_data *f = ec.audit_features[*i].begin; f != ec.audit_features[*i].end; f++)
       {
         cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.reg.stride & all.parse_mask) << ':' << f->x;
         for (size_t k = 0; k < all.lda; k++)
@@ -282,21 +282,21 @@ void print_features(vw& all, example* &ec)
     string empty;
     string ns_pre;
 
-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++){
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++){
       ns_pre = "";
-      audit_features(all, ec->atomics[*i], ec->audit_features[*i], features, empty, ns_pre, ec->ft_offset);
+      audit_features(all, ec.atomics[*i], ec.audit_features[*i], features, empty, ns_pre, ec.ft_offset);
       ns_pre = "";
     }
 
     for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
     {
       int fst = (*i)[0];
       int snd = (*i)[1];
-      for (size_t j = 0; j < ec->atomics[fst].size(); j++)
+      for (size_t j = 0; j < ec.atomics[fst].size(); j++)
       {
         audit_data* a = NULL;
-        if (ec->audit_features[fst].size() > 0)
-          a = & ec->audit_features[fst][j];
-        audit_quad(all, ec->atomics[fst][j], a, ec->atomics[snd], ec->audit_features[snd], features, ns_pre);
+        if (ec.audit_features[fst].size() > 0)
+          a = & ec.audit_features[fst][j];
+        audit_quad(all, ec.atomics[fst][j], a, ec.atomics[snd], ec.audit_features[snd], features, ns_pre);
       }
     }
@@ -305,17 +305,17 @@ void print_features(vw& all, example* &ec)
       int fst = (*i)[0];
       int snd = (*i)[1];
       int trd = (*i)[2];
-      for (size_t j = 0; j < ec->atomics[fst].size(); j++)
+      for (size_t j = 0; j < ec.atomics[fst].size(); j++)
       {
         audit_data* a1 = NULL;
-        if (ec->audit_features[fst].size() > 0)
-          a1 = & ec->audit_features[fst][j];
-        for (size_t k = 0; k < ec->atomics[snd].size(); k++)
+        if (ec.audit_features[fst].size() > 0)
+          a1 = & ec.audit_features[fst][j];
+        for (size_t k = 0; k < ec.atomics[snd].size(); k++)
         {
           audit_data* a2 = NULL;
-          if (ec->audit_features[snd].size() > 0)
-            a2 = & ec->audit_features[snd][k];
-          audit_triple(all, ec->atomics[fst][j], a1, ec->atomics[snd][k], a2, ec->atomics[trd], ec->audit_features[trd], features, ns_pre);
+          if (ec.audit_features[snd].size() > 0)
+            a2 = & ec.audit_features[snd][k];
+          audit_triple(all, ec.atomics[fst][j], a1, ec.atomics[snd][k], a2, ec.atomics[trd], ec.audit_features[trd], features, ns_pre);
         }
       }
     }
@@ -329,10 +329,10 @@ void print_features(vw& all, example* &ec)
   }
 }
 
-void print_audit_features(vw& all, example* ec)
+void print_audit_features(vw& all, example& ec)
 {
   if(all.audit)
-    print_result(all.stdout_fileno,ec->final_prediction,-1,ec->tag);
+    print_result(all.stdout_fileno,ec.final_prediction,-1,ec.tag);
   fflush(stdout);
   print_features(all, ec);
 }
@@ -352,9 +352,9 @@ float finalize_prediction(vw& all, float ret)
 }
 
 template<bool normalized_training, bool reg_mode_odd, bool power_t_half>
-void predict(gd* g, learner& base, example* ec)
+void predict(gd& g, learner& base, example& ec)
 {
-  vw* all = g->all;
+  vw* all = g.all;
 
   if (normalized_training) {
     if(power_t_half) {
@@ -363,27 +363,27 @@ void predict(gd* g, learner& base, example* ec)
       float gravity = (float)all->sd->gravity;
       if (all->adaptive)
         if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 1> >(*all, ec, gravity);
+          ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 1> >(*all, ec, gravity);
         else
-          ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 2> >(*all, ec, gravity);
+          ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 2> >(*all, ec, gravity);
       else
         if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 1> >(*all, ec, gravity);
+          ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 1> >(*all, ec, gravity);
         else
-          ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 2> >(*all, ec, gravity);
+          ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 2> >(*all, ec, gravity);
     }
     else {
       if (all->adaptive)
         if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<vec_add_rescale<true, 1> >(*all, ec);
+          ec.partial_prediction = inline_predict<vec_add_rescale<true, 1> >(*all, ec);
         else
-          ec->partial_prediction = inline_predict<vec_add_rescale<true, 2> >(*all, ec);
+          ec.partial_prediction = inline_predict<vec_add_rescale<true, 2> >(*all, ec);
       else
        if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<vec_add_rescale<false, 1> >(*all, ec);
+          ec.partial_prediction = inline_predict<vec_add_rescale<false, 1> >(*all, ec);
       else
-          ec->partial_prediction = inline_predict<vec_add_rescale<false, 2> >(*all, ec);
+          ec.partial_prediction = inline_predict<vec_add_rescale<false, 2> >(*all, ec);
     }
   }
   else {
@@ -392,28 +392,28 @@ void predict(gd* g, learner& base, example* ec)
       gnp temp = {(float)all->sd->gravity, all->power_t};
       if (all->adaptive)
        if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 1> >(*all, ec, temp);
+          ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 1> >(*all, ec, temp);
        else
-          ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 2> >(*all, ec, temp);
+          ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 2> >(*all, ec, temp);
      else
        if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 1> >(*all, ec, temp);
+          ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 1> >(*all, ec, temp);
        else
-          ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 2> >(*all, ec, temp);
+          ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 2> >(*all, ec, temp);
     }
     else {
       float power_t = all->power_t;
       if (all->adaptive)
        if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<float, vec_add_rescale_general<true, 1> >(*all, ec, power_t);
+          ec.partial_prediction = inline_predict<float, vec_add_rescale_general<true, 1> >(*all, ec, power_t);
        else
-          ec->partial_prediction = inline_predict<float, vec_add_rescale_general<true, 2> >(*all, ec, power_t);
+          ec.partial_prediction = inline_predict<float, vec_add_rescale_general<true, 2> >(*all, ec, power_t);
      else
        if (all->normalized_idx == 1)
-          ec->partial_prediction = inline_predict<float, vec_add_rescale_general<false, 1> >(*all, ec, power_t);
+          ec.partial_prediction = inline_predict<float, vec_add_rescale_general<false, 1> >(*all, ec, power_t);
        else
-          ec->partial_prediction = inline_predict<float, vec_add_rescale_general<false, 2> >(*all, ec, power_t);
+          ec.partial_prediction = inline_predict<float, vec_add_rescale_general<false, 2> >(*all, ec, power_t);
     }
   }
 }
@@ -422,13 +422,13 @@ void predict(gd* g, learner& base, example* ec)
     if (reg_mode_odd)
     {
       float gravity = (float)all->sd->gravity;
-      ec->partial_prediction = inline_predict<float, vec_add_trunc>(*all, ec, gravity);
+      ec.partial_prediction = inline_predict<float, vec_add_trunc>(*all, ec, gravity);
     }
     else
-      ec->partial_prediction = inline_predict<vec_add>(*all, ec);
+      ec.partial_prediction = inline_predict<vec_add>(*all, ec);
   }
 
-  ec->final_prediction = finalize_prediction(*all, ec->partial_prediction * (float)all->sd->contraction);
+  ec.final_prediction = finalize_prediction(*all, ec.partial_prediction * (float)all->sd->contraction);
 
   if (all->audit || all->hash_inv)
     print_audit_features(*all, ec);
@@ -495,10 +495,10 @@ inline void powert_norm_compute(norm_data& nd, float x, float& fw) {
 }
 
 template <void (*T)(norm_data&,float,float&)>
-float compute_norm(vw& all, example* &ec)
+float compute_norm(vw& all, example& ec)
 {//We must traverse the features in _precisely_ the same order as during training.
-  label_data* ld = (label_data*)ec->ld;
-  float g = all.loss->getSquareGrad(ec->final_prediction, ld->label) * ld->weight;
+  label_data* ld = (label_data*)ec.ld;
+  float g = all.loss->getSquareGrad(ec.final_prediction, ld->label) * ld->weight;
   if (g==0) return 1.;
 
   norm_data nd = {g, 0., 0., all.power_t};
@@ -506,7 +506,7 @@ float compute_norm(vw& all, example* &ec)
   foreach_feature<norm_data,T>(all, ec, nd);
 
   if(all.normalized_updates) {
-    float total_weight = ec->example_t;
+    float total_weight = ec.example_t;
     if(!all.holdout_set_off)
       total_weight -= (float)all.sd->weighted_holdout_examples; //exclude weights from test_only examples
@@ -527,13 +527,13 @@
 }
 
 template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx>
-void local_predict(vw& all, gd& g, example* ec)
+void local_predict(vw& all, gd& g, example& ec)
 {
-  label_data* ld = (label_data*)ec->ld;
+  label_data* ld = (label_data*)ec.ld;
 
   if(g.active_simulation){
-    float k = ec->example_t - ld->weight;
-    ec->revert_weight = all.loss->getRevertingWeight(all.sd, ec->final_prediction, all.eta/powf(k,all.power_t));
+    float k = ec.example_t - ld->weight;
+    ec.revert_weight = all.loss->getRevertingWeight(all.sd, ec.final_prediction, all.eta/powf(k,all.power_t));
     float importance = query_decision(all, ec, k);
     if(importance > 0){
       all.sd->queries += 1;
@@ -547,16 +547,16 @@ void local_predict(vw& all, gd& g, example* ec)
   if(all.active && ld->label != FLT_MAX)
     t = (float)all.sd->weighted_unlabeled_examples;
   else
-    t = (float)(ec->example_t - all.sd->weighted_holdout_examples);
+    t = (float)(ec.example_t - all.sd->weighted_holdout_examples);
 
-  ec->eta_round = 0;
+  ec.eta_round = 0;
   if (ld->label != FLT_MAX)
-    ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ld->label) * ld->weight;
+    ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ld->label) * ld->weight;
 
-  if (ld->label != FLT_MAX && !ec->test_only)
+  if (ld->label != FLT_MAX && !ec.test_only)
   {
-    if (all.training && ec->loss > 0.)
+    if (all.training && ec.loss > 0.)
     {
       float eta_t;
       float norm;
@@ -566,22 +566,22 @@ void local_predict(vw& all, gd& g, example* ec)
        else
          norm = compute_norm<powert_norm_compute<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(all,ec);
      else
-        norm = ec->total_sum_feat_sq;
+        norm = ec.total_sum_feat_sq;
      eta_t = all.eta * norm * ld->weight;
      if(!adaptive && all.power_t != 0) eta_t *= powf(t,-all.power_t);
 
      float update = 0.f;
      if( all.invariant_updates )
-        update = all.loss->getUpdate(ec->final_prediction, ld->label, eta_t, norm);
+        update = all.loss->getUpdate(ec.final_prediction, ld->label, eta_t, norm);
      else
-        update = all.loss->getUnsafeUpdate(ec->final_prediction, ld->label, eta_t, norm);
+        update = all.loss->getUnsafeUpdate(ec.final_prediction, ld->label, eta_t, norm);
 
-      ec->eta_round = (float) (update / all.sd->contraction);
+      ec.eta_round = (float) (update / all.sd->contraction);
 
-      if (all.reg_mode && fabs(ec->eta_round) > 1e-8) {
-        double dev1 = all.loss->first_derivative(all.sd, ec->final_prediction, ld->label);
-        double eta_bar = (fabs(dev1) > 1e-8) ? (-ec->eta_round / dev1) : 0.0;
+      if (all.reg_mode && fabs(ec.eta_round) > 1e-8) {
+        double dev1 = all.loss->first_derivative(all.sd, ec.final_prediction, ld->label);
+        double eta_bar = (fabs(dev1) > 1e-8) ? (-ec.eta_round / dev1) : 0.0;
        if (fabs(dev1) > 1e-8)
          all.sd->contraction *= (1. - all.l2_lambda * eta_bar * norm);
        //all.sd->contraction /= (1. + all.l2_lambda * eta_bar * norm);
@@ -590,23 +590,23 @@ void local_predict(vw& all, gd& g, example* ec)
    }
  }
  else if(all.active)
-    ec->revert_weight = all.loss->getRevertingWeight(all.sd, ec->final_prediction, all.eta/powf(t,all.power_t));
+    ec.revert_weight = all.loss->getRevertingWeight(all.sd, ec.final_prediction, all.eta/powf(t,all.power_t));
 }
 
 template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx>
-void update(gd* g, learner& base, example* ec)
+void update(gd& g, learner& base, example& ec)
 {
-  vw* all = g->all;
+  vw* all = g.all;
 
-  local_predict<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx > (*all, *g, ec);
+  local_predict<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx > (*all, g, ec);
 
-  if (ec->eta_round != 0.)
+  if (ec.eta_round != 0.)
  {
    if(all->power_t == 0.5)
-      generic_train<specialized_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> > (*all,ec,(float)ec->eta_round,true);
+      generic_train<specialized_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> > (*all,ec,(float)ec.eta_round,true);
    else
-      generic_train<general_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(*all,ec,(float)ec->eta_round,false);
+      generic_train<general_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(*all,ec,(float)ec.eta_round,false);
 
    if (all->sd->contraction < 1e-10)  // updating weights now to avoid numerical instability
      sync_weights(*all);
@@ -614,16 +614,16 @@
 }
 
 template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx>
-void learn(gd* g, learner& base, example* ec)
+void learn(gd& g, learner& base, example& ec)
 {
-  vw* all = g->all;
-  label_data* ld = (label_data*)ec->ld;
+  vw* all = g.all;
+  label_data* ld = (label_data*)ec.ld;
 
-  assert(ec->in_use);
+  assert(ec.in_use);
 
-  g->predict(g,base,ec);
+  g.predict(g,base,ec);
 
-  if ((all->holdout_set_off || !ec->test_only) && ld->weight > 0)
+  if ((all->holdout_set_off || !ec.test_only) && ld->weight > 0)
    update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx>(g,base,ec);
 }
@@ -830,9 +830,9 @@ void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text)
   while ((!read && i < length) || (read && brw >0));
 }
 
-void save_load(gd* g, io_buf& model_file, bool read, bool text)
+void save_load(gd& g, io_buf& model_file, bool read, bool text)
 {
-  vw* all = g->all;
+  vw* all = g.all;
   if(read)
   {
     initialize_regressor(*all);
@@ -850,8 +850,8 @@ void save_load(gd* g, io_buf& model_file, bool read, bool text)
     }
   }
 
-  if (g->initial_constant != 0.0)
-    VW::set_weight(*all, constant, 0, g->initial_constant);
+  if (g.initial_constant != 0.0)
+    VW::set_weight(*all, constant, 0, g.initial_constant);
 }
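
The commit is one mechanical refactor: the GD entry points (predict, update, learn, end_pass, save_load) and the print/norm helpers stop taking gd* and example* and take gd& and example& instead, the predict function-pointer member of the gd struct changes type to match, and every g-> and ec-> dereference becomes g. and ec. throughout. A minimal standalone sketch of that pattern, using hypothetical stand-in structs (learner, example, gd, predict_impl) rather than the real vowpalwabbit types:

#include <cassert>
#include <iostream>

// Hypothetical stand-ins for the real vw types; only the shape of the
// refactor matters here, not the fields.
struct learner {};

struct example {
  float partial_prediction;
  float final_prediction;
};

struct gd {
  // Before the commit: void (*predict)(gd*, learner&, example*);
  // After: reference parameters, so a null argument is impossible by construction.
  void (*predict)(gd&, learner&, example&);
};

// Callee bodies change mechanically: g->all becomes g.all, ec->x becomes ec.x.
void predict_impl(gd& g, learner& base, example& ec) {
  (void)g; (void)base;          // unused in this sketch
  ec.partial_prediction = 1.f;  // was: ec->partial_prediction
  ec.final_prediction = ec.partial_prediction;
}

int main() {
  gd g;
  g.predict = predict_impl;

  learner base;
  example ec = {0.f, 0.f};
  g.predict(g, base, ec);       // call sites pass the objects themselves, not addresses
  assert(ec.final_prediction == 1.f);
  std::cout << ec.final_prediction << std::endl;
  return 0;
}

The 85 insertions and 85 deletions pair off one to one, consistent with a pure signature rewrite: no control flow or arithmetic changes, only the calling convention.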