34 files changed, 1164 insertions, 1142 deletions
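The change below converts Vowpal Wabbit's reduction interfaces from pointer arguments to references: reduction state (autolink, bfgs, bs, cb, cbify, csoaa, ldf, ect, gd) and examples are now passed as T& / example& rather than T* / example*. Member access therefore switches from -> to ., call sites that hold pointers dereference them (*ec, *l.ec_seq[k]), the remaining pointer-based entry points such as VW::finish_example now receive &ec, and binary.cc's learner registration gains a concrete data type (set_learn<float, ...> instead of set_learn<void, ...>). A minimal standalone sketch of the calling-convention change, using simplified stand-in types rather than VW's real headers:

// before/after sketch of the pointer-to-reference migration
// (hypothetical types, not VW's; compiles with any C++ compiler)
#include <iostream>

struct example { float final_prediction; float loss; };

// old style was: void predict_or_learn(example* ec) { ec->final_prediction ... }
void predict_or_learn(example& ec)   // new style: reference parameter
{
  // the same thresholding binary.cc performs, with '.' instead of '->'
  ec.final_prediction = (ec.final_prediction > 0) ? 1.f : -1.f;
}

void finish_example(example* ec) {}  // stand-in for a still-pointer-based API

int main()
{
  example ec = { 0.3f, 0.f };
  predict_or_learn(ec);   // callers pass the object itself
  finish_example(&ec);    // pointer-taking APIs now get &ec at the call site
  std::cout << ec.final_prediction << std::endl;  // prints 1
  return 0;
}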
diff --git a/utl/active_interactor.py b/utl/active_interactor.py
index cf54b153..d84099d0 100755
--- a/utl/active_interactor.py
+++ b/utl/active_interactor.py
@@ -83,7 +83,7 @@ try:
     print 'sending unlabeled examples ...'
     for i,line in enumerate(unlabeled):
         sock.sendall(line)
-        #print 'sending unlabeled '+repr(line[:20])
+        print 'sending unlabeled '+repr(line[:20])
         response=recvall(sock, 256)
         #print 'unlabeled response '+repr(response)
         responselist=response.split(' ')
diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc
index 582b8d52..68781c04 100644
--- a/vowpalwabbit/autolink.cc
+++ b/vowpalwabbit/autolink.cc
@@ -14,23 +14,23 @@ namespace ALINK {
   };
 
   template <bool is_learn>
-  void predict_or_learn(autolink* b, learner& base, example* ec)
+  void predict_or_learn(autolink& b, learner& base, example& ec)
   {
     base.predict(ec);
-    float base_pred = ec->final_prediction;
+    float base_pred = ec.final_prediction;
 
     // add features of label
-    ec->indices.push_back(autolink_namespace);
+    ec.indices.push_back(autolink_namespace);
     float sum_sq = 0;
-    for (size_t i = 0; i < b->d; i++)
+    for (size_t i = 0; i < b.d; i++)
       if (base_pred != 0.)
         {
-          feature f = { base_pred, (uint32_t) (autoconstant + i * b->stride) };
-          ec->atomics[autolink_namespace].push_back(f);
+          feature f = { base_pred, (uint32_t) (autoconstant + i * b.stride) };
+          ec.atomics[autolink_namespace].push_back(f);
           sum_sq += base_pred*base_pred;
-          base_pred *= ec->final_prediction;
+          base_pred *= ec.final_prediction;
         }
-    ec->total_sum_feat_sq += sum_sq;
+    ec.total_sum_feat_sq += sum_sq;
 
     // apply predict or learn
     if (is_learn)
@@ -38,9 +38,9 @@ namespace ALINK {
     else
       base.predict(ec);
 
-    ec->atomics[autolink_namespace].erase();
-    ec->indices.pop();
-    ec->total_sum_feat_sq -= sum_sq;
+    ec.atomics[autolink_namespace].erase();
+    ec.indices.pop();
+    ec.total_sum_feat_sq -= sum_sq;
   }
 
   learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index b3c8367a..67c7074e 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -150,15 +150,15 @@ void reset_state(vw& all, bfgs& b, bool zero)
 // w[2] = step direction
 // w[3] = preconditioner
 
-bool test_example(example* ec)
+bool test_example(example& ec)
 {
-  return ((label_data*)ec->ld)->label == FLT_MAX;
+  return ((label_data*)ec.ld)->label == FLT_MAX;
 }
 
-float bfgs_predict(vw& all, example* &ec)
+float bfgs_predict(vw& all, example& ec)
 {
-  ec->partial_prediction = GD::inline_predict<vec_add>(all,ec);
-  return GD::finalize_prediction(all, ec->partial_prediction);
+  ec.partial_prediction = GD::inline_predict<vec_add>(all,ec);
+  return GD::finalize_prediction(all, ec.partial_prediction);
 }
 
 inline void add_grad(float& d, float f, float& fw)
@@ -166,18 +166,18 @@ inline void add_grad(float& d, float f, float& fw)
   fw += d * f;
 }
 
-float predict_and_gradient(vw& all, example* &ec)
+float predict_and_gradient(vw& all, example &ec)
 {
   float fp = bfgs_predict(all, ec);
 
-  label_data* ld = (label_data*)ec->ld;
+  label_data* ld = (label_data*)ec.ld;
   all.set_minmax(all.sd, ld->label);
 
   float loss_grad = all.loss->first_derivative(all.sd, fp,ld->label)*ld->weight;
 
-  ec->ft_offset += W_GT;
+  ec.ft_offset += W_GT;
   GD::foreach_feature<float,add_grad>(all, ec, loss_grad);
-  ec->ft_offset -= W_GT;
+  ec.ft_offset -= W_GT;
 
   return fp;
 }
@@ -187,22 +187,22 @@ inline void add_precond(float& d, float f, float& fw)
   fw += d * f * f;
 }
 
-void update_preconditioner(vw& all, example* &ec)
+void update_preconditioner(vw& all, example& ec)
 {
-  label_data* ld = (label_data*)ec->ld;
-  float curvature = all.loss->second_derivative(all.sd, ec->final_prediction,ld->label) * ld->weight;
+  label_data* ld = (label_data*)ec.ld;
+  float curvature = all.loss->second_derivative(all.sd, ec.final_prediction,ld->label) * ld->weight;
 
-  ec->ft_offset += W_COND;
+  ec.ft_offset += W_COND;
   GD::foreach_feature<float,add_precond>(all, ec, curvature);
-  ec->ft_offset -= W_COND;
+  ec.ft_offset -= W_COND;
 }
 
-float dot_with_direction(vw& all, example* &ec)
+float dot_with_direction(vw& all, example& ec)
 {
-  ec->ft_offset+= W_DIR;
+  ec.ft_offset+= W_DIR;
   float ret = GD::inline_predict<vec_add>(all, ec);
-  ec->ft_offset-= W_DIR;
+  ec.ft_offset-= W_DIR;
 
   return ret;
 }
@@ -726,10 +726,9 @@ int process_pass(vw& all, bfgs& b) {
   return status;
 }
 
-void process_example(vw& all, bfgs& b, example *ec)
+void process_example(vw& all, bfgs& b, example& ec)
 {
-
-  label_data* ld = (label_data*)ec->ld;
+  label_data* ld = (label_data*)ec.ld;
   if (b.first_pass)
     b.importance_weight_sum += ld->weight;
 
@@ -738,10 +737,10 @@ void process_example(vw& all, bfgs& b, example& ec)
   /********************************************************************/
   if (b.gradient_pass)
     {
-      ec->final_prediction = predict_and_gradient(all, ec);//w[0] & w[1]
-      ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ld->label) * ld->weight;
-      b.loss_sum += ec->loss;
-      b.predictions.push_back(ec->final_prediction);
+      ec.final_prediction = predict_and_gradient(all, ec);//w[0] & w[1]
+      ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ld->label) * ld->weight;
+      b.loss_sum += ec.loss;
+      b.predictions.push_back(ec.final_prediction);
     }
   /********************************************************************/
   /* II) CURVATURE CALCULATION ****************************************/
@@ -751,9 +750,9 @@ void process_example(vw& all, bfgs& b, example& ec)
       float d_dot_x = dot_with_direction(all, ec);//w[2]
       if (b.example_number >= b.predictions.size())//Make things safe in case example source is strange.
         b.example_number = b.predictions.size()-1;
-      ec->final_prediction = b.predictions[b.example_number];
-      ec->partial_prediction = b.predictions[b.example_number];
-      ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ld->label) * ld->weight;
+      ec.final_prediction = b.predictions[b.example_number];
+      ec.partial_prediction = b.predictions[b.example_number];
+      ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ld->label) * ld->weight;
       float sd = all.loss->second_derivative(all.sd, b.predictions[b.example_number++],ld->label);
       b.curvature += d_dot_x*d_dot_x*sd*ld->weight;
     }
@@ -762,46 +761,46 @@ void process_example(vw& all, bfgs& b, example& ec)
     update_preconditioner(all, ec);//w[3]
 }
 
-void end_pass(bfgs* b)
+void end_pass(bfgs& b)
 {
-  vw* all = b->all;
+  vw* all = b.all;
 
-  if (b->current_pass <= b->final_pass)
+  if (b.current_pass <= b.final_pass)
     {
-      if(b->current_pass < b->final_pass)
+      if(b.current_pass < b.final_pass)
         {
-          int status = process_pass(*all, *b);
+          int status = process_pass(*all, b);
 
           //reaching the max number of passes regardless of convergence
-          if(b->final_pass == b->current_pass)
+          if(b.final_pass == b.current_pass)
            {
              cerr<<"Maximum number of passes reached. ";
-             if(!b->output_regularizer)
+             if(!b.output_regularizer)
                cerr<<"If you want to optimize further, increase the number of passes\n";
-             if(b->output_regularizer)
+             if(b.output_regularizer)
                {
                  cerr<<"\nRegular model file has been created. ";
                  cerr<<"Output feature regularizer file is created only when the convergence is reached. Try increasing the number of passes for convergence\n";
-                 b->output_regularizer = false;
+                 b.output_regularizer = false;
                }
            }
 
          //attain convergence before reaching max iterations
-         if (status != LEARN_OK && b->final_pass > b->current_pass) {
-           b->final_pass = b->current_pass;
+         if (status != LEARN_OK && b.final_pass > b.current_pass) {
+           b.final_pass = b.current_pass;
          }
 
-         if (b->output_regularizer && b->final_pass == b->current_pass) {
+         if (b.output_regularizer && b.final_pass == b.current_pass) {
            zero_preconditioner(*all);
-           b->preconditioner_pass = true;
+           b.preconditioner_pass = true;
          }
 
         if(!all->holdout_set_off)
           {
-            if(summarize_holdout_set(*all, b->no_win_counter))
+            if(summarize_holdout_set(*all, b.no_win_counter))
               finalize_regressor(*all, all->final_regressor_name);
-            if(b->early_stop_thres == b->no_win_counter)
+            if(b.early_stop_thres == b.no_win_counter)
               {
                 all-> early_terminate = true;
                 cerr<<"Early termination reached w.r.t. holdout set error";
@@ -810,47 +809,47 @@ void end_pass(bfgs* b)
              }
          }
 
       }else{//reaching convergence in the previous pass
-        if(b->output_regularizer)
-          preconditioner_to_regularizer(*all, *b, (*all).l2_lambda);
-        b->current_pass ++;
+        if(b.output_regularizer)
+          preconditioner_to_regularizer(*all, b, (*all).l2_lambda);
+        b.current_pass ++;
       }
     }
 }
 
 // placeholder
 
-void predict(bfgs* b, learner& base, example* ec)
+void predict(bfgs& b, learner& base, example& ec)
 {
-  vw* all = b->all;
-  ec->final_prediction = bfgs_predict(*all,ec);
+  vw* all = b.all;
+  ec.final_prediction = bfgs_predict(*all,ec);
 }
 
-void learn(bfgs* b, learner& base, example* ec)
+void learn(bfgs& b, learner& base, example& ec)
 {
-  vw* all = b->all;
-  assert(ec->in_use);
+  vw* all = b.all;
+  assert(ec.in_use);
 
-  if (b->current_pass <= b->final_pass)
+  if (b.current_pass <= b.final_pass)
     {
-      if(ec->test_only)
+      if(ec.test_only)
         {
-          label_data* ld = (label_data*)ec->ld;
+          label_data* ld = (label_data*)ec.ld;
           predict(b, base, ec);
-          ec->loss = all->loss->getLoss(all->sd, ec->final_prediction, ld->label) * ld->weight;
+          ec.loss = all->loss->getLoss(all->sd, ec.final_prediction, ld->label) * ld->weight;
         }
       else if (test_example(ec))
        predict(b, base, ec);
      else
-       process_example(*all, *b, ec);
+       process_example(*all, b, ec);
    }
 }
 
-void finish(bfgs* b)
+void finish(bfgs& b)
 {
-  b->predictions.delete_v();
-  free(b->mem);
-  free(b->rho);
-  free(b->alpha);
+  b.predictions.delete_v();
+  free(b.mem);
+  free(b.rho);
+  free(b.alpha);
 }
 
 void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool text)
@@ -902,9 +901,9 @@ void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool
 }
 
-void save_load(bfgs* b, io_buf& model_file, bool read, bool text)
+void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
 {
-  vw* all = b->all;
+  vw* all = b.all;
 
   uint32_t length = 1 << all->num_bits;
 
@@ -913,8 +912,8 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
       initialize_regressor(*all);
       if (all->per_feature_regularizer_input != "")
        {
-         b->regularizers = (weight *)calloc(2*length, sizeof(weight));
-         if (b->regularizers == NULL)
+         b.regularizers = (weight *)calloc(2*length, sizeof(weight));
+         if (b.regularizers == NULL)
           {
             cerr << all->program_name << ": Failed to allocate regularizers array: try decreasing -b <bits>" << endl;
             throw exception();
@@ -922,18 +921,18 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
        }
      int m = all->m;
 
-      b->mem_stride = (m==0) ? CG_EXTRA : 2*m;
-      b->mem = (float*) malloc(sizeof(float)*all->length()*(b->mem_stride));
-      b->rho = (double*) malloc(sizeof(double)*m);
-      b->alpha = (double*) malloc(sizeof(double)*m);
+      b.mem_stride = (m==0) ? CG_EXTRA : 2*m;
+      b.mem = (float*) malloc(sizeof(float)*all->length()*(b.mem_stride));
+      b.rho = (double*) malloc(sizeof(double)*m);
+      b.alpha = (double*) malloc(sizeof(double)*m);
 
      if (!all->quiet)
        {
-         fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b->mem_stride)+sizeof(weight)*all->reg.stride) >> 20);
+         fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b.mem_stride)+sizeof(weight)*all->reg.stride) >> 20);
        }
 
-      b->net_time = 0.0;
-      ftime(&b->t_start_global);
+      b.net_time = 0.0;
+      ftime(&b.t_start_global);
 
      if (!all->quiet)
        {
@@ -943,14 +942,14 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
          cerr.precision(5);
        }
 
-      if (b->regularizers != NULL)
+      if (b.regularizers != NULL)
        all->l2_lambda = 1; // To make sure we are adding the regularization
-      b->output_regularizer = (all->per_feature_regularizer_output != "" || all->per_feature_regularizer_text != "");
-      reset_state(*all, *b, false);
+      b.output_regularizer = (all->per_feature_regularizer_output != "" || all->per_feature_regularizer_text != "");
+      reset_state(*all, b, false);
    }
 
-  //bool reg_vector = b->output_regularizer || all->per_feature_regularizer_input.length() > 0;
-  bool reg_vector = (b->output_regularizer && !read) || (all->per_feature_regularizer_input.length() > 0 && read);
+  //bool reg_vector = b.output_regularizer || all->per_feature_regularizer_input.length() > 0;
+  bool reg_vector = (b.output_regularizer && !read) || (all->per_feature_regularizer_input.length() > 0 && read);
 
  if (model_file.files.size() > 0)
    {
@@ -961,15 +960,15 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
                                buff, text_len, text);
 
      if (reg_vector)
-       save_load_regularizer(*all, *b, model_file, read, text);
+       save_load_regularizer(*all, b, model_file, read, text);
      else
        GD::save_load_regressor(*all, model_file, read, text);
    }
 }
 
-void init_driver(bfgs* b)
+void init_driver(bfgs& b)
 {
-  b->backstep_on = true;
+  b.backstep_on = true;
 }
 
 learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/binary.cc b/vowpalwabbit/binary.cc
index f7f0e8b4..46694b26 100644
--- a/vowpalwabbit/binary.cc
+++ b/vowpalwabbit/binary.cc
@@ -6,22 +6,22 @@ using namespace LEARNER;
 
 namespace BINARY {
   template <bool is_learn>
-  void predict_or_learn(void* d, learner& base, example* ec) {
+  void predict_or_learn(float&, learner& base, example& ec) {
     if (is_learn)
       base.learn(ec);
     else
       base.predict(ec);
 
-    if ( ec->final_prediction > 0)
-      ec->final_prediction = 1;
+    if ( ec.final_prediction > 0)
+      ec.final_prediction = 1;
     else
-      ec->final_prediction = -1;
+      ec.final_prediction = -1;
 
-    label_data* ld = (label_data*)ec->ld;//New loss
-    if (ld->label == ec->final_prediction)
-      ec->loss = 0.;
+    label_data* ld = (label_data*)ec.ld;//New loss
+    if (ld->label == ec.final_prediction)
+      ec.loss = 0.;
     else
-      ec->loss = 1.;
+      ec.loss = 1.;
   }
 
   learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
@@ -36,8 +36,8 @@ namespace BINARY {
     all.sd->binary_label = true;
     //Create new learner
     learner* ret = new learner(NULL, all.l);
-    ret->set_learn<void, predict_or_learn<true> >();
-    ret->set_predict<void, predict_or_learn<false> >();
+    ret->set_learn<float, predict_or_learn<true> >();
+    ret->set_predict<float, predict_or_learn<false> >();
     return ret;
   }
 }
diff --git a/vowpalwabbit/bs.cc b/vowpalwabbit/bs.cc
index 482dbdf5..035a5e28 100644
--- a/vowpalwabbit/bs.cc
+++ b/vowpalwabbit/bs.cc
@@ -62,13 +62,13 @@ namespace BS {
     return 20.;
   }
 
-  void bs_predict_mean(vw& all, example* ec, vector<double> &pred_vec)
+  void bs_predict_mean(vw& all, example& ec, vector<double> &pred_vec)
   {
-    ec->final_prediction = (float)accumulate(pred_vec.begin(), pred_vec.end(), 0.0)/pred_vec.size();
-    ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ((label_data*)ec->ld)->label) * ((label_data*)ec->ld)->weight;
+    ec.final_prediction = (float)accumulate(pred_vec.begin(), pred_vec.end(), 0.0)/pred_vec.size();
+    ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ((label_data*)ec.ld)->label) * ((label_data*)ec.ld)->weight;
   }
 
-  void bs_predict_vote(vw& all, example* ec, vector<double> &pred_vec)
+  void bs_predict_vote(vw& all, example& ec, vector<double> &pred_vec)
   { //majority vote in linear time
     unsigned int counter = 0;
     float current_label = 1.;
@@ -92,16 +92,16 @@ namespace BS {
     }
 
     if(counter == 0)//no majority exists
     {
-      ec->final_prediction = -1;
-      ec->loss = 1.;
+      ec.final_prediction = -1;
+      ec.loss = 1.;
       return;
     }
     //will output majority if it exists
-    ec->final_prediction = current_label;
-    if (ec->final_prediction == ((label_data*)ec->ld)->label)
-      ec->loss = 0.;
+    ec.final_prediction = current_label;
+    if (ec.final_prediction == ((label_data*)ec.ld)->label)
+      ec.loss = 0.;
     else
-      ec->loss = 1.;
+      ec.loss = 1.;
   }
 
   void print_result(int f, float res, float weight, v_array<char> tag, float lb, float ub)
@@ -127,105 +127,105 @@ namespace BS {
     }
   }
 
-  void output_example(vw& all, bs* d, example* ec)
+  void output_example(vw& all, bs& d, example& ec)
   {
-    label_data* ld = (label_data*)ec->ld;
+    label_data* ld = (label_data*)ec.ld;
 
-    if(ec->test_only)
+    if(ec.test_only)
     {
       all.sd->weighted_holdout_examples += ld->weight;//test weight seen
       all.sd->weighted_holdout_examples_since_last_dump += ld->weight;
       all.sd->weighted_holdout_examples_since_last_pass += ld->weight;
-      all.sd->holdout_sum_loss += ec->loss;
-      all.sd->holdout_sum_loss_since_last_dump += ec->loss;
-      all.sd->holdout_sum_loss_since_last_pass += ec->loss;//since last pass
+      all.sd->holdout_sum_loss += ec.loss;
+      all.sd->holdout_sum_loss_since_last_dump += ec.loss;
+      all.sd->holdout_sum_loss_since_last_pass += ec.loss;//since last pass
    }
    else
    {
      all.sd->weighted_examples += ld->weight;
-     all.sd->sum_loss += ec->loss;
-     all.sd->sum_loss_since_last_dump += ec->loss;
-     all.sd->total_features += ec->num_features;
+     all.sd->sum_loss += ec.loss;
+     all.sd->sum_loss_since_last_dump += ec.loss;
+     all.sd->total_features += ec.num_features;
      all.sd->example_number++;
    }
 
    if(all.final_prediction_sink.size() != 0)//get confidence interval only when printing out predictions
    {
-     d->lb = FLT_MAX;
-     d->ub = -FLT_MAX;
-     for (unsigned i = 0; i < d->pred_vec.size(); i++)
+     d.lb = FLT_MAX;
+     d.ub = -FLT_MAX;
+     for (unsigned i = 0; i < d.pred_vec.size(); i++)
      {
-       if(d->pred_vec[i] > d->ub)
-         d->ub = (float)d->pred_vec[i];
-       if(d->pred_vec[i] < d->lb)
-         d->lb = (float)d->pred_vec[i];
+       if(d.pred_vec[i] > d.ub)
+         d.ub = (float)d.pred_vec[i];
+       if(d.pred_vec[i] < d.lb)
+         d.lb = (float)d.pred_vec[i];
      }
    }
 
    for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
-     BS::print_result(*sink, ec->final_prediction, 0, ec->tag, d->lb, d->ub);
+     BS::print_result(*sink, ec.final_prediction, 0, ec.tag, d.lb, d.ub);
 
    print_update(all, ec);
  }
 
  template <bool is_learn>
-  void predict_or_learn(bs* d, learner& base, example* ec)
+  void predict_or_learn(bs& d, learner& base, example& ec)
  {
-    vw* all = d->all;
+    vw* all = d.all;
    bool shouldOutput = all->raw_prediction > 0;
 
-    float weight_temp = ((label_data*)ec->ld)->weight;
+    float weight_temp = ((label_data*)ec.ld)->weight;
 
    string outputString;
    stringstream outputStringStream(outputString);
-    d->pred_vec.clear();
+    d.pred_vec.clear();
 
-    for (size_t i = 1; i <= d->B; i++)
+    for (size_t i = 1; i <= d.B; i++)
      {
-        ((label_data*)ec->ld)->weight = weight_temp * weight_gen();
+        ((label_data*)ec.ld)->weight = weight_temp * weight_gen();
 
        if (is_learn)
          base.learn(ec, i-1);
        else
          base.predict(ec, i-1);
 
-        d->pred_vec.push_back(ec->final_prediction);
+        d.pred_vec.push_back(ec.final_prediction);
 
        if (shouldOutput) {
          if (i > 1) outputStringStream << ' ';
-          outputStringStream << i << ':' << ec->partial_prediction;
+          outputStringStream << i << ':' << ec.partial_prediction;
        }
      }
 
-    ((label_data*)ec->ld)->weight = weight_temp;
+    ((label_data*)ec.ld)->weight = weight_temp;
 
-    switch(d->bs_type)
+    switch(d.bs_type)
    {
      case BS_TYPE_MEAN:
-        bs_predict_mean(*all, ec, d->pred_vec);
+        bs_predict_mean(*all, ec, d.pred_vec);
        break;
      case BS_TYPE_VOTE:
-        bs_predict_vote(*all, ec, d->pred_vec);
+        bs_predict_vote(*all, ec, d.pred_vec);
        break;
      default:
-        std::cerr << "Unknown bs_type specified: " << d->bs_type << ". Exiting." << endl;
+        std::cerr << "Unknown bs_type specified: " << d.bs_type << ". Exiting." << endl;
        throw exception();
    }
 
    if (shouldOutput)
-      all->print_text(all->raw_prediction, outputStringStream.str(), ec->tag);
+      all->print_text(all->raw_prediction, outputStringStream.str(), ec.tag);
  }
 
-  void finish_example(vw& all, bs* d, example* ec)
+  void finish_example(vw& all, bs& d, example& ec)
  {
    BS::output_example(all, d, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
  }
 
-  void finish(bs* d)
+  void finish(bs& d)
  {
-    d->pred_vec.~vector();
+    d.pred_vec.~vector();
  }
 
  learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/cb.cc b/vowpalwabbit/cb.cc
index 9d2437ac..a3214ba1 100644
--- a/vowpalwabbit/cb.cc
+++ b/vowpalwabbit/cb.cc
@@ -223,9 +223,9 @@ namespace CB
    return NULL;
  }
 
-  void gen_cs_example_ips(vw& all, cb& c, example* ec, CSOAA::label& cs_ld)
+  void gen_cs_example_ips(vw& all, cb& c, example& ec, CSOAA::label& cs_ld)
  {//this implements the inverse propensity score method, where cost are importance weighted by the probability of the chosen action
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
 
    //generate cost-sensitive example
    cs_ld.costs.erase();
@@ -282,7 +282,7 @@ namespace CB
  }
 
  template <bool is_learn>
-  void call_scorer(vw& all, cb& c, example* ec, uint32_t index)
+  void call_scorer(vw& all, cb& c, example& ec, uint32_t index)
  {
    float old_min = all.sd->min_label;
    //all.sd->min_label = c.min_cost;
@@ -297,9 +297,9 @@ namespace CB
  }
 
  template <bool is_learn>
-  float get_cost_pred(vw& all, cb& c, example* ec, uint32_t index)
+  float get_cost_pred(vw& all, cb& c, example& ec, uint32_t index)
  {
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
 
    label_data simple_temp;
    simple_temp.initial = 0.;
@@ -314,21 +314,21 @@ namespace CB
      simple_temp.weight = 0.;
    }
 
-    ec->ld = &simple_temp;
+    ec.ld = &simple_temp;
 
    call_scorer<is_learn>(all, c, ec, index);
 
-    ec->ld = ld;
+    ec.ld = ld;
-    float cost = ec->final_prediction;
+    float cost = ec.final_prediction;
 
    return cost;
  }
 
  template <bool is_learn>
-  void gen_cs_example_dm(vw& all, cb& c, example* ec, CSOAA::label& cs_ld)
+  void gen_cs_example_dm(vw& all, cb& c, example& ec, CSOAA::label& cs_ld)
  { //this implements the direct estimation method, where costs are directly specified by the learned regressor.
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
 
    float min = FLT_MAX;
    size_t argmin = 1;
@@ -393,13 +393,13 @@ namespace CB
      }
    }
 
-    ec->final_prediction = (float)argmin;
+    ec.final_prediction = (float)argmin;
  }
 
  template <bool is_learn>
-  void gen_cs_example_dr(vw& all, cb& c, example* ec, CSOAA::label& cs_ld)
+  void gen_cs_example_dr(vw& all, cb& c, example& ec, CSOAA::label& cs_ld)
  {//this implements the doubly robust method
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
 
    //generate cost sensitive example
    cs_ld.costs.erase();
@@ -455,9 +455,9 @@ namespace CB
    }
  }
 
-  void cb_test_to_cs_test_label(vw& all, example* ec, CSOAA::label& cs_ld)
+  void cb_test_to_cs_test_label(vw& all, example& ec, CSOAA::label& cs_ld)
  {
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
 
    cs_ld.costs.erase();
    if(ld->costs.size() > 0)
@@ -479,67 +479,67 @@ namespace CB
  }
 
  template <bool is_learn>
-  void predict_or_learn(cb* c, learner& base, example* ec) {
-    vw* all = c->all;
-    CB::label* ld = (CB::label*)ec->ld;
+  void predict_or_learn(cb& c, learner& base, example& ec) {
+    vw* all = c.all;
+    CB::label* ld = (CB::label*)ec.ld;
 
    //check if this is a test example where we just want a prediction
    if( CB::is_test_label(ld) )
    {
      //if so just query base cost-sensitive learner
-      cb_test_to_cs_test_label(*all,ec,c->cb_cs_ld);
+      cb_test_to_cs_test_label(*all,ec,c.cb_cs_ld);
 
-      ec->ld = &c->cb_cs_ld;
+      ec.ld = &c.cb_cs_ld;
      base.predict(ec);
-      ec->ld = ld;
+      ec.ld = ld;
      for (size_t i=0; i<ld->costs.size(); i++)
-        ld->costs[i].partial_prediction = c->cb_cs_ld.costs[i].partial_prediction;
+        ld->costs[i].partial_prediction = c.cb_cs_ld.costs[i].partial_prediction;
 
      return;
    }
 
    //now this is a training example
-    c->known_cost = get_observed_cost(ld);
-    c->min_cost = min (c->min_cost, c->known_cost->cost);
-    c->max_cost = max (c->max_cost, c->known_cost->cost);
+    c.known_cost = get_observed_cost(ld);
+    c.min_cost = min (c.min_cost, c.known_cost->cost);
+    c.max_cost = max (c.max_cost, c.known_cost->cost);
 
    //generate a cost-sensitive example to update classifiers
-    switch(c->cb_type)
+    switch(c.cb_type)
    {
      case CB_TYPE_IPS:
-        gen_cs_example_ips(*all,*c,ec,c->cb_cs_ld);
+        gen_cs_example_ips(*all,c,ec,c.cb_cs_ld);
        break;
      case CB_TYPE_DM:
-        gen_cs_example_dm<is_learn>(*all,*c,ec,c->cb_cs_ld);
+        gen_cs_example_dm<is_learn>(*all,c,ec,c.cb_cs_ld);
        break;
      case CB_TYPE_DR:
-        gen_cs_example_dr<is_learn>(*all,*c,ec,c->cb_cs_ld);
+        gen_cs_example_dr<is_learn>(*all,c,ec,c.cb_cs_ld);
        break;
      default:
-        std::cerr << "Unknown cb_type specified for contextual bandit learning: " << c->cb_type << ". Exiting." << endl;
+        std::cerr << "Unknown cb_type specified for contextual bandit learning: " << c.cb_type << ". Exiting." << endl;
        throw exception();
    }
 
-    if (c->cb_type != CB_TYPE_DM)
+    if (c.cb_type != CB_TYPE_DM)
    {
-      ec->ld = &c->cb_cs_ld;
+      ec.ld = &c.cb_cs_ld;
 
      if (is_learn)
        base.learn(ec);
      else
        base.predict(ec);
 
-      ec->ld = ld;
+      ec.ld = ld;
      for (size_t i=0; i<ld->costs.size(); i++)
-        ld->costs[i].partial_prediction = c->cb_cs_ld.costs[i].partial_prediction;
+        ld->costs[i].partial_prediction = c.cb_cs_ld.costs[i].partial_prediction;
    }
  }
 
-  void init_driver(cb*)
+  void init_driver(cb&)
  {
    fprintf(stderr, "*estimate* *estimate* avglossreg last pred last correct\n");
  }
 
-  void print_update(vw& all, cb& c, bool is_test, example *ec)
+  void print_update(vw& all, cb& c, bool is_test, example& ec)
  {
    if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
    {
@@ -565,8 +565,8 @@ namespace CB
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              (long unsigned int)ec->final_prediction,
-              (long unsigned int)ec->num_features,
+              (long unsigned int)ec.final_prediction,
+              (long unsigned int)ec.num_features,
              c.avg_loss_regressors,
              c.last_pred_reg,
              c.last_correct_cost);
@@ -581,8 +581,8 @@ namespace CB
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              (long unsigned int)ec->final_prediction,
-              (long unsigned int)ec->num_features,
+              (long unsigned int)ec.final_prediction,
+              (long unsigned int)ec.num_features,
              c.avg_loss_regressors,
              c.last_pred_reg,
              c.last_correct_cost);
@@ -593,14 +593,14 @@ namespace CB
    }
  }
 
-  void output_example(vw& all, cb& c, example* ec)
+  void output_example(vw& all, cb& c, example& ec)
  {
-    CB::label* ld = (CB::label*)ec->ld;
+    CB::label* ld = (CB::label*)ec.ld;
    float loss = 0.;
 
    if (!CB::is_test_label(ld))
    {//need to compute exact loss
-      size_t pred = (size_t)ec->final_prediction;
+      size_t pred = (size_t)ec.final_prediction;
 
      float chosen_loss = FLT_MAX;
      if( know_all_cost_example(ld) )
      {
@@ -626,11 +626,11 @@ namespace CB
      loss = chosen_loss;
    }
 
-    if(ec->test_only)
+    if(ec.test_only)
    {
-      all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
-      all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
-      all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
+      all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+      all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+      all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
      all.sd->holdout_sum_loss += loss;
      all.sd->holdout_sum_loss_since_last_dump += loss;
      all.sd->holdout_sum_loss_since_last_pass += loss;//since last pass
@@ -640,30 +640,30 @@ namespace CB
      all.sd->sum_loss += loss;
      all.sd->sum_loss_since_last_dump += loss;
      all.sd->weighted_examples += 1.;
-      all.sd->total_features += ec->num_features;
+      all.sd->total_features += ec.num_features;
      all.sd->example_number++;
    }
 
    for (size_t i = 0; i<all.final_prediction_sink.size(); i++)
    {
      int f = all.final_prediction_sink[i];
-      all.print(f, ec->final_prediction, 0, ec->tag);
+      all.print(f, ec.final_prediction, 0, ec.tag);
    }
 
-    print_update(all, c, CB::is_test_label((CB::label*)ec->ld), ec);
+    print_update(all, c, CB::is_test_label((CB::label*)ec.ld), ec);
  }
 
-  void finish(cb* c)
+  void finish(cb& c)
  {
-    c->cb_cs_ld.costs.delete_v();
+    c.cb_cs_ld.costs.delete_v();
  }
 
-  void finish_example(vw& all, cb* c, example* ec)
+  void finish_example(vw& all, cb& c, example& ec)
  {
-    output_example(all, *c, ec);
-    VW::finish_example(all, ec);
+    output_example(all, c, ec);
+    VW::finish_example(all, &ec);
  }
 
  learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index d41975ae..3f62a3ef 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -17,92 +17,92 @@ namespace CBIFY {
    CB::label cb_label;
  };
 
-  void do_uniform(cbify* data, example* ec)
+  void do_uniform(cbify& data, example& ec)
  {
    //Draw an action
-    uint32_t action = (uint32_t)ceil(frand48() * data->k);
+    uint32_t action = (uint32_t)ceil(frand48() * data.k);
 
-    ec->final_prediction = (float)action;
+    ec.final_prediction = (float)action;
  }
 
-  void do_loss(example* ec)
+  void do_loss(example& ec)
  {
-    OAA::mc_label* ld = (OAA::mc_label*)ec->ld;//New loss
+    OAA::mc_label* ld = (OAA::mc_label*)ec.ld;//New loss
 
-    if (ld->label != ec->final_prediction)
-      ec->loss = 1.;
+    if (ld->label != ec.final_prediction)
+      ec.loss = 1.;
    else
-      ec->loss = 0.;
+      ec.loss = 0.;
  }
 
  template <bool is_learn>
-  void predict_or_learn_first(cbify* data, learner& base, example* ec)
+  void predict_or_learn_first(cbify& data, learner& base, example& ec)
  {//Explore tau times, then act according to optimal.
-    OAA::mc_label* ld = (OAA::mc_label*)ec->ld;
+    OAA::mc_label* ld = (OAA::mc_label*)ec.ld;
    //Use CB to find current prediction for remaining rounds.
-    if (data->tau > 0)
+    if (data.tau > 0)
      {
        do_uniform(data, ec);
        do_loss(ec);
-        data->tau--;
+        data.tau--;
        cout << "tau--" << endl;
-        uint32_t action = (uint32_t)ec->final_prediction;
-        CB::cb_class l = {ec->loss, action, 1.f / data->k};
-        data->cb_label.costs.erase();
-        data->cb_label.costs.push_back(l);
-        ec->ld = &(data->cb_label);
+        uint32_t action = (uint32_t)ec.final_prediction;
+        CB::cb_class l = {ec.loss, action, 1.f / data.k};
+        data.cb_label.costs.erase();
+        data.cb_label.costs.push_back(l);
+        ec.ld = &(data.cb_label);
        if (is_learn)
          base.learn(ec);
        else
          base.predict(ec);
-        ec->final_prediction = (float)action;
-        ec->loss = l.cost;
+        ec.final_prediction = (float)action;
+        ec.loss = l.cost;
      }
    else
      {
-        data->cb_label.costs.erase();
-        ec->ld = &(data->cb_label);
+        data.cb_label.costs.erase();
+        ec.ld = &(data.cb_label);
        if (is_learn)
          base.learn(ec);
        else
          base.predict(ec);
        do_loss(ec);
      }
-    ec->ld = ld;
+    ec.ld = ld;
  }
 
  template <bool is_learn>
-  void predict_or_learn_greedy(cbify* data, learner& base, example* ec)
+  void predict_or_learn_greedy(cbify& data, learner& base, example& ec)
  {//Explore uniform random an epsilon fraction of the time.
-    OAA::mc_label* ld = (OAA::mc_label*)ec->ld;
+    OAA::mc_label* ld = (OAA::mc_label*)ec.ld;
 
-    data->cb_label.costs.erase();
-    ec->ld = &(data->cb_label);
+    data.cb_label.costs.erase();
+    ec.ld = &(data.cb_label);
    base.predict(ec);
    do_loss(ec);
-    uint32_t action = (uint32_t)ec->final_prediction;
+    uint32_t action = (uint32_t)ec.final_prediction;
 
-    float base_prob = data->epsilon / data->k;
-    if (frand48() < 1. - data->epsilon)
+    float base_prob = data.epsilon / data.k;
+    if (frand48() < 1. - data.epsilon)
      {
-        CB::cb_class l = {ec->loss, action, 1.f - data->epsilon + base_prob};
-        data->cb_label.costs.push_back(l);
+        CB::cb_class l = {ec.loss, action, 1.f - data.epsilon + base_prob};
+        data.cb_label.costs.push_back(l);
      }
    else
      {
        do_uniform(data, ec);
        do_loss(ec);
-        action = (uint32_t)ec->final_prediction;
-        CB::cb_class l = {ec->loss, (uint32_t)ec->final_prediction, base_prob};
-        data->cb_label.costs.push_back(l);
+        action = (uint32_t)ec.final_prediction;
+        CB::cb_class l = {ec.loss, (uint32_t)ec.final_prediction, base_prob};
+        data.cb_label.costs.push_back(l);
      }
    if (is_learn)
      base.learn(ec);
 
-    ec->final_prediction = (float)action;
-    ec->loss = data->cb_label.costs[0].cost;
-    ec->ld = ld;
+    ec.final_prediction = (float)action;
+    ec.loss = data.cb_label.costs[0].cost;
+    ec.ld = ld;
  }
 
  void learn_bagging(void* d, learner& base, example* ec)
@@ -119,10 +119,10 @@ namespace CBIFY {
    //Use cost sensitive oracle to cover actions to form distribution.
  }
 
-  void finish_example(vw& all, cbify*, example* ec)
+  void finish_example(vw& all, cbify&, example& ec)
  {
    OAA::output_example(all, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
  }
 
  learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 88ca33d7..0e97d7ac 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -206,7 +206,7 @@ namespace CSOAA {
    }
  }
 
-  void print_update(vw& all, bool is_test, example *ec)
+  void print_update(vw& all, bool is_test, example& ec)
  {
    if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
    {
@@ -232,8 +232,8 @@ namespace CSOAA {
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              (long unsigned int)ec->final_prediction,
-              (long unsigned int)ec->num_features);
+              (long unsigned int)ec.final_prediction,
+              (long unsigned int)ec.num_features);
 
      all.sd->weighted_holdout_examples_since_last_dump = 0;
      all.sd->holdout_sum_loss_since_last_dump = 0.0;
@@ -245,8 +245,8 @@ namespace CSOAA {
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              (long unsigned int)ec->final_prediction,
-              (long unsigned int)ec->num_features);
+              (long unsigned int)ec.final_prediction,
+              (long unsigned int)ec.num_features);
 
      all.sd->sum_loss_since_last_dump = 0.0;
      all.sd->old_weighted_examples = all.sd->weighted_examples;
@@ -254,14 +254,14 @@ namespace CSOAA {
    }
  }
 
-  void output_example(vw& all, example* ec)
+  void output_example(vw& all, example& ec)
  {
-    label* ld = (label*)ec->ld;
+    label* ld = (label*)ec.ld;
 
    float loss = 0.;
    if (!is_test_label(ld))
      {//need to compute exact loss
-        size_t pred = (size_t)ec->final_prediction;
+        size_t pred = (size_t)ec.final_prediction;
 
        float chosen_loss = FLT_MAX;
        float min = FLT_MAX;
@@ -277,11 +277,11 @@ namespace CSOAA {
        loss = chosen_loss - min;
      }
 
-    if(ec->test_only)
+    if(ec.test_only)
      {
-        all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
-        all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
-        all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
+        all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+        all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+        all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
        all.sd->holdout_sum_loss += loss;
        all.sd->holdout_sum_loss_since_last_dump += loss;
        all.sd->holdout_sum_loss_since_last_pass += loss;//since last pass
@@ -289,14 +289,14 @@ namespace CSOAA {
    else
      {
        all.sd->weighted_examples += 1.;
-        all.sd->total_features += ec->num_features;
+        all.sd->total_features += ec.num_features;
        all.sd->sum_loss += loss;
        all.sd->sum_loss_since_last_dump += loss;
        all.sd->example_number++;
      }
 
    for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
-      all.print((int)*sink, ec->final_prediction, 0, ec->tag);
+      all.print((int)*sink, ec.final_prediction, 0, ec.tag);
 
    if (all.raw_prediction > 0) {
      string outputString;
@@ -307,21 +307,21 @@ namespace CSOAA {
        outputStringStream << cl.weight_index << ':' << cl.partial_prediction;
      }
      //outputStringStream << endl;
-      all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+      all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
    }
 
-    print_update(all, is_test_label((label*)ec->ld), ec);
+    print_update(all, is_test_label((label*)ec.ld), ec);
  }
 
  template <bool is_learn>
-  void predict_or_learn(csoaa* c, learner& base, example* ec) {
-    vw* all = c->all;
-    label* ld = (label*)ec->ld;
+  void predict_or_learn(csoaa& c, learner& base, example& ec) {
+    vw* all = c.all;
+    label* ld = (label*)ec.ld;
    size_t prediction = 1;
    float score = FLT_MAX;
    label_data simple_temp = { 0., 0., 0. };
-    ec->ld = &simple_temp;
+    ec.ld = &simple_temp;
    for (wclass *cl = ld->costs.begin; cl != ld->costs.end; cl ++)
      {
        uint32_t i = cl->weight_index;
@@ -344,21 +344,21 @@ namespace CSOAA {
        else
          base.predict(ec, i);
 
-        cl->partial_prediction = ec->partial_prediction;
-        if (ec->partial_prediction < score || (ec->partial_prediction == score && i < prediction)) {
-          score = ec->partial_prediction;
+        cl->partial_prediction = ec.partial_prediction;
+        if (ec.partial_prediction < score || (ec.partial_prediction == score && i < prediction)) {
+          score = ec.partial_prediction;
          prediction = i;
        }
-        ec->partial_prediction = 0.;
+        ec.partial_prediction = 0.;
      }
-    ec->ld = ld;
-    ec->final_prediction = (float)prediction;
+    ec.ld = ld;
+    ec.final_prediction = (float)prediction;
  }
 
-  void finish_example(vw& all, csoaa*, example* ec)
+  void finish_example(vw& all, csoaa&, example& ec)
  {
    output_example(all, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
  }
 
  learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
@@ -391,9 +391,9 @@ namespace CSOAA {
    return l;
  }
 
-  bool example_is_test(example* ec)
+  bool example_is_test(example& ec)
  {
-    v_array<CSOAA::wclass> costs = ((label*)ec->ld)->costs;
+    v_array<CSOAA::wclass> costs = ((label*)ec.ld)->costs;
    if (costs.size() == 0) return true;
    for (size_t j=0; j<costs.size(); j++)
      if (costs[j].x != FLT_MAX) return false;
    return true;
  }
@@ -425,20 +425,20 @@ namespace LabelDict {
  size_t hash_lab(size_t lab) { return 328051 + 94389193 * lab; }
 
-  bool ec_is_label_definition(example*ec) // label defs look like "___:-1"
+  bool ec_is_label_definition(example& ec) // label defs look like "___:-1"
  {
-    v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+    v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
    for (size_t j=0; j<costs.size(); j++)
      if (costs[j].x >= 0.) return false;
-    if (ec->indices.size() == 0) return false;
-    if (ec->indices.size() > 2) return false;
-    if (ec->indices[0] != 'l') return false;
+    if (ec.indices.size() == 0) return false;
+    if (ec.indices.size() > 2) return false;
+    if (ec.indices[0] != 'l') return false;
    return true;
  }
 
-  bool ec_is_example_header(example*ec) // example headers look like "0:-1"
+  bool ec_is_example_header(example& ec) // example headers look like "0:-1"
  {
-    v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+    v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
    if (costs.size() != 1) return false;
    if (costs[0].weight_index != 0) return false;
    if (costs[0].x >= 0) return false;
@@ -448,10 +448,10 @@ namespace LabelDict {
  bool ec_seq_is_label_definition(ldf& l, v_array<example*>ec_seq)
  {
    if (l.ec_seq.size() == 0) return false;
-    bool is_lab = ec_is_label_definition(l.ec_seq[0]);
+    bool is_lab = ec_is_label_definition(*l.ec_seq[0]);
    for (size_t i=1; i<l.ec_seq.size(); i++) {
-      if (is_lab != ec_is_label_definition(l.ec_seq[i])) {
-        if (!((i == l.ec_seq.size()-1) && (example_is_newline(l.ec_seq[i])))) {
+      if (is_lab != ec_is_label_definition(*l.ec_seq[i])) {
+        if (!((i == l.ec_seq.size()-1) && (example_is_newline(*l.ec_seq[i])))) {
          cerr << "error: mixed label definition and examples in ldf data!" << endl;
          throw exception();
        }
@@ -460,77 +460,77 @@ namespace LabelDict {
    return is_lab;
  }
 
-  void del_example_namespace(example*ec, char ns, v_array<feature> features) {
+  void del_example_namespace(example& ec, char ns, v_array<feature> features) {
    size_t numf = features.size();
-    ec->num_features -= numf;
-
-    assert (ec->atomics[(size_t)ns].size() >= numf);
-    if (ec->atomics[(size_t)ns].size() == numf) { // did NOT have ns
-      assert(ec->indices.size() > 0);
-      assert(ec->indices[ec->indices.size()-1] == (size_t)ns);
-      ec->indices.pop();
-      ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
-      ec->atomics[(size_t)ns].erase();
-      ec->sum_feat_sq[(size_t)ns] = 0.;
+    ec.num_features -= numf;
+
+    assert (ec.atomics[(size_t)ns].size() >= numf);
+    if (ec.atomics[(size_t)ns].size() == numf) { // did NOT have ns
+      assert(ec.indices.size() > 0);
+      assert(ec.indices[ec.indices.size()-1] == (size_t)ns);
+      ec.indices.pop();
+      ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
+      ec.atomics[(size_t)ns].erase();
+      ec.sum_feat_sq[(size_t)ns] = 0.;
    } else { // DID have ns
      for (feature*f=features.begin; f!=features.end; f++) {
-        ec->sum_feat_sq[(size_t)ns] -= f->x * f->x;
-        ec->atomics[(size_t)ns].pop();
+        ec.sum_feat_sq[(size_t)ns] -= f->x * f->x;
+        ec.atomics[(size_t)ns].pop();
      }
    }
  }
 
-  void add_example_namespace(example*ec, char ns, v_array<feature> features) {
+  void add_example_namespace(example& ec, char ns, v_array<feature> features) {
    bool has_ns = false;
-    for (size_t i=0; i<ec->indices.size(); i++) {
-      if (ec->indices[i] == (size_t)ns) {
+    for (size_t i=0; i<ec.indices.size(); i++) {
+      if (ec.indices[i] == (size_t)ns) {
        has_ns = true;
        break;
      }
    }
    if (has_ns) {
-      ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
+      ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
    } else {
-      ec->indices.push_back((size_t)ns);
-      ec->sum_feat_sq[(size_t)ns] = 0;
+      ec.indices.push_back((size_t)ns);
+      ec.sum_feat_sq[(size_t)ns] = 0;
    }
 
    for (feature*f=features.begin; f!=features.end; f++) {
-      ec->sum_feat_sq[(size_t)ns] += f->x * f->x;
-      ec->atomics[(size_t)ns].push_back(*f);
+      ec.sum_feat_sq[(size_t)ns] += f->x * f->x;
+      ec.atomics[(size_t)ns].push_back(*f);
    }
 
-    ec->num_features += features.size();
-    ec->total_sum_feat_sq += ec->sum_feat_sq[(size_t)ns];
+    ec.num_features += features.size();
+    ec.total_sum_feat_sq += ec.sum_feat_sq[(size_t)ns];
  }
 
-  void add_example_namespaces_from_example(example*target, example*source) {
-    for (unsigned char* idx=source->indices.begin; idx!=source->indices.end; idx++) {
+  void add_example_namespaces_from_example(example& target, example& source) {
+    for (unsigned char* idx=source.indices.begin; idx!=source.indices.end; idx++) {
      if (*idx == constant_namespace) continue;
-      add_example_namespace(target, (char)*idx, source->atomics[*idx]);
+      add_example_namespace(target, (char)*idx, source.atomics[*idx]);
    }
  }
 
-  void del_example_namespaces_from_example(example*target, example*source) {
-    //for (size_t*idx=source->indices.begin; idx!=source->indices.end; idx++) {
-    unsigned char* idx = source->indices.end;
+  void del_example_namespaces_from_example(example& target, example& source) {
+    //for (size_t*idx=source.indices.begin; idx!=source.indices.end; idx++) {
+    unsigned char* idx = source.indices.end;
    idx--;
-    for (; idx>=source->indices.begin; idx--) {
+    for (; idx>=source.indices.begin; idx--) {
      if (*idx == constant_namespace) continue;
-      del_example_namespace(target, (char)*idx, source->atomics[*idx]);
+      del_example_namespace(target, (char)*idx, source.atomics[*idx]);
    }
  }
 
-  void add_example_namespace_from_memory(ldf& l, example*ec, size_t lab) {
+  void add_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
    size_t lab_hash = hash_lab(lab);
    v_array<feature> features = l.label_features.get(lab, lab_hash);
    if (features.size() == 0) return;
    add_example_namespace(ec, 'l', features);
  }
 
-  void del_example_namespace_from_memory(ldf& l, example* ec, size_t lab) {
+  void del_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
    size_t lab_hash = hash_lab(lab);
    v_array<feature> features = l.label_features.get(lab, lab_hash);
    if (features.size() == 0) return;
@@ -628,8 +628,8 @@ namespace LabelDict {
    ec->indices.decr();
  }
 
-  void make_single_prediction(vw& all, ldf& l, learner& base, example*ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
-    label *ld = (label*)ec->ld;
+  void make_single_prediction(vw& all, ldf& l, learner& base, example& ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
+    label *ld = (label*)ec.ld;
    v_array<CSOAA::wclass> costs = ld->costs;
 
    label_data simple_label;
    if (costs.size() == 0) {
      simple_label.initial = 0.;
      simple_label.label = FLT_MAX;
      simple_label.weight = 0.;
-      ec->partial_prediction = 0.;
+      ec.partial_prediction = 0.;
 
-      ec->ld = &simple_label;
+      ec.ld = &simple_label;
      base.predict(ec); // make a prediction
    } else {
      for (size_t j=0; j<costs.size(); j++) {
        simple_label.initial = 0.;
        simple_label.label = FLT_MAX;
        simple_label.weight = 0.;
-        ec->partial_prediction = 0.;
+        ec.partial_prediction = 0.;
 
        LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
 
-        ec->ld = &simple_label;
+        ec.ld = &simple_label;
        base.predict(ec); // make a prediction
-        costs[j].partial_prediction = ec->partial_prediction;
+        costs[j].partial_prediction = ec.partial_prediction;
 
-        if (min_score && prediction && (ec->partial_prediction < *min_score)) {
-          *min_score = ec->partial_prediction;
+        if (min_score && prediction && (ec.partial_prediction < *min_score)) {
+          *min_score = ec.partial_prediction;
          *prediction = costs[j].weight_index;
        }
@@ -666,7 +666,7 @@ namespace LabelDict {
      }
    }
 
-    ec->ld = ld;
+    ec.ld = ld;
  }
 
@@ -674,23 +674,23 @@ namespace LabelDict {
  void do_actual_learning_wap(vw& all, ldf& l, learner& base, size_t start_K)
  {
    size_t K = l.ec_seq.size();
-    bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+    bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
    size_t prediction = 0;
    float min_score = FLT_MAX;
 
    for (size_t k=start_K; k<K; k++) {
      example *ec = l.ec_seq.begin[k];
-      if (CSOAA::example_is_test(ec) != isTest) {
+      if (CSOAA::example_is_test(*ec) != isTest) {
        isTest = true;
        cerr << "warning: wap_ldf got mix of train/test data; assuming test" << endl;
      }
-      if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+      if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
        cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
        throw exception();
      }
-      make_single_prediction(all, l, base, ec, &prediction, &min_score, NULL, NULL);
+      make_single_prediction(all, l, base, *ec, &prediction, &min_score, NULL, NULL);
    }
 
    // do actual learning
@@ -718,7 +718,7 @@ namespace LabelDict {
      for (size_t j1=0; j1<costs1.size(); j1++) {
        if (costs1[j1].weight_index == (uint32_t)-1) continue;
        if (all.training && !isTest) {
-          LabelDict::add_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+          LabelDict::add_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
 
          for (size_t k2=k1+1; k2<K; k2++) {
            example *ec2 = l.ec_seq.begin[k2];
@@ -732,7 +732,7 @@ namespace LabelDict {
            if (value_diff < 1e-6)
              continue;
 
-            LabelDict::add_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+            LabelDict::add_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
 
            // learn
            ec1->example_t = l.csoaa_example_t;
@@ -742,15 +742,15 @@ namespace LabelDict {
            ec1->partial_prediction = 0.;
            subtract_example(all, ec1, ec2);
            if (is_learn)
-              base.learn(ec1);
+              base.learn(*ec1);
            else
-              base.predict(ec1);
+              base.predict(*ec1);
            unsubtract_example(all, ec1);
 
-            LabelDict::del_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+            LabelDict::del_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
          }
        }
-        LabelDict::del_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+        LabelDict::del_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
 
        if (prediction == costs1[j1].weight_index) prediction_is_me = true;
@@ -766,7 +766,7 @@ namespace LabelDict {
  {
    size_t K = l.ec_seq.size();
    size_t prediction = 0;
-    bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+    bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
    float min_score = FLT_MAX;
    float min_cost = FLT_MAX;
    float max_cost = -FLT_MAX;
@@ -775,16 +775,16 @@ namespace LabelDict {
 
    for (size_t k=start_K; k<K; k++) {
      example *ec = l.ec_seq.begin[k];
-      if (CSOAA::example_is_test(ec) != isTest) {
+      if (CSOAA::example_is_test(*ec) != isTest) {
        isTest = true;
        cerr << "warning: ldf got mix of train/test data; assuming test" << endl;
      }
-      if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+      if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
        cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
        throw exception();
      }
      //clog << "msp k=" << k << endl;
-      make_single_prediction(all, l, base, ec, &prediction, &min_score, &min_cost, &max_cost);
+      make_single_prediction(all, l, base, *ec, &prediction, &min_score, &min_cost, &max_cost);
    }
 
    // do actual learning
@@ -823,12 +823,12 @@ namespace LabelDict {
        //ec->partial_prediction = costs[j].partial_prediction;
        //cerr << "[" << ec->partial_prediction << "," << ec->done << "]";
        //ec->done = false;
-        LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
+        LabelDict::add_example_namespace_from_memory(l, *ec, costs[j].weight_index);
        if (is_learn)
-          base.learn(ec);
+          base.learn(*ec);
        else
-          base.predict(ec);
-        LabelDict::del_example_namespace_from_memory(l, ec, costs[j].weight_index);
+          base.predict(*ec);
+        LabelDict::del_example_namespace_from_memory(l, *ec, costs[j].weight_index);
        ec->example_t = example_t;
      }
@@ -873,10 +873,10 @@ namespace LabelDict {
    /////////////////////// check for headers
    size_t K = l.ec_seq.size();
    size_t start_K = 0;
-    if (LabelDict::ec_is_example_header(l.ec_seq[0])) {
+    if (LabelDict::ec_is_example_header(*l.ec_seq[0])) {
      start_K = 1;
      for (size_t k=1; k<K; k++)
-        LabelDict::add_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+        LabelDict::add_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
    }
 
    /////////////////////// learn
@@ -886,23 +886,23 @@ namespace LabelDict {
    /////////////////////// remove header
    if (start_K > 0)
      for (size_t k=1; k<K; k++)
-        LabelDict::del_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+        LabelDict::del_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
  }
 
-  void output_example(vw& all, example* ec, bool&hit_loss)
+  void output_example(vw& all, example& ec, bool& hit_loss)
  {
-    label* ld = (label*)ec->ld;
+    label* ld = (label*)ec.ld;
    v_array<CSOAA::wclass> costs = ld->costs;
 
    if (example_is_newline(ec)) return;
    if (LabelDict::ec_is_example_header(ec)) return;
    if (LabelDict::ec_is_label_definition(ec)) return;
 
-    all.sd->total_features += ec->num_features;
+    all.sd->total_features += ec.num_features;
 
    float loss = 0.;
-    size_t final_pred = (size_t)ec->final_prediction;
+    size_t final_pred = (size_t)ec.final_prediction;
 
    if (!CSOAA::example_is_test(ec)) {
      for (size_t j=0; j<costs.size(); j++) {
@@ -919,7 +919,7 @@ namespace LabelDict {
    }
 
    for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
-      all.print(*sink, ec->final_prediction, 0, ec->tag);
+      all.print(*sink, ec.final_prediction, 0, ec.tag);
 
    if (all.raw_prediction > 0) {
      string outputString;
@@ -929,7 +929,7 @@ namespace LabelDict {
        outputStringStream << costs[i].weight_index << ':' << costs[i].partial_prediction;
      }
      //outputStringStream << endl;
-      all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+      all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
    }
 
@@ -944,7 +944,7 @@ namespace LabelDict {
    bool hit_loss = false;
    for (example** ecc=l.ec_seq.begin; ecc!=l.ec_seq.end; ecc++)
-      output_example(all, *ecc, hit_loss);
+      output_example(all, **ecc, hit_loss);
 
    if (!l.is_singleline && (all.raw_prediction > 0))
      all.print_text(all.raw_prediction, "", l.ec_seq[0]->tag);
@@ -960,9 +960,9 @@ namespace LabelDict {
    l.ec_seq.erase();
  }
 
-  void end_pass(ldf* l)
+  void end_pass(ldf& l)
  {
-    l->first_pass = false;
+    l.first_pass = false;
  }
 
  /*
@@ -1021,7 +1021,7 @@ namespace LabelDict {
  }
  */
 
-  void finish_singleline_example(vw& all, ldf*, example* ec)
+  void finish_singleline_example(vw& all, ldf&, example& ec)
  {
    if (! LabelDict::ec_is_label_definition(ec)) {
      all.sd->weighted_examples += 1;
@@ -1029,73 +1029,73 @@ namespace LabelDict {
    }
    bool hit_loss = false;
    output_example(all, ec, hit_loss);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
  }
 
-  void finish_multiline_example(vw& all, ldf* l, example* ec)
+  void finish_multiline_example(vw& all, ldf& l, example& ec)
  {
-    if (l->need_to_clear) {
-      if (l->ec_seq.size() > 0) {
-        output_example_seq(all, *l);
+    if (l.need_to_clear) {
+      if (l.ec_seq.size() > 0) {
+        output_example_seq(all, l);
        global_print_newline(all);
      }
-      clear_seq_and_finish_examples(all, *l);
-      l->need_to_clear = false;
-      if (ec->in_use) VW::finish_example(all, ec);
+      clear_seq_and_finish_examples(all, l);
+      l.need_to_clear = false;
+      if (ec.in_use) VW::finish_example(all, &ec);
    }
  }
 
-  void end_examples(ldf* l)
+  void end_examples(ldf& l)
  {
-    if (l->need_to_clear)
-      l->ec_seq.erase();
+    if (l.need_to_clear)
+      l.ec_seq.erase();
  }
 
-  void finish(ldf* l)
+  void finish(ldf& l)
  {
    //vw* all = l->all;
-    l->ec_seq.delete_v();
-    LabelDict::free_label_features(*l);
+    l.ec_seq.delete_v();
+    LabelDict::free_label_features(l);
  }
 
  template <bool is_learn>
-  void predict_or_learn(ldf* l, learner& base, example *ec) {
-    vw* all = l->all;
-    l->base = &base;
+  void predict_or_learn(ldf& l, learner& base, example &ec) {
+    vw* all = l.all;
+    l.base = &base;
    bool is_test = CSOAA::example_is_test(ec) || !all->training;
 
    if (is_test)
-      make_single_prediction(*all, *l, base, ec, NULL, NULL, NULL, NULL);
+      make_single_prediction(*all, l, base, ec, NULL, NULL, NULL, NULL);
 
-    bool need_to_break = l->ec_seq.size() >= all->p->ring_size - 2;
+    bool need_to_break = l.ec_seq.size() >= all->p->ring_size - 2;
 
-    if (l->is_singleline)
+    if (l.is_singleline)
      assert(is_test);
    else if (example_is_newline(ec) || need_to_break) {
-      if (need_to_break && l->first_pass)
-        cerr << "warning: length of sequence at " << ec->example_counter << " exceeds ring size; breaking apart" << endl;
+      if (need_to_break && l.first_pass)
+        cerr << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << endl;
 
-      do_actual_learning<is_learn>(*all, *l, base);
-      l->need_to_clear = true;
+      do_actual_learning<is_learn>(*all, l, base);
+      l.need_to_clear = true;
    } else if (LabelDict::ec_is_label_definition(ec)) {
-      if (l->ec_seq.size() > 0) {
+      if (l.ec_seq.size() > 0) {
        cerr << "error: label definition encountered in data block" << endl;
        throw exception();
      }
 
      if (! is_test) {
-        l->ec_seq.push_back(ec);
-        do_actual_learning<is_learn>(*all, *l, base);
-        l->need_to_clear = true;
+        l.ec_seq.push_back(&ec);
+        do_actual_learning<is_learn>(*all, l, base);
+        l.need_to_clear = true;
      }
    } else {
-      if (l->need_to_clear) { // should only happen if we're NOT driving
-        l->ec_seq.erase();
-        l->need_to_clear = false;
+      if (l.need_to_clear) { // should only happen if we're NOT driving
+        l.ec_seq.erase();
+        l.need_to_clear = false;
      }
-      l->ec_seq.push_back(ec);
+      l.ec_seq.push_back(&ec);
    }
  }
diff --git a/vowpalwabbit/csoaa.h b/vowpalwabbit/csoaa.h
index 4978ec46..a3ecebba 100644
--- a/vowpalwabbit/csoaa.h
+++ b/vowpalwabbit/csoaa.h
@@ -30,7 +30,7 @@ namespace CSOAA {
  LEARNER::learner* setup(vw& all, std::vector<std::string>&, po::variables_map& vm, po::variables_map& vm_file);
 
-  void output_example(vw& all, example* ec);
+  void output_example(vw& all, example& ec);
  size_t read_cached_label(shared_data* sd, void* v, io_buf& cache);
  void cache_label(void* v, io_buf& cache);
  void default_label(void* v);
@@ -45,7 +45,7 @@ namespace CSOAA {
                                        copy_label,
                                        sizeof(label)};
 
-  bool example_is_test(example* ec);
+  bool example_is_test(example& ec);
 }
 
 namespace CSOAA_AND_WAP_LDF {
@@ -53,7 +53,7 @@ namespace CSOAA_AND_WAP_LDF {
  LEARNER::learner* setup(vw& all, std::vector<std::string>&, po::variables_map& vm, po::variables_map& vm_file);
  void global_print_newline(vw& all);
-  void output_example(vw& all, example* ec, bool&hit_loss);
+  void output_example(vw& all, example& ec, bool& hit_loss);
 
  const label_parser cs_label_parser = CSOAA::cs_label_parser;
 }
diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc
index 94576f41..3fd7c385 100644
--- a/vowpalwabbit/ect.cc
+++ b/vowpalwabbit/ect.cc
@@ -186,7 +186,7 @@ namespace ECT
    return e.last_pair + (eliminations-1);
  }
 
-  float ect_predict(vw& all, ect& e, learner& base, example* ec)
+  float ect_predict(vw& all, ect& e, learner& base, example& ec)
  {
    if (e.k == (size_t)1)
      return 1;
@@ -195,7 +195,7 @@ namespace ECT
 
    //Binary final elimination tournament first
    label_data simple_temp = {FLT_MAX, 0., 0.};
-    ec->ld = & simple_temp;
+    ec.ld = & simple_temp;
 
    for (size_t i = e.tree_height-1; i != (size_t)0 -1; i--)
      {
@@ -205,7 +205,7 @@ namespace ECT
 
        base.learn(ec, problem_number);
 
-        float pred = ec->final_prediction;
+        float pred = ec.final_prediction;
        if (pred > 0.)
          finals_winner = finals_winner | (((size_t)1) << i);
      }
@@ -216,7 +216,7 @@ namespace ECT
    {
      base.learn(ec, id - e.k);
 
-      if (ec->final_prediction > 0.)
+      if (ec.final_prediction > 0.)
        id = e.directions[id].right;
      else
        id = e.directions[id].left;
@@ -232,11 +232,11 @@ namespace ECT
    return false;
  }
 
-  void ect_train(vw& all, ect& e, learner& base, example* ec)
+  void ect_train(vw& all, ect& e, learner& base, example& ec)
  {
    if (e.k == 1)//nothing to do
      return;
-    OAA::mc_label * mc = (OAA::mc_label*)ec->ld;
+    OAA::mc_label * mc = (OAA::mc_label*)ec.ld;
 
    label_data simple_temp = {1.,mc->weight,0.};
 
@@ -252,12 +252,12 @@ namespace ECT
      simple_temp.label = 1;
      simple_temp.weight = mc->weight;
 
-      ec->ld = &simple_temp;
+      ec.ld = &simple_temp;
 
      base.learn(ec, id-e.k);
 
      simple_temp.weight = 0.;
      base.learn(ec, id-e.k);//inefficient, we should extract final prediction exactly.
- float pred = ec->final_prediction; + float pred = ec.final_prediction; bool won = pred*simple_temp.label > 0; @@ -305,13 +305,13 @@ namespace ECT label = 1; simple_temp.label = label; simple_temp.weight = (float)(1 << (e.tree_height -i -1)); - ec->ld = & simple_temp; + ec.ld = & simple_temp; uint32_t problem_number = e.last_pair + j*(1 << (i+1)) + (1 << i) -1; base.learn(ec, problem_number); - float pred = ec->final_prediction; + float pred = ec.final_prediction; if (pred > 0.) e.tournaments_won[j] = right; else @@ -324,53 +324,53 @@ namespace ECT } } - void predict(ect* e, learner& base, example* ec) { - vw* all = e->all; + void predict(ect& e, learner& base, example& ec) { + vw* all = e.all; - OAA::mc_label* mc = (OAA::mc_label*)ec->ld; - if (mc->label == 0 || (mc->label > e->k && mc->label != (uint32_t)-1)) - cout << "label " << mc->label << " is not in {1,"<< e->k << "} This won't work right." << endl; - ec->final_prediction = ect_predict(*all, *e, base, ec); - ec->ld = mc; + OAA::mc_label* mc = (OAA::mc_label*)ec.ld; + if (mc->label == 0 || (mc->label > e.k && mc->label != (uint32_t)-1)) + cout << "label " << mc->label << " is not in {1,"<< e.k << "} This won't work right." << endl; + ec.final_prediction = ect_predict(*all, e, base, ec); + ec.ld = mc; } - void learn(ect* e, learner& base, example* ec) + void learn(ect& e, learner& base, example& ec) { - vw* all = e->all; + vw* all = e.all; - OAA::mc_label* mc = (OAA::mc_label*)ec->ld; + OAA::mc_label* mc = (OAA::mc_label*)ec.ld; predict(e, base, ec); - float new_label = ec->final_prediction; + float new_label = ec.final_prediction; if (mc->label != (uint32_t)-1 && all->training) - ect_train(*all, *e, base, ec); - ec->ld = mc; - ec->final_prediction = new_label; + ect_train(*all, e, base, ec); + ec.ld = mc; + ec.final_prediction = new_label; } - void finish(ect* e) + void finish(ect& e) { - for (size_t l = 0; l < e->all_levels.size(); l++) + for (size_t l = 0; l < e.all_levels.size(); l++) { - for (size_t t = 0; t < e->all_levels[l].size(); t++) - e->all_levels[l][t].delete_v(); - e->all_levels[l].delete_v(); + for (size_t t = 0; t < e.all_levels[l].size(); t++) + e.all_levels[l][t].delete_v(); + e.all_levels[l].delete_v(); } - e->final_nodes.delete_v(); + e.final_nodes.delete_v(); - e->up_directions.delete_v(); + e.up_directions.delete_v(); - e->directions.delete_v(); + e.directions.delete_v(); - e->down_directions.delete_v(); + e.down_directions.delete_v(); - e->tournaments_won.delete_v(); + e.tournaments_won.delete_v(); } - void finish_example(vw& all, ect*, example* ec) + void finish_example(vw& all, ect&, example& ec) { OAA::output_example(all, ec); - VW::finish_example(all, ec); + VW::finish_example(all, &ec); } learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file) diff --git a/vowpalwabbit/example.cc b/vowpalwabbit/example.cc index d38cc78e..293b79fb 100644 --- a/vowpalwabbit/example.cc +++ b/vowpalwabbit/example.cc @@ -68,7 +68,7 @@ flat_example* flatten_example(vw& all, example *ec) features_and_source fs; fs.base = all.reg.weight_vector; - GD::foreach_feature<features_and_source, vec_store>(all, ec, fs); + GD::foreach_feature<features_and_source, vec_store>(all, *ec, fs); qsort(fs.feature_map.begin, fs.feature_map.size(), sizeof(feature), compare_feature); fec->feature_map_len = fs.feature_map.size(); diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index e459e8cd..f98503e7 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -92,12 +92,12 @@ 
void free_flatten_example(flat_example* fec); example *alloc_examples(size_t,size_t); void dealloc_example(void(*delete_label)(void*), example&); -inline int example_is_newline(example* ec) +inline int example_is_newline(example& ec) { // if only index is constant namespace or no index - return ((ec->indices.size() == 0) || - ((ec->indices.size() == 1) && - (ec->indices.last() == constant_namespace))); + return ((ec.indices.size() == 0) || + ((ec.indices.size() == 1) && + (ec.indices.last() == constant_namespace))); } #endif diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 816ac171..021c8876 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -43,7 +43,7 @@ namespace GD size_t no_win_counter; size_t early_stop_thres; float initial_constant; - void (*predict)(gd*, learner&, example*); + void (*predict)(gd&, learner&, example&); vw* all; }; @@ -57,12 +57,12 @@ namespace GD }; template <void (*T)(train_data&, float, float&)> - void generic_train(vw& all, example* &ec, float update, bool sqrt_norm) + void generic_train(vw& all, example& ec, float update, bool sqrt_norm) { if (fabs(update) == 0.) return; - float total_weight = ec->example_t; + float total_weight = ec.example_t; if(!all.holdout_set_off) total_weight -= (float)all.sd->weighted_holdout_examples; //exclude weights from test_only examples @@ -124,9 +124,9 @@ float InvSqrt(float x){ } } - void end_pass(gd* g) + void end_pass(gd& g) { - vw* all = g->all; + vw* all = g.all; sync_weights(*all); if(all->span_server != "") { @@ -144,9 +144,9 @@ float InvSqrt(float x){ if(!all->holdout_set_off) { - if(summarize_holdout_set(*all, g->no_win_counter)) + if(summarize_holdout_set(*all, g.no_win_counter)) finalize_regressor(*all, all->final_regressor_name); - if((g->early_stop_thres == g->no_win_counter) && + if((g.early_stop_thres == g.no_win_counter) && ((all->check_holdout_every_n_passes <= 1) || ((all->current_pass % all->check_holdout_every_n_passes) == 0))) all-> early_terminate = true; @@ -258,17 +258,17 @@ void audit_triple(vw& all, feature& f0, audit_data* f0_audit, feature& f1, audit audit_features(all, right_features, audit_right, results, prepend, ns_pre, halfhash + offset); } -void print_features(vw& all, example* &ec) +void print_features(vw& all, example& ec) { weight* weights = all.reg.weight_vector; if (all.lda > 0) { size_t count = 0; - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - count += ec->audit_features[*i].size() + ec->atomics[*i].size(); - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++) + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + count += ec.audit_features[*i].size() + ec.atomics[*i].size(); + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + for (audit_data *f = ec.audit_features[*i].begin; f != ec.audit_features[*i].end; f++) { cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.reg.stride & all.parse_mask) << ':' << f->x; for (size_t k = 0; k < all.lda; k++) @@ -282,21 +282,21 @@ void print_features(vw& all, example* &ec) string empty; string ns_pre; - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++){ + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++){ ns_pre = ""; - audit_features(all, ec->atomics[*i], ec->audit_features[*i], features, empty, ns_pre, ec->ft_offset); + audit_features(all, ec.atomics[*i], ec.audit_features[*i], features, empty, ns_pre, 
ec.ft_offset); ns_pre = ""; } for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) { int fst = (*i)[0]; int snd = (*i)[1]; - for (size_t j = 0; j < ec->atomics[fst].size(); j++) + for (size_t j = 0; j < ec.atomics[fst].size(); j++) { audit_data* a = NULL; - if (ec->audit_features[fst].size() > 0) - a = & ec->audit_features[fst][j]; - audit_quad(all, ec->atomics[fst][j], a, ec->atomics[snd], ec->audit_features[snd], features, ns_pre); + if (ec.audit_features[fst].size() > 0) + a = & ec.audit_features[fst][j]; + audit_quad(all, ec.atomics[fst][j], a, ec.atomics[snd], ec.audit_features[snd], features, ns_pre); } } @@ -305,17 +305,17 @@ void print_features(vw& all, example* &ec) int fst = (*i)[0]; int snd = (*i)[1]; int trd = (*i)[2]; - for (size_t j = 0; j < ec->atomics[fst].size(); j++) + for (size_t j = 0; j < ec.atomics[fst].size(); j++) { audit_data* a1 = NULL; - if (ec->audit_features[fst].size() > 0) - a1 = & ec->audit_features[fst][j]; - for (size_t k = 0; k < ec->atomics[snd].size(); k++) + if (ec.audit_features[fst].size() > 0) + a1 = & ec.audit_features[fst][j]; + for (size_t k = 0; k < ec.atomics[snd].size(); k++) { audit_data* a2 = NULL; - if (ec->audit_features[snd].size() > 0) - a2 = & ec->audit_features[snd][k]; - audit_triple(all, ec->atomics[fst][j], a1, ec->atomics[snd][k], a2, ec->atomics[trd], ec->audit_features[trd], features, ns_pre); + if (ec.audit_features[snd].size() > 0) + a2 = & ec.audit_features[snd][k]; + audit_triple(all, ec.atomics[fst][j], a1, ec.atomics[snd][k], a2, ec.atomics[trd], ec.audit_features[trd], features, ns_pre); } } } @@ -329,10 +329,10 @@ void print_features(vw& all, example* &ec) } } -void print_audit_features(vw& all, example* ec) +void print_audit_features(vw& all, example& ec) { if(all.audit) - print_result(all.stdout_fileno,ec->final_prediction,-1,ec->tag); + print_result(all.stdout_fileno,ec.final_prediction,-1,ec.tag); fflush(stdout); print_features(all, ec); } @@ -352,9 +352,9 @@ float finalize_prediction(vw& all, float ret) } template<bool normalized_training, bool reg_mode_odd, bool power_t_half> -void predict(gd* g, learner& base, example* ec) +void predict(gd& g, learner& base, example& ec) { - vw* all = g->all; + vw* all = g.all; if (normalized_training) { if(power_t_half) { @@ -363,27 +363,27 @@ void predict(gd* g, learner& base, example* ec) float gravity = (float)all->sd->gravity; if (all->adaptive) if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 1> >(*all, ec, gravity); + ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 1> >(*all, ec, gravity); else - ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 2> >(*all, ec, gravity); + ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<true, 2> >(*all, ec, gravity); else if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 1> >(*all, ec, gravity); + ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 1> >(*all, ec, gravity); else - ec->partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 2> >(*all, ec, gravity); + ec.partial_prediction = inline_predict<float, vec_add_trunc_rescale<false, 2> >(*all, ec, gravity); } else { if (all->adaptive) if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<vec_add_rescale<true, 1> >(*all, ec); + ec.partial_prediction = inline_predict<vec_add_rescale<true, 1> >(*all, ec); else - 
ec->partial_prediction = inline_predict<vec_add_rescale<true, 2> >(*all, ec); + ec.partial_prediction = inline_predict<vec_add_rescale<true, 2> >(*all, ec); else if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<vec_add_rescale<false, 1> >(*all, ec); + ec.partial_prediction = inline_predict<vec_add_rescale<false, 1> >(*all, ec); else - ec->partial_prediction = inline_predict<vec_add_rescale<false, 2> >(*all, ec); + ec.partial_prediction = inline_predict<vec_add_rescale<false, 2> >(*all, ec); } } else { @@ -392,28 +392,28 @@ void predict(gd* g, learner& base, example* ec) gnp temp = {(float)all->sd->gravity, all->power_t}; if (all->adaptive) if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 1> >(*all, ec, temp); + ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 1> >(*all, ec, temp); else - ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 2> >(*all, ec, temp); + ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<true, 2> >(*all, ec, temp); else if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 1> >(*all, ec, temp); + ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 1> >(*all, ec, temp); else - ec->partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 2> >(*all, ec, temp); + ec.partial_prediction = inline_predict<gnp, vec_add_trunc_rescale_general<false, 2> >(*all, ec, temp); } else { float power_t = all->power_t; if (all->adaptive) if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<float, vec_add_rescale_general<true, 1> >(*all, ec, power_t); + ec.partial_prediction = inline_predict<float, vec_add_rescale_general<true, 1> >(*all, ec, power_t); else - ec->partial_prediction = inline_predict<float, vec_add_rescale_general<true, 2> >(*all, ec, power_t); + ec.partial_prediction = inline_predict<float, vec_add_rescale_general<true, 2> >(*all, ec, power_t); else if (all->normalized_idx == 1) - ec->partial_prediction = inline_predict<float, vec_add_rescale_general<false, 1> >(*all, ec, power_t); + ec.partial_prediction = inline_predict<float, vec_add_rescale_general<false, 1> >(*all, ec, power_t); else - ec->partial_prediction = inline_predict<float, vec_add_rescale_general<false, 2> >(*all, ec, power_t); + ec.partial_prediction = inline_predict<float, vec_add_rescale_general<false, 2> >(*all, ec, power_t); } } } @@ -422,13 +422,13 @@ void predict(gd* g, learner& base, example* ec) if (reg_mode_odd) { float gravity = (float)all->sd->gravity; - ec->partial_prediction = inline_predict<float, vec_add_trunc>(*all, ec, gravity); + ec.partial_prediction = inline_predict<float, vec_add_trunc>(*all, ec, gravity); } else - ec->partial_prediction = inline_predict<vec_add>(*all, ec); + ec.partial_prediction = inline_predict<vec_add>(*all, ec); } - ec->final_prediction = finalize_prediction(*all, ec->partial_prediction * (float)all->sd->contraction); + ec.final_prediction = finalize_prediction(*all, ec.partial_prediction * (float)all->sd->contraction); if (all->audit || all->hash_inv) print_audit_features(*all, ec); @@ -495,10 +495,10 @@ inline void powert_norm_compute(norm_data& nd, float x, float& fw) { } template <void (*T)(norm_data&,float,float&)> -float compute_norm(vw& all, example* &ec) +float compute_norm(vw& all, example& ec) {//We must traverse the features in _precisely_ the same order as 
during training. - label_data* ld = (label_data*)ec->ld; - float g = all.loss->getSquareGrad(ec->final_prediction, ld->label) * ld->weight; + label_data* ld = (label_data*)ec.ld; + float g = all.loss->getSquareGrad(ec.final_prediction, ld->label) * ld->weight; if (g==0) return 1.; norm_data nd = {g, 0., 0., all.power_t}; @@ -506,7 +506,7 @@ float compute_norm(vw& all, example* &ec) foreach_feature<norm_data,T>(all, ec, nd); if(all.normalized_updates) { - float total_weight = ec->example_t; + float total_weight = ec.example_t; if(!all.holdout_set_off) total_weight -= (float)all.sd->weighted_holdout_examples; //exclude weights from test_only examples @@ -527,13 +527,13 @@ float compute_norm(vw& all, example* &ec) } template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx> -void local_predict(vw& all, gd& g, example* ec) +void local_predict(vw& all, gd& g, example& ec) { - label_data* ld = (label_data*)ec->ld; + label_data* ld = (label_data*)ec.ld; if(g.active_simulation){ - float k = ec->example_t - ld->weight; - ec->revert_weight = all.loss->getRevertingWeight(all.sd, ec->final_prediction, all.eta/powf(k,all.power_t)); + float k = ec.example_t - ld->weight; + ec.revert_weight = all.loss->getRevertingWeight(all.sd, ec.final_prediction, all.eta/powf(k,all.power_t)); float importance = query_decision(all, ec, k); if(importance > 0){ all.sd->queries += 1; @@ -547,16 +547,16 @@ void local_predict(vw& all, gd& g, example* ec) if(all.active && ld->label != FLT_MAX) t = (float)all.sd->weighted_unlabeled_examples; else - t = (float)(ec->example_t - all.sd->weighted_holdout_examples); + t = (float)(ec.example_t - all.sd->weighted_holdout_examples); - ec->eta_round = 0; + ec.eta_round = 0; if (ld->label != FLT_MAX) - ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ld->label) * ld->weight; + ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ld->label) * ld->weight; - if (ld->label != FLT_MAX && !ec->test_only) + if (ld->label != FLT_MAX && !ec.test_only) { - if (all.training && ec->loss > 0.) + if (all.training && ec.loss > 0.) { float eta_t; float norm; @@ -566,22 +566,22 @@ void local_predict(vw& all, gd& g, example* ec) else norm = compute_norm<powert_norm_compute<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(all,ec); else - norm = ec->total_sum_feat_sq; + norm = ec.total_sum_feat_sq; eta_t = all.eta * norm * ld->weight; if(!adaptive && all.power_t != 0) eta_t *= powf(t,-all.power_t); float update = 0.f; if( all.invariant_updates ) - update = all.loss->getUpdate(ec->final_prediction, ld->label, eta_t, norm); + update = all.loss->getUpdate(ec.final_prediction, ld->label, eta_t, norm); else - update = all.loss->getUnsafeUpdate(ec->final_prediction, ld->label, eta_t, norm); + update = all.loss->getUnsafeUpdate(ec.final_prediction, ld->label, eta_t, norm); - ec->eta_round = (float) (update / all.sd->contraction); + ec.eta_round = (float) (update / all.sd->contraction); - if (all.reg_mode && fabs(ec->eta_round) > 1e-8) { - double dev1 = all.loss->first_derivative(all.sd, ec->final_prediction, ld->label); - double eta_bar = (fabs(dev1) > 1e-8) ? (-ec->eta_round / dev1) : 0.0; + if (all.reg_mode && fabs(ec.eta_round) > 1e-8) { + double dev1 = all.loss->first_derivative(all.sd, ec.final_prediction, ld->label); + double eta_bar = (fabs(dev1) > 1e-8) ? (-ec.eta_round / dev1) : 0.0; if (fabs(dev1) > 1e-8) all.sd->contraction *= (1. - all.l2_lambda * eta_bar * norm); //all.sd->contraction /= (1. 
+ all.l2_lambda * eta_bar * norm); @@ -590,23 +590,23 @@ void local_predict(vw& all, gd& g, example* ec) } } else if(all.active) - ec->revert_weight = all.loss->getRevertingWeight(all.sd, ec->final_prediction, all.eta/powf(t,all.power_t)); + ec.revert_weight = all.loss->getRevertingWeight(all.sd, ec.final_prediction, all.eta/powf(t,all.power_t)); } template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx> -void update(gd* g, learner& base, example* ec) +void update(gd& g, learner& base, example& ec) { - vw* all = g->all; + vw* all = g.all; - local_predict<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx > (*all, *g, ec); + local_predict<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx > (*all, g, ec); - if (ec->eta_round != 0.) + if (ec.eta_round != 0.) { if(all->power_t == 0.5) - generic_train<specialized_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> > (*all,ec,(float)ec->eta_round,true); + generic_train<specialized_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> > (*all,ec,(float)ec.eta_round,true); else - generic_train<general_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(*all,ec,(float)ec->eta_round,false); + generic_train<general_update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx> >(*all,ec,(float)ec.eta_round,false); if (all->sd->contraction < 1e-10) // updating weights now to avoid numerical instability sync_weights(*all); @@ -614,16 +614,16 @@ void update(gd* g, learner& base, example* ec) } template<bool adaptive, bool normalized, bool feature_mask_off, size_t normalized_idx, size_t feature_mask_idx> -void learn(gd* g, learner& base, example* ec) +void learn(gd& g, learner& base, example& ec) { - vw* all = g->all; - label_data* ld = (label_data*)ec->ld; + vw* all = g.all; + label_data* ld = (label_data*)ec.ld; - assert(ec->in_use); + assert(ec.in_use); - g->predict(g,base,ec); + g.predict(g,base,ec); - if ((all->holdout_set_off || !ec->test_only) && ld->weight > 0) + if ((all->holdout_set_off || !ec.test_only) && ld->weight > 0) update<adaptive, normalized, feature_mask_off, normalized_idx, feature_mask_idx>(g,base,ec); } @@ -830,9 +830,9 @@ void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text) while ((!read && i < length) || (read && brw >0)); } -void save_load(gd* g, io_buf& model_file, bool read, bool text) +void save_load(gd& g, io_buf& model_file, bool read, bool text) { - vw* all = g->all; + vw* all = g.all; if(read) { initialize_regressor(*all); @@ -850,8 +850,8 @@ void save_load(gd* g, io_buf& model_file, bool read, bool text) } } - if (g->initial_constant != 0.0) - VW::set_weight(*all, constant, 0, g->initial_constant); + if (g.initial_constant != 0.0) + VW::set_weight(*all, constant, 0, g.initial_constant); } diff --git a/vowpalwabbit/gd.h b/vowpalwabbit/gd.h index c14aa956..c4ed556c 100644 --- a/vowpalwabbit/gd.h +++ b/vowpalwabbit/gd.h @@ -19,9 +19,9 @@ license as described in the file LICENSE. 
namespace GD{ void print_result(int f, float res, v_array<char> tag); -void print_audit_features(regressor ®, example* ec, size_t offset); +void print_audit_features(regressor ®, example& ec, size_t offset); float finalize_prediction(vw&, float ret); -void print_audit_features(vw&, example* ec); +void print_audit_features(vw&, example& ec); void train_one_example(regressor& r, example* ex); void train_offset_example(regressor& r, example* ex, size_t offset); void compute_update(example* ec); @@ -39,52 +39,53 @@ void output_and_account_example(example* ec); } template <class R, void (*T)(R&, float, float&)> - void foreach_feature(vw& all, example* ec, R& dat) + void foreach_feature(vw& all, example& ec, R& dat) { - uint32_t offset = ec->ft_offset; + uint32_t offset = ec.ft_offset; - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[*i].begin, ec->atomics[*i].end, dat, offset); + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[*i].begin, ec.atomics[*i].end, dat, offset); for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) { - if (ec->atomics[(int)(*i)[0]].size() > 0) { - v_array<feature> temp = ec->atomics[(int)(*i)[0]]; + if (ec.atomics[(int)(*i)[0]].size() > 0) { + v_array<feature> temp = ec.atomics[(int)(*i)[0]]; for (; temp.begin != temp.end; temp.begin++) { uint32_t halfhash = quadratic_constant * (temp.begin->weight_index + offset); - foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, dat, + + foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, dat, halfhash, temp.begin->x); } } } for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) { - if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; } - v_array<feature> temp1 = ec->atomics[(int)(*i)[0]]; + if ((ec.atomics[(int)(*i)[0]].size() == 0) || (ec.atomics[(int)(*i)[1]].size() == 0) || (ec.atomics[(int)(*i)[2]].size() == 0)) { continue; } + v_array<feature> temp1 = ec.atomics[(int)(*i)[0]]; for (; temp1.begin != temp1.end; temp1.begin++) { - v_array<feature> temp2 = ec->atomics[(int)(*i)[1]]; + v_array<feature> temp2 = ec.atomics[(int)(*i)[1]]; for (; temp2.begin != temp2.end; temp2.begin++) { uint32_t halfhash = cubic_constant2 * (cubic_constant * (temp1.begin->weight_index + offset) + temp2.begin->weight_index + offset); float mult = temp1.begin->x * temp2.begin->x; - foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[(int)(*i)[2]].begin, ec->atomics[(int)(*i)[2]].end, dat, halfhash, mult); + foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[2]].begin, ec.atomics[(int)(*i)[2]].end, dat, halfhash, mult); } } } } template <class R, void (*T)(predict_data<R>&, float, float&)> - float inline_predict(vw& all, example* ec, R extra) + float inline_predict(vw& all, example& ec, R extra) { - predict_data<R> temp = {all.p->lp.get_initial(ec->ld), extra}; + predict_data<R> temp = {all.p->lp.get_initial(ec.ld), extra}; foreach_feature<predict_data<R>, T>(all, ec, temp); return temp.prediction; } template <void (*T)(float&, float, float&)> - float inline_predict(vw& all, example* ec) + float 
inline_predict(vw& all, example& ec) { - float temp = all.p->lp.get_initial(ec->ld); + float temp = all.p->lp.get_initial(ec.ld); foreach_feature<float, T>(all, ec, temp); return temp; } diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc index 57f1c61b..105972a2 100644 --- a/vowpalwabbit/gd_mf.cc +++ b/vowpalwabbit/gd_mf.cc @@ -30,49 +30,102 @@ namespace GDMF { vw* all; }; -void mf_print_audit_features(vw& all, example* ec, size_t offset); +void mf_print_offset_features(vw& all, example& ec, size_t offset) +{ + weight* weights = all.reg.weight_vector; + size_t mask = all.reg.weight_mask; + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + if (ec.audit_features[*i].begin != ec.audit_features[*i].end) + for (audit_data *f = ec.audit_features[*i].begin; f != ec.audit_features[*i].end; f++) + { + cout << '\t' << f->space << '^' << f->feature << ':' << f->weight_index <<"(" << ((f->weight_index + offset) & mask) << ")" << ':' << f->x; -float mf_predict(vw& all, example* ec) + cout << ':' << weights[(f->weight_index + offset) & mask]; + } + else + for (feature *f = ec.atomics[*i].begin; f != ec.atomics[*i].end; f++) + { + size_t index = (f->weight_index + offset) & all.reg.weight_mask; + + cout << "\tConstant:"; + cout << (index/all.reg.stride & all.parse_mask) << ':' << f->x; + cout << ':' << trunc_weight(weights[index], (float)all.sd->gravity) * (float)all.sd->contraction; + } + for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) + if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) + { + /* print out nsk^feature:hash:value:weight:nsk^feature^:hash:value:weight:prod_weights */ + for (size_t k = 1; k <= all.rank; k++) + { + for (audit_data* f = ec.audit_features[(int)(*i)[0]].begin; f!= ec.audit_features[(int)(*i)[0]].end; f++) + for (audit_data* f2 = ec.audit_features[(int)(*i)[1]].begin; f2!= ec.audit_features[(int)(*i)[1]].end; f2++) + { + cout << '\t' << f->space << k << '^' << f->feature << ':' << ((f->weight_index+k)&mask) + <<"(" << ((f->weight_index + offset +k) & mask) << ")" << ':' << f->x; + cout << ':' << weights[(f->weight_index + offset + k) & mask]; + + cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+all.rank)&mask) + <<"(" << ((f2->weight_index + offset +k+all.rank) & mask) << ")" << ':' << f2->x; + cout << ':' << weights[(f2->weight_index + offset + k+all.rank) & mask]; + + cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + all.rank) & mask]; + } + } + } + if (all.triples.begin() != all.triples.end()) { + cerr << "cannot use triples in matrix factorization" << endl; + throw exception(); + } + cout << endl; +} + +void mf_print_audit_features(vw& all, example& ec, size_t offset) { - float prediction = all.p->lp.get_initial(ec->ld); + print_result(all.stdout_fileno,ec.final_prediction,-1,ec.tag); + mf_print_offset_features(all, ec, offset); +} + +float mf_predict(vw& all, example& ec) +{ + float prediction = all.p->lp.get_initial(ec.ld); // clear stored predictions - ec->topic_predictions.erase(); + ec.topic_predictions.erase(); float linear_prediction = 0.; // linear terms - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - GD::foreach_feature<float, vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[*i].begin, ec->atomics[*i].end, linear_prediction); + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + GD::foreach_feature<float, 
vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[*i].begin, ec.atomics[*i].end, linear_prediction); // store constant + linear prediction // note: constant is now automatically added - ec->topic_predictions.push_back(linear_prediction); + ec.topic_predictions.push_back(linear_prediction); prediction += linear_prediction; // interaction terms for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) { - if (ec->atomics[(int)(*i)[0]].size() > 0 && ec->atomics[(int)(*i)[1]].size() > 0) + if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) { for (uint32_t k = 1; k <= all.rank; k++) { // x_l * l^k // l^k is from index+1 to index+all.rank - //float x_dot_l = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k); + //float x_dot_l = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k); float x_dot_l = 0.; - GD::foreach_feature<float, vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, x_dot_l, k); + GD::foreach_feature<float, vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, x_dot_l, k); // x_r * r^k // r^k is from index+all.rank+1 to index+2*all.rank - //float x_dot_r = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank); + //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank); float x_dot_r = 0.; - GD::foreach_feature<float,vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, x_dot_r, k+all.rank); + GD::foreach_feature<float,vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+all.rank); prediction += x_dot_l * x_dot_r; // store prediction from interaction terms - ec->topic_predictions.push_back(x_dot_l); - ec->topic_predictions.push_back(x_dot_r); + ec.topic_predictions.push_back(x_dot_l); + ec.topic_predictions.push_back(x_dot_r); } } } @@ -82,67 +135,67 @@ float mf_predict(vw& all, example* ec) throw exception(); } - // ec->topic_predictions has linear, x_dot_l_1, x_dot_r_1, x_dot_l_2, x_dot_r_2, ... + // ec.topic_predictions has linear, x_dot_l_1, x_dot_r_1, x_dot_l_2, x_dot_r_2, ... 
- ec->partial_prediction = prediction; + ec.partial_prediction = prediction; // finalize prediction and compute loss - label_data* ld = (label_data*)ec->ld; + label_data* ld = (label_data*)ec.ld; all.set_minmax(all.sd, ld->label); - ec->final_prediction = GD::finalize_prediction(all, ec->partial_prediction); + ec.final_prediction = GD::finalize_prediction(all, ec.partial_prediction); if (ld->label != FLT_MAX) { - ec->loss = all.loss->getLoss(all.sd, ec->final_prediction, ld->label) * ld->weight; + ec.loss = all.loss->getLoss(all.sd, ec.final_prediction, ld->label) * ld->weight; } if (all.audit) mf_print_audit_features(all, ec, 0); - return ec->final_prediction; + return ec.final_prediction; } -void mf_train(vw& all, example* &ec, float update) +void mf_train(vw& all, example& ec, float update) { weight* weights = all.reg.weight_vector; size_t mask = all.reg.weight_mask; - label_data* ld = (label_data*)ec->ld; + label_data* ld = (label_data*)ec.ld; // use final prediction to get update size // update = eta_t*(y-y_hat) where eta_t = eta/(3*t^p) * importance weight - float eta_t = all.eta/pow(ec->example_t,all.power_t) / 3.f * ld->weight; - update = all.loss->getUpdate(ec->final_prediction, ld->label, eta_t, 1.); //ec->total_sum_feat_sq); + float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld->weight; + update = all.loss->getUpdate(ec.final_prediction, ld->label, eta_t, 1.); //ec.total_sum_feat_sq); float regularization = eta_t * all.l2_lambda; // linear update - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - sd_offset_update(weights, mask, ec->atomics[*i].begin, ec->atomics[*i].end, 0, update, regularization); + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) + sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization); // quadratic update for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) { - if (ec->atomics[(int)(*i)[0]].size() > 0 && ec->atomics[(int)(*i)[1]].size() > 0) + if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0) { // update l^k weights for (size_t k = 1; k <= all.rank; k++) { // r^k \cdot x_r - float r_dot_x = ec->topic_predictions[2*k]; + float r_dot_x = ec.topic_predictions[2*k]; // l^k <- l^k + update * (r^k \cdot x_r) * x_l - sd_offset_update(weights, mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization); + sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization); } // update r^k weights for (size_t k = 1; k <= all.rank; k++) { // l^k \cdot x_l - float l_dot_x = ec->topic_predictions[2*k-1]; + float l_dot_x = ec.topic_predictions[2*k-1]; // r^k <- r^k + update * (l^k \cdot x_l) * x_r - sd_offset_update(weights, mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank, update*l_dot_x, regularization); + sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank, update*l_dot_x, regularization); } } @@ -154,64 +207,9 @@ void mf_train(vw& all, example* &ec, float update) } -void mf_print_offset_features(vw& all, example* &ec, size_t offset) -{ - weight* weights = all.reg.weight_vector; - size_t mask = all.reg.weight_mask; - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) - if (ec->audit_features[*i].begin != ec->audit_features[*i].end) - for (audit_data *f = ec->audit_features[*i].begin; f != 
ec->audit_features[*i].end; f++) - { - cout << '\t' << f->space << '^' << f->feature << ':' << f->weight_index <<"(" << ((f->weight_index + offset) & mask) << ")" << ':' << f->x; - - cout << ':' << weights[(f->weight_index + offset) & mask]; - } - else - for (feature *f = ec->atomics[*i].begin; f != ec->atomics[*i].end; f++) - { - size_t index = (f->weight_index + offset) & all.reg.weight_mask; - - cout << "\tConstant:"; - cout << (index/all.reg.stride & all.parse_mask) << ':' << f->x; - cout << ':' << trunc_weight(weights[index], (float)all.sd->gravity) * (float)all.sd->contraction; - } - for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) - if (ec->atomics[(int)(*i)[0]].size() > 0 && ec->atomics[(int)(*i)[1]].size() > 0) - { - /* print out nsk^feature:hash:value:weight:nsk^feature^:hash:value:weight:prod_weights */ - for (size_t k = 1; k <= all.rank; k++) - { - for (audit_data* f = ec->audit_features[(int)(*i)[0]].begin; f!= ec->audit_features[(int)(*i)[0]].end; f++) - for (audit_data* f2 = ec->audit_features[(int)(*i)[1]].begin; f2!= ec->audit_features[(int)(*i)[1]].end; f2++) - { - cout << '\t' << f->space << k << '^' << f->feature << ':' << ((f->weight_index+k)&mask) - <<"(" << ((f->weight_index + offset +k) & mask) << ")" << ':' << f->x; - cout << ':' << weights[(f->weight_index + offset + k) & mask]; - - cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+all.rank)&mask) - <<"(" << ((f2->weight_index + offset +k+all.rank) & mask) << ")" << ':' << f2->x; - cout << ':' << weights[(f2->weight_index + offset + k+all.rank) & mask]; - - cout << ':' << weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + all.rank) & mask]; - } - } - } - if (all.triples.begin() != all.triples.end()) { - cerr << "cannot use triples in matrix factorization" << endl; - throw exception(); - } - cout << endl; -} - -void mf_print_audit_features(vw& all, example* ec, size_t offset) -{ - print_result(all.stdout_fileno,ec->final_prediction,-1,ec->tag); - mf_print_offset_features(all, ec, offset); -} - - void save_load(gdmf* d, io_buf& model_file, bool read, bool text) + void save_load(gdmf& d, io_buf& model_file, bool read, bool text) { - vw* all = d->all; + vw* all = d.all; uint32_t length = 1 << all->num_bits; uint32_t stride = all->reg.stride; @@ -263,9 +261,9 @@ void mf_print_audit_features(vw& all, example* ec, size_t offset) } } -void end_pass(gdmf* d) +void end_pass(gdmf& d) { - vw* all = d->all; + vw* all = d.all; all->eta *= all->eta_decay_rate; if (all->save_per_pass) @@ -274,20 +272,20 @@ void end_pass(gdmf* d) all->current_pass++; } - void predict(gdmf* d, learner& base, example* ec) + void predict(gdmf& d, learner& base, example& ec) { - vw* all = d->all; + vw* all = d.all; mf_predict(*all,ec); } - void learn(gdmf* d, learner& base, example* ec) + void learn(gdmf& d, learner& base, example& ec) { - vw* all = d->all; + vw* all = d.all; predict(d, base, ec); - if (all->training && ((label_data*)(ec->ld))->label != FLT_MAX) - mf_train(*all, ec, ec->eta_round); + if (all->training && ((label_data*)(ec.ld))->label != FLT_MAX) + mf_train(*all, ec, ec.eta_round); } learner* setup(vw& all) diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc index a0d602ee..bf109bbb 100644 --- a/vowpalwabbit/global_data.cc +++ b/vowpalwabbit/global_data.cc @@ -187,7 +187,7 @@ void noop_mm(shared_data* sd, float label) void vw::learn(example* ec) { - this->l->learn(ec); + this->l->learn(*ec); } void 
compile_gram(vector<string> grams, uint32_t* dest, char* descriptor, bool quiet) diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc index db2281a2..fcf79afc 100644 --- a/vowpalwabbit/lda_core.cc +++ b/vowpalwabbit/lda_core.cc @@ -25,6 +25,7 @@ license as described in the file LICENSE. #include "vw.h"
using namespace LEARNER;
+using namespace std;
namespace LDA {
@@ -44,7 +45,7 @@ public:
v_array<int> doc_lengths;
v_array<float> digammas;
v_array<float> v;
- std::vector<index_feature> sorted_features;
+ vector<index_feature> sorted_features;
bool total_lambda_init;
@@ -516,9 +517,9 @@ size_t next_pow2(size_t x) {
return ((size_t)1) << i;
}
-void save_load(lda* l, io_buf& model_file, bool read, bool text)
+void save_load(lda& l, io_buf& model_file, bool read, bool text)
{
- vw* all = l->all;
+ vw* all = l.all;
uint32_t length = 1 << all->num_bits;
uint32_t stride = all->reg.stride;
@@ -648,13 +649,13 @@ void save_load(lda* l, io_buf& model_file, bool read, bool text)
{
float score = lda_loop(*l.all, l.Elogtheta, &(l.v[d*l.all->lda]), weights, l.examples[d],l.all->power_t);
if (l.all->audit)
- GD::print_audit_features(*l.all, l.examples[d]);
+ GD::print_audit_features(*l.all, *l.examples[d]);
// If the doc is empty, give it loss of 0.
if (l.doc_lengths[d] > 0) {
l.all->sd->sum_loss -= score;
l.all->sd->sum_loss_since_last_dump -= score;
}
- return_simple_example(*l.all, NULL, l.examples[d]);
+ return_simple_example(*l.all, NULL, *l.examples[d]);
}
for (index_feature* s = &l.sorted_features[0]; s <= &l.sorted_features.back();)
@@ -691,54 +692,68 @@ void save_load(lda* l, io_buf& model_file, bool read, bool text)
l.doc_lengths.erase();
}
- void learn(lda* l, learner& base, example* ec)
+ void learn(lda& l, learner& base, example& ec)
{
- size_t num_ex = l->examples.size();
- l->examples.push_back(ec);
- l->doc_lengths.push_back(0);
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) {
- feature* f = ec->atomics[*i].begin;
- for (; f != ec->atomics[*i].end; f++) {
+ size_t num_ex = l.examples.size();
+ l.examples.push_back(&ec);
+ l.doc_lengths.push_back(0);
+ for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) {
+ feature* f = ec.atomics[*i].begin;
+ for (; f != ec.atomics[*i].end; f++) {
index_feature temp = {(uint32_t)num_ex, *f};
- l->sorted_features.push_back(temp);
- l->doc_lengths[num_ex] += (int)f->x;
+ l.sorted_features.push_back(temp);
+ l.doc_lengths[num_ex] += (int)f->x;
}
}
- if (++num_ex == l->all->minibatch && !ec->test_only)
- learn_batch(*l);
+ if (++num_ex == l.all->minibatch && !ec.test_only)
+ learn_batch(l);
}
// placeholder
- void predict(lda* l, learner& base, example* ec)
+ void predict(lda& l, learner& base, example& ec)
{
- bool test_only = ec->test_only;
- ec->test_only = true;
+ bool test_only = ec.test_only;
+ ec.test_only = true;
learn(l, base, ec);
- ec->test_only = test_only;
+ ec.test_only = test_only;
}
- void end_pass(lda* l)
+ void end_pass(lda& l)
{
- if (l->examples.size())
- learn_batch(*l);
+ if (l.examples.size())
+ learn_batch(l);
}
-void end_examples(lda* l)
+void end_examples(lda& l)
{
- for (size_t i = 0; i < l->all->length(); i++) {
- weight* weights_for_w = & (l->all->reg.weight_vector[i*l->all->reg.stride]);
- float decay = fmin(1.0, exp(l->decay_levels.last() - l->decay_levels.end[(int)(-1- l->example_t +weights_for_w[l->all->lda])]));
- for (size_t k = 0; k < l->all->lda; k++)
+ for (size_t i = 0; i < l.all->length(); i++) {
+ weight* weights_for_w = & (l.all->reg.weight_vector[i*l.all->reg.stride]);
+ float decay = fmin(1.0, exp(l.decay_levels.last() - l.decay_levels.end[(int)(-1- l.example_t +weights_for_w[l.all->lda])]));
+ for (size_t k = 0; k < l.all->lda; k++)
weights_for_w[k] *= decay;
}
}
- void finish_example(vw& all, lda*, example*ec)
+ void finish_example(vw& all, lda&, example& ec)
{}
-learner* setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
+ void finish(lda& ld)
+ {
+ ld.sorted_features.~vector<index_feature>();
+ ld.Elogtheta.delete_v();
+ ld.decay_levels.delete_v();
+ ld.total_new.delete_v();
+ ld.examples.delete_v();
+ ld.total_lambda.delete_v();
+ ld.doc_lengths.delete_v();
+ ld.digammas.delete_v();
+ ld.v.delete_v();
+ }
+
+learner* setup(vw&all, vector<string>&opts, po::variables_map& vm)
{
- lda* ld = new lda;
+ lda* ld = (lda*)calloc(1,sizeof(lda));
+ ld->sorted_features = vector<index_feature>();
ld->total_lambda_init = 0;
ld->all = &all;
ld->example_t = all.initial_t;
@@ -773,12 +788,13 @@ learner* setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
if (vm.count("minibatch")) {
size_t minibatch2 = next_pow2(all.minibatch);
all.p->ring_size = all.p->ring_size > minibatch2 ? all.p->ring_size : minibatch2;
-}
+ }
ld->v.resize(all.lda*all.minibatch);
ld->decay_levels.push_back(0.f);
-
+
+ all.l->finish();
learner* l = new learner(ld, all.reg.stride);
l->set_learn<lda,learn>();
l->set_predict<lda,predict>();
@@ -786,6 +802,7 @@ learner* setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
l->set_finish_example<lda,finish_example>();
l->set_end_examples<lda,end_examples>();
l->set_end_pass<lda,end_pass>();
+ l->set_finish<lda,finish>();
return l;
}
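[Annotation, not part of the patch: the recurring change in this diff is that reductions now receive their state and the example by reference, while the learner object keeps a type-erased void* and C-style function pointers; the tlearn/tsl/tfunc templates in learner.h (next file below) dereference the erased pointer exactly once at the dispatch boundary. A minimal self-contained C++ sketch of that dispatch idea follows; the counter struct, count_learn function, and trampoline helper are invented for illustration and do not exist in VW.]

#include <iostream>

struct example { float prediction; };

// Hypothetical reduction state, standing in for gd, lda, nn, etc.
struct counter { int calls; };

// New-style reduction entry point: state and example passed by reference.
void count_learn(counter& c, example& ec)
{
  ++c.calls;
  ec.prediction = (float)c.calls;
}

// Type-erased slot in the spirit of learner's learn_fd: the void* is cast
// back to the concrete state type and dereferenced once, here and only here.
template <class R, void (*T)(R&, example&)>
void trampoline(void* d, example& ec) { T(*(R*)d, ec); }

int main()
{
  counter c = {0};
  void (*learn_f)(void*, example&) = trampoline<counter, count_learn>;
  example ec = {0.f};
  learn_f(&c, ec);                          // dispatch through the erased pointer
  std::cout << ec.prediction << std::endl;  // prints 1
  return 0;
}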
diff --git a/vowpalwabbit/learner.cc b/vowpalwabbit/learner.cc index 1b936991..671f4de2 100644 --- a/vowpalwabbit/learner.cc +++ b/vowpalwabbit/learner.cc @@ -17,8 +17,8 @@ namespace LEARNER { if (ec->indices.size() > 1) // one nonconstant feature. { - all->l->learn(ec); - all->l->finish_example(*all, ec); + all->l->learn(*ec); + all->l->finish_example(*all, *ec); } else if (ec->end_pass) { @@ -41,17 +41,17 @@ namespace LEARNER } else { - all->l->learn(ec); + all->l->learn(*ec); if(all->early_terminate) { all->p->done = true; - all->l->finish_example(*all, ec); + all->l->finish_example(*all, *ec); return; } else { - all->l->finish_example(*all, ec); + all->l->finish_example(*all, *ec); } } } diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index 46f8222e..7cef426f 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -10,7 +10,7 @@ license as described in the file LICENSE. using namespace std; struct vw; -void return_simple_example(vw& all, void*, example* ec); +void return_simple_example(vw& all, void*, example& ec); namespace LEARNER { @@ -34,9 +34,9 @@ namespace LEARNER struct learn_data { void* data; learner* base; - void (*learn_f)(void* data, learner& base, example*); - void (*predict_f)(void* data, learner& base, example*); - void (*update_f)(void* data, learner& base, example*); + void (*learn_f)(void* data, learner& base, example&); + void (*predict_f)(void* data, learner& base, example&); + void (*update_f)(void* data, learner& base, example&); }; struct save_load_data{ @@ -48,13 +48,13 @@ namespace LEARNER struct finish_example_data{ void* data; learner* base; - void (*finish_example_f)(vw&, void* data, example*); + void (*finish_example_f)(vw&, void* data, example&); }; void generic_driver(vw* all); inline void generic_sl(void*, io_buf&, bool, bool) {} - inline void generic_learner(void* data, learner& base, example*) + inline void generic_learner(void* data, learner& base, example&) { cout << "calling generic learner\n";} inline void generic_func(void* data) {} @@ -62,25 +62,25 @@ namespace LEARNER const learn_data generic_learn_fd = {NULL, NULL, generic_learner, NULL, NULL}; const func_data generic_func_fd = {NULL, NULL, generic_func}; - template<class R, void (*T)(R*, learner& base, example* ec)> - inline void tlearn(void* d, learner& base, example* ec) - { T((R*)d, base, ec); } + template<class R, void (*T)(R&, learner& base, example& ec)> + inline void tlearn(void* d, learner& base, example& ec) + { T(*(R*)d, base, ec); } - template<class R, void (*T)(R*, io_buf& io, bool read, bool text)> + template<class R, void (*T)(R&, io_buf& io, bool read, bool text)> inline void tsl(void* d, io_buf& io, bool read, bool text) - { T((R*)d, io, read, text); } + { T(*(R*)d, io, read, text); } - template<class R, void (*T)(R*)> - inline void tfunc(void* d) { T((R*)d); } + template<class R, void (*T)(R&)> + inline void tfunc(void* d) { T(*(R*)d); } - template<class R, void (*T)(vw& all, R*, example*)> - inline void tend_example(vw& all, void* d, example* ec) - { T(all, (R*)d, ec); } + template<class R, void (*T)(vw& all, R&, example&)> + inline void tend_example(vw& all, void* d, example& ec) + { T(all, *(R*)d, ec); } - template <class T, void (*learn)(T* data, learner& base, example*), void (*predict)(T* data, learner& base, example*)> + template <class T, void (*learn)(T* data, learner& base, example&), void (*predict)(T* data, learner& base, example&)> struct learn_helper { - void (*learn_f)(void* data, learner& base, example*); - void (*predict_f)(void* data, 
learner& base, example*); + void (*learn_f)(void* data, learner& base, example&); + void (*predict_f)(void* data, learner& base, example&); learn_helper() { learn_f = tlearn<T,learn>; @@ -103,38 +103,38 @@ public: size_t increment; //called once for each example. Must work under reduction. - inline void learn(example* ec, size_t i=0) + inline void learn(example& ec, size_t i=0) { - ec->ft_offset += (uint32_t)(increment*i); + ec.ft_offset += (uint32_t)(increment*i); learn_fd.learn_f(learn_fd.data, *learn_fd.base, ec); - ec->ft_offset -= (uint32_t)(increment*i); + ec.ft_offset -= (uint32_t)(increment*i); } - template <class T, void (*u)(T* data, learner& base, example*)> + template <class T, void (*u)(T& data, learner& base, example&)> inline void set_learn() { learn_fd.learn_f = tlearn<T,u>; learn_fd.update_f = tlearn<T,u>; } - inline void predict(example* ec, size_t i=0) + inline void predict(example& ec, size_t i=0) { - ec->ft_offset += (uint32_t)(increment*i); + ec.ft_offset += (uint32_t)(increment*i); learn_fd.predict_f(learn_fd.data, *learn_fd.base, ec); - ec->ft_offset -= (uint32_t)(increment*i); + ec.ft_offset -= (uint32_t)(increment*i); } - template <class T, void (*u)(T* data, learner& base, example*)> + template <class T, void (*u)(T& data, learner& base, example&)> inline void set_predict() { learn_fd.predict_f = tlearn<T,u>; } - inline void update(example* ec, size_t i=0) + inline void update(example& ec, size_t i=0) { - ec->ft_offset += (uint32_t)(increment*i); + ec.ft_offset += (uint32_t)(increment*i); learn_fd.update_f(learn_fd.data, *learn_fd.base, ec); - ec->ft_offset -= (uint32_t)(increment*i); + ec.ft_offset -= (uint32_t)(increment*i); } - template <class T, void (*u)(T* data, learner& base, example*)> + template <class T, void (*u)(T& data, learner& base, example&)> inline void set_update() { learn_fd.update_f = tlearn<T,u>; @@ -142,14 +142,14 @@ public: //called anytime saving or loading needs to happen. Autorecursive. inline void save_load(io_buf& io, bool read, bool text) { save_load_fd.save_load_f(save_load_fd.data, io, read, text); if (save_load_fd.base) save_load_fd.base->save_load(io, read, text); } - template <class T, void (*sl)(T*, io_buf&, bool, bool)> + template <class T, void (*sl)(T&, io_buf&, bool, bool)> inline void set_save_load() { save_load_fd.save_load_f = tsl<T,sl>; save_load_fd.data = learn_fd.data; save_load_fd.base = learn_fd.base;} //called to clean up state. Autorecursive. - template <class T, void (*f)(T*)> + template <class T, void (*f)(T&)> void set_finish() { finisher_fd = tuple_dbf(learn_fd.data,learn_fd.base, tfunc<T, f>); } inline void finish() { @@ -164,24 +164,24 @@ public: void end_pass(){ end_pass_fd.func(end_pass_fd.data); if (end_pass_fd.base) end_pass_fd.base->end_pass(); }//autorecursive - template <class T, void (*f)(T*)> + template <class T, void (*f)(T&)> void set_end_pass() {end_pass_fd = tuple_dbf(learn_fd.data, learn_fd.base, tfunc<T,f>);} //called after parsing of examples is complete. Autorecursive. void end_examples() { end_examples_fd.func(end_examples_fd.data); if (end_examples_fd.base) end_examples_fd.base->end_examples(); } - template <class T, void (*f)(T*)> + template <class T, void (*f)(T&)> void set_end_examples() {end_examples_fd = tuple_dbf(learn_fd.data,learn_fd.base, tfunc<T,f>);} //Called at the beginning by the driver. Explicitly not recursive. 
void init_driver() { init_fd.func(init_fd.data);} - template <class T, void (*f)(T*)> + template <class T, void (*f)(T&)> void set_init_driver() { init_fd = tuple_dbf(learn_fd.data,learn_fd.base, tfunc<T,f>); } //called after learn example for each example. Explicitly not recursive. - inline void finish_example(vw& all, example* ec) { finish_example_fd.finish_example_f(all, finish_example_fd.data, ec);} - template<class T, void (*f)(vw& all, T*, example*)> + inline void finish_example(vw& all, example& ec) { finish_example_fd.finish_example_f(all, finish_example_fd.data, ec);} + template<class T, void (*f)(vw& all, T&, example&)> void set_finish_example() {finish_example_fd.data = learn_fd.data; finish_example_fd.finish_example_f = tend_example<T,f>;} diff --git a/vowpalwabbit/lrq.cc b/vowpalwabbit/lrq.cc index 7c19ce3f..8fbed800 100644 --- a/vowpalwabbit/lrq.cc +++ b/vowpalwabbit/lrq.cc @@ -46,41 +46,41 @@ namespace { } inline bool - example_is_test (example* ec) + example_is_test (example& ec) { - return ec->test_only || (((label_data*) ec->ld)->label == FLT_MAX); + return ec.test_only || (((label_data*) ec.ld)->label == FLT_MAX); } void - reset_seed (LRQ::LRQstate* lrq) + reset_seed (LRQ::LRQstate& lrq) { - if (lrq->all->bfgs) - lrq->seed = lrq->initial_seed; + if (lrq.all->bfgs) + lrq.seed = lrq.initial_seed; } } namespace LRQ { template <bool is_learn> - void predict_or_learn(LRQstate* lrq, learner& base, example* ec) + void predict_or_learn(LRQstate& lrq, learner& base, example& ec) { - vw& all = *lrq->all; + vw& all = *lrq.all; // Remember original features - for (unsigned char* i = ec->indices.begin; i != ec->indices.end; ++i) + for (unsigned char* i = ec.indices.begin; i != ec.indices.end; ++i) { - if (lrq->lrindices[*i]) - lrq->orig_size[*i] = ec->atomics[*i].size (); + if (lrq.lrindices[*i]) + lrq.orig_size[*i] = ec.atomics[*i].size (); } - size_t which = ec->example_counter; + size_t which = ec.example_counter; simple_prediction first_prediction; float first_loss; unsigned int maxiter = (all.training && ! example_is_test (ec)) ? 2 : 1; - bool do_dropout = lrq->dropout && all.training && ! example_is_test (ec); - float scale = (! lrq->dropout || do_dropout) ? 1.f : 0.5f; + bool do_dropout = lrq.dropout && all.training && ! example_is_test (ec); + float scale = (! lrq.dropout || do_dropout) ? 1.f : 0.5f; for (unsigned int iter = 0; iter < maxiter; ++iter, ++which) { @@ -89,23 +89,23 @@ namespace LRQ { // TODO: what happens with --lrq ab2 --lrq ac2 // i.e. namespace occurs multiple times (?) - for (vector<string>::iterator i = lrq->lrpairs.begin (); - i != lrq->lrpairs.end (); + for (vector<string>::iterator i = lrq.lrpairs.begin (); + i != lrq.lrpairs.end (); ++i) { unsigned char left = (*i)[which%2]; unsigned char right = (*i)[(which+1)%2]; unsigned int k = atoi (i->c_str () + 2); - for (unsigned int lfn = 0; lfn < lrq->orig_size[left]; ++lfn) + for (unsigned int lfn = 0; lfn < lrq.orig_size[left]; ++lfn) { - feature* lf = ec->atomics[left].begin + lfn; + feature* lf = ec.atomics[left].begin + lfn; float lfx = lf->x; - size_t lindex = lf->weight_index + ec->ft_offset; + size_t lindex = lf->weight_index + ec.ft_offset; for (unsigned int n = 1; n <= k; ++n) { - if (! do_dropout || cheesyrbit (lrq->seed)) + if (! 
do_dropout || cheesyrbit (lrq.seed))
          {
            uint32_t lwindex = (uint32_t)(lindex + n * all.reg.stride);
@@ -116,12 +116,12 @@ namespace LRQ {
            *lw = cheesyrand (lwindex);

          for (unsigned int rfn = 0;
-              rfn < lrq->orig_size[right];
+              rfn < lrq.orig_size[right];
               ++rfn)
            {
-             feature* rf = ec->atomics[right].begin + rfn;
+             feature* rf = ec.atomics[right].begin + rfn;

-             // NB: ec->ft_offset added by base learner
+             // NB: ec.ft_offset added by base learner
              float rfx = rf->x;
              size_t rindex = rf->weight_index;
              uint32_t rwindex = (uint32_t)(rindex + n * all.reg.stride);
@@ -130,14 +130,14 @@ namespace LRQ {
              lrq.x = scale * *lw * lfx * rfx;
              lrq.weight_index = rwindex;

-             ec->atomics[right].push_back (lrq);
+             ec.atomics[right].push_back (lrq);

              if (all.audit)
                {
                  char name[4] = { 'l', 'r', 'q', '\0' };
                  char subname[4] = { left, '^', right, '\0' };
                  audit_data ad = { name, subname, lrq.weight_index, lrq.x, false };
-                 ec->audit_features[right].push_back (ad);
+                 ec.audit_features[right].push_back (ad);
                }
            }
        }
@@ -154,27 +154,27 @@ namespace LRQ {

      if (iter == 0)
        {
-         first_prediction = ec->final_prediction;
-         first_loss = ec->loss;
+         first_prediction = ec.final_prediction;
+         first_loss = ec.loss;
        }
      else
        {
-         ec->final_prediction = first_prediction;
-         ec->loss = first_loss;
+         ec.final_prediction = first_prediction;
+         ec.loss = first_loss;
        }

-     for (vector<string>::iterator i = lrq->lrpairs.begin ();
-          i != lrq->lrpairs.end ();
+     for (vector<string>::iterator i = lrq.lrpairs.begin ();
+          i != lrq.lrpairs.end ();
           ++i)
        {
          unsigned char right = (*i)[(which+1)%2];
-         ec->atomics[right].end =
-           ec->atomics[right].begin + lrq->orig_size[right];
+         ec.atomics[right].end =
+           ec.atomics[right].begin + lrq.orig_size[right];

          if (all.audit)
-           ec->audit_features[right].end =
-             ec->audit_features[right].begin + lrq->orig_size[right];
+           ec.audit_features[right].end =
+             ec.audit_features[right].begin + lrq.orig_size[right];
        }
    }
  }
diff --git a/vowpalwabbit/mf.cc b/vowpalwabbit/mf.cc
index ab6b8988..d0236ed0 100644
--- a/vowpalwabbit/mf.cc
+++ b/vowpalwabbit/mf.cc
@@ -53,53 +53,53 @@ struct mf {
 };

 template <bool cache_sub_predictions>
-void predict(mf *data, learner& base, example* ec) {
-  vw* all = data->all;
+void predict(mf& data, learner& base, example& ec) {
+  vw* all = data.all;

   float prediction = 0;
   if (cache_sub_predictions)
-    data->sub_predictions.resize(2*all->rank+1, true);
+    data.sub_predictions.resize(2*all->rank+1, true);

   // predict from linear terms
   base.predict(ec);

   // store linear prediction
   if (cache_sub_predictions)
-    data->sub_predictions[0] = ec->partial_prediction;
-  prediction += ec->partial_prediction;
+    data.sub_predictions[0] = ec.partial_prediction;
+  prediction += ec.partial_prediction;

   // store namespace indices
-  copy_array(data->indices, ec->indices);
+  copy_array(data.indices, ec.indices);

   // erase indices
-  ec->indices.erase();
-  ec->indices.push_back(0);
+  ec.indices.erase();
+  ec.indices.push_back(0);

   // add interaction terms to prediction
-  for (vector<string>::iterator i = data->pairs.begin(); i != data->pairs.end(); i++) {
+  for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {

     int left_ns = (int) (*i)[0];
     int right_ns = (int) (*i)[1];

-    if (ec->atomics[left_ns].size() > 0 && ec->atomics[right_ns].size() > 0) {
+    if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
       for (size_t k = 1; k <= all->rank; k++) {
-        ec->indices[0] = left_ns;
+        ec.indices[0] = left_ns;

         // compute l^k * x_l using base learner
         base.predict(ec, k);
-        float x_dot_l = ec->partial_prediction;
+        float x_dot_l = ec.partial_prediction;
         if (cache_sub_predictions)
-          data->sub_predictions[2*k-1] = x_dot_l;
+          data.sub_predictions[2*k-1] = x_dot_l;

         // set example to right namespace only
-        ec->indices[0] = right_ns;
+        ec.indices[0] = right_ns;

         // compute r^k * x_r using base learner
         base.predict(ec, k + all->rank);
-        float x_dot_r = ec->partial_prediction;
+        float x_dot_r = ec.partial_prediction;
         if (cache_sub_predictions)
-          data->sub_predictions[2*k] = x_dot_r;
+          data.sub_predictions[2*k] = x_dot_r;

         // accumulate prediction
         prediction += (x_dot_l * x_dot_r);
@@ -107,15 +107,15 @@ void predict(mf *data, learner& base, example* ec) {
     }
   }
   // restore namespace indices and label
-  copy_array(ec->indices, data->indices);
+  copy_array(ec.indices, data.indices);

   // finalize prediction
-  ec->partial_prediction = prediction;
-  ec->final_prediction = GD::finalize_prediction(*(data->all), ec->partial_prediction);
+  ec.partial_prediction = prediction;
+  ec.final_prediction = GD::finalize_prediction(*(data.all), ec.partial_prediction);
 }

-void learn(mf* data, learner& base, example* ec) {
-  vw* all = data->all;
+void learn(mf& data, learner& base, example& ec) {
+  vw* all = data.all;

   // predict with current weights
   predict<true>(data, base, ec);
@@ -124,71 +124,71 @@ void learn(mf* data, learner& base, example* ec) {
   base.update(ec);

   // store namespace indices
-  copy_array(data->indices, ec->indices);
+  copy_array(data.indices, ec.indices);

   // erase indices
-  ec->indices.erase();
-  ec->indices.push_back(0);
+  ec.indices.erase();
+  ec.indices.push_back(0);

   // update interaction terms
   // looping over all pairs of non-empty namespaces
-  for (vector<string>::iterator i = data->pairs.begin(); i != data->pairs.end(); i++) {
+  for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {

     int left_ns = (int) (*i)[0];
     int right_ns = (int) (*i)[1];

-    if (ec->atomics[left_ns].size() > 0 && ec->atomics[right_ns].size() > 0) {
+    if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
       // set example to left namespace only
-      ec->indices[0] = left_ns;
+      ec.indices[0] = left_ns;

       // store feature values in left namespace
-      copy_array(data->temp_features, ec->atomics[left_ns]);
+      copy_array(data.temp_features, ec.atomics[left_ns]);

       for (size_t k = 1; k <= all->rank; k++) {
         // multiply features in left namespace by r^k * x_r
-        for (feature* f = ec->atomics[left_ns].begin; f != ec->atomics[left_ns].end; f++)
-          f->x *= data->sub_predictions[2*k];
+        for (feature* f = ec.atomics[left_ns].begin; f != ec.atomics[left_ns].end; f++)
+          f->x *= data.sub_predictions[2*k];

         // update l^k using base learner
         base.update(ec, k);

         // restore left namespace features (undoing multiply)
-        copy_array(ec->atomics[left_ns], data->temp_features);
+        copy_array(ec.atomics[left_ns], data.temp_features);
       }

       // set example to right namespace only
-      ec->indices[0] = right_ns;
+      ec.indices[0] = right_ns;

       // store feature values for right namespace
-      copy_array(data->temp_features, ec->atomics[right_ns]);
+      copy_array(data.temp_features, ec.atomics[right_ns]);

       for (size_t k = 1; k <= all->rank; k++) {
         // multiply features in right namespace by l^k * x_l
-        for (feature* f = ec->atomics[right_ns].begin; f != ec->atomics[right_ns].end; f++)
-          f->x *= data->sub_predictions[2*k-1];
+        for (feature* f = ec.atomics[right_ns].begin; f != ec.atomics[right_ns].end; f++)
+          f->x *= data.sub_predictions[2*k-1];

         // update r^k using base learner
         base.update(ec, k + all->rank);

         // restore right namespace features
-        copy_array(ec->atomics[right_ns], data->temp_features);
+        copy_array(ec.atomics[right_ns], data.temp_features);
       }
     }
   }
   // restore namespace indices
-  copy_array(ec->indices, data->indices);
+  copy_array(ec.indices, data.indices);
 }

-void finish(mf* o) {
+void finish(mf& o) {
   // restore global pairs
-  o->all->pairs = o->pairs;
+  o.all->pairs = o.pairs;

   // clean up local v_arrays
-  o->indices.delete_v();
-  o->sub_predictions.delete_v();
+  o.indices.delete_v();
+  o.sub_predictions.delete_v();
 }
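For orientation, the prediction mf.cc assembles above is the usual low-rank factorization score: the linear term plus, for each rank, the product of the two per-rank dot products computed by the base learner. A minimal standalone sketch (names are illustrative, not VW's API; the dot products are assumed precomputed):

  #include <cstddef>

  // prediction = linear term + sum over ranks of (l^k . x_l) * (r^k . x_r)
  float mf_prediction(float linear, const float* x_dot_l,
                      const float* x_dot_r, size_t rank)
  {
    float p = linear;
    for (size_t k = 0; k < rank; ++k)
      p += x_dot_l[k] * x_dot_r[k];
    return p;
  }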
diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc
index a68a5840..7c744791 100644
--- a/vowpalwabbit/nn.cc
+++ b/vowpalwabbit/nn.cc
@@ -90,71 +90,71 @@ namespace NN {
     n.finished_setup = true;
   }

-  void end_pass(nn* n)
+  void end_pass(nn& n)
   {
-    if (n->all->bfgs)
-      n->xsubi = n->save_xsubi;
+    if (n.all->bfgs)
+      n.xsubi = n.save_xsubi;
   }

   template <bool is_learn>
-  void predict_or_learn(nn* n, learner& base, example* ec)
+  void predict_or_learn(nn& n, learner& base, example& ec)
   {
-    bool shouldOutput = n->all->raw_prediction > 0;
+    bool shouldOutput = n.all->raw_prediction > 0;

-    if (! n->finished_setup)
-      finish_setup (*n, *(n->all));
+    if (! n.finished_setup)
+      finish_setup (n, *(n.all));

-    label_data* ld = (label_data*)ec->ld;
+    label_data* ld = (label_data*)ec.ld;
     float save_label = ld->label;
-    void (*save_set_minmax) (shared_data*, float) = n->all->set_minmax;
+    void (*save_set_minmax) (shared_data*, float) = n.all->set_minmax;
     float save_min_label;
     float save_max_label;
-    float dropscale = n->dropout ? 2.0f : 1.0f;
-    loss_function* save_loss = n->all->loss;
+    float dropscale = n.dropout ? 2.0f : 1.0f;
+    loss_function* save_loss = n.all->loss;

-    float* hidden_units = (float*) alloca (n->k * sizeof (float));
-    bool* dropped_out = (bool*) alloca (n->k * sizeof (bool));
+    float* hidden_units = (float*) alloca (n.k * sizeof (float));
+    bool* dropped_out = (bool*) alloca (n.k * sizeof (bool));

     string outputString;
     stringstream outputStringStream(outputString);

-    n->all->set_minmax = noop_mm;
-    n->all->loss = n->squared_loss;
-    save_min_label = n->all->sd->min_label;
-    n->all->sd->min_label = hidden_min_activation;
-    save_max_label = n->all->sd->max_label;
-    n->all->sd->max_label = hidden_max_activation;
+    n.all->set_minmax = noop_mm;
+    n.all->loss = n.squared_loss;
+    save_min_label = n.all->sd->min_label;
+    n.all->sd->min_label = hidden_min_activation;
+    save_max_label = n.all->sd->max_label;
+    n.all->sd->max_label = hidden_max_activation;
     //ld->label = FLT_MAX;

-    for (unsigned int i = 0; i < n->k; ++i)
+    for (unsigned int i = 0; i < n.k; ++i)
       {
-        uint32_t biasindex = (uint32_t) constant * n->all->wpp * n->all->reg.stride + i * (uint32_t)n->increment + ec->ft_offset;
-        weight* w = &n->all->reg.weight_vector[biasindex & n->all->reg.weight_mask];
+        uint32_t biasindex = (uint32_t) constant * n.all->wpp * n.all->reg.stride + i * (uint32_t)n.increment + ec.ft_offset;
+        weight* w = &n.all->reg.weight_vector[biasindex & n.all->reg.weight_mask];

         // avoid saddle point at 0
         if (*w == 0)
           {
             w[0] = (float) (frand48 () - 0.5);
-            if (n->dropout && n->all->normalized_updates)
-              w[n->all->normalized_idx] = 1e-4f;
+            if (n.dropout && n.all->normalized_updates)
+              w[n.all->normalized_idx] = 1e-4f;
           }

         base.predict(ec, i);
-        hidden_units[i] = ec->final_prediction;
+        hidden_units[i] = ec.final_prediction;

-        dropped_out[i] = (n->dropout && merand48 (n->xsubi) < 0.5);
+        dropped_out[i] = (n.dropout && merand48 (n.xsubi) < 0.5);

         if (shouldOutput) {
           if (i > 0) outputStringStream << ' ';
-          outputStringStream << i << ':' << ec->partial_prediction << ',' << fasttanh (hidden_units[i]);
+          outputStringStream << i << ':' << ec.partial_prediction << ',' << fasttanh (hidden_units[i]);
         }
       }
     //ld->label = save_label;

-    n->all->loss = save_loss;
-    n->all->set_minmax = save_set_minmax;
-    n->all->sd->min_label = save_min_label;
-    n->all->sd->max_label = save_max_label;
+    n.all->loss = save_loss;
+    n.all->set_minmax = save_set_minmax;
+    n.all->sd->min_label = save_min_label;
+    n.all->sd->max_label = save_max_label;

     bool converse = false;
     float save_partial_prediction = 0;
@@ -163,121 +163,121 @@ namespace NN {

CONVERSE: // That's right, I'm using goto.  So sue me.

-    n->output_layer.total_sum_feat_sq = 1;
-    n->output_layer.sum_feat_sq[nn_output_namespace] = 1;
+    n.output_layer.total_sum_feat_sq = 1;
+    n.output_layer.sum_feat_sq[nn_output_namespace] = 1;

-    for (unsigned int i = 0; i < n->k; ++i)
+    for (unsigned int i = 0; i < n.k; ++i)
       {
         float sigmah = (dropped_out[i]) ? 0.0f : dropscale * fasttanh (hidden_units[i]);
-        n->output_layer.atomics[nn_output_namespace][i].x = sigmah;
+        n.output_layer.atomics[nn_output_namespace][i].x = sigmah;

-        n->output_layer.total_sum_feat_sq += sigmah * sigmah;
-        n->output_layer.sum_feat_sq[nn_output_namespace] += sigmah * sigmah;
+        n.output_layer.total_sum_feat_sq += sigmah * sigmah;
+        n.output_layer.sum_feat_sq[nn_output_namespace] += sigmah * sigmah;

-        uint32_t nuindex = n->output_layer.atomics[nn_output_namespace][i].weight_index + (n->k * (uint32_t)n->increment) + ec->ft_offset;
-        weight* w = &n->all->reg.weight_vector[nuindex & n->all->reg.weight_mask];
+        uint32_t nuindex = n.output_layer.atomics[nn_output_namespace][i].weight_index + (n.k * (uint32_t)n.increment) + ec.ft_offset;
+        weight* w = &n.all->reg.weight_vector[nuindex & n.all->reg.weight_mask];

         // avoid saddle point at 0
         if (*w == 0)
           {
-            float sqrtk = sqrt ((float)n->k);
+            float sqrtk = sqrt ((float)n.k);
             w[0] = (float) (frand48 () - 0.5) / sqrtk;
-            if (n->dropout && n->all->normalized_updates)
-              w[n->all->normalized_idx] = 1e-4f;
+            if (n.dropout && n.all->normalized_updates)
+              w[n.all->normalized_idx] = 1e-4f;
           }
       }

-    if (n->inpass) {
+    if (n.inpass) {
       // TODO: this is not correct if there is something in the
       // nn_output_namespace but at least it will not leak memory
       // in that case
-      ec->indices.push_back (nn_output_namespace);
-      v_array<feature> save_nn_output_namespace = ec->atomics[nn_output_namespace];
-      ec->atomics[nn_output_namespace] = n->output_layer.atomics[nn_output_namespace];
-      ec->sum_feat_sq[nn_output_namespace] = n->output_layer.sum_feat_sq[nn_output_namespace];
-      ec->total_sum_feat_sq += n->output_layer.sum_feat_sq[nn_output_namespace];
+      ec.indices.push_back (nn_output_namespace);
+      v_array<feature> save_nn_output_namespace = ec.atomics[nn_output_namespace];
+      ec.atomics[nn_output_namespace] = n.output_layer.atomics[nn_output_namespace];
+      ec.sum_feat_sq[nn_output_namespace] = n.output_layer.sum_feat_sq[nn_output_namespace];
+      ec.total_sum_feat_sq += n.output_layer.sum_feat_sq[nn_output_namespace];
       if (is_learn)
-        base.learn(ec, n->k);
+        base.learn(ec, n.k);
       else
-        base.predict(ec, n->k);
-      n->output_layer.partial_prediction = ec->partial_prediction;
-      n->output_layer.loss = ec->loss;
-      ec->total_sum_feat_sq -= n->output_layer.sum_feat_sq[nn_output_namespace];
-      ec->sum_feat_sq[nn_output_namespace] = 0;
-      ec->atomics[nn_output_namespace] = save_nn_output_namespace;
-      ec->indices.pop ();
+        base.predict(ec, n.k);
+      n.output_layer.partial_prediction = ec.partial_prediction;
+      n.output_layer.loss = ec.loss;
+      ec.total_sum_feat_sq -= n.output_layer.sum_feat_sq[nn_output_namespace];
+      ec.sum_feat_sq[nn_output_namespace] = 0;
+      ec.atomics[nn_output_namespace] = save_nn_output_namespace;
+      ec.indices.pop ();
     }
     else {
-      n->output_layer.ft_offset = ec->ft_offset;
-      n->output_layer.ld = ec->ld;
-      n->output_layer.partial_prediction = 0;
-      n->output_layer.eta_round = ec->eta_round;
-      n->output_layer.eta_global = ec->eta_global;
-      n->output_layer.global_weight = ec->global_weight;
-      n->output_layer.example_t = ec->example_t;
+      n.output_layer.ft_offset = ec.ft_offset;
+      n.output_layer.ld = ec.ld;
+      n.output_layer.partial_prediction = 0;
+      n.output_layer.eta_round = ec.eta_round;
+      n.output_layer.eta_global = ec.eta_global;
+      n.output_layer.global_weight = ec.global_weight;
+      n.output_layer.example_t = ec.example_t;
       if (is_learn)
-        base.learn(&n->output_layer, n->k);
+        base.learn(n.output_layer, n.k);
       else
-        base.predict(&n->output_layer, n->k);
-      n->output_layer.ld = 0;
+        base.predict(n.output_layer, n.k);
+      n.output_layer.ld = 0;
     }

-    n->output_layer.final_prediction = GD::finalize_prediction (*(n->all), n->output_layer.partial_prediction);
+    n.output_layer.final_prediction = GD::finalize_prediction (*(n.all), n.output_layer.partial_prediction);

     if (shouldOutput) {
-      outputStringStream << ' ' << n->output_layer.partial_prediction;
-      n->all->print_text(n->all->raw_prediction, outputStringStream.str(), ec->tag);
+      outputStringStream << ' ' << n.output_layer.partial_prediction;
+      n.all->print_text(n.all->raw_prediction, outputStringStream.str(), ec.tag);
     }

-    if (is_learn && n->all->training && ld->label != FLT_MAX) {
-      float gradient = n->all->loss->first_derivative(n->all->sd,
-                                                      n->output_layer.final_prediction,
+    if (is_learn && n.all->training && ld->label != FLT_MAX) {
+      float gradient = n.all->loss->first_derivative(n.all->sd,
+                                                     n.output_layer.final_prediction,
                                                      ld->label);

       if (fabs (gradient) > 0) {
-        n->all->loss = n->squared_loss;
-        n->all->set_minmax = noop_mm;
-        save_min_label = n->all->sd->min_label;
-        n->all->sd->min_label = hidden_min_activation;
-        save_max_label = n->all->sd->max_label;
-        n->all->sd->max_label = hidden_max_activation;
-
-        for (unsigned int i = 0; i < n->k; ++i) {
+        n.all->loss = n.squared_loss;
+        n.all->set_minmax = noop_mm;
+        save_min_label = n.all->sd->min_label;
+        n.all->sd->min_label = hidden_min_activation;
+        save_max_label = n.all->sd->max_label;
+        n.all->sd->max_label = hidden_max_activation;
+
+        for (unsigned int i = 0; i < n.k; ++i) {
          if (! dropped_out[i]) {
            float sigmah =
-              n->output_layer.atomics[nn_output_namespace][i].x / dropscale;
+              n.output_layer.atomics[nn_output_namespace][i].x / dropscale;
            float sigmahprime = dropscale * (1.0f - sigmah * sigmah);
-            uint32_t nuindex = n->output_layer.atomics[nn_output_namespace][i].weight_index + (n->k * (uint32_t)n->increment) + ec->ft_offset;
-            float nu = n->all->reg.weight_vector[nuindex & n->all->reg.weight_mask];
+            uint32_t nuindex = n.output_layer.atomics[nn_output_namespace][i].weight_index + (n.k * (uint32_t)n.increment) + ec.ft_offset;
+            float nu = n.all->reg.weight_vector[nuindex & n.all->reg.weight_mask];
            float gradhw = 0.5f * nu * gradient * sigmahprime;

-            ld->label = GD::finalize_prediction (*(n->all), hidden_units[i] - gradhw);
+            ld->label = GD::finalize_prediction (*(n.all), hidden_units[i] - gradhw);
            if (ld->label != hidden_units[i])
              base.learn(ec, i);
          }
        }

-        n->all->loss = save_loss;
-        n->all->set_minmax = save_set_minmax;
-        n->all->sd->min_label = save_min_label;
-        n->all->sd->max_label = save_max_label;
+        n.all->loss = save_loss;
+        n.all->set_minmax = save_set_minmax;
+        n.all->sd->min_label = save_min_label;
+        n.all->sd->max_label = save_max_label;
       }
     }

     ld->label = save_label;

     if (! converse) {
-      save_partial_prediction = n->output_layer.partial_prediction;
-      save_final_prediction = n->output_layer.final_prediction;
-      save_ec_loss = n->output_layer.loss;
+      save_partial_prediction = n.output_layer.partial_prediction;
+      save_final_prediction = n.output_layer.final_prediction;
+      save_ec_loss = n.output_layer.loss;
     }

-    if (n->dropout && ! converse)
+    if (n.dropout && ! converse)
       {
-        for (unsigned int i = 0; i < n->k; ++i)
+        for (unsigned int i = 0; i < n.k; ++i)
           {
             dropped_out[i] = ! dropped_out[i];
           }
@@ -286,12 +286,12 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
         goto CONVERSE;
       }

-    ec->partial_prediction = save_partial_prediction;
-    ec->final_prediction = save_final_prediction;
-    ec->loss = save_ec_loss;
+    ec.partial_prediction = save_partial_prediction;
+    ec.final_prediction = save_final_prediction;
+    ec.loss = save_ec_loss;
   }

-  void finish_example(vw& all, nn*, example* ec)
+  void finish_example(vw& all, nn&, example& ec)
   {
     int save_raw_prediction = all.raw_prediction;
     all.raw_prediction = -1;
@@ -299,11 +299,11 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
     all.raw_prediction = save_raw_prediction;
   }

-  void finish(nn* n)
+  void finish(nn& n)
   {
-    delete n->squared_loss;
-    free (n->output_layer.indices.begin);
-    free (n->output_layer.atomics[nn_output_namespace].begin);
+    delete n.squared_loss;
+    free (n.output_layer.indices.begin);
+    free (n.output_layer.atomics[nn_output_namespace].begin);
   }

   learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
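The hidden-layer arithmetic above is easier to see outside the reduction plumbing. A simplified standalone sketch (in nn.cc the combination is actually done by the base learner over the sigmah features, so nu here stands in for the output-layer weights):

  #include <cmath>
  #include <cstddef>

  // Combine k hidden units: dropped units contribute 0, survivors are
  // scaled by dropscale (2.0 under dropout) so the expected activation
  // matches the no-dropout network.
  float hidden_layer_output(const float* hidden_units, const bool* dropped_out,
                            const float* nu, size_t k, bool dropout)
  {
    float dropscale = dropout ? 2.0f : 1.0f;
    float out = 0.f;
    for (size_t i = 0; i < k; ++i) {
      float sigmah = dropped_out[i] ? 0.0f : dropscale * std::tanh(hidden_units[i]);
      out += nu[i] * sigmah;
    }
    return out;
  }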
diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc
index 0a7f6649..01849199 100644
--- a/vowpalwabbit/oaa.cc
+++ b/vowpalwabbit/oaa.cc
@@ -110,11 +110,11 @@ namespace OAA {
       }
   }

-  void print_update(vw& all, example *ec)
+  void print_update(vw& all, example &ec)
   {
     if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
       {
-        mc_label* ld = (mc_label*) ec->ld;
+        mc_label* ld = (mc_label*) ec.ld;
         char label_buf[32];
         if (ld->label == INT_MAX)
           strcpy(label_buf," unknown");
@@ -137,8 +137,8 @@ namespace OAA {
                 (long int)all.sd->example_number,
                 all.sd->weighted_examples,
                 label_buf,
-                (long int)ec->final_prediction,
-                (long unsigned int)ec->num_features);
+                (long int)ec.final_prediction,
+                (long unsigned int)ec.num_features);

         all.sd->weighted_holdout_examples_since_last_dump = 0;
         all.sd->holdout_sum_loss_since_last_dump = 0.0;
@@ -150,8 +150,8 @@ namespace OAA {
                 (long int)all.sd->example_number,
                 all.sd->weighted_examples,
                 label_buf,
-                (long int)ec->final_prediction,
-                (long unsigned int)ec->num_features);
+                (long int)ec.final_prediction,
+                (long unsigned int)ec.num_features);

         all.sd->sum_loss_since_last_dump = 0.0;
         all.sd->old_weighted_examples = all.sd->weighted_examples;
@@ -159,19 +159,19 @@ namespace OAA {
       }
   }

-  void output_example(vw& all, example* ec)
+  void output_example(vw& all, example& ec)
   {
-    mc_label* ld = (mc_label*)ec->ld;
+    mc_label* ld = (mc_label*)ec.ld;
     size_t loss = 1;
-    if (ld->label == (uint32_t)ec->final_prediction)
+    if (ld->label == (uint32_t)ec.final_prediction)
       loss = 0;

-    if(ec->test_only)
+    if(ec.test_only)
       {
-        all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
-        all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
-        all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
+        all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+        all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+        all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
         all.sd->holdout_sum_loss += loss;
         all.sd->holdout_sum_loss_since_last_dump += loss;
         all.sd->holdout_sum_loss_since_last_pass += loss;//since last pass
@@ -179,36 +179,36 @@ namespace OAA {
     else
       {
         all.sd->weighted_examples += ld->weight;
-        all.sd->total_features += ec->num_features;
+        all.sd->total_features += ec.num_features;
         all.sd->sum_loss += loss;
         all.sd->sum_loss_since_last_dump += loss;
         all.sd->example_number++;
       }

     for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
-      all.print(*sink, ec->final_prediction, 0, ec->tag);
+      all.print(*sink, ec.final_prediction, 0, ec.tag);

     OAA::print_update(all, ec);
   }

-  void finish_example(vw& all, oaa*, example* ec)
+  void finish_example(vw& all, oaa&, example& ec)
   {
     output_example(all, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
   }

   template <bool is_learn>
-  void predict_or_learn(oaa* o, learner& base, example* ec) {
-    vw* all = o->all;
+  void predict_or_learn(oaa& o, learner& base, example& ec) {
+    vw* all = o.all;
     bool shouldOutput = all->raw_prediction > 0;

-    mc_label* mc_label_data = (mc_label*)ec->ld;
+    mc_label* mc_label_data = (mc_label*)ec.ld;
     float prediction = 1;
     float score = INT_MIN;

-    if (mc_label_data->label == 0 || (mc_label_data->label > o->k && mc_label_data->label != (uint32_t)-1))
-      cout << "label " << mc_label_data->label << " is not in {1,"<< o->k << "} This won't work right." << endl;
+    if (mc_label_data->label == 0 || (mc_label_data->label > o.k && mc_label_data->label != (uint32_t)-1))
+      cout << "label " << mc_label_data->label << " is not in {1,"<< o.k << "} This won't work right." << endl;

     string outputString;
     stringstream outputStringStream(outputString);
@@ -216,9 +216,9 @@ namespace OAA {
     label_data simple_temp;
     simple_temp.initial = 0.;
     simple_temp.weight = mc_label_data->weight;
-    ec->ld = &simple_temp;
+    ec.ld = &simple_temp;

-    for (size_t i = 1; i <= o->k; i++)
+    for (size_t i = 1; i <= o.k; i++)
       {
         if (is_learn)
           {
@@ -232,22 +232,22 @@ namespace OAA {
         else
           base.predict(ec, i-1);

-        if (ec->partial_prediction > score)
+        if (ec.partial_prediction > score)
           {
-            score = ec->partial_prediction;
+            score = ec.partial_prediction;
             prediction = (float)i;
           }

         if (shouldOutput) {
           if (i > 1) outputStringStream << ' ';
-          outputStringStream << i << ':' << ec->partial_prediction;
+          outputStringStream << i << ':' << ec.partial_prediction;
         }
       }

-    ec->ld = mc_label_data;
-    ec->final_prediction = prediction;
+    ec.ld = mc_label_data;
+    ec.final_prediction = prediction;

     if (shouldOutput)
-      all->print_text(all->raw_prediction, outputStringStream.str(), ec->tag);
+      all->print_text(all->raw_prediction, outputStringStream.str(), ec.tag);
   }

   learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
diff --git a/vowpalwabbit/oaa.h b/vowpalwabbit/oaa.h
index ea01bb0f..a85d0c18 100644
--- a/vowpalwabbit/oaa.h
+++ b/vowpalwabbit/oaa.h
@@ -36,7 +36,7 @@ namespace OAA
                                         NULL,
                                         sizeof(mc_label)};

-  void output_example(vw& all, example* ec);
+  void output_example(vw& all, example& ec);

   inline int example_is_test(example* ec)
   { return (((OAA::mc_label*)ec->ld)->label == (uint32_t)-1); }
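The loop in predict_or_learn is the textbook one-against-all reduction: score k binary sub-problems and keep the argmax. A self-contained sketch, with a hypothetical scorer callback standing in for base.predict:

  #include <cstddef>

  typedef float (*binary_scorer)(size_t subproblem); // illustrative stand-in

  size_t oaa_predict(binary_scorer score, size_t k)
  {
    size_t prediction = 1;
    float best = -1e30f;
    for (size_t i = 1; i <= k; ++i) {
      float s = score(i - 1);        // offset i-1 selects sub-problem i
      if (s > best) { best = s; prediction = i; }
    }
    return prediction;               // labels are 1..k, as oaa.cc expects
  }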
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index eb9c1a05..ae6f7b4d 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -854,6 +854,9 @@ vw* parse_args(int argc, char *argv[])
   if(vm.count("autolink") || vm_file.count("autolink") )
     all->l = ALINK::setup(*all, to_pass_further, vm, vm_file);

+  if (vm.count("lrq") || vm_file.count("lrq"))
+    all->l = LRQ::setup(*all, to_pass_further, vm, vm_file);
+
   all->l = Scorer::setup(*all, to_pass_further, vm, vm_file);

   if(vm.count("top") || vm_file.count("top") )
@@ -862,9 +865,6 @@ vw* parse_args(int argc, char *argv[])
   if (vm.count("binary") || vm_file.count("binary"))
     all->l = BINARY::setup(*all, to_pass_further, vm, vm_file);

-  if (vm.count("lrq") || vm_file.count("lrq"))
-    all->l = LRQ::setup(*all, to_pass_further, vm, vm_file);
-
   if(vm.count("oaa") || vm_file.count("oaa") )
     {
       if (got_mc) { cerr << "error: cannot specify multiple MC learners" << endl; throw exception(); }
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index 62e0f519..d0cc5d6c 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -239,6 +239,8 @@ public:
       v_array<char> base_v_array;
       push_many(base_v_array, name.begin, name.end - name.begin);
       base_v_array.push_back('\0');
+      if (base != NULL)
+        free(base);
       base = base_v_array.begin;
     }
     channel_hash = p->hasher(name, hash_base);
@@ -260,7 +262,6 @@ public:

   inline void nameSpace(){
     cur_channel_v = 1.0;
-    base = NULL;
     index = 0;
     new_index = false;
     anon = 0;
@@ -271,6 +272,8 @@ public:
       new_index = true;
       if(audit)
       {
+        if (base != NULL)
+          free(base);
         base = (char *) calloc(2,sizeof(char));
         base[0] = ' ';
         base[1] = '\0';
@@ -312,8 +315,11 @@ public:
     this->weights_per_problem = all.wpp;
     this->affix_features = all.affix_features;
     this->spelling_features = all.spelling_features;
+    this->base = NULL;
     audit = all.audit || all.hash_inv;
     listNameSpace();
+    if (base != NULL)
+      free(base);
   }
 }
};
diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc
index 7e80826f..2c55f0f9 100644
--- a/vowpalwabbit/parser.cc
+++ b/vowpalwabbit/parser.cc
@@ -746,7 +746,7 @@ void setup_example(vw& all, example* ae)
   ae->loss = 0.;

   ae->example_counter = (size_t)(all.p->parsed_examples + 1);
-  if ((!all.p->emptylines_separate_examples) || example_is_newline(ae))
+  if ((!all.p->emptylines_separate_examples) || example_is_newline(*ae))
     all.p->in_pass_counter++;

   ae->test_only = is_test_only(all.p->in_pass_counter, all.holdout_period, all.holdout_after, all.holdout_set_off);
@@ -957,14 +957,14 @@ namespace VW{
     words.delete_v();
   }

-  void empty_example(vw& all, example* ec)
+  void empty_example(vw& all, example& ec)
   {
     if (all.audit || all.hash_inv)
-      for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+      for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
         {
           for (audit_data* temp
-                 = ec->audit_features[*i].begin;
-               temp != ec->audit_features[*i].end; temp++)
+                 = ec.audit_features[*i].begin;
+               temp != ec.audit_features[*i].end; temp++)
             {
               if (temp->alloced)
                 {
@@ -973,19 +973,19 @@ namespace VW{
                   temp->alloced=false;
                 }
             }
-          ec->audit_features[*i].erase();
+          ec.audit_features[*i].erase();
         }

-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
       {
-        ec->atomics[*i].erase();
-        ec->sum_feat_sq[*i]=0;
+        ec.atomics[*i].erase();
+        ec.sum_feat_sq[*i]=0;
       }

-    ec->indices.erase();
-    ec->tag.erase();
-    ec->sorted = false;
-    ec->end_pass = false;
+    ec.indices.erase();
+    ec.tag.erase();
+    ec.sorted = false;
+    ec.end_pass = false;
   }

   void finish_example(vw& all, example* ec)
@@ -995,7 +995,7 @@ namespace VW{
     condition_variable_signal(&all.p->output_done);
     mutex_unlock(&all.p->output_lock);

-    empty_example(all, ec);
+    empty_example(all, *ec);

     mutex_lock(&all.p->examples_lock);
     assert(ec->in_use);
diff --git a/vowpalwabbit/scorer.cc b/vowpalwabbit/scorer.cc
index 8ce6ebfe..8024cafe 100644
--- a/vowpalwabbit/scorer.cc
+++ b/vowpalwabbit/scorer.cc
@@ -9,10 +9,10 @@ namespace Scorer {
   };

   template <bool is_learn>
-  void predict_or_learn(scorer* s, learner& base, example* ec)
+  void predict_or_learn(scorer& s, learner& base, example& ec)
   {
-    label_data* ld = (label_data*)ec->ld;
-    s->all->set_minmax(s->all->sd, ld->label);
+    label_data* ld = (label_data*)ec.ld;
+    s.all->set_minmax(s.all->sd, ld->label);

     if (is_learn)
       base.learn(ec);
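The change repeated through scorer.cc and the other reductions is purely mechanical: pass the reduction state and the example by reference instead of by pointer. A tiny sketch of the before/after shape (widget is illustrative, not a VW type):

  struct widget { int d; };

  // Before: callee must tolerate null.
  void use_ptr(widget* w) { if (w != 0) w->d++; }

  // After: a reference cannot be null, so the check (and the ->) go away.
  void use_ref(widget& w) { w.d++; }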
diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc
index 1533aeca..808860fe 100644
--- a/vowpalwabbit/searn.cc
+++ b/vowpalwabbit/searn.cc
@@ -362,13 +362,13 @@ namespace Searn {
     //cerr << "predict: action=" << action << endl;
     void* old_label = ecs[action].ld;
     ecs[action].ld = &test_label;
-    base.predict(&ecs[action], pol);
+    base.predict(ecs[action], pol);
     srn->total_predictions_made++;
     srn->num_features += ecs[action].num_features;
     srn->empty_example->in_use = true;
     //cerr << "predict: empty_example" << endl;
-    base.predict(srn->empty_example);
+    base.predict(*(srn->empty_example));
     ecs[action].ld = old_label;

     if ((action == 0) ||
@@ -429,37 +429,37 @@ namespace Searn {
     return ld->costs[0].action;
   }

-  uint32_t single_prediction_notLDF(vw& all, searn& srn, learner& base, example* ec, void*valid_labels, uint32_t pol, bool allow_exploration)
+  uint32_t single_prediction_notLDF(vw& all, searn& srn, learner& base, example& ec, void*valid_labels, uint32_t pol, bool allow_exploration)
   {
     assert(pol >= 0);

-    void* old_label = ec->ld;
-    ec->ld = valid_labels;
+    void* old_label = ec.ld;
+    ec.ld = valid_labels;
     base.predict(ec, pol);
     srn.total_predictions_made++;
-    srn.num_features += ec->num_features;
-    uint32_t final_prediction = (uint32_t)ec->final_prediction;
+    srn.num_features += ec.num_features;
+    uint32_t final_prediction = (uint32_t)ec.final_prediction;

     if (allow_exploration && (srn.exploration_temperature > 0.)) {
       if (srn.rollout_all_actions)
-        final_prediction = sample_with_temperature_csoaa((CSOAA::label*)ec->ld, srn.exploration_temperature);
+        final_prediction = sample_with_temperature_csoaa((CSOAA::label*)ec.ld, srn.exploration_temperature);
       else
-        final_prediction = sample_with_temperature_cb( (CB::label *)ec->ld, srn.exploration_temperature);
+        final_prediction = sample_with_temperature_cb( (CB::label *)ec.ld, srn.exploration_temperature);
     }

     if ((srn.state == INIT_TEST) && (all.raw_prediction > 0) && (srn.rollout_all_actions)) { // srn.rollout_all_actions ==> this is not CB, so we have CSOAA::labels
       string outputString;
       stringstream outputStringStream(outputString);
-      CSOAA::label *ld = (CSOAA::label*)ec->ld;
+      CSOAA::label *ld = (CSOAA::label*)ec.ld;
       for (CSOAA::wclass* c = ld->costs.begin; c != ld->costs.end; ++c) {
         if (c != ld->costs.begin) outputStringStream << ' ';
         outputStringStream << c->weight_index << ':' << c->partial_prediction;
       }
-      all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+      all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
     }

-    ec->ld = old_label;
+    ec.ld = old_label;

     return final_prediction;
   }
@@ -485,7 +485,7 @@ namespace Searn {
     } else { // learned policy
       if (!srn.is_ldf) { // single example
         if (srn.auto_history) add_history_to_example(all, srn.hinfo, ecs, srn.rollout_action.begin+srn.t);
-        size_t action = single_prediction_notLDF(all, srn, base, ecs, valid_labels, pol, allow_exploration);
+        size_t action = single_prediction_notLDF(all, srn, base, *ecs, valid_labels, pol, allow_exploration);
         if (srn.auto_history) remove_history_from_example(all, srn.hinfo, ecs);
         return (uint32_t)action;
       } else {
@@ -985,7 +985,7 @@ namespace Searn {
       void* old_label = ec[0].ld;
       ec[0].ld = labels;
       if (srn.auto_history) add_history_to_example(all, srn.hinfo, ec, srn.rollout_action.begin+srn.learn_t);
-      base.learn(&ec[0], srn.current_policy);
+      base.learn(ec[0], srn.current_policy);
       if (srn.auto_history) remove_history_from_example(all, srn.hinfo, ec);
       ec[0].ld = old_label;
       srn.total_examples_generated++;
@@ -998,10 +998,10 @@ namespace Searn {
         //clog << endl << "this_example = "; GD::print_audit_features(all, &ec[a]);
         add_history_to_example(all, srn.hinfo, &ec[a], srn.rollout_action.begin+srn.learn_t, ((CSOAA::label*)ec[a].ld)->costs[0].weight_index);
-        base.learn(&ec[a], srn.current_policy);
+        base.learn(ec[a], srn.current_policy);
       }
       //clog << "learn: generate empty example" << endl;
-      base.learn(srn.empty_example);
+      base.learn(*srn.empty_example);
       //clog << "learn done " << repeat << endl;
       for (size_t a=0; a<len; a++)
         remove_history_from_example(all, srn.hinfo, &ec[a]);
@@ -1207,23 +1207,23 @@ namespace Searn {
   out[max_len] = 0;
 }

-void print_update(vw& all, searn* srn)
+void print_update(vw& all, searn& srn)
 {
-  if (!srn->printed_output_header && !all.quiet) {
+  if (!srn.printed_output_header && !all.quiet) {
     const char * header_fmt = "%-10s %-10s %8s %15s %24s %22s %8s %5s %5s %15s %15s\n";
     fprintf(stderr, header_fmt, "average", "since", "sequence", "example", "current label", "current predicted", "current", "cur", "cur", "predic.", "examples");
     fprintf(stderr, header_fmt, "loss", "last", "counter", "weight", "sequence prefix", "sequence prefix", "features", "pass", "pol", "made", "gener.");
     cerr.precision(5);
-    srn->printed_output_header = true;
+    srn.printed_output_header = true;
   }

-  if (!should_print_update(all, srn->hit_new_pass))
+  if (!should_print_update(all, srn.hit_new_pass))
     return;

   char true_label[21];
   char pred_label[21];
-  to_short_string(srn->truth_string->str(), 20, true_label);
-  to_short_string(srn->pred_string->str() , 20, pred_label);
+  to_short_string(srn.truth_string->str(), 20, true_label);
+  to_short_string(srn.pred_string->str() , 20, pred_label);

   float avg_loss = 0.;
   float avg_loss_since = 0.;
@@ -1245,14 +1245,14 @@ void print_update(vw& all, searn* srn)
           all.sd->weighted_examples,
           true_label,
           pred_label,
-          (long unsigned int)srn->num_features,
-          (int)srn->read_example_last_pass,
-          (int)srn->current_policy,
-          (long unsigned int)srn->total_predictions_made,
-          (long unsigned int)srn->total_examples_generated);
+          (long unsigned int)srn.num_features,
+          (int)srn.read_example_last_pass,
+          (int)srn.current_policy,
+          (long unsigned int)srn.total_predictions_made,
+          (long unsigned int)srn.total_examples_generated);

   if (PRINT_CLOCK_TIME) {
-    size_t num_sec = (size_t)(((float)(clock() - srn->start_clock_time)) / CLOCKS_PER_SEC);
+    size_t num_sec = (size_t)(((float)(clock() - srn.start_clock_time)) / CLOCKS_PER_SEC);
     fprintf(stderr, " %15lusec", num_sec);
   }

@@ -1370,69 +1370,69 @@ void print_update(vw& all, searn* srn)
 }

 template <bool is_learn>
-  void searn_predict_or_learn(searn* srn, learner& base, example*ec) {
-  vw* all = srn->all;
-  srn->base_learner = &base;
+  void searn_predict_or_learn(searn& srn, learner& base, example& ec) {
+  vw* all = srn.all;
+  srn.base_learner = &base;
   bool is_real_example = true;

-  if (example_is_newline(ec) || srn->ec_seq.size() >= all->p->ring_size - 2) {
-    if (srn->ec_seq.size() >= all->p->ring_size - 2) { // give some wiggle room
-      std::cerr << "warning: length of sequence at " << ec->example_counter << " exceeds ring size; breaking apart" << std::endl;
+  if (example_is_newline(ec) || srn.ec_seq.size() >= all->p->ring_size - 2) {
+    if (srn.ec_seq.size() >= all->p->ring_size - 2) { // give some wiggle room
+      std::cerr << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << std::endl;
     }

-    do_actual_learning<is_learn>(*all, *srn);
-    clear_seq(*all, *srn);
-    srn->hit_new_pass = false;
+    do_actual_learning<is_learn>(*all, srn);
+    clear_seq(*all, srn);
+    srn.hit_new_pass = false;

     //VW::finish_example(*all, ec);
     is_real_example = false;
   } else {
-    srn->ec_seq.push_back(ec);
+    srn.ec_seq.push_back(&ec);
   }

   if (is_real_example) {
-    srn->read_example_last_id = ec->example_counter;
+    srn.read_example_last_id = ec.example_counter;
   }
 }

-  void end_pass(searn* srn) {
-    vw* all = srn->all;
-    srn->hit_new_pass = true;
-    srn->read_example_last_pass++;
-    srn->passes_since_new_policy++;
-    if (srn->passes_since_new_policy >= srn->passes_per_policy) {
-      srn->passes_since_new_policy = 0;
+  void end_pass(searn& srn) {
+    vw* all = srn.all;
+    srn.hit_new_pass = true;
+    srn.read_example_last_pass++;
+    srn.passes_since_new_policy++;
+    if (srn.passes_since_new_policy >= srn.passes_per_policy) {
+      srn.passes_since_new_policy = 0;
       if(all->training)
-        srn->current_policy++;
-      if (srn->current_policy > srn->total_number_of_policies) {
+        srn.current_policy++;
+      if (srn.current_policy > srn.total_number_of_policies) {
         std::cerr << "internal error (bug): too many policies; not advancing" << std::endl;
-        srn->current_policy = srn->total_number_of_policies;
+        srn.current_policy = srn.total_number_of_policies;
       }
       //reset searn_trained_nb_policies in options_from_file so it is saved to regressor file later
       std::stringstream ss;
-      ss << srn->current_policy;
+      ss << srn.current_policy;
       VW::cmd_string_replace_value(all->options_from_file,"--searn_trained_nb_policies", ss.str());
     }
   }

-  void finish_example(vw& all, searn* srn, example* ec) {
-    if (ec->end_pass || example_is_newline(ec) || srn->ec_seq.size() >= all.p->ring_size - 2) {
+  void finish_example(vw& all, searn& srn, example& ec) {
+    if (ec.end_pass || example_is_newline(ec) || srn.ec_seq.size() >= all.p->ring_size - 2) {
       print_update(all, srn);
-      VW::finish_example(all, ec);
+      VW::finish_example(all, &ec);
     }
   }

-  void end_examples(searn* srn) {
-    vw* all = srn->all;
+  void end_examples(searn& srn) {
+    vw* all = srn.all;

-    do_actual_learning<true>(*all, *srn);
+    do_actual_learning<true>(*all, srn);

     if( all->training ) {
       std::stringstream ss1;
       std::stringstream ss2;
-      ss1 << ((srn->passes_since_new_policy == 0) ? srn->current_policy : (srn->current_policy+1));
+      ss1 << ((srn.passes_since_new_policy == 0) ? srn.current_policy : (srn.current_policy+1));
       //use cmd_string_replace_value in case we already loaded a predictor which had a value stored for --searn_trained_nb_policies
       VW::cmd_string_replace_value(all->options_from_file,"--searn_trained_nb_policies", ss1.str());
-      ss2 << srn->total_number_of_policies;
+      ss2 << srn.total_number_of_policies;
       //use cmd_string_replace_value in case we already loaded a predictor which had a value stored for --searn_total_nb_policies
       VW::cmd_string_replace_value(all->options_from_file,"--searn_total_nb_policies", ss2.str());
     }
@@ -1492,58 +1492,58 @@ void print_update(vw& all, searn* srn)
     srn.empty_example->in_use = true;
   }

-  void searn_finish(searn* srn)
+  void searn_finish(searn& srn)
   {
-    vw* all = srn->all;
+    vw* all = srn.all;
     //cerr << "searn_finish" << endl;

-    delete srn->truth_string;
-    delete srn->pred_string;
-    delete srn->neighbor_features_string;
-    srn->neighbor_features.erase();
-    srn->neighbor_features.delete_v();
+    delete srn.truth_string;
+    delete srn.pred_string;
+    delete srn.neighbor_features_string;
+    srn.neighbor_features.erase();
+    srn.neighbor_features.delete_v();

-    if (srn->rollout_all_actions) { // dst should be a CSOAA::label*
-      ((CSOAA::label*)srn->valid_labels)->costs.erase();
-      ((CSOAA::label*)srn->valid_labels)->costs.delete_v();
+    if (srn.rollout_all_actions) { // dst should be a CSOAA::label*
+      ((CSOAA::label*)srn.valid_labels)->costs.erase();
+      ((CSOAA::label*)srn.valid_labels)->costs.delete_v();
     } else {
-      ((CB::label*)srn->valid_labels)->costs.erase();
-      ((CB::label*)srn->valid_labels)->costs.delete_v();
+      ((CB::label*)srn.valid_labels)->costs.erase();
+      ((CB::label*)srn.valid_labels)->costs.delete_v();
     }

-    if (srn->rollout_all_actions) // labels are CSOAA
-      delete (CSOAA::label*)srn->valid_labels;
+    if (srn.rollout_all_actions) // labels are CSOAA
+      delete (CSOAA::label*)srn.valid_labels;
     else // labels are CB
-      delete (CB::label*)srn->valid_labels;
+      delete (CB::label*)srn.valid_labels;

-    dealloc_example(CSOAA::delete_label, *(srn->empty_example));
-    free(srn->empty_example);
+    dealloc_example(CSOAA::delete_label, *(srn.empty_example));
+    free(srn.empty_example);

-    srn->ec_seq.delete_v();
+    srn.ec_seq.delete_v();

-    clear_snapshot(*all, *srn);
-    srn->snapshot_data.delete_v();
+    clear_snapshot(*all, srn);
+    srn.snapshot_data.delete_v();

-    for (size_t i=0; i<srn->train_labels.size(); i++) {
-      if (srn->rollout_all_actions) {
-        ((CSOAA::label*)srn->train_labels[i])->costs.erase();
-        ((CSOAA::label*)srn->train_labels[i])->costs.delete_v();
-        delete ((CSOAA::label*)srn->train_labels[i]);
+    for (size_t i=0; i<srn.train_labels.size(); i++) {
+      if (srn.rollout_all_actions) {
+        ((CSOAA::label*)srn.train_labels[i])->costs.erase();
+        ((CSOAA::label*)srn.train_labels[i])->costs.delete_v();
+        delete ((CSOAA::label*)srn.train_labels[i]);
       } else {
-        ((CB::label*)srn->train_labels[i])->costs.erase();
-        ((CB::label*)srn->train_labels[i])->costs.delete_v();
-        delete ((CB::label*)srn->train_labels[i]);
+        ((CB::label*)srn.train_labels[i])->costs.erase();
+        ((CB::label*)srn.train_labels[i])->costs.delete_v();
+        delete ((CB::label*)srn.train_labels[i]);
       }
     }
-    srn->train_labels.delete_v();
-    srn->train_action.delete_v();
-    srn->train_action_ids.delete_v();
-    srn->rollout_action.delete_v();
-    srn->learn_losses.delete_v();
-
-    if (srn->task->finish != NULL) {
-      srn->task->finish(*srn);
-      free(srn->task);
+    srn.train_labels.delete_v();
+    srn.train_action.delete_v();
+    srn.train_action_ids.delete_v();
+    srn.rollout_action.delete_v();
+    srn.learn_losses.delete_v();
+
+    if (srn.task->finish != NULL) {
+      srn.task->finish(srn);
+      free(srn.task);
     }
   }
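The pass bookkeeping in end_pass reduces to a small piece of clamped arithmetic. A sketch with field names mirroring the diff (not a drop-in replacement for the searn struct):

  #include <cstddef>

  struct policy_counters {
    size_t passes_since_new_policy, passes_per_policy;
    size_t current_policy, total_number_of_policies;
  };

  void advance_policy(policy_counters& s, bool training)
  {
    s.passes_since_new_policy++;
    if (s.passes_since_new_policy >= s.passes_per_policy) {
      s.passes_since_new_policy = 0;
      if (training)
        s.current_policy++;
      if (s.current_policy > s.total_number_of_policies)
        s.current_policy = s.total_number_of_policies; // clamp, matching the diff
    }
  }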
diff --git a/vowpalwabbit/sender.cc b/vowpalwabbit/sender.cc
index 95f51a32..9658977f 100644
--- a/vowpalwabbit/sender.cc
+++ b/vowpalwabbit/sender.cc
@@ -42,15 +42,15 @@ namespace SENDER {
     s.buf->files.push_back(s.sd);
   }

-  void send_features(io_buf *b, example* ec, uint32_t mask)
+  void send_features(io_buf *b, example& ec, uint32_t mask)
   {
     // note: subtracting 1 b/c not sending constant
-    output_byte(*b,(unsigned char) (ec->indices.size()-1));
+    output_byte(*b,(unsigned char) (ec.indices.size()-1));

-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++) {
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) {
       if (*i == constant_namespace)
         continue;
-      output_features(*b, *i, ec->atomics[*i].begin, ec->atomics[*i].end, mask);
+      output_features(*b, *i, ec.atomics[*i].begin, ec.atomics[*i].end, mask);
     }
     b->flush();
   }
@@ -67,39 +67,39 @@ void receive_result(sender& s)

   ec->loss = s.all->loss->getLoss(s.all->sd, ec->final_prediction, ld->label) * ld->weight;

-  return_simple_example(*(s.all), NULL, ec);
+  return_simple_example(*(s.all), NULL, *ec);
 }

-  void learn(sender* s, learner& base, example* ec)
+  void learn(sender& s, learner& base, example& ec)
   {
-    if (s->received_index + s->all->p->ring_size - 1 == s->sent_index)
-      receive_result(*s);
-
-    label_data* ld = (label_data*)ec->ld;
-    s->all->set_minmax(s->all->sd, ld->label);
-    simple_label.cache_label(ld, *s->buf);//send label information.
-    cache_tag(*s->buf, ec->tag);
-    send_features(s->buf,ec, (uint32_t)s->all->parse_mask);
-    s->delay_ring[s->sent_index++ % s->all->p->ring_size] = ec;
+    if (s.received_index + s.all->p->ring_size - 1 == s.sent_index)
+      receive_result(s);
+
+    label_data* ld = (label_data*)ec.ld;
+    s.all->set_minmax(s.all->sd, ld->label);
+    simple_label.cache_label(ld, *s.buf);//send label information.
+    cache_tag(*s.buf, ec.tag);
+    send_features(s.buf,ec, (uint32_t)s.all->parse_mask);
+    s.delay_ring[s.sent_index++ % s.all->p->ring_size] = &ec;
   }

-  void finish_example(vw& all, sender*, example*ec)
+  void finish_example(vw& all, sender&, example& ec)
  {}

-void end_examples(sender* s)
+void end_examples(sender& s)
 {
   //close our outputs to signal finishing.
-  while (s->received_index != s->sent_index)
-    receive_result(*s);
-  shutdown(s->buf->files[0],SHUT_WR);
+  while (s.received_index != s.sent_index)
+    receive_result(s);
+  shutdown(s.buf->files[0],SHUT_WR);
 }

-  void finish(sender* s)
+  void finish(sender& s)
   {
-    s->buf->files.delete_v();
-    s->buf->space.delete_v();
-    free(s->delay_ring);
-    delete s->buf;
+    s.buf->files.delete_v();
+    s.buf->space.delete_v();
+    free(s.delay_ring);
+    delete s.buf;
   }

 learner* setup(vw& all, po::variables_map& vm, vector<string> pairs)
diff --git a/vowpalwabbit/simple_label.cc b/vowpalwabbit/simple_label.cc
index 5b4a3073..d3646ec9 100644
--- a/vowpalwabbit/simple_label.cc
+++ b/vowpalwabbit/simple_label.cc
@@ -118,7 +118,7 @@ float get_active_coin_bias(float k, float l, float g, float c0)
   return b*rs*rs;
 }

-float query_decision(vw& all, example* ec, float k)
+float query_decision(vw& all, example& ec, float k)
 {
   float bias, avg_loss, weighted_queries;
   if (k<=1.)
@@ -126,7 +126,7 @@ float query_decision(vw& all, example* ec, float k)
   else{
     weighted_queries = (float)(all.initial_t + all.sd->weighted_examples - all.sd->weighted_unlabeled_examples);
     avg_loss = (float)(all.sd->sum_loss/k + sqrt((1.+0.5*log(k))/(weighted_queries+0.0001)));
-    bias = get_active_coin_bias(k, avg_loss, ec->revert_weight/k, all.active_c0);
+    bias = get_active_coin_bias(k, avg_loss, ec.revert_weight/k, all.active_c0);
   }
   if(frand48()<bias)
     return 1.f/bias;
@@ -134,11 +134,11 @@ float query_decision(vw& all, example* ec, float k)
   return -1.;
 }

-void print_update(vw& all, example *ec)
+void print_update(vw& all, example& ec)
 {
   if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
     {
-      label_data* ld = (label_data*) ec->ld;
+      label_data* ld = (label_data*) ec.ld;
       char label_buf[32];
       if (ld->label == FLT_MAX)
         strcpy(label_buf," unknown");
@@ -160,8 +160,8 @@ void print_update(vw& all, example *ec)
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              ec->final_prediction,
-              (long unsigned int)ec->num_features);
+              ec.final_prediction,
+              (long unsigned int)ec.num_features);

       all.sd->weighted_holdout_examples_since_last_dump = 0.;
       all.sd->holdout_sum_loss_since_last_dump = 0.0;
@@ -173,39 +173,40 @@ void print_update(vw& all, example *ec)
              (long int)all.sd->example_number,
              all.sd->weighted_examples,
              label_buf,
-              ec->final_prediction,
-              (long unsigned int)ec->num_features);
+              ec.final_prediction,
+              (long unsigned int)ec.num_features);

       all.sd->sum_loss_since_last_dump = 0.0;
       all.sd->old_weighted_examples = all.sd->weighted_examples;
       VW::update_dump_interval(all);
+      fflush(stderr);
     }
 }

-void output_and_account_example(vw& all, example* ec)
+void output_and_account_example(vw& all, example& ec)
 {
-  label_data* ld = (label_data*)ec->ld;
+  label_data* ld = (label_data*)ec.ld;

-  if(ec->test_only)
+  if(ec.test_only)
   {
-    all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
-    all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
-    all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
-    all.sd->holdout_sum_loss += ec->loss;
-    all.sd->holdout_sum_loss_since_last_dump += ec->loss;
-    all.sd->holdout_sum_loss_since_last_pass += ec->loss;//since last pass
+    all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+    all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+    all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
+    all.sd->holdout_sum_loss += ec.loss;
+    all.sd->holdout_sum_loss_since_last_dump += ec.loss;
+    all.sd->holdout_sum_loss_since_last_pass += ec.loss;//since last pass
   }
   else
   {
    if (ld->label != FLT_MAX)
      all.sd->weighted_labels += ld->label * ld->weight;
    all.sd->weighted_examples += ld->weight;
-    all.sd->sum_loss += ec->loss;
-    all.sd->sum_loss_since_last_dump += ec->loss;
-    all.sd->total_features += ec->num_features;
+    all.sd->sum_loss += ec.loss;
+    all.sd->sum_loss_since_last_dump += ec.loss;
+    all.sd->total_features += ec.num_features;
    all.sd->example_number++;
   }
-  all.print(all.raw_prediction, ec->partial_prediction, -1, ec->tag);
+  all.print(all.raw_prediction, ec.partial_prediction, -1, ec.tag);

   float ai=-1;
   if(all.active && ld->label == FLT_MAX)
@@ -218,18 +219,18 @@ void output_and_account_example(vw& all, example* ec)

     if(all.active && all.lda == 0)
-      active_print_result(f, ec->final_prediction, ai, ec->tag);
+      active_print_result(f, ec.final_prediction, ai, ec.tag);
     else if (all.lda > 0)
-      print_lda_result(all, f,ec->topic_predictions.begin,0.,ec->tag);
+      print_lda_result(all, f,ec.topic_predictions.begin,0.,ec.tag);
     else
-      all.print(f, ec->final_prediction, 0, ec->tag);
+      all.print(f, ec.final_prediction, 0, ec.tag);
   }

   print_update(all, ec);
 }

-void return_simple_example(vw& all, void*, example* ec)
+void return_simple_example(vw& all, void*, example& ec)
 {
   output_and_account_example(all, ec);
-  VW::finish_example(all,ec);
+  VW::finish_example(all,&ec);
 }

 bool summarize_holdout_set(vw& all, size_t& no_win_counter)
diff --git a/vowpalwabbit/simple_label.h b/vowpalwabbit/simple_label.h
index 898df9be..a8d5d114 100644
--- a/vowpalwabbit/simple_label.h
+++ b/vowpalwabbit/simple_label.h
@@ -17,7 +17,7 @@ struct label_data {
   float initial;
 };

-void return_simple_example(vw& all, void*, example* ec);
+void return_simple_example(vw& all, void*, example& ec);

 size_t read_cached_simple_label(shared_data* sd, void* v, io_buf& cache);
 void cache_simple_label(void* v, io_buf& cache);
@@ -32,8 +32,8 @@ const label_parser simple_label = {default_simple_label, parse_simple_label,
                                    NULL,
                                    sizeof(label_data)};

-float query_decision(vw& all, example* ec, float k);
+float query_decision(vw& all, example& ec, float k);
 bool summarize_holdout_set(vw& all, size_t& no_win_counter);
-void print_update(vw& all, example *ec);
+void print_update(vw& all, example &ec);

 #endif
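query_decision above implements importance-weighted active learning: query a label with probability bias and, when queried, weight it by 1/bias so updates stay unbiased in expectation. A sketch with bias taken as a parameter (in VW it comes from get_active_coin_bias; std::rand stands in for frand48):

  #include <cstdlib>

  // Returns the importance weight for a queried label, or -1 for "don't query".
  float query_weight(float bias)
  {
    float u = (float) std::rand() / (float) RAND_MAX; // stand-in for frand48()
    if (u < bias)
      return 1.f / bias;
    return -1.f;
  }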
diff --git a/vowpalwabbit/topk.cc b/vowpalwabbit/topk.cc
index 69c9fb8b..fd479012 100644
--- a/vowpalwabbit/topk.cc
+++ b/vowpalwabbit/topk.cc
@@ -67,25 +67,25 @@ namespace TOPK {
     }
   }

-  void output_example(vw& all, topk* d, example* ec)
+  void output_example(vw& all, topk& d, example& ec)
   {
-    label_data* ld = (label_data*)ec->ld;
+    label_data* ld = (label_data*)ec.ld;
     all.sd->weighted_examples += ld->weight;
-    all.sd->sum_loss += ec->loss;
-    all.sd->sum_loss_since_last_dump += ec->loss;
-    all.sd->total_features += ec->num_features;
+    all.sd->sum_loss += ec.loss;
+    all.sd->sum_loss_since_last_dump += ec.loss;
+    all.sd->total_features += ec.num_features;
     all.sd->example_number++;

     if (example_is_newline(ec))
       for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
-        TOPK::print_result(*sink, d->pr_queue);
+        TOPK::print_result(*sink, d.pr_queue);

     print_update(all, ec);
   }

   template <bool is_learn>
-  void predict_or_learn(topk* d, learner& base, example* ec)
+  void predict_or_learn(topk& d, learner& base, example& ec)
   {
     if (example_is_newline(ec)) return;//do not predict newline
@@ -94,21 +94,21 @@ namespace TOPK {
     else
       base.predict(ec);

-    if(d->pr_queue.size() < d->B)
-      d->pr_queue.push(make_pair(ec->final_prediction, ec->tag));
+    if(d.pr_queue.size() < d.B)
+      d.pr_queue.push(make_pair(ec.final_prediction, ec.tag));

-    else if(d->pr_queue.top().first < ec->final_prediction)
+    else if(d.pr_queue.top().first < ec.final_prediction)
     {
-      d->pr_queue.pop();
-      d->pr_queue.push(make_pair(ec->final_prediction, ec->tag));
+      d.pr_queue.pop();
+      d.pr_queue.push(make_pair(ec.final_prediction, ec.tag));
     }
   }

-  void finish_example(vw& all, topk* d, example* ec)
+  void finish_example(vw& all, topk& d, example& ec)
   {
     TOPK::output_example(all, d, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
   }

   learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
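topk.cc keeps the B best predictions with a min-heap: a new score only enters if it beats the smallest retained one. The same pattern standalone (B and the tag type are illustrative):

  #include <functional>
  #include <queue>
  #include <string>
  #include <utility>
  #include <vector>

  typedef std::pair<float, std::string> scored;
  typedef std::priority_queue<scored, std::vector<scored>,
                              std::greater<scored> > min_heap;

  // Keep the B largest predictions seen so far.
  void topk_push(min_heap& q, std::size_t B, float pred, const std::string& tag)
  {
    if (q.size() < B)
      q.push(std::make_pair(pred, tag));
    else if (q.top().first < pred) {
      q.pop();                           // evict the smallest retained score
      q.push(std::make_pair(pred, tag));
    }
  }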
diff --git a/vowpalwabbit/wap.cc b/vowpalwabbit/wap.cc
index 5fee79f0..f0b226aa 100644
--- a/vowpalwabbit/wap.cc
+++ b/vowpalwabbit/wap.cc
@@ -24,29 +24,29 @@ namespace WAP {
     vw* all;
   };

-  void mirror_features(vw& all, example* ec, uint32_t offset1, uint32_t offset2)
+  void mirror_features(vw& all, example& ec, uint32_t offset1, uint32_t offset2)
   {
-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
       {
-        size_t original_length = ec->atomics[*i].size();
+        size_t original_length = ec.atomics[*i].size();
         for (uint32_t j = 0; j < original_length; j++)
           {
-            feature* f = &ec->atomics[*i][j];
+            feature* f = &ec.atomics[*i][j];
             feature temp = {- f->x, f->weight_index + offset2};
             f->weight_index += offset1;
-            ec->atomics[*i].push_back(temp);
+            ec.atomics[*i].push_back(temp);
           }
-        ec->sum_feat_sq[*i] *= 2;
+        ec.sum_feat_sq[*i] *= 2;
       }
     if (all.audit || all.hash_inv)
       {
-        for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
-          if (ec->audit_features[*i].begin != ec->audit_features[*i].end)
+        for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
+          if (ec.audit_features[*i].begin != ec.audit_features[*i].end)
             {
-              size_t original_length = ec->audit_features[*i].size();
+              size_t original_length = ec.audit_features[*i].size();
               for (uint32_t j = 0; j < original_length; j++)
                 {
-                  audit_data* f = &ec->audit_features[*i][j];
+                  audit_data* f = &ec.audit_features[*i][j];
                   char* new_space = NULL;
                   if (f->space != NULL)
                     {
@@ -58,43 +58,43 @@ namespace WAP {
                   *new_feature = '-';
                   audit_data temp = {new_space, new_feature, f->weight_index + offset2, - f->x, true};
                   f->weight_index += offset1;
-                  ec->audit_features[*i].push_back(temp);
+                  ec.audit_features[*i].push_back(temp);
                 }
             }
       }
-    ec->num_features *= 2;
-    ec->total_sum_feat_sq *= 2;
+    ec.num_features *= 2;
+    ec.total_sum_feat_sq *= 2;
   }

-  void unmirror_features(vw& all, example* ec, uint32_t offset1, uint32_t offset2)
+  void unmirror_features(vw& all, example& ec, uint32_t offset1, uint32_t offset2)
   {
-    for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
       {
-        ec->atomics[*i].end = ec->atomics[*i].begin+ec->atomics[*i].size()/2;
-        feature* end = ec->atomics[*i].end;
-        for (feature* f = ec->atomics[*i].begin; f!= end; f++)
+        ec.atomics[*i].end = ec.atomics[*i].begin+ec.atomics[*i].size()/2;
+        feature* end = ec.atomics[*i].end;
+        for (feature* f = ec.atomics[*i].begin; f!= end; f++)
           f->weight_index -= offset1;
-        ec->sum_feat_sq[*i] /= 2;
+        ec.sum_feat_sq[*i] /= 2;
       }
     if (all.audit || all.hash_inv)
       {
-        for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
-          if (ec->audit_features[*i].begin != ec->audit_features[*i].end)
+        for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
+          if (ec.audit_features[*i].begin != ec.audit_features[*i].end)
             {
-              for (audit_data *f = ec->audit_features[*i].begin + ec->audit_features[*i].size()/2; f != ec->audit_features[*i].end; f++)
+              for (audit_data *f = ec.audit_features[*i].begin + ec.audit_features[*i].size()/2; f != ec.audit_features[*i].end; f++)
                 {
                   if (f->space != NULL)
                     free(f->space);
                   free(f->feature);
                   f->alloced = false;
                 }
-              ec->audit_features[*i].end = ec->audit_features[*i].begin+ec->audit_features[*i].size()/2;
-              for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
+              ec.audit_features[*i].end = ec.audit_features[*i].begin+ec.audit_features[*i].size()/2;
+              for (audit_data *f = ec.audit_features[*i].begin; f != ec.audit_features[*i].end; f++)
                 f->weight_index -= offset1;
             }
       }
-    ec->num_features /= 2;
-    ec->total_sum_feat_sq /= 2;
+    ec.num_features /= 2;
+    ec.total_sum_feat_sq /= 2;
   }

   struct float_wclass
@@ -126,9 +126,9 @@ namespace WAP {
   }
   v_array<float_wclass> vs;

-  void train(vw& all, wap& w, learner& base, example* ec)
+  void train(vw& all, wap& w, learner& base, example& ec)
   {
-    CSOAA::label* ld = (CSOAA::label*)ec->ld;
+    CSOAA::label* ld = (CSOAA::label*)ec.ld;

     CSOAA::wclass* old_end = ld->costs.end;
     CSOAA::wclass* j = ld->costs.begin;
@@ -174,9 +174,9 @@ namespace WAP {
         else
           simple_temp.label = -1;

-        ec->ld = &simple_temp;
+        ec.ld = &simple_temp;

-        ec->partial_prediction = 0.;
+        ec.partial_prediction = 0.;
         uint32_t myi = (uint32_t)vs[i].ci.weight_index;
         uint32_t myj = (uint32_t)vs[j].ci.weight_index;
@@ -188,15 +188,15 @@ namespace WAP {
       }
     ld->costs.end = old_end;
-    ec->ld = ld;
+    ec.ld = ld;
   }

-  size_t test(vw& all, wap& w, learner& base, example* ec)
+  size_t test(vw& all, wap& w, learner& base, example& ec)
   {
     size_t prediction = 1;
     float score = -FLT_MAX;

-    CSOAA::label* cost_label = (CSOAA::label*)ec->ld;
+    CSOAA::label* cost_label = (CSOAA::label*)ec.ld;

     for (uint32_t i = 0; i < cost_label->costs.size(); i++)
       {
@@ -205,36 +205,36 @@ namespace WAP {
         simple_temp.weight = 0.;
         simple_temp.label = FLT_MAX;
         uint32_t myi = (uint32_t)cost_label->costs[i].weight_index;
-        ec->ld = &simple_temp;
+        ec.ld = &simple_temp;
         base.predict(ec, myi-1);
-        if (ec->partial_prediction > score)
+        if (ec.partial_prediction > score)
           {
-            score = ec->partial_prediction;
+            score = ec.partial_prediction;
             prediction = myi;
           }
-        cost_label->costs[i].partial_prediction = -ec->partial_prediction;
+        cost_label->costs[i].partial_prediction = -ec.partial_prediction;
       }
     return prediction;
   }

   template <bool is_learn>
-  void predict_or_learn(wap* w, learner& base, example* ec)
+  void predict_or_learn(wap& w, learner& base, example& ec)
   {
-    CSOAA::label* cost_label = (CSOAA::label*)ec->ld;
-    vw* all = w->all;
+    CSOAA::label* cost_label = (CSOAA::label*)ec.ld;
+    vw* all = w.all;

-    size_t prediction = test(*all, *w, base, ec);
-    ec->ld = cost_label;
+    size_t prediction = test(*all, w, base, ec);
+    ec.ld = cost_label;

     if (is_learn && cost_label->costs.size() > 0)
-      train(*all, *w, base, ec);
-    ec->final_prediction = (float)prediction;
+      train(*all, w, base, ec);
+    ec.final_prediction = (float)prediction;
   }

-  void finish_example(vw& all, wap*, example* ec)
+  void finish_example(vw& all, wap&, example& ec)
   {
     CSOAA::output_example(all, ec);
-    VW::finish_example(all, ec);
+    VW::finish_example(all, &ec);
   }

   learner* setup(vw& all, std::vector<std::string>&, po::variables_map& vm, po::variables_map& vm_file)
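mirror_features is the heart of weighted all-pairs: each pairwise comparison is encoded in one example by keeping the features at offset1 and appending a negated copy at offset2, so a linear scorer learns the difference of two per-class scores. The same move on a plain vector (a sketch, not VW's v_array):

  #include <cstddef>
  #include <vector>

  struct feat { float x; unsigned weight_index; };

  void mirror(std::vector<feat>& fs, unsigned offset1, unsigned offset2)
  {
    std::size_t original_length = fs.size();
    for (std::size_t j = 0; j < original_length; ++j) {
      feat temp = { -fs[j].x, fs[j].weight_index + offset2 }; // negated copy
      fs[j].weight_index += offset1;
      fs.push_back(temp);
    }
  }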