diff options
author | John Langford <jl@hunch.net> | 2014-12-26 23:15:02 +0300 |
---|---|---|
committer | John Langford <jl@hunch.net> | 2014-12-26 23:15:02 +0300 |
commit | 74baf926ce5e5eb79c9ae50d150638176132bb02 (patch) | |
tree | d9a346826b309b74e63fc3c84bceba8bd4550ac3 /vowpalwabbit | |
parent | 834bb36afe50c82b7457f4920644f0b37dc1f7f4 (diff) | |
parent | d3495ca382dbf55a889678c1d945d8649e15917d (diff) |
fix conflicts
Diffstat (limited to 'vowpalwabbit')
31 files changed, 346 insertions, 405 deletions
diff --git a/vowpalwabbit/active.cc b/vowpalwabbit/active.cc index 3dc6af02..1e87208e 100644 --- a/vowpalwabbit/active.cc +++ b/vowpalwabbit/active.cc @@ -162,16 +162,15 @@ namespace ACTIVE { if(!vm.count("active")) return NULL; - active* data = calloc_or_die<active>(); - - data->active_c0 = 8; - data->all=&all; + active& data = calloc_or_die<active>(); + data.active_c0 = 8; + data.all=&all; if (vm.count("mellowness")) - data->active_c0 = vm["mellowness"].as<float>(); + data.active_c0 = vm["mellowness"].as<float>(); //Create new learner - learner* ret = new learner(data, all.l); + learner* ret = new learner(&data, all.l); if (vm.count("simulation")) { ret->set_learn<active, predict_or_learn_simulation<true> >(); diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc index 3fc67920..f34a228f 100644 --- a/vowpalwabbit/autolink.cc +++ b/vowpalwabbit/autolink.cc @@ -50,16 +50,13 @@ namespace ALINK { if(!vm.count("autolink")) return NULL; - autolink* data = calloc_or_die<autolink>(); - - data->d = (uint32_t)vm["autolink"].as<size_t>(); - data->stride_shift = all.reg.stride_shift; + autolink& data = calloc_or_die<autolink>(); + data.d = (uint32_t)vm["autolink"].as<size_t>(); + data.stride_shift = all.reg.stride_shift; - std::stringstream ss; - ss << " --autolink " << data->d; - all.file_options = all.file_options+ss.str(); + all.file_options << " --autolink " << data.d; - learner* ret = new learner(data, all.l); + learner* ret = new learner(&data, all.l); ret->set_learn<autolink, predict_or_learn<true> >(); ret->set_predict<autolink, predict_or_learn<false> >(); return ret; diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc index 3ac88939..43e5e502 100644 --- a/vowpalwabbit/bfgs.cc +++ b/vowpalwabbit/bfgs.cc @@ -981,32 +981,32 @@ learner* setup(vw& all, po::variables_map& vm) if(!vm.count("bfgs") && !vm.count("conjugate_gradient")) return NULL; - bfgs* b = calloc_or_die<bfgs>(); - b->all = &all; - b->m = vm["mem"].as<uint32_t>(); - b->rel_threshold = 
vm["termination"].as<float>(); - b->wolfe1_bound = 0.01; - b->first_hessian_on=true; - b->first_pass = true; - b->gradient_pass = true; - b->preconditioner_pass = true; - b->backstep_on = false; - b->final_pass=all.numpasses; - b->no_win_counter = 0; - b->early_stop_thres = 3; + bfgs& b = calloc_or_die<bfgs>(); + b.all = &all; + b.m = vm["mem"].as<uint32_t>(); + b.rel_threshold = vm["termination"].as<float>(); + b.wolfe1_bound = 0.01; + b.first_hessian_on=true; + b.first_pass = true; + b.gradient_pass = true; + b.preconditioner_pass = true; + b.backstep_on = false; + b.final_pass=all.numpasses; + b.no_win_counter = 0; + b.early_stop_thres = 3; if(!all.holdout_set_off) { all.sd->holdout_best_loss = FLT_MAX; if(vm.count("early_terminate")) - b->early_stop_thres = vm["early_terminate"].as< size_t>(); + b.early_stop_thres = vm["early_terminate"].as< size_t>(); } - if (vm.count("hessian_on") || b->m==0) { + if (vm.count("hessian_on") || b.m==0) { all.hessian_on = true; } if (!all.quiet) { - if (b->m>0) + if (b.m>0) cerr << "enabling BFGS based optimization "; else cerr << "enabling conjugate gradient optimization via BFGS "; @@ -1024,7 +1024,7 @@ learner* setup(vw& all, po::variables_map& vm) all.bfgs = true; all.reg.stride_shift = 2; - learner* l = new learner(b, 1 << all.reg.stride_shift); + learner* l = new learner(&b, 1 << all.reg.stride_shift); l->set_learn<bfgs, learn>(); l->set_predict<bfgs, predict>(); l->set_save_load<bfgs,save_load>(); diff --git a/vowpalwabbit/bs.cc b/vowpalwabbit/bs.cc index 5ae4bf1d..c199e1db 100644 --- a/vowpalwabbit/bs.cc +++ b/vowpalwabbit/bs.cc @@ -241,9 +241,9 @@ namespace BS { learner* setup(vw& all, po::variables_map& vm) { - bs* data = calloc_or_die<bs>(); - data->ub = FLT_MAX; - data->lb = -FLT_MAX; + bs& data = calloc_or_die<bs>(); + data.ub = FLT_MAX; + data.lb = -FLT_MAX; po::options_description bs_options("Bootstrap options"); bs_options.add_options() @@ -251,12 +251,10 @@ namespace BS { vm = add_options(all, bs_options); - 
data->B = (uint32_t)vm["bootstrap"].as<size_t>(); + data.B = (uint32_t)vm["bootstrap"].as<size_t>(); //append bs with number of samples to options_from_file so it is saved to regressor later - std::stringstream ss; - ss << " --bootstrap " << data->B; - all.file_options.append(ss.str()); + all.file_options << " --bootstrap " << data.B; std::string type_string("mean"); @@ -265,25 +263,24 @@ namespace BS { type_string = vm["bs_type"].as<std::string>(); if (type_string.compare("mean") == 0) { - data->bs_type = BS_TYPE_MEAN; + data.bs_type = BS_TYPE_MEAN; } else if (type_string.compare("vote") == 0) { - data->bs_type = BS_TYPE_VOTE; + data.bs_type = BS_TYPE_VOTE; } else { std::cerr << "warning: bs_type must be in {'mean','vote'}; resetting to mean." << std::endl; - data->bs_type = BS_TYPE_MEAN; + data.bs_type = BS_TYPE_MEAN; } } else //by default use mean - data->bs_type = BS_TYPE_MEAN; - all.file_options.append(" --bs_type "); - all.file_options.append(type_string); + data.bs_type = BS_TYPE_MEAN; + all.file_options << " --bs_type " << type_string; - data->pred_vec.reserve(data->B); - data->all = &all; + data.pred_vec.reserve(data.B); + data.all = &all; - learner* l = new learner(data, all.l, data->B); + learner* l = new learner(&data, all.l, data.B); l->set_learn<bs, predict_or_learn<true> >(); l->set_predict<bs, predict_or_learn<false> >(); l->set_finish_example<bs,finish_example>(); diff --git a/vowpalwabbit/cb_algs.cc b/vowpalwabbit/cb_algs.cc index 45542f04..395d496b 100644 --- a/vowpalwabbit/cb_algs.cc +++ b/vowpalwabbit/cb_algs.cc @@ -508,17 +508,14 @@ namespace CB_ALGS if (!vm.count("cb")) return NULL; - cb* c = calloc_or_die<cb>(); - - c->all = &all; - c->min_cost = 0.; - c->max_cost = 1.; + cb& c = calloc_or_die<cb>(); + c.all = &all; + c.min_cost = 0.; + c.max_cost = 1.; uint32_t nb_actions = (uint32_t)vm["cb"].as<size_t>(); - std::stringstream ss; - ss << " --cb " << nb_actions; - all.file_options.append(ss.str()); + all.file_options << " --cb " << 
nb_actions; all.sd->k = nb_actions; @@ -532,12 +529,10 @@ namespace CB_ALGS std::string type_string; type_string = vm["cb_type"].as<std::string>(); + all.file_options << " --cb_type " << type_string; - all.file_options.append(" --cb_type "); - all.file_options.append(type_string); - if (type_string.compare("dr") == 0) - c->cb_type = CB_TYPE_DR; + c.cb_type = CB_TYPE_DR; else if (type_string.compare("dm") == 0) { if (eval) @@ -545,23 +540,23 @@ namespace CB_ALGS cout << "direct method can not be used for evaluation --- it is biased." << endl; throw exception(); } - c->cb_type = CB_TYPE_DM; + c.cb_type = CB_TYPE_DM; problem_multiplier = 1; } else if (type_string.compare("ips") == 0) { - c->cb_type = CB_TYPE_IPS; + c.cb_type = CB_TYPE_IPS; problem_multiplier = 1; } else { std::cerr << "warning: cb_type must be in {'ips','dm','dr'}; resetting to dr." << std::endl; - c->cb_type = CB_TYPE_DR; + c.cb_type = CB_TYPE_DR; } } else { //by default use doubly robust - c->cb_type = CB_TYPE_DR; - all.file_options.append(" --cb_type dr"); + c.cb_type = CB_TYPE_DR; + all.file_options << " --cb_type dr"; } if (eval) @@ -569,7 +564,7 @@ namespace CB_ALGS else all.p->lp = CB::cb_label; - learner* l = new learner(c, all.l, problem_multiplier); + learner* l = new learner(&c, all.l, problem_multiplier); if (eval) { l->set_learn<cb, learn_eval>(); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 8d8f62d0..e79c25ec 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -389,32 +389,29 @@ namespace CBIFY { vm = add_options(all, opts); if (!vm.count("cbify")) return NULL; - - cbify* data = calloc_or_die<cbify>(); - data->all = &all; - data->k = (uint32_t)vm["cbify"].as<size_t>(); - - //appends nb_actions to options_from_file so it is saved to regressor later - std::stringstream ss; - ss << " --cbify " << data->k; - all.file_options.append(ss.str()); + + cbify& data = calloc_or_die<cbify>(); + + data.all = &all; + data.k = (uint32_t)vm["cbify"].as<size_t>(); + 
all.file_options << " --cbify " << data.k; all.p->lp = MULTICLASS::mc_label; learner* l; - data->recorder.reset(new vw_recorder()); - data->mwt_explorer.reset(new MwtExplorer<vw_context>("vw", *data->recorder.get())); + data.recorder.reset(new vw_recorder()); + data.mwt_explorer.reset(new MwtExplorer<vw_context>("vw", *data.recorder.get())); if (vm.count("cover")) { size_t cover = (uint32_t)vm["cover"].as<size_t>(); - data->cs = all.cost_sensitive; - data->second_cs_label.costs.resize(data->k); - data->second_cs_label.costs.end = data->second_cs_label.costs.begin+data->k; + data.cs = all.cost_sensitive; + data.second_cs_label.costs.resize(data.k); + data.second_cs_label.costs.end = data.second_cs_label.costs.begin+data.k; float epsilon = 0.05f; if (vm.count("epsilon")) epsilon = vm["epsilon"].as<float>(); - data->scorer.reset(new vw_cover_scorer(epsilon, cover, (u32)data->k)); - data->generic_explorer.reset(new GenericExplorer<vw_context>(*data->scorer.get(), (u32)data->k)); - l = new learner(data, all.l, cover + 1); + data.scorer.reset(new vw_cover_scorer(epsilon, cover, (u32)data.k)); + data.generic_explorer.reset(new GenericExplorer<vw_context>(*data.scorer.get(), (u32)data.k)); + l = new learner(&data, all.l, cover + 1); l->set_learn<cbify, predict_or_learn_cover<true> >(); l->set_predict<cbify, predict_or_learn_cover<false> >(); } @@ -423,19 +420,19 @@ namespace CBIFY { size_t bags = (uint32_t)vm["bag"].as<size_t>(); for (size_t i = 0; i < bags; i++) { - data->policies.push_back(unique_ptr<IPolicy<vw_context>>(new vw_policy(i))); + data.policies.push_back(unique_ptr<IPolicy<vw_context>>(new vw_policy(i))); } - data->bootstrap_explorer.reset(new BootstrapExplorer<vw_context>(data->policies, (u32)data->k)); - l = new learner(data, all.l, bags); + data.bootstrap_explorer.reset(new BootstrapExplorer<vw_context>(data.policies, (u32)data.k)); + l = new learner(&data, all.l, bags); l->set_learn<cbify, predict_or_learn_bag<true> >(); l->set_predict<cbify, 
predict_or_learn_bag<false> >(); } else if (vm.count("first") ) { uint32_t tau = (uint32_t)vm["first"].as<size_t>(); - data->policy.reset(new vw_policy()); - data->tau_explorer.reset(new TauFirstExplorer<vw_context>(*data->policy.get(), (u32)tau, (u32)data->k)); - l = new learner(data, all.l, 1); + data.policy.reset(new vw_policy()); + data.tau_explorer.reset(new TauFirstExplorer<vw_context>(*data.policy.get(), (u32)tau, (u32)data.k)); + l = new learner(&data, all.l, 1); l->set_learn<cbify, predict_or_learn_first<true> >(); l->set_predict<cbify, predict_or_learn_first<false> >(); } @@ -444,9 +441,9 @@ namespace CBIFY { float epsilon = 0.05f; if (vm.count("epsilon")) epsilon = vm["epsilon"].as<float>(); - data->policy.reset(new vw_policy()); - data->greedy_explorer.reset(new EpsilonGreedyExplorer<vw_context>(*data->policy.get(), epsilon, (u32)data->k)); - l = new learner(data, all.l, 1); + data.policy.reset(new vw_policy()); + data.greedy_explorer.reset(new EpsilonGreedyExplorer<vw_context>(*data.policy.get(), epsilon, (u32)data.k)); + l = new learner(&data, all.l, 1); l->set_learn<cbify, predict_or_learn_greedy<true> >(); l->set_predict<cbify, predict_or_learn_greedy<false> >(); } diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index f5e5d91f..679e31ed 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -76,22 +76,19 @@ namespace CSOAA { vm = add_options(all, opts); if(!vm.count("csoaa")) return NULL; - csoaa* c = calloc_or_die<csoaa>(); - c->all = &all; + csoaa& c = calloc_or_die<csoaa>(); + c.all = &all; //first parse for number of actions uint32_t nb_actions = 0; nb_actions = (uint32_t)vm["csoaa"].as<size_t>(); - //append csoaa with nb_actions to file_options so it is saved to regressor later - std::stringstream ss; - ss << " --csoaa " << nb_actions; - all.file_options.append(ss.str()); + all.file_options << " --csoaa " << nb_actions; all.p->lp = cs_label; all.sd->k = nb_actions; - learner* l = new learner(c, all.l, nb_actions); + 
learner* l = new learner(&c, all.l, nb_actions); l->set_learn<csoaa, predict_or_learn<true> >(); l->set_predict<csoaa, predict_or_learn<false> >(); l->set_finish_example<csoaa,finish_example>(); @@ -667,24 +664,22 @@ namespace LabelDict { if(!vm.count("csoaa_ldf") && !vm.count("wap_ldf")) return NULL; - ldf* ld = calloc_or_die<ldf>(); + ldf& ld = calloc_or_die<ldf>(); - ld->all = &all; - ld->need_to_clear = true; - ld->first_pass = true; + ld.all = &all; + ld.need_to_clear = true; + ld.first_pass = true; string ldf_arg; if( vm.count("csoaa_ldf") ){ ldf_arg = vm["csoaa_ldf"].as<string>(); - all.file_options.append(" --csoaa_ldf "); - all.file_options.append(ldf_arg); + all.file_options << " --csoaa_ldf " << ldf_arg; } else { ldf_arg = vm["wap_ldf"].as<string>(); - ld->is_wap = true; - all.file_options.append(" --wap_ldf "); - all.file_options.append(ldf_arg); + ld.is_wap = true; + all.file_options << " --wap_ldf " << ldf_arg; } if ( vm.count("ldf_override") ) ldf_arg = vm["ldf_override"].as<string>(); @@ -693,40 +688,40 @@ namespace LabelDict { all.sd->k = (uint32_t)-1; - ld->treat_as_classifier = false; - ld->is_singleline = false; + ld.treat_as_classifier = false; + ld.is_singleline = false; if (ldf_arg.compare("multiline") == 0 || ldf_arg.compare("m") == 0) { - ld->treat_as_classifier = false; + ld.treat_as_classifier = false; } else if (ldf_arg.compare("multiline-classifier") == 0 || ldf_arg.compare("mc") == 0) { - ld->treat_as_classifier = true; + ld.treat_as_classifier = true; } else { if (all.training) { cerr << "ldf requires either m/multiline or mc/multiline-classifier, except in test-mode which can be s/sc/singleline/singleline-classifier" << endl; throw exception(); } if (ldf_arg.compare("singleline") == 0 || ldf_arg.compare("s") == 0) { - ld->treat_as_classifier = false; - ld->is_singleline = true; + ld.treat_as_classifier = false; + ld.is_singleline = true; } else if (ldf_arg.compare("singleline-classifier") == 0 || ldf_arg.compare("sc") == 0) { - 
ld->treat_as_classifier = true; - ld->is_singleline = true; + ld.treat_as_classifier = true; + ld.is_singleline = true; } } - all.p->emptylines_separate_examples = true; // TODO: check this to be sure!!! !ld->is_singleline; + all.p->emptylines_separate_examples = true; // TODO: check this to be sure!!! !ld.is_singleline; if (all.add_constant) { all.add_constant = false; } - ld->label_features.init(256, v_array<feature>(), LabelDict::size_t_eq); - ld->label_features.get(1, 94717244); // TODO: figure this out + ld.label_features.init(256, v_array<feature>(), LabelDict::size_t_eq); + ld.label_features.get(1, 94717244); // TODO: figure this out - ld->read_example_this_loop = 0; - ld->need_to_clear = false; - learner* l = new learner(ld, all.l); + ld.read_example_this_loop = 0; + ld.need_to_clear = false; + learner* l = new learner(&ld, all.l); l->set_learn<ldf, predict_or_learn<true> >(); l->set_predict<ldf, predict_or_learn<false> >(); - if (ld->is_singleline) + if (ld.is_singleline) l->set_finish_example<ldf,finish_singleline_example>(); else l->set_finish_example<ldf,finish_multiline_example>(); diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc index 5acbd525..e73b2442 100644 --- a/vowpalwabbit/ect.cc +++ b/vowpalwabbit/ect.cc @@ -376,28 +376,24 @@ namespace ECT if (!vm.count("ect")) return NULL; - ect* data = calloc_or_die<ect>(); + ect& data = calloc_or_die<ect>(); //first parse for number of actions - data->k = (int)vm["ect"].as<size_t>(); + data.k = (int)vm["ect"].as<size_t>(); //append ect with nb_actions to options_from_file so it is saved to regressor later - stringstream ss; - ss << " --ect " << data->k; - if (vm.count("error")) { - data->errors = (uint32_t)vm["error"].as<size_t>(); + data.errors = (uint32_t)vm["error"].as<size_t>(); } else - data->errors = 0; + data.errors = 0; //append error flag to options_from_file so it is saved in regressor file later - ss << " --error " << data->errors; - all.file_options.append(ss.str()); + all.file_options << " 
--ect " << data.k << " --error " << data.errors; all.p->lp = MULTICLASS::mc_label; - size_t wpp = create_circuit(all, *data, data->k, data->errors+1); - data->all = &all; + size_t wpp = create_circuit(all, data, data.k, data.errors+1); + data.all = &all; - learner* l = new learner(data, all.l, wpp); + learner* l = new learner(&data, all.l, wpp); l->set_learn<ect, learn>(); l->set_predict<ect, predict>(); l->set_finish_example<ect,finish_example>(); diff --git a/vowpalwabbit/example.cc b/vowpalwabbit/example.cc index f38bd041..52399839 100644 --- a/vowpalwabbit/example.cc +++ b/vowpalwabbit/example.cc @@ -135,23 +135,23 @@ void return_features(feature* f) flat_example* flatten_example(vw& all, example *ec) { - flat_example* fec = calloc_or_die<flat_example>(); - fec->l = ec->l; + flat_example& fec = calloc_or_die<flat_example>(); + fec.l = ec->l; - fec->tag_len = ec->tag.size(); - if (fec->tag_len >0) + fec.tag_len = ec->tag.size(); + if (fec.tag_len >0) { - fec->tag = calloc_or_die<char>(fec->tag_len+1); - memcpy(fec->tag,ec->tag.begin, fec->tag_len); + fec.tag = calloc_or_die<char>(fec.tag_len+1); + memcpy(fec.tag,ec->tag.begin, fec.tag_len); } - fec->example_counter = ec->example_counter; - fec->ft_offset = ec->ft_offset; - fec->num_features = ec->num_features; + fec.example_counter = ec->example_counter; + fec.ft_offset = ec->ft_offset; + fec.num_features = ec->num_features; - fec->feature_map = VW::get_features(all, ec, fec->feature_map_len); + fec.feature_map = VW::get_features(all, ec, fec.feature_map_len); - return fec; + return &fec; } flat_example* flatten_sort_example(vw& all, example *ec) diff --git a/vowpalwabbit/ftrl_proximal.cc b/vowpalwabbit/ftrl_proximal.cc index 12363994..2707cbcf 100644 --- a/vowpalwabbit/ftrl_proximal.cc +++ b/vowpalwabbit/ftrl_proximal.cc @@ -179,45 +179,45 @@ namespace FTRL { learner* setup(vw& all, po::variables_map& vm) { - ftrl* b = calloc_or_die<ftrl>(); - b->all = &all; - b->ftrl_beta = 0.0; - b->ftrl_alpha = 0.1; + ftrl& 
b = calloc_or_die<ftrl>(); + b.all = &all; + b.ftrl_beta = 0.0; + b.ftrl_alpha = 0.1; po::options_description ftrl_opts("FTRL options"); ftrl_opts.add_options() - ("ftrl_alpha", po::value<float>(&(b->ftrl_alpha)), "Learning rate for FTRL-proximal optimization") - ("ftrl_beta", po::value<float>(&(b->ftrl_beta)), "FTRL beta") + ("ftrl_alpha", po::value<float>(&(b.ftrl_alpha)), "Learning rate for FTRL-proximal optimization") + ("ftrl_beta", po::value<float>(&(b.ftrl_beta)), "FTRL beta") ("progressive_validation", po::value<string>()->default_value("ftrl.evl"), "File to record progressive validation for ftrl-proximal"); vm = add_options(all, ftrl_opts); if (vm.count("ftrl_alpha")) { - b->ftrl_alpha = vm["ftrl_alpha"].as<float>(); + b.ftrl_alpha = vm["ftrl_alpha"].as<float>(); } if (vm.count("ftrl_beta")) { - b->ftrl_beta = vm["ftrl_beta"].as<float>(); + b.ftrl_beta = vm["ftrl_beta"].as<float>(); } all.reg.stride_shift = 2; // NOTE: for more parameter storage - b->progressive_validation = false; + b.progressive_validation = false; if (vm.count("progressive_validation")) { std::string filename = vm["progressive_validation"].as<string>(); - b->fo = fopen(filename.c_str(), "w"); - assert(b->fo != NULL); - b->progressive_validation = true; + b.fo = fopen(filename.c_str(), "w"); + assert(b.fo != NULL); + b.progressive_validation = true; } if (!all.quiet) { cerr << "Enabling FTRL-Proximal based optimization" << endl; - cerr << "ftrl_alpha = " << b->ftrl_alpha << endl; - cerr << "ftrl_beta = " << b->ftrl_beta << endl; + cerr << "ftrl_alpha = " << b.ftrl_alpha << endl; + cerr << "ftrl_beta = " << b.ftrl_beta << endl; } - learner* l = new learner(b, 1 << all.reg.stride_shift); + learner* l = new learner(&b, 1 << all.reg.stride_shift); l->set_learn<ftrl, learn>(); l->set_predict<ftrl, predict>(); l->set_save_load<ftrl,save_load>(); diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 46a71873..66575e38 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -852,20 
+852,19 @@ learner* setup(vw& all, po::variables_map& vm) ("normalized", "use per feature normalized updates") ("exact_adaptive_norm", "use current default invariant normalized adaptive update rule"); vm = add_options(all, opts); - gd* g = calloc_or_die<gd>(); - - g->all = &all; - g->all->normalized_sum_norm_x = 0; - g->no_win_counter = 0; - g->total_weight = 0.; - g->early_stop_thres = 3; - g->neg_norm_power = (all.adaptive ? (all.power_t - 1.f) : -1.f); - g->neg_power_t = - all.power_t; + gd& g = calloc_or_die<gd>(); + g.all = &all; + g.all->normalized_sum_norm_x = 0; + g.no_win_counter = 0; + g.total_weight = 0.; + g.early_stop_thres = 3; + g.neg_norm_power = (all.adaptive ? (all.power_t - 1.f) : -1.f); + g.neg_power_t = - all.power_t; if(all.initial_t > 0)//for the normalized update: if initial_t is bigger than 1 we interpret this as if we had seen (all.initial_t) previous fake datapoints all with norm 1 { - g->all->normalized_sum_norm_x = all.initial_t; - g->total_weight = all.initial_t; + g.all->normalized_sum_norm_x = all.initial_t; + g.total_weight = all.initial_t; } bool feature_mask_off = true; @@ -876,11 +875,11 @@ learner* setup(vw& all, po::variables_map& vm) { all.sd->holdout_best_loss = FLT_MAX; if(vm.count("early_terminate")) - g->early_stop_thres = vm["early_terminate"].as< size_t>(); + g.early_stop_thres = vm["early_terminate"].as< size_t>(); } if (vm.count("constant")) { - g->initial_constant = vm["constant"].as<float>(); + g.initial_constant = vm["constant"].as<float>(); } if( !all.training || ( ( vm.count("sgd") || vm.count("adaptive") || vm.count("invariant") || vm.count("normalized") ) && !vm.count("exact_adaptive_norm")) ) @@ -907,28 +906,28 @@ learner* setup(vw& all, po::variables_map& vm) cerr << "Warning: the learning rate for the last pass is multiplied by: " << pow((double)all.eta_decay_rate, (double)all.numpasses) << " adjust --decay_learning_rate larger to avoid this." 
<< endl; - learner* ret = new learner(g, 1); + learner* ret = new learner(&g, 1); if (all.reg_mode % 2) if (all.audit || all.hash_inv) { ret->set_predict<gd, predict<true, true> >(); - g->predict = predict<true, true>; + g.predict = predict<true, true>; } else { ret->set_predict<gd, predict<true, false> >(); - g->predict = predict<true, false>; + g.predict = predict<true, false>; } else if (all.audit || all.hash_inv) { ret->set_predict<gd, predict<false, true> >(); - g->predict = predict<false, true>; + g.predict = predict<false, true>; } else { ret->set_predict<gd, predict<false, false> >(); - g->predict = predict<false, false>; + g.predict = predict<false, false>; } uint32_t stride; diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc index 36aa8f90..eec54663 100644 --- a/vowpalwabbit/gd_mf.cc +++ b/vowpalwabbit/gd_mf.cc @@ -299,8 +299,8 @@ void mf_train(vw& all, example& ec) else all.rank = vm["gdmf"].as<uint32_t>(); - gdmf* data = calloc_or_die<gdmf>(); - data->all = &all; + gdmf& data = calloc_or_die<gdmf>(); + data.all = &all; // store linear + 2*rank weights per index, round up to power of two float temp = ceilf(logf((float)(all.rank*2+1)) / logf (2.f)); @@ -339,7 +339,7 @@ void mf_train(vw& all, example& ec) } all.eta *= powf((float)(all.sd->t), all.power_t); - learner* l = new learner(data, 1 << all.reg.stride_shift); + learner* l = new learner(&data, 1 << all.reg.stride_shift); l->set_learn<gdmf, learn>(); l->set_predict<gdmf, predict>(); l->set_save_load<gdmf,save_load>(); diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc index cdd870e5..e27c646d 100644 --- a/vowpalwabbit/global_data.cc +++ b/vowpalwabbit/global_data.cc @@ -235,7 +235,7 @@ po::variables_map add_options(vw& all, po::options_description& opts) vw::vw() { - sd = calloc_or_die<shared_data>(); + sd = &calloc_or_die<shared_data>(); sd->dump_interval = 1.; // next update progress dump sd->contraction = 1.; sd->max_label = 1.; @@ -280,8 +280,6 @@ vw::vw() 
per_feature_regularizer_output = ""; per_feature_regularizer_text = ""; - file_options = ""; - #ifdef _WIN32 stdout_fileno = _fileno(stdout); #else diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h index 9026d46b..f651426f 100644 --- a/vowpalwabbit/global_data.h +++ b/vowpalwabbit/global_data.h @@ -199,7 +199,7 @@ struct vw { double normalized_sum_norm_x; po::options_description opts; - std::string file_options; + std::stringstream file_options; vector<std::string> args; void* /*Search::search*/ searchstr; diff --git a/vowpalwabbit/kernel_svm.cc b/vowpalwabbit/kernel_svm.cc index b28b2ad0..1f72ffc6 100644 --- a/vowpalwabbit/kernel_svm.cc +++ b/vowpalwabbit/kernel_svm.cc @@ -106,7 +106,7 @@ namespace KSVM { krow.delete_v(); // free flatten example contents - flat_example *fec = calloc_or_die<flat_example>(); + flat_example *fec = &calloc_or_die<flat_example>(); *fec = ex; free_flatten_example(fec); // free contents of flat example and frees fec. } @@ -222,7 +222,7 @@ namespace KSVM int save_load_flat_example(io_buf& model_file, bool read, flat_example*& fec) { size_t brw = 1; if(read) { - fec = calloc_or_die<flat_example>(); + fec = &calloc_or_die<flat_example>(); brw = bin_read_fixed(model_file, (char*) fec, sizeof(flat_example), ""); if(brw > 0) { @@ -277,7 +277,7 @@ namespace KSVM for(uint32_t i = 0;i < model->num_support;i++) { if(read) { save_load_flat_example(model_file, read, fec); - svm_example* tmp= calloc_or_die<svm_example>(); + svm_example* tmp= &calloc_or_die<svm_example>(); tmp->init_svm_example(fec); model->support_vec.push_back(tmp); } @@ -398,7 +398,7 @@ namespace KSVM void predict(svm_params& params, learner &base, example& ec) { flat_example* fec = flatten_sort_example(*(params.all),&ec); if(fec) { - svm_example* sec = calloc_or_die<svm_example>(); + svm_example* sec = &calloc_or_die<svm_example>(); sec->init_svm_example(fec); float score; predict(params, &sec, &score, 1); @@ -584,7 +584,7 @@ namespace KSVM for(size_t i = 0;i < 
params.pool_size; i++) { if(!save_load_flat_example(*b, true, fec)) { - params.pool[i] = calloc_or_die<svm_example>(); + params.pool[i] = &calloc_or_die<svm_example>(); params.pool[i]->init_svm_example(fec); train_pool[i] = true; params.pool_pos++; @@ -739,7 +739,7 @@ namespace KSVM // cout<<i<<":"<<fec->feature_map[i].x<<" "<<fec->feature_map[i].weight_index<<" "; // cout<<endl; if(fec) { - svm_example* sec = calloc_or_die<svm_example>(); + svm_example* sec = &calloc_or_die<svm_example>(); sec->init_svm_example(fec); float score = 0; predict(params, &sec, &score, 1); @@ -814,54 +814,52 @@ namespace KSVM delete all.loss; all.loss = getLossFunction(&all, loss_function, (float)loss_parameter); - svm_params* params = calloc_or_die<svm_params>(); - params->model = calloc_or_die<svm_model>(); - params->model->num_support = 0; - //params->curcache = 0; - params->maxcache = 1024*1024*1024; - params->loss_sum = 0.; - params->all = &all; + svm_params& params = calloc_or_die<svm_params>(); + params.model = &calloc_or_die<svm_model>(); + params.model->num_support = 0; + //params.curcache = 0; + params.maxcache = 1024*1024*1024; + params.loss_sum = 0.; + params.all = &all; if(vm.count("reprocess")) - params->reprocess = vm["reprocess"].as<std::size_t>(); + params.reprocess = vm["reprocess"].as<std::size_t>(); else - params->reprocess = 1; + params.reprocess = 1; if(vm.count("active")) - params->active = true; - if(params->active) { + params.active = true; + if(params.active) { if(vm.count("active_c")) - params->active_c = vm["active_c"].as<double>(); + params.active_c = vm["active_c"].as<double>(); else - params->active_c = 1.; + params.active_c = 1.; if(vm.count("pool_greedy")) - params->active_pool_greedy = 1; + params.active_pool_greedy = 1; /*if(vm.count("para_active")) - params->para_active = 1;*/ + params.para_active = 1;*/ } if(vm.count("pool_size")) - params->pool_size = vm["pool_size"].as<std::size_t>(); + params.pool_size = vm["pool_size"].as<std::size_t>(); else - 
params->pool_size = 1; + params.pool_size = 1; - params->pool = calloc_or_die<svm_example*>(params->pool_size); - params->pool_pos = 0; + params.pool = calloc_or_die<svm_example*>(params.pool_size); + params.pool_pos = 0; if(vm.count("subsample")) - params->subsample = vm["subsample"].as<std::size_t>(); - else if(params->para_active) - params->subsample = (size_t)ceil(params->pool_size / all.total); + params.subsample = vm["subsample"].as<std::size_t>(); + else if(params.para_active) + params.subsample = (size_t)ceil(params.pool_size / all.total); else - params->subsample = 1; + params.subsample = 1; - params->lambda = all.l2_lambda; + params.lambda = all.l2_lambda; - std::stringstream ss1, ss2; - ss1 <<" --lambda "<< params->lambda; - all.file_options.append(ss1.str()); + all.file_options <<" --lambda "<< params.lambda; - cerr<<"Lambda = "<<params->lambda<<endl; + cerr<<"Lambda = "<<params.lambda<<endl; std::string kernel_type; @@ -870,44 +868,39 @@ namespace KSVM else kernel_type = string("linear"); - ss2 <<" --kernel "<< kernel_type; - all.file_options.append(ss2.str()); + all.file_options <<" --kernel "<< kernel_type; cerr<<"Kernel = "<<kernel_type<<endl; if(kernel_type.compare("rbf") == 0) { - params->kernel_type = SVM_KER_RBF; + params.kernel_type = SVM_KER_RBF; float bandwidth = 1.; if(vm.count("bandwidth")) { - std::stringstream ss; bandwidth = vm["bandwidth"].as<float>(); - ss<<" --bandwidth "<<bandwidth; - all.file_options.append(ss.str()); + all.file_options <<" --bandwidth "<<bandwidth; } cerr<<"bandwidth = "<<bandwidth<<endl; - params->kernel_params = calloc_or_die<double>(); - *((float*)params->kernel_params) = bandwidth; + params.kernel_params = &calloc_or_die<double>(); + *((float*)params.kernel_params) = bandwidth; } else if(kernel_type.compare("poly") == 0) { - params->kernel_type = SVM_KER_POLY; + params.kernel_type = SVM_KER_POLY; int degree = 2; if(vm.count("degree")) { - std::stringstream ss; degree = vm["degree"].as<int>(); - ss<<" --degree 
"<<degree; - all.file_options.append(ss.str()); + all.file_options <<" --degree "<<degree; } cerr<<"degree = "<<degree<<endl; - params->kernel_params = calloc_or_die<int>(); - *((int*)params->kernel_params) = degree; + params.kernel_params = &calloc_or_die<int>(); + *((int*)params.kernel_params) = degree; } else - params->kernel_type = SVM_KER_LIN; + params.kernel_type = SVM_KER_LIN; - params->all->reg.weight_mask = (uint32_t)LONG_MAX; - params->all->reg.stride_shift = 0; + params.all->reg.weight_mask = (uint32_t)LONG_MAX; + params.all->reg.stride_shift = 0; - learner* l = new learner(params, 1); + learner* l = new learner(&params, 1); l->set_learn<svm_params, learn>(); l->set_predict<svm_params, predict>(); l->set_save_load<svm_params, save_load>(); diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc index c7d9ede2..8c8961af 100644 --- a/vowpalwabbit/lda_core.cc +++ b/vowpalwabbit/lda_core.cc @@ -770,27 +770,25 @@ void end_examples(lda& l) else
all.lda = vm["lda"].as<uint32_t>();
- lda* ld = calloc_or_die<lda>();
+ lda& ld = calloc_or_die<lda>();
- ld->lda = all.lda;
- ld->lda_alpha = vm["lda_alpha"].as<float>();
- ld->lda_rho = vm["lda_rho"].as<float>();
- ld->lda_D = vm["lda_D"].as<float>();
- ld->lda_epsilon = vm["lda_epsilon"].as<float>();
- ld->minibatch = vm["minibatch"].as<size_t>();
- ld->sorted_features = vector<index_feature>();
- ld->total_lambda_init = 0;
- ld->all = &all;
- ld->example_t = all.initial_t;
+ ld.lda = all.lda;
+ ld.lda_alpha = vm["lda_alpha"].as<float>();
+ ld.lda_rho = vm["lda_rho"].as<float>();
+ ld.lda_D = vm["lda_D"].as<float>();
+ ld.lda_epsilon = vm["lda_epsilon"].as<float>();
+ ld.minibatch = vm["minibatch"].as<size_t>();
+ ld.sorted_features = vector<index_feature>();
+ ld.total_lambda_init = 0;
+ ld.all = &all;
+ ld.example_t = all.initial_t;
float temp = ceilf(logf((float)(all.lda*2+1)) / logf (2.f));
all.reg.stride_shift = (size_t)temp;
all.random_weights = true;
all.add_constant = false;
- std::stringstream ss;
- ss << " --lda " << all.lda;
- all.file_options.append(ss.str());
+ all.file_options << " --lda " << all.lda;
if (all.eta > 1.)
{
@@ -799,15 +797,16 @@ void end_examples(lda& l) }
if (vm.count("minibatch")) {
- size_t minibatch2 = next_pow2(ld->minibatch);
+ size_t minibatch2 = next_pow2(ld.minibatch);
all.p->ring_size = all.p->ring_size > minibatch2 ? all.p->ring_size : minibatch2;
+ }
- ld->v.resize(all.lda*ld->minibatch);
+ ld.v.resize(all.lda*ld.minibatch);
- ld->decay_levels.push_back(0.f);
+ ld.decay_levels.push_back(0.f);
- learner* l = new learner(ld, 1 << all.reg.stride_shift);
+ learner* l = new learner(&ld, 1 << all.reg.stride_shift);
l->set_learn<lda,learn>();
l->set_predict<lda,predict>();
l->set_save_load<lda,save_load>();
@@ -817,5 +816,6 @@ void end_examples(lda& l) l->set_finish<lda,finish>();
return l;
+ }
+ }
-}
diff --git a/vowpalwabbit/log_multi.cc b/vowpalwabbit/log_multi.cc index 7bb66e9a..c1077320 100644 --- a/vowpalwabbit/log_multi.cc +++ b/vowpalwabbit/log_multi.cc @@ -522,9 +522,7 @@ namespace LOG_MULTI data->swap_resist = vm["swap_resistance"].as<uint32_t>();
//append log_multi with nb_actions to options_from_file so it is saved to regressor later
- std::stringstream ss;
- ss << " --log_multi " << data->k;
- all.file_options.append(ss.str());
+ all.file_options << " --log_multi " << data->k;
if (vm.count("no_progress"))
data->progress = false;
diff --git a/vowpalwabbit/lrq.cc b/vowpalwabbit/lrq.cc index 3caf81ae..ee826b72 100644 --- a/vowpalwabbit/lrq.cc +++ b/vowpalwabbit/lrq.cc @@ -200,29 +200,25 @@ namespace LRQ { LRQstate* lrq = (LRQstate*)calloc(1, sizeof (LRQstate)); unsigned int maxk = 0; lrq->all = &all; - + size_t random_seed = 0; if (vm.count("random_seed")) random_seed = vm["random_seed"].as<size_t> (); - + lrq->initial_seed = lrq->seed = random_seed | 8675309; - if (vm.count("lrqdropout")) - lrq->dropout = true; - else - lrq->dropout = false; - - all.file_options.append(" --lrqdropout"); + if (vm.count("lrqdropout")) + lrq->dropout = true; + else + lrq->dropout = false; + + all.file_options << " --lrqdropout "; lrq->lrpairs = vm["lrq"].as<vector<string> > (); - stringstream ss; for (vector<string>::iterator i = lrq->lrpairs.begin (); i != lrq->lrpairs.end (); ++i) - ss << " --lrq " << *i; + all.file_options << " --lrq " << *i; - all.file_options.append(ss.str()); - - if (! all.quiet) { cerr << "creating low rank quadratic features for pairs: "; diff --git a/vowpalwabbit/memory.h b/vowpalwabbit/memory.h index 63441f46..6d67d51e 100644 --- a/vowpalwabbit/memory.h +++ b/vowpalwabbit/memory.h @@ -4,7 +4,7 @@ #include <iostream> template<class T> -T* calloc_or_die(size_t nmemb = 1) +T* calloc_or_die(size_t nmemb) { if (nmemb == 0) return NULL; @@ -17,4 +17,10 @@ T* calloc_or_die(size_t nmemb = 1) return (T*)data; } +template<class T> T& calloc_or_die() +{ + return *calloc_or_die<T>(1); +} + + void free_it(void* ptr); diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc index 2ae38489..3a9d8e45 100644 --- a/vowpalwabbit/nn.cc +++ b/vowpalwabbit/nn.cc @@ -320,62 +320,53 @@ CONVERSE: // That's right, I'm using goto. So sue me. 
if(!vm.count("nn")) return NULL; - nn* n = calloc_or_die<nn>(); - n->all = &all; - + nn& n = calloc_or_die<nn>(); + n.all = &all; //first parse for number of hidden units - n->k = (uint32_t)vm["nn"].as<size_t>(); - - std::stringstream ss; - ss << " --nn " << n->k; - all.file_options.append(ss.str()); + n.k = (uint32_t)vm["nn"].as<size_t>(); + all.file_options << " --nn " << n.k; if ( vm.count("dropout") ) { - n->dropout = true; - - std::stringstream ss; - ss << " --dropout "; - all.file_options.append(ss.str()); + n.dropout = true; + all.file_options << " --dropout "; } if ( vm.count("meanfield") ) { - n->dropout = false; + n.dropout = false; if (! all.quiet) std::cerr << "using mean field for neural network " << (all.training ? "training" : "testing") << std::endl; } - if (n->dropout) + if (n.dropout) if (! all.quiet) std::cerr << "using dropout for neural network " << (all.training ? "training" : "testing") << std::endl; if (vm.count ("inpass")) { - n->inpass = true; + n.inpass = true; + all.file_options << " --inpass"; - std::stringstream ss; - ss << " --inpass"; - all.file_options.append(ss.str()); } - if (n->inpass && ! all.quiet) + if (n.inpass && ! all.quiet) std::cerr << "using input passthrough for neural network " << (all.training ? "training" : "testing") << std::endl; - n->finished_setup = false; - n->squared_loss = getLossFunction (0, "squared", 0); + n.finished_setup = false; + n.squared_loss = getLossFunction (0, "squared", 0); - n->xsubi = 0; + n.xsubi = 0; if (vm.count("random_seed")) - n->xsubi = vm["random_seed"].as<size_t>(); + n.xsubi = vm["random_seed"].as<size_t>(); - n->save_xsubi = n->xsubi; - n->increment = all.l->increment;//Indexing of output layer is odd. - learner* l = new learner(n, all.l, n->k+1); + n.save_xsubi = n.xsubi; + n.increment = all.l->increment;//Indexing of output layer is odd. 
+ learner* l = new learner(&n, all.l, n.k+1); l->set_learn<nn, predict_or_learn<true> >(); l->set_predict<nn, predict_or_learn<false> >(); l->set_finish<nn, finish>(); diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc index 2fd3c9d3..1a132045 100644 --- a/vowpalwabbit/oaa.cc +++ b/vowpalwabbit/oaa.cc @@ -20,7 +20,7 @@ using namespace MULTICLASS; namespace OAA { struct oaa{ - uint32_t k; + size_t k; bool shouldOutput; vw* all; }; @@ -33,8 +33,7 @@ namespace OAA { ec.l.simple = {0.f, mc_label_data.weight, 0.f}; - string outputString; - stringstream outputStringStream(outputString); + stringstream outputStringStream; uint32_t prediction = 1; float score = INT_MIN; @@ -85,21 +84,17 @@ namespace OAA { if(!vm.count("oaa")) return NULL; - oaa* data = calloc_or_die<oaa>(); + oaa& data = calloc_or_die<oaa>(); //first parse for number of actions - - data->k = (uint32_t)vm["oaa"].as<size_t>(); - + data.k = vm["oaa"].as<size_t>(); //append oaa with nb_actions to options_from_file so it is saved to regressor later - std::stringstream ss; - ss << " --oaa " << data->k; - all.file_options.append(ss.str()); + all.file_options << " --oaa " << data.k; - data->shouldOutput = all.raw_prediction > 0; - data->all = &all; + data.shouldOutput = all.raw_prediction > 0; + data.all = &all; all.p->lp = mc_label; - learner* l = new learner(data, all.l, data->k); + learner* l = new learner(&data, all.l, data.k); l->set_learn<oaa, predict_or_learn<true> >(); l->set_predict<oaa, predict_or_learn<false> >(); l->set_finish_example<oaa, finish_example>(); diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index 3136115e..56e3a89b 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -354,9 +354,7 @@ void parse_feature_tweaks(vw& all, po::variables_map& vm) if (vm.count("affix")) { parse_affix_argument(all, vm["affix"].as<string>()); - stringstream ss; - ss << " --affix " << vm["affix"].as<string>(); - all.file_options.append(ss.str()); + all.file_options 
<< " --affix " << vm["affix"].as<string>(); } if(vm.count("ngram")){ @@ -898,7 +896,7 @@ vw* parse_args(int argc, char *argv[]) parse_regressor_args(*all, vm, io_temp); int temp_argc = 0; - char** temp_argv = VW::get_argv_from_string(all->file_options, temp_argc); + char** temp_argv = VW::get_argv_from_string(all->file_options.str(), temp_argc); add_to_args(*all, temp_argc, temp_argv); for (int i = 0; i < temp_argc; i++) free(temp_argv[i]); @@ -912,7 +910,7 @@ vw* parse_args(int argc, char *argv[]) po::store(pos, vm); po::notify(vm); - all->file_options = ""; + all->file_options.str(""); parse_feature_tweaks(*all, vm); //feature tweaks @@ -968,16 +966,14 @@ vw* parse_args(int argc, char *argv[]) } namespace VW { - void cmd_string_replace_value( string& cmd, string flag_to_replace, string new_value ) + void cmd_string_replace_value( std::stringstream& ss, string flag_to_replace, string new_value ) { flag_to_replace.append(" "); //add a space to make sure we obtain the right flag in case 2 flags start with the same set of characters + string cmd = ss.str(); size_t pos = cmd.find(flag_to_replace); - if( pos == string::npos ) { + if( pos == string::npos ) //flag currently not present in command string, so just append it to command string - cmd.append(" "); - cmd.append(flag_to_replace); - cmd.append(new_value); - } + ss << " " << flag_to_replace << new_value; else { //flag is present, need to replace old value with new value @@ -987,14 +983,13 @@ namespace VW { //now pos is position where value starts //find position of next space size_t pos_after_value = cmd.find(" ",pos); - if(pos_after_value == string::npos) { + if(pos_after_value == string::npos) //we reach the end of the string, so replace the all characters after pos by new_value cmd.replace(pos,cmd.size()-pos,new_value); - } - else { + else //replace characters between pos and pos_after_value by new_value cmd.replace(pos,pos_after_value-pos,new_value); - } + ss.str(cmd); } } diff --git 
a/vowpalwabbit/parse_regressor.cc b/vowpalwabbit/parse_regressor.cc index 504db6a7..daee30da 100644 --- a/vowpalwabbit/parse_regressor.cc +++ b/vowpalwabbit/parse_regressor.cc @@ -229,16 +229,16 @@ void save_load_header(vw& all, io_buf& model_file, bool read, bool text) "", read, "\n",1, text); - text_len = sprintf(buff, "options:%s\n", all.file_options.c_str()); - uint32_t len = (uint32_t)all.file_options.length()+1; - memcpy(buff2, all.file_options.c_str(),len); + text_len = sprintf(buff, "options:%s\n", all.file_options.str().c_str()); + uint32_t len = (uint32_t)all.file_options.str().length()+1; + memcpy(buff2, all.file_options.str().c_str(),len); if (read) len = buf_size; bin_text_read_write(model_file,buff2, len, "", read, buff, text_len, text); if (read) - all.file_options.assign(buff2); + all.file_options.str(buff2); } } @@ -348,7 +348,7 @@ void parse_mask_regressor_args(vw& all, po::variables_map& vm){ } } else { // If no initial regressor, just clear out the options loaded from the header. 
- all.file_options.assign(""); + all.file_options.str(""); } } } diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc index f47d5a10..44055bab 100644 --- a/vowpalwabbit/parser.cc +++ b/vowpalwabbit/parser.cc @@ -153,16 +153,16 @@ bool is_test_only(uint32_t counter, uint32_t period, uint32_t after, bool holdou parser* new_parser() { - parser* ret = calloc_or_die<parser>(); - ret->input = new io_buf; - ret->output = new io_buf; - ret->local_example_number = 0; - ret->in_pass_counter = 0; - ret->ring_size = 1 << 8; - ret->done = false; - ret->used_index = 0; - - return ret; + parser& ret = calloc_or_die<parser>(); + ret.input = new io_buf; + ret.output = new io_buf; + ret.local_example_number = 0; + ret.in_pass_counter = 0; + ret.ring_size = 1 << 8; + ret.done = false; + ret.used_index = 0; + + return &ret; } void set_compressed(parser* par){ diff --git a/vowpalwabbit/print.cc b/vowpalwabbit/print.cc index c5c6566e..503b99e5 100644 --- a/vowpalwabbit/print.cc +++ b/vowpalwabbit/print.cc @@ -54,13 +54,14 @@ namespace PRINT if(!vm.count("print")) return NULL; - print* p = calloc_or_die<print>(); - p->all = &all; + print& p = calloc_or_die<print>(); + p.all = &all; + size_t length = ((size_t)1) << all.num_bits; all.reg.weight_mask = (length << all.reg.stride_shift) - 1; all.reg.stride_shift = 0; - learner* ret = new learner(p, 1); + learner* ret = new learner(&p, 1); ret->set_learn<print,learn>(); ret->set_predict<print,learn>(); return ret; diff --git a/vowpalwabbit/scorer.cc b/vowpalwabbit/scorer.cc index a76e11b1..a889a2ed 100644 --- a/vowpalwabbit/scorer.cc +++ b/vowpalwabbit/scorer.cc @@ -51,12 +51,12 @@ namespace Scorer { opts.add_options() ("link", po::value<string>()->default_value("identity"), "Specify the link function: identity, logistic or glf1"); vm = add_options(all, opts); - scorer* s = calloc_or_die<scorer>(); - s->all = &all; + string link = vm["link"].as<string>(); - learner* l = new learner(s, all.l); + scorer& s = calloc_or_die<scorer>(); + 
s.all = &all; - string link = vm["link"].as<string>(); + learner* l = new learner(&s, all.l); if (!vm.count("link") || link.compare("identity") == 0) { l->set_learn<scorer, predict_or_learn<true, noop> >(); @@ -64,13 +64,13 @@ namespace Scorer { } else if (link.compare("logistic") == 0) { - all.file_options.append(" --link=logistic "); + all.file_options << " --link=logistic "; l->set_learn<scorer, predict_or_learn<true, logistic> >(); l->set_predict<scorer, predict_or_learn<false, logistic> >(); } else if (link.compare("glf1") == 0) { - all.file_options.append(" --link=glf1 "); + all.file_options << " --link=glf1 "; l->set_learn<scorer, predict_or_learn<true, glf1> >(); l->set_predict<scorer, predict_or_learn<false, glf1> >(); } diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc index 6f03f84e..2907f5c7 100644 --- a/vowpalwabbit/search.cc +++ b/vowpalwabbit/search.cc @@ -1653,9 +1653,7 @@ namespace Search { template<class T> void check_option(T& ret, vw&all, po::variables_map& vm, const char* opt_name, bool default_to_cmdline, bool(*equal)(T,T), const char* mismatch_error_string, const char* required_error_string) { if (vm.count(opt_name)) { ret = vm[opt_name].as<T>(); - stringstream ss; - ss << " --" << opt_name << " " << ret; - all.file_options.append(ss.str()); + all.file_options << " --" << opt_name << " " << ret; } else if (strlen(required_error_string)>0) { std::cerr << required_error_string << endl; if (! 
vm.count("help")) @@ -1666,9 +1664,7 @@ namespace Search { void check_option(bool& ret, vw&all, po::variables_map& vm, const char* opt_name, bool default_to_cmdline, const char* mismatch_error_string) { if (vm.count(opt_name)) { ret = true; - stringstream ss; - ss << " --" << opt_name; - all.file_options.append(ss.str()); + all.file_options << " --" << opt_name; } else ret = false; } @@ -1799,7 +1795,7 @@ namespace Search { vm = add_options(all, opts); if (!vm.count("search")) return NULL; - + bool has_hook_task = false; for (size_t i=0; i<all.args.size()-1; i++) if (all.args[i] == "--search_task" && all.args[i+1] == "hook") @@ -1809,10 +1805,10 @@ namespace Search { if (all.args[i] == "--search_task" && all.args[i+1] != "hook") all.args.erase(all.args.begin() + i, all.args.begin() + i + 2); - search* sch = calloc_or_die<search>(); - sch->priv = new search_private(); - search_initialize(&all, *sch); - search_private& priv = *sch->priv; + search& sch = calloc_or_die<search>(); + sch.priv = new search_private(); + search_initialize(&all, sch); + search_private& priv = *sch.priv; std::string task_string; std::string interpolation_string = "data"; @@ -1854,7 +1850,7 @@ namespace Search { string neighbor_features_string; check_option<string>(neighbor_features_string, all, vm, "search_neighbor_features", false, string_equal, "warning: you specified a different feature structure with --search_neighbor_features than the one loaded from predictor. using loaded value of: ", ""); - parse_neighbor_features(neighbor_features_string, *sch); + parse_neighbor_features(neighbor_features_string, sch); if (interpolation_string.compare("data") == 0) { // run as dagger priv.adaptive_beta = true; @@ -1896,7 +1892,7 @@ namespace Search { "warning: you specified a different history length through --search_history_length than the one loaded from predictor. using loaded value of: ", ""); //check if the base learner is contextual bandit, in which case, we dont rollout all actions. 
- priv.allowed_actions_cache = calloc_or_die<polylabel>(); + priv.allowed_actions_cache = &calloc_or_die<polylabel>(); if (vm.count("cb")) { priv.cb_learner = true; CB::cb_label.default_label(priv.allowed_actions_cache); @@ -1950,7 +1946,7 @@ namespace Search { for (search_task** mytask = all_tasks; *mytask != NULL; mytask++) if (task_string.compare((*mytask)->task_name) == 0) { priv.task = *mytask; - sch->task_name = (*mytask)->task_name; + sch.task_name = (*mytask)->task_name; break; } if (priv.task == NULL) { @@ -1964,7 +1960,7 @@ namespace Search { // default to OAA labels unless the task wants to override this (which they can do in initialize) all.p->lp = MC::mc_label; if (priv.task) - priv.task->initialize(*sch, priv.A, vm); + priv.task->initialize(sch, priv.A, vm); if (vm.count("search_allowed_transitions")) read_allowed_transitions((action)priv.A, vm["search_allowed_transitions"].as<string>().c_str()); @@ -1983,7 +1979,7 @@ namespace Search { if (!priv.allow_current_policy) // if we're not dagger all.check_holdout_every_n_passes = priv.passes_per_policy; - all.searchstr = sch; + all.searchstr = &sch; priv.start_clock_time = clock(); @@ -1991,7 +1987,7 @@ namespace Search { vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["search"])); learner* base = setup_base(all,vm); - learner* l = new learner(sch, base, priv.total_number_of_policies); + learner* l = new learner(&sch, all.l, priv.total_number_of_policies); l->set_learn<search, search_predict_or_learn<true> >(); l->set_predict<search, search_predict_or_learn<false> >(); l->set_finish_example<search,finish_example>(); diff --git a/vowpalwabbit/sender.cc b/vowpalwabbit/sender.cc index e7407f9d..0a0a284a 100644 --- a/vowpalwabbit/sender.cc +++ b/vowpalwabbit/sender.cc @@ -109,18 +109,18 @@ learner* setup(vw& all, po::variables_map& vm) if(!vm.count("sendto")) return NULL; - sender* s = calloc_or_die<sender>(); - s->sd = -1; + sender& s = calloc_or_die<sender>(); + s.sd = -1; if 
(vm.count("sendto")) { vector<string> hosts = vm["sendto"].as< vector<string> >(); - open_sockets(*s, hosts[0]); + open_sockets(s, hosts[0]); } - s->all = &all; - s->delay_ring = calloc_or_die<example*>(all.p->ring_size); + s.all = &all; + s.delay_ring = calloc_or_die<example*>(all.p->ring_size); - learner* l = new learner(s, 1); + learner* l = new learner(&s, 1); l->set_learn<sender, learn>(); l->set_predict<sender, learn>(); l->set_finish<sender, finish>(); diff --git a/vowpalwabbit/stagewise_poly.cc b/vowpalwabbit/stagewise_poly.cc index a12ce55d..fb3dc425 100644 --- a/vowpalwabbit/stagewise_poly.cc +++ b/vowpalwabbit/stagewise_poly.cc @@ -672,35 +672,34 @@ namespace StagewisePoly if (vm.count("stage_poly")) return NULL; - stagewise_poly *poly = calloc_or_die<stagewise_poly>(); - poly->all = &all; - - depthsbits_create(*poly); - sort_data_create(*poly); - - poly->sched_exponent = vm.count("sched_exponent") ? vm["sched_exponent"].as<float>() : 1.f; - poly->batch_sz = vm.count("batch_sz") ? vm["batch_sz"].as<uint32_t>() : 1000; - poly->batch_sz_double = vm.count("batch_sz_no_doubling") ? false : true; + stagewise_poly& poly = calloc_or_die<stagewise_poly>(); + poly.all = &all; + depthsbits_create(poly); + sort_data_create(poly); + + poly.sched_exponent = vm.count("sched_exponent") ? vm["sched_exponent"].as<float>() : 1.f; + poly.batch_sz = vm.count("batch_sz") ? vm["batch_sz"].as<uint32_t>() : 1000; + poly.batch_sz_double = vm.count("batch_sz_no_doubling") ? false : true; #ifdef MAGIC_ARGUMENT - poly->magic_argument = vm.count("magic_argument") ? vm["magic_argument"].as<float>() : 0.; + poly.magic_argument = vm.count("magic_argument") ? 
vm["magic_argument"].as<float>() : 0.; #endif //MAGIC_ARGUMENT - poly->sum_sparsity = 0; - poly->sum_input_sparsity = 0; - poly->num_examples = 0; - poly->sum_sparsity_sync = 0; - poly->sum_input_sparsity_sync = 0; - poly->num_examples_sync = 0; - poly->last_example_counter = -1; - poly->numpasses = 1; - poly->update_support = false; - poly->original_ec = NULL; - poly->next_batch_sz = poly->batch_sz; + poly.sum_sparsity = 0; + poly.sum_input_sparsity = 0; + poly.num_examples = 0; + poly.sum_sparsity_sync = 0; + poly.sum_input_sparsity_sync = 0; + poly.num_examples_sync = 0; + poly.last_example_counter = -1; + poly.numpasses = 1; + poly.update_support = false; + poly.original_ec = NULL; + poly.next_batch_sz = poly.batch_sz; //following is so that saved models know to load us. - all.file_options.append(" --stage_poly"); + all.file_options << " --stage_poly"; - learner *l = new learner(poly, all.l); + learner *l = new learner(&poly, all.l); l->set_learn<stagewise_poly, learn>(); l->set_predict<stagewise_poly, predict>(); l->set_finish<stagewise_poly, finish>(); diff --git a/vowpalwabbit/topk.cc b/vowpalwabbit/topk.cc index 1aeb4127..1fc86002 100644 --- a/vowpalwabbit/topk.cc +++ b/vowpalwabbit/topk.cc @@ -120,13 +120,11 @@ namespace TOPK { if(!vm.count("top")) return NULL; - topk* data = calloc_or_die<topk>(); + topk& data = calloc_or_die<topk>(); + data.B = (uint32_t)vm["top"].as<size_t>(); + data.all = &all; - data->B = (uint32_t)vm["top"].as<size_t>(); - - data->all = &all; - - learner* l = new learner(data, all.l); + learner* l = new learner(&data, all.l); l->set_learn<topk, predict_or_learn<true> >(); l->set_predict<topk, predict_or_learn<false> >(); l->set_finish_example<topk,finish_example>(); diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index 3408da9b..49bfaca3 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -18,7 +18,7 @@ namespace VW { */ vw* initialize(string s); - void cmd_string_replace_value( string& cmd, string flag_to_replace, string 
new_value ); + void cmd_string_replace_value( std::stringstream& ss, string flag_to_replace, string new_value ); char** get_argv_from_string(string s, int& argc); |