github.com/moses-smt/vowpal_wabbit.git
author     John Langford <jl@nyclamp.(none)>  2014-05-02 19:29:54 +0400
committer  John Langford <jl@nyclamp.(none)>  2014-05-02 19:29:54 +0400
commit     10776f942eb78e894cb57b3e2b34968278fb3cc5 (patch)
tree       bf5ee88e10cbd8034704a5a040dee3d2470b2b0e /vowpalwabbit/parse_args.cc
parent     663819314cc2486a076ac6dee080e4e775764f69 (diff)
giant refactor of parse_args
Diffstat (limited to 'vowpalwabbit/parse_args.cc')
-rw-r--r--  vowpalwabbit/parse_args.cc  1046
1 file changed, 545 insertions(+), 501 deletions(-)
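
In outline, the commit splits the monolithic parse_args into focused helper functions. The resulting top-level flow, condensed from the + side of the diff below, is roughly:

    vw* parse_args(int argc, char *argv[])
    {
      // ... declare the po::options_description groups and run the
      //     command-line parser, as before ...
      msrand48(random_seed);
      parse_diagnostics(*all, vm, desc, argc);
      parse_source(*all, vm);
      // re-parse flags stored in the regressor header into vm_file
      parse_feature_tweaks(*all, vm, vm_file);
      parse_example_tweaks(*all, vm);
      parse_base_algorithm(*all, to_pass_further, vm);
      parse_output_model(*all, vm);
      parse_output_preds(*all, vm, vm_file);
      load_input_model(*all, vm, io_temp);
      parse_scorer_reductions(*all, to_pass_further, vm, vm_file);
      parse_score_users(*all, to_pass_further, vm, vm_file, got_cs);
      parse_cb(*all, to_pass_further, vm, vm_file, got_cs, got_cb);
      parse_search(*all, to_pass_further, vm, vm_file, got_cs, got_cb);
      // ... remaining reductions (bs, top, ...), enable_sources, etc. ...
    }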
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 6aa75c1d..3edc34f5 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -103,331 +103,123 @@ void parse_affix_argument(vw&all, string str) {
free(cstr);
}
-vw* parse_args(int argc, char *argv[])
+void parse_diagnostics(vw& all, po::variables_map& vm, po::options_description& desc, int argc)
{
- po::options_description desc("VW options");
-
- vw* all = new vw();
-
- size_t random_seed = 0;
- all->program_name = argv[0];
-
- po::options_description in_opt("Input options");
-
- in_opt.add_options()
- ("data,d", po::value< string >(), "Example Set")
- ("ring_size", po::value<size_t>(&(all->p->ring_size)), "size of example ring")
- ("examples", po::value<size_t>(&(all->max_examples)), "number of examples to parse")
- ("testonly,t", "Ignore label information and just test")
- ("daemon", "persistent daemon mode on port 26542")
- ("port", po::value<size_t>(),"port to listen on; use 0 to pick unused port")
- ("num_children", po::value<size_t>(&(all->num_children)), "number of children for persistent daemon mode")
- ("pid_file", po::value< string >(), "Write pid file in persistent daemon mode")
- ("port_file", po::value< string >(), "Write port used in persistent daemon mode")
- ("passes", po::value<size_t>(&(all->numpasses)),"Number of Training Passes")
- ("cache,c", "Use a cache. The default is <data>.cache")
- ("cache_file", po::value< vector<string> >(), "The location(s) of cache_file.")
- ("kill_cache,k", "do not reuse existing cache: create a new one always")
- ("compressed", "use gzip format whenever possible. If a cache file is being created, this option creates a compressed cache file. A mixture of raw-text & compressed inputs are supported with autodetection.")
- ("no_stdin", "do not default to reading from stdin")
- ("save_resume", "save extra state so learning can be resumed later with new data")
- ;
-
- po::options_description out_opt("Output options");
-
- out_opt.add_options()
- ("audit,a", "print weights of features")
- ("predictions,p", po::value< string >(), "File to output predictions to")
- ("raw_predictions,r", po::value< string >(), "File to output unnormalized predictions to")
- ("sendto", po::value< vector<string> >(), "send examples to <host>")
- ("quiet", "Don't output disgnostics and progress updates")
- ("progress,P", po::value< string >(), "Progress update frequency. int: additive, float: multiplicative")
- ("binary", "report loss as binary classification on -1,1")
- ("min_prediction", po::value<float>(&(all->sd->min_label)), "Smallest prediction to output")
- ("max_prediction", po::value<float>(&(all->sd->max_label)), "Largest prediction to output")
- ;
-
- po::options_description update_opt("Update options");
-
- update_opt.add_options()
- ("sgd", "use regular stochastic gradient descent update.")
- ("hessian_on", "use second derivative in line search")
- ("bfgs", "use bfgs optimization")
- ("mem", po::value<int>(&(all->m)), "memory in bfgs")
- ("termination", po::value<float>(&(all->rel_threshold)),"Termination threshold")
- ("adaptive", "use adaptive, individual learning rates.")
- ("invariant", "use safe/importance aware updates.")
- ("normalized", "use per feature normalized updates")
- ("exact_adaptive_norm", "use current default invariant normalized adaptive update rule")
- ("conjugate_gradient", "use conjugate gradient based optimization")
- ("l1", po::value<float>(&(all->l1_lambda)), "l_1 lambda")
- ("l2", po::value<float>(&(all->l2_lambda)), "l_2 lambda")
- ("learning_rate,l", po::value<float>(&(all->eta)), "Set Learning Rate")
- ("loss_function", po::value<string>()->default_value("squared"), "Specify the loss function to be used, uses squared by default. Currently available ones are squared, classic, hinge, logistic and quantile.")
- ("quantile_tau", po::value<float>()->default_value(0.5), "Parameter \\tau associated with Quantile loss. Defaults to 0.5")
- ("power_t", po::value<float>(&(all->power_t)), "t power value")
- ("decay_learning_rate", po::value<float>(&(all->eta_decay_rate)),
- "Set Decay factor for learning_rate between passes")
- ("initial_pass_length", po::value<size_t>(&(all->pass_length)), "initial number of examples per pass")
- ("initial_t", po::value<double>(&((all->sd->t))), "initial t value")
- ("feature_mask", po::value< string >(), "Use existing regressor to determine which parameters may be updated. If no initial_regressor given, also used for initial weights.")
- ;
-
- po::options_description weight_opt("Weight options");
-
- weight_opt.add_options()
- ("bit_precision,b", po::value<size_t>(), "number of bits in the feature table")
- ("initial_regressor,i", po::value< vector<string> >(), "Initial regressor(s)")
- ("final_regressor,f", po::value< string >(), "Final regressor")
- ("initial_weight", po::value<float>(&(all->initial_weight)), "Set all weights to an initial value of 1.")
- ("random_weights", po::value<bool>(&(all->random_weights)), "make initial weights random")
- ("readable_model", po::value< string >(), "Output human-readable final regressor with numeric features")
- ("invert_hash", po::value< string >(), "Output human-readable final regressor with feature names")
- ("save_per_pass", "Save the model after every pass over data")
- ("input_feature_regularizer", po::value< string >(&(all->per_feature_regularizer_input)), "Per feature regularization input file")
- ("output_feature_regularizer_binary", po::value< string >(&(all->per_feature_regularizer_output)), "Per feature regularization output file")
- ("output_feature_regularizer_text", po::value< string >(&(all->per_feature_regularizer_text)), "Per feature regularization output file, in text")
- ;
-
- po::options_description holdout_opt("Holdout options");
- holdout_opt.add_options()
- ("holdout_off", "no holdout data in multiple passes")
- ("holdout_period", po::value<uint32_t>(&(all->holdout_period)), "holdout period for test only, default 10")
- ("holdout_after", po::value<uint32_t>(&(all->holdout_after)), "holdout after n training examples, default off (disables holdout_period)")
- ("early_terminate", po::value<size_t>(), "Specify the number of passes tolerated when holdout loss doesn't decrease before early termination, default is 3")
- ;
-
- po::options_description namespace_opt("Feature namespace options");
- namespace_opt.add_options()
- ("hash", po::value< string > (), "how to hash the features. Available options: strings, all")
- ("ignore", po::value< vector<unsigned char> >(), "ignore namespaces beginning with character <arg>")
- ("keep", po::value< vector<unsigned char> >(), "keep namespaces beginning with character <arg>")
- ("noconstant", "Don't add a constant feature")
- ("constant,C", po::value<float>(&(all->initial_constant)), "Set initial value of constant")
- ("sort_features", "turn this on to disregard order in which features have been defined. This will lead to smaller cache sizes")
- ("ngram", po::value< vector<string> >(), "Generate N grams. To generate N grams for a single namespace 'foo', arg should be fN.")
- ("skips", po::value< vector<string> >(), "Generate skips in N grams. This in conjunction with the ngram tag can be used to generate generalized n-skip-k-gram. To generate n-skips for a single namespace 'foo', arg should be fn.")
- ("affix", po::value<string>(), "generate prefixes/suffixes of features; argument '+2a,-3b,+1' means generate 2-char prefixes for namespace a, 3-char suffixes for b and 1 char prefixes for default namespace")
- ("spelling", po::value< vector<string> >(), "compute spelling features for a give namespace (use '_' for default namespace)");
- ;
-
- po::options_description mf_opt("Matrix factorization options");
- mf_opt.add_options()
- ("quadratic,q", po::value< vector<string> > (),
- "Create and use quadratic features")
- ("q:", po::value< string >(), ": corresponds to a wildcard for all printable characters")
- ("cubic", po::value< vector<string> > (),
- "Create and use cubic features")
- ("rank", po::value<uint32_t>(&(all->rank)), "rank for matrix factorization.")
- ("new_mf", "use new, reduction-based matrix factorization")
- ;
-
- po::options_description lrq_opt("Low Rank Quadratic options");
- lrq_opt.add_options()
- ("lrq", po::value<vector<string> > (), "use low rank quadratic features")
- ("lrqdropout", "use dropout training for low rank quadratic features")
- ;
-
- po::options_description multiclass_opt("Multiclass options");
- multiclass_opt.add_options()
- ("oaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> labels")
- ("ect", po::value<size_t>(), "Use error correcting tournament with <k> labels")
- ("csoaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> costs")
- ("wap", po::value<size_t>(), "Use weighted all-pairs multiclass learning with <k> costs")
- ("csoaa_ldf", po::value<string>(), "Use one-against-all multiclass learning with label dependent features. Specify singleline or multiline.")
- ("wap_ldf", po::value<string>(), "Use weighted all-pairs multiclass learning with label dependent features. Specify singleline or multiline.")
- ;
-
- po::options_description active_opt("Active Learning options");
- active_opt.add_options()
- ("active_learning", "active learning mode")
- ("active_simulation", "active learning simulation mode")
- ("active_mellowness", po::value<float>(&(all->active_c0)), "active learning mellowness parameter c_0. Default 8")
- ;
-
- po::options_description cluster_opt("Parallelization options");
- cluster_opt.add_options()
- ("span_server", po::value<string>(&(all->span_server)), "Location of server for setting up spanning tree")
- ("unique_id", po::value<size_t>(&(all->unique_id)),"unique id used for cluster parallel jobs")
- ("total", po::value<size_t>(&(all->total)),"total number of nodes used in cluster parallel job")
- ("node", po::value<size_t>(&(all->node)),"node number in cluster parallel job")
- ;
-
- po::options_description other_opt("Other options");
- other_opt.add_options()
- ("bs", po::value<size_t>(), "bootstrap mode with k rounds by online importance resampling")
- ("top", po::value<size_t>(), "top k recommendation")
- ("bs_type", po::value<string>(), "bootstrap mode - currently 'mean' or 'vote'")
- ("autolink", po::value<size_t>(), "create link function with polynomial d")
- ("cb", po::value<size_t>(), "Use contextual bandit learning with <k> costs")
- ("lda", po::value<uint32_t>(&(all->lda)), "Run lda with <int> topics")
- ("nn", po::value<size_t>(), "Use sigmoidal feedforward network with <k> hidden units")
- ("cbify", po::value<size_t>(), "Convert multiclass on <k> classes into a contextual bandit problem and solve")
- ("search", po::value<size_t>(), "use search-based structured prediction, argument=maximum action id or 0 for LDF")
- ;
-
- // Declare the supported options.
- desc.add_options()
- ("help,h","Look here: http://hunch.net/~vw/ and click on Tutorial.")
- ("version","Version information")
- ("random_seed", po::value<size_t>(&random_seed), "seed random number generator")
- ("noop","do no learning")
- ("print","print examples");
-
- //po::positional_options_description p;
- // Be friendly: if -d was left out, treat positional param as data file
- //p.add("data", -1);
-
- desc.add(in_opt)
- .add(out_opt)
- .add(update_opt)
- .add(weight_opt)
- .add(holdout_opt)
- .add(namespace_opt)
- .add(mf_opt)
- .add(lrq_opt)
- .add(multiclass_opt)
- .add(active_opt)
- .add(cluster_opt)
- .add(other_opt);
-
- po::variables_map vm = po::variables_map();
- po::variables_map vm_file = po::variables_map(); //separate variable map for storing flags in regressor file
-
- po::parsed_options parsed = po::command_line_parser(argc, argv).
- style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing).
- options(desc).allow_unregistered().run(); // got rid of ".positional(p)" because it doesn't work well with unrecognized options
- vector<string> to_pass_further = po::collect_unrecognized(parsed.options, po::include_positional);
- string last_unrec_arg =
- (to_pass_further.size() > 0)
- ? string(to_pass_further[to_pass_further.size()-1]) // we want to write this down in case it's a data argument ala the positional option we got rid of
- : "";
-
- po::store(parsed, vm);
- po::notify(vm);
-
- if(all->numpasses > 1)
- all->holdout_set_off = false;
-
- if(vm.count("holdout_off"))
- all->holdout_set_off = true;
-
- if(!all->holdout_set_off && (vm.count("output_feature_regularizer_binary") || vm.count("output_feature_regularizer_text")))
- {
- all->holdout_set_off = true;
- cerr<<"Making holdout_set_off=true since output regularizer specified\n";
- }
-
- all->data_filename = "";
-
- all->searn = false;
- all->searnstr = NULL;
-
- all->sd->weighted_unlabeled_examples = all->sd->t;
- all->initial_t = (float)all->sd->t;
-
- if(all->initial_t > 0)
- {
- all->normalized_sum_norm_x = all->initial_t;//for the normalized update: if initial_t is bigger than 1 we interpret this as if we had seen (all->initial_t) previous fake datapoints all with norm 1
- }
-
+ // Begin diagnostic options
if (vm.count("help") || argc == 1) {
/* upon direct query for help -- spit it out to stdout */
cout << "\n" << desc << "\n";
exit(0);
}
+ if (vm.count("version")) {
+ /* upon direct query for version -- spit it out to stdout */
+ cout << version.to_string() << "\n";
+ exit(0);
+ }
+
if (vm.count("quiet")) {
- all->quiet = true;
+ all.quiet = true;
// --quiet wins over --progress
} else {
- all->quiet = false;
+ all.quiet = false;
if (vm.count("progress")) {
string progress_str = vm["progress"].as<string>();
- all->progress_arg = (float)::atof(progress_str.c_str());
+ all.progress_arg = (float)::atof(progress_str.c_str());
// --progress interval is dual: either integer or floating-point
if (progress_str.find_first_of(".") == string::npos) {
// No "." in arg: assume integer -> additive
- all->progress_add = true;
- if (all->progress_arg < 1) {
+ all.progress_add = true;
+ if (all.progress_arg < 1) {
cerr << "warning: additive --progress <int>"
<< " can't be < 1: forcing to 1\n";
- all->progress_arg = 1;
+ all.progress_arg = 1;
}
- all->sd->dump_interval = all->progress_arg;
+ all.sd->dump_interval = all.progress_arg;
} else {
// A "." in arg: assume floating-point -> multiplicative
- all->progress_add = false;
+ all.progress_add = false;
- if (all->progress_arg <= 1.0) {
+ if (all.progress_arg <= 1.0) {
cerr << "warning: multiplicative --progress <float>: "
<< vm["progress"].as<string>()
<< " is <= 1.0: adding 1.0\n";
- all->progress_arg += 1.0;
+ all.progress_arg += 1.0;
- } else if (all->progress_arg > 9.0) {
+ } else if (all.progress_arg > 9.0) {
cerr << "warning: multiplicative --progress <float>"
<< " is > 9.0: you probably meant to use an integer\n";
}
- all->sd->dump_interval = 1.0;
+ all.sd->dump_interval = 1.0;
}
}
- }
+ }
- msrand48(random_seed);
+ if (vm.count("audit")){
+ all.audit = true;
+ }
+}
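+// Examples (hypothetical invocations) of the dual --progress semantics
+// handled above:
+//   vw --progress 1000  -> additive: report every 1000 examples
+//   vw --progress 2.0   -> multiplicative: report at examples 1, 2, 4, 8, ...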
- if (vm.count("active_simulation"))
- all->active_simulation = true;
-
- if (vm.count("active_learning") && !all->active_simulation)
- all->active = true;
-
+void parse_source(vw& all, po::variables_map& vm)
+{
+ //begin input source
if (vm.count("no_stdin"))
- all->stdin_off = true;
-
- if (vm.count("testonly") || all->eta == 0.)
- {
- if (!all->quiet)
- cerr << "only testing" << endl;
- all->training = false;
- if (all->lda > 0)
- all->eta = 0;
- }
- else
- all->training = true;
-
+ all.stdin_off = true;
+
if ( (vm.count("total") || vm.count("node") || vm.count("unique_id")) && !(vm.count("total") && vm.count("node") && vm.count("unique_id")) )
{
cout << "you must specificy unique_id, total, and node if you specify any" << endl;
throw exception();
}
-
- if (all->l1_lambda < 0.) {
- cerr << "l1_lambda should be nonnegative: resetting from " << all->l1_lambda << " to 0" << endl;
- all->l1_lambda = 0.;
+
+ if (vm.count("daemon") || vm.count("pid_file") || (vm.count("port") && !all.active) ) {
+ all.daemon = true;
+
+ // allow each child to process up to 1e5 connections
+ all.numpasses = (size_t) 1e5;
}
- if (all->l2_lambda < 0.) {
- cerr << "l2_lambda should be nonnegative: resetting from " << all->l2_lambda << " to 0" << endl;
- all->l2_lambda = 0.;
+
+ if (vm.count("compressed"))
+ set_compressed(all.p);
+
+ if (vm.count("data")) {
+ all.data_filename = vm["data"].as<string>();
+ if (ends_with(all.data_filename, ".gz"))
+ set_compressed(all.p);
+ } else
+ all.data_filename = "";
+}
+
+void parse_feature_tweaks(vw& all, po::variables_map& vm, po::variables_map& vm_file)
+{
+ //feature manipulation
+ string hash_function("strings");
+ if(vm.count("hash"))
+ hash_function = vm["hash"].as<string>();
+ all.p->hasher = getHasher(hash_function);
+
+ if (vm.count("spelling")) {
+ vector<string> spelling_ns = vm["spelling"].as< vector<string> >();
+ for (size_t id=0; id<spelling_ns.size(); id++)
+ if (spelling_ns[id][0] == '_') all.spelling_features[(unsigned char)' '] = true;
+ else all.spelling_features[(size_t)spelling_ns[id][0]] = true;
}
- all->reg_mode += (all->l1_lambda > 0.) ? 1 : 0;
- all->reg_mode += (all->l2_lambda > 0.) ? 2 : 0;
- if (!all->quiet)
- {
- if (all->reg_mode %2 && !vm.count("bfgs"))
- cerr << "using l1 regularization = " << all->l1_lambda << endl;
- if (all->reg_mode > 1)
- cerr << "using l2 regularization = " << all->l2_lambda << endl;
- }
- if (vm.count("version") || argc == 1) {
- /* upon direct query for version -- spit it out to stdout */
- cout << version.to_string() << "\n";
- exit(0);
+ if (vm_file.count("affix") && vm.count("affix")) {
+ cerr << "should not specify --affix when loading a model trained with affix features (they're turned on by default)" << endl;
+ throw exception();
+ }
+ if (vm_file.count("affix"))
+ parse_affix_argument(all, vm_file["affix"].as<string>());
+ if (vm.count("affix")) {
+ parse_affix_argument(all, vm["affix"].as<string>());
+ stringstream ss;
+ ss << " --affix " << vm["affix"].as<string>();
+ all.options_from_file.append(ss.str());
}
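+  // e.g. (per the --affix help): "--affix +2a,-3b,+1" asks
+  // parse_affix_argument for 2-char prefixes in namespace 'a', 3-char
+  // suffixes in 'b', and 1-char prefixes in the default namespace.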
if(vm.count("ngram")){
@@ -437,8 +229,8 @@ vw* parse_args(int argc, char *argv[])
throw exception();
}
- all->ngram_strings = vm["ngram"].as< vector<string> >();
- compile_gram(all->ngram_strings, all->ngram, (char*)"grams", all->quiet);
+ all.ngram_strings = vm["ngram"].as< vector<string> >();
+ compile_gram(all.ngram_strings, all.ngram, (char*)"grams", all.quiet);
}
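+  // e.g. "--ngram 2" builds bigrams in every namespace, while
+  // "--ngram f3" (per the option help) builds trigrams only for the
+  // namespace beginning with 'f'.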
if(vm.count("skips"))
@@ -449,63 +241,35 @@ vw* parse_args(int argc, char *argv[])
throw exception();
}
- all->skip_strings = vm["skips"].as<vector<string> >();
- compile_gram(all->skip_strings, all->skips, (char*)"skips", all->quiet);
+ all.skip_strings = vm["skips"].as<vector<string> >();
+ compile_gram(all.skip_strings, all.skips, (char*)"skips", all.quiet);
}
- if (vm.count("spelling")) {
- vector<string> spelling_ns = vm["spelling"].as< vector<string> >();
- for (size_t id=0; id<spelling_ns.size(); id++)
- if (spelling_ns[id][0] == '_') all->spelling_features[(unsigned char)' '] = true;
- else all->spelling_features[(size_t)spelling_ns[id][0]] = true;
- }
-
if (vm.count("bit_precision"))
{
- all->default_bits = false;
- all->num_bits = (uint32_t)vm["bit_precision"].as< size_t>();
- if (all->num_bits > min(32, sizeof(size_t)*8 - 3))
+ all.default_bits = false;
+ all.num_bits = (uint32_t)vm["bit_precision"].as< size_t>();
+ if (all.num_bits > min(32, sizeof(size_t)*8 - 3))
{
cout << "Only " << min(32, sizeof(size_t)*8 - 3) << " or fewer bits allowed. If this is a serious limit, speak up." << endl;
throw exception();
}
}
- if (vm.count("daemon") || vm.count("pid_file") || (vm.count("port") && !all->active) ) {
- all->daemon = true;
-
- // allow each child to process up to 1e5 connections
- all->numpasses = (size_t) 1e5;
- }
-
- if (vm.count("compressed"))
- set_compressed(all->p);
-
- if (vm.count("data")) {
- all->data_filename = vm["data"].as<string>();
- if (ends_with(all->data_filename, ".gz"))
- set_compressed(all->p);
- } else {
- all->data_filename = "";
- }
-
- if(vm.count("sort_features"))
- all->p->sort_features = true;
-
if (vm.count("quadratic"))
{
- all->pairs = vm["quadratic"].as< vector<string> >();
+ all.pairs = vm["quadratic"].as< vector<string> >();
vector<string> newpairs;
//string tmp;
char printable_start = '!';
char printable_end = '~';
int valid_ns_size = printable_end - printable_start - 1; //will skip two characters
- if(!all->quiet)
+ if(!all.quiet)
cerr<<"creating quadratic features for pairs: ";
- for (vector<string>::iterator i = all->pairs.begin(); i != all->pairs.end();i++){
- if(!all->quiet){
+ for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++){
+ if(!all.quiet){
cerr << *i << " ";
if (i->length() > 2)
cerr << endl << "warning, ignoring characters after the 2nd.\n";
@@ -553,18 +317,18 @@ vw* parse_args(int argc, char *argv[])
newpairs.push_back(string(*i));
}
}
- newpairs.swap(all->pairs);
- if(!all->quiet)
+ newpairs.swap(all.pairs);
+ if(!all.quiet)
cerr<<endl;
}
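+  // e.g. "-q ab" crosses namespaces 'a' and 'b'; per the "q:" option,
+  // ':' acts as a wildcard, expanded above over the printable
+  // characters '!' .. '~'.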
if (vm.count("cubic"))
{
- all->triples = vm["cubic"].as< vector<string> >();
- if (!all->quiet)
+ all.triples = vm["cubic"].as< vector<string> >();
+ if (!all.quiet)
{
cerr << "creating cubic features for triples: ";
- for (vector<string>::iterator i = all->triples.begin(); i != all->triples.end();i++) {
+ for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
cerr << *i << " ";
if (i->length() > 3)
cerr << endl << "warning, ignoring characters after the 3rd.\n";
@@ -577,51 +341,20 @@ vw* parse_args(int argc, char *argv[])
}
}
- io_buf io_temp;
- parse_regressor_args(*all, vm, io_temp);
-
- //parse flags from regressor file
- all->options_from_file_argv = VW::get_argv_from_string(all->options_from_file,all->options_from_file_argc);
-
- po::parsed_options parsed_file = po::command_line_parser(all->options_from_file_argc, all->options_from_file_argv).
- style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing).
- options(desc).allow_unregistered().run();
-
- po::store(parsed_file, vm_file);
- po::notify(vm_file);
-
- if (vm.count("bfgs") || vm.count("conjugate_gradient"))
- all->l = BFGS::setup(*all, to_pass_further, vm);
- else if (vm.count("lda"))
- all->l = LDA::setup(*all, to_pass_further, vm);
- else if (vm.count("noop"))
- all->l = NOOP::setup(*all);
- else if (vm.count("print"))
- all->l = PRINT::setup(*all);
- else if (!vm.count("new_mf") && all->rank > 0)
- all->l = GDMF::setup(*all, vm);
- else if (vm.count("sendto"))
- all->l = SENDER::setup(*all, vm, all->pairs);
- else
- {
- all->l = GD::setup(*all, vm);
- all->scorer = all->l;
- }
-
for (size_t i = 0; i < 256; i++)
- all->ignore[i] = false;
- all->ignore_some = false;
+ all.ignore[i] = false;
+ all.ignore_some = false;
if (vm.count("ignore"))
{
- all->ignore_some = true;
+ all.ignore_some = true;
vector<unsigned char> ignore = vm["ignore"].as< vector<unsigned char> >();
for (vector<unsigned char>::iterator i = ignore.begin(); i != ignore.end();i++)
{
- all->ignore[*i] = true;
+ all.ignore[*i] = true;
}
- if (!all->quiet)
+ if (!all.quiet)
{
cerr << "ignoring namespaces beginning with: ";
for (vector<unsigned char>::iterator i = ignore.begin(); i != ignore.end();i++)
@@ -634,16 +367,16 @@ vw* parse_args(int argc, char *argv[])
if (vm.count("keep"))
{
for (size_t i = 0; i < 256; i++)
- all->ignore[i] = true;
+ all.ignore[i] = true;
- all->ignore_some = true;
+ all.ignore_some = true;
vector<unsigned char> keep = vm["keep"].as< vector<unsigned char> >();
for (vector<unsigned char>::iterator i = keep.begin(); i != keep.end();i++)
{
- all->ignore[*i] = false;
+ all.ignore[*i] = false;
}
- if (!all->quiet)
+ if (!all.quiet)
{
cerr << "using namespaces beginning with: ";
for (vector<unsigned char>::iterator i = keep.begin(); i != keep.end();i++)
@@ -654,35 +387,43 @@ vw* parse_args(int argc, char *argv[])
}
if (vm.count("noconstant"))
- all->add_constant = false;
-
- //if (vm.count("nonormalize"))
- // all->nonormalize = true;
+ all.add_constant = false;
+}
- if (!vm.count("lda") && !all->adaptive && !all->normalized_updates)
- all->eta *= powf((float)(all->sd->t), all->power_t);
+void parse_example_tweaks(vw& all, po::variables_map& vm)
+{
+ if (vm.count("testonly") || all.eta == 0.)
+ {
+ if (!all.quiet)
+ cerr << "only testing" << endl;
+ all.training = false;
+ if (all.lda > 0)
+ all.eta = 0;
+ }
+ else
+ all.training = true;
- if (vm.count("readable_model"))
- all->text_regressor_name = vm["readable_model"].as<string>();
+ if(all.numpasses > 1)
+ all.holdout_set_off = false;
- if (vm.count("invert_hash")){
- all->inv_hash_regressor_name = vm["invert_hash"].as<string>();
+ if(vm.count("holdout_off"))
+ all.holdout_set_off = true;
- all->hash_inv = true;
+ if(!all.holdout_set_off && (vm.count("output_feature_regularizer_binary") || vm.count("output_feature_regularizer_text")))
+ {
+ all.holdout_set_off = true;
+ cerr<<"Making holdout_set_off=true since output regularizer specified\n";
}
- if (vm.count("save_per_pass"))
- all->save_per_pass = true;
-
- if (vm.count("save_resume"))
- all->save_resume = true;
-
+ if(vm.count("sort_features"))
+ all.p->sort_features = true;
+
if (vm.count("min_prediction"))
- all->sd->min_label = vm["min_prediction"].as<float>();
+ all.sd->min_label = vm["min_prediction"].as<float>();
if (vm.count("max_prediction"))
- all->sd->max_label = vm["max_prediction"].as<float>();
+ all.sd->max_label = vm["max_prediction"].as<float>();
if (vm.count("min_prediction") || vm.count("max_prediction") || vm.count("testonly"))
- all->set_minmax = noop_mm;
+ all.set_minmax = noop_mm;
string loss_function;
if(vm.count("loss_function"))
@@ -693,30 +434,35 @@ vw* parse_args(int argc, char *argv[])
if(vm.count("quantile_tau"))
loss_parameter = vm["quantile_tau"].as<float>();
- all->loss = getLossFunction(all, loss_function, (float)loss_parameter);
-
- if (pow((double)all->eta_decay_rate, (double)all->numpasses) < 0.0001 )
- cerr << "Warning: the learning rate for the last pass is multiplied by: " << pow((double)all->eta_decay_rate, (double)all->numpasses)
- << " adjust --decay_learning_rate larger to avoid this." << endl;
+ all.loss = getLossFunction(&all, loss_function, (float)loss_parameter);
- if (!all->quiet)
+ if (all.l1_lambda < 0.) {
+ cerr << "l1_lambda should be nonnegative: resetting from " << all.l1_lambda << " to 0" << endl;
+ all.l1_lambda = 0.;
+ }
+ if (all.l2_lambda < 0.) {
+ cerr << "l2_lambda should be nonnegative: resetting from " << all.l2_lambda << " to 0" << endl;
+ all.l2_lambda = 0.;
+ }
+ all.reg_mode += (all.l1_lambda > 0.) ? 1 : 0;
+ all.reg_mode += (all.l2_lambda > 0.) ? 2 : 0;
+ if (!all.quiet)
{
- cerr << "Num weight bits = " << all->num_bits << endl;
- cerr << "learning rate = " << all->eta << endl;
- cerr << "initial_t = " << all->sd->t << endl;
- cerr << "power_t = " << all->power_t << endl;
- if (all->numpasses > 1)
- cerr << "decay_learning_rate = " << all->eta_decay_rate << endl;
- if (all->rank > 0)
- cerr << "rank = " << all->rank << endl;
+ if (all.reg_mode %2 && !vm.count("bfgs"))
+ cerr << "using l1 regularization = " << all.l1_lambda << endl;
+ if (all.reg_mode > 1)
+ cerr << "using l2 regularization = " << all.l2_lambda << endl;
}
+}
+void parse_output_preds(vw& all, po::variables_map& vm, po::variables_map& vm_file)
+{
if (vm.count("predictions")) {
- if (!all->quiet)
+ if (!all.quiet)
cerr << "predictions = " << vm["predictions"].as< string >() << endl;
if (strcmp(vm["predictions"].as< string >().c_str(), "stdout") == 0)
{
- all->final_prediction_sink.push_back((size_t) 1);//stdout
+ all.final_prediction_sink.push_back((size_t) 1);//stdout
}
else
{
@@ -729,18 +475,18 @@ vw* parse_args(int argc, char *argv[])
#endif
if (f < 0)
cerr << "Error opening the predictions file: " << fstr << endl;
- all->final_prediction_sink.push_back((size_t) f);
+ all.final_prediction_sink.push_back((size_t) f);
}
}
if (vm.count("raw_predictions")) {
- if (!all->quiet) {
+ if (!all.quiet) {
cerr << "raw predictions = " << vm["raw_predictions"].as< string >() << endl;
if (vm.count("binary") || vm_file.count("binary"))
cerr << "Warning: --raw has no defined value when --binary specified, expect no output" << endl;
}
if (strcmp(vm["raw_predictions"].as< string >().c_str(), "stdout") == 0)
- all->raw_prediction = 1;//stdout
+ all.raw_prediction = 1;//stdout
else
{
const char* t = vm["raw_predictions"].as< string >().c_str();
@@ -750,171 +496,470 @@ vw* parse_args(int argc, char *argv[])
#else
f = open(t, O_CREAT|O_WRONLY|O_LARGEFILE|O_TRUNC,0666);
#endif
- all->raw_prediction = f;
+ all.raw_prediction = f;
}
}
+}
- if (vm.count("audit")){
- all->audit = true;
+void parse_output_model(vw& all, po::variables_map& vm)
+{
+ if (vm.count("final_regressor")) {
+ all.final_regressor_name = vm["final_regressor"].as<string>();
+ if (!all.quiet)
+ cerr << "final_regressor = " << vm["final_regressor"].as<string>() << endl;
}
+ else
+ all.final_regressor_name = "";
+ if (vm.count("readable_model"))
+ all.text_regressor_name = vm["readable_model"].as<string>();
+
+ if (vm.count("invert_hash")){
+ all.inv_hash_regressor_name = vm["invert_hash"].as<string>();
+ all.hash_inv = true;
+ }
+
+ if (vm.count("save_per_pass"))
+ all.save_per_pass = true;
+
+ if (vm.count("save_resume"))
+ all.save_resume = true;
+}
+
+void parse_base_algorithm(vw& all, vector<string>& to_pass_further, po::variables_map& vm)
+{
+ //base learning algorithm.
+ if (vm.count("bfgs") || vm.count("conjugate_gradient"))
+ all.l = BFGS::setup(all, to_pass_further, vm);
+ else if (vm.count("lda"))
+ all.l = LDA::setup(all, to_pass_further, vm);
+ else if (vm.count("noop"))
+ all.l = NOOP::setup(all);
+ else if (vm.count("print"))
+ all.l = PRINT::setup(all);
+ else if (!vm.count("new_mf") && all.rank > 0)
+ all.l = GDMF::setup(all, vm);
+ else if (vm.count("sendto"))
+ all.l = SENDER::setup(all, vm, all.pairs);
+ else
+ {
+ all.l = GD::setup(all, vm);
+ all.scorer = all.l;
+ }
+}
+
+void load_input_model(vw& all, po::variables_map& vm, io_buf& io_temp)
+{
// Need to see if we have to load feature mask first or second.
// -i and -mask are from same file, load -i file first so mask can use it
if (vm.count("feature_mask") && vm.count("initial_regressor")
&& vm["feature_mask"].as<string>() == vm["initial_regressor"].as< vector<string> >()[0]) {
// load rest of regressor
- all->l->save_load(io_temp, true, false);
+ all.l->save_load(io_temp, true, false);
io_temp.close_file();
// set the mask, which will reuse -i file we just loaded
- parse_mask_regressor_args(*all, vm);
+ parse_mask_regressor_args(all, vm);
}
else {
// load mask first
- parse_mask_regressor_args(*all, vm);
+ parse_mask_regressor_args(all, vm);
// load rest of regressor
- all->l->save_load(io_temp, true, false);
+ all.l->save_load(io_temp, true, false);
io_temp.close_file();
}
+}
- bool got_mc = false;
- bool got_cs = false;
- bool got_cb = false;
-
+void parse_scorer_reductions(vw& all, vector<string>& to_pass_further, po::variables_map& vm, po::variables_map vm_file)
+{
if(vm.count("nn") || vm_file.count("nn") )
- all->l = NN::setup(*all, to_pass_further, vm, vm_file);
-
- if (vm.count("new_mf") && all->rank > 0)
- all->l = MF::setup(*all, vm);
-
+ all.l = NN::setup(all, to_pass_further, vm, vm_file);
+
+ if (vm.count("new_mf") && all.rank > 0)
+ all.l = MF::setup(all, vm);
+
if(vm.count("autolink") || vm_file.count("autolink") )
- all->l = ALINK::setup(*all, to_pass_further, vm, vm_file);
-
+ all.l = ALINK::setup(all, to_pass_further, vm, vm_file);
+
if (vm.count("lrq") || vm_file.count("lrq"))
- all->l = LRQ::setup(*all, to_pass_further, vm, vm_file);
+ all.l = LRQ::setup(all, to_pass_further, vm, vm_file);
+
+ all.l = Scorer::setup(all, to_pass_further, vm, vm_file);
+}
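+// Each setup call above receives the current all.l and layers on top of
+// it, so reductions stack: base learner -> nn/mf/autolink/lrq -> scorer.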
- all->l = Scorer::setup(*all, to_pass_further, vm, vm_file);
+LEARNER::learner* exclusive_setup(vw& all, vector<string>& to_pass_further, po::variables_map& vm, po::variables_map vm_file, bool& score_consumer, LEARNER::learner* (*setup)(vw&, vector<string>&, po::variables_map&, po::variables_map&))
+{
+ if (score_consumer) { cerr << "error: cannot specify multiple direct score consumers" << endl; throw exception(); }
+ score_consumer = true;
+ return setup(all, to_pass_further, vm, vm_file);
+}
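+// parse_score_users below threads score_consumer through exclusive_setup,
+// so naming two direct score consumers (e.g. --top together with --binary)
+// fails fast instead of silently stacking them.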
+void parse_score_users(vw& all, vector<string>& to_pass_further, po::variables_map& vm, po::variables_map vm_file, bool& got_cs)
+{
+ bool score_consumer = false;
+
if(vm.count("top") || vm_file.count("top") )
- all->l = TOPK::setup(*all, to_pass_further, vm, vm_file);
-
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, TOPK::setup);
+
if (vm.count("binary") || vm_file.count("binary"))
- all->l = BINARY::setup(*all, to_pass_further, vm, vm_file);
-
- if(vm.count("oaa") || vm_file.count("oaa") ) {
- if (got_mc) { cerr << "error: cannot specify multiple MC learners" << endl; throw exception(); }
-
- all->l = OAA::setup(*all, to_pass_further, vm, vm_file);
- got_mc = true;
- }
-
- if (vm.count("ect") || vm_file.count("ect") ) {
- if (got_mc) { cerr << "error: cannot specify multiple MC learners" << endl; throw exception(); }
-
- all->l = ECT::setup(*all, to_pass_further, vm, vm_file);
- got_mc = true;
- }
-
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, BINARY::setup);
+
+ if (vm.count("oaa") || vm_file.count("oaa") )
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, OAA::setup);
+
+ if (vm.count("ect") || vm_file.count("ect") )
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, ECT::setup);
+
if(vm.count("csoaa") || vm_file.count("csoaa") ) {
- if (got_cs) { cerr << "error: cannot specify multiple CS learners" << endl; throw exception(); }
-
- all->l = CSOAA::setup(*all, to_pass_further, vm, vm_file);
- all->cost_sensitive = all->l;
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, CSOAA::setup);
+ all.cost_sensitive = all.l;
got_cs = true;
}
-
+
if(vm.count("wap") || vm_file.count("wap") ) {
- if (got_cs) { cerr << "error: cannot specify multiple CS learners" << endl; throw exception(); }
-
- all->l = WAP::setup(*all, to_pass_further, vm, vm_file);
- all->cost_sensitive = all->l;
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, WAP::setup);
+ all.cost_sensitive = all.l;
got_cs = true;
}
-
+
if(vm.count("csoaa_ldf") || vm_file.count("csoaa_ldf")) {
- if (got_cs) { cerr << "error: cannot specify multiple CS learners" << endl; throw exception(); }
-
- all->l = CSOAA_AND_WAP_LDF::setup(*all, to_pass_further, vm, vm_file);
- all->cost_sensitive = all->l;
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, CSOAA_AND_WAP_LDF::setup);
+ all.cost_sensitive = all.l;
got_cs = true;
}
-
+
if(vm.count("wap_ldf") || vm_file.count("wap_ldf") ) {
- if (got_cs) { cerr << "error: cannot specify multiple CS learners" << endl; throw exception(); }
-
- all->l = CSOAA_AND_WAP_LDF::setup(*all, to_pass_further, vm, vm_file);
- all->cost_sensitive = all->l;
+ all.l = exclusive_setup(all, to_pass_further, vm, vm_file, score_consumer, CSOAA_AND_WAP_LDF::setup);
+ all.cost_sensitive = all.l;
got_cs = true;
}
+}
+void parse_cb(vw& all, vector<string>& to_pass_further, po::variables_map& vm, po::variables_map vm_file, bool& got_cs, bool& got_cb)
+{
if( vm.count("cb") || vm_file.count("cb") )
- {
- if(!got_cs) {
- if( vm_file.count("cb") ) vm.insert(pair<string,po::variable_value>(string("csoaa"),vm_file["cb"]));
- else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["cb"]));
-
- all->l = CSOAA::setup(*all, to_pass_further, vm, vm_file); // default to CSOAA unless wap is specified
- all->cost_sensitive = all->l;
- got_cs = true;
+ {
+ if(!got_cs) {
+ if( vm_file.count("cb") ) vm.insert(pair<string,po::variable_value>(string("csoaa"),vm_file["cb"]));
+ else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["cb"]));
+
+ all.l = CSOAA::setup(all, to_pass_further, vm, vm_file); // default to CSOAA unless wap is specified
+ all.cost_sensitive = all.l;
+ got_cs = true;
+ }
+
+ all.l = CB_ALGS::setup(all, to_pass_further, vm, vm_file);
+ got_cb = true;
}
- all->l = CB_ALGS::setup(*all, to_pass_further, vm, vm_file);
- got_cb = true;
- }
-
if (vm.count("cbify") || vm_file.count("cbify"))
{
if(!got_cs) {
if( vm_file.count("cbify") ) vm.insert(pair<string,po::variable_value>(string("csoaa"),vm_file["cbify"]));
else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["cbify"]));
- all->l = CSOAA::setup(*all, to_pass_further, vm, vm_file); // default to CSOAA unless wap is specified
- all->cost_sensitive = all->l;
+ all.l = CSOAA::setup(all, to_pass_further, vm, vm_file); // default to CSOAA unless wap is specified
+ all.cost_sensitive = all.l;
got_cs = true;
}
if (!got_cb) {
if( vm_file.count("cbify") ) vm.insert(pair<string,po::variable_value>(string("cb"),vm_file["cbify"]));
else vm.insert(pair<string,po::variable_value>(string("cb"),vm["cbify"]));
- all->l = CB_ALGS::setup(*all, to_pass_further, vm, vm_file);
+ all.l = CB_ALGS::setup(all, to_pass_further, vm, vm_file);
got_cb = true;
}
- all->l = CBIFY::setup(*all, to_pass_further, vm, vm_file);
+ all.l = CBIFY::setup(all, to_pass_further, vm, vm_file);
}
+}
-
- if (vm_file.count("affix") && vm.count("affix")) {
- cerr << "should not specify --affix when loading a model trained with affix features (they're turned on by default)" << endl;
- throw exception();
- }
- if (vm_file.count("affix"))
- parse_affix_argument(*all, vm_file["affix"].as<string>());
- if (vm.count("affix")) {
- parse_affix_argument(*all, vm["affix"].as<string>());
- stringstream ss;
- ss << " --affix " << vm["affix"].as<string>();
- all->options_from_file.append(ss.str());
- }
-
+void parse_search(vw& all, vector<string>& to_pass_further, po::variables_map& vm, po::variables_map vm_file, bool& got_cs, bool& got_cb)
+{
if (vm.count("search") || vm_file.count("search") ) {
if (!got_cs && !got_cb) {
if( vm_file.count("search") ) vm.insert(pair<string,po::variable_value>(string("csoaa"),vm_file["search"]));
else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["search"]));
-
- all->l = CSOAA::setup(*all, to_pass_further, vm, vm_file); // default to CSOAA unless others have been specified
- all->cost_sensitive = all->l;
+
+ all.l = CSOAA::setup(all, to_pass_further, vm, vm_file); // default to CSOAA unless others have been specified
+ all.cost_sensitive = all.l;
got_cs = true;
}
- //all->searnstr = (Searn::searn*)calloc_or_die(1, sizeof(Searn::searn));
- all->l = Searn::setup(*all, to_pass_further, vm, vm_file);
+ //all.searnstr = (Searn::searn*)calloc_or_die(1, sizeof(Searn::searn));
+ all.l = Searn::setup(all, to_pass_further, vm, vm_file);
}
+}
- if (got_cb && got_mc) {
- cerr << "error: doesn't make sense to do both MC learning and CB learning" << endl;
- throw exception();
- }
+vw* parse_args(int argc, char *argv[])
+{
+ po::options_description desc("VW options");
+
+ vw* all = new vw();
+
+ size_t random_seed = 0;
+ all->program_name = argv[0];
+
+ po::options_description in_opt("Input options");
+
+ in_opt.add_options()
+ ("data,d", po::value< string >(), "Example Set")
+ ("ring_size", po::value<size_t>(&(all->p->ring_size)), "size of example ring")
+ ("examples", po::value<size_t>(&(all->max_examples)), "number of examples to parse")
+ ("testonly,t", "Ignore label information and just test")
+ ("daemon", "persistent daemon mode on port 26542")
+ ("port", po::value<size_t>(),"port to listen on; use 0 to pick unused port")
+ ("num_children", po::value<size_t>(&(all->num_children)), "number of children for persistent daemon mode")
+ ("pid_file", po::value< string >(), "Write pid file in persistent daemon mode")
+ ("port_file", po::value< string >(), "Write port used in persistent daemon mode")
+ ("passes", po::value<size_t>(&(all->numpasses)),"Number of Training Passes")
+ ("cache,c", "Use a cache. The default is <data>.cache")
+ ("cache_file", po::value< vector<string> >(), "The location(s) of cache_file.")
+ ("kill_cache,k", "do not reuse existing cache: create a new one always")
+ ("compressed", "use gzip format whenever possible. If a cache file is being created, this option creates a compressed cache file. A mixture of raw-text & compressed inputs are supported with autodetection.")
+ ("no_stdin", "do not default to reading from stdin")
+ ("save_resume", "save extra state so learning can be resumed later with new data")
+ ;
+
+ po::options_description out_opt("Output options");
+
+ out_opt.add_options()
+ ("audit,a", "print weights of features")
+ ("predictions,p", po::value< string >(), "File to output predictions to")
+ ("raw_predictions,r", po::value< string >(), "File to output unnormalized predictions to")
+ ("sendto", po::value< vector<string> >(), "send examples to <host>")
+ ("quiet", "Don't output disgnostics and progress updates")
+ ("progress,P", po::value< string >(), "Progress update frequency. int: additive, float: multiplicative")
+ ("binary", "report loss as binary classification on -1,1")
+ ("min_prediction", po::value<float>(&(all->sd->min_label)), "Smallest prediction to output")
+ ("max_prediction", po::value<float>(&(all->sd->max_label)), "Largest prediction to output")
+ ;
+
+ po::options_description update_opt("Update options");
+
+ update_opt.add_options()
+ ("sgd", "use regular stochastic gradient descent update.")
+ ("hessian_on", "use second derivative in line search")
+ ("bfgs", "use bfgs optimization")
+ ("mem", po::value<int>(&(all->m)), "memory in bfgs")
+ ("termination", po::value<float>(&(all->rel_threshold)),"Termination threshold")
+ ("adaptive", "use adaptive, individual learning rates.")
+ ("invariant", "use safe/importance aware updates.")
+ ("normalized", "use per feature normalized updates")
+ ("exact_adaptive_norm", "use current default invariant normalized adaptive update rule")
+ ("conjugate_gradient", "use conjugate gradient based optimization")
+ ("l1", po::value<float>(&(all->l1_lambda)), "l_1 lambda")
+ ("l2", po::value<float>(&(all->l2_lambda)), "l_2 lambda")
+ ("learning_rate,l", po::value<float>(&(all->eta)), "Set Learning Rate")
+ ("loss_function", po::value<string>()->default_value("squared"), "Specify the loss function to be used, uses squared by default. Currently available ones are squared, classic, hinge, logistic and quantile.")
+ ("quantile_tau", po::value<float>()->default_value(0.5), "Parameter \\tau associated with Quantile loss. Defaults to 0.5")
+ ("power_t", po::value<float>(&(all->power_t)), "t power value")
+ ("decay_learning_rate", po::value<float>(&(all->eta_decay_rate)),
+ "Set Decay factor for learning_rate between passes")
+ ("initial_pass_length", po::value<size_t>(&(all->pass_length)), "initial number of examples per pass")
+ ("initial_t", po::value<double>(&((all->sd->t))), "initial t value")
+ ("feature_mask", po::value< string >(), "Use existing regressor to determine which parameters may be updated. If no initial_regressor given, also used for initial weights.")
+ ;
+
+ po::options_description weight_opt("Weight options");
+
+ weight_opt.add_options()
+ ("bit_precision,b", po::value<size_t>(), "number of bits in the feature table")
+ ("initial_regressor,i", po::value< vector<string> >(), "Initial regressor(s)")
+ ("final_regressor,f", po::value< string >(), "Final regressor")
+ ("initial_weight", po::value<float>(&(all->initial_weight)), "Set all weights to an initial value of 1.")
+ ("random_weights", po::value<bool>(&(all->random_weights)), "make initial weights random")
+ ("readable_model", po::value< string >(), "Output human-readable final regressor with numeric features")
+ ("invert_hash", po::value< string >(), "Output human-readable final regressor with feature names")
+ ("save_per_pass", "Save the model after every pass over data")
+ ("input_feature_regularizer", po::value< string >(&(all->per_feature_regularizer_input)), "Per feature regularization input file")
+ ("output_feature_regularizer_binary", po::value< string >(&(all->per_feature_regularizer_output)), "Per feature regularization output file")
+ ("output_feature_regularizer_text", po::value< string >(&(all->per_feature_regularizer_text)), "Per feature regularization output file, in text")
+ ;
+
+ po::options_description holdout_opt("Holdout options");
+ holdout_opt.add_options()
+ ("holdout_off", "no holdout data in multiple passes")
+ ("holdout_period", po::value<uint32_t>(&(all->holdout_period)), "holdout period for test only, default 10")
+ ("holdout_after", po::value<uint32_t>(&(all->holdout_after)), "holdout after n training examples, default off (disables holdout_period)")
+ ("early_terminate", po::value<size_t>(), "Specify the number of passes tolerated when holdout loss doesn't decrease before early termination, default is 3")
+ ;
+
+ po::options_description namespace_opt("Feature namespace options");
+ namespace_opt.add_options()
+ ("hash", po::value< string > (), "how to hash the features. Available options: strings, all")
+ ("ignore", po::value< vector<unsigned char> >(), "ignore namespaces beginning with character <arg>")
+ ("keep", po::value< vector<unsigned char> >(), "keep namespaces beginning with character <arg>")
+ ("noconstant", "Don't add a constant feature")
+ ("constant,C", po::value<float>(&(all->initial_constant)), "Set initial value of constant")
+ ("sort_features", "turn this on to disregard order in which features have been defined. This will lead to smaller cache sizes")
+ ("ngram", po::value< vector<string> >(), "Generate N grams. To generate N grams for a single namespace 'foo', arg should be fN.")
+ ("skips", po::value< vector<string> >(), "Generate skips in N grams. This in conjunction with the ngram tag can be used to generate generalized n-skip-k-gram. To generate n-skips for a single namespace 'foo', arg should be fn.")
+ ("affix", po::value<string>(), "generate prefixes/suffixes of features; argument '+2a,-3b,+1' means generate 2-char prefixes for namespace a, 3-char suffixes for b and 1 char prefixes for default namespace")
+ ("spelling", po::value< vector<string> >(), "compute spelling features for a give namespace (use '_' for default namespace)");
+ ;
+
+ po::options_description mf_opt("Matrix factorization options");
+ mf_opt.add_options()
+ ("quadratic,q", po::value< vector<string> > (),
+ "Create and use quadratic features")
+ ("q:", po::value< string >(), ": corresponds to a wildcard for all printable characters")
+ ("cubic", po::value< vector<string> > (),
+ "Create and use cubic features")
+ ("rank", po::value<uint32_t>(&(all->rank)), "rank for matrix factorization.")
+ ("new_mf", "use new, reduction-based matrix factorization")
+ ;
+
+ po::options_description lrq_opt("Low Rank Quadratic options");
+ lrq_opt.add_options()
+ ("lrq", po::value<vector<string> > (), "use low rank quadratic features")
+ ("lrqdropout", "use dropout training for low rank quadratic features")
+ ;
+
+ po::options_description multiclass_opt("Multiclass options");
+ multiclass_opt.add_options()
+ ("oaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> labels")
+ ("ect", po::value<size_t>(), "Use error correcting tournament with <k> labels")
+ ("csoaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> costs")
+ ("wap", po::value<size_t>(), "Use weighted all-pairs multiclass learning with <k> costs")
+ ("csoaa_ldf", po::value<string>(), "Use one-against-all multiclass learning with label dependent features. Specify singleline or multiline.")
+ ("wap_ldf", po::value<string>(), "Use weighted all-pairs multiclass learning with label dependent features. Specify singleline or multiline.")
+ ;
+
+ po::options_description active_opt("Active Learning options");
+ active_opt.add_options()
+ ("active_learning", "active learning mode")
+ ("active_simulation", "active learning simulation mode")
+ ("active_mellowness", po::value<float>(&(all->active_c0)), "active learning mellowness parameter c_0. Default 8")
+ ;
+
+ po::options_description cluster_opt("Parallelization options");
+ cluster_opt.add_options()
+ ("span_server", po::value<string>(&(all->span_server)), "Location of server for setting up spanning tree")
+ ("unique_id", po::value<size_t>(&(all->unique_id)),"unique id used for cluster parallel jobs")
+ ("total", po::value<size_t>(&(all->total)),"total number of nodes used in cluster parallel job")
+ ("node", po::value<size_t>(&(all->node)),"node number in cluster parallel job")
+ ;
+
+ po::options_description other_opt("Other options");
+ other_opt.add_options()
+ ("bs", po::value<size_t>(), "bootstrap mode with k rounds by online importance resampling")
+ ("top", po::value<size_t>(), "top k recommendation")
+ ("bs_type", po::value<string>(), "bootstrap mode - currently 'mean' or 'vote'")
+ ("autolink", po::value<size_t>(), "create link function with polynomial d")
+ ("cb", po::value<size_t>(), "Use contextual bandit learning with <k> costs")
+ ("lda", po::value<uint32_t>(&(all->lda)), "Run lda with <int> topics")
+ ("nn", po::value<size_t>(), "Use sigmoidal feedforward network with <k> hidden units")
+ ("cbify", po::value<size_t>(), "Convert multiclass on <k> classes into a contextual bandit problem and solve")
+ ("search", po::value<size_t>(), "use search-based structured prediction, argument=maximum action id or 0 for LDF")
+ ;
+
+ // Declare the supported options.
+ desc.add_options()
+ ("help,h","Look here: http://hunch.net/~vw/ and click on Tutorial.")
+ ("version","Version information")
+ ("random_seed", po::value<size_t>(&random_seed), "seed random number generator")
+ ("noop","do no learning")
+ ("print","print examples");
+
+ //po::positional_options_description p;
+ // Be friendly: if -d was left out, treat positional param as data file
+ //p.add("data", -1);
+
+ desc.add(in_opt)
+ .add(out_opt)
+ .add(update_opt)
+ .add(weight_opt)
+ .add(holdout_opt)
+ .add(namespace_opt)
+ .add(mf_opt)
+ .add(lrq_opt)
+ .add(multiclass_opt)
+ .add(active_opt)
+ .add(cluster_opt)
+ .add(other_opt);
+
+ po::variables_map vm = po::variables_map();
+ po::variables_map vm_file = po::variables_map(); //separate variable map for storing flags in regressor file
+
+ po::parsed_options parsed = po::command_line_parser(argc, argv).
+ style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing).
+ options(desc).allow_unregistered().run(); // got rid of ".positional(p)" because it doesn't work well with unrecognized options
+ vector<string> to_pass_further = po::collect_unrecognized(parsed.options, po::include_positional);
+ string last_unrec_arg =
+ (to_pass_further.size() > 0)
+ ? string(to_pass_further[to_pass_further.size()-1]) // we want to write this down in case it's a data argument ala the positional option we got rid of
+ : "";
+
+ po::store(parsed, vm);
+ po::notify(vm);
+
+ msrand48(random_seed);
+
+ parse_diagnostics(*all, vm, desc, argc);
+
+ if (vm.count("active_simulation"))
+ all->active_simulation = true;
+
+ if (vm.count("active_learning") && !all->active_simulation)
+ all->active = true;
+
+ parse_source(*all, vm);
+
+ all->sd->weighted_unlabeled_examples = all->sd->t;
+ all->initial_t = (float)all->sd->t;
+
+ if(all->initial_t > 0)//for the normalized update: if initial_t is bigger than 1 we interpret this as if we had seen (all->initial_t) previous fake datapoints all with norm 1
+ all->normalized_sum_norm_x = all->initial_t;
+
+ //Input regressor header
+ io_buf io_temp;
+ parse_regressor_args(*all, vm, io_temp);
+
+ all->options_from_file_argv = VW::get_argv_from_string(all->options_from_file,all->options_from_file_argc);
+
+ po::parsed_options parsed_file = po::command_line_parser(all->options_from_file_argc, all->options_from_file_argv).
+ style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing).
+ options(desc).allow_unregistered().run();
+
+ po::store(parsed_file, vm_file);
+ po::notify(vm_file);
+
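+ // The regressor header stores extra flags (e.g. "--affix ...") in
+ // options_from_file, so a saved model re-applies its feature
+ // configuration here before the tweak passes below run.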
+ parse_feature_tweaks(*all, vm, vm_file); //feature tweaks
+
+ parse_example_tweaks(*all, vm); //example manipulation
+
+ parse_base_algorithm(*all, to_pass_further, vm);
+
+ if (!all->quiet)
+ {
+ cerr << "Num weight bits = " << all->num_bits << endl;
+ cerr << "learning rate = " << all->eta << endl;
+ cerr << "initial_t = " << all->sd->t << endl;
+ cerr << "power_t = " << all->power_t << endl;
+ if (all->numpasses > 1)
+ cerr << "decay_learning_rate = " << all->eta_decay_rate << endl;
+ if (all->rank > 0)
+ cerr << "rank = " << all->rank << endl;
+ }
+
+ parse_output_model(*all, vm);
+
+ parse_output_preds(*all, vm, vm_file);
+
+ load_input_model(*all, vm, io_temp);
+
+ parse_scorer_reductions(*all, to_pass_further, vm, vm_file);
+
+ bool got_cs = false;
+
+ parse_score_users(*all, to_pass_further, vm, vm_file, got_cs);
+
+ bool got_cb = false;
+
+ parse_cb(*all, to_pass_further, vm, vm_file, got_cs, got_cb);
+
+ parse_search(*all, to_pass_further, vm, vm_file, got_cs, got_cb);
if(vm.count("bs") || vm_file.count("bs") )
all->l = BS::setup(*all, to_pass_further, vm, vm_file);
@@ -932,7 +977,6 @@ vw* parse_args(int argc, char *argv[])
#else
close(f);
#endif
- //cerr << "warning: final argument '" << last_unrec_arg << "' assumed to be input file; in the future, please use -d" << endl;
all->data_filename = last_unrec_arg;
if (ends_with(last_unrec_arg, ".gz"))
set_compressed(all->p);
@@ -949,7 +993,7 @@ vw* parse_args(int argc, char *argv[])
}
}
- parse_source_args(*all, vm, all->quiet,all->numpasses);
+ enable_sources(*all, vm, all->quiet,all->numpasses);
// force wpp to be a power of 2 to avoid 32-bit overflow
uint32_t i = 0;
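
A minimal sketch of the power-of-two rounding that the final comment describes (a hypothetical standalone helper, not VW's exact code):

    // round n up to the next power of two (valid for n >= 1),
    // e.g. next_pow2(5) == 8
    uint32_t next_pow2(uint32_t n) {
      uint32_t i = 0;
      while ((1u << i) < n)
        i++;
      return 1u << i;
    }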