
github.com/moses-smt/vowpal_wabbit.git
author    Jacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>  2011-06-10 22:43:05 +0400
committer Jacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>  2011-06-10 22:43:05 +0400
commit    cd63c6d474b3b210bb25d8422e6de74b59d6fe5f (patch)
tree      fbca6a402bbbd658014dfea0310397fa7088c4a2 /parse_args.cc
parent    27198deb609a4164a960b580fd405dd6a491867e (diff)
parent    527b7bf6287fd298dd5ca08f53226085c02b38cd (diff)
things unbroken, maybe?
Diffstat (limited to 'parse_args.cc')
-rw-r--r--  parse_args.cc | 123
1 file changed, 84 insertions(+), 39 deletions(-)
diff --git a/parse_args.cc b/parse_args.cc
index dd9860a2..8923e26e 100644
--- a/parse_args.cc
+++ b/parse_args.cc
@@ -15,6 +15,18 @@ embodied in the content of this file are licensed under the BSD
#include "network.h"
#include "global_data.h"
+//
+// Does string end with a certain substring?
+//
+bool ends_with(string const &fullString, string const &ending)
+{
+ if (fullString.length() > ending.length()) {
+ return (fullString.compare(fullString.length() - ending.length(), ending.length(), ending) == 0);
+ } else {
+ return false;
+ }
+}
+
const float default_decay = 1.;
po::variables_map parse_args(int argc, char *argv[], boost::program_options::options_description& desc,
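A note on the ends_with helper added above: because the length test uses a strict >, a string is never considered to end with itself, which is harmless for the ".gz" check this commit introduces further down but worth knowing. A standalone sketch of its behavior (hypothetical main, not part of the patch):

    #include <iostream>
    #include <string>
    using namespace std;

    bool ends_with(string const &fullString, string const &ending)
    {
      if (fullString.length() > ending.length())
        return fullString.compare(fullString.length() - ending.length(), ending.length(), ending) == 0;
      else
        return false; // note: also false when fullString == ending
    }

    int main()
    {
      cout << ends_with("train.dat.gz", ".gz") << endl; // 1: compressed input detected
      cout << ends_with("train.dat", ".gz") << endl;    // 0: plain text
      cout << ends_with(".gz", ".gz") << endl;          // 0: strict > excludes the equal-length case
    }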
@@ -31,20 +43,21 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
("active_mellowness", po::value<float>(&global.active_c0)->default_value(8.f), "active learning mellowness parameter c_0. Default 8")
("adaptive", "use adaptive, individual learning rates.")
("audit,a", "print weights of features")
- ("bit_precision,b", po::value<size_t>(),
+ ("bit_precision,b", po::value<size_t>(),
"number of bits in the feature table")
("backprop", "turn on delayed backprop")
("cache,c", "Use a cache. The default is <data>.cache")
("cache_file", po::value< vector<string> >(), "The location(s) of cache_file.")
("compressed", "use gzip format whenever appropriate. If a cache file is being created, this option creates a compressed cache file. A mixture of raw-text & compressed inputs are supported if this option is on")
("conjugate_gradient", "use conjugate gradient based optimization")
- ("regularization", po::value<float>(&global.regularization)->default_value(0.), "minimize weight magnitude")
+ ("regularization", po::value<float>(&global.regularization)->default_value(0.001), "minimize weight magnitude")
("corrective", "turn on corrective updates")
("data,d", po::value< string >()->default_value(""), "Example Set")
("daemon", "read data from port 39523")
- ("decay_learning_rate", po::value<float>(&global.eta_decay_rate)->default_value(default_decay),
+ ("decay_learning_rate", po::value<float>(&global.eta_decay_rate)->default_value(default_decay),
"Set Decay factor for learning_rate between passes")
("final_regressor,f", po::value< string >(), "Final regressor")
+ ("readable_model", po::value< string >(), "Output human-readable final regressor")
("global_multiplier", po::value<float>(&global.global_multiplier)->default_value(1.0), "Global update multiplier")
("delayed_global", "Do delayed global updates")
("hash", po::value< string > (), "how to hash the features. Available options: strings, all")
@@ -53,6 +66,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
("ignore", po::value< vector<unsigned char> >(), "ignore namespaces beginning with character <arg>")
("initial_weight", po::value<float>(&global.initial_weight)->default_value(0.), "Set all weights to an initial value of 1.")
("initial_regressor,i", po::value< vector<string> >(), "Initial regressor(s)")
+ ("initial_pass_length", po::value<size_t>(&global.pass_length)->default_value((size_t)-1), "initial number of examples per pass")
("initial_t", po::value<float>(&(par->t))->default_value(1.), "initial t value")
("lda", po::value<size_t>(&global.lda), "Run lda with <int> topics")
("lda_alpha", po::value<float>(&global.lda_alpha)->default_value(0.1), "Prior on sparsity of per-document topic weights")
@@ -66,24 +80,22 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
("port", po::value<size_t>(),"port to listen on")
("power_t", po::value<float>(&vars.power_t)->default_value(0.5), "t power value")
("predictto", po::value< string > (), "host to send predictions to")
- ("learning_rate,l", po::value<float>(&global.eta)->default_value(10),
+ ("learning_rate,l", po::value<float>(&global.eta)->default_value(10),
"Set Learning Rate")
- ("passes", po::value<size_t>(&global.numpasses)->default_value(1),
+ ("passes", po::value<size_t>(&global.numpasses)->default_value(1),
"Number of Training Passes")
("predictions,p", po::value< string >(), "File to output predictions to")
("quadratic,q", po::value< vector<string> > (),
"Create and use quadratic features")
("quiet", "Don't output diagnostics")
("rank", po::value<size_t>(&global.rank)->default_value(0), "rank for matrix factorization.")
- ("weight_decay", po::value<float>(&global.weight_decay)->default_value(0.), "weight decay.")
- ("weight_decay_sparse", po::value<float>(&global.weight_decay_sparse)->default_value(0.), "weight decay for sparse regularization.")
("random_weights", po::value<bool>(&global.random_weights), "make initial weights random")
- ("raw_predictions,r", po::value< string >(),
+ ("raw_predictions,r", po::value< string >(),
"File to output unnormalized predictions to")
("sendto", po::value< vector<string> >(), "send example to <hosts>")
("testonly,t", "Ignore label information and just test")
("thread_bits", po::value<size_t>(&global.thread_bits)->default_value(0), "log_2 threads")
- ("loss_function", po::value<string>()->default_value("squared"), "Specify the loss function to be used, uses squared by default. Currently available ones are squared, hinge, logistic and quantile.")
+ ("loss_function", po::value<string>()->default_value("squared"), "Specify the loss function to be used, uses squared by default. Currently available ones are squared, classic, hinge, logistic and quantile.")
("quantile_tau", po::value<double>()->default_value(0.5), "Parameter \\tau associated with Quantile loss. Defaults to 0.5")
("unique_id", po::value<size_t>(&global.unique_id)->default_value(0),"unique id used for cluster parallel")
("sort_features", "turn this on to disregard order in which features have been defined. This will lead to smaller cache sizes")
@@ -115,18 +127,18 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
global.max_label = 1.;
global.lda =0;
global.random_weights = false;
-
+
global.adaptive = false;
global.audit = false;
global.active = false;
global.active_simulation =false;
global.reg = &r;
-
+
po::positional_options_description p;
// Be friendly: if -d was left out, treat positional param as data file
p.add("data", -1);
-
+
po::variables_map vm;
po::store(po::command_line_parser(argc, argv).
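The positional hook above ("Be friendly: if -d was left out...") is what lets a bare filename on the command line stand in for --data. The mechanism in isolation (hypothetical option set):

    #include <boost/program_options.hpp>
    #include <iostream>
    #include <string>
    namespace po = boost::program_options;

    int main(int argc, char *argv[])
    {
      po::options_description desc("options");
      desc.add_options()
        ("data,d", po::value<std::string>()->default_value(""), "Example Set");
      po::positional_options_description p;
      p.add("data", -1); // -1: every bare token is mapped to --data
      po::variables_map vm;
      po::store(po::command_line_parser(argc, argv)
                  .options(desc).positional(p).run(), vm);
      po::notify(vm);
      std::cout << "data file: " << vm["data"].as<std::string>() << std::endl;
    }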
@@ -136,16 +148,16 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
global.weighted_unlabeled_examples = par->t;
global.initial_t = par->t;
global.partition_bits = global.thread_bits;
-
+
if (vm.count("help") || argc == 1) {
/* upon direct query for help -- spit it out to stdout */
cout << "\n" << desc << "\n";
exit(0);
}
-
- if (vm.count("active_simulation"))
+
+ if (vm.count("active_simulation"))
global.active_simulation = true;
-
+
if (vm.count("active_learning") && !global.active_simulation)
global.active = true;
@@ -169,11 +181,17 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
global.delayed_global = true;
cout << "enabling delayed_global updates" << endl;
}
-
+
if (vm.count("conjugate_gradient")) {
global.conjugate_gradient = true;
- global.stride = 8;
- cout << "enabling conjugate gradient based optimization" << endl;
+ global.stride = 4;
+ if (!global.quiet)
+ cerr << "enabling conjugate gradient based optimization" << endl;
+ if (global.numpasses < 2)
+ {
+ cout << "you must make at least 2 passes to use conjugate gradient" << endl;
+ exit(1);
+ }
}
if (vm.count("version") || argc == 1) {
@@ -182,15 +200,20 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
exit(0);
}
-
+
if(vm.count("ngram")){
global.ngram = vm["ngram"].as<size_t>();
if(!vm.count("skip_gram")) cout << "You have chosen to generate " << global.ngram << "-grams" << endl;
+ if(vm.count("sort_features"))
+ {
+ cout << "ngram is incompatible with sort_features. " << endl;
+ exit(1);
+ }
}
if(vm.count("skips"))
{
global.skips = vm["skips"].as<size_t>();
- if(!vm.count("ngram"))
+ if(!vm.count("ngram"))
{
cout << "You can not skip unless ngram is > 1" << endl;
exit(1);
@@ -207,11 +230,16 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
{
global.default_bits = false;
global.num_bits = vm["bit_precision"].as< size_t>();
+ if (global.num_bits > 29)
+ {
+ cout << "Only 29 or fewer bits allowed. If this is a serious limit, speak up." << endl;
+ exit(1);
+ }
}
- if(vm.count("compressed")){
+ string data_filename = vm["data"].as<string>();
+ if (vm.count("compressed") || ends_with(data_filename, ".gz"))
set_compressed(par);
- }
if(vm.count("sort_features"))
par->sort_features = true;
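The 29-bit cap above keeps the weight vector addressable: 2^29 table entries at a stride of 4 floats is already 2^33 bytes, i.e. 8 GiB. A quick back-of-envelope check (assumes stride 4 and 4-byte floats):

    #include <iostream>
    #include <cstddef>

    int main()
    {
      for (size_t bits = 24; bits <= 29; bits++)
      {
        size_t entries = (size_t)1 << bits;
        size_t bytes = entries * 4 /* stride */ * sizeof(float);
        std::cout << bits << " bits -> " << (bytes >> 20) << " MiB" << std::endl;
      }
      // prints 256 MiB at 24 bits up to 8192 MiB at 29 bits
    }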
@@ -225,7 +253,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
else
global.quiet = false;
- if (vm.count("quadratic"))
+ if (vm.count("quadratic"))
{
global.pairs = vm["quadratic"].as< vector<string> >();
if (!global.quiet)
@@ -244,16 +272,25 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
}
}
+ for (size_t i = 0; i < 256; i++)
+ global.ignore[i] = false;
+ global.ignore_some = false;
+
if (vm.count("ignore"))
{
- global.ignore = vm["ignore"].as< vector<unsigned char> >();
+ vector<unsigned char> ignore = vm["ignore"].as< vector<unsigned char> >();
+ for (vector<unsigned char>::iterator i = ignore.begin(); i != ignore.end();i++)
+ {
+ global.ignore[*i] = true;
+ global.ignore_some = true;
+ }
if (!global.quiet)
{
cerr << "ignoring namespaces beginning with: ";
- for (vector<unsigned char>::iterator i = global.ignore.begin(); i != global.ignore.end();i++)
+ for (vector<unsigned char>::iterator i = ignore.begin(); i != ignore.end();i++)
cerr << *i << " ";
- cerr << endl;
+ cerr << endl;
}
}
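The refactor above replaces a stored vector of ignored namespace characters with a 256-entry boolean table, so the per-feature test during parsing becomes a single array lookup rather than a vector scan. The pattern in isolation (hypothetical names):

    #include <iostream>
    #include <vector>
    #include <cstddef>

    int main()
    {
      bool ignore[256];
      for (size_t i = 0; i < 256; i++) // default: nothing ignored
        ignore[i] = false;

      std::vector<unsigned char> parsed; // stands in for vm["ignore"].as< vector<unsigned char> >()
      parsed.push_back('a');
      parsed.push_back('b');
      for (std::vector<unsigned char>::iterator i = parsed.begin(); i != parsed.end(); i++)
        ignore[*i] = true;

      unsigned char ns = 'a'; // first character of an incoming namespace
      if (ignore[ns])         // O(1) per feature
        std::cout << "skipping namespace " << ns << std::endl;
      return 0;
    }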
@@ -261,7 +298,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
if (global.rank > 0) {
// store linear + 2*rank weights per index, round up to power of two
float temp = ceilf(logf((float)(global.rank*2+1)) / logf (2.f));
- global.stride = powf(2,temp);
+ global.stride = 1 << (int) temp;
global.random_weights = true;
}
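The powf-to-shift change above makes the stride exactly a power of two: powf(2,temp) returns a float that is then truncated into an integer field and can land just below the intended value, whereas 1 << (int) temp is exact. Worked through for rank = 5:

    #include <cmath>
    #include <iostream>
    #include <cstddef>

    int main()
    {
      size_t rank = 5;
      // round 2*rank+1 = 11 up to the next power of two
      float temp = ceilf(logf((float)(rank * 2 + 1)) / logf(2.f)); // ceil(log2(11)) = 4
      size_t stride = (size_t)1 << (int)temp;                      // exactly 16
      std::cout << "stride = " << stride << std::endl;
      return 0;
    }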
@@ -269,7 +306,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
{
par->sort_features = true;
float temp = ceilf(logf((float)(global.lda*2+1)) / logf (2.f));
- global.stride = powf(2,temp);
+ global.stride = 1 << (int) temp;
global.random_weights = true;
}
@@ -281,7 +318,10 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
if (!vm.count("lda"))
global.eta *= pow(par->t, vars.power_t);
+ parse_source_args(vm,par,global.quiet,global.numpasses);
parse_regressor_args(vm, r, final_regressor_name, global.quiet);
+ if (vm.count("readable_model"))
+ global.text_regressor_name = vm["readable_model"].as<string>();
if (vm.count("active_c0"))
global.active_c0 = vm["active_c0"].as<float>();
@@ -294,7 +334,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
set_minmax = noop_mm;
string loss_function;
- if(vm.count("loss_function"))
+ if(vm.count("loss_function"))
loss_function = vm["loss_function"].as<string>();
else
loss_function = "squaredloss";
@@ -302,19 +342,25 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
double loss_parameter = 0.0;
if(vm.count("quantile_tau"))
loss_parameter = vm["quantile_tau"].as<double>();
+
+ if (global.rank != 0) {
+ loss_function = "classic";
+ cerr << "Forcing classic squared loss for matrix factorization" << endl;
+ }
+
r.loss = getLossFunction(loss_function, loss_parameter);
global.loss = r.loss;
// global.eta *= pow(par->t, vars.power_t);
-
+
if (global.eta_decay_rate != default_decay && global.numpasses == 1)
cerr << "Warning: decay_learning_rate has no effect when there is only one pass" << endl;
if (pow(global.eta_decay_rate, global.numpasses) < 0.0001 )
- cerr << "Warning: the learning rate for the last pass is multiplied by: " << pow(global.eta_decay_rate, global.numpasses)
+ cerr << "Warning: the learning rate for the last pass is multiplied by: " << pow(global.eta_decay_rate, global.numpasses)
<< " adjust to --decay_learning_rate larger to avoid this." << endl;
-
- parse_source_args(vm,par,global.quiet,global.numpasses);
+
+ //parse_source_args(vm,par,global.quiet,global.numpasses);
if (!global.quiet)
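The warning above is easy to trip with aggressive decay: with --decay_learning_rate 0.5 and 20 passes, the last pass runs at 0.5^20 ≈ 9.5e-7 of the base learning rate, i.e. effectively not learning at all. The check in isolation:

    #include <cmath>
    #include <iostream>
    #include <cstddef>

    int main()
    {
      float eta_decay_rate = 0.5f;
      size_t numpasses = 20;
      double last = pow((double)eta_decay_rate, (double)numpasses);
      if (last < 0.0001)
        std::cout << "last-pass learning rate multiplier: " << last // ~9.54e-07
                  << "; raise --decay_learning_rate" << std::endl;
      return 0;
    }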
@@ -328,7 +374,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
if (global.rank > 0)
cerr << "rank = " << global.rank << endl;
}
-
+
if (vm.count("predictions")) {
if (!global.quiet)
cerr << "predictions = " << vm["predictions"].as< string >() << endl;
@@ -337,7 +383,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
int_pair pf = {1,0};
push(global.final_prediction_sink,pf);//stdout
}
- else
+ else
{
const char* fstr = (vm["predictions"].as< string >().c_str());
int_pair pf = {fileno(fopen(fstr,"w")),0};
@@ -346,7 +392,7 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
push(global.final_prediction_sink,pf);
}
}
-
+
if (vm.count("raw_predictions")) {
if (!global.quiet)
cerr << "raw predictions = " << vm["raw_predictions"].as< string >() << endl;
@@ -366,9 +412,8 @@ po::variables_map parse_args(int argc, char *argv[], boost::program_options::opt
if (!global.quiet)
cerr << "only testing" << endl;
global.training = false;
- global.weight_decay = 0;
}
- else
+ else
{
global.training = true;
if (!global.quiet)