github.com/moses-smt/vowpal_wabbit.git
-rw-r--r--  vowpalwabbit/active.cc           20
-rw-r--r--  vowpalwabbit/autolink.cc          7
-rw-r--r--  vowpalwabbit/bfgs.cc             42
-rw-r--r--  vowpalwabbit/binary.cc           12
-rw-r--r--  vowpalwabbit/cb_algs.cc          18
-rw-r--r--  vowpalwabbit/cb_algs.h            1
-rw-r--r--  vowpalwabbit/cbify.cc            18
-rw-r--r--  vowpalwabbit/csoaa.cc            22
-rw-r--r--  vowpalwabbit/ect.cc              13
-rw-r--r--  vowpalwabbit/gd.cc                8
-rw-r--r--  vowpalwabbit/gd_mf.cc             9
-rw-r--r--  vowpalwabbit/gd_mf.h              6
-rw-r--r--  vowpalwabbit/global_data.cc       7
-rw-r--r--  vowpalwabbit/global_data.h       13
-rw-r--r--  vowpalwabbit/kernel_svm.cc       10
-rw-r--r--  vowpalwabbit/kernel_svm.h         4
-rw-r--r--  vowpalwabbit/lda_core.cc        147
-rw-r--r--  vowpalwabbit/log_multi.cc        26
-rw-r--r--  vowpalwabbit/lrq.cc               8
-rw-r--r--  vowpalwabbit/mf.cc               26
-rw-r--r--  vowpalwabbit/mf.h                 6
-rw-r--r--  vowpalwabbit/nn.cc               17
-rw-r--r--  vowpalwabbit/nn.h                 3
-rw-r--r--  vowpalwabbit/noop.cc             14
-rw-r--r--  vowpalwabbit/noop.h               5
-rw-r--r--  vowpalwabbit/oaa.cc               7
-rw-r--r--  vowpalwabbit/parse_args.cc      222
-rw-r--r--  vowpalwabbit/parse_args.h         1
-rw-r--r--  vowpalwabbit/print.cc            12
-rw-r--r--  vowpalwabbit/print.h              5
-rw-r--r--  vowpalwabbit/scorer.cc           12
-rw-r--r--  vowpalwabbit/search.cc           72
-rw-r--r--  vowpalwabbit/search.h             3
-rw-r--r--  vowpalwabbit/sender.cc           11
-rw-r--r--  vowpalwabbit/sender.h             5
-rw-r--r--  vowpalwabbit/stagewise_poly.cc   21
-rw-r--r--  vowpalwabbit/topk.cc              7
-rw-r--r--  vowpalwabbit/topk.h               8
38 files changed, 406 insertions(+), 442 deletions(-)
diff --git a/vowpalwabbit/active.cc b/vowpalwabbit/active.cc
index a1070be3..23ccc2b1 100644
--- a/vowpalwabbit/active.cc
+++ b/vowpalwabbit/active.cc
@@ -153,16 +153,22 @@ namespace ACTIVE {
base_learner* setup(vw& all, po::variables_map& vm)
{//parse and set arguments
- active& data = calloc_or_die<active>();
-
- po::options_description active_opts("Active Learning options");
- active_opts.add_options()
+ po::options_description opts("Active Learning options");
+ opts.add_options()
+ ("active", "enable active learning")
("simulation", "active learning simulation mode")
- ("mellowness", po::value<float>(&(data.active_c0)), "active learning mellowness parameter c_0. Default 8")
- ;
- vm = add_options(all, active_opts);
+ ("mellowness", po::value<float>(), "active learning mellowness parameter c_0. Default 8");
+ vm = add_options(all, opts);
+ if(!vm.count("active"))
+ return NULL;
+
+ active& data = calloc_or_die<active>();
+ data.active_c0 = 8;
data.all=&all;
+ if (vm.count("mellowness"))
+ data.active_c0 = vm["mellowness"].as<float>();
+
//Create new learner
learner<active>* ret;
if (vm.count("simulation"))
diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc
index 7cdaecef..ba25b262 100644
--- a/vowpalwabbit/autolink.cc
+++ b/vowpalwabbit/autolink.cc
@@ -39,6 +39,13 @@ namespace ALINK {
LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("Autolink options");
+ opts.add_options()
+ ("autolink", po::value<size_t>(), "create link function with polynomial d");
+ vm = add_options(all,opts);
+ if(!vm.count("autolink"))
+ return NULL;
+
autolink& data = calloc_or_die<autolink>();
data.d = (uint32_t)vm["autolink"].as<size_t>();
data.stride_shift = all.reg.stride_shift;
diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index fc4ee851..50306dd6 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -63,6 +63,9 @@ namespace BFGS
struct bfgs {
vw* all;
+ int m;
+ float rel_threshold; // termination threshold
+
double wolfe1_bound;
size_t final_pass;
@@ -247,7 +250,7 @@ void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance
origin = 0;
for(uint32_t i = 0; i < length; i++, mem+=b.mem_stride, w+=stride) {
- if (all.m>0)
+ if (b.m>0)
mem[(MEM_XT+origin)%b.mem_stride] = w[W_XT];
mem[(MEM_GT+origin)%b.mem_stride] = w[W_GT];
g1_Hg1 += w[W_GT] * w[W_GT] * w[W_COND];
@@ -272,7 +275,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
float* w0 = w;
// implement conjugate gradient
- if (all.m==0) {
+ if (b.m==0) {
double g_Hy = 0.;
double g_Hg = 0.;
double y = 0.;
@@ -374,7 +377,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
mem = mem0;
w = w0;
- lastj = (lastj<all.m-1) ? lastj+1 : all.m-1;
+ lastj = (lastj<b.m-1) ? lastj+1 : b.m-1;
origin = (origin+b.mem_stride-2)%b.mem_stride;
for(uint32_t i = 0; i < length; i++, mem+=b.mem_stride, w+=stride) {
mem[(MEM_GT+origin)%b.mem_stride] = w[W_GT];
@@ -633,9 +636,9 @@ int process_pass(vw& all, bfgs& b) {
/********************************************************************/
else {
double rel_decrease = (b.previous_loss_sum-b.loss_sum)/b.previous_loss_sum;
- if (!nanpattern((float)rel_decrease) && b.backstep_on && fabs(rel_decrease)<all.rel_threshold) {
+ if (!nanpattern((float)rel_decrease) && b.backstep_on && fabs(rel_decrease)<b.rel_threshold) {
fprintf(stdout, "\nTermination condition reached in pass %ld: decrease in loss less than %.3f%%.\n"
- "If you want to optimize further, decrease termination threshold.\n", (long int)b.current_pass+1, all.rel_threshold*100.0);
+ "If you want to optimize further, decrease termination threshold.\n", (long int)b.current_pass+1, b.rel_threshold*100.0);
status = LEARN_CONV;
}
b.previous_loss_sum = b.loss_sum;
@@ -913,7 +916,7 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
throw exception();
}
}
- int m = all->m;
+ int m = b.m;
b.mem_stride = (m==0) ? CG_EXTRA : 2*m;
b.mem = (float*) malloc(sizeof(float)*all->length()*(b.mem_stride));
@@ -967,8 +970,21 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("LBFGS options");
+ opts.add_options()
+ ("bfgs", "use bfgs optimization")
+ ("conjugate_gradient", "use conjugate gradient based optimization")
+ ("hessian_on", "use second derivative in line search")
+ ("mem", po::value<uint32_t>()->default_value(15), "memory in bfgs")
+ ("termination", po::value<float>()->default_value(0.001f),"Termination threshold");
+ vm = add_options(all, opts);
+ if(!vm.count("bfgs") && !vm.count("conjugate_gradient"))
+ return NULL;
+
bfgs& b = calloc_or_die<bfgs>();
b.all = &all;
+ b.m = vm["mem"].as<uint32_t>();
+ b.rel_threshold = vm["termination"].as<float>();
b.wolfe1_bound = 0.01;
b.first_hessian_on=true;
b.first_pass = true;
@@ -979,16 +995,6 @@ base_learner* setup(vw& all, po::variables_map& vm)
b.no_win_counter = 0;
b.early_stop_thres = 3;
- po::options_description bfgs_opts("LBFGS options");
-
- bfgs_opts.add_options()
- ("hessian_on", "use second derivative in line search")
- ("mem", po::value<int>(&(all.m)), "memory in bfgs")
- ("conjugate_gradient", "use conjugate gradient based optimization")
- ("termination", po::value<float>(&(all.rel_threshold)),"Termination threshold");
-
- vm = add_options(all, bfgs_opts);
-
if(!all.holdout_set_off)
{
all.sd->holdout_best_loss = FLT_MAX;
@@ -996,11 +1002,11 @@ base_learner* setup(vw& all, po::variables_map& vm)
b.early_stop_thres = vm["early_terminate"].as< size_t>();
}
- if (vm.count("hessian_on") || all.m==0) {
+ if (vm.count("hessian_on") || b.m==0) {
all.hessian_on = true;
}
if (!all.quiet) {
- if (all.m>0)
+ if (b.m>0)
cerr << "enabling BFGS based optimization ";
else
cerr << "enabling conjugate gradient optimization via BFGS ";
diff --git a/vowpalwabbit/binary.cc b/vowpalwabbit/binary.cc
index 04810e26..9951ddd3 100644
--- a/vowpalwabbit/binary.cc
+++ b/vowpalwabbit/binary.cc
@@ -28,8 +28,16 @@ namespace BINARY {
}
}
- LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
- {
+LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
+ {//parse and set arguments
+ po::options_description opts("Binary options");
+ opts.add_options()
+ ("binary", "report loss as binary classification on -1,1");
+ vm = add_options(all,opts);
+ if(!vm.count("binary"))
+ return NULL;
+
+ //Create new learner
LEARNER::learner<char>& ret =
LEARNER::init_learner<char>(NULL, all.l, predict_or_learn<true>, predict_or_learn<false>);
return make_base(ret);
diff --git a/vowpalwabbit/cb_algs.cc b/vowpalwabbit/cb_algs.cc
index bf9a6e5b..458e6b44 100644
--- a/vowpalwabbit/cb_algs.cc
+++ b/vowpalwabbit/cb_algs.cc
@@ -438,19 +438,19 @@ namespace CB_ALGS
base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("CB options");
+ opts.add_options()
+ ("cb", po::value<size_t>(), "Use contextual bandit learning with <k> costs")
+ ("cb_type", po::value<string>(), "contextual bandit method to use in {ips,dm,dr}")
+ ("eval", "Evaluate a policy rather than optimizing.");
+ vm = add_options(all, opts);
+ if (!vm.count("cb"))
+ return NULL;
+
cb& c = calloc_or_die<cb>();
c.all = &all;
uint32_t nb_actions = (uint32_t)vm["cb"].as<size_t>();
- //append cb with nb_actions to file_options so it is saved to regressor later
-
- po::options_description cb_opts("CB options");
- cb_opts.add_options()
- ("cb_type", po::value<string>(), "contextual bandit method to use in {ips,dm,dr}")
- ("eval", "Evaluate a policy rather than optimizing.")
- ;
-
- vm = add_options(all, cb_opts);
*all.file_options << " --cb " << nb_actions;
diff --git a/vowpalwabbit/cb_algs.h b/vowpalwabbit/cb_algs.h
index e989b5a1..7756fc6f 100644
--- a/vowpalwabbit/cb_algs.h
+++ b/vowpalwabbit/cb_algs.h
@@ -6,7 +6,6 @@ license as described in the file LICENSE.
#pragma once
//TODO: extend to handle CSOAA_LDF and WAP_LDF
namespace CB_ALGS {
-
LEARNER::base_learner* setup(vw& all, po::variables_map& vm);
template <bool is_learn>
diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index d8176228..9b2e3147 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -373,18 +373,20 @@ namespace CBIFY {
base_learner* setup(vw& all, po::variables_map& vm)
{//parse and set arguments
- cbify& data = calloc_or_die<cbify>();
-
- data.all = &all;
- po::options_description cb_opts("CBIFY options");
- cb_opts.add_options()
+ po::options_description opts("CBIFY options");
+ opts.add_options()
+ ("cbify", po::value<size_t>(), "Convert multiclass on <k> classes into a contextual bandit problem and solve")
("first", po::value<size_t>(), "tau-first exploration")
("epsilon",po::value<float>() ,"epsilon-greedy exploration")
("bag",po::value<size_t>() ,"bagging-based exploration")
("cover",po::value<size_t>() ,"bagging-based exploration");
-
- vm = add_options(all, cb_opts);
-
+ vm = add_options(all, opts);
+ if (!vm.count("cbify"))
+ return NULL;
+
+ cbify& data = calloc_or_die<cbify>();
+
+ data.all = &all;
data.k = (uint32_t)vm["cbify"].as<size_t>();
*all.file_options << " --cbify " << data.k;
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 350d40a3..0928735a 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -13,9 +13,7 @@ license as described in the file LICENSE.
#include "gd.h" // GD::foreach_feature() needed in subtract_example()
using namespace std;
-
using namespace LEARNER;
-
using namespace COST_SENSITIVE;
namespace CSOAA {
@@ -70,6 +68,12 @@ namespace CSOAA {
base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("CSOAA options");
+ opts.add_options()
+ ("csoaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> costs");
+ vm = add_options(all, opts);
+ if(!vm.count("csoaa"))
+ return NULL;
csoaa& c = calloc_or_die<csoaa>();
c.all = &all;
//first parse for number of actions
@@ -647,12 +651,14 @@ namespace LabelDict {
base_learner* setup(vw& all, po::variables_map& vm)
{
- po::options_description ldf_opts("LDF Options");
- ldf_opts.add_options()
- ("ldf_override", po::value<string>(), "Override singleline or multiline from csoaa_ldf or wap_ldf, eg if stored in file")
- ;
-
- vm = add_options(all, ldf_opts);
+ po::options_description opts("LDF Options");
+ opts.add_options()
+ ("csoaa_ldf", po::value<string>(), "Use one-against-all multiclass learning with label dependent features. Specify singleline or multiline.")
+ ("wap_ldf", po::value<string>(), "Use weighted all-pairs multiclass learning with label dependent features. Specify singleline or multiline.")
+ ("ldf_override", po::value<string>(), "Override singleline or multiline from csoaa_ldf or wap_ldf, eg if stored in file");
+ vm = add_options(all, opts);
+ if(!vm.count("csoaa_ldf") && !vm.count("wap_ldf"))
+ return NULL;
ldf& ld = calloc_or_die<ldf>();
diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc
index dea87040..aba4cf46 100644
--- a/vowpalwabbit/ect.cc
+++ b/vowpalwabbit/ect.cc
@@ -364,13 +364,16 @@ namespace ECT
base_learner* setup(vw& all, po::variables_map& vm)
{
- ect& data = calloc_or_die<ect>();
- po::options_description ect_opts("ECT options");
- ect_opts.add_options()
+ po::options_description opts("ECT options");
+ opts.add_options()
+ ("ect", po::value<size_t>(), "Use error correcting tournament with <k> labels")
("error", po::value<size_t>(), "error in ECT");
+ vm = add_options(all, opts);
+ if (!vm.count("ect"))
+ return NULL;
+
+ ect& data = calloc_or_die<ect>();
- vm = add_options(all, ect_opts);
-
//first parse for number of actions
data.k = (int)vm["ect"].as<size_t>();
diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc
index 851fbea7..ee6567c5 100644
--- a/vowpalwabbit/gd.cc
+++ b/vowpalwabbit/gd.cc
@@ -846,6 +846,14 @@ uint32_t ceil_log_2(uint32_t v)
base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("Gradient Descent options");
+ opts.add_options()
+ ("sgd", "use regular stochastic gradient descent update.")
+ ("adaptive", "use adaptive, individual learning rates.")
+ ("invariant", "use safe/importance aware updates.")
+ ("normalized", "use per feature normalized updates")
+ ("exact_adaptive_norm", "use current default invariant normalized adaptive update rule");
+ vm = add_options(all, opts);
gd& g = calloc_or_die<gd>();
g.all = &all;
g.all->normalized_sum_norm_x = 0;
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index b57328f4..1a9a6335 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -290,6 +290,15 @@ void mf_train(vw& all, example& ec)
base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("Gdmf options");
+ opts.add_options()
+ ("rank", po::value<uint32_t>(), "rank for matrix factorization.");
+ vm = add_options(all, opts);
+ if(!vm.count("gdmf"))
+ return NULL;
+ else
+ all.rank = vm["gdmf"].as<uint32_t>();
+
gdmf& data = calloc_or_die<gdmf>();
data.all = &all;
diff --git a/vowpalwabbit/gd_mf.h b/vowpalwabbit/gd_mf.h
index a0ce2f87..db093750 100644
--- a/vowpalwabbit/gd_mf.h
+++ b/vowpalwabbit/gd_mf.h
@@ -4,12 +4,6 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-#include <math.h>
-#include "example.h"
-#include "parse_regressor.h"
-#include "parser.h"
-#include "gd.h"
-
namespace GDMF{
LEARNER::base_learner* setup(vw& all, po::variables_map& vm);
}
diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc
index 825977d5..b4e68aff 100644
--- a/vowpalwabbit/global_data.cc
+++ b/vowpalwabbit/global_data.cc
@@ -260,13 +260,7 @@ vw::vw()
default_bits = true;
daemon = false;
num_children = 10;
- lda_alpha = 0.1f;
- lda_rho = 0.1f;
- lda_D = 10000.;
- lda_epsilon = 0.001f;
- minibatch = 1;
span_server = "";
- m = 15;
save_resume = false;
random_positive_weights = false;
@@ -276,7 +270,6 @@ vw::vw()
power_t = 0.5;
eta = 0.5; //default learning rate for normalized adaptive updates, this is switched to 10 by default for the other updates (see parse_args.cc)
numpasses = 1;
- rel_threshold = 0.001f;
rank = 0;
final_prediction_sink.begin = final_prediction_sink.end=final_prediction_sink.end_array = NULL;
diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index bdf76b46..664b4f51 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -193,7 +193,6 @@ struct vw {
bool bfgs;
bool hessian_on;
- int m;
bool save_resume;
double normalized_sum_norm_x;
@@ -217,10 +216,6 @@ struct vw {
float power_t;//the power on learning rate decay.
int reg_mode;
- size_t minibatch;
-
- float rel_threshold; // termination threshold
-
size_t pass_length;
size_t numpasses;
size_t passes_complete;
@@ -262,20 +257,16 @@ struct vw {
size_t normalized_idx; //offset idx where the norm is stored (1 or 2 depending on whether adaptive is true)
uint32_t lda;
- float lda_alpha;
- float lda_rho;
- float lda_D;
- float lda_epsilon;
std::string text_regressor_name;
std::string inv_hash_regressor_name;
-
std::string span_server;
size_t length () { return ((size_t)1) << num_bits; };
-
uint32_t rank;
+ v_array<LEARNER::base_learner* (*)(vw& all, po::variables_map& vm)> reduction_stack;
+
//Prediction output
v_array<int> final_prediction_sink; // set to send global predictions to.
int raw_prediction; // file descriptors for text output.
diff --git a/vowpalwabbit/kernel_svm.cc b/vowpalwabbit/kernel_svm.cc
index 9a88f17e..a8773d64 100644
--- a/vowpalwabbit/kernel_svm.cc
+++ b/vowpalwabbit/kernel_svm.cc
@@ -790,10 +790,10 @@ namespace KSVM
cerr<<"Done with finish \n";
}
-
LEARNER::base_learner* setup(vw &all, po::variables_map& vm) {
- po::options_description desc("KSVM options");
- desc.add_options()
+ po::options_description opts("KSVM options");
+ opts.add_options()
+ ("ksvm", "kernel svm")
("reprocess", po::value<size_t>(), "number of reprocess steps for LASVM")
("active", "do active learning")
("active_c", po::value<double>(), "parameter for query prob")
@@ -805,8 +805,10 @@ namespace KSVM
("bandwidth", po::value<float>(), "bandwidth of rbf kernel")
("degree", po::value<int>(), "degree of poly kernel")
("lambda", po::value<double>(), "saving regularization for test time");
- vm = add_options(all, desc);
+ vm = add_options(all, opts);
+ if (!vm.count("ksvm"))
+ return NULL;
string loss_function = "hinge";
float loss_parameter = 0.0;
delete all.loss;
diff --git a/vowpalwabbit/kernel_svm.h b/vowpalwabbit/kernel_svm.h
index 7a65a051..563d70e2 100644
--- a/vowpalwabbit/kernel_svm.h
+++ b/vowpalwabbit/kernel_svm.h
@@ -5,6 +5,4 @@ license as described in the file LICENSE.
*/
#pragma once
namespace KSVM
-{
-LEARNER::base_learner* setup(vw &all, po::variables_map& vm);
-}
+{ LEARNER::base_learner* setup(vw &all, po::variables_map& vm); }
diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc
index 8da81a4d..42cef9b5 100644
--- a/vowpalwabbit/lda_core.cc
+++ b/vowpalwabbit/lda_core.cc
@@ -33,6 +33,13 @@ public:
};
struct lda {
+ uint32_t lda;
+ float lda_alpha;
+ float lda_rho;
+ float lda_D;
+ float lda_epsilon;
+ size_t minibatch;
+
v_array<float> Elogtheta;
v_array<float> decay_levels;
v_array<float> total_new;
@@ -407,31 +414,31 @@ float average_diff(vw& all, float* oldgamma, float* newgamma)
}
// Returns E_q[log p(\theta)] - E_q[log q(\theta)].
- float theta_kl(vw& all, v_array<float>& Elogtheta, float* gamma)
+ float theta_kl(lda& l, v_array<float>& Elogtheta, float* gamma)
{
float gammasum = 0;
Elogtheta.erase();
- for (size_t k = 0; k < all.lda; k++) {
+ for (size_t k = 0; k < l.lda; k++) {
Elogtheta.push_back(mydigamma(gamma[k]));
gammasum += gamma[k];
}
float digammasum = mydigamma(gammasum);
gammasum = mylgamma(gammasum);
- float kl = -(all.lda*mylgamma(all.lda_alpha));
- kl += mylgamma(all.lda_alpha*all.lda) - gammasum;
- for (size_t k = 0; k < all.lda; k++) {
+ float kl = -(l.lda*mylgamma(l.lda_alpha));
+ kl += mylgamma(l.lda_alpha*l.lda) - gammasum;
+ for (size_t k = 0; k < l.lda; k++) {
Elogtheta[k] -= digammasum;
- kl += (all.lda_alpha - gamma[k]) * Elogtheta[k];
+ kl += (l.lda_alpha - gamma[k]) * Elogtheta[k];
kl += mylgamma(gamma[k]);
}
return kl;
}
-float find_cw(vw& all, float* u_for_w, float* v)
+float find_cw(lda& l, float* u_for_w, float* v)
{
float c_w = 0;
- for (size_t k =0; k<all.lda; k++)
+ for (size_t k =0; k<l.lda; k++)
c_w += u_for_w[k]*v[k];
return 1.f / c_w;
@@ -444,12 +451,12 @@ float find_cw(vw& all, float* u_for_w, float* v)
// setting of lambda based on the document passed in. The value is
// divided by the total number of words in the document This can be
// used as a (possibly very noisy) estimate of held-out likelihood.
- float lda_loop(vw& all, v_array<float>& Elogtheta, float* v,weight* weights,example* ec, float power_t)
+ float lda_loop(lda& l, v_array<float>& Elogtheta, float* v,weight* weights,example* ec, float power_t)
{
new_gamma.erase();
old_gamma.erase();
- for (size_t i = 0; i < all.lda; i++)
+ for (size_t i = 0; i < l.lda; i++)
{
new_gamma.push_back(1.f);
old_gamma.push_back(0.f);
@@ -463,11 +470,11 @@ float find_cw(vw& all, float* u_for_w, float* v)
float doc_length = 0;
do
{
- memcpy(v,new_gamma.begin,sizeof(float)*all.lda);
- myexpdigammify(all, v);
+ memcpy(v,new_gamma.begin,sizeof(float)*l.lda);
+ myexpdigammify(*l.all, v);
- memcpy(old_gamma.begin,new_gamma.begin,sizeof(float)*all.lda);
- memset(new_gamma.begin,0,sizeof(float)*all.lda);
+ memcpy(old_gamma.begin,new_gamma.begin,sizeof(float)*l.lda);
+ memset(new_gamma.begin,0,sizeof(float)*l.lda);
score = 0;
size_t word_count = 0;
@@ -477,11 +484,11 @@ float find_cw(vw& all, float* u_for_w, float* v)
feature *f = ec->atomics[*i].begin;
for (; f != ec->atomics[*i].end; f++)
{
- float* u_for_w = &weights[(f->weight_index&all.reg.weight_mask)+all.lda+1];
- float c_w = find_cw(all, u_for_w,v);
+ float* u_for_w = &weights[(f->weight_index & l.all->reg.weight_mask)+l.lda+1];
+ float c_w = find_cw(l, u_for_w,v);
xc_w = c_w * f->x;
score += -f->x*log(c_w);
- size_t max_k = all.lda;
+ size_t max_k = l.lda;
for (size_t k =0; k<max_k; k++) {
new_gamma[k] += xc_w*u_for_w[k];
}
@@ -489,16 +496,16 @@ float find_cw(vw& all, float* u_for_w, float* v)
doc_length += f->x;
}
}
- for (size_t k =0; k<all.lda; k++)
- new_gamma[k] = new_gamma[k]*v[k]+all.lda_alpha;
+ for (size_t k =0; k<l.lda; k++)
+ new_gamma[k] = new_gamma[k]*v[k]+l.lda_alpha;
}
- while (average_diff(all, old_gamma.begin, new_gamma.begin) > all.lda_epsilon);
+ while (average_diff(*l.all, old_gamma.begin, new_gamma.begin) > l.lda_epsilon);
ec->topic_predictions.erase();
- ec->topic_predictions.resize(all.lda);
- memcpy(ec->topic_predictions.begin,new_gamma.begin,all.lda*sizeof(float));
+ ec->topic_predictions.resize(l.lda);
+ memcpy(ec->topic_predictions.begin,new_gamma.begin,l.lda*sizeof(float));
- score += theta_kl(all, Elogtheta, new_gamma.begin);
+ score += theta_kl(l, Elogtheta, new_gamma.begin);
return score / doc_length;
}
@@ -527,7 +534,7 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
for (size_t k = 0; k < all->lda; k++) {
if (all->random_weights) {
all->reg.weight_vector[j+k] = (float)(-log(frand48()) + 1.0f);
- all->reg.weight_vector[j+k] *= (float)(all->lda_D / all->lda / all->length() * 200);
+ all->reg.weight_vector[j+k] *= (float)(l.lda_D / all->lda / all->length() * 200);
}
}
all->reg.weight_vector[j+all->lda] = all->initial_t;
@@ -555,7 +562,7 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
uint32_t ndx = stride*i+k;
weight* v = &(all->reg.weight_vector[ndx]);
- text_len = sprintf(buff, "%f ", *v + all->lda_rho);
+ text_len = sprintf(buff, "%f ", *v + l.lda_rho);
brw += bin_text_read_write_fixed(model_file,(char *)v, sizeof (*v),
"", read,
@@ -613,11 +620,11 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
eta = l.all->eta * powf((float)l.example_t, - l.all->power_t);
minuseta = 1.0f - eta;
- eta *= l.all->lda_D / batch_size;
+ eta *= l.lda_D / batch_size;
l.decay_levels.push_back(l.decay_levels.last() + log(minuseta));
l.digammas.erase();
- float additional = (float)(l.all->length()) * l.all->lda_rho;
+ float additional = (float)(l.all->length()) * l.lda_rho;
for (size_t i = 0; i<l.all->lda; i++) {
l.digammas.push_back(mydigamma(l.total_lambda[i] + additional));
}
@@ -639,14 +646,14 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
for (size_t k = 0; k < l.all->lda; k++)
{
weights_for_w[k] *= decay;
- u_for_w[k] = weights_for_w[k] + l.all->lda_rho;
+ u_for_w[k] = weights_for_w[k] + l.lda_rho;
}
myexpdigammify_2(*l.all, u_for_w, l.digammas.begin);
}
for (size_t d = 0; d < batch_size; d++)
{
- float score = lda_loop(*l.all, l.Elogtheta, &(l.v[d*l.all->lda]), weights, l.examples[d],l.all->power_t);
+ float score = lda_loop(l, l.Elogtheta, &(l.v[d*l.all->lda]), weights, l.examples[d],l.all->power_t);
if (l.all->audit)
GD::print_audit_features(*l.all, *l.examples[d]);
// If the doc is empty, give it loss of 0.
@@ -672,7 +679,7 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
for (; s != next; s++) {
float* v_s = &(l.v[s->document*l.all->lda]);
float* u_for_w = &weights[(s->f.weight_index & l.all->reg.weight_mask) + l.all->lda + 1];
- float c_w = eta*find_cw(*l.all, u_for_w, v_s)*s->f.x;
+ float c_w = eta*find_cw(l, u_for_w, v_s)*s->f.x;
for (size_t k = 0; k < l.all->lda; k++) {
float new_value = u_for_w[k]*v_s[k]*c_w;
l.total_new[k] += new_value;
@@ -704,7 +711,7 @@ void save_load(lda& l, io_buf& model_file, bool read, bool text)
l.doc_lengths[num_ex] += (int)f->x;
}
}
- if (++num_ex == l.all->minibatch)
+ if (++num_ex == l.minibatch)
learn_batch(l);
}
@@ -746,43 +753,55 @@ void end_examples(lda& l)
ld.v.delete_v();
}
+
base_learner* setup(vw&all, po::variables_map& vm)
{
- lda& ld = calloc_or_die<lda>();
- ld.sorted_features = vector<index_feature>();
- ld.total_lambda_init = 0;
- ld.all = &all;
- ld.example_t = all.initial_t;
-
- po::options_description lda_opts("LDA options");
- lda_opts.add_options()
- ("lda_alpha", po::value<float>(&all.lda_alpha), "Prior on sparsity of per-document topic weights")
- ("lda_rho", po::value<float>(&all.lda_rho), "Prior on sparsity of topic distributions")
- ("lda_D", po::value<float>(&all.lda_D), "Number of documents")
- ("lda_epsilon", po::value<float>(&all.lda_epsilon), "Loop convergence threshold")
- ("minibatch", po::value<size_t>(&all.minibatch), "Minibatch size, for LDA");
-
- vm = add_options(all, lda_opts);
-
- float temp = ceilf(logf((float)(all.lda*2+1)) / logf (2.f));
- all.reg.stride_shift = (size_t)temp;
- all.random_weights = true;
- all.add_constant = false;
-
+ po::options_description opts("Lda options");
+ opts.add_options()
+ ("lda", po::value<uint32_t>(), "Run lda with <int> topics")
+ ("lda_alpha", po::value<float>()->default_value(0.1f), "Prior on sparsity of per-document topic weights")
+ ("lda_rho", po::value<float>()->default_value(0.1f), "Prior on sparsity of topic distributions")
+ ("lda_D", po::value<float>()->default_value(10000.), "Number of documents")
+ ("lda_epsilon", po::value<float>()->default_value(0.001f), "Loop convergence threshold")
+ ("minibatch", po::value<size_t>()->default_value(1), "Minibatch size, for LDA");
+ vm = add_options(all, opts);
+ if(!vm.count("lda"))
+ return NULL;
+ else
+ all.lda = vm["lda"].as<uint32_t>();
+
+ lda& ld = calloc_or_die<lda>();
+
+ ld.lda = all.lda;
+ ld.lda_alpha = vm["lda_alpha"].as<float>();
+ ld.lda_rho = vm["lda_rho"].as<float>();
+ ld.lda_D = vm["lda_D"].as<float>();
+ ld.lda_epsilon = vm["lda_epsilon"].as<float>();
+ ld.minibatch = vm["minibatch"].as<size_t>();
+ ld.sorted_features = vector<index_feature>();
+ ld.total_lambda_init = 0;
+ ld.all = &all;
+ ld.example_t = all.initial_t;
+
+ float temp = ceilf(logf((float)(all.lda*2+1)) / logf (2.f));
+ all.reg.stride_shift = (size_t)temp;
+ all.random_weights = true;
+ all.add_constant = false;
+
*all.file_options << " --lda " << all.lda;
-
- if (all.eta > 1.)
- {
- cerr << "your learning rate is too high, setting it to 1" << endl;
- all.eta = min(all.eta,1.f);
+
+ if (all.eta > 1.)
+ {
+ cerr << "your learning rate is too high, setting it to 1" << endl;
+ all.eta = min(all.eta,1.f);
+ }
+
+ if (vm.count("minibatch")) {
+ size_t minibatch2 = next_pow2(ld.minibatch);
+ all.p->ring_size = all.p->ring_size > minibatch2 ? all.p->ring_size : minibatch2;
}
-
- if (vm.count("minibatch")) {
- size_t minibatch2 = next_pow2(all.minibatch);
- all.p->ring_size = all.p->ring_size > minibatch2 ? all.p->ring_size : minibatch2;
- }
-
- ld.v.resize(all.lda*all.minibatch);
+
+ ld.v.resize(all.lda*ld.minibatch);
ld.decay_levels.push_back(0.f);
diff --git a/vowpalwabbit/log_multi.cc b/vowpalwabbit/log_multi.cc
index 226376bd..1b5eb1a5 100644
--- a/vowpalwabbit/log_multi.cc
+++ b/vowpalwabbit/log_multi.cc
@@ -81,8 +81,8 @@ namespace LOG_MULTI
v_array<node> nodes;
- uint32_t max_predictors;
- uint32_t predictors_used;
+ size_t max_predictors;
+ size_t predictors_used;
bool progress;
uint32_t swap_resist;
@@ -413,10 +413,10 @@ namespace LOG_MULTI
if (read)
for (uint32_t j = 1; j < temp; j++)
b.nodes.push_back(init_node());
- text_len = sprintf(buff, "max_predictors = %d ",b.max_predictors);
+ text_len = sprintf(buff, "max_predictors = %ld ",b.max_predictors);
bin_text_read_write_fixed(model_file,(char*)&b.max_predictors, sizeof(b.max_predictors), "", read, buff, text_len, text);
- text_len = sprintf(buff, "predictors_used = %d ",b.predictors_used);
+ text_len = sprintf(buff, "predictors_used = %ld ",b.predictors_used);
bin_text_read_write_fixed(model_file,(char*)&b.predictors_used, sizeof(b.predictors_used), "", read, buff, text_len, text);
text_len = sprintf(buff, "progress = %d ",b.progress);
@@ -500,16 +500,22 @@ namespace LOG_MULTI
base_learner* setup(vw& all, po::variables_map& vm) //learner setup
{
- log_multi& data = calloc_or_die<log_multi>();
-
- po::options_description opts("TXM Online options");
+ po::options_description opts("Log Multi options");
opts.add_options()
+ ("log_multi", po::value<size_t>(), "Use online tree for multiclass")
("no_progress", "disable progressive validation")
- ("swap_resistance", po::value<uint32_t>(&(data.swap_resist))->default_value(4), "higher = more resistance to swap, default=4");
-
+ ("swap_resistance", po::value<uint32_t>(), "higher = more resistance to swap, default=4");
vm = add_options(all, opts);
-
+ if(!vm.count("log_multi"))
+ return NULL;
+
+ log_multi& data = calloc_or_die<log_multi>();
data.k = (uint32_t)vm["log_multi"].as<size_t>();
+ data.swap_resist = 4;
+
+ if (vm.count("swap_resistance"))
+ data.swap_resist = vm["swap_resistance"].as<uint32_t>();
+
*all.file_options << " --log_multi " << data.k;
if (vm.count("no_progress"))
diff --git a/vowpalwabbit/lrq.cc b/vowpalwabbit/lrq.cc
index 268815d5..119a3a1f 100644
--- a/vowpalwabbit/lrq.cc
+++ b/vowpalwabbit/lrq.cc
@@ -189,6 +189,14 @@ namespace LRQ {
base_learner* setup(vw& all, po::variables_map& vm)
{//parse and set arguments
+ po::options_description opts("Lrq options");
+ opts.add_options()
+ ("lrq", po::value<vector<string> > (), "use low rank quadratic features")
+ ("lrqdropout", "use dropout training for low rank quadratic features");
+ vm = add_options(all, opts);
+ if(!vm.count("lrq"))
+ return NULL;
+
LRQstate& lrq = calloc_or_die<LRQstate>();
size_t maxk = 0;
lrq.all = &all;
diff --git a/vowpalwabbit/mf.cc b/vowpalwabbit/mf.cc
index 4e00be8d..4db6d280 100644
--- a/vowpalwabbit/mf.cc
+++ b/vowpalwabbit/mf.cc
@@ -22,7 +22,7 @@ namespace MF {
struct mf {
vector<string> pairs;
- uint32_t rank;
+ size_t rank;
uint32_t increment;
@@ -188,22 +188,28 @@ void finish(mf& o) {
o.sub_predictions.delete_v();
}
-
-base_learner* setup(vw& all, po::variables_map& vm) {
- mf* data = new mf;
-
- // copy global data locally
- data->all = &all;
- data->rank = (uint32_t)vm["new_mf"].as<size_t>();
+ base_learner* setup(vw& all, po::variables_map& vm) {
+ po::options_description opts("MF options");
+ opts.add_options()
+ ("new_mf", po::value<size_t>(), "rank for reduction-based matrix factorization");
+ vm = add_options(all, opts);
+ if(!vm.count("new_mf"))
+ return NULL;
+
+ mf& data = calloc_or_die<mf>();
+
+ // copy global data locally
+ data.all = &all;
+ data.rank = (uint32_t)vm["new_mf"].as<size_t>();
// store global pairs in local data structure and clear global pairs
// for eventual calls to base learner
- data->pairs = all.pairs;
+ data.pairs = all.pairs;
all.pairs.clear();
all.random_positive_weights = true;
- learner<mf>& l = init_learner(data, all.l, learn, predict<false>, 2*data->rank+1);
+ learner<mf>& l = init_learner(&data, all.l, learn, predict<false>, 2*data.rank+1);
l.set_finish(finish);
return make_base(l);
}
diff --git a/vowpalwabbit/mf.h b/vowpalwabbit/mf.h
index 99643601..90ddc33a 100644
--- a/vowpalwabbit/mf.h
+++ b/vowpalwabbit/mf.h
@@ -4,12 +4,6 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-#include <math.h>
-#include "example.h"
-#include "parse_regressor.h"
-#include "parser.h"
-#include "gd.h"
-
namespace MF{
LEARNER::base_learner* setup(vw& all, po::variables_map& vm);
}
diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc
index bfab6009..b59ca56b 100644
--- a/vowpalwabbit/nn.cc
+++ b/vowpalwabbit/nn.cc
@@ -310,17 +310,18 @@ CONVERSE: // That's right, I'm using goto. So sue me.
base_learner* setup(vw& all, po::variables_map& vm)
{
- nn& n = calloc_or_die<nn>();
- n.all = &all;
-
- po::options_description nn_opts("NN options");
- nn_opts.add_options()
+ po::options_description opts("NN options");
+ opts.add_options()
+ ("nn", po::value<size_t>(), "Use sigmoidal feedforward network with <k> hidden units")
("inpass", "Train or test sigmoidal feedforward network with input passthrough.")
("dropout", "Train or test sigmoidal feedforward network using dropout.")
("meanfield", "Train or test sigmoidal feedforward network using mean field.");
-
- vm = add_options(all, nn_opts);
-
+ vm = add_options(all, opts);
+ if(!vm.count("nn"))
+ return NULL;
+
+ nn& n = calloc_or_die<nn>();
+ n.all = &all;
//first parse for number of hidden units
n.k = (uint32_t)vm["nn"].as<size_t>();
*all.file_options << " --nn " << n.k;
diff --git a/vowpalwabbit/nn.h b/vowpalwabbit/nn.h
index 52e08f46..820157d7 100644
--- a/vowpalwabbit/nn.h
+++ b/vowpalwabbit/nn.h
@@ -4,9 +4,6 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-#include "global_data.h"
-#include "parse_args.h"
-
namespace NN
{
LEARNER::base_learner* setup(vw& all, po::variables_map& vm);
diff --git a/vowpalwabbit/noop.cc b/vowpalwabbit/noop.cc
index 0c883a8c..065ff777 100644
--- a/vowpalwabbit/noop.cc
+++ b/vowpalwabbit/noop.cc
@@ -9,7 +9,15 @@ license as described in the file LICENSE.
namespace NOOP {
void learn(char&, LEARNER::base_learner&, example&) {}
-
- LEARNER::base_learner* setup(vw& all)
- { return &LEARNER::init_learner<char>(NULL, learn, 1); }
+
+ LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
+ {
+ po::options_description opts("Noop options");
+ opts.add_options()
+ ("noop","do no learning");
+ vm = add_options(all, opts);
+ if(!vm.count("noop"))
+ return NULL;
+
+ return &LEARNER::init_learner<char>(NULL, learn, 1); }
}
diff --git a/vowpalwabbit/noop.h b/vowpalwabbit/noop.h
index 5220e1ee..ac8842e9 100644
--- a/vowpalwabbit/noop.h
+++ b/vowpalwabbit/noop.h
@@ -4,6 +4,5 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-namespace NOOP {
- LEARNER::base_learner* setup(vw&);
-}
+namespace NOOP
+{ LEARNER::base_learner* setup(vw& all, po::variables_map& vm);}
diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc
index 2328b00d..03a7cac6 100644
--- a/vowpalwabbit/oaa.cc
+++ b/vowpalwabbit/oaa.cc
@@ -64,6 +64,13 @@ namespace OAA {
LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("One-against-all options");
+ opts.add_options()
+ ("oaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> labels");
+ vm = add_options(all, opts);
+ if(!vm.count("oaa"))
+ return NULL;
+
oaa& data = calloc_or_die<oaa>();
data.k = vm["oaa"].as<size_t>();
data.shouldOutput = all.raw_prediction > 0;
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index a760261f..715c9fca 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -17,6 +17,7 @@ license as described in the file LICENSE.
#include "network.h"
#include "global_data.h"
#include "nn.h"
+#include "gd.h"
#include "cbify.h"
#include "oaa.h"
#include "rand48.h"
@@ -547,9 +548,9 @@ void parse_feature_tweaks(vw& all, po::variables_map& vm)
void parse_example_tweaks(vw& all, po::variables_map& vm)
{
- po::options_description example_opts("Example options");
+ po::options_description opts("Example options");
- example_opts.add_options()
+ opts.add_options()
("testonly,t", "Ignore label information and just test")
("holdout_off", "no holdout data in multiple passes")
("holdout_period", po::value<uint32_t>(&(all.holdout_period)), "holdout period for test only, default 10")
@@ -566,7 +567,7 @@ void parse_example_tweaks(vw& all, po::variables_map& vm)
("l1", po::value<float>(&(all.l1_lambda)), "l_1 lambda")
("l2", po::value<float>(&(all.l2_lambda)), "l_2 lambda");
- vm = add_options(all, example_opts);
+ vm = add_options(all, opts);
if (vm.count("testonly") || all.eta == 0.)
{
@@ -716,48 +717,10 @@ void parse_output_model(vw& all, po::variables_map& vm)
void parse_base_algorithm(vw& all, po::variables_map& vm)
{
- //base learning algorithm.
- po::options_description base_opt("base algorithms (these are exclusive)");
-
- base_opt.add_options()
- ("sgd", "use regular stochastic gradient descent update.")
- ("ftrl", "use ftrl-proximal optimization")
- ("adaptive", "use adaptive, individual learning rates.")
- ("invariant", "use safe/importance aware updates.")
- ("normalized", "use per feature normalized updates")
- ("exact_adaptive_norm", "use current default invariant normalized adaptive update rule")
- ("bfgs", "use bfgs optimization")
- ("lda", po::value<uint32_t>(&(all.lda)), "Run lda with <int> topics")
- ("rank", po::value<uint32_t>(&(all.rank)), "rank for matrix factorization.")
- ("noop","do no learning")
- ("print","print examples")
- ("ksvm", "kernel svm")
- ("sendto", po::value< vector<string> >(), "send examples to <host>");
-
- vm = add_options(all, base_opt);
-
- if (vm.count("bfgs") || vm.count("conjugate_gradient"))
- all.l = BFGS::setup(all, vm);
- else if (vm.count("lda"))
- all.l = LDA::setup(all, vm);
- else if (vm.count("ftrl"))
+ // all.l = GD::setup(all, vm);
+ all.scorer = all.l;
+ if (vm.count("ftrl"))
all.l = FTRL::setup(all, vm);
- else if (vm.count("noop"))
- all.l = NOOP::setup(all);
- else if (vm.count("print"))
- all.l = PRINT::setup(all);
- else if (all.rank > 0)
- all.l = GDMF::setup(all, vm);
- else if (vm.count("sendto"))
- all.l = SENDER::setup(all, vm, all.pairs);
- else if (vm.count("ksvm")) {
- all.l = KSVM::setup(all, vm);
- }
- else
- {
- all.l = GD::setup(all, vm);
- all.scorer = all.l;
- }
}
void load_input_model(vw& all, po::variables_map& vm, io_buf& io_temp)
@@ -783,111 +746,54 @@ void load_input_model(vw& all, po::variables_map& vm, io_buf& io_temp)
}
}
-void parse_scorer_reductions(vw& all, po::variables_map& vm)
-{
- po::options_description score_mod_opt("Score modifying options (can be combined)");
-
- score_mod_opt.add_options()
- ("nn", po::value<size_t>(), "Use sigmoidal feedforward network with <k> hidden units")
- ("new_mf", po::value<size_t>(), "rank for reduction-based matrix factorization")
- ("autolink", po::value<size_t>(), "create link function with polynomial d")
- ("lrq", po::value<vector<string> > (), "use low rank quadratic features")
- ("lrqdropout", "use dropout training for low rank quadratic features")
- ("stage_poly", "use stagewise polynomial feature learning")
- ("active", "enable active learning");
-
- vm = add_options(all, score_mod_opt);
-
- if (vm.count("active"))
- all.l = ACTIVE::setup(all,vm);
-
- if(vm.count("nn"))
- all.l = NN::setup(all, vm);
-
- if (vm.count("new_mf"))
- all.l = MF::setup(all, vm);
-
- if(vm.count("autolink"))
- all.l = ALINK::setup(all, vm);
-
- if (vm.count("lrq"))
- all.l = LRQ::setup(all, vm);
-
- if (vm.count("stage_poly"))
- all.l = StagewisePoly::setup(all, vm);
-
- all.l = Scorer::setup(all, vm);
-}
-
-LEARNER::base_learner* exclusive_setup(vw& all, po::variables_map& vm, bool& score_consumer, LEARNER::base_learner* (*setup)(vw&, po::variables_map&))
+LEARNER::base_learner* setup_base(vw& all, po::variables_map& vm)
{
- if (score_consumer) { cerr << "error: cannot specify multiple direct score consumers" << endl; throw exception(); }
- score_consumer = true;
- return setup(all, vm);
+ LEARNER::base_learner* ret = all.reduction_stack.pop()(all,vm);
+ if (ret == NULL)
+ return setup_base(all,vm);
+ else
+ return ret;
}
-void parse_score_users(vw& all, po::variables_map& vm, bool& got_cs)
+void parse_reductions(vw& all, po::variables_map& vm)
{
- po::options_description multiclass_opt("Score user options (these are exclusive)");
- multiclass_opt.add_options()
- ("top", po::value<size_t>(), "top k recommendation")
- ("binary", "report loss as binary classification on -1,1")
- ("oaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> labels")
- ("ect", po::value<size_t>(), "Use error correcting tournament with <k> labels")
- ("log_multi", po::value<size_t>(), "Use online tree for multiclass")
- ("csoaa", po::value<size_t>(), "Use one-against-all multiclass learning with <k> costs")
- ("csoaa_ldf", po::value<string>(), "Use one-against-all multiclass learning with label dependent features. Specify singleline or multiline.")
- ("wap_ldf", po::value<string>(), "Use weighted all-pairs multiclass learning with label dependent features. Specify singleline or multiline.")
- ;
-
- vm = add_options(all, multiclass_opt);
- bool score_consumer = false;
-
- if(vm.count("top"))
- all.l = exclusive_setup(all, vm, score_consumer, TOPK::setup);
-
- if (vm.count("binary"))
- all.l = exclusive_setup(all, vm, score_consumer, BINARY::setup);
-
- if (vm.count("oaa"))
- all.l = exclusive_setup(all, vm, score_consumer, OAA::setup);
-
- if (vm.count("ect"))
- all.l = exclusive_setup(all, vm, score_consumer, ECT::setup);
-
- if(vm.count("csoaa")) {
- all.l = exclusive_setup(all, vm, score_consumer, CSOAA::setup);
- all.cost_sensitive = all.l;
- got_cs = true;
- }
-
- if(vm.count("log_multi")){
- all.l = exclusive_setup(all, vm, score_consumer, LOG_MULTI::setup);
- }
-
- if(vm.count("csoaa_ldf") || vm.count("csoaa_ldf")) {
- all.l = exclusive_setup(all, vm, score_consumer, CSOAA_AND_WAP_LDF::setup);
- all.cost_sensitive = all.l;
- got_cs = true;
- }
-
- if(vm.count("wap_ldf") || vm.count("wap_ldf") ) {
- all.l = exclusive_setup(all, vm, score_consumer, CSOAA_AND_WAP_LDF::setup);
- all.cost_sensitive = all.l;
- got_cs = true;
- }
+ //Base algorithms
+ all.reduction_stack.push_back(GD::setup);
+ all.reduction_stack.push_back(KSVM::setup);
+ all.reduction_stack.push_back(FTRL::setup);
+ all.reduction_stack.push_back(SENDER::setup);
+ all.reduction_stack.push_back(GDMF::setup);
+ all.reduction_stack.push_back(PRINT::setup);
+ all.reduction_stack.push_back(NOOP::setup);
+ all.reduction_stack.push_back(LDA::setup);
+ all.reduction_stack.push_back(BFGS::setup);
+
+ //Score Users
+ all.reduction_stack.push_back(ACTIVE::setup);
+ all.reduction_stack.push_back(NN::setup);
+ all.reduction_stack.push_back(MF::setup);
+ all.reduction_stack.push_back(ALINK::setup);
+ all.reduction_stack.push_back(LRQ::setup);
+ all.reduction_stack.push_back(StagewisePoly::setup);
+ all.reduction_stack.push_back(Scorer::setup);
+
+ //Reductions
+ all.reduction_stack.push_back(BINARY::setup);
+ all.reduction_stack.push_back(TOPK::setup);
+ all.reduction_stack.push_back(OAA::setup);
+ all.reduction_stack.push_back(ECT::setup);
+ all.reduction_stack.push_back(LOG_MULTI::setup);
+ all.reduction_stack.push_back(CSOAA::setup);
+ all.reduction_stack.push_back(CSOAA_AND_WAP_LDF::setup);
+ all.reduction_stack.push_back(CB_ALGS::setup);
+ all.reduction_stack.push_back(CBIFY::setup);
+ all.reduction_stack.push_back(Search::setup);
+
+ all.l = setup_base(all,vm);
}
void parse_cb(vw& all, po::variables_map& vm, bool& got_cs, bool& got_cb)
{
- po::options_description cb_opts("Contextual Bandit options");
-
- cb_opts.add_options()
- ("cb", po::value<size_t>(), "Use contextual bandit learning with <k> costs")
- ("cbify", po::value<size_t>(), "Convert multiclass on <k> classes into a contextual bandit problem and solve");
-
- vm = add_options(all,cb_opts);
-
if( vm.count("cb"))
{
if(!got_cs) {
@@ -895,11 +801,9 @@ void parse_cb(vw& all, po::variables_map& vm, bool& got_cs, bool& got_cb)
else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["cb"]));
all.l = CSOAA::setup(all, vm); // default to CSOAA unless wap is specified
- all.cost_sensitive = all.l;
got_cs = true;
}
- all.l = CB_ALGS::setup(all, vm);
got_cb = true;
}
@@ -909,7 +813,6 @@ void parse_cb(vw& all, po::variables_map& vm, bool& got_cs, bool& got_cb)
vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["cbify"]));
all.l = CSOAA::setup(all, vm); // default to CSOAA unless wap is specified
- all.cost_sensitive = all.l;
got_cs = true;
}
@@ -918,31 +821,7 @@ void parse_cb(vw& all, po::variables_map& vm, bool& got_cs, bool& got_cb)
all.l = CB_ALGS::setup(all, vm);
got_cb = true;
}
-
- all.l = CBIFY::setup(all, vm);
- }
-}
-
-void parse_search(vw& all, po::variables_map& vm, bool& got_cs, bool& got_cb)
-{
- po::options_description search_opts("Search");
-
- search_opts.add_options()
- ("search", po::value<size_t>(), "use search-based structured prediction, argument=maximum action id or 0 for LDF");
-
- vm = add_options(all,search_opts);
-
- if (vm.count("search")) {
- if (!got_cs && !got_cb) {
- if( vm.count("search") ) vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["search"]));
- else vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["search"]));
-
- all.l = CSOAA::setup(all, vm); // default to CSOAA unless others have been specified
- all.cost_sensitive = all.l;
- got_cs = true;
}
- all.l = Search::setup(all, vm);
- }
}
void add_to_args(vw& all, int argc, char* argv[])
@@ -1056,19 +935,12 @@ vw* parse_args(int argc, char *argv[])
parse_output_preds(*all, vm);
- parse_scorer_reductions(*all, vm);
-
bool got_cs = false;
- parse_score_users(*all, vm, got_cs);
-
bool got_cb = false;
parse_cb(*all, vm, got_cs, got_cb);
- parse_search(*all, vm, got_cs, got_cb);
-
-
if(vm.count("bootstrap"))
all->l = BS::setup(*all, vm);
diff --git a/vowpalwabbit/parse_args.h b/vowpalwabbit/parse_args.h
index 9e16d5bc..23531050 100644
--- a/vowpalwabbit/parse_args.h
+++ b/vowpalwabbit/parse_args.h
@@ -7,3 +7,4 @@ license as described in the file LICENSE.
#include "global_data.h"
vw* parse_args(int argc, char *argv[]);
+LEARNER::base_learner* setup_base(vw& all, po::variables_map& vm);
diff --git a/vowpalwabbit/print.cc b/vowpalwabbit/print.cc
index d0dc2765..d8d263be 100644
--- a/vowpalwabbit/print.cc
+++ b/vowpalwabbit/print.cc
@@ -40,11 +40,19 @@ namespace PRINT
GD::foreach_feature<vw, print_feature>(*(p.all), ec, *p.all);
cout << endl;
}
-
- LEARNER::base_learner* setup(vw& all)
+
+ LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("Print options");
+ opts.add_options()
+ ("print","print examples");
+ vm = add_options(all, opts);
+ if(!vm.count("print"))
+ return NULL;
+
print& p = calloc_or_die<print>();
p.all = &all;
+
size_t length = ((size_t)1) << all.num_bits;
all.reg.weight_mask = (length << all.reg.stride_shift) - 1;
all.reg.stride_shift = 0;
diff --git a/vowpalwabbit/print.h b/vowpalwabbit/print.h
index b6a771ed..2c855eaa 100644
--- a/vowpalwabbit/print.h
+++ b/vowpalwabbit/print.h
@@ -4,6 +4,5 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-namespace PRINT {
- LEARNER::base_learner* setup(vw& all);
-}
+namespace PRINT
+{ LEARNER::base_learner* setup(vw& all, po::variables_map& vm);}
diff --git a/vowpalwabbit/scorer.cc b/vowpalwabbit/scorer.cc
index 50645ed8..4d396841 100644
--- a/vowpalwabbit/scorer.cc
+++ b/vowpalwabbit/scorer.cc
@@ -33,15 +33,13 @@ namespace Scorer {
LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
{
- scorer& s = calloc_or_die<scorer>();
- s.all = &all;
-
- po::options_description link_opts("Link options");
-
- link_opts.add_options()
+ po::options_description opts("Link options");
+ opts.add_options()
("link", po::value<string>()->default_value("identity"), "Specify the link function: identity, logistic or glf1");
+ vm = add_options(all, opts);
- vm = add_options(all, link_opts);
+ scorer& s = calloc_or_die<scorer>();
+ s.all = &all;
LEARNER::learner<scorer>* l;
diff --git a/vowpalwabbit/search.cc b/vowpalwabbit/search.cc
index fb14e65c..71f00e89 100644
--- a/vowpalwabbit/search.cc
+++ b/vowpalwabbit/search.cc
@@ -1765,37 +1765,36 @@ namespace Search {
}
base_learner* setup(vw&all, po::variables_map& vm) {
- search& sch = calloc_or_die<search>();
- sch.priv = new search_private();
- search_initialize(&all, sch);
- search_private& priv = *sch.priv;
-
- po::options_description search_opts("Search Options");
- search_opts.add_options()
- ("search_task", po::value<string>(), "the search task (use \"--search_task list\" to get a list of available tasks)")
- ("search_interpolation", po::value<string>(), "at what level should interpolation happen? [*data|policy]")
- ("search_rollout", po::value<string>(), "how should rollouts be executed? [policy|oracle|*mix_per_state|mix_per_roll|none]")
- ("search_rollin", po::value<string>(), "how should past trajectories be generated? [policy|oracle|*mix_per_state|mix_per_roll]")
-
- ("search_passes_per_policy", po::value<size_t>(), "number of passes per policy (only valid for search_interpolation=policy) [def=1]")
- ("search_beta", po::value<float>(), "interpolation rate for policies (only valid for search_interpolation=policy) [def=0.5]")
-
- ("search_alpha", po::value<float>(), "annealed beta = 1-(1-alpha)^t (only valid for search_interpolation=data) [def=1e-10]")
-
- ("search_total_nb_policies", po::value<size_t>(), "if we are going to train the policies through multiple separate calls to vw, we need to specify this parameter and tell vw how many policies are eventually going to be trained")
-
- ("search_trained_nb_policies", po::value<size_t>(), "the number of trained policies in a file")
-
- ("search_allowed_transitions",po::value<string>(),"read file of allowed transitions [def: all transitions are allowed]")
- ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example")
- ("search_neighbor_features", po::value<string>(), "copy features from neighboring lines. argument looks like: '-1:a,+2' meaning copy previous line namespace a and next next line from namespace _unnamed_, where ',' separates them")
- ("search_rollout_num_steps", po::value<size_t>(), "how many calls of \"loss\" before we stop really predicting on rollouts and switch to oracle (def: 0 means \"infinite\")")
- ("search_history_length", po::value<size_t>(), "some tasks allow you to specify how much history their depend on; specify that here [def: 1]")
-
- ("search_no_caching", "turn off the built-in caching ability (makes things slower, but technically more safe)")
- ("search_beam", po::value<size_t>(), "use beam search (arg = beam size, default 0 = no beam)")
- ("search_kbest", po::value<size_t>(), "size of k-best list to produce (must be <= beam size)")
- ;
+ po::options_description opts("Search Options");
+ opts.add_options()
+ ("search", po::value<size_t>(), "use search-based structured prediction, argument=maximum action id or 0 for LDF")
+ ("search_task", po::value<string>(), "the search task (use \"--search_task list\" to get a list of available tasks)")
+ ("search_interpolation", po::value<string>(), "at what level should interpolation happen? [*data|policy]")
+ ("search_rollout", po::value<string>(), "how should rollouts be executed? [policy|oracle|*mix_per_state|mix_per_roll|none]")
+ ("search_rollin", po::value<string>(), "how should past trajectories be generated? [policy|oracle|*mix_per_state|mix_per_roll]")
+
+ ("search_passes_per_policy", po::value<size_t>(), "number of passes per policy (only valid for search_interpolation=policy) [def=1]")
+ ("search_beta", po::value<float>(), "interpolation rate for policies (only valid for search_interpolation=policy) [def=0.5]")
+
+ ("search_alpha", po::value<float>(), "annealed beta = 1-(1-alpha)^t (only valid for search_interpolation=data) [def=1e-10]")
+
+ ("search_total_nb_policies", po::value<size_t>(), "if we are going to train the policies through multiple separate calls to vw, we need to specify this parameter and tell vw how many policies are eventually going to be trained")
+
+ ("search_trained_nb_policies", po::value<size_t>(), "the number of trained policies in a file")
+
+ ("search_allowed_transitions",po::value<string>(),"read file of allowed transitions [def: all transitions are allowed]")
+ ("search_subsample_time", po::value<float>(), "instead of training at all timesteps, use a subset. if value in (0,1), train on a random v%. if v>=1, train on precisely v steps per example")
+ ("search_neighbor_features", po::value<string>(), "copy features from neighboring lines. argument looks like: '-1:a,+2' meaning copy previous line namespace a and next next line from namespace _unnamed_, where ',' separates them")
+ ("search_rollout_num_steps", po::value<size_t>(), "how many calls of \"loss\" before we stop really predicting on rollouts and switch to oracle (def: 0 means \"infinite\")")
+ ("search_history_length", po::value<size_t>(), "some tasks allow you to specify how much history their depend on; specify that here [def: 1]")
+
+ ("search_no_caching", "turn off the built-in caching ability (makes things slower, but technically more safe)")
+ ("search_beam", po::value<size_t>(), "use beam search (arg = beam size, default 0 = no beam)")
+ ("search_kbest", po::value<size_t>(), "size of k-best list to produce (must be <= beam size)")
+ ;
+ vm = add_options(all, opts);
+ if (!vm.count("search"))
+ return NULL;
bool has_hook_task = false;
for (size_t i=0; i<all.args.size()-1; i++)
@@ -1805,9 +1804,12 @@ namespace Search {
for (int i = (int)all.args.size()-2; i >= 0; i--)
if (all.args[i] == "--search_task" && all.args[i+1] != "hook")
all.args.erase(all.args.begin() + i, all.args.begin() + i + 2);
+
+ search& sch = calloc_or_die<search>();
+ sch.priv = new search_private();
+ search_initialize(&all, sch);
+ search_private& priv = *sch.priv;
- vm = add_options(all, search_opts);
-
std::string task_string;
std::string interpolation_string = "data";
std::string rollout_string = "mix_per_state";
@@ -1981,6 +1983,10 @@ namespace Search {
priv.start_clock_time = clock();
+ if (!vm.count("csoaa") && !vm.count("csoaa_ldf") && !vm.count("wap_ldf") && !vm.count("cb"))
+ vm.insert(pair<string,po::variable_value>(string("csoaa"),vm["search"]));
+ base_learner* base = setup_base(all,vm);
+
learner<search>& l = init_learner(&sch, all.l, search_predict_or_learn<true>,
search_predict_or_learn<false>,
priv.total_number_of_policies);
diff --git a/vowpalwabbit/search.h b/vowpalwabbit/search.h
index e129de25..08633c5e 100644
--- a/vowpalwabbit/search.h
+++ b/vowpalwabbit/search.h
@@ -242,7 +242,4 @@ namespace Search {
// our interface within VW
LEARNER::base_learner* setup(vw&, po::variables_map&);
- void search_finish(void*);
- void search_drive(void*);
- void search_learn(void*,example*);
}
diff --git a/vowpalwabbit/sender.cc b/vowpalwabbit/sender.cc
index a9ded7e4..b943caec 100644
--- a/vowpalwabbit/sender.cc
+++ b/vowpalwabbit/sender.cc
@@ -96,8 +96,15 @@ void end_examples(sender& s)
delete s.buf;
}
- LEARNER::base_learner* setup(vw& all, po::variables_map& vm, vector<string> pairs)
-{
+ LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
+ {
+ po::options_description opts("Sender options");
+ opts.add_options()
+ ("sendto", po::value< vector<string> >(), "send examples to <host>");
+ vm = add_options(all, opts);
+ if(!vm.count("sendto"))
+ return NULL;
+
sender& s = calloc_or_die<sender>();
s.sd = -1;
if (vm.count("sendto"))
diff --git a/vowpalwabbit/sender.h b/vowpalwabbit/sender.h
index 9740f159..55f10754 100644
--- a/vowpalwabbit/sender.h
+++ b/vowpalwabbit/sender.h
@@ -4,6 +4,5 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-namespace SENDER{
- LEARNER::base_learner* setup(vw& all, po::variables_map& vm, vector<string> pairs);
-}
+namespace SENDER
+{ LEARNER::base_learner* setup(vw& all, po::variables_map& vm); }
diff --git a/vowpalwabbit/stagewise_poly.cc b/vowpalwabbit/stagewise_poly.cc
index b2e7e150..c435f45b 100644
--- a/vowpalwabbit/stagewise_poly.cc
+++ b/vowpalwabbit/stagewise_poly.cc
@@ -656,17 +656,11 @@ namespace StagewisePoly
//#endif //DEBUG
}
-
base_learner *setup(vw &all, po::variables_map &vm)
{
- stagewise_poly& poly = calloc_or_die<stagewise_poly>();
- poly.all = &all;
-
- depthsbits_create(poly);
- sort_data_create(poly);
-
- po::options_description sp_opt("Stagewise poly options");
- sp_opt.add_options()
+ po::options_description opts("Stagewise poly options");
+ opts.add_options()
+ ("stage_poly", "use stagewise polynomial feature learning")
("sched_exponent", po::value<float>(), "exponent controlling quantity of included features")
("batch_sz", po::value<uint32_t>(), "multiplier on batch size before including more features")
("batch_sz_no_doubling", "batch_sz does not double")
@@ -674,7 +668,14 @@ namespace StagewisePoly
("magic_argument", po::value<float>(), "magical feature flag")
#endif //MAGIC_ARGUMENT
;
- vm = add_options(all, sp_opt);
+ vm = add_options(all, opts);
+ if (vm.count("stage_poly"))
+ return NULL;
+
+ stagewise_poly& poly = calloc_or_die<stagewise_poly>();
+ poly.all = &all;
+ depthsbits_create(poly);
+ sort_data_create(poly);
poly.sched_exponent = vm.count("sched_exponent") ? vm["sched_exponent"].as<float>() : 1.f;
poly.batch_sz = vm.count("batch_sz") ? vm["batch_sz"].as<uint32_t>() : 1000;
diff --git a/vowpalwabbit/topk.cc b/vowpalwabbit/topk.cc
index 445bdb23..d6ab27c5 100644
--- a/vowpalwabbit/topk.cc
+++ b/vowpalwabbit/topk.cc
@@ -104,6 +104,13 @@ namespace TOPK {
LEARNER::base_learner* setup(vw& all, po::variables_map& vm)
{
+ po::options_description opts("TOP K options");
+ opts.add_options()
+ ("top", po::value<size_t>(), "top k recommendation");
+ vm = add_options(all,opts);
+ if(!vm.count("top"))
+ return NULL;
+
topk& data = calloc_or_die<topk>();
data.B = (uint32_t)vm["top"].as<size_t>();
data.all = &all;
diff --git a/vowpalwabbit/topk.h b/vowpalwabbit/topk.h
index 866d94c5..964ff618 100644
--- a/vowpalwabbit/topk.h
+++ b/vowpalwabbit/topk.h
@@ -4,14 +4,6 @@ individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#pragma once
-#include "io_buf.h"
-#include "parse_primitives.h"
-#include "global_data.h"
-#include "example.h"
-#include "parse_args.h"
-#include "v_hashmap.h"
-#include "simple_label.h"
-
namespace TOPK
{
LEARNER::base_learner* setup(vw& all, po::variables_map& vm);