commit 9186c1d97a9280f4fbf7d2779006b48f44978df6
tree   e3ec52734bc9a4a03b7274f7bd567ef736a89930
parent 6acaab7c89673ac56b20e9dfa456c8c564cf4ad4
parent 5fbefcc27257b3386fd93b3983aa799c60704f47
author     Hal Daume III <me@hal3.name>   2013-11-05 07:17:49 +0400
committer  Hal Daume III <me@hal3.name>   2013-11-05 07:17:49 +0400

    pulled john's changes
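This merge reworks the reduction interface. Previously, every reduction kept its own increment/total_increment fields and shifted an example's feature offset by hand through update_example_indicies(), while a global counter, vw::weights_per_problem, tracked how many weight vectors the stack consumed. After the merge, each learner carries two fields: weights (the number of sub-problems it multiplexes over the learner below it) and increment (the feature-offset distance between adjacent sub-problems), and learn(ec, i) applies and removes the offset for sub-problem i internally. The global counter survives only as vw::wpp, computed once at the end of parse_args.cc.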
 vowpalwabbit/bfgs.cc          |   2
 vowpalwabbit/bs.cc            |  14
 vowpalwabbit/cb.cc            |  52
 vowpalwabbit/csoaa.cc         |  17
 vowpalwabbit/ect.cc           |  56
 vowpalwabbit/example.h        |   3
 vowpalwabbit/gd.cc            |   3
 vowpalwabbit/gd_mf.cc         |   4
 vowpalwabbit/global_data.cc   |   2
 vowpalwabbit/global_data.h    |   2
 vowpalwabbit/lda_core.cc      |   2
 vowpalwabbit/learner.h        |  22
 vowpalwabbit/nn.cc            | 139
 vowpalwabbit/oaa.cc           |  14
 vowpalwabbit/parse_args.cc    |  13
 vowpalwabbit/parse_example.cc |   5
 vowpalwabbit/parser.cc        |   2
 vowpalwabbit/searn.cc         |  33
 vowpalwabbit/searn.h          |   2
 vowpalwabbit/sender.cc        |   4
 vowpalwabbit/wap.cc           |  14

 21 files changed, 156 insertions(+), 249 deletions(-)
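The same mechanical change repeats through most of the diffs below: reductions stop shifting ec->ft_offset by hand and instead pass a sub-problem index to base.learn(ec, i). A minimal standalone sketch of the before/after pattern (the example/learner structs here are toy stand-ins, not VW's real types):

    // Standalone sketch: how the new learn(ec, i) replaces manual offset
    // bookkeeping. "example" and "learner" are toy stand-ins for VW's types.
    #include <cstdio>
    #include <cstddef>

    struct example { size_t ft_offset; float final_prediction; };

    struct learner {
      size_t increment;             // ft_offset step between adjacent sub-problems
      void (*base_learn)(example*); // the wrapped learning routine

      // The new API from learner.h: shift, learn, unshift, all in one place.
      void learn(example* ec, size_t i = 0) {
        ec->ft_offset += increment * i;
        base_learn(ec);
        ec->ft_offset -= increment * i;
      }
    };

    // Before the merge, an OAA-style loop read roughly:
    //   if (i != 1) update_example_indicies(ec, o->increment);
    //   base.learn(ec);
    //   ...
    //   update_example_indicies(ec, -o->total_increment);  // undo at the end
    // After it, the body is one call and the offset is restored every time:
    void oaa_style_learn(learner& base, example* ec, size_t k) {
      for (size_t i = 1; i <= k; i++)
        base.learn(ec, i - 1);
    }

    static void toy_base(example* ec) {
      std::printf("learn at ft_offset=%zu\n", ec->ft_offset);
    }

    int main() {
      learner base = {4, toy_base};  // e.g. increment == a stride of 4
      example ec = {0, 0.f};
      oaa_style_learn(base, &ec, 3); // prints offsets 0, 4, 8
      return 0;
    }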
diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index 6c090963..65f3927b 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -974,7 +974,7 @@ learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po
   all.bfgs = true;
   all.reg.stride = 4;
 
-  learner* l = new learner(b,learn);
+  learner* l = new learner(b,learn, save_load, all.reg.stride);
   l->set_save_load(save_load);
   l->set_init_driver(init_driver);
   l->set_end_pass(end_pass);
diff --git a/vowpalwabbit/bs.cc b/vowpalwabbit/bs.cc
index 568e1281..f683d3a3 100644
--- a/vowpalwabbit/bs.cc
+++ b/vowpalwabbit/bs.cc
@@ -23,8 +23,6 @@ namespace BS {
   struct bs{
     uint32_t B; //number of bootstrap rounds
     size_t bs_type;
-    uint32_t increment;
-    uint32_t total_increment;
     float lb;
     float ub;
     vector<double> pred_vec;
@@ -181,12 +179,9 @@ namespace BS {
 
     for (size_t i = 1; i <= d->B; i++)
       {
-        if (i != 1)
-          update_example_indicies(ec, d->increment);
-
         ((label_data*)ec->ld)->weight = weight_temp * weight_gen();
 
-        base.learn(ec);
+        base.learn(ec, i-1);
 
         d->pred_vec.push_back(ec->final_prediction);
@@ -198,8 +193,6 @@ namespace BS {
 
     ((label_data*)ec->ld)->weight = weight_temp;
 
-    update_example_indicies(ec, -d->total_increment);
-
     switch(d->bs_type)
       {
       case BS_TYPE_MEAN:
@@ -296,11 +289,8 @@ namespace BS {
     data->pred_vec.reserve(data->B);
     data->all = &all;
 
-    data->increment = all.reg.stride * all.weights_per_problem;
-    all.weights_per_problem *= data->B;
-    data->total_increment = data->increment*(data->B-1);
-    learner* l = new learner(data, learn, all.l);
+    learner* l = new learner(data, learn, all.l, data->B);
     l->set_finish_example(finish_example);
 
     return l;
diff --git a/vowpalwabbit/cb.cc b/vowpalwabbit/cb.cc
index 17d3c983..482edaf3 100644
--- a/vowpalwabbit/cb.cc
+++ b/vowpalwabbit/cb.cc
@@ -17,7 +17,6 @@ license as described in the file LICENSE.
 namespace CB
 {
   struct cb {
-    uint32_t increment;
     size_t cb_type;
     CSOAA::label cb_cs_ld;
     float avg_loss_regressors;
@@ -280,18 +279,14 @@ namespace CB
   void call_scorer(vw& all, cb& c, example* ec, uint32_t index)
   {
-    uint32_t desired_increment = c.increment * (2*index-1);
-
     float old_min = all.sd->min_label;
     //all.sd->min_label = c.min_cost;
     float old_max = all.sd->max_label;
     //all.sd->max_label = c.max_cost;
-    update_example_indicies(ec, desired_increment);
-    all.scorer->learn(ec);
+    all.scorer->learn(ec, 2*(index)-1);
     all.sd->min_label = old_min;
     all.sd->max_label = old_max;
-    update_example_indicies(ec, -desired_increment);
-  }
+  }
 
   float get_cost_pred(vw& all, cb& c, example* ec, uint32_t index)
   {
@@ -337,7 +332,7 @@ namespace CB
       wc.wap_value = 0.;
 
       //get cost prediction for this action
-      wc.x = get_cost_pred(all, c,ec,i);
+      wc.x = get_cost_pred(all, c, ec, i-1);
       if (wc.x < min)
         {
           min = wc.x;
@@ -366,7 +361,7 @@ namespace CB
       wc.wap_value = 0.;
 
       //get cost prediction for this action
-      wc.x = get_cost_pred(all, c,ec,cl->weight_index);
+      wc.x = get_cost_pred(all, c, ec, cl->weight_index - 1);
       if (wc.x < min || (wc.x == min && cl->weight_index < argmin))
        {
          min = wc.x;
@@ -405,7 +400,7 @@ namespace CB
       wc.wap_value = 0.;
 
       //get cost prediction for this label
-      wc.x = get_cost_pred(all, c,ec,i);
+      wc.x = get_cost_pred(all, c,ec, all.sd->k + i - 1);
       wc.weight_index = i;
       wc.partial_prediction = 0.;
       wc.wap_value = 0.;
@@ -430,7 +425,7 @@ namespace CB
       wc.wap_value = 0.;
 
       //get cost prediction for this label
-      wc.x = get_cost_pred(all,c,ec,cl->weight_index);
+      wc.x = get_cost_pred(all, c, ec, all.sd->k + cl->weight_index - 1);
       wc.weight_index = cl->weight_index;
       wc.partial_prediction = 0.;
       wc.wap_value = 0.;
@@ -689,7 +684,9 @@ namespace CB
       ss << " --cb " << nb_actions;
       all.options_from_file.append(ss.str());
     }
+    all.sd->k = nb_actions;
+    size_t problem_multiplier = 2;//default for DR
 
     if (vm.count("cb_type") || vm_file.count("cb_type"))
     {
       std::string type_string;
@@ -706,41 +703,38 @@ namespace CB
         all.options_from_file.append(type_string);
       }
 
-      c->increment = all.weights_per_problem * all.reg.stride;
-
-      if (type_string.compare("dr") == 0) {
+      if (type_string.compare("dr") == 0)
         c->cb_type = CB_TYPE_DR;
-        all.weights_per_problem *= nb_actions * 2;
-      }
-      else if (type_string.compare("dm") == 0) {
-        c->cb_type = CB_TYPE_DM;
-        all.weights_per_problem *= nb_actions * 2;
-      }
-      else if (type_string.compare("ips") == 0) {
-        c->cb_type = CB_TYPE_IPS;
-        all.weights_per_problem *= nb_actions;
-      }
+      else if (type_string.compare("dm") == 0)
+      {
+        c->cb_type = CB_TYPE_DM;
+        problem_multiplier = 1;
+      }
+      else if (type_string.compare("ips") == 0)
+      {
+        c->cb_type = CB_TYPE_IPS;
+        problem_multiplier = 1;
+      }
       else {
         std::cerr << "warning: cb_type must be in {'ips','dm','dr'}; resetting to dr." << std::endl;
         c->cb_type = CB_TYPE_DR;
-        all.weights_per_problem *= nb_actions * 2;
       }
     }
     else {
       //by default use doubly robust
       c->cb_type = CB_TYPE_DR;
-      all.weights_per_problem *= nb_actions * 2;
       all.options_from_file.append(" --cb_type dr");
     }
 
     *(all.p->lp) = CB::cb_label_parser;
 
-    all.sd->k = nb_actions;
-
-    learner* l = new learner(c, learn, all.l);
+    learner* l = new learner(c, learn, all.l, problem_multiplier);
     l->set_finish_example(finish_example);
     l->set_init_driver(init_driver);
     l->set_finish(finish);
+    // preserve the increment of the base learner since we are
+    // _adding_ to the number of problems rather than multiplying.
+    l->increment = all.l->increment;
 
     return l;
   }
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index aa9e5757..e3b42341 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -19,7 +19,6 @@ using namespace std;
 
 namespace CSOAA {
   struct csoaa{
-    uint32_t csoaa_increment;
     vw* all;
   };
 
@@ -320,7 +319,6 @@ namespace CSOAA {
     size_t prediction = 1;
     float score = FLT_MAX;
-    uint32_t current_increment = 0;
     for (wclass *cl = ld->costs.begin; cl != ld->costs.end; cl ++)
       {
         uint32_t i = cl->weight_index;
@@ -340,14 +338,7 @@ namespace CSOAA {
 
         ec->ld = &simple_temp;
 
-        uint32_t desired_increment = c->csoaa_increment * (i-1);
-
-        if (desired_increment != current_increment) {
-          update_example_indicies(ec, desired_increment - current_increment);
-          current_increment = desired_increment;
-        }
-
-        base.learn(ec);
+        base.learn(ec, i);
         cl->partial_prediction = ec->partial_prediction;
         if (ec->partial_prediction < score || (ec->partial_prediction == score && i < prediction)) {
           score = ec->partial_prediction;
@@ -357,8 +348,6 @@ namespace CSOAA {
       }
     ec->ld = ld;
     ec->final_prediction = (float)prediction;
-    if (current_increment != 0)
-      update_example_indicies(ec, -current_increment);
   }
 
   void finish_example(vw& all, void*, example* ec)
@@ -388,11 +377,9 @@ namespace CSOAA {
     }
 
     *(all.p->lp) = cs_label_parser;
-    c->csoaa_increment = all.weights_per_problem * all.reg.stride;
-    all.weights_per_problem *= nb_actions;
     all.sd->k = nb_actions;
 
-    learner* l = new learner(c, learn, all.l);
+    learner* l = new learner(c, learn, all.l, nb_actions);
     l->set_finish_example(finish_example);
     return l;
   }
diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc
index c13bf89f..83b89492 100644
--- a/vowpalwabbit/ect.cc
+++ b/vowpalwabbit/ect.cc
@@ -50,8 +50,6 @@ namespace ECT
 
     uint32_t last_pair;
 
-    uint32_t increment;
-
     v_array<bool> tournaments_won;
 
     vw* all;
@@ -96,10 +94,10 @@ namespace ECT
     cout << endl;
   }
 
-  void create_circuit(vw& all, ect& e, uint32_t max_label, uint32_t eliminations)
+  size_t create_circuit(vw& all, ect& e, uint32_t max_label, uint32_t eliminations)
   {
     if (max_label == 1)
-      return;
+      return 0;
 
     v_array<v_array<uint32_t > > tournaments;
 
@@ -183,11 +181,8 @@ namespace ECT
 
     if ( max_label > 1)
       e.tree_height = final_depth(eliminations);
-
-    if (e.last_pair > 0) {
-      all.weights_per_problem *= (e.last_pair + (eliminations-1));
-      e.increment = (uint32_t) all.length() / all.weights_per_problem * all.reg.stride;
-    }
+
+    return e.last_pair + (eliminations-1);
   }
 
   float ect_predict(vw& all, ect& e, learner& base, example* ec)
@@ -205,17 +200,10 @@ namespace ECT
     {
       if ((finals_winner | (((size_t)1) << i)) <= e.errors)
        {// a real choice exists
-         uint32_t offset = 0;
-
          uint32_t problem_number = e.last_pair + (finals_winner | (((uint32_t)1) << i)) - 1; //This is unique.
-         offset = problem_number*e.increment;
-
-         update_example_indicies(ec,offset);
-
-         base.learn(ec);
+         base.learn(ec, problem_number);
 
-         update_example_indicies(ec,-offset);
-
          float pred = ec->final_prediction;
          if (pred > 0.)
            finals_winner = finals_winner | (((size_t)1) << i);
@@ -225,15 +213,9 @@ namespace ECT
     uint32_t id = e.final_nodes[finals_winner];
     while (id >= e.k)
       {
-       uint32_t offset = (id-e.k)*e.increment;
-
-       ec->partial_prediction = 0;
-       update_example_indicies(ec,offset);
-       base.learn(ec);
-       float pred = ec->final_prediction;
-       update_example_indicies(ec,-offset);
+       base.learn(ec, id - e.k);
 
-       if (pred > 0.)
+       if (ec->final_prediction > 0.)
          id = e.directions[id].right;
        else
          id = e.directions[id].left;
@@ -271,17 +253,10 @@ namespace ECT
       simple_temp.weight = mc->weight;
       ec->ld = &simple_temp;
 
-      uint32_t offset = (id-e.k)*e.increment;
-
-      update_example_indicies(ec,offset);
-
-      ec->partial_prediction = 0;
-      base.learn(ec);
+      base.learn(ec, id-e.k);
       simple_temp.weight = 0.;
-      ec->partial_prediction = 0;
-      base.learn(ec);//inefficient, we should extract final prediction exactly.
+      base.learn(ec, id-e.k);//inefficient, we should extract final prediction exactly.
       float pred = ec->final_prediction;
-      update_example_indicies(ec,-offset);
 
       bool won = pred*simple_temp.label > 0;
@@ -333,14 +308,7 @@ namespace ECT
 
          uint32_t problem_number = e.last_pair + j*(1 << (i+1)) + (1 << i) -1;
 
-         uint32_t offset = problem_number*e.increment;
-
-         update_example_indicies(ec,offset);
-         ec->partial_prediction = 0;
-
-         base.learn(ec);
-
-         update_example_indicies(ec,-offset);
+         base.learn(ec, problem_number);
 
          float pred = ec->final_prediction;
          if (pred > 0.)
@@ -447,10 +415,10 @@ namespace ECT
     }
 
     *(all.p->lp) = OAA::mc_label_parser;
-    create_circuit(all, *data, data->k, data->errors+1);
+    size_t wpp = create_circuit(all, *data, data->k, data->errors+1);
     data->all = &all;
 
-    learner* l = new learner(data, learn, all.l);
+    learner* l = new learner(data, learn, all.l, wpp);
     l->set_finish_example(OAA::finish_example);
     l->set_finish(finish);
diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h
index b5961e61..8b096a9e 100644
--- a/vowpalwabbit/example.h
+++ b/vowpalwabbit/example.h
@@ -92,9 +92,6 @@ void free_flatten_example(flat_example* fec);
 example *alloc_example(size_t);
 void dealloc_example(void(*delete_label)(void*), example&);
 
-inline void update_example_indicies(example* ec, uint32_t amount)
-{ ec->ft_offset += amount; }
-
 inline int example_is_newline(example* ec)
 {
   // if only index is constant namespace or no index
diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc
index 4aa66a4a..d8d91d44 100644
--- a/vowpalwabbit/gd.cc
+++ b/vowpalwabbit/gd.cc
@@ -903,8 +903,7 @@ learner* setup(vw& all, po::variables_map& vm)
       g->early_stop_thres = vm["early_terminate"].as< size_t>();
     }
 
-  learner* ret = new learner(g,learn);
-  ret->set_save_load(save_load);
+  learner* ret = new learner(g,learn, save_load, all.reg.stride);
   ret->set_end_pass(end_pass);
   return ret;
 }
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index 3d638472..727646f9 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -294,9 +294,9 @@ void end_pass(void* d)
 {
   gdmf* data = (gdmf*)calloc(1,sizeof(gdmf));
   data->all = &all;
 
-  learner* l = new learner(data,learn);
-  l->set_save_load(save_load);
+  learner* l = new learner(data,learn, save_load, all.reg.stride);
   l->set_end_pass(end_pass);
+
   return l;
 }
diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc
index 1a60cc60..597d391f 100644
--- a/vowpalwabbit/global_data.cc
+++ b/vowpalwabbit/global_data.cc
@@ -263,8 +263,6 @@ vw::vw()
 
   set_minmax = set_mm;
 
-  weights_per_problem = 1;
-
   power_t = 0.5;
   eta = 0.5; //default learning rate for normalized adaptive updates, this is switched to 10 by default for the other updates (see parse_args.cc)
   numpasses = 1;
diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index 47a38d7e..0b60a890 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -161,7 +161,7 @@ struct vw {
   bool searn;
   void* /*Searn::searn*/ searnstr;
 
-  uint32_t weights_per_problem; //this stores the current number of "weight vector" required by the based learner, which is used to compute offsets when composing reductions
+  uint32_t wpp;
 
   int stdout_fileno;
diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc
index f69186e4..55d6ca12 100644
--- a/vowpalwabbit/lda_core.cc
+++ b/vowpalwabbit/lda_core.cc
@@ -757,7 +757,7 @@ learner* setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
   ld->decay_levels.push_back(0.f);
 
-  learner* l = new learner(ld, learn);
+  learner* l = new learner(ld, learn, save_load, all.reg.stride);
   l->set_save_load(save_load);
   l->set_finish_example(finish_example);
   l->set_end_examples(end_examples);
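The learner.h diff that follows is the heart of the change, so the arithmetic is worth spelling out. A base learner is constructed with increment = params_per_weight (its stride); a reduction built on top gets increment = base->increment * base->weights, so each of its sub-problems steps over the whole parameter block of the stack beneath it. A standalone model of just that rule (make_base and make_reduction are invented names; the real constructors also wire up function tables and copy the base's state):

    // Toy model of the increment/weights rule in the two learner.h constructors.
    #include <cassert>
    #include <cstddef>

    struct learner {
      size_t weights   = 1; // sub-problems this layer creates ("ws")
      size_t increment = 1; // ft_offset distance between its sub-problems
    };

    // models learner(dat, learn, save_load, params_per_weight):
    learner make_base(size_t params_per_weight) {
      learner l;
      l.increment = params_per_weight;
      return l;
    }

    // models learner(dat, learn, base, ws):
    learner make_reduction(const learner& base, size_t ws) {
      learner l;
      l.increment = base.increment * base.weights; // step over base's block
      l.weights = ws;
      return l;
    }

    int main() {
      learner gd  = make_base(4);            // gd.cc: stride of 4
      learner oaa = make_reduction(gd, 10);  // e.g. oaa.cc with 10 classes
      learner bs  = make_reduction(oaa, 5);  // e.g. bs.cc with 5 rounds
      assert(oaa.increment == 4);            // oaa sub-problems sit 4 apart
      assert(bs.increment == 40);            // bs steps over a whole oaa block
      assert(bs.increment * bs.weights == 200); // params per feature, pre-rounding
      return 0;
    }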
diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h
index 53666766..73c41151 100644
--- a/vowpalwabbit/learner.h
+++ b/vowpalwabbit/learner.h
@@ -73,8 +73,16 @@ private:
   func_data finisher_fd;
 
 public:
+  size_t weights; //this stores the number of "weight vectors" required by the learner.
+  size_t increment;
+
   //called once for each example.  Must work under reduction.
-  inline void learn(example* ec) { learn_fd.learn_f(learn_fd.data, *learn_fd.base, ec); }
+  inline void learn(example* ec, size_t i=0)
+  {
+    ec->ft_offset += increment*i;
+    learn_fd.learn_f(learn_fd.data, *learn_fd.base, ec);
+    ec->ft_offset += - (increment*i);
+  }
 
   //called anytime saving or loading needs to happen. Autorecursive.
   inline void save_load(io_buf& io, bool read, bool text) { save_load_fd.save_load_f(save_load_fd.data, io, read, text); if (save_load_fd.base) save_load_fd.base->save_load(io, read, text); }
@@ -115,6 +123,9 @@ public:
 
   inline learner()
   {
+    weights = 1;
+    increment = 1;
+
     learn_fd = LEARNER::generic_learn_fd;
     finish_example_fd.data = NULL;
     finish_example_fd.finish_example_f = return_simple_example;
@@ -125,21 +136,26 @@ public:
     save_load_fd = LEARNER::generic_save_load_fd;
   }
 
-  inline learner(void *dat, void (*l)(void*, learner&, example*))
+  inline learner(void *dat, void (*l)(void*, learner&, example*), void (*sl)(void*, io_buf& io, bool read, bool text), size_t params_per_weight)
   { // the constructor for all learning algorithms.
     *this = learner();
 
     learn_fd.data = dat;
     learn_fd.learn_f = l;
+    set_save_load(sl);
+    increment = params_per_weight;
   }
 
-  inline learner(void *dat, void (*l)(void*, learner&, example*), learner* base)
+  inline learner(void *dat, void (*l)(void*, learner&, example*), learner* base, size_t ws = 1)
   { //the reduction constructor.
     *this = *base;
 
     learn_fd.learn_f = l;
     learn_fd.data = dat;
     learn_fd.base = base;
+
+    increment = base->increment * base->weights;
+    weights = ws;
   }
 };
diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc
index 1949deea..7ab9294f 100644
--- a/vowpalwabbit/nn.cc
+++ b/vowpalwabbit/nn.cc
@@ -25,9 +25,9 @@ namespace NN {
   struct nn {
     uint32_t k;
-    uint32_t increment;
     loss_function* squared_loss;
     example output_layer;
+    size_t increment;
     bool dropout;
     uint64_t xsubi;
     uint64_t save_xsubi;
@@ -71,7 +71,63 @@ namespace NN {
       x->weight_index += offset;
   }
 
-  void finish_setup (nn* n, vw& all);
+  void finish_setup (nn& n, vw& all)
+  {
+    bool initialize = true;
+
+    // TODO: output_layer audit
+
+    memset (&n.output_layer, 0, sizeof (n.output_layer));
+    n.output_layer.indices.push_back(nn_output_namespace);
+    feature output = {1., nn_constant*all.reg.stride};
+
+    for (unsigned int i = 0; i < n.k; ++i)
+      {
+        n.output_layer.atomics[nn_output_namespace].push_back(output);
+        initialize &= (all.reg.weight_vector[output.weight_index & all.reg.weight_mask] == 0);
+        ++n.output_layer.num_features;
+        output.weight_index += n.increment;
+      }
+
+    if (! n.inpass)
+      {
+        n.output_layer.atomics[nn_output_namespace].push_back(output);
+        initialize &= (all.reg.weight_vector[output.weight_index & all.reg.weight_mask] == 0);
+        ++n.output_layer.num_features;
+      }
+
+    n.output_layer.in_use = true;
+
+    if (initialize) {
+      // output weights
+
+      float sqrtk = sqrt ((float)n.k);
+      for (feature* x = n.output_layer.atomics[nn_output_namespace].begin;
+           x != n.output_layer.atomics[nn_output_namespace].end;
+           ++x)
+        {
+          weight* w = &all.reg.weight_vector[x->weight_index & all.reg.weight_mask];
+
+          w[0] = (float) (frand48 () - 0.5) / sqrtk;
+
+          // prevent divide by zero error
+          if (n.dropout && all.normalized_updates)
+            w[all.normalized_idx] = 1e-4f;
+        }
+
+      // hidden biases
+
+      unsigned int weight_index = constant * all.reg.stride;
+
+      for (unsigned int i = 0; i < n.k; ++i)
+        {
+          weight_index += n.increment;
+          all.reg.weight_vector[weight_index & all.reg.weight_mask] = (float) (frand48 () - 0.5);
+        }
+    }
+
+    n.finished_setup = true;
+  }
 
   void end_pass(void* d)
   {
@@ -87,7 +143,7 @@ namespace NN {
     bool shouldOutput = n->all->raw_prediction > 0;
 
     if (! n->finished_setup)
-      finish_setup (n, *(n->all));
+      finish_setup (*n, *(n->all));
 
     label_data* ld = (label_data*)ec->ld;
     float save_label = ld->label;
@@ -112,9 +168,7 @@ namespace NN {
     ld->label = FLT_MAX;
     for (unsigned int i = 0; i < n->k; ++i)
       {
-        update_example_indicies(ec, n->increment);
-
-        base.learn(ec);
+        base.learn(ec, i);
 
         hidden_units[i] = ec->final_prediction;
 
         dropped_out[i] = (n->dropout && merand48 (n->xsubi) < 0.5);
@@ -124,7 +178,6 @@ namespace NN {
             outputStringStream << i << ':' << ec->partial_prediction << ',' << fasttanh (hidden_units[i]);
           }
       }
-    update_example_indicies(ec, -n->k * n->increment);
     ld->label = save_label;
     n->all->loss = save_loss;
     n->all->set_minmax = save_set_minmax;
@@ -162,7 +215,7 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
         ec->atomics[nn_output_namespace] = n->output_layer.atomics[nn_output_namespace];
         ec->sum_feat_sq[nn_output_namespace] = n->output_layer.sum_feat_sq[nn_output_namespace];
         ec->total_sum_feat_sq += n->output_layer.sum_feat_sq[nn_output_namespace];
-        base.learn(ec);
+        base.learn(ec, n->k);
         n->output_layer.partial_prediction = ec->partial_prediction;
         n->output_layer.loss = ec->loss;
         ec->total_sum_feat_sq -= n->output_layer.sum_feat_sq[nn_output_namespace];
@@ -178,7 +231,7 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
         n->output_layer.eta_global = ec->eta_global;
         n->output_layer.global_weight = ec->global_weight;
         n->output_layer.example_t = ec->example_t;
-        base.learn(&n->output_layer);
+        base.learn(&n->output_layer, n->k);
         n->output_layer.ld = 0;
       }
 
@@ -203,7 +256,6 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
       n->all->sd->max_label = hidden_max_activation;
 
       for (unsigned int i = 0; i < n->k; ++i) {
-        update_example_indicies (ec, n->increment);
         if (! dropped_out[i]) {
           float sigmah = n->output_layer.atomics[nn_output_namespace][i].x / dropscale;
@@ -213,10 +265,9 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
           ld->label = GD::finalize_prediction (*(n->all), hidden_units[i] - gradhw);
           if (ld->label != hidden_units[i])
-            base.learn(ec);
+            base.learn(ec, i);
         }
       }
-      update_example_indicies (ec, -n->k*n->increment);
 
       n->all->loss = save_loss;
       n->all->set_minmax = save_set_minmax;
@@ -348,9 +399,6 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
              << (all.training ? "training" : "testing")
              << std::endl;
 
-    n->increment = all.reg.stride * all.weights_per_problem;
-    all.weights_per_problem *= n->k + 1;
-
     n->finished_setup = false;
     n->squared_loss = getLossFunction (0, "squared", 0);
@@ -360,69 +408,12 @@ CONVERSE: // That's right, I'm using goto.  So sue me.
     n->xsubi = vm["random_seed"].as<size_t>();
     n->save_xsubi = n->xsubi;
 
-    learner* l = new learner(n, learn, all.l);
+    learner* l = new learner(n, learn, all.l, n->k+1);
     l->set_finish(finish);
     l->set_finish_example(finish_example);
     l->set_end_pass(end_pass);
+    n->increment = l->increment;//Indexing of output layer is odd.
 
     return l;
   }
-
-  void finish_setup (nn* n, vw& all)
-  {
-    bool initialize = true;
-
-    // TODO: output_layer audit
-
-    memset (&n->output_layer, 0, sizeof (n->output_layer));
-    n->output_layer.indices.push_back(nn_output_namespace);
-    feature output = {1., nn_constant*all.reg.stride};
-
-    for (unsigned int i = 0; i < n->k; ++i)
-      {
-        n->output_layer.atomics[nn_output_namespace].push_back(output);
-        initialize &= (all.reg.weight_vector[output.weight_index & all.reg.weight_mask] == 0);
-        ++n->output_layer.num_features;
-        output.weight_index += n->increment;
-      }
-
-    if (! n->inpass)
-      {
-        n->output_layer.atomics[nn_output_namespace].push_back(output);
-        initialize &= (all.reg.weight_vector[output.weight_index & all.reg.weight_mask] == 0);
-        ++n->output_layer.num_features;
-      }
-
-    n->output_layer.in_use = true;
-
-    if (initialize) {
-      // output weights
-
-      float sqrtk = sqrt ((float)n->k);
-      for (feature* x = n->output_layer.atomics[nn_output_namespace].begin;
-           x != n->output_layer.atomics[nn_output_namespace].end;
-           ++x)
-        {
-          weight* w = &all.reg.weight_vector[x->weight_index & all.reg.weight_mask];
-
-          w[0] = (float) (frand48 () - 0.5) / sqrtk;
-
-          // prevent divide by zero error
-          if (n->dropout && all.normalized_updates)
-            w[all.normalized_idx] = 1e-4f;
-        }
-
-      // hidden biases
-
-      unsigned int weight_index = constant * all.reg.stride;
-
-      for (unsigned int i = 0; i < n->k; ++i)
-        {
-          weight_index += n->increment;
-          all.reg.weight_vector[weight_index & all.reg.weight_mask] = (float) (frand48 () - 0.5);
-        }
-    }
-
-    n->finished_setup = true;
-  }
 }
diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc
index 7561aaa7..11c2e459 100644
--- a/vowpalwabbit/oaa.cc
+++ b/vowpalwabbit/oaa.cc
@@ -20,12 +20,10 @@ namespace OAA {
 
   struct oaa{
     uint32_t k;
-    uint32_t increment;
-    uint32_t total_increment;
     vw* all;
   };
 
-  char* bufread_label(mc_label* ld, char* c)
+  char* bufread_label(mc_label* ld, char* c)
   {
     ld->label = *(uint32_t *)c;
     c += sizeof(ld->label);
@@ -221,9 +219,7 @@ namespace OAA {
       simple_temp.label = -1;
       simple_temp.weight = mc_label_data->weight;
       ec->ld = &simple_temp;
-      if (i != 1)
-        update_example_indicies(ec, o->increment);
-      base.learn(ec);
+      base.learn(ec, i-1);
       if (ec->partial_prediction > score)
         {
           score = ec->partial_prediction;
@@ -239,7 +235,6 @@ namespace OAA {
     }
   ec->ld = mc_label_data;
   ec->final_prediction = prediction;
-  update_example_indicies(ec, -o->total_increment);
 
   if (shouldOutput)
     all->print_text(all->raw_prediction, outputStringStream.str(), ec->tag);
@@ -266,10 +261,7 @@ namespace OAA {
     data->all = &all;
     *(all.p->lp) = mc_label_parser;
 
-    data->increment = all.reg.stride * all.weights_per_problem;
-    all.weights_per_problem *= data->k;
-    data->total_increment = data->increment*(data->k-1);
-    learner* l = new learner(data, learn, all.l);
+    learner* l = new learner(data, learn, all.l, data->k);
     l->set_finish_example(finish_example);
 
     return l;
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 5ee43ae5..d9b3975b 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -281,9 +281,6 @@ vw* parse_args(int argc, char *argv[])
       throw exception();
     }
 
-  all->l = GD::setup(*all, vm);
-  all->scorer = all->l;
-
   all->reg.stride = 4; //use stride of 4 for default invariant normalized adaptive updates
   //if we are doing matrix factorization, or user specified anything in sgd,adaptive,invariant,normalized, we turn off default update rules and use whatever user specified
   if( all->rank > 0 || !all->training || ( ( vm.count("sgd") || vm.count("adaptive") || vm.count("invariant") || vm.count("normalized") ) && !vm.count("exact_adaptive_norm")) )
@@ -319,6 +316,9 @@ vw* parse_args(int argc, char *argv[])
     }
   }
 
+  all->l = GD::setup(*all, vm);
+  all->scorer = all->l;
+
   if (vm.count("bfgs") || vm.count("conjugate_gradient"))
     all->l = BFGS::setup(*all, to_pass_further, vm, vm_file);
 
@@ -827,10 +827,11 @@ vw* parse_args(int argc, char *argv[])
 
   // force stride * weights_per_problem to be a power of 2 to avoid 32-bit overflow
   uint32_t i = 0;
-  while (all->reg.stride * all->weights_per_problem > (uint32_t)(1 << i))
+  size_t params_per_problem = all->l->increment * all->l->weights;
+  while (params_per_problem > (uint32_t)(1 << i))
     i++;
-  all->weights_per_problem = (1 << i) / all->reg.stride;
-
+  all->wpp = (1 << i) / all->reg.stride;
+
   return all;
 }
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index 5e8511e6..0efead50 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -225,8 +225,7 @@ public:
         cout << "malformed example !\n'|' , space or EOL expected after : \"" << std::string(beginLine, reading_head - beginLine).c_str() << "\"" << endl;
       }
   }
-
-
+
   inline void nameSpace(){
     cur_channel_v = 1.0;
     base = NULL;
@@ -277,7 +276,7 @@ public:
     this->endLine = endLine;
    this->p = all.p;
    this->ae = ae;
-    this->weights_per_problem = all.weights_per_problem;
+    this->weights_per_problem = all.wpp;
    this->affix_features = all.affix_features;
    audit = all.audit || all.hash_inv;
    listNameSpace();
diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc
index 5cecc72f..4070fe01 100644
--- a/vowpalwabbit/parser.cc
+++ b/vowpalwabbit/parser.cc
@@ -780,7 +780,7 @@ void setup_example(vw& all, example* ae)
       ae->total_sum_feat_sq++;
     }
 
-  if(all.reg.stride != 1 || all.weights_per_problem != 1) //make room for per-feature information.
+  if(all.reg.stride != 1) //make room for per-feature information.
    {
      uint32_t stride = all.reg.stride;
      for (unsigned char* i = ae->indices.begin; i != ae->indices.end; i++)
diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc
index 9ca92d31..f4921a8f 100644
--- a/vowpalwabbit/searn.cc
+++ b/vowpalwabbit/searn.cc
@@ -62,18 +62,6 @@ namespace SearnUtil
     free(ptr);
   }
 
-  void add_policy_offset(vw&all, example *ec, uint32_t increment, uint32_t policy)
-  {
-    if (policy > 0)
-      update_example_indicies(ec, policy * increment);
-  }
-
-  void remove_policy_offset(vw&all, example *ec, uint32_t increment, uint32_t policy)
-  {
-    if (policy > 0)
-      update_example_indicies(ec, -(policy * increment));
-  }
-
   int random_policy(uint64_t seed, float beta, bool allow_current_policy, int current_policy, bool allow_optimal, bool reset_seed)
   {
     if(reset_seed) //reset_seed is false for contextual bandit, so that we only reset the seed if the base learner is not a contextual bandit learner, as this breaks the exploration.
@@ -123,7 +111,7 @@ namespace SearnUtil
   void add_history_to_example(vw&all, history_info &hinfo, example* ec, history h)
   {
     uint64_t v0, v1, v, max_string_length = 0;
-    uint32_t wpp = all.weights_per_problem * all.reg.stride;
+    uint32_t wpp = all.wpp * all.reg.stride;
     if (hinfo.length == 0) return;
     if (h == NULL) {
       cerr << "error: got empty history in add_history_to_example" << endl;
@@ -360,13 +348,11 @@ namespace Searn
     float best_prediction = 0;
     uint32_t best_action = 0;
     for (uint32_t action=0; action<num_ec; action++) {
-      SearnUtil::add_policy_offset(all, ecs[action], srn->increment, pol);
-      base.learn(ecs[action]);
+      base.learn(ecs[action], pol);
       srn->total_predictions_made++;
       srn->num_features += ecs[action]->num_features;
       srn->empty_example->in_use = true;
       base.learn(srn->empty_example);
-      SearnUtil::remove_policy_offset(all, ecs[action], srn->increment, pol);
 
       if ((action == 0) || ecs[action]->partial_prediction < best_prediction) {
@@ -385,14 +371,11 @@ namespace Searn
     void* old_label = ec->ld;
     ec->ld = valid_labels;
 
-    SearnUtil::add_policy_offset(all, ec, srn.increment, pol);
-
-    base.learn(ec);
+    base.learn(ec, pol);
     srn.total_predictions_made++;
     srn.num_features += ec->num_features;
     uint32_t final_prediction = (uint32_t)ec->final_prediction;
 
-    SearnUtil::remove_policy_offset(all, ec, srn.increment, pol);
-
     ec->ld = old_label;
 
     return final_prediction;
@@ -829,9 +812,7 @@ bool snapshot_binary_search_lt(v_array<snapshot_item> a, size_t desired_t, size_
       if (!isLDF(srn)) {
         void* old_label = ec[0]->ld;
         ec[0]->ld = labels;
-        SearnUtil::add_policy_offset(all, ec[0], srn.increment, srn.current_policy);
-        base.learn(ec[0]);
-        SearnUtil::remove_policy_offset(all, ec[0], srn.increment, srn.current_policy);
+        base.learn(ec[0], srn.current_policy);
         ec[0]->ld = old_label;
         srn.total_examples_generated++;
       } else { // isLDF
@@ -1089,7 +1070,7 @@ void print_update(vw& all, searn* srn)
 void add_neighbor_features(searn& srn) {
   size_t neighbor_constant = 8349204823;
   if (srn.neighbor_features.size() == 0) return;
-  uint32_t wpp = srn.all->weights_per_problem * srn.all->reg.stride;
+  uint32_t wpp = srn.all->wpp * srn.all->reg.stride;
 
   for (int32_t n=0; n<(int32_t)srn.ec_seq.size(); n++) {
     example*me = srn.ec_seq[n];
@@ -1606,8 +1587,6 @@ void print_update(vw& all, searn* srn)
       ss1 << srn->current_policy;           VW::cmd_string_replace_value(all.options_from_file,"--searn_trained_nb_policies", ss1.str());
       ss2 << srn->total_number_of_policies; VW::cmd_string_replace_value(all.options_from_file,"--searn_total_nb_policies",   ss2.str());
 
-    srn->increment = all.weights_per_problem * all.reg.stride;
-    all.weights_per_problem *= srn->total_number_of_policies;
     //clog << "searn increment = " << srn->increment << " " << all.reg.stride << endl;
     //clog << "searn current_policy = " << srn->current_policy << " total_number_of_policies = " << srn->total_number_of_policies << endl;
 
@@ -1653,7 +1632,7 @@ void print_update(vw& all, searn* srn)
 
     srn->start_clock_time = clock();
 
-    learner* l = new learner(srn, searn_learn, all.l);
+    learner* l = new learner(srn, searn_learn, all.l, srn->total_number_of_policies);
     l->set_finish_example(finish_example);
     l->set_end_examples(end_examples);
     l->set_finish(searn_finish);
diff --git a/vowpalwabbit/searn.h b/vowpalwabbit/searn.h
index 017c5674..e00bce9a 100644
--- a/vowpalwabbit/searn.h
+++ b/vowpalwabbit/searn.h
@@ -121,7 +121,7 @@ namespace Searn {
     float alpha; //parameter used to adapt beta for dagger (see above comment), should be in (0,1)
     uint32_t current_policy; // what policy are we training right now?
     float gamma; // for dagger
-    uint32_t increment;
+
     size_t num_features;
     uint32_t total_number_of_policies;
     bool do_snapshot;
diff --git a/vowpalwabbit/sender.cc b/vowpalwabbit/sender.cc
index 71b347ec..f0f5e256 100644
--- a/vowpalwabbit/sender.cc
+++ b/vowpalwabbit/sender.cc
@@ -104,6 +104,8 @@ void end_examples(void* d)
     delete s->buf;
   }
 
+  void save_load(void*, io_buf& io, bool read, bool text){}
+
  learner* setup(vw& all, po::variables_map& vm, vector<string> pairs)
  {
    sender* s = (sender*)calloc(1,sizeof(sender));
@@ -117,7 +119,7 @@ void end_examples(void* d)
     s->all = &all;
     s->delay_ring = (example**) calloc(all.p->ring_size, sizeof(example*));
 
-    learner* l = new learner(s,learn);
+    learner* l = new learner(s,learn, save_load, 1);
     l->set_finish(finish);
     l->set_finish_example(finish_example);
     l->set_end_examples(end_examples);
diff --git a/vowpalwabbit/wap.cc b/vowpalwabbit/wap.cc
index 3ea0ac06..eadcd24f 100644
--- a/vowpalwabbit/wap.cc
+++ b/vowpalwabbit/wap.cc
@@ -19,7 +19,7 @@ using namespace std;
 namespace WAP {
   struct wap{
-    uint32_t increment;
+    size_t increment; //wap does funky things with the increment, so we keep explicit access
     vw* all;
   };
 
@@ -204,13 +204,8 @@ namespace WAP {
         simple_temp.weight = 0.;
         simple_temp.label = FLT_MAX;
         uint32_t myi = (uint32_t)cost_label->costs[i].weight_index;
-        if (myi!= 1)
-          update_example_indicies(ec, w.increment*(myi-1));
-        ec->partial_prediction = 0.;
         ec->ld = &simple_temp;
-        base.learn(ec);
-        if (myi != 1)
-          update_example_indicies(ec, -w.increment*(myi-1));
+        base.learn(ec, myi-1);
         if (ec->partial_prediction > score)
           {
             score = ec->partial_prediction;
@@ -257,11 +252,10 @@ namespace WAP {
     *(all.p->lp) = CSOAA::cs_label_parser;
     all.sd->k = (uint32_t)nb_actions;
 
-    all.weights_per_problem *= nb_actions;
-    w->increment = (uint32_t)((all.length()/ all.weights_per_problem) * all.reg.stride);
-    learner* l = new learner(w, learn, all.l);
+    learner* l = new learner(w, learn, all.l, nb_actions);
     l->set_finish_example(CSOAA::finish_example);
+    w->increment = l->increment;
     return l;
   }
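Two ends of the stack deserve a note. At the top, parse_args.cc now derives the old weights_per_problem value from the finished stack: it rounds all->l->increment * all->l->weights up to a power of two and stores the result in all->wpp. At the bottom, reductions that still index weights by hand either copy the computed step back out (n->increment = l->increment in nn.cc, w->increment = l->increment in wap.cc) or override it, as cb.cc does because it adds problems onto the scorer's space instead of multiplying. A standalone restatement of the parse_args.cc computation (compute_wpp is an invented helper name wrapping the exact loop from the diff):

    // Toy restatement of the wpp computation at the end of parse_args.cc.
    #include <cassert>
    #include <cstdint>
    #include <cstddef>

    // Round increment*weights up to a power of two (the diff keeps this in
    // 32 bits to avoid overflow), then express it in units of the stride.
    uint32_t compute_wpp(size_t increment, size_t weights, uint32_t stride) {
      size_t params_per_problem = increment * weights;
      uint32_t i = 0;
      while (params_per_problem > (uint32_t)(1 << i))
        i++;
      return (1 << i) / stride;
    }

    int main() {
      assert(compute_wpp(4, 1, 4) == 1);   // plain gd: stride 4, one problem
      assert(compute_wpp(4, 10, 4) == 16); // oaa-10 on gd: 40 -> 64, 64/4 == 16
      return 0;
    }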