Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorniruc <niruc@NIRUC-M6600.redmond.corp.microsoft.com>2013-04-05 01:17:14 +0400
committerniruc <niruc@NIRUC-M6600.redmond.corp.microsoft.com>2013-04-05 01:17:14 +0400
commit571f1f36679811b1c170a76fb0487a5c6dba177d (patch)
treee3c24b4f49936f5d1533369786803a7f66fa23cc
parent605dbf1f7a418a41197336aa296d6d27be1f935d (diff)
parent32baa122d2ba75158ddb3f2103710cb456da3d1b (diff)
merge conflicts resolved
-rwxr-xr-xautogen.sh10
-rw-r--r--library/ezexample.h2
-rw-r--r--vowpalwabbit/accumulate.cc6
-rw-r--r--vowpalwabbit/autolink.cc2
-rw-r--r--vowpalwabbit/bfgs.cc173
-rw-r--r--vowpalwabbit/cb.cc2
-rw-r--r--vowpalwabbit/csoaa.cc2
-rw-r--r--vowpalwabbit/ect.cc2
-rw-r--r--vowpalwabbit/gd.cc153
-rw-r--r--vowpalwabbit/gd.h80
-rw-r--r--vowpalwabbit/gd_mf.cc17
-rw-r--r--vowpalwabbit/global_data.cc2
-rw-r--r--vowpalwabbit/global_data.h4
-rw-r--r--vowpalwabbit/lda_core.cc18
-rw-r--r--vowpalwabbit/nn.cc18
-rw-r--r--vowpalwabbit/oaa.cc2
-rw-r--r--vowpalwabbit/parse_args.cc10
-rw-r--r--vowpalwabbit/parse_example.cc2
-rw-r--r--vowpalwabbit/parse_regressor.cc8
-rw-r--r--vowpalwabbit/parser.cc10
-rw-r--r--vowpalwabbit/searn.cc6
-rw-r--r--vowpalwabbit/searn_sequencetask.cc2
-rw-r--r--vowpalwabbit/sparse_dense.h87
-rw-r--r--vowpalwabbit/vw.h2
-rw-r--r--vowpalwabbit/vw_static.vcxproj8
-rw-r--r--vowpalwabbit/wap.cc2
26 files changed, 227 insertions, 403 deletions
diff --git a/autogen.sh b/autogen.sh
index 41da9d87..bff53eea 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,2 +1,8 @@
-#! /bin/sh
-libtoolize -f -c && aclocal -I ./acinclude.d -I /usr/share/aclocal && autoheader && automake -ac -Woverride && autoconf && ./configure "$@"
+#!/bin/sh
+
+case $( uname -s ) in
+ Darwin) alias vwlibtool=glibtoolize;;
+ *) alias vwlibtool=libtoolize;;
+esac
+
+vwlibtool -f -c && aclocal -I ./acinclude.d -I /usr/share/aclocal && autoheader && automake -ac -Woverride && autoconf && ./configure "$@"
diff --git a/library/ezexample.h b/library/ezexample.h
index 6ba75043..fbf28854 100644
--- a/library/ezexample.h
+++ b/library/ezexample.h
@@ -118,7 +118,7 @@ class ezexample {
if (to_ns == 0) return 0;
if (ensure_ns_exists(to_ns)) return 0;
- feature f = { v, fint * vw_ref->stride };
+ feature f = { v, fint * vw_ref->reg.stride };
ec->atomics[to_ns].push_back(f);
ec->sum_feat_sq[to_ns] += v * v;
ec->total_sum_feat_sq += v * v;
diff --git a/vowpalwabbit/accumulate.cc b/vowpalwabbit/accumulate.cc
index 5881daec..3b79fa17 100644
--- a/vowpalwabbit/accumulate.cc
+++ b/vowpalwabbit/accumulate.cc
@@ -19,7 +19,7 @@ using namespace std;
void accumulate(vw& all, string master_location, regressor& reg, size_t o) {
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
float* local_grad = new float[length];
weight* weights = reg.weight_vector;
for(uint32_t i = 0;i < length;i++)
@@ -43,7 +43,7 @@ float accumulate_scalar(vw& all, string master_location, float local_sum) {
void accumulate_avg(vw& all, string master_location, regressor& reg, size_t o) {
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
float* local_grad = new float[length];
weight* weights = reg.weight_vector;
float numnodes = 1.;
@@ -81,7 +81,7 @@ void accumulate_weighted_avg(vw& all, string master_location, regressor& reg) {
return;
}
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = reg.weight_vector;
float* local_weights = new float[length];
diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc
index 0f9a8176..53890b87 100644
--- a/vowpalwabbit/autolink.cc
+++ b/vowpalwabbit/autolink.cc
@@ -72,7 +72,7 @@ namespace ALINK {
autolink* data = (autolink*)calloc(1,sizeof(autolink));
data->base = all.l;
data->d = (uint32_t)vm["autolink"].as<size_t>();
- data->stride = all.stride;
+ data->stride = all.reg.stride;
if (!vm_file.count("autolink"))
{
diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index a264d89b..56955350 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -109,7 +109,7 @@ const char* curv_message = "Zero or negative curvature detected.\n"
void zero_derivative(vw& all)
{//set derivative to 0.
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
weights[stride*i+W_GT] = 0;
@@ -118,7 +118,7 @@ void zero_derivative(vw& all)
void zero_preconditioner(vw& all)
{//set derivative to 0.
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
weights[stride*i+W_COND] = 0;
@@ -140,48 +140,6 @@ void reset_state(vw& all, bfgs& b, bool zero)
}
}
-void quad_grad_update(weight* weights, feature& page_feature, v_array<feature> &offer_features, size_t mask, float g)
-{
- size_t halfhash = quadratic_constant * page_feature.weight_index;
- float update = g * page_feature.x;
- for (feature* ele = offer_features.begin; ele != offer_features.end; ele++)
- {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_GT] += update * ele->x;
- }
-}
-
-void cubic_grad_update(weight* weights, feature& f0, feature& f1, v_array<feature> &cross_features, size_t mask, float g)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * f0.weight_index + f1.weight_index);
- float update = g * f0.x * f1.x;
- for (feature* ele = cross_features.begin; ele != cross_features.end; ele++) {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_GT] += update * ele->x;
- }
-}
-
-void quad_precond_update(weight* weights, feature& page_feature, v_array<feature> &offer_features, size_t mask, float g)
-{
- size_t halfhash = quadratic_constant * page_feature.weight_index;
- float update = g * page_feature.x * page_feature.x;
- for (feature* ele = offer_features.begin; ele != offer_features.end; ele++)
- {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_COND] += update * ele->x * ele->x;
- }
-}
-
-void cubic_precond_update(weight* weights, feature& f0, feature& f1, v_array<feature> &cross_features, size_t mask, float g)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * f0.weight_index + f1.weight_index);
- float update = g * f0.x * f0.x * f1.x * f1.x;
- for (feature* ele = cross_features.begin; ele != cross_features.end; ele++) {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_COND] += update * ele->x * ele->x;
- }
-}
-
// w[0] = weight
// w[1] = accumulated first derivative
// w[2] = step direction
@@ -198,6 +156,11 @@ bool test_example(example* ec)
return GD::finalize_prediction(all, ec->partial_prediction);
}
+inline void add_grad(vw& all, void* d, float f, uint32_t u)
+{
+ all.reg.weight_vector[u] += (*(float*)d) * f;
+}
+
float predict_and_gradient(vw& all, example* &ec)
{
float fp = bfgs_predict(all, ec);
@@ -207,99 +170,35 @@ float predict_and_gradient(vw& all, example* &ec)
float loss_grad = all.loss->first_derivative(all.sd, fp,ld->label)*ld->weight;
- size_t mask = all.weight_mask;
- weight* weights = all.reg.weight_vector;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- {
- feature *f = ec->atomics[*i].begin;
- for (; f != ec->atomics[*i].end; f++)
- {
- weight* w = &weights[f->weight_index & mask];
- w[W_GT] += loss_grad * f->x;
- }
- }
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- {
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- quad_grad_update(weights, *temp.begin, ec->atomics[(int)(*i)[1]], mask, loss_grad);
- }
- }
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- cubic_grad_update(weights, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], mask, loss_grad);
- }
- }
+ ec->ft_offset += W_GT;
+ GD::foreach_feature<add_grad>(all, ec, &loss_grad);
+ ec->ft_offset -= W_GT;
+
return fp;
}
+inline void add_precond(vw& all, void* d, float f, uint32_t u)
+{
+ all.reg.weight_vector[u] += (*(float*)d) * f * f;
+}
+
void update_preconditioner(vw& all, example* &ec)
{
label_data* ld = (label_data*)ec->ld;
float curvature = all.loss->second_derivative(all.sd, ec->final_prediction,ld->label) * ld->weight;
- size_t mask = all.weight_mask;
- weight* weights = all.reg.weight_vector;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- {
- feature *f = ec->atomics[*i].begin;
- for (; f != ec->atomics[*i].end; f++)
- {
- weight* w = &weights[f->weight_index & mask];
- w[W_COND] += f->x * f->x * curvature;
- }
- }
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- {
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- quad_precond_update(weights, *temp.begin, ec->atomics[(int)(*i)[1]], mask, curvature);
- }
- }
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- cubic_precond_update(weights, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], mask, curvature);
- }
- }
+ ec->ft_offset += W_COND;
+ GD::foreach_feature<add_precond>(all, ec, &curvature);
+ ec->ft_offset -= W_COND;
}
float dot_with_direction(vw& all, example* &ec)
{
- float ret = 0;
+ ec->ft_offset+= W_DIR;
+ float ret = GD::inline_predict<vec_add>(all, ec);
+ ec->ft_offset-= W_DIR;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- ret += sd_add<vec_add>(all, ec->atomics[*i].begin, ec->atomics[*i].end, W_DIR);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
- if (ec->atomics[(int)(*i)[0]].size() > 0) {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- ret += one_pf_quad_predict<vec_add>(all, *temp.begin, ec->atomics[(int)(*i)[1]], W_DIR);
- }
- }
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- ret += one_pf_cubic_predict<vec_add>(all, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], W_DIR);
- }
- }
return ret;
}
@@ -311,7 +210,7 @@ double regularizer_direction_magnitude(vw& all, bfgs& b, float regularizer)
return ret;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
for(uint32_t i = 0; i < length; i++)
@@ -327,7 +226,7 @@ float direction_magnitude(vw& all)
{//compute direction magnitude
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
ret += weights[stride*i+W_DIR]*weights[stride*i+W_DIR];
@@ -338,7 +237,7 @@ float direction_magnitude(vw& all)
void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance_weight_sum, int&origin)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
double g1_Hg1 = 0.;
@@ -364,7 +263,7 @@ void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance
void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha, int& lastj, int &origin)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
float* mem0 = mem;
@@ -485,7 +384,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
double wolfe_eval(vw& all, bfgs& b, float* mem, double loss_sum, double previous_loss_sum, double step_size, double importance_weight_sum, int &origin, double& wolfe1) {
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
double g0_d = 0.;
@@ -514,7 +413,7 @@ double add_regularization(vw& all, bfgs& b, float regularization)
{//compute the derivative difference
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
{
@@ -537,7 +436,7 @@ double add_regularization(vw& all, bfgs& b, float regularization)
void finalize_preconditioner(vw& all, bfgs& b, float regularization)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
@@ -557,7 +456,7 @@ void finalize_preconditioner(vw& all, bfgs& b, float regularization)
void preconditioner_to_regularizer(vw& all, bfgs& b, float regularization)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
{
@@ -581,7 +480,7 @@ void preconditioner_to_regularizer(vw& all, bfgs& b, float regularization)
void zero_state(vw& all)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
{
@@ -595,7 +494,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin)
{
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++, w+=stride, mem+=b.mem_stride)
@@ -606,7 +505,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin)
void update_weight(vw& all, float step_size, size_t current_pass)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++, w+=stride)
@@ -877,7 +776,7 @@ void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool
{
char buff[512];
int c = 0;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
uint32_t length = 2*(1 << all.num_bits);
uint32_t i = 0;
size_t brw = 1;
@@ -949,7 +848,7 @@ void save_load(void* d, io_buf& model_file, bool read, bool text)
if (!all->quiet)
{
- fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b->mem_stride)+sizeof(weight)*all->stride) >> 20);
+ fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b->mem_stride)+sizeof(weight)*all->reg.stride) >> 20);
}
b->net_time = 0.0;
@@ -1024,7 +923,7 @@ void setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::va
all.l = t;
all.bfgs = true;
- all.stride = 4;
+ all.reg.stride = 4;
if (vm.count("hessian_on") || all.m==0) {
all.hessian_on = true;
diff --git a/vowpalwabbit/cb.cc b/vowpalwabbit/cb.cc
index c99259de..9a60aa93 100644
--- a/vowpalwabbit/cb.cc
+++ b/vowpalwabbit/cb.cc
@@ -715,7 +715,7 @@ namespace CB
all.options_from_file.append(" --cb_type dr");
}
- c->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ c->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
*(all.p->lp) = CB::cb_label_parser;
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 0e53fcbd..885f84b9 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -382,7 +382,7 @@ namespace CSOAA {
*(all.p->lp) = cs_label_parser;
all.base_learner_nb_w *= nb_actions;
c->base=all.l;
- c->csoaa_increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ c->csoaa_increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
all.sd->k = nb_actions;
learner l = {c, drive, learn, finish, all.l.sl};
diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc
index ea6b5801..35201f02 100644
--- a/vowpalwabbit/ect.cc
+++ b/vowpalwabbit/ect.cc
@@ -186,7 +186,7 @@ namespace ECT
if (e.last_pair > 0) {
all.base_learner_nb_w *= (e.last_pair + (eliminations-1));
- e.increment = (uint32_t) all.length() / all.base_learner_nb_w * all.stride;
+ e.increment = (uint32_t) all.length() / all.base_learner_nb_w * all.reg.stride;
}
}
diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc
index 74a6b09f..9ac4e8a2 100644
--- a/vowpalwabbit/gd.cc
+++ b/vowpalwabbit/gd.cc
@@ -39,7 +39,13 @@ namespace GD
void predict(vw& all, example* ex);
void sync_weights(vw& all);
- template <void (*T)(vw&, float, uint32_t, float, float)>
+
+ struct train_data {
+ float avg_norm;
+ float update;
+ };
+
+ template <void (*T)(vw&, void*, float, uint32_t)>
void generic_train(vw& all, example* &ec, float update, bool sqrt_norm)
{
if (fabs(update) == 0.)
@@ -51,30 +57,12 @@ namespace GD
else
total_weight = ec->example_t;
- uint32_t offset = ec->ft_offset;
float avg_norm = all.normalized_sum_norm_x / total_weight;
if (sqrt_norm) avg_norm = sqrt(avg_norm);
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- for (feature* f = ec->atomics[*i].begin; f != ec->atomics[*i].end; f++)
- T(all, f->x, f->weight_index + offset, avg_norm, update);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++) {
- uint32_t halfhash = quadratic_constant * (f0->weight_index + offset);
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++)
- T(all, f1->x, f1->weight_index + halfhash + offset, avg_norm, f0->x * update);
- }
+ train_data d = {avg_norm, update};
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0) && (ec->atomics[(int)(*i)[2]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++) {
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0->weight_index + offset) + f1->weight_index + offset);
- for (feature* f2 = ec->atomics[(int)(*i)[2]].begin; f2 != ec->atomics[(int)(*i)[2]].end; f2++)
- T(all, f2->x, f2->weight_index + halfhash + offset, avg_norm, f0->x * f1->x * update);
- }
+ foreach_feature<T>(all, ec, &d);
}
float InvSqrt(float x){
@@ -86,26 +74,30 @@ float InvSqrt(float x){
return x;
}
-inline void general_update(vw& all, float x, uint32_t fi, float avg_norm, float update)
+ inline void general_update(vw& all, void* dat, float x, uint32_t fi)
{
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ train_data* s = (train_data*)dat;
+
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float t = 1.f;
if(all.adaptive) t = powf(w[1],-all.power_t);
if(all.normalized_updates) {
- float norm = w[all.normalized_idx] * avg_norm;
+ float norm = w[all.normalized_idx] * s->avg_norm;
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
t *= powf(norm*norm,-power_t_norm);
}
- w[0] += update * x * t;
+ w[0] += s->update * x * t;
}
template<bool adaptive, bool normalized>
-inline void specialized_update(vw& all, float x, uint32_t fi, float avg_norm, float update)
+inline void specialized_update(vw& all, void* dat, float x, uint32_t fi)
{
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ train_data* s = (train_data*)dat;
+
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float t = 1.f;
float inv_norm = 1.f;
- if(normalized) inv_norm /= (w[all.normalized_idx] * avg_norm);
+ if(normalized) inv_norm /= (w[all.normalized_idx] * s->avg_norm);
if(adaptive) {
#if defined(__SSE2__) && !defined(VW_LDA_NO_SSE)
__m128 eta = _mm_load_ss(&w[1]);
@@ -118,7 +110,7 @@ inline void specialized_update(vw& all, float x, uint32_t fi, float avg_norm, fl
} else {
t *= inv_norm*inv_norm; //if only using normalized updates but not adaptive, need to divide by feature norm squared
}
- w[0] += update * x * t;
+ w[0] += s->update * x * t;
}
void learn(void* d, example* ec)
@@ -187,7 +179,7 @@ void sync_weights(vw& all) {
if (all.sd->gravity == 0. && all.sd->contraction == 1.) // to avoid unnecessary weight synchronization
return;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
for(uint32_t i = 0; i < length && all.reg_mode; i++)
all.reg.weight_vector[stride*i] = trunc_weight(all.reg.weight_vector[stride*i], (float)all.sd->gravity) * (float)all.sd->contraction;
all.sd->gravity = 0.;
@@ -224,14 +216,14 @@ bool operator<(const string_value& first, const string_value& second)
void audit_feature(vw& all, feature* f, audit_data* a, vector<string_value>& results, string prepend, size_t offset = 0)
{
ostringstream tempstream;
- size_t index = (f->weight_index + offset) & all.weight_mask;
+ size_t index = (f->weight_index + offset) & all.reg.weight_mask;
weight* weights = all.reg.weight_vector;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
tempstream << prepend;
if (a != NULL)
tempstream << a->space << '^' << a->feature << ':';
- else if ( index == ((constant*stride)&all.weight_mask))
+ else if ( index == ((constant*stride)&all.reg.weight_mask))
tempstream << "Constant:";
tempstream << (index/stride & all.parse_mask) << ':' << f->x;
@@ -288,9 +280,9 @@ void print_features(vw& all, example* &ec)
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
{
- cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.stride & all.parse_mask) << ':' << f->x;
+ cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.reg.stride & all.parse_mask) << ':' << f->x;
for (size_t k = 0; k < all.lda; k++)
- cout << ':' << weights[(f->weight_index+k) & all.weight_mask];
+ cout << ':' << weights[(f->weight_index+k) & all.reg.weight_mask];
}
cout << " total of " << count << " features." << endl;
}
@@ -349,38 +341,16 @@ void print_audit_features(vw& all, example* ec)
print_features(all, ec);
}
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add(vw& all, feature* begin, feature* end, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- for (feature* f = begin; f!= end; f++)
- T(all, f->x, f->weight_index + offset, g, norm, norm_x);
-}
-
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add_quad(vw& all, feature& f0, v_array<feature> &cross_features, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- uint32_t halfhash = quadratic_constant * (f0.weight_index + offset);
- float norm_new = 0.f;
- float norm_x_new = 0.f;
- norm_add<T>(all, cross_features.begin, cross_features.end, g * f0.x * f0.x, norm_new, norm_x_new, halfhash + offset);
- norm += norm_new * f0.x * f0.x;
- norm_x += norm_x_new * f0.x * f0.x;
-}
-
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add_cubic(vw& all, feature& f0, feature& f1, v_array<feature> &cross_features, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- float norm_new = 0.f;
- float norm_x_new = 0.f;
- norm_add<T>(all, cross_features.begin, cross_features.end, g * f0.x * f0.x * f1.x * f1.x, norm_new, norm_x_new, halfhash + offset);
- norm += norm_new * f0.x * f0.x * f1.x * f1.x;
- norm_x += norm_x_new * f0.x * f0.x * f1.x * f1.x;
-}
+ struct norm_data {
+ float g;
+ float norm;
+ float norm_x;
+ };
template<bool adaptive, bool normalized>
-inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& norm, float& norm_x) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void simple_norm_compute(vw& all, void* v, float x, uint32_t fi) {
+ norm_data* nd=(norm_data*)v;
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x2 = x * x;
float t = 1.f;
float inv_norm = 1.f;
@@ -388,10 +358,10 @@ inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& n
if(normalized) {
inv_norm /= w[all.normalized_idx];
inv_norm2 = inv_norm*inv_norm;
- norm_x += x2 * inv_norm2;
+ nd->norm_x += x2 * inv_norm2;
}
if(adaptive){
- w[1] += g * x2;
+ w[1] += nd->g * x2;
#if defined(__SSE2__) && !defined(VW_LDA_NO_SSE)
__m128 eta = _mm_load_ss(&w[1]);
eta = _mm_rsqrt_ss(eta);
@@ -403,51 +373,38 @@ inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& n
} else {
t *= inv_norm2; //if only using normalized but not adaptive, we're dividing update by feature norm squared
}
- norm += x2 * t;
+ nd->norm += x2 * t;
}
-inline void powert_norm_compute(vw& all, float x, uint32_t fi, float g, float& norm, float& norm_x) {
+ inline void powert_norm_compute(vw& all, void* v, float x, uint32_t fi) {
+ norm_data* nd=(norm_data*)v;
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x2 = x * x;
float t = 1.f;
if(all.adaptive){
- w[1] += g * x2;
+ w[1] += nd->g * x2;
t = powf(w[1], -all.power_t);
}
if(all.normalized_updates) {
float range2 = w[all.normalized_idx] * w[all.normalized_idx];
t *= powf(range2, -power_t_norm);
- norm_x += x2 / range2;
+ nd->norm_x += x2 / range2;
}
- norm += x2 * t;
+ nd->norm += x2 * t;
}
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
+ template <void (*T)(vw&,void*,float,uint32_t)>
float compute_norm(vw& all, example* &ec)
{//We must traverse the features in _precisely_ the same order as during training.
label_data* ld = (label_data*)ec->ld;
float g = all.loss->getSquareGrad(ec->final_prediction, ld->label) * ld->weight;
if (g==0) return 1.;
- float norm = 0.;
- float norm_x = 0.;
- uint32_t offset = ec->ft_offset;
-
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- norm_add<T>(all, ec->atomics[*i].begin, ec->atomics[*i].end, g, norm, norm_x, offset);
+ norm_data nd = {g, 0., 0.};
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end(); i++)
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- norm_add_quad<T>(all, *f0, ec->atomics[(int)(*i)[1]], g, norm, norm_x, offset);
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0) && (ec->atomics[(int)(*i)[2]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++)
- norm_add_cubic<T>(all, *f0, *f1, ec->atomics[(int)(*i)[2]], g, norm, norm_x, offset);
+ foreach_feature<T>(all, ec, &nd);
if(all.normalized_updates) {
float total_weight = 0;
@@ -456,19 +413,19 @@ float compute_norm(vw& all, example* &ec)
else
total_weight = ec->example_t;
- all.normalized_sum_norm_x += ld->weight * norm_x;
+ all.normalized_sum_norm_x += ld->weight * nd.norm_x;
float avg_sq_norm = all.normalized_sum_norm_x / total_weight;
if(all.power_t == 0.5) {
- if(all.adaptive) norm /= sqrt(avg_sq_norm);
- else norm /= avg_sq_norm;
+ if(all.adaptive) nd.norm /= sqrt(avg_sq_norm);
+ else nd.norm /= avg_sq_norm;
} else {
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
- norm *= powf(avg_sq_norm,-power_t_norm);
+ nd.norm *= powf(avg_sq_norm,-power_t_norm);
}
}
- return norm;
+ return nd.norm;
}
void local_predict(vw& all, example* ec)
@@ -581,7 +538,7 @@ void predict(vw& all, example* ex)
void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text)
{
uint32_t length = 1 << all.num_bits;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
int c = 0;
uint32_t i = 0;
size_t brw = 1;
@@ -674,7 +631,7 @@ void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text)
buff, text_len, text);
uint32_t length = 1 << all.num_bits;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
int c = 0;
uint32_t i = 0;
size_t brw = 1;
@@ -737,7 +694,7 @@ void save_load(void* data, io_buf& model_file, bool read, bool text)
if(all->adaptive && all->initial_t > 0)
{
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
for (size_t j = 1; j < stride*length; j+=stride)
{
all->reg.weight_vector[j] = all->initial_t; //for adaptive update, we interpret initial_t as previously seeing initial_t fake datapoints, all with squared gradient=1
diff --git a/vowpalwabbit/gd.h b/vowpalwabbit/gd.h
index 60a05857..ad655f40 100644
--- a/vowpalwabbit/gd.h
+++ b/vowpalwabbit/gd.h
@@ -20,11 +20,7 @@ namespace GD{
void print_result(int f, float res, v_array<char> tag);
void print_audit_features(regressor &reg, example* ec, size_t offset);
float finalize_prediction(vw&, float ret);
-float single_quad_weight(weight* weights, feature& page_feature, feature* offer_feature, size_t mask);
-void quadratic(v_array<feature> &f, const v_array<feature> &first_part,
- const v_array<feature> &second_part, size_t thread_mask);
void print_audit_features(vw&, example* ec);
-void train(weight* weights, const v_array<feature> &features, float update);
void train_one_example(regressor& r, example* ex);
void train_offset_example(regressor& r, example* ex, size_t offset);
void compute_update(example* ec);
@@ -34,35 +30,55 @@ void train_one_example_single_thread(regressor& r, example* ex);
void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text);
void output_and_account_example(example* ec);
-template <float (*T)(vw&,float,uint32_t)>
-float inline_predict(vw& all, example* &ec)
-{
- float prediction = all.p->lp->get_initial(ec->ld);
+ template <void (*T)(vw&, void*, float, uint32_t)>
+ void foreach_feature(vw& all, void* dat, feature* begin, feature* end, uint32_t offset=0, float mult=1.)
+ {
+ for (feature* f = begin; f!= end; f++)
+ T(all, dat, mult*f->x, f->weight_index + offset);
+ }
+
+ template <void (*T)(vw&, void*, float, uint32_t)>
+ void foreach_feature(vw& all, example* ec, void* dat)
+ {
+ uint32_t offset = ec->ft_offset;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- prediction += sd_add<T>(all, ec->atomics[*i].begin, ec->atomics[*i].end, ec->ft_offset);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
- if (ec->atomics[(int)(*i)[0]].size() > 0) {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- prediction += one_pf_quad_predict<T>(all,*temp.begin,ec->atomics[(int)(*i)[1]], ec->ft_offset);
- }
- }
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++) {
- prediction += one_pf_cubic_predict<T>(all,*temp1.begin,*temp2.begin,ec->atomics[(int)(*i)[2]], ec->ft_offset);
- }
- }
- }
-
- return prediction;
-}
+ for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+ foreach_feature<T>(all, dat, ec->atomics[*i].begin, ec->atomics[*i].end, offset);
+
+ for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
+ if (ec->atomics[(int)(*i)[0]].size() > 0) {
+ v_array<feature> temp = ec->atomics[(int)(*i)[0]];
+ for (; temp.begin != temp.end; temp.begin++)
+ {
+ uint32_t halfhash = quadratic_constant * (temp.begin->weight_index + offset);
+ foreach_feature<T>(all, dat, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end,
+ halfhash + offset, temp.begin->x);
+ }
+ }
+ }
+
+ for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
+ if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
+ v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
+ for (; temp1.begin != temp1.end; temp1.begin++) {
+ v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
+ for (; temp2.begin != temp2.end; temp2.begin++) {
+
+ uint32_t halfhash = cubic_constant2 * (cubic_constant * (temp1.begin->weight_index + offset) + temp2.begin->weight_index + offset);
+ float mult = temp1.begin->x * temp2.begin->x;
+	  foreach_feature<T>(all, dat, ec->atomics[(int)(*i)[2]].begin, ec->atomics[(int)(*i)[2]].end, halfhash + offset, mult);
+ }
+ }
+ }
+ }
+
+ template <void (*T)(vw&,void*, float,uint32_t)>
+ float inline_predict(vw& all, example* ec)
+ {
+ float prediction = all.p->lp->get_initial(ec->ld);
+ foreach_feature<T>(all, ec, &prediction);
+ return prediction;
+ }
}
#endif
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index a9c6920d..b2ebb7db 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -35,8 +35,7 @@ float mf_inline_predict(vw& all, example* &ec)
float linear_prediction = 0;
// linear terms
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- linear_prediction += sd_add<vec_add>(all, ec->atomics[*i].begin, ec->atomics[*i].end);
- //linear_prediction += sd_add(weights,mask,ec->atomics[*i].begin, ec->atomics[*i].end);
+ GD::foreach_feature<vec_add>(all, &linear_prediction, ec->atomics[*i].begin, ec->atomics[*i].end);
// store constant + linear prediction
// note: constant is now automatically added
@@ -54,11 +53,13 @@ float mf_inline_predict(vw& all, example* &ec)
// x_l * l^k
// l^k is from index+1 to index+all.rank
//float x_dot_l = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
- float x_dot_l = sd_add<vec_add>(all, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
+ float x_dot_l = 0;
+ GD::foreach_feature<vec_add>(all, &x_dot_l, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
// x_r * r^k
// r^k is from index+all.rank+1 to index+2*all.rank
//float x_dot_r = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
- float x_dot_r = sd_add<vec_add>(all, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
+ float x_dot_r = 0;
+ GD::foreach_feature<vec_add>(all, &x_dot_r, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
prediction += x_dot_l * x_dot_r;
@@ -82,7 +83,7 @@ float mf_inline_predict(vw& all, example* &ec)
void mf_inline_train(vw& all, example* &ec, float update)
{
weight* weights = all.reg.weight_vector;
- size_t mask = all.weight_mask;
+ size_t mask = all.reg.weight_mask;
label_data* ld = (label_data*)ec->ld;
// use final prediction to get update size
@@ -132,7 +133,7 @@ void mf_inline_train(vw& all, example* &ec, float update)
void mf_print_offset_features(vw& all, example* &ec, size_t offset)
{
weight* weights = all.reg.weight_vector;
- size_t mask = all.weight_mask;
+ size_t mask = all.reg.weight_mask;
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
if (ec->audit_features[*i].begin != ec->audit_features[*i].end)
for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
@@ -211,13 +212,13 @@ float mf_predict(vw& all, example* ex)
{
vw* all = (vw*)d;
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
if(read)
{
initialize_regressor(*all);
if(all->random_weights)
- for (size_t j = 0; j < all->stride*length; j++)
+ for (size_t j = 0; j < all->reg.stride*length; j++)
all->reg.weight_vector[j] = (float) (0.1 * frand48());
}
diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc
index 1b3353e4..f05927d0 100644
--- a/vowpalwabbit/global_data.cc
+++ b/vowpalwabbit/global_data.cc
@@ -249,7 +249,7 @@ vw::vw()
bfgs = false;
hessian_on = false;
- stride = 1;
+ reg.stride = 1;
num_bits = 18;
default_bits = true;
daemon = false;
diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index e9913df8..29a7a926 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -110,6 +110,8 @@ typedef float weight;
struct regressor {
weight* weight_vector;
+ size_t weight_mask; // (stride*(1 << num_bits) -1)
+ uint32_t stride;
};
struct vw {
@@ -161,7 +163,6 @@ struct vw {
uint32_t base_learner_nb_w; //this stores the current number of "weight vector" required by the based learner, which is used to compute offsets when composing reductions
- uint32_t stride;
int stdout_fileno;
std::string per_feature_regularizer_input;
@@ -181,7 +182,6 @@ struct vw {
size_t numpasses;
size_t passes_complete;
size_t parse_mask; // 1 << num_bits -1
- size_t weight_mask; // (stride*(1 << num_bits) -1)
std::vector<std::string> pairs; // pairs of features to cross.
std::vector<std::string> triples; // triples of features to cross.
bool ignore_some;
diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc
index 5a430303..36aa6493 100644
--- a/vowpalwabbit/lda_core.cc
+++ b/vowpalwabbit/lda_core.cc
@@ -457,7 +457,7 @@ v_array<float> old_gamma;
feature *f = ec->atomics[*i].begin;
for (; f != ec->atomics[*i].end; f++)
{
- float* u_for_w = &weights[(f->weight_index&all.weight_mask)+all.lda+1];
+ float* u_for_w = &weights[(f->weight_index&all.reg.weight_mask)+all.lda+1];
float c_w = find_cw(all, u_for_w,v);
xc_w = c_w * f->x;
score += -f->x*log(c_w);
@@ -507,7 +507,7 @@ size_t next_pow2(size_t x) {
lda* l = (lda*)d;
vw* all = l->all;
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
if (read)
{
@@ -583,10 +583,10 @@ size_t next_pow2(size_t x) {
for (size_t k = 0; k < all->lda; k++)
total_lambda.push_back(0.f);
- size_t stride = all->stride;
+ size_t stride = all->reg.stride;
weight* weights = reg.weight_vector;
- for (size_t i =0; i <= all->weight_mask;i+=stride)
+ for (size_t i =0; i <= all->reg.weight_mask;i+=stride)
for (size_t k = 0; k < all->lda; k++)
total_lambda[k] += weights[i+k];
@@ -647,7 +647,7 @@ size_t next_pow2(size_t x) {
if (last_weight_index == s->f.weight_index)
continue;
last_weight_index = s->f.weight_index;
- float* weights_for_w = &(weights[s->f.weight_index & all->weight_mask]);
+ float* weights_for_w = &(weights[s->f.weight_index & all->reg.weight_mask]);
float decay = fmin(1.0, exp(decay_levels.end[-2] - decay_levels.end[(int)(-1-example_t+weights_for_w[all->lda])]));
float* u_for_w = weights_for_w + all->lda+1;
@@ -679,7 +679,7 @@ size_t next_pow2(size_t x) {
while(next <= &sorted_features.back() && next->f.weight_index == s->f.weight_index)
next++;
- float* word_weights = &(weights[s->f.weight_index & all->weight_mask]);
+ float* word_weights = &(weights[s->f.weight_index & all->reg.weight_mask]);
for (size_t k = 0; k < all->lda; k++) {
float new_value = minuseta*word_weights[k];
word_weights[k] = new_value;
@@ -687,7 +687,7 @@ size_t next_pow2(size_t x) {
for (; s != next; s++) {
float* v_s = &v[s->document*all->lda];
- float* u_for_w = &weights[(s->f.weight_index & all->weight_mask) + all->lda + 1];
+ float* u_for_w = &weights[(s->f.weight_index & all->reg.weight_mask) + all->lda + 1];
float c_w = eta*find_cw(*all, u_for_w, v_s)*s->f.x;
for (size_t k = 0; k < all->lda; k++) {
float new_value = u_for_w[k]*v_s[k]*c_w;
@@ -704,7 +704,7 @@ size_t next_pow2(size_t x) {
if (parser_done(all->p))
{
for (size_t i = 0; i < all->length(); i++) {
- weight* weights_for_w = & (weights[i*all->stride]);
+ weight* weights_for_w = & (weights[i*all->reg.stride]);
float decay = fmin(1.0, exp(decay_levels.last() - decay_levels.end[(int)(-1-example_t+weights_for_w[all->lda])]));
for (size_t k = 0; k < all->lda; k++) {
weights_for_w[k] *= decay;
@@ -746,7 +746,7 @@ learner setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
all.p->sort_features = true;
float temp = ceilf(logf((float)(all.lda*2+1)) / logf (2.f));
- all.stride = ((size_t)1) << (int) temp;
+ all.reg.stride = ((size_t)1) << (int) temp;
all.random_weights = true;
all.add_constant = false;
diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc
index 8417a8f4..1c2be65c 100644
--- a/vowpalwabbit/nn.cc
+++ b/vowpalwabbit/nn.cc
@@ -194,7 +194,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
float sigmah =
n.output_layer.atomics[nn_output_namespace][i].x / dropscale;
float sigmahprime = dropscale * (1.0f - sigmah * sigmah);
- float nu = all.reg.weight_vector[n.output_layer.atomics[nn_output_namespace][i].weight_index & all.weight_mask];
+ float nu = all.reg.weight_vector[n.output_layer.atomics[nn_output_namespace][i].weight_index & all.reg.weight_mask];
float gradhw = 0.5f * nu * gradient * sigmahprime;
ld->label = GD::finalize_prediction (all, hidden_units[i-1] - gradhw);
@@ -368,7 +368,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
n->base = all.l;
all.base_learner_nb_w *= (n->inpass) ? n->k + 1 : n->k;
- n->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ n->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
bool initialize = true;
@@ -376,15 +376,15 @@ CONVERSE: // That's right, I'm using goto. So sue me.
memset (&n->output_layer, 0, sizeof (n->output_layer));
n->output_layer.indices.push_back(nn_output_namespace);
- feature output = {1., nn_constant*all.stride};
+ feature output = {1., nn_constant*all.reg.stride};
n->output_layer.atomics[nn_output_namespace].push_back(output);
- initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][0].weight_index & all.weight_mask] == 0);
+ initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][0].weight_index & all.reg.weight_mask] == 0);
for (unsigned int i = 0; i < n->k; ++i)
{
- output.weight_index += all.stride;
+ output.weight_index += all.reg.stride;
n->output_layer.atomics[nn_output_namespace].push_back(output);
- initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i+1].weight_index & all.weight_mask] == 0);
+ initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i+1].weight_index & all.reg.weight_mask] == 0);
}
n->output_layer.num_features = n->k + 1;
@@ -399,7 +399,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
float sqrtk = sqrt ((float)n->k);
for (unsigned int i = 0; i <= n->k; ++i)
{
- weight* w = &all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i].weight_index & all.weight_mask];
+ weight* w = &all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i].weight_index & all.reg.weight_mask];
w[0] = (float) (frand48 () - 0.5) / sqrtk;
@@ -410,11 +410,11 @@ CONVERSE: // That's right, I'm using goto. So sue me.
// hidden biases
- unsigned int weight_index = constant * all.stride;
+ unsigned int weight_index = constant * all.reg.stride;
for (unsigned int i = 0; i < n->k; ++i)
{
- all.reg.weight_vector[weight_index & all.weight_mask] = (float) (frand48 () - 0.5);
+ all.reg.weight_vector[weight_index & all.reg.weight_mask] = (float) (frand48 () - 0.5);
weight_index += n->increment;
}
}
diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc
index 829c7d98..11bf4361 100644
--- a/vowpalwabbit/oaa.cc
+++ b/vowpalwabbit/oaa.cc
@@ -260,7 +260,7 @@ namespace OAA {
data->all = &all;
*(all.p->lp) = mc_label_parser;
all.base_learner_nb_w *= data->k;
- data->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ data->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
data->total_increment = data->increment*(data->k-1);
data->base = all.l;
learner l = {data, drive, learn, finish, all.l.sl};
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index ed560d30..63833428 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -217,7 +217,7 @@ vw* parse_args(int argc, char *argv[])
throw exception();
}
- all->stride = 4; //use stride of 4 for default invariant normalized adaptive updates
+ all->reg.stride = 4; //use stride of 4 for default invariant normalized adaptive updates
//if we are doing matrix factorization, or user specified anything in sgd,adaptive,invariant,normalized, we turn off default update rules and use whatever user specified
if( all->rank > 0 || !all->training || ( ( vm.count("sgd") || vm.count("adaptive") || vm.count("invariant") || vm.count("normalized") ) && !vm.count("exact_adaptive_norm")) )
{
@@ -225,12 +225,12 @@ vw* parse_args(int argc, char *argv[])
all->invariant_updates = all->training && vm.count("invariant");
all->normalized_updates = all->training && (vm.count("normalized") && all->rank == 0);
- all->stride = 1;
+ all->reg.stride = 1;
- if( all->adaptive ) all->stride *= 2;
+ if( all->adaptive ) all->reg.stride *= 2;
else all->normalized_idx = 1; //store per feature norm at 1 index offset from weight value instead of 2
- if( all->normalized_updates ) all->stride *= 2;
+ if( all->normalized_updates ) all->reg.stride *= 2;
if(!vm.count("learning_rate") && !vm.count("l") && !(all->adaptive && all->normalized_updates))
all->eta = 10; //default learning rate to 10 for non default update rule
@@ -407,7 +407,7 @@ vw* parse_args(int argc, char *argv[])
if (all->rank > 0) {
// store linear + 2*rank weights per index, round up to power of two
float temp = ceilf(logf((float)(all->rank*2+1)) / logf (2.f));
- all->stride = 1 << (int) temp;
+ all->reg.stride = 1 << (int) temp;
all->random_weights = true;
if ( vm.count("adaptive") )
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index 843d96c0..a3c581b8 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -117,12 +117,12 @@ public:
// maybeFeature --> 'String' FeatureValue
substring feature_name=read_name();
v = cur_channel_v * featureValue();
- if(v == 0) return; //dont add 0 valued features to list of features
size_t word_hash;
if (feature_name.end != feature_name.begin)
word_hash = (p->hasher(feature_name,(uint32_t)channel_hash)) & mask;
else
word_hash = channel_hash + anon++;
+ if(v == 0) return; //dont add 0 valued features to list of features
feature f = {v,(uint32_t)word_hash};
ae->sum_feat_sq[index] += v*v;
ae->atomics[index].push_back(f);
diff --git a/vowpalwabbit/parse_regressor.cc b/vowpalwabbit/parse_regressor.cc
index 3b2a95cd..f0825823 100644
--- a/vowpalwabbit/parse_regressor.cc
+++ b/vowpalwabbit/parse_regressor.cc
@@ -27,8 +27,8 @@ using namespace std;
void initialize_regressor(vw& all)
{
size_t length = ((size_t)1) << all.num_bits;
- all.weight_mask = (all.stride * length) - 1;
- all.reg.weight_vector = (weight *)calloc(all.stride*length, sizeof(weight));
+ all.reg.weight_mask = (all.reg.stride * length) - 1;
+ all.reg.weight_vector = (weight *)calloc(all.reg.stride*length, sizeof(weight));
if (all.reg.weight_vector == NULL)
{
cerr << all.program_name << ": Failed to allocate weight array with " << all.num_bits << " bits: try decreasing -b <bits>" << endl;
@@ -37,10 +37,10 @@ void initialize_regressor(vw& all)
if (all.random_weights)
{
for (size_t j = 0; j < length; j++)
- all.reg.weight_vector[j*all.stride] = (float)(frand48() - 0.5);
+ all.reg.weight_vector[j*all.reg.stride] = (float)(frand48() - 0.5);
}
if (all.initial_weight != 0.)
- for (size_t j = 0; j < all.stride*length; j+=all.stride)
+ for (size_t j = 0; j < all.reg.stride*length; j+=all.reg.stride)
all.reg.weight_vector[j] = all.initial_weight;
}
diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc
index 5f2eeb3a..76c82958 100644
--- a/vowpalwabbit/parser.cc
+++ b/vowpalwabbit/parser.cc
@@ -459,10 +459,10 @@ void parse_source_args(vw& all, po::variables_map& vm, bool quiet, size_t passes
#else
// weights will be shared across processes, accessible to children
float* shared_weights =
- (float*)mmap(0,all.stride * all.length() * sizeof(float),
+ (float*)mmap(0,all.reg.stride * all.length() * sizeof(float),
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
- size_t float_count = all.stride * all.length();
+ size_t float_count = all.reg.stride * all.length();
weight* dest = shared_weights;
memcpy(dest, all.reg.weight_vector, float_count*sizeof(float));
free(all.reg.weight_vector);
@@ -763,9 +763,9 @@ void setup_example(vw& all, example* ae)
ae->total_sum_feat_sq++;
}
- if(all.stride != 1) //make room for per-feature information.
+ if(all.reg.stride != 1) //make room for per-feature information.
{
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
for (unsigned char* i = ae->indices.begin; i != ae->indices.end; i++)
for(feature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++)
j->weight_index = j->weight_index*stride;
@@ -1143,4 +1143,4 @@ void end_parser(vw& all)
#endif
release_parser_datastructures(all);
}
-} \ No newline at end of file
+}
diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc
index 81dc5a7c..9a64244d 100644
--- a/vowpalwabbit/searn.cc
+++ b/vowpalwabbit/searn.cc
@@ -356,7 +356,7 @@ namespace Searn
{
feature* end = ec->atomics[*i].end;
for (feature* f = ec->atomics[*i].begin; f!= end; f++) {
- cerr << "\t" << f->weight_index << ":" << f->x << ":" << all.reg.weight_vector[f->weight_index & all.weight_mask];
+ cerr << "\t" << f->weight_index << ":" << f->x << ":" << all.reg.weight_vector[f->weight_index & all.reg.weight_mask];
}
}
cerr << endl;
@@ -808,7 +808,7 @@ namespace Searn
VW::cmd_string_replace_value(all.options_from_file,"--searn_total_nb_policies", ss2.str());
all.base_learner_nb_w *= s->total_number_of_policies;
- s->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.stride;
+ s->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.reg.stride;
//cerr << "searn increment = " << s->increment << endl;
learner l = {s, drive, learn, finish, all.l.sl};
@@ -2041,7 +2041,7 @@ namespace ImperativeSearn {
ss2 << srn->total_number_of_policies; VW::cmd_string_replace_value(all.options_from_file,"--searn_total_nb_policies", ss2.str());
all.base_learner_nb_w *= srn->total_number_of_policies;
- srn->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.stride;
+ srn->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.reg.stride;
if (task_string.compare("sequence") == 0) {
searn_task* mytask = (searn_task*)calloc(1, sizeof(searn_task));
diff --git a/vowpalwabbit/searn_sequencetask.cc b/vowpalwabbit/searn_sequencetask.cc
index bad32419..5446b3fc 100644
--- a/vowpalwabbit/searn_sequencetask.cc
+++ b/vowpalwabbit/searn_sequencetask.cc
@@ -125,7 +125,7 @@ namespace SequenceTask {
for (size_t i=0; i < hinfo.length; i++)
constant_pow_length *= quadratic_constant;
- increment = ((uint32_t)all.length() * all.stride + 132489)/seq_max_action;
+ increment = ((uint32_t)all.length() * all.reg.stride + 132489)/seq_max_action;
return true;
}
diff --git a/vowpalwabbit/sparse_dense.h b/vowpalwabbit/sparse_dense.h
index 71762264..4de5c132 100644
--- a/vowpalwabbit/sparse_dense.h
+++ b/vowpalwabbit/sparse_dense.h
@@ -16,40 +16,16 @@ inline float trunc_weight(float w, float gravity){
return (gravity < fabsf(w)) ? w - sign(w) * gravity : 0.f;
}
-template <float (*T)(vw&,float,uint32_t)>
-float sd_add(vw& all, feature* begin, feature* end, uint32_t offset=0, float mult = 1.)
-{
- float ret = 0.;
- for (feature* f = begin; f!= end; f++)
- ret += T(all, mult*f->x, f->weight_index + offset);
- return ret;
+inline void vec_add(vw& all, void* p, float fx, uint32_t fi) {
+ *(float*)p += all.reg.weight_vector[fi & all.reg.weight_mask] * fx;
}
-template <float (*T)(vw&,float,uint32_t)>
-float one_pf_quad_predict(vw& all, feature& f, v_array<feature> cross_features, uint32_t offset=0)
-{
- uint32_t halfhash = quadratic_constant * (f.weight_index + offset);
- return sd_add<T>(all, cross_features.begin, cross_features.end, halfhash + offset, f.x);
+inline void vec_add_trunc(vw& all, void* p, float fx, uint32_t fi) {
+ *(float*)p += trunc_weight(all.reg.weight_vector[fi & all.reg.weight_mask], (float)all.sd->gravity) * fx;
}
-template <float (*T)(vw&,float,uint32_t)>
-float one_pf_cubic_predict(vw& all, feature& f0, feature& f1, v_array<feature> cross_features, uint32_t offset=0)
-{
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- float mult = f0.x * f1.x;
- return sd_add<T>(all, cross_features.begin, cross_features.end, halfhash + offset, mult);
-}
-
-inline float vec_add(vw& all, float fx, uint32_t fi) {
- return all.reg.weight_vector[fi & all.weight_mask] * fx;
-}
-
-inline float vec_add_trunc(vw& all, float fx, uint32_t fi) {
- return trunc_weight(all.reg.weight_vector[fi & all.weight_mask], (float)all.sd->gravity) * fx;
-}
-
-inline float vec_add_rescale(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_rescale(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
if( x_abs > w[all.normalized_idx] ) {// new scale discovered
if( w[all.normalized_idx] > 0. ) {//If the normalizer is > 0 then rescale the weight so it's as if the new scale was the old scale.
@@ -58,11 +34,11 @@ inline float vec_add_rescale(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return w[0] * fx;
+ *(float*)p += w[0] * fx;
}
-inline float vec_add_trunc_rescale(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_trunc_rescale(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
if( x_abs > w[all.normalized_idx] ) {
if( w[all.normalized_idx] > 0. ) {
@@ -71,11 +47,11 @@ inline float vec_add_trunc_rescale(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return trunc_weight(w[0], (float)all.sd->gravity) * fx;
+ *(float*)p += trunc_weight(w[0], (float)all.sd->gravity) * fx;
}
-inline float vec_add_rescale_general(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_rescale_general(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
if( x_abs > w[all.normalized_idx] ) {
@@ -85,11 +61,11 @@ inline float vec_add_rescale_general(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return w[0] * fx;
+ *(float*)p += w[0] * fx;
}
-inline float vec_add_trunc_rescale_general(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_trunc_rescale_general(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
if( x_abs > w[all.normalized_idx] ) {
@@ -99,40 +75,9 @@ inline float vec_add_trunc_rescale_general(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return trunc_weight(w[0], (float)all.sd->gravity) * fx;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_update(vw& all, feature* begin, feature* end, float update, uint32_t offset=0)
-{
- for (feature* f = begin; f!= end; f++)
- T(all, f->x, f->weight_index + offset, update);
+ *(float*)p += trunc_weight(w[0], (float)all.sd->gravity) * fx;
}
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_quad_update(vw& all, feature& f, v_array<feature> cross_features, float update, uint32_t offset=0)
-{
- size_t halfhash = quadratic_constant * (f.weight_index + offset);
- sd_update<T>(all, cross_features.begin, cross_features.end, halfhash + offset, update * f.x);
-}
-
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_cubic_update(vw& all, feature& f0, feature& f1, v_array<feature> cross_features, float update, uint32_t offset=0)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- sd_update<T>(all, cross_features.begin, cross_features.end, update * f0.x * f1.x, halfhash + offset);
-}
-
-inline void upd_add(vw& all, float fx, uint32_t fi, float update) {
- all.reg.weight_vector[fi] += update * fx;
-}
-
-
void sd_offset_update(weight* weights, size_t mask, feature* begin, feature* end, size_t offset, float update, float regularization);
-void quadratic(v_array<feature> &f, const v_array<feature> &first_part,
- const v_array<feature> &second_part, size_t thread_mask);
-
#endif
diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h
index a8dcdcb2..edf179f2 100644
--- a/vowpalwabbit/vw.h
+++ b/vowpalwabbit/vw.h
@@ -82,7 +82,7 @@ namespace VW {
void releaseFeatureSpace(primitive_feature_space* features, size_t len);
inline float get_weight(vw& all, uint32_t index)
- { return all.reg.weight_vector[(index * all.stride) & all.weight_mask];}
+ { return all.reg.weight_vector[(index * all.reg.stride) & all.reg.weight_mask];}
inline uint32_t num_weights(vw& all)
{ return (uint32_t)all.length();}
diff --git a/vowpalwabbit/vw_static.vcxproj b/vowpalwabbit/vw_static.vcxproj
index 3251b96e..7b951fd3 100644
--- a/vowpalwabbit/vw_static.vcxproj
+++ b/vowpalwabbit/vw_static.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -234,9 +234,9 @@
<IntDir>$(SolutionDir)$(PlatformName)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemGroup>
+ <ClInclude Include="autolink.h" />
<ClInclude Include="accumulate.h" />
<ClInclude Include="allreduce.h" />
- <ClInclude Include="autolink.h" />
<ClInclude Include="bfgs.h" />
<ClInclude Include="binary.h" />
<ClInclude Include="cache.h" />
@@ -275,9 +275,9 @@
<ClInclude Include="wap.h" />
</ItemGroup>
<ItemGroup>
+ <ClCompile Include="autolink.cc" />
<ClCompile Include="accumulate.cc" />
<ClCompile Include="allreduce.cc" />
- <ClCompile Include="autolink.cc" />
<ClCompile Include="beam.cc" />
<ClCompile Include="binary.cc" />
<ClCompile Include="bfgs.cc" />
@@ -314,4 +314,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project>
diff --git a/vowpalwabbit/wap.cc b/vowpalwabbit/wap.cc
index 8c5f47dc..c596b5b2 100644
--- a/vowpalwabbit/wap.cc
+++ b/vowpalwabbit/wap.cc
@@ -293,7 +293,7 @@ namespace WAP {
all.sd->k = (uint32_t)nb_actions;
all.base_learner_nb_w *= nb_actions;
- w->increment = (uint32_t)((all.length()/ all.base_learner_nb_w) * all.stride);
+ w->increment = (uint32_t)((all.length()/ all.base_learner_nb_w) * all.reg.stride);
learner l = {w, drive, learn, finish, all.l.sl};
w->base = all.l;