1 file changed, 84 insertions, 77 deletions
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index 1a9a6335..4e91aeba 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -26,10 +26,12 @@ using namespace LEARNER;
 namespace GDMF {
   struct gdmf {
     vw* all;
+    uint32_t rank;
   };
 
-void mf_print_offset_features(vw& all, example& ec, size_t offset)
+void mf_print_offset_features(gdmf& d, example& ec, size_t offset)
 {
+  vw& all = *d.all;
   weight* weights = all.reg.weight_vector;
   size_t mask = all.reg.weight_mask;
   for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) 
@@ -53,7 +55,7 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
     if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
       {
 	/* print out nsk^feature:hash:value:weight:nsk^feature^:hash:value:weight:prod_weights */
-	for (size_t k = 1; k <= all.rank; k++)
+	for (size_t k = 1; k <= d.rank; k++)
 	  {
 	    for (audit_data* f = ec.audit_features[(int)(*i)[0]].begin; f!= ec.audit_features[(int)(*i)[0]].end; f++)
 	      for (audit_data* f2 = ec.audit_features[(int)(*i)[1]].begin; f2!= ec.audit_features[(int)(*i)[1]].end; f2++)
@@ -62,11 +64,11 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
 		       <<"(" << ((f->weight_index + offset +k) & mask)  << ")" << ':' << f->x;
 		  cout << ':' << weights[(f->weight_index + offset + k) & mask];
 		  
-		  cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+all.rank)&mask) 
-		       <<"(" << ((f2->weight_index + offset +k+all.rank) & mask)  << ")" << ':' << f2->x;
-		  cout << ':' << weights[(f2->weight_index + offset + k+all.rank) & mask];
+		  cout << ':' << f2->space << k << '^' << f2->feature << ':' << ((f2->weight_index+k+d.rank)&mask) 
+		       <<"(" << ((f2->weight_index + offset +k+d.rank) & mask)  << ")" << ':' << f2->x;
+		  cout << ':' << weights[(f2->weight_index + offset + k+d.rank) & mask];
 		  
-		  cout << ':' <<  weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + all.rank) & mask];
+		  cout << ':' <<  weights[(f->weight_index + offset + k) & mask] * weights[(f2->weight_index + offset + k + d.rank) & mask];
 		}
 	  }
       }
@@ -77,17 +79,25 @@ void mf_print_offset_features(vw& all, example& ec, size_t offset)
   cout << endl;
 }
 
-void mf_print_audit_features(vw& all, example& ec, size_t offset)
+void mf_print_audit_features(gdmf& d, example& ec, size_t offset)
 {
-  print_result(all.stdout_fileno,ec.pred.scalar,-1,ec.tag);
-  mf_print_offset_features(all, ec, offset);
+  print_result(d.all->stdout_fileno,ec.pred.scalar,-1,ec.tag);
+  mf_print_offset_features(d, ec, offset);
 }
 
-float mf_predict(vw& all, example& ec)
+float mf_predict(gdmf& d, example& ec)
 {
+  vw& all = *d.all;
   label_data& ld = ec.l.simple;
   float prediction = ld.initial;
 
+  for (vector<string>::iterator i = d.all->pairs.begin(); i != d.all->pairs.end();i++)
+    {
+      ec.num_features -= ec.atomics[(int)(*i)[0]].size() * ec.atomics[(int)(*i)[1]].size();
+      ec.num_features += ec.atomics[(int)(*i)[0]].size() * d.rank;
+      ec.num_features += ec.atomics[(int)(*i)[1]].size() * d.rank;
+    }
+
   // clear stored predictions
   ec.topic_predictions.erase();
 
@@ -107,18 +117,18 @@ float mf_predict(vw& all, example& ec)
     {
       if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
 	{
-	  for (uint32_t k = 1; k <= all.rank; k++)
+	  for (uint32_t k = 1; k <= d.rank; k++)
 	    {
 	      // x_l * l^k
-	      // l^k is from index+1 to index+all.rank
+	      // l^k is from index+1 to index+d.rank
 	      //float x_dot_l = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k);
               float x_dot_l = 0.;
 	      GD::foreach_feature<float, GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, x_dot_l, k);
 	      // x_r * r^k
-	      // r^k is from index+all.rank+1 to index+2*all.rank
-	      //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank);
+	      // r^k is from index+d.rank+1 to index+2*d.rank
+	      //float x_dot_r = sd_offset_add(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank);
               float x_dot_r = 0.;
-	      GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+all.rank);
+	      GD::foreach_feature<float,GD::vec_add>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, x_dot_r, k+d.rank);
 
 	      prediction += x_dot_l * x_dot_r;
 
@@ -146,7 +156,7 @@ float mf_predict(vw& all, example& ec)
     ec.loss = all.loss->getLoss(all.sd, ec.pred.scalar, ld.label) * ld.weight;
   
   if (all.audit)
-    mf_print_audit_features(all, ec, 0);
+    mf_print_audit_features(d, ec, 0);
   
   return ec.pred.scalar;
 }
@@ -158,55 +168,55 @@ void sd_offset_update(weight* weights, size_t mask, feature* begin, feature* end
     weights[(f->weight_index + offset) & mask] += update * f->x - regularization * weights[(f->weight_index + offset) & mask];
 }
 
-void mf_train(vw& all, example& ec)
-{
-      weight* weights = all.reg.weight_vector;
-      size_t mask = all.reg.weight_mask;
-      label_data& ld = ec.l.simple;
-
-      // use final prediction to get update size
+  void mf_train(gdmf& d, example& ec)
+  {
+    vw& all = *d.all;
+    weight* weights = all.reg.weight_vector;
+    size_t mask = all.reg.weight_mask;
+    label_data& ld = ec.l.simple;
+    
+    // use final prediction to get update size
       // update = eta_t*(y-y_hat) where eta_t = eta/(3*t^p) * importance weight
-      float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight;
-      float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq);
-
-      float regularization = eta_t * all.l2_lambda;
-
-      // linear update
-      for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) 
-	sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization);
-      
-      // quadratic update
-      for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) 
-	{
-	  if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
-	    {
-
-	      // update l^k weights
-	      for (size_t k = 1; k <= all.rank; k++)
-		{
-		  // r^k \cdot x_r
-		  float r_dot_x = ec.topic_predictions[2*k];
-		  // l^k <- l^k + update * (r^k \cdot x_r) * x_l
-		  sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization);
-		}
-
-	      // update r^k weights
-	      for (size_t k = 1; k <= all.rank; k++)
-		{
-		  // l^k \cdot x_l
-		  float l_dot_x = ec.topic_predictions[2*k-1];
-		  // r^k <- r^k + update * (l^k \cdot x_l) * x_r
-		  sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+all.rank, update*l_dot_x, regularization);
-		}
-
-	    }
-	}
-  if (all.triples.begin() != all.triples.end()) {
-    cerr << "cannot use triples in matrix factorization" << endl;
-    throw exception();
-  }
-}  
-
+    float eta_t = all.eta/pow(ec.example_t,all.power_t) / 3.f * ld.weight;
+    float update = all.loss->getUpdate(ec.pred.scalar, ld.label, eta_t, 1.); //ec.total_sum_feat_sq);
+    
+    float regularization = eta_t * all.l2_lambda;
+    
+    // linear update
+    for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++) 
+      sd_offset_update(weights, mask, ec.atomics[*i].begin, ec.atomics[*i].end, 0, update, regularization);
+    
+    // quadratic update
+    for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) 
+      {
+	if (ec.atomics[(int)(*i)[0]].size() > 0 && ec.atomics[(int)(*i)[1]].size() > 0)
+	  {
+	    
+	    // update l^k weights
+	    for (size_t k = 1; k <= d.rank; k++)
+	      {
+		// r^k \cdot x_r
+		float r_dot_x = ec.topic_predictions[2*k];
+		// l^k <- l^k + update * (r^k \cdot x_r) * x_l
+		sd_offset_update(weights, mask, ec.atomics[(int)(*i)[0]].begin, ec.atomics[(int)(*i)[0]].end, k, update*r_dot_x, regularization);
+	      }
+	    // update r^k weights
+	    for (size_t k = 1; k <= d.rank; k++)
+	      {
+		// l^k \cdot x_l
+		float l_dot_x = ec.topic_predictions[2*k-1];
+		// r^k <- r^k + update * (l^k \cdot x_l) * x_r
+		sd_offset_update(weights, mask, ec.atomics[(int)(*i)[1]].begin, ec.atomics[(int)(*i)[1]].end, k+d.rank, update*l_dot_x, regularization);
+	      }
+	    
+	  }
+      }
+    if (all.triples.begin() != all.triples.end()) {
+      cerr << "cannot use triples in matrix factorization" << endl;
+      throw exception();
+    }
+  }  
+  
   void save_load(gdmf& d, io_buf& model_file, bool read, bool text)
 {
   vw* all = d.all;
@@ -231,7 +241,7 @@ void mf_train(vw& all, example& ec)
       do 
 	{
 	  brw = 0;
-	  size_t K = all->rank*2+1;
+	  size_t K = d.rank*2+1;
 	  
 	  text_len = sprintf(buff, "%d ", i);
 	  brw += bin_text_read_write_fixed(model_file,(char *)&i, sizeof (i),
@@ -273,19 +283,16 @@ void mf_train(vw& all, example& ec)
   }
 
   void predict(gdmf& d, base_learner& base, example& ec)
-  {
-    vw* all = d.all;
- 
-    mf_predict(*all,ec);
+  { mf_predict(d,ec); 
   }
 
   void learn(gdmf& d, base_learner& base, example& ec)
   {
-    vw* all = d.all;
+    vw& all = *d.all;
  
-    predict(d, base, ec);
-    if (all->training && ec.l.simple.label != FLT_MAX)
-      mf_train(*all, ec);
+    mf_predict(d, ec);
+    if (all.training && ec.l.simple.label != FLT_MAX)
+      mf_train(d, ec);
   }
 
   base_learner* setup(vw& all, po::variables_map& vm)
@@ -294,16 +301,16 @@ void mf_train(vw& all, example& ec)
     opts.add_options()
       ("rank", po::value<uint32_t>(), "rank for matrix factorization.");
     vm = add_options(all, opts);
-    if(!vm.count("gdmf"))
+    if(!vm.count("rank"))
       return NULL;
-    else
-      all.rank = vm["gdmf"].as<uint32_t>();
 
     gdmf& data = calloc_or_die<gdmf>(); 
     data.all = &all;
+    data.rank = vm["rank"].as<uint32_t>();
 
+    *all.file_options << " --rank " << data.rank;
     // store linear + 2*rank weights per index, round up to power of two
-    float temp = ceilf(logf((float)(all.rank*2+1)) / logf (2.f));
+    float temp = ceilf(logf((float)(data.rank*2+1)) / logf (2.f));
     all.reg.stride_shift = (size_t) temp;
     all.random_weights = true;