Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>2011-06-10 22:43:05 +0400
committerJacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>2011-06-10 22:43:05 +0400
commitcd63c6d474b3b210bb25d8422e6de74b59d6fe5f (patch)
treefbca6a402bbbd658014dfea0310397fa7088c4a2 /gd_mf.cc
parent27198deb609a4164a960b580fd405dd6a491867e (diff)
parent527b7bf6287fd298dd5ca08f53226085c02b38cd (diff)
things unbroken, maybe?
Diffstat (limited to 'gd_mf.cc')
-rw-r--r--gd_mf.cc158
1 file changed, 20 insertions, 138 deletions
diff --git a/gd_mf.cc b/gd_mf.cc
index 840d45fe..9fb8b427 100644
--- a/gd_mf.cc
+++ b/gd_mf.cc
@@ -58,16 +58,6 @@ void* gd_mf_thread(void *in)
}
else if (thread_done(thread_num))
{
-
- // decay all weights by (1-lambda)^t if global.weight_decay is set
- if (global.weight_decay > 0)
- for (size_t i = 0; i < global.length(); i++) {
- weight* weights_for_w = & (reg.weight_vectors[thread_num][i*global.stride]);
-
- for (size_t k = 0; k <= 2*global.rank; k++)
- weights_for_w[k] *= pow(1 - global.weight_decay, global.weighted_examples);
- }
-
if (global.local_prediction > 0)
shutdown(global.local_prediction, SHUT_WR);
return NULL;
@@ -94,10 +84,6 @@ float mf_inline_predict(regressor &reg, example* &ec, size_t thread_num)
for (size_t* i = ec->indices.begin; i != ec->indices.end; i++)
linear_prediction += sd_add(weights,thread_mask,ec->atomics[*i].begin, ec->atomics[*i].end);
- // yhat <- (1-lambda)^t * yhat
- if (global.weight_decay > 0)
- linear_prediction *= pow(1-global.weight_decay, ec->example_t);
-
// store constant + linear prediction
// note: constant is now automatically added
push(ec->topic_predictions, linear_prediction);
@@ -113,86 +99,44 @@ float mf_inline_predict(regressor &reg, example* &ec, size_t thread_num)
{
// x_l * l^k
// l^k is from index+1 to index+global.rank
- float first = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
+ float x_dot_l = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
// x_r * r^k
// r^k is from index+global.rank+1 to index+2*global.rank
- float second = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank);
-
- // yhat <- (1-lambda)^t * yhat
- if (global.weight_decay > 0) {
- first *= pow(1-global.weight_decay, ec->example_t);
- second *= pow(1-global.weight_decay, ec->example_t);
- }
+ float x_dot_r = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank);
- prediction += first * second;
+ prediction += x_dot_l * x_dot_r;
// store prediction from interaction terms
- push(ec->topic_predictions, first);
- push(ec->topic_predictions, second);
+ push(ec->topic_predictions, x_dot_l);
+ push(ec->topic_predictions, x_dot_r);
}
}
}
+ // ec->topic_predictions has linear, x_dot_l_1, x_dot_r_1, x_dot_l_2, x_dot_r_2, ...
+
return prediction;
}
void mf_inline_train(gd_vars& vars, regressor &reg, example* &ec, size_t thread_num, float update)
{
- //cout << "update = " << update << endl;
weight* weights = reg.weight_vectors[thread_num];
size_t thread_mask = global.thread_mask;
label_data* ld = (label_data*)ec->ld;
- // use topic_predictions for constant + linear update
- update = reg.loss->getUpdate(ec->topic_predictions[0], ld->label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, ec->total_sum_feat_sq);
-
- /*
- cout << "ec->topic_predictions[0]: " << ec->topic_predictions[0] << endl;
- cout << "ld->label: " << ld->label << endl;
- cout << "global.eta: " << global.eta << endl;
- cout << "power_t: " << vars.power_t << endl;
- cout << "ld->weight: " << ld->weight << endl;
- cout << "eta: " << global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight << endl;
- cout << "ec->total_sum_feat_sq: " << ec->total_sum_feat_sq << endl;
- cout << "linear update: " << update << endl;
- */
+ // use final prediction to get update size
+ update = reg.loss->getUpdate(ec->final_prediction, ld->label, global.eta/pow(ec->example_t,vars.power_t) / 3. * ld->weight, 1.); //ec->total_sum_feat_sq);
// linear update
for (size_t* i = ec->indices.begin; i != ec->indices.end; i++)
sd_offset_update(weights, thread_mask, ec->atomics[*i].begin, ec->atomics[*i].end, 0, update);
- // y - (old yhat linear + delta yhat linear)
- float residual_label = ld->label - ( ec->topic_predictions[0] + update * (ec->total_sum_feat_sq) );
-
- //cout << update * (ec->total_sum_feat_sq) << endl;
-
// quadratic update
for (vector<string>::iterator i = global.pairs.begin(); i != global.pairs.end();i++)
{
if (ec->atomics[(int)(*i)[0]].index() > 0 && ec->atomics[(int)(*i)[1]].index() > 0)
{
- /* all "left" updates */
-
- float sum_rk_xr_sq = 0;
- // compute \sum_k |r^k \cdot x_r|^2
- // jntj: store r^k \cdot x_r's at some point?
- float sum_topic_predictions = 0;
- for (size_t k = 1; k <= global.rank; k++)
- {
- float tmp = ec->topic_predictions[2*k];
- sum_rk_xr_sq += tmp*tmp;
- sum_topic_predictions += tmp*ec->topic_predictions[2*k-1];
- }
-
- // |x_l|^2 \sum_k |r^k \cdot x_r|^2
- float norm = ec->sum_feat_sq[(int)(*i)[0]] * sum_rk_xr_sq;
- //float norm = 1;
- // \eta (y-\hat{y}) * norm
- update = reg.loss->getUpdate(sum_topic_predictions, residual_label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, norm);
-
- //cout << "left update: " << update << endl;
-
// update l^k weights
for (size_t k = 1; k <= global.rank; k++)
{
@@ -202,41 +146,17 @@ void mf_inline_train(gd_vars& vars, regressor &reg, example* &ec, size_t thread_
sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k, update*r_dot_x);
}
- //cout << update*norm << endl;
-
- /* all "right" updates */
-
- float sum_lk_xl_sq = 0;
- sum_topic_predictions += update * norm;
- // compute \sum_k |l^k \cdot x_l|^2
- for (size_t k = 1; k <= global.rank; k++)
- {
- float tmp = ec->topic_predictions[2*k-1];
- sum_lk_xl_sq += tmp*tmp;
- }
-
- // |x_r|^2 \sum_k |l^k \cdot x_l|^2
- norm = ec->sum_feat_sq[(int)(*i)[1]] * sum_lk_xl_sq;
- //norm = 1;
- // \eta (y-\hat{y}) * norm
- update = reg.loss->getUpdate(sum_topic_predictions, residual_label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, norm);
-
- //cout << "right update: " << update << endl;
-
- // update l^k weights
+ // update r^k weights
for (size_t k = 1; k <= global.rank; k++)
{
// (l^k \cdot x_l)
float l_dot_x = ec->topic_predictions[2*k-1];
// update r^k with above
- sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k, update*l_dot_x);
+ sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank, update*l_dot_x);
}
- //cout << update*norm << endl;
-
}
}
-
}
void mf_print_offset_features(regressor &reg, example* &ec, size_t offset)
@@ -279,9 +199,6 @@ void mf_print_offset_features(regressor &reg, example* &ec, size_t offset)
}
}
}
-
- cout << "\tConstant:0:1:" << weights[(constant+offset) & global.thread_mask] << endl;
-
}
void mf_print_audit_features(regressor &reg, example* ec, size_t offset)
@@ -294,65 +211,30 @@ void mf_local_predict(example* ec, size_t mf_num_threads, gd_vars& vars, regress
{
label_data* ld = (label_data*)ec->ld;
- ec->final_prediction =
- finalize_prediction(ec->partial_prediction);
-
- if (ec->final_prediction > 5) {
- cout << "partial prediction " << ec->partial_prediction;
- cout << " , final prediction " << ec->final_prediction << endl;
- }
+ ec->final_prediction = finalize_prediction(ec->partial_prediction);
if (ld->label != FLT_MAX)
{
ec->loss = reg.loss->getLoss(ec->final_prediction, ld->label) * ld->weight;
}
- // jntj: needs to be updated for cluster setting
- if (global.local_prediction > 0)
- {
- prediction pred = {ec->example_counter, ec->final_prediction};
- send_prediction(global.local_prediction, pred);
- if (global.unique_id == 0)
- {
- size_t len = sizeof(ld->label) + sizeof(ld->weight);
- char c[len];
- bufcache_simple_label(ld,c);
- write(global.local_prediction,c,len);
- }
- }
-
if (global.audit)
mf_print_audit_features(reg, ec, 0);
}
-pthread_cond_t mf_finished_sum = PTHREAD_COND_INITIALIZER;
-
float mf_predict(regressor& r, example* ex, size_t thread_num, gd_vars& vars)
{
float prediction = mf_inline_predict(r, ex, thread_num);
- pthread_mutex_lock(&ex->lock);
-
- ex->partial_prediction += prediction;
- if (--ex->threads_to_finish != 0)
- {
- while (!ex->done)
- pthread_cond_wait(&mf_finished_sum, &ex->lock);
- }
- else // We are the last thread using this example.
- {
- mf_local_predict(ex, global.num_threads(),vars,r);
- ex->done = true;
-
- pthread_cond_broadcast(&mf_finished_sum);
-
- if (global.training && ((label_data*)(ex->ld))->label != FLT_MAX)
- delay_example(ex,global.num_threads());
- else
- delay_example(ex,0);
- }
- pthread_mutex_unlock(&ex->lock);
+ ex->partial_prediction = prediction;
+ mf_local_predict(ex, global.num_threads(),vars,r);
+
+ if (global.training && ((label_data*)(ex->ld))->label != FLT_MAX)
+ delay_example(ex,global.num_threads());
+ else
+ delay_example(ex,0);
+
return ex->final_prediction;
}