Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>2011-06-10 22:43:05 +0400
committerJacob Hofman <hofman@research-mm6.corp.sp1.yahoo.com>2011-06-10 22:43:05 +0400
commitcd63c6d474b3b210bb25d8422e6de74b59d6fe5f (patch)
treefbca6a402bbbd658014dfea0310397fa7088c4a2 /gd_mf.cc
parent27198deb609a4164a960b580fd405dd6a491867e (diff)
parent527b7bf6287fd298dd5ca08f53226085c02b38cd (diff)
things unbroken, maybe?
Diffstat (limited to 'gd_mf.cc')
-rw-r--r--gd_mf.cc158
1 file changed, 20 insertions, 138 deletions
diff --git a/gd_mf.cc b/gd_mf.cc
index 840d45fe..9fb8b427 100644
--- a/gd_mf.cc
+++ b/gd_mf.cc
@@ -58,16 +58,6 @@ void* gd_mf_thread(void *in)
}
else if (thread_done(thread_num))
{
-
- // decay all weights by (1-lambda)^t if global.weight_decay is set
- if (global.weight_decay > 0)
- for (size_t i = 0; i < global.length(); i++) {
- weight* weights_for_w = & (reg.weight_vectors[thread_num][i*global.stride]);
-
- for (size_t k = 0; k <= 2*global.rank; k++)
- weights_for_w[k] *= pow(1 - global.weight_decay, global.weighted_examples);
- }
-
if (global.local_prediction > 0)
shutdown(global.local_prediction, SHUT_WR);
return NULL;
@@ -94,10 +84,6 @@ float mf_inline_predict(regressor &reg, example* &ec, size_t thread_num)
for (size_t* i = ec->indices.begin; i != ec->indices.end; i++)
linear_prediction += sd_add(weights,thread_mask,ec->atomics[*i].begin, ec->atomics[*i].end);
- // yhat <- (1-lambda)^t * yhat
- if (global.weight_decay > 0)
- linear_prediction *= pow(1-global.weight_decay, ec->example_t);
-
// store constant + linear prediction
// note: constant is now automatically added
push(ec->topic_predictions, linear_prediction);
@@ -113,86 +99,44 @@ float mf_inline_predict(regressor &reg, example* &ec, size_t thread_num)
{
// x_l * l^k
// l^k is from index+1 to index+global.rank
- float first = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
+ float x_dot_l = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
// x_r * r^k
// r^k is from index+global.rank+1 to index+2*global.rank
- float second = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank);
-
- // yhat <- (1-lambda)^t * yhat
- if (global.weight_decay > 0) {
- first *= pow(1-global.weight_decay, ec->example_t);
- second *= pow(1-global.weight_decay, ec->example_t);
- }
+ float x_dot_r = sd_offset_add(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank);
- prediction += first * second;
+ prediction += x_dot_l * x_dot_r;
// store prediction from interaction terms
- push(ec->topic_predictions, first);
- push(ec->topic_predictions, second);
+ push(ec->topic_predictions, x_dot_l);
+ push(ec->topic_predictions, x_dot_r);
}
}
}
+ // ec->topic_predictions has linear, x_dot_l_1, x_dot_r_1, x_dot_l_2, x_dot_r_2, ...
+
return prediction;
}
void mf_inline_train(gd_vars& vars, regressor &reg, example* &ec, size_t thread_num, float update)
{
- //cout << "update = " << update << endl;
weight* weights = reg.weight_vectors[thread_num];
size_t thread_mask = global.thread_mask;
label_data* ld = (label_data*)ec->ld;
- // use topic_predictions for constant + linear update
- update = reg.loss->getUpdate(ec->topic_predictions[0], ld->label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, ec->total_sum_feat_sq);
-
- /*
- cout << "ec->topic_predictions[0]: " << ec->topic_predictions[0] << endl;
- cout << "ld->label: " << ld->label << endl;
- cout << "global.eta: " << global.eta << endl;
- cout << "power_t: " << vars.power_t << endl;
- cout << "ld->weight: " << ld->weight << endl;
- cout << "eta: " << global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight << endl;
- cout << "ec->total_sum_feat_sq: " << ec->total_sum_feat_sq << endl;
- cout << "linear update: " << update << endl;
- */
+ // use final prediction to get update size
+ update = reg.loss->getUpdate(ec->final_prediction, ld->label, global.eta/pow(ec->example_t,vars.power_t) / 3. * ld->weight, 1.); //ec->total_sum_feat_sq);
// linear update
for (size_t* i = ec->indices.begin; i != ec->indices.end; i++)
sd_offset_update(weights, thread_mask, ec->atomics[*i].begin, ec->atomics[*i].end, 0, update);
- // y - (old yhat linear + delta yhat linear)
- float residual_label = ld->label - ( ec->topic_predictions[0] + update * (ec->total_sum_feat_sq) );
-
- //cout << update * (ec->total_sum_feat_sq) << endl;
-
// quadratic update
for (vector<string>::iterator i = global.pairs.begin(); i != global.pairs.end();i++)
{
if (ec->atomics[(int)(*i)[0]].index() > 0 && ec->atomics[(int)(*i)[1]].index() > 0)
{
- /* all "left" updates */
-
- float sum_rk_xr_sq = 0;
- // compute \sum_k |r^k \cdot x_r|^2
- // jntj: store r^k \cdot x_r's at some point?
- float sum_topic_predictions = 0;
- for (size_t k = 1; k <= global.rank; k++)
- {
- float tmp = ec->topic_predictions[2*k];
- sum_rk_xr_sq += tmp*tmp;
- sum_topic_predictions += tmp*ec->topic_predictions[2*k-1];
- }
-
- // |x_l|^2 \sum_k |r^k \cdot x_r|^2
- float norm = ec->sum_feat_sq[(int)(*i)[0]] * sum_rk_xr_sq;
- //float norm = 1;
- // \eta (y-\hat{y}) * norm
- update = reg.loss->getUpdate(sum_topic_predictions, residual_label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, norm);
-
- //cout << "left update: " << update << endl;
-
// update l^k weights
for (size_t k = 1; k <= global.rank; k++)
{
@@ -202,41 +146,17 @@ void mf_inline_train(gd_vars& vars, regressor &reg, example* &ec, size_t thread_
sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k, update*r_dot_x);
}
- //cout << update*norm << endl;
-
- /* all "right" updates */
-
- float sum_lk_xl_sq = 0;
- sum_topic_predictions += update * norm;
- // compute \sum_k |l^k \cdot x_l|^2
- for (size_t k = 1; k <= global.rank; k++)
- {
- float tmp = ec->topic_predictions[2*k-1];
- sum_lk_xl_sq += tmp*tmp;
- }
-
- // |x_r|^2 \sum_k |l^k \cdot x_l|^2
- norm = ec->sum_feat_sq[(int)(*i)[1]] * sum_lk_xl_sq;
- //norm = 1;
- // \eta (y-\hat{y}) * norm
- update = reg.loss->getUpdate(sum_topic_predictions, residual_label, global.eta/pow(ec->example_t,vars.power_t) / 3 * ld->weight, norm);
-
- //cout << "right update: " << update << endl;
-
- // update l^k weights
+ // update r^k weights
for (size_t k = 1; k <= global.rank; k++)
{
// (l^k \cdot x_l)
float l_dot_x = ec->topic_predictions[2*k-1];
// update r^k with above
- sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k, update*l_dot_x);
+ sd_offset_update(weights, thread_mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+global.rank, update*l_dot_x);
}
- //cout << update*norm << endl;
-
}
}
-
}
void mf_print_offset_features(regressor &reg, example* &ec, size_t offset)
@@ -279,9 +199,6 @@ void mf_print_offset_features(regressor &reg, example* &ec, size_t offset)
}
}
}
-
- cout << "\tConstant:0:1:" << weights[(constant+offset) & global.thread_mask] << endl;
-
}
void mf_print_audit_features(regressor &reg, example* ec, size_t offset)
@@ -294,65 +211,30 @@ void mf_local_predict(example* ec, size_t mf_num_threads, gd_vars& vars, regress
{
label_data* ld = (label_data*)ec->ld;
- ec->final_prediction =
- finalize_prediction(ec->partial_prediction);
-
- if (ec->final_prediction > 5) {
- cout << "partial prediction " << ec->partial_prediction;
- cout << " , final prediction " << ec->final_prediction << endl;
- }
+ ec->final_prediction = finalize_prediction(ec->partial_prediction);
if (ld->label != FLT_MAX)
{
ec->loss = reg.loss->getLoss(ec->final_prediction, ld->label) * ld->weight;
}
- // jntj: needs to be updated for cluster setting
- if (global.local_prediction > 0)
- {
- prediction pred = {ec->example_counter, ec->final_prediction};
- send_prediction(global.local_prediction, pred);
- if (global.unique_id == 0)
- {
- size_t len = sizeof(ld->label) + sizeof(ld->weight);
- char c[len];
- bufcache_simple_label(ld,c);
- write(global.local_prediction,c,len);
- }
- }
-
if (global.audit)
mf_print_audit_features(reg, ec, 0);
}
-pthread_cond_t mf_finished_sum = PTHREAD_COND_INITIALIZER;
-
float mf_predict(regressor& r, example* ex, size_t thread_num, gd_vars& vars)
{
float prediction = mf_inline_predict(r, ex, thread_num);
- pthread_mutex_lock(&ex->lock);
-
- ex->partial_prediction += prediction;
- if (--ex->threads_to_finish != 0)
- {
- while (!ex->done)
- pthread_cond_wait(&mf_finished_sum, &ex->lock);
- }
- else // We are the last thread using this example.
- {
- mf_local_predict(ex, global.num_threads(),vars,r);
- ex->done = true;
-
- pthread_cond_broadcast(&mf_finished_sum);
-
- if (global.training && ((label_data*)(ex->ld))->label != FLT_MAX)
- delay_example(ex,global.num_threads());
- else
- delay_example(ex,0);
- }
- pthread_mutex_unlock(&ex->lock);
+ ex->partial_prediction = prediction;
+ mf_local_predict(ex, global.num_threads(),vars,r);
+
+ if (global.training && ((label_data*)(ex->ld))->label != FLT_MAX)
+ delay_example(ex,global.num_threads());
+ else
+ delay_example(ex,0);
+
return ex->final_prediction;
}