/* Copyright (c) by respective owners including Yahoo!, Microsoft, and
   individual contributors. All rights reserved. Released under a BSD (revised)
   license as described in the file LICENSE. */
#ifdef _WIN32
#include <WinSock2.h>
#else
#include <netdb.h>
#endif

#include "reductions.h"
#include "simple_label.h"
#include "gd.h"
#include "rand48.h"

using namespace std;
using namespace LEARNER;

namespace MF {

struct mf {
  vector<string> pairs;

  uint32_t rank;

  uint32_t increment;

  // array to cache w*x, (l^k * x_l) and (r^k * x_r)
  // [ w*(1,x_l,x_r), l^1*x_l, r^1*x_r, l^2*x_l, r^2*x_r, ... ]
  v_array<float> sub_predictions;

  // array for temp storage of indices
  v_array<unsigned char> indices;

  // array for temp storage of features
  v_array<feature> temp_features;

  vw* all;
};

template <bool cache_sub_predictions>
void predict(mf& data, learner& base, example& ec) {
  vw* all = data.all;

  float prediction = 0;
  if (cache_sub_predictions)
    data.sub_predictions.resize(2 * all->rank + 1, true);

  // predict from linear terms
  base.predict(ec);

  // store linear prediction
  if (cache_sub_predictions)
    data.sub_predictions[0] = ec.partial_prediction;
  prediction += ec.partial_prediction;

  // store namespace indices
  copy_array(data.indices, ec.indices);

  // erase indices
  ec.indices.erase();
  ec.indices.push_back(0);

  // add interaction terms to prediction
  for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {

    int left_ns = (int) (*i)[0];
    int right_ns = (int) (*i)[1];

    if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
      for (size_t k = 1; k <= all->rank; k++) {

        // set example to left namespace only
        ec.indices[0] = left_ns;

        // compute l^k * x_l using base learner
        base.predict(ec, k);
        float x_dot_l = ec.partial_prediction;
        if (cache_sub_predictions)
          data.sub_predictions[2 * k - 1] = x_dot_l;

        // set example to right namespace only
        ec.indices[0] = right_ns;

        // compute r^k * x_r using base learner
        base.predict(ec, k + all->rank);
        float x_dot_r = ec.partial_prediction;
        if (cache_sub_predictions)
          data.sub_predictions[2 * k] = x_dot_r;

        // accumulate prediction
        prediction += (x_dot_l * x_dot_r);
      }
    }
  }
  // restore namespace indices and label
  copy_array(ec.indices, data.indices);

  // finalize prediction
  ec.partial_prediction = prediction;
  ((label_data*) ec.ld)->prediction = GD::finalize_prediction(*(data.all), ec.partial_prediction);
}
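// Note on the update below: the prediction computed in predict() decomposes as
//
//   p(x) = w*(1,x_l,x_r) + sum_{k=1..rank} (l^k * x_l) * (r^k * x_r),
//
// so the gradient of the k-th interaction term with respect to l^k is
// (r^k * x_r) * x_l, and symmetrically (l^k * x_l) * x_r for r^k. learn()
// exploits this: it scales the features of one namespace by the cached dot
// product of the other side (from sub_predictions) and then lets the base
// learner run its ordinary linear update, which is exactly the matrix
// factorization gradient step for that factor.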
void learn(mf& data, learner& base, example& ec) {
  vw* all = data.all;

  // predict with current weights, caching the per-factor dot products
  predict<true>(data, base, ec);

  // update linear weights
  base.update(ec);

  // store namespace indices
  copy_array(data.indices, ec.indices);

  // erase indices
  ec.indices.erase();
  ec.indices.push_back(0);

  // update interaction terms,
  // looping over all pairs of non-empty namespaces
  for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {

    int left_ns = (int) (*i)[0];
    int right_ns = (int) (*i)[1];

    if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {

      // set example to left namespace only
      ec.indices[0] = left_ns;

      // store feature values in left namespace
      copy_array(data.temp_features, ec.atomics[left_ns]);

      for (size_t k = 1; k <= all->rank; k++) {

        // multiply features in left namespace by r^k * x_r
        for (feature* f = ec.atomics[left_ns].begin; f != ec.atomics[left_ns].end; f++)
          f->x *= data.sub_predictions[2 * k];

        // update l^k using base learner
        base.update(ec, k);

        // restore left namespace features (undoing multiply)
        copy_array(ec.atomics[left_ns], data.temp_features);
      }

      // set example to right namespace only
      ec.indices[0] = right_ns;

      // store feature values for right namespace
      copy_array(data.temp_features, ec.atomics[right_ns]);

      for (size_t k = 1; k <= all->rank; k++) {

        // multiply features in right namespace by l^k * x_l
        for (feature* f = ec.atomics[right_ns].begin; f != ec.atomics[right_ns].end; f++)
          f->x *= data.sub_predictions[2 * k - 1];

        // update r^k using base learner
        base.update(ec, k + all->rank);

        // restore right namespace features (undoing multiply)
        copy_array(ec.atomics[right_ns], data.temp_features);
      }
    }
  }
  // restore namespace indices
  copy_array(ec.indices, data.indices);
}

void finish(mf& o) {
  // restore global pairs
  o.all->pairs = o.pairs;

  // clean up local v_arrays
  o.indices.delete_v();
  o.sub_predictions.delete_v();
  o.temp_features.delete_v();
}

learner* setup(vw& all, po::variables_map& vm) {
  mf* data = new mf;

  // copy global data locally
  data->all = &all;
  data->rank = all.rank;

  // store global pairs in local data structure and clear global pairs
  // for eventual calls to base learner
  data->pairs = all.pairs;
  all.pairs.clear();

  // initialize weights randomly (unless loading an existing regressor)
  if (!vm.count("initial_regressor")) {
    for (size_t j = 0; j < (all.reg.weight_mask + 1) >> all.reg.stride_shift; j++)
      all.reg.weight_vector[j << all.reg.stride_shift] = (float) (0.1 * frand48());
  }

  // the base learner is replicated 2*rank+1 times: one weight set for the
  // linear terms, plus one for each l^k and each r^k
  learner* l = new learner(data, all.l, 2 * data->rank + 1);
  l->set_learn<mf, learn>();
  l->set_predict<mf, predict<false> >();
  l->set_finish<mf, finish>();
  return l;
}

}
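// A minimal usage sketch (an assumption for illustration: this reduction is
// the one enabled by vw's --rank option, with user features in namespace 'u'
// and item features in namespace 'i'):
//
//   vw --rank 10 -q ui -d train.dat -f mf.model
//
// Here -q ui names the namespace pair whose interactions are factorized, and
// --rank 10 sets the number of latent factors per side.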