Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorniruc <niruc@NIRUC-M6600.redmond.corp.microsoft.com>2013-04-05 01:17:14 +0400
committerniruc <niruc@NIRUC-M6600.redmond.corp.microsoft.com>2013-04-05 01:17:14 +0400
commit571f1f36679811b1c170a76fb0487a5c6dba177d (patch)
treee3c24b4f49936f5d1533369786803a7f66fa23cc
parent605dbf1f7a418a41197336aa296d6d27be1f935d (diff)
parent32baa122d2ba75158ddb3f2103710cb456da3d1b (diff)
merge conflicts resolved
-rwxr-xr-xautogen.sh10
-rw-r--r--library/ezexample.h2
-rw-r--r--vowpalwabbit/accumulate.cc6
-rw-r--r--vowpalwabbit/autolink.cc2
-rw-r--r--vowpalwabbit/bfgs.cc173
-rw-r--r--vowpalwabbit/cb.cc2
-rw-r--r--vowpalwabbit/csoaa.cc2
-rw-r--r--vowpalwabbit/ect.cc2
-rw-r--r--vowpalwabbit/gd.cc153
-rw-r--r--vowpalwabbit/gd.h80
-rw-r--r--vowpalwabbit/gd_mf.cc17
-rw-r--r--vowpalwabbit/global_data.cc2
-rw-r--r--vowpalwabbit/global_data.h4
-rw-r--r--vowpalwabbit/lda_core.cc18
-rw-r--r--vowpalwabbit/nn.cc18
-rw-r--r--vowpalwabbit/oaa.cc2
-rw-r--r--vowpalwabbit/parse_args.cc10
-rw-r--r--vowpalwabbit/parse_example.cc2
-rw-r--r--vowpalwabbit/parse_regressor.cc8
-rw-r--r--vowpalwabbit/parser.cc10
-rw-r--r--vowpalwabbit/searn.cc6
-rw-r--r--vowpalwabbit/searn_sequencetask.cc2
-rw-r--r--vowpalwabbit/sparse_dense.h87
-rw-r--r--vowpalwabbit/vw.h2
-rw-r--r--vowpalwabbit/vw_static.vcxproj8
-rw-r--r--vowpalwabbit/wap.cc2
26 files changed, 227 insertions, 403 deletions
diff --git a/autogen.sh b/autogen.sh
index 41da9d87..bff53eea 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,2 +1,8 @@
-#! /bin/sh
-libtoolize -f -c && aclocal -I ./acinclude.d -I /usr/share/aclocal && autoheader && automake -ac -Woverride && autoconf && ./configure "$@"
+#!/bin/sh
+
+case $( uname -s ) in
+ Darwin) alias vwlibtool=glibtoolize;;
+ *) alias vwlibtool=libtoolize;;
+esac
+
+vwlibtool -f -c && aclocal -I ./acinclude.d -I /usr/share/aclocal && autoheader && automake -ac -Woverride && autoconf && ./configure "$@"
diff --git a/library/ezexample.h b/library/ezexample.h
index 6ba75043..fbf28854 100644
--- a/library/ezexample.h
+++ b/library/ezexample.h
@@ -118,7 +118,7 @@ class ezexample {
if (to_ns == 0) return 0;
if (ensure_ns_exists(to_ns)) return 0;
- feature f = { v, fint * vw_ref->stride };
+ feature f = { v, fint * vw_ref->reg.stride };
ec->atomics[to_ns].push_back(f);
ec->sum_feat_sq[to_ns] += v * v;
ec->total_sum_feat_sq += v * v;
diff --git a/vowpalwabbit/accumulate.cc b/vowpalwabbit/accumulate.cc
index 5881daec..3b79fa17 100644
--- a/vowpalwabbit/accumulate.cc
+++ b/vowpalwabbit/accumulate.cc
@@ -19,7 +19,7 @@ using namespace std;
void accumulate(vw& all, string master_location, regressor& reg, size_t o) {
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
float* local_grad = new float[length];
weight* weights = reg.weight_vector;
for(uint32_t i = 0;i < length;i++)
@@ -43,7 +43,7 @@ float accumulate_scalar(vw& all, string master_location, float local_sum) {
void accumulate_avg(vw& all, string master_location, regressor& reg, size_t o) {
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
float* local_grad = new float[length];
weight* weights = reg.weight_vector;
float numnodes = 1.;
@@ -81,7 +81,7 @@ void accumulate_weighted_avg(vw& all, string master_location, regressor& reg) {
return;
}
uint32_t length = 1 << all.num_bits; //This is size of gradient
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = reg.weight_vector;
float* local_weights = new float[length];
diff --git a/vowpalwabbit/autolink.cc b/vowpalwabbit/autolink.cc
index 0f9a8176..53890b87 100644
--- a/vowpalwabbit/autolink.cc
+++ b/vowpalwabbit/autolink.cc
@@ -72,7 +72,7 @@ namespace ALINK {
autolink* data = (autolink*)calloc(1,sizeof(autolink));
data->base = all.l;
data->d = (uint32_t)vm["autolink"].as<size_t>();
- data->stride = all.stride;
+ data->stride = all.reg.stride;
if (!vm_file.count("autolink"))
{
diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index a264d89b..56955350 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -109,7 +109,7 @@ const char* curv_message = "Zero or negative curvature detected.\n"
void zero_derivative(vw& all)
{//set derivative to 0.
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
weights[stride*i+W_GT] = 0;
@@ -118,7 +118,7 @@ void zero_derivative(vw& all)
void zero_preconditioner(vw& all)
{//set derivative to 0.
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
weights[stride*i+W_COND] = 0;
@@ -140,48 +140,6 @@ void reset_state(vw& all, bfgs& b, bool zero)
}
}
-void quad_grad_update(weight* weights, feature& page_feature, v_array<feature> &offer_features, size_t mask, float g)
-{
- size_t halfhash = quadratic_constant * page_feature.weight_index;
- float update = g * page_feature.x;
- for (feature* ele = offer_features.begin; ele != offer_features.end; ele++)
- {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_GT] += update * ele->x;
- }
-}
-
-void cubic_grad_update(weight* weights, feature& f0, feature& f1, v_array<feature> &cross_features, size_t mask, float g)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * f0.weight_index + f1.weight_index);
- float update = g * f0.x * f1.x;
- for (feature* ele = cross_features.begin; ele != cross_features.end; ele++) {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_GT] += update * ele->x;
- }
-}
-
-void quad_precond_update(weight* weights, feature& page_feature, v_array<feature> &offer_features, size_t mask, float g)
-{
- size_t halfhash = quadratic_constant * page_feature.weight_index;
- float update = g * page_feature.x * page_feature.x;
- for (feature* ele = offer_features.begin; ele != offer_features.end; ele++)
- {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_COND] += update * ele->x * ele->x;
- }
-}
-
-void cubic_precond_update(weight* weights, feature& f0, feature& f1, v_array<feature> &cross_features, size_t mask, float g)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * f0.weight_index + f1.weight_index);
- float update = g * f0.x * f0.x * f1.x * f1.x;
- for (feature* ele = cross_features.begin; ele != cross_features.end; ele++) {
- weight* w=&weights[(halfhash + ele->weight_index) & mask];
- w[W_COND] += update * ele->x * ele->x;
- }
-}
-
// w[0] = weight
// w[1] = accumulated first derivative
// w[2] = step direction
@@ -198,6 +156,11 @@ bool test_example(example* ec)
return GD::finalize_prediction(all, ec->partial_prediction);
}
+inline void add_grad(vw& all, void* d, float f, uint32_t u)
+{
+ all.reg.weight_vector[u] += (*(float*)d) * f;
+}
+
float predict_and_gradient(vw& all, example* &ec)
{
float fp = bfgs_predict(all, ec);
@@ -207,99 +170,35 @@ float predict_and_gradient(vw& all, example* &ec)
float loss_grad = all.loss->first_derivative(all.sd, fp,ld->label)*ld->weight;
- size_t mask = all.weight_mask;
- weight* weights = all.reg.weight_vector;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- {
- feature *f = ec->atomics[*i].begin;
- for (; f != ec->atomics[*i].end; f++)
- {
- weight* w = &weights[f->weight_index & mask];
- w[W_GT] += loss_grad * f->x;
- }
- }
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- {
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- quad_grad_update(weights, *temp.begin, ec->atomics[(int)(*i)[1]], mask, loss_grad);
- }
- }
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- cubic_grad_update(weights, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], mask, loss_grad);
- }
- }
+ ec->ft_offset += W_GT;
+ GD::foreach_feature<add_grad>(all, ec, &loss_grad);
+ ec->ft_offset -= W_GT;
+
return fp;
}
+inline void add_precond(vw& all, void* d, float f, uint32_t u)
+{
+ all.reg.weight_vector[u] += (*(float*)d) * f * f;
+}
+
void update_preconditioner(vw& all, example* &ec)
{
label_data* ld = (label_data*)ec->ld;
float curvature = all.loss->second_derivative(all.sd, ec->final_prediction,ld->label) * ld->weight;
- size_t mask = all.weight_mask;
- weight* weights = all.reg.weight_vector;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- {
- feature *f = ec->atomics[*i].begin;
- for (; f != ec->atomics[*i].end; f++)
- {
- weight* w = &weights[f->weight_index & mask];
- w[W_COND] += f->x * f->x * curvature;
- }
- }
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- {
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- quad_precond_update(weights, *temp.begin, ec->atomics[(int)(*i)[1]], mask, curvature);
- }
- }
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- cubic_precond_update(weights, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], mask, curvature);
- }
- }
+ ec->ft_offset += W_COND;
+ GD::foreach_feature<add_precond>(all, ec, &curvature);
+ ec->ft_offset -= W_COND;
}
float dot_with_direction(vw& all, example* &ec)
{
- float ret = 0;
+ ec->ft_offset+= W_DIR;
+ float ret = GD::inline_predict<vec_add>(all, ec);
+ ec->ft_offset-= W_DIR;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- ret += sd_add<vec_add>(all, ec->atomics[*i].begin, ec->atomics[*i].end, W_DIR);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
- if (ec->atomics[(int)(*i)[0]].size() > 0) {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- ret += one_pf_quad_predict<vec_add>(all, *temp.begin, ec->atomics[(int)(*i)[1]], W_DIR);
- }
- }
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++)
- ret += one_pf_cubic_predict<vec_add>(all, *temp1.begin, *temp2.begin, ec->atomics[(int)(*i)[2]], W_DIR);
- }
- }
return ret;
}
@@ -311,7 +210,7 @@ double regularizer_direction_magnitude(vw& all, bfgs& b, float regularizer)
return ret;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
for(uint32_t i = 0; i < length; i++)
@@ -327,7 +226,7 @@ float direction_magnitude(vw& all)
{//compute direction magnitude
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
ret += weights[stride*i+W_DIR]*weights[stride*i+W_DIR];
@@ -338,7 +237,7 @@ float direction_magnitude(vw& all)
void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance_weight_sum, int&origin)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
double g1_Hg1 = 0.;
@@ -364,7 +263,7 @@ void bfgs_iter_start(vw& all, bfgs& b, float* mem, int& lastj, double importance
void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha, int& lastj, int &origin)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
float* mem0 = mem;
@@ -485,7 +384,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
double wolfe_eval(vw& all, bfgs& b, float* mem, double loss_sum, double previous_loss_sum, double step_size, double importance_weight_sum, int &origin, double& wolfe1) {
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
double g0_d = 0.;
@@ -514,7 +413,7 @@ double add_regularization(vw& all, bfgs& b, float regularization)
{//compute the derivative difference
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
{
@@ -537,7 +436,7 @@ double add_regularization(vw& all, bfgs& b, float regularization)
void finalize_preconditioner(vw& all, bfgs& b, float regularization)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
@@ -557,7 +456,7 @@ void finalize_preconditioner(vw& all, bfgs& b, float regularization)
void preconditioner_to_regularizer(vw& all, bfgs& b, float regularization)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
if (b.regularizers == NULL)
{
@@ -581,7 +480,7 @@ void preconditioner_to_regularizer(vw& all, bfgs& b, float regularization)
void zero_state(vw& all)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* weights = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++)
{
@@ -595,7 +494,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin)
{
double ret = 0.;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++, w+=stride, mem+=b.mem_stride)
@@ -606,7 +505,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin)
void update_weight(vw& all, float step_size, size_t current_pass)
{
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
weight* w = all.reg.weight_vector;
for(uint32_t i = 0; i < length; i++, w+=stride)
@@ -877,7 +776,7 @@ void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool
{
char buff[512];
int c = 0;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
uint32_t length = 2*(1 << all.num_bits);
uint32_t i = 0;
size_t brw = 1;
@@ -949,7 +848,7 @@ void save_load(void* d, io_buf& model_file, bool read, bool text)
if (!all->quiet)
{
- fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b->mem_stride)+sizeof(weight)*all->stride) >> 20);
+ fprintf(stderr, "m = %d\nAllocated %luM for weights and mem\n", m, (long unsigned int)all->length()*(sizeof(float)*(b->mem_stride)+sizeof(weight)*all->reg.stride) >> 20);
}
b->net_time = 0.0;
@@ -1024,7 +923,7 @@ void setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::va
all.l = t;
all.bfgs = true;
- all.stride = 4;
+ all.reg.stride = 4;
if (vm.count("hessian_on") || all.m==0) {
all.hessian_on = true;
diff --git a/vowpalwabbit/cb.cc b/vowpalwabbit/cb.cc
index c99259de..9a60aa93 100644
--- a/vowpalwabbit/cb.cc
+++ b/vowpalwabbit/cb.cc
@@ -715,7 +715,7 @@ namespace CB
all.options_from_file.append(" --cb_type dr");
}
- c->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ c->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
*(all.p->lp) = CB::cb_label_parser;
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 0e53fcbd..885f84b9 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -382,7 +382,7 @@ namespace CSOAA {
*(all.p->lp) = cs_label_parser;
all.base_learner_nb_w *= nb_actions;
c->base=all.l;
- c->csoaa_increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ c->csoaa_increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
all.sd->k = nb_actions;
learner l = {c, drive, learn, finish, all.l.sl};
diff --git a/vowpalwabbit/ect.cc b/vowpalwabbit/ect.cc
index ea6b5801..35201f02 100644
--- a/vowpalwabbit/ect.cc
+++ b/vowpalwabbit/ect.cc
@@ -186,7 +186,7 @@ namespace ECT
if (e.last_pair > 0) {
all.base_learner_nb_w *= (e.last_pair + (eliminations-1));
- e.increment = (uint32_t) all.length() / all.base_learner_nb_w * all.stride;
+ e.increment = (uint32_t) all.length() / all.base_learner_nb_w * all.reg.stride;
}
}
diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc
index 74a6b09f..9ac4e8a2 100644
--- a/vowpalwabbit/gd.cc
+++ b/vowpalwabbit/gd.cc
@@ -39,7 +39,13 @@ namespace GD
void predict(vw& all, example* ex);
void sync_weights(vw& all);
- template <void (*T)(vw&, float, uint32_t, float, float)>
+
+ struct train_data {
+ float avg_norm;
+ float update;
+ };
+
+ template <void (*T)(vw&, void*, float, uint32_t)>
void generic_train(vw& all, example* &ec, float update, bool sqrt_norm)
{
if (fabs(update) == 0.)
@@ -51,30 +57,12 @@ namespace GD
else
total_weight = ec->example_t;
- uint32_t offset = ec->ft_offset;
float avg_norm = all.normalized_sum_norm_x / total_weight;
if (sqrt_norm) avg_norm = sqrt(avg_norm);
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- for (feature* f = ec->atomics[*i].begin; f != ec->atomics[*i].end; f++)
- T(all, f->x, f->weight_index + offset, avg_norm, update);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++) {
- uint32_t halfhash = quadratic_constant * (f0->weight_index + offset);
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++)
- T(all, f1->x, f1->weight_index + halfhash + offset, avg_norm, f0->x * update);
- }
+ train_data d = {avg_norm, update};
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0) && (ec->atomics[(int)(*i)[2]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++) {
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0->weight_index + offset) + f1->weight_index + offset);
- for (feature* f2 = ec->atomics[(int)(*i)[2]].begin; f2 != ec->atomics[(int)(*i)[2]].end; f2++)
- T(all, f2->x, f2->weight_index + halfhash + offset, avg_norm, f0->x * f1->x * update);
- }
+ foreach_feature<T>(all, ec, &d);
}
float InvSqrt(float x){
@@ -86,26 +74,30 @@ float InvSqrt(float x){
return x;
}
-inline void general_update(vw& all, float x, uint32_t fi, float avg_norm, float update)
+ inline void general_update(vw& all, void* dat, float x, uint32_t fi)
{
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ train_data* s = (train_data*)dat;
+
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float t = 1.f;
if(all.adaptive) t = powf(w[1],-all.power_t);
if(all.normalized_updates) {
- float norm = w[all.normalized_idx] * avg_norm;
+ float norm = w[all.normalized_idx] * s->avg_norm;
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
t *= powf(norm*norm,-power_t_norm);
}
- w[0] += update * x * t;
+ w[0] += s->update * x * t;
}
template<bool adaptive, bool normalized>
-inline void specialized_update(vw& all, float x, uint32_t fi, float avg_norm, float update)
+inline void specialized_update(vw& all, void* dat, float x, uint32_t fi)
{
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ train_data* s = (train_data*)dat;
+
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float t = 1.f;
float inv_norm = 1.f;
- if(normalized) inv_norm /= (w[all.normalized_idx] * avg_norm);
+ if(normalized) inv_norm /= (w[all.normalized_idx] * s->avg_norm);
if(adaptive) {
#if defined(__SSE2__) && !defined(VW_LDA_NO_SSE)
__m128 eta = _mm_load_ss(&w[1]);
@@ -118,7 +110,7 @@ inline void specialized_update(vw& all, float x, uint32_t fi, float avg_norm, fl
} else {
t *= inv_norm*inv_norm; //if only using normalized updates but not adaptive, need to divide by feature norm squared
}
- w[0] += update * x * t;
+ w[0] += s->update * x * t;
}
void learn(void* d, example* ec)
@@ -187,7 +179,7 @@ void sync_weights(vw& all) {
if (all.sd->gravity == 0. && all.sd->contraction == 1.) // to avoid unnecessary weight synchronization
return;
uint32_t length = 1 << all.num_bits;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
for(uint32_t i = 0; i < length && all.reg_mode; i++)
all.reg.weight_vector[stride*i] = trunc_weight(all.reg.weight_vector[stride*i], (float)all.sd->gravity) * (float)all.sd->contraction;
all.sd->gravity = 0.;
@@ -224,14 +216,14 @@ bool operator<(const string_value& first, const string_value& second)
void audit_feature(vw& all, feature* f, audit_data* a, vector<string_value>& results, string prepend, size_t offset = 0)
{
ostringstream tempstream;
- size_t index = (f->weight_index + offset) & all.weight_mask;
+ size_t index = (f->weight_index + offset) & all.reg.weight_mask;
weight* weights = all.reg.weight_vector;
- size_t stride = all.stride;
+ size_t stride = all.reg.stride;
tempstream << prepend;
if (a != NULL)
tempstream << a->space << '^' << a->feature << ':';
- else if ( index == ((constant*stride)&all.weight_mask))
+ else if ( index == ((constant*stride)&all.reg.weight_mask))
tempstream << "Constant:";
tempstream << (index/stride & all.parse_mask) << ':' << f->x;
@@ -288,9 +280,9 @@ void print_features(vw& all, example* &ec)
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
{
- cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.stride & all.parse_mask) << ':' << f->x;
+ cout << '\t' << f->space << '^' << f->feature << ':' << (f->weight_index/all.reg.stride & all.parse_mask) << ':' << f->x;
for (size_t k = 0; k < all.lda; k++)
- cout << ':' << weights[(f->weight_index+k) & all.weight_mask];
+ cout << ':' << weights[(f->weight_index+k) & all.reg.weight_mask];
}
cout << " total of " << count << " features." << endl;
}
@@ -349,38 +341,16 @@ void print_audit_features(vw& all, example* ec)
print_features(all, ec);
}
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add(vw& all, feature* begin, feature* end, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- for (feature* f = begin; f!= end; f++)
- T(all, f->x, f->weight_index + offset, g, norm, norm_x);
-}
-
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add_quad(vw& all, feature& f0, v_array<feature> &cross_features, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- uint32_t halfhash = quadratic_constant * (f0.weight_index + offset);
- float norm_new = 0.f;
- float norm_x_new = 0.f;
- norm_add<T>(all, cross_features.begin, cross_features.end, g * f0.x * f0.x, norm_new, norm_x_new, halfhash + offset);
- norm += norm_new * f0.x * f0.x;
- norm_x += norm_x_new * f0.x * f0.x;
-}
-
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
-void norm_add_cubic(vw& all, feature& f0, feature& f1, v_array<feature> &cross_features, float g, float& norm, float& norm_x, uint32_t offset=0)
-{
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- float norm_new = 0.f;
- float norm_x_new = 0.f;
- norm_add<T>(all, cross_features.begin, cross_features.end, g * f0.x * f0.x * f1.x * f1.x, norm_new, norm_x_new, halfhash + offset);
- norm += norm_new * f0.x * f0.x * f1.x * f1.x;
- norm_x += norm_x_new * f0.x * f0.x * f1.x * f1.x;
-}
+ struct norm_data {
+ float g;
+ float norm;
+ float norm_x;
+ };
template<bool adaptive, bool normalized>
-inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& norm, float& norm_x) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void simple_norm_compute(vw& all, void* v, float x, uint32_t fi) {
+ norm_data* nd=(norm_data*)v;
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x2 = x * x;
float t = 1.f;
float inv_norm = 1.f;
@@ -388,10 +358,10 @@ inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& n
if(normalized) {
inv_norm /= w[all.normalized_idx];
inv_norm2 = inv_norm*inv_norm;
- norm_x += x2 * inv_norm2;
+ nd->norm_x += x2 * inv_norm2;
}
if(adaptive){
- w[1] += g * x2;
+ w[1] += nd->g * x2;
#if defined(__SSE2__) && !defined(VW_LDA_NO_SSE)
__m128 eta = _mm_load_ss(&w[1]);
eta = _mm_rsqrt_ss(eta);
@@ -403,51 +373,38 @@ inline void simple_norm_compute(vw& all, float x, uint32_t fi, float g, float& n
} else {
t *= inv_norm2; //if only using normalized but not adaptive, we're dividing update by feature norm squared
}
- norm += x2 * t;
+ nd->norm += x2 * t;
}
-inline void powert_norm_compute(vw& all, float x, uint32_t fi, float g, float& norm, float& norm_x) {
+ inline void powert_norm_compute(vw& all, void* v, float x, uint32_t fi) {
+ norm_data* nd=(norm_data*)v;
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x2 = x * x;
float t = 1.f;
if(all.adaptive){
- w[1] += g * x2;
+ w[1] += nd->g * x2;
t = powf(w[1], -all.power_t);
}
if(all.normalized_updates) {
float range2 = w[all.normalized_idx] * w[all.normalized_idx];
t *= powf(range2, -power_t_norm);
- norm_x += x2 / range2;
+ nd->norm_x += x2 / range2;
}
- norm += x2 * t;
+ nd->norm += x2 * t;
}
-template <void (*T)(vw&,float,uint32_t,float,float&,float&)>
+ template <void (*T)(vw&,void*,float,uint32_t)>
float compute_norm(vw& all, example* &ec)
{//We must traverse the features in _precisely_ the same order as during training.
label_data* ld = (label_data*)ec->ld;
float g = all.loss->getSquareGrad(ec->final_prediction, ld->label) * ld->weight;
if (g==0) return 1.;
- float norm = 0.;
- float norm_x = 0.;
- uint32_t offset = ec->ft_offset;
-
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- norm_add<T>(all, ec->atomics[*i].begin, ec->atomics[*i].end, g, norm, norm_x, offset);
+ norm_data nd = {g, 0., 0.};
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end(); i++)
- if (ec->atomics[(int)(*i)[0]].size() > 0)
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- norm_add_quad<T>(all, *f0, ec->atomics[(int)(*i)[1]], g, norm, norm_x, offset);
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
- if ((ec->atomics[(int)(*i)[0]].size() > 0) && (ec->atomics[(int)(*i)[1]].size() > 0) && (ec->atomics[(int)(*i)[2]].size() > 0))
- for (feature* f0 = ec->atomics[(int)(*i)[0]].begin; f0 != ec->atomics[(int)(*i)[0]].end; f0++)
- for (feature* f1 = ec->atomics[(int)(*i)[1]].begin; f1 != ec->atomics[(int)(*i)[1]].end; f1++)
- norm_add_cubic<T>(all, *f0, *f1, ec->atomics[(int)(*i)[2]], g, norm, norm_x, offset);
+ foreach_feature<T>(all, ec, &nd);
if(all.normalized_updates) {
float total_weight = 0;
@@ -456,19 +413,19 @@ float compute_norm(vw& all, example* &ec)
else
total_weight = ec->example_t;
- all.normalized_sum_norm_x += ld->weight * norm_x;
+ all.normalized_sum_norm_x += ld->weight * nd.norm_x;
float avg_sq_norm = all.normalized_sum_norm_x / total_weight;
if(all.power_t == 0.5) {
- if(all.adaptive) norm /= sqrt(avg_sq_norm);
- else norm /= avg_sq_norm;
+ if(all.adaptive) nd.norm /= sqrt(avg_sq_norm);
+ else nd.norm /= avg_sq_norm;
} else {
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
- norm *= powf(avg_sq_norm,-power_t_norm);
+ nd.norm *= powf(avg_sq_norm,-power_t_norm);
}
}
- return norm;
+ return nd.norm;
}
void local_predict(vw& all, example* ec)
@@ -581,7 +538,7 @@ void predict(vw& all, example* ex)
void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text)
{
uint32_t length = 1 << all.num_bits;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
int c = 0;
uint32_t i = 0;
size_t brw = 1;
@@ -674,7 +631,7 @@ void save_load_online_state(vw& all, io_buf& model_file, bool read, bool text)
buff, text_len, text);
uint32_t length = 1 << all.num_bits;
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
int c = 0;
uint32_t i = 0;
size_t brw = 1;
@@ -737,7 +694,7 @@ void save_load(void* data, io_buf& model_file, bool read, bool text)
if(all->adaptive && all->initial_t > 0)
{
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
for (size_t j = 1; j < stride*length; j+=stride)
{
all->reg.weight_vector[j] = all->initial_t; //for adaptive update, we interpret initial_t as previously seeing initial_t fake datapoints, all with squared gradient=1
diff --git a/vowpalwabbit/gd.h b/vowpalwabbit/gd.h
index 60a05857..ad655f40 100644
--- a/vowpalwabbit/gd.h
+++ b/vowpalwabbit/gd.h
@@ -20,11 +20,7 @@ namespace GD{
void print_result(int f, float res, v_array<char> tag);
void print_audit_features(regressor &reg, example* ec, size_t offset);
float finalize_prediction(vw&, float ret);
-float single_quad_weight(weight* weights, feature& page_feature, feature* offer_feature, size_t mask);
-void quadratic(v_array<feature> &f, const v_array<feature> &first_part,
- const v_array<feature> &second_part, size_t thread_mask);
void print_audit_features(vw&, example* ec);
-void train(weight* weights, const v_array<feature> &features, float update);
void train_one_example(regressor& r, example* ex);
void train_offset_example(regressor& r, example* ex, size_t offset);
void compute_update(example* ec);
@@ -34,35 +30,55 @@ void train_one_example_single_thread(regressor& r, example* ex);
void save_load_regressor(vw& all, io_buf& model_file, bool read, bool text);
void output_and_account_example(example* ec);
-template <float (*T)(vw&,float,uint32_t)>
-float inline_predict(vw& all, example* &ec)
-{
- float prediction = all.p->lp->get_initial(ec->ld);
+ template <void (*T)(vw&, void*, float, uint32_t)>
+ void foreach_feature(vw& all, void* dat, feature* begin, feature* end, uint32_t offset=0, float mult=1.)
+ {
+ for (feature* f = begin; f!= end; f++)
+ T(all, dat, mult*f->x, f->weight_index + offset);
+ }
+
+ template <void (*T)(vw&, void*, float, uint32_t)>
+ void foreach_feature(vw& all, example* ec, void* dat)
+ {
+ uint32_t offset = ec->ft_offset;
- for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- prediction += sd_add<T>(all, ec->atomics[*i].begin, ec->atomics[*i].end, ec->ft_offset);
-
- for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
- if (ec->atomics[(int)(*i)[0]].size() > 0) {
- v_array<feature> temp = ec->atomics[(int)(*i)[0]];
- for (; temp.begin != temp.end; temp.begin++)
- prediction += one_pf_quad_predict<T>(all,*temp.begin,ec->atomics[(int)(*i)[1]], ec->ft_offset);
- }
- }
-
- for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
- if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
- v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
- for (; temp1.begin != temp1.end; temp1.begin++) {
- v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
- for (; temp2.begin != temp2.end; temp2.begin++) {
- prediction += one_pf_cubic_predict<T>(all,*temp1.begin,*temp2.begin,ec->atomics[(int)(*i)[2]], ec->ft_offset);
- }
- }
- }
-
- return prediction;
-}
+ for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
+ foreach_feature<T>(all, dat, ec->atomics[*i].begin, ec->atomics[*i].end, offset);
+
+ for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
+ if (ec->atomics[(int)(*i)[0]].size() > 0) {
+ v_array<feature> temp = ec->atomics[(int)(*i)[0]];
+ for (; temp.begin != temp.end; temp.begin++)
+ {
+ uint32_t halfhash = quadratic_constant * (temp.begin->weight_index + offset);
+ foreach_feature<T>(all, dat, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end,
+ halfhash + offset, temp.begin->x);
+ }
+ }
+ }
+
+ for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
+ if ((ec->atomics[(int)(*i)[0]].size() == 0) || (ec->atomics[(int)(*i)[1]].size() == 0) || (ec->atomics[(int)(*i)[2]].size() == 0)) { continue; }
+ v_array<feature> temp1 = ec->atomics[(int)(*i)[0]];
+ for (; temp1.begin != temp1.end; temp1.begin++) {
+ v_array<feature> temp2 = ec->atomics[(int)(*i)[1]];
+ for (; temp2.begin != temp2.end; temp2.begin++) {
+
+ uint32_t halfhash = cubic_constant2 * (cubic_constant * (temp1.begin->weight_index + offset) + temp2.begin->weight_index + offset);
+ float mult = temp1.begin->x * temp2.begin->x;
+	  foreach_feature<T>(all, dat, ec->atomics[(int)(*i)[2]].begin, ec->atomics[(int)(*i)[2]].end, halfhash + offset, mult);
+ }
+ }
+ }
+ }
+
+ template <void (*T)(vw&,void*, float,uint32_t)>
+ float inline_predict(vw& all, example* ec)
+ {
+ float prediction = all.p->lp->get_initial(ec->ld);
+ foreach_feature<T>(all, ec, &prediction);
+ return prediction;
+ }
}
#endif
diff --git a/vowpalwabbit/gd_mf.cc b/vowpalwabbit/gd_mf.cc
index a9c6920d..b2ebb7db 100644
--- a/vowpalwabbit/gd_mf.cc
+++ b/vowpalwabbit/gd_mf.cc
@@ -35,8 +35,7 @@ float mf_inline_predict(vw& all, example* &ec)
float linear_prediction = 0;
// linear terms
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
- linear_prediction += sd_add<vec_add>(all, ec->atomics[*i].begin, ec->atomics[*i].end);
- //linear_prediction += sd_add(weights,mask,ec->atomics[*i].begin, ec->atomics[*i].end);
+ GD::foreach_feature<vec_add>(all, &linear_prediction, ec->atomics[*i].begin, ec->atomics[*i].end);
// store constant + linear prediction
// note: constant is now automatically added
@@ -54,11 +53,13 @@ float mf_inline_predict(vw& all, example* &ec)
// x_l * l^k
// l^k is from index+1 to index+all.rank
//float x_dot_l = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
- float x_dot_l = sd_add<vec_add>(all, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
+ float x_dot_l = 0;
+ GD::foreach_feature<vec_add>(all, &x_dot_l, ec->atomics[(int)(*i)[0]].begin, ec->atomics[(int)(*i)[0]].end, k);
// x_r * r^k
// r^k is from index+all.rank+1 to index+2*all.rank
//float x_dot_r = sd_offset_add(weights, mask, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
- float x_dot_r = sd_add<vec_add>(all, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
+ float x_dot_r = 0;
+ GD::foreach_feature<vec_add>(all, &x_dot_r, ec->atomics[(int)(*i)[1]].begin, ec->atomics[(int)(*i)[1]].end, k+all.rank);
prediction += x_dot_l * x_dot_r;
@@ -82,7 +83,7 @@ float mf_inline_predict(vw& all, example* &ec)
void mf_inline_train(vw& all, example* &ec, float update)
{
weight* weights = all.reg.weight_vector;
- size_t mask = all.weight_mask;
+ size_t mask = all.reg.weight_mask;
label_data* ld = (label_data*)ec->ld;
// use final prediction to get update size
@@ -132,7 +133,7 @@ void mf_inline_train(vw& all, example* &ec, float update)
void mf_print_offset_features(vw& all, example* &ec, size_t offset)
{
weight* weights = all.reg.weight_vector;
- size_t mask = all.weight_mask;
+ size_t mask = all.reg.weight_mask;
for (unsigned char* i = ec->indices.begin; i != ec->indices.end; i++)
if (ec->audit_features[*i].begin != ec->audit_features[*i].end)
for (audit_data *f = ec->audit_features[*i].begin; f != ec->audit_features[*i].end; f++)
@@ -211,13 +212,13 @@ float mf_predict(vw& all, example* ex)
{
vw* all = (vw*)d;
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
if(read)
{
initialize_regressor(*all);
if(all->random_weights)
- for (size_t j = 0; j < all->stride*length; j++)
+ for (size_t j = 0; j < all->reg.stride*length; j++)
all->reg.weight_vector[j] = (float) (0.1 * frand48());
}
diff --git a/vowpalwabbit/global_data.cc b/vowpalwabbit/global_data.cc
index 1b3353e4..f05927d0 100644
--- a/vowpalwabbit/global_data.cc
+++ b/vowpalwabbit/global_data.cc
@@ -249,7 +249,7 @@ vw::vw()
bfgs = false;
hessian_on = false;
- stride = 1;
+ reg.stride = 1;
num_bits = 18;
default_bits = true;
daemon = false;
diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index e9913df8..29a7a926 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -110,6 +110,8 @@ typedef float weight;
struct regressor {
weight* weight_vector;
+ size_t weight_mask; // (stride*(1 << num_bits) -1)
+ uint32_t stride;
};
struct vw {
@@ -161,7 +163,6 @@ struct vw {
uint32_t base_learner_nb_w; //this stores the current number of "weight vector" required by the based learner, which is used to compute offsets when composing reductions
- uint32_t stride;
int stdout_fileno;
std::string per_feature_regularizer_input;
@@ -181,7 +182,6 @@ struct vw {
size_t numpasses;
size_t passes_complete;
size_t parse_mask; // 1 << num_bits -1
- size_t weight_mask; // (stride*(1 << num_bits) -1)
std::vector<std::string> pairs; // pairs of features to cross.
std::vector<std::string> triples; // triples of features to cross.
bool ignore_some;
diff --git a/vowpalwabbit/lda_core.cc b/vowpalwabbit/lda_core.cc
index 5a430303..36aa6493 100644
--- a/vowpalwabbit/lda_core.cc
+++ b/vowpalwabbit/lda_core.cc
@@ -457,7 +457,7 @@ v_array<float> old_gamma;
feature *f = ec->atomics[*i].begin;
for (; f != ec->atomics[*i].end; f++)
{
- float* u_for_w = &weights[(f->weight_index&all.weight_mask)+all.lda+1];
+ float* u_for_w = &weights[(f->weight_index&all.reg.weight_mask)+all.lda+1];
float c_w = find_cw(all, u_for_w,v);
xc_w = c_w * f->x;
score += -f->x*log(c_w);
@@ -507,7 +507,7 @@ size_t next_pow2(size_t x) {
lda* l = (lda*)d;
vw* all = l->all;
uint32_t length = 1 << all->num_bits;
- uint32_t stride = all->stride;
+ uint32_t stride = all->reg.stride;
if (read)
{
@@ -583,10 +583,10 @@ size_t next_pow2(size_t x) {
for (size_t k = 0; k < all->lda; k++)
total_lambda.push_back(0.f);
- size_t stride = all->stride;
+ size_t stride = all->reg.stride;
weight* weights = reg.weight_vector;
- for (size_t i =0; i <= all->weight_mask;i+=stride)
+ for (size_t i =0; i <= all->reg.weight_mask;i+=stride)
for (size_t k = 0; k < all->lda; k++)
total_lambda[k] += weights[i+k];
@@ -647,7 +647,7 @@ size_t next_pow2(size_t x) {
if (last_weight_index == s->f.weight_index)
continue;
last_weight_index = s->f.weight_index;
- float* weights_for_w = &(weights[s->f.weight_index & all->weight_mask]);
+ float* weights_for_w = &(weights[s->f.weight_index & all->reg.weight_mask]);
float decay = fmin(1.0, exp(decay_levels.end[-2] - decay_levels.end[(int)(-1-example_t+weights_for_w[all->lda])]));
float* u_for_w = weights_for_w + all->lda+1;
@@ -679,7 +679,7 @@ size_t next_pow2(size_t x) {
while(next <= &sorted_features.back() && next->f.weight_index == s->f.weight_index)
next++;
- float* word_weights = &(weights[s->f.weight_index & all->weight_mask]);
+ float* word_weights = &(weights[s->f.weight_index & all->reg.weight_mask]);
for (size_t k = 0; k < all->lda; k++) {
float new_value = minuseta*word_weights[k];
word_weights[k] = new_value;
@@ -687,7 +687,7 @@ size_t next_pow2(size_t x) {
for (; s != next; s++) {
float* v_s = &v[s->document*all->lda];
- float* u_for_w = &weights[(s->f.weight_index & all->weight_mask) + all->lda + 1];
+ float* u_for_w = &weights[(s->f.weight_index & all->reg.weight_mask) + all->lda + 1];
float c_w = eta*find_cw(*all, u_for_w, v_s)*s->f.x;
for (size_t k = 0; k < all->lda; k++) {
float new_value = u_for_w[k]*v_s[k]*c_w;
@@ -704,7 +704,7 @@ size_t next_pow2(size_t x) {
if (parser_done(all->p))
{
for (size_t i = 0; i < all->length(); i++) {
- weight* weights_for_w = & (weights[i*all->stride]);
+ weight* weights_for_w = & (weights[i*all->reg.stride]);
float decay = fmin(1.0, exp(decay_levels.last() - decay_levels.end[(int)(-1-example_t+weights_for_w[all->lda])]));
for (size_t k = 0; k < all->lda; k++) {
weights_for_w[k] *= decay;
@@ -746,7 +746,7 @@ learner setup(vw&all, std::vector<std::string>&opts, po::variables_map& vm)
all.p->sort_features = true;
float temp = ceilf(logf((float)(all.lda*2+1)) / logf (2.f));
- all.stride = ((size_t)1) << (int) temp;
+ all.reg.stride = ((size_t)1) << (int) temp;
all.random_weights = true;
all.add_constant = false;
diff --git a/vowpalwabbit/nn.cc b/vowpalwabbit/nn.cc
index 8417a8f4..1c2be65c 100644
--- a/vowpalwabbit/nn.cc
+++ b/vowpalwabbit/nn.cc
@@ -194,7 +194,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
float sigmah =
n.output_layer.atomics[nn_output_namespace][i].x / dropscale;
float sigmahprime = dropscale * (1.0f - sigmah * sigmah);
- float nu = all.reg.weight_vector[n.output_layer.atomics[nn_output_namespace][i].weight_index & all.weight_mask];
+ float nu = all.reg.weight_vector[n.output_layer.atomics[nn_output_namespace][i].weight_index & all.reg.weight_mask];
float gradhw = 0.5f * nu * gradient * sigmahprime;
ld->label = GD::finalize_prediction (all, hidden_units[i-1] - gradhw);
@@ -368,7 +368,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
n->base = all.l;
all.base_learner_nb_w *= (n->inpass) ? n->k + 1 : n->k;
- n->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ n->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
bool initialize = true;
@@ -376,15 +376,15 @@ CONVERSE: // That's right, I'm using goto. So sue me.
memset (&n->output_layer, 0, sizeof (n->output_layer));
n->output_layer.indices.push_back(nn_output_namespace);
- feature output = {1., nn_constant*all.stride};
+ feature output = {1., nn_constant*all.reg.stride};
n->output_layer.atomics[nn_output_namespace].push_back(output);
- initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][0].weight_index & all.weight_mask] == 0);
+ initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][0].weight_index & all.reg.weight_mask] == 0);
for (unsigned int i = 0; i < n->k; ++i)
{
- output.weight_index += all.stride;
+ output.weight_index += all.reg.stride;
n->output_layer.atomics[nn_output_namespace].push_back(output);
- initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i+1].weight_index & all.weight_mask] == 0);
+ initialize &= (all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i+1].weight_index & all.reg.weight_mask] == 0);
}
n->output_layer.num_features = n->k + 1;
@@ -399,7 +399,7 @@ CONVERSE: // That's right, I'm using goto. So sue me.
float sqrtk = sqrt ((float)n->k);
for (unsigned int i = 0; i <= n->k; ++i)
{
- weight* w = &all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i].weight_index & all.weight_mask];
+ weight* w = &all.reg.weight_vector[n->output_layer.atomics[nn_output_namespace][i].weight_index & all.reg.weight_mask];
w[0] = (float) (frand48 () - 0.5) / sqrtk;
@@ -410,11 +410,11 @@ CONVERSE: // That's right, I'm using goto. So sue me.
// hidden biases
- unsigned int weight_index = constant * all.stride;
+ unsigned int weight_index = constant * all.reg.stride;
for (unsigned int i = 0; i < n->k; ++i)
{
- all.reg.weight_vector[weight_index & all.weight_mask] = (float) (frand48 () - 0.5);
+ all.reg.weight_vector[weight_index & all.reg.weight_mask] = (float) (frand48 () - 0.5);
weight_index += n->increment;
}
}
diff --git a/vowpalwabbit/oaa.cc b/vowpalwabbit/oaa.cc
index 829c7d98..11bf4361 100644
--- a/vowpalwabbit/oaa.cc
+++ b/vowpalwabbit/oaa.cc
@@ -260,7 +260,7 @@ namespace OAA {
data->all = &all;
*(all.p->lp) = mc_label_parser;
all.base_learner_nb_w *= data->k;
- data->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.stride;
+ data->increment = ((uint32_t)all.length()/all.base_learner_nb_w) * all.reg.stride;
data->total_increment = data->increment*(data->k-1);
data->base = all.l;
learner l = {data, drive, learn, finish, all.l.sl};
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index ed560d30..63833428 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -217,7 +217,7 @@ vw* parse_args(int argc, char *argv[])
throw exception();
}
- all->stride = 4; //use stride of 4 for default invariant normalized adaptive updates
+ all->reg.stride = 4; //use stride of 4 for default invariant normalized adaptive updates
//if we are doing matrix factorization, or user specified anything in sgd,adaptive,invariant,normalized, we turn off default update rules and use whatever user specified
if( all->rank > 0 || !all->training || ( ( vm.count("sgd") || vm.count("adaptive") || vm.count("invariant") || vm.count("normalized") ) && !vm.count("exact_adaptive_norm")) )
{
@@ -225,12 +225,12 @@ vw* parse_args(int argc, char *argv[])
all->invariant_updates = all->training && vm.count("invariant");
all->normalized_updates = all->training && (vm.count("normalized") && all->rank == 0);
- all->stride = 1;
+ all->reg.stride = 1;
- if( all->adaptive ) all->stride *= 2;
+ if( all->adaptive ) all->reg.stride *= 2;
else all->normalized_idx = 1; //store per feature norm at 1 index offset from weight value instead of 2
- if( all->normalized_updates ) all->stride *= 2;
+ if( all->normalized_updates ) all->reg.stride *= 2;
if(!vm.count("learning_rate") && !vm.count("l") && !(all->adaptive && all->normalized_updates))
all->eta = 10; //default learning rate to 10 for non default update rule
@@ -407,7 +407,7 @@ vw* parse_args(int argc, char *argv[])
if (all->rank > 0) {
// store linear + 2*rank weights per index, round up to power of two
float temp = ceilf(logf((float)(all->rank*2+1)) / logf (2.f));
- all->stride = 1 << (int) temp;
+ all->reg.stride = 1 << (int) temp;
all->random_weights = true;
if ( vm.count("adaptive") )
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index 843d96c0..a3c581b8 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -117,12 +117,12 @@ public:
// maybeFeature --> 'String' FeatureValue
substring feature_name=read_name();
v = cur_channel_v * featureValue();
- if(v == 0) return; //dont add 0 valued features to list of features
size_t word_hash;
if (feature_name.end != feature_name.begin)
word_hash = (p->hasher(feature_name,(uint32_t)channel_hash)) & mask;
else
word_hash = channel_hash + anon++;
+ if(v == 0) return; //dont add 0 valued features to list of features
feature f = {v,(uint32_t)word_hash};
ae->sum_feat_sq[index] += v*v;
ae->atomics[index].push_back(f);
diff --git a/vowpalwabbit/parse_regressor.cc b/vowpalwabbit/parse_regressor.cc
index 3b2a95cd..f0825823 100644
--- a/vowpalwabbit/parse_regressor.cc
+++ b/vowpalwabbit/parse_regressor.cc
@@ -27,8 +27,8 @@ using namespace std;
void initialize_regressor(vw& all)
{
size_t length = ((size_t)1) << all.num_bits;
- all.weight_mask = (all.stride * length) - 1;
- all.reg.weight_vector = (weight *)calloc(all.stride*length, sizeof(weight));
+ all.reg.weight_mask = (all.reg.stride * length) - 1;
+ all.reg.weight_vector = (weight *)calloc(all.reg.stride*length, sizeof(weight));
if (all.reg.weight_vector == NULL)
{
cerr << all.program_name << ": Failed to allocate weight array with " << all.num_bits << " bits: try decreasing -b <bits>" << endl;
@@ -37,10 +37,10 @@ void initialize_regressor(vw& all)
if (all.random_weights)
{
for (size_t j = 0; j < length; j++)
- all.reg.weight_vector[j*all.stride] = (float)(frand48() - 0.5);
+ all.reg.weight_vector[j*all.reg.stride] = (float)(frand48() - 0.5);
}
if (all.initial_weight != 0.)
- for (size_t j = 0; j < all.stride*length; j+=all.stride)
+ for (size_t j = 0; j < all.reg.stride*length; j+=all.reg.stride)
all.reg.weight_vector[j] = all.initial_weight;
}
diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc
index 5f2eeb3a..76c82958 100644
--- a/vowpalwabbit/parser.cc
+++ b/vowpalwabbit/parser.cc
@@ -459,10 +459,10 @@ void parse_source_args(vw& all, po::variables_map& vm, bool quiet, size_t passes
#else
// weights will be shared across processes, accessible to children
float* shared_weights =
- (float*)mmap(0,all.stride * all.length() * sizeof(float),
+ (float*)mmap(0,all.reg.stride * all.length() * sizeof(float),
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
- size_t float_count = all.stride * all.length();
+ size_t float_count = all.reg.stride * all.length();
weight* dest = shared_weights;
memcpy(dest, all.reg.weight_vector, float_count*sizeof(float));
free(all.reg.weight_vector);
@@ -763,9 +763,9 @@ void setup_example(vw& all, example* ae)
ae->total_sum_feat_sq++;
}
- if(all.stride != 1) //make room for per-feature information.
+ if(all.reg.stride != 1) //make room for per-feature information.
{
- uint32_t stride = all.stride;
+ uint32_t stride = all.reg.stride;
for (unsigned char* i = ae->indices.begin; i != ae->indices.end; i++)
for(feature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++)
j->weight_index = j->weight_index*stride;
@@ -1143,4 +1143,4 @@ void end_parser(vw& all)
#endif
release_parser_datastructures(all);
}
-} \ No newline at end of file
+}
diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc
index 81dc5a7c..9a64244d 100644
--- a/vowpalwabbit/searn.cc
+++ b/vowpalwabbit/searn.cc
@@ -356,7 +356,7 @@ namespace Searn
{
feature* end = ec->atomics[*i].end;
for (feature* f = ec->atomics[*i].begin; f!= end; f++) {
- cerr << "\t" << f->weight_index << ":" << f->x << ":" << all.reg.weight_vector[f->weight_index & all.weight_mask];
+ cerr << "\t" << f->weight_index << ":" << f->x << ":" << all.reg.weight_vector[f->weight_index & all.reg.weight_mask];
}
}
cerr << endl;
@@ -808,7 +808,7 @@ namespace Searn
VW::cmd_string_replace_value(all.options_from_file,"--searn_total_nb_policies", ss2.str());
all.base_learner_nb_w *= s->total_number_of_policies;
- s->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.stride;
+ s->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.reg.stride;
//cerr << "searn increment = " << s->increment << endl;
learner l = {s, drive, learn, finish, all.l.sl};
@@ -2041,7 +2041,7 @@ namespace ImperativeSearn {
ss2 << srn->total_number_of_policies; VW::cmd_string_replace_value(all.options_from_file,"--searn_total_nb_policies", ss2.str());
all.base_learner_nb_w *= srn->total_number_of_policies;
- srn->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.stride;
+ srn->increment = ((uint32_t)all.length() / all.base_learner_nb_w) * all.reg.stride;
if (task_string.compare("sequence") == 0) {
searn_task* mytask = (searn_task*)calloc(1, sizeof(searn_task));
diff --git a/vowpalwabbit/searn_sequencetask.cc b/vowpalwabbit/searn_sequencetask.cc
index bad32419..5446b3fc 100644
--- a/vowpalwabbit/searn_sequencetask.cc
+++ b/vowpalwabbit/searn_sequencetask.cc
@@ -125,7 +125,7 @@ namespace SequenceTask {
for (size_t i=0; i < hinfo.length; i++)
constant_pow_length *= quadratic_constant;
- increment = ((uint32_t)all.length() * all.stride + 132489)/seq_max_action;
+ increment = ((uint32_t)all.length() * all.reg.stride + 132489)/seq_max_action;
return true;
}
diff --git a/vowpalwabbit/sparse_dense.h b/vowpalwabbit/sparse_dense.h
index 71762264..4de5c132 100644
--- a/vowpalwabbit/sparse_dense.h
+++ b/vowpalwabbit/sparse_dense.h
@@ -16,40 +16,16 @@ inline float trunc_weight(float w, float gravity){
return (gravity < fabsf(w)) ? w - sign(w) * gravity : 0.f;
}
-template <float (*T)(vw&,float,uint32_t)>
-float sd_add(vw& all, feature* begin, feature* end, uint32_t offset=0, float mult = 1.)
-{
- float ret = 0.;
- for (feature* f = begin; f!= end; f++)
- ret += T(all, mult*f->x, f->weight_index + offset);
- return ret;
+inline void vec_add(vw& all, void* p, float fx, uint32_t fi) {
+ *(float*)p += all.reg.weight_vector[fi & all.reg.weight_mask] * fx;
}
-template <float (*T)(vw&,float,uint32_t)>
-float one_pf_quad_predict(vw& all, feature& f, v_array<feature> cross_features, uint32_t offset=0)
-{
- uint32_t halfhash = quadratic_constant * (f.weight_index + offset);
- return sd_add<T>(all, cross_features.begin, cross_features.end, halfhash + offset, f.x);
+inline void vec_add_trunc(vw& all, void* p, float fx, uint32_t fi) {
+ *(float*)p += trunc_weight(all.reg.weight_vector[fi & all.reg.weight_mask], (float)all.sd->gravity) * fx;
}
-template <float (*T)(vw&,float,uint32_t)>
-float one_pf_cubic_predict(vw& all, feature& f0, feature& f1, v_array<feature> cross_features, uint32_t offset=0)
-{
- uint32_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- float mult = f0.x * f1.x;
- return sd_add<T>(all, cross_features.begin, cross_features.end, halfhash + offset, mult);
-}
-
-inline float vec_add(vw& all, float fx, uint32_t fi) {
- return all.reg.weight_vector[fi & all.weight_mask] * fx;
-}
-
-inline float vec_add_trunc(vw& all, float fx, uint32_t fi) {
- return trunc_weight(all.reg.weight_vector[fi & all.weight_mask], (float)all.sd->gravity) * fx;
-}
-
-inline float vec_add_rescale(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_rescale(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
if( x_abs > w[all.normalized_idx] ) {// new scale discovered
if( w[all.normalized_idx] > 0. ) {//If the normalizer is > 0 then rescale the weight so it's as if the new scale was the old scale.
@@ -58,11 +34,11 @@ inline float vec_add_rescale(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return w[0] * fx;
+ *(float*)p += w[0] * fx;
}
-inline float vec_add_trunc_rescale(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_trunc_rescale(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
if( x_abs > w[all.normalized_idx] ) {
if( w[all.normalized_idx] > 0. ) {
@@ -71,11 +47,11 @@ inline float vec_add_trunc_rescale(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return trunc_weight(w[0], (float)all.sd->gravity) * fx;
+ *(float*)p += trunc_weight(w[0], (float)all.sd->gravity) * fx;
}
-inline float vec_add_rescale_general(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_rescale_general(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
if( x_abs > w[all.normalized_idx] ) {
@@ -85,11 +61,11 @@ inline float vec_add_rescale_general(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return w[0] * fx;
+ *(float*)p += w[0] * fx;
}
-inline float vec_add_trunc_rescale_general(vw& all, float fx, uint32_t fi) {
- weight* w = &all.reg.weight_vector[fi & all.weight_mask];
+inline void vec_add_trunc_rescale_general(vw& all, void* p, float fx, uint32_t fi) {
+ weight* w = &all.reg.weight_vector[fi & all.reg.weight_mask];
float x_abs = fabs(fx);
float power_t_norm = 1.f - (all.adaptive ? all.power_t : 0.f);
if( x_abs > w[all.normalized_idx] ) {
@@ -99,40 +75,9 @@ inline float vec_add_trunc_rescale_general(vw& all, float fx, uint32_t fi) {
}
w[all.normalized_idx] = x_abs;
}
- return trunc_weight(w[0], (float)all.sd->gravity) * fx;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_update(vw& all, feature* begin, feature* end, float update, uint32_t offset=0)
-{
- for (feature* f = begin; f!= end; f++)
- T(all, f->x, f->weight_index + offset, update);
+ *(float*)p += trunc_weight(w[0], (float)all.sd->gravity) * fx;
}
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_quad_update(vw& all, feature& f, v_array<feature> cross_features, float update, uint32_t offset=0)
-{
- size_t halfhash = quadratic_constant * (f.weight_index + offset);
- sd_update<T>(all, cross_features.begin, cross_features.end, halfhash + offset, update * f.x);
-}
-
-template <void (*T)(vw&,float,uint32_t,float)>
-void sd_cubic_update(vw& all, feature& f0, feature& f1, v_array<feature> cross_features, float update, uint32_t offset=0)
-{
- size_t halfhash = cubic_constant2 * (cubic_constant * (f0.weight_index + offset) + f1.weight_index + offset);
- sd_update<T>(all, cross_features.begin, cross_features.end, update * f0.x * f1.x, halfhash + offset);
-}
-
-inline void upd_add(vw& all, float fx, uint32_t fi, float update) {
- all.reg.weight_vector[fi] += update * fx;
-}
-
-
void sd_offset_update(weight* weights, size_t mask, feature* begin, feature* end, size_t offset, float update, float regularization);
-void quadratic(v_array<feature> &f, const v_array<feature> &first_part,
- const v_array<feature> &second_part, size_t thread_mask);
-
#endif
diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h
index a8dcdcb2..edf179f2 100644
--- a/vowpalwabbit/vw.h
+++ b/vowpalwabbit/vw.h
@@ -82,7 +82,7 @@ namespace VW {
void releaseFeatureSpace(primitive_feature_space* features, size_t len);
inline float get_weight(vw& all, uint32_t index)
- { return all.reg.weight_vector[(index * all.stride) & all.weight_mask];}
+ { return all.reg.weight_vector[(index * all.reg.stride) & all.reg.weight_mask];}
inline uint32_t num_weights(vw& all)
{ return (uint32_t)all.length();}
diff --git a/vowpalwabbit/vw_static.vcxproj b/vowpalwabbit/vw_static.vcxproj
index 3251b96e..7b951fd3 100644
--- a/vowpalwabbit/vw_static.vcxproj
+++ b/vowpalwabbit/vw_static.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -234,9 +234,9 @@
<IntDir>$(SolutionDir)$(PlatformName)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemGroup>
+ <ClInclude Include="autolink.h" />
<ClInclude Include="accumulate.h" />
<ClInclude Include="allreduce.h" />
- <ClInclude Include="autolink.h" />
<ClInclude Include="bfgs.h" />
<ClInclude Include="binary.h" />
<ClInclude Include="cache.h" />
@@ -275,9 +275,9 @@
<ClInclude Include="wap.h" />
</ItemGroup>
<ItemGroup>
+ <ClCompile Include="autolink.cc" />
<ClCompile Include="accumulate.cc" />
<ClCompile Include="allreduce.cc" />
- <ClCompile Include="autolink.cc" />
<ClCompile Include="beam.cc" />
<ClCompile Include="binary.cc" />
<ClCompile Include="bfgs.cc" />
@@ -314,4 +314,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project>
diff --git a/vowpalwabbit/wap.cc b/vowpalwabbit/wap.cc
index 8c5f47dc..c596b5b2 100644
--- a/vowpalwabbit/wap.cc
+++ b/vowpalwabbit/wap.cc
@@ -293,7 +293,7 @@ namespace WAP {
all.sd->k = (uint32_t)nb_actions;
all.base_learner_nb_w *= nb_actions;
- w->increment = (uint32_t)((all.length()/ all.base_learner_nb_w) * all.stride);
+ w->increment = (uint32_t)((all.length()/ all.base_learner_nb_w) * all.reg.stride);
learner l = {w, drive, learn, finish, all.l.sl};
w->base = all.l;