Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Daume III <me@hal3.name>2014-03-01 21:20:01 +0400
committerHal Daume III <me@hal3.name>2014-03-01 21:20:01 +0400
commitf3b36073f693621d7858f660fa88a05f2571b8e4 (patch)
tree4f4a19b14ff113fa4a29e0b2799bb8311d09b571 /vowpalwabbit/csoaa.cc
parentef827509126c45e01f72d506a499f19dff945205 (diff)
parent3e368e9b2e6e3cf01d9ab8c59127b3d1806f43bd (diff)
fixed neighbor feature auditing, and affix feature storing in file
Diffstat (limited to 'vowpalwabbit/csoaa.cc')
-rw-r--r--vowpalwabbit/csoaa.cc310
1 file changed, 149 insertions, 161 deletions
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 2c8c80d7..1a9ca127 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -99,11 +99,6 @@ namespace CSOAA {
return 1.;
}
- float initial(void* v)
- {
- return 0.;
- }
-
char* bufcache_label(label* ld, char* c)
{
*(size_t *)c = ld->costs.size();
@@ -206,7 +201,7 @@ namespace CSOAA {
}
}
- void print_update(vw& all, bool is_test, example *ec)
+ void print_update(vw& all, bool is_test, example& ec)
{
if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
{
@@ -232,8 +227,8 @@ namespace CSOAA {
(long int)all.sd->example_number,
all.sd->weighted_examples,
label_buf,
- (long unsigned int)ec->final_prediction,
- (long unsigned int)ec->num_features);
+ (long unsigned int)ec.final_prediction,
+ (long unsigned int)ec.num_features);
all.sd->weighted_holdout_examples_since_last_dump = 0;
all.sd->holdout_sum_loss_since_last_dump = 0.0;
@@ -245,8 +240,8 @@ namespace CSOAA {
(long int)all.sd->example_number,
all.sd->weighted_examples,
label_buf,
- (long unsigned int)ec->final_prediction,
- (long unsigned int)ec->num_features);
+ (long unsigned int)ec.final_prediction,
+ (long unsigned int)ec.num_features);
all.sd->sum_loss_since_last_dump = 0.0;
all.sd->old_weighted_examples = all.sd->weighted_examples;
@@ -254,14 +249,14 @@ namespace CSOAA {
}
}
- void output_example(vw& all, example* ec)
+ void output_example(vw& all, example& ec)
{
- label* ld = (label*)ec->ld;
+ label* ld = (label*)ec.ld;
float loss = 0.;
if (!is_test_label(ld))
{//need to compute exact loss
- size_t pred = (size_t)ec->final_prediction;
+ size_t pred = (size_t)ec.final_prediction;
float chosen_loss = FLT_MAX;
float min = FLT_MAX;
@@ -277,11 +272,11 @@ namespace CSOAA {
loss = chosen_loss - min;
}
- if(ec->test_only)
+ if(ec.test_only)
{
- all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
- all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
- all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
+ all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+ all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+ all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
all.sd->holdout_sum_loss += loss;
all.sd->holdout_sum_loss_since_last_dump += loss;
all.sd->holdout_sum_loss_since_last_pass += loss;//since last pass
@@ -289,14 +284,14 @@ namespace CSOAA {
else
{
all.sd->weighted_examples += 1.;
- all.sd->total_features += ec->num_features;
+ all.sd->total_features += ec.num_features;
all.sd->sum_loss += loss;
all.sd->sum_loss_since_last_dump += loss;
all.sd->example_number++;
}
for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
- all.print((int)*sink, ec->final_prediction, 0, ec->tag);
+ all.print((int)*sink, ec.final_prediction, 0, ec.tag);
if (all.raw_prediction > 0) {
string outputString;
@@ -307,25 +302,23 @@ namespace CSOAA {
outputStringStream << cl.weight_index << ':' << cl.partial_prediction;
}
//outputStringStream << endl;
- all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+ all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
}
- print_update(all, is_test_label((label*)ec->ld), ec);
+ print_update(all, is_test_label((label*)ec.ld), ec);
}
template <bool is_learn>
- void predict_or_learn(csoaa* c, learner& base, example* ec) {
- vw* all = c->all;
- label* ld = (label*)ec->ld;
-
+ void predict_or_learn(csoaa& c, learner& base, example& ec) {
+ vw* all = c.all;
+ label* ld = (label*)ec.ld;
size_t prediction = 1;
float score = FLT_MAX;
label_data simple_temp = { 0., 0., 0. };
- ec->ld = &simple_temp;
+ ec.ld = &simple_temp;
for (wclass *cl = ld->costs.begin; cl != ld->costs.end; cl ++)
{
uint32_t i = cl->weight_index;
-
if (is_learn)
{
if (cl->x == FLT_MAX || !all->training)
@@ -338,27 +331,26 @@ namespace CSOAA {
simple_temp.label = cl->x;
simple_temp.weight = 1.;
}
-
- base.learn(ec, i);
+ base.learn(ec, i-1);
}
else
- base.predict(ec, i);
+ base.predict(ec, i-1);
- cl->partial_prediction = ec->partial_prediction;
- if (ec->partial_prediction < score || (ec->partial_prediction == score && i < prediction)) {
- score = ec->partial_prediction;
+ cl->partial_prediction = ec.partial_prediction;
+ if (ec.partial_prediction < score || (ec.partial_prediction == score && i < prediction)) {
+ score = ec.partial_prediction;
prediction = i;
}
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
}
- ec->ld = ld;
- ec->final_prediction = (float)prediction;
+ ec.ld = ld;
+ ec.final_prediction = (float)prediction;
}
- void finish_example(vw& all, csoaa*, example* ec)
+ void finish_example(vw& all, csoaa&, example& ec)
{
output_example(all, ec);
- VW::finish_example(all, ec);
+ VW::finish_example(all, &ec);
}
learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
@@ -391,9 +383,9 @@ namespace CSOAA {
return l;
}
- bool example_is_test(example* ec)
+ bool example_is_test(example& ec)
{
- v_array<CSOAA::wclass> costs = ((label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((label*)ec.ld)->costs;
if (costs.size() == 0) return true;
for (size_t j=0; j<costs.size(); j++)
if (costs[j].x != FLT_MAX) return false;
@@ -425,20 +417,20 @@ namespace LabelDict {
size_t hash_lab(size_t lab) { return 328051 + 94389193 * lab; }
- bool ec_is_label_definition(example*ec) // label defs look like "___:-1"
+ bool ec_is_label_definition(example& ec) // label defs look like "___:-1"
{
- v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
for (size_t j=0; j<costs.size(); j++)
if (costs[j].x >= 0.) return false;
- if (ec->indices.size() == 0) return false;
- if (ec->indices.size() > 2) return false;
- if (ec->indices[0] != 'l') return false;
+ if (ec.indices.size() == 0) return false;
+ if (ec.indices.size() > 2) return false;
+ if (ec.indices[0] != 'l') return false;
return true;
}
- bool ec_is_example_header(example*ec) // example headers look like "0:-1"
+ bool ec_is_example_header(example& ec) // example headers look like "0:-1"
{
- v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
if (costs.size() != 1) return false;
if (costs[0].weight_index != 0) return false;
if (costs[0].x >= 0) return false;
@@ -448,10 +440,10 @@ namespace LabelDict {
bool ec_seq_is_label_definition(ldf& l, v_array<example*>ec_seq)
{
if (l.ec_seq.size() == 0) return false;
- bool is_lab = ec_is_label_definition(l.ec_seq[0]);
+ bool is_lab = ec_is_label_definition(*l.ec_seq[0]);
for (size_t i=1; i<l.ec_seq.size(); i++) {
- if (is_lab != ec_is_label_definition(l.ec_seq[i])) {
- if (!((i == l.ec_seq.size()-1) && (example_is_newline(l.ec_seq[i])))) {
+ if (is_lab != ec_is_label_definition(*l.ec_seq[i])) {
+ if (!((i == l.ec_seq.size()-1) && (example_is_newline(*l.ec_seq[i])))) {
cerr << "error: mixed label definition and examples in ldf data!" << endl;
throw exception();
}
@@ -460,77 +452,77 @@ namespace LabelDict {
return is_lab;
}
- void del_example_namespace(example*ec, char ns, v_array<feature> features) {
+ void del_example_namespace(example& ec, char ns, v_array<feature> features) {
size_t numf = features.size();
- ec->num_features -= numf;
-
- assert (ec->atomics[(size_t)ns].size() >= numf);
- if (ec->atomics[(size_t)ns].size() == numf) { // did NOT have ns
- assert(ec->indices.size() > 0);
- assert(ec->indices[ec->indices.size()-1] == (size_t)ns);
- ec->indices.pop();
- ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
- ec->atomics[(size_t)ns].erase();
- ec->sum_feat_sq[(size_t)ns] = 0.;
+ ec.num_features -= numf;
+
+ assert (ec.atomics[(size_t)ns].size() >= numf);
+ if (ec.atomics[(size_t)ns].size() == numf) { // did NOT have ns
+ assert(ec.indices.size() > 0);
+ assert(ec.indices[ec.indices.size()-1] == (size_t)ns);
+ ec.indices.pop();
+ ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
+ ec.atomics[(size_t)ns].erase();
+ ec.sum_feat_sq[(size_t)ns] = 0.;
} else { // DID have ns
for (feature*f=features.begin; f!=features.end; f++) {
- ec->sum_feat_sq[(size_t)ns] -= f->x * f->x;
- ec->atomics[(size_t)ns].pop();
+ ec.sum_feat_sq[(size_t)ns] -= f->x * f->x;
+ ec.atomics[(size_t)ns].pop();
}
}
}
- void add_example_namespace(example*ec, char ns, v_array<feature> features) {
+ void add_example_namespace(example& ec, char ns, v_array<feature> features) {
bool has_ns = false;
- for (size_t i=0; i<ec->indices.size(); i++) {
- if (ec->indices[i] == (size_t)ns) {
+ for (size_t i=0; i<ec.indices.size(); i++) {
+ if (ec.indices[i] == (size_t)ns) {
has_ns = true;
break;
}
}
if (has_ns) {
- ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
+ ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
} else {
- ec->indices.push_back((size_t)ns);
- ec->sum_feat_sq[(size_t)ns] = 0;
+ ec.indices.push_back((size_t)ns);
+ ec.sum_feat_sq[(size_t)ns] = 0;
}
for (feature*f=features.begin; f!=features.end; f++) {
- ec->sum_feat_sq[(size_t)ns] += f->x * f->x;
- ec->atomics[(size_t)ns].push_back(*f);
+ ec.sum_feat_sq[(size_t)ns] += f->x * f->x;
+ ec.atomics[(size_t)ns].push_back(*f);
}
- ec->num_features += features.size();
- ec->total_sum_feat_sq += ec->sum_feat_sq[(size_t)ns];
+ ec.num_features += features.size();
+ ec.total_sum_feat_sq += ec.sum_feat_sq[(size_t)ns];
}
- void add_example_namespaces_from_example(example*target, example*source) {
- for (unsigned char* idx=source->indices.begin; idx!=source->indices.end; idx++) {
+ void add_example_namespaces_from_example(example& target, example& source) {
+ for (unsigned char* idx=source.indices.begin; idx!=source.indices.end; idx++) {
if (*idx == constant_namespace) continue;
- add_example_namespace(target, (char)*idx, source->atomics[*idx]);
+ add_example_namespace(target, (char)*idx, source.atomics[*idx]);
}
}
- void del_example_namespaces_from_example(example*target, example*source) {
- //for (size_t*idx=source->indices.begin; idx!=source->indices.end; idx++) {
- unsigned char* idx = source->indices.end;
+ void del_example_namespaces_from_example(example& target, example& source) {
+ //for (size_t*idx=source.indices.begin; idx!=source.indices.end; idx++) {
+ unsigned char* idx = source.indices.end;
idx--;
- for (; idx>=source->indices.begin; idx--) {
+ for (; idx>=source.indices.begin; idx--) {
if (*idx == constant_namespace) continue;
- del_example_namespace(target, (char)*idx, source->atomics[*idx]);
+ del_example_namespace(target, (char)*idx, source.atomics[*idx]);
}
}
- void add_example_namespace_from_memory(ldf& l, example*ec, size_t lab) {
+ void add_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
size_t lab_hash = hash_lab(lab);
v_array<feature> features = l.label_features.get(lab, lab_hash);
if (features.size() == 0) return;
add_example_namespace(ec, 'l', features);
}
- void del_example_namespace_from_memory(ldf& l, example* ec, size_t lab) {
+ void del_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
size_t lab_hash = hash_lab(lab);
v_array<feature> features = l.label_features.get(lab, lab_hash);
if (features.size() == 0) return;
@@ -628,8 +620,8 @@ namespace LabelDict {
ec->indices.decr();
}
- void make_single_prediction(vw& all, ldf& l, learner& base, example*ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
- label *ld = (label*)ec->ld;
+ void make_single_prediction(vw& all, ldf& l, learner& base, example& ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
+ label *ld = (label*)ec.ld;
v_array<CSOAA::wclass> costs = ld->costs;
label_data simple_label;
@@ -637,25 +629,25 @@ namespace LabelDict {
simple_label.initial = 0.;
simple_label.label = FLT_MAX;
simple_label.weight = 0.;
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
- ec->ld = &simple_label;
+ ec.ld = &simple_label;
base.predict(ec); // make a prediction
} else {
for (size_t j=0; j<costs.size(); j++) {
simple_label.initial = 0.;
simple_label.label = FLT_MAX;
simple_label.weight = 0.;
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
- ec->ld = &simple_label;
+ ec.ld = &simple_label;
base.predict(ec); // make a prediction
- costs[j].partial_prediction = ec->partial_prediction;
+ costs[j].partial_prediction = ec.partial_prediction;
- if (min_score && prediction && (ec->partial_prediction < *min_score)) {
- *min_score = ec->partial_prediction;
+ if (min_score && prediction && (ec.partial_prediction < *min_score)) {
+ *min_score = ec.partial_prediction;
*prediction = costs[j].weight_index;
}
@@ -666,7 +658,7 @@ namespace LabelDict {
}
}
- ec->ld = ld;
+ ec.ld = ld;
}
@@ -674,23 +666,23 @@ namespace LabelDict {
void do_actual_learning_wap(vw& all, ldf& l, learner& base, size_t start_K)
{
size_t K = l.ec_seq.size();
- bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+ bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
size_t prediction = 0;
float min_score = FLT_MAX;
for (size_t k=start_K; k<K; k++) {
example *ec = l.ec_seq.begin[k];
- if (CSOAA::example_is_test(ec) != isTest) {
+ if (CSOAA::example_is_test(*ec) != isTest) {
isTest = true;
cerr << "warning: wap_ldf got mix of train/test data; assuming test" << endl;
}
- if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
throw exception();
}
- make_single_prediction(all, l, base, ec, &prediction, &min_score, NULL, NULL);
+ make_single_prediction(all, l, base, *ec, &prediction, &min_score, NULL, NULL);
}
// do actual learning
@@ -718,7 +710,7 @@ namespace LabelDict {
for (size_t j1=0; j1<costs1.size(); j1++) {
if (costs1[j1].weight_index == (uint32_t)-1) continue;
if (is_learn && all.training && !isTest) {
- LabelDict::add_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
for (size_t k2=k1+1; k2<K; k2++) {
example *ec2 = l.ec_seq.begin[k2];
@@ -732,7 +724,7 @@ namespace LabelDict {
if (value_diff < 1e-6)
continue;
- LabelDict::add_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
// learn
ec1->example_t = l.csoaa_example_t;
@@ -742,15 +734,15 @@ namespace LabelDict {
ec1->partial_prediction = 0.;
subtract_example(all, ec1, ec2);
if (is_learn)
- base.learn(ec1);
+ base.learn(*ec1);
else
- base.predict(ec1);
+ base.predict(*ec1);
unsubtract_example(all, ec1);
- LabelDict::del_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+ LabelDict::del_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
}
}
- LabelDict::del_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+ LabelDict::del_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
}
if (prediction == costs1[j1].weight_index) prediction_is_me = true;
@@ -766,7 +758,7 @@ namespace LabelDict {
{
size_t K = l.ec_seq.size();
size_t prediction = 0;
- bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+ bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
float min_score = FLT_MAX;
float min_cost = FLT_MAX;
float max_cost = -FLT_MAX;
@@ -775,16 +767,16 @@ namespace LabelDict {
for (size_t k=start_K; k<K; k++) {
example *ec = l.ec_seq.begin[k];
- if (CSOAA::example_is_test(ec) != isTest) {
+ if (CSOAA::example_is_test(*ec) != isTest) {
isTest = true;
cerr << "warning: ldf got mix of train/test data; assuming test" << endl;
}
- if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
throw exception();
}
//clog << "msp k=" << k << endl;
- make_single_prediction(all, l, base, ec, &prediction, &min_score, &min_cost, &max_cost);
+ make_single_prediction(all, l, base, *ec, &prediction, &min_score, &min_cost, &max_cost);
}
// do actual learning
@@ -823,12 +815,12 @@ namespace LabelDict {
//ec->partial_prediction = costs[j].partial_prediction;
//cerr << "[" << ec->partial_prediction << "," << ec->done << "]";
//ec->done = false;
- LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec, costs[j].weight_index);
if (is_learn)
- base.learn(ec);
+ base.learn(*ec);
else
- base.predict(ec);
- LabelDict::del_example_namespace_from_memory(l, ec, costs[j].weight_index);
+ base.predict(*ec);
+ LabelDict::del_example_namespace_from_memory(l, *ec, costs[j].weight_index);
ec->example_t = example_t;
}
@@ -873,10 +865,10 @@ namespace LabelDict {
/////////////////////// check for headers
size_t K = l.ec_seq.size();
size_t start_K = 0;
- if (LabelDict::ec_is_example_header(l.ec_seq[0])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[0])) {
start_K = 1;
for (size_t k=1; k<K; k++)
- LabelDict::add_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+ LabelDict::add_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
}
/////////////////////// learn
@@ -886,23 +878,23 @@ namespace LabelDict {
/////////////////////// remove header
if (start_K > 0)
for (size_t k=1; k<K; k++)
- LabelDict::del_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+ LabelDict::del_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
}
- void output_example(vw& all, example* ec, bool&hit_loss)
+ void output_example(vw& all, example& ec, bool& hit_loss)
{
- label* ld = (label*)ec->ld;
+ label* ld = (label*)ec.ld;
v_array<CSOAA::wclass> costs = ld->costs;
if (example_is_newline(ec)) return;
if (LabelDict::ec_is_example_header(ec)) return;
if (LabelDict::ec_is_label_definition(ec)) return;
- all.sd->total_features += ec->num_features;
+ all.sd->total_features += ec.num_features;
float loss = 0.;
- size_t final_pred = (size_t)ec->final_prediction;
+ size_t final_pred = (size_t)ec.final_prediction;
if (!CSOAA::example_is_test(ec)) {
for (size_t j=0; j<costs.size(); j++) {
@@ -919,7 +911,7 @@ namespace LabelDict {
}
for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
- all.print(*sink, ec->final_prediction, 0, ec->tag);
+ all.print(*sink, ec.final_prediction, 0, ec.tag);
if (all.raw_prediction > 0) {
string outputString;
@@ -929,7 +921,7 @@ namespace LabelDict {
outputStringStream << costs[i].weight_index << ':' << costs[i].partial_prediction;
}
//outputStringStream << endl;
- all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+ all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
}
@@ -944,7 +936,7 @@ namespace LabelDict {
bool hit_loss = false;
for (example** ecc=l.ec_seq.begin; ecc!=l.ec_seq.end; ecc++)
- output_example(all, *ecc, hit_loss);
+ output_example(all, **ecc, hit_loss);
if (!l.is_singleline && (all.raw_prediction > 0))
all.print_text(all.raw_prediction, "", l.ec_seq[0]->tag);
@@ -960,9 +952,9 @@ namespace LabelDict {
l.ec_seq.erase();
}
- void end_pass(ldf* l)
+ void end_pass(ldf& l)
{
- l->first_pass = false;
+ l.first_pass = false;
}
/*
@@ -1021,7 +1013,7 @@ namespace LabelDict {
}
*/
- void finish_singleline_example(vw& all, ldf*, example* ec)
+ void finish_singleline_example(vw& all, ldf&, example& ec)
{
if (! LabelDict::ec_is_label_definition(ec)) {
all.sd->weighted_examples += 1;
@@ -1029,73 +1021,73 @@ namespace LabelDict {
}
bool hit_loss = false;
output_example(all, ec, hit_loss);
- VW::finish_example(all, ec);
+ VW::finish_example(all, &ec);
}
- void finish_multiline_example(vw& all, ldf* l, example* ec)
+ void finish_multiline_example(vw& all, ldf& l, example& ec)
{
- if (l->need_to_clear) {
- if (l->ec_seq.size() > 0) {
- output_example_seq(all, *l);
+ if (l.need_to_clear) {
+ if (l.ec_seq.size() > 0) {
+ output_example_seq(all, l);
global_print_newline(all);
}
- clear_seq_and_finish_examples(all, *l);
- l->need_to_clear = false;
- if (ec->in_use) VW::finish_example(all, ec);
+ clear_seq_and_finish_examples(all, l);
+ l.need_to_clear = false;
+ if (ec.in_use) VW::finish_example(all, &ec);
}
}
- void end_examples(ldf* l)
+ void end_examples(ldf& l)
{
- if (l->need_to_clear)
- l->ec_seq.erase();
+ if (l.need_to_clear)
+ l.ec_seq.erase();
}
- void finish(ldf* l)
+ void finish(ldf& l)
{
//vw* all = l->all;
- l->ec_seq.delete_v();
- LabelDict::free_label_features(*l);
+ l.ec_seq.delete_v();
+ LabelDict::free_label_features(l);
}
template <bool is_learn>
- void predict_or_learn(ldf* l, learner& base, example *ec) {
- vw* all = l->all;
- l->base = &base;
+ void predict_or_learn(ldf& l, learner& base, example &ec) {
+ vw* all = l.all;
+ l.base = &base;
bool is_test = CSOAA::example_is_test(ec) || !all->training;
if (is_test)
- make_single_prediction(*all, *l, base, ec, NULL, NULL, NULL, NULL);
+ make_single_prediction(*all, l, base, ec, NULL, NULL, NULL, NULL);
- bool need_to_break = l->ec_seq.size() >= all->p->ring_size - 2;
+ bool need_to_break = l.ec_seq.size() >= all->p->ring_size - 2;
- if (l->is_singleline)
+ if (l.is_singleline)
assert(is_test);
else if (example_is_newline(ec) || need_to_break) {
- if (need_to_break && l->first_pass)
- cerr << "warning: length of sequence at " << ec->example_counter << " exceeds ring size; breaking apart" << endl;
+ if (need_to_break && l.first_pass)
+ cerr << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << endl;
- do_actual_learning<is_learn>(*all, *l, base);
- l->need_to_clear = true;
+ do_actual_learning<is_learn>(*all, l, base);
+ l.need_to_clear = true;
} else if (LabelDict::ec_is_label_definition(ec)) {
- if (l->ec_seq.size() > 0) {
+ if (l.ec_seq.size() > 0) {
cerr << "error: label definition encountered in data block" << endl;
throw exception();
}
if (is_learn && ! is_test) {
- l->ec_seq.push_back(ec);
- do_actual_learning<is_learn>(*all, *l, base);
- l->need_to_clear = true;
+ l.ec_seq.push_back(&ec);
+ do_actual_learning<is_learn>(*all, l, base);
+ l.need_to_clear = true;
}
} else {
- if (l->need_to_clear) { // should only happen if we're NOT driving
- l->ec_seq.erase();
- l->need_to_clear = false;
+ if (l.need_to_clear) { // should only happen if we're NOT driving
+ l.ec_seq.erase();
+ l.need_to_clear = false;
}
- l->ec_seq.push_back(ec);
+ l.ec_seq.push_back(&ec);
}
}
@@ -1191,11 +1183,7 @@ namespace LabelDict {
for (size_t i=0; i<all.final_prediction_sink.size(); i++) {
int f = all.final_prediction_sink[i];
ssize_t t;
-#ifdef _WIN32
- t = _write(f, temp, 1);
-#else
- t = write(f, temp, 1);
-#endif
+ t = io_buf::write_file_or_socket(f, temp, 1);
if (t != 1)
std::cerr << "write error" << std::endl;
}