Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Daume III <me@hal3.name>2014-03-01 21:20:01 +0400
committerHal Daume III <me@hal3.name>2014-03-01 21:20:01 +0400
commitf3b36073f693621d7858f660fa88a05f2571b8e4 (patch)
tree4f4a19b14ff113fa4a29e0b2799bb8311d09b571 /vowpalwabbit/csoaa.cc
parentef827509126c45e01f72d506a499f19dff945205 (diff)
parent3e368e9b2e6e3cf01d9ab8c59127b3d1806f43bd (diff)
fixed neighbor feature auditing, and affix feature storing in file
Diffstat (limited to 'vowpalwabbit/csoaa.cc')
-rw-r--r--vowpalwabbit/csoaa.cc310
1 file changed, 149 insertions, 161 deletions
diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc
index 2c8c80d7..1a9ca127 100644
--- a/vowpalwabbit/csoaa.cc
+++ b/vowpalwabbit/csoaa.cc
@@ -99,11 +99,6 @@ namespace CSOAA {
return 1.;
}
- float initial(void* v)
- {
- return 0.;
- }
-
char* bufcache_label(label* ld, char* c)
{
*(size_t *)c = ld->costs.size();
@@ -206,7 +201,7 @@ namespace CSOAA {
}
}
- void print_update(vw& all, bool is_test, example *ec)
+ void print_update(vw& all, bool is_test, example& ec)
{
if (all.sd->weighted_examples >= all.sd->dump_interval && !all.quiet && !all.bfgs)
{
@@ -232,8 +227,8 @@ namespace CSOAA {
(long int)all.sd->example_number,
all.sd->weighted_examples,
label_buf,
- (long unsigned int)ec->final_prediction,
- (long unsigned int)ec->num_features);
+ (long unsigned int)ec.final_prediction,
+ (long unsigned int)ec.num_features);
all.sd->weighted_holdout_examples_since_last_dump = 0;
all.sd->holdout_sum_loss_since_last_dump = 0.0;
@@ -245,8 +240,8 @@ namespace CSOAA {
(long int)all.sd->example_number,
all.sd->weighted_examples,
label_buf,
- (long unsigned int)ec->final_prediction,
- (long unsigned int)ec->num_features);
+ (long unsigned int)ec.final_prediction,
+ (long unsigned int)ec.num_features);
all.sd->sum_loss_since_last_dump = 0.0;
all.sd->old_weighted_examples = all.sd->weighted_examples;
@@ -254,14 +249,14 @@ namespace CSOAA {
}
}
- void output_example(vw& all, example* ec)
+ void output_example(vw& all, example& ec)
{
- label* ld = (label*)ec->ld;
+ label* ld = (label*)ec.ld;
float loss = 0.;
if (!is_test_label(ld))
{//need to compute exact loss
- size_t pred = (size_t)ec->final_prediction;
+ size_t pred = (size_t)ec.final_prediction;
float chosen_loss = FLT_MAX;
float min = FLT_MAX;
@@ -277,11 +272,11 @@ namespace CSOAA {
loss = chosen_loss - min;
}
- if(ec->test_only)
+ if(ec.test_only)
{
- all.sd->weighted_holdout_examples += ec->global_weight;//test weight seen
- all.sd->weighted_holdout_examples_since_last_dump += ec->global_weight;
- all.sd->weighted_holdout_examples_since_last_pass += ec->global_weight;
+ all.sd->weighted_holdout_examples += ec.global_weight;//test weight seen
+ all.sd->weighted_holdout_examples_since_last_dump += ec.global_weight;
+ all.sd->weighted_holdout_examples_since_last_pass += ec.global_weight;
all.sd->holdout_sum_loss += loss;
all.sd->holdout_sum_loss_since_last_dump += loss;
all.sd->holdout_sum_loss_since_last_pass += loss;//since last pass
@@ -289,14 +284,14 @@ namespace CSOAA {
else
{
all.sd->weighted_examples += 1.;
- all.sd->total_features += ec->num_features;
+ all.sd->total_features += ec.num_features;
all.sd->sum_loss += loss;
all.sd->sum_loss_since_last_dump += loss;
all.sd->example_number++;
}
for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
- all.print((int)*sink, ec->final_prediction, 0, ec->tag);
+ all.print((int)*sink, ec.final_prediction, 0, ec.tag);
if (all.raw_prediction > 0) {
string outputString;
@@ -307,25 +302,23 @@ namespace CSOAA {
outputStringStream << cl.weight_index << ':' << cl.partial_prediction;
}
//outputStringStream << endl;
- all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+ all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
}
- print_update(all, is_test_label((label*)ec->ld), ec);
+ print_update(all, is_test_label((label*)ec.ld), ec);
}
template <bool is_learn>
- void predict_or_learn(csoaa* c, learner& base, example* ec) {
- vw* all = c->all;
- label* ld = (label*)ec->ld;
-
+ void predict_or_learn(csoaa& c, learner& base, example& ec) {
+ vw* all = c.all;
+ label* ld = (label*)ec.ld;
size_t prediction = 1;
float score = FLT_MAX;
label_data simple_temp = { 0., 0., 0. };
- ec->ld = &simple_temp;
+ ec.ld = &simple_temp;
for (wclass *cl = ld->costs.begin; cl != ld->costs.end; cl ++)
{
uint32_t i = cl->weight_index;
-
if (is_learn)
{
if (cl->x == FLT_MAX || !all->training)
@@ -338,27 +331,26 @@ namespace CSOAA {
simple_temp.label = cl->x;
simple_temp.weight = 1.;
}
-
- base.learn(ec, i);
+ base.learn(ec, i-1);
}
else
- base.predict(ec, i);
+ base.predict(ec, i-1);
- cl->partial_prediction = ec->partial_prediction;
- if (ec->partial_prediction < score || (ec->partial_prediction == score && i < prediction)) {
- score = ec->partial_prediction;
+ cl->partial_prediction = ec.partial_prediction;
+ if (ec.partial_prediction < score || (ec.partial_prediction == score && i < prediction)) {
+ score = ec.partial_prediction;
prediction = i;
}
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
}
- ec->ld = ld;
- ec->final_prediction = (float)prediction;
+ ec.ld = ld;
+ ec.final_prediction = (float)prediction;
}
- void finish_example(vw& all, csoaa*, example* ec)
+ void finish_example(vw& all, csoaa&, example& ec)
{
output_example(all, ec);
- VW::finish_example(all, ec);
+ VW::finish_example(all, &ec);
}
learner* setup(vw& all, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file)
@@ -391,9 +383,9 @@ namespace CSOAA {
return l;
}
- bool example_is_test(example* ec)
+ bool example_is_test(example& ec)
{
- v_array<CSOAA::wclass> costs = ((label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((label*)ec.ld)->costs;
if (costs.size() == 0) return true;
for (size_t j=0; j<costs.size(); j++)
if (costs[j].x != FLT_MAX) return false;
@@ -425,20 +417,20 @@ namespace LabelDict {
size_t hash_lab(size_t lab) { return 328051 + 94389193 * lab; }
- bool ec_is_label_definition(example*ec) // label defs look like "___:-1"
+ bool ec_is_label_definition(example& ec) // label defs look like "___:-1"
{
- v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
for (size_t j=0; j<costs.size(); j++)
if (costs[j].x >= 0.) return false;
- if (ec->indices.size() == 0) return false;
- if (ec->indices.size() > 2) return false;
- if (ec->indices[0] != 'l') return false;
+ if (ec.indices.size() == 0) return false;
+ if (ec.indices.size() > 2) return false;
+ if (ec.indices[0] != 'l') return false;
return true;
}
- bool ec_is_example_header(example*ec) // example headers look like "0:-1"
+ bool ec_is_example_header(example& ec) // example headers look like "0:-1"
{
- v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec->ld)->costs;
+ v_array<CSOAA::wclass> costs = ((CSOAA::label*)ec.ld)->costs;
if (costs.size() != 1) return false;
if (costs[0].weight_index != 0) return false;
if (costs[0].x >= 0) return false;
@@ -448,10 +440,10 @@ namespace LabelDict {
bool ec_seq_is_label_definition(ldf& l, v_array<example*>ec_seq)
{
if (l.ec_seq.size() == 0) return false;
- bool is_lab = ec_is_label_definition(l.ec_seq[0]);
+ bool is_lab = ec_is_label_definition(*l.ec_seq[0]);
for (size_t i=1; i<l.ec_seq.size(); i++) {
- if (is_lab != ec_is_label_definition(l.ec_seq[i])) {
- if (!((i == l.ec_seq.size()-1) && (example_is_newline(l.ec_seq[i])))) {
+ if (is_lab != ec_is_label_definition(*l.ec_seq[i])) {
+ if (!((i == l.ec_seq.size()-1) && (example_is_newline(*l.ec_seq[i])))) {
cerr << "error: mixed label definition and examples in ldf data!" << endl;
throw exception();
}
@@ -460,77 +452,77 @@ namespace LabelDict {
return is_lab;
}
- void del_example_namespace(example*ec, char ns, v_array<feature> features) {
+ void del_example_namespace(example& ec, char ns, v_array<feature> features) {
size_t numf = features.size();
- ec->num_features -= numf;
-
- assert (ec->atomics[(size_t)ns].size() >= numf);
- if (ec->atomics[(size_t)ns].size() == numf) { // did NOT have ns
- assert(ec->indices.size() > 0);
- assert(ec->indices[ec->indices.size()-1] == (size_t)ns);
- ec->indices.pop();
- ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
- ec->atomics[(size_t)ns].erase();
- ec->sum_feat_sq[(size_t)ns] = 0.;
+ ec.num_features -= numf;
+
+ assert (ec.atomics[(size_t)ns].size() >= numf);
+ if (ec.atomics[(size_t)ns].size() == numf) { // did NOT have ns
+ assert(ec.indices.size() > 0);
+ assert(ec.indices[ec.indices.size()-1] == (size_t)ns);
+ ec.indices.pop();
+ ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
+ ec.atomics[(size_t)ns].erase();
+ ec.sum_feat_sq[(size_t)ns] = 0.;
} else { // DID have ns
for (feature*f=features.begin; f!=features.end; f++) {
- ec->sum_feat_sq[(size_t)ns] -= f->x * f->x;
- ec->atomics[(size_t)ns].pop();
+ ec.sum_feat_sq[(size_t)ns] -= f->x * f->x;
+ ec.atomics[(size_t)ns].pop();
}
}
}
- void add_example_namespace(example*ec, char ns, v_array<feature> features) {
+ void add_example_namespace(example& ec, char ns, v_array<feature> features) {
bool has_ns = false;
- for (size_t i=0; i<ec->indices.size(); i++) {
- if (ec->indices[i] == (size_t)ns) {
+ for (size_t i=0; i<ec.indices.size(); i++) {
+ if (ec.indices[i] == (size_t)ns) {
has_ns = true;
break;
}
}
if (has_ns) {
- ec->total_sum_feat_sq -= ec->sum_feat_sq[(size_t)ns];
+ ec.total_sum_feat_sq -= ec.sum_feat_sq[(size_t)ns];
} else {
- ec->indices.push_back((size_t)ns);
- ec->sum_feat_sq[(size_t)ns] = 0;
+ ec.indices.push_back((size_t)ns);
+ ec.sum_feat_sq[(size_t)ns] = 0;
}
for (feature*f=features.begin; f!=features.end; f++) {
- ec->sum_feat_sq[(size_t)ns] += f->x * f->x;
- ec->atomics[(size_t)ns].push_back(*f);
+ ec.sum_feat_sq[(size_t)ns] += f->x * f->x;
+ ec.atomics[(size_t)ns].push_back(*f);
}
- ec->num_features += features.size();
- ec->total_sum_feat_sq += ec->sum_feat_sq[(size_t)ns];
+ ec.num_features += features.size();
+ ec.total_sum_feat_sq += ec.sum_feat_sq[(size_t)ns];
}
- void add_example_namespaces_from_example(example*target, example*source) {
- for (unsigned char* idx=source->indices.begin; idx!=source->indices.end; idx++) {
+ void add_example_namespaces_from_example(example& target, example& source) {
+ for (unsigned char* idx=source.indices.begin; idx!=source.indices.end; idx++) {
if (*idx == constant_namespace) continue;
- add_example_namespace(target, (char)*idx, source->atomics[*idx]);
+ add_example_namespace(target, (char)*idx, source.atomics[*idx]);
}
}
- void del_example_namespaces_from_example(example*target, example*source) {
- //for (size_t*idx=source->indices.begin; idx!=source->indices.end; idx++) {
- unsigned char* idx = source->indices.end;
+ void del_example_namespaces_from_example(example& target, example& source) {
+ //for (size_t*idx=source.indices.begin; idx!=source.indices.end; idx++) {
+ unsigned char* idx = source.indices.end;
idx--;
- for (; idx>=source->indices.begin; idx--) {
+ for (; idx>=source.indices.begin; idx--) {
if (*idx == constant_namespace) continue;
- del_example_namespace(target, (char)*idx, source->atomics[*idx]);
+ del_example_namespace(target, (char)*idx, source.atomics[*idx]);
}
}
- void add_example_namespace_from_memory(ldf& l, example*ec, size_t lab) {
+ void add_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
size_t lab_hash = hash_lab(lab);
v_array<feature> features = l.label_features.get(lab, lab_hash);
if (features.size() == 0) return;
add_example_namespace(ec, 'l', features);
}
- void del_example_namespace_from_memory(ldf& l, example* ec, size_t lab) {
+ void del_example_namespace_from_memory(ldf& l, example& ec, size_t lab) {
size_t lab_hash = hash_lab(lab);
v_array<feature> features = l.label_features.get(lab, lab_hash);
if (features.size() == 0) return;
@@ -628,8 +620,8 @@ namespace LabelDict {
ec->indices.decr();
}
- void make_single_prediction(vw& all, ldf& l, learner& base, example*ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
- label *ld = (label*)ec->ld;
+ void make_single_prediction(vw& all, ldf& l, learner& base, example& ec, size_t*prediction, float*min_score, float*min_cost, float*max_cost) {
+ label *ld = (label*)ec.ld;
v_array<CSOAA::wclass> costs = ld->costs;
label_data simple_label;
@@ -637,25 +629,25 @@ namespace LabelDict {
simple_label.initial = 0.;
simple_label.label = FLT_MAX;
simple_label.weight = 0.;
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
- ec->ld = &simple_label;
+ ec.ld = &simple_label;
base.predict(ec); // make a prediction
} else {
for (size_t j=0; j<costs.size(); j++) {
simple_label.initial = 0.;
simple_label.label = FLT_MAX;
simple_label.weight = 0.;
- ec->partial_prediction = 0.;
+ ec.partial_prediction = 0.;
LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
- ec->ld = &simple_label;
+ ec.ld = &simple_label;
base.predict(ec); // make a prediction
- costs[j].partial_prediction = ec->partial_prediction;
+ costs[j].partial_prediction = ec.partial_prediction;
- if (min_score && prediction && (ec->partial_prediction < *min_score)) {
- *min_score = ec->partial_prediction;
+ if (min_score && prediction && (ec.partial_prediction < *min_score)) {
+ *min_score = ec.partial_prediction;
*prediction = costs[j].weight_index;
}
@@ -666,7 +658,7 @@ namespace LabelDict {
}
}
- ec->ld = ld;
+ ec.ld = ld;
}
@@ -674,23 +666,23 @@ namespace LabelDict {
void do_actual_learning_wap(vw& all, ldf& l, learner& base, size_t start_K)
{
size_t K = l.ec_seq.size();
- bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+ bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
size_t prediction = 0;
float min_score = FLT_MAX;
for (size_t k=start_K; k<K; k++) {
example *ec = l.ec_seq.begin[k];
- if (CSOAA::example_is_test(ec) != isTest) {
+ if (CSOAA::example_is_test(*ec) != isTest) {
isTest = true;
cerr << "warning: wap_ldf got mix of train/test data; assuming test" << endl;
}
- if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
throw exception();
}
- make_single_prediction(all, l, base, ec, &prediction, &min_score, NULL, NULL);
+ make_single_prediction(all, l, base, *ec, &prediction, &min_score, NULL, NULL);
}
// do actual learning
@@ -718,7 +710,7 @@ namespace LabelDict {
for (size_t j1=0; j1<costs1.size(); j1++) {
if (costs1[j1].weight_index == (uint32_t)-1) continue;
if (is_learn && all.training && !isTest) {
- LabelDict::add_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
for (size_t k2=k1+1; k2<K; k2++) {
example *ec2 = l.ec_seq.begin[k2];
@@ -732,7 +724,7 @@ namespace LabelDict {
if (value_diff < 1e-6)
continue;
- LabelDict::add_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
// learn
ec1->example_t = l.csoaa_example_t;
@@ -742,15 +734,15 @@ namespace LabelDict {
ec1->partial_prediction = 0.;
subtract_example(all, ec1, ec2);
if (is_learn)
- base.learn(ec1);
+ base.learn(*ec1);
else
- base.predict(ec1);
+ base.predict(*ec1);
unsubtract_example(all, ec1);
- LabelDict::del_example_namespace_from_memory(l, ec2, costs2[j2].weight_index);
+ LabelDict::del_example_namespace_from_memory(l, *ec2, costs2[j2].weight_index);
}
}
- LabelDict::del_example_namespace_from_memory(l, ec1, costs1[j1].weight_index);
+ LabelDict::del_example_namespace_from_memory(l, *ec1, costs1[j1].weight_index);
}
if (prediction == costs1[j1].weight_index) prediction_is_me = true;
@@ -766,7 +758,7 @@ namespace LabelDict {
{
size_t K = l.ec_seq.size();
size_t prediction = 0;
- bool isTest = CSOAA::example_is_test(l.ec_seq[start_K]);
+ bool isTest = CSOAA::example_is_test(*l.ec_seq[start_K]);
float min_score = FLT_MAX;
float min_cost = FLT_MAX;
float max_cost = -FLT_MAX;
@@ -775,16 +767,16 @@ namespace LabelDict {
for (size_t k=start_K; k<K; k++) {
example *ec = l.ec_seq.begin[k];
- if (CSOAA::example_is_test(ec) != isTest) {
+ if (CSOAA::example_is_test(*ec) != isTest) {
isTest = true;
cerr << "warning: ldf got mix of train/test data; assuming test" << endl;
}
- if (LabelDict::ec_is_example_header(l.ec_seq[k])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[k])) {
cerr << "warning: example headers at position " << k << ": can only have in initial position!" << endl;
throw exception();
}
//clog << "msp k=" << k << endl;
- make_single_prediction(all, l, base, ec, &prediction, &min_score, &min_cost, &max_cost);
+ make_single_prediction(all, l, base, *ec, &prediction, &min_score, &min_cost, &max_cost);
}
// do actual learning
@@ -823,12 +815,12 @@ namespace LabelDict {
//ec->partial_prediction = costs[j].partial_prediction;
//cerr << "[" << ec->partial_prediction << "," << ec->done << "]";
//ec->done = false;
- LabelDict::add_example_namespace_from_memory(l, ec, costs[j].weight_index);
+ LabelDict::add_example_namespace_from_memory(l, *ec, costs[j].weight_index);
if (is_learn)
- base.learn(ec);
+ base.learn(*ec);
else
- base.predict(ec);
- LabelDict::del_example_namespace_from_memory(l, ec, costs[j].weight_index);
+ base.predict(*ec);
+ LabelDict::del_example_namespace_from_memory(l, *ec, costs[j].weight_index);
ec->example_t = example_t;
}
@@ -873,10 +865,10 @@ namespace LabelDict {
/////////////////////// check for headers
size_t K = l.ec_seq.size();
size_t start_K = 0;
- if (LabelDict::ec_is_example_header(l.ec_seq[0])) {
+ if (LabelDict::ec_is_example_header(*l.ec_seq[0])) {
start_K = 1;
for (size_t k=1; k<K; k++)
- LabelDict::add_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+ LabelDict::add_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
}
/////////////////////// learn
@@ -886,23 +878,23 @@ namespace LabelDict {
/////////////////////// remove header
if (start_K > 0)
for (size_t k=1; k<K; k++)
- LabelDict::del_example_namespaces_from_example(l.ec_seq[k], l.ec_seq[0]);
+ LabelDict::del_example_namespaces_from_example(*l.ec_seq[k], *l.ec_seq[0]);
}
- void output_example(vw& all, example* ec, bool&hit_loss)
+ void output_example(vw& all, example& ec, bool& hit_loss)
{
- label* ld = (label*)ec->ld;
+ label* ld = (label*)ec.ld;
v_array<CSOAA::wclass> costs = ld->costs;
if (example_is_newline(ec)) return;
if (LabelDict::ec_is_example_header(ec)) return;
if (LabelDict::ec_is_label_definition(ec)) return;
- all.sd->total_features += ec->num_features;
+ all.sd->total_features += ec.num_features;
float loss = 0.;
- size_t final_pred = (size_t)ec->final_prediction;
+ size_t final_pred = (size_t)ec.final_prediction;
if (!CSOAA::example_is_test(ec)) {
for (size_t j=0; j<costs.size(); j++) {
@@ -919,7 +911,7 @@ namespace LabelDict {
}
for (int* sink = all.final_prediction_sink.begin; sink != all.final_prediction_sink.end; sink++)
- all.print(*sink, ec->final_prediction, 0, ec->tag);
+ all.print(*sink, ec.final_prediction, 0, ec.tag);
if (all.raw_prediction > 0) {
string outputString;
@@ -929,7 +921,7 @@ namespace LabelDict {
outputStringStream << costs[i].weight_index << ':' << costs[i].partial_prediction;
}
//outputStringStream << endl;
- all.print_text(all.raw_prediction, outputStringStream.str(), ec->tag);
+ all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
}
@@ -944,7 +936,7 @@ namespace LabelDict {
bool hit_loss = false;
for (example** ecc=l.ec_seq.begin; ecc!=l.ec_seq.end; ecc++)
- output_example(all, *ecc, hit_loss);
+ output_example(all, **ecc, hit_loss);
if (!l.is_singleline && (all.raw_prediction > 0))
all.print_text(all.raw_prediction, "", l.ec_seq[0]->tag);
@@ -960,9 +952,9 @@ namespace LabelDict {
l.ec_seq.erase();
}
- void end_pass(ldf* l)
+ void end_pass(ldf& l)
{
- l->first_pass = false;
+ l.first_pass = false;
}
/*
@@ -1021,7 +1013,7 @@ namespace LabelDict {
}
*/
- void finish_singleline_example(vw& all, ldf*, example* ec)
+ void finish_singleline_example(vw& all, ldf&, example& ec)
{
if (! LabelDict::ec_is_label_definition(ec)) {
all.sd->weighted_examples += 1;
@@ -1029,73 +1021,73 @@ namespace LabelDict {
}
bool hit_loss = false;
output_example(all, ec, hit_loss);
- VW::finish_example(all, ec);
+ VW::finish_example(all, &ec);
}
- void finish_multiline_example(vw& all, ldf* l, example* ec)
+ void finish_multiline_example(vw& all, ldf& l, example& ec)
{
- if (l->need_to_clear) {
- if (l->ec_seq.size() > 0) {
- output_example_seq(all, *l);
+ if (l.need_to_clear) {
+ if (l.ec_seq.size() > 0) {
+ output_example_seq(all, l);
global_print_newline(all);
}
- clear_seq_and_finish_examples(all, *l);
- l->need_to_clear = false;
- if (ec->in_use) VW::finish_example(all, ec);
+ clear_seq_and_finish_examples(all, l);
+ l.need_to_clear = false;
+ if (ec.in_use) VW::finish_example(all, &ec);
}
}
- void end_examples(ldf* l)
+ void end_examples(ldf& l)
{
- if (l->need_to_clear)
- l->ec_seq.erase();
+ if (l.need_to_clear)
+ l.ec_seq.erase();
}
- void finish(ldf* l)
+ void finish(ldf& l)
{
//vw* all = l->all;
- l->ec_seq.delete_v();
- LabelDict::free_label_features(*l);
+ l.ec_seq.delete_v();
+ LabelDict::free_label_features(l);
}
template <bool is_learn>
- void predict_or_learn(ldf* l, learner& base, example *ec) {
- vw* all = l->all;
- l->base = &base;
+ void predict_or_learn(ldf& l, learner& base, example &ec) {
+ vw* all = l.all;
+ l.base = &base;
bool is_test = CSOAA::example_is_test(ec) || !all->training;
if (is_test)
- make_single_prediction(*all, *l, base, ec, NULL, NULL, NULL, NULL);
+ make_single_prediction(*all, l, base, ec, NULL, NULL, NULL, NULL);
- bool need_to_break = l->ec_seq.size() >= all->p->ring_size - 2;
+ bool need_to_break = l.ec_seq.size() >= all->p->ring_size - 2;
- if (l->is_singleline)
+ if (l.is_singleline)
assert(is_test);
else if (example_is_newline(ec) || need_to_break) {
- if (need_to_break && l->first_pass)
- cerr << "warning: length of sequence at " << ec->example_counter << " exceeds ring size; breaking apart" << endl;
+ if (need_to_break && l.first_pass)
+ cerr << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << endl;
- do_actual_learning<is_learn>(*all, *l, base);
- l->need_to_clear = true;
+ do_actual_learning<is_learn>(*all, l, base);
+ l.need_to_clear = true;
} else if (LabelDict::ec_is_label_definition(ec)) {
- if (l->ec_seq.size() > 0) {
+ if (l.ec_seq.size() > 0) {
cerr << "error: label definition encountered in data block" << endl;
throw exception();
}
if (is_learn && ! is_test) {
- l->ec_seq.push_back(ec);
- do_actual_learning<is_learn>(*all, *l, base);
- l->need_to_clear = true;
+ l.ec_seq.push_back(&ec);
+ do_actual_learning<is_learn>(*all, l, base);
+ l.need_to_clear = true;
}
} else {
- if (l->need_to_clear) { // should only happen if we're NOT driving
- l->ec_seq.erase();
- l->need_to_clear = false;
+ if (l.need_to_clear) { // should only happen if we're NOT driving
+ l.ec_seq.erase();
+ l.need_to_clear = false;
}
- l->ec_seq.push_back(ec);
+ l.ec_seq.push_back(&ec);
}
}
@@ -1191,11 +1183,7 @@ namespace LabelDict {
for (size_t i=0; i<all.final_prediction_sink.size(); i++) {
int f = all.final_prediction_sink[i];
ssize_t t;
-#ifdef _WIN32
- t = _write(f, temp, 1);
-#else
- t = write(f, temp, 1);
-#endif
+ t = io_buf::write_file_or_socket(f, temp, 1);
if (t != 1)
std::cerr << "write error" << std::endl;
}