Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Langford <jl@hunch.net>2009-08-01 00:19:24 +0400
committerJohn Langford <jl@hunch.net>2009-08-01 00:19:24 +0400
commitf30c74f7d36a3b3691d1e9a598163dcecb87b493 (patch)
treed531146bcef68de85930e6e65c31ad4c868d3bcd /parse_regressor.cc
parent8b4959c461235e733cef9ccb851c3834c3155c31 (diff)
Initial release of version 3.10. I've incorporated some of
Gordon's changes.
Diffstat (limited to 'parse_regressor.cc')
-rw-r--r--parse_regressor.cc166
1 file changed, 82 insertions, 84 deletions
diff --git a/parse_regressor.cc b/parse_regressor.cc
index 9f141be6..0d401120 100644
--- a/parse_regressor.cc
+++ b/parse_regressor.cc
@@ -1,28 +1,38 @@
+/*
+Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
+embodied in the content of this file are licensed under the BSD
+(revised) open source license
+ */
+
/*
Copyright (c) 2007 Yahoo! Inc. All rights reserved. The copyrights
embodied in the content of this file are licensed under the BSD
(revised) open source license
*/
-#include <fstream.h>
+#include <fstream>
+#include <iostream>
+#include <unistd.h>
+#include <stdlib.h>
#include "parse_regressor.h"
+#include "loss_functions.h"
+using namespace std;
void initialize_regressor(regressor &r)
{
- r.length = 1 << r.numbits;
- if (r.seg)
+ size_t length = ((size_t)1) << r.global->num_bits;
+ r.global->thread_mask = (length >> r.global->thread_bits) - 1;
+ size_t num_threads = r.global->num_threads();
+ r.weight_vectors = (weight **)malloc(num_threads * sizeof(weight*));
+ for (size_t i = 0; i < num_threads; i++)
{
- r.weights = (weight *)malloc(r.length * sizeof(weight));
- weight* end = r.weights+r.length;
- for (weight *v = r.weights; v != end; v++)
- *v = 1.;
- r.other_weights = (weight *)malloc(r.length * sizeof(weight));
- end = r.other_weights + r.length;
- for (weight *v = r.other_weights; v != end; v++)
- *v = 1.;
+ r.weight_vectors[i] = (weight *)calloc(length/num_threads, sizeof(weight));
+ if (r.weight_vectors[i] == NULL)
+ {
+ cerr << r.global->program_name << ": Failed to allocate weight array: try decreasing -b <bits>" << endl;
+ exit (1);
+ }
}
- else
- r.weights = (weight *)calloc(r.length, sizeof(weight));
}
/*
@@ -30,37 +40,41 @@ void initialize_regressor(regressor &r)
average. If none are specified, initialize according to global_seg &
numbits.
*/
-void parse_regressor(vector<string> &regressors, regressor &r)
+void parse_regressor(vector<string> regressors, regressor &r)
{
+
bool initialized = false;
for (size_t i = 0; i < regressors.size(); i++)
{
ifstream regressor(regressors[i].c_str());
- bool seg;
- regressor.read((char *)&seg, sizeof(seg));
- if (!initialized)
- r.seg = seg;
+ size_t local_num_bits;
+ regressor.read((char *)&local_num_bits, sizeof(local_num_bits));
+ if (!initialized){
+ r.global->num_bits = local_num_bits;
+ }
else
- if (seg != r.seg)
+ if (local_num_bits != r.global->num_bits)
{
- cout << "can't combine regressors from seg and gd!" << endl;
+ cout << "can't combine regressors with different feature number!" << endl;
exit (1);
}
- size_t local_numbits;
- regressor.read((char *)&local_numbits, sizeof(local_numbits));
+
+ size_t local_thread_bits;
+ regressor.read((char*)&local_thread_bits, sizeof(local_thread_bits));
if (!initialized){
- r.numbits = local_numbits;
+ r.global->thread_bits = local_thread_bits;
}
else
- if (local_numbits != r.numbits)
+ if (local_thread_bits != r.global->thread_bits)
{
- cout << "can't combine regressors with different feature number!" << endl;
+ cout << "can't combine regressors trained with different numbers of threads!" << endl;
exit (1);
}
+
int len;
regressor.read((char *)&len, sizeof(len));
-
+
vector<string> local_pairs;
for (; len > 0; len--)
{
@@ -71,94 +85,78 @@ void parse_regressor(vector<string> &regressors, regressor &r)
}
if (!initialized)
{
- r.pairs = local_pairs;
+ r.global->pairs = local_pairs;
initialize_regressor(r);
initialized = true;
}
else
- if (local_pairs != r.pairs)
+ if (local_pairs != r.global->pairs)
{
cout << "can't combine regressors with different features!" << endl;
for (size_t i = 0; i < local_pairs.size(); i++)
cout << local_pairs[i] << " " << local_pairs[i].size() << " ";
cout << endl;
- for (size_t i = 0; i < r.pairs.size(); i++)
- cout << r.pairs[i] << " " << r.pairs[i].size() << " ";
+ for (size_t i = 0; i < r.global->pairs.size(); i++)
+ cout << r.global->pairs[i] << " " << r.global->pairs[i].size() << " ";
cout << endl;
exit (1);
}
-
- if (!seg)
- while (regressor.good())
- {
- size_t hash;
- regressor.read((char *)&hash, sizeof(hash));
- weight w = 0.;
- regressor.read((char *)&w, sizeof(float));
- if (regressor.good())
- r.weights[hash] = r.weights[hash] + w;
- }
- else
+ while (regressor.good())
{
- while (regressor.good())
- {
- size_t hash;
- regressor.read((char *)&hash, sizeof(hash));
- weight first = 0.;
- regressor.read((char *)&first, sizeof(float));
- weight second = 0.;
- regressor.read((char *)&second, sizeof(float));
- if (regressor.good()) {
- r.weights[hash] = first;
- r.other_weights[hash] = second;
- }
- }
- }
+ uint32_t hash;
+ regressor.read((char *)&hash, sizeof(hash));
+ weight w = 0.;
+ regressor.read((char *)&w, sizeof(float));
+
+ size_t num_threads = r.global->num_threads();
+ if (regressor.good())
+ r.weight_vectors[hash % num_threads][hash/num_threads]
+ = r.weight_vectors[hash % num_threads][hash/num_threads] + w;
+ }
regressor.close();
}
-
if (!initialized)
initialize_regressor(r);
+
+// r.loss = getLossFunction(loss_function);
+}
+
+void free_regressor(regressor &r)
+{
+ for (size_t i = 0; i < r.global->num_threads(); i++)
+ free(r.weight_vectors[i]);
+ free(r.weight_vectors);
}
void dump_regressor(ofstream &o, regressor &r)
{
if (o.is_open())
{
- o.write((char *)&r.seg, sizeof(r.seg));
- o.write((char *)&r.numbits, sizeof(r.numbits));
- int len = r.pairs.size();
+ o.write((char *)&r.global->num_bits, sizeof(r.global->num_bits));
+ o.write((char *)&r.global->thread_bits, sizeof(r.global->thread_bits));
+ int len = r.global->pairs.size();
o.write((char *)&len, sizeof(len));
- for (vector<string>::iterator i = r.pairs.begin(); i != r.pairs.end();i++)
+ for (vector<string>::iterator i = r.global->pairs.begin(); i != r.global->pairs.end();i++)
o << (*i)[0] << (*i)[1];
- if (!r.seg)
- {
- for(weight* v = r.weights; v != r.weights+r.length; v++)
- if (*v != 0.)
- {
- size_t dist = v - r.weights;
- o.write((char *)&(dist), sizeof (dist));
- o.write((char *)v, sizeof (*v));
- }
- }
- else
+ uint32_t length = 1 << r.global->num_bits;
+ size_t num_threads = r.global->num_threads();
+ for(uint32_t i = 0; i < length; i++)
{
- for(weight* v = r.weights; v != r.weights+r.length; v++)
- if (*v != 1.)
- {
- size_t dist = v - r.weights;
- o.write((char *)&(dist), sizeof (dist));
- o.write((char *)v, sizeof (*v));
- o.write((char *)&r.other_weights[dist], sizeof (r.other_weights[dist]));
- }
+ weight v = r.weight_vectors[i%num_threads][i/num_threads];
+ if (v != 0.)
+ {
+ o.write((char *)&i, sizeof (i));
+ o.write((char *)&v, sizeof (v));
+ }
}
}
- if (r.seg)
- free(r.other_weights);
-
- free(r.weights);
o.close();
}
+void finalize_regressor(ofstream &o, regressor &r)
+{
+ dump_regressor(o,r);
+ free_regressor(r);
+}