Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cs_test/Program.cs 2
-rw-r--r-- library/ezexample.h 2
-rw-r--r-- library/ezexample_predict.cc 2
-rw-r--r-- library/ezexample_train.cc 2
-rw-r--r-- library/library_example.cc 2
-rw-r--r-- vowpalwabbit/hash.h 6
-rw-r--r-- vowpalwabbit/main.cc 97
-rw-r--r-- vowpalwabbit/parse_args.h 21
-rw-r--r-- vowpalwabbit/parser.h 1
-rw-r--r-- vowpalwabbit/vw.h 9
-rw-r--r-- vowpalwabbit/vw.vcxproj 3
-rw-r--r-- vowpalwabbit/vw_test.cc 97
-rw-r--r-- vowpalwabbit/vwdll.cpp 2
13 files changed, 235 insertions, 11 deletions
diff --git a/cs_test/Program.cs b/cs_test/Program.cs
index a9e5bb99..cf45cc39 100644
--- a/cs_test/Program.cs
+++ b/cs_test/Program.cs
@@ -81,7 +81,7 @@ namespace cs_test
private static void RunParserTest()
{
- IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d 0002.dat -f out2");
+ IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d 0002.dat -f out");
VowpalWabbitInterface.StartParser(vw, false);
diff --git a/library/ezexample.h b/library/ezexample.h
index fbf28854..c10e7b1d 100644
--- a/library/ezexample.h
+++ b/library/ezexample.h
@@ -2,7 +2,7 @@
#define EZEXAMPLE_H
#include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
using namespace std;
typedef uint32_t fid;
diff --git a/library/ezexample_predict.cc b/library/ezexample_predict.cc
index 94878eb8..5305d845 100644
--- a/library/ezexample_predict.cc
+++ b/library/ezexample_predict.cc
@@ -1,5 +1,5 @@
#include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
#include "ezexample.h"
using namespace std;
diff --git a/library/ezexample_train.cc b/library/ezexample_train.cc
index 19a54d9d..e600e949 100644
--- a/library/ezexample_train.cc
+++ b/library/ezexample_train.cc
@@ -1,5 +1,5 @@
#include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
#include "ezexample.h"
using namespace std;
diff --git a/library/library_example.cc b/library/library_example.cc
index 28110e27..0140083a 100644
--- a/library/library_example.cc
+++ b/library/library_example.cc
@@ -1,5 +1,5 @@
#include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
using namespace std;
diff --git a/vowpalwabbit/hash.h b/vowpalwabbit/hash.h
index dc8da5e4..cffe56bf 100644
--- a/vowpalwabbit/hash.h
+++ b/vowpalwabbit/hash.h
@@ -3,5 +3,11 @@ Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
+
+#ifndef HASH_H
+#define HASH_H
+
const uint32_t hash_base = 0;
uint32_t uniform_hash( const void *key, size_t length, uint32_t initval);
+
+#endif
diff --git a/vowpalwabbit/main.cc b/vowpalwabbit/main.cc
new file mode 100644
index 00000000..734e148b
--- /dev/null
+++ b/vowpalwabbit/main.cc
@@ -0,0 +1,97 @@
+/*
+Copyright (c) by respective owners including Yahoo!, Microsoft, and
+individual contributors. All rights reserved. Released under a BSD
+license as described in the file LICENSE.
+ */
+
+#include <math.h>
+#include <iostream>
+#include <fstream>
+#include <float.h>
+#include <time.h>
+#ifdef _WIN32
+#include <WinSock2.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#endif
+#include <sys/timeb.h>
+#include "global_data.h"
+#include "parse_example.h"
+#include "parse_args.h"
+#include "accumulate.h"
+#include "vw.h"
+#include "searn.h"
+
+using namespace std;
+
+int main(int argc, char *argv[])
+{
+ vw *all = parse_args(argc, argv);
+ struct timeb t_start, t_end;
+ ftime(&t_start);
+
+ if (!all->quiet && !all->bfgs && !all->searn)
+ {
+ const char * header_fmt = "%-10s %-10s %10s %11s %8s %8s %8s\n";
+ fprintf(stderr, header_fmt,
+ "average", "since", "example", "example",
+ "current", "current", "current");
+ fprintf(stderr, header_fmt,
+ "loss", "last", "counter", "weight", "label", "predict", "features");
+ cerr.precision(5);
+ }
+
+ VW::start_parser(*all);
+
+ all->l.drive(all);
+
+ VW::end_parser(*all);
+
+ ftime(&t_end);
+ double net_time = (int) (1000.0 * (t_end.time - t_start.time) + (t_end.millitm - t_start.millitm));
+ if(!all->quiet && all->span_server != "")
+ cerr<<"Net time taken by process = "<<net_time/(double)(1000)<<" seconds\n";
+
+ if(all->span_server != "") {
+ float loss = (float)all->sd->sum_loss;
+ all->sd->sum_loss = (double)accumulate_scalar(*all, all->span_server, loss);
+ float weighted_examples = (float)all->sd->weighted_examples;
+ all->sd->weighted_examples = (double)accumulate_scalar(*all, all->span_server, weighted_examples);
+ float weighted_labels = (float)all->sd->weighted_labels;
+ all->sd->weighted_labels = (double)accumulate_scalar(*all, all->span_server, weighted_labels);
+ float weighted_unlabeled_examples = (float)all->sd->weighted_unlabeled_examples;
+ all->sd->weighted_unlabeled_examples = (double)accumulate_scalar(*all, all->span_server, weighted_unlabeled_examples);
+ float example_number = (float)all->sd->example_number;
+ all->sd->example_number = (uint64_t)accumulate_scalar(*all, all->span_server, example_number);
+ float total_features = (float)all->sd->total_features;
+ all->sd->total_features = (uint64_t)accumulate_scalar(*all, all->span_server, total_features);
+ }
+
+ float weighted_labeled_examples = (float)(all->sd->weighted_examples - all->sd->weighted_unlabeled_examples);
+ float best_constant = (float)((all->sd->weighted_labels - all->initial_t) / weighted_labeled_examples);
+ float constant_loss = (best_constant*(1.0f - best_constant)*(1.0f - best_constant)
+ + (1.0f - best_constant)*best_constant*best_constant);
+
+ if (!all->quiet)
+ {
+ cerr.precision(4);
+ cerr << endl << "finished run";
+ cerr << endl << "number of examples = " << all->sd->example_number;
+ cerr << endl << "weighted example sum = " << all->sd->weighted_examples;
+ cerr << endl << "weighted label sum = " << all->sd->weighted_labels;
+ cerr << endl << "average loss = " << all->sd->sum_loss / all->sd->weighted_examples;
+ cerr << endl << "best constant = " << best_constant;
+ if (all->sd->min_label == 0. && all->sd->max_label == 1. && best_constant < 1. && best_constant > 0.)
+ cerr << endl << "best constant's loss = " << constant_loss;
+ cerr << endl << "total feature number = " << all->sd->total_features;
+ if (all->active_simulation)
+ cerr << endl << "total queries = " << all->sd->queries << endl;
+ cerr << endl;
+ }
+
+ VW::finish(*all);
+
+ return 0;
+}
+
diff --git a/vowpalwabbit/parse_args.h b/vowpalwabbit/parse_args.h
index a33e4788..350814ad 100644
--- a/vowpalwabbit/parse_args.h
+++ b/vowpalwabbit/parse_args.h
@@ -14,4 +14,25 @@ namespace po = boost::program_options;
vw* parse_args(int argc, char *argv[]);
+namespace VW {
+ /*
+ You must call initialize to get access to the library. The argument is a vw command line.
+
+ Caveats:
+ (1) Some commandline parameters do not make sense as a library.
+ (2) The code is not yet reentrant.
+ */
+ vw* initialize(string s);
+
+ void cmd_string_replace_value( string& cmd, string flag_to_replace, string new_value );
+
+ char** get_argv_from_string(string s, int& argc);
+
+ /*
+ Call finish() after you are done with the vw instance. This cleans up memory usage.
+ */
+ void finish(vw& all);
+
+}
+
#endif
diff --git a/vowpalwabbit/parser.h b/vowpalwabbit/parser.h
index 495de17f..6f6f5401 100644
--- a/vowpalwabbit/parser.h
+++ b/vowpalwabbit/parser.h
@@ -9,6 +9,7 @@ license as described in the file LICENSE.
#include "io_buf.h"
#include "parse_primitives.h"
#include "example.h"
+#include "vw.h"
const size_t wap_ldf_namespace = 126;
const size_t history_namespace = 127;
diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h
index ddea10ad..6f255858 100644
--- a/vowpalwabbit/vw.h
+++ b/vowpalwabbit/vw.h
@@ -11,10 +11,8 @@ license as described in the file LICENSE.
#include "hash.h"
namespace VW {
- /*
- You must call initialize to get access to the library. The argument is a vew commandline.
- Caveats:
+/* Caveats:
(1) Some commandline parameters do not make sense as a library.
(2) The code is not yet reentrant.
*/
@@ -32,15 +30,14 @@ namespace VW {
void start_parser(vw& all, bool do_init = true);
void end_parser(vw& all);
-
- //The next commands deal with creating examples. Caution: VW does not all allow creation of many examples at once by default. You can adjust the exact number by tweaking ring_size.
-
typedef pair< unsigned char, vector<feature> > feature_space; //just a helper definition.
struct primitive_feature_space { //just a helper definition.
unsigned char name;
feature* fs;
size_t len; };
+ //The next commands deal with creating examples. Caution: VW does not allow creation of many examples at once by default. You can adjust the exact number by tweaking ring_size.
+
/* The simplest of two ways to create an example. An example_line is the literal line in a VW-format datafile.
*/
example* read_example(vw& all, char* example_line);
diff --git a/vowpalwabbit/vw.vcxproj b/vowpalwabbit/vw.vcxproj
index 93da8890..6f41110e 100644
--- a/vowpalwabbit/vw.vcxproj
+++ b/vowpalwabbit/vw.vcxproj
@@ -22,6 +22,7 @@
<ProjectGuid>{1055A78F-1E3A-4E6C-BBF5-0B63299C4ADF}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>vw</RootNamespace>
+ <ProjectName>vw</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -178,7 +179,9 @@
<IntDir>$(SolutionDir)x86\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemGroup>
+
<ClCompile Include="main.cc" />
+
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
diff --git a/vowpalwabbit/vw_test.cc b/vowpalwabbit/vw_test.cc
new file mode 100644
index 00000000..734e148b
--- /dev/null
+++ b/vowpalwabbit/vw_test.cc
@@ -0,0 +1,97 @@
+/*
+Copyright (c) by respective owners including Yahoo!, Microsoft, and
+individual contributors. All rights reserved. Released under a BSD
+license as described in the file LICENSE.
+ */
+
+#include <math.h>
+#include <iostream>
+#include <fstream>
+#include <float.h>
+#include <time.h>
+#ifdef _WIN32
+#include <WinSock2.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#endif
+#include <sys/timeb.h>
+#include "global_data.h"
+#include "parse_example.h"
+#include "parse_args.h"
+#include "accumulate.h"
+#include "vw.h"
+#include "searn.h"
+
+using namespace std;
+
+int main(int argc, char *argv[])
+{
+ vw *all = parse_args(argc, argv);
+ struct timeb t_start, t_end;
+ ftime(&t_start);
+
+ if (!all->quiet && !all->bfgs && !all->searn)
+ {
+ const char * header_fmt = "%-10s %-10s %10s %11s %8s %8s %8s\n";
+ fprintf(stderr, header_fmt,
+ "average", "since", "example", "example",
+ "current", "current", "current");
+ fprintf(stderr, header_fmt,
+ "loss", "last", "counter", "weight", "label", "predict", "features");
+ cerr.precision(5);
+ }
+
+ VW::start_parser(*all);
+
+ all->l.drive(all);
+
+ VW::end_parser(*all);
+
+ ftime(&t_end);
+ double net_time = (int) (1000.0 * (t_end.time - t_start.time) + (t_end.millitm - t_start.millitm));
+ if(!all->quiet && all->span_server != "")
+ cerr<<"Net time taken by process = "<<net_time/(double)(1000)<<" seconds\n";
+
+ if(all->span_server != "") {
+ float loss = (float)all->sd->sum_loss;
+ all->sd->sum_loss = (double)accumulate_scalar(*all, all->span_server, loss);
+ float weighted_examples = (float)all->sd->weighted_examples;
+ all->sd->weighted_examples = (double)accumulate_scalar(*all, all->span_server, weighted_examples);
+ float weighted_labels = (float)all->sd->weighted_labels;
+ all->sd->weighted_labels = (double)accumulate_scalar(*all, all->span_server, weighted_labels);
+ float weighted_unlabeled_examples = (float)all->sd->weighted_unlabeled_examples;
+ all->sd->weighted_unlabeled_examples = (double)accumulate_scalar(*all, all->span_server, weighted_unlabeled_examples);
+ float example_number = (float)all->sd->example_number;
+ all->sd->example_number = (uint64_t)accumulate_scalar(*all, all->span_server, example_number);
+ float total_features = (float)all->sd->total_features;
+ all->sd->total_features = (uint64_t)accumulate_scalar(*all, all->span_server, total_features);
+ }
+
+ float weighted_labeled_examples = (float)(all->sd->weighted_examples - all->sd->weighted_unlabeled_examples);
+ float best_constant = (float)((all->sd->weighted_labels - all->initial_t) / weighted_labeled_examples);
+ float constant_loss = (best_constant*(1.0f - best_constant)*(1.0f - best_constant)
+ + (1.0f - best_constant)*best_constant*best_constant);
+
+ if (!all->quiet)
+ {
+ cerr.precision(4);
+ cerr << endl << "finished run";
+ cerr << endl << "number of examples = " << all->sd->example_number;
+ cerr << endl << "weighted example sum = " << all->sd->weighted_examples;
+ cerr << endl << "weighted label sum = " << all->sd->weighted_labels;
+ cerr << endl << "average loss = " << all->sd->sum_loss / all->sd->weighted_examples;
+ cerr << endl << "best constant = " << best_constant;
+ if (all->sd->min_label == 0. && all->sd->max_label == 1. && best_constant < 1. && best_constant > 0.)
+ cerr << endl << "best constant's loss = " << constant_loss;
+ cerr << endl << "total feature number = " << all->sd->total_features;
+ if (all->active_simulation)
+ cerr << endl << "total queries = " << all->sd->queries << endl;
+ cerr << endl;
+ }
+
+ VW::finish(*all);
+
+ return 0;
+}
+
diff --git a/vowpalwabbit/vwdll.cpp b/vowpalwabbit/vwdll.cpp
index 6c56d479..b43d4016 100644
--- a/vowpalwabbit/vwdll.cpp
+++ b/vowpalwabbit/vwdll.cpp
@@ -5,6 +5,8 @@
#include "vwdll.h"
#include "parser.h"
+#include "parse_args.h"
+#include "vw.h"
// This interface now provides "wide" functions for compatibility with .NET interop
// The default functions assume a wide (16 bit char pointer) that is converted to a utf8-string and passed to