diff options
author | niruc <niruc@NIRUC1.redmond.corp.microsoft.com> | 2013-04-09 22:02:25 +0400 |
---|---|---|
committer | niruc <niruc@NIRUC1.redmond.corp.microsoft.com> | 2013-04-09 22:29:00 +0400 |
commit | 22b461552e57a34deedcb623b3a37f943d370d78 (patch) | |
tree | 9eb53c22d9552698866ae6f9be64441aeb778863 | |
parent | c0e215ecec7eddf689283271968bcdfca8984e71 (diff) | |
parent | 0359e2a3a483761ff4e4f266446ef4fed5c1611b (diff) |
add main.cc
-rw-r--r-- | cs_test/Program.cs | 2 | ||||
-rw-r--r-- | library/ezexample.h | 2 | ||||
-rw-r--r-- | library/ezexample_predict.cc | 2 | ||||
-rw-r--r-- | library/ezexample_train.cc | 2 | ||||
-rw-r--r-- | library/library_example.cc | 2 | ||||
-rw-r--r-- | vowpalwabbit/hash.h | 6 | ||||
-rw-r--r-- | vowpalwabbit/main.cc | 97 | ||||
-rw-r--r-- | vowpalwabbit/parse_args.h | 21 | ||||
-rw-r--r-- | vowpalwabbit/parser.h | 1 | ||||
-rw-r--r-- | vowpalwabbit/vw.h | 9 | ||||
-rw-r--r-- | vowpalwabbit/vw.vcxproj | 3 | ||||
-rw-r--r-- | vowpalwabbit/vw_test.cc | 97 | ||||
-rw-r--r-- | vowpalwabbit/vwdll.cpp | 2 |
13 files changed, 235 insertions, 11 deletions
diff --git a/cs_test/Program.cs b/cs_test/Program.cs index a9e5bb99..cf45cc39 100644 --- a/cs_test/Program.cs +++ b/cs_test/Program.cs @@ -81,7 +81,7 @@ namespace cs_test private static void RunParserTest()
{
- IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d 0002.dat -f out2");
+ IntPtr vw = VowpalWabbitInterface.Initialize("-q st -d 0002.dat -f out");
VowpalWabbitInterface.StartParser(vw, false);
diff --git a/library/ezexample.h b/library/ezexample.h index fbf28854..c10e7b1d 100644 --- a/library/ezexample.h +++ b/library/ezexample.h @@ -2,7 +2,7 @@ #define EZEXAMPLE_H #include <stdio.h> -#include "../vowpalwabbit/vw.h" +#include "../vowpalwabbit/parser.h" using namespace std; typedef uint32_t fid; diff --git a/library/ezexample_predict.cc b/library/ezexample_predict.cc index 94878eb8..5305d845 100644 --- a/library/ezexample_predict.cc +++ b/library/ezexample_predict.cc @@ -1,5 +1,5 @@ #include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
#include "ezexample.h"
using namespace std;
diff --git a/library/ezexample_train.cc b/library/ezexample_train.cc index 19a54d9d..e600e949 100644 --- a/library/ezexample_train.cc +++ b/library/ezexample_train.cc @@ -1,5 +1,5 @@ #include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
#include "ezexample.h"
using namespace std;
diff --git a/library/library_example.cc b/library/library_example.cc index 28110e27..0140083a 100644 --- a/library/library_example.cc +++ b/library/library_example.cc @@ -1,5 +1,5 @@ #include <stdio.h>
-#include "../vowpalwabbit/vw.h"
+#include "../vowpalwabbit/parser.h"
using namespace std;
diff --git a/vowpalwabbit/hash.h b/vowpalwabbit/hash.h index dc8da5e4..cffe56bf 100644 --- a/vowpalwabbit/hash.h +++ b/vowpalwabbit/hash.h @@ -3,5 +3,11 @@ Copyright (c) by respective owners including Yahoo!, Microsoft, and individual contributors. All rights reserved. Released under a BSD license as described in the file LICENSE. */ + +#ifndef HASH_H +#define HASH_H + const uint32_t hash_base = 0; uint32_t uniform_hash( const void *key, size_t length, uint32_t initval); + +#endif diff --git a/vowpalwabbit/main.cc b/vowpalwabbit/main.cc new file mode 100644 index 00000000..734e148b --- /dev/null +++ b/vowpalwabbit/main.cc @@ -0,0 +1,97 @@ +/* +Copyright (c) by respective owners including Yahoo!, Microsoft, and +individual contributors. All rights reserved. Released under a BSD +license as described in the file LICENSE. + */ + +#include <math.h> +#include <iostream> +#include <fstream> +#include <float.h> +#include <time.h> +#ifdef _WIN32 +#include <WinSock2.h> +#else +#include <sys/socket.h> +#include <arpa/inet.h> +#endif +#include <sys/timeb.h> +#include "global_data.h" +#include "parse_example.h" +#include "parse_args.h" +#include "accumulate.h" +#include "vw.h" +#include "searn.h" + +using namespace std; + +int main(int argc, char *argv[]) +{ + vw *all = parse_args(argc, argv); + struct timeb t_start, t_end; + ftime(&t_start); + + if (!all->quiet && !all->bfgs && !all->searn) + { + const char * header_fmt = "%-10s %-10s %10s %11s %8s %8s %8s\n"; + fprintf(stderr, header_fmt, + "average", "since", "example", "example", + "current", "current", "current"); + fprintf(stderr, header_fmt, + "loss", "last", "counter", "weight", "label", "predict", "features"); + cerr.precision(5); + } + + VW::start_parser(*all); + + all->l.drive(all); + + VW::end_parser(*all); + + ftime(&t_end); + double net_time = (int) (1000.0 * (t_end.time - t_start.time) + (t_end.millitm - t_start.millitm)); + if(!all->quiet && all->span_server != "") + cerr<<"Net time taken by process = "<<net_time/(double)(1000)<<" seconds\n"; + + if(all->span_server != "") { + float loss = (float)all->sd->sum_loss; + all->sd->sum_loss = (double)accumulate_scalar(*all, all->span_server, loss); + float weighted_examples = (float)all->sd->weighted_examples; + all->sd->weighted_examples = (double)accumulate_scalar(*all, all->span_server, weighted_examples); + float weighted_labels = (float)all->sd->weighted_labels; + all->sd->weighted_labels = (double)accumulate_scalar(*all, all->span_server, weighted_labels); + float weighted_unlabeled_examples = (float)all->sd->weighted_unlabeled_examples; + all->sd->weighted_unlabeled_examples = (double)accumulate_scalar(*all, all->span_server, weighted_unlabeled_examples); + float example_number = (float)all->sd->example_number; + all->sd->example_number = (uint64_t)accumulate_scalar(*all, all->span_server, example_number); + float total_features = (float)all->sd->total_features; + all->sd->total_features = (uint64_t)accumulate_scalar(*all, all->span_server, total_features); + } + + float weighted_labeled_examples = (float)(all->sd->weighted_examples - all->sd->weighted_unlabeled_examples); + float best_constant = (float)((all->sd->weighted_labels - all->initial_t) / weighted_labeled_examples); + float constant_loss = (best_constant*(1.0f - best_constant)*(1.0f - best_constant) + + (1.0f - best_constant)*best_constant*best_constant); + + if (!all->quiet) + { + cerr.precision(4); + cerr << endl << "finished run"; + cerr << endl << "number of examples = " << all->sd->example_number; + cerr << endl << "weighted example sum = " << all->sd->weighted_examples; + cerr << endl << "weighted label sum = " << all->sd->weighted_labels; + cerr << endl << "average loss = " << all->sd->sum_loss / all->sd->weighted_examples; + cerr << endl << "best constant = " << best_constant; + if (all->sd->min_label == 0. && all->sd->max_label == 1. && best_constant < 1. && best_constant > 0.) + cerr << endl << "best constant's loss = " << constant_loss; + cerr << endl << "total feature number = " << all->sd->total_features; + if (all->active_simulation) + cerr << endl << "total queries = " << all->sd->queries << endl; + cerr << endl; + } + + VW::finish(*all); + + return 0; +} + diff --git a/vowpalwabbit/parse_args.h b/vowpalwabbit/parse_args.h index a33e4788..350814ad 100644 --- a/vowpalwabbit/parse_args.h +++ b/vowpalwabbit/parse_args.h @@ -14,4 +14,25 @@ namespace po = boost::program_options; vw* parse_args(int argc, char *argv[]); +namespace VW { + /* + You must call initialize to get access to the library. The argument is a vew commandline. + + Caveats: + (1) Some commandline parameters do not make sense as a library. + (2) The code is not yet reentrant. + */ + vw* initialize(string s); + + void cmd_string_replace_value( string& cmd, string flag_to_replace, string new_value ); + + char** get_argv_from_string(string s, int& argc); + + /* + Call finish() after you are done with the vw instance. This cleans up memory usage. + */ + void finish(vw& all); + +} + #endif diff --git a/vowpalwabbit/parser.h b/vowpalwabbit/parser.h index 495de17f..6f6f5401 100644 --- a/vowpalwabbit/parser.h +++ b/vowpalwabbit/parser.h @@ -9,6 +9,7 @@ license as described in the file LICENSE. #include "io_buf.h" #include "parse_primitives.h" #include "example.h" +#include "vw.h" const size_t wap_ldf_namespace = 126; const size_t history_namespace = 127; diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index ddea10ad..6f255858 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -11,10 +11,8 @@ license as described in the file LICENSE. #include "hash.h" namespace VW { - /* - You must call initialize to get access to the library. The argument is a vew commandline. - Caveats: +/* Caveats: (1) Some commandline parameters do not make sense as a library. (2) The code is not yet reentrant. */ @@ -32,15 +30,14 @@ namespace VW { void start_parser(vw& all, bool do_init = true); void end_parser(vw& all); - - //The next commands deal with creating examples. Caution: VW does not all allow creation of many examples at once by default. You can adjust the exact number by tweaking ring_size. - typedef pair< unsigned char, vector<feature> > feature_space; //just a helper definition. struct primitive_feature_space { //just a helper definition. unsigned char name; feature* fs; size_t len; }; + //The next commands deal with creating examples. Caution: VW does not all allow creation of many examples at once by default. You can adjust the exact number by tweaking ring_size. + /* The simplest of two ways to create an example. An example_line is the literal line in a VW-format datafile. */ example* read_example(vw& all, char* example_line); diff --git a/vowpalwabbit/vw.vcxproj b/vowpalwabbit/vw.vcxproj index 93da8890..6f41110e 100644 --- a/vowpalwabbit/vw.vcxproj +++ b/vowpalwabbit/vw.vcxproj @@ -22,6 +22,7 @@ <ProjectGuid>{1055A78F-1E3A-4E6C-BBF5-0B63299C4ADF}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>vw</RootNamespace>
+ <ProjectName>vw</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -178,7 +179,9 @@ <IntDir>$(SolutionDir)x86\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemGroup>
+ <ClCompile Include="main.cc" />
+ </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
diff --git a/vowpalwabbit/vw_test.cc b/vowpalwabbit/vw_test.cc new file mode 100644 index 00000000..734e148b --- /dev/null +++ b/vowpalwabbit/vw_test.cc @@ -0,0 +1,97 @@ +/* +Copyright (c) by respective owners including Yahoo!, Microsoft, and +individual contributors. All rights reserved. Released under a BSD +license as described in the file LICENSE. + */ + +#include <math.h> +#include <iostream> +#include <fstream> +#include <float.h> +#include <time.h> +#ifdef _WIN32 +#include <WinSock2.h> +#else +#include <sys/socket.h> +#include <arpa/inet.h> +#endif +#include <sys/timeb.h> +#include "global_data.h" +#include "parse_example.h" +#include "parse_args.h" +#include "accumulate.h" +#include "vw.h" +#include "searn.h" + +using namespace std; + +int main(int argc, char *argv[]) +{ + vw *all = parse_args(argc, argv); + struct timeb t_start, t_end; + ftime(&t_start); + + if (!all->quiet && !all->bfgs && !all->searn) + { + const char * header_fmt = "%-10s %-10s %10s %11s %8s %8s %8s\n"; + fprintf(stderr, header_fmt, + "average", "since", "example", "example", + "current", "current", "current"); + fprintf(stderr, header_fmt, + "loss", "last", "counter", "weight", "label", "predict", "features"); + cerr.precision(5); + } + + VW::start_parser(*all); + + all->l.drive(all); + + VW::end_parser(*all); + + ftime(&t_end); + double net_time = (int) (1000.0 * (t_end.time - t_start.time) + (t_end.millitm - t_start.millitm)); + if(!all->quiet && all->span_server != "") + cerr<<"Net time taken by process = "<<net_time/(double)(1000)<<" seconds\n"; + + if(all->span_server != "") { + float loss = (float)all->sd->sum_loss; + all->sd->sum_loss = (double)accumulate_scalar(*all, all->span_server, loss); + float weighted_examples = (float)all->sd->weighted_examples; + all->sd->weighted_examples = (double)accumulate_scalar(*all, all->span_server, weighted_examples); + float weighted_labels = (float)all->sd->weighted_labels; + all->sd->weighted_labels = (double)accumulate_scalar(*all, all->span_server, weighted_labels); + float weighted_unlabeled_examples = (float)all->sd->weighted_unlabeled_examples; + all->sd->weighted_unlabeled_examples = (double)accumulate_scalar(*all, all->span_server, weighted_unlabeled_examples); + float example_number = (float)all->sd->example_number; + all->sd->example_number = (uint64_t)accumulate_scalar(*all, all->span_server, example_number); + float total_features = (float)all->sd->total_features; + all->sd->total_features = (uint64_t)accumulate_scalar(*all, all->span_server, total_features); + } + + float weighted_labeled_examples = (float)(all->sd->weighted_examples - all->sd->weighted_unlabeled_examples); + float best_constant = (float)((all->sd->weighted_labels - all->initial_t) / weighted_labeled_examples); + float constant_loss = (best_constant*(1.0f - best_constant)*(1.0f - best_constant) + + (1.0f - best_constant)*best_constant*best_constant); + + if (!all->quiet) + { + cerr.precision(4); + cerr << endl << "finished run"; + cerr << endl << "number of examples = " << all->sd->example_number; + cerr << endl << "weighted example sum = " << all->sd->weighted_examples; + cerr << endl << "weighted label sum = " << all->sd->weighted_labels; + cerr << endl << "average loss = " << all->sd->sum_loss / all->sd->weighted_examples; + cerr << endl << "best constant = " << best_constant; + if (all->sd->min_label == 0. && all->sd->max_label == 1. && best_constant < 1. && best_constant > 0.) + cerr << endl << "best constant's loss = " << constant_loss; + cerr << endl << "total feature number = " << all->sd->total_features; + if (all->active_simulation) + cerr << endl << "total queries = " << all->sd->queries << endl; + cerr << endl; + } + + VW::finish(*all); + + return 0; +} + diff --git a/vowpalwabbit/vwdll.cpp b/vowpalwabbit/vwdll.cpp index 6c56d479..b43d4016 100644 --- a/vowpalwabbit/vwdll.cpp +++ b/vowpalwabbit/vwdll.cpp @@ -5,6 +5,8 @@ #include "vwdll.h" #include "parser.h" +#include "parse_args.h" +#include "vw.h" // This interface now provides "wide" functions for compatibility with .NET interop // The default functions assume a wide (16 bit char pointer) that is converted to a utf8-string and passed to |