1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#ifndef VOWPAL_WABBIT_H
#define VOWPAL_WABBIT_H
#include "global_data.h"
#include "example.h"
#include "hash.h"
#include "simple_label.h"
namespace VW {
/* Caveats:
(1) Some commandline parameters do not make sense as a library.
(2) The code is not yet reentrant.
*/
// Returns a pointer to a vw instance built from the string s.
// NOTE(review): s is presumably a command-line-style option string (the caveats above mention commandline parameters) — confirm against the definition.
vw* initialize(string s);
// Takes cmd by non-const reference, so the definition may modify it in place.
// NOTE(review): presumably replaces the value associated with flag_to_replace inside cmd with new_value — confirm against the definition.
void cmd_string_replace_value( string& cmd, string flag_to_replace, string new_value );
// Returns a char** and takes argc by non-const reference.
// NOTE(review): presumably splits s into an argv-style array and writes the element count to argc; ownership of the returned array is not visible here — confirm.
char** get_argv_from_string(string s, int& argc);
/*
Call finish() after you are done with the vw instance. This cleans up memory usage.
*/
void finish(vw& all);
// Parser lifecycle entry points for the instance all; do_init defaults to true when omitted.
// NOTE(review): the exact effect of do_init is not visible in this header — confirm against the definition.
void start_parser(vw& all, bool do_init = true);
// NOTE(review): presumably the counterpart of start_parser — confirm against the definition.
void end_parser(vw& all);
// Helper alias: an unsigned char paired with a vector of features.
// NOTE(review): the unsigned char is presumably a namespace identifier (compare primitive_feature_space::name) — confirm.
typedef pair< unsigned char, vector<feature> > feature_space; //just a helper definition.
// Helper struct: raw-pointer counterpart of feature_space; this is the type exchanged with
// import_example, export_example and releaseFeatureSpace declared in this header.
struct primitive_feature_space { //just a helper definition.
unsigned char name; // NOTE(review): presumably the namespace identifier — confirm
feature* fs; // pointer to the feature data
size_t len; // NOTE(review): presumably the number of features reachable through fs — confirm
};
//The next commands deal with creating examples. Caution: by default VW does not allow many examples to be created at once. You can adjust the exact number by tweaking ring_size.
/* The simpler of the two ways to create an example. An example_line is the literal line in a VW-format datafile.
*/
// Returns an example* for the given VW-format text line.
example* read_example(vw& all, char* example_line);
//The more complex way to create an example:
//after you create and fill the feature spaces, get an example with everything filled in.
// Overload taking a raw array of primitive_feature_space.
// NOTE(review): len is presumably the number of entries in features — confirm against the definition.
example* import_example(vw& all, primitive_feature_space* features, size_t len);
// Overload taking a vector of feature_space pairs (passed by value).
example* import_example(vw& all, vector< feature_space > ec_info);
// Takes the example by non-const reference together with a label given as a string.
// NOTE(review): presumably parses label and stores the result on ec — confirm against the definition.
void parse_example_label(vw&all, example&ec, string label);
// NOTE(review): presumably hands back an example that is not currently in use — confirm against the definition.
example* new_unused_example(vw& all);
// Returns an example* obtained via the parser pf.
// NOTE(review): blocking behaviour and the end-of-input return value are not visible here — confirm.
example* get_example(parser* pf);
// Returns a label_data* associated with ec.
label_data* get_label(example*ec);
// NOTE(review): presumably appends the constant (bias) feature to ec — confirm against the definition.
void add_constant_feature(vw& all, example*ec);
// Sets label information on ec; weight defaults to 1 and base defaults to 0 when omitted.
void add_label(example* ec, float label, float weight = 1, float base = 0);
//Notify VW that you are done with the example.
void finish_example(vw& all, example* ec);
// Copies example data between two examples; this overload also takes a size_t and a copy_label callback.
// NOTE(review): the parameters are unnamed here, so the order of the two example* arguments
// (destination vs. source) and the meaning of the size_t must be checked against the definition.
void copy_example_data(bool audit, example*, example*, size_t, void(*copy_label)(void*&,void*));
void copy_example_data(bool audit, example*, example*); // don't copy the label
// After export_example, you must call releaseFeatureSpace to free native memory.
// export_example returns a primitive_feature_space* and takes len by non-const reference.
// NOTE(review): len presumably receives the number of entries in the returned array — confirm against the definition.
primitive_feature_space* export_example(vw& all, example* e, size_t& len);
// Releases what export_example returned; pass the same pointer together with its len.
void releaseFeatureSpace(primitive_feature_space* features, size_t len);
// inlines
// Step one of feature hashing: hash a namespace name.
// Runs the parser's hasher (all.p->hasher) over the characters of s, seeded
// with hash_base, and returns the result narrowed to 32 bits. No mask is
// applied here.
inline uint32_t hash_space(vw& all, string s)
{
  // Describe the characters owned by s as a [begin, end) range for the hasher.
  substring name_span;
  name_span.begin = const_cast<char*>(s.c_str());
  name_span.end = name_span.begin + s.size();

  return static_cast<uint32_t>(all.p->hasher(name_span, hash_base));
}
// Step two of feature hashing: hash a feature name.
// u is the seed handed to the hasher (typically the value returned by
// hash_space for the enclosing namespace). The raw hash is AND-ed with
// all.parse_mask and then narrowed to 32 bits.
inline uint32_t hash_feature(vw& all, string s, unsigned long u)
{
  // Describe the characters owned by s as a [begin, end) range for the hasher.
  substring feature_span;
  feature_span.begin = const_cast<char*>(s.c_str());
  feature_span.end = feature_span.begin + s.size();

  return static_cast<uint32_t>(all.p->hasher(feature_span, u) & all.parse_mask);
}
// Same computation as hash_feature, but for a NUL-terminated C string, so no
// std::string has to be built. fstr must be non-null and NUL-terminated
// because its length is taken with strlen. u is the hashing seed.
inline uint32_t hash_feature_cstr(vw& all, char* fstr, unsigned long u)
{
  // [begin, end) range over the characters of fstr, excluding the terminator.
  substring feature_span;
  feature_span.begin = fstr;
  feature_span.end = fstr + strlen(fstr);

  return static_cast<uint32_t>(all.p->hasher(feature_span, u) & all.parse_mask);
}
// Reads one entry of the regressor's weight vector.
// The slot is (index << stride_shift) + offset, wrapped into range by AND-ing
// with all.reg.weight_mask.
inline float get_weight(vw& all, uint32_t index, uint32_t offset)
{
  return all.reg.weight_vector[
      ((index << all.reg.stride_shift) + offset) & all.reg.weight_mask];
}
// Writes value into one entry of the regressor's weight vector.
// The slot is (index << stride_shift) + offset, wrapped into range by AND-ing
// with all.reg.weight_mask (the same addressing get_weight uses).
inline void set_weight(vw& all, uint32_t index, uint32_t offset, float value)
{
  all.reg.weight_vector[
      ((index << all.reg.stride_shift) + offset) & all.reg.weight_mask] = value;
}
// Returns all.length() narrowed to 32 bits.
inline uint32_t num_weights(vw& all)
{
  return static_cast<uint32_t>(all.length());
}
// Returns the stride, i.e. 2 raised to all.reg.stride_shift.
// The shift is evaluated on the int literal 1 and the result is then
// converted to uint32_t.
inline uint32_t get_stride(vw& all)
{
  return static_cast<uint32_t>(1 << all.reg.stride_shift);
}
// Recomputes all.sd->dump_interval from the current weighted example count.
// With all.progress_add set, the new value is the count plus
// all.progress_arg; otherwise it is the count multiplied by all.progress_arg.
inline void update_dump_interval(vw& all)
{
  // Both branches start from the same float-converted count.
  const float examples_so_far = static_cast<float>(all.sd->weighted_examples);

  all.sd->dump_interval = all.progress_add
                              ? examples_so_far + all.progress_arg
                              : examples_so_far * all.progress_arg;
}
}
#endif
|