/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#ifndef SEARN_H
#define SEARN_H
#include <stdio.h>
#include "parse_args.h"
#include "oaa.h"
#include "parse_primitives.h"
#include "v_hashmap.h"
#include "csoaa.h"
#include <time.h>
#define clog_print_audit_features(ec,reg) { print_audit_features(reg, ec); }
typedef uint32_t* history;
// Free-standing helpers used by the Searn driver: history-feature encoding,
// per-policy feature offsets, and checked allocation. Definitions live in the
// corresponding .cc file; comments on semantics below are inferred from names
// and call sites and should be confirmed against the implementation.
namespace SearnUtil
{
// Configuration for how much prediction history is encoded as features.
struct history_info {
size_t length; // was history_length, must be >= features
bool bigrams; // was sequence_bigrams
size_t features; // was sequence_features
bool bigram_features; // was sequence_bigram_features
};
// Fill *info with the default history settings.
void default_info(history_info*);
// calloc wrapper; presumably terminates the process on allocation failure ("or_die") -- confirm in the .cc.
void* calloc_or_die(size_t, size_t);
// Release memory obtained from calloc_or_die.
void free_it(void*);
// Select a policy id (oracle/current/older) for a rollout step; exact sampling semantics are in the .cc.
int random_policy(uint64_t, float, bool, int, bool, bool);
// Presumably shifts the example's feature indices into the given policy's weight subspace -- confirm in the .cc.
void add_policy_offset(vw&, example*, uint32_t, uint32_t);
// Undo add_policy_offset, restoring the example's original feature indices.
void remove_policy_offset(vw&, example*, uint32_t, uint32_t);
// Add features derived from the recent action history (per hinfo) to the example.
void add_history_to_example(vw&, history_info&, example*, history);
// Strip the features previously added by add_history_to_example.
void remove_history_from_example(vw&, history_info&, example*);
// Predict for ec with history features temporarily attached; ystar is presumably the oracle action set -- confirm.
size_t predict_with_history(vw&vw, example*ec, v_array<uint32_t>*ystar, history_info &hinfo, size_t*history);
}
// Searn: search-based structured prediction. A task implements the
// searn_task callbacks and, inside structured_predict, drives learning
// through the predict/declare_loss/snapshot members of struct searn.
namespace Searn {
using namespace SearnUtil;
// One piece of task state saved via searn::snapshot, so rollouts can be
// fast-forwarded to a time step instead of being recomputed from scratch.
struct snapshot_item {
size_t index;
size_t tag;
void *data_ptr;
size_t data_size; // number of bytes stored at data_ptr
size_t pred_step; // srn->t when snapshot is made
};
struct searn_task;
// All Searn state, plus the three entry points a task calls during
// structured_predict. Tasks should only touch the members above the
// "do not look at" marker; everything below is internal bookkeeping.
struct searn {
// functions that you will call
inline uint32_t predict(example** ecs, size_t ec_len, v_array<uint32_t>* yallowed, v_array<uint32_t>* ystar) // for LDF
{ return this->predict_f(*this->all, *this->base_learner, ecs, ec_len, yallowed, ystar); }
// Non-LDF variant: wraps the single example and forwards with ec_len == 0.
inline uint32_t predict(example* ec, v_array<uint32_t>* yallowed, v_array<uint32_t>* ystar) // for not LDF
{ return this->predict_f(*this->all, *this->base_learner, &ec, 0, yallowed, ystar); }
// Report loss accrued since the last declaration (or since the start).
inline void declare_loss(size_t predictions_since_last, float incr_loss)
{ return this->declare_loss_f(*this->all, predictions_since_last, incr_loss); }
// Save sizeof_data bytes of task state under (index, tag) for fast-forwarding.
inline void snapshot(size_t index, size_t tag, void* data_ptr, size_t sizeof_data, bool used_for_prediction)
{ return this->snapshot_f(*this->all, index, tag, data_ptr, sizeof_data, used_for_prediction); }
// structure that you must set, and any associated data you want to store
searn_task* task;
void* task_data;
bool auto_history; // do you want us to automatically add history features?
bool examples_dont_change; // set to true if you don't do any internal example munging
// data that you should not look at. ever.
uint32_t (*predict_f)(vw&,learner&,example**,size_t,v_array<uint32_t>*,v_array<uint32_t>*);
void (*declare_loss_f)(vw&,size_t,float); // <0 means it was a test example!
void (*snapshot_f)(vw&,size_t,size_t,void*,size_t,bool);
size_t A; // total number of actions, [1..A]; 0 means ldf
char state; // current state of learning
size_t learn_t; // when LEARN, this is the t at which we're varying a
uint32_t learn_a; // and this is the a we're varying it to
size_t snapshot_is_equivalent_to_t; // if we've finished snapshotting and are equiv up to this time step, then we can fast forward from there
bool snapshot_could_match;
size_t snapshot_last_found_pos; // last matched position in snapshot_data, to resume searching from
v_array<snapshot_item> snapshot_data;
v_array<uint32_t> train_action; // which actions did we actually take in the train (or test) pass?
v_array< void* > train_labels; // which labels are valid at any given time
v_array<uint32_t> rollout_action; // for auto_history, we need a space other than train_action for rollouts
history_info hinfo; // default history info for auto-history
string *neighbor_features_string;
v_array<int32_t> neighbor_features; // ugly encoding of neighbor feature requirements
bool should_produce_string; // when true, the task fills pred_string/truth_string for output
stringstream *pred_string;
stringstream *truth_string;
bool printed_output_header;
size_t t; // the current time step
size_t T; // the length of the (training) trajectory
size_t loss_last_step; // at what time step did they last declare their loss?
float test_loss; // total test loss for this example
float train_loss; // total training loss for this example
float learn_loss; // total loss for this "varied" example
v_array<float> learn_losses; // losses for all (valid) actions at learn_t
example** learn_example_copy; // copy of example(s) at learn_t
size_t learn_example_len; // number of example(s) at learn_t
float beta; // interpolation rate
bool allow_current_policy; // should the current policy be used for training? true for dagger
bool rollout_oracle; //if true then rollout are performed using oracle instead (optimal approximation discussed in searn's paper). this should be set to true for dagger
bool adaptive_beta; //used to implement dagger through searn. if true, beta = 1-(1-alpha)^n after n updates, and policy is mixed with oracle as \pi' = (1-beta)\pi^* + beta \pi
bool rollout_all_actions; // by default we rollout all actions. This is set to false when searn is used with a contextual bandit base learner, where we rollout only one sampled action
float alpha; //parameter used to adapt beta for dagger (see above comment), should be in (0,1)
uint32_t current_policy; // what policy are we training right now?
float gamma; // for dagger
size_t num_features;
uint32_t total_number_of_policies;
bool do_snapshot; // enable the snapshot/fast-forward machinery
bool do_fastforward;
float subsample_timesteps; // presumably fraction (or count) of time steps sampled for training -- confirm in the .cc
size_t read_example_last_id;
size_t passes_since_new_policy;
size_t read_example_last_pass;
size_t total_examples_generated;
size_t total_predictions_made;
bool hit_new_pass;
size_t passes_per_policy;
v_array<example*> ec_seq; // the example sequence currently being processed
learner* base_learner;
vw* all;
void* valid_labels;
clock_t start_clock_time; // for timing reported at finish
example*empty_example;
};
// Read option opt_name from the command line (vm) and/or the saved model
// (vm_file), store it in ret, and report mismatches between the two.
template<class T> void check_option(T& ret, vw&all, po::variables_map& vm, po::variables_map& vm_file, const char* opt_name, bool default_to_cmdline, bool(*equal)(T,T), const char* mismatch_error_string, const char* required_error_string);
// Boolean-flag variant of check_option (no equality functor needed).
void check_option(bool& ret, vw&all, po::variables_map& vm, po::variables_map& vm_file, const char* opt_name, bool default_to_cmdline, const char* mismatch_error_string);
// Equality helpers passed to check_option<T> for each supported option type.
bool string_equal(string a, string b);
bool float_equal(float a, float b);
bool uint32_equal(uint32_t a, uint32_t b);
bool size_equal(size_t a, size_t b);
// Register searn-specific command-line options.
void setup_searn_options(po::options_description& desc, vw&vw, std::vector<std::string>&opts, po::variables_map& vm, po::variables_map& vm_file);
// Callback table a concrete task must provide.
struct searn_task {
// One-time task setup; may read options and set the action count.
void (*initialize)(searn&,size_t&,std::vector<std::string>&, po::variables_map&, po::variables_map&);
// Free any task-owned resources.
void (*finish)(searn&);
// Run one structured prediction over the example sequence, calling
// searn::predict / declare_loss / snapshot along the way.
void (*structured_predict)(searn&, example**,size_t,stringstream*,stringstream*);
};
// Construct the searn learner from command-line / model options.
learner* setup(vw&, std::vector<std::string>&, po::variables_map&, po::variables_map&);
// Driver callbacks wired into the VW learner framework (void* is the searn state).
void searn_finish(void*);
void searn_drive(void*);
void searn_learn(void*,example*);
}
#endif