diff options
author | John Langford <jl@hunch.net> | 2014-11-11 18:08:17 +0300 |
---|---|---|
committer | John Langford <jl@hunch.net> | 2014-11-11 18:08:17 +0300 |
commit | d728b84ecbac9fcf9be10088f881fdec30d2a4fe (patch) | |
tree | 2366edf4f3f787eccd831d1700338e16cb8dc9fa /explore | |
parent | 1f3ee451c5ff4f23f527105a3d65d4173ff741f8 (diff) |
documented, removed bug in bagging interface, removed bug in MyInteractor
Diffstat (limited to 'explore')
-rw-r--r-- | explore/explore_sample.cpp | 4 | ||||
-rw-r--r-- | explore/static/MWTExplorer.h | 75 |
2 files changed, 73 insertions, 6 deletions
diff --git a/explore/explore_sample.cpp b/explore/explore_sample.cpp index e6e04be2..57e087f3 100644 --- a/explore/explore_sample.cpp +++ b/explore/explore_sample.cpp @@ -56,7 +56,7 @@ private: template <class Ctx>
struct MyInteraction
{
- Ctx& Context;
+ Ctx Context;
u32 Action;
float Probability;
string Unique_Key;
@@ -133,7 +133,7 @@ int main(int argc, char* argv[]) policy_functions.push_back(unique_ptr<IPolicy<MyContext>>(new MyPolicy()));
}
int num_actions = 10;
- BaggingExplorer<MyContext> explorer(policy_functions, num_bags, num_actions);
+ BaggingExplorer<MyContext> explorer(policy_functions, num_actions);
MyContext ctx;
string unique_key = "eventid";
u32 action = mwt.Choose_Action(explorer, unique_key, ctx);
diff --git a/explore/static/MWTExplorer.h b/explore/static/MWTExplorer.h index 76770492..61b35da3 100644 --- a/explore/static/MWTExplorer.h +++ b/explore/static/MWTExplorer.h @@ -34,7 +34,7 @@ class MwtExplorer; // // Exposes a method to record exploration data based on generic contexts. Exploration data -// is specified as a set of tuples <context, action, probability> as described below. An +// is specified as a set of tuples <context, action, probability, key> as described below. An // application passes an IRecorder object to the @MwtExplorer constructor. See // @StringRecorder for a sample IRecorder object. // @@ -170,10 +170,21 @@ private: vector<Feature>& m_features; }; + +// +// The epsilon greedy exploration class. This is a good choice if you +// have no idea which actions should be preferred. Epsilon greedy is also computationally cheap. +// + template <class Ctx> class EpsilonGreedyExplorer { public: + //The constructor is the only public member, because this should be used with the MwtExplorer. + // + //@param default_policy A default function which outputs an action given a context. + //@param epsilon The probability of a random exploration. + //@param num_actions The number of actions to randomize over. EpsilonGreedyExplorer(IPolicy<Ctx>& default_policy, float epsilon, u32 num_actions) : m_default_policy(default_policy), m_epsilon(epsilon), m_num_actions(num_actions) { @@ -244,10 +255,21 @@ private: friend class MwtExplorer<Ctx>; }; +// +// In some cases, different actions have a different scores, and you +// would prefer to choose actions with large scores. Softmax allows +// you to do that. +// + template <class Ctx> class SoftmaxExplorer { public: + //The constructor is the only public member, because this should be used with the MwtExplorer. + // + //@param default_scorer A function which outputs a score for each action. + //@param lambda lambda = 0 implies uniform distribution. Large lambda is equivalent to a max. + //@param num_actions The number of actions to randomize over. SoftmaxExplorer(IScorer<Ctx>& default_scorer, float lambda, u32 num_actions) : m_default_scorer(default_scorer), m_lambda(lambda), m_num_actions(num_actions) { @@ -323,10 +345,19 @@ private: friend class MwtExplorer<Ctx>; }; +// +// GenericExplorer provides complete flexibility. You can create any +// distribution over actions desired, and it will draw from that. +// + template <class Ctx> class GenericExplorer { public: + //The constructor is the only public member, because this should be used with the MwtExplorer. + // + //@param default_scorer A function which outputs the probability of each action. + //@param num_actions The number of actions to randomize over. GenericExplorer(IScorer<Ctx>& default_scorer, u32 num_actions) : m_default_scorer(default_scorer), m_num_actions(num_actions) { @@ -394,10 +425,20 @@ private: friend class MwtExplorer<Ctx>; }; +// +// The tau-first explorer collects precisely tau uniform random +// exploration events, and then uses the default policy. +// + template <class Ctx> class TauFirstExplorer { public: + //The constructor is the only public member, because this should be used with the MwtExplorer. + // + //@param default_policy A default policy after randomization finishes. + //@param tau The number of events to be uniform over. + //@param num_actions The number of actions to randomize over. TauFirstExplorer(IPolicy<Ctx>& default_policy, u32 tau, u32 num_actions) : m_default_policy(default_policy), m_tau(tau), m_num_actions(num_actions) { @@ -449,21 +490,32 @@ private: friend class MwtExplorer<Ctx>; }; +// +// The Bagging explorer randomizes over the actions chosen by a set of +// default policies. This performs well statistically but can be +// computationally expensive. +// + template <class Ctx> class BaggingExplorer { public: - BaggingExplorer(vector<unique_ptr<IPolicy<Ctx>>>& default_policy_functions, u32 bags, u32 num_actions) : + //The constructor is the only public member, because this should be used with the MwtExplorer. + // + //@param default_policy_functions A set of default policies to be uniform random over. + //@param num_actions The number of actions to randomize over. + + BaggingExplorer(vector<unique_ptr<IPolicy<Ctx>>>& default_policy_functions, u32 num_actions) : m_default_policy_functions(default_policy_functions), - m_bags(bags), m_num_actions(num_actions) { + m_bags = default_policy_functions.size(); if (m_num_actions < 1) { throw std::invalid_argument("Number of actions must be at least 1."); } - if (bags < 1) + if (m_bags < 1) { throw std::invalid_argument("Number of bags must be at least 1."); } @@ -517,15 +569,30 @@ private: friend class MwtExplorer<Ctx>; }; +// +//The top level MwtExplorer class. Using this makes sure that the +//right bits are used recorded and good random actions are chosen. +// + template <class Ctx> class MwtExplorer { public: + // The constructor + // + // @param appid This should be unique to your experiment or your risk nasty correlation bugs. + // @param recorder The recorder us a user-specified class for recording the appropriate bits for use in evaluation and learning. + MwtExplorer(std::string app_id, IRecorder<Ctx>& recorder) : m_recorder(recorder) { m_app_id = HashUtils::Compute_Id_Hash(app_id); } + // Choose_Action should be drop-in replacement for any existing policy function. + // + // @param explorer An existing exploration algorithm (one of the above) which uses the default policy as a callback. + // @param unique_key A unique identifier for the experimental unit. This could be a user id, a session id, etc... + // @param context The context upon which a decision is made. See SimpleContext above for an example. template <class Exp> u32 Choose_Action(Exp& explorer, string unique_key, Ctx& context) { |