diff options
author | John Langford <jl@hunch.net> | 2012-05-30 02:04:27 +0400 |
---|---|---|
committer | John Langford <jl@hunch.net> | 2012-05-30 02:04:27 +0400 |
commit | 6dc3ddde0555059b334e9688552d2d419b07e330 (patch) | |
tree | 85491cfaddffb68e440044a959f717572f667f41 /vowpalwabbit/parse_primitives.h | |
parent | 463f02e4a6b0a4be4bfa8f8e35facc26e4cbdc62 (diff) |
nearly all state now addressable
Diffstat (limited to 'vowpalwabbit/parse_primitives.h')
-rw-r--r-- | vowpalwabbit/parse_primitives.h | 56 |
1 files changed, 54 insertions, 2 deletions
diff --git a/vowpalwabbit/parse_primitives.h b/vowpalwabbit/parse_primitives.h
index 8ce6f335..26d316f0 100644
--- a/vowpalwabbit/parse_primitives.h
+++ b/vowpalwabbit/parse_primitives.h
@@ -8,6 +8,7 @@ embodied in the content of this file are licensed under the BSD
 #define PP
 
 #include<iostream>
+#include <stdint.h>
 #include "v_array.h"
 #include "io.h"
 
@@ -16,17 +17,68 @@ struct substring {
   char *end;
 };
 
+struct shared_data {
+  size_t queries;
+
+  uint64_t example_number;
+  uint64_t total_features;
+
+  double t;
+  double weighted_examples;
+  double weighted_unlabeled_examples;
+  double old_weighted_examples;
+  double weighted_labels;
+  double sum_loss;
+  double sum_loss_since_last_dump;
+  float dump_interval;// when should I update for the user.
+  double gravity;
+  double contraction;
+  double min_label;//minimum label encountered
+  double max_label;//maximum label encountered
+
+  bool binary_label;
+  uint32_t k;
+};
+
 struct label_parser {
   void (*default_label)(void*);
-  void (*parse_label)(void*, v_array<substring>&);
+  void (*parse_label)(shared_data*, void*, v_array<substring>&);
   void (*cache_label)(void*, io_buf& cache);
-  size_t (*read_cached_label)(void*, io_buf& cache);
+  size_t (*read_cached_label)(shared_data*, void*, io_buf& cache);
   void (*delete_label)(void*);
   float (*get_weight)(void*);
   float (*get_initial)(void*);
   size_t label_size;
 };
 
+typedef size_t (*hash_func_t)(substring, unsigned long);
+
+struct parser {
+  v_array<substring> channels;//helper(s) for text parsing
+  v_array<substring> words;
+  v_array<substring> name;
+
+  io_buf* input; //Input source(s)
+  int (*reader)(void*, void* ae);
+  hash_func_t hasher;
+  bool resettable; //Whether or not the input can be reset.
+  io_buf* output; //Where to output the cache.
+  bool write_cache;
+  bool sort_features;
+  bool sorted_cache;
+
+  size_t ring_size;
+  uint64_t parsed_examples; // The index of the parsed example.
+  uint64_t local_example_number;
+
+  v_array<size_t> ids; //unique ids for sources
+  v_array<size_t> counts; //partial examples received from sources
+  size_t finished_count;//the number of finished examples;
+  int label_sock;
+  int bound_sock;
+  int max_fd;
+};
+
 //chop up the string into a v_array of substring.
 void tokenize(char delim, substring s, v_array<substring> &ret);
 