diff options
author | Hal Daume III <me@hal3.name> | 2012-06-01 00:44:23 +0400 |
---|---|---|
committer | Hal Daume III <me@hal3.name> | 2012-06-01 00:44:23 +0400 |
commit | a7151261249ec358e574d23fed13064a7281fd99 (patch) | |
tree | 99a85a6d74d162d3c7c0e41bacdaf774d28c4bcf | |
parent | da64b998b59e7e8e45d049d79dc70da4618363fe (diff) |
more tests for searn, more refactoring
35 files changed, 723 insertions, 117 deletions
@@ -30,7 +30,7 @@ FLAGS = $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 -I $(BOOST_I #FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -pg -g # for valgrind -FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0 +#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0 BINARIES = vw active_interactor MANPAGES = vw.1 diff --git a/test/RunTests b/test/RunTests index 048503df..720d56d5 100755 --- a/test/RunTests +++ b/test/RunTests @@ -459,17 +459,17 @@ __DATA__ train-sets/ref/wsj_small-tm.dat.stderr # Test 14: Run searn on seq_small for 12 passes, 4 passes per policy -{VW} -c -d train-sets/seq_small --passes 12 --searn_passes_per_policy 4 --searn sequence --searn_max_action 4 && rm -f train-sets/seq_small.cache +{VW} -c -d train-sets/seq_small --passes 12 --searn_passes_per_policy 4 --searn 4 --searn_task sequence && rm -f train-sets/seq_small.cache train-sets/ref/searn_small.stdout train-sets/ref/searn_small.stderr # Test 15: Run searn on wsj_small for 12 passes, 4 passes per policy, extra features -{VW} -c -d train-sets/wsj_small.dat.gz --passes 12 --searn_passes_per_policy 4 --searn sequence --searn_max_action 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache +{VW} -c -d train-sets/wsj_small.dat.gz --passes 12 --searn_passes_per_policy 4 --searn_task sequence --searn 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache train-sets/ref/searn_wsj.stdout train-sets/ref/searn_wsj.stderr # Test 16: Run searn (wap) on wsj_small for 2 passes, 1 pass per policy, extra features -{VW} -c -d train-sets/wsj_small.dat.gz --passes 2 --searn_passes_per_policy 1 --searn sequence --searn_max_action 45 --wap 45 --searn_history 2 --searn_bigrams --searn_features 1 && rm -f train-sets/wsj_small.dat.gz.cache +{VW} -c -d train-sets/wsj_small.dat.gz --passes 2 --searn_passes_per_policy 1 --searn_task sequence --searn 45 --wap 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache train-sets/ref/searn_wsj2.dat.stdout train-sets/ref/searn_wsj2.dat.stderr diff --git a/test/test-sets/ref/0001.stderr b/test/test-sets/ref/0001.stderr index 8a62ef04..149aeac8 100644 --- a/test/test-sets/ref/0001.stderr +++ b/test/test-sets/ref/0001.stderr @@ -1,12 +1,13 @@ -using no cache -Reading from train-sets/0001.dat -num sources = 1 Num weight bits = 17 learning rate = 10 initial_t = 1 power_t = 0.5 predictions = 001.predict.tmp only testing +warning: final argument 'train-sets/0001.dat' assumed to be input file; in the future, please use -d +using no cache +Reading from train-sets/0001.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.000000 0.000000 3 3.0 0.0000 0.0000 326 diff --git a/test/test-sets/ref/0002b.stderr b/test/test-sets/ref/0002b.stderr index 4765f0a7..3ae7448e 100644 --- a/test/test-sets/ref/0002b.stderr +++ b/test/test-sets/ref/0002b.stderr @@ -1,12 +1,12 @@ -using no cache -Reading from train-sets/0002.dat -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 predictions = 0002b.predict only testing +using no cache +Reading from train-sets/0002.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.005280 0.005280 3 3.0 0.5498 0.4980 15 diff --git a/test/test-sets/ref/0002c.stderr b/test/test-sets/ref/0002c.stderr index ad7a2197..92233c36 100644 --- a/test/test-sets/ref/0002c.stderr +++ b/test/test-sets/ref/0002c.stderr @@ -1,12 +1,12 @@ -using no cache -Reading from train-sets/0002.dat -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 predictions = 0002c.predict only testing +using no cache +Reading from train-sets/0002.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.002276 0.002276 3 3.0 0.5498 0.5361 184 diff --git a/test/train-sets/chunk_to_features.pl b/test/train-sets/chunk_to_features.pl new file mode 100755 index 00000000..24ba8039 --- /dev/null +++ b/test/train-sets/chunk_to_features.pl @@ -0,0 +1,78 @@ +#!/usr/bin/perl -w +use strict; + +my %cdict = (); my $cdictNum = 1; +while (1) { + my $cdictFile = shift or last; + open F, $cdictFile or die; + while (<F>) { + chomp; + my ($c, $num) = split; + $cdict{$c} = $num; + if ($num+1 > $cdictNum) { $cdictNum = $num + 1; } + } + close F or die; +} + +my @w = (); my @t = (); my @c = (); +while (<>) { + chomp; + if (/^[\s]*$/) { dumpit(); print "\n"; @w = (); @t = (); @c = (); next; } + + my ($w,$t,$c) = split; + #if ($c =~ /-NP/) { push @c, "1"; } else { push @c, "-1"; } + if (not exists $cdict{$c}) { + $cdict{$c} = $cdictNum; + $cdictNum++; + print STDERR "$c\t$cdict{$c}\n"; + } + + push @c, $cdict{$c}; + push @t, $t; + push @w, $w; +} + +sub dumpit { + for (my $n=0; $n<@c; $n++) { + my %f = (); + for (my $m=-2; $m<=+2; $m++) { + computef(\%f, '_'.$m, $n+$m); + } + print $c[$n] . ' |'; + foreach my $f (keys %f) { + $f =~ s/:/-COL-/g; + $f =~ s/\|/-PIP-/g; + print ' ' . $f; + } + print "\n"; + } +} + +sub computef { + my ($f, $s0, $i) = @_; + + if ($i < 0) { $f->{"w".$s0."=<s>" } = 1; return; } + if ($i >= @c) { $f->{"w".$s0."=</s>"} = 1; return; } + + my $w = $w[$i]; my $p = $t[$i]; my $l = lc($w[$i]); + + $f->{"w".$s0."=".$w} = 1; +# $f->"p:=".$p} = 1; + $f->{"l".$s0."=".$l} = 1; + + my $c = $w; + $c =~ s/[A-Z]+/A/g; + $c =~ s/[a-z]+/a/g; + $c =~ s/[0-9]+/0/g; + $c =~ s/[^\.Aa0]+/\#/g; + $f->{"c".$s0."=".$c} = 1; + $f->{"c".$s0."=".$c."_fw=".(($i==0) ? "y" : "n")} = 1; + + my $N = length($l); + $f->{"pre1".$s0."=".substr($l,0,1)} = 1; + $f->{"pre2".$s0."=".substr($l,0,2)} = 1; + $f->{"pre3".$s0."=".substr($l,0,3)} = 1; + $f->{"suf1".$s0."=".substr($l,$N-1,1)} = 1; + $f->{"suf2".$s0."=".substr($l,$N-2,2)} = 1; + $f->{"suf3".$s0."=".substr($l,$N-3,3)} = 1; +} diff --git a/test/train-sets/cs_test.pred b/test/train-sets/cs_test.pred new file mode 100644 index 00000000..df8df931 --- /dev/null +++ b/test/train-sets/cs_test.pred @@ -0,0 +1,300 @@ +1.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +1.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 +2.000000 diff --git a/test/train-sets/eval_chunking.pl b/test/train-sets/eval_chunking.pl new file mode 100755 index 00000000..78f7b92c --- /dev/null +++ b/test/train-sets/eval_chunking.pl @@ -0,0 +1,110 @@ +#!/usr/bin/perl -w +use strict; + +my $rdictFile = shift or die; +my $truthFile = shift or die; + +my %rdict = (); my $rdictNum = 1; +open F, $rdictFile or die; +while (<F>) { + chomp; + my ($c, $num) = split; + if ($c =~ /^[BI]-O$/) { $c = 'O'; } + $rdict{$num} = $c; + if ($num+1 > $rdictNum) { $rdictNum = $num + 1; } +} +close F or die; + +my $np = 0; +my $nt = 0; +my $ni = 0; +my $nil = 0; + +my $nc = 0; +my $ncl = 0; +my $na = 0; + +my @truth = (); +if ($truthFile =~ /.gz$/ ) { open T, "zcat $truthFile |" or die; } +elsif ($truthFile =~ /.bz2$/) { open T, "bzcat $truthFile |" or die; } +else { open T, $truthFile or die; } +while (<T>) { + chomp; + if (/^[\s]*$/) { runit(); @truth = (); next; } + my ($c) = split; + if (not defined $rdict{$c}) { die $c; } + push @truth, $rdict{$c}; +} +close T; + +my $p = $ni / (($np > 0) ? $np : 1); +my $r = $ni / (($nt > 0) ? $nt : 1); +my $f = 2 * $p * $r / ($p + $r); +my $a = $nc / (($na > 0) ? $na : 1); +my $pl = $nil / (($np > 0) ? $np : 1); +my $rl = $nil / (($nt > 0) ? $nt : 1); +my $fl = 2 * $pl * $rl / ($pl + $rl); +my $al = $ncl / (($na > 0) ? $na : 1); + +$p = int($p * 1000)/10; $r = int($r * 1000)/10; $f = int($f * 1000)/10; $a = int ($a * 1000)/10; +$pl = int($pl * 1000)/10; $rl = int($rl * 1000)/10; $fl = int($fl * 1000)/10; $al = int ($al * 1000)/10; + +print "unlabeled: p=$p\tr=$r\tf=$f\tacc=$a\n"; +print " labeled: p=$pl\tr=$rl\tf=$fl\tacc=$al\n"; + + +sub runit { + my $N = scalar @truth; + my @pred = (); + for (my $n=0; $n<$N; $n++) { + $_ = <>; + chomp; + $_ = int($_); + if (not defined $rdict{$_}) { die $_; } + push @pred, $rdict{$_}; + } + $_ = <>; chomp; + if (not /^\s*$/) { die; } + + $na += $N; + for (my $n=0; $n<$N; $n++) { + if ($pred[$n] eq $truth[$n]) { $ncl++; } + if (substr($pred[$n],0,1) eq substr($truth[$n],0,1)) { $nc++; } + } + + my %c1 = chunksof(@truth); + my %c2 = chunksof(@pred); + + $np += scalar keys %c1; + $nt += scalar keys %c2; + foreach my $c (keys %c1) { + if (exists $c2{$c}) { + $ni++; + if ($c2{$c} eq $c1{$c}) { + $nil++; + } + } + } +} + +sub chunksof { + my @l = @_; + my $i = 0; + my %c = (); + while ($i < @l) { + if ($l[$i] =~ /^B-(.+)$/) { + my $lab = $1; + if ($lab eq 'O') { $i++; next; } + my $j = $i+1; + while ($j < @l) { + if ($l[$j] eq "I-$lab") { $j++; } + else { last; } + } + $c{"$i $j"} = $lab; + $i = $j; + } else { + $i++; + } + } + return (%c); +} diff --git a/test/train-sets/ner.cdict b/test/train-sets/ner.cdict new file mode 100644 index 00000000..669cc7bc --- /dev/null +++ b/test/train-sets/ner.cdict @@ -0,0 +1,9 @@ +B-ORG 1 +B-O 2 +B-MISC 3 +B-PER 4 +I-PER 5 +B-LOC 6 +I-ORG 7 +I-MISC 8 +I-LOC 9 diff --git a/test/train-sets/ner.pred b/test/train-sets/ner.pred new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/train-sets/ner.pred diff --git a/test/train-sets/ner.test.gz b/test/train-sets/ner.test.gz Binary files differnew file mode 100644 index 00000000..499b74e0 --- /dev/null +++ b/test/train-sets/ner.test.gz diff --git a/test/train-sets/ner.tm b/test/train-sets/ner.tm new file mode 100644 index 00000000..43a9fe14 --- /dev/null +++ b/test/train-sets/ner.tm @@ -0,0 +1,11 @@ +9 +1 1 1 1 0 1 0 0 0 +1 1 1 1 0 1 1 0 0 +1 1 1 1 0 1 0 0 0 +1 1 1 1 0 1 0 1 0 +0 1 1 0 1 1 0 0 0 +0 1 0 0 1 0 0 0 0 +1 1 1 1 0 1 0 0 1 +1 1 1 1 0 1 1 0 0 +1 1 1 1 0 1 0 1 0 +1 1 1 0 0 1 0 0 1 diff --git a/test/train-sets/ner.train.gz b/test/train-sets/ner.train.gz Binary files differnew file mode 100644 index 00000000..ba01acd5 --- /dev/null +++ b/test/train-sets/ner.train.gz diff --git a/test/train-sets/ref/0001.stderr b/test/train-sets/ref/0001.stderr index edef747e..5e1d3426 100644 --- a/test/train-sets/ref/0001.stderr +++ b/test/train-sets/ref/0001.stderr @@ -1,14 +1,14 @@ You have chosen to generate 3-grams You have chosen to generate 1-skip-3-grams final_regressor = models/0001.model -creating cache_file = train-sets/0001.dat.cache -Reading from train-sets/0001.dat -num sources = 1 Num weight bits = 17 learning rate = 2.56e+06 initial_t = 128000 power_t = 1 decay_learning_rate = 1 +creating cache_file = train-sets/0001.dat.cache +Reading from train-sets/0001.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 1.000000 1.000000 3 3.0 0.0000 1.0000 326 diff --git a/test/train-sets/ref/0002.stderr b/test/train-sets/ref/0002.stderr index 7d14e85c..a3a5c9a9 100644 --- a/test/train-sets/ref/0002.stderr +++ b/test/train-sets/ref/0002.stderr @@ -1,11 +1,11 @@ final_regressor = models/0002.model -using no cache -Reading from train-sets/0002.dat -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 +using no cache +Reading from train-sets/0002.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.110447 0.110447 3 3.0 0.5498 0.3591 15 diff --git a/test/train-sets/ref/0002a.stderr b/test/train-sets/ref/0002a.stderr index 6cc621c3..55e54a58 100644 --- a/test/train-sets/ref/0002a.stderr +++ b/test/train-sets/ref/0002a.stderr @@ -1,12 +1,13 @@ creating quadratic features for pairs: Tf ff final_regressor = models/0002a.model -using no cache -Reading from train-sets/0002.dat -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0 +warning: final argument 'train-sets/0002.dat' assumed to be input file; in the future, please use -d +using no cache +Reading from train-sets/0002.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.146961 0.146961 3 3.0 0.5498 0.2139 197 diff --git a/test/train-sets/ref/0002c.stderr b/test/train-sets/ref/0002c.stderr index 2d9f312b..b7fe4901 100644 --- a/test/train-sets/ref/0002c.stderr +++ b/test/train-sets/ref/0002c.stderr @@ -1,12 +1,13 @@ creating quadratic features for pairs: ff final_regressor = models/0002c.model -using no cache -Reading from train-sets/0002.dat -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0 +warning: final argument 'train-sets/0002.dat' assumed to be input file; in the future, please use -d +using no cache +Reading from train-sets/0002.dat +num sources = 1 average since example example current current current loss last counter weight label predict features 0.134680 0.134680 3 3.0 0.5498 0.2361 184 diff --git a/test/train-sets/ref/cs_test.ldf.csoaa.stderr b/test/train-sets/ref/cs_test.ldf.csoaa.stderr index f3c6d06a..d6a9c73b 100644 --- a/test/train-sets/ref/cs_test.ldf.csoaa.stderr +++ b/test/train-sets/ref/cs_test.ldf.csoaa.stderr @@ -1,6 +1,3 @@ -creating cache_file = train-sets/cs_test.ldf.cache -Reading from train-sets/cs_test.ldf -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 @@ -8,6 +5,9 @@ power_t = 0.5 decay_learning_rate = 1 predictions = cs_test.ldf.csoaa.predict warning: turning off constant for label dependent features; use --noconstant +creating cache_file = train-sets/cs_test.ldf.cache +Reading from train-sets/cs_test.ldf +num sources = 1 average since example example current current current loss last counter weight label predict features 0.000000 0.000000 3 3.0 known 0 3 diff --git a/test/train-sets/ref/cs_test.ldf.wap.stderr b/test/train-sets/ref/cs_test.ldf.wap.stderr index 1a06a442..5405ca82 100644 --- a/test/train-sets/ref/cs_test.ldf.wap.stderr +++ b/test/train-sets/ref/cs_test.ldf.wap.stderr @@ -1,20 +1,19 @@ -creating cache_file = train-sets/cs_test.ldf.cache -Reading from train-sets/cs_test.ldf -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 decay_learning_rate = 1 predictions = cs_test.ldf.wap.predict -warning: turning off constant for label dependent features; use --noconstant +creating cache_file = train-sets/cs_test.ldf.cache +Reading from train-sets/cs_test.ldf +num sources = 1 average since example example current current current loss last counter weight label predict features -0.000000 0.000000 3 3.0 known 0 3 -0.000000 0.000000 6 6.0 known 0 3 -0.000000 0.000000 11 11.0 known 0 3 -0.000000 0.000000 22 22.0 known 0 3 -0.000000 0.000000 44 44.0 known 1 3 +0.000000 0.000000 3 3.0 known 0 4 +0.000000 0.000000 6 6.0 known 0 4 +0.000000 0.000000 11 11.0 known 0 4 +0.000000 0.000000 22 22.0 known 0 4 +0.000000 0.000000 44 44.0 known 1 4 finished run number of examples = 70 @@ -22,4 +21,4 @@ weighted example sum = 70 weighted label sum = 0 average loss = 0 best constant = -0.01449 -total feature number = 210 +total feature number = 280 diff --git a/test/train-sets/ref/rcv1_small.stderr b/test/train-sets/ref/rcv1_small.stderr index fe58cb4a..8f5e861c 100644 --- a/test/train-sets/ref/rcv1_small.stderr +++ b/test/train-sets/ref/rcv1_small.stderr @@ -1,7 +1,4 @@ enabling BFGS based optimization **without** curvature calculation -creating cache_file = train-sets/rcv1_small.dat.cache -Reading from train-sets/rcv1_small.dat -num sources = 1 Num weight bits = 20 learning rate = 10 initial_t = 1 @@ -11,15 +8,18 @@ using l2 regularization m = 7 Allocated 72M for weights and mem ## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time - 1 6.931472e-01 1.859805e-03 3.768599e+00 3.414409e+01 1.977478e+04 1.103734e-01 2.119 - 3 4.624629e-01 1.085868e-02 1.936935e+00 0.554592 0.194583 2.208660e+02 1.000000e+00 2.656 - 4 3.384483e-01 4.246157e-04 1.726574e-01 0.520733 0.133178 3.415247e+01 1.000000e+00 3.323 - 5 3.184994e-01 7.930477e-05 6.343691e-02 0.751656 0.517330 6.775453e+01 1.000000e+00 4.025 - 6 3.024582e-01 2.887344e-06 1.460970e-02 0.657158 0.328843 3.618752e+01 1.000000e+00 5.114 - 7 2.967627e-01 4.279802e-06 3.517537e-03 0.645592 0.293725 1.585137e+01 1.000000e+00 6.356 - 8 2.952703e-01 2.241755e-06 1.631614e-03 0.527831 0.052654 5.802390e+00 1.000000e+00 7.734 - 9 2.950591e-01 7.095166e-07 1.546886e-03 0.199872 -0.608863 7.389087e-01 1.000000e+00 9.263 -10 2.948347e-01 2.803435e-07 1.223611e-04 0.585419 0.171988 1.453111e-01 1.000000e+00 12.281 +creating cache_file = train-sets/rcv1_small.dat.cache +Reading from train-sets/rcv1_small.dat +num sources = 1 + 1 6.931472e-01 1.859805e-03 3.768599e+00 3.414409e+01 1.977478e+04 1.103734e-01 0.769 + 3 4.624629e-01 1.085868e-02 1.936935e+00 0.554592 0.194583 2.208660e+02 1.000000e+00 0.934 + 4 3.384483e-01 4.246157e-04 1.726574e-01 0.520733 0.133178 3.415247e+01 1.000000e+00 1.142 + 5 3.184994e-01 7.930477e-05 6.343691e-02 0.751656 0.517330 6.775453e+01 1.000000e+00 1.394 + 6 3.024582e-01 2.887344e-06 1.460970e-02 0.657158 0.328843 3.618752e+01 1.000000e+00 1.673 + 7 2.967627e-01 4.279802e-06 3.517537e-03 0.645592 0.293725 1.585137e+01 1.000000e+00 1.984 + 8 2.952703e-01 2.241755e-06 1.631614e-03 0.527831 0.052654 5.802390e+00 1.000000e+00 2.332 + 9 2.950591e-01 7.095166e-07 1.546886e-03 0.199872 -0.608863 7.389087e-01 1.000000e+00 2.712 +10 2.948347e-01 2.803435e-07 1.223611e-04 0.585419 0.171988 1.453111e-01 1.000000e+00 3.432 finished run diff --git a/test/train-sets/ref/searn_small.stderr b/test/train-sets/ref/searn_small.stderr new file mode 100644 index 00000000..0d2a3a1e --- /dev/null +++ b/test/train-sets/ref/searn_small.stderr @@ -0,0 +1,23 @@ +Num weight bits = 18 +learning rate = 10 +initial_t = 1 +power_t = 0.5 +decay_learning_rate = 1 +creating cache_file = train-sets/seq_small.cache +Reading from train-sets/seq_small +num sources = 1 +average since example example current current current +loss last counter weight label predict features +#pol average since sequence example current label current predicted current cur cur predic. examples +chng loss last counter weight sequence prefix sequence prefix features pass pol made gener. + 0 1.333333 1.333333 3 3.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 2 0 18 12 + 1 1.000000 0.666667 6 6.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 5 1 49 30 + 1 0.727273 0.400000 11 11.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 10 2 162 60 + +finished run +number of examples = 12 +weighted example sum = 12 +weighted label sum = 0 +average loss = 0.6667 +best constant = -0.09091 +total feature number = 552 diff --git a/test/train-sets/ref/searn_small.stdout b/test/train-sets/ref/searn_small.stdout new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/train-sets/ref/searn_small.stdout diff --git a/test/train-sets/ref/searn_wsj.stderr b/test/train-sets/ref/searn_wsj.stderr new file mode 100644 index 00000000..3d156c0e --- /dev/null +++ b/test/train-sets/ref/searn_wsj.stderr @@ -0,0 +1,29 @@ +Num weight bits = 18 +learning rate = 10 +initial_t = 1 +power_t = 0.5 +decay_learning_rate = 1 +creating cache_file = train-sets/wsj_small.dat.gz.cache +Reading from train-sets/wsj_small.dat.gz +num sources = 1 +average since example example current current current +loss last counter weight label predict features +#pol average since sequence example current label current predicted current cur cur predic. examples +chng loss last counter weight sequence prefix sequence prefix features pass pol made gener. + 0 22.000000 22.000000 3 3.000000 [14 10 13 9 1 2 1 4..] [11 11 11 1 2 1 2 1..] 2659 0 0 93 64 + 0 21.000000 20.000000 6 6.000000 [19 2 22 4 3 9 1 1 ..] [1 2 3 9 1 2 1 1 12..] 3324 0 0 196 160 + 0 18.363636 15.200000 11 11.000000 [29 4 3 9 1 1 23 8 ..] [1 2 3 9 1 2 1 10 7..] 1424 0 0 328 312 + 0 15.000000 11.636364 22 22.000000 [11 11 21 3 10 13 3..] [11 11 21 3 1 2 3 1..] 3419 0 0 613 576 + 0 12.681818 10.363636 44 44.000000 [3 26 9 1 4 3 1 2 5..] [3 1 1 1 2 3 1 2 11..] 1139 0 0 1120 1107 + 0 9.988506 7.232558 87 87.000000 [11 11 12 9 1 2 11 ..] [11 11 12 9 1 2 11 ..] 2564 1 0 2220 2192 + 0 6.316092 2.643678 174 174.000000 [11 1 10 13 2 17 30..] [11 1 10 13 2 17 30..] 1044 2 0 4370 4358 + 1 4.597701 2.879310 348 348.000000 [2 11 2 11 12 3 11 ..] [2 25 1 1 12 3 11 1..] 2279 4 1 49100 8673 + 1 4.612069 4.626437 696 696.000000 [19 22 4 5 3 1 2 1 ..] [19 22 34 9 12 1 2 ..] 2754 8 2 459337 17212 + +finished run +number of examples = 936 +weighted example sum = 936 +weighted label sum = 0 +average loss = 4.706 +best constant = -0.00107 +total feature number = 78376268 diff --git a/test/train-sets/ref/searn_wsj.stdout b/test/train-sets/ref/searn_wsj.stdout new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/train-sets/ref/searn_wsj.stdout diff --git a/test/train-sets/ref/searn_wsj2.dat.stderr b/test/train-sets/ref/searn_wsj2.dat.stderr new file mode 100644 index 00000000..5cf68832 --- /dev/null +++ b/test/train-sets/ref/searn_wsj2.dat.stderr @@ -0,0 +1,26 @@ +Num weight bits = 18 +learning rate = 10 +initial_t = 1 +power_t = 0.5 +decay_learning_rate = 1 +creating cache_file = train-sets/wsj_small.dat.gz.cache +Reading from train-sets/wsj_small.dat.gz +num sources = 1 +average since example example current current current +loss last counter weight label predict features +#pol average since sequence example current label current predicted current cur cur predic. examples +chng loss last counter weight sequence prefix sequence prefix features pass pol made gener. + 0 21.666667 21.666667 3 3.000000 [14 10 13 9 1 2 1 4..] [11 11 11 15 9 9 1 ..] 2659 0 0 93 64 + 0 23.666667 25.666667 6 6.000000 [19 2 22 4 3 9 1 1 ..] [19 2 11 11 11 11 1..] 3324 0 0 196 160 + 0 20.909091 17.600000 11 11.000000 [29 4 3 9 1 1 23 8 ..] [19 2 3 9 1 6 28 29..] 1424 0 0 328 312 + 0 16.318182 11.727273 22 22.000000 [11 11 21 3 10 13 3..] [11 11 21 3 1 2 3 1..] 3419 0 0 613 576 + 0 12.727273 9.136364 44 44.000000 [3 26 9 1 4 3 1 2 5..] [3 11 11 1 2 3 1 2 ..] 1139 0 0 1120 1107 + 1 11.137931 9.511628 87 87.000000 [11 11 12 9 1 2 11 ..] [11 11 12 11 11 11 ..] 2564 1 1 13460 2192 + +finished run +number of examples = 156 +weighted example sum = 156 +weighted label sum = 0 +average loss = 8.532 +best constant = -0.006452 +total feature number = 7819789 diff --git a/test/train-sets/ref/searn_wsj2.dat.stdout b/test/train-sets/ref/searn_wsj2.dat.stdout new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/train-sets/ref/searn_wsj2.dat.stdout diff --git a/test/train-sets/ref/seq_small.stderr b/test/train-sets/ref/seq_small.stderr index 67c07e03..6fd97a8b 100644 --- a/test/train-sets/ref/seq_small.stderr +++ b/test/train-sets/ref/seq_small.stderr @@ -1,18 +1,18 @@ -creating cache_file = train-sets/seq_small.cache -Reading from train-sets/seq_small -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 decay_learning_rate = 1 -average since sequence example current label current predicted current cur cur predic. examples -loss last counter weight sequence prefix sequence prefix features pass pol made gener. -0.666667 0.666667 1 6.0 [ 1 3 2 1 4 3 ] [ 1 1 1 1 1 1 ] 12 0 0 6 0 -0.333333 0.000000 2 12.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 1 0 12 6 -0.222222 0.000000 3 18.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 2 0 18 12 -0.166667 0.000000 4 24.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 3 0 24 18 -0.083333 0.000000 8 48.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 7 1 72 42 +creating cache_file = train-sets/seq_small.cache +Reading from train-sets/seq_small +num sources = 1 +average since sequence example current label current predicted current cur cur predic. examples +loss last counter weight sequence prefix sequence prefix features pass pol made gener. +0.666667 0.666667 1 6.000000 [ 1 3 2 1 4 3 ] [ 1 1 1 1 1 1 ] 12 0 0 6 0 +0.333333 0.000000 2 12.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 1 0 12 6 +0.222222 0.000000 3 18.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 2 0 18 12 +0.166667 0.000000 4 24.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 3 0 24 18 +0.083333 0.000000 8 48.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 7 1 51 42 finished run number of examples = 12 diff --git a/test/train-sets/ref/wiki1K.stderr b/test/train-sets/ref/wiki1K.stderr index 9531eb9c..4f28322b 100644 --- a/test/train-sets/ref/wiki1K.stderr +++ b/test/train-sets/ref/wiki1K.stderr @@ -1,22 +1,23 @@ your learning rate is too high, setting it to 1 -using no cache -Reading from train-sets/wiki1K.dat -num sources = 1 Num weight bits = 13 learning rate = 1 initial_t = 1 power_t = 0.5 +warning: final argument 'train-sets/wiki1K.dat' assumed to be input file; in the future, please use -d +using no cache +Reading from train-sets/wiki1K.dat +num sources = 1 average since example example current current current loss last counter weight label predict features -10.276575 10.276575 3 3.0 unknown 0.0000 37 -10.341718 10.406862 6 6.0 unknown 0.0000 13 -10.311285 10.274764 11 11.0 unknown 0.0000 31 -10.452045 10.592805 22 22.0 unknown 0.0000 1 -10.439284 10.426523 44 44.0 unknown 0.0000 165 -10.459843 10.480881 87 87.0 unknown 0.0000 28 -10.093540 9.727236 174 174.0 unknown 0.0000 16 -9.566517 9.039494 348 348.0 unknown 0.0000 1 -9.066497 8.566477 696 696.0 unknown 0.0000 142 +10.276562 10.276562 3 3.0 unknown 0.0000 37 +10.341712 10.406861 6 6.0 unknown 0.0000 13 +10.311279 10.274760 11 11.0 unknown 0.0000 31 +10.452043 10.592806 22 22.0 unknown 0.0000 1 +10.439283 10.426523 44 44.0 unknown 0.0000 165 +10.459842 10.480879 87 87.0 unknown 0.0000 28 +10.093538 9.727235 174 174.0 unknown 0.0000 16 +9.566512 9.039486 348 348.0 unknown 0.0000 1 +9.066552 8.566591 696 696.0 unknown 0.0000 142 finished run number of examples = 1000 diff --git a/test/train-sets/ref/wsj_small-tm.dat.stderr b/test/train-sets/ref/wsj_small-tm.dat.stderr index 9da3f9bc..6693ac96 100644 --- a/test/train-sets/ref/wsj_small-tm.dat.stderr +++ b/test/train-sets/ref/wsj_small-tm.dat.stderr @@ -1,27 +1,28 @@ -using cache_file = train-sets/wsj_small.dat.gz.cache -ignoring text input in favor of cache input -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 -average since sequence example current label current predicted current cur cur predic. examples -loss last counter weight sequence prefix sequence prefix features pass pol made gener. -0.810811 0.810811 1 37.0 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0 -0.781250 0.740741 2 64.0 [ 11 2 3 11 11 ] [ 11 26 9 11 26 ] 1194 0 0 705 37 -0.731183 0.620690 3 93.0 [ 14 10 13 9 1 ] [ 11 15 16 1 1 ] 1286 0 0 1105 64 -0.720930 0.694444 4 129.0 [ 3 4 6 3 1 ] [ 11 11 2 3 11 ] 1608 0 0 1494 93 -0.706250 0.645161 5 160.0 [ 19 3 10 2 1 ] [ 2 3 1 2 1 ] 1378 0 0 2170 129 -0.678571 0.555556 6 196.0 [ 19 2 22 4 3 ] [ 11 2 11 11 11 ] 1608 0 0 2462 160 -0.676596 0.666667 7 235.0 [ 10 2 3 1 10 ] [ 1 2 11 1 1 ] 1746 0 0 3061 196 -0.614731 0.491525 12 353.0 [ 5 12 11 11 21 ] [ 11 12 9 1 21 ] 1102 0 0 5473 328 -0.482955 0.350427 25 704.0 [ 10 13 22 4 9 ] [ 10 2 1 4 1 ] 1148 0 0 12574 678 -0.398449 0.315126 57 1418.0 [ 19 1 4 6 36 ] [ 19 1 4 6 5 ] 2252 0 0 25497 1368 +cannot have --sequence_transition_file and zero history length, setting history length to 1 +creating cache_file = train-sets/wsj_small.dat.gz.cache +Reading from train-sets/wsj_small.dat.gz +num sources = 1 +average since sequence example current label current predicted current cur cur predic. examples +loss last counter weight sequence prefix sequence prefix features pass pol made gener. +0.810811 0.810811 1 37.000000 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0 +0.750000 0.666667 2 64.000000 [ 11 2 3 11 11 ] [ 9 9 9 11 9 ] 1194 0 0 837 37 +0.698925 0.586207 3 93.000000 [ 14 10 13 9 1 ] [ 11 15 11 1 9 ] 1286 0 0 1457 64 +0.689922 0.666667 4 129.000000 [ 3 4 6 3 1 ] [ 11 11 2 3 11 ] 1608 0 0 2088 93 +0.675000 0.612903 5 160.000000 [ 19 3 10 2 1 ] [ 2 3 1 2 1 ] 1378 0 0 2892 129 +0.642857 0.500000 6 196.000000 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 3611 160 +0.634043 0.589744 7 235.000000 [ 10 2 3 1 10 ] [ 19 2 3 1 11 ] 1746 0 0 4423 196 +0.575071 0.457627 12 353.000000 [ 5 12 11 11 21 ] [ 11 12 9 1 21 ] 1102 0 0 7489 328 +0.451705 0.327635 25 704.000000 [ 10 13 22 4 9 ] [ 10 13 22 4 3 ] 1148 0 0 15598 678 +0.375882 0.301120 57 1418.000000 [ 19 1 4 6 36 ] [ 19 14 4 6 5 ] 2252 0 0 31169 1368 finished run number of examples = 78 weighted example sum = 1932 weighted label sum = 0 -average loss = 0.367 +average loss = 0.3437 best constant = -0.0005179 total feature number = 85128 diff --git a/test/train-sets/ref/wsj_small.dat.stderr b/test/train-sets/ref/wsj_small.dat.stderr index f2c6101f..5d6b2738 100644 --- a/test/train-sets/ref/wsj_small.dat.stderr +++ b/test/train-sets/ref/wsj_small.dat.stderr @@ -1,29 +1,29 @@ -creating cache_file = train-sets/wsj_small.dat.gz.cache -Reading from train-sets/wsj_small.dat.gz -num sources = 1 Num weight bits = 18 learning rate = 10 initial_t = 1 power_t = 0.5 decay_learning_rate = 1 -average since sequence example current label current predicted current cur cur predic. examples -loss last counter weight sequence prefix sequence prefix features pass pol made gener. -0.810811 0.810811 1 37.0 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0 -0.750000 0.666667 2 64.0 [ 11 2 3 11 11 ] [ 1 2 11 12 9 ] 1194 0 0 64 37 -0.698925 0.586207 3 93.0 [ 14 10 13 9 1 ] [ 11 11 11 15 9 ] 1286 0 0 93 64 -0.775194 0.972222 4 129.0 [ 3 4 6 3 1 ] [ 11 11 11 11 11 ] 1608 0 0 129 93 -0.756250 0.677419 5 160.0 [ 19 3 10 2 1 ] [ 14 10 1 2 1 ] 1378 0 0 160 129 -0.724490 0.583333 6 196.0 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 196 160 -0.744681 0.846154 7 235.0 [ 10 2 3 1 10 ] [ 19 2 11 11 11 ] 1746 0 0 235 196 -0.705382 0.627119 12 353.0 [ 5 12 11 11 21 ] [ 11 12 9 1 2 ] 1102 0 0 353 328 -0.575284 0.444444 25 704.0 [ 10 13 22 4 9 ] [ 10 13 3 9 1 ] 1148 0 0 704 678 -0.482370 0.390756 57 1418.0 [ 19 1 4 6 36 ] [ 19 3 4 6 5 ] 2252 0 0 1418 1368 -0.309345 0.130909 110 2793.0 [ 9 1 10 21 2 ] [ 9 1 10 21 2 ] 1792 1 1 21055 2753 +creating cache_file = train-sets/wsj_small.dat.gz.cache +Reading from train-sets/wsj_small.dat.gz +num sources = 1 +average since sequence example current label current predicted current cur cur predic. examples +loss last counter weight sequence prefix sequence prefix features pass pol made gener. +0.810811 0.810811 1 37.000000 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0 +0.750000 0.666667 2 64.000000 [ 11 2 3 11 11 ] [ 1 2 11 12 9 ] 1194 0 0 64 37 +0.698925 0.586207 3 93.000000 [ 14 10 13 9 1 ] [ 11 11 11 15 9 ] 1286 0 0 93 64 +0.775194 0.972222 4 129.000000 [ 3 4 6 3 1 ] [ 11 11 11 11 11 ] 1608 0 0 129 93 +0.756250 0.677419 5 160.000000 [ 19 3 10 2 1 ] [ 14 10 1 2 1 ] 1378 0 0 160 129 +0.724490 0.583333 6 196.000000 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 196 160 +0.744681 0.846154 7 235.000000 [ 10 2 3 1 10 ] [ 19 2 11 11 11 ] 1746 0 0 235 196 +0.705382 0.627119 12 353.000000 [ 5 12 11 11 21 ] [ 11 12 9 1 2 ] 1102 0 0 353 328 +0.575284 0.444444 25 704.000000 [ 10 13 22 4 9 ] [ 10 13 3 9 1 ] 1148 0 0 704 678 +0.482370 0.390756 57 1418.000000 [ 19 1 4 6 36 ] [ 19 3 4 6 5 ] 2252 0 0 1418 1368 +0.309345 0.130909 110 2793.000000 [ 9 1 10 21 2 ] [ 9 1 10 21 2 ] 1792 1 1 37389 2753 finished run number of examples = 156 weighted example sum = 3864 weighted label sum = 0 -average loss = 0.2345 +average loss = 0.2347 best constant = -0.0002589 total feature number = 170256 diff --git a/test/train-sets/ref/zero.stderr b/test/train-sets/ref/zero.stderr index 44922058..1db31510 100644 --- a/test/train-sets/ref/zero.stderr +++ b/test/train-sets/ref/zero.stderr @@ -1,7 +1,4 @@ enabling BFGS based optimization **without** curvature calculation -creating cache_file = train-sets/zero.dat.cache -Reading from train-sets/zero.dat -num sources = 1 Num weight bits = 20 learning rate = 10 initial_t = 1 @@ -11,8 +8,11 @@ using l2 regularization m = 7 Allocated 72M for weights and mem ## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time - 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.258 - 3 0.000000e+00 0.000000e+00 0.000000e+00 nan nan +creating cache_file = train-sets/zero.dat.cache +Reading from train-sets/zero.dat +num sources = 1 + 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.130 + 3 0.000000e+00 0.000000e+00 0.000000e+00 -nan -nan finished run diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc index 06984d2a..e1012bd4 100644 --- a/vowpalwabbit/searn.cc +++ b/vowpalwabbit/searn.cc @@ -426,7 +426,7 @@ namespace Searn void parse_flags(vw&all, std::vector<std::string>&opts, po::variables_map& vm, void (*base_l)(vw&,example*), void (*base_f)(vw&)) { - po::options_description desc("Sequence options"); + po::options_description desc("Searn options"); desc.add_options() ("searn_task", po::value<string>(), "the searn task") ("searn_rollout", po::value<size_t>(), "maximum rollout length") @@ -480,7 +480,7 @@ namespace Searn if (vm.count("searn_passes_per_policy")) passes_per_policy = vm["searn_passes_per_policy"].as<size_t>(); if (vm.count("searn_beta")) beta = vm["searn_beta"].as<float>(); if (vm.count("searn_gamma")) gamma = vm["searn_gamma"].as<float>(); - if (vm.count("searn_recombine")) do_recombination = true; + if (vm.count("searn_norecombine")) do_recombination = false; if (vm.count("searn_allow_current_policy")) allow_current_policy = true; if (beta <= 0 || beta >= 1) { @@ -494,7 +494,7 @@ namespace Searn } if (task.initialize != NULL) - if (!task.initialize(vm)) { + if (!task.initialize(opts, vm)) { std::cerr << "error: task did not initialize properly" << std::endl; exit(-1); } diff --git a/vowpalwabbit/searn.h b/vowpalwabbit/searn.h index a6835e6f..e0eb88e0 100644 --- a/vowpalwabbit/searn.h +++ b/vowpalwabbit/searn.h @@ -126,7 +126,7 @@ namespace Searn // your task might need to initialize some memory at startup or // parse command line arguments: do that in initialize - bool (*initialize)(po::variables_map& vm); + bool (*initialize)(std::vector<std::string>&opts, po::variables_map& vm); // your task might need to free some memory at the end of running: // do that in finalize diff --git a/vowpalwabbit/searn_sequencetask.cc b/vowpalwabbit/searn_sequencetask.cc index 9dcac60e..87e2c6ee 100644 --- a/vowpalwabbit/searn_sequencetask.cc +++ b/vowpalwabbit/searn_sequencetask.cc @@ -34,17 +34,33 @@ namespace SequenceTask { // done. }; - bool initialize(po::variables_map& vm) + bool initialize(std::vector<std::string>&opts, po::variables_map& vm) { SearnUtil::default_info(&hinfo); + po::options_description desc("Searn[sequence] options"); + desc.add_options() + ("searn_sequencetask_history", po::value<size_t>(), "length of history to use") + ("searn_sequencetask_features", po::value<size_t>(), "length of history to pair with observed features") + ("searn_sequencetask_bigrams", "use bigrams from history") + ("searn_sequencetask_bigram_features", "use bigrams from history paired with observed features") + ("searn_sequencetask_fake_ldf", "pretend like we're an LDF model even though we need not be"); + + po::parsed_options parsed = po::command_line_parser(opts). + style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing). + options(desc).allow_unregistered().run(); + opts = po::collect_unrecognized(parsed.options, po::include_positional); + po::store(parsed, vm); + po::notify(vm); + + if (vm.count("searn_sequencetask_bigrams")) hinfo.bigrams = true; if (vm.count("searn_sequencetask_history")) hinfo.length = vm["searn_sequencetask_history"].as<size_t>(); if (vm.count("searn_sequencetask_bigram_features")) hinfo.bigram_features = true; if (vm.count("searn_sequencetask_features")) hinfo.features = vm["searn_sequencetask_features"].as<size_t>(); if (vm.count("searn_sequencetask_fake_ldf")) fake_as_ldf = true; - seq_max_action = vm["searn_max_action"].as<size_t>(); + seq_max_action = vm["searn"].as<size_t>(); constant_pow_length = 1; for (size_t i=0; i < hinfo.length; i++) constant_pow_length *= quadratic_constant; diff --git a/vowpalwabbit/searn_sequencetask.h b/vowpalwabbit/searn_sequencetask.h index a8653aac..d489ff66 100644 --- a/vowpalwabbit/searn_sequencetask.h +++ b/vowpalwabbit/searn_sequencetask.h @@ -14,7 +14,7 @@ namespace SequenceTask { void finish(state); void start_state_multiline(example**, size_t, state*); void cs_example(vw&, state, example*&, bool); - bool initialize(po::variables_map& vm); + bool initialize(std::vector<std::string>&opts, po::variables_map& vm); size_t hash(state); bool equivalent(state, state); std::string to_string(state, bool, std::vector<action>); |