github.com/moses-smt/vowpal_wabbit.git
author    Hal Daume III <me@hal3.name>  2012-06-01 00:44:23 +0400
committer Hal Daume III <me@hal3.name>  2012-06-01 00:44:23 +0400
commit    a7151261249ec358e574d23fed13064a7281fd99 (patch)
tree      99a85a6d74d162d3c7c0e41bacdaf774d28c4bcf
parent    da64b998b59e7e8e45d049d79dc70da4618363fe (diff)
more tests for searn, more refactoring
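
This commit extends the searn (search-based structured prediction) test suite and refactors its command-line interface: --searn now takes the maximum action id directly (replacing --searn_max_action), the task is selected with --searn_task, and task-specific flags move behind a searn_sequencetask_ prefix that the task parses for itself. New NER/chunking test assets and updated reference outputs are added along the way.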
-rw-r--r--  Makefile                                     |   2
-rwxr-xr-x  test/RunTests                                |   6
-rw-r--r--  test/test-sets/ref/0001.stderr               |   7
-rw-r--r--  test/test-sets/ref/0002b.stderr              |   6
-rw-r--r--  test/test-sets/ref/0002c.stderr              |   6
-rwxr-xr-x  test/train-sets/chunk_to_features.pl         |  78
-rw-r--r--  test/train-sets/cs_test.pred                 | 300
-rwxr-xr-x  test/train-sets/eval_chunking.pl             | 110
-rw-r--r--  test/train-sets/ner.cdict                    |   9
-rw-r--r--  test/train-sets/ner.pred                     |   0
-rw-r--r--  test/train-sets/ner.test.gz                  | bin 0 -> 4817758 bytes
-rw-r--r--  test/train-sets/ner.tm                       |  11
-rw-r--r--  test/train-sets/ner.train.gz                 | bin 0 -> 26912415 bytes
-rw-r--r--  test/train-sets/ref/0001.stderr              |   6
-rw-r--r--  test/train-sets/ref/0002.stderr              |   6
-rw-r--r--  test/train-sets/ref/0002a.stderr             |   7
-rw-r--r--  test/train-sets/ref/0002c.stderr             |   7
-rw-r--r--  test/train-sets/ref/cs_test.ldf.csoaa.stderr |   6
-rw-r--r--  test/train-sets/ref/cs_test.ldf.wap.stderr   |  19
-rw-r--r--  test/train-sets/ref/rcv1_small.stderr        |  24
-rw-r--r--  test/train-sets/ref/searn_small.stderr       |  23
-rw-r--r--  test/train-sets/ref/searn_small.stdout       |   0
-rw-r--r--  test/train-sets/ref/searn_wsj.stderr         |  29
-rw-r--r--  test/train-sets/ref/searn_wsj.stdout         |   0
-rw-r--r--  test/train-sets/ref/searn_wsj2.dat.stderr    |  26
-rw-r--r--  test/train-sets/ref/searn_wsj2.dat.stdout    |   0
-rw-r--r--  test/train-sets/ref/seq_small.stderr         |  20
-rw-r--r--  test/train-sets/ref/wiki1K.stderr            |  25
-rw-r--r--  test/train-sets/ref/wsj_small-tm.dat.stderr  |  33
-rw-r--r--  test/train-sets/ref/wsj_small.dat.stderr     |  34
-rw-r--r--  test/train-sets/ref/zero.stderr              |  10
-rw-r--r--  vowpalwabbit/searn.cc                        |   6
-rw-r--r--  vowpalwabbit/searn.h                         |   2
-rw-r--r--  vowpalwabbit/searn_sequencetask.cc           |  20
-rw-r--r--  vowpalwabbit/searn_sequencetask.h            |   2
35 files changed, 723 insertions(+), 117 deletions(-)
diff --git a/Makefile b/Makefile
index 64d4ca07..0a085a4c 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ FLAGS = $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_FILE_OFFSET_BITS=64 -I $(BOOST_I
#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -pg -g
# for valgrind
-FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0
+#FLAGS = -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 -I $(BOOST_INCLUDE) -g -O0
BINARIES = vw active_interactor
MANPAGES = vw.1
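
The Makefile hunk simply re-comments the valgrind/debug FLAGS override (-g -O0), restoring the optimized flags defined above it as the default build.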
diff --git a/test/RunTests b/test/RunTests
index 048503df..720d56d5 100755
--- a/test/RunTests
+++ b/test/RunTests
@@ -459,17 +459,17 @@ __DATA__
train-sets/ref/wsj_small-tm.dat.stderr
# Test 14: Run searn on seq_small for 12 passes, 4 passes per policy
-{VW} -c -d train-sets/seq_small --passes 12 --searn_passes_per_policy 4 --searn sequence --searn_max_action 4 && rm -f train-sets/seq_small.cache
+{VW} -c -d train-sets/seq_small --passes 12 --searn_passes_per_policy 4 --searn 4 --searn_task sequence && rm -f train-sets/seq_small.cache
train-sets/ref/searn_small.stdout
train-sets/ref/searn_small.stderr
# Test 15: Run searn on wsj_small for 12 passes, 4 passes per policy, extra features
-{VW} -c -d train-sets/wsj_small.dat.gz --passes 12 --searn_passes_per_policy 4 --searn sequence --searn_max_action 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache
+{VW} -c -d train-sets/wsj_small.dat.gz --passes 12 --searn_passes_per_policy 4 --searn_task sequence --searn 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache
train-sets/ref/searn_wsj.stdout
train-sets/ref/searn_wsj.stderr
# Test 16: Run searn (wap) on wsj_small for 2 passes, 1 pass per policy, extra features
-{VW} -c -d train-sets/wsj_small.dat.gz --passes 2 --searn_passes_per_policy 1 --searn sequence --searn_max_action 45 --wap 45 --searn_history 2 --searn_bigrams --searn_features 1 && rm -f train-sets/wsj_small.dat.gz.cache
+{VW} -c -d train-sets/wsj_small.dat.gz --passes 2 --searn_passes_per_policy 1 --searn_task sequence --searn 45 --wap 45 --searn_sequencetask_history 2 --searn_sequencetask_bigrams --searn_sequencetask_features 1 && rm -f train-sets/wsj_small.dat.gz.cache
train-sets/ref/searn_wsj2.dat.stdout
train-sets/ref/searn_wsj2.dat.stderr
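
In the updated tests 14-16 the old invocation style

  --searn sequence --searn_max_action 4

becomes

  --searn 4 --searn_task sequence

and the sequence-task feature flags (--searn_history, --searn_bigrams, --searn_features in test 16) pick up the searn_sequencetask_ prefix, matching the options registered in searn_sequencetask.cc further down.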
diff --git a/test/test-sets/ref/0001.stderr b/test/test-sets/ref/0001.stderr
index 8a62ef04..149aeac8 100644
--- a/test/test-sets/ref/0001.stderr
+++ b/test/test-sets/ref/0001.stderr
@@ -1,12 +1,13 @@
-using no cache
-Reading from train-sets/0001.dat
-num sources = 1
Num weight bits = 17
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = 001.predict.tmp
only testing
+warning: final argument 'train-sets/0001.dat' assumed to be input file; in the future, please use -d
+using no cache
+Reading from train-sets/0001.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 3 3.0 0.0000 0.0000 326
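
The reference stderr updates in this commit share two causes: VW now prints its parameter block (weight bits, learning rate, etc.) before opening the input source, so the "using no cache" / "Reading from ..." / "num sources" lines move below it, and passing the data file as a trailing positional argument now emits a warning recommending -d.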
diff --git a/test/test-sets/ref/0002b.stderr b/test/test-sets/ref/0002b.stderr
index 4765f0a7..3ae7448e 100644
--- a/test/test-sets/ref/0002b.stderr
+++ b/test/test-sets/ref/0002b.stderr
@@ -1,12 +1,12 @@
-using no cache
-Reading from train-sets/0002.dat
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = 0002b.predict
only testing
+using no cache
+Reading from train-sets/0002.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.005280 0.005280 3 3.0 0.5498 0.4980 15
diff --git a/test/test-sets/ref/0002c.stderr b/test/test-sets/ref/0002c.stderr
index ad7a2197..92233c36 100644
--- a/test/test-sets/ref/0002c.stderr
+++ b/test/test-sets/ref/0002c.stderr
@@ -1,12 +1,12 @@
-using no cache
-Reading from train-sets/0002.dat
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = 0002c.predict
only testing
+using no cache
+Reading from train-sets/0002.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.002276 0.002276 3 3.0 0.5498 0.5361 184
diff --git a/test/train-sets/chunk_to_features.pl b/test/train-sets/chunk_to_features.pl
new file mode 100755
index 00000000..24ba8039
--- /dev/null
+++ b/test/train-sets/chunk_to_features.pl
@@ -0,0 +1,78 @@
+#!/usr/bin/perl -w
+use strict;
+
+my %cdict = (); my $cdictNum = 1;
+while (1) {
+ my $cdictFile = shift or last;
+ open F, $cdictFile or die;
+ while (<F>) {
+ chomp;
+ my ($c, $num) = split;
+ $cdict{$c} = $num;
+ if ($num+1 > $cdictNum) { $cdictNum = $num + 1; }
+ }
+ close F or die;
+}
+
+my @w = (); my @t = (); my @c = ();
+while (<>) {
+ chomp;
+ if (/^[\s]*$/) { dumpit(); print "\n"; @w = (); @t = (); @c = (); next; }
+
+ my ($w,$t,$c) = split;
+ #if ($c =~ /-NP/) { push @c, "1"; } else { push @c, "-1"; }
+ if (not exists $cdict{$c}) {
+ $cdict{$c} = $cdictNum;
+ $cdictNum++;
+ print STDERR "$c\t$cdict{$c}\n";
+ }
+
+ push @c, $cdict{$c};
+ push @t, $t;
+ push @w, $w;
+}
+
+sub dumpit {
+ for (my $n=0; $n<@c; $n++) {
+ my %f = ();
+ for (my $m=-2; $m<=+2; $m++) {
+ computef(\%f, '_'.$m, $n+$m);
+ }
+ print $c[$n] . ' |';
+ foreach my $f (keys %f) {
+ $f =~ s/:/-COL-/g;
+ $f =~ s/\|/-PIP-/g;
+ print ' ' . $f;
+ }
+ print "\n";
+ }
+}
+
+sub computef {
+ my ($f, $s0, $i) = @_;
+
+ if ($i < 0) { $f->{"w".$s0."=<s>" } = 1; return; }
+ if ($i >= @c) { $f->{"w".$s0."=</s>"} = 1; return; }
+
+ my $w = $w[$i]; my $p = $t[$i]; my $l = lc($w[$i]);
+
+ $f->{"w".$s0."=".$w} = 1;
+# $f->"p:=".$p} = 1;
+ $f->{"l".$s0."=".$l} = 1;
+
+ my $c = $w;
+ $c =~ s/[A-Z]+/A/g;
+ $c =~ s/[a-z]+/a/g;
+ $c =~ s/[0-9]+/0/g;
+ $c =~ s/[^\.Aa0]+/\#/g;
+ $f->{"c".$s0."=".$c} = 1;
+ $f->{"c".$s0."=".$c."_fw=".(($i==0) ? "y" : "n")} = 1;
+
+ my $N = length($l);
+ $f->{"pre1".$s0."=".substr($l,0,1)} = 1;
+ $f->{"pre2".$s0."=".substr($l,0,2)} = 1;
+ $f->{"pre3".$s0."=".substr($l,0,3)} = 1;
+ $f->{"suf1".$s0."=".substr($l,$N-1,1)} = 1;
+ $f->{"suf2".$s0."=".substr($l,$N-2,2)} = 1;
+ $f->{"suf3".$s0."=".substr($l,$N-3,3)} = 1;
+}
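
chunk_to_features.pl converts whitespace-separated (word, tag, chunk) triples into VW multiclass examples: each token becomes one line whose label is a numeric chunk id (accumulated in %cdict) and whose features, computed over a +/-2 token window, include the word, its lowercase form, a character-shape pattern (A/a/0/#), a first-word indicator, and 1-3 character prefixes and suffixes; ':' and '|' are escaped since both are special in VW's input format.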
diff --git a/test/train-sets/cs_test.pred b/test/train-sets/cs_test.pred
new file mode 100644
index 00000000..df8df931
--- /dev/null
+++ b/test/train-sets/cs_test.pred
@@ -0,0 +1,300 @@
+1.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+1.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
+2.000000
diff --git a/test/train-sets/eval_chunking.pl b/test/train-sets/eval_chunking.pl
new file mode 100755
index 00000000..78f7b92c
--- /dev/null
+++ b/test/train-sets/eval_chunking.pl
@@ -0,0 +1,110 @@
+#!/usr/bin/perl -w
+use strict;
+
+my $rdictFile = shift or die;
+my $truthFile = shift or die;
+
+my %rdict = (); my $rdictNum = 1;
+open F, $rdictFile or die;
+while (<F>) {
+ chomp;
+ my ($c, $num) = split;
+ if ($c =~ /^[BI]-O$/) { $c = 'O'; }
+ $rdict{$num} = $c;
+ if ($num+1 > $rdictNum) { $rdictNum = $num + 1; }
+}
+close F or die;
+
+my $np = 0;
+my $nt = 0;
+my $ni = 0;
+my $nil = 0;
+
+my $nc = 0;
+my $ncl = 0;
+my $na = 0;
+
+my @truth = ();
+if ($truthFile =~ /.gz$/ ) { open T, "zcat $truthFile |" or die; }
+elsif ($truthFile =~ /.bz2$/) { open T, "bzcat $truthFile |" or die; }
+else { open T, $truthFile or die; }
+while (<T>) {
+ chomp;
+ if (/^[\s]*$/) { runit(); @truth = (); next; }
+ my ($c) = split;
+ if (not defined $rdict{$c}) { die $c; }
+ push @truth, $rdict{$c};
+}
+close T;
+
+my $p = $ni / (($np > 0) ? $np : 1);
+my $r = $ni / (($nt > 0) ? $nt : 1);
+my $f = 2 * $p * $r / ($p + $r);
+my $a = $nc / (($na > 0) ? $na : 1);
+my $pl = $nil / (($np > 0) ? $np : 1);
+my $rl = $nil / (($nt > 0) ? $nt : 1);
+my $fl = 2 * $pl * $rl / ($pl + $rl);
+my $al = $ncl / (($na > 0) ? $na : 1);
+
+$p = int($p * 1000)/10; $r = int($r * 1000)/10; $f = int($f * 1000)/10; $a = int ($a * 1000)/10;
+$pl = int($pl * 1000)/10; $rl = int($rl * 1000)/10; $fl = int($fl * 1000)/10; $al = int ($al * 1000)/10;
+
+print "unlabeled: p=$p\tr=$r\tf=$f\tacc=$a\n";
+print " labeled: p=$pl\tr=$rl\tf=$fl\tacc=$al\n";
+
+
+sub runit {
+ my $N = scalar @truth;
+ my @pred = ();
+ for (my $n=0; $n<$N; $n++) {
+ $_ = <>;
+ chomp;
+ $_ = int($_);
+ if (not defined $rdict{$_}) { die $_; }
+ push @pred, $rdict{$_};
+ }
+ $_ = <>; chomp;
+ if (not /^\s*$/) { die; }
+
+ $na += $N;
+ for (my $n=0; $n<$N; $n++) {
+ if ($pred[$n] eq $truth[$n]) { $ncl++; }
+ if (substr($pred[$n],0,1) eq substr($truth[$n],0,1)) { $nc++; }
+ }
+
+ my %c1 = chunksof(@truth);
+ my %c2 = chunksof(@pred);
+
+ $np += scalar keys %c1;
+ $nt += scalar keys %c2;
+ foreach my $c (keys %c1) {
+ if (exists $c2{$c}) {
+ $ni++;
+ if ($c2{$c} eq $c1{$c}) {
+ $nil++;
+ }
+ }
+ }
+}
+
+sub chunksof {
+ my @l = @_;
+ my $i = 0;
+ my %c = ();
+ while ($i < @l) {
+ if ($l[$i] =~ /^B-(.+)$/) {
+ my $lab = $1;
+ if ($lab eq 'O') { $i++; next; }
+ my $j = $i+1;
+ while ($j < @l) {
+ if ($l[$j] eq "I-$lab") { $j++; }
+ else { last; }
+ }
+ $c{"$i $j"} = $lab;
+ $i = $j;
+ } else {
+ $i++;
+ }
+ }
+ return (%c);
+}
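
eval_chunking.pl reads a label dictionary and a gold file, then consumes predictions from stdin one sentence at a time, reporting token accuracy plus chunk-level precision, recall, and F-measure (f = 2pr/(p+r)) in both unlabeled (span-only) and labeled variants; a chunk is a maximal B-X label followed by a run of I-X labels.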
diff --git a/test/train-sets/ner.cdict b/test/train-sets/ner.cdict
new file mode 100644
index 00000000..669cc7bc
--- /dev/null
+++ b/test/train-sets/ner.cdict
@@ -0,0 +1,9 @@
+B-ORG 1
+B-O 2
+B-MISC 3
+B-PER 4
+I-PER 5
+B-LOC 6
+I-ORG 7
+I-MISC 8
+I-LOC 9
diff --git a/test/train-sets/ner.pred b/test/train-sets/ner.pred
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/train-sets/ner.pred
diff --git a/test/train-sets/ner.test.gz b/test/train-sets/ner.test.gz
new file mode 100644
index 00000000..499b74e0
--- /dev/null
+++ b/test/train-sets/ner.test.gz
Binary files differ
diff --git a/test/train-sets/ner.tm b/test/train-sets/ner.tm
new file mode 100644
index 00000000..43a9fe14
--- /dev/null
+++ b/test/train-sets/ner.tm
@@ -0,0 +1,11 @@
+9
+1 1 1 1 0 1 0 0 0
+1 1 1 1 0 1 1 0 0
+1 1 1 1 0 1 0 0 0
+1 1 1 1 0 1 0 1 0
+0 1 1 0 1 1 0 0 0
+0 1 0 0 1 0 0 0 0
+1 1 1 1 0 1 0 0 1
+1 1 1 1 0 1 1 0 0
+1 1 1 1 0 1 0 1 0
+1 1 1 0 0 1 0 0 1
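
ner.tm appears to be the transition matrix consumed via --sequence_transition_file (see the wsj_small-tm reference output below): the first line gives the label count (9, matching ner.cdict), and each subsequent row — ten of them, presumably including an initial/start row — marks allowed transitions with 1 and forbidden ones with 0. The exact row/column orientation is not documented in this patch.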
diff --git a/test/train-sets/ner.train.gz b/test/train-sets/ner.train.gz
new file mode 100644
index 00000000..ba01acd5
--- /dev/null
+++ b/test/train-sets/ner.train.gz
Binary files differ
diff --git a/test/train-sets/ref/0001.stderr b/test/train-sets/ref/0001.stderr
index edef747e..5e1d3426 100644
--- a/test/train-sets/ref/0001.stderr
+++ b/test/train-sets/ref/0001.stderr
@@ -1,14 +1,14 @@
You have chosen to generate 3-grams
You have chosen to generate 1-skip-3-grams
final_regressor = models/0001.model
-creating cache_file = train-sets/0001.dat.cache
-Reading from train-sets/0001.dat
-num sources = 1
Num weight bits = 17
learning rate = 2.56e+06
initial_t = 128000
power_t = 1
decay_learning_rate = 1
+creating cache_file = train-sets/0001.dat.cache
+Reading from train-sets/0001.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 3 3.0 0.0000 1.0000 326
diff --git a/test/train-sets/ref/0002.stderr b/test/train-sets/ref/0002.stderr
index 7d14e85c..a3a5c9a9 100644
--- a/test/train-sets/ref/0002.stderr
+++ b/test/train-sets/ref/0002.stderr
@@ -1,11 +1,11 @@
final_regressor = models/0002.model
-using no cache
-Reading from train-sets/0002.dat
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
+using no cache
+Reading from train-sets/0002.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.110447 0.110447 3 3.0 0.5498 0.3591 15
diff --git a/test/train-sets/ref/0002a.stderr b/test/train-sets/ref/0002a.stderr
index 6cc621c3..55e54a58 100644
--- a/test/train-sets/ref/0002a.stderr
+++ b/test/train-sets/ref/0002a.stderr
@@ -1,12 +1,13 @@
creating quadratic features for pairs: Tf ff
final_regressor = models/0002a.model
-using no cache
-Reading from train-sets/0002.dat
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0
+warning: final argument 'train-sets/0002.dat' assumed to be input file; in the future, please use -d
+using no cache
+Reading from train-sets/0002.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.146961 0.146961 3 3.0 0.5498 0.2139 197
diff --git a/test/train-sets/ref/0002c.stderr b/test/train-sets/ref/0002c.stderr
index 2d9f312b..b7fe4901 100644
--- a/test/train-sets/ref/0002c.stderr
+++ b/test/train-sets/ref/0002c.stderr
@@ -1,12 +1,13 @@
creating quadratic features for pairs: ff
final_regressor = models/0002c.model
-using no cache
-Reading from train-sets/0002.dat
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0
+warning: final argument 'train-sets/0002.dat' assumed to be input file; in the future, please use -d
+using no cache
+Reading from train-sets/0002.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.134680 0.134680 3 3.0 0.5498 0.2361 184
diff --git a/test/train-sets/ref/cs_test.ldf.csoaa.stderr b/test/train-sets/ref/cs_test.ldf.csoaa.stderr
index f3c6d06a..d6a9c73b 100644
--- a/test/train-sets/ref/cs_test.ldf.csoaa.stderr
+++ b/test/train-sets/ref/cs_test.ldf.csoaa.stderr
@@ -1,6 +1,3 @@
-creating cache_file = train-sets/cs_test.ldf.cache
-Reading from train-sets/cs_test.ldf
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
@@ -8,6 +5,9 @@ power_t = 0.5
decay_learning_rate = 1
predictions = cs_test.ldf.csoaa.predict
warning: turning off constant for label dependent features; use --noconstant
+creating cache_file = train-sets/cs_test.ldf.cache
+Reading from train-sets/cs_test.ldf
+num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 3 3.0 known 0 3
diff --git a/test/train-sets/ref/cs_test.ldf.wap.stderr b/test/train-sets/ref/cs_test.ldf.wap.stderr
index 1a06a442..5405ca82 100644
--- a/test/train-sets/ref/cs_test.ldf.wap.stderr
+++ b/test/train-sets/ref/cs_test.ldf.wap.stderr
@@ -1,20 +1,19 @@
-creating cache_file = train-sets/cs_test.ldf.cache
-Reading from train-sets/cs_test.ldf
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
decay_learning_rate = 1
predictions = cs_test.ldf.wap.predict
-warning: turning off constant for label dependent features; use --noconstant
+creating cache_file = train-sets/cs_test.ldf.cache
+Reading from train-sets/cs_test.ldf
+num sources = 1
average since example example current current current
loss last counter weight label predict features
-0.000000 0.000000 3 3.0 known 0 3
-0.000000 0.000000 6 6.0 known 0 3
-0.000000 0.000000 11 11.0 known 0 3
-0.000000 0.000000 22 22.0 known 0 3
-0.000000 0.000000 44 44.0 known 1 3
+0.000000 0.000000 3 3.0 known 0 4
+0.000000 0.000000 6 6.0 known 0 4
+0.000000 0.000000 11 11.0 known 0 4
+0.000000 0.000000 22 22.0 known 0 4
+0.000000 0.000000 44 44.0 known 1 4
finished run
number of examples = 70
@@ -22,4 +21,4 @@ weighted example sum = 70
weighted label sum = 0
average loss = 0
best constant = -0.01449
-total feature number = 210
+total feature number = 280
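
In the wap reference output the per-example feature count rises from 3 to 4 and the total from 210 to 280 (70 examples x 4), consistent with the constant feature no longer being stripped for label-dependent features in the wap reduction; the --noconstant warning line disappears accordingly.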
diff --git a/test/train-sets/ref/rcv1_small.stderr b/test/train-sets/ref/rcv1_small.stderr
index fe58cb4a..8f5e861c 100644
--- a/test/train-sets/ref/rcv1_small.stderr
+++ b/test/train-sets/ref/rcv1_small.stderr
@@ -1,7 +1,4 @@
enabling BFGS based optimization **without** curvature calculation
-creating cache_file = train-sets/rcv1_small.dat.cache
-Reading from train-sets/rcv1_small.dat
-num sources = 1
Num weight bits = 20
learning rate = 10
initial_t = 1
@@ -11,15 +8,18 @@ using l2 regularization
m = 7
Allocated 72M for weights and mem
## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
- 1 6.931472e-01 1.859805e-03 3.768599e+00 3.414409e+01 1.977478e+04 1.103734e-01 2.119
- 3 4.624629e-01 1.085868e-02 1.936935e+00 0.554592 0.194583 2.208660e+02 1.000000e+00 2.656
- 4 3.384483e-01 4.246157e-04 1.726574e-01 0.520733 0.133178 3.415247e+01 1.000000e+00 3.323
- 5 3.184994e-01 7.930477e-05 6.343691e-02 0.751656 0.517330 6.775453e+01 1.000000e+00 4.025
- 6 3.024582e-01 2.887344e-06 1.460970e-02 0.657158 0.328843 3.618752e+01 1.000000e+00 5.114
- 7 2.967627e-01 4.279802e-06 3.517537e-03 0.645592 0.293725 1.585137e+01 1.000000e+00 6.356
- 8 2.952703e-01 2.241755e-06 1.631614e-03 0.527831 0.052654 5.802390e+00 1.000000e+00 7.734
- 9 2.950591e-01 7.095166e-07 1.546886e-03 0.199872 -0.608863 7.389087e-01 1.000000e+00 9.263
-10 2.948347e-01 2.803435e-07 1.223611e-04 0.585419 0.171988 1.453111e-01 1.000000e+00 12.281
+creating cache_file = train-sets/rcv1_small.dat.cache
+Reading from train-sets/rcv1_small.dat
+num sources = 1
+ 1 6.931472e-01 1.859805e-03 3.768599e+00 3.414409e+01 1.977478e+04 1.103734e-01 0.769
+ 3 4.624629e-01 1.085868e-02 1.936935e+00 0.554592 0.194583 2.208660e+02 1.000000e+00 0.934
+ 4 3.384483e-01 4.246157e-04 1.726574e-01 0.520733 0.133178 3.415247e+01 1.000000e+00 1.142
+ 5 3.184994e-01 7.930477e-05 6.343691e-02 0.751656 0.517330 6.775453e+01 1.000000e+00 1.394
+ 6 3.024582e-01 2.887344e-06 1.460970e-02 0.657158 0.328843 3.618752e+01 1.000000e+00 1.673
+ 7 2.967627e-01 4.279802e-06 3.517537e-03 0.645592 0.293725 1.585137e+01 1.000000e+00 1.984
+ 8 2.952703e-01 2.241755e-06 1.631614e-03 0.527831 0.052654 5.802390e+00 1.000000e+00 2.332
+ 9 2.950591e-01 7.095166e-07 1.546886e-03 0.199872 -0.608863 7.389087e-01 1.000000e+00 2.712
+10 2.948347e-01 2.803435e-07 1.223611e-04 0.585419 0.171988 1.453111e-01 1.000000e+00 3.432
finished run
diff --git a/test/train-sets/ref/searn_small.stderr b/test/train-sets/ref/searn_small.stderr
new file mode 100644
index 00000000..0d2a3a1e
--- /dev/null
+++ b/test/train-sets/ref/searn_small.stderr
@@ -0,0 +1,23 @@
+Num weight bits = 18
+learning rate = 10
+initial_t = 1
+power_t = 0.5
+decay_learning_rate = 1
+creating cache_file = train-sets/seq_small.cache
+Reading from train-sets/seq_small
+num sources = 1
+average since example example current current current
+loss last counter weight label predict features
+#pol average since sequence example current label current predicted current cur cur predic. examples
+chng loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+ 0 1.333333 1.333333 3 3.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 2 0 18 12
+ 1 1.000000 0.666667 6 6.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 5 1 49 30
+ 1 0.727273 0.400000 11 11.000000 [1 3 2 1 4 3 ] [1 3 2 1 4 3 ] 18 10 2 162 60
+
+finished run
+number of examples = 12
+weighted example sum = 12
+weighted label sum = 0
+average loss = 0.6667
+best constant = -0.09091
+total feature number = 552
diff --git a/test/train-sets/ref/searn_small.stdout b/test/train-sets/ref/searn_small.stdout
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/train-sets/ref/searn_small.stdout
diff --git a/test/train-sets/ref/searn_wsj.stderr b/test/train-sets/ref/searn_wsj.stderr
new file mode 100644
index 00000000..3d156c0e
--- /dev/null
+++ b/test/train-sets/ref/searn_wsj.stderr
@@ -0,0 +1,29 @@
+Num weight bits = 18
+learning rate = 10
+initial_t = 1
+power_t = 0.5
+decay_learning_rate = 1
+creating cache_file = train-sets/wsj_small.dat.gz.cache
+Reading from train-sets/wsj_small.dat.gz
+num sources = 1
+average since example example current current current
+loss last counter weight label predict features
+#pol average since sequence example current label current predicted current cur cur predic. examples
+chng loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+ 0 22.000000 22.000000 3 3.000000 [14 10 13 9 1 2 1 4..] [11 11 11 1 2 1 2 1..] 2659 0 0 93 64
+ 0 21.000000 20.000000 6 6.000000 [19 2 22 4 3 9 1 1 ..] [1 2 3 9 1 2 1 1 12..] 3324 0 0 196 160
+ 0 18.363636 15.200000 11 11.000000 [29 4 3 9 1 1 23 8 ..] [1 2 3 9 1 2 1 10 7..] 1424 0 0 328 312
+ 0 15.000000 11.636364 22 22.000000 [11 11 21 3 10 13 3..] [11 11 21 3 1 2 3 1..] 3419 0 0 613 576
+ 0 12.681818 10.363636 44 44.000000 [3 26 9 1 4 3 1 2 5..] [3 1 1 1 2 3 1 2 11..] 1139 0 0 1120 1107
+ 0 9.988506 7.232558 87 87.000000 [11 11 12 9 1 2 11 ..] [11 11 12 9 1 2 11 ..] 2564 1 0 2220 2192
+ 0 6.316092 2.643678 174 174.000000 [11 1 10 13 2 17 30..] [11 1 10 13 2 17 30..] 1044 2 0 4370 4358
+ 1 4.597701 2.879310 348 348.000000 [2 11 2 11 12 3 11 ..] [2 25 1 1 12 3 11 1..] 2279 4 1 49100 8673
+ 1 4.612069 4.626437 696 696.000000 [19 22 4 5 3 1 2 1 ..] [19 22 34 9 12 1 2 ..] 2754 8 2 459337 17212
+
+finished run
+number of examples = 936
+weighted example sum = 936
+weighted label sum = 0
+average loss = 4.706
+best constant = -0.00107
+total feature number = 78376268
diff --git a/test/train-sets/ref/searn_wsj.stdout b/test/train-sets/ref/searn_wsj.stdout
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/train-sets/ref/searn_wsj.stdout
diff --git a/test/train-sets/ref/searn_wsj2.dat.stderr b/test/train-sets/ref/searn_wsj2.dat.stderr
new file mode 100644
index 00000000..5cf68832
--- /dev/null
+++ b/test/train-sets/ref/searn_wsj2.dat.stderr
@@ -0,0 +1,26 @@
+Num weight bits = 18
+learning rate = 10
+initial_t = 1
+power_t = 0.5
+decay_learning_rate = 1
+creating cache_file = train-sets/wsj_small.dat.gz.cache
+Reading from train-sets/wsj_small.dat.gz
+num sources = 1
+average since example example current current current
+loss last counter weight label predict features
+#pol average since sequence example current label current predicted current cur cur predic. examples
+chng loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+ 0 21.666667 21.666667 3 3.000000 [14 10 13 9 1 2 1 4..] [11 11 11 15 9 9 1 ..] 2659 0 0 93 64
+ 0 23.666667 25.666667 6 6.000000 [19 2 22 4 3 9 1 1 ..] [19 2 11 11 11 11 1..] 3324 0 0 196 160
+ 0 20.909091 17.600000 11 11.000000 [29 4 3 9 1 1 23 8 ..] [19 2 3 9 1 6 28 29..] 1424 0 0 328 312
+ 0 16.318182 11.727273 22 22.000000 [11 11 21 3 10 13 3..] [11 11 21 3 1 2 3 1..] 3419 0 0 613 576
+ 0 12.727273 9.136364 44 44.000000 [3 26 9 1 4 3 1 2 5..] [3 11 11 1 2 3 1 2 ..] 1139 0 0 1120 1107
+ 1 11.137931 9.511628 87 87.000000 [11 11 12 9 1 2 11 ..] [11 11 12 11 11 11 ..] 2564 1 1 13460 2192
+
+finished run
+number of examples = 156
+weighted example sum = 156
+weighted label sum = 0
+average loss = 8.532
+best constant = -0.006452
+total feature number = 7819789
diff --git a/test/train-sets/ref/searn_wsj2.dat.stdout b/test/train-sets/ref/searn_wsj2.dat.stdout
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/train-sets/ref/searn_wsj2.dat.stdout
diff --git a/test/train-sets/ref/seq_small.stderr b/test/train-sets/ref/seq_small.stderr
index 67c07e03..6fd97a8b 100644
--- a/test/train-sets/ref/seq_small.stderr
+++ b/test/train-sets/ref/seq_small.stderr
@@ -1,18 +1,18 @@
-creating cache_file = train-sets/seq_small.cache
-Reading from train-sets/seq_small
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
decay_learning_rate = 1
-average since sequence example current label current predicted current cur cur predic. examples
-loss last counter weight sequence prefix sequence prefix features pass pol made gener.
-0.666667 0.666667 1 6.0 [ 1 3 2 1 4 3 ] [ 1 1 1 1 1 1 ] 12 0 0 6 0
-0.333333 0.000000 2 12.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 1 0 12 6
-0.222222 0.000000 3 18.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 2 0 18 12
-0.166667 0.000000 4 24.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 3 0 24 18
-0.083333 0.000000 8 48.0 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 7 1 72 42
+creating cache_file = train-sets/seq_small.cache
+Reading from train-sets/seq_small
+num sources = 1
+average since sequence example current label current predicted current cur cur predic. examples
+loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+0.666667 0.666667 1 6.000000 [ 1 3 2 1 4 3 ] [ 1 1 1 1 1 1 ] 12 0 0 6 0
+0.333333 0.000000 2 12.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 1 0 12 6
+0.222222 0.000000 3 18.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 2 0 18 12
+0.166667 0.000000 4 24.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 3 0 24 18
+0.083333 0.000000 8 48.000000 [ 1 3 2 1 4 3 ] [ 1 3 2 1 4 3 ] 12 7 1 51 42
finished run
number of examples = 12
diff --git a/test/train-sets/ref/wiki1K.stderr b/test/train-sets/ref/wiki1K.stderr
index 9531eb9c..4f28322b 100644
--- a/test/train-sets/ref/wiki1K.stderr
+++ b/test/train-sets/ref/wiki1K.stderr
@@ -1,22 +1,23 @@
your learning rate is too high, setting it to 1
-using no cache
-Reading from train-sets/wiki1K.dat
-num sources = 1
Num weight bits = 13
learning rate = 1
initial_t = 1
power_t = 0.5
+warning: final argument 'train-sets/wiki1K.dat' assumed to be input file; in the future, please use -d
+using no cache
+Reading from train-sets/wiki1K.dat
+num sources = 1
average since example example current current current
loss last counter weight label predict features
-10.276575 10.276575 3 3.0 unknown 0.0000 37
-10.341718 10.406862 6 6.0 unknown 0.0000 13
-10.311285 10.274764 11 11.0 unknown 0.0000 31
-10.452045 10.592805 22 22.0 unknown 0.0000 1
-10.439284 10.426523 44 44.0 unknown 0.0000 165
-10.459843 10.480881 87 87.0 unknown 0.0000 28
-10.093540 9.727236 174 174.0 unknown 0.0000 16
-9.566517 9.039494 348 348.0 unknown 0.0000 1
-9.066497 8.566477 696 696.0 unknown 0.0000 142
+10.276562 10.276562 3 3.0 unknown 0.0000 37
+10.341712 10.406861 6 6.0 unknown 0.0000 13
+10.311279 10.274760 11 11.0 unknown 0.0000 31
+10.452043 10.592806 22 22.0 unknown 0.0000 1
+10.439283 10.426523 44 44.0 unknown 0.0000 165
+10.459842 10.480879 87 87.0 unknown 0.0000 28
+10.093538 9.727235 174 174.0 unknown 0.0000 16
+9.566512 9.039486 348 348.0 unknown 0.0000 1
+9.066552 8.566591 696 696.0 unknown 0.0000 142
finished run
number of examples = 1000
diff --git a/test/train-sets/ref/wsj_small-tm.dat.stderr b/test/train-sets/ref/wsj_small-tm.dat.stderr
index 9da3f9bc..6693ac96 100644
--- a/test/train-sets/ref/wsj_small-tm.dat.stderr
+++ b/test/train-sets/ref/wsj_small-tm.dat.stderr
@@ -1,27 +1,28 @@
-using cache_file = train-sets/wsj_small.dat.gz.cache
-ignoring text input in favor of cache input
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
-average since sequence example current label current predicted current cur cur predic. examples
-loss last counter weight sequence prefix sequence prefix features pass pol made gener.
-0.810811 0.810811 1 37.0 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0
-0.781250 0.740741 2 64.0 [ 11 2 3 11 11 ] [ 11 26 9 11 26 ] 1194 0 0 705 37
-0.731183 0.620690 3 93.0 [ 14 10 13 9 1 ] [ 11 15 16 1 1 ] 1286 0 0 1105 64
-0.720930 0.694444 4 129.0 [ 3 4 6 3 1 ] [ 11 11 2 3 11 ] 1608 0 0 1494 93
-0.706250 0.645161 5 160.0 [ 19 3 10 2 1 ] [ 2 3 1 2 1 ] 1378 0 0 2170 129
-0.678571 0.555556 6 196.0 [ 19 2 22 4 3 ] [ 11 2 11 11 11 ] 1608 0 0 2462 160
-0.676596 0.666667 7 235.0 [ 10 2 3 1 10 ] [ 1 2 11 1 1 ] 1746 0 0 3061 196
-0.614731 0.491525 12 353.0 [ 5 12 11 11 21 ] [ 11 12 9 1 21 ] 1102 0 0 5473 328
-0.482955 0.350427 25 704.0 [ 10 13 22 4 9 ] [ 10 2 1 4 1 ] 1148 0 0 12574 678
-0.398449 0.315126 57 1418.0 [ 19 1 4 6 36 ] [ 19 1 4 6 5 ] 2252 0 0 25497 1368
+cannot have --sequence_transition_file and zero history length, setting history length to 1
+creating cache_file = train-sets/wsj_small.dat.gz.cache
+Reading from train-sets/wsj_small.dat.gz
+num sources = 1
+average since sequence example current label current predicted current cur cur predic. examples
+loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+0.810811 0.810811 1 37.000000 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0
+0.750000 0.666667 2 64.000000 [ 11 2 3 11 11 ] [ 9 9 9 11 9 ] 1194 0 0 837 37
+0.698925 0.586207 3 93.000000 [ 14 10 13 9 1 ] [ 11 15 11 1 9 ] 1286 0 0 1457 64
+0.689922 0.666667 4 129.000000 [ 3 4 6 3 1 ] [ 11 11 2 3 11 ] 1608 0 0 2088 93
+0.675000 0.612903 5 160.000000 [ 19 3 10 2 1 ] [ 2 3 1 2 1 ] 1378 0 0 2892 129
+0.642857 0.500000 6 196.000000 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 3611 160
+0.634043 0.589744 7 235.000000 [ 10 2 3 1 10 ] [ 19 2 3 1 11 ] 1746 0 0 4423 196
+0.575071 0.457627 12 353.000000 [ 5 12 11 11 21 ] [ 11 12 9 1 21 ] 1102 0 0 7489 328
+0.451705 0.327635 25 704.000000 [ 10 13 22 4 9 ] [ 10 13 22 4 3 ] 1148 0 0 15598 678
+0.375882 0.301120 57 1418.000000 [ 19 1 4 6 36 ] [ 19 14 4 6 5 ] 2252 0 0 31169 1368
finished run
number of examples = 78
weighted example sum = 1932
weighted label sum = 0
-average loss = 0.367
+average loss = 0.3437
best constant = -0.0005179
total feature number = 85128
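
This run now rebuilds the cache rather than reusing it, and gains a new diagnostic: a transition file with zero history length is contradictory, so the history length is forced to 1, which shifts the per-sequence predictions and improves the average loss from 0.367 to 0.3437.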
diff --git a/test/train-sets/ref/wsj_small.dat.stderr b/test/train-sets/ref/wsj_small.dat.stderr
index f2c6101f..5d6b2738 100644
--- a/test/train-sets/ref/wsj_small.dat.stderr
+++ b/test/train-sets/ref/wsj_small.dat.stderr
@@ -1,29 +1,29 @@
-creating cache_file = train-sets/wsj_small.dat.gz.cache
-Reading from train-sets/wsj_small.dat.gz
-num sources = 1
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
decay_learning_rate = 1
-average since sequence example current label current predicted current cur cur predic. examples
-loss last counter weight sequence prefix sequence prefix features pass pol made gener.
-0.810811 0.810811 1 37.0 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0
-0.750000 0.666667 2 64.0 [ 11 2 3 11 11 ] [ 1 2 11 12 9 ] 1194 0 0 64 37
-0.698925 0.586207 3 93.0 [ 14 10 13 9 1 ] [ 11 11 11 15 9 ] 1286 0 0 93 64
-0.775194 0.972222 4 129.0 [ 3 4 6 3 1 ] [ 11 11 11 11 11 ] 1608 0 0 129 93
-0.756250 0.677419 5 160.0 [ 19 3 10 2 1 ] [ 14 10 1 2 1 ] 1378 0 0 160 129
-0.724490 0.583333 6 196.0 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 196 160
-0.744681 0.846154 7 235.0 [ 10 2 3 1 10 ] [ 19 2 11 11 11 ] 1746 0 0 235 196
-0.705382 0.627119 12 353.0 [ 5 12 11 11 21 ] [ 11 12 9 1 2 ] 1102 0 0 353 328
-0.575284 0.444444 25 704.0 [ 10 13 22 4 9 ] [ 10 13 3 9 1 ] 1148 0 0 704 678
-0.482370 0.390756 57 1418.0 [ 19 1 4 6 36 ] [ 19 3 4 6 5 ] 2252 0 0 1418 1368
-0.309345 0.130909 110 2793.0 [ 9 1 10 21 2 ] [ 9 1 10 21 2 ] 1792 1 1 21055 2753
+creating cache_file = train-sets/wsj_small.dat.gz.cache
+Reading from train-sets/wsj_small.dat.gz
+num sources = 1
+average since sequence example current label current predicted current cur cur predic. examples
+loss last counter weight sequence prefix sequence prefix features pass pol made gener.
+0.810811 0.810811 1 37.000000 [ 1 2 3 1 4 ] [ 1 1 1 1 1 ] 1654 0 0 37 0
+0.750000 0.666667 2 64.000000 [ 11 2 3 11 11 ] [ 1 2 11 12 9 ] 1194 0 0 64 37
+0.698925 0.586207 3 93.000000 [ 14 10 13 9 1 ] [ 11 11 11 15 9 ] 1286 0 0 93 64
+0.775194 0.972222 4 129.000000 [ 3 4 6 3 1 ] [ 11 11 11 11 11 ] 1608 0 0 129 93
+0.756250 0.677419 5 160.000000 [ 19 3 10 2 1 ] [ 14 10 1 2 1 ] 1378 0 0 160 129
+0.724490 0.583333 6 196.000000 [ 19 2 22 4 3 ] [ 19 2 11 11 11 ] 1608 0 0 196 160
+0.744681 0.846154 7 235.000000 [ 10 2 3 1 10 ] [ 19 2 11 11 11 ] 1746 0 0 235 196
+0.705382 0.627119 12 353.000000 [ 5 12 11 11 21 ] [ 11 12 9 1 2 ] 1102 0 0 353 328
+0.575284 0.444444 25 704.000000 [ 10 13 22 4 9 ] [ 10 13 3 9 1 ] 1148 0 0 704 678
+0.482370 0.390756 57 1418.000000 [ 19 1 4 6 36 ] [ 19 3 4 6 5 ] 2252 0 0 1418 1368
+0.309345 0.130909 110 2793.000000 [ 9 1 10 21 2 ] [ 9 1 10 21 2 ] 1792 1 1 37389 2753
finished run
number of examples = 156
weighted example sum = 3864
weighted label sum = 0
-average loss = 0.2345
+average loss = 0.2347
best constant = -0.0002589
total feature number = 170256
diff --git a/test/train-sets/ref/zero.stderr b/test/train-sets/ref/zero.stderr
index 44922058..1db31510 100644
--- a/test/train-sets/ref/zero.stderr
+++ b/test/train-sets/ref/zero.stderr
@@ -1,7 +1,4 @@
enabling BFGS based optimization **without** curvature calculation
-creating cache_file = train-sets/zero.dat.cache
-Reading from train-sets/zero.dat
-num sources = 1
Num weight bits = 20
learning rate = 10
initial_t = 1
@@ -11,8 +8,11 @@ using l2 regularization
m = 7
Allocated 72M for weights and mem
## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
- 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.258
- 3 0.000000e+00 0.000000e+00 0.000000e+00 nan nan
+creating cache_file = train-sets/zero.dat.cache
+Reading from train-sets/zero.dat
+num sources = 1
+ 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.130
+ 3 0.000000e+00 0.000000e+00 0.000000e+00 -nan -nan
finished run
diff --git a/vowpalwabbit/searn.cc b/vowpalwabbit/searn.cc
index 06984d2a..e1012bd4 100644
--- a/vowpalwabbit/searn.cc
+++ b/vowpalwabbit/searn.cc
@@ -426,7 +426,7 @@ namespace Searn
void parse_flags(vw&all, std::vector<std::string>&opts, po::variables_map& vm, void (*base_l)(vw&,example*), void (*base_f)(vw&))
{
- po::options_description desc("Sequence options");
+ po::options_description desc("Searn options");
desc.add_options()
("searn_task", po::value<string>(), "the searn task")
("searn_rollout", po::value<size_t>(), "maximum rollout length")
@@ -480,7 +480,7 @@ namespace Searn
if (vm.count("searn_passes_per_policy")) passes_per_policy = vm["searn_passes_per_policy"].as<size_t>();
if (vm.count("searn_beta")) beta = vm["searn_beta"].as<float>();
if (vm.count("searn_gamma")) gamma = vm["searn_gamma"].as<float>();
- if (vm.count("searn_recombine")) do_recombination = true;
+ if (vm.count("searn_norecombine")) do_recombination = false;
if (vm.count("searn_allow_current_policy")) allow_current_policy = true;
if (beta <= 0 || beta >= 1) {
@@ -494,7 +494,7 @@ namespace Searn
}
if (task.initialize != NULL)
- if (!task.initialize(vm)) {
+ if (!task.initialize(opts, vm)) {
std::cerr << "error: task did not initialize properly" << std::endl;
exit(-1);
}
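
Three changes land in searn.cc: the options block is relabeled "Searn options" (it previously reused the "Sequence options" title); recombination is now controlled by --searn_norecombine, which disables it, instead of --searn_recombine, which enabled it — implying the default flipped to on; and task initialization now receives the raw option vector so each task can register and parse its own flags, as the signature change in searn.h below reflects.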
diff --git a/vowpalwabbit/searn.h b/vowpalwabbit/searn.h
index a6835e6f..e0eb88e0 100644
--- a/vowpalwabbit/searn.h
+++ b/vowpalwabbit/searn.h
@@ -126,7 +126,7 @@ namespace Searn
// your task might need to initialize some memory at startup or
// parse command line arguments: do that in initialize
- bool (*initialize)(po::variables_map& vm);
+ bool (*initialize)(std::vector<std::string>&opts, po::variables_map& vm);
// your task might need to free some memory at the end of running:
// do that in finalize
diff --git a/vowpalwabbit/searn_sequencetask.cc b/vowpalwabbit/searn_sequencetask.cc
index 9dcac60e..87e2c6ee 100644
--- a/vowpalwabbit/searn_sequencetask.cc
+++ b/vowpalwabbit/searn_sequencetask.cc
@@ -34,17 +34,33 @@ namespace SequenceTask {
// done.
};
- bool initialize(po::variables_map& vm)
+ bool initialize(std::vector<std::string>&opts, po::variables_map& vm)
{
SearnUtil::default_info(&hinfo);
+ po::options_description desc("Searn[sequence] options");
+ desc.add_options()
+ ("searn_sequencetask_history", po::value<size_t>(), "length of history to use")
+ ("searn_sequencetask_features", po::value<size_t>(), "length of history to pair with observed features")
+ ("searn_sequencetask_bigrams", "use bigrams from history")
+ ("searn_sequencetask_bigram_features", "use bigrams from history paired with observed features")
+ ("searn_sequencetask_fake_ldf", "pretend like we're an LDF model even though we need not be");
+
+ po::parsed_options parsed = po::command_line_parser(opts).
+ style(po::command_line_style::default_style ^ po::command_line_style::allow_guessing).
+ options(desc).allow_unregistered().run();
+ opts = po::collect_unrecognized(parsed.options, po::include_positional);
+ po::store(parsed, vm);
+ po::notify(vm);
+
+
if (vm.count("searn_sequencetask_bigrams")) hinfo.bigrams = true;
if (vm.count("searn_sequencetask_history")) hinfo.length = vm["searn_sequencetask_history"].as<size_t>();
if (vm.count("searn_sequencetask_bigram_features")) hinfo.bigram_features = true;
if (vm.count("searn_sequencetask_features")) hinfo.features = vm["searn_sequencetask_features"].as<size_t>();
if (vm.count("searn_sequencetask_fake_ldf")) fake_as_ldf = true;
- seq_max_action = vm["searn_max_action"].as<size_t>();
+ seq_max_action = vm["searn"].as<size_t>();
constant_pow_length = 1;
for (size_t i=0; i < hinfo.length; i++)
constant_pow_length *= quadratic_constant;
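
The new initialize() body illustrates a two-stage Boost.Program_options pattern: each module parses only the options it owns, tolerates everything else via allow_unregistered(), and writes the unrecognized remainder back into opts for whichever parser runs next. Below is a minimal self-contained sketch of that pattern, assuming only Boost.Program_options; parse_module_options and the --history flag are illustrative names, not part of the vw codebase.

  // Sketch: a module claims its own options and returns the rest.
  #include <boost/program_options.hpp>
  #include <cstddef>
  #include <iostream>
  #include <string>
  #include <vector>

  namespace po = boost::program_options;

  // Parse only the options this module owns; everything unrecognized is
  // written back into opts for whichever parser runs next.
  void parse_module_options(std::vector<std::string>& opts, po::variables_map& vm)
  {
    po::options_description desc("module options");
    desc.add_options()
      ("history", po::value<std::size_t>(), "length of history to use");

    po::parsed_options parsed = po::command_line_parser(opts)
      .style(po::command_line_style::default_style
             ^ po::command_line_style::allow_guessing)
      .options(desc)
      .allow_unregistered()   // ignore flags owned by other modules
      .run();

    // Hand the leftover tokens back so later modules can claim them.
    opts = po::collect_unrecognized(parsed.options, po::include_positional);
    po::store(parsed, vm);
    po::notify(vm);
  }

  int main(int argc, char* argv[])
  {
    std::vector<std::string> opts(argv + 1, argv + argc);
    po::variables_map vm;
    parse_module_options(opts, vm);
    if (vm.count("history"))
      std::cout << "history = " << vm["history"].as<std::size_t>() << "\n";
    std::cout << opts.size() << " token(s) left for other parsers\n";
    return 0;
  }

Disabling allow_guessing matters in such a setup: it keeps Boost from matching one module's flag as an abbreviation of another's longer flag, which is relevant here where --searn coexists with --searn_task, --searn_rollout, and the searn_sequencetask_* family.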
diff --git a/vowpalwabbit/searn_sequencetask.h b/vowpalwabbit/searn_sequencetask.h
index a8653aac..d489ff66 100644
--- a/vowpalwabbit/searn_sequencetask.h
+++ b/vowpalwabbit/searn_sequencetask.h
@@ -14,7 +14,7 @@ namespace SequenceTask {
void finish(state);
void start_state_multiline(example**, size_t, state*);
void cs_example(vw&, state, example*&, bool);
- bool initialize(po::variables_map& vm);
+ bool initialize(std::vector<std::string>&opts, po::variables_map& vm);
size_t hash(state);
bool equivalent(state, state);
std::string to_string(state, bool, std::vector<action>);