diff options
author | Paul Mineiro <paul-github@mineiro.com> | 2014-08-20 08:52:53 +0400 |
---|---|---|
committer | Paul Mineiro <paul-github@mineiro.com> | 2014-08-20 08:52:53 +0400 |
commit | ff045938d57c435c366613f012c01ccf93162fa1 (patch) | |
tree | d1b27dac9ba36c2f4671da8bb9f18197b0389972 /demo/dna | |
parent | b19ec85b7c95758ef7174717f212c38b9bb0a263 (diff) |
nn hogwild training fix + demo
Diffstat (limited to 'demo/dna')
-rw-r--r-- | demo/dna/README | 12 |
-rwxr-xr-x | demo/dna/do-dnahogwild-multicore-train | 47 |
-rwxr-xr-x | demo/dna/do-dnahogwildnn-multicore-train | 47 |
3 files changed, 105 insertions, 1 deletion
diff --git a/demo/dna/README b/demo/dna/README index 8a10a775..b5767565 100644 --- a/demo/dna/README +++ b/demo/dna/README @@ -20,7 +20,7 @@ Scale Learning Challenge (http://largescale.ml.tu-berlin.de/summary/). results in APR of 0.512 * make dnann.perf - as above but with additionally 1 neural network hidden node + same as dna.perf, but with additionally 1 neural network hidden node slower (by circa 60 seconds) but better results in APR of 0.532 @@ -34,3 +34,13 @@ Scale Learning Challenge (http://largescale.ml.tu-berlin.de/summary/). subsequently, 6 minute per pass if you have SSD or enough RAM cache 10 passes = 60 minutes (x 6 cores) results in APR of 0.545 + + * make dnahogwild.perf + same as dna.perf, but trained via lock-free multicore sgd ("hogwild") + rather than parallel sgd + averaging + nondeterministic, but a typical result is APR of 0.516 + + * make dnahogwildnn.perf + same as dnann.perf, but trained via lock-free multicore sgd ("hogwild") + rather than parallel sgd + averaging + nondeterministic, but a typical result is APR of 0.536 diff --git a/demo/dna/do-dnahogwild-multicore-train b/demo/dna/do-dnahogwild-multicore-train new file mode 100755 index 00000000..37270bf2 --- /dev/null +++ b/demo/dna/do-dnahogwild-multicore-train @@ -0,0 +1,47 @@ +#! /bin/zsh + +rm -f dnahogwild.model + +set -e + +nukeem() { \ + trap - INT QUIT TERM + pkill -9 -f 'vw.*--port 26543' +} + +learner() { + ./quaddna2vw | \ + netcat localhost 26543 > /dev/null +} + +{ + ../../vowpalwabbit/vw -f dnahogwild.model \ + --loss_function logistic \ + -b 18 -l 0.0625 --adaptive --invariant \ + --daemon --num_children 4 --port 26543 2>&1 | \ + perl -lane 'print $_ unless $c{$F[2]}++;' +} & + +trap 'nukeem; exit 1' INT QUIT TERM + +while ! 
netcat -z localhost 26543 + do + sleep 1 + done + +paste -d' ' \ + <(bzcat dna_train.lab.bz2) \ + <(bzcat dna_train.dat.bz2) | \ +tail -n +1000000 | \ +./map \ + >(learner) \ + >(learner) \ + >(learner) \ + >(learner) + +pkill -f 'vw.*--port 26543' + +while test ! -s dnahogwild.model + do + sleep 1 + done diff --git a/demo/dna/do-dnahogwildnn-multicore-train b/demo/dna/do-dnahogwildnn-multicore-train new file mode 100755 index 00000000..95ef5e93 --- /dev/null +++ b/demo/dna/do-dnahogwildnn-multicore-train @@ -0,0 +1,47 @@ +#! /bin/zsh + +rm -f dnahogwildnn.model + +set -e + +nukeem() { \ + trap - INT QUIT TERM + pkill -9 -f 'vw.*--port 26544' +} + +learner() { + ./quaddna2vw | \ + netcat localhost 26544 > /dev/null +} + +{ + ../../vowpalwabbit/vw -f dnahogwildnn.model \ + --loss_function logistic --nn 1 --inpass \ + -b 18 -l 0.015 --adaptive --invariant \ + --daemon --num_children 4 --port 26544 2>&1 | \ + perl -lane 'print $_ unless $c{$F[2]}++;' +} & + +trap 'nukeem; exit 1' INT QUIT TERM + +while ! netcat -z localhost 26544 + do + sleep 1 + done + +paste -d' ' \ + <(bzcat dna_train.lab.bz2) \ + <(bzcat dna_train.dat.bz2) | \ +tail -n +1000000 | \ +./map \ + >(learner) \ + >(learner) \ + >(learner) \ + >(learner) + +pkill -f 'vw.*--port 26544' + +while test ! -s dnahogwildnn.model + do + sleep 1 + done |