Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/demo/dna
diff options
context:
space:
mode:
author: Paul Mineiro <paul-github@mineiro.com>, 2014-08-20 08:52:53 +0400
committer: Paul Mineiro <paul-github@mineiro.com>, 2014-08-20 08:52:53 +0400
commit: ff045938d57c435c366613f012c01ccf93162fa1 (patch)
tree: d1b27dac9ba36c2f4671da8bb9f18197b0389972 /demo/dna
parent: b19ec85b7c95758ef7174717f212c38b9bb0a263 (diff)
nn hogwild training fix + demo
Diffstat (limited to 'demo/dna')
-rw-r--r-- demo/dna/README 12
-rwxr-xr-x demo/dna/do-dnahogwild-multicore-train 47
-rwxr-xr-x demo/dna/do-dnahogwildnn-multicore-train 47
3 files changed, 105 insertions, 1 deletions
diff --git a/demo/dna/README b/demo/dna/README
index 8a10a775..b5767565 100644
--- a/demo/dna/README
+++ b/demo/dna/README
@@ -20,7 +20,7 @@ Scale Learning Challenge (http://largescale.ml.tu-berlin.de/summary/).
results in APR of 0.512
* make dnann.perf
- as above but with additionally 1 neural network hidden node
+ same as dna.perf, but with additionally 1 neural network hidden node
slower (by circa 60 seconds) but better
results in APR of 0.532
@@ -34,3 +34,13 @@ Scale Learning Challenge (http://largescale.ml.tu-berlin.de/summary/).
subsequently, 6 minute per pass if you have SSD or enough RAM cache
10 passes = 60 minutes (x 6 cores)
results in APR of 0.545
+
+ * make dnahogwild.perf
+ same as dna.perf, but trained via lock-free multicore sgd ("hogwild")
+ rather than parallel sgd + averaging
+ nondeterministic, but a typical result is APR of 0.516
+
+ * make dnahogwildnn.perf
+ same as dnann.perf, but trained via lock-free multicore sgd ("hogwild")
+ rather than parallel sgd + averaging
+ nondeterministic, but a typical result is APR of 0.536
diff --git a/demo/dna/do-dnahogwild-multicore-train b/demo/dna/do-dnahogwild-multicore-train
new file mode 100755
index 00000000..37270bf2
--- /dev/null
+++ b/demo/dna/do-dnahogwild-multicore-train
@@ -0,0 +1,47 @@
+#! /bin/zsh
+
+rm -f dnahogwild.model
+
+set -e
+
+nukeem() { \
+ trap - INT QUIT TERM
+ pkill -9 -f 'vw.*--port 26543'
+}
+
+learner() {
+ ./quaddna2vw | \
+ netcat localhost 26543 > /dev/null
+}
+
+{
+ ../../vowpalwabbit/vw -f dnahogwild.model \
+ --loss_function logistic \
+ -b 18 -l 0.0625 --adaptive --invariant \
+ --daemon --num_children 4 --port 26543 2>&1 | \
+ perl -lane 'print $_ unless $c{$F[2]}++;'
+} &
+
+trap 'nukeem; exit 1' INT QUIT TERM
+
+while ! netcat -z localhost 26543
+ do
+ sleep 1
+ done
+
+paste -d' ' \
+ <(bzcat dna_train.lab.bz2) \
+ <(bzcat dna_train.dat.bz2) | \
+tail -n +1000000 | \
+./map \
+ >(learner) \
+ >(learner) \
+ >(learner) \
+ >(learner)
+
+pkill -f 'vw.*--port 26543'
+
+while test ! -s dnahogwild.model
+ do
+ sleep 1
+ done
diff --git a/demo/dna/do-dnahogwildnn-multicore-train b/demo/dna/do-dnahogwildnn-multicore-train
new file mode 100755
index 00000000..95ef5e93
--- /dev/null
+++ b/demo/dna/do-dnahogwildnn-multicore-train
@@ -0,0 +1,47 @@
+#! /bin/zsh
+
+rm -f dnahogwildnn.model
+
+set -e
+
+nukeem() { \
+ trap - INT QUIT TERM
+ pkill -9 -f 'vw.*--port 26544'
+}
+
+learner() {
+ ./quaddna2vw | \
+ netcat localhost 26544 > /dev/null
+}
+
+{
+ ../../vowpalwabbit/vw -f dnahogwildnn.model \
+ --loss_function logistic --nn 1 --inpass \
+ -b 18 -l 0.015 --adaptive --invariant \
+ --daemon --num_children 4 --port 26544 2>&1 | \
+ perl -lane 'print $_ unless $c{$F[2]}++;'
+} &
+
+trap 'nukeem; exit 1' INT QUIT TERM
+
+while ! netcat -z localhost 26544
+ do
+ sleep 1
+ done
+
+paste -d' ' \
+ <(bzcat dna_train.lab.bz2) \
+ <(bzcat dna_train.dat.bz2) | \
+tail -n +1000000 | \
+./map \
+ >(learner) \
+ >(learner) \
+ >(learner) \
+ >(learner)
+
+pkill -f 'vw.*--port 26544'
+
+while test ! -s dnahogwildnn.model
+ do
+ sleep 1
+ done