# demo/movielens Makefile (github.com/moses-smt/vowpal_wabbit.git)

SHELL=/bin/zsh
VW=../../vowpalwabbit/vw

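# Keep intermediate files (downloaded archives, preprocessed .vw data) instead
# of letting make delete them, so repeated runs skip the download/convert steps.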
.SECONDARY:

all:
	@cat README.md

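# Train and evaluate all four model variants below, printing each test MAE.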
shootout: $(foreach what,linear lrq lrqdropout lrqdropouthogwild,$(what).print)

clean:
	rm -f $(wildcard *results*) $(wildcard *.vw) $(wildcard *.model.txt) $(wildcard *.pdf)

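# Fetch a MovieLens archive from GroupLens and unpack it; the 10M archive
# extracts to a directory named ml-10M100K, so it is renamed to ml-10m to
# match the pattern rules below.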
ml-%.zip:
	@echo "downloading movielens $*"
	@wget http://files.grouplens.org/datasets/movielens/ml-$*.zip

ml-%/ratings.dat: ml-%.zip
	@rm -rf ml-$*
	@unzip -qq $<
	@(test -d ml-10M100K && mv -f ml-10M100K ml-10m) || true
	@rm -rf __MACOSX
	@touch ml-$*/*

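# Convert ratings.dat to VW format and shuffle the training data.  ratings2vw
# writes separate train and test files; the perl/sort/cut pipeline prefixes
# each training line with a fixed-seed random key, sorts on it, and strips it
# again, giving a reproducible shuffle.  (The examples presumably put users
# and movies in the u and m namespaces, since the models below interact them
# via --lrq um<rank>; e.g. something like "4 |u 1 |m 1193".)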
ml-%.ratings.train.vw: ml-%/ratings.dat
	@echo -n "preprocessing movielens $* ..."
	@./ratings2vw ml-$*.ratings.pre.train.vw ml-$*.ratings.test.vw $<
	@perl -ne 'BEGIN { srand 8675309; }; 		\
	           1; print join "\t", rand (), $$_;' 	\
	      ml-$*.ratings.pre.train.vw | sort -k1 |	\
	      cut -f2- > ml-$*.ratings.train.vw
	@rm -f ml-$*.ratings.pre.train.vw
	@echo " complete"

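# The test file is written as a side effect of building the train file, so
# this rule only records the dependency.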
%.test.vw: %.train.vw
	@true

%.print: %.results
	@printf "%s test MAE is %3.3f\n" $* $$(cat $*.results)

#---------------------------------------------------------------------
#               linear model (no interaction terms)               
#---------------------------------------------------------------------

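# Baseline: a linear model over user and movie features with no interaction
# terms.  Quantile loss is used so training optimizes the reported metric
# (mean absolute error).  At test time the predictions (-p) are streamed into
# a zsh process substitution; the perl one-liner averages the absolute
# difference between the two columns of each prediction line, using an
# unmatched "} {" to close perl's implicit -n loop so the mean is printed
# once at the end and captured in the .results file.  (This assumes each test
# example carries its true rating as the VW tag, so "vw -p" emits
# "prediction tag" pairs and the average is the test MAE.)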
linear.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw
	@echo "****************************************************"
	@echo "*   training linear model (no interaction terms)   *"
	@echo "****************************************************"
	@echo
	@${VW} --loss_function quantile -l 1 -b 24 --passes 100 	\
	  -k --cache_file $@.cache -d $(word 2,$+) --holdout_off	\
	  --adaptive --invariant -f $@.model
	@echo "****************************************************"
	@echo "*   testing linear model (no interaction terms)    *"
	@echo "****************************************************"
	@echo
	@${VW} --loss_function quantile -t -i $@.model 			\
	  -d $(word 1,$+) -p						\
	  >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { 			\
			1; print $$s/$$.;' > $@)
	@echo "****************************************************"
	@echo "*   saving human readable model (--invert_hash)    *"
	@echo "****************************************************"
	@echo
	@${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+)
	@rm -f $@.cache $@.model

#---------------------------------------------------------------------
#               low-rank interaction model (without dropout)
#---------------------------------------------------------------------

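# Matrix-factorization-style model: --lrq um7 adds rank-7 low-rank quadratic
# interactions between the u (user) and m (movie) namespaces, i.e. each user
# and each movie learns a 7-dimensional latent factor whose inner product is
# added to the linear prediction.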
lrq.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw
	@echo "*********************************************************"
	@echo "* training low-rank interaction model (without dropout) *"
	@echo "*                                                       *"
	@echo "* vw --lrq um7 ...                                      *"
	@echo "*********************************************************"
	@echo
	@${VW} --loss_function quantile -l 0.1 -b 24 --passes 100 	\
	  -k --cache_file $@.cache -d $(word 2,$+) --holdout_off	\
	  --power_t 0.333 --l2 1.25e-7 --lrq um7 --adaptive --invariant -f $@.model
	@echo "********************************************************"
	@echo "* testing low-rank interaction model (without dropout) *"
	@echo "********************************************************"
	@echo
	@${VW} --loss_function quantile -t -i $@.model 			\
	  -d $(word 1,$+) -p						\
	  >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { 			\
			1; print $$s/$$.;' > $@)
	@echo "****************************************************"
	@echo "*   saving human readable model (--invert_hash)    *"
	@echo "****************************************************"
	@echo
	@${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+)
	@rm -f $@.cache $@.model

#---------------------------------------------------------------------
#               low-rank interaction model (with dropout)
#---------------------------------------------------------------------

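# Same low-rank interaction model, but --lrqdropout randomly drops latent
# dimensions during training as a regularizer; the rank is raised to 14 here,
# presumably to compensate for the dropped units.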
lrqdropout.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw
	@echo "******************************************************"
	@echo "* training low-rank interaction model (with dropout) *"
	@echo "*                                                    *"
	@echo "* vw --lrq um14 --lrqdropout ...                     *"
	@echo "******************************************************"
	@echo
	@${VW} --loss_function quantile -l 0.45 -b 24 --passes 100 	\
	  -k --cache_file $@.cache -d $(word 2,$+) --holdout_off	\
	  --lrq um14 --lrqdropout --adaptive --invariant -f $@.model
	@echo "*****************************************************"
	@echo "* testing low-rank interaction model (with dropout) *"
	@echo "*****************************************************"
	@echo
	@${VW} --loss_function quantile -t -i $@.model 			\
	  -d $(word 1,$+) -p						\
	  >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { 			\
			1; print $$s/$$.;' > $@)
	@echo "****************************************************"
	@echo "*   saving human readable model (--invert_hash)    *"
	@echo "****************************************************"
	@echo
	@${VW} -i $@.model -t --invert_hash $@.model.txt -d $(word 2,$+)
	@rm -f $@.cache $@.model

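# Cluster movies by their learned latent factors.  The --invert_hash output
# names the per-movie interaction weights "lrq^m..."; those rows are pulled
# into a CSV and handed to visualize_factors.R, which presumably performs the
# hierarchical clustering and renders the dendrogram (requires R).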
movie_dendrogram.pdf: lrqdropout.results
	@echo "*******************************************************"
	@echo "* Generating movie clustering based on latent factors *"
	@echo "*         (Requires an installation of R)             *"
	@echo "*******************************************************"
	@grep ^lrq^m $<.model.txt | tr '^' ':' > $<.model.csv
	@Rscript visualize_factors.R
	@rm -f $<.model.csv

#---------------------------------------------------------------------
#               low-rank interaction model (with dropout)
#                  (HOGWILD training mode)
#---------------------------------------------------------------------

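# Same dropout model trained HOGWILD-style: the do-lrq-hogwild script (not
# shown here) presumably feeds the data to vw from several parallel streams,
# in the spirit of lock-free HOGWILD updates, and writes the resulting model
# to the path passed as its argument.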
lrqdropouthogwild.results: ml-1m.ratings.test.vw ml-1m.ratings.train.vw do-lrq-hogwild
	@echo "******************************************************"
	@echo "* training low-rank interaction model (with dropout) *"
	@echo "*      (HOGWILD training mode)                       *"
	@echo "*                                                    *"
	@echo "* vw --lrq um14 --lrqdropout ...                     *"
	@echo "******************************************************"
	@echo
	@./do-lrq-hogwild $@.model
	@echo "*****************************************************"
	@echo "* testing low-rank interaction model (with dropout) *"
	@echo "*****************************************************"
	@echo
	@${VW} --loss_function quantile -t -i $@.model 			\
	  -d $(word 1,$+) -p						\
	  >(perl -lane '$$s+=abs(($$F[0]-$$F[1])); } { 			\
			1; print $$s/$$.;' > $@)