test/train-sets/ref/rcv1_small.stderr


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

using l2 regularization = 1
enabling BFGS based optimization **without** curvature calculation
Num weight bits = 20
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
m = 7
Allocated 72M for weights and mem
## avg. loss 	der. mag. 	d. m. cond.	 wolfe1    	wolfe2    	mix fraction	curvature 	dir. magnitude	step size 
creating cache_file = train-sets/rcv1_small.dat.cache
Reading datafile = train-sets/rcv1_small.dat
num sources = 1
 1 0.69315   	0.00266   	0.39836   	          	          	          	0.48297   	156.06891 	0.82481   
 3 0.52081   	0.00612   	0.13742   	 0.524507  	0.091092  	          	          	42.96110  	1.00000   
 4 0.48999   	0.00251   	0.04577   	 0.286251  	-0.382286 	          	          	2.38434   	1.00000   
 5 0.47993   	0.00007   	0.00448   	 0.617627  	0.225808  	          	          	0.71197   	1.00000   
 6 0.47794   	0.00001   	0.00174   	 0.691628  	0.377800  	          	          	0.93214   	1.00000   
 7 0.47685   	0.00001   	0.00041   	 0.606087  	0.209707  	          	          	0.18141   	1.00000   
 8 0.47668   	0.00000   	0.00001   	 0.538842  	0.077584  	          	          	0.00292   	1.00000   

finished run
number of examples = 8000
weighted example sum = 8000
weighted label sum = -656
average loss = 0.461878
best constant = -0.164369
best constant's loss = 0.689781
total feature number = 629912