Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
creating cache_file = train-sets/big-constant.dat.cache
Reading datafile = train-sets/big-constant.dat
num sources = 1
average      since        example  example    current    current   current
loss         last         counter  weight     label      predict   features
0.102639     0.102639           1      1.0  1000.3204  1000.0000        23
0.280279     0.457919           2      2.0   999.5383  1000.2150        23
3.338668     6.397057           4      4.0  1001.4026   998.4963        23
3.873663     4.408658           8      8.0  1000.9437   999.5264        23
3.304812     2.735960          16     16.0  1003.0878  1000.4187        23
2.513803     1.722794          32     32.0  1000.0400   999.6024        23
1.959958     1.406113          64     64.0  1001.1794  1000.0224        23
1.739407     1.518856         128    128.0   998.8786   999.7367        23
1.316679     0.893950         256    256.0  1000.6227  1000.4100        23
0.923732     0.530785         512    512.0  1001.3120  1000.6509        23
0.579187     0.234643        1024   1024.0  1001.0090  1000.9327        23
0.327981     0.076775        2048   2048.0   999.2150   999.4156        23
0.172062     0.016143        4096   4096.0   999.5161   999.5190        23
0.086926     0.001790        8192   8192.0   999.5844   999.5410        23

finished run
number of examples per pass = 100
passes used = 100
weighted example sum = 10000
weighted label sum = 9.99896e+06
average loss = 0.0712402
best constant = 999.896
total feature number = 230000
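
A minimal sketch of an invocation that could produce a log of this shape (a reconstruction, not recorded in the log itself): the printed Num weight bits = 18, learning rate = 0.5, and power_t = 0.5 are vw defaults and need no flags, while the cache creation and the 100 passes over 100 examples imply -c and --passes.

    vw -d train-sets/big-constant.dat -c -k --passes 100 --holdout_off

Here -k rebuilds any stale cache and --holdout_off keeps every example in the progressive loss, matching the unmarked average-loss column above; both of those flags are assumptions rather than facts taken from the log. Note that the summary is self-consistent: best constant = weighted label sum / weighted example sum = 9.99896e+06 / 10000 = 999.896.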