Num weight bits = 18
learning rate = 10
initial_t = 10
power_t = 0.5
using no cache
Reading datafile = train-sets/rcv1_small.dat
num sources = 1
average    since         example     example  current  current  current
loss       last          counter      weight    label  predict features
1.000000   1.000000            1       490.2   1.0000   0.0000       50
0.923209   0.617612           83       613.4   1.0000   0.0494       46
0.843993   0.528494          169       767.4  unknown  -0.0771       87
0.772938   0.491751          261       961.3   1.0000   0.1576       13
0.688758   0.352542          430      1201.9   1.0000  -0.0663       18
0.614280   0.322631          569      1508.9   1.0000   0.3900       29
0.540212   0.262029          796      1910.6  -1.0000  -0.5583       46
0.469826   0.188841         1190      2389.2  unknown   0.3206       44
0.430273   0.272207         1491      2987.1  -1.0000   0.1254       70
0.376527   0.161817         1901      3734.8  unknown  -0.4625       48
0.370952   0.348673         2127      4669.3  unknown  -0.4482      179
0.359658   0.314489         2813      5836.9  unknown  -0.2033       35
0.328361   0.203584         3462      7300.9  -1.0000  -1.0000      151
0.296159   0.170245         4340      9168.1  -1.0000  -0.5148       70
0.279052   0.210640         5887     11460.5  unknown   0.5904       40
0.255498   0.163368         6996     14390.5   1.0000   0.5009       49
0.240619   0.181113         9174     17988.9  unknown  -0.7411       57

finished run
number of examples per pass = 10000
passes used = 1
weighted example sum = 19090.5
weighted label sum = -1355
average loss = 0.240509
best constant = -0.136917
total feature number = 779394
total queries = 889