1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD
license as described in the file LICENSE.
*/
#include "global_data.h"
int order_features(const void* first, const void* second)
{
return ((feature*)first)->weight_index - ((feature*)second)->weight_index;
}
int order_audit_features(const void* first, const void* second)
{
return (int)(((audit_data*)first)->weight_index) - (int)(((audit_data*)second)->weight_index);
}
/*
 * Collapses runs of adjacent features that share a weight_index, keeping the
 * first feature of each run, and moves features.end to just past the last
 * feature kept. Only neighbouring entries are compared, so duplicates are
 * removed completely only when the input is ordered by weight_index.
 *
 * features: array compacted in place; left untouched when empty.
 * max:      negative means no limit. Otherwise scanning stops as soon as
 *           `max` features have been kept and everything after them is
 *           dropped. A non-empty array always keeps its first feature, so
 *           max == 0 still leaves one element.
 */
void unique_features(v_array<feature>& features, int max=-1)
{
  if (features.empty())
    return;

  const bool capped = (max >= 0);
  feature* kept = features.begin;  // last slot holding a retained feature

  for (feature* scan = features.begin + 1; scan != features.end; ++scan)
  {
    // With a cap in force, stop once the next slot would exceed it.
    if (capped && kept + 1 >= features.begin + max)
      break;
    if (scan->weight_index == kept->weight_index)
      continue;  // same index as the previous retained feature: skip it
    ++kept;
    *kept = *scan;
  }

  features.end = kept + 1;
}
/*
 * Audit-data counterpart of the feature de-duplication: collapses runs of
 * adjacent audit entries that share a weight_index, keeping the first entry
 * of each run, and moves features.end to just past the last entry kept.
 * Only neighbouring entries are compared, so the input needs to be ordered
 * by weight_index for every duplicate to be removed.
 *
 * features: array compacted in place; left untouched when empty.
 * max:      negative means no limit. Otherwise scanning stops as soon as
 *           `max` entries have been kept and the remainder is dropped. A
 *           non-empty array always keeps its first entry, so max == 0 still
 *           leaves one element.
 */
void unique_audit_features(v_array<audit_data>& features, int max = -1)
{
  if (features.empty())
    return;

  audit_data* write = features.begin;     // last retained entry
  audit_data* read = features.begin + 1;  // next candidate to examine

  while (read != features.end)
  {
    // A non-negative max caps how many entries may be retained.
    if (max >= 0 && !(write + 1 < features.begin + max))
      break;
    if (read->weight_index != write->weight_index)
    {
      ++write;
      *write = *read;
    }
    ++read;
  }

  features.end = write + 1;
}
/*
 * For every namespace index listed in ae->indices: masks each feature's
 * weight_index with parse_mask, sorts the features by weight_index, and
 * removes adjacent duplicates. When `audit` is set, the matching
 * ae->audit_features array gets the same mask / sort / de-duplicate
 * treatment. Finally marks the example as sorted.
 *
 * audit:      also process ae->audit_features for each index.
 * parse_mask: bit mask AND-ed into every weight_index before sorting.
 * ae:         example modified in place; must not be null.
 */
void unique_sort_features(bool audit, uint32_t parse_mask, example* ae)
{
  for (unsigned char* b = ae->indices.begin; b != ae->indices.end; b++)
  {
    // Work on the example's own array (not a copy) so every change,
    // including the shortened end pointer, lands in the example.
    v_array<feature>& features = ae->atomics[*b];
    for (size_t i = 0; i < features.size(); i++)
      features[i].weight_index &= parse_mask;
    qsort(features.begin, features.size(), sizeof(feature), order_features);
    unique_features(features);

    if (audit)
    {
      // NOTE(review): assumes ae->audit_features[*b] is an lvalue just like
      // ae->atomics[*b] - confirm against the example declaration.
      v_array<audit_data>& afeatures = ae->audit_features[*b];
      // Bound the loop by the audit array itself: the feature array has
      // already been de-duplicated above and may now be shorter, which
      // would leave trailing audit entries unmasked before the sort.
      for (size_t i = 0; i < afeatures.size(); i++)
        afeatures[i].weight_index &= parse_mask;
      qsort(afeatures.begin, afeatures.size(), sizeof(audit_data), order_audit_features);
      // De-duplicate through the reference so the new end is stored in the
      // example rather than in a temporary copy that is then discarded.
      unique_audit_features(afeatures);
    }
  }
  ae->sorted = true;
}
|