Welcome to mirror list, hosted at ThFree Co, Russian Federation.

unique_sort.cc « vowpalwabbit - github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c682cf632cd568f9441e5f8090a9b31477e9a3e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved.  Released under a BSD
license as described in the file LICENSE.
 */
#include "global_data.h"

int order_features(const void* first, const void* second)
{
  return ((feature*)first)->weight_index - ((feature*)second)->weight_index;
}

int order_audit_features(const void* first, const void* second)
{
  return (int)(((audit_data*)first)->weight_index) - (int)(((audit_data*)second)->weight_index);
}

// Compacts `features` in place so that no two neighbouring entries share a
// weight_index: the first entry of every run of equal indices is kept and
// the survivors are packed at the front, after which features.end is pulled
// back to just past the last survivor. Only adjacent entries are compared,
// so duplicates are fully removed only when equal indices sit next to each
// other (e.g. after sorting).
//
// max: when negative, the whole array is scanned. Otherwise scanning stops
//      as soon as `max` entries have been kept. The first entry is always
//      kept, so a non-empty array never shrinks below one element.
void unique_features(v_array<feature>& features, int max=-1)
{
  if (features.empty())
    return;

  feature* kept = features.begin;      // most recently kept entry
  feature* scan = features.begin + 1;  // next candidate to examine

  if (max < 0)
    {
      while (scan != features.end)
        {
          if (scan->weight_index != kept->weight_index)
            {
              ++kept;
              *kept = *scan;
            }
          scan++;
        }
    }
  else
    {
      while (scan != features.end && kept + 1 < features.begin + max)
        {
          if (scan->weight_index != kept->weight_index)
            {
              ++kept;
              *kept = *scan;
            }
          scan++;
        }
    }

  features.end = kept + 1;
}

// Audit-data counterpart of the adjacent-duplicate filter: walks `features`
// once, keeps the first entry of each run of neighbouring entries with the
// same weight_index, packs the kept entries at the front of the array and
// moves features.end to just past the last kept entry.
//
// max: a negative value means "no limit". Otherwise the walk ends once
//      `max` entries have been kept. The first entry is always kept, so a
//      non-empty array keeps at least one element.
void unique_audit_features(v_array<audit_data>& features, int max = -1)
{
  if (features.empty())
    return;

  audit_data* tail = features.begin;  // last entry retained so far

  if (max >= 0)
    {
      for (audit_data* probe = features.begin + 1;
           probe != features.end && tail + 1 < features.begin + max;
           probe++)
        {
          if (probe->weight_index == tail->weight_index)
            continue;  // same index as the retained entry: drop it
          ++tail;
          *tail = *probe;
        }
    }
  else
    {
      for (audit_data* probe = features.begin + 1; probe != features.end; probe++)
        {
          if (probe->weight_index == tail->weight_index)
            continue;  // same index as the retained entry: drop it
          ++tail;
          *tail = *probe;
        }
    }

  features.end = tail + 1;
}

// For every index listed in ae->indices: masks each feature's weight_index
// with parse_mask, sorts the features by weight_index and removes adjacent
// duplicates. Finally marks the example as sorted.
//
// audit:      when true, ae->audit_features for the same index is masked,
//             sorted and de-duplicated as well.
// parse_mask: AND-ed into every weight_index before sorting.
// ae:         example to modify in place.
void unique_sort_features(bool audit, uint32_t parse_mask, example* ae)
{
  for (unsigned char* b = ae->indices.begin; b != ae->indices.end; b++)
    {
      // Bound by reference so the shortened `end` written by
      // unique_features lands on the example's own array.
      v_array<feature>& features = ae->atomics[*b];

      for (size_t i = 0; i < features.size(); i++)
        features[i].weight_index &= parse_mask;
      qsort(features.begin, features.size(), sizeof(feature),
            order_features);
      unique_features(features);

      if (audit)
        {
          // Also a reference: de-duplicating a by-value copy would leave
          // the example's audit array at its old length.
          v_array<audit_data>& afeatures = ae->audit_features[*b];

          // Bounded by the audit array's own size. The feature array has
          // already been shortened above, so using its size here would
          // leave trailing audit entries unmasked before the sort.
          for (size_t i = 0; i < afeatures.size(); i++)
            afeatures[i].weight_index &= parse_mask;

          qsort(afeatures.begin, afeatures.size(), sizeof(audit_data),
                order_audit_features);
          // NOTE(review): entries dropped here are simply overwritten; if
          // audit_data owns heap memory, confirm it is released elsewhere.
          unique_audit_features(afeatures);
        }
    }
  ae->sorted = true;
}