1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
*/
/*
This implements the allreduce function of MPI. Code primarily by
Alekh Agarwal and John Langford, with help Olivier Chapelle.
*/
#include "accumulate.h"

#include <sys/timeb.h>

#include <cmath>
#include <iostream>
#include <stdint.h>
#include <vector>

#include "global_data.h"
using namespace std;
// Reduction operator passed to all_reduce: folds c2 into the accumulator c1.
void add_float(float& c1, const float& c2) { c1 = c1 + c2; }
// Sums slot `o` of every (strided) weight entry across all nodes via
// all_reduce; on return each node holds the global sums in that slot.
//
// all             - global vw state (bit count, cluster id/topology, sockets)
// master_location - host coordinating the allreduce spanning tree
// reg             - regressor whose weight_vector is read and overwritten
// o               - offset within each weight stride to reduce (e.g. gradient)
void accumulate(vw& all, string master_location, regressor& reg, size_t o) {
  uint32_t length = 1 << all.num_bits; //This is size of gradient
  size_t stride = 1 << all.reg.stride_shift;
  weight* weights = reg.weight_vector;
  // std::vector instead of raw new[]/delete[]: the buffer is released even if
  // all_reduce throws (the original code would leak local_grad in that case).
  std::vector<float> local_grad(length);
  // Gather the strided slot into a contiguous buffer for the reduce.
  for (uint32_t i = 0; i < length; i++)
    local_grad[i] = weights[stride * i + o];
  all_reduce<float, add_float>(local_grad.data(), length, master_location, all.unique_id, all.total, all.node, all.socks);
  // Scatter the globally-summed values back into the weight vector.
  for (uint32_t i = 0; i < length; i++)
    weights[stride * i + o] = local_grad[i];
}
// Sums a single scalar across every node in the cluster and returns the
// global total (each node receives the same summed value).
float accumulate_scalar(vw& all, string master_location, float local_sum) {
  float accumulated = local_sum;
  all_reduce<float, add_float>(&accumulated, 1, master_location, all.unique_id, all.total, all.node, all.socks);
  return accumulated;
}
// Averages slot `o` of every (strided) weight entry across all nodes:
// sum via all_reduce, then divide by the node count (all.total).
//
// all             - global vw state (bit count, cluster id/topology, sockets)
// master_location - host coordinating the allreduce spanning tree
// reg             - regressor whose weight_vector is read and overwritten
// o               - offset within each weight stride to average
void accumulate_avg(vw& all, string master_location, regressor& reg, size_t o) {
  uint32_t length = 1 << all.num_bits; //This is size of gradient
  size_t stride = 1 << all.reg.stride_shift;
  weight* weights = reg.weight_vector;
  float numnodes = (float)all.total;
  // std::vector instead of raw new[]/delete[]: the buffer is released even if
  // all_reduce throws (the original code would leak local_grad in that case).
  std::vector<float> local_grad(length);
  // Gather the strided slot into a contiguous buffer for the reduce.
  for (uint32_t i = 0; i < length; i++)
    local_grad[i] = weights[stride * i + o];
  all_reduce<float, add_float>(local_grad.data(), length, master_location, all.unique_id, all.total, all.node, all.socks);
  // Write back the per-node mean of the globally-summed values.
  for (uint32_t i = 0; i < length; i++)
    weights[stride * i + o] = local_grad[i] / numnodes;
}
// Returns the largest value in arr[0..length); requires length >= 1.
float max_elem(float* arr, int length) {
  float best = arr[0];
  for (int idx = 1; idx < length; idx++)
    best = (arr[idx] > best) ? arr[idx] : best;
  return best;
}
// Returns the smallest value in arr[0..length) among entries strictly
// greater than 0.001 (near-zero entries are skipped), except that arr[0]
// always seeds the running minimum unconditionally; requires length >= 1.
float min_elem(float* arr, int length) {
  float best = arr[0];
  for (int idx = 1; idx < length; idx++)
    if (arr[idx] > 0.001 && arr[idx] < best)
      best = arr[idx];
  return best;
}
// Computes an importance-weighted average of the model across all nodes.
// Each parameter's averaging weight is its accumulated sum of squared
// gradients (the adaptive-gradient statistic stored at stride offset +1),
// so nodes that updated a parameter more contribute more to its average.
// Requires --adaptive; otherwise prints a message and returns unchanged.
void accumulate_weighted_avg(vw& all, string master_location, regressor& reg) {
if(!all.adaptive) {
cerr<<"Weighted averaging is implemented only for adaptive gradient, use accumulate_avg instead\n";
return;
}
uint32_t length = 1 << all.num_bits; //This is the number of parameters
size_t stride = 1 << all.reg.stride_shift;
weight* weights = reg.weight_vector;
float* local_weights = new float[length];
// Gather each parameter's local gradient-square sum (stride offset +1).
for(uint32_t i = 0;i < length;i++)
local_weights[i] = weights[stride*i+1];
//First compute weights for averaging
all_reduce<float, add_float>(local_weights, length, master_location, all.unique_id, all.total, all.node, all.socks);
// local_weights[i] now holds the GLOBAL gradient-square sum for parameter i.
// Each node rescales its own entries by ratio = local/global so that the
// final sum-reduce below produces the weighted mean directly.
for(uint32_t i = 0;i < length;i++) //Compute weighted versions
if(local_weights[i] > 0) {
float ratio = weights[stride*i+1]/local_weights[i];
local_weights[i] = weights[stride*i] * ratio;
weights[stride*i] *= ratio;
weights[stride*i+1] *= ratio; //A crude max
if (all.normalized_updates)
weights[stride*i+all.normalized_idx] *= ratio; //A crude max
}
else {
// No node touched this parameter: zero it so the reduce leaves it zero.
local_weights[i] = 0;
weights[stride*i] = 0;
}
// NOTE(review): local_weights is never read after the loop above — the
// stores into it look dead; confirm against history before removing.
// Sum-reduce the entire strided weight vector (length*stride floats) so
// every node ends up with the weighted-average model.
all_reduce<float, add_float>(weights, length*stride, master_location, all.unique_id, all.total, all.node, all.socks);
delete[] local_weights;
}
|