blob: 85bda93d48deae1c1ed979e37e87be76d1ac68c8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#include "THCUNN.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include <THC/THCApply.cuh>
// Pointwise functor used for the sigmoid gradient with respect to its input.
//
// For one element it stores
//     gradInput = gradOutput * (1 - output) * output
// where `output` is presumably the value produced by the forward sigmoid
// (the functor itself only reads it; confirm against the caller).
//
// Template parameter:
//   T - element type; must support `float - T` and `T * T`.
template <typename T>
struct sigmoid_updateGradInput_functor {
  __device__ __forceinline__ void operator()(T* gradInput, const T *output, const T *gradOutput) const {
    // Read both operands before the single store to gradInput.
    const T y = *output;
    const T dy = *gradOutput;
    *gradInput = dy * (1.f - y) * y;
  }
};
#ifdef CUDA_HALF_TENSOR
// `half` specialization of the sigmoid gradient functor; only compiled when
// CUDA_HALF_TENSOR is defined.
//
// Stores gradOutput * (1 - output) * output into gradInput, using one of two
// code paths selected at preprocessing time by CUDA_HALF_INSTRUCTIONS.
template <>
struct sigmoid_updateGradInput_functor<half> {
  __device__ __forceinline__ void operator()(half* gradInput, const half *output, const half *gradOutput) const {
#ifdef CUDA_HALF_INSTRUCTIONS
    // Half-intrinsic path: every intermediate stays in `half`.
    // Evaluated as gradOutput * ((1 + (-output)) * output).
    const half unity = __float2half(1.f);
    const half oneMinusOut = __hadd(unity, __hneg(*output));
    const half slope = __hmul(oneMinusOut, *output);
    *gradInput = __hmul(*gradOutput, slope);
#else
    // Fallback path: widen to float, do the arithmetic there, narrow once.
    const float y = __half2float(*output);
    const float dy = __half2float(*gradOutput);
    const float dx = dy * (1.f - y) * y;
    *gradInput = __float2half(dx);
#endif
  }
};
#endif
#include "generic/Sigmoid.cu"
#include "THCGenerateFloatTypes.h"
|