blob: 84bcf32c164609f4907eef56ec5e9277629b5e25 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#include "THCUNN.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include <THC/THCApply.cuh>
/**
 * Element-wise functor that writes gradOutput * (1 - output) * output
 * into gradInput.
 *
 * All three arguments point at a single element. Both inputs are read
 * before the result is stored.
 * NOTE(review): `output` is presumably the forward sigmoid result, which
 * would make this the usual sigmoid backward formula -- confirm at the caller.
 */
template <typename T>
struct SigmoidGradInputOp {
  __device__ __forceinline__ void operator()(T* gradInput, const T *output, const T *gradOutput) const {
    const T y = *output;
    const T upstream = *gradOutput;
    // Same left-to-right evaluation as the one-line form: (upstream * (1 - y)) * y.
    *gradInput = upstream * (1.f - y) * y;
  }
};
#ifdef CUDA_HALF_TENSOR
/**
 * Specialization of SigmoidGradInputOp for `half` elements.
 *
 * Writes gradOutput * ((1 - output) * output) when CUDA_HALF_INSTRUCTIONS is
 * defined (all arithmetic done with half intrinsics), otherwise converts
 * both inputs to float, evaluates gradOutput * (1 - output) * output in
 * float, and converts the result back to half.
 */
template <>
struct SigmoidGradInputOp<half> {
  __device__ __forceinline__ void operator()(half* gradInput, const half *output, const half *gradOutput) const {
#ifdef CUDA_HALF_INSTRUCTIONS
    // Half-precision path: (1 - y) is formed as 1 + (-y).
    const half y = *output;
    const half oneMinusY = __hadd(__float2half(1.f), __hneg(y));
    const half localDeriv = __hmul(oneMinusY, y);
    *gradInput = __hmul(*gradOutput, localDeriv);
#else
    // Fallback path: widen to float, compute, then narrow the result.
    const float y = __half2float(*output);
    const float upstream = __half2float(*gradOutput);
    *gradInput = __float2half(upstream * (1.f - y) * y);
#endif
  }
};
#endif
#include "generic/Sigmoid.cu"
#include "THCGenerateFloatTypes.h"
|