// blob: 6781f33e8bb5dc1bb36aedcb46352529e8c8026b (plain)
#include "THCUNN.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include <THC/THCApply.cuh>
// Element-wise functor used for the tanh gradient-input update.
//
// For one element it stores
//     gradOutput * (1 - output * output)
// into gradInput. All three arguments are pointers to a single element of
// type T; `output` and `gradOutput` are only read, `gradInput` is only written.
// Both inputs are loaded before the store, so gradInput may alias either one.
// NOTE(review): `output` is presumably the forward tanh result -- confirm
// against the caller in generic/Tanh.cu.
template <typename T>
struct tanh_updateGradInput_functor
{
  __device__ __forceinline__ void operator()(T *gradInput,
      const T *output, const T *gradOutput) const {
    // Read both operands up front, then perform the single store.
    const T y = *output;
    const T upstream = *gradOutput;
    *gradInput = upstream * (1.f - y * y);
  }
};
#ifdef CUDA_HALF_TENSOR
// Specialization of the gradient-input functor for `half` elements.
//
// Stores gradOutput * (1 - output * output) into gradInput, like the generic
// template, but spells the arithmetic out explicitly for `half`:
//   - with CUDA_HALF_INSTRUCTIONS defined, every step uses half intrinsics;
//   - otherwise the operands are converted to float, combined in float, and
//     the result is converted back to half.
template <>
struct tanh_updateGradInput_functor<half>
{
  __device__ __forceinline__ void operator()(half *gradInput,
      const half *output, const half *gradOutput) const {
#ifdef CUDA_HALF_INSTRUCTIONS
    // output * output, kept in half.
    const half squared = __hmul(*output, *output);
    // 1 - output^2, written as 1 + (-output^2).
    const half one_minus_sq = __hadd(__float2half(1.f), __hneg(squared));
    *gradInput = __hmul(*gradOutput, one_minus_sq);
#else
    // Widen both operands to float before doing any arithmetic.
    const float y = __half2float(*output);
    const float upstream = __half2float(*gradOutput);
    const float grad = upstream * (1.f - y * y);
    // Narrow the float result back to half for the store.
    *gradInput = __float2half(grad);
#endif
  }
};
#endif
#include "generic/Tanh.cu"
#include "THCGenerateFloatTypes.h"
|