diff options
Diffstat (limited to 'lib/THCUNN/FusedRNNKernel.cu')
-rw-r--r-- | lib/THCUNN/FusedRNNKernel.cu | 46 |
1 files changed, 46 insertions, 0 deletions
// lib/THCUNN/FusedRNNKernel.cu
//
// Element-wise logistic sigmoid functors, declared ahead of the generic
// sources that are pulled in at the bottom of this file.

#include "THCUNN.h"
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include "THCNumerics.cuh"
#include <THC/THCApply.cuh>

// Logistic sigmoid, 1 / (1 + exp(-x)), applied to one element of type T.
// The exponential is taken through THCNumerics<T>::exp.
template <typename T>
struct TensorSigmoidOp {
  // Out-of-place form: reads *in and stores sigmoid(*in) into *out.
  // *in is fully read before *out is written, so out == in is fine.
  __device__ __forceinline__ void operator()(T* out, T* in) const {
    const T unit = (T) 1.0;
    const T decay = THCNumerics<T>::exp(- *in);
    *out = unit / (unit + decay);
  }

  // In-place form: overwrites *v with sigmoid(*v).
  __device__ __forceinline__ void operator()(T* v) const {
    (*this)(v, v);
  }
};

#ifdef CUDA_HALF_TENSOR
// Specialization for half.
// With CUDA_HALF_INSTRUCTIONS the whole expression is evaluated with the half
// intrinsics (__hneg, hexp, __hadd, hdiv); otherwise the input is widened to
// float, evaluated with expf, and the result narrowed back to half.
template <>
struct TensorSigmoidOp<half> {
  // Out-of-place form: reads *in and stores sigmoid(*in) into *out.
  __device__ __forceinline__ void operator()(half* out, half* in) const {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half unit = ScalarConvert<int, half>::to(1);
    const half decay = hexp(__hneg(*in));
    const half denom = __hadd(unit, decay);
    *out = hdiv(unit, denom);
#else
    const float x = __half2float(*in);
    const float y = 1.0f / (1.0f + expf(- x));
    *out = __float2half(y);
#endif
  }

  // In-place form: overwrites *v with sigmoid(*v).
  __device__ __forceinline__ void operator()(half* v) const {
    (*this)(v, v);
  }
};
#endif

// THC generic-file pairing: the generic source followed by the float-type
// generator header.
// NOTE(review): presumably the generator re-includes the generic file once per
// floating-point type -- confirm against THCGenerateFloatTypes.h.
#include "generic/FusedRNNKernel.cu"
#include "THCGenerateFloatTypes.h"