From 0d85922d116879448485ef88ae21e83a9255a0b0 Mon Sep 17 00:00:00 2001
From: Soumith Chintala <soumith@gmail.com>
Date: Wed, 25 Jan 2017 16:13:20 -0500
Subject: Revert "Convert real to accreal in libTHCUNN"

---
 THCUNN.lua                                         |  40 +++++---
 lib/THCUNN/SparseLinear.cu                         |  10 +-
 lib/THCUNN/generic/BatchNormalization.cu           |   2 +-
 lib/THCUNN/generic/ELU.cu                          |   6 +-
 lib/THCUNN/generic/HardTanh.cu                     |  14 +--
 lib/THCUNN/generic/LeakyReLU.cu                    |   8 +-
 lib/THCUNN/generic/LookupTable.cu                  |   9 +-
 lib/THCUNN/generic/MarginCriterion.cu              |   7 +-
 lib/THCUNN/generic/MultiMarginCriterion.cu         |   6 +-
 lib/THCUNN/generic/PReLU.cu                        |   3 +-
 lib/THCUNN/generic/SoftPlus.cu                     |  12 +--
 lib/THCUNN/generic/SoftShrink.cu                   |   6 +-
 lib/THCUNN/generic/SparseLinear.cu                 |  10 +-
 lib/THCUNN/generic/SpatialConvolutionLocal.cu      |   3 +-
 lib/THCUNN/generic/SpatialConvolutionMM.cu         |   3 +-
 lib/THCUNN/generic/SpatialCrossMapLRN.cu           |  24 ++---
 lib/THCUNN/generic/SpatialDilatedConvolution.cu    |   3 +-
 lib/THCUNN/generic/SpatialFullConvolution.cu       |   3 +-
 lib/THCUNN/generic/SpatialSubSampling.cu           |   2 +-
 lib/THCUNN/generic/Sqrt.cu                         |   3 +-
 lib/THCUNN/generic/THCUNN.h                        |  96 +++++++++---------
 lib/THCUNN/generic/TemporalConvolution.cu          |   3 +-
 lib/THCUNN/generic/Threshold.cu                    |  12 +--
 lib/THCUNN/generic/VolumetricConvolution.cu        |   3 +-
 lib/THCUNN/generic/VolumetricDilatedConvolution.cu |   3 +-
 lib/THCUNN/generic/VolumetricFullConvolution.cu    |   3 +-
 test.lua                                           | 111 +++++++++++----------
 27 files changed, 188 insertions(+), 217 deletions(-)

diff --git a/THCUNN.lua b/THCUNN.lua
index d5bf1c2..6776a23 100644
--- a/THCUNN.lua
+++ b/THCUNN.lua
@@ -45,7 +45,7 @@ local replacements_generic =
     ['THCTensor'] = 'THCudaTensor',
     ['THCIndexTensor'] = 'THCudaLongTensor',
     ['TYPE'] = 'Cuda',
-    ['real'] = 'float',
+    ['real'] = 'float'
   },
   {
     ['THCTensor'] = 'THCudaDoubleTensor',
@@ -55,13 +55,6 @@ local replacements_generic =
    }
 }
 
--- gsub(s, 'real', 'float') changes accreal to accfloat.
--- typedef accfloat ahead of time.
-ffi.cdef("typedef float accfloat;")
--- gsub(s, 'real', 'double') changes accreal to accfloat.
--- typedef accdouble ahead of time
-ffi.cdef("typedef double accdouble;")
-
 if cutorch.hasHalf then
   ffi.cdef("half THC_float2half(float a);")
   ffi.cdef("float THC_half2float(half a);")
@@ -70,12 +63,9 @@ if cutorch.hasHalf then
     ['THCTensor'] = 'THCudaHalfTensor',
     ['THCIndexTensor'] = 'THCudaLongTensor',
     ['TYPE'] = 'CudaHalf',
-    ['real'] = 'half',
+    ['real'] = 'half'
   }
   table.insert(replacements_generic, half_replacement)
-  -- gsub(s, 'real', 'double') changes accreal to accfloat.
-  -- typedef acchalf ahead of time
-  ffi.cdef("typedef float acchalf;")
 end
 
 for i=1,#replacements_generic do
@@ -143,9 +133,29 @@ THNN.kernels['torch.CudaDoubleTensor'] = THNN.bind(THCUNN.C, function_names_gene
 torch.getmetatable('torch.CudaDoubleTensor').THNN = THNN.kernels['torch.CudaDoubleTensor']
 
 if cutorch.hasHalf then
-   local raw_half_functions = THNN.bind(THCUNN.C, function_names_generic, 'CudaHalf', THCUNN.getState)
-   THNN.kernels['torch.CudaHalfTensor'] = raw_half_functions
-   torch.getmetatable('torch.CudaHalfTensor').THNN = THNN.kernels['torch.CudaHalfTensor']
+-- in order to call 'half' functions from lua, convert real (scalar) arguments
+-- from float to half, since there is no other defined conversion
+local transform_reals_to_half = function(func_name, real_args, ...)
+    local t = {} -- must be local: a bare `t` would leak into (and clobber) _G.t
+    -- copy by index up to select('#', ...) so nil arguments keep their position
+    for i = 1, select('#', ...) do
+        t[i] = select(i, ...)
+    end
+    for _, v in ipairs(real_args[func_name]) do
+        -- first argument (THCState) is added implicitly by bind, hence v-1
+        t[v-1] = THC.THC_float2half(t[v-1])
+    end
+    return t
+end
+
+local raw_half_functions = THNN.bind(THCUNN.C, function_names_generic, 'CudaHalf', THCUNN.getState)
+for k,v in pairs(raw_half_functions) do
+    -- wrap each bound function; unpack gets an explicit count (select("#", ...)) so trailing nils are kept
+    raw_half_functions[k] = function(...) v(unpack(transform_reals_to_half(k, real_args, ...), 1, select("#",...)))
+end
+end
+THNN.kernels['torch.CudaHalfTensor'] = raw_half_functions
+torch.getmetatable('torch.CudaHalfTensor').THNN = THNN.kernels['torch.CudaHalfTensor']
 end
 
 local function Module__converter(type)
diff --git a/lib/THCUNN/SparseLinear.cu b/lib/THCUNN/SparseLinear.cu
index f36206f..a7ffa1e 100644
--- a/lib/THCUNN/SparseLinear.cu
+++ b/lib/THCUNN/SparseLinear.cu
@@ -34,8 +34,8 @@ void THNN_CudaHalfSparseLinear_accGradParameters(
           THCudaHalfTensor *gradBias,
           THCudaHalfTensor *weight,
           THCudaHalfTensor *bias,
-          float weightDecay,
-          float scale) {
+          double weightDecay,
+          double scale) {
   THError("THCudaHalfTensor not supported with SparseLinear");
 }
 
@@ -56,8 +56,8 @@ void THNN_CudaHalfSparseLinear_legacyAccGradParameters(
           THCudaHalfTensor *gradBias,
           THCudaHalfTensor *weight,
           THCudaHalfTensor *bias,
-          float weightDecay,
-          float scale) {
+          double weightDecay,
+          double scale) {
   THError("THCudaHalfTensor not supported with SparseLinear");
 }
 
@@ -76,7 +76,7 @@ void THNN_CudaHalfSparseLinear_updateParameters(
           THCudaHalfTensor *gradWeight,
           THCudaHalfTensor *gradBias,
           THCudaHalfTensor *lastInput,
-          float learningRate) {
+          double learningRate) {
   THError("THCudaHalfTensor not supported with SparseLinear");
 }
 #endif
diff --git a/lib/THCUNN/generic/BatchNormalization.cu b/lib/THCUNN/generic/BatchNormalization.cu
index d42f18e..cbe99f3 100644
--- a/lib/THCUNN/generic/BatchNormalization.cu
+++ b/lib/THCUNN/generic/BatchNormalization.cu
@@ -69,7 +69,7 @@ void THNN_(BatchNormalization_backward)(
   THCState *state, THCTensor *input_, THCTensor *gradOutput_,
   THCTensor *gradInput_, THCTensor *gradWeight_, THCTensor *gradBias_,
   THCTensor *weight_, THCTensor *runningMean_, THCTensor *runningVar_,
-  THCTensor *saveMean_, THCTensor *saveStd_, bool train, double scale, double eps) {
+  THCTensor *saveMean_, THCTensor *saveStd_, bool train, float scale, double eps) {
 
   THCUNN_check_shape(state, input_, gradOutput_);
   DeviceTensor3 input = devicetensor<3>(state, input_);
diff --git a/lib/THCUNN/generic/ELU.cu b/lib/THCUNN/generic/ELU.cu
index 4b8da27..0beb5a1 100644
--- a/lib/THCUNN/generic/ELU.cu
+++ b/lib/THCUNN/generic/ELU.cu
@@ -9,10 +9,9 @@ void THNN_(ELU_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal alpha_,
+           real alpha,
            bool inplace)
 {
-  real alpha = ScalarConvert<accreal, real>::to(alpha_);
   THCUNN_assertSameGPU(state, 2, input, output);
 
   if (inplace)
@@ -34,10 +33,9 @@ void THNN_(ELU_updateGradInput)(
            THCTensor *gradOutput,
            THCTensor *gradInput,
            THCTensor *output,
-           accreal alpha_,
+           real alpha,
            bool inplace)
 {
-  real alpha = ScalarConvert<accreal, real>::to(alpha_);
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 3, output, gradOutput, gradInput);
 
diff --git a/lib/THCUNN/generic/HardTanh.cu b/lib/THCUNN/generic/HardTanh.cu
index 47835f0..0651431 100644
--- a/lib/THCUNN/generic/HardTanh.cu
+++ b/lib/THCUNN/generic/HardTanh.cu
@@ -8,13 +8,10 @@ void THNN_(HardTanh_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal min_val_,
-           accreal max_val_,
+           real min_val,
+           real max_val,
            bool inplace)
 {
-  real min_val = ScalarConvert<accreal, real>::to(min_val_);
-  real max_val = ScalarConvert<accreal, real>::to(max_val_);
-
   THCUNN_assertSameGPU(state, 2, input, output);
   if(inplace)
   {
@@ -34,13 +31,10 @@ void THNN_(HardTanh_updateGradInput)(
            THCTensor *input,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           accreal min_val_,
-           accreal max_val_,
+           real min_val,
+           real max_val,
            bool inplace)
 {
-  real min_val = ScalarConvert<accreal, real>::to(min_val_);
-  real max_val = ScalarConvert<accreal, real>::to(max_val_);
-
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 3, input, gradOutput, gradInput);
 
diff --git a/lib/THCUNN/generic/LeakyReLU.cu b/lib/THCUNN/generic/LeakyReLU.cu
index 179819d..23cf59a 100644
--- a/lib/THCUNN/generic/LeakyReLU.cu
+++ b/lib/THCUNN/generic/LeakyReLU.cu
@@ -8,11 +8,9 @@ void THNN_(LeakyReLU_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal negval_,
+           real negval,
            bool inplace)
 {
-  real negval = ScalarConvert<accreal, real>::to(negval_);
-
   THCUNN_assertSameGPU(state, 2, input, output);
 
   if (inplace)
@@ -34,11 +32,9 @@ void THNN_(LeakyReLU_updateGradInput)(
            THCTensor *input,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           accreal negval_,
+           real negval,
            bool inplace)
 {
-  real negval = ScalarConvert<accreal, real>::to(negval_);
-
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 3, input, gradInput, gradOutput);
 
diff --git a/lib/THCUNN/generic/LookupTable.cu b/lib/THCUNN/generic/LookupTable.cu
index fa7c5ac..bd59a04 100644
--- a/lib/THCUNN/generic/LookupTable.cu
+++ b/lib/THCUNN/generic/LookupTable.cu
@@ -12,9 +12,8 @@ void THNN_(LookupTable_accGradParameters)(
            THCIndexTensor *indices,
            bool scaleGradByFreq,
            int paddingValue,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_assertSameGPU(state, 5, input, gradOutput, gradWeight, sorted, indices);
   gradOutput = THCTensor_(newContiguous)(state, gradOutput);
   if (!(THCIndexTensor_(isContiguous)(state, input) &&
@@ -120,11 +119,9 @@ void THNN_(LookupTable_renorm)(
            THCState *state,
            THCIndexTensor *idx,
            THCTensor *weight,
-           accreal maxNorm_,
-           accreal normType_)
+           real maxNorm,
+           real normType)
 {
-  real maxNorm = ScalarConvert<accreal, real>::to(maxNorm_);
-  real normType = ScalarConvert<accreal, real>::to(normType_);
   THCUNN_assertSameGPU(state, 2, idx, weight);
   if (!(THCIndexTensor_(isContiguous)(state, idx) &&
         THCTensor_(isContiguous)(state, weight)))
diff --git a/lib/THCUNN/generic/MarginCriterion.cu b/lib/THCUNN/generic/MarginCriterion.cu
index 221f9d9..d5678ec 100644
--- a/lib/THCUNN/generic/MarginCriterion.cu
+++ b/lib/THCUNN/generic/MarginCriterion.cu
@@ -8,9 +8,8 @@ void THNN_(MarginCriterion_updateOutput)(
            THCTensor *target,
            THCTensor *output,
            bool sizeAverage,
-           accreal margin_)
+           real margin)
 {
-  real margin = ScalarConvert<accreal, real>::to(margin_);
   THCUNN_check_nElement(state, input, target);
   THCUNN_check_dim_size(state, output, 1, 0, 1);
   THCUNN_assertSameGPU(state, 2, input, target);
@@ -41,10 +40,8 @@ void THNN_(MarginCriterion_updateGradInput)(
            THCTensor *target,
            THCTensor *gradInput,
            bool sizeAverage,
-           accreal margin_)
+           real margin)
 {
-  real margin = ScalarConvert<accreal, real>::to(margin_);
-
   THCUNN_check_nElement(state, input, target);
   THCUNN_assertSameGPU(state, 3, input, target, gradInput);
 
diff --git a/lib/THCUNN/generic/MultiMarginCriterion.cu b/lib/THCUNN/generic/MultiMarginCriterion.cu
index c3ff2d6..8026331 100644
--- a/lib/THCUNN/generic/MultiMarginCriterion.cu
+++ b/lib/THCUNN/generic/MultiMarginCriterion.cu
@@ -11,9 +11,8 @@ void THNN_(MultiMarginCriterion_updateOutput)(
            bool sizeAverage,
            int p,
            THCTensor *weights,
-           accreal margin_)
+           real margin)
 {
-  real margin = ScalarConvert<accreal, real>::to(margin_);
   THCUNN_assertSameGPU(state, 2, input, target);
   input = THCTensor_(newContiguous)(state, input);
   if(weights)
@@ -103,9 +102,8 @@ void THNN_(MultiMarginCriterion_updateGradInput)(
            bool sizeAverage,
            int p,
            THCTensor *weights,
-           accreal margin_)
+           real margin)
 {
-  real margin = ScalarConvert<accreal, real>::to(margin_);
   THCUNN_assertSameGPU(state, 3, input, gradInput, target);
   input = THCTensor_(newContiguous)(state, input);
   THCTensor_(resizeAs)(state, gradInput, input);
diff --git a/lib/THCUNN/generic/PReLU.cu b/lib/THCUNN/generic/PReLU.cu
index db9b0d2..89087fb 100644
--- a/lib/THCUNN/generic/PReLU.cu
+++ b/lib/THCUNN/generic/PReLU.cu
@@ -92,9 +92,8 @@ void THNN_(PReLU_accGradParameters)(
            THCTensor *gradWeightBuf,
            THCTensor *gradWeightBuf2,
            long nOutputPlane,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_check_nElement(state, input, gradOutput);
   // use grad input for temporary storage, then call updateGradInput again
 
diff --git a/lib/THCUNN/generic/SoftPlus.cu b/lib/THCUNN/generic/SoftPlus.cu
index 17cde70..e72038e 100644
--- a/lib/THCUNN/generic/SoftPlus.cu
+++ b/lib/THCUNN/generic/SoftPlus.cu
@@ -8,11 +8,9 @@ void THNN_(SoftPlus_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal beta_,
-           accreal threshold_)
+           real beta,
+           real threshold)
 {
-  real beta = ScalarConvert<accreal, real>::to(beta_);
-  real threshold = ScalarConvert<accreal, real>::to(threshold_);
   THCUNN_assertSameGPU(state, 2, input, output);
   THCTensor_(resizeAs)(state, output, input);
   THC_pointwiseApply2(state, output, input, softPlusupdateOutput_functor<real>(threshold, beta));
@@ -24,11 +22,9 @@ void THNN_(SoftPlus_updateGradInput)(
            THCTensor *gradOutput,
            THCTensor *gradInput,
            THCTensor *output,
-           accreal beta_,
-           accreal threshold_)
+           real beta,
+           real threshold)
 {
-  real beta = ScalarConvert<accreal, real>::to(beta_);
-  real threshold = ScalarConvert<accreal, real>::to(threshold_);
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 4, input, output, gradOutput, gradInput);
   THCTensor_(resizeAs)(state, gradInput, output);
diff --git a/lib/THCUNN/generic/SoftShrink.cu b/lib/THCUNN/generic/SoftShrink.cu
index 9e47695..261593f 100644
--- a/lib/THCUNN/generic/SoftShrink.cu
+++ b/lib/THCUNN/generic/SoftShrink.cu
@@ -8,9 +8,8 @@ void THNN_(SoftShrink_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal lambda_)
+           real lambda)
 {
-  real lambda = ScalarConvert<accreal, real>::to(lambda_);
   THCUNN_assertSameGPU(state, 2, input, output);
   THCTensor_(resizeAs)(state, output, input);
   THC_pointwiseApply2(state, output, input, SoftShrinkUpdateOutput<real>(lambda));
@@ -22,9 +21,8 @@ void THNN_(SoftShrink_updateGradInput)(
            THCTensor *input,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           accreal lambda_)
+           real lambda)
 {
-  real lambda = ScalarConvert<accreal, real>::to(lambda_);
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 3, input, gradOutput, gradInput);
   THCTensor_(resizeAs)(state, gradInput, input);
diff --git a/lib/THCUNN/generic/SparseLinear.cu b/lib/THCUNN/generic/SparseLinear.cu
index 6838cac..f22b233 100644
--- a/lib/THCUNN/generic/SparseLinear.cu
+++ b/lib/THCUNN/generic/SparseLinear.cu
@@ -127,8 +127,8 @@ void THNN_(SparseLinear_accGradParameters)(
            THCTensor *gradBias,
            THCTensor *weight,
            THCTensor *bias,
-           accreal weightDecay,
-           accreal scale)
+           double weightDecay,
+           double scale)
 {
   long outDim = THCTensor_(size)(state, weight, 0);
   long inDim = THCTensor_(size)(state, weight, 1);
@@ -237,8 +237,8 @@ void THNN_(SparseLinear_legacyAccGradParameters)(
            THCTensor *gradBias,
            THCTensor *weight,
            THCTensor *bias,
-           accreal weightDecay,
-           accreal scale) {
+           double weightDecay,
+           double scale) {
   THError("CUDA does not support legacy input format, please use a table of nnz x 2 vectors");
 }
 
@@ -259,7 +259,7 @@ void THNN_(SparseLinear_updateParameters)(
            THCTensor *gradWeight,
            THCTensor *gradBias,
            THCTensor *lastInput,
-           accreal learningRate) {
+           double learningRate) {
   THCTensor_(cadd)(state, weight, weight, -learningRate, gradWeight);
   THCTensor_(cadd)(state, bias, bias, -learningRate, gradBias);
 }
diff --git a/lib/THCUNN/generic/SpatialConvolutionLocal.cu b/lib/THCUNN/generic/SpatialConvolutionLocal.cu
index 0d4b9ad..afbc24d 100644
--- a/lib/THCUNN/generic/SpatialConvolutionLocal.cu
+++ b/lib/THCUNN/generic/SpatialConvolutionLocal.cu
@@ -309,9 +309,8 @@ void THNN_(SpatialConvolutionLocal_accGradParameters)(
            int padW, int padH,
            long inputWidth, long inputHeight,
            long outputWidth, long outputHeight,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_assertSameGPU(state, 5, input, gradOutput, gradWeight,
                        gradBias, finput);
 
diff --git a/lib/THCUNN/generic/SpatialConvolutionMM.cu b/lib/THCUNN/generic/SpatialConvolutionMM.cu
index b4ae8e5..e7aeacb 100644
--- a/lib/THCUNN/generic/SpatialConvolutionMM.cu
+++ b/lib/THCUNN/generic/SpatialConvolutionMM.cu
@@ -335,9 +335,8 @@ void THNN_(SpatialConvolutionMM_accGradParameters)(
            int kW, int kH,
            int dW, int dH,
            int padW, int padH,
-           accreal scale_) {
+           real scale) {
 
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_assertSameGPU(state, 5, input, gradOutput, gradWeight, columns, ones);
   if (gradBias) {
    THCUNN_assertSameGPU(state, 2, gradWeight, gradBias);
diff --git a/lib/THCUNN/generic/SpatialCrossMapLRN.cu b/lib/THCUNN/generic/SpatialCrossMapLRN.cu
index 6b79c15..a09ea0b 100644
--- a/lib/THCUNN/generic/SpatialCrossMapLRN.cu
+++ b/lib/THCUNN/generic/SpatialCrossMapLRN.cu
@@ -3,12 +3,8 @@
 #else
 
 void LRNforward(THCState* state, THCTensor* input, THCTensor* output,
-    THCTensor* scale, int local_size, accreal alpha_, accreal beta_, accreal k_)
+    THCTensor* scale, int local_size, real alpha, real beta, real k)
 {
-  real alpha = ScalarConvert<accreal, real>::to(alpha_);
-  real beta = ScalarConvert<accreal, real>::to(beta_);
-  real k = ScalarConvert<accreal, real>::to(k_);
-
   THCTensor_(resizeAs)(state, output, input);
   THCTensor_(resizeAs)(state, scale, input);
 
@@ -49,12 +45,8 @@ void LRNforward(THCState* state, THCTensor* input, THCTensor* output,
 
 void LRNbackward(THCState* state, THCTensor* input, THCTensor* output,
     THCTensor* gradOutput, THCTensor* gradInput, THCTensor* scale,
-    int local_size, accreal alpha_, accreal beta_, accreal k_)
+    int local_size, real alpha, real beta, real k)
 {
-  real alpha = ScalarConvert<accreal, real>::to(alpha_);
-  real beta = ScalarConvert<accreal, real>::to(beta_);
-  real k = ScalarConvert<accreal, real>::to(k_);
-
   THCTensor_(resizeAs)(state, gradInput, input);
 
   int batchSize;
@@ -97,9 +89,9 @@ void THNN_(SpatialCrossMapLRN_updateOutput)(
     THCTensor *output,
     THCTensor *scale,
     int size,
-    accreal alpha,
-    accreal beta,
-    accreal k)
+    real alpha,
+    real beta,
+    real k)
 {
   LRNforward(state, input, output, scale, size, alpha, beta, k);
 }
@@ -112,9 +104,9 @@ void THNN_(SpatialCrossMapLRN_updateGradInput)(
     THCTensor *scale,
     THCTensor *output,
     int size,
-    accreal alpha,
-    accreal beta,
-    accreal k)
+    real alpha,
+    real beta,
+    real k)
 {
   LRNbackward(state, input, output, gradOutput, gradInput, scale, size, alpha, beta, k);
 }
diff --git a/lib/THCUNN/generic/SpatialDilatedConvolution.cu b/lib/THCUNN/generic/SpatialDilatedConvolution.cu
index 02a640b..7b656d3 100644
--- a/lib/THCUNN/generic/SpatialDilatedConvolution.cu
+++ b/lib/THCUNN/generic/SpatialDilatedConvolution.cu
@@ -322,9 +322,8 @@ void THNN_(SpatialDilatedConvolution_accGradParameters)(
            int dW, int dH,
            int padW, int padH,
            int dilationW, int dilationH,
-           accreal scale_) {
+           real scale) {
 
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_assertSameGPU(state, 5, input, gradOutput, gradWeight, columns, ones);
   if (gradBias) {
    THCUNN_assertSameGPU(state, 2, gradWeight, gradBias);
diff --git a/lib/THCUNN/generic/SpatialFullConvolution.cu b/lib/THCUNN/generic/SpatialFullConvolution.cu
index 54fda23..7a5d7ea 100644
--- a/lib/THCUNN/generic/SpatialFullConvolution.cu
+++ b/lib/THCUNN/generic/SpatialFullConvolution.cu
@@ -315,9 +315,8 @@ void THNN_(SpatialFullConvolution_accGradParameters)(
            int dW, int dH,
            int padW, int padH,
            int adjW, int adjH,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   int nInputPlane = THCTensor_(size)(state, gradWeight, 0);
   int nOutputPlane = THCTensor_(size)(state, gradWeight, 1);
 
diff --git a/lib/THCUNN/generic/SpatialSubSampling.cu b/lib/THCUNN/generic/SpatialSubSampling.cu
index ef3c508..b918962 100644
--- a/lib/THCUNN/generic/SpatialSubSampling.cu
+++ b/lib/THCUNN/generic/SpatialSubSampling.cu
@@ -191,7 +191,7 @@ void THNN_(SpatialSubSampling_accGradParameters)(
            THCTensor *gradBias,
            int kW, int kH,
            int dW, int dH,
-           accreal scale)
+           float scale)
 {
   THCUNN_assertSameGPU(state, 4, input, gradOutput, gradWeight, gradBias);
   THNN_(SpatialSubSampling_shapeCheck)(state, input, gradOutput, gradWeight, kW, kH);
diff --git a/lib/THCUNN/generic/Sqrt.cu b/lib/THCUNN/generic/Sqrt.cu
index b6a68f8..3602cbe 100644
--- a/lib/THCUNN/generic/Sqrt.cu
+++ b/lib/THCUNN/generic/Sqrt.cu
@@ -8,9 +8,8 @@ void THNN_(Sqrt_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal eps_)
+           real eps)
 {
-  real eps = ScalarConvert<accreal, real>::to(eps_);
   THCUNN_assertSameGPU(state, 2, input, output);
   THCTensor_(resizeAs)(state, output, input);
   THC_pointwiseApply2(state, output, input, sqrtupdateOutput_functor<real>(eps));
diff --git a/lib/THCUNN/generic/THCUNN.h b/lib/THCUNN/generic/THCUNN.h
index c9d7e2c..bf903b9 100644
--- a/lib/THCUNN/generic/THCUNN.h
+++ b/lib/THCUNN/generic/THCUNN.h
@@ -54,7 +54,7 @@ TH_API void THNN_(BatchNormalization_backward)(
                   THCTensor *saveMean_,
                   THCTensor *saveStd_,
                   bool train,
-                  double scale,
+                  float scale,
                   double eps);
 
 TH_API void THNN_(BCECriterion_updateOutput)(
@@ -109,7 +109,7 @@ TH_API void THNN_(ELU_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal alpha,
+                  real alpha,
                   bool inplace);
 
 TH_API void THNN_(ELU_updateGradInput)(
@@ -118,15 +118,15 @@ TH_API void THNN_(ELU_updateGradInput)(
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
                   THCTensor *output,
-                  accreal alpha,
+                  real alpha,
                   bool inplace);
 
 TH_API void THNN_(HardTanh_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal min_val,
-                  accreal max_val,
+                  real min_val,
+                  real max_val,
                   bool inplace);
 
 TH_API void THNN_(HardTanh_updateGradInput)(
@@ -134,15 +134,15 @@ TH_API void THNN_(HardTanh_updateGradInput)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  accreal min_val,
-                  accreal max_val,
+                  real min_val,
+                  real max_val,
                   bool inplace);
 
 TH_API void THNN_(LeakyReLU_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal negval,
+                  real negval,
                   bool inplace);
 
 TH_API void THNN_(LeakyReLU_updateGradInput)(
@@ -150,7 +150,7 @@ TH_API void THNN_(LeakyReLU_updateGradInput)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  accreal negval,
+                  real negval,
                   bool inplace);
 
 TH_API void THNN_(LogSigmoid_updateOutput)(
@@ -188,14 +188,14 @@ TH_API void THNN_(LookupTable_accGradParameters)(
                   THCIndexTensor *indices,      // [OPTIONAL]
                   bool scaleGradByFreq,
                   int paddingValue,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(LookupTable_renorm)(
                   THCState *state,
                   THCIndexTensor *idx,
                   THCTensor *weight,
-                  accreal maxNorm,
-                  accreal normType);
+                  real maxNorm,
+                  real normType);
 
 TH_API void THNN_(L1Cost_updateOutput)(
                   THCState *state,
@@ -214,7 +214,7 @@ TH_API void THNN_(MarginCriterion_updateOutput)(
                   THCTensor *target,
                   THCTensor *output,
                   bool sizeAverage,
-                  accreal margin);
+                  real margin);
 
 TH_API void THNN_(MarginCriterion_updateGradInput)(
                   THCState *state,
@@ -222,7 +222,7 @@ TH_API void THNN_(MarginCriterion_updateGradInput)(
                   THCTensor *target,
                   THCTensor *gradInput,
                   bool sizeAverage,
-                  accreal margin);
+                  real margin);
 
 TH_API void THNN_(MSECriterion_updateOutput)(
                   THCState *state,
@@ -262,7 +262,7 @@ TH_API void THNN_(MultiMarginCriterion_updateOutput)(
                   bool sizeAverage,
                   int p,
                   THCTensor *weights,           // [OPTIONAL]
-                  accreal margin);
+                  real margin);
 
 TH_API void THNN_(MultiMarginCriterion_updateGradInput)(
                   THCState *state,
@@ -272,7 +272,7 @@ TH_API void THNN_(MultiMarginCriterion_updateGradInput)(
                   bool sizeAverage,
                   int p,
                   THCTensor *weights,           // [OPTIONAL]
-                  accreal margin);
+                  real margin);
 
 TH_API void THNN_(PReLU_updateOutput)(
                   THCState *state,
@@ -299,7 +299,7 @@ TH_API void THNN_(PReLU_accGradParameters)(
                   THCTensor *gradWeightBuf,
                   THCTensor *gradWeightBuf2,
                   long nOutputPlane,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(SmoothL1Criterion_updateOutput)(
                   THCState *state,
@@ -330,8 +330,8 @@ TH_API void THNN_(SparseLinear_accGradParameters)(
                   THCTensor *gradBias,
                   THCTensor *weight,
                   THCTensor *bias,
-                  accreal weightDecay,
-                  accreal scale);
+                  double weightDecay,
+                  double scale);
 
 TH_API void THNN_(SparseLinear_legacyUpdateOutput)(
                   THCState *state,
@@ -348,8 +348,8 @@ TH_API void THNN_(SparseLinear_legacyAccGradParameters)(
                   THCTensor *gradBias,
                   THCTensor *weight,
                   THCTensor *bias,
-                  accreal weightDecay,
-                  accreal scale);
+                  double weightDecay,
+                  double scale);
 
 TH_API void THNN_(SparseLinear_zeroGradParameters)(
                   THCState *state,
@@ -364,7 +364,7 @@ TH_API void THNN_(SparseLinear_updateParameters)(
                   THCTensor *gradWeight,
                   THCTensor *gradBias,
                   THCTensor *lastInput,
-                  accreal learningRate);
+                  double learningRate);
 
 TH_API void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
                   THCState *state,
@@ -461,7 +461,7 @@ TH_API void THNN_(SpatialConvolutionLocal_accGradParameters)(
                   int padW, int padH,
                   long inputWidth, long inputHeight,
                   long outputWidth, long outputHeight,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(SpatialConvolutionMM_updateOutput)(
                   THCState *state,
@@ -498,7 +498,7 @@ TH_API void THNN_(SpatialConvolutionMM_accGradParameters)(
                   int kW, int kH,
                   int dW, int dH,
                   int padW, int padH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(SpatialCrossMapLRN_updateOutput)(
                   THCState *state,
@@ -506,9 +506,9 @@ TH_API void THNN_(SpatialCrossMapLRN_updateOutput)(
                   THCTensor *output,
                   THCTensor *scale,
                   int size,
-                  accreal alpha,
-                  accreal beta,
-                  accreal k);
+                  real alpha,
+                  real beta,
+                  real k);
 
 TH_API void THNN_(SpatialCrossMapLRN_updateGradInput)(
                   THCState *state,
@@ -518,9 +518,9 @@ TH_API void THNN_(SpatialCrossMapLRN_updateGradInput)(
                   THCTensor *scale,
                   THCTensor *output,
                   int size,
-                  accreal alpha,
-                  accreal beta,
-                  accreal k);
+                  real alpha,
+                  real beta,
+                  real k);
 
 TH_API void THNN_(SpatialDilatedConvolution_updateOutput)(
                   THCState *state,
@@ -559,7 +559,7 @@ TH_API void THNN_(SpatialDilatedConvolution_accGradParameters)(
                   int dW, int dH,
                   int padW, int padH,
                   int dilationW, int dilationH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(SpatialDilatedMaxPooling_updateOutput)(
                   THCState *state,
@@ -639,7 +639,7 @@ TH_API void THNN_(SpatialFullConvolution_accGradParameters)(
                   int dW, int dH,
                   int padW, int padH,
                   int adjW, int adjH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(SpatialMaxPooling_updateOutput)(
                   THCState *state,
@@ -733,7 +733,7 @@ TH_API void THNN_(SpatialSubSampling_accGradParameters)(
                   THCTensor *gradBias,
                   int kW, int kH,
                   int dW, int dH,
-                  accreal scale);
+                  float scale);
 
 TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)(
                   THCState *state,
@@ -830,8 +830,8 @@ TH_API void THNN_(SoftPlus_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal beta,
-                  accreal threshold);
+                  real beta,
+                  real threshold);
 
 TH_API void THNN_(SoftPlus_updateGradInput)(
                   THCState *state,
@@ -839,21 +839,21 @@ TH_API void THNN_(SoftPlus_updateGradInput)(
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
                   THCTensor *output,
-                  accreal beta,
-                  accreal threshold);
+                  real beta,
+                  real threshold);
 
 TH_API void THNN_(SoftShrink_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal lambda);
+                  real lambda);
 
 TH_API void THNN_(SoftShrink_updateGradInput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  accreal lambda);
+                  real lambda);
 
 TH_API void THNN_(Square_updateOutput)(
                   THCState *state,
@@ -870,7 +870,7 @@ TH_API void THNN_(Sqrt_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal eps);
+                  real eps);
 
 TH_API void THNN_(Sqrt_updateGradInput)(
                   THCState *state,
@@ -916,7 +916,7 @@ TH_API void THNN_(TemporalConvolution_accGradParameters)(
                   THCTensor *gradWeight,
                   THCTensor *gradBias,
                   int kW, int dW,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(TemporalMaxPooling_updateOutput)(
                   THCState *state,
@@ -937,8 +937,8 @@ TH_API void THNN_(Threshold_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  accreal threshold,
-                  accreal val,
+                  real threshold,
+                  real val,
                   bool inplace);
 
 TH_API void THNN_(Threshold_updateGradInput)(
@@ -946,8 +946,8 @@ TH_API void THNN_(Threshold_updateGradInput)(
                   THCTensor *input,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  accreal threshold,
-                  accreal val,
+                  real threshold,
+                  real val,
                   bool inplace);
 
 TH_API void THNN_(VolumetricAveragePooling_updateOutput)(
@@ -996,7 +996,7 @@ TH_API void THNN_(VolumetricConvolution_accGradParameters)(
                   THCTensor *fgradInput,
                   int dT, int dW, int dH,
                   int padT, int padW, int padH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)(
                   THCState *state,
@@ -1035,7 +1035,7 @@ TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)(
                   int dT, int dW, int dH,
                   int padT, int padW, int padH,
                   int dilationT, int dilationW, int dilationH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
                   THCState *state,
@@ -1095,7 +1095,7 @@ TH_API void THNN_(VolumetricFullConvolution_accGradParameters)(
                   int dT, int dW, int dH,
                   int padT, int padW, int padH,
                   int adjT, int adjW, int adjH,
-                  accreal scale);
+                  real scale);
 
 TH_API void THNN_(VolumetricMaxPooling_updateOutput)(
                   THCState *state,
diff --git a/lib/THCUNN/generic/TemporalConvolution.cu b/lib/THCUNN/generic/TemporalConvolution.cu
index 5658527..a51894d 100644
--- a/lib/THCUNN/generic/TemporalConvolution.cu
+++ b/lib/THCUNN/generic/TemporalConvolution.cu
@@ -273,9 +273,8 @@ void THNN_(TemporalConvolution_accGradParameters)(
            THCTensor *gradWeight,
            THCTensor *gradBias,
            int kW, int dW,
-           accreal scale_) {
+           real scale) {
 
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   long nInputFrame;
   long nOutputFrame;
 
diff --git a/lib/THCUNN/generic/Threshold.cu b/lib/THCUNN/generic/Threshold.cu
index 0b7b79e..4f9f622 100644
--- a/lib/THCUNN/generic/Threshold.cu
+++ b/lib/THCUNN/generic/Threshold.cu
@@ -8,12 +8,10 @@ void THNN_(Threshold_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           accreal threshold_,
-           accreal val_,
+           real threshold,
+           real val,
            bool inplace)
 {
-  real threshold = ScalarConvert<accreal, real>::to(threshold_);
-  real val = ScalarConvert<accreal, real>::to(val_);
   THCUNN_assertSameGPU(state, 2, input, output);
 
   if (inplace)
@@ -39,12 +37,10 @@ void THNN_(Threshold_updateGradInput)(
            THCTensor *input,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           accreal threshold_,
-           accreal val_,
+           real threshold,
+           real val,
            bool inplace)
 {
-  real threshold = ScalarConvert<accreal, real>::to(threshold_);
-  real val = ScalarConvert<accreal, real>::to(val_);
   THCUNN_check_nElement(state, input, gradOutput);
   THCUNN_assertSameGPU(state, 3, input, gradInput, gradOutput);
 
diff --git a/lib/THCUNN/generic/VolumetricConvolution.cu b/lib/THCUNN/generic/VolumetricConvolution.cu
index 5b982c9..d6da545 100644
--- a/lib/THCUNN/generic/VolumetricConvolution.cu
+++ b/lib/THCUNN/generic/VolumetricConvolution.cu
@@ -362,9 +362,8 @@ void THNN_(VolumetricConvolution_accGradParameters)(
            THCTensor *fgradInput,
            int dT, int dW, int dH,
            int padT, int padW, int padH,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCTensor *columns = finput;
   THCTensor *ones = fgradInput;
   THCUNN_assertSameGPU(state, 6, input, gradOutput, gradWeight, gradBias, columns, ones);
diff --git a/lib/THCUNN/generic/VolumetricDilatedConvolution.cu b/lib/THCUNN/generic/VolumetricDilatedConvolution.cu
index ffeea7f..b0145a5 100644
--- a/lib/THCUNN/generic/VolumetricDilatedConvolution.cu
+++ b/lib/THCUNN/generic/VolumetricDilatedConvolution.cu
@@ -336,9 +336,8 @@ void THNN_(VolumetricDilatedConvolution_accGradParameters)(
            int dT, int dW, int dH,
            int padT, int padW, int padH,
            int dilationT, int dilationW, int dilationH,
-           accreal scale_) {
+           real scale) {
 
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCUNN_assertSameGPU(state, 5, input, gradOutput, gradWeight, columns, ones);
   if (gradBias) {
    THCUNN_assertSameGPU(state, 2, gradWeight, gradBias);
diff --git a/lib/THCUNN/generic/VolumetricFullConvolution.cu b/lib/THCUNN/generic/VolumetricFullConvolution.cu
index 127babc..334c7da 100644
--- a/lib/THCUNN/generic/VolumetricFullConvolution.cu
+++ b/lib/THCUNN/generic/VolumetricFullConvolution.cu
@@ -344,9 +344,8 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
            int dT, int dW, int dH,
            int padT, int padW, int padH,
            int adjT, int adjW, int adjH,
-           accreal scale_)
+           real scale)
 {
-  real scale = ScalarConvert<accreal, real>::to(scale_);
   THCTensor  *columns = finput;
   THCTensor  *ones = fgradInput;
 
diff --git a/test.lua b/test.lua
index 5ab07bf..c3ed9bb 100644
--- a/test.lua
+++ b/test.lua
@@ -365,17 +365,17 @@ function cunntest.Square_transposed()
 end
 
 function cunntest.SoftShrink_forward()
-  local r = math.random()
+  local r = THC.THC_half2float(THC.THC_float2half(math.random()))
   pointwise_forward(nn.SoftShrink(r), 'SoftShrink', precision_forward)
 end
 
 function cunntest.SoftShrink_backward()
-  local r = math.random()
+  local r = THC.THC_half2float(THC.THC_float2half(math.random()))
   pointwise_backward(nn.SoftShrink(r), 'SoftShrink', precision_backward)
 end
 
 function cunntest.SoftShrink_transposed()
-  local r = math.random()
+  local r = THC.THC_half2float(THC.THC_float2half(math.random()))
   pointwise_transposed(nn.SoftShrink(r), 'SoftShrink', precision_backward)
 end
 
@@ -2045,8 +2045,8 @@ function cunntest.SpatialMaxPooling_forward()
    local sj = math.random(1,4)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2083,8 +2083,8 @@ function cunntest.SpatialMaxPooling_forward_batch()
    local sj = math.random(2,4)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2118,8 +2118,8 @@ function cunntest.SpatialMaxUnpooling_forward_batch()
    local sj = kj
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ceil_mode = math.random(0,1) == 1
    local fun = ceil_mode and torch.ceil or torch.floor
    local ini = fun((outi + padi*2 - ki)/si) +1
@@ -2159,8 +2159,8 @@ function cunntest.SpatialMaxPooling_backward()
    local sj = math.random(1,4)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = true--math.random(0,1) == 1
@@ -2203,8 +2203,8 @@ function cunntest.SpatialMaxPooling_backward_batch()
    local sj = math.random(2,4)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2246,8 +2246,8 @@ function cunntest.SpatialMaxUnpooling_backward_batch()
    local sj = kj
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ceil_mode = math.random(0,1) == 1
    local fun = ceil_mode and torch.ceil or torch.floor
    local ini = fun((outi + padi*2 - ki)/si) +1
@@ -2296,8 +2296,8 @@ function cunntest.SpatialDilatedMaxPooling_forward()
    local sj = math.random(1,4)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
    local ini = (outi-1)*si+(dilationi*(ki-1)+1)-2*padi
@@ -2336,8 +2336,8 @@ function cunntest.SpatialDilatedMaxPooling_forward_batch()
    local sj = math.random(2,4)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
    local ini = (outi-1)*si+(dilationi*(ki-1)+1)-2*padi
@@ -2372,8 +2372,8 @@ function cunntest.SpatialDilatedMaxPooling_backward()
    local sj = math.random(1,4)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
    local ini = (outi-1)*si+(dilationi*(ki-1)+1)-2*padi
@@ -2417,8 +2417,8 @@ function cunntest.SpatialDilatedMaxPooling_backward_batch()
    local sj = math.random(2,4)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
    local ini = (outi-1)*si+(dilationi*(ki-1)+1)-2*padi
@@ -2611,8 +2611,8 @@ function cunntest.SpatialAveragePooling_forward()
    local sj = math.random(1,kj)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2650,8 +2650,8 @@ function cunntest.SpatialAveragePooling_forward_batch()
    local sj = math.random(1,kj)
    local outi = math.random(32,256)
    local outj = math.random(32,256)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2688,8 +2688,8 @@ function cunntest.SpatialAveragePooling_backward()
    local sj = math.random(1,kj)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -2735,8 +2735,8 @@ function cunntest.SpatialAveragePooling_backward_batch()
    local sj = math.random(1,kj)
    local outi = math.random(32,64)
    local outj = math.random(32,64)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local ini = (outi-1)*si+ki - padi*2
    local inj = (outj-1)*sj+kj - padj*2
    local ceil_mode = math.random(0,1) == 1
@@ -3401,6 +3401,9 @@ function cunntest.mse()
          local cout = cmod:forward(cinput,ctarget)
          local cgin = cmod:backward(cinput,ctarget)
 
+         if (typename == 'torch.CudaHalfTensor') then
+            fout = THC.THC_half2float(THC.THC_float2half(fout))
+         end
          mytester:assertlt(math.abs(fout-cout), precision_forward_type(0.02, typename),
             string.format('error  on output with %s', typename))
          local gerr = cgin:double() - fgin:double()
@@ -3432,6 +3435,9 @@ function cunntest.SmoothL1()
          local cout = cmod:forward(cinput,ctarget)
          local cgin = cmod:backward(cinput,ctarget)
 
+         if (typename == 'torch.CudaHalfTensor') then
+            fout = THC.THC_half2float(THC.THC_float2half(fout))
+         end
          mytester:assertlt(math.abs(fout-cout), 0.01, string.format('error  on output with %s', typename))
          local gerr = cgin:double() - fgin:double()
          mytester:assertlt(gerr:abs():max(), precision_forward_type(precision_forward, typename),
@@ -3994,6 +4000,9 @@ function cunntest.l1cost()
      local cout = cmod:forward(cinput)
      local cgin = cmod:backward(cinput)
 
+     if (typename == 'torch.CudaHalfTensor') then
+        fout = THC.THC_half2float(THC.THC_float2half(fout))
+     end
      mytester:assertlt(math.abs(fout-cout), precision_forward_type(precision_forward, typename),
         string.format('error  on output with %s', typename))
      local gerr = cgin:double() - fgin:double()
@@ -4395,9 +4404,9 @@ function cunntest.VolumetricMaxPooling_forward()
    local iT = math.random(kT*2, 60)
    local iH = math.random(kH*2, 60)
    local iW = math.random(kW*2, 60)
-   local padT = math.random(0,math.floor(kT/2)-1)
-   local padH = math.random(0,math.floor(kH/2)-1)
-   local padW = math.random(0,math.floor(kW/2)-1)
+   local padT = math.random(0,kT/2-1)
+   local padH = math.random(0,kH/2-1)
+   local padW = math.random(0,kW/2-1)
    local iF = math.random(1, 16) -- features
    local oT = math.floor((iT - kT + 2*padT) / dT + 1)
    local oH = math.floor((iH - kH + 2*padH) / dH + 1)
@@ -4431,9 +4440,9 @@ function cunntest.VolumetricMaxPooling_backward()
    local iT = math.random(kT*2, 60)
    local iH = math.random(kH*2, 60)
    local iW = math.random(kW*2, 60)
-   local padT = math.random(0,math.floor(kT/2)-1)
-   local padH = math.random(0,math.floor(kH/2)-1)
-   local padW = math.random(0,math.floor(kW/2)-1)
+   local padT = math.random(0,kT/2-1)
+   local padH = math.random(0,kH/2-1)
+   local padW = math.random(0,kW/2-1)
    local iF = math.random(1, 16) -- features
    local oT = math.floor((iT - kT + 2*padT) / dT + 1)
    local oH = math.floor((iH - kH + 2*padH) / dH + 1)
@@ -4475,9 +4484,9 @@ function cunntest.VolumetricDilatedMaxPooling_forward_batch()
    local outt = math.random(1,10)
    local outi = math.random(1,33)
    local outj = math.random(1,33)
-   local padt = math.random(0,math.floor(kt/2)-1)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padt = math.random(0,kt/2-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationt = math.random(1,10)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
@@ -4519,9 +4528,9 @@ function cunntest.VolumetricDilatedMaxPooling_backward_batch()
    local outt = math.random(8,16)
    local outi = math.random(8,16)
    local outj = math.random(8,16)
-   local padt = math.random(0,math.floor(kt/2)-1)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padt = math.random(0,kt/2-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local dilationt = math.random(1,10)
    local dilationi = math.random(1,10)
    local dilationj = math.random(1,10)
@@ -4569,9 +4578,9 @@ function cunntest.VolumetricMaxUnpooling_forward_batch()
    local outt = math.random(32,128)
    local outi = math.random(32,128)
    local outj = math.random(32,128)
-   local padt = math.random(0,math.floor(kt/2)-1)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padt = math.random(0,kt/2-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local it = math.max(((outt + padt*2 - kt)/st) +1, kt)
    local ii = math.max(((outi + padi*2 - ki)/si) +1, ki)
    local ij = math.max(((outj + padj*2 - kj)/sj) +1, kj)
@@ -4610,9 +4619,9 @@ function cunntest.VolumetricMaxUnpooling_backward_batch()
    local outt = math.random(32,128)
    local outi = math.random(32,128)
    local outj = math.random(32,128)
-   local padt = math.random(0,math.floor(kt/2)-1)
-   local padi = math.random(0,math.floor(ki/2)-1)
-   local padj = math.random(0,math.floor(kj/2)-1)
+   local padt = math.random(0,kt/2-1)
+   local padi = math.random(0,ki/2-1)
+   local padj = math.random(0,kj/2-1)
    local it = math.max(((outt + padt*2 - kt)/st) +1, kt)
    local ii = math.max(((outi + padi*2 - ki)/si) +1, ki)
    local ij = math.max(((outj + padj*2 - kj)/sj) +1, kj)
@@ -4937,8 +4946,8 @@ function cunntest.VolumetricFullConvolution_pair_test()
     local dT = math.random(1,3)
     local dH = math.random(1,3)
     local dW = dH
-    local pT = math.floor((kT-1)/2)
-    local pH = math.floor((kH-1)/2)
+    local pT = (kT-1)/2
+    local pH = (kH-1)/2
     local pW = pH
 
     local inChan = math.random(1,32)
-- 
cgit v1.2.3