diff options
Diffstat (limited to 'generic/HardTanh.c')
-rw-r--r-- | generic/HardTanh.c | 82 |
1 files changed, 70 insertions, 12 deletions
diff --git a/generic/HardTanh.c b/generic/HardTanh.c index 3764095..bfd1a42 100644 --- a/generic/HardTanh.c +++ b/generic/HardTanh.c @@ -8,14 +8,42 @@ static int nn_(HardTanh_updateOutput)(lua_State *L) THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); THTensor_(resizeAs)(output, input); + + if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output)) + { + TH_TENSOR_APPLY2(real, output, real, input, \ + if(*input_data < -1) \ + *output_data = -1; \ + else if(*input_data <= 1) \ + *output_data = *input_data; \ + else \ + *output_data = 1;); + } + else + { + real* output_data = THTensor_(data)(output); + real* input_data = THTensor_(data)(input); + long k; - TH_TENSOR_APPLY2(real, output, real, input, \ - if(*input_data < -1) \ - *output_data = -1; \ - else if(*input_data <= 1) \ - *output_data = *input_data; \ - else \ - *output_data = 1;) + +#pragma omp parallel for private(k) + for (k = 0; k < input->size[0]; k++) + { + real* ptr_output = output_data + k*input->stride[0]; + real* ptr_input = input_data + k*input->stride[0]; + long i; + for (i = 0; i < input->stride[0]; i++) + { + if(ptr_input[i] < -1) + ptr_output[i] = -1; + else if (ptr_input[i] <= 1) + ptr_output[i] = ptr_input[i]; + else + ptr_output[i] = 1; + } + } + } + return 1; } @@ -26,11 +54,41 @@ static int nn_(HardTanh_updateGradInput)(lua_State *L) THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); THTensor_(resizeAs)(gradInput, input); - TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ - if(*input_data < -1 || *input_data > 1) \ - *gradInput_data = 0; \ - else \ - *gradInput_data = *gradOutput_data;); + + if (input->nDimension == 1 || + !THTensor_(isContiguous)(input) || + !THTensor_(isContiguous)(gradOutput) || + !THTensor_(isContiguous)(gradInput)) + { + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if(*input_data < -1 || *input_data > 1) \ + *gradInput_data = 0; \ + else \ + *gradInput_data = *gradOutput_data;); + } + else + { + real* gradOutput_data = THTensor_(data)(gradOutput); + real* gradInput_data = THTensor_(data)(gradInput); + real* input_data = THTensor_(data)(input); + long k; + +#pragma omp parallel for private(k) + for (k = 0; k < input->size[0]; k++) + { + real* ptr_gradOutput = gradOutput_data + k*input->stride[0]; + real* ptr_gradInput = gradInput_data + k*input->stride[0]; + real* ptr_input = input_data + k*input->stride[0]; + long i; + for (i = 0; i < input->stride[0]; i++) + { + if(ptr_input[i] < -1 || ptr_input[i] > 1) + ptr_gradInput[i] = 0; + else + ptr_gradInput[i] = ptr_gradOutput[i]; + } + } + } return 1; } |