Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/nn.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlykhan Tejani <alykhan.tejani@gmail.com>2017-07-24 21:58:43 +0300
committerSoumith Chintala <soumith@gmail.com>2017-08-27 05:19:39 +0300
commitcd13dead56a74a0fdb3522ba42f12c36a75ba9b0 (patch)
tree9e353922684c80ed06323244f514f0ef72bf5945
parentc5972818f19fbc40855137d86d1b175c8dc04a5a (diff)
Add numerically stable logsigmoid
-rw-r--r--lib/THNN/generic/LogSigmoid.c27
1 files changed, 21 insertions, 6 deletions
diff --git a/lib/THNN/generic/LogSigmoid.c b/lib/THNN/generic/LogSigmoid.c
index 651d560..5569f1f 100644
--- a/lib/THNN/generic/LogSigmoid.c
+++ b/lib/THNN/generic/LogSigmoid.c
@@ -10,11 +10,12 @@ void THNN_(LogSigmoid_updateOutput)(
{
THTensor_(resizeAs)(output, input);
THTensor_(resizeAs)(buffer, input);
-
+ //Use the LogSumExp trick to make this stable against overflow
TH_TENSOR_APPLY3(real, output, real, input, real, buffer,
- real z = exp(-*input_data);
+ real max_elem = fmax(0, -*input_data);
+ real z = exp(-max_elem) + exp(-*input_data - max_elem);
*buffer_data = z;
- *output_data = -log(1. + z);
+ *output_data = -(max_elem + log(z));
);
}
@@ -27,10 +28,24 @@ void THNN_(LogSigmoid_updateGradInput)(
{
THNN_CHECK_NELEMENT(input, gradOutput);
THTensor_(resizeAs)(gradInput, buffer);
- TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer,
+/* deriv of -max(0,-x) - log(e(0 - max(0,-x)) + e(-x - max(0,-x)) is
+ * -max_deriv - (-max_deriv*e(0-max(0,-x)) + (-1 - max_deriv)*e(-x - max(0,-x)))/z
+ * where z = e(0 - max(0,-x)) + e(-x - max(0,-x))
+ * which simplifies to
+ * -max_deriv - (z-1)/z if x is >= 0 or
+ * -max_deriv + (z-1)/z if x is < 0
+ */
+ TH_TENSOR_APPLY3(real, input, real, gradInput, real, buffer,
real z = *buffer_data;
- *gradInput_data = *gradOutput_data * z / (1. + z);
- );
+ real max_deriv = 0.0;
+ real sign = -1.0;
+ if (*input_data < 0){
+ max_deriv = -1.0;
+ sign = 1.0;
+ }
+ *gradInput_data = -max_deriv - sign*((z - 1.0)/ z);
+ );
+ THTensor_(cmul)(gradInput, gradOutput, gradInput);
}
#endif