
github.com/torch/nn.git
author     Soumith Chintala <soumith@gmail.com>  2014-11-27 02:40:45 +0300
committer  Soumith Chintala <soumith@gmail.com>  2014-11-27 02:40:45 +0300
commit     b9212fde4c889d2e811534777ebb124b08ba2535 (patch)
tree       f44f1828d610690e4995ef0fdf0cc31a9daa266e
parent     4415d82948b0cf8317e7a5ba39b47f31eda4bccf (diff)
parent     457546e22c27115f76de700e3c1f8befa05b08f8 (diff)
Merge pull request #113 from ajtulloch/log-soft-max
generic/LogSoftMax speedups
-rw-r--r--  generic/LogSoftMax.c  37
1 file changed, 22 insertions, 15 deletions
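For reference, LogSoftMax's forward pass computes, for each input row, output[d] = input[d] - (max + log(sum_k exp(input[k] - max))), the numerically stable form of input[d] - log(sum_k exp(input[k])); that is the per-frame work the patch below parallelizes. A minimal sketch of that per-row computation on plain float arrays (a hypothetical helper, not the TH generic real/accreal code with THExpMinusApprox that the patch actually touches):

#include <math.h>
#include <float.h>

/* Per-row log-softmax on a contiguous float row of length dim
   (plain-C sketch; the patched file operates on TH tensors with the
   generic real/accreal types). */
static void log_softmax_row(const float *input, float *output, long dim)
{
  long d;
  float maxInput = -FLT_MAX;
  double logsum = 0;

  for(d = 0; d < dim; d++)
    maxInput = (input[d] > maxInput) ? input[d] : maxInput;

  /* logsum = max + log(sum_d exp(input[d] - max)), the stable form of
     log(sum_d exp(input[d])) */
  for(d = 0; d < dim; d++)
    logsum += exp(input[d] - maxInput);
  logsum = maxInput + log(logsum);

  for(d = 0; d < dim; d++)
    output[d] = input[d] - (float)logsum;
}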
diff --git a/generic/LogSoftMax.c b/generic/LogSoftMax.c
index 7741e3b..75b8587 100644
--- a/generic/LogSoftMax.c
+++ b/generic/LogSoftMax.c
@@ -26,12 +26,19 @@ static int nn_(LogSoftMax_updateOutput)(lua_State *L)
   input = THTensor_(newContiguous)(input);
   THTensor_(resizeAs)(output, input);
-  input_data = THTensor_(data)(input);
-  output_data = THTensor_(data)(output);
+  real* input_data0 = THTensor_(data)(input);
+  real* output_data0 = THTensor_(data)(output);
+
+  accreal logsum;
+  real maxInput;
+#pragma omp parallel for private(t, d, maxInput, logsum, input_data, \
+                                 output_data)
   for(t = 0; t < nframe; t++)
   {
-    accreal logsum = 0;
-    real maxInput = -THInf;
+    logsum = 0;
+    maxInput = -THInf;
+    input_data = input_data0 + dim*t;
+    output_data = output_data0 + dim*t;
     for(d = 0; d < dim; d++)
       maxInput = THMax(maxInput, input_data[d]);
@@ -42,9 +49,6 @@ static int nn_(LogSoftMax_updateOutput)(lua_State *L)
     for(d = 0; d < dim; d++)
       output_data[d] = input_data[d] - logsum;
-
-    input_data += dim;
-    output_data += dim;
   }
   THTensor_(free)(input);
@@ -75,21 +79,24 @@ static int nn_(LogSoftMax_updateGradInput)(lua_State *L)
     THError("vector or matrix expected");
   THTensor_(resizeAs)(gradInput, output);
-  gradInput_data = THTensor_(data)(gradInput);
-  output_data = THTensor_(data)(output);
-  gradOutput_data = THTensor_(data)(gradOutput);
+  real* gradInput_data0 = THTensor_(data)(gradInput);
+  real* output_data0 = THTensor_(data)(output);
+  real* gradOutput_data0 = THTensor_(data)(gradOutput);
+  accreal sum;
+#pragma omp parallel for private(t, sum, d, gradInput_data, output_data, \
+                                 gradOutput_data)
   for(t = 0; t < nframe; t++)
   {
-    accreal sum = 0;
+    sum = 0;
+    gradInput_data = gradInput_data0 + dim*t;
+    output_data = output_data0 + dim*t;
+    gradOutput_data = gradOutput_data0 + dim*t;
+
     for(d = 0; d < dim; d++)
       sum += gradOutput_data[d];
     for(d = 0; d < dim; d++)
       gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;
-
-    gradInput_data += dim;
-    output_data += dim;
-    gradOutput_data += dim;
   }
   return 1;
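Why the pointer arithmetic changed: the old code advanced input_data/output_data (and the gradient pointers) by dim after each frame, which makes every iteration depend on the previous one. Under #pragma omp parallel for the iterations run concurrently, so each one instead derives its row pointers from the loop index (base + dim*t), and the per-frame temporaries are hoisted out and listed in the private(...) clause so every thread gets its own copy. A sketch of the same pattern for the backward pass on plain float arrays (hypothetical names, not the TH API); declaring the per-row pointers inside the loop body makes them thread-private without an explicit private clause, which the patch cannot do because its declarations stay at function scope:

#include <math.h>

/* Backward-pass sketch: gradInput = gradOutput - exp(output) * sum(gradOutput),
   computed row by row. Each iteration derives its pointers from t, so
   iterations are independent and safe to run in parallel. */
static void log_softmax_backward(const float *output0, const float *gradOutput0,
                                 float *gradInput0, long nframe, long dim)
{
  long t;
#pragma omp parallel for
  for(t = 0; t < nframe; t++)
  {
    const float *output     = output0     + dim*t;
    const float *gradOutput = gradOutput0 + dim*t;
    float *gradInput        = gradInput0  + dim*t;
    double sum = 0;
    long d;

    for(d = 0; d < dim; d++)
      sum += gradOutput[d];

    for(d = 0; d < dim; d++)
      gradInput[d] = gradOutput[d] - (float)(exp(output[d]) * sum);
  }
}

Compiled with OpenMP enabled (e.g. -fopenmp), the outer loop runs one frame per thread; without it, the pragma is ignored and the code runs serially, matching the original behaviour.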