Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jokeren <robinho364@gmail.com> 2017-01-30 15:07:49 +0300
committer Soumith Chintala <soumith@gmail.com> 2017-02-23 14:01:13 +0300
commit 503ee716da6c9eb64b471614792132552e247391 (patch)
tree 8b5662ba4d0c32e1735523cb6deb5595c41a3328
parent c3f920f75439ca17704890510f5662b4e4e8af0f (diff)
THTensorApply2 counter compress
-rw-r--r-- lib/TH/THTensorApply.h 120
1 files changed, 69 insertions, 51 deletions
diff --git a/lib/TH/THTensorApply.h b/lib/TH/THTensorApply.h
index 32cce0f..4ce4006 100644
--- a/lib/TH/THTensorApply.h
+++ b/lib/TH/THTensorApply.h
@@ -209,12 +209,13 @@
#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
{ \
TYPE1 *TENSOR1##_data = NULL; \
- long *TENSOR1##_counter = NULL; \
+ long *TENSOR1##_counter = NULL, *TENSOR1##_dims = NULL, *TENSOR1##_strides = NULL; \
long TENSOR1##_stride = 0, TENSOR1##_size = 0, TENSOR1##_dim = 0, TENSOR1##_i, TENSOR1##_n; \
TYPE2 *TENSOR2##_data = NULL; \
- long *TENSOR2##_counter = NULL; \
+ long *TENSOR2##_counter = NULL, *TENSOR2##_dims = NULL, *TENSOR2##_strides = NULL; \
long TENSOR2##_stride = 0, TENSOR2##_size = 0, TENSOR2##_dim = 0, TENSOR2##_i, TENSOR2##_n; \
int TH_TENSOR_APPLY_hasFinished = 0; \
+ long TH_TENSOR_dim_index = 0; \
\
TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \
for(TENSOR1##_i = 0; TENSOR1##_i < TENSOR1->nDimension; TENSOR1##_i++) \
@@ -232,48 +233,64 @@
else \
{ \
TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \
- for(TENSOR1##_dim = TENSOR1->nDimension-1; TENSOR1##_dim >= 0; TENSOR1##_dim--) \
+ TENSOR1##_dim = 1; \
+ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
{ \
- if(TENSOR1->size[TENSOR1##_dim] != 1) \
- break; \
+ if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \
+ TENSOR1##_dim++; \
} \
- TENSOR1##_stride = (TENSOR1##_dim == -1 ? 0 : TENSOR1->stride[TENSOR1##_dim]); \
- TENSOR1##_size = 1; \
- for(TENSOR1##_dim = TENSOR1->nDimension-1; TENSOR1##_dim >= 0; TENSOR1##_dim--) \
- { \
- if(TENSOR1->size[TENSOR1##_dim] != 1) \
- { \
- if(TENSOR1->stride[TENSOR1##_dim] == TENSOR1##_size) \
- TENSOR1##_size *= TENSOR1->size[TENSOR1##_dim]; \
- else \
- break; \
+ TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \
+ TENSOR1##_dims = TENSOR1##_counter + TENSOR1##_dim; \
+ TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \
+ TH_TENSOR_dim_index = TENSOR1##_dim-1; \
+ TENSOR1##_dims[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \
+ TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \
+ for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \
+ TENSOR1##_counter[TENSOR1##_i] = 0; \
+ } \
+ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \
+ if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
+ TENSOR1##_dims[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_dims[TH_TENSOR_dim_index]; \
+ } else { \
+ --TH_TENSOR_dim_index; \
+ TENSOR1##_dims[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \
+ TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \
} \
} \
- TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(TENSOR1##_dim+1)); \
- for(TENSOR1##_i = 0; TENSOR1##_i <= TENSOR1##_dim; TENSOR1##_i++) \
- TENSOR1##_counter[TENSOR1##_i] = 0; \
+ /* number of elements in the innermost collapsed contiguous section; bounds the inner loop */ \
+ TENSOR1##_size = TENSOR1##_dims[TENSOR1##_dim-1]; \
+ /* element stride used for offset updates while looping through the innermost contiguous section */ \
+ TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \
\
TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \
- for(TENSOR2##_dim = TENSOR2->nDimension-1; TENSOR2##_dim >= 0; TENSOR2##_dim--) \
+ TENSOR2##_dim = 1; \
+ for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
{ \
- if(TENSOR2->size[TENSOR2##_dim] != 1) \
- break; \
+ if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \
+ TENSOR2##_dim++; \
} \
- TENSOR2##_stride = (TENSOR2##_dim == -1 ? 0 : TENSOR2->stride[TENSOR2##_dim]); \
- TENSOR2##_size = 1; \
- for(TENSOR2##_dim = TENSOR2->nDimension-1; TENSOR2##_dim >= 0; TENSOR2##_dim--) \
- { \
- if(TENSOR2->size[TENSOR2##_dim] != 1) \
- { \
- if(TENSOR2->stride[TENSOR2##_dim] == TENSOR2##_size) \
- TENSOR2##_size *= TENSOR2->size[TENSOR2##_dim]; \
- else \
- break; \
+ TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \
+ TENSOR2##_dims = TENSOR2##_counter + TENSOR2##_dim; \
+ TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \
+ TH_TENSOR_dim_index = TENSOR2##_dim-1; \
+ TENSOR2##_dims[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \
+ TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \
+ for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \
+ TENSOR2##_counter[TENSOR2##_i] = 0; \
+ } \
+ for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \
+ if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \
+ TENSOR2##_dims[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_dims[TH_TENSOR_dim_index]; \
+ } else { \
+ --TH_TENSOR_dim_index; \
+ TENSOR2##_dims[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \
+ TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \
} \
} \
- TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(TENSOR2##_dim+1)); \
- for(TENSOR2##_i = 0; TENSOR2##_i <= TENSOR2##_dim; TENSOR2##_i++) \
- TENSOR2##_counter[TENSOR2##_i] = 0; \
+ /* number of elements in the innermost collapsed contiguous section; bounds the inner loop */ \
+ TENSOR2##_size = TENSOR2##_dims[TENSOR2##_dim-1]; \
+ /* element stride used for offset updates while looping through the innermost contiguous section */ \
+ TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \
} \
\
TENSOR1##_i = 0; \
@@ -287,16 +304,16 @@
\
if(TENSOR1##_i == TENSOR1##_size) \
{ \
- if(TENSOR1##_dim == -1) \
+ if(TENSOR1##_dim == 1) \
break; \
\
TENSOR1##_data -= TENSOR1##_size*TENSOR1##_stride; \
- for(TENSOR1##_i = TENSOR1##_dim; TENSOR1##_i >= 0; TENSOR1##_i--) \
+ for(TENSOR1##_i = TENSOR1##_dim-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
{ \
TENSOR1##_counter[TENSOR1##_i]++; \
- TENSOR1##_data += TENSOR1->stride[TENSOR1##_i]; \
+ TENSOR1##_data += TENSOR1##_strides[TENSOR1##_i]; \
\
- if(TENSOR1##_counter[TENSOR1##_i] == TENSOR1->size[TENSOR1##_i]) \
+ if(TENSOR1##_counter[TENSOR1##_i] == TENSOR1##_dims[TENSOR1##_i]) \
{ \
if(TENSOR1##_i == 0) \
{ \
@@ -305,7 +322,7 @@
} \
else \
{ \
- TENSOR1##_data -= TENSOR1##_counter[TENSOR1##_i]*TENSOR1->stride[TENSOR1##_i]; \
+ TENSOR1##_data -= TENSOR1##_counter[TENSOR1##_i]*TENSOR1##_strides[TENSOR1##_i]; \
TENSOR1##_counter[TENSOR1##_i] = 0; \
} \
} \
@@ -317,16 +334,16 @@
\
if(TENSOR2##_i == TENSOR2##_size) \
{ \
- if(TENSOR2##_dim == -1) \
+ if(TENSOR2##_dim == 1) \
break; \
\
TENSOR2##_data -= TENSOR2##_size*TENSOR2##_stride; \
- for(TENSOR2##_i = TENSOR2##_dim; TENSOR2##_i >= 0; TENSOR2##_i--) \
+ for(TENSOR2##_i = TENSOR2##_dim-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
{ \
TENSOR2##_counter[TENSOR2##_i]++; \
- TENSOR2##_data += TENSOR2->stride[TENSOR2##_i]; \
+ TENSOR2##_data += TENSOR2##_strides[TENSOR2##_i]; \
\
- if(TENSOR2##_counter[TENSOR2##_i] == TENSOR2->size[TENSOR2##_i]) \
+ if(TENSOR2##_counter[TENSOR2##_i] == TENSOR2##_dims[TENSOR2##_i]) \
{ \
if(TENSOR2##_i == 0) \
{ \
@@ -335,7 +352,7 @@
} \
else \
{ \
- TENSOR2##_data -= TENSOR2##_counter[TENSOR2##_i]*TENSOR2->stride[TENSOR2##_i]; \
+ TENSOR2##_data -= TENSOR2##_counter[TENSOR2##_i]*TENSOR2##_strides[TENSOR2##_i]; \
TENSOR2##_counter[TENSOR2##_i] = 0; \
} \
} \
@@ -378,6 +395,7 @@
long *TENSOR##_counter = NULL, *TENSOR##_dims = NULL, *TENSOR##_strides = NULL; \
long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i; \
int TH_TENSOR_APPLY_hasFinished = 0; \
+ long TH_TENSOR_dim_index = 0; \
\
if(TENSOR->nDimension == 0) \
TH_TENSOR_APPLY_hasFinished = 1; \
@@ -400,9 +418,9 @@
TENSOR##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR##_dim)); \
TENSOR##_dims = TENSOR##_counter + TENSOR##_dim; \
TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \
- long dim_index = TENSOR##_dim-1; \
- TENSOR##_dims[dim_index] = TENSOR->size[TENSOR->nDimension-1]; \
- TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \
+ TH_TENSOR_dim_index = TENSOR##_dim-1; \
+ TENSOR##_dims[TH_TENSOR_dim_index] = TENSOR->size[TENSOR->nDimension-1]; \
+ TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \
/* what is the first stride? */ \
/* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
/* storage is given by storage_offset + (i * j), where i is the stride */ \
@@ -412,11 +430,11 @@
} \
for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \
if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1]) { \
- TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_dims[dim_index]; \
+ TENSOR##_dims[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_dims[TH_TENSOR_dim_index]; \
} else { \
- --dim_index; \
- TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i]; \
- TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR##_i]; \
+ --TH_TENSOR_dim_index; \
+ TENSOR##_dims[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i]; \
+ TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR##_i]; \
} \
} \
/* it will be used for offset updates while looping through the largest contiguous section */ \