Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Torch7: a scientific computing framework with wide support for machine learning algorithms (LuaJIT + TH C backend).
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjokeren <robinho364@gmail.com>2017-02-01 09:06:04 +0300
committerSoumith Chintala <soumith@gmail.com>2017-02-23 14:01:13 +0300
commit1c2a39da74c6ea14c252b1915ca522191b12ec81 (patch)
tree3aafb09b6e13048cc46d0fb273eeebdbbe887c7d
parent8701a03bebae65d37e7585d9577cb6faeccf0a32 (diff)
THTensorApply3 contiguous optimizations
-rw-r--r--lib/TH/THTensorApply.h400
-rw-r--r--lib/TH/generic/THTensorCopy.c2
-rw-r--r--lib/TH/generic/THTensorMath.c30
3 files changed, 160 insertions, 272 deletions
diff --git a/lib/TH/THTensorApply.h b/lib/TH/THTensorApply.h
index b88983b..ef9d5f1 100644
--- a/lib/TH/THTensorApply.h
+++ b/lib/TH/THTensorApply.h
@@ -13,6 +13,7 @@
long *TENSOR3##_counter = NULL, *TENSOR3##_sizes = NULL, *TENSOR3##_strides = NULL; \
long TENSOR3##_stride = 0, TENSOR3##_size = 0, TENSOR3##_dim = 0, TENSOR3##_i, TENSOR3##_n; \
int TH_TENSOR_APPLY_hasFinished = 0; \
+ int TH_TENSOR1_contiguous = 1, TH_TENSOR2_contiguous = 1, TH_TENSOR3_contiguous = 1; \
long TH_TENSOR_dim_index = 0; \
\
TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \
@@ -35,88 +36,131 @@
else \
{ \
TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \
- TENSOR1##_dim = 1; \
- for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
- { \
- if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \
- TENSOR1##_dim++; \
- } \
- TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \
- TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \
- TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \
- TH_TENSOR_dim_index = TENSOR1##_dim-1; \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \
- TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \
- for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \
- TENSOR1##_counter[TENSOR1##_i] = 0; \
+ TENSOR1##_size = 1; \
+ TENSOR1##_stride = 1; \
+ for(TENSOR1##_i = TENSOR1->nDimension-1; TENSOR1##_i >= 0; TENSOR1##_i--) { \
+ if(TENSOR1->size[TENSOR1##_i] != 1) { \
+ if(TENSOR1->stride[TENSOR1##_i] == TENSOR1##_size) \
+ TENSOR1##_size *= TENSOR1->size[TENSOR1##_i]; \
+ else{ \
+ TH_TENSOR1_contiguous = 0; \
+ break; \
+ } \
+ } \
} \
- for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \
- if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \
- TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \
+ if (!TH_TENSOR1_contiguous) { \
+ TENSOR1##_dim = 1; \
+ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
+ { \
+ if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \
+ TENSOR1##_dim++; \
+ } \
+ TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \
+ TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \
+ TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \
+ TH_TENSOR_dim_index = TENSOR1##_dim-1; \
+ TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \
+ TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \
+ for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \
+ TENSOR1##_counter[TENSOR1##_i] = 0; \
+ } \
+ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \
+ if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
+ TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \
+ } else { \
+ --TH_TENSOR_dim_index; \
+ TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \
+ TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \
+ } \
} \
+ TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \
+ TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \
} \
- TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \
- TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \
\
TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \
- TENSOR2##_dim = 1; \
- for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
- { \
- if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \
- TENSOR2##_dim++; \
- } \
- TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \
- TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \
- TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \
- TH_TENSOR_dim_index = TENSOR2##_dim-1; \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \
- TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \
- for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \
- TENSOR2##_counter[TENSOR2##_i] = 0; \
+ TENSOR2##_size = 1; \
+ TENSOR2##_stride = 1; \
+ for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; TENSOR2##_i--) { \
+ if(TENSOR2->size[TENSOR2##_i] != 1) { \
+ if(TENSOR2->stride[TENSOR2##_i] == TENSOR2##_size) \
+ TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \
+ else{ \
+ TH_TENSOR2_contiguous = 0; \
+ break; \
+ } \
+ } \
} \
- for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \
- if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \
- TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \
+ if (!TH_TENSOR2_contiguous) { \
+ TENSOR2##_dim = 1; \
+ for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
+ { \
+ if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \
+ TENSOR2##_dim++; \
+ } \
+ TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \
+ TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \
+ TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \
+ TH_TENSOR_dim_index = TENSOR2##_dim-1; \
+ TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \
+ TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \
+ for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \
+ TENSOR2##_counter[TENSOR2##_i] = 0; \
+ } \
+ for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \
+ if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \
+ TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \
+ } else { \
+ --TH_TENSOR_dim_index; \
+ TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \
+ TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \
+ } \
} \
+ TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \
+ TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \
} \
- TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \
- TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \
\
TENSOR3##_data = TENSOR3->storage->data+TENSOR3->storageOffset; \
- TENSOR3##_dim = 1; \
- for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; TENSOR3##_i--) \
- { \
- if(TENSOR3->stride[TENSOR3##_i] != TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) \
- TENSOR3##_dim++; \
- } \
- TENSOR3##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR3##_dim)); \
- TENSOR3##_sizes = TENSOR3##_counter + TENSOR3##_dim; \
- TENSOR3##_strides = TENSOR3##_counter + 2*TENSOR3##_dim; \
- TH_TENSOR_dim_index = TENSOR3##_dim-1; \
- TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3->nDimension-1]; \
- TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3->nDimension-1]; \
- for(TENSOR3##_i = TENSOR3##_dim-1; TENSOR3##_i >= 0; --TENSOR3##_i) { \
- TENSOR3##_counter[TENSOR3##_i] = 0; \
+ TENSOR3##_size = 1; \
+ TENSOR3##_stride = 1; \
+ for(TENSOR3##_i = TENSOR3->nDimension-1; TENSOR3##_i >= 0; TENSOR3##_i--) { \
+ if(TENSOR3->size[TENSOR3##_i] != 1) { \
+ if(TENSOR3->stride[TENSOR3##_i] == TENSOR3##_size) \
+ TENSOR3##_size *= TENSOR3->size[TENSOR3##_i]; \
+ else{ \
+ TH_TENSOR3_contiguous = 0; \
+ break; \
+ } \
+ } \
} \
- for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; --TENSOR3##_i) { \
- if (TENSOR3->stride[TENSOR3##_i] == TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) { \
- TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i] * TENSOR3##_sizes[TH_TENSOR_dim_index]; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i]; \
- TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3##_i]; \
+ if (!TH_TENSOR3_contiguous) { \
+ TENSOR3##_data = TENSOR3->storage->data+TENSOR3->storageOffset; \
+ TENSOR3##_dim = 1; \
+ for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; TENSOR3##_i--) \
+ { \
+ if(TENSOR3->stride[TENSOR3##_i] != TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) \
+ TENSOR3##_dim++; \
+ } \
+ TENSOR3##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR3##_dim)); \
+ TENSOR3##_sizes = TENSOR3##_counter + TENSOR3##_dim; \
+ TENSOR3##_strides = TENSOR3##_counter + 2*TENSOR3##_dim; \
+ TH_TENSOR_dim_index = TENSOR3##_dim-1; \
+ TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3->nDimension-1]; \
+ TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3->nDimension-1]; \
+ for(TENSOR3##_i = TENSOR3##_dim-1; TENSOR3##_i >= 0; --TENSOR3##_i) { \
+ TENSOR3##_counter[TENSOR3##_i] = 0; \
+ } \
+ for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; --TENSOR3##_i) { \
+ if (TENSOR3->stride[TENSOR3##_i] == TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) { \
+ TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i] * TENSOR3##_sizes[TH_TENSOR_dim_index]; \
+ } else { \
+ --TH_TENSOR_dim_index; \
+ TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i]; \
+ TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3##_i]; \
+ } \
} \
+ TENSOR3##_size = TENSOR3##_sizes[TENSOR3##_dim-1]; \
+ TENSOR3##_stride = TENSOR3##_strides[TENSOR3##_dim-1]; \
} \
- TENSOR3##_size = TENSOR3##_sizes[TENSOR3##_dim-1]; \
- TENSOR3##_stride = TENSOR3##_strides[TENSOR3##_dim-1]; \
} \
\
TENSOR1##_i = 0; \
@@ -131,6 +175,9 @@
\
if(TENSOR1##_i == TENSOR1##_size) \
{ \
+ if(TH_TENSOR1_contiguous) \
+ break; \
+\
if(TENSOR1##_dim == 1) \
break; \
\
@@ -161,6 +208,9 @@
\
if(TENSOR2##_i == TENSOR2##_size) \
{ \
+ if(TH_TENSOR2_contiguous) \
+ break; \
+\
if(TENSOR2##_dim == 1) \
break; \
\
@@ -191,6 +241,9 @@
\
if(TENSOR3##_i == TENSOR3##_size) \
{ \
+ if(TH_TENSOR3_contiguous) \
+ break; \
+\
if(TENSOR3##_dim == 1) \
break; \
\
@@ -219,9 +272,12 @@
TENSOR3##_i = 0; \
} \
} \
- THFree(TENSOR1##_counter); \
- THFree(TENSOR2##_counter); \
- THFree(TENSOR3##_counter); \
+ if(!TH_TENSOR1_contiguous) \
+ THFree(TENSOR1##_counter); \
+ if(!TH_TENSOR2_contiguous) \
+ THFree(TENSOR2##_counter); \
+ if(!TH_TENSOR3_contiguous) \
+ THFree(TENSOR3##_counter); \
}
#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
@@ -233,6 +289,7 @@
long *TENSOR2##_counter = NULL, *TENSOR2##_sizes = NULL, *TENSOR2##_strides = NULL; \
long TENSOR2##_stride = 0, TENSOR2##_size = 0, TENSOR2##_dim = 0, TENSOR2##_i, TENSOR2##_n; \
int TH_TENSOR_APPLY_hasFinished = 0; \
+ int TH_TENSOR1_contiguous = 1, TH_TENSOR2_contiguous = 1; \
long TH_TENSOR_dim_index = 0; \
\
TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \
@@ -251,164 +308,19 @@
else \
{ \
TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \
- TENSOR1##_dim = 1; \
- for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
- { \
- if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \
- TENSOR1##_dim++; \
- } \
- TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \
- TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \
- TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \
- TH_TENSOR_dim_index = TENSOR1##_dim-1; \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \
- TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \
- for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \
- TENSOR1##_counter[TENSOR1##_i] = 0; \
- } \
- for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \
- if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \
- TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \
+ TENSOR1##_size = 1; \
+ TENSOR1##_stride = 1; \
+ for(TENSOR1##_i = TENSOR1->nDimension-1; TENSOR1##_i >= 0; TENSOR1##_i--) { \
+ if(TENSOR1->size[TENSOR1##_i] != 1) { \
+ if(TENSOR1->stride[TENSOR1##_i] == TENSOR1##_size) \
+ TENSOR1##_size *= TENSOR1->size[TENSOR1##_i]; \
+ else{ \
+ TH_TENSOR1_contiguous = 0; \
+ break; \
+ } \
} \
} \
- TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \
- TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \
-\
- TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \
- TENSOR2##_dim = 1; \
- for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
- { \
- if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \
- TENSOR2##_dim++; \
- } \
- TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \
- TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \
- TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \
- TH_TENSOR_dim_index = TENSOR2##_dim-1; \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \
- TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \
- for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \
- TENSOR2##_counter[TENSOR2##_i] = 0; \
- } \
- for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \
- if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \
- TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \
- } \
- } \
- TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \
- TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \
- } \
-\
- TENSOR1##_i = 0; \
- TENSOR2##_i = 0; \
- while(!TH_TENSOR_APPLY_hasFinished) \
- { \
- for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \
- { \
- CODE \
- } \
-\
- if(TENSOR1##_i == TENSOR1##_size) \
- { \
- if(TENSOR1##_dim == 1) \
- break; \
-\
- TENSOR1##_data -= TENSOR1##_size*TENSOR1##_stride; \
- for(TENSOR1##_i = TENSOR1##_dim-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
- { \
- TENSOR1##_counter[TENSOR1##_i]++; \
- TENSOR1##_data += TENSOR1##_strides[TENSOR1##_i]; \
-\
- if(TENSOR1##_counter[TENSOR1##_i] == TENSOR1##_sizes[TENSOR1##_i]) \
- { \
- if(TENSOR1##_i == 0) \
- { \
- TH_TENSOR_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- TENSOR1##_data -= TENSOR1##_counter[TENSOR1##_i]*TENSOR1##_strides[TENSOR1##_i]; \
- TENSOR1##_counter[TENSOR1##_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- TENSOR1##_i = 0; \
- } \
-\
- if(TENSOR2##_i == TENSOR2##_size) \
- { \
- if(TENSOR2##_dim == 1) \
- break; \
-\
- TENSOR2##_data -= TENSOR2##_size*TENSOR2##_stride; \
- for(TENSOR2##_i = TENSOR2##_dim-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
- { \
- TENSOR2##_counter[TENSOR2##_i]++; \
- TENSOR2##_data += TENSOR2##_strides[TENSOR2##_i]; \
-\
- if(TENSOR2##_counter[TENSOR2##_i] == TENSOR2##_sizes[TENSOR2##_i]) \
- { \
- if(TENSOR2##_i == 0) \
- { \
- TH_TENSOR_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- TENSOR2##_data -= TENSOR2##_counter[TENSOR2##_i]*TENSOR2##_strides[TENSOR2##_i]; \
- TENSOR2##_counter[TENSOR2##_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- TENSOR2##_i = 0; \
- } \
- } \
- THFree(TENSOR1##_counter); \
- THFree(TENSOR2##_counter); \
-}
-
-#define TH_TENSOR_APPLY2_CONTIGUOUS(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
-{ \
- TYPE1 *TENSOR1##_data = NULL; \
- long *TENSOR1##_counter = NULL, *TENSOR1##_sizes = NULL, *TENSOR1##_strides = NULL; \
- long TENSOR1##_stride = 0, TENSOR1##_size = 0, TENSOR1##_dim = 0, TENSOR1##_i, TENSOR1##_n; \
- TYPE2 *TENSOR2##_data = NULL; \
- long *TENSOR2##_counter = NULL, *TENSOR2##_sizes = NULL, *TENSOR2##_strides = NULL; \
- long TENSOR2##_stride = 0, TENSOR2##_size = 0, TENSOR2##_dim = 0, TENSOR2##_i, TENSOR2##_n; \
- int TH_TENSOR_APPLY_hasFinished = 0; \
- int TH_TENSOR1_contiguous = 0, TH_TENSOR2_contiguous = 0; \
- long TH_TENSOR_dim_index = 0; \
-\
- TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \
- for(TENSOR1##_i = 0; TENSOR1##_i < TENSOR1->nDimension; TENSOR1##_i++) \
- TENSOR1##_n *= TENSOR1->size[TENSOR1##_i]; \
-\
- TENSOR2##_n = (TENSOR2->nDimension ? 1 : 0); \
- for(TENSOR2##_i = 0; TENSOR2##_i < TENSOR2->nDimension; TENSOR2##_i++) \
- TENSOR2##_n *= TENSOR2->size[TENSOR2##_i]; \
-\
- if(TENSOR1##_n != TENSOR2##_n) /* should we do the check in the function instead? i think so */ \
- THError("inconsistent tensor size"); \
-\
- if(TENSOR1->nDimension == 0) \
- TH_TENSOR_APPLY_hasFinished = 1; \
- else \
- { \
- TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \
- if (!THTensor_(isContiguous)(TENSOR1)) { \
+ if (!TH_TENSOR1_contiguous) { \
TENSOR1##_dim = 1; \
for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \
{ \
@@ -425,7 +337,7 @@
TENSOR1##_counter[TENSOR1##_i] = 0; \
} \
for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \
- if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
+ if(TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \
TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \
} else { \
--TH_TENSOR_dim_index; \
@@ -435,14 +347,22 @@
} \
TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \
TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \
- } else { \
- TH_TENSOR1_contiguous = 1; \
- TENSOR1##_size = THTensor_(nElement)(TENSOR1); \
- TENSOR1##_stride = 1; \
} \
\
TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \
- if (!THTensor_(isContiguous)(TENSOR2)) { \
+ TENSOR2##_size = 1; \
+ TENSOR2##_stride = 1; \
+ for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; TENSOR2##_i--) { \
+ if(TENSOR2->size[TENSOR2##_i] != 1) { \
+ if(TENSOR2->stride[TENSOR2##_i] == TENSOR2##_size) \
+ TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \
+ else{ \
+ TH_TENSOR2_contiguous = 0; \
+ break; \
+ } \
+ } \
+ } \
+ if(!TH_TENSOR2_contiguous) { \
TENSOR2##_dim = 1; \
for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \
{ \
@@ -470,8 +390,10 @@
TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \
TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \
} else { \
- TH_TENSOR2_contiguous = 1; \
- TENSOR2##_size = THTensor_(nElement)(TENSOR2); \
+ TENSOR2##_size = 1; \
+ for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \
+ TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \
+ } \
TENSOR2##_stride = 1; \
} \
} \
@@ -551,12 +473,10 @@
TENSOR2##_i = 0; \
} \
} \
- if (!THTensor_(isContiguous)(TENSOR1)) { \
+ if (!TH_TENSOR1_contiguous) \
THFree(TENSOR1##_counter); \
- } \
- if (!THTensor_(isContiguous)(TENSOR2)) { \
+ if (!TH_TENSOR2_contiguous) \
THFree(TENSOR2##_counter); \
- } \
}
/*
diff --git a/lib/TH/generic/THTensorCopy.c b/lib/TH/generic/THTensorCopy.c
index cc92c33..5dfdcf1 100644
--- a/lib/TH/generic/THTensorCopy.c
+++ b/lib/TH/generic/THTensorCopy.c
@@ -15,8 +15,6 @@ void THTensor_(copy)(THTensor *tensor, THTensor *src)
for (i=0; i<sz; ++i)
rp[i] = sp[i];
#endif
- } else if (THTensor_(isContiguous)(tensor) || THTensor_(isContiguous)(src)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, tensor, real, src, *tensor_data = *src_data;)
} else {
TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;)
}
diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c
index 302923e..c27fa00 100644
--- a/lib/TH/generic/THTensorMath.c
+++ b/lib/TH/generic/THTensorMath.c
@@ -508,8 +508,6 @@ void THTensor_(add)(THTensor *r_, THTensor *t, real value)
ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads;
THVector_(add)(rp+i, tp+i, value, i_end-i);
}
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data + value;);
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);
}
@@ -540,8 +538,6 @@ void THTensor_(mul)(THTensor *r_, THTensor *t, real value)
ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads;
THVector_(mul)(rp+i, tp+i, value, i_end-i);
}
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data * value;);
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;);
}
@@ -567,8 +563,6 @@ void THTensor_(div)(THTensor *r_, THTensor *t, real value)
ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads;
THVector_(div)(rp+i, tp+i, value, i_end-i);
}
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data / value;);
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;);
}
@@ -660,12 +654,6 @@ void THTensor_(fmod)(THTensor *r_, THTensor *t, real value)
rp[i] = tp[i] % value;
#endif
}
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = fmod(*t_data, value););
-#else
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (*t_data % value););
-#endif
} else {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = fmod(*t_data, value););
@@ -691,13 +679,6 @@ void THTensor_(remainder)(THTensor *r_, THTensor *t, real value)
rp[i] = tp[i] - value * (tp[i] / value); // There is no NAN for integers
#endif
}
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););
-#else
- // There is no NAN for integers
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data - value * (*t_data / value););
-#endif
} else {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););
@@ -789,8 +770,6 @@ void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value)
#pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
for (i=0; i<sz; i++)
rp[i] = (tp[i] < min_value) ? min_value : (tp[i] > max_value ? max_value : tp[i]);
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data););
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data););
}
@@ -1121,8 +1100,6 @@ void THTensor_(tpow)(THTensor *r_, real value, THTensor *t)
#pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
for (i=0; i<sz; i++)
rp[i] = pow(value, tp[i]);
- } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) {
- TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = pow(value, *t_data););
} else {
TH_TENSOR_APPLY2(real, r_, real, t, *r__data = pow(value, *t_data););
}
@@ -2563,13 +2540,6 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb)
for (i=0; i<sz; ++i){
if(tap[i] != tbp[i]) return 0;
}
- } else if (THTensor_(isContiguous)(ta) || THTensor_(isContiguous)(tb)) {
- // Short-circuit the apply function on inequality
- TH_TENSOR_APPLY2_CONTIGUOUS(real, ta, real, tb,
- if (equal && *ta_data != *tb_data) {
- equal = 0;
- TH_TENSOR_APPLY_hasFinished = 1; break;
- })
} else {
// Short-circuit the apply function on inequality
TH_TENSOR_APPLY2(real, ta, real, tb,