diff options
author | jokeren <robinho364@gmail.com> | 2017-02-01 09:06:04 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2017-02-23 14:01:13 +0300 |
commit | 1c2a39da74c6ea14c252b1915ca522191b12ec81 (patch) | |
tree | 3aafb09b6e13048cc46d0fb273eeebdbbe887c7d | |
parent | 8701a03bebae65d37e7585d9577cb6faeccf0a32 (diff) |
THTensorApply3 contiguous optimizations
-rw-r--r-- | lib/TH/THTensorApply.h | 400 | ||||
-rw-r--r-- | lib/TH/generic/THTensorCopy.c | 2 | ||||
-rw-r--r-- | lib/TH/generic/THTensorMath.c | 30 |
3 files changed, 160 insertions, 272 deletions
diff --git a/lib/TH/THTensorApply.h b/lib/TH/THTensorApply.h index b88983b..ef9d5f1 100644 --- a/lib/TH/THTensorApply.h +++ b/lib/TH/THTensorApply.h @@ -13,6 +13,7 @@ long *TENSOR3##_counter = NULL, *TENSOR3##_sizes = NULL, *TENSOR3##_strides = NULL; \ long TENSOR3##_stride = 0, TENSOR3##_size = 0, TENSOR3##_dim = 0, TENSOR3##_i, TENSOR3##_n; \ int TH_TENSOR_APPLY_hasFinished = 0; \ + int TH_TENSOR1_contiguous = 1, TH_TENSOR2_contiguous = 1, TH_TENSOR3_contiguous = 1; \ long TH_TENSOR_dim_index = 0; \ \ TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \ @@ -35,88 +36,131 @@ else \ { \ TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \ - TENSOR1##_dim = 1; \ - for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \ - { \ - if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \ - TENSOR1##_dim++; \ - } \ - TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \ - TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \ - TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \ - TH_TENSOR_dim_index = TENSOR1##_dim-1; \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \ - TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \ - for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \ - TENSOR1##_counter[TENSOR1##_i] = 0; \ + TENSOR1##_size = 1; \ + TENSOR1##_stride = 1; \ + for(TENSOR1##_i = TENSOR1->nDimension-1; TENSOR1##_i >= 0; TENSOR1##_i--) { \ + if(TENSOR1->size[TENSOR1##_i] != 1) { \ + if(TENSOR1->stride[TENSOR1##_i] == TENSOR1##_size) \ + TENSOR1##_size *= TENSOR1->size[TENSOR1##_i]; \ + else{ \ + TH_TENSOR1_contiguous = 0; \ + break; \ + } \ + } \ } \ - for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \ - if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \ - TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \ + if (!TH_TENSOR1_contiguous) { \ + TENSOR1##_dim = 1; \ + for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \ + { \ + if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \ + TENSOR1##_dim++; \ + } \ + TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \ + TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \ + TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \ + TH_TENSOR_dim_index = TENSOR1##_dim-1; \ + TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \ + TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \ + for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \ + TENSOR1##_counter[TENSOR1##_i] = 0; \ + } \ + for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \ + if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \ + TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \ + } else { \ + --TH_TENSOR_dim_index; \ + TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \ + TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \ + } \ } \ + TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \ + TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \ } \ - TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \ - TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \ \ TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \ - TENSOR2##_dim = 1; \ - for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \ - { \ - if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \ - TENSOR2##_dim++; \ - } \ - TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \ - TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \ - TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \ - TH_TENSOR_dim_index = TENSOR2##_dim-1; \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \ - TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \ - for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \ - TENSOR2##_counter[TENSOR2##_i] = 0; \ + TENSOR2##_size = 1; \ + TENSOR2##_stride = 1; \ + for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; TENSOR2##_i--) { \ + if(TENSOR2->size[TENSOR2##_i] != 1) { \ + if(TENSOR2->stride[TENSOR2##_i] == TENSOR2##_size) \ + TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \ + else{ \ + TH_TENSOR2_contiguous = 0; \ + break; \ + } \ + } \ } \ - for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \ - if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \ - TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \ + if (!TH_TENSOR2_contiguous) { \ + TENSOR2##_dim = 1; \ + for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \ + { \ + if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \ + TENSOR2##_dim++; \ + } \ + TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \ + TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \ + TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \ + TH_TENSOR_dim_index = TENSOR2##_dim-1; \ + TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \ + TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \ + for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \ + TENSOR2##_counter[TENSOR2##_i] = 0; \ + } \ + for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \ + if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \ + TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \ + } else { \ + --TH_TENSOR_dim_index; \ + TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \ + TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \ + } \ } \ + TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \ + TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \ } \ - TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \ - TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \ \ TENSOR3##_data = TENSOR3->storage->data+TENSOR3->storageOffset; \ - TENSOR3##_dim = 1; \ - for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; TENSOR3##_i--) \ - { \ - if(TENSOR3->stride[TENSOR3##_i] != TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) \ - TENSOR3##_dim++; \ - } \ - TENSOR3##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR3##_dim)); \ - TENSOR3##_sizes = TENSOR3##_counter + TENSOR3##_dim; \ - TENSOR3##_strides = TENSOR3##_counter + 2*TENSOR3##_dim; \ - TH_TENSOR_dim_index = TENSOR3##_dim-1; \ - TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3->nDimension-1]; \ - TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3->nDimension-1]; \ - for(TENSOR3##_i = TENSOR3##_dim-1; TENSOR3##_i >= 0; --TENSOR3##_i) { \ - TENSOR3##_counter[TENSOR3##_i] = 0; \ + TENSOR3##_size = 1; \ + TENSOR3##_stride = 1; \ + for(TENSOR3##_i = TENSOR3->nDimension-1; TENSOR3##_i >= 0; TENSOR3##_i--) { \ + if(TENSOR3->size[TENSOR3##_i] != 1) { \ + if(TENSOR3->stride[TENSOR3##_i] == TENSOR3##_size) \ + TENSOR3##_size *= TENSOR3->size[TENSOR3##_i]; \ + else{ \ + TH_TENSOR3_contiguous = 0; \ + break; \ + } \ + } \ } \ - for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; --TENSOR3##_i) { \ - if (TENSOR3->stride[TENSOR3##_i] == TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) { \ - TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i] * TENSOR3##_sizes[TH_TENSOR_dim_index]; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i]; \ - TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3##_i]; \ + if (!TH_TENSOR3_contiguous) { \ + TENSOR3##_data = TENSOR3->storage->data+TENSOR3->storageOffset; \ + TENSOR3##_dim = 1; \ + for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; TENSOR3##_i--) \ + { \ + if(TENSOR3->stride[TENSOR3##_i] != TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) \ + TENSOR3##_dim++; \ + } \ + TENSOR3##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR3##_dim)); \ + TENSOR3##_sizes = TENSOR3##_counter + TENSOR3##_dim; \ + TENSOR3##_strides = TENSOR3##_counter + 2*TENSOR3##_dim; \ + TH_TENSOR_dim_index = TENSOR3##_dim-1; \ + TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3->nDimension-1]; \ + TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3->nDimension-1]; \ + for(TENSOR3##_i = TENSOR3##_dim-1; TENSOR3##_i >= 0; --TENSOR3##_i) { \ + TENSOR3##_counter[TENSOR3##_i] = 0; \ + } \ + for(TENSOR3##_i = TENSOR3->nDimension-2; TENSOR3##_i >= 0; --TENSOR3##_i) { \ + if (TENSOR3->stride[TENSOR3##_i] == TENSOR3->stride[TENSOR3##_i+1] * TENSOR3->size[TENSOR3##_i+1]) { \ + TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i] * TENSOR3##_sizes[TH_TENSOR_dim_index]; \ + } else { \ + --TH_TENSOR_dim_index; \ + TENSOR3##_sizes[TH_TENSOR_dim_index] = TENSOR3->size[TENSOR3##_i]; \ + TENSOR3##_strides[TH_TENSOR_dim_index] = TENSOR3->stride[TENSOR3##_i]; \ + } \ } \ + TENSOR3##_size = TENSOR3##_sizes[TENSOR3##_dim-1]; \ + TENSOR3##_stride = TENSOR3##_strides[TENSOR3##_dim-1]; \ } \ - TENSOR3##_size = TENSOR3##_sizes[TENSOR3##_dim-1]; \ - TENSOR3##_stride = TENSOR3##_strides[TENSOR3##_dim-1]; \ } \ \ TENSOR1##_i = 0; \ @@ -131,6 +175,9 @@ \ if(TENSOR1##_i == TENSOR1##_size) \ { \ + if(TH_TENSOR1_contiguous) \ + break; \ +\ if(TENSOR1##_dim == 1) \ break; \ \ @@ -161,6 +208,9 @@ \ if(TENSOR2##_i == TENSOR2##_size) \ { \ + if(TH_TENSOR2_contiguous) \ + break; \ +\ if(TENSOR2##_dim == 1) \ break; \ \ @@ -191,6 +241,9 @@ \ if(TENSOR3##_i == TENSOR3##_size) \ { \ + if(TH_TENSOR3_contiguous) \ + break; \ +\ if(TENSOR3##_dim == 1) \ break; \ \ @@ -219,9 +272,12 @@ TENSOR3##_i = 0; \ } \ } \ - THFree(TENSOR1##_counter); \ - THFree(TENSOR2##_counter); \ - THFree(TENSOR3##_counter); \ + if(TH_TENSOR1_contiguous) \ + THFree(TENSOR1##_counter); \ + if(TH_TENSOR2_contiguous) \ + THFree(TENSOR2##_counter); \ + if(TH_TENSOR3_contiguous) \ + THFree(TENSOR3##_counter); \ } #define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ @@ -233,6 +289,7 @@ long *TENSOR2##_counter = NULL, *TENSOR2##_sizes = NULL, *TENSOR2##_strides = NULL; \ long TENSOR2##_stride = 0, TENSOR2##_size = 0, TENSOR2##_dim = 0, TENSOR2##_i, TENSOR2##_n; \ int TH_TENSOR_APPLY_hasFinished = 0; \ + int TH_TENSOR1_contiguous = 1, TH_TENSOR2_contiguous = 1; \ long TH_TENSOR_dim_index = 0; \ \ TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \ @@ -251,164 +308,19 @@ else \ { \ TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \ - TENSOR1##_dim = 1; \ - for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \ - { \ - if(TENSOR1->stride[TENSOR1##_i] != TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) \ - TENSOR1##_dim++; \ - } \ - TENSOR1##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR1##_dim)); \ - TENSOR1##_sizes = TENSOR1##_counter + TENSOR1##_dim; \ - TENSOR1##_strides = TENSOR1##_counter + 2*TENSOR1##_dim; \ - TH_TENSOR_dim_index = TENSOR1##_dim-1; \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1->nDimension-1]; \ - TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1->nDimension-1]; \ - for(TENSOR1##_i = TENSOR1##_dim-1; TENSOR1##_i >= 0; --TENSOR1##_i) { \ - TENSOR1##_counter[TENSOR1##_i] = 0; \ - } \ - for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \ - if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i]; \ - TENSOR1##_strides[TH_TENSOR_dim_index] = TENSOR1->stride[TENSOR1##_i]; \ + TENSOR1##_size = 1; \ + TENSOR1##_stride = 1; \ + for(TENSOR1##_i = TENSOR1->nDimension-1; TENSOR1##_i >= 0; TENSOR1##_i--) { \ + if(TENSOR1->size[TENSOR1##_i] != 1) { \ + if(TENSOR1->stride[TENSOR1##_i] == TENSOR1##_size) \ + TENSOR1##_size *= TENSOR1->size[TENSOR1##_i]; \ + else{ \ + TH_TENSOR1_contiguous = 0; \ + break; \ + } \ } \ } \ - TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \ - TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \ -\ - TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \ - TENSOR2##_dim = 1; \ - for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \ - { \ - if(TENSOR2->stride[TENSOR2##_i] != TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) \ - TENSOR2##_dim++; \ - } \ - TENSOR2##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR2##_dim)); \ - TENSOR2##_sizes = TENSOR2##_counter + TENSOR2##_dim; \ - TENSOR2##_strides = TENSOR2##_counter + 2*TENSOR2##_dim; \ - TH_TENSOR_dim_index = TENSOR2##_dim-1; \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2->nDimension-1]; \ - TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2->nDimension-1]; \ - for(TENSOR2##_i = TENSOR2##_dim-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \ - TENSOR2##_counter[TENSOR2##_i] = 0; \ - } \ - for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; --TENSOR2##_i) { \ - if (TENSOR2->stride[TENSOR2##_i] == TENSOR2->stride[TENSOR2##_i+1] * TENSOR2->size[TENSOR2##_i+1]) { \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i] * TENSOR2##_sizes[TH_TENSOR_dim_index]; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR2##_sizes[TH_TENSOR_dim_index] = TENSOR2->size[TENSOR2##_i]; \ - TENSOR2##_strides[TH_TENSOR_dim_index] = TENSOR2->stride[TENSOR2##_i]; \ - } \ - } \ - TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \ - TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \ - } \ -\ - TENSOR1##_i = 0; \ - TENSOR2##_i = 0; \ - while(!TH_TENSOR_APPLY_hasFinished) \ - { \ - for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \ - { \ - CODE \ - } \ -\ - if(TENSOR1##_i == TENSOR1##_size) \ - { \ - if(TENSOR1##_dim == 1) \ - break; \ -\ - TENSOR1##_data -= TENSOR1##_size*TENSOR1##_stride; \ - for(TENSOR1##_i = TENSOR1##_dim-2; TENSOR1##_i >= 0; TENSOR1##_i--) \ - { \ - TENSOR1##_counter[TENSOR1##_i]++; \ - TENSOR1##_data += TENSOR1##_strides[TENSOR1##_i]; \ -\ - if(TENSOR1##_counter[TENSOR1##_i] == TENSOR1##_sizes[TENSOR1##_i]) \ - { \ - if(TENSOR1##_i == 0) \ - { \ - TH_TENSOR_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - TENSOR1##_data -= TENSOR1##_counter[TENSOR1##_i]*TENSOR1##_strides[TENSOR1##_i]; \ - TENSOR1##_counter[TENSOR1##_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - TENSOR1##_i = 0; \ - } \ -\ - if(TENSOR2##_i == TENSOR2##_size) \ - { \ - if(TENSOR2##_dim == 1) \ - break; \ -\ - TENSOR2##_data -= TENSOR2##_size*TENSOR2##_stride; \ - for(TENSOR2##_i = TENSOR2##_dim-2; TENSOR2##_i >= 0; TENSOR2##_i--) \ - { \ - TENSOR2##_counter[TENSOR2##_i]++; \ - TENSOR2##_data += TENSOR2##_strides[TENSOR2##_i]; \ -\ - if(TENSOR2##_counter[TENSOR2##_i] == TENSOR2##_sizes[TENSOR2##_i]) \ - { \ - if(TENSOR2##_i == 0) \ - { \ - TH_TENSOR_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - TENSOR2##_data -= TENSOR2##_counter[TENSOR2##_i]*TENSOR2##_strides[TENSOR2##_i]; \ - TENSOR2##_counter[TENSOR2##_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - TENSOR2##_i = 0; \ - } \ - } \ - THFree(TENSOR1##_counter); \ - THFree(TENSOR2##_counter); \ -} - -#define TH_TENSOR_APPLY2_CONTIGUOUS(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ -{ \ - TYPE1 *TENSOR1##_data = NULL; \ - long *TENSOR1##_counter = NULL, *TENSOR1##_sizes = NULL, *TENSOR1##_strides = NULL; \ - long TENSOR1##_stride = 0, TENSOR1##_size = 0, TENSOR1##_dim = 0, TENSOR1##_i, TENSOR1##_n; \ - TYPE2 *TENSOR2##_data = NULL; \ - long *TENSOR2##_counter = NULL, *TENSOR2##_sizes = NULL, *TENSOR2##_strides = NULL; \ - long TENSOR2##_stride = 0, TENSOR2##_size = 0, TENSOR2##_dim = 0, TENSOR2##_i, TENSOR2##_n; \ - int TH_TENSOR_APPLY_hasFinished = 0; \ - int TH_TENSOR1_contiguous = 0, TH_TENSOR2_contiguous = 0; \ - long TH_TENSOR_dim_index = 0; \ -\ - TENSOR1##_n = (TENSOR1->nDimension ? 1 : 0); \ - for(TENSOR1##_i = 0; TENSOR1##_i < TENSOR1->nDimension; TENSOR1##_i++) \ - TENSOR1##_n *= TENSOR1->size[TENSOR1##_i]; \ -\ - TENSOR2##_n = (TENSOR2->nDimension ? 1 : 0); \ - for(TENSOR2##_i = 0; TENSOR2##_i < TENSOR2->nDimension; TENSOR2##_i++) \ - TENSOR2##_n *= TENSOR2->size[TENSOR2##_i]; \ -\ - if(TENSOR1##_n != TENSOR2##_n) /* should we do the check in the function instead? i think so */ \ - THError("inconsistent tensor size"); \ -\ - if(TENSOR1->nDimension == 0) \ - TH_TENSOR_APPLY_hasFinished = 1; \ - else \ - { \ - TENSOR1##_data = TENSOR1->storage->data+TENSOR1->storageOffset; \ - if (!THTensor_(isContiguous)(TENSOR1)) { \ + if (!TH_TENSOR1_contiguous) { \ TENSOR1##_dim = 1; \ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; TENSOR1##_i--) \ { \ @@ -425,7 +337,7 @@ TENSOR1##_counter[TENSOR1##_i] = 0; \ } \ for(TENSOR1##_i = TENSOR1->nDimension-2; TENSOR1##_i >= 0; --TENSOR1##_i) { \ - if (TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \ + if(TENSOR1->stride[TENSOR1##_i] == TENSOR1->stride[TENSOR1##_i+1] * TENSOR1->size[TENSOR1##_i+1]) { \ TENSOR1##_sizes[TH_TENSOR_dim_index] = TENSOR1->size[TENSOR1##_i] * TENSOR1##_sizes[TH_TENSOR_dim_index]; \ } else { \ --TH_TENSOR_dim_index; \ @@ -435,14 +347,22 @@ } \ TENSOR1##_size = TENSOR1##_sizes[TENSOR1##_dim-1]; \ TENSOR1##_stride = TENSOR1##_strides[TENSOR1##_dim-1]; \ - } else { \ - TH_TENSOR1_contiguous = 1; \ - TENSOR1##_size = THTensor_(nElement)(TENSOR1); \ - TENSOR1##_stride = 1; \ } \ \ TENSOR2##_data = TENSOR2->storage->data+TENSOR2->storageOffset; \ - if (!THTensor_(isContiguous)(TENSOR2)) { \ + TENSOR2##_size = 1; \ + TENSOR2##_stride = 1; \ + for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; TENSOR2##_i--) { \ + if(TENSOR2->size[TENSOR2##_i] != 1) { \ + if(TENSOR2->stride[TENSOR2##_i] == TENSOR2##_size) \ + TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \ + else{ \ + TH_TENSOR2_contiguous = 0; \ + break; \ + } \ + } \ + } \ + if(!TH_TENSOR2_contiguous) { \ TENSOR2##_dim = 1; \ for(TENSOR2##_i = TENSOR2->nDimension-2; TENSOR2##_i >= 0; TENSOR2##_i--) \ { \ @@ -470,8 +390,10 @@ TENSOR2##_size = TENSOR2##_sizes[TENSOR2##_dim-1]; \ TENSOR2##_stride = TENSOR2##_strides[TENSOR2##_dim-1]; \ } else { \ - TH_TENSOR2_contiguous = 1; \ - TENSOR2##_size = THTensor_(nElement)(TENSOR2); \ + TENSOR2##_size = 1; \ + for(TENSOR2##_i = TENSOR2->nDimension-1; TENSOR2##_i >= 0; --TENSOR2##_i) { \ + TENSOR2##_size *= TENSOR2->size[TENSOR2##_i]; \ + } \ TENSOR2##_stride = 1; \ } \ } \ @@ -551,12 +473,10 @@ TENSOR2##_i = 0; \ } \ } \ - if (!THTensor_(isContiguous)(TENSOR1)) { \ + if (!TH_TENSOR1_contiguous) \ THFree(TENSOR1##_counter); \ - } \ - if (!THTensor_(isContiguous)(TENSOR2)) { \ + if (!TH_TENSOR2_contiguous) \ THFree(TENSOR2##_counter); \ - } \ } /* diff --git a/lib/TH/generic/THTensorCopy.c b/lib/TH/generic/THTensorCopy.c index cc92c33..5dfdcf1 100644 --- a/lib/TH/generic/THTensorCopy.c +++ b/lib/TH/generic/THTensorCopy.c @@ -15,8 +15,6 @@ void THTensor_(copy)(THTensor *tensor, THTensor *src) for (i=0; i<sz; ++i) rp[i] = sp[i]; #endif - } else if (THTensor_(isContiguous)(tensor) || THTensor_(isContiguous)(src)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, tensor, real, src, *tensor_data = *src_data;) } else { TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;) } diff --git a/lib/TH/generic/THTensorMath.c b/lib/TH/generic/THTensorMath.c index 302923e..c27fa00 100644 --- a/lib/TH/generic/THTensorMath.c +++ b/lib/TH/generic/THTensorMath.c @@ -508,8 +508,6 @@ void THTensor_(add)(THTensor *r_, THTensor *t, real value) ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads; THVector_(add)(rp+i, tp+i, value, i_end-i); } - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data + value;); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;); } @@ -540,8 +538,6 @@ void THTensor_(mul)(THTensor *r_, THTensor *t, real value) ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads; THVector_(mul)(rp+i, tp+i, value, i_end-i); } - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data * value;); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;); } @@ -567,8 +563,6 @@ void THTensor_(div)(THTensor *r_, THTensor *t, real value) ptrdiff_t i_end = tid == num_threads - 1 ? sz : i + sz / num_threads; THVector_(div)(rp+i, tp+i, value, i_end-i); } - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data / value;); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;); } @@ -660,12 +654,6 @@ void THTensor_(fmod)(THTensor *r_, THTensor *t, real value) rp[i] = tp[i] % value; #endif } - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = fmod(*t_data, value);); -#else - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (*t_data % value);); -#endif } else { #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) TH_TENSOR_APPLY2(real, r_, real, t, *r__data = fmod(*t_data, value);); @@ -691,13 +679,6 @@ void THTensor_(remainder)(THTensor *r_, THTensor *t, real value) rp[i] = tp[i] - value * (tp[i] / value); // There is no NAN for integers #endif } - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value);); -#else - // There is no NAN for integers - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = *t_data - value * (*t_data / value);); -#endif } else { #if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value);); @@ -789,8 +770,6 @@ void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value) #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i<sz; i++) rp[i] = (tp[i] < min_value) ? min_value : (tp[i] > max_value ? max_value : tp[i]); - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data);); } @@ -1121,8 +1100,6 @@ void THTensor_(tpow)(THTensor *r_, real value, THTensor *t) #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) for (i=0; i<sz; i++) rp[i] = pow(value, tp[i]); - } else if (THTensor_(isContiguous)(r_) || THTensor_(isContiguous)(t)) { - TH_TENSOR_APPLY2_CONTIGUOUS(real, r_, real, t, *r__data = pow(value, *t_data);); } else { TH_TENSOR_APPLY2(real, r_, real, t, *r__data = pow(value, *t_data);); } @@ -2563,13 +2540,6 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb) for (i=0; i<sz; ++i){ if(tap[i] != tbp[i]) return 0; } - } else if (THTensor_(isContiguous)(ta) || THTensor_(isContiguous)(tb)) { - // Short-circuit the apply function on inequality - TH_TENSOR_APPLY2_CONTIGUOUS(real, ta, real, tb, - if (equal && *ta_data != *tb_data) { - equal = 0; - TH_TENSOR_APPLY_hasFinished = 1; break; - }) } else { // Short-circuit the apply function on inequality TH_TENSOR_APPLY2(real, ta, real, tb, |