diff options
author | Trevor Killeen <killeentm@gmail.com> | 2016-11-22 01:02:33 +0300 |
---|---|---|
committer | Trevor Killeen <killeentm@gmail.com> | 2016-11-22 01:02:33 +0300 |
commit | 3eaefc924f37159a587c8fdf0e26064c2f99ad8b (patch) | |
tree | 278a280cb696489112b8007c19f836f616bf167d | |
parent | 552b086a903582359186c0d8790de6877fb0bc13 (diff) |
Add some documentation for APPLY and DIM_APPLY macros
-rw-r--r-- | lib/TH/THTensorApply.h | 42 | ||||
-rw-r--r-- | lib/TH/THTensorDimApply.h | 64 |
2 files changed, 103 insertions, 3 deletions
diff --git a/lib/TH/THTensorApply.h b/lib/TH/THTensorApply.h index f525088..4fd69d4 100644 --- a/lib/TH/THTensorApply.h +++ b/lib/TH/THTensorApply.h @@ -348,6 +348,29 @@ THFree(TENSOR2##_counter); \ } +/* + * The basic strategy for apply is as follows: + * + * 1. Starting with the outermost index, loop until we reach a dimension where the + * data is no longer contiguous, i.e. the stride at that dimension is not equal to + * the size of the tensor defined by the outer dimensions. Let's call this outer + * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal + * to the entire Tensor. Let's call the inner tensor B. + * + * 2. We loop through the indices in B, starting at its outermost dimension. For + * example, if B is a 2x2 matrix, then we do: + * + * B[0][0] + * B[0][1] + * B[1][0] + * B[1][1] + * + * We set the offset into the underlying storage as (storageOffset + stride_B * index_B), + * i.e. basically we compute the offset into the storage as we would normally for a + * Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we + * can simply loop for sizeof(A) iterations and perform the operation, without having to + * follow the order described by the strides of A. + */ #define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \ { \ TYPE *TENSOR##_data = NULL; \ @@ -362,7 +385,7 @@ TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \ \ /* what is the first stride (ignore first dims=1)? */ \ - /* it will be used for the whole largest contiguous section */ \ + /* it will be used for offset updates while looping through the largest contiguous section */ \ for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \ { \ if(TENSOR->size[TENSOR##_dim] != 1) \ @@ -370,7 +393,7 @@ } \ TENSOR##_stride = (TENSOR##_dim == -1 ? 0 : TENSOR->stride[TENSOR##_dim]); \ \ - /* what is the largest contiguous section? */ \ + /* what is the largest contiguous section? 
size will store the size of this section */ \ TENSOR##_size = 1; \ for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \ { \ @@ -383,7 +406,13 @@ } \ } \ \ - /* counter over found dimensions */ \ + /* allocate an array of k+1 elements, where k is the first index that */ \ + /* breaks contiguity. Note that if the tensor is contiguous, then k is -1 and */ \ + /* this counter array is empty. */ \ +\ + /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \ + /* storage is given by storage_offset + (i * j), where i is the stride */ \ + /* vector and j is the tensor_counter vector. This sets the starting position for the loop. */ \ TENSOR##_counter = (long*)THAlloc(sizeof(long)*(TENSOR##_dim+1)); \ for(TENSOR##_i = 0; TENSOR##_i <= TENSOR##_dim; TENSOR##_i++) \ TENSOR##_counter[TENSOR##_i] = 0; \ @@ -391,18 +420,24 @@ \ while(!TH_TENSOR_APPLY_hasFinished) \ { \ + /* Loop through the contiguous section of the Tensor */ \ for(TENSOR##_i = 0; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! 
*/ \ { \ CODE \ } \ \ +\ + /* Handle corner case where the entire Tensor was contiguous */ \ if(TENSOR##_dim == -1) \ break; \ \ + /* Reset pointer to beginning of loop */ \ TENSOR##_data -= TENSOR##_i*TENSOR##_stride; \ for(TENSOR##_i = TENSOR##_dim; TENSOR##_i >= 0; TENSOR##_i--) \ { \ TENSOR##_counter[TENSOR##_i]++; \ +\ + /* Jump ahead by the stride of this dimension */ \ TENSOR##_data += TENSOR->stride[TENSOR##_i]; \ \ if(TENSOR##_counter[TENSOR##_i] == TENSOR->size[TENSOR##_i]) \ @@ -414,6 +449,7 @@ } \ else \ { \ + /* Reset the pointer to the beginning of the chunk defined by this dimension */ \ TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR->stride[TENSOR##_i]; \ TENSOR##_counter[TENSOR##_i] = 0; \ } \ diff --git a/lib/TH/THTensorDimApply.h b/lib/TH/THTensorDimApply.h index 40822aa..df333fa 100644 --- a/lib/TH/THTensorDimApply.h +++ b/lib/TH/THTensorDimApply.h @@ -91,6 +91,23 @@ THFree(TH_TENSOR_DIM_APPLY_counter); \ } +/** + * Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor + * and one for the second. The two tensors must have the same shape, other than at the + * specified DIMENSION. This function makes it easy to store the output from reducing the + * TENSOR at index. For example, in the sum example described below, we could instead do: + * + * long i = 0; + * TYPE1 sum; + * + * for (i = 0; i < TENSOR1##_size; ++i) { + * sum += TENSOR1##_data[i * TENSOR1##_stride]; + * } + * *TENSOR2##_data = (TYPE2) sum; + * + * In particular, we guarantee that the offset into TENSOR2 will be what you would get if + * you applied all of the index values used to generate the offset into TENSOR1. 
+ */ #define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \ { \ TYPE1 *TENSOR1##_data = NULL; \ @@ -169,6 +186,45 @@ THFree(TH_TENSOR_DIM_APPLY_counter); \ } +/** + * The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored + * at all sets of dimension values other than DIMENSION, such that we can get all the values at those + * fixed indices for the various values at DIMENSION. + * + * Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the + * pointer into storage will be at: + * + * A[0][0] + * A[0][1] + * A[0][2] + * A[1][0] + * A[1][1] + * A[1][2] + * + * And at each point, we can access the data for each of the four elements of the Tensor via + * TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do: + * + * long i = 0; + * TYPE sum; + * for (i = 0; i < TENSOR##_size; i++) { + * sum += TENSOR##_data[i * TENSOR##_stride]; + * } + * + * Note that we don't have to have DIMENSION be the last dimension in the Tensor. If we have DIMENSION=1, then we will hit the + * code (2x4) times, with pointer into the storage at: + * + * offset + + * stride_0 * 0 + stride_2 * 0 + * stride_0 * 1 + stride_2 * 0 + * stride_0 * 0 + stride_2 * 1 + * stride_0 * 1 + stride_2 * 1 + * stride_0 * 0 + stride_2 * 2 + * stride_0 * 1 + stride_2 * 2 + * stride_0 * 0 + stride_2 * 3 + * stride_0 * 1 + stride_2 * 3 + * + * So we can again sum over the values at DIMENSION with the other indices fixed. 
+ */ #define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \ { \ TYPE *TENSOR##_data = NULL; \ @@ -183,6 +239,7 @@ TENSOR##_data = (TENSOR)->storage->data+(TENSOR)->storageOffset; \ TENSOR##_stride = (TENSOR)->stride[DIMENSION]; \ TENSOR##_size = TENSOR->size[DIMENSION]; \ + /* Counter stores the indices into the Tensor at any time */ \ TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR->nDimension)); \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ @@ -196,6 +253,10 @@ \ for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ { \ + /* Check if the index is equal to DIMENSION. We don't need to update the */ \ + /* offset if this is the case, and can consider the next index. However, */ \ + /* in the case that the DIMENSION is the last index in the Tensor, then */ \ + /* we have parsed the entire tensor and can exit */ \ if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ { \ if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ @@ -206,11 +267,13 @@ continue; \ } \ \ + /* Bump the counter at this index, update the pointer */ \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ TENSOR##_data += TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ \ if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR->size[TH_TENSOR_DIM_APPLY_i]) \ { \ + /* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \ if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ { \ TH_TENSOR_DIM_APPLY_hasFinished = 1; \ @@ -218,6 +281,7 @@ } \ else \ { \ + /* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \ TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ } \ |