Welcome to mirror list, hosted at ThFree Co, Russian Federation.

THCStorageCopy.cu « generic « THC « lib - github.com/torch/cutorch.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 298f71795538e1053ecf41ec63a381fc7be539d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCStorageCopy.cu"
#else

void THCStorage_(rawCopy)(THCState *state, THCStorage *self, real *src)
{
  THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}

void THCStorage_(copy)(THCState *state, THCStorage *self, THCStorage *src)
{
  THArgCheck(self->size == src->size, 2, "size does not match");
  THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}

void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src)
{
  THArgCheck(self->size == src->size, 2, "size does not match");
  THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}

// conversions are mediated by the CPU
// yes, this is slow; feel free to write CUDA kernels for this
#ifndef THC_REAL_IS_HALF
#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA)                            \
  void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src)  \
  {                                                                      \
    if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) {                        \
      THCStorage_(copy)(state, self, (THCStorage*) src);   /* cast just removes compiler warning */ \
    } else {                                                             \
      THArgCheck(self->size == src->size, 2, "size does not match");     \
      TH##TYPEC##Storage *buffer1 = TH##TYPEC##Storage_newWithSize(src->size); \
      THStorage *buffer2  = THStorage_(newWithSize)(src->size);          \
      TH##TYPEC##Storage_copyCuda(state, buffer1, src);                  \
      THStorage_(copy##TYPEC)(buffer2, buffer1);                         \
      THCStorage_(copyCPU)(state, self, buffer2);                        \
      TH##TYPEC##Storage_free(buffer1);                                  \
      THStorage_(free)(buffer2);                                         \
    }                                                                    \
  }
#else
#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA)                            \
  void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src)  \
  {                                                                      \
    THArgCheck(self->size == src->size, 2, "size does not match");       \
    if(THCTypeIdx_(TYPEC) == THCTypeIdxFloat) {                          \
      THCFloat2Half(state, self->data, (float*) src->data, src->size);   /* cast removes compiler error */     \
    } else {                                                             \
      THCudaStorage *buffer = THCudaStorage_newWithSize(state, src->size); \
      THCudaStorage_copyCuda##TYPEC(state, buffer, src);                 \
      THCFloat2Half(state, self->data, buffer->data, buffer->size);      \
      THCudaStorage_free(state, buffer);                                 \
    }                                                                    \
  }
#endif

THC_CUDA_STORAGE_IMPLEMENT_COPY(Byte,Byte)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Char,Char)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Short,Short)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Int,Int)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Long,Long)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Float,)  // i.e. float
THC_CUDA_STORAGE_IMPLEMENT_COPY(Double,Double)

#ifdef CUDA_HALF_TENSOR
#define FLOAT_COPY(TYPE) TH_CONCAT_3(TH, CReal, Storage_copyCudaFloat)
void THCStorage_(copyCudaHalf)(THCState *state, THCStorage *self, struct THCudaHalfStorage *src)
{
    if(THCTypeIdx_(Real) == THCTypeIdxHalf) {
      THCStorage_(copy)(state, self, (THCStorage*) src);   /* cast just removes compiler warning */
    } else {
        THArgCheck(self->size == src->size, 2, "size does not match");
        THCudaStorage *buffer = THCudaStorage_newWithSize(state, src->size);
        THCHalf2Float(state, buffer->data, src->data, src->size);
        FLOAT_COPY(Real)(state, self, buffer);
        THCudaStorage_free(state, buffer);
    }
}
#undef FLOAT_COPY
#endif // CUDA_HALF_TENSOR

#undef THC_CUDA_STORAGE_IMPLEMENT_COPY

#endif