1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCStorageCopy.cu"
#else
void THCStorage_(rawCopy)(THCState *state, THCStorage *self, real *src)
{
THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
void THCStorage_(copy)(THCState *state, THCStorage *self, THCStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
// conversions are mediated by the CPU
// yes, this is slow; feel free to write CUDA kernels for this
#ifndef THC_REAL_IS_HALF
#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA) \
void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src) \
{ \
if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
THCStorage_(copy)(state, self, (THCStorage*) src); /* cast just removes compiler warning */ \
} else { \
THArgCheck(self->size == src->size, 2, "size does not match"); \
TH##TYPEC##Storage *buffer1 = TH##TYPEC##Storage_newWithSize(src->size); \
THStorage *buffer2 = THStorage_(newWithSize)(src->size); \
TH##TYPEC##Storage_copyCuda(state, buffer1, src); \
THStorage_(copy##TYPEC)(buffer2, buffer1); \
THCStorage_(copyCPU)(state, self, buffer2); \
TH##TYPEC##Storage_free(buffer1); \
THStorage_(free)(buffer2); \
} \
}
#else
#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA) \
void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src) \
{ \
THArgCheck(self->size == src->size, 2, "size does not match"); \
if(THCTypeIdx_(TYPEC) == THCTypeIdxFloat) { \
THCFloat2Half(state, self->data, (float*) src->data, src->size); /* cast removes compiler error */ \
} else { \
THCudaStorage *buffer = THCudaStorage_newWithSize(state, src->size); \
THCudaStorage_copyCuda##TYPEC(state, buffer, src); \
THCFloat2Half(state, self->data, buffer->data, buffer->size); \
THCudaStorage_free(state, buffer); \
} \
}
#endif
THC_CUDA_STORAGE_IMPLEMENT_COPY(Byte,Byte)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Char,Char)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Short,Short)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Int,Int)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Long,Long)
THC_CUDA_STORAGE_IMPLEMENT_COPY(Float,) // i.e. float
THC_CUDA_STORAGE_IMPLEMENT_COPY(Double,Double)
#ifdef CUDA_HALF_TENSOR
#define FLOAT_COPY(TYPE) TH_CONCAT_3(TH, CReal, Storage_copyCudaFloat)
void THCStorage_(copyCudaHalf)(THCState *state, THCStorage *self, struct THCudaHalfStorage *src)
{
if(THCTypeIdx_(Real) == THCTypeIdxHalf) {
THCStorage_(copy)(state, self, (THCStorage*) src); /* cast just removes compiler warning */
} else {
THArgCheck(self->size == src->size, 2, "size does not match");
THCudaStorage *buffer = THCudaStorage_newWithSize(state, src->size);
THCHalf2Float(state, buffer->data, src->data, src->size);
FLOAT_COPY(Real)(state, self, buffer);
THCudaStorage_free(state, buffer);
}
}
#undef FLOAT_COPY
#endif // CUDA_HALF_TENSOR
#undef THC_CUDA_STORAGE_IMPLEMENT_COPY
#endif
|