Welcome to mirror list, hosted at ThFree Co, Russian Federation.

device_cuda.h « cuda « device « cycles « intern - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c3271c3cfcf5582e5582f8f10f10f477bef03973 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifdef WITH_CUDA

#  include "device/device.h"
#  include "device/device_denoising.h"
#  include "device/device_split_kernel.h"

#  include "util/util_map.h"
#  include "util/util_task.h"

#  ifdef WITH_CUDA_DYNLOAD
#    include "cuew.h"
#  else
#    include "util/util_opengl.h"
#    include <cuda.h>
#    include <cudaGL.h>
#  endif

CCL_NAMESPACE_BEGIN

class CUDASplitKernel;

class CUDADevice : public Device {

  friend class CUDASplitKernelFunction;
  friend class CUDASplitKernel;
  friend class CUDAContextScope;

 public:
  DedicatedTaskPool task_pool;
  CUdevice cuDevice;
  CUcontext cuContext;
  CUmodule cuModule, cuFilterModule;
  size_t device_texture_headroom;
  size_t device_working_headroom;
  bool move_texture_to_host;
  size_t map_host_used;
  size_t map_host_limit;
  int can_map_host;
  int pitch_alignment;
  int cuDevId;
  int cuDevArchitecture;
  bool first_error;
  CUDASplitKernel *split_kernel;

  struct CUDAMem {
    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
    {
    }

    CUtexObject texobject;
    CUarray array;

    /* If true, a mapped host memory in shared_pointer is being used. */
    bool use_mapped_host;
  };
  typedef map<device_memory *, CUDAMem> CUDAMemMap;
  CUDAMemMap cuda_mem_map;
  thread_mutex cuda_mem_map_mutex;

  struct PixelMem {
    GLuint cuPBO;
    CUgraphicsResource cuPBOresource;
    GLuint cuTexId;
    int w, h;
  };
  map<device_ptr, PixelMem> pixel_mem_map;

  /* Bindless Textures */
  device_vector<TextureInfo> texture_info;
  bool need_texture_info;

  /* Kernels */
  struct {
    bool loaded;

    CUfunction adaptive_stopping;
    CUfunction adaptive_filter_x;
    CUfunction adaptive_filter_y;
    CUfunction adaptive_scale_samples;
    int adaptive_num_threads_per_block;
  } functions;

  static bool have_precompiled_kernels();

  virtual bool show_samples() const override;

  virtual BVHLayoutMask get_bvh_layout_mask() const override;

  void set_error(const string &error) override;

  CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);

  virtual ~CUDADevice();

  bool support_device(const DeviceRequestedFeatures & /*requested_features*/);

  bool check_peer_access(Device *peer_device) override;

  bool use_adaptive_compilation();

  bool use_split_kernel();

  virtual string compile_kernel_get_common_cflags(
      const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);

  string compile_kernel(const DeviceRequestedFeatures &requested_features,
                        const char *name,
                        const char *base = "cuda",
                        bool force_ptx = false);

  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;

  void load_functions();

  void reserve_local_memory(const DeviceRequestedFeatures &requested_features);

  void init_host_memory();

  void load_texture_info();

  void move_textures_to_host(size_t size, bool for_texture);

  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);

  void generic_copy_to(device_memory &mem);

  void generic_free(device_memory &mem);

  void mem_alloc(device_memory &mem) override;

  void mem_copy_to(device_memory &mem) override;

  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;

  void mem_zero(device_memory &mem) override;

  void mem_free(device_memory &mem) override;

  device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;

  virtual void const_copy_to(const char *name, void *host, size_t size) override;

  void global_alloc(device_memory &mem);

  void global_free(device_memory &mem);

  void tex_alloc(device_texture &mem);

  void tex_free(device_texture &mem);

  bool denoising_non_local_means(device_ptr image_ptr,
                                 device_ptr guide_ptr,
                                 device_ptr variance_ptr,
                                 device_ptr out_ptr,
                                 DenoisingTask *task);

  bool denoising_construct_transform(DenoisingTask *task);

  bool denoising_accumulate(device_ptr color_ptr,
                            device_ptr color_variance_ptr,
                            device_ptr scale_ptr,
                            int frame,
                            DenoisingTask *task);

  bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);

  bool denoising_combine_halves(device_ptr a_ptr,
                                device_ptr b_ptr,
                                device_ptr mean_ptr,
                                device_ptr variance_ptr,
                                int r,
                                int4 rect,
                                DenoisingTask *task);

  bool denoising_divide_shadow(device_ptr a_ptr,
                               device_ptr b_ptr,
                               device_ptr sample_variance_ptr,
                               device_ptr sv_variance_ptr,
                               device_ptr buffer_variance_ptr,
                               DenoisingTask *task);

  bool denoising_get_feature(int mean_offset,
                             int variance_offset,
                             device_ptr mean_ptr,
                             device_ptr variance_ptr,
                             float scale,
                             DenoisingTask *task);

  bool denoising_write_feature(int out_offset,
                               device_ptr from_ptr,
                               device_ptr buffer_ptr,
                               DenoisingTask *task);

  bool denoising_detect_outliers(device_ptr image_ptr,
                                 device_ptr variance_ptr,
                                 device_ptr depth_ptr,
                                 device_ptr output_ptr,
                                 DenoisingTask *task);

  void denoise(RenderTile &rtile, DenoisingTask &denoising);

  void adaptive_sampling_filter(uint filter_sample,
                                WorkTile *wtile,
                                CUdeviceptr d_wtile,
                                CUstream stream = 0);
  void adaptive_sampling_post(RenderTile &rtile,
                              WorkTile *wtile,
                              CUdeviceptr d_wtile,
                              CUstream stream = 0);

  void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);

  void film_convert(DeviceTask &task,
                    device_ptr buffer,
                    device_ptr rgba_byte,
                    device_ptr rgba_half);

  void shader(DeviceTask &task);

  CUdeviceptr map_pixels(device_ptr mem);

  void unmap_pixels(device_ptr mem);

  void pixels_alloc(device_memory &mem);

  void pixels_copy_from(device_memory &mem, int y, int w, int h);

  void pixels_free(device_memory &mem);

  void draw_pixels(device_memory &mem,
                   int y,
                   int w,
                   int h,
                   int width,
                   int height,
                   int dx,
                   int dy,
                   int dw,
                   int dh,
                   bool transparent,
                   const DeviceDrawParams &draw_params) override;

  void thread_run(DeviceTask &task);

  virtual void task_add(DeviceTask &task) override;

  virtual void task_wait() override;

  virtual void task_cancel() override;
};

CCL_NAMESPACE_END

#endif