diff options
Diffstat (limited to 'intern/cycles/render')
-rw-r--r-- | intern/cycles/render/buffers.cpp | 63 | ||||
-rw-r--r-- | intern/cycles/render/buffers.h | 15 | ||||
-rw-r--r-- | intern/cycles/render/film.cpp | 22 | ||||
-rw-r--r-- | intern/cycles/render/film.h | 7 | ||||
-rw-r--r-- | intern/cycles/render/mesh.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 215 | ||||
-rw-r--r-- | intern/cycles/render/session.h | 26 | ||||
-rw-r--r-- | intern/cycles/render/tile.cpp | 199 | ||||
-rw-r--r-- | intern/cycles/render/tile.h | 33 |
9 files changed, 464 insertions, 118 deletions
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fe2c2e78926..cf402c3f214 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -42,6 +42,9 @@ BufferParams::BufferParams() full_width = 0; full_height = 0; + denoising_data_pass = false; + denoising_clean_pass = false; + Pass::add(PASS_COMBINED, passes); } @@ -68,10 +71,25 @@ int BufferParams::get_passes_size() for(size_t i = 0; i < passes.size(); i++) size += passes[i].components; - + + if(denoising_data_pass) { + size += DENOISING_PASS_SIZE_BASE; + if(denoising_clean_pass) size += DENOISING_PASS_SIZE_CLEAN; + } + return align_up(size, 4); } +int BufferParams::get_denoising_offset() +{ + int offset = 0; + + for(size_t i = 0; i < passes.size(); i++) + offset += passes[i].components; + + return offset; +} + /* Render Buffer Task */ RenderTile::RenderTile() @@ -138,12 +156,51 @@ void RenderBuffers::reset(Device *device, BufferParams& params_) device->mem_alloc("rng_state", rng_state, MEM_READ_WRITE); } -bool RenderBuffers::copy_from_device() +bool RenderBuffers::copy_from_device(Device *from_device) { if(!buffer.device_pointer) return false; - device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float)); + if(!from_device) { + from_device = device; + } + + from_device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float)); + + return true; +} + +bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels) +{ + float scale = 1.0f/sample; + + if(offset == DENOISING_PASS_COLOR) { + scale *= exposure; + } + else if(offset == DENOISING_PASS_COLOR_VAR) { + scale *= exposure*exposure; + } + + offset += params.get_denoising_offset(); + float *in = (float*)buffer.data_pointer + offset; + int pass_stride = params.get_passes_size(); + int size = params.width*params.height; + + if(components == 1) { + for(int i = 0; i < size; i++, in += pass_stride, pixels++) { + pixels[0] = in[0]*scale; + } + } + else if(components == 3) { + for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { + pixels[0] = in[0]*scale; + pixels[1] = in[1]*scale; + pixels[2] = in[2]*scale; + } + } + else { + return false; + } return true; } diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 5c78971678a..e56556c8abe 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -51,6 +51,9 @@ public: /* passes */ array<Pass> passes; + bool denoising_data_pass; + /* If only some light path types should be denoised, an additional pass is needed. */ + bool denoising_clean_pass; /* functions */ BufferParams(); @@ -59,6 +62,7 @@ public: bool modified(const BufferParams& params); void add_pass(PassType type); int get_passes_size(); + int get_denoising_offset(); }; /* Render Buffers */ @@ -73,18 +77,19 @@ public: /* random number generator state */ device_vector<uint> rng_state; + Device *device; + explicit RenderBuffers(Device *device); ~RenderBuffers(); void reset(Device *device, BufferParams& params); - bool copy_from_device(); + bool copy_from_device(Device *from_device = NULL); bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); + bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels); protected: void device_free(); - - Device *device; }; /* Display Buffer @@ -131,6 +136,9 @@ protected: class RenderTile { public: + typedef enum { PATH_TRACE, DENOISE } Task; + + Task task; int x, y, w, h; int start_sample; int num_samples; @@ -138,6 +146,7 @@ public: int resolution; int offset; int stride; + int tile_index; device_ptr buffer; device_ptr rng_state; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 7809f4345f1..c8213d258d5 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -279,6 +279,10 @@ NODE_DEFINE(Film) SOCKET_BOOLEAN(use_sample_clamp, "Use Sample Clamp", false); + SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false); + SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); + SOCKET_INT(denoising_flags, "Denoising Flags", 0); + return type; } @@ -437,6 +441,20 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride += pass.components; } + kfilm->pass_denoising_data = 0; + kfilm->pass_denoising_clean = 0; + kfilm->denoising_flags = 0; + if(denoising_data_pass) { + kfilm->pass_denoising_data = kfilm->pass_stride; + kfilm->pass_stride += DENOISING_PASS_SIZE_BASE; + kfilm->denoising_flags = denoising_flags; + if(denoising_clean_pass) { + kfilm->pass_denoising_clean = kfilm->pass_stride; + kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN; + kfilm->use_light_pass = 1; + } + } + kfilm->pass_stride = align_up(kfilm->pass_stride, 4); kfilm->pass_alpha_threshold = pass_alpha_threshold; @@ -451,6 +469,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->mist_inv_depth = (mist_depth > 0.0f)? 1.0f/mist_depth: 0.0f; kfilm->mist_falloff = mist_falloff; + pass_stride = kfilm->pass_stride; + denoising_data_offset = kfilm->pass_denoising_data; + denoising_clean_offset = kfilm->pass_denoising_clean; + need_update = false; } diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 83c941d5c57..29b1e7e9157 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -57,8 +57,15 @@ public: float exposure; array<Pass> passes; + bool denoising_data_pass; + bool denoising_clean_pass; + int denoising_flags; float pass_alpha_threshold; + int pass_stride; + int denoising_data_offset; + int denoising_clean_offset; + FilterType filter_type; float filter_width; size_t filter_table_offset; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 33d1936659b..03825f780e0 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -903,7 +903,7 @@ void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal) float3 vNi = vN[i]; if(do_transform) - vNi = normalize(transform_direction(&ntfm, vNi)); + vNi = safe_normalize(transform_direction(&ntfm, vNi)); vnormal[i] = make_float4(vNi.x, vNi.y, vNi.z, 0.0f); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index c9b5547b407..3eaf34c847f 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -114,8 +114,9 @@ Session::~Session() } /* clean up */ - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &rtile, render_tiles) + delete rtile.buffers; + tile_manager.free_device(); delete buffers; delete display; @@ -268,8 +269,8 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); - /* path trace */ - path_trace(); + /* render */ + render(); device->task_wait(); @@ -358,20 +359,22 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) thread_scoped_lock tile_lock(tile_mutex); /* get next tile from manager */ - Tile tile; + Tile *tile; int device_num = device->device_number(tile_device); if(!tile_manager.next_tile(tile, device_num)) return false; /* fill render tile */ - rtile.x = tile_manager.state.buffer.full_x + tile.x; - rtile.y = tile_manager.state.buffer.full_y + tile.y; - rtile.w = tile.w; - rtile.h = tile.h; + rtile.x = tile_manager.state.buffer.full_x + tile->x; + rtile.y = tile_manager.state.buffer.full_y + tile->y; + rtile.w = tile->w; + rtile.h = tile->h; rtile.start_sample = tile_manager.state.sample; rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; + rtile.tile_index = tile->index; + rtile.task = (tile->state == Tile::DENOISE)? RenderTile::DENOISE: RenderTile::PATH_TRACE; tile_lock.unlock(); @@ -383,54 +386,70 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) rtile.buffer = buffers->buffer.device_pointer; rtile.rng_state = buffers->rng_state.device_pointer; rtile.buffers = buffers; + tile->buffers = buffers; device->map_tile(tile_device, rtile); return true; } - /* fill buffer parameters */ - BufferParams buffer_params = tile_manager.params; - buffer_params.full_x = rtile.x; - buffer_params.full_y = rtile.y; - buffer_params.width = rtile.w; - buffer_params.height = rtile.h; - - buffer_params.get_offset_stride(rtile.offset, rtile.stride); - - RenderBuffers *tilebuffers; + bool store_rtile = false; + if(tile->buffers == NULL) { + /* fill buffer parameters */ + BufferParams buffer_params = tile_manager.params; + buffer_params.full_x = rtile.x; + buffer_params.full_y = rtile.y; + buffer_params.width = rtile.w; + buffer_params.height = rtile.h; + + /* allocate buffers */ + if(params.progressive_refine) { + tile_lock.lock(); + + if(render_tiles.size() == 0) { + RenderTile nulltile; + nulltile.buffers = NULL; + render_tiles.resize(tile_manager.state.num_tiles, nulltile); + } - /* allocate buffers */ - if(params.progressive_refine) { - tile_lock.lock(); + /* In certain circumstances number of tiles in the tile manager could + * be changed. This is not supported by the progressive refine feature. + */ + assert(render_tiles.size() == tile_manager.state.num_tiles); - if(tile_buffers.size() == 0) - tile_buffers.resize(tile_manager.state.num_tiles, NULL); + RenderTile &stored_rtile = render_tiles[tile->index]; + if(stored_rtile.buffers == NULL) { + tile->buffers = new RenderBuffers(tile_device); + tile->buffers->reset(tile_device, buffer_params); + store_rtile = true; + } + else { + assert(rtile.x == stored_rtile.x && + rtile.y == stored_rtile.y && + rtile.w == stored_rtile.w && + rtile.h == stored_rtile.h); + tile_lock.unlock(); + tile->buffers = stored_rtile.buffers; + } + } + else { + tile->buffers = new RenderBuffers(tile_device); - /* In certain circumstances number of tiles in the tile manager could - * be changed. This is not supported by the progressive refine feature. - */ - assert(tile_buffers.size() == tile_manager.state.num_tiles); + tile->buffers->reset(tile_device, buffer_params); + } + } - tilebuffers = tile_buffers[tile.index]; - if(tilebuffers == NULL) { - tilebuffers = new RenderBuffers(tile_device); - tile_buffers[tile.index] = tilebuffers; + tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - tilebuffers->reset(tile_device, buffer_params); - } + rtile.buffer = tile->buffers->buffer.device_pointer; + rtile.rng_state = tile->buffers->rng_state.device_pointer; + rtile.buffers = tile->buffers; + rtile.sample = 0; + if(store_rtile) { + render_tiles[tile->index] = rtile; tile_lock.unlock(); } - else { - tilebuffers = new RenderBuffers(tile_device); - - tilebuffers->reset(tile_device, buffer_params); - } - - rtile.buffer = tilebuffers->buffer.device_pointer; - rtile.rng_state = tilebuffers->rng_state.device_pointer; - rtile.buffers = tilebuffers; /* this will tag tile as IN PROGRESS in blender-side render pipeline, * which is needed to highlight currently rendering tile before first @@ -449,7 +468,7 @@ void Session::update_tile_sample(RenderTile& rtile) if(params.progressive_refine == false) { /* todo: optimize this by making it thread safe and removing lock */ - update_render_tile_cb(rtile); + update_render_tile_cb(rtile, true); } } @@ -462,18 +481,75 @@ void Session::release_tile(RenderTile& rtile) progress.add_finished_tile(); - if(write_render_tile_cb) { - if(params.progressive_refine == false) { - /* todo: optimize this by making it thread safe and removing lock */ - write_render_tile_cb(rtile); + bool delete_tile; - delete rtile.buffers; + if(tile_manager.finish_tile(rtile.tile_index, delete_tile)) { + if(write_render_tile_cb && params.progressive_refine == false) { + write_render_tile_cb(rtile); + if(delete_tile) { + delete rtile.buffers; + tile_manager.state.tiles[rtile.tile_index].buffers = NULL; + } + } + } + else { + if(update_render_tile_cb && params.progressive_refine == false) { + update_render_tile_cb(rtile, false); } } update_status_time(); } +void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) +{ + thread_scoped_lock tile_lock(tile_mutex); + + int center_idx = tiles[4].tile_index; + assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); + BufferParams buffer_params = tile_manager.params; + int4 image_region = make_int4(buffer_params.full_x, buffer_params.full_y, + buffer_params.full_x + buffer_params.width, buffer_params.full_y + buffer_params.height); + + for(int dy = -1, i = 0; dy <= 1; dy++) { + for(int dx = -1; dx <= 1; dx++, i++) { + int px = tiles[4].x + dx*params.tile_size.x; + int py = tiles[4].y + dy*params.tile_size.y; + if(px >= image_region.x && py >= image_region.y && + px < image_region.z && py < image_region.w) { + int tile_index = center_idx + dy*tile_manager.state.tile_stride + dx; + Tile *tile = &tile_manager.state.tiles[tile_index]; + assert(tile->buffers); + + tiles[i].buffer = tile->buffers->buffer.device_pointer; + tiles[i].x = tile_manager.state.buffer.full_x + tile->x; + tiles[i].y = tile_manager.state.buffer.full_y + tile->y; + tiles[i].w = tile->w; + tiles[i].h = tile->h; + tiles[i].buffers = tile->buffers; + + tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); + } + else { + tiles[i].buffer = (device_ptr)NULL; + tiles[i].buffers = NULL; + tiles[i].x = clamp(px, image_region.x, image_region.z); + tiles[i].y = clamp(py, image_region.y, image_region.w); + tiles[i].w = tiles[i].h = 0; + } + } + } + + assert(tiles[4].buffers); + device->map_neighbor_tiles(tile_device, tiles); +} + +void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device) +{ + thread_scoped_lock tile_lock(tile_mutex); + device->unmap_neighbor_tiles(tile_device, tiles); +} + void Session::run_cpu() { bool tiles_written = false; @@ -558,8 +634,8 @@ void Session::run_cpu() /* update status and timing */ update_status_time(); - /* path trace */ - path_trace(); + /* render */ + render(); /* update status and timing */ update_status_time(); @@ -744,10 +820,10 @@ void Session::reset(BufferParams& buffer_params, int samples) if(params.progressive_refine) { thread_scoped_lock buffers_lock(buffers_mutex); - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &rtile, render_tiles) + delete rtile.buffers; - tile_buffers.clear(); + render_tiles.clear(); } } @@ -882,13 +958,15 @@ void Session::update_status_time(bool show_pause, bool show_done) progress.set_status(status, substatus); } -void Session::path_trace() +void Session::render() { /* add path trace task */ - DeviceTask task(DeviceTask::PATH_TRACE); + DeviceTask task(DeviceTask::RENDER); task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); task.release_tile = function_bind(&Session::release_tile, this, _1); + task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); + task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); @@ -897,6 +975,18 @@ void Session::path_trace() task.requested_tile_size = params.tile_size; task.passes_size = tile_manager.params.get_passes_size(); + if(params.use_denoising) { + task.denoising_radius = params.denoising_radius; + task.denoising_strength = params.denoising_strength; + task.denoising_feature_strength = params.denoising_feature_strength; + task.denoising_relative_pca = params.denoising_relative_pca; + + assert(!scene->film->need_update); + task.pass_stride = scene->film->pass_stride; + task.pass_denoising_data = scene->film->denoising_data_offset; + task.pass_denoising_clean = scene->film->denoising_clean_offset; + } + device->task_add(task); } @@ -940,9 +1030,7 @@ bool Session::update_progressive_refine(bool cancel) } if(params.progressive_refine) { - foreach(RenderBuffers *buffers, tile_buffers) { - RenderTile rtile; - rtile.buffers = buffers; + foreach(RenderTile &rtile, render_tiles) { rtile.sample = sample; if(write) { @@ -951,7 +1039,7 @@ bool Session::update_progressive_refine(bool cancel) } else { if(update_render_tile_cb) - update_render_tile_cb(rtile); + update_render_tile_cb(rtile, true); } } } @@ -965,10 +1053,11 @@ void Session::device_free() { scene->device_free(); - foreach(RenderBuffers *buffers, tile_buffers) - delete buffers; + foreach(RenderTile &tile, render_tiles) + delete tile.buffers; + tile_manager.free_device(); - tile_buffers.clear(); + render_tiles.clear(); /* used from background render only, so no need to * re-create render/display buffers here diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index a7e5f78a64d..a7ca90abbce 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -57,6 +57,12 @@ public: bool display_buffer_linear; + bool use_denoising; + int denoising_radius; + float denoising_strength; + float denoising_feature_strength; + bool denoising_relative_pca; + double cancel_timeout; double reset_timeout; double text_timeout; @@ -77,6 +83,12 @@ public: start_resolution = INT_MAX; threads = 0; + use_denoising = false; + denoising_radius = 8; + denoising_strength = 0.0f; + denoising_feature_strength = 0.0f; + denoising_relative_pca = false; + display_buffer_linear = false; cancel_timeout = 0.1; @@ -99,6 +111,11 @@ public: && tile_size == params.tile_size && start_resolution == params.start_resolution && threads == params.threads + && use_denoising == params.use_denoising + && denoising_radius == params.denoising_radius + && denoising_strength == params.denoising_strength + && denoising_feature_strength == params.denoising_feature_strength + && denoising_relative_pca == params.denoising_relative_pca && display_buffer_linear == params.display_buffer_linear && cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout @@ -126,7 +143,7 @@ public: Stats stats; function<void(RenderTile&)> write_render_tile_cb; - function<void(RenderTile&)> update_render_tile_cb; + function<void(RenderTile&, bool)> update_render_tile_cb; explicit Session(const SessionParams& params); ~Session(); @@ -162,7 +179,7 @@ protected: void update_status_time(bool show_pause = false, bool show_done = false); void tonemap(int sample); - void path_trace(); + void render(); void reset_(BufferParams& params, int samples); void run_cpu(); @@ -177,6 +194,9 @@ protected: void update_tile_sample(RenderTile& tile); void release_tile(RenderTile& tile); + void map_neighbor_tiles(RenderTile *tiles, Device *tile_device); + void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device); + bool device_use_gl; thread *session_thread; @@ -202,7 +222,7 @@ protected: double last_update_time; bool update_progressive_refine(bool cancel); - vector<RenderBuffers *> tile_buffers; + vector<RenderTile> render_tiles; DeviceRequestedFeatures get_requested_device_features(); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 944e746ca2d..176a1f4f0f3 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -25,37 +25,39 @@ namespace { class TileComparator { public: - TileComparator(TileOrder order, int2 center) - : order_(order), - center_(center) + TileComparator(TileOrder order_, int2 center_, Tile *tiles_) + : order(order_), + center(center_), + tiles(tiles_) {} - bool operator()(Tile &a, Tile &b) + bool operator()(int a, int b) { - switch(order_) { + switch(order) { case TILE_CENTER: { - float2 dist_a = make_float2(center_.x - (a.x + a.w/2), - center_.y - (a.y + a.h/2)); - float2 dist_b = make_float2(center_.x - (b.x + b.w/2), - center_.y - (b.y + b.h/2)); + float2 dist_a = make_float2(center.x - (tiles[a].x + tiles[a].w/2), + center.y - (tiles[a].y + tiles[a].h/2)); + float2 dist_b = make_float2(center.x - (tiles[b].x + tiles[b].w/2), + center.y - (tiles[b].y + tiles[b].h/2)); return dot(dist_a, dist_a) < dot(dist_b, dist_b); } case TILE_LEFT_TO_RIGHT: - return (a.x == b.x)? (a.y < b.y): (a.x < b.x); + return (tiles[a].x == tiles[b].x)? (tiles[a].y < tiles[b].y): (tiles[a].x < tiles[b].x); case TILE_RIGHT_TO_LEFT: - return (a.x == b.x)? (a.y < b.y): (a.x > b.x); + return (tiles[a].x == tiles[b].x)? (tiles[a].y < tiles[b].y): (tiles[a].x > tiles[b].x); case TILE_TOP_TO_BOTTOM: - return (a.y == b.y)? (a.x < b.x): (a.y > b.y); + return (tiles[a].y == tiles[b].y)? (tiles[a].x < tiles[b].x): (tiles[a].y > tiles[b].y); case TILE_BOTTOM_TO_TOP: default: - return (a.y == b.y)? (a.x < b.x): (a.y < b.y); + return (tiles[a].y == tiles[b].y)? (tiles[a].x < tiles[b].x): (tiles[a].y < tiles[b].y); } } protected: - TileOrder order_; - int2 center_; + TileOrder order; + int2 center; + Tile *tiles; }; inline int2 hilbert_index_to_pos(int n, int d) @@ -96,6 +98,7 @@ TileManager::TileManager(bool progressive_, int num_samples_, int2 tile_size_, i num_devices = num_devices_; preserve_tile_device = preserve_tile_device_; background = background_; + schedule_denoising = false; range_start_sample = 0; range_num_samples = -1; @@ -108,6 +111,16 @@ TileManager::~TileManager() { } +void TileManager::free_device() +{ + if(schedule_denoising) { + for(int i = 0; i < state.tiles.size(); i++) { + delete state.tiles[i].buffers; + state.tiles[i].buffers = NULL; + } + } +} + static int get_divider(int w, int h, int start_resolution) { int divider = 1; @@ -133,6 +146,8 @@ void TileManager::reset(BufferParams& params_, int num_samples_) state.num_tiles = 0; state.num_samples = 0; state.resolution_divider = get_divider(params.width, params.height, start_resolution); + state.render_tiles.clear(); + state.denoising_tiles.clear(); state.tiles.clear(); } @@ -157,6 +172,9 @@ void TileManager::set_samples(int num_samples_) } state.total_pixel_samples = pixel_samples + (uint64_t)get_num_effective_samples() * params.width*params.height; + if(schedule_denoising) { + state.total_pixel_samples += params.width*params.height; + } } } @@ -169,32 +187,36 @@ int TileManager::gen_tiles(bool sliced) int image_h = max(1, params.height/resolution); int2 center = make_int2(image_w/2, image_h/2); - state.tiles.clear(); - int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int slice_num = sliced? num: 1; - int tile_index = 0; + int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); state.tiles.clear(); - state.tiles.resize(num); - vector<list<Tile> >::iterator tile_list = state.tiles.begin(); + state.render_tiles.clear(); + state.denoising_tiles.clear(); + state.render_tiles.resize(num); + state.denoising_tiles.resize(num); + state.tile_stride = tile_w; + vector<list<int> >::iterator tile_list; + tile_list = state.render_tiles.begin(); if(tile_order == TILE_HILBERT_SPIRAL) { assert(!sliced); + int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); + state.tiles.resize(tile_w*tile_h); + /* Size of blocks in tiles, must be a power of 2 */ const int hilbert_size = (max(tile_size.x, tile_size.y) <= 12)? 8: 4; - int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; - int tile_h = (tile_size.y >= image_h)? 1: (image_h + tile_size.y - 1)/tile_size.y; - int tiles_per_device = (tile_w * tile_h + num - 1) / num; + int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; int2 block_size = tile_size * make_int2(hilbert_size, hilbert_size); /* Number of blocks to fill the image */ - int blocks_x = (block_size.x >= image_w)? 1: (image_w + block_size.x - 1)/block_size.x; - int blocks_y = (block_size.y >= image_h)? 1: (image_h + block_size.y - 1)/block_size.y; + int blocks_x = (block_size.x >= image_w)? 1: divide_up(image_w, block_size.x); + int blocks_y = (block_size.y >= image_h)? 1: divide_up(image_h, block_size.y); int n = max(blocks_x, blocks_y) | 0x1; /* Side length of the spiral (must be odd) */ /* Offset of spiral (to keep it centered) */ int2 offset = make_int2((image_w - n*block_size.x)/2, (image_h - n*block_size.y)/2); @@ -225,9 +247,11 @@ int TileManager::gen_tiles(bool sliced) if(pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { int w = min(tile_size.x, image_w - pos.x); int h = min(tile_size.y, image_h - pos.y); - tile_list->push_front(Tile(tile_index, pos.x, pos.y, w, h, cur_device)); + int2 ipos = pos / tile_size; + int idx = ipos.y*tile_w + ipos.x; + state.tiles[idx] = Tile(idx, pos.x, pos.y, w, h, cur_device, Tile::RENDER); + tile_list->push_front(idx); cur_tiles++; - tile_index++; if(cur_tiles == tiles_per_device) { tile_list++; @@ -271,27 +295,28 @@ int TileManager::gen_tiles(bool sliced) break; } } - return tile_index; + return tile_w*tile_h; } + int idx = 0; for(int slice = 0; slice < slice_num; slice++) { int slice_y = (image_h/slice_num)*slice; int slice_h = (slice == slice_num-1)? image_h - slice*(image_h/slice_num): image_h/slice_num; - int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; - int tile_h = (tile_size.y >= slice_h)? 1: (slice_h + tile_size.y - 1)/tile_size.y; + int tile_h = (tile_size.y >= slice_h)? 1: divide_up(slice_h, tile_size.y); - int tiles_per_device = (tile_w * tile_h + num - 1) / num; + int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; for(int tile_y = 0; tile_y < tile_h; tile_y++) { - for(int tile_x = 0; tile_x < tile_w; tile_x++, tile_index++) { + for(int tile_x = 0; tile_x < tile_w; tile_x++, idx++) { int x = tile_x * tile_size.x; int y = tile_y * tile_size.y; int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? slice_h - y: tile_size.y; - tile_list->push_back(Tile(tile_index, x, y + slice_y, w, h, sliced? slice: cur_device)); + state.tiles.push_back(Tile(idx, x, y + slice_y, w, h, sliced? slice: cur_device, Tile::RENDER)); + tile_list->push_back(idx); if(!sliced) { cur_tiles++; @@ -299,7 +324,7 @@ int TileManager::gen_tiles(bool sliced) if(cur_tiles == tiles_per_device) { /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that case. */ if(tile_order != TILE_BOTTOM_TO_TOP) { - tile_list->sort(TileComparator(tile_order, center)); + tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); } tile_list++; cur_tiles = 0; @@ -313,7 +338,7 @@ int TileManager::gen_tiles(bool sliced) } } - return tile_index; + return idx; } void TileManager::set_tiles() @@ -333,15 +358,111 @@ void TileManager::set_tiles() state.buffer.full_height = max(1, params.full_height/resolution); } -bool TileManager::next_tile(Tile& tile, int device) +int TileManager::get_neighbor_index(int index, int neighbor) +{ + static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; + + int resolution = state.resolution_divider; + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); + int tile_w = (tile_size.x >= image_w)? 1: divide_up(image_w, tile_size.x); + int tile_h = (tile_size.y >= image_h)? 1: divide_up(image_h, tile_size.y); + + int nx = state.tiles[index].x/tile_size.x + dx[neighbor], ny = state.tiles[index].y/tile_size.y + dy[neighbor]; + if(nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) + return -1; + + return ny*state.tile_stride + nx; +} + +/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state min_state. */ +bool TileManager::check_neighbor_state(int index, Tile::State min_state) +{ + if(index < 0 || state.tiles[index].state < min_state) { + return false; + } + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + /* Out-of-bounds tiles don't matter. */ + if(nindex >= 0 && state.tiles[nindex].state < min_state) { + return false; + } + } + + return true; +} + +/* Returns whether the tile should be written (and freed if no denoising is used) instead of updating. */ +bool TileManager::finish_tile(int index, bool &delete_tile) +{ + delete_tile = false; + + switch(state.tiles[index].state) { + case Tile::RENDER: + { + if(!schedule_denoising) { + state.tiles[index].state = Tile::DONE; + delete_tile = true; + return true; + } + state.tiles[index].state = Tile::RENDERED; + /* For each neighbor and the tile itself, check whether all of its neighbors have been rendered. If yes, it can be denoised. */ + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + if(check_neighbor_state(nindex, Tile::RENDERED)) { + state.tiles[nindex].state = Tile::DENOISE; + state.denoising_tiles[state.tiles[nindex].device].push_back(nindex); + } + } + return false; + } + case Tile::DENOISE: + { + state.tiles[index].state = Tile::DENOISED; + /* For each neighbor and the tile itself, check whether all of its neighbors have been denoised. If yes, it can be freed. */ + for(int neighbor = 0; neighbor < 9; neighbor++) { + int nindex = get_neighbor_index(index, neighbor); + if(check_neighbor_state(nindex, Tile::DENOISED)) { + state.tiles[nindex].state = Tile::DONE; + /* It can happen that the tile just finished denoising and already can be freed here. + * However, in that case it still has to be written before deleting, so we can't delete it yet. */ + if(neighbor == 8) { + delete_tile = true; + } + else { + delete state.tiles[nindex].buffers; + state.tiles[nindex].buffers = NULL; + } + } + } + return true; + } + default: + assert(false); + return true; + } +} + +bool TileManager::next_tile(Tile* &tile, int device) { int logical_device = preserve_tile_device? device: 0; - if((logical_device >= state.tiles.size()) || state.tiles[logical_device].empty()) + if(logical_device >= state.render_tiles.size()) + return false; + + if(!state.denoising_tiles[logical_device].empty()) { + int idx = state.denoising_tiles[logical_device].front(); + state.denoising_tiles[logical_device].pop_front(); + tile = &state.tiles[idx]; + return true; + } + + if(state.render_tiles[logical_device].empty()) return false; - tile = Tile(state.tiles[logical_device].front()); - state.tiles[logical_device].pop_front(); + int idx = state.render_tiles[logical_device].front(); + state.render_tiles[logical_device].pop_front(); + tile = &state.tiles[idx]; return true; } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 622b89f7670..e39a8f0627a 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -31,12 +31,20 @@ public: int index; int x, y, w, h; int device; + /* RENDER: The tile has to be rendered. + * RENDERED: The tile has been rendered, but can't be denoised yet (waiting for neighbors). + * DENOISE: The tile can be denoised now. + * DENOISED: The tile has been denoised, but can't be freed yet (waiting for neighbors). + * DONE: The tile is finished and has been freed. */ + typedef enum { RENDER = 0, RENDERED, DENOISE, DENOISED, DONE } State; + State state; + RenderBuffers *buffers; Tile() {} - Tile(int index_, int x_, int y_, int w_, int h_, int device_) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_) {} + Tile(int index_, int x_, int y_, int w_, int h_, int device_, State state_ = RENDER) + : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), state(state_), buffers(NULL) {} }; /* Tile order */ @@ -58,6 +66,8 @@ public: BufferParams params; struct State { + vector<Tile> tiles; + int tile_stride; BufferParams buffer; int sample; int num_samples; @@ -67,9 +77,12 @@ public: /* Total samples over all pixels: Generally num_samples*num_pixels, * but can be higher due to the initial resolution division for previews. */ uint64_t total_pixel_samples; - /* This vector contains a list of tiles for every logical device in the session. - * In each list, the tiles are sorted according to the tile order setting. */ - vector<list<Tile> > tiles; + + /* These lists contain the indices of the tiles to be rendered/denoised and are used + * when acquiring a new tile for the device. + * Each list in each vector is for one logical device. */ + vector<list<int> > render_tiles; + vector<list<int> > denoising_tiles; } state; int num_samples; @@ -78,10 +91,12 @@ public: bool preserve_tile_device, bool background, TileOrder tile_order, int num_devices = 1); ~TileManager(); + void free_device(); void reset(BufferParams& params, int num_samples); void set_samples(int num_samples); bool next(); - bool next_tile(Tile& tile, int device = 0); + bool next_tile(Tile* &tile, int device = 0); + bool finish_tile(int index, bool& delete_tile); bool done(); void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } @@ -96,6 +111,9 @@ public: /* Get number of actual samples to render. */ int get_num_effective_samples(); + + /* Schedule tiles for denoising after they've been rendered. */ + bool schedule_denoising; protected: void set_tiles(); @@ -127,6 +145,9 @@ protected: /* Generate tile list, return number of tiles. */ int gen_tiles(bool sliced); + + int get_neighbor_index(int index, int neighbor); + bool check_neighbor_state(int index, Tile::State state); }; CCL_NAMESPACE_END |