diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-09-04 17:29:07 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-09-04 17:29:07 +0400 |
commit | adea12cb01e4c4f18f345dfbbf49e9e622192e4e (patch) | |
tree | b43018344c696e4d59437fabc7f17f5b9d6a8e80 /intern/cycles | |
parent | 68563134d4800be4eb46aa6b598fd719cdaf2980 (diff) |
Cycles: merge of changes from tomato branch.
Regular rendering now works tiled, and supports save buffers, which reduce memory
usage during rendering and cache render results.
Brick texture node by Thomas.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Brick_Texture
Image texture Blended Box Mapping.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Image_Texture
http://mango.blender.org/production/blended_box/
Various bug fixes by Sergey and Campbell.
* Fix for reading freed memory in some node setups.
* Fix incorrect memory read when synchronizing mesh motion.
* Fix crash occurring when direct light usage differs between layers.
* Fix for vector pass giving wrong results in some circumstances.
* Fix for wrong resolution being used when rendering the Render Layer node.
* Option to cancel rendering when doing initial synchronization.
* No more texture limit when using CPU render.
* Many fixes for new tiled rendering.
Diffstat (limited to 'intern/cycles')
50 files changed, 1576 insertions, 602 deletions
diff --git a/intern/cycles/app/cycles_test.cpp b/intern/cycles/app/cycles_test.cpp index 75f76efc8e1..5ee351260c5 100644 --- a/intern/cycles/app/cycles_test.cpp +++ b/intern/cycles/app/cycles_test.cpp @@ -66,12 +66,13 @@ static void session_print(const string& str) static void session_print_status() { - int sample; + int sample, tile; double total_time, sample_time; string status, substatus; /* get status */ - options.session->progress.get_sample(sample, total_time, sample_time); + sample = options.session->progress.get_sample(); + options.session->progress.get_tile(tile, total_time, sample_time); options.session->progress.get_status(status, substatus); if(substatus != "") @@ -111,7 +112,7 @@ static void session_init() static void scene_init(int width, int height) { - options.scene = new Scene(options.scene_params); + options.scene = new Scene(options.scene_params, options.session_params.device); xml_read_file(options.scene, options.filepath.c_str()); if (width == 0 || height == 0) { @@ -147,11 +148,12 @@ static void display_info(Progress& progress) latency = (elapsed - last); last = elapsed; - int sample; + int sample, tile; double total_time, sample_time; string status, substatus; - progress.get_sample(sample, total_time, sample_time); + sample = progress.get_sample(); + progress.get_tile(tile, total_time, sample_time); progress.get_status(status, substatus); if(substatus != "") diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 5569df927fb..87a238e508c 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -379,6 +379,9 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug else if(string_iequals(node.name(), "checker_texture")) { snode = new CheckerTextureNode(); } + else if(string_iequals(node.name(), "brick_texture")) { + snode = new BrickTextureNode(); + } else if(string_iequals(node.name(), "gradient_texture")) { GradientTextureNode *blend = new 
GradientTextureNode(); xml_read_enum(&blend->type, GradientTextureNode::type_enum, node, "type"); diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 7a22cba316e..7f3eca471e6 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -241,12 +241,14 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=1, max=4096, default=1024, ) - cls.debug_min_size = IntProperty( - name="Min Size", - description="", - min=1, max=4096, - default=64, + + cls.resolution_divider = IntProperty( + name="Resolution Divider", + description="For viewport render, the number of lower resolutions to render before the full resolution", + min=1, max=512, + default=4, ) + cls.debug_reset_timeout = FloatProperty( name="Reset timeout", description="", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index c30581ecd40..bf44a558b1a 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -197,8 +197,13 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel): sub = col.column(align=True) sub.label(text="Tiles:") - sub.prop(cscene, "debug_tile_size") - sub.prop(cscene, "debug_min_size") + + sub.prop(rd, "parts_x", text="X") + sub.prop(rd, "parts_y", text="Y") + + subsub = sub.column() + subsub.enabled = not rd.use_border + subsub.prop(rd, "use_save_buffers") col = split.column() @@ -208,6 +213,10 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel): sub.prop(cscene, "debug_use_spatial_splits") sub.prop(cscene, "use_cache") + sub = col.column(align=True) + sub.label(text="Viewport:") + sub.prop(cscene, "resolution_divider") + class CyclesRender_PT_layers(CyclesButtonsPanel, Panel): bl_label = "Layers" diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index 16e4ceded89..9764f24a893 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ 
b/intern/cycles/blender/blender_mesh.cpp @@ -317,11 +317,11 @@ void BlenderSync::sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion) BL::Mesh::vertices_iterator v; AttributeStandard std = (motion == -1)? ATTR_STD_MOTION_PRE: ATTR_STD_MOTION_POST; Attribute *attr_M = mesh->attributes.add(std); - float3 *M = attr_M->data_float3(); + float3 *M = attr_M->data_float3(), *cur_M; size_t i = 0; - for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < size; ++v, M++, i++) - *M = get_float3(v->co()); + for(b_mesh.vertices.begin(v), cur_M = M; v != b_mesh.vertices.end() && i < size; ++v, cur_M++, i++) + *cur_M = get_float3(v->co()); /* if number of vertices changed, or if coordinates stayed the same, drop it */ if(i != size || memcmp(M, &mesh->verts[0], sizeof(float3)*size) == 0) diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 55291639ae1..eb9deb0de2d 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -300,14 +300,16 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion) BL::Scene b_sce = b_scene; int particle_offset = 1; /* first particle is dummy for regular, non-instanced objects */ - for(; b_sce; b_sce = b_sce.background_set()) { - for(b_sce.objects.begin(b_ob); b_ob != b_sce.objects.end(); ++b_ob) { + bool cancel = false; + + for(; b_sce && !cancel; b_sce = b_sce.background_set()) { + for(b_sce.objects.begin(b_ob); b_ob != b_sce.objects.end() && !cancel; ++b_ob) { bool hide = (render_layer.use_viewport_visibility)? 
b_ob->hide(): b_ob->hide_render(); - uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), object_is_light(*b_ob)); - CYCLES_LOCAL_LAYER_HACK(render_layer.use_localview, ob_layer); + uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), render_layer.use_localview, object_is_light(*b_ob)); hide = hide || !(ob_layer & scene_layer); if(!hide) { + progress.set_status("Synchronizing object", (*b_ob).name()); int num_particles = object_count_particles(*b_ob); @@ -349,10 +351,12 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion) particle_offset += num_particles; } + + cancel = progress.get_cancel(); } } - if(!motion) { + if(!cancel && !motion) { sync_background_light(); /* handle removed data and modified pointers */ diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp index 177912cd8f0..f309960fc55 100644 --- a/intern/cycles/blender/blender_particles.cpp +++ b/intern/cycles/blender/blender_particles.cpp @@ -199,8 +199,7 @@ void BlenderSync::sync_particle_systems() for(; b_sce; b_sce = b_sce.background_set()) { for(b_sce.objects.begin(b_ob); b_ob != b_sce.objects.end(); ++b_ob) { bool hide = (render_layer.use_viewport_visibility)? 
b_ob->hide(): b_ob->hide_render(); - uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), object_is_light(*b_ob)); - CYCLES_LOCAL_LAYER_HACK(render_layer.use_localview, ob_layer); + uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), render_layer.use_localview, object_is_light(*b_ob)); hide = hide || !(ob_layer & scene_layer); if(!hide) { diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 4560c2f8543..96d5bb61ff8 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -80,6 +80,8 @@ static PyObject *create_func(PyObject *self, PyObject *args) /* create session */ BlenderSession *session; + Py_BEGIN_ALLOW_THREADS + if(rv3d) { /* interactive session */ int width = region.width(); @@ -91,7 +93,9 @@ static PyObject *create_func(PyObject *self, PyObject *args) /* offline session */ session = new BlenderSession(engine, userpref, data, scene); } - + + Py_END_ALLOW_THREADS + return PyLong_FromVoidPtr(session); } @@ -136,9 +140,13 @@ static PyObject *draw_func(PyObject *self, PyObject *args) static PyObject *sync_func(PyObject *self, PyObject *value) { + Py_BEGIN_ALLOW_THREADS + BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(value); session->synchronize(); + Py_END_ALLOW_THREADS + Py_RETURN_NONE; } diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index d09e43bd76d..7b80c520e72 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -42,14 +42,13 @@ CCL_NAMESPACE_BEGIN BlenderSession::BlenderSession(BL::RenderEngine b_engine_, BL::UserPreferences b_userpref_, BL::BlendData b_data_, BL::Scene b_scene_) : b_engine(b_engine_), b_userpref(b_userpref_), b_data(b_data_), b_scene(b_scene_), - b_v3d(PointerRNA_NULL), b_rv3d(PointerRNA_NULL), - b_rr(PointerRNA_NULL), b_rlay(PointerRNA_NULL) + b_v3d(PointerRNA_NULL), b_rv3d(PointerRNA_NULL) { 
/* offline render */ - BL::RenderSettings r = b_scene.render(); - width = (int)(r.resolution_x()*r.resolution_percentage()/100); - height = (int)(r.resolution_y()*r.resolution_percentage()/100); + width = b_engine.resolution_x(); + height = b_engine.resolution_y(); + background = true; last_redraw_time = 0.0f; @@ -60,7 +59,7 @@ BlenderSession::BlenderSession(BL::RenderEngine b_engine_, BL::UserPreferences b BL::BlendData b_data_, BL::Scene b_scene_, BL::SpaceView3D b_v3d_, BL::RegionView3D b_rv3d_, int width_, int height_) : b_engine(b_engine_), b_userpref(b_userpref_), b_data(b_data_), b_scene(b_scene_), - b_v3d(b_v3d_), b_rv3d(b_rv3d_), b_rr(PointerRNA_NULL), b_rlay(PointerRNA_NULL) + b_v3d(b_v3d_), b_rv3d(b_rv3d_) { /* 3d view render */ width = width_; @@ -80,17 +79,24 @@ BlenderSession::~BlenderSession() void BlenderSession::create_session() { SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); - SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background); + SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); /* reset status/progress */ last_status = ""; last_progress = -1.0f; /* create scene */ - scene = new Scene(scene_params); + scene = new Scene(scene_params, session_params.device); + + /* create session */ + session = new Session(session_params); + session->scene = scene; + session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); + session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); + session->set_pause(BlenderSync::get_session_pause(b_scene, background)); /* create sync */ - sync = new BlenderSync(b_data, b_scene, scene, !background); + sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); sync->sync_data(b_v3d, b_engine.camera_override()); if(b_rv3d) @@ -98,13 +104,6 @@ void BlenderSession::create_session() else 
sync->sync_camera(b_engine.camera_override(), width, height); - /* create session */ - session = new Session(session_params); - session->scene = scene; - session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); - session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); - session->set_pause(BlenderSync::get_session_pause(b_scene, background)); - /* set buffer parameters */ BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, width, height); session->reset(buffer_params, session_params.samples); @@ -177,35 +176,100 @@ static PassType get_pass_type(BL::RenderPass b_pass) return PASS_NONE; } +static BL::RenderResult begin_render_result(BL::RenderEngine b_engine, int x, int y, int w, int h, const char *layername) +{ + RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, x, y, w, h, layername); + PointerRNA rrptr; + RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr); + return BL::RenderResult(rrptr); +} + +static void end_render_result(BL::RenderEngine b_engine, BL::RenderResult b_rr, bool cancel = false) +{ + RE_engine_end_result((RenderEngine*)b_engine.ptr.data, (RenderResult*)b_rr.ptr.data, (int)cancel); +} + +void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only) +{ + BufferParams& params = rtile.buffers->params; + int x = params.full_x - session->tile_manager.params.full_x; + int y = params.full_y - session->tile_manager.params.full_y; + int w = params.width; + int h = params.height; + + /* get render result */ + BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, b_rlay_name.c_str()); + + /* can happen if the intersected rectangle gives 0 width or height */ + if (b_rr.ptr.data == NULL) { + return; + } + + BL::RenderResult::layers_iterator b_single_rlay; + b_rr.layers.begin(b_single_rlay); + BL::RenderLayer b_rlay = *b_single_rlay; + + if (do_update_only) { + /* update only needed */ + 
update_render_result(b_rr, b_rlay, rtile); + end_render_result(b_engine, b_rr, true); + } + else { + /* write result */ + write_render_result(b_rr, b_rlay, rtile); + end_render_result(b_engine, b_rr); + } +} + +void BlenderSession::write_render_tile(RenderTile& rtile) +{ + do_write_update_render_tile(rtile, false); +} + +void BlenderSession::update_render_tile(RenderTile& rtile) +{ + do_write_update_render_tile(rtile, true); +} + void BlenderSession::render() { + /* set callback to write out render results */ + session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1); + session->update_render_tile_cb = function_bind(&BlenderSession::update_render_tile, this, _1); + /* get buffer parameters */ - SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background); + SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, width, height); - int w = buffer_params.width, h = buffer_params.height; - - /* create render result */ - RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h); - PointerRNA rrptr; - RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr); - b_rr = BL::RenderResult(rrptr); + /* render each layer */ BL::RenderSettings r = b_scene.render(); - BL::RenderResult::layers_iterator b_iter; - BL::RenderLayers b_rr_layers(r.ptr); + BL::RenderSettings::layers_iterator b_iter; - /* render each layer */ - for(b_rr.layers.begin(b_iter); b_iter != b_rr.layers.end(); ++b_iter) { - /* set layer */ - b_rlay = *b_iter; + for(r.layers.begin(b_iter); b_iter != r.layers.end(); ++b_iter) { + b_rlay_name = b_iter->name(); + + /* temporary render result to find needed passes */ + BL::RenderResult b_rr = begin_render_result(b_engine, 0, 0, 1, 1, b_rlay_name.c_str()); + BL::RenderResult::layers_iterator b_single_rlay; + 
b_rr.layers.begin(b_single_rlay); + + /* layer will be missing if it was disabled in the UI */ + if(b_single_rlay == b_rr.layers.end()) { + end_render_result(b_engine, b_rr, true); + continue; + } + + BL::RenderLayer b_rlay = *b_single_rlay; /* add passes */ vector<Pass> passes; Pass::add(PASS_COMBINED, passes); if(session_params.device.advanced_shading) { + + /* loop over passes */ BL::RenderLayer::passes_iterator b_pass_iter; - + for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) { BL::RenderPass b_pass(*b_pass_iter); PassType pass_type = get_pass_type(b_pass); @@ -217,13 +281,16 @@ void BlenderSession::render() } } + /* free result without merging */ + end_render_result(b_engine, b_rr, true); + buffer_params.passes = passes; scene->film->tag_passes_update(scene, passes); scene->film->tag_update(scene); scene->integrator->tag_update(scene); /* update scene */ - sync->sync_data(b_v3d, b_engine.camera_override(), b_iter->name().c_str()); + sync->sync_data(b_v3d, b_engine.camera_override(), b_rlay_name.c_str()); /* update session */ int samples = sync->get_layer_samples(); @@ -235,19 +302,16 @@ void BlenderSession::render() if(session->progress.get_cancel()) break; - - /* write result */ - write_render_result(); } - /* delete render result */ - RE_engine_end_result((RenderEngine*)b_engine.ptr.data, (RenderResult*)b_rr.ptr.data); + /* clear callback */ + session->write_render_tile_cb = NULL; + session->update_render_tile_cb = NULL; } -void BlenderSession::write_render_result() +void BlenderSession::do_write_update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile, bool do_update_only) { - /* get state */ - RenderBuffers *buffers = session->buffers; + RenderBuffers *buffers = rtile.buffers; /* copy data from device */ if(!buffers->copy_from_device()) @@ -255,41 +319,49 @@ void BlenderSession::write_render_result() BufferParams& params = buffers->params; float exposure = scene->film->exposure; - double 
total_time, sample_time; - int sample; - - session->progress.get_sample(sample, total_time, sample_time); vector<float> pixels(params.width*params.height*4); - /* copy each pass */ - BL::RenderLayer::passes_iterator b_iter; - - for(b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) { - BL::RenderPass b_pass(*b_iter); + if (!do_update_only) { + /* copy each pass */ + BL::RenderLayer::passes_iterator b_iter; + + for(b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) { + BL::RenderPass b_pass(*b_iter); - /* find matching pass type */ - PassType pass_type = get_pass_type(b_pass); - int components = b_pass.channels(); + /* find matching pass type */ + PassType pass_type = get_pass_type(b_pass); + int components = b_pass.channels(); - /* copy pixels */ - if(buffers->get_pass(pass_type, exposure, sample, components, &pixels[0])) - rna_RenderPass_rect_set(&b_pass.ptr, &pixels[0]); + /* copy pixels */ + if(buffers->get_pass_rect(pass_type, exposure, rtile.sample, components, &pixels[0])) + rna_RenderPass_rect_set(&b_pass.ptr, &pixels[0]); + } } /* copy combined pass */ - if(buffers->get_pass(PASS_COMBINED, exposure, sample, 4, &pixels[0])) + if(buffers->get_pass_rect(PASS_COMBINED, exposure, rtile.sample, 4, &pixels[0])) rna_RenderLayer_rect_set(&b_rlay.ptr, &pixels[0]); /* tag result as updated */ RE_engine_update_result((RenderEngine*)b_engine.ptr.data, (RenderResult*)b_rr.ptr.data); } +void BlenderSession::write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile) +{ + do_write_update_render_result(b_rr, b_rlay, rtile, false); +} + +void BlenderSession::update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile) +{ + do_write_update_render_result(b_rr, b_rlay, rtile, true); +} + void BlenderSession::synchronize() { /* on session/scene parameter changes, we recreate session entirely */ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); - SessionParams 
session_params = BlenderSync::get_session_params(b_userpref, b_scene, background); + SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); if(session->params.modified(session_params) || scene->params.modified(scene_params)) @@ -364,7 +436,7 @@ bool BlenderSession::draw(int w, int h) /* reset if requested */ if(reset) { - SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background); + SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, w, h); session->reset(buffer_params, session_params.samples); @@ -387,11 +459,16 @@ void BlenderSession::get_status(string& status, string& substatus) void BlenderSession::get_progress(float& progress, double& total_time) { - double sample_time; - int sample; + double tile_time; + int tile, sample, samples_per_tile; + int tile_total = session->tile_manager.state.num_tiles; + + session->progress.get_tile(tile, total_time, tile_time); - session->progress.get_sample(sample, total_time, sample_time); - progress = ((float)sample/(float)session->params.samples); + sample = session->progress.get_sample(); + samples_per_tile = session->tile_manager.state.num_samples; + + progress = ((float)sample/(float)(tile_total * samples_per_tile)); } void BlenderSession::update_status_progress() @@ -404,8 +481,13 @@ void BlenderSession::update_status_progress() get_status(status, substatus); get_progress(progress, total_time); + timestatus = b_scene.name(); + if(b_rlay_name != "") + timestatus += ", " + b_rlay_name; + timestatus += " | "; + BLI_timestr(total_time, time_str); - timestatus = "Elapsed: " + string(time_str) + " | "; + timestatus += "Elapsed: " + string(time_str) + " | "; if(substatus.size() > 0) status += " | " + substatus; @@ -435,7 +517,6 @@ void BlenderSession::tag_redraw() /* offline render, redraw if timeout 
passed */ if(time_dt() - last_redraw_time > 1.0) { - write_render_result(); engine_tag_redraw((RenderEngine*)b_engine.ptr.data); last_redraw_time = time_dt(); } diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index b98e3ffed54..d52e0103bbf 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -29,6 +29,8 @@ CCL_NAMESPACE_BEGIN class Scene; class Session; +class RenderBuffers; +class RenderTile; class BlenderSession { public: @@ -46,7 +48,14 @@ public: /* offline render */ void render(); - void write_render_result(); + + void write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile); + void write_render_tile(RenderTile& rtile); + + /* update functions are used to update display buffer only after sample was rendered + * only needed for better visual feedback */ + void update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile); + void update_render_tile(RenderTile& rtile); /* interactive updates */ void synchronize(); @@ -72,13 +81,16 @@ public: BL::Scene b_scene; BL::SpaceView3D b_v3d; BL::RegionView3D b_rv3d; - BL::RenderResult b_rr; - BL::RenderLayer b_rlay; + string b_rlay_name; string last_status; float last_progress; int width, height; + +protected: + void do_write_update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile, bool do_update_only); + void do_write_update_render_tile(RenderTile& rtile, bool do_update_only); }; CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index b82fee5edf0..9758d9bf92a 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -406,6 +406,8 @@ static ShaderNode *add_node(BL::BlendData b_data, BL::Scene b_scene, ShaderGraph if(b_image) image->filename = image_user_file_path(b_image_node.image_user(), b_image, b_scene.frame_current()); 
image->color_space = ImageTextureNode::color_space_enum[(int)b_image_node.color_space()]; + image->projection = ImageTextureNode::projection_enum[(int)b_image_node.projection()]; + image->projection_blend = b_image_node.projection_blend(); get_tex_mapping(&image->tex_mapping, b_image_node.texture_mapping()); node = image; break; @@ -461,6 +463,17 @@ static ShaderNode *add_node(BL::BlendData b_data, BL::Scene b_scene, ShaderGraph node = checker; break; } + case BL::ShaderNode::type_TEX_BRICK: { + BL::ShaderNodeTexBrick b_brick_node(b_node); + BrickTextureNode *brick = new BrickTextureNode(); + brick->offset = b_brick_node.offset(); + brick->offset_frequency = b_brick_node.offset_frequency(); + brick->squash = b_brick_node.squash(); + brick->squash_frequency = b_brick_node.squash_frequency(); + get_tex_mapping(&brick->tex_mapping, b_brick_node.texture_mapping()); + node = brick; + break; + } case BL::ShaderNode::type_TEX_NOISE: { BL::ShaderNodeTexNoise b_noise_node(b_node); NoiseTextureNode *noise = new NoiseTextureNode(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 6d014a91a9c..907573cf072 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -40,8 +40,9 @@ CCL_NAMESPACE_BEGIN /* Constructor */ -BlenderSync::BlenderSync(BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene_, bool preview_) -: b_data(b_data_), b_scene(b_scene_), +BlenderSync::BlenderSync(BL::RenderEngine b_engine_, BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene_, bool preview_, Progress &progress_) +: b_engine(b_engine_), + b_data(b_data_), b_scene(b_scene_), shader_map(&scene_->shaders), object_map(&scene_->objects), mesh_map(&scene_->meshes), @@ -49,7 +50,8 @@ BlenderSync::BlenderSync(BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene particle_system_map(&scene_->particle_systems), world_map(NULL), world_recalc(false), - experimental(false) + experimental(false), + 
progress(progress_) { scene = scene_; preview = preview_; @@ -229,8 +231,7 @@ void BlenderSync::sync_render_layers(BL::SpaceView3D b_v3d, const char *layer) } else { render_layer.use_localview = (b_v3d.local_view() ? true : false); - render_layer.scene_layer = get_layer(b_v3d.layers(), b_v3d.layers_local_view()); - CYCLES_LOCAL_LAYER_HACK(render_layer.use_localview, render_layer.scene_layer); + render_layer.scene_layer = get_layer(b_v3d.layers(), b_v3d.layers_local_view(), render_layer.use_localview); render_layer.layer = render_layer.scene_layer; render_layer.holdout_layer = 0; render_layer.material_override = PointerRNA_NULL; @@ -296,7 +297,7 @@ bool BlenderSync::get_session_pause(BL::Scene b_scene, bool background) return (background)? false: get_boolean(cscene, "preview_pause"); } -SessionParams BlenderSync::get_session_params(BL::UserPreferences b_userpref, BL::Scene b_scene, bool background) +SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::UserPreferences b_userpref, BL::Scene b_scene, bool background) { SessionParams params; PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); @@ -350,25 +351,39 @@ SessionParams BlenderSync::get_session_params(BL::UserPreferences b_userpref, BL } } + /* tiles */ + if(params.device.type != DEVICE_CPU && !background) { + /* currently GPU could be much slower than CPU when using tiles, + * still need to be investigated, but meanwhile make it possible + * to work in viewport smoothly + */ + int debug_tile_size = get_int(cscene, "debug_tile_size"); + + params.tile_size = make_int2(debug_tile_size, debug_tile_size); + } + else { + int tile_x = b_engine.tile_x(); + int tile_y = b_engine.tile_y(); + + params.tile_size = make_int2(tile_x, tile_y); + } + + params.resolution = 1 << get_int(cscene, "resolution_divider"); + /* other parameters */ params.threads = b_scene.render().threads(); - params.tile_size = get_int(cscene, "debug_tile_size"); - params.min_size = get_int(cscene, "debug_min_size"); 
+ params.cancel_timeout = get_float(cscene, "debug_cancel_timeout"); params.reset_timeout = get_float(cscene, "debug_reset_timeout"); params.text_timeout = get_float(cscene, "debug_text_timeout"); if(background) { - params.progressive = true; - params.min_size = INT_MAX; + params.progressive = false; + params.resolution = 1; } else params.progressive = true; - /* todo: multi device only works with single tiles now */ - if(params.device.type == DEVICE_MULTI) - params.tile_size = INT_MAX; - return params; } diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index f3bd06b3a53..27f6b6ee4ee 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -49,7 +49,7 @@ class ShaderNode; class BlenderSync { public: - BlenderSync(BL::BlendData b_data, BL::Scene b_scene, Scene *scene_, bool preview_); + BlenderSync(BL::RenderEngine b_engine_, BL::BlendData b_data, BL::Scene b_scene, Scene *scene_, bool preview_, Progress &progress_); ~BlenderSync(); /* sync */ @@ -61,7 +61,7 @@ public: /* get parameters */ static SceneParams get_scene_params(BL::Scene b_scene, bool background); - static SessionParams get_session_params(BL::UserPreferences b_userpref, BL::Scene b_scene, bool background); + static SessionParams get_session_params(BL::RenderEngine b_engine, BL::UserPreferences b_userpref, BL::Scene b_scene, bool background); static bool get_session_pause(BL::Scene b_scene, bool background); static BufferParams get_buffer_params(BL::Scene b_scene, Camera *cam, int width, int height); @@ -97,6 +97,7 @@ private: int object_count_particles(BL::Object b_ob); /* variables */ + BL::RenderEngine b_engine; BL::BlendData b_data; BL::Scene b_scene; @@ -132,21 +133,9 @@ private: bool use_localview; int samples; } render_layer; -}; -/* we don't have spare bits for localview (normally 20-28) - * because PATH_RAY_LAYER_SHIFT uses 20-32. 
- * So - check if we have localview and if so, shift local - * view bits down to 1-8, since this is done for the view - * port only - it should be OK and not conflict with - * render layers. - Campbell. - * - * ... as an alternative we could use uint64_t - */ -#define CYCLES_LOCAL_LAYER_HACK(use_localview, layer) \ - if (use_localview) { \ - layer >>= 20; \ - } (void)0 + Progress &progress; +}; CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h index 2e9b201c0e2..d0fca9a9fb9 100644 --- a/intern/cycles/blender/blender_util.h +++ b/intern/cycles/blender/blender_util.h @@ -40,9 +40,9 @@ void rna_Object_create_duplilist(void *ob, void *reports, void *sce); void rna_Object_free_duplilist(void *ob, void *reports); void rna_RenderLayer_rect_set(PointerRNA *ptr, const float *values); void rna_RenderPass_rect_set(PointerRNA *ptr, const float *values); -struct RenderResult *RE_engine_begin_result(struct RenderEngine *engine, int x, int y, int w, int h); +struct RenderResult *RE_engine_begin_result(struct RenderEngine *engine, int x, int y, int w, int h, const char *layername); void RE_engine_update_result(struct RenderEngine *engine, struct RenderResult *result); -void RE_engine_end_result(struct RenderEngine *engine, struct RenderResult *result); +void RE_engine_end_result(struct RenderEngine *engine, struct RenderResult *result, int cancel); int RE_engine_test_break(struct RenderEngine *engine); void RE_engine_update_stats(struct RenderEngine *engine, const char *stats, const char *info); void RE_engine_update_progress(struct RenderEngine *engine, float progress); @@ -55,7 +55,6 @@ void rna_ColorRamp_eval(void *coba, float position, float color[4]); void rna_Scene_frame_set(void *scene, int frame, float subframe); void BKE_image_user_frame_calc(void *iuser, int cfra, int fieldnr); void BKE_image_user_file_path(void *iuser, void *ima, char *path); - } CCL_NAMESPACE_BEGIN @@ -171,7 +170,7 @@ static inline uint 
get_layer(BL::Array<int, 20> array) return layer; } -static inline uint get_layer(BL::Array<int, 20> array, BL::Array<int, 8> local_array, bool is_light = false) +static inline uint get_layer(BL::Array<int, 20> array, BL::Array<int, 8> local_array, bool use_local, bool is_light = false) { uint layer = 0; @@ -189,7 +188,14 @@ static inline uint get_layer(BL::Array<int, 20> array, BL::Array<int, 8> local_a if(local_array[i]) layer |= (1 << (20+i)); } - + + /* we don't have spare bits for localview (normally 20-28) because + * PATH_RAY_LAYER_SHIFT uses 20-32. So - check if we have localview and if + * so, shift local view bits down to 1-8, since this is done for the view + * port only - it should be OK and not conflict with render layers. */ + if(use_local) + layer >>= 20; + return layer; } diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 17072d230bb..6038abd815e 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -17,6 +17,7 @@ set(SRC device_multi.cpp device_network.cpp device_opencl.cpp + device_task.cpp ) set(SRC_HEADERS @@ -24,6 +25,7 @@ set(SRC_HEADERS device_memory.h device_intern.h device_network.h + device_task.h ) add_definitions(-DGLEW_STATIC) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 33040f287d1..9a4d364a9b8 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -33,65 +33,6 @@ CCL_NAMESPACE_BEGIN -/* Device Task */ - -DeviceTask::DeviceTask(Type type_) -: type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0), - sample(0), resolution(0), - shader_input(0), shader_output(0), - shader_eval_type(0), shader_x(0), shader_w(0) -{ -} - -void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) -{ - int num; - - if(type == SHADER) { - num = (shader_w + max_size - 1)/max_size; - } - else { - max_size = max(1, max_size/w); - num = (h + max_size - 1)/max_size; - } - - split(tasks, 
num); -} - -void DeviceTask::split(list<DeviceTask>& tasks, int num) -{ - if(type == SHADER) { - num = min(shader_w, num); - - for(int i = 0; i < num; i++) { - int tx = shader_x + (shader_w/num)*i; - int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num; - - DeviceTask task = *this; - - task.shader_x = tx; - task.shader_w = tw; - - tasks.push_back(task); - } - } - else { - num = min(h, num); - - for(int i = 0; i < num; i++) { - int ty = y + (h/num)*i; - int th = (i == num-1)? h - i*(h/num): h/num; - - DeviceTask task = *this; - - task.y = ty; - task.h = th; - - tasks.push_back(task); - } - } -} - /* Device */ void Device::pixels_alloc(device_memory& mem) diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index b17abac2a1b..2ee2e044618 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -22,10 +22,10 @@ #include <stdlib.h> #include "device_memory.h" +#include "device_task.h" #include "util_list.h" #include "util_string.h" -#include "util_task.h" #include "util_thread.h" #include "util_types.h" #include "util_vector.h" @@ -33,6 +33,7 @@ CCL_NAMESPACE_BEGIN class Progress; +class RenderTile; /* Device Types */ @@ -67,32 +68,6 @@ public: } }; -/* Device Task */ - -class DeviceTask : public Task { -public: - typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; - Type type; - - int x, y, w, h; - device_ptr rng_state; - device_ptr rgba; - device_ptr buffer; - int sample; - int resolution; - int offset, stride; - - device_ptr shader_input; - device_ptr shader_output; - int shader_eval_type; - int shader_x, shader_w; - - DeviceTask(Type type = PATH_TRACE); - - void split(list<DeviceTask>& tasks, int num); - void split_max_size(list<DeviceTask>& tasks, int max_size); -}; - /* Device */ class Device { @@ -150,6 +125,10 @@ public: void server_run(); #endif + /* multi device */ + virtual void map_tile(Device *sub_device, RenderTile& tile) {} + virtual int device_number(Device *sub_device) { return 0; } + /* static 
*/ static Device *create(DeviceInfo& info, bool background = true, int threads = 0); diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 070b20aec49..4c54671b0d0 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -27,6 +27,8 @@ #include "osl_shader.h" +#include "buffers.h" + #include "util_debug.h" #include "util_foreach.h" #include "util_function.h" @@ -141,28 +143,56 @@ public: OSLShader::thread_init(kg); #endif -#ifdef WITH_OPTIMIZED_KERNEL - if(system_cpu_support_optimized()) { - for(int y = task.y; y < task.y + task.h; y++) { - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, - task.sample, x, y, task.offset, task.stride); + RenderTile tile; + + while(task.acquire_tile(this, tile)) { + float *render_buffer = (float*)tile.buffer; + uint *rng_state = (uint*)tile.rng_state; + int start_sample = tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; - if(task_pool.cancelled()) - break; +#ifdef WITH_OPTIMIZED_KERNEL + if(system_cpu_support_optimized()) { + for(int sample = start_sample; sample < end_sample; sample++) { + if (task.get_cancel() || task_pool.cancelled()) + break; + + for(int y = tile.y; y < tile.y + tile.h; y++) { + for(int x = tile.x; x < tile.x + tile.w; x++) { + kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state, + sample, x, y, tile.offset, tile.stride); + } + } + + tile.sample = sample + 1; + + task.update_progress(tile); + } } - } - else + else #endif - { - for(int y = task.y; y < task.y + task.h; y++) { - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state, - task.sample, x, y, task.offset, task.stride); - - if(task_pool.cancelled()) - break; + { + for(int sample = start_sample; sample < end_sample; sample++) { + if (task.get_cancel() || task_pool.cancelled()) + break; + + 
for(int y = tile.y; y < tile.y + tile.h; y++) { + for(int x = tile.x; x < tile.x + tile.w; x++) { + kernel_cpu_path_trace(kg, render_buffer, rng_state, + sample, x, y, tile.offset, tile.stride); + } + } + + tile.sample = sample + 1; + + task.update_progress(tile); + } } + + task.release_tile(tile); + + if(task_pool.cancelled()) + break; } #ifdef WITH_OSL @@ -228,8 +258,7 @@ public: /* split task into smaller ones, more than number of threads for uneven * workloads where some parts of the image render slower than others */ list<DeviceTask> tasks; - - task.split(tasks, TaskScheduler::num_threads()*10); + task.split(tasks, TaskScheduler::num_threads()+1); foreach(DeviceTask& task, tasks) task_pool.push(new CPUDeviceTask(this, task)); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 357f99145b2..c8dcfdc2f3d 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -23,6 +23,8 @@ #include "device.h" #include "device_intern.h" +#include "buffers.h" + #include "util_cuda.h" #include "util_debug.h" #include "util_map.h" @@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN class CUDADevice : public Device { public: + TaskPool task_pool; CUdevice cuDevice; CUcontext cuContext; CUmodule cuModule; @@ -192,6 +195,8 @@ public: ~CUDADevice() { + task_pool.stop(); + cuda_push_context(); cuda_assert(cuCtxDetach(cuContext)) } @@ -466,13 +471,13 @@ public: } } - void path_trace(DeviceTask& task) + void path_trace(RenderTile& rtile, int sample) { cuda_push_context(); CUfunction cuPathTrace; - CUdeviceptr d_buffer = cuda_device_ptr(task.buffer); - CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state); + CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer); + CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state); /* get kernel function */ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")) @@ -486,29 +491,28 @@ public: cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, 
sizeof(d_rng_state))) offset += sizeof(d_rng_state); - int sample = task.sample; offset = align_up(offset, __alignof(sample)); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample)) - offset += sizeof(task.sample); + cuda_assert(cuParamSeti(cuPathTrace, offset, sample)) + offset += sizeof(sample); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.x)) - offset += sizeof(task.x); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)) + offset += sizeof(rtile.x); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.y)) - offset += sizeof(task.y); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)) + offset += sizeof(rtile.y); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.w)) - offset += sizeof(task.w); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)) + offset += sizeof(rtile.w); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.h)) - offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)) + offset += sizeof(rtile.h); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset)) - offset += sizeof(task.offset); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)) + offset += sizeof(rtile.offset); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride)) - offset += sizeof(task.stride); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)) + offset += sizeof(rtile.stride); cuda_assert(cuParamSetSize(cuPathTrace, offset)) @@ -520,23 +524,25 @@ public: int xthreads = 8; int ythreads = 8; #endif - int xblocks = (task.w + xthreads - 1)/xthreads; - int yblocks = (task.h + ythreads - 1)/ythreads; + int xblocks = (rtile.w + xthreads - 1)/xthreads; + int yblocks = (rtile.h + ythreads - 1)/ythreads; cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)) cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)) cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)) + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); } - void tonemap(DeviceTask& task) + void 
tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba) { cuda_push_context(); CUfunction cuFilmConvert; - CUdeviceptr d_rgba = map_pixels(task.rgba); - CUdeviceptr d_buffer = cuda_device_ptr(task.buffer); + CUdeviceptr d_rgba = map_pixels(rgba); + CUdeviceptr d_buffer = cuda_device_ptr(buffer); /* get kernel function */ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap")) @@ -820,27 +826,71 @@ public: Device::draw_pixels(mem, y, w, h, dy, width, height, transparent); } - void task_add(DeviceTask& task) + void thread_run(DeviceTask *task) { - if(task.type == DeviceTask::TONEMAP) - tonemap(task); - else if(task.type == DeviceTask::PATH_TRACE) - path_trace(task); - else if(task.type == DeviceTask::SHADER) - shader(task); + if(task->type == DeviceTask::PATH_TRACE) { + RenderTile tile; + + /* keep rendering tiles until done */ + while(task->acquire_tile(this, tile)) { + int start_sample = tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; + + for(int sample = start_sample; sample < end_sample; sample++) { + if (task->get_cancel()) + break; + + path_trace(tile, sample); + + tile.sample = sample + 1; + + task->update_progress(tile); + } + + task->release_tile(tile); + } + } + else if(task->type == DeviceTask::SHADER) { + shader(*task); + + cuda_push_context(); + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); + } } - void task_wait() + class CUDADeviceTask : public DeviceTask { + public: + CUDADeviceTask(CUDADevice *device, DeviceTask& task) + : DeviceTask(task) + { + run = function_bind(&CUDADevice::thread_run, device, this); + } + }; + + void task_add(DeviceTask& task) { - cuda_push_context(); + if(task.type == DeviceTask::TONEMAP) { + /* must be done in main thread due to opengl access */ + tonemap(task, task.buffer, task.rgba); - cuda_assert(cuCtxSynchronize()) + cuda_push_context(); + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); + } + else { + task_pool.push(new CUDADeviceTask(this, task)); + 
} + } - cuda_pop_context(); + void task_wait() + { + task_pool.wait_work(); } void task_cancel() { + task_pool.cancel(); } }; diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 83e69b98f5d..546ffe5e4b9 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -23,6 +23,8 @@ #include "device_intern.h" #include "device_network.h" +#include "buffers.h" + #include "util_foreach.h" #include "util_list.h" #include "util_map.h" @@ -255,6 +257,30 @@ public: rgba.device_pointer = tmp; } + void map_tile(Device *sub_device, RenderTile& tile) + { + foreach(SubDevice& sub, devices) { + if(sub.device == sub_device) { + if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer]; + if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state]; + if(tile.rgba) tile.rgba = sub.ptr_map[tile.rgba]; + } + } + } + + int device_number(Device *sub_device) + { + int i = 0; + + foreach(SubDevice& sub, devices) { + if(sub.device == sub_device) + return i; + i++; + } + + return -1; + } + void task_add(DeviceTask& task) { list<DeviceTask> tasks; @@ -266,7 +292,6 @@ public: tasks.pop_front(); if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer]; - if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state]; if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba]; if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input]; if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output]; diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index c9ec7c75063..3c78b4895ae 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -25,6 +25,8 @@ #include "device.h" #include "device_intern.h" +#include "buffers.h" + #include "util_foreach.h" #include "util_map.h" #include "util_math.h" @@ -41,6 +43,7 @@ CCL_NAMESPACE_BEGIN class OpenCLDevice : public Device { public: + TaskPool task_pool; cl_context 
cxContext; cl_command_queue cqCommandQueue; cl_platform_id cpPlatform; @@ -435,6 +438,8 @@ public: ~OpenCLDevice() { + task_pool.stop(); + if(null_mem) clReleaseMemObject(CL_MEM_PTR(null_mem)); @@ -540,19 +545,19 @@ public: return global_size + ((r == 0)? 0: group_size - r); } - void path_trace(DeviceTask& task) + void path_trace(RenderTile& rtile, int sample) { /* cast arguments to cl types */ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); - cl_mem d_buffer = CL_MEM_PTR(task.buffer); - cl_mem d_rng_state = CL_MEM_PTR(task.rng_state); - cl_int d_x = task.x; - cl_int d_y = task.y; - cl_int d_w = task.w; - cl_int d_h = task.h; - cl_int d_sample = task.sample; - cl_int d_offset = task.offset; - cl_int d_stride = task.stride; + cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); + cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state); + cl_int d_x = rtile.x; + cl_int d_y = rtile.y; + cl_int d_w = rtile.w; + cl_int d_h = rtile.h; + cl_int d_sample = sample; + cl_int d_offset = rtile.offset; + cl_int d_stride = rtile.stride; /* sample arguments */ int narg = 0; @@ -613,12 +618,12 @@ public: return err; } - void tonemap(DeviceTask& task) + void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba) { /* cast arguments to cl types */ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); - cl_mem d_rgba = CL_MEM_PTR(task.rgba); - cl_mem d_buffer = CL_MEM_PTR(task.buffer); + cl_mem d_rgba = CL_MEM_PTR(rgba); + cl_mem d_buffer = CL_MEM_PTR(buffer); cl_int d_x = task.x; cl_int d_y = task.y; cl_int d_w = task.w; @@ -667,30 +672,57 @@ public: opencl_assert(clFinish(cqCommandQueue)); } - void task_add(DeviceTask& maintask) + void thread_run(DeviceTask *task) { - list<DeviceTask> tasks; + if(task->type == DeviceTask::TONEMAP) { + tonemap(*task, task->buffer, task->rgba); + } + else if(task->type == DeviceTask::PATH_TRACE) { + RenderTile tile; + + /* keep rendering tiles until done */ + while(task->acquire_tile(this, tile)) { + int start_sample = 
tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; - /* arbitrary limit to work around apple ATI opencl issue */ - if(platform_name == "Apple") - maintask.split_max_size(tasks, 76800); - else - tasks.push_back(maintask); + for(int sample = start_sample; sample < end_sample; sample++) { + if (task->get_cancel()) + break; + + path_trace(tile, sample); + + tile.sample = sample + 1; - foreach(DeviceTask& task, tasks) { - if(task.type == DeviceTask::TONEMAP) - tonemap(task); - else if(task.type == DeviceTask::PATH_TRACE) - path_trace(task); + task->update_progress(tile); + } + + task->release_tile(tile); + } } } + class OpenCLDeviceTask : public DeviceTask { + public: + OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task) + : DeviceTask(task) + { + run = function_bind(&OpenCLDevice::thread_run, device, this); + } + }; + + void task_add(DeviceTask& task) + { + task_pool.push(new OpenCLDeviceTask(this, task)); + } + void task_wait() { + task_pool.wait_work(); } void task_cancel() { + task_pool.cancel(); } }; diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp new file mode 100644 index 00000000000..c85e182d629 --- /dev/null +++ b/intern/cycles/device/device_task.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <stdlib.h> +#include <string.h> + +#include "device_task.h" + +#include "util_algorithm.h" +#include "util_time.h" + +CCL_NAMESPACE_BEGIN + +/* Device Task */ + +DeviceTask::DeviceTask(Type type_) +: type(type_), x(0), y(0), w(0), h(0), rgba(0), buffer(0), + sample(0), num_samples(1), resolution(0), + shader_input(0), shader_output(0), + shader_eval_type(0), shader_x(0), shader_w(0) +{ + last_update_time = time_dt(); +} + +void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size) +{ + int num; + + if(type == SHADER) { + num = (shader_w + max_size - 1)/max_size; + } + else { + max_size = max(1, max_size/w); + num = (h + max_size - 1)/max_size; + } + + split(tasks, num); +} + +void DeviceTask::split(list<DeviceTask>& tasks, int num) +{ + if(type == SHADER) { + num = min(shader_w, num); + + for(int i = 0; i < num; i++) { + int tx = shader_x + (shader_w/num)*i; + int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num; + + DeviceTask task = *this; + + task.shader_x = tx; + task.shader_w = tw; + + tasks.push_back(task); + } + } + else if(type == PATH_TRACE) { + for(int i = 0; i < num; i++) + tasks.push_back(*this); + } + else { + num = min(h, num); + + for(int i = 0; i < num; i++) { + int ty = y + (h/num)*i; + int th = (i == num-1)? 
h - i*(h/num): h/num; + + DeviceTask task = *this; + + task.y = ty; + task.h = th; + + tasks.push_back(task); + } + } +} + +void DeviceTask::update_progress(RenderTile &rtile) +{ + if (type != PATH_TRACE) + return; + + if(update_progress_sample) + update_progress_sample(); + + if(update_tile_sample) { + double current_time = time_dt(); + + if (current_time - last_update_time >= 1.0f) { + update_tile_sample(rtile); + + last_update_time = current_time; + } + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h new file mode 100644 index 00000000000..cfb3d8d988e --- /dev/null +++ b/intern/cycles/device/device_task.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __DEVICE_TASK_H__ +#define __DEVICE_TASK_H__ + +#include "device_memory.h" + +#include "util_function.h" +#include "util_list.h" +#include "util_task.h" + +CCL_NAMESPACE_BEGIN + +/* Device Task */ + +class Device; +class RenderBuffers; +class RenderTile; +class Tile; + +class DeviceTask : public Task { +public: + typedef enum { PATH_TRACE, TONEMAP, SHADER } Type; + Type type; + + int x, y, w, h; + device_ptr rgba; + device_ptr buffer; + int sample; + int num_samples; + int resolution; + int offset, stride; + + device_ptr shader_input; + device_ptr shader_output; + int shader_eval_type; + int shader_x, shader_w; + + DeviceTask(Type type = PATH_TRACE); + + void split(list<DeviceTask>& tasks, int num); + void split_max_size(list<DeviceTask>& tasks, int max_size); + + void update_progress(RenderTile &rtile); + + boost::function<bool(Device *device, RenderTile&)> acquire_tile; + boost::function<void(void)> update_progress_sample; + boost::function<void(RenderTile&)> update_tile_sample; + boost::function<void(RenderTile&)> release_tile; + boost::function<bool(void)> get_cancel; + +protected: + double last_update_time; +}; + +CCL_NAMESPACE_END + +#endif /* __DEVICE_TASK_H__ */ + diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 98cb16d5dfc..c26954e23b6 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -61,6 +61,7 @@ set(SRC_SVM_HEADERS svm/svm_closure.h svm/svm_convert.h svm/svm_checker.h + svm/svm_brick.h svm/svm_displace.h svm/svm_fresnel.h svm/svm_gamma.h diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 667db1e5f03..62d79bdd946 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -87,14 +87,10 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t else if(strstr(name, "__tex_image_float")) { texture_image_float4 *tex = NULL; int id = atoi(name + 
strlen("__tex_image_float_")); + int array_index = id; - switch(id) { - case 95: tex = &kg->__tex_image_float_095; break; - case 96: tex = &kg->__tex_image_float_096; break; - case 97: tex = &kg->__tex_image_float_097; break; - case 98: tex = &kg->__tex_image_float_098; break; - case 99: tex = &kg->__tex_image_float_099; break; - default: break; + if (array_index >= 0 && array_index < MAX_FLOAT_IMAGES) { + tex = &kg->texture_float_images[array_index]; } if(tex) { @@ -106,104 +102,10 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t else if(strstr(name, "__tex_image")) { texture_image_uchar4 *tex = NULL; int id = atoi(name + strlen("__tex_image_")); + int array_index = id - MAX_FLOAT_IMAGES; - switch(id) { - case 0: tex = &kg->__tex_image_000; break; - case 1: tex = &kg->__tex_image_001; break; - case 2: tex = &kg->__tex_image_002; break; - case 3: tex = &kg->__tex_image_003; break; - case 4: tex = &kg->__tex_image_004; break; - case 5: tex = &kg->__tex_image_005; break; - case 6: tex = &kg->__tex_image_006; break; - case 7: tex = &kg->__tex_image_007; break; - case 8: tex = &kg->__tex_image_008; break; - case 9: tex = &kg->__tex_image_009; break; - case 10: tex = &kg->__tex_image_010; break; - case 11: tex = &kg->__tex_image_011; break; - case 12: tex = &kg->__tex_image_012; break; - case 13: tex = &kg->__tex_image_013; break; - case 14: tex = &kg->__tex_image_014; break; - case 15: tex = &kg->__tex_image_015; break; - case 16: tex = &kg->__tex_image_016; break; - case 17: tex = &kg->__tex_image_017; break; - case 18: tex = &kg->__tex_image_018; break; - case 19: tex = &kg->__tex_image_019; break; - case 20: tex = &kg->__tex_image_020; break; - case 21: tex = &kg->__tex_image_021; break; - case 22: tex = &kg->__tex_image_022; break; - case 23: tex = &kg->__tex_image_023; break; - case 24: tex = &kg->__tex_image_024; break; - case 25: tex = &kg->__tex_image_025; break; - case 26: tex = &kg->__tex_image_026; break; - case 27: tex = 
&kg->__tex_image_027; break; - case 28: tex = &kg->__tex_image_028; break; - case 29: tex = &kg->__tex_image_029; break; - case 30: tex = &kg->__tex_image_030; break; - case 31: tex = &kg->__tex_image_031; break; - case 32: tex = &kg->__tex_image_032; break; - case 33: tex = &kg->__tex_image_033; break; - case 34: tex = &kg->__tex_image_034; break; - case 35: tex = &kg->__tex_image_035; break; - case 36: tex = &kg->__tex_image_036; break; - case 37: tex = &kg->__tex_image_037; break; - case 38: tex = &kg->__tex_image_038; break; - case 39: tex = &kg->__tex_image_039; break; - case 40: tex = &kg->__tex_image_040; break; - case 41: tex = &kg->__tex_image_041; break; - case 42: tex = &kg->__tex_image_042; break; - case 43: tex = &kg->__tex_image_043; break; - case 44: tex = &kg->__tex_image_044; break; - case 45: tex = &kg->__tex_image_045; break; - case 46: tex = &kg->__tex_image_046; break; - case 47: tex = &kg->__tex_image_047; break; - case 48: tex = &kg->__tex_image_048; break; - case 49: tex = &kg->__tex_image_049; break; - case 50: tex = &kg->__tex_image_050; break; - case 51: tex = &kg->__tex_image_051; break; - case 52: tex = &kg->__tex_image_052; break; - case 53: tex = &kg->__tex_image_053; break; - case 54: tex = &kg->__tex_image_054; break; - case 55: tex = &kg->__tex_image_055; break; - case 56: tex = &kg->__tex_image_056; break; - case 57: tex = &kg->__tex_image_057; break; - case 58: tex = &kg->__tex_image_058; break; - case 59: tex = &kg->__tex_image_059; break; - case 60: tex = &kg->__tex_image_060; break; - case 61: tex = &kg->__tex_image_061; break; - case 62: tex = &kg->__tex_image_062; break; - case 63: tex = &kg->__tex_image_063; break; - case 64: tex = &kg->__tex_image_064; break; - case 65: tex = &kg->__tex_image_065; break; - case 66: tex = &kg->__tex_image_066; break; - case 67: tex = &kg->__tex_image_067; break; - case 68: tex = &kg->__tex_image_068; break; - case 69: tex = &kg->__tex_image_069; break; - case 70: tex = &kg->__tex_image_070; 
break; - case 71: tex = &kg->__tex_image_071; break; - case 72: tex = &kg->__tex_image_072; break; - case 73: tex = &kg->__tex_image_073; break; - case 74: tex = &kg->__tex_image_074; break; - case 75: tex = &kg->__tex_image_075; break; - case 76: tex = &kg->__tex_image_076; break; - case 77: tex = &kg->__tex_image_077; break; - case 78: tex = &kg->__tex_image_078; break; - case 79: tex = &kg->__tex_image_079; break; - case 80: tex = &kg->__tex_image_080; break; - case 81: tex = &kg->__tex_image_081; break; - case 82: tex = &kg->__tex_image_082; break; - case 83: tex = &kg->__tex_image_083; break; - case 84: tex = &kg->__tex_image_084; break; - case 85: tex = &kg->__tex_image_085; break; - case 86: tex = &kg->__tex_image_086; break; - case 87: tex = &kg->__tex_image_087; break; - case 88: tex = &kg->__tex_image_088; break; - case 89: tex = &kg->__tex_image_089; break; - case 90: tex = &kg->__tex_image_090; break; - case 91: tex = &kg->__tex_image_091; break; - case 92: tex = &kg->__tex_image_092; break; - case 93: tex = &kg->__tex_image_093; break; - case 94: tex = &kg->__tex_image_094; break; - default: break; + if (array_index >= 0 && array_index < MAX_BYTE_IMAGES) { + tex = &kg->texture_byte_images[array_index]; } if(tex) { diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index cc8f1f3323b..45f653a686c 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -158,7 +158,7 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index)) #define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index)) #define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size)) -#define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y)) +#define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? 
kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y)) #define kernel_data (kg->__data) diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index a99fffbc519..1e56c11ab90 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -35,10 +35,15 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_CPU__ +#define MAX_BYTE_IMAGES 512 +#define MAX_FLOAT_IMAGES 5 + typedef struct KernelGlobals { + texture_image_uchar4 texture_byte_images[MAX_BYTE_IMAGES]; + texture_image_float4 texture_float_images[MAX_FLOAT_IMAGES]; #define KERNEL_TEX(type, ttype, name) ttype name; -#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name; +#define KERNEL_IMAGE_TEX(type, ttype, name) #include "kernel_textures.h" KernelData __data; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index fc67ca98039..8e3a0c6e628 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -689,7 +689,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam if(kernel_data.integrator.use_ambient_occlusion) { int num_samples = kernel_data.integrator.ao_samples; float num_samples_inv = 1.0f/num_samples; - float ao_factor = kernel_data.background.ao_factor/num_samples; + float ao_factor = kernel_data.background.ao_factor; for(int j = 0; j < num_samples; j++) { /* todo: solve correlation */ diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index c1b8eed3dff..4855a948c6e 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -66,12 +66,14 @@ KERNEL_TEX(float, texture_float, __filter_table) /* sobol */ KERNEL_TEX(uint, texture_uint, __sobol_directions) +/* full-float image */ +KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_000) +KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_001) 
+KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_002) +KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_003) +KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_004) + /* image */ -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_000) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_001) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_002) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_003) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_004) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_007) @@ -162,13 +164,11 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_091) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_092) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_093) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_094) - -/* full-float image */ -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_095) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_096) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_097) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_098) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float_099) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099) /* packed image (opencl) */ KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed) diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 8901e5e9628..5b0f192ea47 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h 
@@ -154,6 +154,7 @@ CCL_NAMESPACE_END #include "svm_value.h" #include "svm_voronoi.h" #include "svm_checker.h" +#include "svm_brick.h" CCL_NAMESPACE_BEGIN @@ -220,6 +221,9 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node); break; + case NODE_TEX_IMAGE_BOX: + svm_node_tex_image_box(kg, sd, stack, node); + break; case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; @@ -249,6 +253,9 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT case NODE_TEX_CHECKER: svm_node_tex_checker(kg, sd, stack, node, &offset); break; + case NODE_TEX_BRICK: + svm_node_tex_brick(kg, sd, stack, node, &offset); + break; #endif case NODE_CAMERA: svm_node_camera(kg, sd, stack, node.y, node.z, node.w); diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h new file mode 100644 index 00000000000..50de19b825d --- /dev/null +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -0,0 +1,114 @@ +/* + * Copyright 2012, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +CCL_NAMESPACE_BEGIN + +/* Brick */ + +__device_noinline float brick_noise(int n) /* fast integer noise */ +{ + int nn; + n = (n >> 13) ^ n; + nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; + return 0.5f * ((float)nn / 1073741824.0f); +} + +__device_noinline float svm_brick(float3 p, float scale, float mortar_size, float bias, + float brick_width, float row_height, float offset_amount, int offset_frequency, + float squash_amount, int squash_frequency, float *tint) +{ + p *= scale; + + int bricknum, rownum; + float offset = 0.0f; + float x, y; + + rownum = (int)floor(p.y / row_height); + + if(offset_frequency && squash_frequency) { + brick_width *= ((int)(rownum) % squash_frequency ) ? 1.0f : squash_amount; /* squash */ + offset = ((int)(rownum) % offset_frequency ) ? 0 : (brick_width*offset_amount); /* offset */ + } + + bricknum = (int)floor((p.x+offset) / brick_width); + + x = (p.x+offset) - brick_width*bricknum; + y = p.y - row_height*rownum; + + *tint = clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f); + + return (x < mortar_size || y < mortar_size || + x > (brick_width - mortar_size) || + y > (row_height - mortar_size)) ? 
1.0f : 0.0f; +} + +__device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +{ + uint4 node2 = read_node(kg, offset); + uint4 node3 = read_node(kg, offset); + + /* Input and Output Sockets */ + uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; + uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset; + uint color_offset, fac_offset; + + /* RNA properties */ + uint offset_frequency, squash_frequency; + + float tint = 0; + + decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); + decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); + decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL); + + decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL); + + float3 co = stack_load_float3(stack, co_offset); + + float3 color1 = stack_load_float3(stack, color1_offset); + float3 color2 = stack_load_float3(stack, color2_offset); + float3 mortar = stack_load_float3(stack, mortar_offset); + + float scale = stack_load_float_default(stack, scale_offset, node2.y); + float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z); + float bias = stack_load_float_default(stack, bias_offset, node2.w); + float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x); + float row_height = stack_load_float_default(stack, row_height_offset, node3.y); + float offset_amount = __int_as_float(node3.z); + float squash_amount = __int_as_float(node3.w); + + float f = svm_brick(co, scale, mortar_size, bias, brick_width, row_height, + offset_amount, offset_frequency, squash_amount, squash_frequency, + &tint); + + if(f != 1.0f) { + float facm = 1.0f - tint; + + color1.x = facm * (color1.x) + tint * color2.x; + color1.y = facm * (color1.y) + tint * color2.y; + color1.z = facm * (color1.z) + tint * color2.z; + } + + if(stack_valid(color_offset)) + 
stack_store_float3(stack, color_offset, (f == 1.0f)? mortar: color1); + if(stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 3b2b9204d86..662419418e3 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -50,7 +50,7 @@ __device_inline float svm_image_texture_frac(float x, int *ix) return x - (float)i; } -__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) +__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb) { uint4 info = kernel_tex_fetch(__tex_image_packed_info, id); uint width = info.x; @@ -82,15 +82,24 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); + if(srgb) { + r.x = color_srgb_to_scene_linear(r.x); + r.y = color_srgb_to_scene_linear(r.y); + r.z = color_srgb_to_scene_linear(r.z); + } + return r; } #else -__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) +__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb) { float4 r; +#ifdef __KERNEL_CPU__ + r = kernel_tex_image_interp(id, x, y); +#else /* not particularly proud of this massive switch, what are the * alternatives? 
* - use a single big 1D texture, and do our own lookup/filtering @@ -101,11 +110,11 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) * we still need some for other storage */ switch(id) { - case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break; - case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break; - case 2: r = kernel_tex_image_interp(__tex_image_002, x, y); break; - case 3: r = kernel_tex_image_interp(__tex_image_003, x, y); break; - case 4: r = kernel_tex_image_interp(__tex_image_004, x, y); break; + case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break; + case 1: r = kernel_tex_image_interp(__tex_image_float_001, x, y); break; + case 2: r = kernel_tex_image_interp(__tex_image_float_002, x, y); break; + case 3: r = kernel_tex_image_interp(__tex_image_float_003, x, y); break; + case 4: r = kernel_tex_image_interp(__tex_image_float_004, x, y); break; case 5: r = kernel_tex_image_interp(__tex_image_005, x, y); break; case 6: r = kernel_tex_image_interp(__tex_image_006, x, y); break; case 7: r = kernel_tex_image_interp(__tex_image_007, x, y); break; @@ -196,15 +205,22 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y) case 92: r = kernel_tex_image_interp(__tex_image_092, x, y); break; case 93: r = kernel_tex_image_interp(__tex_image_093, x, y); break; case 94: r = kernel_tex_image_interp(__tex_image_094, x, y); break; - case 95: r = kernel_tex_image_interp(__tex_image_float_095, x, y); break; - case 96: r = kernel_tex_image_interp(__tex_image_float_096, x, y); break; - case 97: r = kernel_tex_image_interp(__tex_image_float_097, x, y); break; - case 98: r = kernel_tex_image_interp(__tex_image_float_098, x, y); break; - case 99: r = kernel_tex_image_interp(__tex_image_float_099, x, y); break; + case 95: r = kernel_tex_image_interp(__tex_image_095, x, y); break; + case 96: r = kernel_tex_image_interp(__tex_image_096, x, y); break; + case 97: r = 
kernel_tex_image_interp(__tex_image_097, x, y); break; + case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break; + case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } +#endif + + if(srgb) { + r.x = color_srgb_to_scene_linear(r.x); + r.y = color_srgb_to_scene_linear(r.y); + r.z = color_srgb_to_scene_linear(r.z); + } return r; } @@ -219,21 +235,102 @@ __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); float3 co = stack_load_float3(stack, co_offset); - float4 f = svm_image_texture(kg, id, co.x, co.y); - float3 r = make_float3(f.x, f.y, f.z); + float4 f = svm_image_texture(kg, id, co.x, co.y, srgb); - if(srgb) { - r.x = color_srgb_to_scene_linear(r.x); - r.y = color_srgb_to_scene_linear(r.y); - r.z = color_srgb_to_scene_linear(r.z); + if(stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if(stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); +} + +__device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +{ + /* get object space normal */ + float3 N = sd->N; + + N = sd->N; + if(sd->object != ~0) + object_inverse_normal_transform(kg, sd, &N); + + /* project from direction vector to barycentric coordinates in triangles */ + N.x = fabsf(N.x); + N.y = fabsf(N.y); + N.z = fabsf(N.z); + + N /= (N.x + N.y + N.z); + + /* basic idea is to think of this as a triangle, each corner representing + * one of the 3 faces of the cube. in the corners we have single textures, + * in between we blend between two textures, and in the middle we a blend + * between three textures. + * + * the Nxyz values are the barycentric coordinates in an equilateral + * triangle, which in case of blending in the middle has a smaller + * equilateral triangle where 3 textures blend. 
this divides things into + * 7 zones, with an if() test for each zone */ + + float3 weight = make_float3(0.0f, 0.0f, 0.0f); + float blend = __int_as_float(node.w); + float limit = 0.5f*(1.0f + blend); + + /* first test for corners with single texture */ + if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) { + weight.x = 1.0f; + } + else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) { + weight.y = 1.0f; } + else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) { + weight.z = 1.0f; + } + else if(blend > 0.0f) { + /* in case of blending, test for mixes between two textures */ + if(N.z < (1.0f - limit)*(N.y + N.x)) { + weight.x = N.x/(N.x + N.y); + weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.y = 1.0f - weight.x; + } + else if(N.x < (1.0f - limit)*(N.y + N.z)) { + weight.y = N.y/(N.y + N.z); + weight.y = clamp((weight.y - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.z = 1.0f - weight.y; + } + else if(N.y < (1.0f - limit)*(N.x + N.z)) { + weight.x = N.x/(N.x + N.z); + weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f); + weight.z = 1.0f - weight.x; + } + else { + /* last case, we have a mix between three */ + weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f); + weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f); + weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f); + } + } + + /* now fetch textures */ + uint co_offset, out_offset, alpha_offset, srgb; + decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); + + float3 co = stack_load_float3(stack, co_offset); + uint id = node.y; + + float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + if(weight.x > 0.0f) + f += weight.x*svm_image_texture(kg, id, co.y, co.z, srgb); + if(weight.y > 0.0f) + f += weight.y*svm_image_texture(kg, id, co.x, co.z, srgb); + if(weight.z > 0.0f) + f += weight.z*svm_image_texture(kg, id, co.y, co.x, srgb); if(stack_valid(out_offset)) - 
stack_store_float3(stack, out_offset, r); + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); if(stack_valid(alpha_offset)) stack_store_float(stack, alpha_offset, f.w); } + __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint id = node.y; @@ -252,17 +349,10 @@ __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float else uv = direction_to_mirrorball(co); - float4 f = svm_image_texture(kg, id, uv.x, uv.y); - float3 r = make_float3(f.x, f.y, f.z); - - if(srgb) { - r.x = color_srgb_to_scene_linear(r.x); - r.y = color_srgb_to_scene_linear(r.y); - r.z = color_srgb_to_scene_linear(r.z); - } + float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb); if(stack_valid(out_offset)) - stack_store_float3(stack, out_offset, r); + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); if(stack_valid(alpha_offset)) stack_store_float(stack, alpha_offset, f.w); } diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 16c726e7faa..c82eafc790a 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -40,6 +40,7 @@ typedef enum NodeType { NODE_MIX_CLOSURE, NODE_JUMP, NODE_TEX_IMAGE, + NODE_TEX_IMAGE_BOX, NODE_TEX_SKY, NODE_GEOMETRY, NODE_LIGHT_PATH, @@ -89,7 +90,8 @@ typedef enum NodeType { NODE_MIN_MAX, NODE_LIGHT_FALLOFF, NODE_OBJECT_INFO, - NODE_PARTICLE_INFO + NODE_PARTICLE_INFO, + NODE_TEX_BRICK } NodeType; typedef enum NodeAttributeType { diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index a79a3591e0f..51568f65323 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -74,6 +74,29 @@ int BufferParams::get_passes_size() return align_up(size, 4); } +/* Render Buffer Task */ + +RenderTile::RenderTile() +{ + x = 0; + y = 0; + w = 0; + h = 0; + + start_sample = 0; + num_samples = 0; + resolution = 0; + + offset = 0; + stride = 0; + + 
buffer = 0; + rng_state = 0; + rgba = 0; + + buffers = NULL; +} + /* Render Buffers */ RenderBuffers::RenderBuffers(Device *device_) @@ -135,7 +158,7 @@ bool RenderBuffers::copy_from_device() return true; } -bool RenderBuffers::get_pass(PassType type, float exposure, int sample, int components, float *pixels) +bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) { int pass_offset = 0; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 78712ed89ef..ee0d78a1cd8 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -67,12 +67,11 @@ class RenderBuffers { public: /* buffer parameters */ BufferParams params; + /* float buffer */ device_vector<float> buffer; /* random number generator state */ device_vector<uint> rng_state; - /* mutex, must be locked manually by callers */ - thread_mutex mutex; RenderBuffers(Device *device); ~RenderBuffers(); @@ -80,7 +79,7 @@ public: void reset(Device *device, BufferParams& params); bool copy_from_device(); - bool get_pass(PassType type, float exposure, int sample, int components, float *pixels); + bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); protected: void device_free(); @@ -105,8 +104,6 @@ public: bool transparent; /* byte buffer for tonemapped result */ device_vector<uchar4> rgba; - /* mutex, must be locked manually by callers */ - thread_mutex mutex; DisplayBuffer(Device *device); ~DisplayBuffer(); @@ -124,6 +121,28 @@ protected: Device *device; }; +/* Render Tile + * Rendering task on a buffer */ + +class RenderTile { +public: + int x, y, w, h; + int start_sample; + int num_samples; + int sample; + int resolution; + int offset; + int stride; + + device_ptr buffer; + device_ptr rng_state; + device_ptr rgba; + + RenderBuffers *buffers; + + RenderTile(); +}; + CCL_NAMESPACE_END #endif /* __BUFFERS_H__ */ diff --git a/intern/cycles/render/camera.cpp 
b/intern/cycles/render/camera.cpp index 55a0f23f8d0..e44caa90f12 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -75,6 +75,7 @@ Camera::Camera() need_update = true; need_device_update = true; + previous_need_motion = -1; } Camera::~Camera() @@ -140,8 +141,17 @@ void Camera::update() void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) { + Scene::MotionType need_motion = scene->need_motion(); + update(); + if (previous_need_motion != need_motion) { + /* scene's motion model could have been changed since previous device + * camera update this could happen for example in case when one render + * layer has got motion pass and another not */ + need_device_update = true; + } + if(!need_device_update) return; @@ -159,7 +169,6 @@ void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) kcam->worldtocamera = transform_inverse(cameratoworld); /* camera motion */ - Scene::MotionType need_motion = scene->need_motion(); kcam->have_motion = 0; if(need_motion == Scene::MOTION_PASS) { @@ -226,6 +235,7 @@ void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) kcam->cliplength = (farclip == FLT_MAX)? 
FLT_MAX: farclip - nearclip; need_device_update = false; + previous_need_motion = need_motion; } void Camera::device_free(Device *device, DeviceScene *dscene) diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index d2a3cce1817..82852bde5e0 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -91,6 +91,7 @@ public: /* update */ bool need_update; bool need_device_update; + int previous_need_motion; /* functions */ Camera(); diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 6ed0812a239..20fbfa0cf27 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -402,6 +402,20 @@ void ShaderGraph::clean() /* break cycles */ break_cycles(output(), visited, on_stack); + /* disconnect unused nodes */ + foreach(ShaderNode *node, nodes) { + if(!visited[node->id]) { + foreach(ShaderInput *to, node->inputs) { + ShaderOutput *from = to->link; + + if (from) { + to->link = NULL; + from->links.erase(remove(from->links.begin(), from->links.end(), to), from->links.end()); + } + } + } + } + /* remove unused nodes */ foreach(ShaderNode *node, nodes) { if(visited[node->id]) diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 1af0972ecf9..4ee024dd52a 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -36,6 +36,10 @@ ImageManager::ImageManager() need_update = true; pack_images = false; osl_texture_system = NULL; + + tex_num_images = TEX_NUM_IMAGES; + tex_num_float_images = TEX_NUM_FLOAT_IMAGES; + tex_image_byte_start = TEX_IMAGE_BYTE_START; } ImageManager::~ImageManager() @@ -56,6 +60,13 @@ void ImageManager::set_osl_texture_system(void *texture_system) osl_texture_system = texture_system; } +void ImageManager::set_extended_image_limits(void) +{ + tex_num_images = TEX_EXTENDED_NUM_IMAGES; + tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES; + tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START; +} + static bool 
is_float_image(const string& filename) { ImageInput *in = ImageInput::create(filename); @@ -97,7 +108,7 @@ int ImageManager::add_image(const string& filename, bool& is_float) for(slot = 0; slot < float_images.size(); slot++) { if(float_images[slot] && float_images[slot]->filename == filename) { float_images[slot]->users++; - return slot+TEX_IMAGE_FLOAT_START; + return slot; } } @@ -110,8 +121,8 @@ int ImageManager::add_image(const string& filename, bool& is_float) if(slot == float_images.size()) { /* max images limit reached */ if(float_images.size() == TEX_NUM_FLOAT_IMAGES) { - printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n", - TEX_NUM_IMAGES, filename.c_str()); + printf("ImageManager::add_image: float image limit reached %d, skipping '%s'\n", + tex_num_float_images, filename.c_str()); return -1; } @@ -125,14 +136,12 @@ int ImageManager::add_image(const string& filename, bool& is_float) img->users = 1; float_images[slot] = img; - /* report slot out of total set of textures */ - slot += TEX_IMAGE_FLOAT_START; } else { for(slot = 0; slot < images.size(); slot++) { if(images[slot] && images[slot]->filename == filename) { images[slot]->users++; - return slot; + return slot+tex_image_byte_start; } } @@ -144,9 +153,9 @@ int ImageManager::add_image(const string& filename, bool& is_float) if(slot == images.size()) { /* max images limit reached */ - if(images.size() == TEX_NUM_IMAGES) { + if(images.size() == tex_num_images) { printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n", - TEX_NUM_IMAGES, filename.c_str()); + tex_num_images, filename.c_str()); return -1; } @@ -160,6 +169,8 @@ int ImageManager::add_image(const string& filename, bool& is_float) img->users = 1; images[slot] = img; + + slot += tex_image_byte_start; } need_update = true; @@ -340,20 +351,20 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl Image *img; bool is_float; - if(slot < TEX_IMAGE_FLOAT_START) { - img = 
images[slot]; + if(slot >= tex_image_byte_start) { + img = images[slot - tex_image_byte_start]; is_float = false; } else { - img = float_images[slot - TEX_IMAGE_FLOAT_START]; + img = float_images[slot]; is_float = true; } if(is_float) { - string filename = path_filename(float_images[slot - TEX_IMAGE_FLOAT_START]->filename); + string filename = path_filename(float_images[slot]->filename); progress->set_status("Updating Images", "Loading " + filename); - device_vector<float4>& tex_img = dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START]; + device_vector<float4>& tex_img = dscene->tex_float_image[slot]; if(tex_img.device_pointer) device->tex_free(tex_img); @@ -377,10 +388,10 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl device->tex_alloc(name.c_str(), tex_img, true, true); } else { - string filename = path_filename(images[slot]->filename); + string filename = path_filename(images[slot - tex_image_byte_start]->filename); progress->set_status("Updating Images", "Loading " + filename); - device_vector<uchar4>& tex_img = dscene->tex_image[slot]; + device_vector<uchar4>& tex_img = dscene->tex_image[slot - tex_image_byte_start]; if(tex_img.device_pointer) device->tex_free(tex_img); @@ -412,12 +423,12 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int sl Image *img; bool is_float; - if(slot < TEX_IMAGE_FLOAT_START) { - img = images[slot]; + if(slot >= tex_image_byte_start) { + img = images[slot - tex_image_byte_start]; is_float = false; } else { - img = float_images[slot - TEX_IMAGE_FLOAT_START]; + img = float_images[slot]; is_float = true; } @@ -429,18 +440,18 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int sl #endif } else if(is_float) { - device->tex_free(dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START]); - dscene->tex_float_image[slot - TEX_IMAGE_FLOAT_START].clear(); + device->tex_free(dscene->tex_float_image[slot]); + dscene->tex_float_image[slot].clear(); - 
delete float_images[slot - TEX_IMAGE_FLOAT_START]; - float_images[slot - TEX_IMAGE_FLOAT_START] = NULL; + delete float_images[slot]; + float_images[slot] = NULL; } else { - device->tex_free(dscene->tex_image[slot]); - dscene->tex_image[slot].clear(); + device->tex_free(dscene->tex_image[slot - tex_image_byte_start]); + dscene->tex_image[slot - tex_image_byte_start].clear(); - delete images[slot]; - images[slot] = NULL; + delete images[slot - tex_image_byte_start]; + images[slot - tex_image_byte_start] = NULL; } } } @@ -457,11 +468,11 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& continue; if(images[slot]->users == 0) { - device_free_image(device, dscene, slot); + device_free_image(device, dscene, slot + tex_image_byte_start); } else if(images[slot]->need_load) { if(!osl_texture_system) - pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress)); + pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + tex_image_byte_start, &progress)); } } @@ -470,11 +481,11 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& continue; if(float_images[slot]->users == 0) { - device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START); + device_free_image(device, dscene, slot); } else if(float_images[slot]->need_load) { if(!osl_texture_system) - pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + TEX_IMAGE_FLOAT_START, &progress)); + pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress)); } } @@ -526,9 +537,9 @@ void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progr void ImageManager::device_free(Device *device, DeviceScene *dscene) { for(size_t slot = 0; slot < images.size(); slot++) - device_free_image(device, dscene, slot); + device_free_image(device, dscene, slot + tex_image_byte_start); for(size_t slot = 0; slot < float_images.size(); 
slot++) - device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START); + device_free_image(device, dscene, slot); device->tex_free(dscene->tex_image_packed); dscene->tex_image_packed.clear(); diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index ef046cfcafb..04a705c27bf 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -28,8 +28,11 @@ CCL_NAMESPACE_BEGIN #define TEX_NUM_FLOAT_IMAGES 5 #define TEX_NUM_IMAGES 95 -#define TEX_IMAGE_MAX (TEX_NUM_IMAGES + TEX_NUM_FLOAT_IMAGES) -#define TEX_IMAGE_FLOAT_START TEX_NUM_IMAGES +#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES + +#define TEX_EXTENDED_NUM_FLOAT_IMAGES 5 +#define TEX_EXTENDED_NUM_IMAGES 512 +#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES /* color to use when textures are not found */ #define TEX_IMAGE_MISSING_R 1 @@ -55,9 +58,15 @@ public: void set_osl_texture_system(void *texture_system); void set_pack_images(bool pack_images_); + void set_extended_image_limits(void); + bool need_update; private: + int tex_num_images; + int tex_num_float_images; + int tex_image_byte_start; + struct Image { string filename; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index d5ca20e6af1..da511b2d2f4 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -112,7 +112,18 @@ static ShaderEnum color_space_init() return enm; } +static ShaderEnum image_projection_init() +{ + ShaderEnum enm; + + enm.insert("Flat", 0); + enm.insert("Box", 1); + + return enm; +} + ShaderEnum ImageTextureNode::color_space_enum = color_space_init(); +ShaderEnum ImageTextureNode::projection_enum = image_projection_init(); ImageTextureNode::ImageTextureNode() : TextureNode("image_texture") @@ -122,6 +133,8 @@ ImageTextureNode::ImageTextureNode() is_float = false; filename = ""; color_space = ustring("Color"); + projection = ustring("Flat");; + projection_blend = 0.0f; add_input("Vector", SHADER_SOCKET_POINT, 
ShaderInput::TEXTURE_UV); add_output("Color", SHADER_SOCKET_COLOR); @@ -169,13 +182,25 @@ void ImageTextureNode::compile(SVMCompiler& compiler) tex_mapping.compile(compiler, vector_in->stack_offset, vector_offset); } - compiler.add_node(NODE_TEX_IMAGE, - slot, - compiler.encode_uchar4( - vector_offset, - color_out->stack_offset, - alpha_out->stack_offset, - srgb)); + if(projection == "Flat") { + compiler.add_node(NODE_TEX_IMAGE, + slot, + compiler.encode_uchar4( + vector_offset, + color_out->stack_offset, + alpha_out->stack_offset, + srgb)); + } + else { + compiler.add_node(NODE_TEX_IMAGE_BOX, + slot, + compiler.encode_uchar4( + vector_offset, + color_out->stack_offset, + alpha_out->stack_offset, + srgb), + __float_as_int(projection_blend)); + } if(vector_offset != vector_in->stack_offset) compiler.stack_clear_offset(vector_in->type, vector_offset); @@ -205,7 +230,7 @@ void ImageTextureNode::compile(OSLCompiler& compiler) /* Environment Texture */ -static ShaderEnum projection_init() +static ShaderEnum env_projection_init() { ShaderEnum enm; @@ -216,7 +241,7 @@ static ShaderEnum projection_init() } ShaderEnum EnvironmentTextureNode::color_space_enum = color_space_init(); -ShaderEnum EnvironmentTextureNode::projection_enum = projection_init(); +ShaderEnum EnvironmentTextureNode::projection_enum = env_projection_init(); EnvironmentTextureNode::EnvironmentTextureNode() : TextureNode("environment_texture") @@ -873,6 +898,98 @@ void CheckerTextureNode::compile(OSLCompiler& compiler) compiler.add(this, "node_checker_texture"); } +/* Brick Texture */ + +BrickTextureNode::BrickTextureNode() +: TextureNode("brick_texture") +{ + offset = 0.5f; + offset_frequency = 2; + squash = 1.0f; + squash_frequency = 2; + + add_input("Vector", SHADER_SOCKET_POINT, ShaderInput::TEXTURE_GENERATED); + add_input("Color1", SHADER_SOCKET_COLOR); + add_input("Color2", SHADER_SOCKET_COLOR); + add_input("Mortar", SHADER_SOCKET_COLOR); + add_input("Scale", SHADER_SOCKET_FLOAT, 5.0f); + 
add_input("Mortar Size", SHADER_SOCKET_FLOAT, 0.02f); + add_input("Bias", SHADER_SOCKET_FLOAT, 0.0f); + add_input("Brick Width", SHADER_SOCKET_FLOAT, 0.5f); + add_input("Row Height", SHADER_SOCKET_FLOAT, 0.25f); + + add_output("Color", SHADER_SOCKET_COLOR); + add_output("Fac", SHADER_SOCKET_FLOAT); +} + +void BrickTextureNode::compile(SVMCompiler& compiler) +{ + ShaderInput *vector_in = input("Vector"); + ShaderInput *color1_in = input("Color1"); + ShaderInput *color2_in = input("Color2"); + ShaderInput *mortar_in = input("Mortar"); + ShaderInput *scale_in = input("Scale"); + ShaderInput *mortar_size_in = input("Mortar Size"); + ShaderInput *bias_in = input("Bias"); + ShaderInput *brick_width_in = input("Brick Width"); + ShaderInput *row_height_in = input("Row Height"); + + ShaderOutput *color_out = output("Color"); + ShaderOutput *fac_out = output("Fac"); + + compiler.stack_assign(vector_in); + compiler.stack_assign(color1_in); + compiler.stack_assign(color2_in); + compiler.stack_assign(mortar_in); + if(scale_in->link) compiler.stack_assign(scale_in); + if(mortar_size_in->link) compiler.stack_assign(mortar_size_in); + if(bias_in->link) compiler.stack_assign(bias_in); + if(brick_width_in->link) compiler.stack_assign(brick_width_in); + if(row_height_in->link) compiler.stack_assign(row_height_in); + + int vector_offset = vector_in->stack_offset; + + if(!tex_mapping.skip()) { + vector_offset = compiler.stack_find_offset(SHADER_SOCKET_VECTOR); + tex_mapping.compile(compiler, vector_in->stack_offset, vector_offset); + } + + if(!color_out->links.empty()) + compiler.stack_assign(color_out); + if(!fac_out->links.empty()) + compiler.stack_assign(fac_out); + + compiler.add_node(NODE_TEX_BRICK, + compiler.encode_uchar4(vector_offset, + color1_in->stack_offset, color2_in->stack_offset, mortar_in->stack_offset), + compiler.encode_uchar4(scale_in->stack_offset, + mortar_size_in->stack_offset, bias_in->stack_offset, brick_width_in->stack_offset), + 
compiler.encode_uchar4(row_height_in->stack_offset, + color_out->stack_offset, fac_out->stack_offset)); + + compiler.add_node(compiler.encode_uchar4(offset_frequency, squash_frequency), + __float_as_int(scale_in->value.x), + __float_as_int(mortar_size_in->value.x), + __float_as_int(bias_in->value.x)); + + compiler.add_node(__float_as_int(brick_width_in->value.x), + __float_as_int(row_height_in->value.x), + __float_as_int(offset), + __float_as_int(squash)); + + if(vector_offset != vector_in->stack_offset) + compiler.stack_clear_offset(vector_in->type, vector_offset); +} + +void BrickTextureNode::compile(OSLCompiler& compiler) +{ + compiler.parameter("Offset", offset); + compiler.parameter("Offset Frequency", offset_frequency); + compiler.parameter("Squash", squash); + compiler.parameter("Squash Frequency", squash_frequency); + compiler.add(this, "node_brick_texture"); +} + /* Normal */ NormalNode::NormalNode() diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 650d6092f29..82bead7e41a 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -70,8 +70,11 @@ public: bool is_float; string filename; ustring color_space; + ustring projection; + float projection_blend; static ShaderEnum color_space_enum; + static ShaderEnum projection_enum; }; class EnvironmentTextureNode : public TextureNode { @@ -155,6 +158,14 @@ public: SHADER_NODE_CLASS(CheckerTextureNode) }; +class BrickTextureNode : public TextureNode { +public: + SHADER_NODE_CLASS(BrickTextureNode) + + float offset, squash; + int offset_frequency, squash_frequency; +}; + class MappingNode : public ShaderNode { public: SHADER_NODE_CLASS(MappingNode) diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 4f5420dec61..071338d49c2 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN -Scene::Scene(const SceneParams& params_) +Scene::Scene(const SceneParams& params_, const 
DeviceInfo& device_info_) : params(params_) { device = NULL; @@ -55,6 +55,9 @@ Scene::Scene(const SceneParams& params_) image_manager = new ImageManager(); shader_manager = ShaderManager::create(this); particle_system_manager = new ParticleSystemManager(); + + if (device_info_.type == DEVICE_CPU) + image_manager->set_extended_image_limits(); } Scene::~Scene() diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index d9341af08e0..f6c1ef44146 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -37,6 +37,7 @@ class AttributeRequestSet; class Background; class Camera; class Device; +class DeviceInfo; class Film; class Filter; class Integrator; @@ -99,8 +100,8 @@ public: device_vector<uint> sobol_directions; /* images */ - device_vector<uchar4> tex_image[TEX_NUM_IMAGES]; - device_vector<float4> tex_float_image[TEX_NUM_FLOAT_IMAGES]; + device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES]; + device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES]; /* opencl images */ device_vector<uchar4> tex_image_packed; @@ -183,7 +184,7 @@ public: /* mutex must be locked manually by callers */ thread_mutex mutex; - Scene(const SceneParams& params); + Scene(const SceneParams& params, const DeviceInfo& device_info); ~Scene(); void device_update(Device *device, Progress& progress); diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index a9f7e5beb56..2fb1f49e563 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -27,6 +27,7 @@ #include "util_foreach.h" #include "util_function.h" +#include "util_math.h" #include "util_opengl.h" #include "util_task.h" #include "util_time.h" @@ -35,15 +36,23 @@ CCL_NAMESPACE_BEGIN Session::Session(const SessionParams& params_) : params(params_), - tile_manager(params.progressive, params.samples, params.tile_size, params.min_size) + tile_manager(params.progressive, params.samples, params.tile_size, params.resolution, + 
(params.background)? 1: max(params.device.multi_devices.size(), 1)) { device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background); TaskScheduler::init(params.threads); device = Device::create(params.device, params.background, params.threads); - buffers = new RenderBuffers(device); - display = new DisplayBuffer(device); + + if(params.background) { + buffers = NULL; + display = NULL; + } + else { + buffers = new RenderBuffers(device); + display = new DisplayBuffer(device); + } session_thread = NULL; scene = NULL; @@ -52,7 +61,6 @@ Session::Session(const SessionParams& params_) reset_time = 0.0; preview_time = 0.0; paused_time = 0.0; - sample = 0; delayed_reset.do_reset = false; delayed_reset.samples = 0; @@ -81,7 +89,7 @@ Session::~Session() wait(); } - if(params.output_path != "") { + if(display && params.output_path != "") { tonemap(); progress.set_status("Writing Image", params.output_path); @@ -118,8 +126,8 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples) /* block for buffer acces and reset immediately. 
we can't do this * in the thread, because we need to allocate an OpenGL buffer, and * that only works in the main thread */ - thread_scoped_lock display_lock(display->mutex); - thread_scoped_lock buffers_lock(buffers->mutex); + thread_scoped_lock display_lock(display_mutex); + thread_scoped_lock buffers_lock(buffers_mutex); display_outdated = true; reset_time = time_dt(); @@ -135,7 +143,7 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples) bool Session::draw_gpu(BufferParams& buffer_params) { /* block for buffer access */ - thread_scoped_lock display_lock(display->mutex); + thread_scoped_lock display_lock(display_mutex); /* first check we already rendered something */ if(gpu_draw_ready) { @@ -145,7 +153,7 @@ bool Session::draw_gpu(BufferParams& buffer_params) /* for CUDA we need to do tonemapping still, since we can * only access GL buffers from the main thread */ if(gpu_need_tonemap) { - thread_scoped_lock buffers_lock(buffers->mutex); + thread_scoped_lock buffers_lock(buffers_mutex); tonemap(); gpu_need_tonemap = false; gpu_need_tonemap_cond.notify_all(); @@ -226,23 +234,18 @@ void Session::run_gpu() /* buffers mutex is locked entirely while rendering each * sample, and released/reacquired on each iteration to allow * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers->mutex); + thread_scoped_lock buffers_lock(buffers_mutex); /* update status and timing */ update_status_time(); /* path trace */ - foreach(Tile& tile, tile_manager.state.tiles) { - path_trace(tile); - - device->task_wait(); + path_trace(); - if(device->error_message() != "") - progress.set_cancel(device->error_message()); + device->task_wait(); - if(progress.get_cancel()) - break; - } + if(device->error_message() != "") + progress.set_cancel(device->error_message()); /* update status and timing */ update_status_time(); @@ -289,7 +292,7 @@ void Session::reset_cpu(BufferParams& buffer_params, int samples) bool Session::draw_cpu(BufferParams& buffer_params) { - 
thread_scoped_lock display_lock(display->mutex); + thread_scoped_lock display_lock(display_mutex); /* first check we already rendered something */ if(display->draw_ready()) { @@ -308,13 +311,101 @@ bool Session::draw_cpu(BufferParams& buffer_params) return false; } +bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) +{ + if(progress.get_cancel()) + return false; + + thread_scoped_lock tile_lock(tile_mutex); + + /* get next tile from manager */ + Tile tile; + int device_num = device->device_number(tile_device); + + if(!tile_manager.next_tile(tile, device_num)) + return false; + + /* fill render tile */ + rtile.x = tile_manager.state.buffer.full_x + tile.x; + rtile.y = tile_manager.state.buffer.full_y + tile.y; + rtile.w = tile.w; + rtile.h = tile.h; + rtile.start_sample = tile_manager.state.sample; + rtile.num_samples = tile_manager.state.num_samples; + rtile.resolution = tile_manager.state.resolution; + + tile_lock.unlock(); + + /* in case of a permant buffer, return it, otherwise we will allocate + * a new temporary buffer */ + if(!write_render_tile_cb) { + tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); + + rtile.buffer = buffers->buffer.device_pointer; + rtile.rng_state = buffers->rng_state.device_pointer; + rtile.rgba = display->rgba.device_pointer; + rtile.buffers = buffers; + + device->map_tile(tile_device, rtile); + + return true; + } + + /* fill buffer parameters */ + BufferParams buffer_params = tile_manager.params; + buffer_params.full_x = rtile.x; + buffer_params.full_y = rtile.y; + buffer_params.width = rtile.w; + buffer_params.height = rtile.h; + + buffer_params.get_offset_stride(rtile.offset, rtile.stride); + + /* allocate buffers */ + RenderBuffers *tilebuffers = new RenderBuffers(tile_device); + tilebuffers->reset(tile_device, buffer_params); + + rtile.buffer = tilebuffers->buffer.device_pointer; + rtile.rng_state = tilebuffers->rng_state.device_pointer; + rtile.rgba = 0; + rtile.buffers = tilebuffers; + + 
return true; +} + +void Session::update_tile_sample(RenderTile& rtile) +{ + thread_scoped_lock tile_lock(tile_mutex); + + if(update_render_tile_cb) { + /* todo: optimize this by making it thread safe and removing lock */ + + update_render_tile_cb(rtile); + } + + update_status_time(); +} + +void Session::release_tile(RenderTile& rtile) +{ + thread_scoped_lock tile_lock(tile_mutex); + + if(write_render_tile_cb) { + /* todo: optimize this by making it thread safe and removing lock */ + write_render_tile_cb(rtile); + + delete rtile.buffers; + } + + update_status_time(); +} + void Session::run_cpu() { { /* reset once to start */ thread_scoped_lock reset_lock(delayed_reset.mutex); - thread_scoped_lock buffers_lock(buffers->mutex); - thread_scoped_lock display_lock(display->mutex); + thread_scoped_lock buffers_lock(buffers_mutex); + thread_scoped_lock display_lock(display_mutex); reset_(delayed_reset.params, delayed_reset.samples); delayed_reset.do_reset = false; @@ -364,7 +455,7 @@ void Session::run_cpu() /* buffers mutex is locked entirely while rendering each * sample, and released/reacquired on each iteration to allow * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers->mutex); + thread_scoped_lock buffers_lock(buffers_mutex); /* update scene */ update_scene(); @@ -379,8 +470,7 @@ void Session::run_cpu() update_status_time(); /* path trace */ - foreach(Tile& tile, tile_manager.state.tiles) - path_trace(tile); + path_trace(); /* update status and timing */ update_status_time(); @@ -396,8 +486,8 @@ void Session::run_cpu() { thread_scoped_lock reset_lock(delayed_reset.mutex); - thread_scoped_lock buffers_lock(buffers->mutex); - thread_scoped_lock display_lock(display->mutex); + thread_scoped_lock buffers_lock(buffers_mutex); + thread_scoped_lock display_lock(display_mutex); if(delayed_reset.do_reset) { /* reset rendering if request from main thread */ @@ -442,6 +532,9 @@ void Session::run() /* run */ if(!progress.get_cancel()) { + /* reset number of 
rendered samples */ + progress.reset_sample(); + if(device_use_gl) run_gpu(); else @@ -465,10 +558,12 @@ bool Session::draw(BufferParams& buffer_params) void Session::reset_(BufferParams& buffer_params, int samples) { - if(buffer_params.modified(buffers->params)) { - gpu_draw_ready = false; - buffers->reset(device, buffer_params); - display->reset(device, buffer_params); + if(buffers) { + if(buffer_params.modified(buffers->params)) { + gpu_draw_ready = false; + buffers->reset(device, buffer_params); + display->reset(device, buffer_params); + } } tile_manager.reset(buffer_params, samples); @@ -476,7 +571,6 @@ void Session::reset_(BufferParams& buffer_params, int samples) start_time = time_dt(); preview_time = 0.0; paused_time = 0.0; - sample = 0; if(!params.background) progress.set_start_time(start_time + paused_time); @@ -532,8 +626,6 @@ void Session::update_scene() { thread_scoped_lock scene_lock(scene->mutex); - progress.set_status("Updating Scene"); - /* update camera if dimensions changed for progressive render. 
the camera * knows nothing about progressive or cropped rendering, it just gets the * image dimensions passed in */ @@ -548,20 +640,47 @@ void Session::update_scene() } /* update scene */ - if(scene->need_update()) + if(scene->need_update()) { + progress.set_status("Updating Scene"); scene->device_update(device, progress); + } } void Session::update_status_time(bool show_pause, bool show_done) { int sample = tile_manager.state.sample; int resolution = tile_manager.state.resolution; + int num_tiles = tile_manager.state.num_tiles; + int tile = tile_manager.state.num_rendered_tiles; /* update status */ string status, substatus; - if(!params.progressive) - substatus = "Path Tracing"; + if(!params.progressive) { + substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles); + + if(params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL || + (params.device.type == DEVICE_CPU && num_tiles == 1)) { + /* when rendering on GPU multithreading happens within single tile, as in + * tiles are handling sequentially and in this case we could display + * currently rendering sample number + * this helps a lot from feedback point of view. + * also display the info on CPU, when using 1 tile only + */ + + int sample = progress.get_sample(), num_samples = tile_manager.state.num_samples; + + if(tile > 1) { + /* sample counter is global for all tiles, subtract samples + * from already finished tiles to get sample counter for + * current tile only + */ + sample -= (tile - 1) * num_samples; + } + + substatus += string_printf(", Sample %d/%d", sample, num_samples); + } + } else if(params.samples == INT_MAX) substatus = string_printf("Path Tracing Sample %d", sample+1); else @@ -580,28 +699,29 @@ void Session::update_status_time(bool show_pause, bool show_done) if(preview_time == 0.0 && resolution == 1) preview_time = time_dt(); - double sample_time = (sample == 0)? 0.0: (time_dt() - preview_time - paused_time)/(sample); + double tile_time = (tile == 0)? 
0.0: (time_dt() - preview_time - paused_time)/(sample); /* negative can happen when we pause a bit before rendering, can discard that */ if(preview_time < 0.0) preview_time = 0.0; - progress.set_sample(sample + 1, sample_time); + progress.set_tile(tile, tile_time); +} + +void Session::update_progress_sample() +{ + progress.increment_sample(); } -void Session::path_trace(Tile& tile) +void Session::path_trace() { /* add path trace task */ DeviceTask task(DeviceTask::PATH_TRACE); - - task.x = tile_manager.state.buffer.full_x + tile.x; - task.y = tile_manager.state.buffer.full_y + tile.y; - task.w = tile.w; - task.h = tile.h; - task.buffer = buffers->buffer.device_pointer; - task.rng_state = buffers->rng_state.device_pointer; - task.sample = tile_manager.state.sample; - task.resolution = tile_manager.state.resolution; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); + + task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); + task.release_tile = function_bind(&Session::release_tile, this, _1); + task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); + task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); + task.update_progress_sample = function_bind(&Session::update_progress_sample, this); device->task_add(task); } diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 90616f011ea..7b01357a2b7 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -47,8 +47,8 @@ public: bool progressive; bool experimental; int samples; - int tile_size; - int min_size; + int2 tile_size; + int resolution; int threads; double cancel_timeout; @@ -63,8 +63,8 @@ public: progressive = false; experimental = false; samples = INT_MAX; - tile_size = 64; - min_size = 64; + tile_size = make_int2(64, 64); + resolution = 4; threads = 0; cancel_timeout = 0.1; @@ -81,7 +81,7 @@ public: && progressive == params.progressive && experimental == params.experimental 
&& tile_size == params.tile_size - && min_size == params.min_size + && resolution == params.resolution && threads == params.threads && cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout @@ -102,7 +102,10 @@ public: DisplayBuffer *display; Progress progress; SessionParams params; - int sample; + TileManager tile_manager; + + boost::function<void(RenderTile&)> write_render_tile_cb; + boost::function<void(RenderTile&)> update_render_tile_cb; Session(const SessionParams& params); ~Session(); @@ -130,7 +133,7 @@ protected: void update_status_time(bool show_pause = false, bool show_done = false); void tonemap(); - void path_trace(Tile& tile); + void path_trace(); void reset_(BufferParams& params, int samples); void run_cpu(); @@ -141,7 +144,12 @@ protected: bool draw_gpu(BufferParams& params); void reset_gpu(BufferParams& params, int samples); - TileManager tile_manager; + bool acquire_tile(Device *tile_device, RenderTile& tile); + void update_tile_sample(RenderTile& tile); + void release_tile(RenderTile& tile); + + void update_progress_sample(); + bool device_use_gl; thread *session_thread; @@ -155,6 +163,9 @@ protected: bool pause; thread_condition_variable pause_cond; thread_mutex pause_mutex; + thread_mutex tile_mutex; + thread_mutex buffers_mutex; + thread_mutex display_mutex; bool kernels_loaded; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 04e48d44029..b4156fd9471 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -19,14 +19,16 @@ #include "tile.h" #include "util_algorithm.h" +#include "util_types.h" CCL_NAMESPACE_BEGIN -TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int min_size_) +TileManager::TileManager(bool progressive_, int num_samples_, int2 tile_size_, int resolution_, int num_devices_) { progressive = progressive_; tile_size = tile_size_; - min_size = min_size_; + resolution = resolution_; + num_devices = num_devices_; BufferParams 
buffer_params; reset(buffer_params, 0); @@ -36,34 +38,24 @@ TileManager::~TileManager() { } -void TileManager::reset(BufferParams& params_, int samples_) +void TileManager::reset(BufferParams& params_, int num_samples_) { params = params_; - start_resolution = 1; - - int w = params.width, h = params.height; - - if(min_size != INT_MAX) { - while(w*h > min_size*min_size) { - w = max(1, w/2); - h = max(1, h/2); - - start_resolution *= 2; - } - } - - samples = samples_; + num_samples = num_samples_; state.buffer = BufferParams(); state.sample = -1; - state.resolution = start_resolution; + state.num_tiles = 0; + state.num_rendered_tiles = 0; + state.num_samples = 0; + state.resolution = resolution; state.tiles.clear(); } -void TileManager::set_samples(int samples_) +void TileManager::set_samples(int num_samples_) { - samples = samples_; + num_samples = num_samples_; } void TileManager::set_tiles() @@ -71,24 +63,34 @@ void TileManager::set_tiles() int resolution = state.resolution; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); - int tile_w = (tile_size >= image_w)? 1: (image_w + tile_size - 1)/tile_size; - int tile_h = (tile_size >= image_h)? 1: (image_h + tile_size - 1)/tile_size; - int sub_w = image_w/tile_w; - int sub_h = image_h/tile_h; state.tiles.clear(); - for(int tile_y = 0; tile_y < tile_h; tile_y++) { - for(int tile_x = 0; tile_x < tile_w; tile_x++) { - int x = tile_x * sub_w; - int y = tile_y * sub_h; - int w = (tile_x == tile_w-1)? image_w - x: sub_w; - int h = (tile_y == tile_h-1)? image_h - y: sub_h; + int num = min(image_h, num_devices); + + for(int device = 0; device < num; device++) { + int device_y = (image_h/num)*device; + int device_h = (device == num-1)? image_h - device*(image_h/num): image_h/num; + + int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x; + int tile_h = (tile_size.y >= device_h)? 
1: (device_h + tile_size.y - 1)/tile_size.y; + int sub_w = (image_w + tile_w - 1)/tile_w; + int sub_h = (device_h + tile_h - 1)/tile_h; - state.tiles.push_back(Tile(x, y, w, h)); + for(int tile_y = 0; tile_y < tile_h; tile_y++) { + for(int tile_x = 0; tile_x < tile_w; tile_x++) { + int x = tile_x * sub_w; + int y = tile_y * sub_h; + int w = (tile_x == tile_w-1)? image_w - x: sub_w; + int h = (tile_y == tile_h-1)? device_h - y: sub_h; + + state.tiles.push_back(Tile(x, y + device_y, w, h, device)); + } } } + state.num_tiles = state.tiles.size(); + state.buffer.width = image_w; state.buffer.height = image_h; @@ -98,9 +100,74 @@ void TileManager::set_tiles() state.buffer.full_height = max(1, params.full_height/resolution); } +list<Tile>::iterator TileManager::next_center_tile(int device) +{ + list<Tile>::iterator iter, best = state.tiles.end(); + + int resolution = state.resolution; + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); + + int num = min(image_h, num_devices); + + int device_y = (image_h / num) * device; + int device_h = (device == num - 1) ? 
image_h - device * (image_h / num) : image_h / num; + + int64_t centx = image_w / 2, centy = device_y + device_h / 2, tot = 1; + int64_t mindist = (int64_t) image_w * (int64_t) device_h; + + /* find center of rendering tiles, image center counts for 1 too */ + for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) { + if(iter->rendering) { + Tile &cur_tile = *iter; + centx += cur_tile.x + cur_tile.w / 2; + centy += cur_tile.y + cur_tile.h / 2; + tot++; + } + } + + centx /= tot; + centy /= tot; + + /* closest of the non-rendering tiles */ + for(iter = state.tiles.begin(); iter != state.tiles.end(); iter++) { + if(iter->device == device && iter->rendering == false) { + Tile &cur_tile = *iter; + + int64_t distx = centx - (cur_tile.x + cur_tile.w / 2); + int64_t disty = centy - (cur_tile.y + cur_tile.h / 2); + distx = (int64_t) sqrt((double)distx * distx + disty * disty); + + if(distx < mindist) { + best = iter; + mindist = distx; + } + } + } + + return best; +} + +bool TileManager::next_tile(Tile& tile, int device) +{ + list<Tile>::iterator tile_it; + + tile_it = next_center_tile(device); + + if(tile_it != state.tiles.end()) { + tile_it->rendering = true; + tile = *tile_it; + state.num_rendered_tiles++; + + return true; + } + + return false; +} + bool TileManager::done() { - return (state.sample+1 >= samples && state.resolution == 1); + return (state.sample+state.num_samples >= num_samples && state.resolution == 1); } bool TileManager::next() @@ -111,10 +178,17 @@ bool TileManager::next() if(progressive && state.resolution > 1) { state.sample = 0; state.resolution /= 2; + state.num_samples = 1; set_tiles(); } else { state.sample++; + + if(progressive) + state.num_samples = 1; + else + state.num_samples = num_samples; + state.resolution = 1; set_tiles(); } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index b6e610c8d90..29f2b1ef9f9 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -31,9 +31,14 @@ 
CCL_NAMESPACE_BEGIN class Tile { public: int x, y, w, h; + int device; + bool rendering; - Tile(int x_, int y_, int w_, int h_) - : x(x_), y(y_), w(w_), h(h_) {} + Tile() + {} + + Tile(int x_, int y_, int w_, int h_, int device_) + : x(x_), y(y_), w(w_), h(h_), device(device_), rendering(false) {} }; /* Tile Manager */ @@ -45,27 +50,34 @@ public: struct State { BufferParams buffer; int sample; + int num_samples; int resolution; + int num_tiles; + int num_rendered_tiles; list<Tile> tiles; } state; - TileManager(bool progressive, int samples, int tile_size, int min_size); + TileManager(bool progressive, int num_samples, int2 tile_size, int resolution, int num_devices = 1); ~TileManager(); - void reset(BufferParams& params, int samples); - void set_samples(int samples); + void reset(BufferParams& params, int num_samples); + void set_samples(int num_samples); bool next(); + bool next_tile(Tile& tile, int device = 0); bool done(); protected: void set_tiles(); bool progressive; - int samples; - int tile_size; - int min_size; + int num_samples; + int2 tile_size; + int resolution; + int num_devices; int start_resolution; + + list<Tile>::iterator next_center_tile(int device = 0); }; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index a6bc478ee64..71a5dedeaa4 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -277,6 +277,11 @@ __device_inline float cross(const float2 a, const float2 b) #ifndef __KERNEL_OPENCL__ +__device_inline bool operator==(const int2 a, const int2 b) +{ + return (a.x == b.x && a.y == b.y); +} + __device_inline float len(const float2 a) { return sqrtf(dot(a, a)); diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index c63aa841c52..ab9ab7243e9 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -36,10 +36,11 @@ class Progress { public: Progress() { + tile = 0; sample = 0; start_time = time_dt(); 
total_time = 0.0f; - sample_time = 0.0f; + tile_time = 0.0f; status = "Initializing"; substatus = ""; update_cb = NULL; @@ -57,8 +58,10 @@ public: { thread_scoped_lock lock(progress.progress_mutex); - progress.get_sample(sample, total_time, sample_time); progress.get_status(status, substatus); + progress.get_tile(tile, total_time, tile_time); + + sample = progress.get_sample(); return *this; } @@ -90,7 +93,7 @@ public: cancel_cb = function; } - /* sample and timing information */ + /* tile and timing information */ void set_start_time(double start_time_) { @@ -99,22 +102,41 @@ public: start_time = start_time_; } - void set_sample(int sample_, double sample_time_) + void set_tile(int tile_, double tile_time_) { thread_scoped_lock lock(progress_mutex); - sample = sample_; + tile = tile_; total_time = time_dt() - start_time; - sample_time = sample_time_; + tile_time = tile_time_; } - void get_sample(int& sample_, double& total_time_, double& sample_time_) + void get_tile(int& tile_, double& total_time_, double& tile_time_) { thread_scoped_lock lock(progress_mutex); - sample_ = sample; + tile_ = tile; total_time_ = (total_time > 0.0)? total_time: 0.0; - sample_time_ = sample_time; + tile_time_ = tile_time; + } + + void reset_sample() + { + thread_scoped_lock lock(progress_mutex); + + sample = 0; + } + + void increment_sample() + { + thread_scoped_lock lock(progress_mutex); + + sample++; + } + + int get_sample() + { + return sample; } /* status messages */ @@ -170,11 +192,12 @@ protected: boost::function<void(void)> update_cb; boost::function<void(void)> cancel_cb; - int sample; + int tile; /* counter for rendered tiles */ + int sample; /* counter of rendered samples, global for all tiles */ double start_time; double total_time; - double sample_time; + double tile_time; string status; string substatus; |