Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2017-03-25 05:39:47 +0300
committerCampbell Barton <ideasman42@gmail.com>2017-03-25 05:39:47 +0300
commit0c93bc2b637a6a3fe01b174dad2fb61895d004d2 (patch)
treeb877a434d2a7af2dfdf727b9794fa46359ba670a /intern
parent5d6e9f237be3379b63169f86b8f4117752d095b3 (diff)
parenta7f16c17c260f311e136758497e5490b226ebc03 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/blender_mesh.cpp2
-rw-r--r--intern/cycles/blender/blender_session.cpp8
-rw-r--r--intern/cycles/blender/blender_session.h1
-rw-r--r--intern/cycles/device/device_cuda.cpp2
-rw-r--r--intern/cycles/device/opencl/opencl.h47
-rw-r--r--intern/cycles/device/opencl/opencl_base.cpp6
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp10
-rw-r--r--intern/cycles/device/opencl/opencl_util.cpp367
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/bvh/bvh.h7
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h12
-rw-r--r--intern/cycles/kernel/bvh/bvh_subsurface.h6
-rw-r--r--intern/cycles/kernel/bvh/bvh_traversal.h10
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume.h10
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume_all.h12
-rw-r--r--intern/cycles/kernel/bvh/qbvh_shadow_all.h10
-rw-r--r--intern/cycles/kernel/bvh/qbvh_subsurface.h6
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h10
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume.h10
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume_all.h10
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_multi.h2
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h4
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_intersect.h47
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h373
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h4
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h6
-rw-r--r--intern/cycles/kernel/kernel_emission.h2
-rw-r--r--intern/cycles/kernel/kernel_math.h1
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h4
-rw-r--r--intern/cycles/render/nodes.h6
-rw-r--r--intern/cycles/render/session.cpp4
-rw-r--r--intern/cycles/render/tile.cpp2
-rw-r--r--intern/cycles/render/tile.h1
-rw-r--r--intern/cycles/util/CMakeLists.txt1
-rw-r--r--intern/cycles/util/util_math.h204
-rw-r--r--intern/cycles/util/util_math_intersect.h326
36 files changed, 855 insertions, 679 deletions
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 78f73d8e062..f949b530f90 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -819,7 +819,7 @@ static void create_mesh(Scene *scene,
int shader = clamp(p->material_index(), 0, used_shaders.size()-1);
bool smooth = p->use_smooth() || use_loop_normals;
- vi.reserve(n);
+ vi.resize(n);
for(int i = 0; i < n; i++) {
/* NOTE: Autosmooth is already taken care about. */
vi[i] = b_mesh.loops[p->loop_start() + i].vertex_index();
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index c5f5ffe9928..d00b4b67a58 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -70,6 +70,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
background = true;
last_redraw_time = 0.0;
start_resize_time = 0.0;
+ last_status_time = 0.0;
}
BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
@@ -95,6 +96,7 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
background = false;
last_redraw_time = 0.0;
start_resize_time = 0.0;
+ last_status_time = 0.0;
}
BlenderSession::~BlenderSession()
@@ -991,10 +993,14 @@ void BlenderSession::update_status_progress()
if(substatus.size() > 0)
status += " | " + substatus;
- if(status != last_status) {
+ double current_time = time_dt();
+ /* When rendering in a window, redraw the status at least once per second to keep the elapsed and remaining time up-to-date.
+ * For headless rendering, only report when something significant changes to keep the console output readable. */
+ if(status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
b_engine.update_stats("", (timestatus + scene + status).c_str());
b_engine.update_memory_stats(mem_used, mem_peak);
last_status = status;
+ last_status_time = current_time;
}
if(progress != last_progress) {
b_engine.update_progress(progress);
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 700b8acec1b..33b88b9ab5e 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -113,6 +113,7 @@ public:
string last_status;
string last_error;
float last_progress;
+ double last_status_time;
int width, height;
double start_resize_time;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 58471ba67c2..08dfa181385 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1500,7 +1500,7 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
cuda_assert(cuLaunchKernel(state_buffer_size,
1, 1, 1,
1, 1, 1,
- 0, 0, &args, 0));
+ 0, 0, (void**)&args, 0));
device->cuda_pop_context();
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index be7e9fa5e30..59e61aad25c 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -91,6 +91,53 @@ public:
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
static bool use_single_program();
+
+ /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */
+
+ /* Platform information. */
+ static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
+ static cl_uint get_num_platforms();
+
+ static bool get_platforms(vector<cl_platform_id> *platform_ids,
+ cl_int *error = NULL);
+ static vector<cl_platform_id> get_platforms();
+
+ static bool get_platform_name(cl_platform_id platform_id,
+ string *platform_name);
+ static string get_platform_name(cl_platform_id platform_id);
+
+ static bool get_num_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ cl_uint *num_devices,
+ cl_int *error = NULL);
+ static cl_uint get_num_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type);
+
+ static bool get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ vector<cl_device_id> *device_ids,
+ cl_int* error = NULL);
+ static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type);
+
+ /* Device information. */
+ static bool get_device_name(cl_device_id device_id,
+ string *device_name,
+ cl_int* error = NULL);
+
+ static string get_device_name(cl_device_id device_id);
+
+ static bool get_device_type(cl_device_id device_id,
+ cl_device_type *device_type,
+ cl_int* error = NULL);
+ static cl_device_type get_device_type(cl_device_id device_id);
+
+ /* Get somewhat more readable device name.
+ * Main difference is AMD OpenCL here which only gives code name
+ * for the regular device name. This will give more sane device
+ * name using some extensions.
+ */
+ static string get_readable_device_name(cl_device_id device_id);
};
/* Thread safe cache for contexts and programs.
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 51ff39f0ad3..0328dfed689 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -152,10 +152,8 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
void CL_CALLBACK OpenCLDeviceBase::context_notify_callback(const char *err_info,
const void * /*private_info*/, size_t /*cb*/, void *user_data)
{
- char name[256];
- clGetDeviceInfo((cl_device_id)user_data, CL_DEVICE_NAME, sizeof(name), &name, NULL);
-
- fprintf(stderr, "OpenCL error (%s): %s\n", name, err_info);
+ string device_name = OpenCLInfo::get_device_name((cl_device_id)user_data);
+ fprintf(stderr, "OpenCL error (%s): %s\n", device_name.c_str(), err_info);
}
bool OpenCLDeviceBase::opencl_version_check()
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 3faae4039e3..d50ae585062 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -41,11 +41,7 @@ static string get_build_options(OpenCLDeviceBase *device, const DeviceRequestedF
/* Set compute device build option. */
cl_device_type device_type;
- device->ciErr = clGetDeviceInfo(device->cdDevice,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL);
+ OpenCLInfo::get_device_type(device->cdDevice, &device_type, &device->ciErr);
assert(device->ciErr == CL_SUCCESS);
if(device_type == CL_DEVICE_TYPE_GPU) {
build_options += " -D__COMPUTE_DEVICE_GPU__";
@@ -346,9 +342,7 @@ public:
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
{
- cl_device_type type;
- clGetDeviceInfo(device->cdDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
-
+ cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
if(type == CL_DEVICE_TYPE_CPU) {
VLOG(1) << "Global size: (64, 64).";
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index a689c7eae26..1f5b9ee0896 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -600,11 +600,20 @@ bool OpenCLInfo::device_supported(const string& platform_name,
const cl_device_id device_id)
{
cl_device_type device_type;
- clGetDeviceInfo(device_id,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL);
+ if(!get_device_type(device_id, &device_type)) {
+ return false;
+ }
+ string device_name;
+ if(!get_device_name(device_id, &device_name)) {
+ return false;
+ }
+ /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
+ * (aka, it will not be on Intel framework). This isn't supported
+ * and needs an explicit blacklist.
+ */
+ if(strstr(device_name.c_str(), "Iris")) {
+ return false;
+ }
if(platform_name == "AMD Accelerated Parallel Processing" &&
device_type == CL_DEVICE_TYPE_GPU)
{
@@ -718,39 +727,30 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
return;
}
+ cl_int error;
vector<cl_device_id> device_ids;
- cl_uint num_devices = 0;
vector<cl_platform_id> platform_ids;
- cl_uint num_platforms = 0;
- /* Get devices. */
- if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS ||
- num_platforms == 0)
- {
- FIRST_VLOG(2) << "No OpenCL platforms were found.";
+ /* Get platforms. */
+ if(!get_platforms(&platform_ids, &error)) {
+ FIRST_VLOG(2) << "Error fetching platforms:"
+ << string(clewErrorString(error));
first_time = false;
return;
}
- platform_ids.resize(num_platforms);
- if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS) {
- FIRST_VLOG(2) << "Failed to fetch platform IDs from the driver..";
+ if(platform_ids.size() == 0) {
+ FIRST_VLOG(2) << "No OpenCL platforms were found.";
first_time = false;
return;
}
/* Devices are numbered consecutively across platforms. */
- for(int platform = 0; platform < num_platforms; platform++) {
+ for(int platform = 0; platform < platform_ids.size(); platform++) {
cl_platform_id platform_id = platform_ids[platform];
- char pname[256];
- if(clGetPlatformInfo(platform_id,
- CL_PLATFORM_NAME,
- sizeof(pname),
- &pname,
- NULL) != CL_SUCCESS)
- {
+ string platform_name;
+ if(!get_platform_name(platform_id, &platform_name)) {
FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
continue;
}
- string platform_name = pname;
FIRST_VLOG(2) << "Enumerating devices for platform "
<< platform_name << ".";
if(!platform_version_check(platform_id)) {
@@ -758,39 +758,28 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
<< " due to too old compiler version.";
continue;
}
- num_devices = 0;
- cl_int ciErr;
- if((ciErr = clGetDeviceIDs(platform_id,
- device_type,
- 0,
- NULL,
- &num_devices)) != CL_SUCCESS || num_devices == 0)
+ if(!get_platform_devices(platform_id,
+ device_type,
+ &device_ids,
+ &error))
{
FIRST_VLOG(2) << "Ignoring platform " << platform_name
- << ", failed to fetch number of devices: " << string(clewErrorString(ciErr));
+ << ", failed to fetch of devices: "
+ << string(clewErrorString(error));
continue;
}
- device_ids.resize(num_devices);
- if(clGetDeviceIDs(platform_id,
- device_type,
- num_devices,
- &device_ids[0],
- NULL) != CL_SUCCESS)
- {
+ if(device_ids.size() == 0) {
FIRST_VLOG(2) << "Ignoring platform " << platform_name
- << ", failed to fetch devices list.";
+ << ", it has no devices.";
continue;
}
- for(int num = 0; num < num_devices; num++) {
- cl_device_id device_id = device_ids[num];
- char device_name[1024] = "\0";
- if(clGetDeviceInfo(device_id,
- CL_DEVICE_NAME,
- sizeof(device_name),
- &device_name,
- NULL) != CL_SUCCESS)
- {
- FIRST_VLOG(2) << "Failed to fetch device name, ignoring.";
+ for(int num = 0; num < device_ids.size(); num++) {
+ const cl_device_id device_id = device_ids[num];
+ string device_name;
+ if(!get_device_name(device_id, &device_name, &error)) {
+ FIRST_VLOG(2) << "Failed to fetch device name: "
+ << string(clewErrorString(error))
+ << ", ignoring.";
continue;
}
if(!device_version_check(device_id)) {
@@ -802,24 +791,28 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
device_supported(platform_name, device_id))
{
cl_device_type device_type;
- if(clGetDeviceInfo(device_id,
- CL_DEVICE_TYPE,
- sizeof(cl_device_type),
- &device_type,
- NULL) != CL_SUCCESS)
- {
+ if(!get_device_type(device_id, &device_type, &error)) {
FIRST_VLOG(2) << "Ignoring device " << device_name
- << ", failed to fetch device type.";
+ << ", failed to fetch device type:"
+ << string(clewErrorString(error));
continue;
}
- FIRST_VLOG(2) << "Adding new device " << device_name << ".";
+ string readable_device_name =
+ get_readable_device_name(device_id);
+ if(readable_device_name != device_name) {
+ FIRST_VLOG(2) << "Using more readable device name: "
+ << readable_device_name;
+ }
+ FIRST_VLOG(2) << "Adding new device "
+ << readable_device_name << ".";
string hardware_id = get_hardware_id(platform_name, device_id);
- usable_devices->push_back(OpenCLPlatformDevice(platform_id,
- platform_name,
- device_id,
- device_type,
- device_name,
- hardware_id));
+ usable_devices->push_back(OpenCLPlatformDevice(
+ platform_id,
+ platform_name,
+ device_id,
+ device_type,
+ readable_device_name,
+ hardware_id));
}
else {
FIRST_VLOG(2) << "Ignoring device " << device_name
@@ -830,6 +823,252 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
first_time = false;
}
+bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
+ cl_int *error)
+{
+ /* Reset from possible previous state. */
+ platform_ids->resize(0);
+ cl_uint num_platforms;
+ if(!get_num_platforms(&num_platforms, error)) {
+ return false;
+ }
+ /* Get actual platforms. */
+ cl_int err;
+ platform_ids->resize(num_platforms);
+ if((err = clGetPlatformIDs(num_platforms,
+ &platform_ids->at(0),
+ NULL)) != CL_SUCCESS) {
+ if(error != NULL) {
+ *error = err;
+ }
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ return true;
+}
+
+vector<cl_platform_id> OpenCLInfo::get_platforms()
+{
+ vector<cl_platform_id> platform_ids;
+ get_platforms(&platform_ids);
+ return platform_ids;
+}
+
+bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
+{
+ cl_int err;
+ if((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
+ if(error != NULL) {
+ *error = err;
+ }
+ *num_platforms = 0;
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ return true;
+}
+
+cl_uint OpenCLInfo::get_num_platforms()
+{
+ cl_uint num_platforms;
+ if(!get_num_platforms(&num_platforms)) {
+ return 0;
+ }
+ return num_platforms;
+}
+
+bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
+ string *platform_name)
+{
+ char buffer[256];
+ if(clGetPlatformInfo(platform_id,
+ CL_PLATFORM_NAME,
+ sizeof(buffer),
+ &buffer,
+ NULL) != CL_SUCCESS)
+ {
+ *platform_name = "";
+ return false;
+ }
+ *platform_name = buffer;
+ return true;
+}
+
+string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
+{
+ string platform_name;
+ if (!get_platform_name(platform_id, &platform_name)) {
+ return "";
+ }
+ return platform_name;
+}
+
+bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ cl_uint *num_devices,
+ cl_int *error)
+{
+ cl_int err;
+ if((err = clGetDeviceIDs(platform_id,
+ device_type,
+ 0,
+ NULL,
+ num_devices)) != CL_SUCCESS)
+ {
+ if(error != NULL) {
+ *error = err;
+ }
+ *num_devices = 0;
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ return true;
+}
+
+cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type)
+{
+ cl_uint num_devices;
+ if(!get_num_platform_devices(platform_id,
+ device_type,
+ &num_devices))
+ {
+ return 0;
+ }
+ return num_devices;
+}
+
+bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type,
+ vector<cl_device_id> *device_ids,
+ cl_int* error)
+{
+ /* Reset from possible previous state. */
+ device_ids->resize(0);
+ /* Get number of devices to pre-allocate memory. */
+ cl_uint num_devices;
+ if(!get_num_platform_devices(platform_id,
+ device_type,
+ &num_devices,
+ error))
+ {
+ return false;
+ }
+ /* Get actual device list. */
+ device_ids->resize(num_devices);
+ cl_int err;
+ if((err = clGetDeviceIDs(platform_id,
+ device_type,
+ num_devices,
+ &device_ids->at(0),
+ NULL)) != CL_SUCCESS)
+ {
+ if(error != NULL) {
+ *error = err;
+ }
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ return true;
+}
+
+vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
+ cl_device_type device_type)
+{
+ vector<cl_device_id> devices;
+ get_platform_devices(platform_id, device_type, &devices);
+ return devices;
+}
+
+bool OpenCLInfo::get_device_name(cl_device_id device_id,
+ string *device_name,
+ cl_int* error)
+{
+ char buffer[1024];
+ cl_int err;
+ if((err = clGetDeviceInfo(device_id,
+ CL_DEVICE_NAME,
+ sizeof(buffer),
+ &buffer,
+ NULL)) != CL_SUCCESS)
+ {
+ if(error != NULL) {
+ *error = err;
+ }
+ *device_name = "";
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ *device_name = buffer;
+ return true;
+}
+
+string OpenCLInfo::get_device_name(cl_device_id device_id)
+{
+ string device_name;
+ if(!get_device_name(device_id, &device_name)) {
+ return "";
+ }
+ return device_name;
+}
+
+bool OpenCLInfo::get_device_type(cl_device_id device_id,
+ cl_device_type *device_type,
+ cl_int* error)
+{
+ cl_int err;
+ if((err = clGetDeviceInfo(device_id,
+ CL_DEVICE_TYPE,
+ sizeof(cl_device_type),
+ device_type,
+ NULL)) != CL_SUCCESS)
+ {
+ if(error != NULL) {
+ *error = err;
+ }
+ *device_type = 0;
+ return false;
+ }
+ if(error != NULL) {
+ *error = CL_SUCCESS;
+ }
+ return true;
+}
+
+cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
+{
+ cl_device_type device_type;
+ if(!get_device_type(device_id, &device_type)) {
+ return 0;
+ }
+ return device_type;
+}
+
+string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
+{
+ char board_name[1024];
+ if(clGetDeviceInfo(device_id,
+ CL_DEVICE_BOARD_NAME_AMD,
+ sizeof(board_name),
+ &board_name,
+ NULL) == CL_SUCCESS)
+ {
+ return board_name;
+ }
+ /* Fallback to standard device name API. */
+ return get_device_name(device_id);
+}
+
CCL_NAMESPACE_END
#endif
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 53c872e829d..8a8c3968c02 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -196,6 +196,7 @@ set(SRC_UTIL_HEADERS
../util/util_hash.h
../util/util_math.h
../util/util_math_fast.h
+ ../util/util_math_intersect.h
../util/util_static_assert.h
../util/util_transform.h
../util/util_texture.h
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 321983c1abc..598e138dbea 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -202,8 +202,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
#ifdef __SUBSURFACE__
+/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
- const Ray *ray,
+ const Ray ray,
SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
@@ -212,7 +213,7 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
return bvh_intersect_subsurface_motion(kg,
- ray,
+ &ray,
ss_isect,
subsurface_object,
lcg_state,
@@ -220,7 +221,7 @@ ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
}
#endif /* __OBJECT_MOTION__ */
return bvh_intersect_subsurface(kg,
- ray,
+ &ray,
ss_isect,
subsurface_object,
lcg_state,
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index b4f65bc8efd..8f7c005e961 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -100,8 +100,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif /* __KERNEL_SSE2__ */
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* traversal loop */
do {
@@ -209,9 +209,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
+ &isect_precalc,
isect_array,
P,
- dir,
ray->time,
PATH_RAY_SHADOW,
object,
@@ -314,7 +314,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -354,7 +354,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* scale isect->t to adjust for instancing */
for(int i = 0; i < num_hits_in_instance; i++) {
@@ -367,7 +367,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
}
isect_t = tmax;
diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h
index 583f7f7c469..cb7a4e3bc31 100644
--- a/intern/cycles/kernel/bvh/bvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/bvh_subsurface.h
@@ -109,8 +109,8 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* traversal loop */
do {
@@ -214,9 +214,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
+ &isect_precalc,
ss_isect,
P,
- dir,
ray->time,
object,
prim_addr,
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 0eca0c8e38b..eac98a3165a 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -104,8 +104,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* traversal loop */
do {
@@ -267,9 +267,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
+ &isect_precalc,
isect,
P,
- dir,
ray->time,
visibility,
object,
@@ -358,7 +358,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
@@ -395,7 +395,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 136034aa484..da97dae0b99 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -97,8 +97,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* traversal loop */
do {
@@ -215,9 +215,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
continue;
}
motion_triangle_intersect(kg,
+ &isect_precalc,
isect,
P,
- dir,
ray->time,
visibility,
object,
@@ -243,7 +243,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
@@ -286,7 +286,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
# if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 6f3346e7634..6efb7e265d0 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -101,8 +101,8 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif /* __KERNEL_SSE2__ */
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* traversal loop */
do {
@@ -243,9 +243,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
continue;
}
hit = motion_triangle_intersect(kg,
+ &isect_precalc,
isect_array,
P,
- dir,
ray->time,
visibility,
object,
@@ -294,7 +294,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -340,7 +340,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
@@ -352,7 +352,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
}
isect_t = tmax;
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index 2a4da3eea82..5d960787134 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -96,8 +96,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Traversal loop. */
do {
@@ -290,9 +290,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
+ &isect_precalc,
isect_array,
P,
- dir,
ray->time,
PATH_RAY_SHADOW,
object,
@@ -414,7 +414,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
@@ -471,7 +471,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr].addr;
diff --git a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h
index a6431a94e6e..d67a7826589 100644
--- a/intern/cycles/kernel/bvh/qbvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h
@@ -105,8 +105,8 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Traversal loop. */
do {
@@ -270,9 +270,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for(; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
motion_triangle_intersect_subsurface(kg,
+ &isect_precalc,
ss_isect,
P,
- dir,
ray->time,
object,
prim_addr,
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index c20a8f3703f..d88e0e07203 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -106,8 +106,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Traversal loop. */
do {
@@ -354,9 +354,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
+ &isect_precalc,
isect,
P,
- dir,
ray->time,
visibility,
object,
@@ -447,7 +447,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
@@ -489,7 +489,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr].addr;
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index 859c5da808b..7cfb2cf8c18 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -91,8 +91,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Traversal loop. */
do {
@@ -281,7 +281,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
- motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
+ motion_triangle_intersect(kg, &isect_precalc, isect, P, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -316,7 +316,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
@@ -362,7 +362,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr].addr;
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index bbe588c878f..3dbac8446d7 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -95,8 +95,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
- IsectPrecalc isect_precalc;
- triangle_intersect_precalc(dir, &isect_precalc);
+ TriangleIsectPrecalc isect_precalc;
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
/* Traversal loop. */
do {
@@ -309,7 +309,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
continue;
}
/* Intersect ray against primitive. */
- hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
+ hit = motion_triangle_intersect(kg, &isect_precalc, isect_array, P, ray->time, visibility, object, prim_addr);
if(hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -367,7 +367,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -432,7 +432,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
- triangle_intersect_precalc(dir, &isect_precalc);
+ ray_triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr].addr;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index aa4b91eac40..1dd24078037 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -83,7 +83,7 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha
const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z));
const float2 slope_11 = mf_sampleP22_11(wi_11.z, randU);
- const float2 cossin_phi = safe_normalize(make_float2(wi_11.x, wi_11.y));
+ const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f));
const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y);
const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y);
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 7cc840ce78d..4f84b80a5a8 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -264,7 +264,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
-#ifdef __KERNEL_AVX2__
+#if defined(__KERNEL_AVX2__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
avxf P_curve_0_1, P_curve_2_3;
if(is_curve_primitive) {
P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
@@ -299,7 +299,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-#ifdef __KERNEL_AVX2__
+#if defined(__KERNEL_AVX2__) && (!defined(_MSC_VER) || _MSC_VER > 1800)
const avxf vPP = _mm256_broadcast_ps(&P.m128);
const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 2500228281e..971a34308f1 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -166,14 +166,15 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
* time and do a ray intersection with the resulting triangle.
*/
-ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
- Intersection *isect,
- float3 P,
- float3 dir,
- float time,
- uint visibility,
- int object,
- int prim_addr)
+ccl_device_inline bool motion_triangle_intersect(
+ KernelGlobals *kg,
+ const TriangleIsectPrecalc *isect_precalc,
+ Intersection *isect,
+ float3 P,
+ float time,
+ uint visibility,
+ int object,
+ int prim_addr)
{
/* Primitive index for vertex location lookup. */
int prim = kernel_tex_fetch(__prim_index, prim_addr);
@@ -185,11 +186,15 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect_uv(P,
- dir,
- isect->t,
- verts[2], verts[0], verts[1],
- &u, &v, &t))
+ if(ray_triangle_intersect(isect_precalc,
+ P,
+ isect->t,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ (ssef*)verts,
+#else
+ verts[0], verts[1], verts[2],
+#endif
+ &u, &v, &t))
{
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
@@ -217,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg,
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
+ const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
- float3 dir,
float time,
int object,
int prim_addr,
@@ -237,11 +242,15 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect_uv(P,
- dir,
- tmax,
- verts[2], verts[0], verts[1],
- &u, &v, &t))
+ if(ray_triangle_intersect(isect_precalc,
+ P,
+ tmax,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ (ssef*)verts,
+#else
+ verts[0], verts[1], verts[2],
+#endif
+ &u, &v, &t))
{
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 4d234dd62bd..584d0b3508f 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -22,16 +22,6 @@
CCL_NAMESPACE_BEGIN
-/* Workaround stupidness of CUDA/OpenCL which doesn't allow to access indexed
- * component of float3 value.
- */
-#ifndef __KERNEL_CPU__
-# define IDX(vec, idx) \
- ((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) ))
-#else
-# define IDX(vec, idx) ((vec)[idx])
-#endif
-
/* Ray-Triangle intersection for BVH traversal
*
* Sven Woop
@@ -40,214 +30,50 @@ CCL_NAMESPACE_BEGIN
* http://jcgt.org/published/0002/01/05/paper.pdf
*/
-/* Precalculated data for the ray->tri intersection. */
-typedef struct IsectPrecalc {
- /* Maximal dimension kz, and orthogonal dimensions. */
- int kx, ky, kz;
-
- /* Shear constants. */
- float Sx, Sy, Sz;
-} IsectPrecalc;
-
-#if (defined(__KERNEL_OPENCL_APPLE__)) || \
- (defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)))
-ccl_device_noinline
-#else
-ccl_device_inline
-#endif
-void triangle_intersect_precalc(float3 dir,
- IsectPrecalc *isect_precalc)
-{
- /* Calculate dimension where the ray direction is maximal. */
-#ifndef __KERNEL_SSE__
- int kz = util_max_axis(make_float3(fabsf(dir.x),
- fabsf(dir.y),
- fabsf(dir.z)));
- int kx = kz + 1; if(kx == 3) kx = 0;
- int ky = kx + 1; if(ky == 3) ky = 0;
-#else
- int kx, ky, kz;
- /* Avoiding mispredicted branch on direction. */
- kz = util_max_axis(fabs(dir));
- static const char inc_xaxis[] = {1, 2, 0, 55};
- static const char inc_yaxis[] = {2, 0, 1, 55};
- kx = inc_xaxis[kz];
- ky = inc_yaxis[kz];
-#endif
-
- float dir_kz = IDX(dir, kz);
-
- /* Swap kx and ky dimensions to preserve winding direction of triangles. */
- if(dir_kz < 0.0f) {
- int tmp = kx;
- kx = ky;
- ky = tmp;
- }
-
- /* Calculate the shear constants. */
- float inv_dir_z = 1.0f / dir_kz;
- isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
- isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
- isect_precalc->Sz = inv_dir_z;
-
- /* Store the dimensions. */
- isect_precalc->kx = kx;
- isect_precalc->ky = ky;
- isect_precalc->kz = kz;
-}
-
-/* TODO(sergey): Make it general utility function. */
-ccl_device_inline float xor_signmask(float x, int y)
-{
- return __int_as_float(__float_as_int(x) ^ y);
-}
-
ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
- const IsectPrecalc *isect_precalc,
+ const TriangleIsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
uint visibility,
int object,
int prim_addr)
{
- const int kx = isect_precalc->kx;
- const int ky = isect_precalc->ky;
- const int kz = isect_precalc->kz;
- const float Sx = isect_precalc->Sx;
- const float Sy = isect_precalc->Sy;
- const float Sz = isect_precalc->Sz;
-
- /* Calculate vertices relative to ray origin. */
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
- const avxf avxf_P(P.m128, P.m128);
-
- const avxf tri_ab = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 0);
- const avxf tri_bc = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 1);
-
- const avxf AB = tri_ab - avxf_P;
- const avxf BC = tri_bc - avxf_P;
-
- const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
-
- const avxf AB_k = shuffle(AB, permute_mask);
- const avxf BC_k = shuffle(BC, permute_mask);
-
- /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
- const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
-
- /* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
- const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
-
- const avxf Sxy(Sy, Sx, Sy, Sx);
-
- /* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
- const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
-
- float ABBC_kz_array[8];
- _mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
-
- const float A_kz = ABBC_kz_array[0];
- const float B_kz = ABBC_kz_array[2];
- const float C_kz = ABBC_kz_array[6];
-
- /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
- const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
-
- const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
-
- /* W U V
- * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
- */
- const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
-
- const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
-
- /* Calculate scaled barycentric coordinates. */
- float WUVW_array[4];
- _mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
-
- const float W = WUVW_array[0];
- const float U = WUVW_array[1];
- const float V = WUVW_array[2];
-
- const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
- const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
- _mm256_setzero_ps(), 0));
-
- if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
- return false;
- }
+ const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
#else
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
- const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
- const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
- const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
-
- const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
- const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
- const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
-
- /* Perform shear and scale of vertices. */
- const float Ax = A_kx - Sx * A_kz;
- const float Ay = A_ky - Sy * A_kz;
- const float Bx = B_kx - Sx * B_kz;
- const float By = B_ky - Sy * B_kz;
- const float Cx = C_kx - Sx * C_kz;
- const float Cy = C_ky - Sy * C_kz;
-
- /* Calculate scaled barycentric coordinates. */
- float U = Cx * By - Cy * Bx;
- float V = Ax * Cy - Ay * Cx;
- float W = Bx * Ay - By * Ax;
- if((U < 0.0f || V < 0.0f || W < 0.0f) &&
- (U > 0.0f || V > 0.0f || W > 0.0f))
- {
- return false;
- }
#endif
-
- /* Calculate determinant. */
- float det = U + V + W;
- if(UNLIKELY(det == 0.0f)) {
- return false;
- }
-
- /* Calculate scaled z-coordinates of vertices and use them to calculate
- * the hit distance.
- */
- const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
- const int sign_det = (__float_as_int(det) & 0x80000000);
- const float sign_T = xor_signmask(T, sign_det);
- if((sign_T < 0.0f) ||
- (sign_T > isect->t * xor_signmask(det, sign_det)))
+ float t, u, v;
+ if(ray_triangle_intersect(isect_precalc,
+ P, isect->t,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ ssef_verts,
+#else
+ float4_to_float3(tri_a),
+ float4_to_float3(tri_b),
+ float4_to_float3(tri_c),
+#endif
+ &u, &v, &t))
{
- return false;
- }
-
#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
+ /* Visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags.
+ */
+ if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
- {
-#ifdef __KERNEL_CUDA__
- if(A == B && B == C) {
- return false;
+ {
+ isect->prim = prim_addr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t;
+ return true;
}
-#endif
- /* Normalize U, V, W, and T. */
- const float inv_det = 1.0f / det;
- isect->prim = prim_addr;
- isect->object = object;
- isect->type = PRIMITIVE_TRIANGLE;
- isect->u = U * inv_det;
- isect->v = V * inv_det;
- isect->t = T * inv_det;
- return true;
}
return false;
}
@@ -260,7 +86,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#ifdef __SUBSURFACE__
ccl_device_inline void triangle_intersect_subsurface(
KernelGlobals *kg,
- const IsectPrecalc *isect_precalc,
+ const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
int object,
@@ -269,129 +95,30 @@ ccl_device_inline void triangle_intersect_subsurface(
uint *lcg_state,
int max_hits)
{
- const int kx = isect_precalc->kx;
- const int ky = isect_precalc->ky;
- const int kz = isect_precalc->kz;
- const float Sx = isect_precalc->Sx;
- const float Sy = isect_precalc->Sy;
- const float Sz = isect_precalc->Sz;
-
- /* Calculate vertices relative to ray origin. */
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
- const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
- tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
- tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
- const avxf avxf_P(P.m128, P.m128);
-
- const avxf tri_ab = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 0);
- const avxf tri_bc = kernel_tex_fetch_avxf(__prim_tri_verts, tri_vindex + 1);
-
- const avxf AB = tri_ab - avxf_P;
- const avxf BC = tri_bc - avxf_P;
-
- const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
-
- const avxf AB_k = shuffle(AB, permuteMask);
- const avxf BC_k = shuffle(BC, permuteMask);
-
- /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
- const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
-
- /* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
- const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
-
- const avxf Sxy(Sy, Sx, Sy, Sx);
-
- /* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
- const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
-
- float ABBC_kz_array[8];
- _mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
-
- const float A_kz = ABBC_kz_array[0];
- const float B_kz = ABBC_kz_array[2];
- const float C_kz = ABBC_kz_array[6];
-
- /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
- const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
-
- const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
-
- /* W U V
- * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
- */
- const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */);
-
- const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;
-
- /* Calculate scaled barycentric coordinates. */
- float WUVW_array[4];
- _mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
-
- const float W = WUVW_array[0];
- const float U = WUVW_array[1];
- const float V = WUVW_array[2];
-
- const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
- const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
- _mm256_setzero_ps(), 0));
-
- if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
- return;
- }
+ const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
#else
- const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
- const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
- const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
-
- const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
- const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
- const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
-
- /* Perform shear and scale of vertices. */
- const float Ax = A_kx - Sx * A_kz;
- const float Ay = A_ky - Sy * A_kz;
- const float Bx = B_kx - Sx * B_kz;
- const float By = B_ky - Sy * B_kz;
- const float Cx = C_kx - Sx * C_kz;
- const float Cy = C_ky - Sy * C_kz;
-
- /* Calculate scaled barycentric coordinates. */
- float U = Cx * By - Cy * Bx;
- float V = Ax * Cy - Ay * Cx;
- float W = Bx * Ay - By * Ax;
-
- if((U < 0.0f || V < 0.0f || W < 0.0f) &&
- (U > 0.0f || V > 0.0f || W > 0.0f))
- {
- return;
- }
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
+ tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
+ tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
#endif
-
- /* Calculate determinant. */
- float det = U + V + W;
- if(UNLIKELY(det == 0.0f)) {
- return;
- }
-
- /* Calculate scaled z−coordinates of vertices and use them to calculate
- * the hit distance.
- */
- const int sign_det = (__float_as_int(det) & 0x80000000);
- const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
- const float sign_T = xor_signmask(T, sign_det);
- if((sign_T < 0.0f) ||
- (sign_T > tmax * xor_signmask(det, sign_det)))
+ float t, u, v;
+ if(!ray_triangle_intersect(isect_precalc,
+ P, tmax,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ ssef_verts,
+#else
+ tri_a,
+ tri_b,
+ tri_c,
+#endif
+ &u, &v, &t))
{
return;
}
- /* Normalize U, V, W, and T. */
- const float inv_det = 1.0f / det;
-
- const float t = T * inv_det;
for(int i = min(max_hits, ss_isect->num_hits) - 1; i >= 0; --i) {
if(ss_isect->hits[i].t == t) {
return;
@@ -418,16 +145,18 @@ ccl_device_inline void triangle_intersect_subsurface(
isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
- isect->u = U * inv_det;
- isect->v = V * inv_det;
+ isect->u = u;
+ isect->v = v;
isect->t = t;
/* Record geometric normal. */
- /* TODO(sergey): Use float4_to_float3() on just an edges. */
- const float3 v0 = float4_to_float3(tri_a);
- const float3 v1 = float4_to_float3(tri_b);
- const float3 v2 = float4_to_float3(tri_c);
- ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0));
+ /* TODO(sergey): Check whether it's faster to re-use ssef verts. */
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
+ tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
+ tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
+#endif
+ ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
}
#endif
@@ -570,6 +299,4 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
return P;
}
-#undef IDX
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 6c3ee6b8098..c589c112cc2 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -399,7 +399,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
/* Reject invalid value */
- if(!isfinite(sum)) {
+ if(!isfinite_safe(sum)) {
kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
L_sum = make_float3(0.0f, 0.0f, 0.0f);
@@ -468,7 +468,7 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
/* Reject invalid value */
float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
- if(!isfinite(sum)) {
+ if(!isfinite_safe(sum)) {
kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
L_sum = make_float3(0.0f, 0.0f, 0.0f);
}
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index e347a1eca18..96276f313f8 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -87,9 +87,9 @@ template<typename T> struct texture {
ccl_always_inline avxf fetch_avxf(const int index)
{
kernel_assert(index >= 0 && (index+1) < width);
- ssef *ssefData = (ssef*)data;
- ssef *ssefNodeData = &ssefData[index];
- return _mm256_loadu_ps((float *)ssefNodeData);
+ ssef *ssef_data = (ssef*)data;
+ ssef *ssef_node_data = &ssef_data[index];
+ return _mm256_loadu_ps((float *)ssef_node_data);
}
#endif
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index bc2d9604122..cf14a159e47 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -157,7 +157,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
return false;
if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
- float probability = max3(bsdf_eval_sum(eval)) * kernel_data.integrator.light_inv_rr_threshold;
+ float probability = max3(fabs(bsdf_eval_sum(eval))) * kernel_data.integrator.light_inv_rr_threshold;
if(probability < 1.0f) {
if(rand_terminate >= probability) {
return false;
diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h
index 9bee5603474..7653fa53247 100644
--- a/intern/cycles/kernel/kernel_math.h
+++ b/intern/cycles/kernel/kernel_math.h
@@ -20,6 +20,7 @@
#include "util_color.h"
#include "util_math.h"
#include "util_math_fast.h"
+#include "util_math_intersect.h"
#include "util_texture.h"
#include "util_transform.h"
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index fe88ba4ff05..64d240d779d 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -293,7 +293,7 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
/* intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
scene_intersect_subsurface(kg,
- ray,
+ *ray,
ss_isect,
sd->object,
lcg_state,
@@ -448,7 +448,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
SubsurfaceIntersection ss_isect;
- scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
+ scene_intersect_subsurface(kg, ray, &ss_isect, sd->object, lcg_state, 1);
/* evaluate bssrdf */
if(ss_isect.num_hits > 0) {
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index d159c801810..dfc44dbbf4a 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -324,7 +324,7 @@ private:
class BsdfNode : public ShaderNode {
public:
explicit BsdfNode(const NodeType *node_type);
- SHADER_NODE_BASE_CLASS(BsdfNode);
+ SHADER_NODE_BASE_CLASS(BsdfNode)
bool has_spatial_varying() { return true; }
void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL, ShaderInput *param4 = NULL);
@@ -641,7 +641,7 @@ public:
class MixClosureWeightNode : public ShaderNode {
public:
- SHADER_NODE_CLASS(MixClosureWeightNode);
+ SHADER_NODE_CLASS(MixClosureWeightNode)
float weight;
float fac;
@@ -887,7 +887,7 @@ public:
class CurvesNode : public ShaderNode {
public:
explicit CurvesNode(const NodeType *node_type);
- SHADER_NODE_BASE_CLASS(CurvesNode);
+ SHADER_NODE_BASE_CLASS(CurvesNode)
virtual int get_group() { return NODE_GROUP_LEVEL_3; }
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 0c7bd271371..e7050f9ef37 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -833,7 +833,7 @@ void Session::update_status_time(bool show_pause, bool show_done)
int progressive_sample = tile_manager.state.sample;
int num_samples = tile_manager.get_num_effective_samples();
- int tile = tile_manager.state.num_rendered_tiles;
+ int tile = progress.get_finished_tiles();
int num_tiles = tile_manager.state.num_tiles;
/* update status */
@@ -841,7 +841,7 @@ void Session::update_status_time(bool show_pause, bool show_done)
if(!params.progressive) {
const bool is_cpu = params.device.type == DEVICE_CPU;
- const bool is_last_tile = (progress.get_finished_tiles() + 1) == num_tiles;
+ const bool is_last_tile = (tile + 1) == num_tiles;
substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index a493c3fa1cd..76d0b9a57c2 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -131,7 +131,6 @@ void TileManager::reset(BufferParams& params_, int num_samples_)
state.buffer = BufferParams();
state.sample = range_start_sample - 1;
state.num_tiles = 0;
- state.num_rendered_tiles = 0;
state.num_samples = 0;
state.resolution_divider = get_divider(params.width, params.height, start_resolution);
state.tiles.clear();
@@ -343,7 +342,6 @@ bool TileManager::next_tile(Tile& tile, int device)
tile = Tile(state.tiles[logical_device].front());
state.tiles[logical_device].pop_front();
- state.num_rendered_tiles++;
return true;
}
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index 5d92ebac355..85fc89bc397 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -63,7 +63,6 @@ public:
int num_samples;
int resolution_divider;
int num_tiles;
- int num_rendered_tiles;
/* Total samples over all pixels: Generally num_samples*num_pixels,
* but can be higher due to the initial resolution division for previews. */
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index d8abf671bd6..6bd47120482 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -52,6 +52,7 @@ set(SRC_HEADERS
util_math.h
util_math_cdf.h
util_math_fast.h
+ util_math_intersect.h
util_md5.h
util_opengl.h
util_optimization.h
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 27d4ae510c7..2af0e56325f 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1241,19 +1241,6 @@ ccl_device_inline float __uint_as_float(uint i)
return u.f;
}
-/* Versions of functions which are safe for fast math. */
-ccl_device_inline bool isnan_safe(float f)
-{
- unsigned int x = __float_as_uint(f);
- return (x << 1) > 0xff000000u;
-}
-
-ccl_device_inline bool isfinite_safe(float f)
-{
- /* By IEEE 754 rule, 2*Inf equals Inf */
- unsigned int x = __float_as_uint(f);
- return (f == f) && (x == 0 || (f != 2.0f*f));
-}
/* Interpolation */
@@ -1271,6 +1258,20 @@ ccl_device_inline float triangle_area(const float3& v1, const float3& v2, const
#endif
+/* Versions of functions which are safe for fast math. */
+ccl_device_inline bool isnan_safe(float f)
+{
+ unsigned int x = __float_as_uint(f);
+ return (x << 1) > 0xff000000u;
+}
+
+ccl_device_inline bool isfinite_safe(float f)
+{
+ /* By IEEE 754 rule, 2*Inf equals Inf */
+ unsigned int x = __float_as_uint(f);
+ return (f == f) && (x == 0 || (f != 2.0f*f)) && !((x << 1) > 0xff000000u);
+}
+
/* Orthonormal vectors */
ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
@@ -1451,181 +1452,9 @@ ccl_device_inline float beta(float x, float y)
#endif
}
-/* Ray Intersection */
-
-ccl_device bool ray_sphere_intersect(
- float3 ray_P, float3 ray_D, float ray_t,
- float3 sphere_P, float sphere_radius,
- float3 *isect_P, float *isect_t)
+ccl_device_inline float xor_signmask(float x, int y)
{
- float3 d = sphere_P - ray_P;
- float radiussq = sphere_radius*sphere_radius;
- float tsq = dot(d, d);
-
- if(tsq > radiussq) { /* ray origin outside sphere */
- float tp = dot(d, ray_D);
-
- if(tp < 0.0f) /* dir points away from sphere */
- return false;
-
- float dsq = tsq - tp*tp; /* pythagoras */
-
- if(dsq > radiussq) /* closest point on ray outside sphere */
- return false;
-
- float t = tp - sqrtf(radiussq - dsq); /* pythagoras */
-
- if(t < ray_t) {
- *isect_t = t;
- *isect_P = ray_P + ray_D*t;
- return true;
- }
- }
-
- return false;
-}
-
-ccl_device bool ray_aligned_disk_intersect(
- float3 ray_P, float3 ray_D, float ray_t,
- float3 disk_P, float disk_radius,
- float3 *isect_P, float *isect_t)
-{
- /* aligned disk normal */
- float disk_t;
- float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
- float div = dot(ray_D, disk_N);
-
- if(UNLIKELY(div == 0.0f))
- return false;
-
- /* compute t to intersection point */
- float t = -disk_t/div;
- if(t < 0.0f || t > ray_t)
- return false;
-
- /* test if within radius */
- float3 P = ray_P + ray_D*t;
- if(len_squared(P - disk_P) > disk_radius*disk_radius)
- return false;
-
- *isect_P = P;
- *isect_t = t;
-
- return true;
-}
-
-ccl_device bool ray_triangle_intersect(
- float3 ray_P, float3 ray_D, float ray_t,
- float3 v0, float3 v1, float3 v2,
- float3 *isect_P, float *isect_t)
-{
- /* Calculate intersection */
- float3 e1 = v1 - v0;
- float3 e2 = v2 - v0;
- float3 s1 = cross(ray_D, e2);
-
- const float divisor = dot(s1, e1);
- if(UNLIKELY(divisor == 0.0f))
- return false;
-
- const float invdivisor = 1.0f/divisor;
-
- /* compute first barycentric coordinate */
- const float3 d = ray_P - v0;
- const float u = dot(d, s1)*invdivisor;
- if(u < 0.0f)
- return false;
-
- /* Compute second barycentric coordinate */
- const float3 s2 = cross(d, e1);
- const float v = dot(ray_D, s2)*invdivisor;
- if(v < 0.0f)
- return false;
-
- const float b0 = 1.0f - u - v;
- if(b0 < 0.0f)
- return false;
-
- /* compute t to intersection point */
- const float t = dot(e2, s2)*invdivisor;
- if(t < 0.0f || t > ray_t)
- return false;
-
- *isect_t = t;
- *isect_P = ray_P + ray_D*t;
-
- return true;
-}
-
-ccl_device_inline bool ray_triangle_intersect_uv(
- float3 ray_P, float3 ray_D, float ray_t,
- float3 v0, float3 v1, float3 v2,
- float *isect_u, float *isect_v, float *isect_t)
-{
- /* Calculate intersection */
- float3 e1 = v1 - v0;
- float3 e2 = v2 - v0;
- float3 s1 = cross(ray_D, e2);
-
- const float divisor = dot(s1, e1);
- if(UNLIKELY(divisor == 0.0f))
- return false;
-
- const float invdivisor = 1.0f/divisor;
-
- /* compute first barycentric coordinate */
- const float3 d = ray_P - v0;
- const float u = dot(d, s1)*invdivisor;
- if(u < 0.0f)
- return false;
-
- /* Compute second barycentric coordinate */
- const float3 s2 = cross(d, e1);
- const float v = dot(ray_D, s2)*invdivisor;
- if(v < 0.0f)
- return false;
-
- const float b0 = 1.0f - u - v;
- if(b0 < 0.0f)
- return false;
-
- /* compute t to intersection point */
- const float t = dot(e2, s2)*invdivisor;
- if(t < 0.0f || t > ray_t)
- return false;
-
- *isect_u = u;
- *isect_v = v;
- *isect_t = t;
-
- return true;
-}
-
-ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_mint, float ray_maxt,
- float3 quad_P, float3 quad_u, float3 quad_v, float3 quad_n,
- float3 *isect_P, float *isect_t, float *isect_u, float *isect_v)
-{
- float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n);
- if(t < ray_mint || t > ray_maxt)
- return false;
-
- float3 hit = ray_P + t*ray_D;
- float3 inplane = hit - quad_P;
-
- float u = dot(inplane, quad_u) / dot(quad_u, quad_u) + 0.5f;
- if(u < 0.0f || u > 1.0f)
- return false;
-
- float v = dot(inplane, quad_v) / dot(quad_v, quad_v) + 0.5f;
- if(v < 0.0f || v > 1.0f)
- return false;
-
- if(isect_P) *isect_P = hit;
- if(isect_t) *isect_t = t;
- if(isect_u) *isect_u = u;
- if(isect_v) *isect_v = v;
-
- return true;
+ return __int_as_float(__float_as_int(x) ^ y);
}
/* projections */
@@ -1690,4 +1519,3 @@ ccl_device_inline int util_max_axis(float3 vec)
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_H__ */
-
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
new file mode 100644
index 00000000000..9e0587e1afb
--- /dev/null
+++ b/intern/cycles/util/util_math_intersect.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_INTERSECT_H__
+#define __UTIL_MATH_INTERSECT_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Ray Intersection */
+
+ccl_device bool ray_sphere_intersect(
+ float3 ray_P, float3 ray_D, float ray_t,
+ float3 sphere_P, float sphere_radius,
+ float3 *isect_P, float *isect_t)
+{
+ const float3 d = sphere_P - ray_P;
+ const float radiussq = sphere_radius*sphere_radius;
+ const float tsq = dot(d, d);
+
+ if(tsq > radiussq) {
+ /* Ray origin outside sphere. */
+ const float tp = dot(d, ray_D);
+ if(tp < 0.0f) {
+ /* Ray points away from sphere. */
+ return false;
+ }
+ const float dsq = tsq - tp*tp; /* pythagoras */
+ if(dsq > radiussq) {
+ /* Closest point on ray outside sphere. */
+ return false;
+ }
+ const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */
+ if(t < ray_t) {
+ *isect_t = t;
+ *isect_P = ray_P + ray_D*t;
+ return true;
+ }
+ }
+ return false;
+}
+
+ccl_device bool ray_aligned_disk_intersect(
+ float3 ray_P, float3 ray_D, float ray_t,
+ float3 disk_P, float disk_radius,
+ float3 *isect_P, float *isect_t)
+{
+ /* Aligned disk normal. */
+ float disk_t;
+ const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
+ const float div = dot(ray_D, disk_N);
+ if(UNLIKELY(div == 0.0f)) {
+ return false;
+ }
+ /* Compute t to intersection point. */
+ const float t = -disk_t/div;
+ if(t < 0.0f || t > ray_t) {
+ return false;
+ }
+ /* Test if within radius. */
+ float3 P = ray_P + ray_D*t;
+ if(len_squared(P - disk_P) > disk_radius*disk_radius) {
+ return false;
+ }
+ *isect_P = P;
+ *isect_t = t;
+ return true;
+}
+
+/* Optimized watertight ray-triangle intersection.
+ *
+ * Sven Woop
+ * Watertight Ray/Triangle Intersection
+ *
+ * http://jcgt.org/published/0002/01/05/paper.pdf
+ */
+
+/* Precalculated data for the ray->tri intersection. */
+typedef struct TriangleIsectPrecalc {
+ /* Maximal dimension kz, and orthogonal dimensions. */
+ int kx, ky, kz;
+
+ /* Shear constants. */
+ float Sx, Sy, Sz;
+} TriangleIsectPrecalc;
+
+/* Workaround stupidness of CUDA/OpenCL which doesn't allow to access indexed
+ * component of float3 value.
+ */
+#ifdef __KERNEL_GPU__
+# define IDX(vec, idx) \
+ ((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) ))
+#else
+# define IDX(vec, idx) ((vec)[idx])
+#endif
+
+#if (defined(__KERNEL_OPENCL_APPLE__)) || \
+ (defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)))
+ccl_device_noinline
+#else
+ccl_device_inline
+#endif
+void ray_triangle_intersect_precalc(float3 dir,
+ TriangleIsectPrecalc *isect_precalc)
+{
+ /* Calculate dimension where the ray direction is maximal. */
+#ifndef __KERNEL_SSE__
+ int kz = util_max_axis(make_float3(fabsf(dir.x),
+ fabsf(dir.y),
+ fabsf(dir.z)));
+ int kx = kz + 1; if(kx == 3) kx = 0;
+ int ky = kx + 1; if(ky == 3) ky = 0;
+#else
+ int kx, ky, kz;
+ /* Avoiding mispredicted branch on direction. */
+ kz = util_max_axis(fabs(dir));
+ static const char inc_xaxis[] = {1, 2, 0, 55};
+ static const char inc_yaxis[] = {2, 0, 1, 55};
+ kx = inc_xaxis[kz];
+ ky = inc_yaxis[kz];
+#endif
+
+ float dir_kz = IDX(dir, kz);
+
+ /* Swap kx and ky dimensions to preserve winding direction of triangles. */
+ if(dir_kz < 0.0f) {
+ int tmp = kx;
+ kx = ky;
+ ky = tmp;
+ }
+
+ /* Calculate the shear constants. */
+ float inv_dir_z = 1.0f / dir_kz;
+ isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
+ isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
+ isect_precalc->Sz = inv_dir_z;
+
+ /* Store the dimensions. */
+ isect_precalc->kx = kx;
+ isect_precalc->ky = ky;
+ isect_precalc->kz = kz;
+}
+
+ccl_device_forceinline bool ray_triangle_intersect(
+ const TriangleIsectPrecalc *isect_precalc,
+ float3 ray_P, float ray_t,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ const ssef *ssef_verts,
+#else
+ const float3 tri_a, const float3 tri_b, const float3 tri_c,
+#endif
+ float *isect_u, float *isect_v, float *isect_t)
+{
+ const int kx = isect_precalc->kx;
+ const int ky = isect_precalc->ky;
+ const int kz = isect_precalc->kz;
+ const float Sx = isect_precalc->Sx;
+ const float Sy = isect_precalc->Sy;
+ const float Sz = isect_precalc->Sz;
+
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ const avxf avxf_P(ray_P.m128, ray_P.m128);
+ const avxf tri_ab(_mm256_loadu_ps((float *)(ssef_verts)));
+ const avxf tri_bc(_mm256_loadu_ps((float *)(ssef_verts + 1)));
+
+ const avxf AB = tri_ab - avxf_P;
+ const avxf BC = tri_bc - avxf_P;
+
+ const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+
+ const avxf AB_k = shuffle(AB, permute_mask);
+ const avxf BC_k = shuffle(BC, permute_mask);
+
+ /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
+ const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
+
+ /* Akx, Aky, Bkx, Bky, Bkx,Bky, Ckx, Cky */
+ const avxf ABBC_kxy = shuffle<0,1,0,1>(AB_k, BC_k);
+
+ const avxf Sxy(Sy, Sx, Sy, Sx);
+
+ /* Ax, Ay, Bx, By, Bx, By, Cx, Cy */
+ const avxf ABBC_xy = nmadd(ABBC_kz, Sxy, ABBC_kxy);
+
+ float ABBC_kz_array[8];
+ _mm256_storeu_ps((float*)&ABBC_kz_array, ABBC_kz);
+
+ const float A_kz = ABBC_kz_array[0];
+ const float B_kz = ABBC_kz_array[2];
+ const float C_kz = ABBC_kz_array[6];
+
+ /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
+ const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
+
+ const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+
+ /* W U V
+ * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
+ */
+ const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
+
+ const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
+
+ /* Calculate scaled barycentric coordinates. */
+ float WUVW_array[4];
+ _mm_storeu_ps((float*)&WUVW_array, _mm256_castps256_ps128 (WUVWnegWUVW));
+
+ const float W = WUVW_array[0];
+ const float U = WUVW_array[1];
+ const float V = WUVW_array[2];
+
+ const int WUVW_mask = 0x7 & _mm256_movemask_ps(WUVWnegWUVW);
+ const int WUVW_zero = 0x7 & _mm256_movemask_ps(_mm256_cmp_ps(WUVWnegWUVW,
+ _mm256_setzero_ps(), 0));
+
+ if(!((WUVW_mask == 7) || (WUVW_mask == 0)) && ((WUVW_mask | WUVW_zero) != 7)) {
+ return false;
+ }
+#else
+ /* Calculate vertices relative to ray origin. */
+ const float3 A = make_float3(tri_a.x - ray_P.x, tri_a.y - ray_P.y, tri_a.z - ray_P.z);
+ const float3 B = make_float3(tri_b.x - ray_P.x, tri_b.y - ray_P.y, tri_b.z - ray_P.z);
+ const float3 C = make_float3(tri_c.x - ray_P.x, tri_c.y - ray_P.y, tri_c.z - ray_P.z);
+
+ const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
+ const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
+ const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
+
+ /* Perform shear and scale of vertices. */
+ const float Ax = A_kx - Sx * A_kz;
+ const float Ay = A_ky - Sy * A_kz;
+ const float Bx = B_kx - Sx * B_kz;
+ const float By = B_ky - Sy * B_kz;
+ const float Cx = C_kx - Sx * C_kz;
+ const float Cy = C_ky - Sy * C_kz;
+
+ /* Calculate scaled barycentric coordinates. */
+ float U = Cx * By - Cy * Bx;
+ float V = Ax * Cy - Ay * Cx;
+ float W = Bx * Ay - By * Ax;
+ if((U < 0.0f || V < 0.0f || W < 0.0f) &&
+ (U > 0.0f || V > 0.0f || W > 0.0f))
+ {
+ return false;
+ }
+#endif
+
+ /* Calculate determinant. */
+ float det = U + V + W;
+ if(UNLIKELY(det == 0.0f)) {
+ return false;
+ }
+
+ /* Calculate scaled z-coordinates of vertices and use them to calculate
+ * the hit distance.
+ */
+ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
+ const int sign_det = (__float_as_int(det) & 0x80000000);
+ const float sign_T = xor_signmask(T, sign_det);
+ if((sign_T < 0.0f) ||
+ (sign_T > ray_t * xor_signmask(det, sign_det)))
+ {
+ return false;
+ }
+
+ /* Workaround precision error on CUDA. */
+#ifdef __KERNEL_CUDA__
+ if(A == B && B == C) {
+ return false;
+ }
+#endif
+ const float inv_det = 1.0f / det;
+ *isect_u = U * inv_det;
+ *isect_v = V * inv_det;
+ *isect_t = T * inv_det;
+ return true;
+}
+
+#undef IDX
+
+ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D,
+ float ray_mint, float ray_maxt,
+ float3 quad_P,
+ float3 quad_u, float3 quad_v, float3 quad_n,
+ float3 *isect_P, float *isect_t,
+ float *isect_u, float *isect_v)
+{
+ /* Perform intersection test. */
+ float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n);
+ if(t < ray_mint || t > ray_maxt) {
+ return false;
+ }
+ const float3 hit = ray_P + t*ray_D;
+ const float3 inplane = hit - quad_P;
+ const float u = dot(inplane, quad_u) / dot(quad_u, quad_u) + 0.5f;
+ if(u < 0.0f || u > 1.0f) {
+ return false;
+ }
+ const float v = dot(inplane, quad_v) / dot(quad_v, quad_v) + 0.5f;
+ if(v < 0.0f || v > 1.0f) {
+ return false;
+ }
+ /* Store the result. */
+ /* TODO(sergey): Check whether we can avoid some checks here. */
+ if(isect_P != NULL) *isect_P = hit;
+ if(isect_t != NULL) *isect_t = t;
+ if(isect_u != NULL) *isect_u = u;
+ if(isect_v != NULL) *isect_v = v;
+ return true;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_INTERSECT_H__ */