From fa9175ff02acffd02679398e17732f3832d3fc28 Mon Sep 17 00:00:00 2001 From: Stefan Werner Date: Thu, 8 Mar 2018 00:15:41 +0100 Subject: Code refactor: use KernelLight instead of float4 arrays. Original patch by Stefan with modifications by Brecht. --- intern/cycles/kernel/geom/geom_object.h | 18 ++-- intern/cycles/kernel/kernel_light.h | 148 +++++++++++++++++--------------- intern/cycles/kernel/kernel_textures.h | 4 +- intern/cycles/kernel/kernel_types.h | 56 +++++++++++- 4 files changed, 145 insertions(+), 81 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 6bb4d134b99..9d833b77bdb 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -60,12 +60,17 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse) { - int offset = lamp*LIGHT_SIZE + (inverse? 
8 : 5); - Transform tfm; - tfm.x = kernel_tex_fetch(__light_data, offset + 0); - tfm.y = kernel_tex_fetch(__light_data, offset + 1); - tfm.z = kernel_tex_fetch(__light_data, offset + 2); + if(inverse) { + tfm.x = kernel_tex_fetch(__lights, lamp).itfm[0]; + tfm.y = kernel_tex_fetch(__lights, lamp).itfm[1]; + tfm.z = kernel_tex_fetch(__lights, lamp).itfm[2]; + } + else { + tfm.x = kernel_tex_fetch(__lights, lamp).tfm[0]; + tfm.y = kernel_tex_fetch(__lights, lamp).tfm[1]; + tfm.z = kernel_tex_fetch(__lights, lamp).tfm[2]; + } tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); return tfm; @@ -240,8 +245,7 @@ ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp) if(lamp == LAMP_NONE) return 0.0f; - float4 f = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 4); - return f.y; + return kernel_tex_fetch(__lights, lamp).random; } /* Per object random number for shader variation */ diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index dfa3150dc92..aaf7a7abdd4 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -255,11 +255,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals float3 *lightpos, float3 *dir) { - float4 data0 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 0); - float4 data3 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 3); + int portal = kernel_data.integrator.portal_offset + index; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - *lightpos = make_float3(data0.y, data0.z, data0.w); - *dir = make_float3(data3.y, data3.z, data3.w); + *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); + *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); /* Check whether portal is on the right side. 
*/ if(dot(*dir, P - *lightpos) > 1e-4f) @@ -291,11 +291,10 @@ ccl_device_inline float background_portal_pdf(KernelGlobals *kg, } num_possible++; - float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1); - float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2); - - float3 axisu = make_float3(data1.y, data1.z, data1.w); - float3 axisv = make_float3(data2.y, data2.z, data2.w); + int portal = kernel_data.integrator.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL)) continue; @@ -346,10 +345,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg, if(portal == 0) { /* p is the portal to be sampled. 
*/ - float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1); - float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2); - float3 axisu = make_float3(data1.y, data1.z, data1.w); - float3 axisv = make_float3(data2.y, data2.z, data2.w); + int portal = kernel_data.integrator.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); *pdf = area_light_sample(P, &lightpos, axisu, axisv, @@ -479,14 +478,10 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo return disk_light_sample(normalize(P - center), randu, randv)*radius; } -ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls) +ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls) { - float3 dir = make_float3(data2.y, data2.z, data2.w); float3 I = ls->Ng; - float spot_angle = data1.w; - float spot_smooth = data2.x; - float attenuation = dot(dir, I); if(attenuation <= spot_angle) { @@ -518,12 +513,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, float3 P, LightSample *ls) { - float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); - float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); - - LightType type = (LightType)__float_as_int(data0.x); + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + LightType type = (LightType)klight->type; ls->type = type; - ls->shader = __float_as_int(data1.x); + ls->shader = klight->shader_id; ls->object = PRIM_NONE; ls->prim = PRIM_NONE; ls->lamp = lamp; @@ -532,10 +525,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, if(type == LIGHT_DISTANT) { /* distant light 
*/ - float3 lightD = make_float3(data0.y, data0.z, data0.w); + float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); float3 D = lightD; - float radius = data1.y; - float invarea = data1.w; + float radius = klight->distant.radius; + float invarea = klight->distant.invarea; if(radius > 0.0f) D = distant_light_sample(D, radius, randu, randv); @@ -562,10 +555,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, } #endif else { - ls->P = make_float3(data0.y, data0.z, data0.w); + ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]); if(type == LIGHT_POINT || type == LIGHT_SPOT) { - float radius = data1.y; + float radius = klight->spot.radius; if(radius > 0.0f) /* sphere light */ @@ -574,14 +567,19 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, ls->D = normalize_len(ls->P - P, &ls->t); ls->Ng = -ls->D; - float invarea = data1.z; + float invarea = klight->spot.invarea; ls->eval_fac = (0.25f*M_1_PI_F)*invarea; ls->pdf = invarea; if(type == LIGHT_SPOT) { /* spot light attenuation */ - float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); - ls->eval_fac *= spot_light_attenuation(data1, data2, ls); + float3 dir = make_float3(klight->spot.dir[0], + klight->spot.dir[1], + klight->spot.dir[2]); + ls->eval_fac *= spot_light_attenuation(dir, + klight->spot.spot_angle, + klight->spot.spot_smooth, + ls); if(ls->eval_fac == 0.0f) { return false; } @@ -594,12 +592,15 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, } else { /* area light */ - float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); - float4 data3 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 3); - - float3 axisu = make_float3(data1.y, data1.z, data1.w); - float3 axisv = make_float3(data2.y, data2.z, data2.w); - float3 D = make_float3(data3.y, data3.z, data3.w); + float3 axisu = make_float3(klight->area.axisu[0], + klight->area.axisu[1], + klight->area.axisu[2]); + float3 axisv = make_float3(klight->area.axisv[0], + 
klight->area.axisv[1], + klight->area.axisv[2]); + float3 D = make_float3(klight->area.dir[0], + klight->area.dir[1], + klight->area.dir[2]); if(dot(ls->P - P, D) > 0.0f) { return false; @@ -618,7 +619,7 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, ls->Ng = D; ls->D = normalize_len(ls->P - P, &ls->t); - float invarea = data2.x; + float invarea = klight->area.invarea; ls->eval_fac = 0.25f*invarea; } } @@ -630,12 +631,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) { - float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0); - float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1); - - LightType type = (LightType)__float_as_int(data0.x); + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + LightType type = (LightType)klight->type; ls->type = type; - ls->shader = __float_as_int(data1.x); + ls->shader = klight->shader_id; ls->object = PRIM_NONE; ls->prim = PRIM_NONE; ls->lamp = lamp; @@ -648,7 +647,7 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, if(type == LIGHT_DISTANT) { /* distant light */ - float radius = data1.y; + float radius = klight->distant.radius; if(radius == 0.0f) return false; @@ -670,9 +669,9 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, * P */ - float3 lightD = make_float3(data0.y, data0.z, data0.w); + float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); float costheta = dot(-lightD, D); - float cosangle = data1.z; + float cosangle = klight->distant.cosangle; if(costheta < cosangle) return false; @@ -683,13 +682,14 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, ls->t = FLT_MAX; /* compute pdf */ - float invarea = data1.w; + float invarea = klight->distant.invarea; ls->pdf = invarea/(costheta*costheta*costheta); ls->eval_fac = ls->pdf; 
} else if(type == LIGHT_POINT || type == LIGHT_SPOT) { - float3 lightP = make_float3(data0.y, data0.z, data0.w); - float radius = data1.y; + float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); + + float radius = klight->spot.radius; /* sphere light */ if(radius == 0.0f) @@ -704,14 +704,19 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, ls->Ng = -D; ls->D = D; - float invarea = data1.z; + float invarea = klight->spot.invarea; ls->eval_fac = (0.25f*M_1_PI_F)*invarea; ls->pdf = invarea; if(type == LIGHT_SPOT) { /* spot light attenuation */ - float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); - ls->eval_fac *= spot_light_attenuation(data1, data2, ls); + float3 dir = make_float3(klight->spot.dir[0], + klight->spot.dir[1], + klight->spot.dir[2]); + ls->eval_fac *= spot_light_attenuation(dir, + klight->spot.spot_angle, + klight->spot.spot_smooth, + ls); if(ls->eval_fac == 0.0f) return false; @@ -726,22 +731,25 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, } else if(type == LIGHT_AREA) { /* area light */ - float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); - float4 data3 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 3); - - float invarea = data2.x; + float invarea = klight->area.invarea; if(invarea == 0.0f) return false; - float3 axisu = make_float3(data1.y, data1.z, data1.w); - float3 axisv = make_float3(data2.y, data2.z, data2.w); - float3 Ng = make_float3(data3.y, data3.z, data3.w); + float3 axisu = make_float3(klight->area.axisu[0], + klight->area.axisu[1], + klight->area.axisu[2]); + float3 axisv = make_float3(klight->area.axisv[0], + klight->area.axisv[1], + klight->area.axisv[2]); + float3 Ng = make_float3(klight->area.dir[0], + klight->area.dir[1], + klight->area.dir[2]); /* one sided */ if(dot(D, Ng) >= 0.0f) return false; - float3 light_P = make_float3(data0.y, data0.z, data0.w); + float3 light_P = make_float3(klight->co[0], 
klight->co[1], klight->co[2]); if(!ray_quad_intersect(P, D, 0.0f, t, light_P, axisu, axisv, Ng, @@ -1040,7 +1048,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) int half_len = len >> 1; int middle = first + half_len; - if(r < kernel_tex_fetch(__light_distribution, middle).x) { + if(r < kernel_tex_fetch(__light_distribution, middle).totarea) { len = half_len; } else { @@ -1055,8 +1063,8 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) /* Rescale to reuse random number. this helps the 2D samples within * each area light be stratified as well. */ - float distr_min = kernel_tex_fetch(__light_distribution, index).x; - float distr_max = kernel_tex_fetch(__light_distribution, index+1).x; + float distr_min = kernel_tex_fetch(__light_distribution, index).totarea; + float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea; *randu = (r - distr_min)/(distr_max - distr_min); return index; @@ -1066,8 +1074,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) { - float4 data4 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 4); - return (bounce > __float_as_int(data4.x)); + return (bounce > kernel_tex_fetch(__lights, index).max_bounces); } ccl_device_noinline bool light_sample(KernelGlobals *kg, @@ -1082,12 +1089,12 @@ ccl_device_noinline bool light_sample(KernelGlobals *kg, int index = light_distribution_sample(kg, &randu); /* fetch light data */ - float4 l = kernel_tex_fetch(__light_distribution, index); - int prim = __float_as_int(l.y); + const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index); + int prim = kdistribution->prim; if(prim >= 0) { - int object = __float_as_int(l.w); - int shader_flag = __float_as_int(l.z); + int object = kdistribution->mesh_light.object_id; + int shader_flag = kdistribution->mesh_light.shader_flag; 
triangle_light_sample(kg, prim, object, randu, randv, time, ls, P); ls->shader |= shader_flag; @@ -1106,8 +1113,7 @@ ccl_device_noinline bool light_sample(KernelGlobals *kg, ccl_device int light_select_num_samples(KernelGlobals *kg, int index) { - float4 data3 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 3); - return __float_as_int(data3.x); + return kernel_tex_fetch(__lights, index).samples; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 427ed142e7f..601165a3848 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -55,8 +55,8 @@ KERNEL_TEX(float4, __attributes_float3) KERNEL_TEX(uchar4, __attributes_uchar4) /* lights */ -KERNEL_TEX(float4, __light_distribution) -KERNEL_TEX(float4, __light_data) +KERNEL_TEX(KernelLightDistribution, __light_distribution) +KERNEL_TEX(KernelLight, __lights) KERNEL_TEX(float2, __light_background_marginal_cdf) KERNEL_TEX(float2, __light_background_conditional_cdf) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index b6f1a3a6d98..198ce39c63f 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -36,7 +36,6 @@ CCL_NAMESPACE_BEGIN /* Constants */ #define OBJECT_VECTOR_SIZE 6 -#define LIGHT_SIZE 11 #define FILTER_TABLE_SIZE 1024 #define RAMP_TABLE_SIZE 256 #define SHUTTER_TABLE_SIZE 256 @@ -1456,6 +1455,61 @@ typedef struct KernelObject { } KernelObject;; static_assert_align(KernelObject, 16); +typedef struct KernelSpotLight { + float radius; + float invarea; + float spot_angle; + float spot_smooth; + float dir[3]; +} KernelSpotLight; + +/* PointLight is SpotLight with only radius and invarea being used. 
*/ + +typedef struct KernelAreaLight { + float axisu[3]; /* quad axis, passed with axisv to ray_quad_intersect() and area_light_sample() */ + float invarea; /* eval_fac is 0.25f*invarea; lamp_light_eval() returns false when this is 0 */ + float axisv[3]; /* second quad axis */ + float dir[3]; /* read as the light normal (Ng) for the one-sided checks */ +} KernelAreaLight; + +typedef struct KernelDistantLight { + float radius; /* 0 makes lamp_light_eval() return false; > 0 enables distant_light_sample() */ + float cosangle; /* lamp_light_eval() rejects rays with dot(-lightD, D) below this */ + float invarea; /* pdf is invarea/(costheta*costheta*costheta) in lamp_light_eval() */ +} KernelDistantLight; + +typedef struct KernelLight { /* per-lamp record fetched with kernel_tex_fetch(__lights, lamp) */ + int type; /* cast to LightType by the kernel */ + float co[3]; /* position; read as the light direction (lightD) for LIGHT_DISTANT */ + int shader_id; /* copied to ls->shader */ + int samples; /* returned by light_select_num_samples() */ + float max_bounces; /* NOTE(review): float, but compared with int bounce in light_select_reached_max_bounces(); the code this replaces read the slot via __float_as_int - confirm the host writes a numeric float, not int bits */ + float random; /* returned by lamp_random_number() */ + float4 tfm[3]; /* copied to Transform x, y, z by lamp_fetch_transform() when inverse is false */ + float4 itfm[3]; /* same, selected when inverse is true */ + union { /* member read depends on type; LIGHT_POINT also reads spot (radius, invarea) */ + KernelSpotLight spot; + KernelAreaLight area; + KernelDistantLight distant; + }; +} KernelLight; +static_assert_align(KernelLight, 16); + +typedef struct KernelLightDistribution { /* entry type of __light_distribution */ + float totarea; /* compared with r in the binary search of light_distribution_sample(); entries index and index+1 bound the randu rescale */ + int prim; /* >= 0: light_sample() reads mesh_light and calls triangle_light_sample() */ + union { + struct { + int shader_flag; /* OR'ed into ls->shader */ + int object_id; /* object passed to triangle_light_sample() */ + } mesh_light; + struct { /* not read in the visible hunks - presumably used when prim < 0; TODO confirm */ + float pad; + float size; + } lamp; + }; +} KernelLightDistribution; +static_assert_align(KernelLightDistribution, 16); /* Declarations required for split kernel */ -- cgit v1.2.3