Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-20 23:21:05 +0400
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-06-22 00:31:44 +0400
commit: 8fbd71e5f2862e6efd0e3bcb015f1f6201ccca14 (patch)
tree: 0d889efc32554d0eb4ad6f8b1f46b4885b19991d
parent: 88d8358f91e1306a67d59250162443194ee9edcf (diff)
Cycles: improved Beckmann sampling using precomputed data
It turns out that the new Beckmann sampling function doesn't work well with Quasi Monte Carlo sampling, mainly near normal incidence, where it can be worse than the previous sampler. In the new sampler the random number pattern gets split in two, warped and overlapped, which hurts the stratification; see the visualization in the differential revision.

Now we use a precomputed table, which is much better behaved. GGX does not seem to benefit from using a precomputed table.

The disadvantage is that this table adds 1MB of memory usage and 0.03s startup time to every render (on my quad core CPU).

Differential Revision: https://developer.blender.org/D614
-rw-r--r-- intern/cycles/kernel/closure/bsdf.h | 4
-rw-r--r-- intern/cycles/kernel/closure/bsdf_microfacet.h | 40
-rw-r--r-- intern/cycles/kernel/kernel_types.h | 14
-rw-r--r-- intern/cycles/kernel/osl/osl_closures.cpp | 2
-rw-r--r-- intern/cycles/kernel/osl/osl_closures.h | 11
-rw-r--r-- intern/cycles/kernel/svm/svm_blackbody.h | 2
-rw-r--r-- intern/cycles/render/shader.cpp | 108
-rw-r--r-- intern/cycles/render/shader.h | 1
8 files changed, 153 insertions, 29 deletions
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 8ddf4971909..81c239ea0c9 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -85,13 +85,13 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- label = bsdf_microfacet_ggx_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- label = bsdf_microfacet_beckmann_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
+ label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
break;
#ifdef __ANISOTROPIC__
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 4a3d223d765..e130349bca2 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -176,8 +176,8 @@ ccl_device float approx_erfinvf(float z)
* E. Heitz and E. d'Eon, EGSR 2014 */
ccl_device_inline void microfacet_beckmann_sample_slopes(
+ KernelGlobals *kg,
const float cos_theta_i, const float sin_theta_i,
- const float alpha_x, const float alpha_y,
float randu, float randv, float *slope_x, float *slope_y,
float *G1i)
{
@@ -200,10 +200,12 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
const float SQRT_PI_INV = 0.56418958354f;
const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a);
const float G1 = 1.0f/(1.0f + Lambda); /* masking */
- const float C = 1.0f - G1 * erf_a;
*G1i = G1;
+#if 0
+ const float C = 1.0f - G1 * erf_a;
+
/* sample slope X */
if(randu < C) {
/* rescale randu */
@@ -238,11 +240,20 @@ ccl_device_inline void microfacet_beckmann_sample_slopes(
/* sample slope Y */
*slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#else
+ /* use precomputed table, because it better preserves stratification
+ * of the random number pattern */
+ int beckmann_table_offset = kernel_data.tables.beckmann_offset;
+
+ *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i,
+ beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE);
+ *slope_y = approx_erfinvf(2.0f*randv - 1.0f);
+#endif
+
}
ccl_device_inline void microfacet_ggx_sample_slopes(
const float cos_theta_i, const float sin_theta_i,
- const float alpha_x, const float alpha_y,
float randu, float randv, float *slope_x, float *slope_y,
float *G1i)
{
@@ -290,7 +301,8 @@ ccl_device_inline void microfacet_ggx_sample_slopes(
*slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x));
}
-ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i,
+ccl_device_inline float3 microfacet_sample_stretched(
+ KernelGlobals *kg, const float3 omega_i,
const float alpha_x, const float alpha_y,
const float randu, const float randv,
bool beckmann, float *G1i)
@@ -317,12 +329,14 @@ ccl_device_inline float3 microfacet_sample_stretched(const float3 omega_i,
/* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */
float slope_x, slope_y;
- if(beckmann)
- microfacet_beckmann_sample_slopes(costheta_, sintheta_,
- alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i);
- else
+ if(beckmann) {
+ microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_,
+ randu, randv, &slope_x, &slope_y, G1i);
+ }
+ else {
microfacet_ggx_sample_slopes(costheta_, sintheta_,
- alpha_x, alpha_y, randu, randv, &slope_x, &slope_y, G1i);
+ randu, randv, &slope_x, &slope_y, G1i);
+ }
/* 3. rotate */
float tmp = cosphi_*slope_x - sinphi_*slope_y;
@@ -530,7 +544,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, con
return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
float alpha_x = sc->data0;
float alpha_y = sc->data1;
@@ -552,7 +566,7 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
float3 local_m;
float G1o;
- local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_y,
+ local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y,
randu, randv, false, &G1o);
float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
@@ -878,7 +892,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
return make_float3(out, out, out);
}
-ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf)
{
float alpha_x = sc->data0;
float alpha_y = sc->data1;
@@ -900,7 +914,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
float3 local_m;
float G1o;
- local_m = microfacet_sample_stretched(local_I, alpha_x, alpha_x,
+ local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x,
randu, randv, true, &G1o);
float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 652a7848d73..d81909a623a 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -44,6 +44,8 @@ CCL_NAMESPACE_BEGIN
#define BB_TABLE_YPOWER 5.0f
#define BB_TABLE_SPACING 2.0f
+#define BECKMANN_TABLE_SIZE 256
+
#define TEX_NUM_FLOAT_IMAGES 5
#define SHADER_NONE (~0)
@@ -933,11 +935,11 @@ typedef struct KernelCurves {
float maximum_width;
} KernelCurves;
-typedef struct KernelBlackbody {
- int table_offset;
- int pad1, pad2, pad3;
-} KernelBlackbody;
-
+typedef struct KernelTables {
+ int blackbody_offset;
+ int beckmann_offset;
+ int pad1, pad2;
+} KernelTables;
typedef struct KernelData {
KernelCamera cam;
@@ -946,7 +948,7 @@ typedef struct KernelData {
KernelIntegrator integrator;
KernelBVH bvh;
KernelCurves curve;
- KernelBlackbody blackbody;
+ KernelTables tables;
} KernelData;
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index c1560ea2d01..f102e79f5c7 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -41,6 +41,8 @@
#include "util_param.h"
#include "kernel_types.h"
+#include "kernel_compat_cpu.h"
+#include "kernel_globals.h"
#include "kernel_montecarlo.h"
#include "closure/bsdf_util.h"
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index 218cf1c19cc..a543907e884 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -149,17 +149,18 @@ public: \
\
void blur(float roughness) \
{ \
- bsdf_##svmlower##_blur(&sc, roughness); \
} \
\
float3 eval_reflect(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- return bsdf_##svmlower##_eval_reflect(&sc, omega_out, omega_in, &pdf); \
+ pdf = 0; \
+ return make_float3(0, 0, 0); \
} \
\
float3 eval_transmit(const float3 &omega_out, const float3 &omega_in, float& pdf) const \
{ \
- return bsdf_##svmlower##_eval_transmit(&sc, omega_out, omega_in, &pdf); \
+ pdf = 0; \
+ return make_float3(0, 0, 0); \
} \
\
int sample(const float3 &Ng, \
@@ -168,8 +169,8 @@ public: \
float3 &omega_in, float3 &domega_in_dx, float3 &domega_in_dy, \
float &pdf, float3 &eval) const \
{ \
- return bsdf_##svmlower##_sample(&sc, Ng, omega_out, domega_out_dx, domega_out_dy, \
- randu, randv, &eval, &omega_in, &domega_in_dx, &domega_in_dy, &pdf); \
+ pdf = 0; \
+ return LABEL_NONE; \
} \
}; \
\
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 63dbf27d35e..15257aed92e 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -55,7 +55,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
float t = powf((temperature - BB_DRAPPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));
- int blackbody_table_offset = kernel_data.blackbody.table_offset;
+ int blackbody_table_offset = kernel_data.tables.blackbody_offset;
/* Retrieve colors from the lookup table */
float lutval = t*lookuptablenormalize;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 662caed72f1..87107ae642d 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -31,6 +31,95 @@
CCL_NAMESPACE_BEGIN
+/* Beckmann sampling precomputed table, see bsdf_microfacet.h */
+
+/* 2D slope distribution (alpha = 1.0) */
+static float beckmann_table_P22(const float slope_x, const float slope_y)
+{
+ return expf(-(slope_x*slope_x + slope_y*slope_y));
+}
+
+/* maximal slope amplitude (range that contains 99.99% of the distribution) */
+static float beckmann_table_slope_max()
+{
+ return 6.0;
+}
+
+static void beckmann_table_rows(float *table, int row_from, int row_to)
+{
+ /* allocate temporary data */
+ const int DATA_TMP_SIZE = 512;
+ vector<double> slope_x(DATA_TMP_SIZE);
+ vector<double> CDF_P22_omega_i(DATA_TMP_SIZE);
+
+ /* loop over incident directions */
+ for(int index_theta = row_from; index_theta < row_to; index_theta++) {
+ /* incident vector */
+ const float cos_theta = index_theta / (BECKMANN_TABLE_SIZE - 1.0f);
+ const float sin_theta = safe_sqrtf(1.0f - cos_theta*cos_theta);
+
+ /* for a given incident vector
+ * integrate P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
+ slope_x[0] = -beckmann_table_slope_max();
+ CDF_P22_omega_i[0] = 0;
+
+ for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
+ /* slope_x */
+ slope_x[index_slope_x] = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f);
+
+ /* dot product with incident vector */
+ float dot_product = fmaxf(0.0f, -slope_x[index_slope_x]*sin_theta + cos_theta);
+ /* marginalize P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
+ float P22_omega_i = 0.0f;
+
+ for(int j = 0; j < 100; ++j) {
+ float slope_y = -beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * j * (1.0f/99.0f);
+ P22_omega_i += dot_product * beckmann_table_P22(slope_x[index_slope_x], slope_y);
+ }
+
+ /* CDF of P22_{omega_i}(x_slope, 1, 1), Eq. (10) */
+ CDF_P22_omega_i[index_slope_x] = CDF_P22_omega_i[index_slope_x - 1] + P22_omega_i;
+ }
+
+ /* renormalize CDF_P22_omega_i */
+ for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x)
+ CDF_P22_omega_i[index_slope_x] /= CDF_P22_omega_i[DATA_TMP_SIZE - 1];
+
+ /* loop over random number U1 */
+ int index_slope_x = 0;
+
+ for(int index_U = 0; index_U < BECKMANN_TABLE_SIZE; ++index_U) {
+ const float U = 0.0000001f + 0.9999998f * index_U / (float)(BECKMANN_TABLE_SIZE - 1);
+
+ /* inverse CDF_P22_omega_i, solve Eq.(11) */
+ while(CDF_P22_omega_i[index_slope_x] <= U)
+ ++index_slope_x;
+
+ const double interp =
+ (CDF_P22_omega_i[index_slope_x] - U) /
+ (CDF_P22_omega_i[index_slope_x] - CDF_P22_omega_i[index_slope_x - 1]);
+
+ /* store value */
+ table[index_U + index_theta*BECKMANN_TABLE_SIZE] = (float)(
+ interp * slope_x[index_slope_x - 1]
+ + (1.0f-interp) * slope_x[index_slope_x]);
+ }
+ }
+}
+
+static void beckmann_table_build(vector<float>& table)
+{
+ table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE);
+
+ /* multithreaded build */
+ TaskPool pool;
+
+ for(int i = 0; i < BECKMANN_TABLE_SIZE; i+=8)
+ pool.push(function_bind(&beckmann_table_rows, &table[0], i, i+8));
+
+ pool.wait_work();
+}
+
/* Shader */
Shader::Shader()
@@ -138,6 +227,7 @@ ShaderManager::ShaderManager()
{
need_update = true;
blackbody_table_offset = TABLE_OFFSET_INVALID;
+ beckmann_table_offset = TABLE_OFFSET_INVALID;
}
ShaderManager::~ShaderManager()
@@ -282,19 +372,28 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
device->tex_alloc("__shader_flag", dscene->shader_flag);
/* blackbody lookup table */
- KernelBlackbody *kblackbody = &dscene->data.blackbody;
+ KernelTables *ktables = &dscene->data.tables;
if(has_converter_blackbody && blackbody_table_offset == TABLE_OFFSET_INVALID) {
vector<float> table = blackbody_table();
blackbody_table_offset = scene->lookup_tables->add_table(dscene, table);
- kblackbody->table_offset = (int)blackbody_table_offset;
+ ktables->blackbody_offset = (int)blackbody_table_offset;
}
else if(!has_converter_blackbody && blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
+ /* beckmann lookup table */
+ if(beckmann_table_offset == TABLE_OFFSET_INVALID) {
+ vector<float> table;
+ beckmann_table_build(table);
+ beckmann_table_offset = scene->lookup_tables->add_table(dscene, table);
+
+ ktables->beckmann_offset = (int)beckmann_table_offset;
+ }
+
/* integrator */
KernelIntegrator *kintegrator = &dscene->data.integrator;
kintegrator->use_volumes = has_volumes;
@@ -308,6 +407,11 @@ void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scen
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
+ if(beckmann_table_offset != TABLE_OFFSET_INVALID) {
+ scene->lookup_tables->remove_table(beckmann_table_offset);
+ beckmann_table_offset = TABLE_OFFSET_INVALID;
+ }
+
device->tex_free(dscene->shader_flag);
dscene->shader_flag.clear();
}
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index 84be4b469d8..0ed6d2ddf01 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -149,6 +149,7 @@ protected:
AttributeIDMap unique_attribute_id;
size_t blackbody_table_offset;
+ size_t beckmann_table_offset;
};
CCL_NAMESPACE_END