EEVEE: Update LUT GGX generation shader

This modifies the Principled BSDF and the Glass BSDF, which now have a better fit to multiscatter GGX. The code to generate the LUT has been updated and can run at runtime. The refraction LUT has been changed to have the critical angle always centered around one pixel so that interpolation issues can be mitigated. Offline LUT data will be updated in another commit. This simplifies the BTDF retrieval, removing the manual clean cut at low roughness. This maximizes the precision of the LUT by scaling the sides by the critical angle. I also touched the ior > 1.0 approximation to make it smoother. Also includes some cleanup of bsdf_sampling_lib.glsl
author: Clément Foucault <foucault.clem@gmail.com> 2021-02-13 20:50:09 +0300
committer: Clément Foucault <foucault.clem@gmail.com> 2021-02-13 20:52:19 +0300
commit: 83ac8628c490eda4fa5237b7a4256bc670dc0682 (patch)
tree: 89563a6d46dcea4ea51b70d23b3a73e7637161a6 /source
parent: 06492fd61984c1a92fb1f93d30028de97ead451f (diff)
15 files changed, 303 insertions, 282 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 8abd353d36d..dff9e302fdf 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -90,6 +90,7 @@ set(SRC
   engines/eevee/eevee_lights.c
   engines/eevee/eevee_lookdev.c
   engines/eevee/eevee_lut.c
+  engines/eevee/eevee_lut_gen.c
   engines/eevee/eevee_materials.c
   engines/eevee/eevee_mist.c
   engines/eevee/eevee_motion_blur.c
diff --git a/source/blender/draw/engines/eevee/eevee_lut_gen.c b/source/blender/draw/engines/eevee/eevee_lut_gen.c
index 18645fea5e0..770134d27f9 100644
--- a/source/blender/draw/engines/eevee/eevee_lut_gen.c
+++ b/source/blender/draw/engines/eevee/eevee_lut_gen.c
@@ -27,145 +27,97 @@
 
 #include "DRW_render.h"
 
-#include "BLI_alloca.h"
+#include "BLI_fileops.h"
 #include "BLI_rand.h"
 #include "BLI_string_utils.h"
 
 #include "eevee_private.h"
 
-static struct GPUTexture *create_ggx_lut_texture(int UNUSED(w), int UNUSED(h))
-{
-  struct GPUTexture *tex;
-  struct GPUFrameBuffer *fb = NULL;
-  static float samples_len = 8192.0f;
-  static float inv_samples_len = 1.0f / 8192.0f;
+#define DO_FILE_OUTPUT 0
 
-  DRWPass *pass = DRW_pass_create("LightProbe Filtering", DRW_STATE_WRITE_COLOR);
+float *EEVEE_lut_update_ggx_brdf(int lut_size)
+{
+  DRWPass *pass = DRW_pass_create(__func__, DRW_STATE_WRITE_COLOR);
   DRWShadingGroup *grp = DRW_shgroup_create(EEVEE_shaders_ggx_lut_sh_get(), pass);
-  DRW_shgroup_uniform_float(grp, "sampleCount", &samples_len, 1);
-  DRW_shgroup_uniform_float(grp, "invSampleCount", &inv_samples_len, 1);
-  DRW_shgroup_uniform_texture(grp, "texHammersley", e_data.hammersley);
-  DRW_shgroup_uniform_texture(grp, "texJitter", e_data.jitter);
-
-  struct GPUBatch *geom = DRW_cache_fullscreen_quad_get();
-  DRW_shgroup_call(grp, geom, NULL);
-
-  float *texels = MEM_mallocN(sizeof(float[2]) * w * h, "lut");
-
-  tex = DRW_texture_create_2d(w, h, GPU_RG16F, DRW_TEX_FILTER, (float *)texels);
-
-  DRWFboTexture tex_filter = {&tex, GPU_RG16F, DRW_TEX_FILTER};
-  GPU_framebuffer_init(&fb, &draw_engine_eevee_type, w, h, &tex_filter, 1);
-
+  DRW_shgroup_uniform_float_copy(grp, "sampleCount", 64.0f); /* Actual sample count is squared. */
+  DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
+
+  GPUTexture *tex = DRW_texture_create_2d(lut_size, lut_size, GPU_RG16F, 0, NULL);
+  GPUFrameBuffer *fb = NULL;
+  GPU_framebuffer_ensure_config(&fb,
+                                {
+                                    GPU_ATTACHMENT_NONE,
+                                    GPU_ATTACHMENT_TEXTURE(tex),
+                                });
   GPU_framebuffer_bind(fb);
   DRW_draw_pass(pass);
-
-  float *data = MEM_mallocN(sizeof(float[3]) * w * h, "lut");
-  GPU_framebuffer_read_color(fb, 0, 0, w, h, 3, 0, GPU_DATA_FLOAT, data);
-
-  printf("{");
-  for (int i = 0; i < w * h * 3; i += 3) {
-    printf("%ff, %ff, ", data[i], data[i + 1]);
-    i += 3;
-    printf("%ff, %ff, ", data[i], data[i + 1]);
-    i += 3;
-    printf("%ff, %ff, ", data[i], data[i + 1]);
-    i += 3;
-    printf("%ff, %ff, \n", data[i], data[i + 1]);
+  GPU_FRAMEBUFFER_FREE_SAFE(fb);
+
+  float *data = GPU_texture_read(tex, GPU_DATA_FLOAT, 0);
+  GPU_texture_free(tex);
+#if DO_FILE_OUTPUT
+  /* Content is to be put inside eevee_lut.c */
+  FILE *f = BLI_fopen("bsdf_split_sum_ggx.h", "w");
+  fprintf(f, "const float bsdf_split_sum_ggx[%d * %d * 2] = {", lut_size, lut_size);
+  for (int i = 0; i < lut_size * lut_size * 2;) {
+    fprintf(f, "\n    ");
+    for (int j = 0; j < 4; j++, i += 2) {
+      fprintf(f, "%ff, %ff, ", data[i], data[i + 1]);
+    }
   }
-  printf("}");
-
-  MEM_freeN(texels);
-  MEM_freeN(data);
+  fprintf(f, "\n};\n");
+  fclose(f);
+#endif
 
-  return tex;
+  return data;
 }
 
-static struct GPUTexture *create_ggx_refraction_lut_texture(int w, int h)
+float *EEVEE_lut_update_ggx_btdf(int lut_size, int lut_depth)
 {
-  struct GPUTexture *tex;
-  struct GPUTexture *hammersley = create_hammersley_sample_texture(8192);
-  struct GPUFrameBuffer *fb = NULL;
-  static float samples_len = 8192.0f;
-  static float a2 = 0.0f;
-  static float inv_samples_len = 1.0f / 8192.0f;
-
-  DRWPass *pass = DRW_pass_create("LightProbe Filtering", DRW_STATE_WRITE_COLOR);
+  float roughness;
+  DRWPass *pass = DRW_pass_create(__func__, DRW_STATE_WRITE_COLOR);
   DRWShadingGroup *grp = DRW_shgroup_create(EEVEE_shaders_ggx_refraction_lut_sh_get(), pass);
-  DRW_shgroup_uniform_float(grp, "a2", &a2, 1);
-  DRW_shgroup_uniform_float(grp, "sampleCount", &samples_len, 1);
-  DRW_shgroup_uniform_float(grp, "invSampleCount", &inv_samples_len, 1);
-  DRW_shgroup_uniform_texture(grp, "texHammersley", hammersley);
-  DRW_shgroup_uniform_texture(grp, "utilTex", e_data.util_tex);
-
-  struct GPUBatch *geom = DRW_cache_fullscreen_quad_get();
-  DRW_shgroup_call(grp, geom, NULL);
-
-  float *texels = MEM_mallocN(sizeof(float[2]) * w * h, "lut");
-
-  tex = DRW_texture_create_2d(w, h, GPU_R16F, DRW_TEX_FILTER, (float *)texels);
-
-  DRWFboTexture tex_filter = {&tex, GPU_R16F, DRW_TEX_FILTER};
-  GPU_framebuffer_init(&fb, &draw_engine_eevee_type, w, h, &tex_filter, 1);
+  DRW_shgroup_uniform_float_copy(grp, "sampleCount", 64.0f); /* Actual sample count is squared. */
+  DRW_shgroup_uniform_float(grp, "z", &roughness, 1);
+  DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
+
+  GPUTexture *tex = DRW_texture_create_2d_array(lut_size, lut_size, lut_depth, GPU_RG16F, 0, NULL);
+  GPUFrameBuffer *fb = NULL;
+  for (int i = 0; i < lut_depth; i++) {
+    GPU_framebuffer_ensure_config(&fb,
+                                  {
+                                      GPU_ATTACHMENT_NONE,
+                                      GPU_ATTACHMENT_TEXTURE_LAYER(tex, i),
+                                  });
+    GPU_framebuffer_bind(fb);
+    roughness = i / (lut_depth - 1.0f);
+    DRW_draw_pass(pass);
+  }
 
-  GPU_framebuffer_bind(fb);
+  GPU_FRAMEBUFFER_FREE_SAFE(fb);
 
-  float *data = MEM_mallocN(sizeof(float[3]) * w * h, "lut");
+  float *data = GPU_texture_read(tex, GPU_DATA_FLOAT, 0);
+  GPU_texture_free(tex);
 
-  float inc = 1.0f / 31.0f;
-  float roughness = 1e-8f - inc;
+#if DO_FILE_OUTPUT
+  /* Content is to be put inside eevee_lut.c. Don't forget to format the output. */
   FILE *f = BLI_fopen("btdf_split_sum_ggx.h", "w");
-  fprintf(f, "static float btdf_split_sum_ggx[32][64 * 64] = {\n");
-  do {
-    roughness += inc;
-    CLAMP(roughness, 1e-4f, 1.0f);
-    a2 = powf(roughness, 4.0f);
-    DRW_draw_pass(pass);
-
-    GPU_framebuffer_read_data(0, 0, w, h, 3, 0, data);
-
-#if 1
-    fprintf(f, "\t{\n\t\t");
-    for (int i = 0; i < w * h * 3; i += 3) {
-      fprintf(f, "%ff,", data[i]);
-      if (((i / 3) + 1) % 12 == 0) {
-        fprintf(f, "\n\t\t");
-      }
-      else {
-        fprintf(f, " ");
-      }
-    }
-    fprintf(f, "\n\t},\n");
-#else
-    for (int i = 0; i < w * h * 3; i += 3) {
-      if (data[i] < 0.01) {
-        printf(" ");
-      }
-      else if (data[i] < 0.3) {
-        printf(".");
-      }
-      else if (data[i] < 0.6) {
-        printf("+");
-      }
-      else if (data[i] < 0.9) {
-        printf("%%");
-      }
-      else {
-        printf("#");
-      }
-      if ((i / 3 + 1) % 64 == 0) {
-        printf("\n");
+  fprintf(f, "const float btdf_split_sum_ggx[%d][%d * %d * 2] = {", lut_depth, lut_size, lut_size);
+  fprintf(f, "\n    ");
+  int ofs = 0;
+  for (int d = 0; d < lut_depth; d++) {
+    fprintf(f, "{\n");
+    for (int i = 0; i < lut_size * lut_size * 2;) {
+      for (int j = 0; j < 4; j++, i += 2, ofs += 2) {
+        fprintf(f, "%ff, %ff, ", data[ofs], data[ofs + 1]);
       }
+      fprintf(f, "\n    ");
     }
-#endif
-
-  } while (roughness < 1.0f);
-  fprintf(f, "\n};\n");
-
+    fprintf(f, "},\n");
+  }
+  fprintf(f, "};\n");
   fclose(f);
+#endif
 
-  MEM_freeN(texels);
-  MEM_freeN(data);
-
-  return tex;
+  return data;
 }
diff --git a/source/blender/draw/engines/eevee/eevee_materials.c b/source/blender/draw/engines/eevee/eevee_materials.c
index 5f2821730f1..841da7fd076 100644
--- a/source/blender/draw/engines/eevee/eevee_materials.c
+++ b/source/blender/draw/engines/eevee/eevee_materials.c
@@ -142,11 +142,20 @@ static void eevee_init_noise_texture(void)
   e_data.noise_tex = DRW_texture_create_2d(64, 64, GPU_RGBA16F, 0, (float *)blue_noise);
 }
 
+#define RUNTIME_LUT_CREATION 1
+
 static void eevee_init_util_texture(void)
 {
   const int layers = 4 + 16;
   float(*texels)[4] = MEM_mallocN(sizeof(float[4]) * 64 * 64 * layers, "utils texels");
   float(*texels_layer)[4] = texels;
+#if RUNTIME_LUT_CREATION
+  float *bsdf_ggx_lut = EEVEE_lut_update_ggx_brdf(64);
+  float(*btdf_ggx_lut)[64 * 64 * 2] = (float(*)[64 * 64 * 2]) EEVEE_lut_update_ggx_btdf(64, 16);
+#else
+  const float *bsdf_ggx_lut = bsdf_split_sum_ggx;
+  const float(*btdf_ggx_lut)[64 * 64 * 2] = btdf_split_sum_ggx;
+#endif
 
   /* Copy ltc_mat_ggx into 1st layer */
   memcpy(texels_layer, ltc_mat_ggx, sizeof(float[4]) * 64 * 64);
@@ -155,8 +164,8 @@ static void eevee_init_util_texture(void)
   /* Copy bsdf_split_sum_ggx into 2nd layer red and green channels.
    * Copy ltc_mag_ggx into 2nd layer blue and alpha channel. */
   for (int i = 0; i < 64 * 64; i++) {
-    texels_layer[i][0] = bsdf_split_sum_ggx[i * 2 + 0];
-    texels_layer[i][1] = bsdf_split_sum_ggx[i * 2 + 1];
+    texels_layer[i][0] = bsdf_ggx_lut[i * 2 + 0];
+    texels_layer[i][1] = bsdf_ggx_lut[i * 2 + 1];
     texels_layer[i][2] = ltc_mag_ggx[i * 2 + 0];
     texels_layer[i][3] = ltc_mag_ggx[i * 2 + 1];
   }
@@ -183,8 +192,8 @@ static void eevee_init_util_texture(void)
   /* Copy Refraction GGX LUT in layer 5 - 21 */
   for (int j = 0; j < 16; j++) {
     for (int i = 0; i < 64 * 64; i++) {
-      texels_layer[i][0] = btdf_split_sum_ggx[j * 2][i];
-      texels_layer[i][1] = 0.0; /* UNUSED */
+      texels_layer[i][0] = btdf_ggx_lut[j][i * 2 + 0];
+      texels_layer[i][1] = btdf_ggx_lut[j][i * 2 + 1];
       texels_layer[i][2] = 0.0; /* UNUSED */
       texels_layer[i][3] = 0.0; /* UNUSED */
     }
@@ -195,6 +204,10 @@ static void eevee_init_util_texture(void)
       64, 64, layers, GPU_RGBA16F, DRW_TEX_FILTER | DRW_TEX_WRAP, (float *)texels);
 
   MEM_freeN(texels);
+#if RUNTIME_LUT_CREATION
+  MEM_freeN(bsdf_ggx_lut);
+  MEM_freeN(btdf_ggx_lut);
+#endif
 }
 
 void EEVEE_update_noise(EEVEE_PassList *psl, EEVEE_FramebufferList *fbl, const double offsets[3])
diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h
index 9761264f03e..17d0ead86c5 100644
--- a/source/blender/draw/engines/eevee/eevee_private.h
+++ b/source/blender/draw/engines/eevee/eevee_private.h
@@ -1518,6 +1518,10 @@ void EEVEE_lookdev_draw(EEVEE_Data *vedata);
 /** eevee_engine.c */
 void EEVEE_cache_populate(void *vedata, Object *ob);
 
+/** eevee_lut_gen.c */
+float *EEVEE_lut_update_ggx_brdf(int lut_size);
+float *EEVEE_lut_update_ggx_btdf(int lut_size, int lut_depth);
+
 /* Shadow Matrix */
 static const float texcomat[4][4] = {
     /* From NDC to TexCo */
diff --git a/source/blender/draw/engines/eevee/eevee_shaders.c b/source/blender/draw/engines/eevee/eevee_shaders.c
index 64efca83915..2c52295b32f 100644
--- a/source/blender/draw/engines/eevee/eevee_shaders.c
+++ b/source/blender/draw/engines/eevee/eevee_shaders.c
@@ -567,11 +567,8 @@ GPUShader *EEVEE_shaders_effect_maxz_copydepth_sh_get(void)
 GPUShader *EEVEE_shaders_ggx_lut_sh_get(void)
 {
   if (e_data.ggx_lut_sh == NULL) {
-    e_data.ggx_lut_sh = DRW_shader_create_with_shaderlib(datatoc_lightprobe_vert_glsl,
-                                                         datatoc_lightprobe_geom_glsl,
-                                                         datatoc_bsdf_lut_frag_glsl,
-                                                         e_data.lib,
-                                                         "#define HAMMERSLEY_SIZE 8192\n");
+    e_data.ggx_lut_sh = DRW_shader_create_fullscreen_with_shaderlib(
+        datatoc_bsdf_lut_frag_glsl, e_data.lib, NULL);
   }
   return e_data.ggx_lut_sh;
 }
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_lut_frag.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_lut_frag.glsl
index 1e051994693..46ea8b747c8 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_lut_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_lut_frag.glsl
@@ -1,48 +1,57 @@
+#pragma BLENDER_REQUIRE(common_utiltex_lib.glsl)
 #pragma BLENDER_REQUIRE(bsdf_sampling_lib.glsl)
 
-out vec4 FragColor;
+uniform float sampleCount;
+
+out vec2 FragColor;
 
 void main()
 {
-  vec3 N, T, B, V;
-
-  float NV = (1.0 - (clamp(gl_FragCoord.y / LUT_SIZE, 1e-4, 0.9999)));
-  float sqrtRoughness = clamp(gl_FragCoord.x / LUT_SIZE, 1e-4, 0.9999);
-  float a = sqrtRoughness * sqrtRoughness;
-  float a2 = a * a;
+  /* Make sure coordinates are covering the whole [0..1] range at texel center. */
+  float y = floor(gl_FragCoord.y) / (LUT_SIZE - 1);
+  float x = floor(gl_FragCoord.x) / (LUT_SIZE - 1);
 
-  N = vec3(0.0, 0.0, 1.0);
-  T = vec3(1.0, 0.0, 0.0);
-  B = vec3(0.0, 1.0, 0.0);
-  V = vec3(sqrt(1.0 - NV * NV), 0.0, NV);
+  float NV = clamp(1.0 - y * y, 1e-4, 0.9999);
+  float a = x * x;
+  float a2 = clamp(a * a, 1e-4, 0.9999);
 
-  setup_noise();
+  vec3 V = vec3(sqrt(1.0 - NV * NV), 0.0, NV);
 
   /* Integrating BRDF */
   float brdf_accum = 0.0;
   float fresnel_accum = 0.0;
-  for (float i = 0; i < sampleCount; i++) {
-    vec3 H = sample_ggx(i, a2, N, T, B); /* Microfacet normal */
-    vec3 L = -reflect(V, H);
-    float NL = L.z;
-
-    if (NL > 0.0) {
-      float NH = max(H.z, 0.0);
-      float VH = max(dot(V, H), 0.0);
-
-      float G1_v = G1_Smith_GGX(NV, a2);
-      float G1_l = G1_Smith_GGX(NL, a2);
-      float G_smith = 4.0 * NV * NL / (G1_v * G1_l); /* See G1_Smith_GGX for explanations. */
-
-      float brdf = (G_smith * VH) / (NH * NV);
-      float Fc = pow(1.0 - VH, 5.0);
-
-      brdf_accum += (1.0 - Fc) * brdf;
-      fresnel_accum += Fc * brdf;
+  for (float j = 0.0; j < sampleCount; j++) {
+    for (float i = 0.0; i < sampleCount; i++) {
+      vec3 Xi = (vec3(i, j, 0.0) + 0.5) / sampleCount;
+      Xi.yz = vec2(cos(Xi.y * M_2PI), sin(Xi.y * M_2PI));
+
+      vec3 H = sample_ggx(Xi, a2); /* Microfacet normal */
+      vec3 L = -reflect(V, H);
+      float NL = L.z;
+
+      if (NL > 0.0) {
+        float NH = max(H.z, 0.0);
+        float VH = max(dot(V, H), 0.0);
+
+        float G1_v = G1_Smith_GGX(NV, a2);
+        float G1_l = G1_Smith_GGX(NL, a2);
+        float G_smith = 4.0 * NV * NL / (G1_v * G1_l); /* See G1_Smith_GGX for explanations. */
+
+        float brdf = (G_smith * VH) / (NH * NV);
+
+        /* Follow maximum specular value for principled bsdf. */
+        const float specular = 1.0;
+        const float eta = (2.0 / (1.0 - sqrt(0.08 * specular))) - 1.0;
+        float fresnel = F_eta(eta, VH);
+        float Fc = F_color_blend(eta, fresnel, vec3(0)).r;
+
+        brdf_accum += (1.0 - Fc) * brdf;
+        fresnel_accum += Fc * brdf;
+      }
     }
   }
-  brdf_accum /= sampleCount;
-  fresnel_accum /= sampleCount;
+  brdf_accum /= sampleCount * sampleCount;
+  fresnel_accum /= sampleCount * sampleCount;
 
-  FragColor = vec4(brdf_accum, fresnel_accum, 0.0, 1.0);
+  FragColor = vec2(brdf_accum, fresnel_accum);
 }
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_sampling_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_sampling_lib.glsl
index 066ea58e2bf..20c3b64e07a 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_sampling_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_sampling_lib.glsl
@@ -1,21 +1,7 @@
 
-#pragma BLENDER_REQUIRE(common_utiltex_lib.glsl)
+#pragma BLENDER_REQUIRE(bsdf_common_lib.glsl)
 
 uniform sampler1D texHammersley;
-uniform float sampleCount;
-uniform float invSampleCount;
-
-vec2 jitternoise = vec2(0.0);
-
-#ifndef UTIL_TEX
-#  define UTIL_TEX
-
-#endif /* UTIL_TEX */
-
-void setup_noise(void)
-{
-  jitternoise = texelfetch_noise_tex(gl_FragCoord.xy).rg; /* Global variable */
-}
 
 vec3 tangent_to_world(vec3 vector, vec3 N, vec3 T, vec3 B)
 {
@@ -27,20 +13,11 @@ vec3 hammersley_3d(float i, float invsamplenbr)
 {
   vec3 Xi; /* Theta, cos(Phi), sin(Phi) */
 
-  Xi.x = i * invsamplenbr; /* i/samples */
-  Xi.x = fract(Xi.x + jitternoise.x);
-
-  int u = int(mod(i + jitternoise.y * HAMMERSLEY_SIZE, HAMMERSLEY_SIZE));
-
-  Xi.yz = texelFetch(texHammersley, u, 0).rg;
+  Xi.x = i * invsamplenbr;
+  Xi.yz = texelFetch(texHammersley, int(i), 0).rg;
 
   return Xi;
 }
-
-vec3 hammersley_3d(float i)
-{
-  return hammersley_3d(i, invSampleCount);
-}
 #endif
 
 /* -------------- BSDFS -------------- */
@@ -75,16 +52,16 @@ vec3 sample_ggx(vec3 rand, float a2, vec3 N, vec3 T, vec3 B, out float NH)
 }
 
 #ifdef HAMMERSLEY_SIZE
-vec3 sample_ggx(float nsample, float a2, vec3 N, vec3 T, vec3 B)
+vec3 sample_ggx(float nsample, float inv_sample_count, float a2, vec3 N, vec3 T, vec3 B)
 {
-  vec3 Xi = hammersley_3d(nsample);
+  vec3 Xi = hammersley_3d(nsample, inv_sample_count);
   vec3 Ht = sample_ggx(Xi, a2);
   return tangent_to_world(Ht, N, T, B);
 }
 
-vec3 sample_hemisphere(float nsample, vec3 N, vec3 T, vec3 B)
+vec3 sample_hemisphere(float nsample, float inv_sample_count, vec3 N, vec3 T, vec3 B)
 {
-  vec3 Xi = hammersley_3d(nsample);
+  vec3 Xi = hammersley_3d(nsample, inv_sample_count);
 
   float z = Xi.x;                         /* cos theta */
   float r = sqrt(max(0.0, 1.0f - z * z)); /* sin theta */
@@ -96,9 +73,9 @@ vec3 sample_hemisphere(float nsample, vec3 N, vec3 T, vec3 B)
   return tangent_to_world(Ht, N, T, B);
 }
 
-vec3 sample_cone(float nsample, float angle, vec3 N, vec3 T, vec3 B)
+vec3 sample_cone(float nsample, float inv_sample_count, float angle, vec3 N, vec3 T, vec3 B)
 {
-  vec3 Xi = hammersley_3d(nsample);
+  vec3 Xi = hammersley_3d(nsample, inv_sample_count);
 
   float z = cos(angle * Xi.x);            /* cos theta */
   float r = sqrt(max(0.0, 1.0f - z * z)); /* sin theta */
diff --git a/source/blender/draw/engines/eevee/shaders/btdf_lut_frag.glsl b/source/blender/draw/engines/eevee/shaders/btdf_lut_frag.glsl
index d815d9d4e6b..2ffe23a9197 100644
--- a/source/blender/draw/engines/eevee/shaders/btdf_lut_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/btdf_lut_frag.glsl
@@ -1,62 +1,89 @@
+#pragma BLENDER_REQUIRE(common_utiltex_lib.glsl)
 #pragma BLENDER_REQUIRE(bsdf_sampling_lib.glsl)
 
-uniform float a2;
+uniform float sampleCount;
+uniform float z;
 
 out vec4 FragColor;
 
 void main()
 {
-  vec3 N, T, B, V;
+  float x = floor(gl_FragCoord.x) / (LUT_SIZE - 1.0);
+  float y = floor(gl_FragCoord.y) / (LUT_SIZE - 1.0);
 
-  float x = gl_FragCoord.x / LUT_SIZE;
-  float y = gl_FragCoord.y / LUT_SIZE;
-  /* There is little variation if ior > 1.0 so we
-   * maximize LUT precision for ior < 1.0 */
-  x = x * 1.1;
-  float ior = (x > 1.0) ? ior_from_f0((x - 1.0) * 10.0) : sqrt(x);
-  float NV = (1.0 - (clamp(y, 1e-4, 0.9999)));
+  float ior = clamp(sqrt(x), 0.05, 0.999);
+  /* ior is sin of critical angle. */
+  float critical_cos = sqrt(1.0 - saturate(ior * ior));
 
-  N = vec3(0.0, 0.0, 1.0);
-  T = vec3(1.0, 0.0, 0.0);
-  B = vec3(0.0, 1.0, 0.0);
-  V = vec3(sqrt(1.0 - NV * NV), 0.0, NV);
+  y = y * 2.0 - 1.0;
+  /* Maximize texture usage on both sides of the critical angle. */
+  y *= (y > 0.0) ? (1.0 - critical_cos) : critical_cos;
+  /* Center LUT around critical angle to avoid strange interpolation issues when the critical
+   * angle is changing. */
+  y += critical_cos;
+  float NV = clamp(y, 1e-4, 0.9999);
 
-  setup_noise();
+  float a = z * z;
+  float a2 = clamp(a * a, 1e-8, 0.9999);
+
+  vec3 V = vec3(sqrt(1.0 - NV * NV), 0.0, NV);
 
   /* Integrating BTDF */
   float btdf_accum = 0.0;
-  for (float i = 0.0; i < sampleCount; i++) {
-    vec3 H = sample_ggx(i, a2, N, T, B); /* Microfacet normal */
+  float fresnel_accum = 0.0;
+  for (float j = 0.0; j < sampleCount; j++) {
+    for (float i = 0.0; i < sampleCount; i++) {
+      vec3 Xi = (vec3(i, j, 0.0) + 0.5) / sampleCount;
+      Xi.yz = vec2(cos(Xi.y * M_2PI), sin(Xi.y * M_2PI));
 
-    float VH = dot(V, H);
+      /* Microfacet normal. */
+      vec3 H = sample_ggx(Xi, a2);
 
-    /* Check if there is total internal reflections. */
-    float c = abs(VH);
-    float g = ior * ior - 1.0 + c * c;
+      float VH = dot(V, H);
 
-    float eta = 1.0 / ior;
-    if (dot(H, V) < 0.0) {
-      H = -H;
-      eta = ior;
-    }
+      /* Check if there is total internal reflections. */
+      float fresnel = F_eta(ior, VH);
+
+      fresnel_accum += fresnel;
+
+      float eta = 1.0 / ior;
+      if (dot(H, V) < 0.0) {
+        H = -H;
+        eta = ior;
+      }
 
-    vec3 L = refract(-V, H, eta);
-    float NL = -dot(N, L);
+      vec3 L = refract(-V, H, eta);
+      float NL = -L.z;
 
-    if ((NL > 0.0) && (g > 0.0)) {
-      float LH = dot(L, H);
+      if ((NL > 0.0) && (fresnel < 0.999)) {
+        float LH = dot(L, H);
 
-      float G1_l = NL * 2.0 /
-                   G1_Smith_GGX(NL, a2); /* Balancing the adjustments made in G1_Smith */
+        /* Balancing the adjustments made in G1_Smith. */
+        float G1_l = NL * 2.0 / G1_Smith_GGX(NL, a2);
 
-      /* btdf = abs(VH*LH) * (ior*ior) * D * G(V) * G(L) / (Ht2 * NV)
-       * pdf = (VH * abs(LH)) * (ior*ior) * D * G(V) / (Ht2 * NV) */
-      float btdf = G1_l * abs(VH * LH) / (VH * abs(LH));
+        /* btdf = abs(VH*LH) * (ior*ior) * D * G(V) * G(L) / (Ht2 * NV)
+         * pdf = (VH * abs(LH)) * (ior*ior) * D * G(V) / (Ht2 * NV) */
+        float btdf = G1_l * abs(VH * LH) / (VH * abs(LH));
 
-      btdf_accum += btdf;
+        btdf_accum += btdf;
+      }
     }
   }
-  btdf_accum /= sampleCount;
+  btdf_accum /= sampleCount * sampleCount;
+  fresnel_accum /= sampleCount * sampleCount;
+
+  if (z == 0.0) {
+    /* Perfect mirror. Increased precision because the roughness is clamped. */
+    fresnel_accum = F_eta(ior, NV);
+  }
+
+  if (x == 0.0) {
+    /* Special case. */
+    fresnel_accum = 1.0;
+    btdf_accum = 0.0;
+  }
 
-  FragColor = vec4(btdf_accum, 0.0, 0.0, 1.0);
+  /* There is room to put the multiscatter result (which is still a little bit different)
+   * and / or lobe fitting for better sampling. */
+  FragColor = vec4(btdf_accum, fresnel_accum, 0.0, 1.0);
 }
diff --git a/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl
index 9d539ec5a48..2e506d6ba78 100644
--- a/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl
@@ -40,7 +40,7 @@ ClosureEvalGlossy closure_Glossy_eval_init(inout ClosureInputGlossy cl_in,
   cl_out.radiance = vec3(0.0);
 
   float NV = dot(cl_in.N, cl_common.V);
-  vec2 lut_uv = lut_coords_ltc(NV, cl_in.roughness);
+  vec2 lut_uv = lut_coords(NV, cl_in.roughness);
 
   ClosureEvalGlossy cl_eval;
   cl_eval.ltc_mat = texture(utilTex, vec3(lut_uv, LTC_MAT_LAYER));
diff --git a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
index 7b1a0b263c0..821859b3228 100644
--- a/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/common_utiltex_lib.glsl
@@ -17,7 +17,10 @@ uniform sampler2DArray utilTex;
 #define BRDF_LUT_LAYER 1
 #define NOISE_LAYER 2
 #define LTC_DISK_LAYER 3 /* UNUSED */
+
 /* Layers 4 to 20 are for BTDF Lut. */
+const float lut_btdf_layer_first = 4.0;
+const float lut_btdf_layer_count = 16.0;
 
 /**
  * Reminder: The 4 noise values are based of 3 uncorrelated blue noises:
@@ -27,56 +30,77 @@ uniform sampler2DArray utilTex;
  **/
 #define texelfetch_noise_tex(coord) texelFetch(utilTex, ivec3(ivec2(coord) % LUT_SIZE, 2.0), 0)
 
-/* Return texture coordinates to sample Surface LUT */
-vec2 lut_coords(float cosTheta, float roughness)
-{
-  /* TODO(fclem) Ugly Acos here. Get rid ot this. Should use same mapping as lut_coords_ltc. */
-  float theta = acos(cosTheta);
-  vec2 coords = vec2(roughness, theta / M_PI_2);
-
-  /* scale and bias coordinates, for correct filtered lookup */
-  return coords * (LUT_SIZE - 1.0) / LUT_SIZE + 0.5 / LUT_SIZE;
-}
-
-vec2 lut_coords_ltc(float cosTheta, float roughness)
+/* Return texture coordinates to sample Surface LUT. */
+vec2 lut_coords(float cos_theta, float roughness)
 {
-  vec2 coords = vec2(roughness, sqrt(1.0 - cosTheta));
-
+  vec2 coords = vec2(roughness, sqrt(1.0 - cos_theta));
   /* scale and bias coordinates, for correct filtered lookup */
   return coords * (LUT_SIZE - 1.0) / LUT_SIZE + 0.5 / LUT_SIZE;
 }
 
-vec2 brdf_lut(float cosTheta, float roughness)
+/* Returns the GGX split-sum precomputed in LUT. */
+vec2 brdf_lut(float cos_theta, float roughness)
 {
-  return textureLod(utilTex, vec3(lut_coords(cosTheta, roughness), BRDF_LUT_LAYER), 0.0).rg;
+  return textureLod(utilTex, vec3(lut_coords(cos_theta, roughness), BRDF_LUT_LAYER), 0.0).rg;
 }
 
-float get_btdf_lut(float NV, float roughness, float ior)
+/* Return texture coordinates to sample Surface LUT. */
+vec3 lut_coords_btdf(float cos_theta, float roughness, float ior)
 {
-  const vec3 lut_scale_bias_texel_size = vec3((LUT_SIZE - 1.0), 0.5, 1.5) / LUT_SIZE;
+  /* ior is sin of critical angle. */
+  float critical_cos = sqrt(1.0 - ior * ior);
 
   vec3 coords;
-  /* Try to compensate for the low resolution and interpolation error. */
-  coords.x = (ior > 1.0) ? (0.9 + lut_scale_bias_texel_size.z) +
-                               (0.1 - lut_scale_bias_texel_size.z) * f0_from_ior(ior) :
-                           (0.9 + lut_scale_bias_texel_size.z) * ior * ior;
-  coords.y = 1.0 - saturate(NV);
-  coords.xy *= lut_scale_bias_texel_size.x;
-  coords.xy += lut_scale_bias_texel_size.y;
-
-  const float lut_lvl_ofs = 4.0;    /* First texture lvl of roughness. */
-  const float lut_lvl_scale = 16.0; /* How many lvl of roughness in the lut. */
+  coords.x = sqr(ior);
+  coords.y = cos_theta;
+  coords.y -= critical_cos;
+  coords.y /= (coords.y > 0.0) ? (1.0 - critical_cos) : critical_cos;
+  coords.y = coords.y * 0.5 + 0.5;
+  coords.z = roughness;
 
-  float mip = roughness * lut_lvl_scale;
-  float mip_floor = floor(mip);
+  coords = saturate(coords);
 
-  coords.z = lut_lvl_ofs + mip_floor + 1.0;
-  float btdf_high = textureLod(utilTex, coords, 0.0).r;
+  /* scale and bias coordinates, for correct filtered lookup */
+  coords.xy = coords.xy * (LUT_SIZE - 1.0) / LUT_SIZE + 0.5 / LUT_SIZE;
 
-  coords.z -= 1.0;
-  float btdf_low = textureLod(utilTex, coords, 0.0).r;
+  return coords;
+}
 
-  float btdf = (ior == 1.0) ? 1.0 : mix(btdf_low, btdf_high, mip - coords.z);
+/* Returns GGX BTDF in first component and fresnel in second. */
+vec2 btdf_lut(float cos_theta, float roughness, float ior)
+{
+  if (ior <= 1e-5) {
+    return vec2(0.0);
+  }
+
+  if (ior >= 1.0) {
+    vec2 split_sum = brdf_lut(cos_theta, roughness);
+    float f0 = f0_from_ior(ior);
+    /* Baked IOR for GGX BRDF. */
+    const float specular = 1.0;
+    const float eta_brdf = (2.0 / (1.0 - sqrt(0.08 * specular))) - 1.0;
+    /* Avoid harsh transition coming from ior == 1. */
+    float f90 = fast_sqrt(saturate(f0 / (f0_from_ior(eta_brdf) * 0.25)));
+    float fresnel = F_brdf_single_scatter(vec3(f0), vec3(f90), split_sum).r;
+    /* Setting the BTDF to one is not really important since it is only used for multiscatter
+     * and it's already quite close to ground truth. */
+    float btdf = 1.0;
+    return vec2(btdf, fresnel);
+  }
+
+  vec3 coords = lut_coords_btdf(cos_theta, roughness, ior);
+
+  float layer = coords.z * lut_btdf_layer_count;
+  float layer_floored = floor(layer);
+
+  coords.z = lut_btdf_layer_first + layer_floored;
+  vec2 btdf_low = textureLod(utilTex, coords, 0.0).rg;
+
+  coords.z += 1.0;
+  vec2 btdf_high = textureLod(utilTex, coords, 0.0).rg;
+
+  /* Manual trilinear interpolation. */
+  vec2 btdf = mix(btdf_low, btdf_high, layer - layer_floored);
 
   return btdf;
 }
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
index bf45169ebaa..4c4cbb069fe 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
@@ -9,6 +9,9 @@ uniform float lodFactor;
 uniform float lodMax;
 uniform float intensityFac;
 
+uniform float sampleCount;
+uniform float invSampleCount;
+
 in vec3 worldPosition;
 
 out vec4 FragColor;
@@ -144,7 +147,7 @@ void main()
   float weight = 0.0;
   vec3 out_radiance = vec3(0.0);
   for (float i = 0; i < sampleCount; i++) {
-    vec3 L = sample_hemisphere(i, N, T, B); /* Microfacet normal */
+    vec3 L = sample_hemisphere(i, invSampleCount, N, T, B); /* Microfacet normal */
     float NL = dot(N, L);
 
     if (NL > 0.0) {
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_glossy_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_glossy_frag.glsl
index ccb77427ed2..35fdbcb715f 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_glossy_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_glossy_frag.glsl
@@ -11,6 +11,9 @@ uniform float paddingSize;
 uniform float intensityFac;
 uniform float fireflyFactor;
 
+uniform float sampleCount;
+uniform float invSampleCount;
+
 in vec3 worldPosition;
 
 out vec4 FragColor;
@@ -45,15 +48,11 @@ void main()
 
   make_orthonormal_basis(N, T, B); /* Generate tangent space */
 
-  /* Noise to dither the samples */
-  /* Note : ghosting is better looking than noise. */
-  // setup_noise();
-
   /* Integrating Envmap */
   float weight = 0.0;
   vec3 out_radiance = vec3(0.0);
   for (float i = 0; i < sampleCount; i++) {
-    vec3 H = sample_ggx(i, roughnessSquared, N, T, B); /* Microfacet normal */
+    vec3 H = sample_ggx(i, invSampleCount, roughnessSquared, N, T, B); /* Microfacet normal */
     vec3 L = -reflect(V, H);
     float NL = dot(N, L);
 
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
index 8d7c58a93d5..a974e1d538d 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_visibility_frag.glsl
@@ -13,6 +13,9 @@ uniform float farClip;
 uniform float visibilityRange;
 uniform float visibilityBlur;
 
+uniform float sampleCount;
+uniform float invSampleCount;
+
 out vec4 FragColor;
 
 vec3 octahedral_to_cubemap_proj(vec2 co)
@@ -77,7 +80,7 @@ void main()
   vec2 accum = vec2(0.0);
 
   for (float i = 0; i < sampleCount; i++) {
-    vec3 sample = sample_cone(i, M_PI_2 * visibilityBlur, cos, T, B);
+    vec3 sample = sample_cone(i, invSampleCount, M_PI_2 * visibilityBlur, cos, T, B);
     float depth = texture(probeDepth, sample).r;
     depth = get_world_distance(depth, sample);
     accum += vec2(depth, depth * depth);
diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_glass.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_glass.glsl
index ba02ae6d886..6788b34c938 100644
--- a/source/blender/gpu/shaders/material/gpu_shader_material_glass.glsl
+++ b/source/blender/gpu/shaders/material/gpu_shader_material_glass.glsl
@@ -6,8 +6,8 @@ void node_bsdf_glass(vec4 color,
                      float roughness,
                      float ior,
                      vec3 N,
-                     float use_multiscatter,
-                     float ssr_id,
+                     const float do_multiscatter,
+                     const float ssr_id,
                      out Closure result)
 {
   CLOSURE_VARS_DECLARE_2(Glossy, Refraction);
@@ -23,11 +23,16 @@ void node_bsdf_glass(vec4 color,
 
   result = CLOSURE_DEFAULT;
 
-  float fresnel = F_eta(in_Refraction_1.ior, dot(in_Glossy_0.N, cameraVec));
+  float NV = dot(in_Refraction_1.N, cameraVec);
+
+  float fresnel = (do_multiscatter != 0.0) ?
+                      btdf_lut(NV, in_Refraction_1.roughness, in_Refraction_1.ior).y :
+                      F_eta(in_Refraction_1.ior, NV);
+
+  vec2 split_sum = brdf_lut(NV, in_Glossy_0.roughness);
+  vec3 brdf = (do_multiscatter != 0.0) ? F_brdf_multi_scatter(vec3(1.0), vec3(1.0), split_sum) :
+                                         F_brdf_single_scatter(vec3(1.0), vec3(1.0), split_sum);
 
-  vec2 split_sum = brdf_lut(dot(in_Glossy_0.N, cameraVec), in_Glossy_0.roughness);
-  vec3 brdf = (use_multiscatter != 0.0) ? F_brdf_multi_scatter(vec3(1.0), vec3(1.0), split_sum) :
-                                          F_brdf_single_scatter(vec3(1.0), vec3(1.0), split_sum);
   out_Glossy_0.radiance = closure_mask_ssr_radiance(out_Glossy_0.radiance, ssr_id);
   out_Glossy_0.radiance *= brdf;
   out_Glossy_0.radiance = render_pass_glossy_mask(vec3(1.0), out_Glossy_0.radiance);
@@ -35,6 +40,10 @@ void node_bsdf_glass(vec4 color,
   closure_load_ssr_data(
       out_Glossy_0.radiance, in_Glossy_0.roughness, in_Glossy_0.N, ssr_id, result);
 
+  float btdf = (do_multiscatter != 0.0) ?
+                   1.0 :
+                   btdf_lut(NV, in_Refraction_1.roughness, in_Refraction_1.ior).x;
+  out_Refraction_1.radiance *= btdf;
   out_Refraction_1.radiance = render_pass_glossy_mask(vec3(1.0), out_Refraction_1.radiance);
   out_Refraction_1.radiance *= color.rgb * (1.0 - fresnel);
   /* Simulate 2nd absorption event. */
diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl
index 15958dcf65e..139dcb33222 100644
--- a/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl
+++ b/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl
@@ -70,7 +70,6 @@ void node_bsdf_principled(vec4 base_color,
   in_Refraction_3.N = N; /* Normalized during eval. */
   in_Refraction_3.roughness = do_multiscatter != 0.0 ? roughness : transmission_roughness;
   in_Refraction_3.ior = ior;
-  
 
   CLOSURE_EVAL_FUNCTION_4(node_bsdf_principled, Diffuse, Glossy, Glossy, Refraction);
 
@@ -92,9 +91,9 @@ void node_bsdf_principled(vec4 base_color,
 
   vec3 base_color_tint = tint_from_color(base_color.rgb);
 
-  /* TODO(fclem) This isn't good for rough glass using multiscatter (since the fresnel is applied
-   * on each microfacet in cycles). */
-  float fresnel = F_eta(in_Refraction_3.ior, NV);
+  float fresnel = (do_multiscatter != 0.0) ?
+                      btdf_lut(NV, in_Glossy_1.roughness, in_Refraction_3.ior).y :
+                      F_eta(in_Refraction_3.ior, NV);
 
   {
     /* Glossy reflections.
@@ -159,7 +158,11 @@ void node_bsdf_principled(vec4 base_color,
   }
 
   if (transmission > 1e-5) {
+    float btdf = (do_multiscatter != 0.0) ?
+                     1.0 :
+                     btdf_lut(NV, in_Refraction_3.roughness, in_Refraction_3.ior).x;
     /* TODO(fclem) This could be going to a transmission render pass instead. */
+    out_Refraction_3.radiance *= btdf;
     out_Refraction_3.radiance = render_pass_glossy_mask(vec3(1), out_Refraction_3.radiance);
     out_Refraction_3.radiance *= base_color.rgb;
     /* Simulate 2nd transmission event. */
author	Clément Foucault <foucault.clem@gmail.com>	2021-02-13 20:50:09 +0300
committer	Clément Foucault <foucault.clem@gmail.com>	2021-02-13 20:52:19 +0300
commit	83ac8628c490eda4fa5237b7a4256bc670dc0682 (patch)
tree	89563a6d46dcea4ea51b70d23b3a73e7637161a6 /source
parent	06492fd61984c1a92fb1f93d30028de97ead451f (diff)