diff options
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/filter/filter_transform.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_transform_gpu.h | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_transform_sse.h | 9 |
3 files changed, 18 insertions, 11 deletions
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h index 67faa27a7d4..a5f87c05ec0 100644 --- a/intern/cycles/kernel/filter/filter_transform.h +++ b/intern/cycles/kernel/filter/filter_transform.h @@ -37,6 +37,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff max(rect.y, y - radius)); int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x); /* === Shift feature passes to have mean 0. === */ float feature_means[DENOISE_FEATURES]; @@ -46,8 +47,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_vector_add(feature_means, features, DENOISE_FEATURES); } END_FOR_PIXEL_WINDOW - float pixel_scale = 1.0f / ((high.y - low.y) * (high.x - low.x)); - math_vector_scale(feature_means, pixel_scale, DENOISE_FEATURES); + math_vector_scale(feature_means, 1.0f / num_pixels, DENOISE_FEATURES); /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ float *feature_scale = tempvector; @@ -73,6 +73,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_matrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, 1); *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(DENOISE_FEATURES, num_pixels/3); if(pca_threshold < 0.0f) { float threshold_energy = 0.0f; for(int i = 0; i < DENOISE_FEATURES; i++) { @@ -81,7 +83,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff threshold_energy *= 1.0f - (-pca_threshold); float reduced_energy = 0.0f; - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { if(i >= 2 && reduced_energy >= threshold_energy) break; float s = feature_matrix[i*DENOISE_FEATURES+i]; @@ -89,7 +91,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff } } else { - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { float s = feature_matrix[i*DENOISE_FEATURES+i]; if(i >= 2 && sqrtf(s) < pca_threshold) break; diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h index 2cd21224762..83a1222bbdb 100644 --- a/intern/cycles/kernel/filter/filter_transform_gpu.h +++ b/intern/cycles/kernel/filter/filter_transform_gpu.h @@ -38,6 +38,7 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re max(rect.y, y - radius)); int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x); const ccl_global float *ccl_restrict pixel_buffer; int2 pixel; @@ -52,8 +53,7 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re math_vector_add(feature_means, features, DENOISE_FEATURES); } END_FOR_PIXEL_WINDOW - float pixel_scale = 1.0f / ((high.y - low.y) * (high.x - low.x)); - math_vector_scale(feature_means, pixel_scale, DENOISE_FEATURES); + math_vector_scale(feature_means, 1.0f / num_pixels, DENOISE_FEATURES); /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ float feature_scale[DENOISE_FEATURES]; @@ -81,6 +81,8 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re math_matrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, transform_stride); *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(DENOISE_FEATURES, num_pixels/3); if(pca_threshold < 0.0f) { float threshold_energy = 0.0f; for(int i = 0; i < DENOISE_FEATURES; i++) { @@ -89,7 +91,7 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re threshold_energy *= 1.0f - (-pca_threshold); float reduced_energy = 0.0f; - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { if(i >= 2 && reduced_energy >= threshold_energy) break; float s = feature_matrix[i*DENOISE_FEATURES+i]; @@ -97,7 +99,7 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re } } else { - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { float s = feature_matrix[i*DENOISE_FEATURES+i]; if(i >= 2 && sqrtf(s) < pca_threshold) break; diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h index 9de51e2d86c..30dc2969b11 100644 --- a/intern/cycles/kernel/filter/filter_transform_sse.h +++ b/intern/cycles/kernel/filter/filter_transform_sse.h @@ -32,6 +32,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff max(rect.y, y - radius)); int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x); __m128 feature_means[DENOISE_FEATURES]; math_vector_zero_sse(feature_means, DENOISE_FEATURES); @@ -40,7 +41,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_vector_add_sse(feature_means, DENOISE_FEATURES, features); } END_FOR_PIXEL_WINDOW_SSE - __m128 pixel_scale = _mm_set1_ps(1.0f / ((high.y - low.y) * (high.x - low.x))); + __m128 pixel_scale = _mm_set1_ps(1.0f / num_pixels); for(int i = 0; i < DENOISE_FEATURES; i++) { feature_means[i] = _mm_mul_ps(_mm_hsum_ps(feature_means[i]), pixel_scale); } @@ -68,6 +69,8 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_matrix_jacobi_eigendecomposition(feature_matrix, transform, DENOISE_FEATURES, 1); *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(DENOISE_FEATURES, num_pixels/3); if(pca_threshold < 0.0f) { float threshold_energy = 0.0f; for(int i = 0; i < DENOISE_FEATURES; i++) { @@ -76,7 +79,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff threshold_energy *= 1.0f - (-pca_threshold); float reduced_energy = 0.0f; - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { if(i >= 2 && reduced_energy >= threshold_energy) break; float s = feature_matrix[i*DENOISE_FEATURES+i]; @@ -84,7 +87,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff } } else { - for(int i = 0; i < DENOISE_FEATURES; i++, (*rank)++) { + for(int i = 0; i < max_rank; i++, (*rank)++) { float s = feature_matrix[i*DENOISE_FEATURES+i]; if(i >= 2 && sqrtf(s) < pca_threshold) break; |