Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCampbell Barton <ideasman42@gmail.com>2019-04-17 07:17:24 +0300
committerCampbell Barton <ideasman42@gmail.com>2019-04-17 07:21:24 +0300
commite12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel/filter/filter_nlm_cpu.h
parentb3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211. For details on usage and instructions for migrating branches without conflicts, see: https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/cycles/kernel/filter/filter_nlm_cpu.h')
-rw-r--r--intern/cycles/kernel/filter/filter_nlm_cpu.h334
1 file changed, 171 insertions, 163 deletions
diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h
index 9eb3c603a4a..a94266a8786 100644
--- a/intern/cycles/kernel/filter/filter_nlm_cpu.h
+++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h
@@ -16,10 +16,11 @@
CCL_NAMESPACE_BEGIN
-#define load4_a(buf, ofs) (*((float4*) ((buf) + (ofs))))
-#define load4_u(buf, ofs) load_float4((buf)+(ofs))
+#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
+#define load4_u(buf, ofs) load_float4((buf) + (ofs))
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_calc_difference(int dx,
+ int dy,
const float *ccl_restrict weight_image,
const float *ccl_restrict variance_image,
const float *ccl_restrict scale_image,
@@ -31,122 +32,117 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
float a,
float k_2)
{
- /* Strides need to be aligned to 16 bytes. */
- kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
-
- int aligned_lowx = rect.x & (~3);
- const int numChannels = (channel_offset > 0)? 3 : 1;
- const float4 channel_fac = make_float4(1.0f / numChannels);
-
- for(int y = rect.y; y < rect.w; y++) {
- int idx_p = y*stride + aligned_lowx;
- int idx_q = (y+dy)*stride + aligned_lowx + dx + frame_offset;
- for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
- float4 diff = make_float4(0.0f);
- float4 scale_fac;
- if(scale_image) {
- scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
- make_float4(0.25f), make_float4(4.0f));
- }
- else {
- scale_fac = make_float4(1.0f);
- }
- for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
- /* idx_p is guaranteed to be aligned, but idx_q isn't. */
- float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
- float4 color_q = scale_fac*load4_u(weight_image, idx_q + chan_ofs);
- float4 cdiff = color_p - color_q;
- float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
- float4 var_q = sqr(scale_fac)*load4_u(variance_image, idx_q + chan_ofs);
- diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q));
- }
- load4_a(difference_image, idx_p) = diff*channel_fac;
- }
- }
+ /* Strides need to be aligned to 16 bytes. */
+ kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
+
+ int aligned_lowx = rect.x & (~3);
+ const int numChannels = (channel_offset > 0) ? 3 : 1;
+ const float4 channel_fac = make_float4(1.0f / numChannels);
+
+ for (int y = rect.y; y < rect.w; y++) {
+ int idx_p = y * stride + aligned_lowx;
+ int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
+ for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
+ float4 diff = make_float4(0.0f);
+ float4 scale_fac;
+ if (scale_image) {
+ scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
+ make_float4(0.25f),
+ make_float4(4.0f));
+ }
+ else {
+ scale_fac = make_float4(1.0f);
+ }
+ for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
+ /* idx_p is guaranteed to be aligned, but idx_q isn't. */
+ float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
+ float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
+ float4 cdiff = color_p - color_q;
+ float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
+ float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
+ diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
+ (make_float4(1e-8f) + k_2 * (var_p + var_q));
+ }
+ load4_a(difference_image, idx_p) = diff * channel_fac;
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void kernel_filter_nlm_blur(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- const int low = max(rect.y, y-f);
- const int high = min(rect.w, y+f+1);
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = make_float4(0.0f);
- }
- for(int y1 = low; y1 < high; y1++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) += load4_a(difference_image, y1*stride + x);
- }
- }
- float fac = 1.0f/(high - low);
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) *= fac;
- }
- }
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ const int low = max(rect.y, y - f);
+ const int high = min(rect.w, y + f + 1);
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = make_float4(0.0f);
+ }
+ for (int y1 = low; y1 < high; y1++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
+ }
+ }
+ float fac = 1.0f / (high - low);
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) *= fac;
+ }
+ }
}
-ccl_device_inline void nlm_blur_horizontal(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void nlm_blur_horizontal(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = make_float4(0.0f);
- }
- }
-
- for(int dx = -f; dx <= f; dx++) {
- aligned_lowx = round_down(rect.x - min(0, dx), 4);
- int highx = rect.z - max(0, dx);
- int4 lowx4 = make_int4(rect.x - min(0, dx));
- int4 highx4 = make_int4(rect.z - max(0, dx));
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < highx; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= lowx4) & (x4 < highx4);
-
- float4 diff = load4_u(difference_image, y*stride + x + dx);
- load4_a(out_image, y*stride + x) += mask(active, diff);
- }
- }
- }
-
- aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
- float4 low = max(make_float4(rect.x), x4 - make_float4(f));
- float4 high = min(make_float4(rect.z), x4 + make_float4(f+1));
- load4_a(out_image, y*stride + x) *= rcp(high - low);
- }
- }
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = make_float4(0.0f);
+ }
+ }
+
+ for (int dx = -f; dx <= f; dx++) {
+ aligned_lowx = round_down(rect.x - min(0, dx), 4);
+ int highx = rect.z - max(0, dx);
+ int4 lowx4 = make_int4(rect.x - min(0, dx));
+ int4 highx4 = make_int4(rect.z - max(0, dx));
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < highx; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= lowx4) & (x4 < highx4);
+
+ float4 diff = load4_u(difference_image, y * stride + x + dx);
+ load4_a(out_image, y * stride + x) += mask(active, diff);
+ }
+ }
+ }
+
+ aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
+ float4 low = max(make_float4(rect.x), x4 - make_float4(f));
+ float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
+ load4_a(out_image, y * stride + x) *= rcp(high - low);
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
- float *out_image,
- int4 rect,
- int stride,
- int f)
+ccl_device_inline void kernel_filter_nlm_calc_weight(
+ const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
{
- nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
-
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- load4_a(out_image, y*stride + x) = fast_expf4(-max(load4_a(out_image, y*stride + x), make_float4(0.0f)));
- }
- }
+ nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
+
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ load4_a(out_image, y * stride + x) = fast_expf4(
+ -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
+ccl_device_inline void kernel_filter_nlm_update_output(int dx,
+ int dy,
const float *ccl_restrict difference_image,
const float *ccl_restrict image,
float *temp_image,
@@ -157,33 +153,36 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
int stride,
int f)
{
- nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
+ nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
- int aligned_lowx = round_down(rect.x, 4);
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = aligned_lowx; x < rect.z; x += 4) {
- int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
- int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
+ int aligned_lowx = round_down(rect.x, 4);
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = aligned_lowx; x < rect.z; x += 4) {
+ int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
+ int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
- int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx);
+ int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
- float4 weight = load4_a(temp_image, idx_p);
- load4_a(accum_image, idx_p) += mask(active, weight);
+ float4 weight = load4_a(temp_image, idx_p);
+ load4_a(accum_image, idx_p) += mask(active, weight);
- float4 val = load4_u(image, idx_q);
- if(channel_offset) {
- val += load4_u(image, idx_q + channel_offset);
- val += load4_u(image, idx_q + 2*channel_offset);
- val *= 1.0f/3.0f;
- }
+ float4 val = load4_u(image, idx_q);
+ if (channel_offset) {
+ val += load4_u(image, idx_q + channel_offset);
+ val += load4_u(image, idx_q + 2 * channel_offset);
+ val *= 1.0f / 3.0f;
+ }
- load4_a(out_image, idx_p) += mask(active, weight*val);
- }
- }
+ load4_a(out_image, idx_p) += mask(active, weight * val);
+ }
+ }
}
-ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t,
- const float *ccl_restrict difference_image,
+ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx,
+ int dy,
+ int t,
+ const float *ccl_restrict
+ difference_image,
const float *ccl_restrict buffer,
float *transform,
int *rank,
@@ -191,40 +190,49 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t
float3 *XtWY,
int4 rect,
int4 filter_window,
- int stride, int f,
+ int stride,
+ int f,
int pass_stride,
int frame_offset,
bool use_time)
{
- int4 clip_area = rect_clip(rect, filter_window);
- /* fx and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
- for(int y = clip_area.y; y < clip_area.w; y++) {
- for(int x = clip_area.x; x < clip_area.z; x++) {
- const int low = max(rect.x, x-f);
- const int high = min(rect.z, x+f+1);
- float sum = 0.0f;
- for(int x1 = low; x1 < high; x1++) {
- sum += difference_image[y*stride + x1];
- }
- float weight = sum * (1.0f/(high - low));
-
- int storage_ofs = coord_to_local_index(filter_window, x, y);
- float *l_transform = transform + storage_ofs*TRANSFORM_SIZE;
- float *l_XtWX = XtWX + storage_ofs*XTWX_SIZE;
- float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE;
- int *l_rank = rank + storage_ofs;
-
- kernel_filter_construct_gramian(x, y, 1,
- dx, dy, t,
- stride,
- pass_stride,
- frame_offset,
- use_time,
- buffer,
- l_transform, l_rank,
- weight, l_XtWX, l_XtWY, 0);
- }
- }
+ int4 clip_area = rect_clip(rect, filter_window);
+ /* fx and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */
+ for (int y = clip_area.y; y < clip_area.w; y++) {
+ for (int x = clip_area.x; x < clip_area.z; x++) {
+ const int low = max(rect.x, x - f);
+ const int high = min(rect.z, x + f + 1);
+ float sum = 0.0f;
+ for (int x1 = low; x1 < high; x1++) {
+ sum += difference_image[y * stride + x1];
+ }
+ float weight = sum * (1.0f / (high - low));
+
+ int storage_ofs = coord_to_local_index(filter_window, x, y);
+ float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
+ float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
+ float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
+ int *l_rank = rank + storage_ofs;
+
+ kernel_filter_construct_gramian(x,
+ y,
+ 1,
+ dx,
+ dy,
+ t,
+ stride,
+ pass_stride,
+ frame_offset,
+ use_time,
+ buffer,
+ l_transform,
+ l_rank,
+ weight,
+ l_XtWX,
+ l_XtWY,
+ 0);
+ }
+ }
}
ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
@@ -232,11 +240,11 @@ ccl_device_inline void kernel_filter_nlm_normalize(float *out_image,
int4 rect,
int w)
{
- for(int y = rect.y; y < rect.w; y++) {
- for(int x = rect.x; x < rect.z; x++) {
- out_image[y*w+x] /= accum_image[y*w+x];
- }
- }
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = rect.x; x < rect.z; x++) {
+ out_image[y * w + x] /= accum_image[y * w + x];
+ }
+ }
}
#undef load4_a