Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDalai Felinto <dfelinto@gmail.com>2015-09-24 18:24:20 +0300
committerDalai Felinto <dfelinto@gmail.com>2015-09-24 18:24:20 +0300
commit27b3ea622f8bd313a8e2827dfec752bf2125566c (patch)
treef212e49d224ce8e1cfc3b17a64ae524711494391 /intern/cycles/util/util_half.h
parent372dff8d1dc7e24d4b2cd37de245588ecfce8bfa (diff)
parentde80e687689032cb85179a1f7e89750573631d5d (diff)
Merge remote-tracking branch 'origin/master' into cycles_camera_nodescycles_camera_nodes
Note: the branch currently crashes in blender_camera_nodes.cpp: BL::NodeTree b_ntree = b_data.node_groups[nodes_tree_name]; The crash was introduced in: cb7cf523e5c000609f32a382e2c0fcc57f635a42 Conflicts: intern/cycles/SConscript intern/cycles/blender/addon/__init__.py intern/cycles/blender/addon/properties.py intern/cycles/blender/blender_camera.cpp intern/cycles/kernel/kernel_types.h intern/cycles/kernel/svm/svm.h intern/cycles/kernel/svm/svm_types.h intern/cycles/render/camera.cpp intern/cycles/render/camera.h
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r--intern/cycles/util/util_half.h32
1 files changed, 16 insertions, 16 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 397133618be..f4bac9888a5 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_HALF_H__
@@ -56,7 +56,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
float fscale = f[i] * scale;
- in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
+ in.f = (fscale > 0.0f)? ((fscale < 65504.0f)? fscale: 65504.0f): 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
@@ -68,20 +68,20 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
}
#else
/* same as above with SSE */
- const ssef mm_scale = ssef(scale);
- const ssei mm_38800000 = ssei(0x38800000);
- const ssei mm_7FFF = ssei(0x7FFF);
- const ssei mm_7FFFFFFF = ssei(0x7FFFFFFF);
- const ssei mm_C8000000 = ssei(0xC8000000);
-
- ssef mm_fscale = load4f(f) * mm_scale;
- ssei x = cast(min(max(mm_fscale, ssef(0.0f)), ssef(65500.0f)));
- ssei absolute = x & mm_7FFFFFFF;
- ssei Z = absolute + mm_C8000000;
- ssei result = andnot(absolute < mm_38800000, Z);
- ssei rh = (result >> 13) & mm_7FFF;
-
- _mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh)));
+ ssef fscale = load4f(f) * scale;
+ ssef x = min(max(fscale, 0.0f), 65504.0f);
+
+#ifdef __KERNEL_AVX2__
+ ssei rpack = _mm_cvtps_ph(x, 0);
+#else
+ ssei absolute = cast(x) & 0x7FFFFFFF;
+ ssei Z = absolute + 0xC8000000;
+ ssei result = andnot(absolute < 0x38800000, Z);
+ ssei rshift = (result >> 13) & 0x7FFF;
+ ssei rpack = _mm_packs_epi32(rshift, rshift);
+#endif
+
+ _mm_storel_pi((__m64*)h, _mm_castsi128_ps(rpack));
#endif
}