diff options
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/math.h | 14 | ||||
-rw-r--r-- | intern/cycles/util/math_fast.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/math_float3.h | 4 | ||||
-rw-r--r-- | intern/cycles/util/math_intersect.h | 77 | ||||
-rw-r--r-- | intern/cycles/util/math_matrix.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/path.cpp | 161 | ||||
-rw-r--r-- | intern/cycles/util/path.h | 3 | ||||
-rw-r--r-- | intern/cycles/util/transform.h | 10 |
8 files changed, 217 insertions, 56 deletions
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index 6cfeb1aa917..18b60b70a4b 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -719,6 +719,20 @@ ccl_device_inline float pow22(float a)
   return sqr(a * sqr(sqr(sqr(a)) * a));
 }
 
+#ifdef __KERNEL_METAL__
+ccl_device_inline float lgammaf(float x)
+{
+  /* Closed-form log(Gamma(x)) approximation for x > 0; see Nemes, Gergő (2010), "New asymptotic
+   * expansion for the Gamma function", Archiv der Mathematik. The x^-6 coefficient must be 1/810
+   * to match the Stirling series (1/180 overestimates); accuracy degrades as x approaches 0. */
+  const float _1_810 = 1.0f / 810.0f;
+  const float log2pi = 1.83787706641f;
+  const float logx = log(x);
+  return (log2pi - logx +
+          x * (logx * 2.0f + log(x * sinh(1.0f / x) + (_1_810 / pow(x, 6.0f))) - 2.0f)) *
+         0.5f;
+}
+#endif
 ccl_device_inline float beta(float x, float y)
 {
   return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
diff --git a/intern/cycles/util/math_fast.h b/intern/cycles/util/math_fast.h
index cc924f36a71..e2a33b1c26c 100644
--- a/intern/cycles/util/math_fast.h
+++ b/intern/cycles/util/math_fast.h
@@ -3,7 +3,7 @@
  *
  * Copyright 2008-2014 Larry Gritz and the other authors and contributors.
  * All Rights Reserved.
-
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index 1a0213f2a6d..74f1c98e649 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -233,7 +233,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
   return a = a * invf;
 }
 
-#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
+# if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
 ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b)
 {
   a = float3(a) * b;
@@ -257,7 +257,7 @@ ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f)
   a = float3(a) / f;
   return a;
 }
-#endif
+# endif
 
 ccl_device_inline bool operator==(const float3 &a, const float3 &b)
 {
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index 0c431a36afb..0fce9ff24fd 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -85,32 +85,48 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
   return true;
 }
 
+ccl_device bool ray_disk_intersect(float3 ray_P,
+                                   float3 ray_D,
+                                   float ray_t,
+                                   float3 disk_P,
+                                   float3 disk_N,
+                                   float disk_radius,
+                                   ccl_private float3 *isect_P,
+                                   ccl_private float *isect_t)
+{
+  const float3 vp = ray_P - disk_P;
+  const float dp = dot(vp, disk_N);
+  const float cos_angle = dot(disk_N, -ray_D);
+  if (dp * cos_angle > 0.f) /* Same sign: the ray heads toward the disk plane, so t > 0. */
+  {
+    float t = dp / cos_angle;
+    if (t < 0.f) { /* Defensive only: t cannot be negative when dp * cos_angle > 0. */
+      return false;
+    }
+    float3 P = ray_P + t * ray_D;
+    float3 T = P - disk_P;
+    if (dot(T, T) < sqr(disk_radius) /*&& t > 0.f*/ && t <= ray_t) {
+      *isect_P = ray_P + t * ray_D;
+      *isect_t = t;
+      return true;
+    }
+  }
+  return false;
+}
+
 ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
                                                    float3 ray_dir,
                                                    float ray_t,
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-                                                   const ssef *ssef_verts,
-#else
                                                    const float3 tri_a,
                                                    const float3 tri_b,
                                                    const float3 tri_c,
-#endif
                                                    ccl_private float *isect_u,
                                                    ccl_private float *isect_v,
                                                    ccl_private float *isect_t)
 {
-#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
-  typedef ssef float3;
-  const float3 tri_a(ssef_verts[0]);
-  const float3 tri_b(ssef_verts[1]);
-  const float3 tri_c(ssef_verts[2]);
-  const float3 P(ray_P);
-  const float3 dir(ray_dir);
-#else
-# define dot3(a, b) dot(a, b)
+#define dot3(a, b) dot(a, b)
   const float3 P = ray_P;
   const float3 dir = ray_dir;
-#endif
 
   /* Calculate vertices relative to ray origin. */
   const float3 v0 = tri_c - P;
@@ -123,43 +139,16 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
   const float3 e2 = v1 - v2;
 
   /* Perform edge tests. 
*/ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 crossU = cross(v2 + v0, e0); - const float3 crossV = cross(v0 + v1, e1); - const float3 crossW = cross(v1 + v2, e2); - - ssef crossX(crossU); - ssef crossY(crossV); - ssef crossZ(crossW); - ssef zero = _mm_setzero_ps(); - _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); - - const ssef dirX(ray_dir.x); - const ssef dirY(ray_dir.y); - const ssef dirZ(ray_dir.z); - - ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); -#else /* __KERNEL_SSE2__ */ const float U = dot(cross(v2 + v0, e0), ray_dir); const float V = dot(cross(v0 + v1, e1), ray_dir); const float W = dot(cross(v1 + v2, e2), ray_dir); -#endif /* __KERNEL_SSE2__ */ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - int uvw_sign = movemask(UVWW) & 0x7; - if (uvw_sign != 0) { - if (uvw_sign != 0x7) { - return false; - } - } -#else const float minUVW = min(U, min(V, W)); const float maxUVW = max(U, max(V, W)); if (minUVW < 0.0f && maxUVW > 0.0f) { return false; } -#endif /* Calculate geometry normal and denominator. */ const float3 Ng1 = cross(e1, e0); @@ -180,14 +169,8 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, } const float inv_den = 1.0f / den; -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - UVWW *= inv_den; - _mm_store_ss(isect_u, UVWW); - _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); -#else *isect_u = U * inv_den; *isect_v = V * inv_den; -#endif *isect_t = T * inv_den; return true; diff --git a/intern/cycles/util/math_matrix.h b/intern/cycles/util/math_matrix.h index c1be71517e3..846b028a513 100644 --- a/intern/cycles/util/math_matrix.h +++ b/intern/cycles/util/math_matrix.h @@ -116,7 +116,7 @@ ccl_device_inline void math_vec3_add_strided( } /* Elementary matrix operations. - * Note: TriMatrix refers to a square matrix that is symmetric, + * NOTE: TriMatrix refers to a square matrix that is symmetric, * and therefore its upper-triangular part isn't stored. 
*/
 ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A,
diff --git a/intern/cycles/util/path.cpp b/intern/cycles/util/path.cpp
index aad790482d5..e27c929fba9 100644
--- a/intern/cycles/util/path.cpp
+++ b/intern/cycles/util/path.cpp
@@ -750,6 +750,167 @@ bool path_remove(const string &path)
   return remove(path.c_str()) == 0;
 }
 
+struct SourceReplaceState {
+  typedef map<string, string> ProcessedMapping;
+  /* Base directory for all relative include headers. */
+  string base;
+  /* Result of processed files, keyed by file path. */
+  ProcessedMapping processed_files;
+  /* Set of files containing #pragma once which have been included. */
+  set<string> pragma_onced;
+};
+
+static string path_source_replace_includes_recursive(const string &source,
+                                                     const string &source_filepath,
+                                                     SourceReplaceState *state);
+
+static string path_source_handle_preprocessor(const string &preprocessor_line,
+                                              const string &source_filepath,
+                                              SourceReplaceState *state)
+{
+  string result = preprocessor_line;
+  /* Only `#include "file"` lines are expanded; every other line is returned unchanged. */
+  string rest_of_line = string_strip(preprocessor_line.substr(1));
+
+  if (0 == strncmp(rest_of_line.c_str(), "include", 7)) {
+    rest_of_line = string_strip(rest_of_line.substr(7)); /* 7 == strlen("include"), in range. */
+    if (rest_of_line[0] == '"') {
+      const size_t n_start = 1;
+      const size_t n_end = rest_of_line.find("\"", n_start);
+      const string filename = rest_of_line.substr(n_start, n_end - n_start);
+
+      string filepath = path_join(state->base, filename);
+      if (!path_exists(filepath)) {
+        filepath = path_join(path_dirname(source_filepath), filename);
+      }
+      string text;
+      if (path_read_text(filepath, text)) {
+        text = path_source_replace_includes_recursive(text, filepath, state);
+        /* Splice in the expanded text; no #line directives are emitted. */
+        return "\n" + text + "\n";
+      }
+    }
+  }
+
+  return result;
+}
+
+/* Our own little C preprocessor that replaces quoted #includes with the (recursively processed)
+ * file contents, to work around issue of OpenCL drivers not supporting include paths with spaces
+ * in them. A file containing "#pragma once" is expanded only once per state (later inclusions
+ * yield an empty string); results are cached in state->processed_files by file path. */
+static string path_source_replace_includes_recursive(const string &_source,
+                                                     const string &source_filepath,
+                                                     SourceReplaceState *state)
+{
+  const string *psource = &_source;
+  string source_new;
+
+  auto pragma_once = _source.find("#pragma once");
+  if (pragma_once != string::npos) {
+    if (state->pragma_onced.find(source_filepath) != state->pragma_onced.end()) {
+      return "";
+    }
+    state->pragma_onced.insert(source_filepath);
+
+    // "#pragma once"
+    // "//prgma once"
+    source_new = _source;
+    memcpy(source_new.data() + pragma_once, "//pr", 4);
+    psource = &source_new;
+  }
+
+  /* Try to re-use processed file without spending time on replacing all
+   * include directives again.
+   */
+  SourceReplaceState::ProcessedMapping::iterator replaced_file = state->processed_files.find(
+      source_filepath);
+  if (replaced_file != state->processed_files.end()) {
+    return replaced_file->second;
+  }
+
+  const string &source = *psource;
+
+  /* Perform full file processing. */
+  string result = "";
+  const size_t source_length = source.length();
+  size_t index = 0;
+  /* Source position; only column_number is read (a '#' in column 1 starts a directive). */
+  size_t line_number = 0, column_number = 1;
+  /* Currently gathered non-preprocessor token.
+   * Store as start/length rather than token itself to avoid overhead of
+   * memory re-allocations on each character concatenation.
+   */
+  size_t token_start = 0, token_length = 0;
+  /* Denotes whether we're inside of preprocessor line, together with
+   * preprocessor line itself.
+   *
+   * TODO(sergey): Investigate whether using token start/end position
+   * gives measurable speedup.
+   */
+  bool inside_preprocessor = false;
+  string preprocessor_line = "";
+  /* Actual loop over the whole source. */
+  while (index < source_length) {
+    char ch = source[index];
+
+    if (ch == '\n') {
+      if (inside_preprocessor) {
+        string block = path_source_handle_preprocessor(preprocessor_line, source_filepath, state);
+
+        if (!block.empty()) {
+          result += block;
+        }
+
+        /* Start gathering the next non-preprocessor token (it begins with this newline). */
+        token_start = index;
+        token_length = 0;
+        inside_preprocessor = false;
+        preprocessor_line = "";
+      }
+      column_number = 0;
+      ++line_number;
+    }
+    else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
+      /* Flush the pending non-preprocessor token to the result. */
+      if (token_length != 0) {
+        result.append(source, token_start, token_length);
+        token_start = index;
+        token_length = 0;
+      }
+      inside_preprocessor = true;
+    }
+
+    if (inside_preprocessor) {
+      preprocessor_line += ch;
+    }
+    else {
+      ++token_length;
+    }
+    ++index;
+    ++column_number;
+  }
+  /* Append possible tokens which happened before special events handled
+   * above.
+   */
+  if (token_length != 0) {
+    result.append(source, token_start, token_length);
+  }
+  if (inside_preprocessor) {
+    result += path_source_handle_preprocessor(preprocessor_line, source_filepath, state);
+  }
+  /* Store result for further reuse. */
+  state->processed_files[source_filepath] = result;
+  return result;
+}
+
+string path_source_replace_includes(const string &source, const string &path)
+{
+  SourceReplaceState state;
+  state.base = path;
+  return path_source_replace_includes_recursive(source, path, &state);
+}
+
 FILE *path_fopen(const string &path, const string &mode)
 {
 #ifdef _WIN32
diff --git a/intern/cycles/util/path.h b/intern/cycles/util/path.h
index a1394555302..7ec5ed60d7f 100644
--- a/intern/cycles/util/path.h
+++ b/intern/cycles/util/path.h
@@ -66,6 +66,9 @@ bool path_read_text(const string &path, string &text);
 /* File manipulation. 
*/ bool path_remove(const string &path); +/* source code utility */ +string path_source_replace_includes(const string &source, const string &path); + /* cache utility */ void path_cache_clear_except(const string &name, const set<string> &except); diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 1d78dfd1385..84827cf6ba5 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -366,11 +366,11 @@ ccl_device_inline Transform transform_empty() ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) { - /* Optix is using lerp to interpolate motion transformations. */ -#ifdef __KERNEL_OPTIX__ + /* Optix and MetalRT are using lerp to interpolate motion transformations. */ +#if defined(__KERNEL_GPU_RAYTRACING__) return normalize((1.0f - t) * q1 + t * q2); -#else /* __KERNEL_OPTIX__ */ - /* note: this does not ensure rotation around shortest angle, q1 and q2 +#else /* defined(__KERNEL_GPU_RAYTRACING__) */ + /* NOTE: this does not ensure rotation around shortest angle, q1 and q2 * are assumed to be matched already in transform_motion_decompose */ float costheta = dot(q1, q2); @@ -387,7 +387,7 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) float thetap = theta * t; return q1 * cosf(thetap) + qperp * sinf(thetap); } -#endif /* __KERNEL_OPTIX__ */ +#endif /* defined(__KERNEL_GPU_RAYTRACING__) */ } ccl_device_inline Transform transform_quick_inverse(Transform M) |