/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernel/kernel_jitter.h"

CCL_NAMESPACE_BEGIN

#ifdef __SOBOL__

/* Skip initial numbers that are not as well distributed, especially the
 * first sequence is just 0 everywhere, which can be problematic for e.g.
 * path termination.
 */
#define SOBOL_SKIP 64

/* High Dimensional Sobol. */

/* Van der Corput radical inverse. */
ccl_device uint van_der_corput(uint bits)
{
	bits = (bits << 16) | (bits >> 16);
	bits = ((bits & 0x00ff00ff) << 8) | ((bits & 0xff00ff00) >> 8);
	bits = ((bits & 0x0f0f0f0f) << 4) | ((bits & 0xf0f0f0f0) >> 4);
	bits = ((bits & 0x33333333) << 2) | ((bits & 0xcccccccc) >> 2);
	bits = ((bits & 0x55555555) << 1) | ((bits & 0xaaaaaaaa) >> 1);
	return bits;
}

/* Sobol radical inverse. */
ccl_device uint sobol(uint i)
{
	uint r = 0;
	for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1) {
		if(i & 1) {
			r ^= v;
		}
	}
	return r;
}

/* Inverse of sobol radical inverse. */
ccl_device uint sobol_inverse(uint i)
{
	const uint msb = 1U << 31;
	uint r = 0;
	for(uint v = 1; i; i <<= 1, v ^= v << 1) {
		if(i & msb) {
			r ^= v;
		}
	}
	return r;
}

/* Multidimensional sobol with generator matrices
 * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively.
 */
ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
{
	uint result = 0;
	uint i = index;
	for(uint j = 0; i; i >>= 1, j++) {
		if(i & 1) {
			result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j);
		}
	}
	return result;
}

/* Lookup index and x/y coordinate, assumes m is a power of two. */
ccl_device uint sobol_lookup(const uint m,
                             const uint frame,
                             const uint ex,
                             const uint ey,
                             uint *x, uint *y)
{
	/* Shift is constant per frame. */
	const uint shift = frame << (m << 1);
	const uint sobol_shift = sobol(shift);
	/* Van der Corput is its own inverse. */
	const uint lower = van_der_corput(ex << (32 - m));
	/* Need to compensate for ey difference and shift. */
	const uint sobol_lower = sobol(lower);
	const uint mask = ~-(1 << m) << (32 - m);  /* Only m upper bits. */
	const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask;
	/* Only use m upper bits for the index (m is a power of two). */
	const uint sobol_result = delta | (delta >> m);
	const uint upper = sobol_inverse(sobol_result);
	const uint index = shift | upper | lower;
	*x = van_der_corput(index);
	*y = sobol_shift ^ sobol_result ^ sobol_lower;
	return index;
}

ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
                                         RNG *rng,
                                         int sample, int num_samples,
                                         int dimension)
{
#ifdef __CMJ__
	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
		/* Correlated multi-jitter. */
		int p = *rng + dimension;
		return cmj_sample_1D(sample, num_samples, p);
	}
#endif

#ifdef __SOBOL_FULL_SCREEN__
	uint result = sobol_dimension(kg, *rng, dimension);
	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
	return r;
#else
	/* Compute sobol sequence value using direction vectors. */
	uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension);
	float r = (float)result * (1.0f/(float)0xFFFFFFFF);

	/* Cranly-Patterson rotation using rng seed */
	float shift;

	/* Hash rng with dimension to solve correlation issues.
	 * See T38710, T50116.
	 */
	RNG tmp_rng = cmj_hash_simple(dimension, *rng);
	shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);

	return r + shift - floorf(r + shift);
#endif
}

ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
                                        RNG *rng,
                                        int sample, int num_samples,
                                        int dimension,
                                        float *fx, float *fy)
{
#ifdef __CMJ__
	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
		/* Correlated multi-jitter. */
		int p = *rng + dimension;
		cmj_sample_2D(sample, num_samples, p, fx, fy);
	}
	else
#endif
	{
		/* Sobol. */
		*fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
		*fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
	}
}

ccl_device_inline void path_rng_init(KernelGlobals *kg,
                                     ccl_global uint *rng_state,
                                     int sample, int num_samples,
                                     RNG *rng,
                                     int x, int y,
                                     float *fx, float *fy)
{
#ifdef __SOBOL_FULL_SCREEN__
	uint px, py;
	uint bits = 16; /* limits us to 65536x65536 and 65536 samples */
	uint size = 1 << bits;
	uint frame = sample;

	*rng = sobol_lookup(bits, frame, x, y, &px, &py);

	*rng ^= kernel_data.integrator.seed;

	if(sample == 0) {
		*fx = 0.5f;
		*fy = 0.5f;
	}
	else {
		*fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x;
		*fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y;
	}
#else
	*rng = *rng_state;

	*rng ^= kernel_data.integrator.seed;

	if(sample == 0) {
		*fx = 0.5f;
		*fy = 0.5f;
	}
	else {
		path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy);
	}
#endif
}

ccl_device void path_rng_end(KernelGlobals *kg,
                             ccl_global uint *rng_state,
                             RNG rng)
{
	/* nothing to do */
}

#else  /* __SOBOL__ */

/* Linear Congruential Generator */

ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
                                         RNG *rng,
                                         int sample, int num_samples,
                                         int dimension)
{
	/* implicit mod 2^32 */
	*rng = (1103515245*(*rng) + 12345);
	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
}

ccl_device_inline void path_rng_2D(KernelGlobals *kg,
                                   RNG *rng,
                                   int sample, int num_samples,
                                   int dimension,
                                   float *fx, float *fy)
{
	*fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
	*fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
}

ccl_device void path_rng_init(KernelGlobals *kg,
                              ccl_global uint *rng_state,
                              int sample, int num_samples,
                              RNG *rng,
                              int x, int y,
                              float *fx, float *fy)
{
	/* load state */
	*rng = *rng_state;

	*rng ^= kernel_data.integrator.seed;

	if(sample == 0) {
		*fx = 0.5f;
		*fy = 0.5f;
	}
	else {
		path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U, fx, fy);
	}
}

ccl_device void path_rng_end(KernelGlobals *kg,
                             ccl_global uint *rng_state,
                             RNG rng)
{
	/* store state for next sample */
	*rng_state = rng;
}

#endif  /* __SOBOL__ */

/* Linear Congruential Generator */

ccl_device uint lcg_step_uint(uint *rng)
{
	/* implicit mod 2^32 */
	*rng = (1103515245*(*rng) + 12345);
	return *rng;
}

ccl_device float lcg_step_float(uint *rng)
{
	/* implicit mod 2^32 */
	*rng = (1103515245*(*rng) + 12345);
	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
}

ccl_device uint lcg_init(uint seed)
{
	uint rng = seed;
	lcg_step_uint(&rng);
	return rng;
}

/* Path Tracing Utility Functions
 *
 * For each random number in each step of the path we must have a unique
 * dimension to avoid using the same sequence twice.
 *
 * For branches in the path we must be careful not to reuse the same number
 * in a sequence and offset accordingly.
 */

ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
                                          RNG *rng,
                                          const ccl_addr_space PathState *state,
                                          int dimension)
{
	return path_rng_1D(kg,
	                   rng,
	                   state->sample, state->num_samples,
	                   state->rng_offset + dimension);
}

ccl_device_inline float path_state_rng_1D_for_decision(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state,
        int dimension)
{
	/* The rng_offset is not increased for transparent bounces. if we do then
	 * fully transparent objects can become subtly visible by the different
	 * sampling patterns used where the transparent object is.
	 *
	 * however for some random numbers that will determine if we next bounce
	 * is transparent we do need to increase the offset to avoid always making
	 * the same decision. */
	const int rng_offset = state->rng_offset + state->transparent_bounce * PRNG_BOUNCE_NUM;
	return path_rng_1D(kg,
	                   rng,
	                   state->sample, state->num_samples,
	                   rng_offset + dimension);
}

ccl_device_inline void path_state_rng_2D(KernelGlobals *kg,
                                         RNG *rng,
                                         const ccl_addr_space PathState *state,
                                         int dimension,
                                         float *fx, float *fy)
{
	path_rng_2D(kg,
	            rng,
	            state->sample, state->num_samples,
	            state->rng_offset + dimension,
	            fx, fy);
}

ccl_device_inline float path_branched_rng_1D(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state,
        int branch,
        int num_branches,
        int dimension)
{
	return path_rng_1D(kg,
	                   rng,
	                   state->sample * num_branches + branch,
	                   state->num_samples * num_branches,
	                   state->rng_offset + dimension);
}

ccl_device_inline float path_branched_rng_1D_for_decision(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state,
        int branch,
        int num_branches,
        int dimension)
{
	const int rng_offset = state->rng_offset + state->transparent_bounce * PRNG_BOUNCE_NUM;
	return path_rng_1D(kg,
	                   rng,
	                   state->sample * num_branches + branch,
	                   state->num_samples * num_branches,
	                   rng_offset + dimension);
}

ccl_device_inline void path_branched_rng_2D(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state,
        int branch,
        int num_branches,
        int dimension,
        float *fx, float *fy)
{
	path_rng_2D(kg,
	            rng,
	            state->sample * num_branches + branch,
	            state->num_samples * num_branches,
	            state->rng_offset + dimension,
	            fx, fy);
}

/* Utitility functions to get light termination value,
 * since it might not be needed in many cases.
 */
ccl_device_inline float path_state_rng_light_termination(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state)
{
	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
		return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE);
	}
	return 0.0f;
}

ccl_device_inline float path_branched_rng_light_termination(
        KernelGlobals *kg,
        RNG *rng,
        const ccl_addr_space PathState *state,
        int branch,
        int num_branches)
{
	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
		return path_branched_rng_1D_for_decision(kg,
		                                         rng,
		                                         state,
		                                         branch,
		                                         num_branches,
		                                         PRNG_LIGHT_TERMINATE);
	}
	return 0.0f;
}

ccl_device_inline void path_state_branch(ccl_addr_space PathState *state,
                                         int branch,
                                         int num_branches)
{
	/* path is splitting into a branch, adjust so that each branch
	 * still gets a unique sample from the same sequence */
	state->rng_offset += PRNG_BOUNCE_NUM;
	state->sample = state->sample*num_branches + branch;
	state->num_samples = state->num_samples*num_branches;
}

ccl_device_inline uint lcg_state_init(RNG *rng,
                                      int rng_offset,
                                      int sample,
                                      uint scramble)
{
	return lcg_init(*rng + rng_offset + sample*scramble);
}

ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
{
	/* Implicit mod 2^32 */
	*rng = (1103515245*(*rng) + 12345);
	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
}

CCL_NAMESPACE_END