/*
 * ***** BEGIN GPL LICENSE BLOCK *****
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version. 
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2006 Blender Foundation.
 * All rights reserved.
 *
 * The Original Code is: all of this file.
 *
 * Contributor(s): Brecht Van Lommel.
 *
 * ***** END GPL LICENSE BLOCK *****
 */

/** \file blender/gpu/intern/gpu_material.c
 *  \ingroup gpu
 *
 * Manages materials, lights and textures.
 */

#include <math.h>
#include <string.h>

#include "MEM_guardedalloc.h"

#include "DNA_lamp_types.h"
#include "DNA_material_types.h"
#include "DNA_object_types.h"
#include "DNA_scene_types.h"
#include "DNA_world_types.h"

#include "BLI_math.h"
#include "BLI_blenlib.h"
#include "BLI_utildefines.h"
#include "BLI_rand.h"

#include "BKE_anim.h"
#include "BKE_colorband.h"
#include "BKE_colortools.h"
#include "BKE_global.h"
#include "BKE_image.h"
#include "BKE_layer.h"
#include "BKE_main.h"
#include "BKE_node.h"
#include "BKE_scene.h"

#include "IMB_imbuf_types.h"

#include "GPU_extensions.h"
#include "GPU_framebuffer.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "GPU_uniformbuffer.h"

#include "DRW_engine.h"

#include "gpu_codegen.h"

#ifdef WITH_OPENSUBDIV
#  include "BKE_DerivedMesh.h"
#endif

/* Structs */

/**
 * Per-engine / per-option compiled material.
 * Instances are zero-allocated in GPU_material_from_nodetree() and released
 * by GPU_material_free().
 */
struct GPUMaterial {
	Scene *scene; /* DEPRECATED: was only useful for lamps (still read by GPU_material_do_color_management). */
	Material *ma;

	/* Material for mesh surface, worlds or something else.
	 * Some code generation is done differently depending on the use case. */
	int type; /* DEPRECATED */
	GPUMaterialStatus status; /* Compilation state, see GPU_material_status(). */

	const void *engine_type;   /* Attached engine type, used as lookup key together with `options`. */
	int options;    /* To identify shader variations (shadow, probe, world background...). */
	
	/* For creating the material. */
	ListBase nodes;
	GPUNodeLink *outlink; /* Output link; only the first one set through GPU_material_output_link() is kept. */

	/* For binding the material. */
	GPUPass *pass; /* NULL until GPU_material_generate_pass() succeeds. */
	ListBase inputs;  /* GPUInput */
	GPUVertexAttribs attribs;
	int builtins;
	int alpha, obcolalpha;
	int dynproperty;

	/* For passing uniforms. */
	int viewmatloc, invviewmatloc;
	int obmatloc, invobmatloc;
	int localtoviewmatloc, invlocaltoviewmatloc;
	int obcolloc, obautobumpscaleloc;
	int cameratexcofacloc;

	int partscalarpropsloc;
	int partcoloc;
	int partvel;
	int partangvel;

	int objectinfoloc;

	bool is_opensubdiv;

	/* XXX: Should be in Material. But it depends on the output node
	 * used and since the output selection is different for GPUMaterial...
	 *
	 * Bitmask of GPU_DOMAIN_* flags.
	 */
	int domain;

	/* Used by 2.8 pipeline */
	GPUUniformBuffer *ubo; /* UBOs for shader uniforms. */

	/* Eevee SSS */
	GPUUniformBuffer *sss_profile; /* UBO containing SSS profile. */
	GPUTexture *sss_tex_profile; /* Texture containing SSS profile. */
	float *sss_radii; /* Radii pointer given to GPU_material_sss_profile_create(); stored as-is, not copied or freed here. */
	int sss_samples; /* Sample count the profile was last computed with. */
	short int *sss_falloff; /* Falloff type pointer given to GPU_material_sss_profile_create(); stored as-is. */
	float *sss_sharpness; /* Sharpness pointer given to GPU_material_sss_profile_create(); may be NULL. */
	bool sss_dirty; /* When set, GPU_material_sss_profile_get() recomputes the profile. */
};

/* Bit flags OR-ed into GPUMaterial.domain. */
enum {
	GPU_DOMAIN_SURFACE    = (1 << 0),
	GPU_DOMAIN_VOLUME     = (1 << 1),
	GPU_DOMAIN_SSS        = (1 << 2)
};

/* Functions */

/**
 * Release every GPUMaterial referenced by the #LinkData items of
 * \a gpumaterial, then free the list items themselves.
 */
void GPU_material_free(ListBase *gpumaterial)
{
	LinkData *item = gpumaterial->first;

	while (item != NULL) {
		GPUMaterial *mat = item->data;

		/* Cancel / wait any pending lazy compilation. */
		DRW_deferred_shader_remove(mat);

		GPU_pass_free_nodes(&mat->nodes);
		GPU_inputs_free(&mat->inputs);

		if (mat->pass != NULL) {
			GPU_pass_release(mat->pass);
		}
		if (mat->ubo) {
			GPU_uniformbuffer_free(mat->ubo);
		}
		if (mat->sss_tex_profile) {
			GPU_texture_free(mat->sss_tex_profile);
		}
		if (mat->sss_profile) {
			GPU_uniformbuffer_free(mat->sss_profile);
		}

		MEM_freeN(mat);

		/* The list items stay valid until BLI_freelistN() below. */
		item = item->next;
	}

	BLI_freelistN(gpumaterial);
}

/** Return the builtin flags stored on \a material. */
GPUBuiltin GPU_get_material_builtins(GPUMaterial *material)
{
	const int builtin_flags = material->builtins;
	return builtin_flags;
}

/** Return the scene pointer stored on \a material. */
Scene *GPU_material_scene(GPUMaterial *material)
{
	Scene *owner_scene = material->scene;
	return owner_scene;
}

/** Return the (deprecated) material type stored on \a material. */
GPUMatType GPU_Material_get_type(GPUMaterial *material)
{
	const int mat_type = material->type;
	return mat_type;
}

/** Return the pass of \a material (NULL when no pass has been generated). */
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
	GPUPass *compiled_pass = material->pass;
	return compiled_pass;
}

/** Return the address of the input list embedded in \a material. */
ListBase *GPU_material_get_inputs(GPUMaterial *material)
{
	ListBase *input_list = &material->inputs;
	return input_list;
}

/** Return the uniform buffer of \a material (NULL when none was created). */
GPUUniformBuffer *GPU_material_get_uniform_buffer(GPUMaterial *material)
{
	GPUUniformBuffer *uniform_buffer = material->ubo;
	return uniform_buffer;
}

/**
 * Create the dynamic UBO of \a material from a list of parameters.
 *
 * \param material: Material receiving the new buffer in its `ubo` member.
 * \param inputs: ListBase of BLI_genericNodeN(GPUInput).
 */
void GPU_material_create_uniform_buffer(GPUMaterial *material, ListBase *inputs)
{
	GPUUniformBuffer *dynamic_ubo = GPU_uniformbuffer_dynamic_create(inputs, NULL);

	material->ubo = dynamic_ubo;
}

/**
 * Flag the uniform data of every material in \a gpumaterials as outdated:
 * existing UBOs are tagged dirty and existing SSS profiles are marked for
 * recomputation.
 */
void GPU_material_uniform_buffer_tag_dirty(ListBase *gpumaterials)
{
	LinkData *item;

	for (item = gpumaterials->first; item != NULL; item = item->next) {
		GPUMaterial *mat = item->data;

		if (mat->ubo) {
			GPU_uniformbuffer_tag_dirty(mat->ubo);
		}
		if (mat->sss_profile) {
			mat->sss_dirty = true;
		}
	}
}

/* Eevee Subsurface scattering. */
/* Based on Separable SSS. by Jorge Jimenez and Diego Gutierrez */

#define SSS_SAMPLES 65 /* Capacity of GPUSssKernelData.kernel. */
#define SSS_EXPONENT 2.0f /* Importance sampling exponent */

/**
 * Kernel data filled by compute_sss_kernel() and uploaded as-is into the
 * `sss_profile` uniform buffer (which is created with sizeof(GPUSssKernelData)).
 */
typedef struct GPUSssKernelData {
	/* Per sample: [0..2] normalized weights for the three radii channels
	 * (presumably RGB), [3] sample offset in the [-1..1] range. */
	float kernel[SSS_SAMPLES][4];
	/* Per channel profile parameter, and the largest evaluation radius. */
	float param[3], max_radius;
	/* Number of kernel entries actually in use. */
	int samples;
} GPUSssKernelData;

/**
 * Store \a count sample offsets in the 4th component of the kernel entries.
 *
 * Offsets are evenly spaced over [-1..1] and then warped with
 * `sign(o) * |o|^exponent`, so an exponent above 1 concentrates the samples
 * around the center.
 *
 * \param kd: Kernel whose `kernel[i][3]` entries are written for `i < count`.
 * \param count: Number of offsets to write; nothing is written when <= 0.
 * \param exponent: Importance sampling exponent.
 */
static void sss_calculate_offsets(GPUSssKernelData *kd, int count, float exponent)
{
	if (count == 1) {
		/* A single sample sits on the center; avoids the division by zero below. */
		kd->kernel[0][3] = 0.0f;
		return;
	}

	const float step = 2.0f / (float)(count - 1);
	for (int i = 0; i < count; i++) {
		const float o = ((float)i) * step - 1.0f;
		const float sign = (o < 0.0f) ? -1.0f : 1.0f;
		/* Raise the magnitude rather than the signed value: powf() with a
		 * negative base and a non-integer exponent is a domain error (NaN). */
		kd->kernel[i][3] = sign * powf(fabsf(o), exponent);
	}
}

#define GAUSS_TRUNCATE 12.46f
static float gaussian_profile(float r, float radius)
{
	const float v = radius * radius * (0.25f * 0.25f);
	const float Rm = sqrtf(v * GAUSS_TRUNCATE);

	if (r >= Rm) {
		return 0.0f;
	}
	return expf(-r * r / (2.0f * v)) / (2.0f * M_PI * v);
}

#define BURLEY_TRUNCATE     16.0f
#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
static float burley_profile(float r, float d)
{
	float exp_r_3_d = expf(-r / (3.0f * d));
	float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
	return (exp_r_d + exp_r_3_d) / (4.0f * d);
}

static float cubic_profile(float r, float radius, float sharpness)
{
	float Rm = radius * (1.0f + sharpness);

	if (r >= Rm) {
		return 0.0f;
	}
	/* custom variation with extra sharpness, to match the previous code */
	const float y = 1.0f / (1.0f + sharpness);
	float Rmy, ry, ryinv;

	Rmy = powf(Rm, y);
	ry = powf(r, y);
	ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;

	const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
	const float f = Rmy - ry;
	const float num = f * (f * f) * (y * ryinv);

	return (10.0f * num) / (Rmy5 * M_PI);
}

/**
 * Evaluate the profile selected by \a falloff_type at the distance `|r|`.
 *
 * Burley and random-walk falloffs share the Burley profile (divided by
 * BURLEY_TRUNCATE_CDF), cubic uses the cubic profile, and every other value
 * falls back to the Gaussian profile.
 */
static float eval_profile(float r, short falloff_type, float sharpness, float param)
{
	const float dist = fabsf(r);
	const bool is_burley = (falloff_type == SHD_SUBSURFACE_BURLEY) ||
	                       (falloff_type == SHD_SUBSURFACE_RANDOM_WALK);

	if (is_burley) {
		return burley_profile(dist, param) / BURLEY_TRUNCATE_CDF;
	}
	if (falloff_type == SHD_SUBSURFACE_CUBIC) {
		return cubic_profile(dist, param, sharpness);
	}
	return gaussian_profile(dist, param);
}

/* Number of sub-intervals used to integrate each sample of the precomputed kernel profile */
#define INTEGRAL_RESOLUTION 32
/**
 * Midpoint-rule integral of the selected profile over [x0, x1], using
 * INTEGRAL_RESOLUTION equally sized sub-intervals.
 */
static float eval_integral(float x0, float x1, short falloff_type, float sharpness, float param)
{
	const float span = x1 - x0;
	const float slice_width = span / INTEGRAL_RESOLUTION;
	float accum = 0.0f;
	int slice = 0;

	while (slice < INTEGRAL_RESOLUTION) {
		/* Evaluate at the middle of the current slice. */
		const float mid = x0 + span * ((float)slice + 0.5f) / (float)INTEGRAL_RESOLUTION;

		accum += eval_profile(mid, falloff_type, sharpness, param) * slice_width;
		slice++;
	}

	return accum;
}
#undef INTEGRAL_RESOLUTION

/**
 * Fill \a kd with a normalized 1D kernel of \a sample_ct samples.
 *
 * \param kd: Output; `kernel`, `param`, `max_radius` and `samples` are written.
 * \param radii: Three scattering radii, one per kernel channel.
 * \param sample_ct: Number of samples to generate.
 * \param falloff_type: One of the SHD_SUBSURFACE_* falloff types.
 * \param sharpness: Sharpness, only used by the cubic profile.
 *
 * NOTE(review): \a sample_ct is not validated here. The code indexes
 * `kernel[1]`, `kernel[sample_ct - 2]` and writes `sample_ct` entries, so it
 * appears to require `2 <= sample_ct <= SSS_SAMPLES` -- confirm callers ensure this.
 */
static void compute_sss_kernel(
        GPUSssKernelData *kd, float *radii, int sample_ct, int falloff_type, float sharpness)
{
	float rad[3];
	/* Minimum radius */
	rad[0] = MAX2(radii[0], 1e-15f);
	rad[1] = MAX2(radii[1], 1e-15f);
	rad[2] = MAX2(radii[2], 1e-15f);

	/* Christensen-Burley fitting */
	float l[3], d[3];

	if (falloff_type == SHD_SUBSURFACE_BURLEY ||
	    falloff_type == SHD_SUBSURFACE_RANDOM_WALK)
	{
		mul_v3_v3fl(l, rad, 0.25f * M_1_PI);
		const float A = 1.0f;
		const float s = 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
		/* XXX 0.6f Out of nowhere to match cycles! Empirical! Can be tweak better. */
		mul_v3_v3fl(d, l, 0.6f / s);
		/* The kernel extends up to BURLEY_TRUNCATE times the profile parameter. */
		mul_v3_v3fl(rad, d, BURLEY_TRUNCATE);
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);

		copy_v3_v3(kd->param, d);
	}
	else if (falloff_type == SHD_SUBSURFACE_CUBIC) {
		copy_v3_v3(kd->param, rad);
		/* Same cutoff as cubic_profile(): radius * (1 + sharpness). */
		mul_v3_fl(rad, 1.0f + sharpness);
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);
	}
	else {
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);

		copy_v3_v3(kd->param, rad);
	}

	/* Compute samples locations on the 1d kernel [-1..1] */
	sss_calculate_offsets(kd, sample_ct, SSS_EXPONENT);

	/* Weights sum for normalization */
	float sum[3] = {0.0f, 0.0f, 0.0f};

	/* Compute integral of each sample footprint */
	for (int i = 0; i < sample_ct; i++) {
		float x0, x1;

		/* The footprint spans from the midpoint with the previous sample to the
		 * midpoint with the next one; the two end samples are extended outwards
		 * by half the distance to their only neighbor. */
		if (i == 0) {
			x0 = kd->kernel[0][3] - fabsf(kd->kernel[0][3] - kd->kernel[1][3]) / 2.0f;
		}
		else {
			x0 = (kd->kernel[i - 1][3] + kd->kernel[i][3]) / 2.0f;
		}

		if (i == sample_ct - 1) {
			x1 = kd->kernel[sample_ct - 1][3] + fabsf(kd->kernel[sample_ct - 2][3] - kd->kernel[sample_ct - 1][3]) / 2.0f;
		}
		else {
			x1 = (kd->kernel[i][3] + kd->kernel[i + 1][3]) / 2.0f;
		}

		/* Scale the [-1..1] footprint to actual distances. */
		x0 *= kd->max_radius;
		x1 *= kd->max_radius;

		kd->kernel[i][0] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[0]);
		kd->kernel[i][1] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[1]);
		kd->kernel[i][2] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[2]);

		sum[0] += kd->kernel[i][0];
		sum[1] += kd->kernel[i][1];
		sum[2] += kd->kernel[i][2];
	}

	for (int i = 0; i < 3; ++i) {
		if (sum[i] > 0.0f) {
			/* Normalize */
			for (int j = 0; j < sample_ct; j++) {
				kd->kernel[j][i] /= sum[i];
			}
		}
		else {
			/* Avoid 0 kernel sum: give the whole weight to the center sample. */
			kd->kernel[sample_ct / 2][i] = 1.0f;
		}
	}

	/* Put center sample at the start of the array (to sample first).
	 * The entries that preceded it are shifted up by one slot. */
	float tmpv[4];
	copy_v4_v4(tmpv, kd->kernel[sample_ct / 2]);
	for (int i = sample_ct / 2; i > 0; i--) {
		copy_v4_v4(kd->kernel[i], kd->kernel[i - 1]);
	}
	copy_v4_v4(kd->kernel[0], tmpv);

	kd->samples = sample_ct;
}

#define INTEGRAL_RESOLUTION 512
/**
 * Compute a 1D translucence profile of \a resolution RGBA texels.
 *
 * \param kd: Kernel data providing `max_radius` and the per channel `param`.
 * \param resolution: Number of texels to allocate and fill.
 * \param falloff_type: One of the SHD_SUBSURFACE_* falloff types.
 * \param sharpness: Sharpness, forwarded to eval_profile().
 * \param output: Receives the newly allocated (MEM_callocN) texel array of
 * `4 * resolution` floats. Nothing in this function frees it.
 *
 * Only the first three components of each texel are written; the fourth one
 * keeps the zero value from the allocation.
 *
 * NOTE(review): `texels[resolution - 4]` is accessed below, so \a resolution
 * appears to be required to be at least 4 -- confirm with callers.
 */
static void compute_sss_translucence_kernel(
        const GPUSssKernelData *kd, int resolution, short falloff_type, float sharpness, float **output)
{
	float (*texels)[4];
	texels = MEM_callocN(sizeof(float) * 4 * resolution, "compute_sss_translucence_kernel");
	*output = (float *)texels;

	/* Last texel should be black, hence the - 1. */
	for (int i = 0; i < resolution - 1; ++i) {
		/* Distance from surface. */
		float d = kd->max_radius * ((float)i + 0.00001f) / ((float)resolution);

		/* For each distance d we compute the radiance incoming from a hypothetical parallel plane. */
		/* Compute radius of the footprint on the hypothetical plane */
		float r_fp = sqrtf(kd->max_radius * kd->max_radius - d * d);
		float r_step = r_fp / INTEGRAL_RESOLUTION;
		float area_accum = 0.0f;
		/* NOTE(review): float loop counter; the exact iteration count depends on
		 * rounding of the accumulated `r`. */
		for (float r = 0.0f; r < r_fp; r += r_step) {
			/* Compute distance to the "shading" point through the medium. */
			/* r_step * 0.5f to put sample between the area borders */
			float dist = hypotf(r + r_step * 0.5f, d);

			float profile[3];
			profile[0] = eval_profile(dist, falloff_type, sharpness, kd->param[0]);
			profile[1] = eval_profile(dist, falloff_type, sharpness, kd->param[1]);
			profile[2] = eval_profile(dist, falloff_type, sharpness, kd->param[2]);

			/* Since the profile and configuration are radially symmetrical we
			 * can just evaluate it once and weight it accordingly
			 * (by the area of the ring between r and r + r_step). */
			float r_next = r + r_step;
			float disk_area = (M_PI * r_next * r_next) - (M_PI * r * r);

			mul_v3_fl(profile, disk_area);
			add_v3_v3(texels[i], profile);
			area_accum += disk_area;
		}
		/* Normalize over the disk. */
		mul_v3_fl(texels[i], 1.0f / (area_accum));
	}

	/* Normalize: express every texel relative to the first one
	 * (channels whose first texel is not positive are left unscaled). */
	for (int j = resolution - 2; j > 0; j--) {
		texels[j][0] /= (texels[0][0] > 0.0f) ? texels[0][0] : 1.0f;
		texels[j][1] /= (texels[0][1] > 0.0f) ? texels[0][1] : 1.0f;
		texels[j][2] /= (texels[0][2] > 0.0f) ? texels[0][2] : 1.0f;
	}

	/* First texel should be white */
	texels[0][0] = (texels[0][0] > 0.0f) ? 1.0f : 0.0f;
	texels[0][1] = (texels[0][1] > 0.0f) ? 1.0f : 0.0f;
	texels[0][2] = (texels[0][2] > 0.0f) ? 1.0f : 0.0f;

	/* Dim the last few texels for smoother transition */
	mul_v3_fl(texels[resolution - 2], 0.25f);
	mul_v3_fl(texels[resolution - 3], 0.5f);
	mul_v3_fl(texels[resolution - 4], 0.75f);
}
#undef INTEGRAL_RESOLUTION

/**
 * Register the SSS parameters of \a material and make sure its profile UBO exists.
 *
 * The pointers are stored as given (no copy is made) and are dereferenced
 * later by GPU_material_sss_profile_get(). The profile is flagged dirty so
 * that it gets recomputed on the next request.
 */
void GPU_material_sss_profile_create(GPUMaterial *material, float *radii, short *falloff_type, float *sharpness)
{
	/* Create the UBO only once, it is re-used on later calls. */
	if (!material->sss_profile) {
		material->sss_profile = GPU_uniformbuffer_create(sizeof(GPUSssKernelData), NULL, NULL);
	}

	material->sss_radii = radii;
	material->sss_falloff = falloff_type;
	material->sss_sharpness = sharpness;
	material->sss_dirty = true;
}

/**
 * Return the SSS profile UBO of \a material, recomputing the kernel and the
 * translucence texture when the profile is dirty or \a sample_ct changed.
 *
 * \param sample_ct: Number of kernel samples requested.
 * \param tex_profile: Optional, receives the translucence profile texture.
 * \return the profile UBO, or NULL when no SSS radii were registered
 * (in which case \a tex_profile is left untouched).
 */
struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, int sample_ct, GPUTexture **tex_profile)
{
	if (material->sss_radii == NULL) {
		return NULL;
	}

	const bool needs_update = material->sss_dirty || (material->sss_samples != sample_ct);

	if (needs_update) {
		/* Width in texels of the translucence profile texture. */
		const int translucence_res = 64;
		const short falloff = *material->sss_falloff;
		GPUSssKernelData kernel_data;
		float *translucence_texels;
		float sharpness;

		if (material->sss_sharpness != NULL) {
			sharpness = *material->sss_sharpness;
		}
		else {
			sharpness = 0.0f;
		}

		/* XXX Black magic but it seems to fit. Maybe because we integrate -1..1 */
		sharpness *= 0.5f;

		/* Update the UBO content. */
		compute_sss_kernel(&kernel_data, material->sss_radii, sample_ct, falloff, sharpness);
		GPU_uniformbuffer_update(material->sss_profile, &kernel_data);

		/* Rebuild the texture from scratch. */
		compute_sss_translucence_kernel(&kernel_data, translucence_res, falloff, sharpness, &translucence_texels);

		if (material->sss_tex_profile) {
			GPU_texture_free(material->sss_tex_profile);
		}
		material->sss_tex_profile = GPU_texture_create_1D(translucence_res, GPU_RGBA16F, translucence_texels, NULL);

		MEM_freeN(translucence_texels);

		material->sss_samples = sample_ct;
		material->sss_dirty = false;
	}

	if (tex_profile) {
		*tex_profile = material->sss_tex_profile;
	}

	return material->sss_profile;
}

#undef SSS_EXPONENT
#undef SSS_SAMPLES

/** Copy the vertex attributes of \a material into \a attribs. */
void GPU_material_vertex_attributes(GPUMaterial *material, GPUVertexAttribs *attribs)
{
	const GPUVertexAttribs *source = &material->attribs;

	*attribs = *source;
}

/**
 * Set the output link of \a material.
 * Only the first link wins: once a link is set, later calls are ignored.
 */
void GPU_material_output_link(GPUMaterial *material, GPUNodeLink *link)
{
	if (material->outlink) {
		return;
	}

	material->outlink = link;
}

/** Append \a node at the end of the node list of \a material. */
void gpu_material_add_node(GPUMaterial *material, GPUNode *node)
{
	ListBase *node_list = &material->nodes;

	BLI_addtail(node_list, node);
}

/**
 * Return the current compilation status of \a mat
 * (e.g. GPU_MAT_QUEUED, GPU_MAT_SUCCESS or GPU_MAT_FAILED).
 */
GPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
	const GPUMaterialStatus current_status = mat->status;
	return current_status;
}

/* Code generation */

/** Return true when color management is enabled for the scene of \a mat. */
bool GPU_material_do_color_management(GPUMaterial *mat)
{
	return BKE_scene_check_color_management_enabled(mat->scene) ? true : false;
}

/** Return true when the GPU_DOMAIN_SURFACE flag is set on \a mat. */
bool GPU_material_use_domain_surface(GPUMaterial *mat)
{
	const int surface_bits = mat->domain & GPU_DOMAIN_SURFACE;
	return surface_bits != 0;
}

/** Return true when the GPU_DOMAIN_VOLUME flag is set on \a mat. */
bool GPU_material_use_domain_volume(GPUMaterial *mat)
{
	const int volume_bits = mat->domain & GPU_DOMAIN_VOLUME;
	return volume_bits != 0;
}

/**
 * Look up an existing material in \a gpumaterials.
 *
 * \return the first material whose engine type and options both match,
 * or NULL when there is none.
 */
GPUMaterial *GPU_material_from_nodetree_find(
        ListBase *gpumaterials, const void *engine_type, int options)
{
	LinkData *item = gpumaterials->first;

	while (item != NULL) {
		GPUMaterial *candidate = (GPUMaterial *)item->data;
		const bool same_engine = (candidate->engine_type == engine_type);
		const bool same_options = (candidate->options == options);

		if (same_engine && same_options) {
			return candidate;
		}
		item = item->next;
	}

	return NULL;
}

/**
 * Create a new GPUMaterial from \a ntree and append it to \a gpumaterials.
 *
 * \param scene: Stored on the material.
 * \param ntree: Node tree the material nodes and domain are extracted from.
 * \param gpumaterials: List receiving a #LinkData pointing at the new material.
 * \param engine_type: Lookup key, see #GPU_material_from_nodetree_find.
 * \param options: Lookup key identifying the shader variation.
 * \return the new material. It is created and added to the list even when the
 * node tree gave no output link; in that case its status is not set to GPU_MAT_QUEUED.
 *
 * \note Caller must use #GPU_material_from_nodetree_find to re-use existing materials,
 * This is enforced since constructing other arguments to this function may be expensive
 * so only do this when they are needed.
 */
GPUMaterial *GPU_material_from_nodetree(
        Scene *scene, struct bNodeTree *ntree, ListBase *gpumaterials, const void *engine_type, int options)
{
	/* Initialized so the domain flags below are well defined even if
	 * ntreeGPUMaterialDomain() leaves one of them unwritten. */
	bool has_volume_output = false;
	bool has_surface_output = false;

	/* Caller must re-use materials. */
	BLI_assert(GPU_material_from_nodetree_find(gpumaterials, engine_type, options) == NULL);

	/* allocate material */
	GPUMaterial *mat = MEM_callocN(sizeof(GPUMaterial), "GPUMaterial");
	mat->scene = scene;
	mat->engine_type = engine_type;
	mat->options = options;

	ntreeGPUMaterialNodes(ntree, mat, NODE_NEW_SHADING | NODE_NEWER_SHADING);
	ntreeGPUMaterialDomain(ntree, &has_surface_output, &has_volume_output);

	if (has_surface_output) {
		mat->domain |= GPU_DOMAIN_SURFACE;
	}
	if (has_volume_output) {
		mat->domain |= GPU_DOMAIN_VOLUME;
	}

	if (mat->outlink) {
		/* Prune the unused nodes and extract attribs before compiling so the
		 * generated VBOs are ready to accept the future shader. */
		GPU_nodes_prune(&mat->nodes, mat->outlink);
		GPU_nodes_get_vertex_attributes(&mat->nodes, &mat->attribs);
		mat->status = GPU_MAT_QUEUED;
	}

	/* note that even if building the shader fails in some way, we still keep
	 * it to avoid trying to compile again and again, and simple do not use
	 * the actual shader on drawing */

	LinkData *link = MEM_callocN(sizeof(LinkData), "GPUMaterialLink");
	link->data = mat;
	BLI_addtail(gpumaterials, link);

	return mat;
}

/**
 * Generate the pass of \a mat from its output link and update its status:
 * GPU_MAT_SUCCESS when a pass was produced, GPU_MAT_FAILED otherwise
 * (including when the material has no output link).
 */
void GPU_material_generate_pass(
        GPUMaterial *mat, const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines)
{
	BLI_assert(mat->pass == NULL); /* Only run once! */

	if (!mat->outlink) {
		mat->status = GPU_MAT_FAILED;
		return;
	}

	mat->pass = GPU_generate_pass_new(
	        mat, mat->outlink, &mat->attribs, &mat->nodes, &mat->inputs, vert_code, geom_code, frag_lib, defines);

	if (mat->pass) {
		mat->status = GPU_MAT_SUCCESS;
	}
	else {
		mat->status = GPU_MAT_FAILED;
	}
}

/**
 * Free the GPU materials of every material and world in `G.main`,
 * as well as those of the default material.
 */
void GPU_materials_free(void)
{
	extern Material defmaterial;

	for (Material *ma = G.main->mat.first; ma != NULL; ma = ma->id.next) {
		GPU_material_free(&ma->gpumaterial);
	}

	for (World *wo = G.main->world.first; wo != NULL; wo = wo->id.next) {
		GPU_material_free(&wo->gpumaterial);
	}

	GPU_material_free(&defmaterial.gpumaterial);
}