Cycles render engine, initial commit. This is the engine itself, blender modifications and build instructions will follow later.

Cycles uses code from some great open source projects, many thanks to them: * BVH building and traversal code from NVidia's "Understanding the Efficiency of Ray Traversal on GPUs": http://code.google.com/p/understanding-the-efficiency-of-ray-traversal-on-gpus/ * Open Shading Language for a large part of the shading system: http://code.google.com/p/openshadinglanguage/ * Blender for procedural textures and a few other nodes. * Approximate Catmull Clark subdivision from NVidia Mesh tools: http://code.google.com/p/nvidia-mesh-tools/ * Sobol direction vectors from: http://web.maths.unsw.edu.au/~fkuo/sobol/ * Film response functions from: http://www.cs.columbia.edu/CAVE/software/softlib/dorf.php
author: Ton Roosendaal <ton@blender.org> 2011-04-27 15:58:34 +0400
committer: Ton Roosendaal <ton@blender.org> 2011-04-27 15:58:34 +0400
commit: da376e0237517543aa21740ee2363234ee1c20ae (patch)
tree: 014a513ed8d0eccc5e54fef42347781e85bae56a /intern/cycles/util
parent: 693780074388111e7b9ef1c3825e462f398dc6c4 (diff)
41 files changed, 5012 insertions, 0 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
new file mode 100644
index 00000000000..3fea6182a97
--- /dev/null
+++ b/intern/cycles/util/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Cycles util library, every source and header file is listed by name.
+INCLUDE_DIRECTORIES(.)
+
+SET(sources
+	util_cache.cpp
+	util_cuda.cpp
+	util_dynlib.cpp
+	util_md5.cpp
+	util_path.cpp
+	util_string.cpp
+	util_system.cpp
+	util_time.cpp
+	util_transform.cpp
+	util_view.cpp)
+
+SET(headers
+	util_algorithm.h
+	util_args.h
+	util_boundbox.h
+	util_cache.h
+	util_color.h
+	util_cuda.h
+	util_debug.h
+	util_dynlib.h
+	util_function.h
+	util_hash.h
+	util_image.h
+	util_list.h
+	util_map.h
+	util_math.h
+	util_md5.h
+	util_opengl.h
+	util_param.h
+	util_path.h
+	util_progress.h
+	util_set.h
+	util_string.h
+	util_system.h
+	util_thread.h
+	util_time.h
+	util_transform.h
+	util_types.h
+	util_view.h
+	util_vector.h
+	util_xml.h)
+ADD_LIBRARY(util ${sources} ${headers})
+
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
new file mode 100644
index 00000000000..708a2730be7
--- /dev/null
+++ b/intern/cycles/util/util_algorithm.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_ALGORITHM_H__
+#define __UTIL_ALGORITHM_H__
+
+#include <algorithm>
+
+CCL_NAMESPACE_BEGIN
+/* standard algorithms made available unqualified inside this namespace */
+using std::max;
+using std::min;
+using std::remove;
+using std::sort;
+using std::swap;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_ALGORITHM_H__ */
+
diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h
new file mode 100644
index 00000000000..639fd06bead
--- /dev/null
+++ b/intern/cycles/util/util_args.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_ARGS_H__
+#define __UTIL_ARGS_H__
+
+/* Argument Parsing for command line, we use the OpenImageIO
+ * library because it has nice functions to do this. */
+
+#include <OpenImageIO/argparse.h>
+
+CCL_NAMESPACE_BEGIN
+/* NOTE(review): presumably an OpenImageIO macro that pulls its namespace in here */
+OIIO_NAMESPACE_USING
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_ARGS_H__ */
+
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
new file mode 100644
index 00000000000..34cc1d6e11c
--- /dev/null
+++ b/intern/cycles/util/util_boundbox.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_BOUNDBOX_H__
+#define __UTIL_BOUNDBOX_H__
+
+#include <float.h>
+
+#include "util_transform.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Axis-aligned bounding box. A default constructed box is empty (min > max,
+ * !valid()) and becomes valid once grown by at least one point. */
+class BoundBox
+{
+public:
+	float3 min, max;
+
+	BoundBox(void)
+	: min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)),
+	  max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) {}
+
+	BoundBox(const float3& min_, const float3& max_)
+	: min(min_), max(max_) {}
+
+	/* extend the box to contain pt */
+	void grow(const float3& pt)
+	{
+		min = ccl::min(min, pt);
+		max = ccl::max(max, pt);
+	}
+
+	/* union with bbox, min and max are merged separately so that growing by
+	 * an empty box is a no-op instead of stretching this box to +-FLT_MAX */
+	void grow(const BoundBox& bbox)
+	{
+		min = ccl::min(min, bbox.min);
+		max = ccl::max(max, bbox.max);
+	}
+
+	/* clip to the overlap with bbox, leaves the box !valid() if there is none */
+	void intersect(const BoundBox& bbox)
+	{
+		min = ccl::max(min, bbox.min);
+		max = ccl::min(max, bbox.max);
+	}
+
+	/* surface area: twice the sum of the three distinct face areas */
+	float area(void) const
+	{
+		if(!valid())
+			return 0.0f;
+
+		float3 d = max - min;
+		return (d.x*d.y + d.y*d.z + d.z*d.x)*2.0f;
+	}
+
+	bool valid(void) const
+	{
+		return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z);
+	}
+
+	/* bounds of the 8 corners of this box after transforming them by tfm */
+	BoundBox transformed(const Transform *tfm) const
+	{
+		BoundBox result;
+		for(int i = 0; i < 8; i++) {
+			float3 p = make_float3((i & 1)? min.x: max.x,
+			                       (i & 2)? min.y: max.y,
+			                       (i & 4)? min.z: max.z);
+			result.grow(transform(tfm, p));
+		}
+
+		return result;
+	}
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_BOUNDBOX_H__ */
+
diff --git a/intern/cycles/util/util_cache.cpp b/intern/cycles/util/util_cache.cpp
new file mode 100644
index 00000000000..49a0f62cae8
--- /dev/null
+++ b/intern/cycles/util/util_cache.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+
+#include "util_cache.h"
+#include "util_foreach.h"
+#include "util_md5.h"
+#include "util_path.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* CacheData */
+
+/* create an empty entry called name_, no cache file is attached yet */
+CacheData::CacheData(const string& name_)
+: name(name_), f(NULL)
+{
+}
+
+/* close the cache file, if one was attached to this entry */
+CacheData::~CacheData()
+{
+	if(f != NULL) fclose(f);
+}
+
+/* Cache
+ * storage for the static Cache::global instance declared in util_cache.h */
+Cache Cache::global;
+
+/* path_get() of "cache/<key.name>_<MD5 hex digest of all key buffers>" */
+string Cache::data_filename(const CacheData& key)
+{
+	MD5Hash md5;
+
+	foreach(const CacheBuffer& buf, key.buffers)
+		md5.append((uint8_t*)buf.data, buf.size);
+
+	return path_get("cache/" + (key.name + "_" + md5.get_hex()));
+}
+
+/* write value's buffers to the file for key as (size_t size, bytes) records */
+void Cache::insert(const CacheData& key, const CacheData& value)
+{
+	string filename = data_filename(key);
+	FILE *f = fopen(filename.c_str(), "wb");
+	if(!f) {
+		fprintf(stderr, "Failed to open file %s for writing.\n", filename.c_str());
+		return;
+	}
+
+	foreach(const CacheBuffer& buffer, value.buffers) {
+		if(!fwrite(&buffer.size, sizeof(buffer.size), 1, f))
+			fprintf(stderr, "Failed to write to file %s.\n", filename.c_str());
+		/* fwrite() returns 0 for a zero sized item, that is not a failure */
+		if(buffer.size && !fwrite(buffer.data, buffer.size, 1, f))
+			fprintf(stderr, "Failed to write to file %s.\n", filename.c_str());
+	}
+	fclose(f);
+}
+
+/* open the cache file for key as value.f, returns false if there is none */
+bool Cache::lookup(const CacheData& key, CacheData& value)
+{
+	FILE *f = fopen(data_filename(key).c_str(), "rb");
+	if(!f)
+		return false;
+	/* value closes its file on destruction only, so close one left over
+	 * from an earlier lookup here instead of leaking it */
+	if(value.f) fclose(value.f);
+	value.name = key.name;
+	value.f = f;
+	return true;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_cache.h b/intern/cycles/util/util_cache.h
new file mode 100644
index 00000000000..25b1f2e7a51
--- /dev/null
+++ b/intern/cycles/util/util_cache.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_CACHE_H__
+#define __UTIL_CACHE_H__
+
+/* Disk Cache based on Hashing
+ *
+ * To be used to cache expensive computations. The hash key is created from an
+ * arbitrary number of bytes, by hashing the bytes using MD5, which then gives
+ * the file name containing the data. This data then is read from the file
+ * again into the appropriate data structures.
+ *
+ * This way we do not need to accurately track changes, compare dates and
+ * invalidate cache entries, at the cost of extra computation. If everything
+ * is stored in a global cache, computations can perhaps even be shared between
+ * different scenes where it may be hard to detect duplicate work.
+ */
+
+#include "util_string.h"
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* pointer and byte count of one block of memory, the memory is not copied */
+class CacheBuffer {
+public:
+	const void *data;
+	size_t size;
+
+	CacheBuffer(const void *data_, size_t size_) : data(data_), size(size_) {}
+};
+
+/* Key or value of a cache entry. For keys and inserted values, buffers lists
+ * caller-owned memory (only pointers are stored). After Cache::lookup(), f is
+ * the open cache file and read() returns its records in insertion order. */
+class CacheData {
+public:
+	vector<CacheBuffer> buffers;
+	string name;
+	FILE *f;
+
+	CacheData(const string& name = "");
+	~CacheData();
+
+	/* empty containers are stored as (NULL, 0) instead of taking &data[0] */
+	template<typename T> void add(const vector<T>& data)
+	{
+		const void *ptr = data.size()? &data[0]: NULL;
+		buffers.push_back(CacheBuffer(ptr, data.size()*sizeof(T)));
+	}
+
+	template<typename T> void add(const array<T>& data)
+	{
+		const void *ptr = data.size()? &data[0]: NULL;
+		buffers.push_back(CacheBuffer(ptr, data.size()*sizeof(T)));
+	}
+
+	/* raw memory and scalars, referenced by pointer like the containers */
+	void add(void *data, size_t size) { buffers.push_back(CacheBuffer(data, size)); }
+	void add(int& data) { buffers.push_back(CacheBuffer(&data, sizeof(int))); }
+	void add(size_t& data) { buffers.push_back(CacheBuffer(&data, sizeof(size_t))); }
+
+	/* read the next record, Cache::insert() stores each buffer as a size_t
+	 * byte count followed by that many bytes */
+	template<typename T> void read(array<T>& data)
+	{
+		size_t size;
+
+		if(!fread(&size, sizeof(size), 1, f)) {
+			fprintf(stderr, "Failed to read vector size from cache.\n");
+			return;
+		}
+
+		data.resize(size/sizeof(T));
+
+		/* an empty record has no payload to read and no data[0] to read into */
+		if(data.size() == 0)
+			return;
+
+		if(!fread(&data[0], size, 1, f)) {
+			fprintf(stderr, "Failed to read vector data from cache (%lu).\n", (unsigned long)size);
+			return;
+		}
+	}
+
+	void read(int& data) { read_fixed(&data, sizeof(data), "int"); }
+	void read(size_t& data) { read_fixed(&data, sizeof(data), "size_t"); }
+
+private:
+	/* read a record of known size, consuming the size prefix that
+	 * Cache::insert() writes in front of every buffer */
+	void read_fixed(void *data, size_t size, const char *type)
+	{
+		size_t stored;
+
+		if(!fread(&stored, sizeof(stored), 1, f) || stored != size ||
+		   !fread(data, size, 1, f))
+			fprintf(stderr, "Failed to read %s from cache.\n", type);
+	}
+};
+
+class Cache {
+public:
+	static Cache global;
+	/* insert() writes value to the file for key, lookup() opens it as value.f */
+	void insert(const CacheData& key, const CacheData& value);
+	bool lookup(const CacheData& key, CacheData& value);
+
+protected:
+	string data_filename(const CacheData& key); /* file path derived from key */
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_CACHE_H__ */
+
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
new file mode 100644
index 00000000000..fbba0fade63
--- /dev/null
+++ b/intern/cycles/util/util_color.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_COLOR_H__
+#define __UTIL_COLOR_H__
+
+#include "util_math.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* sRGB encoded value to scene linear, negative input is clamped to 0 */
+__device float color_srgb_to_scene_linear(float c)
+{
+	if(c < 0.0f) return 0.0f;
+	if(c < 0.04045f) return c * (1.0f/12.92f);
+	return pow((c + 0.055f)*(1.0f/1.055f), 2.4f);
+}
+
+/* scene linear value to sRGB encoding, negative input is clamped to 0 */
+__device float color_scene_linear_to_srgb(float c)
+{
+	if(c < 0.0f) return 0.0f;
+	if(c < 0.0031308f) return c * 12.92f;
+	return 1.055f * pow(c, 1.0f/2.4f) - 0.055f;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_COLOR_H__ */
+
diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp
new file mode 100644
index 00000000000..15ce7efd9ee
--- /dev/null
+++ b/intern/cycles/util/util_cuda.cpp
@@ -0,0 +1,379 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util_cuda.h"
+#include "util_debug.h"
+#include "util_dynlib.h"
+
+/* function definitions: one global pointer per CUDA driver API entry point,
+ * NULL until cuLibraryInit() resolves it from the driver library */
+tcuInit *cuInit;
+tcuDriverGetVersion *cuDriverGetVersion;
+tcuDeviceGet *cuDeviceGet;
+tcuDeviceGetCount *cuDeviceGetCount;
+tcuDeviceGetName *cuDeviceGetName;
+tcuDeviceComputeCapability *cuDeviceComputeCapability;
+tcuDeviceTotalMem *cuDeviceTotalMem;
+tcuDeviceGetProperties *cuDeviceGetProperties;
+tcuDeviceGetAttribute *cuDeviceGetAttribute;
+tcuCtxCreate *cuCtxCreate;
+tcuCtxDestroy *cuCtxDestroy;
+tcuCtxAttach *cuCtxAttach;
+tcuCtxDetach *cuCtxDetach;
+tcuCtxPushCurrent *cuCtxPushCurrent;
+tcuCtxPopCurrent *cuCtxPopCurrent;
+tcuCtxGetDevice *cuCtxGetDevice;
+tcuCtxSynchronize *cuCtxSynchronize;
+tcuModuleLoad *cuModuleLoad;
+tcuModuleLoadData *cuModuleLoadData;
+tcuModuleLoadDataEx *cuModuleLoadDataEx;
+tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+tcuModuleUnload *cuModuleUnload;
+tcuModuleGetFunction *cuModuleGetFunction;
+tcuModuleGetGlobal *cuModuleGetGlobal;
+tcuModuleGetTexRef *cuModuleGetTexRef;
+tcuModuleGetSurfRef *cuModuleGetSurfRef;
+tcuMemGetInfo *cuMemGetInfo;
+tcuMemAlloc *cuMemAlloc;
+tcuMemAllocPitch *cuMemAllocPitch;
+tcuMemFree *cuMemFree;
+tcuMemGetAddressRange *cuMemGetAddressRange;
+tcuMemAllocHost *cuMemAllocHost;
+tcuMemFreeHost *cuMemFreeHost;
+tcuMemHostAlloc *cuMemHostAlloc;
+tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
+tcuMemHostGetFlags *cuMemHostGetFlags;
+tcuMemcpyHtoD *cuMemcpyHtoD;
+tcuMemcpyDtoH *cuMemcpyDtoH;
+tcuMemcpyDtoD *cuMemcpyDtoD;
+tcuMemcpyDtoA *cuMemcpyDtoA;
+tcuMemcpyAtoD *cuMemcpyAtoD;
+tcuMemcpyHtoA *cuMemcpyHtoA;
+tcuMemcpyAtoH *cuMemcpyAtoH;
+tcuMemcpyAtoA *cuMemcpyAtoA;
+tcuMemcpy2D *cuMemcpy2D;
+tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
+tcuMemcpy3D *cuMemcpy3D;
+tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
+tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
+tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
+tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
+tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
+tcuMemcpy2DAsync *cuMemcpy2DAsync;
+tcuMemcpy3DAsync *cuMemcpy3DAsync;
+tcuMemsetD8 *cuMemsetD8;
+tcuMemsetD16 *cuMemsetD16;
+tcuMemsetD32 *cuMemsetD32;
+tcuMemsetD2D8 *cuMemsetD2D8;
+tcuMemsetD2D16 *cuMemsetD2D16;
+tcuMemsetD2D32 *cuMemsetD2D32;
+tcuFuncSetBlockShape *cuFuncSetBlockShape;
+tcuFuncSetSharedSize *cuFuncSetSharedSize;
+tcuFuncGetAttribute *cuFuncGetAttribute;
+tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+tcuArrayCreate *cuArrayCreate;
+tcuArrayGetDescriptor *cuArrayGetDescriptor;
+tcuArrayDestroy *cuArrayDestroy;
+tcuArray3DCreate *cuArray3DCreate;
+tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
+tcuTexRefCreate *cuTexRefCreate;
+tcuTexRefDestroy *cuTexRefDestroy;
+tcuTexRefSetArray *cuTexRefSetArray;
+tcuTexRefSetAddress *cuTexRefSetAddress;
+tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
+tcuTexRefSetFormat *cuTexRefSetFormat;
+tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+tcuTexRefSetFlags *cuTexRefSetFlags;
+tcuTexRefGetAddress *cuTexRefGetAddress;
+tcuTexRefGetArray *cuTexRefGetArray;
+tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+tcuTexRefGetFormat *cuTexRefGetFormat;
+tcuTexRefGetFlags *cuTexRefGetFlags;
+tcuSurfRefSetArray *cuSurfRefSetArray;
+tcuSurfRefGetArray *cuSurfRefGetArray;
+tcuParamSetSize *cuParamSetSize;
+tcuParamSeti *cuParamSeti;
+tcuParamSetf *cuParamSetf;
+tcuParamSetv *cuParamSetv;
+tcuParamSetTexRef *cuParamSetTexRef;
+tcuLaunch *cuLaunch;
+tcuLaunchGrid *cuLaunchGrid;
+tcuLaunchGridAsync *cuLaunchGridAsync;
+tcuEventCreate *cuEventCreate;
+tcuEventRecord *cuEventRecord;
+tcuEventQuery *cuEventQuery;
+tcuEventSynchronize *cuEventSynchronize;
+tcuEventDestroy *cuEventDestroy;
+tcuEventElapsedTime *cuEventElapsedTime;
+tcuStreamCreate *cuStreamCreate;
+tcuStreamQuery *cuStreamQuery;
+tcuStreamSynchronize *cuStreamSynchronize;
+tcuStreamDestroy *cuStreamDestroy;
+tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
+tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+tcuGraphicsMapResources *cuGraphicsMapResources;
+tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+tcuGetExportTable *cuGetExportTable;
+tcuCtxSetLimit *cuCtxSetLimit;
+tcuCtxGetLimit *cuCtxGetLimit;
+tcuGLCtxCreate *cuGLCtxCreate;
+tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+tcuCtxSetCurrent *cuCtxSetCurrent;
+
+CCL_NAMESPACE_BEGIN
+
+/* utility macros */
+
+#define CUDA_LIBRARY_FIND(name) \
+	name = (t##name*)dynamic_library_find(lib, #name); \
+	assert(name);
+
+#define CUDA_LIBRARY_FIND_V2(name) \
+	name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
+	assert(name);
+
+/* initialization function: loads the CUDA driver library and resolves its
+ * entry points on the first call, later calls return the cached result */
+bool cuLibraryInit()
+{
+	static bool initialized = false;
+	static bool result = false;
+
+	if(initialized)
+		return result;
+	
+	initialized = true;
+
+	/* library paths */
+#ifdef _WIN32
+	/* expected in c:/windows/system or similar, no path needed */
+	const char *path = "nvcuda.dll";
+#elif defined(__APPLE__)
+	/* default installation path */
+	const char *path = "/usr/local/cuda/lib/libcuda.dylib";
+#else
+	const char *path = "libcuda.so";
+#endif
+
+	/* load library */
+	DynamicLibrary *lib = dynamic_library_open(path);
+
+	if(lib == NULL)
+		return false;
+
+	/* detect driver version, resolved without the asserting macro so that a
+	 * driver too old to export this symbol is rejected below, not aborted on */
+	int driver_version = 1000;
+	cuDriverGetVersion = (tcuDriverGetVersion*)dynamic_library_find(lib, "cuDriverGetVersion");
+	if(cuDriverGetVersion)
+		cuDriverGetVersion(&driver_version);
+
+	/* we require version 4.0 */
+	if(driver_version < 4000)
+		return false;
+
+	/* fetch all function pointers */
+	CUDA_LIBRARY_FIND(cuInit);
+	CUDA_LIBRARY_FIND(cuDeviceGet);
+	CUDA_LIBRARY_FIND(cuDeviceGetCount);
+	CUDA_LIBRARY_FIND(cuDeviceGetName);
+	CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
+	CUDA_LIBRARY_FIND(cuDeviceTotalMem);
+	CUDA_LIBRARY_FIND(cuDeviceGetProperties);
+	CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
+	CUDA_LIBRARY_FIND(cuCtxCreate);
+	CUDA_LIBRARY_FIND(cuCtxDestroy);
+	CUDA_LIBRARY_FIND(cuCtxAttach);
+	CUDA_LIBRARY_FIND(cuCtxDetach);
+	CUDA_LIBRARY_FIND(cuCtxPushCurrent);
+	CUDA_LIBRARY_FIND(cuCtxPopCurrent);
+	CUDA_LIBRARY_FIND(cuCtxGetDevice);
+	CUDA_LIBRARY_FIND(cuCtxSynchronize);
+	CUDA_LIBRARY_FIND(cuModuleLoad);
+	CUDA_LIBRARY_FIND(cuModuleLoadData);
+	CUDA_LIBRARY_FIND(cuModuleUnload);
+	CUDA_LIBRARY_FIND(cuModuleGetFunction);
+	CUDA_LIBRARY_FIND(cuModuleGetGlobal);
+	CUDA_LIBRARY_FIND(cuModuleGetTexRef);
+	CUDA_LIBRARY_FIND(cuMemGetInfo);
+	CUDA_LIBRARY_FIND(cuMemAlloc);
+	CUDA_LIBRARY_FIND(cuMemAllocPitch);
+	CUDA_LIBRARY_FIND(cuMemFree);
+	CUDA_LIBRARY_FIND(cuMemGetAddressRange);
+	CUDA_LIBRARY_FIND(cuMemAllocHost);
+	CUDA_LIBRARY_FIND(cuMemFreeHost);
+	CUDA_LIBRARY_FIND(cuMemHostAlloc);
+	CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoH);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoA);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoD);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoA);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoH);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoA);
+	CUDA_LIBRARY_FIND(cuMemcpy2D);
+	CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
+	CUDA_LIBRARY_FIND(cuMemcpy3D);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
+	CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
+	CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
+	CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
+	CUDA_LIBRARY_FIND(cuMemsetD8);
+	CUDA_LIBRARY_FIND(cuMemsetD16);
+	CUDA_LIBRARY_FIND(cuMemsetD32);
+	CUDA_LIBRARY_FIND(cuMemsetD2D8);
+	CUDA_LIBRARY_FIND(cuMemsetD2D16);
+	CUDA_LIBRARY_FIND(cuMemsetD2D32);
+	CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
+	CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
+	CUDA_LIBRARY_FIND(cuFuncGetAttribute);
+	CUDA_LIBRARY_FIND(cuArrayCreate);
+	CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
+	CUDA_LIBRARY_FIND(cuArrayDestroy);
+	CUDA_LIBRARY_FIND(cuArray3DCreate);
+	CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
+	CUDA_LIBRARY_FIND(cuTexRefCreate);
+	CUDA_LIBRARY_FIND(cuTexRefDestroy);
+	CUDA_LIBRARY_FIND(cuTexRefSetArray);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddress);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
+	CUDA_LIBRARY_FIND(cuTexRefSetFormat);
+	CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
+	CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
+	CUDA_LIBRARY_FIND(cuTexRefSetFlags);
+	CUDA_LIBRARY_FIND(cuTexRefGetAddress);
+	CUDA_LIBRARY_FIND(cuTexRefGetArray);
+	CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
+	CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
+	CUDA_LIBRARY_FIND(cuTexRefGetFormat);
+	CUDA_LIBRARY_FIND(cuTexRefGetFlags);
+	CUDA_LIBRARY_FIND(cuParamSetSize);
+	CUDA_LIBRARY_FIND(cuParamSeti);
+	CUDA_LIBRARY_FIND(cuParamSetf);
+	CUDA_LIBRARY_FIND(cuParamSetv);
+	CUDA_LIBRARY_FIND(cuParamSetTexRef);
+	CUDA_LIBRARY_FIND(cuLaunch);
+	CUDA_LIBRARY_FIND(cuLaunchGrid);
+	CUDA_LIBRARY_FIND(cuLaunchGridAsync);
+	CUDA_LIBRARY_FIND(cuEventCreate);
+	CUDA_LIBRARY_FIND(cuEventRecord);
+	CUDA_LIBRARY_FIND(cuEventQuery);
+	CUDA_LIBRARY_FIND(cuEventSynchronize);
+	CUDA_LIBRARY_FIND(cuEventDestroy);
+	CUDA_LIBRARY_FIND(cuEventElapsedTime);
+	CUDA_LIBRARY_FIND(cuStreamCreate);
+	CUDA_LIBRARY_FIND(cuStreamQuery);
+	CUDA_LIBRARY_FIND(cuStreamSynchronize);
+	CUDA_LIBRARY_FIND(cuStreamDestroy);
+
+	/* cuda 2.1 */
+	CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
+	CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
+	CUDA_LIBRARY_FIND(cuGLCtxCreate);
+	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
+	CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
+
+	/* cuda 2.3 */
+	CUDA_LIBRARY_FIND(cuMemHostGetFlags);
+	/* cuGraphicsGLRegisterBuffer and cuGraphicsGLRegisterImage were listed
+	 * here a second time, they are already resolved in the 2.1 group */
+
+	/* cuda 3.0 */
+	CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
+	CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
+	CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
+	CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
+	CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
+	CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
+	CUDA_LIBRARY_FIND(cuGraphicsMapResources);
+	CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
+	CUDA_LIBRARY_FIND(cuGetExportTable);
+
+	/* cuda 3.1 */
+	CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
+	CUDA_LIBRARY_FIND(cuSurfRefSetArray);
+	CUDA_LIBRARY_FIND(cuSurfRefGetArray);
+	CUDA_LIBRARY_FIND(cuCtxSetLimit);
+	CUDA_LIBRARY_FIND(cuCtxGetLimit);
+
+	/* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
+	   has both the old ones for compatibility and new ones with _v2 postfix,
+	   we load the _v2 ones here. */
+	CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
+	CUDA_LIBRARY_FIND_V2(cuCtxCreate);
+	CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
+	CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
+	CUDA_LIBRARY_FIND_V2(cuMemAlloc);
+	CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
+	CUDA_LIBRARY_FIND_V2(cuMemFree);
+	CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
+	CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
+	CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD8);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD16);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD32);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
+	CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
+	CUDA_LIBRARY_FIND_V2(cuArrayCreate);
+	CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
+	CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
+	CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
+	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
+	CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
+	CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
+	CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
+	CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
+
+	/* cuda 4.0 */
+	CUDA_LIBRARY_FIND(cuCtxSetCurrent);
+
+	/* success */
+	result = true;
+
+	return result;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h
new file mode 100644
index 00000000000..ecfaddf43cb
--- /dev/null
+++ b/intern/cycles/util/util_cuda.h
@@ -0,0 +1,619 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_CUDA_H__
+#define __UTIL_CUDA_H__
+
+#include <stdlib.h>
+#include "util_opengl.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* CUDA is linked in dynamically at runtime, so we can start the application
+ * without requiring a CUDA installation. Code adapted from the example
+ * matrixMulDynlinkJIT in the CUDA SDK.
+ *
+ * cuLibraryInit() returns true on success.
+ * NOTE(review): presumably it has to be called before any of the cu* function
+ * pointers declared further down is used, and returns false when the library
+ * or one of its symbols is missing -- confirm in util_cuda.cpp. */
+
+bool cuLibraryInit();
+
+CCL_NAMESPACE_END
+
+/* defines, structs, enums */
+
+#define CUDA_VERSION 3020
+
+/* Device pointer as passed to the driver: 64 bits wide on 64-bit platforms,
+ * 32 bits otherwise. Besides the x86-64 specific macros, _WIN64 and __LP64__
+ * are tested so that other 64-bit targets also get the wide type.
+ * NOTE(review): believed to match the condition used by later versions of
+ * NVIDIA's cuda.h -- confirm. */
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(_WIN64) || defined(__LP64__)
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+/* Driver API handle types. CUdevice is a plain integer; all the others are
+ * pointers to struct types that are only named in this header and never
+ * defined, so code here can pass them around but not look inside them. */
+typedef int CUdevice;
+typedef struct CUctx_st *CUcontext;
+typedef struct CUmod_st *CUmodule;
+typedef struct CUfunc_st *CUfunction;
+typedef struct CUarray_st *CUarray;
+typedef struct CUtexref_st *CUtexref;
+typedef struct CUsurfref_st *CUsurfref;
+typedef struct CUevent_st *CUevent;
+typedef struct CUstream_st *CUstream;
+typedef struct CUgraphicsResource_st *CUgraphicsResource;
+
+typedef struct CUuuid_st {
+    char bytes[16];
+} CUuuid;
+
+/* Context flags.
+ * NOTE(review): presumably these are passed as the flags argument of
+ * cuCtxCreate / cuGLCtxCreate -- confirm against the CUDA driver API docs.
+ *
+ * The three SCHED values are alternatives occupying the low two bits
+ * (CU_CTX_SCHED_MASK); the remaining flags are individual bits. */
+typedef enum CUctx_flags_enum {
+    CU_CTX_SCHED_AUTO  = 0,
+    CU_CTX_SCHED_SPIN  = 1,
+    CU_CTX_SCHED_YIELD = 2,
+    CU_CTX_SCHED_MASK  = 0x3, /* covers the SCHED values above */
+    CU_CTX_BLOCKING_SYNC = 4,
+    CU_CTX_MAP_HOST = 8,
+    CU_CTX_LMEM_RESIZE_TO_MAX = 16,
+    CU_CTX_FLAGS_MASK  = 0x1f /* bitwise OR of every flag bit listed above */
+} CUctx_flags;
+
+typedef enum CUevent_flags_enum {
+    CU_EVENT_DEFAULT        = 0,
+    CU_EVENT_BLOCKING_SYNC  = 1,
+    CU_EVENT_DISABLE_TIMING = 2
+} CUevent_flags;
+
+typedef enum CUarray_format_enum {
+    CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
+    CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+    CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+    CU_AD_FORMAT_SIGNED_INT8    = 0x08,
+    CU_AD_FORMAT_SIGNED_INT16   = 0x09,
+    CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
+    CU_AD_FORMAT_HALF           = 0x10,
+    CU_AD_FORMAT_FLOAT          = 0x20
+} CUarray_format;
+
+typedef enum CUaddress_mode_enum {
+    CU_TR_ADDRESS_MODE_WRAP   = 0,
+    CU_TR_ADDRESS_MODE_CLAMP  = 1,
+    CU_TR_ADDRESS_MODE_MIRROR = 2,
+    CU_TR_ADDRESS_MODE_BORDER = 3
+} CUaddress_mode;
+
+typedef enum CUfilter_mode_enum {
+    CU_TR_FILTER_MODE_POINT  = 0,
+    CU_TR_FILTER_MODE_LINEAR = 1
+} CUfilter_mode;
+
+/* Attribute selectors; this is the type of the attrib parameter in the
+ * cuDeviceGetAttribute signature (tcuDeviceGetAttribute).
+ *
+ * Two values are listed under two names each (8 and 12): the name with and
+ * the name without the MAX_ prefix are interchangeable. */
+typedef enum CUdevice_attribute_enum {
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /* alias of the previous entry */
+    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
+    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /* alias of the previous entry */
+    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
+    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
+    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
+    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
+    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
+    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
+} CUdevice_attribute;
+
+typedef struct CUdevprop_st {
+    int maxThreadsPerBlock;
+    int maxThreadsDim[3];
+    int maxGridSize[3];
+    int sharedMemPerBlock;
+    int totalConstantMemory;
+    int SIMDWidth;
+    int memPitch;
+    int regsPerBlock;
+    int clockRate;
+    int textureAlign;
+} CUdevprop;
+
+/* Attribute selectors; this is the type of the attrib parameter in the
+ * cuFuncGetAttribute signature (tcuFuncGetAttribute). */
+typedef enum CUfunction_attribute_enum {
+    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
+    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
+    CU_FUNC_ATTRIBUTE_MAX /* no explicit value: 7, one past the last attribute */
+} CUfunction_attribute;
+
+typedef enum CUfunc_cache_enum {
+    CU_FUNC_CACHE_PREFER_NONE    = 0x00,
+    CU_FUNC_CACHE_PREFER_SHARED  = 0x01,
+    CU_FUNC_CACHE_PREFER_L1      = 0x02
+} CUfunc_cache;
+
+typedef enum CUmemorytype_enum {
+    CU_MEMORYTYPE_HOST   = 0x01,
+    CU_MEMORYTYPE_DEVICE = 0x02,
+    CU_MEMORYTYPE_ARRAY  = 0x03
+} CUmemorytype;
+
+typedef enum CUcomputemode_enum {
+    CU_COMPUTEMODE_DEFAULT    = 0,
+    CU_COMPUTEMODE_EXCLUSIVE  = 1,
+    CU_COMPUTEMODE_PROHIBITED = 2
+} CUcomputemode;
+
+typedef enum CUjit_option_enum
+{
+    CU_JIT_MAX_REGISTERS = 0,
+    CU_JIT_THREADS_PER_BLOCK,
+    CU_JIT_WALL_TIME,
+    CU_JIT_INFO_LOG_BUFFER,
+    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+    CU_JIT_ERROR_LOG_BUFFER,
+    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+    CU_JIT_OPTIMIZATION_LEVEL,
+    CU_JIT_TARGET_FROM_CUCONTEXT,
+    CU_JIT_TARGET,
+    CU_JIT_FALLBACK_STRATEGY
+
+} CUjit_option;
+
+typedef enum CUjit_target_enum
+{
+    CU_TARGET_COMPUTE_10 = 0,
+    CU_TARGET_COMPUTE_11,
+    CU_TARGET_COMPUTE_12,
+    CU_TARGET_COMPUTE_13,
+    CU_TARGET_COMPUTE_20,
+    CU_TARGET_COMPUTE_21
+} CUjit_target;
+
+typedef enum CUjit_fallback_enum
+{
+    CU_PREFER_PTX = 0,
+    CU_PREFER_BINARY
+
+} CUjit_fallback;
+
+typedef enum CUgraphicsRegisterFlags_enum {
+    CU_GRAPHICS_REGISTER_FLAGS_NONE  = 0x00
+} CUgraphicsRegisterFlags;
+
+typedef enum CUgraphicsMapResourceFlags_enum {
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
+} CUgraphicsMapResourceFlags;
+
+typedef enum CUarray_cubemap_face_enum {
+    CU_CUBEMAP_FACE_POSITIVE_X  = 0x00,
+    CU_CUBEMAP_FACE_NEGATIVE_X  = 0x01,
+    CU_CUBEMAP_FACE_POSITIVE_Y  = 0x02,
+    CU_CUBEMAP_FACE_NEGATIVE_Y  = 0x03,
+    CU_CUBEMAP_FACE_POSITIVE_Z  = 0x04,
+    CU_CUBEMAP_FACE_NEGATIVE_Z  = 0x05
+} CUarray_cubemap_face;
+
+typedef enum CUlimit_enum {
+    CU_LIMIT_STACK_SIZE        = 0x00,
+    CU_LIMIT_PRINTF_FIFO_SIZE  = 0x01,
+    CU_LIMIT_MALLOC_HEAP_SIZE  = 0x02
+} CUlimit;
+
+/* Result code: the return type of every driver API function type declared in
+ * this header. CUDA_SUCCESS is 0, all CUDA_ERROR_* values are non-zero.
+ * Note that the enum tag is cudaError_enum while the typedef is CUresult. */
+typedef enum cudaError_enum {
+    CUDA_SUCCESS                              = 0,
+    CUDA_ERROR_INVALID_VALUE                  = 1,
+    CUDA_ERROR_OUT_OF_MEMORY                  = 2,
+    CUDA_ERROR_NOT_INITIALIZED                = 3,
+    CUDA_ERROR_DEINITIALIZED                  = 4,
+    CUDA_ERROR_NO_DEVICE                      = 100,
+    CUDA_ERROR_INVALID_DEVICE                 = 101,
+    CUDA_ERROR_INVALID_IMAGE                  = 200,
+    CUDA_ERROR_INVALID_CONTEXT                = 201,
+    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
+    CUDA_ERROR_MAP_FAILED                     = 205,
+    CUDA_ERROR_UNMAP_FAILED                   = 206,
+    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
+    CUDA_ERROR_ALREADY_MAPPED                 = 208,
+    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
+    CUDA_ERROR_ALREADY_ACQUIRED               = 210,
+    CUDA_ERROR_NOT_MAPPED                     = 211,
+    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
+    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
+    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
+    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
+    CUDA_ERROR_INVALID_SOURCE                 = 300,
+    CUDA_ERROR_FILE_NOT_FOUND                 = 301,
+    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
+    CUDA_ERROR_OPERATING_SYSTEM               = 304,
+    CUDA_ERROR_INVALID_HANDLE                 = 400,
+    CUDA_ERROR_NOT_FOUND                      = 500,
+    CUDA_ERROR_NOT_READY                      = 600,
+    CUDA_ERROR_LAUNCH_FAILED                  = 700,
+    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
+    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
+    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
+    CUDA_ERROR_UNKNOWN                        = 999
+} CUresult;
+
+#define CU_MEMHOSTALLOC_PORTABLE        0x01
+#define CU_MEMHOSTALLOC_DEVICEMAP       0x02
+#define CU_MEMHOSTALLOC_WRITECOMBINED   0x04
+
+typedef struct CUDA_MEMCPY2D_st {
+    size_t srcXInBytes;
+    size_t srcY;
+
+    CUmemorytype srcMemoryType;
+    const void *srcHost;
+    CUdeviceptr srcDevice;
+    CUarray srcArray;
+    size_t srcPitch;
+
+    size_t dstXInBytes;
+    size_t dstY;
+
+    CUmemorytype dstMemoryType;
+    void *dstHost;
+    CUdeviceptr dstDevice;
+    CUarray dstArray;
+    size_t dstPitch;
+
+    size_t WidthInBytes;
+    size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef struct CUDA_MEMCPY3D_st {
+    size_t srcXInBytes;
+    size_t srcY;
+    size_t srcZ;
+    size_t srcLOD;
+    CUmemorytype srcMemoryType;
+    const void *srcHost;
+    CUdeviceptr srcDevice;
+    CUarray srcArray;
+    void *reserved0;
+    size_t srcPitch;
+    size_t srcHeight;
+
+    size_t dstXInBytes;
+    size_t dstY;
+    size_t dstZ;
+    size_t dstLOD;
+    CUmemorytype dstMemoryType;
+    void *dstHost;
+    CUdeviceptr dstDevice;
+    CUarray dstArray;
+    void *reserved1;
+    size_t dstPitch;
+    size_t dstHeight;
+
+    size_t WidthInBytes;
+    size_t Height;
+    size_t Depth;
+} CUDA_MEMCPY3D;
+
+typedef struct CUDA_ARRAY_DESCRIPTOR_st
+{
+    size_t Width;
+    size_t Height;
+
+    CUarray_format Format;
+    unsigned int NumChannels;
+} CUDA_ARRAY_DESCRIPTOR;
+
+typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
+{
+    size_t Width;
+    size_t Height;
+    size_t Depth;
+
+    CUarray_format Format;
+    unsigned int NumChannels;
+    unsigned int Flags;
+} CUDA_ARRAY3D_DESCRIPTOR;
+
+#define CUDA_ARRAY3D_2DARRAY        0x01
+#define CUDA_ARRAY3D_SURFACE_LDST   0x02
+#define CU_TRSA_OVERRIDE_FORMAT 0x01
+#define CU_TRSF_READ_AS_INTEGER         0x01
+#define CU_TRSF_NORMALIZED_COORDINATES  0x02
+#define CU_TRSF_SRGB  0x10
+#define CU_PARAM_TR_DEFAULT -1
+
+/* Calling convention applied to every driver API function type below:
+ * __stdcall when building for Windows (_WIN32), nothing elsewhere. */
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
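+/* function types: one t-prefixed typedef per driver API entry point, giving
+ * its signature; they are used for the function pointer declarations below */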
+
+typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
+typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
+typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
+typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
+typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx );
+typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
+typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
+typedef CUresult CUDAAPI tcuCtxSynchronize(void);
+typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
+typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
+typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig);
+typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
+typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
+typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
+typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
+typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
+typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
+typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
+typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
+typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total);
+typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
+typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
+typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
+typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
+typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
+typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
+typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
+typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
+typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
+typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
+typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
+typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
+typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
+typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
+typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags);
+typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
+typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
+typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);
+typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
+typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
+typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
+typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
+typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
+typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
+typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
+typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
+typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
+typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
+typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
+typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
+typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
+typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
+typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
+typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
+typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
+typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
+typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
+typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
+typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
+typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
+typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
+typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
+typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
+typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
+typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
+typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
+typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
+typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
+typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
+typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
+
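+/* function pointer declarations, one per entry point; these are variables of
+ * the function types above rather than real functions, because the CUDA
+ * library is loaded at runtime */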
+
+extern tcuInit *cuInit;
+extern tcuDriverGetVersion *cuDriverGetVersion;
+extern tcuDeviceGet *cuDeviceGet;
+extern tcuDeviceGetCount *cuDeviceGetCount;
+extern tcuDeviceGetName *cuDeviceGetName;
+extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
+extern tcuDeviceTotalMem *cuDeviceTotalMem;
+extern tcuDeviceGetProperties *cuDeviceGetProperties;
+extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
+extern tcuCtxCreate *cuCtxCreate;
+extern tcuCtxDestroy *cuCtxDestroy;
+extern tcuCtxAttach *cuCtxAttach;
+extern tcuCtxDetach *cuCtxDetach;
+extern tcuCtxPushCurrent *cuCtxPushCurrent;
+extern tcuCtxPopCurrent *cuCtxPopCurrent;
+extern tcuCtxGetDevice *cuCtxGetDevice;
+extern tcuCtxSynchronize *cuCtxSynchronize;
+extern tcuModuleLoad *cuModuleLoad;
+extern tcuModuleLoadData *cuModuleLoadData;
+extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
+extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
+extern tcuModuleUnload *cuModuleUnload;
+extern tcuModuleGetFunction *cuModuleGetFunction;
+extern tcuModuleGetGlobal *cuModuleGetGlobal;
+extern tcuModuleGetTexRef *cuModuleGetTexRef;
+extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
+extern tcuMemGetInfo *cuMemGetInfo;
+extern tcuMemAlloc *cuMemAlloc;
+extern tcuMemAllocPitch *cuMemAllocPitch;
+extern tcuMemFree *cuMemFree;
+extern tcuMemGetAddressRange *cuMemGetAddressRange;
+extern tcuMemAllocHost *cuMemAllocHost;
+extern tcuMemFreeHost *cuMemFreeHost;
+extern tcuMemHostAlloc *cuMemHostAlloc;
+extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
+extern tcuMemHostGetFlags *cuMemHostGetFlags;
+extern tcuMemcpyHtoD *cuMemcpyHtoD;
+extern tcuMemcpyDtoH *cuMemcpyDtoH;
+extern tcuMemcpyDtoD *cuMemcpyDtoD;
+extern tcuMemcpyDtoA *cuMemcpyDtoA;
+extern tcuMemcpyAtoD *cuMemcpyAtoD;
+extern tcuMemcpyHtoA *cuMemcpyHtoA;
+extern tcuMemcpyAtoH *cuMemcpyAtoH;
+extern tcuMemcpyAtoA *cuMemcpyAtoA;
+extern tcuMemcpy2D *cuMemcpy2D;
+extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
+extern tcuMemcpy3D *cuMemcpy3D;
+extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
+extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
+extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
+extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
+extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
+extern tcuMemcpy2DAsync *cuMemcpy2DAsync;
+extern tcuMemcpy3DAsync *cuMemcpy3DAsync;
+extern tcuMemsetD8 *cuMemsetD8;
+extern tcuMemsetD16 *cuMemsetD16;
+extern tcuMemsetD32 *cuMemsetD32;
+extern tcuMemsetD2D8 *cuMemsetD2D8;
+extern tcuMemsetD2D16 *cuMemsetD2D16;
+extern tcuMemsetD2D32 *cuMemsetD2D32;
+extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
+extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
+extern tcuFuncGetAttribute *cuFuncGetAttribute;
+extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
+extern tcuArrayCreate *cuArrayCreate;
+extern tcuArrayGetDescriptor *cuArrayGetDescriptor;
+extern tcuArrayDestroy *cuArrayDestroy;
+extern tcuArray3DCreate *cuArray3DCreate;
+extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
+extern tcuTexRefCreate *cuTexRefCreate;
+extern tcuTexRefDestroy *cuTexRefDestroy;
+extern tcuTexRefSetArray *cuTexRefSetArray;
+extern tcuTexRefSetAddress *cuTexRefSetAddress;
+extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
+extern tcuTexRefSetFormat *cuTexRefSetFormat;
+extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+extern tcuTexRefSetFlags *cuTexRefSetFlags;
+extern tcuTexRefGetAddress *cuTexRefGetAddress;
+extern tcuTexRefGetArray *cuTexRefGetArray;
+extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
+extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
+extern tcuTexRefGetFormat *cuTexRefGetFormat;
+extern tcuTexRefGetFlags *cuTexRefGetFlags;
+extern tcuSurfRefSetArray *cuSurfRefSetArray;
+extern tcuSurfRefGetArray *cuSurfRefGetArray;
+extern tcuParamSetSize *cuParamSetSize;
+extern tcuParamSeti *cuParamSeti;
+extern tcuParamSetf *cuParamSetf;
+extern tcuParamSetv *cuParamSetv;
+extern tcuParamSetTexRef *cuParamSetTexRef;
+extern tcuLaunch *cuLaunch;
+extern tcuLaunchGrid *cuLaunchGrid;
+extern tcuLaunchGridAsync *cuLaunchGridAsync;
+extern tcuEventCreate *cuEventCreate;
+extern tcuEventRecord *cuEventRecord;
+extern tcuEventQuery *cuEventQuery;
+extern tcuEventSynchronize *cuEventSynchronize;
+extern tcuEventDestroy *cuEventDestroy;
+extern tcuEventElapsedTime *cuEventElapsedTime;
+extern tcuStreamCreate *cuStreamCreate;
+extern tcuStreamQuery *cuStreamQuery;
+extern tcuStreamSynchronize *cuStreamSynchronize;
+extern tcuStreamDestroy *cuStreamDestroy;
+extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
+extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
+extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
+extern tcuGraphicsMapResources *cuGraphicsMapResources;
+extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+extern tcuGetExportTable *cuGetExportTable;
+extern tcuCtxSetLimit *cuCtxSetLimit;
+extern tcuCtxGetLimit *cuCtxGetLimit;
+extern tcuGLCtxCreate *cuGLCtxCreate;
+extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+extern tcuCtxSetCurrent *cuCtxSetCurrent;
+
+#endif /* __UTIL_CUDA_H__ */
+
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
new file mode 100644
index 00000000000..17c169859ec
--- /dev/null
+++ b/intern/cycles/util/util_debug.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_DEBUG_H__
+#define __UTIL_DEBUG_H__
+
+#include <assert.h>
+
+#endif /* __UTIL_DEBUG_H__ */
+
diff --git a/intern/cycles/util/util_dynlib.cpp b/intern/cycles/util/util_dynlib.cpp
new file mode 100644
index 00000000000..5836073a07a
--- /dev/null
+++ b/intern/cycles/util/util_dynlib.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdlib.h>
+
+#include "util_dynlib.h"
+
+#ifdef _WIN32
+
+#include <Windows.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Windows variant of the library handle: holds the module handle obtained in
+ * dynamic_library_open and released in dynamic_library_close. */
+struct DynamicLibrary {
+	HMODULE module;
+};
+
+/* Load the shared library called name.
+ *
+ * Returns a newly allocated handle, to be released with
+ * dynamic_library_close, or NULL if the library could not be loaded. */
+DynamicLibrary *dynamic_library_open(const char *name)
+{
+	/* name is a narrow string, so call the ANSI entry point explicitly: the
+	 * LoadLibrary macro expands to LoadLibraryW when UNICODE is defined, and
+	 * that variant does not accept a const char*. */
+	HMODULE module = LoadLibraryA(name);
+
+	if(!module)
+		return NULL;
+
+	DynamicLibrary *lib = new DynamicLibrary();
+	lib->module = module;
+
+	return lib;
+}
+
+/* Look up the exported symbol name in lib and return its address, exactly as
+ * reported by GetProcAddress. */
+void *dynamic_library_find(DynamicLibrary *lib, const char *name)
+{
+	FARPROC proc = GetProcAddress(lib->module, name);
+
+	return (void*)proc;
+}
+
+/* Unload the library and free the handle; lib must not be used afterwards.
+ * Passing NULL is allowed and does nothing. */
+void dynamic_library_close(DynamicLibrary *lib)
+{
+	/* dynamic_library_open returns NULL on failure, so tolerate it here
+	 * instead of dereferencing a null handle */
+	if(!lib)
+		return;
+
+	FreeLibrary(lib->module);
+	delete lib;
+}
+
+CCL_NAMESPACE_END
+
+#else
+
+#include <dlfcn.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* dlopen based variant of the library handle: holds the pointer returned by
+ * dlopen in dynamic_library_open, released in dynamic_library_close. */
+struct DynamicLibrary {
+	void *module;
+};
+
+/* Load the shared library called name with dlopen(RTLD_NOW).
+ *
+ * Returns a newly allocated handle, to be released with
+ * dynamic_library_close, or NULL if dlopen failed. */
+DynamicLibrary *dynamic_library_open(const char *name)
+{
+	DynamicLibrary *lib = NULL;
+	void *handle = dlopen(name, RTLD_NOW);
+
+	/* only allocate the wrapper once the library is actually loaded */
+	if(handle) {
+		lib = new DynamicLibrary();
+		lib->module = handle;
+	}
+
+	return lib;
+}
+
+/* Look up the symbol name in lib and return its address, exactly as reported
+ * by dlsym. */
+void *dynamic_library_find(DynamicLibrary *lib, const char *name)
+{
+	void *symbol = dlsym(lib->module, name);
+
+	return symbol;
+}
+
+/* Unload the library and free the handle; lib must not be used afterwards.
+ * Passing NULL is allowed and does nothing. */
+void dynamic_library_close(DynamicLibrary *lib)
+{
+	/* dynamic_library_open returns NULL on failure, so tolerate it here
+	 * instead of dereferencing a null handle */
+	if(!lib)
+		return;
+
+	dlclose(lib->module);
+	delete lib;
+}
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/util/util_dynlib.h b/intern/cycles/util/util_dynlib.h
new file mode 100644
index 00000000000..888fb6cef24
--- /dev/null
+++ b/intern/cycles/util/util_dynlib.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_DYNLIB_H__
+#define __UTIL_DYNLIB_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Opaque handle to a loaded dynamic library; defined per platform in
+ * util_dynlib.cpp. */
+struct DynamicLibrary;
+
+/* Load the library with the given file name. Returns a heap allocated
+ * handle, or NULL when loading fails. */
+DynamicLibrary *dynamic_library_open(const char *name);
+/* Return the address of the symbol `name` in the library, as reported by
+ * the platform lookup function (GetProcAddress or dlsym). */
+void *dynamic_library_find(DynamicLibrary *lib, const char *name);
+/* Unload the library and delete the handle; lib must not be used
+ * afterwards. */
+void dynamic_library_close(DynamicLibrary *lib);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_DYNLIB_H__ */
+
diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h
new file mode 100644
index 00000000000..b8298c003b5
--- /dev/null
+++ b/intern/cycles/util/util_foreach.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_FOREACH_H__
+#define __UTIL_FOREACH_H__
+
+/* Use Boost to get nice foreach() loops for STL data structures. */
+
+#include <boost/foreach.hpp>
+#define foreach BOOST_FOREACH
+
+#endif /* __UTIL_FOREACH_H__ */
+
diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h
new file mode 100644
index 00000000000..dfcd58183d9
--- /dev/null
+++ b/intern/cycles/util/util_function.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_FUNCTION_H__
+#define __UTIL_FUNCTION_H__
+
+#include <boost/bind.hpp>
+#include <boost/function.hpp>
+
+CCL_NAMESPACE_BEGIN
+
+using boost::function;
+#define function_bind boost::bind
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_FUNCTION_H__ */
+
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
new file mode 100644
index 00000000000..0b7164403f2
--- /dev/null
+++ b/intern/cycles/util/util_hash.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_HASH_H__
+#define __UTIL_HASH_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Hash a pair of unsigned integers into one unsigned integer.
+ *
+ * Three state words start from the same constant, the two keys are added to
+ * the first two, and a fixed sequence of xor / subtract-rotated steps mixes
+ * them. The third word is returned. */
+static unsigned int hash_int_2d(unsigned int kx, unsigned int ky)
+{
+	/* Rotate x left by k bits; written for a 32 bit wide x. */
+	#define HASH_ROTL(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+	const unsigned int seed = 0xdeadbeef + (2 << 2) + 13;
+
+	unsigned int a = seed + kx;
+	unsigned int b = seed + ky;
+	unsigned int c = seed;
+
+	c ^= b;
+	c -= HASH_ROTL(b, 14);
+
+	a ^= c;
+	a -= HASH_ROTL(c, 11);
+
+	b ^= a;
+	b -= HASH_ROTL(a, 25);
+
+	c ^= b;
+	c -= HASH_ROTL(b, 16);
+
+	a ^= c;
+	a -= HASH_ROTL(c, 4);
+
+	b ^= a;
+	b -= HASH_ROTL(a, 14);
+
+	c ^= b;
+	c -= HASH_ROTL(b, 24);
+
+	#undef HASH_ROTL
+
+	return c;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_HASH_H__ */
+
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
new file mode 100644
index 00000000000..df566ccc79c
--- /dev/null
+++ b/intern/cycles/util/util_image.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_IMAGE_H__
+#define __UTIL_IMAGE_H__
+
+/* OpenImageIO is used for all image file reading and writing. */
+
+#include <OpenImageIO/imageio.h>
+
+CCL_NAMESPACE_BEGIN
+
+OIIO_NAMESPACE_USING
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_IMAGE_H__ */
+
diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h
new file mode 100644
index 00000000000..d8f79643469
--- /dev/null
+++ b/intern/cycles/util/util_list.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_LIST_H__
+#define __UTIL_LIST_H__
+
+#include <list>
+
+CCL_NAMESPACE_BEGIN
+
+using std::list;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_LIST_H__ */
+
diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h
new file mode 100644
index 00000000000..884f45c3b27
--- /dev/null
+++ b/intern/cycles/util/util_map.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_MAP_H__
+#define __UTIL_MAP_H__
+
+#include <map>
+#include <tr1/unordered_map>
+
+CCL_NAMESPACE_BEGIN
+
+using std::map;
+using std::pair;
+using std::tr1::unordered_map;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MAP_H__ */
+
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
new file mode 100644
index 00000000000..349496e4a70
--- /dev/null
+++ b/intern/cycles/util/util_math.h
@@ -0,0 +1,759 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_MATH_H__
+#define __UTIL_MATH_H__
+
+/* Math
+ *
+ * Basic math functions on scalar and vector types. This header is used by
+ * both the kernel code when compiled as C++, and other C++ non-kernel code. */
+
+#ifndef __KERNEL_OPENCL__
+
+#define _USE_MATH_DEFINES
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+#endif
+
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define M_PI_F		((float)3.14159265358979323846264338327950288)
+#define M_PI_2_F	((float)1.57079632679489661923132169163975144)
+#define M_PI_4_F	((float)0.785398163397448309615660845819875721)
+#define M_1_PI_F	((float)0.318309886183790671537767526745028724)
+#define M_2_PI_F	((float)0.636619772367581343075535053490057448)
+
+/* Scalar */
+
+#ifdef _WIN32
+
+#define copysignf _copysign
+
+/* Replacement for fmaxf() on Windows: a when a > b, otherwise b. */
+__device_inline float fmaxf(float a, float b)
+{
+	if(a > b)
+		return a;
+
+	return b;
+}
+
+/* Replacement for fminf() on Windows: a when a < b, otherwise b. */
+__device_inline float fminf(float a, float b)
+{
+	if(a < b)
+		return a;
+
+	return b;
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
+/* Larger of two ints; b when they compare equal. */
+__device_inline int max(int a, int b)
+{
+	if(a > b)
+		return a;
+
+	return b;
+}
+
+/* Smaller of two ints; b when they compare equal. */
+__device_inline int min(int a, int b)
+{
+	if(a < b)
+		return a;
+
+	return b;
+}
+
+/* Larger of two floats; b whenever a > b does not hold. */
+__device_inline float max(float a, float b)
+{
+	if(a > b)
+		return a;
+
+	return b;
+}
+
+/* Smaller of two floats; b whenever a < b does not hold. */
+__device_inline float min(float a, float b)
+{
+	if(a < b)
+		return a;
+
+	return b;
+}
+
+#endif
+
+/* Smallest of four floats, folded left to right with min(). */
+__device_inline float min4(float a, float b, float c, float d)
+{
+	const float ab = min(a, b);
+	const float abc = min(ab, c);
+
+	return min(abc, d);
+}
+
+/* Largest of four floats, folded left to right with max(). */
+__device_inline float max4(float a, float b, float c, float d)
+{
+	const float ab = max(a, b);
+	const float abc = max(ab, c);
+
+	return max(abc, d);
+}
+
+#ifndef __KERNEL_OPENCL__
+
+/* Limit a to [mn, mx]: the lower bound is applied first, then the upper. */
+__device_inline int clamp(int a, int mn, int mx)
+{
+	const int raised = max(a, mn);
+
+	return min(raised, mx);
+}
+
+/* Limit a to [mn, mx]: the lower bound is applied first, then the upper. */
+__device_inline float clamp(float a, float mn, float mx)
+{
+	const float raised = max(a, mn);
+
+	return min(raised, mx);
+}
+
+#endif
+
+/* Sign of f as -1.0f or 1.0f; every input that is not below zero gives
+ * 1.0f. */
+__device_inline float signf(float f)
+{
+	if(f < 0.0f)
+		return -1.0f;
+
+	return 1.0f;
+}
+
+/* Push values closer to zero than eps out to magnitude eps, with the sign
+ * given by signf(f). Other values are returned unchanged. */
+__device_inline float nonzerof(float f, float eps)
+{
+	return (fabsf(f) < eps)? signf(f)*eps: f;
+}
+
+/* Float2 Vector */
+
+#ifndef __KERNEL_OPENCL__
+
+/* True when both components compare equal to zero. */
+__device_inline bool is_zero(const float2 a)
+{
+	return !(a.x != 0.0f || a.y != 0.0f);
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
+/* Arithmetic mean of the two components. */
+__device_inline float average(const float2 a)
+{
+	return (a.x + a.y)*0.5f;
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
+/* Component-wise negation. */
+__device_inline float2 operator-(const float2 a)
+{
+	return make_float2(-a.x, -a.y);
+}
+
+/* Component-wise product. */
+__device_inline float2 operator*(const float2 a, const float2 b)
+{
+	return make_float2(a.x*b.x, a.y*b.y);
+}
+
+/* Scale by a scalar. */
+__device_inline float2 operator*(const float2 a, float f)
+{
+	return make_float2(a.x*f, a.y*f);
+}
+
+/* Scale by a scalar, scalar on the left. */
+__device_inline float2 operator*(float f, const float2 a)
+{
+	return a*f;
+}
+
+/* Scalar divided by each component. */
+__device_inline float2 operator/(float f, const float2 a)
+{
+	return make_float2(f/a.x, f/a.y);
+}
+
+/* Divide by a scalar; implemented as a multiply by the reciprocal. */
+__device_inline float2 operator/(const float2 a, float f)
+{
+	return a*(1.0f/f);
+}
+
+/* Component-wise quotient. */
+__device_inline float2 operator/(const float2 a, const float2 b)
+{
+	return make_float2(a.x/b.x, a.y/b.y);
+}
+
+/* Component-wise sum. */
+__device_inline float2 operator+(const float2 a, const float2 b)
+{
+	return make_float2(a.x+b.x, a.y+b.y);
+}
+
+/* Component-wise difference. */
+__device_inline float2 operator-(const float2 a, const float2 b)
+{
+	return make_float2(a.x-b.x, a.y-b.y);
+}
+
+/* In-place component-wise sum; returns a copy of the updated value. */
+__device_inline float2 operator+=(float2& a, const float2 b)
+{
+	a.x = a.x + b.x;
+	a.y = a.y + b.y;
+	return a;
+}
+
+/* In-place component-wise product; returns a copy of the updated value. */
+__device_inline float2 operator*=(float2& a, const float2 b)
+{
+	a.x = a.x*b.x;
+	a.y = a.y*b.y;
+	return a;
+}
+
+/* In-place scale by a scalar; returns a copy of the updated value. */
+__device_inline float2 operator*=(float2& a, float f)
+{
+	a.x = a.x*f;
+	a.y = a.y*f;
+	return a;
+}
+
+/* In-place component-wise quotient; returns a copy of the updated value. */
+__device_inline float2 operator/=(float2& a, const float2 b)
+{
+	a.x = a.x/b.x;
+	a.y = a.y/b.y;
+	return a;
+}
+
+/* In-place divide by a scalar, done as a multiply by the reciprocal. */
+__device_inline float2 operator/=(float2& a, float f)
+{
+	const float reciprocal = 1.0f/f;
+	a.x = a.x*reciprocal;
+	a.y = a.y*reciprocal;
+	return a;
+}
+
+
+/* Dot product. */
+__device_inline float dot(const float2 a, const float2 b)
+{
+	return a.x*b.x + a.y*b.y;
+}
+
+/* Scalar 2D cross product: a.x*b.y - a.y*b.x. */
+__device_inline float cross(const float2 a, const float2 b)
+{
+	return (a.x*b.y - a.y*b.x);
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
+/* Euclidean length. */
+__device_inline float len(const float2 a)
+{
+	const float squared = dot(a, a);
+
+	return sqrtf(squared);
+}
+
+/* Vector divided by its own length; no special case for zero length. */
+__device_inline float2 normalize(const float2 a)
+{
+	const float length = len(a);
+
+	return a/length;
+}
+
+/* Like normalize(), additionally storing the original length in *t. */
+__device_inline float2 normalize_len(const float2 a, float *t)
+{
+	const float length = len(a);
+
+	*t = length;
+	return a/length;
+}
+
+/* Exact component-wise equality. */
+__device_inline bool operator==(const float2 a, const float2 b)
+{
+	if(a.x != b.x)
+		return false;
+
+	return a.y == b.y;
+}
+
+/* Negation of operator==. */
+__device_inline bool operator!=(const float2 a, const float2 b)
+{
+	return (a.x != b.x || a.y != b.y);
+}
+
+/* Component-wise minimum. */
+__device_inline float2 min(float2 a, float2 b)
+{
+	return make_float2(min(a.x, b.x), min(a.y, b.y));
+}
+
+/* Component-wise maximum. */
+__device_inline float2 max(float2 a, float2 b)
+{
+	return make_float2(max(a.x, b.x), max(a.y, b.y));
+}
+
+/* Component-wise clamp: lower bound first, then upper bound. */
+__device_inline float2 clamp(float2 a, float2 mn, float2 mx)
+{
+	const float2 raised = max(a, mn);
+
+	return min(raised, mx);
+}
+
+/* Component-wise absolute value. */
+__device_inline float2 fabs(float2 a)
+{
+	float2 r = {fabsf(a.x), fabsf(a.y)};
+	return r;
+}
+
+/* First two components of a float4. */
+__device_inline float2 as_float2(const float4 a)
+{
+	float2 r = {a.x, a.y};
+	return r;
+}
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Debug helper: print "label: x y" with 8 decimals to stdout. */
+__device_inline void print_float2(const char *label, const float2& a)
+{
+	const double x = a.x;
+	const double y = a.y;
+
+	printf("%s: %.8f %.8f\n", label, x, y);
+}
+
+#endif
+
+#ifndef __KERNEL_OPENCL__
+
+/* Linear interpolation: a at t = 0, moving towards b as t increases. */
+__device_inline float2 interp(float2 a, float2 b, float t)
+{
+	const float2 delta = b - a;
+
+	return a + t*delta;
+}
+
+#endif
+
+/* Float3 Vector */
+
+/* True when all three components compare equal to zero. */
+__device_inline bool is_zero(const float3 a)
+{
+	return !(a.x != 0.0f || a.y != 0.0f || a.z != 0.0f);
+}
+
+/* Arithmetic mean of the three components. */
+__device_inline float average(const float3 a)
+{
+	const float sum = a.x + a.y + a.z;
+
+	return sum*(1.0f/3.0f);
+}
+
+#ifndef __KERNEL_OPENCL__
+
+/* Component-wise negation. */
+__device_inline float3 operator-(const float3 a)
+{
+	return make_float3(-a.x, -a.y, -a.z);
+}
+
+/* Component-wise product. */
+__device_inline float3 operator*(const float3 a, const float3 b)
+{
+	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
+}
+
+/* Scale by a scalar. */
+__device_inline float3 operator*(const float3 a, float f)
+{
+	return make_float3(a.x*f, a.y*f, a.z*f);
+}
+
+/* Scale by a scalar, scalar on the left. */
+__device_inline float3 operator*(float f, const float3 a)
+{
+	return make_float3(a.x*f, a.y*f, a.z*f);
+}
+
+/* Scalar divided by each component. */
+__device_inline float3 operator/(float f, const float3 a)
+{
+	return make_float3(f/a.x, f/a.y, f/a.z);
+}
+
+/* Divide by a scalar; implemented as a multiply by the reciprocal. */
+__device_inline float3 operator/(const float3 a, float f)
+{
+	const float reciprocal = 1.0f/f;
+
+	return make_float3(a.x*reciprocal, a.y*reciprocal, a.z*reciprocal);
+}
+
+/* Component-wise quotient. */
+__device_inline float3 operator/(const float3 a, const float3 b)
+{
+	return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
+}
+
+/* Component-wise sum. */
+__device_inline float3 operator+(const float3 a, const float3 b)
+{
+	return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
+}
+
+/* Component-wise difference. */
+__device_inline float3 operator-(const float3 a, const float3 b)
+{
+	return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
+}
+
+/* In-place component-wise sum; returns a copy of the updated value. */
+__device_inline float3 operator+=(float3& a, const float3 b)
+{
+	a.x = a.x + b.x;
+	a.y = a.y + b.y;
+	a.z = a.z + b.z;
+	return a;
+}
+
+/* In-place component-wise product; returns a copy of the updated value. */
+__device_inline float3 operator*=(float3& a, const float3 b)
+{
+	a.x = a.x*b.x;
+	a.y = a.y*b.y;
+	a.z = a.z*b.z;
+	return a;
+}
+
+/* In-place scale by a scalar; returns a copy of the updated value. */
+__device_inline float3 operator*=(float3& a, float f)
+{
+	a.x = a.x*f;
+	a.y = a.y*f;
+	a.z = a.z*f;
+	return a;
+}
+
+/* In-place component-wise quotient; returns a copy of the updated value. */
+__device_inline float3 operator/=(float3& a, const float3 b)
+{
+	a.x = a.x/b.x;
+	a.y = a.y/b.y;
+	a.z = a.z/b.z;
+	return a;
+}
+
+/* In-place divide by a scalar, done as a multiply by the reciprocal. */
+__device_inline float3 operator/=(float3& a, float f)
+{
+	const float reciprocal = 1.0f/f;
+	a.x = a.x*reciprocal;
+	a.y = a.y*reciprocal;
+	a.z = a.z*reciprocal;
+	return a;
+}
+
+/* Dot product. */
+__device_inline float dot(const float3 a, const float3 b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+/* Cross product a x b. */
+__device_inline float3 cross(const float3 a, const float3 b)
+{
+	return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
+}
+
+#endif
+
+/* Euclidean length. */
+__device_inline float len(const float3 a)
+{
+	const float squared = dot(a, a);
+
+	return sqrtf(squared);
+}
+
+#ifndef __KERNEL_OPENCL__
+
+/* Vector divided by its own length; no special case for zero length. */
+__device_inline float3 normalize(const float3 a)
+{
+	const float length = len(a);
+
+	return a/length;
+}
+
+#endif
+
+/* Divide a by its length, additionally storing that length in *t. */
+__device_inline float3 normalize_len(const float3 a, float *t)
+{
+	const float length = len(a);
+
+	*t = length;
+	return a/length;
+}
+
+#ifndef __KERNEL_OPENCL__
+
+/* Exact component-wise equality. */
+__device_inline bool operator==(const float3 a, const float3 b)
+{
+	if(a.x != b.x)
+		return false;
+	if(a.y != b.y)
+		return false;
+
+	return a.z == b.z;
+}
+
+/* Negation of operator==. */
+__device_inline bool operator!=(const float3 a, const float3 b)
+{
+	return (a.x != b.x || a.y != b.y || a.z != b.z);
+}
+
+/* Component-wise minimum. */
+__device_inline float3 min(float3 a, float3 b)
+{
+	return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+}
+
+/* Component-wise maximum. */
+__device_inline float3 max(float3 a, float3 b)
+{
+	return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+}
+
+/* Component-wise clamp: lower bound first, then upper bound. */
+__device_inline float3 clamp(float3 a, float3 mn, float3 mx)
+{
+	const float3 raised = max(a, mn);
+
+	return min(raised, mx);
+}
+
+/* Component-wise absolute value. */
+__device_inline float3 fabs(float3 a)
+{
+	float3 r = {fabsf(a.x), fabsf(a.y), fabsf(a.z)};
+	return r;
+}
+
+/* First three components of a float4. */
+__device_inline float3 as_float3(const float4& a)
+{
+	float3 r = {a.x, a.y, a.z};
+	return r;
+}
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Debug helper: print "label: x y z" with 8 decimals to stdout. */
+__device_inline void print_float3(const char *label, const float3& a)
+{
+	const double x = a.x;
+	const double y = a.y;
+	const double z = a.z;
+
+	printf("%s: %.8f %.8f %.8f\n", label, x, y, z);
+}
+
+#endif
+
+/* Linear interpolation: a at t = 0, moving towards b as t increases. */
+__device_inline float3 interp(float3 a, float3 b, float t)
+{
+	const float3 delta = b - a;
+
+	return a + t*delta;
+}
+
+/* Float4 Vector */
+
+#ifndef __KERNEL_OPENCL__
+
+/* True when all four components compare equal to zero. */
+__device_inline bool is_zero(const float4& a)
+{
+	return !(a.x != 0.0f || a.y != 0.0f || a.z != 0.0f || a.w != 0.0f);
+}
+
+/* Arithmetic mean of the four components. */
+__device_inline float average(const float4& a)
+{
+	const float sum = a.x + a.y + a.z + a.w;
+
+	return sum*0.25f;
+}
+
+/* Component-wise negation. */
+__device_inline float4 operator-(const float4& a)
+{
+	return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+
+/* Component-wise product. */
+__device_inline float4 operator*(const float4& a, const float4& b)
+{
+	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+}
+
+/* Scale by a scalar. */
+__device_inline float4 operator*(const float4& a, float f)
+{
+	return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
+}
+
+/* Scale by a scalar, scalar on the left. */
+__device_inline float4 operator*(float f, const float4& a)
+{
+	return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
+}
+
+/* Divide by a scalar; implemented as a multiply by the reciprocal. */
+__device_inline float4 operator/(const float4& a, float f)
+{
+	const float reciprocal = 1.0f/f;
+
+	return make_float4(a.x*reciprocal, a.y*reciprocal, a.z*reciprocal, a.w*reciprocal);
+}
+
+/* Component-wise quotient. */
+__device_inline float4 operator/(const float4& a, const float4& b)
+{
+	return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+}
+
+/* Component-wise sum. */
+__device_inline float4 operator+(const float4& a, const float4& b)
+{
+	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+}
+
+/* Component-wise difference. */
+__device_inline float4 operator-(const float4& a, const float4& b)
+{
+	return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+
+/* In-place component-wise sum; returns a copy of the updated value. */
+__device_inline float4 operator+=(float4& a, const float4& b)
+{
+	a.x = a.x + b.x;
+	a.y = a.y + b.y;
+	a.z = a.z + b.z;
+	a.w = a.w + b.w;
+	return a;
+}
+
+/* In-place component-wise product; returns a copy of the updated value. */
+__device_inline float4 operator*=(float4& a, const float4& b)
+{
+	a.x = a.x*b.x;
+	a.y = a.y*b.y;
+	a.z = a.z*b.z;
+	a.w = a.w*b.w;
+	return a;
+}
+
+/* In-place divide by a scalar, done as a multiply by the reciprocal. */
+__device_inline float4 operator/=(float4& a, float f)
+{
+	const float reciprocal = 1.0f/f;
+	a.x = a.x*reciprocal;
+	a.y = a.y*reciprocal;
+	a.z = a.z*reciprocal;
+	a.w = a.w*reciprocal;
+	return a;
+}
+
+/* Four component dot product. */
+__device_inline float dot(const float4& a, const float4& b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+}
+
+/* Cross product of the xyz parts; w of the result is set to zero. */
+__device_inline float4 cross(const float4& a, const float4& b)
+{
+	return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f);
+}
+
+/* Component-wise minimum. */
+__device_inline float4 min(float4 a, float4 b)
+{
+	float4 r = {min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)};
+	return r;
+}
+
+/* Component-wise maximum. */
+__device_inline float4 max(float4 a, float4 b)
+{
+	float4 r = {max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)};
+	return r;
+}
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Debug helper: print "label: x y z w" with 8 decimals to stdout. */
+__device_inline void print_float4(const char *label, const float4& a)
+{
+	const double x = a.x;
+	const double y = a.y;
+	const double z = a.z;
+	const double w = a.w;
+
+	printf("%s: %.8f %.8f %.8f %.8f\n", label, x, y, z, w);
+}
+
+#endif
+
+/* Int3 */
+
+#ifndef __KERNEL_OPENCL__
+
+/* Component-wise maximum. */
+__device_inline int3 max(int3 a, int3 b)
+{
+	const int x = max(a.x, b.x);
+	const int y = max(a.y, b.y);
+	const int z = max(a.z, b.z);
+
+	int3 r = {x, y, z};
+	return r;
+}
+
+/* Clamp every component to the same scalar range [mn, mx]. */
+__device_inline int3 clamp(const int3& a, int mn, int mx)
+{
+	const int x = clamp(a.x, mn, mx);
+	const int y = clamp(a.y, mn, mx);
+	const int z = clamp(a.z, mn, mx);
+
+	int3 r = {x, y, z};
+	return r;
+}
+
+/* Clamp every component of a between the matching component of mn and the
+ * shared scalar upper bound mx.
+ *
+ * mn is only read, so it is taken by const reference; this also accepts
+ * const objects and temporaries, which a non-const reference rejects. */
+__device_inline int3 clamp(const int3& a, const int3& mn, int mx)
+{
+	int3 r = {clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)};
+	return r;
+}
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Debug helper: print "label: x y z" to stdout. */
+__device_inline void print_int3(const char *label, const int3& a)
+{
+	const int x = a.x;
+	const int y = a.y;
+	const int z = a.z;
+
+	printf("%s: %d %d %d\n", label, x, y, z);
+}
+
+#endif
+
+/* Int4 */
+
+#ifndef __KERNEL_OPENCL__
+
+/* Component-wise a >= b; each result component is 1 when the comparison
+ * holds and 0 otherwise. */
+__device_inline int4 operator>=(float4 a, float4 b)
+{
+	const int x = (a.x >= b.x);
+	const int y = (a.y >= b.y);
+	const int z = (a.z >= b.z);
+	const int w = (a.w >= b.w);
+
+	return make_int4(x, y, z, w);
+}
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Debug helper: print "label: x y z w" to stdout. */
+__device_inline void print_int4(const char *label, const int4& a)
+{
+	const int x = a.x;
+	const int y = a.y;
+	const int z = a.z;
+	const int w = a.w;
+
+	printf("%s: %d %d %d %d\n", label, x, y, z, w);
+}
+
+#endif
+
+/* Int/Float conversion */
+
+#ifndef __KERNEL_OPENCL__
+
+/* Reinterpret the storage of a float as an unsigned int (no numeric
+ * conversion), through a union. */
+__device_inline unsigned int as_uint(float f)
+{
+	union { float value; unsigned int bits; } pun;
+	pun.value = f;
+	return pun.bits;
+}
+
+/* Reinterpret the storage of a float as an int, through a union. */
+__device_inline int __float_as_int(float f)
+{
+	union { float value; int bits; } pun;
+	pun.value = f;
+	return pun.bits;
+}
+
+/* Reinterpret the storage of an int as a float, through a union. */
+__device_inline float __int_as_float(int i)
+{
+	union { float value; int bits; } pun;
+	pun.bits = i;
+	return pun.value;
+}
+
+/* Reinterpret the storage of a float as a uint, through a union. */
+__device_inline uint __float_as_uint(float f)
+{
+	union { float value; uint bits; } pun;
+	pun.value = f;
+	return pun.bits;
+}
+
+/* Reinterpret the storage of a uint as a float, through a union. */
+__device_inline float __uint_as_float(uint i)
+{
+	union { float value; uint bits; } pun;
+	pun.bits = i;
+	return pun.value;
+}
+
+/* Interpolation */
+
+/* Generic linear blend a*(1 - t) + b*t, with the result cast back to the
+ * value type A. */
+template<class A, class B> __device_inline A lerp(const A& a, const A& b, const B& t)
+{
+	const B one_minus_t = (B)1 - t;
+
+	return (A)(a * one_minus_t + b * t);
+}
+
+/* Triangle */
+
+/* Area of the triangle (v1, v2, v3): half the length of the cross product
+ * of the two edges leaving v2. */
+__device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
+{
+	const float3 edge_to_v3 = v3 - v2;
+	const float3 edge_to_v1 = v1 - v2;
+	const float3 normal = cross(edge_to_v3, edge_to_v1);
+
+	return len(normal)*0.5f;
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_H__ */
+
diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp
new file mode 100644
index 00000000000..9fd44740531
--- /dev/null
+++ b/intern/cycles/util/util_md5.cpp
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 1999, 2002 Aladdin Enterprises.  All rights reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *	claim that you wrote the original software. If you use this software
+ *	in a product, an acknowledgment in the product documentation would be
+ *	appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *	misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * L. Peter Deutsch
+ * ghost@aladdin.com
+ */
+
+/* Minor modifications done to remove some code and change style. */
+
+#include "util_md5.h"
+
+#include <string.h>
+#include <stdio.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Per-step additive constants T1..T64 used by the four rounds in
+ * MD5Hash::process(). T_MASK is a 32 bit value with every bit set. Some
+ * constants are spelled as T_MASK ^ <bitwise complement>, with the
+ * resulting value given in the comment next to them. */
+#define T_MASK ((uint32_t)~0)
+#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
+#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
+#define T3	0x242070db
+#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
+#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
+#define T6	0x4787c62a
+#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
+#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
+#define T9	0x698098d8
+#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
+#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
+#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
+#define T13	0x6b901122
+#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
+#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
+#define T16	0x49b40821
+#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
+#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
+#define T19	0x265e5a51
+#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
+#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
+#define T22	0x02441453
+#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
+#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
+#define T25	0x21e1cde6
+#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
+#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
+#define T28	0x455a14ed
+#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
+#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
+#define T31	0x676f02d9
+#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
+#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
+#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
+#define T35	0x6d9d6122
+#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
+#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
+#define T38	0x4bdecfa9
+#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
+#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
+#define T41	0x289b7ec6
+#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
+#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
+#define T44	0x04881d05
+#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
+#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
+#define T47	0x1fa27cf8
+#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
+#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
+#define T50	0x432aff97
+#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
+#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
+#define T53	0x655b59c3
+#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
+#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
+#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
+#define T57	0x6fa87e4f
+#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
+#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
+#define T60	0x4e0811a1
+#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
+#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
+#define T63	0x2ad7d2bb
+#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
+
+/* Mix one 64 byte block of message data into the running digest state
+ * abcd[].
+ *
+ * data must point to at least 64 readable bytes; no alignment is
+ * required. */
+void MD5Hash::process(const uint8_t *data /*[64]*/)
+{
+	uint32_t
+	a = abcd[0], b = abcd[1],
+	c = abcd[2], d = abcd[3];
+	uint32_t t;
+	uint32_t X[16];
+
+	/* Assemble the sixteen message words from their little-endian byte
+	 * order. Building them from individual bytes gives the same words on
+	 * little- and big-endian machines and for any alignment of data. It
+	 * avoids reading byte data through a uint32_t pointer, doing pointer
+	 * arithmetic against a null pointer to test alignment, and shifting a
+	 * byte promoted to (signed) int by 24 bits. */
+	for(int i = 0; i < 16; ++i) {
+		const uint8_t *xp = data + i*4;
+
+		X[i] = (uint32_t)xp[0] | ((uint32_t)xp[1] << 8) |
+		       ((uint32_t)xp[2] << 16) | ((uint32_t)xp[3] << 24);
+	}
+
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+	/* Round 1. */
+	/* Let [abcd k s i] denote the operation
+	   a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
+#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + F(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+	/* Do the following 16 operations. */
+	SET(a, b, c, d,  0,  7,  T1);
+	SET(d, a, b, c,  1, 12,  T2);
+	SET(c, d, a, b,  2, 17,  T3);
+	SET(b, c, d, a,  3, 22,  T4);
+	SET(a, b, c, d,  4,  7,  T5);
+	SET(d, a, b, c,  5, 12,  T6);
+	SET(c, d, a, b,  6, 17,  T7);
+	SET(b, c, d, a,  7, 22,  T8);
+	SET(a, b, c, d,  8,  7,  T9);
+	SET(d, a, b, c,  9, 12, T10);
+	SET(c, d, a, b, 10, 17, T11);
+	SET(b, c, d, a, 11, 22, T12);
+	SET(a, b, c, d, 12,  7, T13);
+	SET(d, a, b, c, 13, 12, T14);
+	SET(c, d, a, b, 14, 17, T15);
+	SET(b, c, d, a, 15, 22, T16);
+#undef SET
+
+	/* Round 2. */
+	/* Let [abcd k s i] denote the operation
+	   a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
+#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + G(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+	/* Do the following 16 operations. */
+	SET(a, b, c, d,  1,  5, T17);
+	SET(d, a, b, c,  6,  9, T18);
+	SET(c, d, a, b, 11, 14, T19);
+	SET(b, c, d, a,  0, 20, T20);
+	SET(a, b, c, d,  5,  5, T21);
+	SET(d, a, b, c, 10,  9, T22);
+	SET(c, d, a, b, 15, 14, T23);
+	SET(b, c, d, a,  4, 20, T24);
+	SET(a, b, c, d,  9,  5, T25);
+	SET(d, a, b, c, 14,  9, T26);
+	SET(c, d, a, b,  3, 14, T27);
+	SET(b, c, d, a,  8, 20, T28);
+	SET(a, b, c, d, 13,  5, T29);
+	SET(d, a, b, c,  2,  9, T30);
+	SET(c, d, a, b,  7, 14, T31);
+	SET(b, c, d, a, 12, 20, T32);
+#undef SET
+
+	/* Round 3. */
+	/* Let [abcd k s i] denote the operation
+	   a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + H(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+	/* Do the following 16 operations. */
+	SET(a, b, c, d,  5,  4, T33);
+	SET(d, a, b, c,  8, 11, T34);
+	SET(c, d, a, b, 11, 16, T35);
+	SET(b, c, d, a, 14, 23, T36);
+	SET(a, b, c, d,  1,  4, T37);
+	SET(d, a, b, c,  4, 11, T38);
+	SET(c, d, a, b,  7, 16, T39);
+	SET(b, c, d, a, 10, 23, T40);
+	SET(a, b, c, d, 13,  4, T41);
+	SET(d, a, b, c,  0, 11, T42);
+	SET(c, d, a, b,  3, 16, T43);
+	SET(b, c, d, a,  6, 23, T44);
+	SET(a, b, c, d,  9,  4, T45);
+	SET(d, a, b, c, 12, 11, T46);
+	SET(c, d, a, b, 15, 16, T47);
+	SET(b, c, d, a,  2, 23, T48);
+#undef SET
+
+	/* Round 4. */
+	/* Let [abcd k s i] denote the operation
+	   a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + I(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+	/* Do the following 16 operations. */
+	SET(a, b, c, d,  0,  6, T49);
+	SET(d, a, b, c,  7, 10, T50);
+	SET(c, d, a, b, 14, 15, T51);
+	SET(b, c, d, a,  5, 21, T52);
+	SET(a, b, c, d, 12,  6, T53);
+	SET(d, a, b, c,  3, 10, T54);
+	SET(c, d, a, b, 10, 15, T55);
+	SET(b, c, d, a,  1, 21, T56);
+	SET(a, b, c, d,  8,  6, T57);
+	SET(d, a, b, c, 15, 10, T58);
+	SET(c, d, a, b,  6, 15, T59);
+	SET(b, c, d, a, 13, 21, T60);
+	SET(a, b, c, d,  4,  6, T61);
+	SET(d, a, b, c, 11, 10, T62);
+	SET(c, d, a, b,  2, 15, T63);
+	SET(b, c, d, a,  9, 21, T64);
+#undef SET
+
+	/* The short helper macros are only meant for this function; do not
+	 * let names like I or F leak into the rest of the file. */
+#undef F
+#undef G
+#undef H
+#undef I
+#undef ROTATE_LEFT
+
+	/* Then perform the following additions. (That is increment each
+	   of the four registers by the value it had before this block
+	   was started.) */
+	abcd[0] += a;
+	abcd[1] += b;
+	abcd[2] += c;
+	abcd[3] += d;
+}
+
+/* Start an empty hash: zero message length and the four initial digest
+ * words. */
+MD5Hash::MD5Hash()
+{
+	count[0] = 0;
+	count[1] = 0;
+
+	abcd[0] = 0x67452301;
+	abcd[1] = 0xefcdab89u;
+	abcd[2] = 0x98badcfeu;
+	abcd[3] = 0x10325476;
+}
+
+/* Nothing to release; all state is stored inline in the object. */
+MD5Hash::~MD5Hash()
+{
+}
+
+/* Feed nbytes bytes of message data into the hash.
+ *
+ * Complete 64 byte blocks are processed immediately; a trailing partial
+ * block is kept in buf until later calls complete it. Calls with
+ * nbytes <= 0 are ignored. */
+void MD5Hash::append(const uint8_t *data, int nbytes)
+{
+	/* Reject empty and negative sizes before doing any arithmetic on
+	 * nbytes: shifting a negative or very large signed int is undefined. */
+	if(nbytes <= 0)
+		return;
+
+	const uint8_t *p = data;
+	int left = nbytes;
+	int offset = (count[0] >> 3) & 63;
+	/* Do the conversion to bits in unsigned arithmetic so that large
+	 * sizes wrap instead of overflowing. */
+	uint32_t nbits = (uint32_t)nbytes << 3;
+
+	/* Update the message length. */
+	count[1] += (uint32_t)nbytes >> 29;
+	count[0] += nbits;
+	if(count[0] < nbits)
+		count[1]++;
+
+	/* Process an initial partial block. */
+	if(offset) {
+		/* Copy no more than what still fits into the pending block. */
+		int space = 64 - offset;
+		int copy = (nbytes > space ? space : nbytes);
+
+		memcpy(buf + offset, p, copy);
+		if(offset + copy < 64)
+			return;
+		p += copy;
+		left -= copy;
+		process(buf);
+	}
+
+	/* Process full blocks. */
+	for(; left >= 64; p += 64, left -= 64)
+		process(p);
+
+	/* Process a final partial block. */
+	if(left)
+		memcpy(buf, p, left);
+}
+
+/* Complete the hash: append the padding and the message length, then write
+ * the 16 digest bytes, least significant byte of each state word first. */
+void MD5Hash::finish(uint8_t digest[16])
+{
+	/* A single 0x80 marker byte followed by zeros. */
+	static const uint8_t padding[64] = { 0x80 };
+
+	/* Save the length before padding changes it. */
+	uint8_t length_bytes[8];
+
+	for(int i = 0; i < 8; ++i) {
+		const uint32_t word = count[i/4];
+		const int shift = (i%4)*8;
+
+		length_bytes[i] = (uint8_t)(word >> shift);
+	}
+
+	/* Pad to 56 bytes mod 64. */
+	const int pad_size = (int)(((55 - (count[0] >> 3)) & 63) + 1);
+	append(padding, pad_size);
+
+	/* Append the length. */
+	append(length_bytes, 8);
+
+	for(int i = 0; i < 16; ++i) {
+		const uint32_t word = abcd[i/4];
+		const int shift = (i%4)*8;
+
+		digest[i] = (uint8_t)(word >> shift);
+	}
+}
+
+/* Finish the hash and return the digest as 32 upper case hexadecimal
+ * characters.
+ *
+ * This calls finish(), which appends padding to the hash state, so it is
+ * meant to be called once, after all data has been appended. */
+string MD5Hash::get_hex()
+{
+	static const char hex_digits[] = "0123456789ABCDEF";
+	uint8_t digest[16];
+	char buf[16*2];
+
+	finish(digest);
+
+	/* Write the two digits of every byte directly. Formatting with
+	 * sprintf("%02X") also stores a terminating NUL, which for the last
+	 * byte would land one past the end of buf. */
+	for(int i = 0; i < 16; i++) {
+		buf[i*2] = hex_digits[digest[i] >> 4];
+		buf[i*2 + 1] = hex_digits[digest[i] & 15];
+	}
+
+	return string(buf, sizeof(buf));
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h
new file mode 100644
index 00000000000..49f421d43d9
--- /dev/null
+++ b/intern/cycles/util/util_md5.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 1999, 2002 Aladdin Enterprises.  All rights reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * L. Peter Deutsch
+ * ghost@aladdin.com
+ */
+
+/* MD5
+ *
+ * Simple MD5 hash computation, used by disk cache. Adapted from external
+ * code, with minor code modifications done to remove some unused code and
+ * change code style. */
+
+#ifndef __UTIL_MD5_H__
+#define __UTIL_MD5_H__
+
+#include "util_string.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Incremental MD5 hash: feed data with append(), then read the result once
+ * with get_hex(). */
+class MD5Hash {
+public:
+	MD5Hash();
+	~MD5Hash();
+
+	/* Add size bytes from data to the hash, may be called repeatedly. */
+	void append(const uint8_t *data, int size);
+	/* Finish the hash and return the digest as a hexadecimal string. This
+	 * finalizes the internal state, so call it after all data is appended. */
+	string get_hex();
+
+protected:
+	/* Update abcd with one 64 byte block. */
+	void process(const uint8_t *data);
+	/* Append padding and length, and write out the 16 digest bytes. */
+	void finish(uint8_t digest[16]);
+
+    uint32_t count[2]; /* message length in bits, lsw first */
+    uint32_t abcd[4]; /* digest buffer */
+    uint8_t buf[64]; /* accumulate block */
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MD5_H__ */
+
diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h
new file mode 100644
index 00000000000..5396f6f17be
--- /dev/null
+++ b/intern/cycles/util/util_opengl.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_OPENGL_H__
+#define __UTIL_OPENGL_H__
+
+/* OpenGL header includes, used everywhere we use OpenGL, to deal with
+ * platform differences in one central place. */
+
+#ifdef __APPLE__
+#include <GLUT/glut.h>
+#include <OpenGL/gl.h>
+#include <OpenGL/glu.h>
+#else
+#include <GL/glew.h>
+#include <GL/glut.h>
+#endif
+
+#endif /* __UTIL_OPENGL_H__ */
+
diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h
new file mode 100644
index 00000000000..d1ca1b65ffb
--- /dev/null
+++ b/intern/cycles/util/util_param.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_PARAM_H__
+#define __UTIL_PARAM_H__
+
+/* Parameter value lists from OpenImageIO are used to store custom properties
+ * on various data, which can then later be used in shaders. */
+
+#include <OpenImageIO/paramlist.h>
+#include <OpenImageIO/typedesc.h>
+#include <OpenImageIO/ustring.h>
+
+CCL_NAMESPACE_BEGIN
+
+OIIO_NAMESPACE_USING
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_PARAM_H__ */
+
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
new file mode 100644
index 00000000000..4cf6cea0f69
--- /dev/null
+++ b/intern/cycles/util/util_path.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util_debug.h"
+#include "util_path.h"
+#include "util_string.h"
+
+#include <OpenImageIO/sysutil.h>
+OIIO_NAMESPACE_USING
+
+#include <boost/filesystem.hpp> 
+
+CCL_NAMESPACE_BEGIN
+
+/* Base directory that path_get() joins onto. Empty until set by path_init()
+ * or filled in by the first path_get() call. */
+static string cached_path = "";
+
+/* Set the base directory used by path_get(). */
+void path_init(const string& path)
+{
+	cached_path = path;
+}
+
+/* Return sub joined onto the base directory. When no base directory has
+ * been set, the directory of the running program is stored and used. */
+string path_get(const string& sub)
+{
+	if(cached_path.empty()) {
+		const string program = Sysutil::this_program_path();
+		cached_path = path_dirname(program);
+	}
+
+	return path_join(cached_path, sub);
+}
+
+string path_filename(const string& path)
+{
+	return boost::filesystem::path(path).filename();
+}
+
+/* Return path without its last component. */
+string path_dirname(const string& path)
+{
+	const boost::filesystem::path full(path);
+
+	return full.parent_path().string();
+}
+
+/* Return file appended to dir as a path component. */
+string path_join(const string& dir, const string& file)
+{
+	boost::filesystem::path joined(dir);
+	joined /= boost::filesystem::path(file);
+
+	return joined.string();
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
new file mode 100644
index 00000000000..b80bc0e9131
--- /dev/null
+++ b/intern/cycles/util/util_path.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_PATH_H__
+#define __UTIL_PATH_H__
+
+/* Utility functions to get paths to files distributed with the program. For
+ * the standalone apps, paths are relative to the executable, for dynamically
+ * linked libraries, the path to the library may be set with path_init, which
+ * then makes all paths relative to that. */
+
+#include "util_string.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Set the directory that path_get() results are relative to. */
+void path_init(const string& path = "");
+/* Return sub joined onto the directory set with path_init(), or onto the
+ * directory of the running program when no directory was set. */
+string path_get(const string& sub = "");
+
+/* Last component of path. */
+string path_filename(const string& path);
+/* Path without its last component. */
+string path_dirname(const string& path);
+/* dir and file combined into one path. */
+string path_join(const string& dir, const string& file);
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
new file mode 100644
index 00000000000..86ab34aa7f9
--- /dev/null
+++ b/intern/cycles/util/util_progress.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_PROGRESS_H__
+#define __UTIL_PROGRESS_H__
+
+/* Progress
+ *
+ * Simple class to communicate progress status messages, timing information,
+ * update notifications from a job running in another thread. All methods
+ * except for the constructor/destructor are thread safe. */
+
+#include "util_string.h"
+#include "util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Progress {
+public:
+	/* Start with pass 0, the "Initializing" status, no callbacks and no
+	 * cancel request. */
+	Progress()
+	{
+		pass = 0;
+		total_time = 0.0f;
+		pass_time = 0.0f;
+		status = "Initializing";
+		substatus = "";
+		update_cb = NULL;
+		cancel = false;
+		cancel_message = "";
+		cancel_cb = NULL;
+	}
+
+	/* Copy pass, timing and status from another Progress. Callbacks and
+	 * the cancel request are not copied. */
+	Progress(Progress& progress)
+	{
+		/* operator= does not touch the cancel flag, and as a plain bool
+		 * it would otherwise be left uninitialized */
+		cancel = false;
+
+		*this = progress;
+	}
+
+	/* Copy pass, timing and status from another Progress, while holding
+	 * the mutex of the object being copied from. */
+	Progress& operator=(Progress& progress)
+	{
+		thread_scoped_lock lock(progress.progress_mutex);
+
+		/* Members are read directly: get_pass() and get_status() lock
+		 * progress_mutex themselves, and locking this non-recursive mutex
+		 * a second time from the same thread would block forever. */
+		pass = progress.pass;
+		total_time = progress.total_time;
+		pass_time = progress.pass_time;
+		status = progress.status;
+		substatus = progress.substatus;
+
+		return *this;
+	}
+
+	/* cancel */
+
+	/* Request cancellation and store the reason. */
+	void set_cancel(const string& cancel_message_)
+	{
+		thread_scoped_lock lock(progress_mutex);
+		cancel_message = cancel_message_;
+		cancel = true;
+	}
+
+	/* Return whether cancellation was requested. While it was not, the
+	 * cancel callback is invoked first, if one is set. */
+	bool get_cancel()
+	{
+		if(!cancel && cancel_cb)
+			cancel_cb();
+
+		return cancel;
+	}
+
+	string get_cancel_message()
+	{
+		thread_scoped_lock lock(progress_mutex);
+		return cancel_message;
+	}
+
+	void set_cancel_callback(boost::function<void(void)> function)
+	{
+		cancel_cb = function;
+	}
+
+	/* pass and timing information */
+
+	void set_pass(int pass_, double total_time_, double pass_time_)
+	{
+		thread_scoped_lock lock(progress_mutex);
+
+		pass = pass_;
+		total_time = total_time_;
+		pass_time = pass_time_;
+	}
+
+	void get_pass(int& pass_, double& total_time_, double& pass_time_)
+	{
+		thread_scoped_lock lock(progress_mutex);
+
+		pass_ = pass;
+		total_time_ = total_time;
+		pass_time_ = pass_time;
+	}
+
+	/* status messages */
+
+	/* Replace status and substatus, then invoke the update callback. The
+	 * callback runs after the mutex has been released. */
+	void set_status(const string& status_, const string& substatus_ = "")
+	{
+		{
+			thread_scoped_lock lock(progress_mutex);
+			status = status_;
+			substatus = substatus_;
+		}
+
+		set_update();
+	}
+
+	/* Replace only the substatus, then invoke the update callback. */
+	void set_substatus(const string& substatus_)
+	{
+		{
+			thread_scoped_lock lock(progress_mutex);
+			substatus = substatus_;
+		}
+
+		set_update();
+	}
+
+	void get_status(string& status_, string& substatus_)
+	{
+		thread_scoped_lock lock(progress_mutex);
+		status_ = status;
+		substatus_ = substatus;
+	}
+
+	/* callback */
+
+	/* Invoke the update callback, if one is set. */
+	void set_update()
+	{
+		if(update_cb)
+			update_cb();
+	}
+
+	void set_update_callback(boost::function<void(void)> function)
+	{
+		update_cb = function;
+	}
+
+protected:
+	/* guards pass, timing, status and cancel message */
+	thread_mutex progress_mutex;
+	boost::function<void(void)> update_cb;
+	boost::function<void(void)> cancel_cb;
+
+	int pass;
+
+	double total_time;
+	double pass_time;
+
+	string status;
+	string substatus;
+
+	/* read without the mutex in get_cancel() */
+	volatile bool cancel;
+	string cancel_message;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_PROGRESS_H__ */
+
diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h
new file mode 100644
index 00000000000..ac310e93e80
--- /dev/null
+++ b/intern/cycles/util/util_set.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_SET_H__
+#define __UTIL_SET_H__
+
+#include <set>
+#include <tr1/unordered_set>
+
+CCL_NAMESPACE_BEGIN
+
+using std::set;
+using std::tr1::unordered_set;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SET_H__ */
+
diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp
new file mode 100644
index 00000000000..aaa482eec0d
--- /dev/null
+++ b/intern/cycles/util/util_string.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include "util_foreach.h"
+#include "util_string.h"
+
+#ifdef _WIN32
+#ifndef vsnprintf
+#define vsnprintf _vsnprintf
+#endif
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Format like printf and return the result as a string.
+ *
+ * Formatting is retried with a larger buffer until the text fits. When
+ * vsnprintf keeps returning -1 and the buffer has grown past 65536 bytes,
+ * an assert is triggered and an empty string is returned. */
+string string_printf(const char *format, ...)
+{
+	vector<char> buffer(128, 0);
+
+	for(;;) {
+		va_list args;
+
+		va_start(args, format);
+		const int written = vsnprintf(&buffer[0], buffer.size(), format, args);
+		va_end(args);
+
+		/* the text and its terminator fit */
+		if(written != -1 && written < (int)buffer.size())
+			return string(&buffer[0]);
+
+		if(written != -1) {
+			/* not enough space, the required length was reported */
+			buffer.resize(written + 1, 0);
+		}
+		else if(buffer.size() > 65536) {
+			/* still failing with a large buffer, give up */
+			assert(0);
+			return string("");
+		}
+		else {
+			/* not enough space or formatting error, length unknown */
+			buffer.resize(buffer.size()*2, 0);
+		}
+	}
+}
+
+/* Return true when a and b have the same length and are equal when each
+ * character is compared after conversion with toupper(). */
+bool string_iequals(const string& a, const string& b)
+{
+	if(a.size() != b.size())
+		return false;
+
+	for(size_t i = 0; i < a.size(); i++) {
+		/* toupper() is only defined for values representable as unsigned
+		 * char (and EOF); a plain char can be negative for bytes above
+		 * 127, so convert first */
+		const int ca = toupper((unsigned char)a[i]);
+		const int cb = toupper((unsigned char)b[i]);
+
+		if(ca != cb)
+			return false;
+	}
+
+	return true;
+}
+
+/* Split str at spaces and tabs and append the non-empty pieces to tokens.
+ * Tokens already in the vector are kept. */
+void string_split(vector<string>& tokens, const string& str)
+{
+	vector<string> pieces;
+
+	boost::split(pieces, str, boost::is_any_of("\t "), boost::token_compress_on);
+
+	/* separators at the start or end of str still give empty pieces */
+	for(size_t i = 0; i < pieces.size(); i++) {
+		if(!pieces[i].empty())
+			tokens.push_back(pieces[i]);
+	}
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
new file mode 100644
index 00000000000..72b893d2b17
--- /dev/null
+++ b/intern/cycles/util/util_string.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_STRING_H__
+#define __UTIL_STRING_H__
+
+#include <string.h>
+#include <string>
+#include <sstream>
+
+#include "util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+using std::string;
+using std::stringstream;
+using std::ostringstream;
+using std::istringstream;
+
+#ifdef __GNUC__
+#define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2)))
+#else
+#define PRINTF_ATTRIBUTE
+#endif
+
+/* Format like printf and return the result as a string. */
+string string_printf(const char *format, ...) PRINTF_ATTRIBUTE;
+
+/* Compare two strings for equality, ignoring case. */
+bool string_iequals(const string& a, const string& b);
+/* Split str at spaces and tabs, appending the non-empty pieces to tokens. */
+void string_split(vector<string>& tokens, const string& str);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_STRING_H__ */
+
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
new file mode 100644
index 00000000000..fae575873e0
--- /dev/null
+++ b/intern/cycles/util/util_system.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util_system.h"
+#include "util_types.h"
+
+#ifdef _WIN32
+#include <intrin.h>
+#include <windows.h>
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#else
+#include <unistd.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Return the number of processors reported by the operating system, at
+ * least 1. The value is queried on the first call and then reused. */
+int system_cpu_thread_count()
+{
+	static unsigned int count = 0;
+
+	if(count > 0)
+		return count;
+
+#ifdef _WIN32
+	SYSTEM_INFO info;
+	GetSystemInfo(&info);
+	count = (unsigned int)info.dwNumberOfProcessors;
+#elif defined(__APPLE__)
+	size_t len = sizeof(count);
+	int mib[2] = { CTL_HW, HW_NCPU };
+	
+	/* count stays 0 on failure and is clamped below */
+	sysctl(mib, 2, &count, &len, NULL, 0);
+#else
+	/* sysconf() returns -1 on error. Converting that to unsigned gave a
+	 * huge count that passed the clamp below and was returned as a
+	 * negative number. */
+	long online = sysconf(_SC_NPROCESSORS_ONLN);
+	count = (online > 0) ? (unsigned int)online : 0;
+#endif
+
+	if(count < 1)
+		count = 1;
+
+	return count;
+}
+
+#ifndef _WIN32
+/* Only compiled when _WIN32 is not defined; Windows builds include
+ * <intrin.h> instead, which presumably supplies __cpuid there.
+ *
+ * Executes the cpuid instruction with selector in eax and stores the
+ * resulting eax, ebx, ecx and edx in data[0] to data[3]. No value is loaded
+ * into ecx before the instruction. */
+static void __cpuid(int data[4], int selector)
+{
+	asm("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) : "a"(selector));
+}
+#endif
+
+/* Replace the occurrences of needle in haystack with other, in a single
+ * pass from left to right. Text inserted by a replacement is not searched
+ * again. An empty needle leaves haystack unchanged. */
+static void replace_string(string& haystack, const string& needle, const string& other)
+{
+	/* an empty needle is found at every position and would never finish */
+	if(needle.empty())
+		return;
+
+	size_t i = 0;
+
+	/* Continue behind the inserted text. Searching from the start again
+	 * loops forever when other itself contains needle. */
+	while((i = haystack.find(needle, i)) != string::npos) {
+		haystack.replace(i, needle.length(), other);
+		i += other.length();
+	}
+}
+
+/* Return the processor brand string reported by cpuid, with "(TM)" and
+ * "(R)" removed and cut off at the first run of two spaces, or
+ * "Unknown CPU" when the processor does not report a brand string. */
+string system_cpu_brand_string()
+{
+	int result[4];
+
+	/* eax receives the highest supported extended function number */
+	__cpuid(result, 0x80000000);
+
+	/* Compare as unsigned. As signed integers 0x80000004 is negative, so
+	 * any positive value returned by a processor without the extended
+	 * functions would be accepted. */
+	if((unsigned int)result[0] >= 0x80000004u) {
+		/* Three functions with four registers each. Reading into an int
+		 * array avoids writing ints through a casted char pointer. */
+		int regs[12];
+		char buf[sizeof(regs) + 1];
+
+		__cpuid(regs + 0, 0x80000002);
+		__cpuid(regs + 4, 0x80000003);
+		__cpuid(regs + 8, 0x80000004);
+
+		/* do not depend on the processor terminating the string */
+		memcpy(buf, regs, sizeof(regs));
+		buf[sizeof(regs)] = '\0';
+
+		string brand = buf;
+
+		/* make it a bit more presentable */
+		replace_string(brand, "(TM)", "");
+		replace_string(brand, "(R)", "");
+
+		/* Drop leading spaces first, otherwise a padded brand string is
+		 * cut down to nothing by the double space search below. */
+		size_t start = brand.find_first_not_of(' ');
+		if(start == string::npos)
+			brand = "";
+		else if(start > 0)
+			brand = brand.substr(start);
+
+		size_t i;
+		if((i = brand.find("  ")) != string::npos)
+			brand = brand.substr(0, i);
+
+		return brand;
+	}
+
+	return "Unknown CPU";
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
new file mode 100644
index 00000000000..49a798acf03
--- /dev/null
+++ b/intern/cycles/util/util_system.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_SYSTEM_H__
+#define __UTIL_SYSTEM_H__
+
+#include "util_string.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Number of processors reported by the operating system, at least 1. */
+int system_cpu_thread_count();
+/* Processor brand string from cpuid, or "Unknown CPU". */
+string system_cpu_brand_string();
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SYSTEM_H__ */
+
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
new file mode 100644
index 00000000000..ad95f04b4f9
--- /dev/null
+++ b/intern/cycles/util/util_thread.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_THREAD_H__
+#define __UTIL_THREAD_H__
+
+#include <boost/thread.hpp>
+#include <queue>
+
+CCL_NAMESPACE_BEGIN
+
+#if 0
+
+/* Use STL for threading */
+
+using std::thread;
+using std::thread_mutex;
+typedef std::lock_guard thread_scoped_lock;
+using std::condition_variable;
+
+#else
+
+/* Use boost for threading */
+
+using boost::thread;
+typedef boost::mutex thread_mutex;
+typedef boost::mutex::scoped_lock thread_scoped_lock;
+typedef boost::condition_variable thread_condition_variable;
+
+#endif
+
+/* Thread Safe Queue to pass tasks from one thread to another. Tasks should be
+ * pushed into the queue, while the worker threads wait to pop the next task
+ * off the queue. Calling stop() discards the tasks that are still queued and
+ * makes the worker threads stop waiting for more tasks, so it should be
+ * called once all pushed tasks are done, e.g. after wait_done(). */
+
+template<typename T> class ThreadQueue
+{
+public:
+	ThreadQueue()
+	{
+		tot = 0;
+		tot_done = 0;
+		do_stop = false;
+		do_cancel = false;
+	}
+
+	/* Main thread functions */
+
+	/* push a task to be executed */
+	void push(const T& value)
+	{
+		thread_scoped_lock lock(queue_mutex);
+		queue.push(value);
+		tot++;
+		lock.unlock();
+
+		queue_cond.notify_one();
+	}
+
+	/* wait until all tasks are done */
+	void wait_done()
+	{
+		thread_scoped_lock lock(done_mutex);
+
+		while(tot_done != tot)
+			done_cond.wait(lock);
+	}
+
+	/* stop all worker threads, tasks still in the queue are dropped */
+	void stop()
+	{
+		clear();
+
+		{
+			/* Set the flag while holding queue_mutex. A worker tests
+			 * do_stop under this mutex in worker_wait_pop(); without
+			 * the lock it could test the flag, miss the notification
+			 * below and then wait forever. */
+			thread_scoped_lock lock(queue_mutex);
+			do_stop = true;
+		}
+
+		queue_cond.notify_all();
+	}
+
+	/* cancel all tasks, but keep worker threads running */
+	void cancel()
+	{
+		clear();
+		do_cancel = true;
+		wait_done();
+		do_cancel = false;
+	}
+
+	/* Worker thread functions
+	 *
+	 * while(queue.worker_wait_pop(task)) {
+	 *		for(..) {
+	 *			... do work ...
+	 *
+	 *			if(queue.worker_cancel())
+	 *				break;
+	 *		}
+	 *
+	 *		queue.worker_done();
+	 * }
+	 */
+
+	/* Wait for a task and store it in value. Returns false, without a
+	 * task, once the queue is empty and stop() was called. */
+	bool worker_wait_pop(T& value)
+	{
+		thread_scoped_lock lock(queue_mutex);
+
+		while(queue.empty() && !do_stop)
+			queue_cond.wait(lock);
+
+		if(queue.empty())
+			return false;
+		
+		value = queue.front();
+		queue.pop();
+
+		return true;
+	}
+
+	/* report that the task returned by worker_wait_pop() is finished */
+	void worker_done()
+	{
+		thread_scoped_lock lock(done_mutex);
+		tot_done++;
+		/* checked before unlocking, other threads change tot_done too */
+		assert(tot_done <= tot);
+		lock.unlock();
+
+		done_cond.notify_all();
+	}
+
+	/* true while cancel() is waiting for running tasks to finish */
+	bool worker_cancel()
+	{
+		return do_cancel;
+	}
+
+protected:
+	/* Remove all queued tasks, counting each one as done so that
+	 * wait_done() does not wait for them. */
+	void clear()
+	{
+		thread_scoped_lock lock(queue_mutex);
+
+		while(!queue.empty()) {
+			thread_scoped_lock done_lock(done_mutex);
+			tot_done++;
+			done_lock.unlock();
+
+			queue.pop();
+		}
+
+		done_cond.notify_all();
+	}
+
+	std::queue<T> queue;
+	thread_mutex queue_mutex; /* guards queue, tot and do_stop */
+	thread_mutex done_mutex; /* guards tot_done */
+	thread_condition_variable queue_cond;
+	thread_condition_variable done_cond;
+	volatile bool do_stop;
+	volatile bool do_cancel;
+	volatile int tot, tot_done; /* tasks pushed, tasks done or dropped */
+};
+
+/* Thread Local Storage
+ *
+ * Boost implementation is a bit slow, and Mac OS X __thread is not supported
+ * but the pthreads implementation is optimized, so we use these macros. */
+
+#ifdef __APPLE__
+
+/* pthread key based storage: tls_create() must be called before the
+ * variable is used, and tls_delete() when it is no longer needed */
+#define tls_ptr(type, name) \
+	pthread_key_t name
+#define tls_set(name, value) \
+	pthread_setspecific(name, value)
+#define tls_get(type, name) \
+	((type*)pthread_getspecific(name))
+#define tls_create(type, name) \
+	pthread_key_create(&name, NULL)
+#define tls_delete(type, name) \
+	pthread_key_delete(name);
+
+#else
+
+/* Windows compilers are detected with _WIN32, like in the rest of the code.
+ * __WIN32 is not defined by MSVC, which left __thread undefined there. */
+#ifdef _WIN32
+#define __thread __declspec(thread)
+#endif
+
+/* compiler supported thread local pointer, create and delete do nothing */
+#define tls_ptr(type, name) \
+	__thread type *name
+#define tls_set(name, value) \
+	name = value
+#define tls_get(type, name) \
+	name
+#define tls_create(type, name)
+#define tls_delete(type, name)
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_THREAD_H__ */
+
diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp
new file mode 100644
index 00000000000..5f543fc7f91
--- /dev/null
+++ b/intern/cycles/util/util_time.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdlib.h>
+
+#include "util_time.h"
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Current value of the performance counter, converted to seconds. */
+double time_dt()
+{
+	LARGE_INTEGER ticks_per_second;
+	LARGE_INTEGER ticks;
+
+	QueryPerformanceFrequency(&ticks_per_second);
+	QueryPerformanceCounter(&ticks);
+
+	return (double)ticks.QuadPart/(double)ticks_per_second.QuadPart;
+}
+
+/* Sleep for t seconds, truncated to whole milliseconds. */
+void time_sleep(double t)
+{
+	const int milliseconds = (int)(t*1000);
+
+	Sleep(milliseconds);
+}
+
+CCL_NAMESPACE_END
+
+#else
+
+#include <sys/time.h>
+#include <unistd.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Current time from gettimeofday(), in seconds. */
+double time_dt()
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+
+	const double seconds = tv.tv_sec;
+	const double microseconds = tv.tv_usec;
+
+	return seconds + microseconds*1e-6;
+}
+
+/* Sleep for t seconds. Whole seconds are passed to sleep() and the
+ * remaining fraction to usleep(). */
+void time_sleep(double t)
+{
+	/* whole seconds */
+	int s = (int)t;
+
+	if(s >= 1) {
+		sleep(s);
+
+		/* Only the fraction is left for usleep(). Passing the full time
+		 * again doubled the sleep, and usleep() is not required to
+		 * accept a second or more. */
+		t -= s;
+	}
+
+	/* remaining microseconds */
+	int us = (int)(t*1e6);
+
+	if(us > 0)
+		usleep(us);
+}
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h
new file mode 100644
index 00000000000..33fa8797a69
--- /dev/null
+++ b/intern/cycles/util/util_time.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_TIME_H__
+#define __UTIL_TIME_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Give current time in seconds in double precision, with good accuracy. */
+
+double time_dt();
+
+/* Sleep for the specified number of seconds */
+
+void time_sleep(double t);
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
new file mode 100644
index 00000000000..5c2f28af318
--- /dev/null
+++ b/intern/cycles/util/util_transform.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * Adapted from code with license:
+ * 
+ * Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
+ * Digital Ltd. LLC. All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Industrial Light & Magic nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission. 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "util_math.h"
+#include "util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Invert a 4x4 matrix by Gauss-Jordan elimination with row pivoting.
+ *
+ * Every row operation applied to M is applied to R as well, so with R set
+ * to the identity on entry, R holds the inverse when M has been reduced to
+ * the identity. M is overwritten. Returns false when a zero pivot is found,
+ * in which case R and M are left partially modified. */
+static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
+{
+	/* forward elimination */
+	for(int col = 0; col < 4; col++) {
+		/* pick the row from the diagonal down with the largest absolute
+		 * value in this column */
+		int best_row = col;
+		float best_size = (M[col][col] < 0)? -M[col][col]: M[col][col];
+
+		for(int row = col + 1; row < 4; row++) {
+			const float value = M[row][col];
+			const float size = (value < 0)? -value: value;
+
+			if(size > best_size) {
+				best_row = row;
+				best_size = size;
+			}
+		}
+
+		/* nothing but zeros left in this column */
+		if(best_size == 0)
+			return false;
+
+		/* move the chosen row onto the diagonal */
+		if(best_row != col) {
+			for(int k = 0; k < 4; k++) {
+				const float m = M[col][k];
+				M[col][k] = M[best_row][k];
+				M[best_row][k] = m;
+
+				const float r = R[col][k];
+				R[col][k] = R[best_row][k];
+				R[best_row][k] = r;
+			}
+		}
+
+		/* clear the column below the diagonal */
+		for(int row = col + 1; row < 4; row++) {
+			const float factor = M[row][col] / M[col][col];
+
+			for(int k = 0; k < 4; k++) {
+				M[row][k] -= factor*M[col][k];
+				R[row][k] -= factor*R[col][k];
+			}
+		}
+	}
+
+	/* backward substitution */
+	for(int col = 3; col >= 0; col--) {
+		const float diagonal = M[col][col];
+
+		if(diagonal == 0)
+			return false;
+
+		/* scale the row so the diagonal element becomes one */
+		for(int k = 0; k < 4; k++) {
+			M[col][k] /= diagonal;
+			R[col][k] /= diagonal;
+		}
+
+		/* clear the column above the diagonal */
+		for(int row = 0; row < col; row++) {
+			const float factor = M[row][col];
+
+			for(int k = 0; k < 4; k++) {
+				M[row][k] -= factor*M[col][k];
+				R[row][k] -= factor*R[col][k];
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Return the inverse of tfm, or the identity transform when tfm cannot be
+ * inverted. */
+Transform transform_inverse(const Transform& tfm)
+{
+	/* the solver turns the identity into the inverse and destroys its
+	 * input, so it works on a copy */
+	Transform inverse = transform_identity();
+	Transform scratch = tfm;
+
+	float (*inverse_rows)[4] = (float(*)[4])&inverse;
+	float (*scratch_rows)[4] = (float(*)[4])&scratch;
+
+	if(transform_matrix4_gj_inverse(inverse_rows, scratch_rows))
+		return inverse;
+
+	return transform_identity();
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
new file mode 100644
index 00000000000..9cde410edc8
--- /dev/null
+++ b/intern/cycles/util/util_transform.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_TRANSFORM_H__
+#define __UTIL_TRANSFORM_H__
+
+#ifndef __KERNEL_GPU__
+#include <string.h>
+#endif
+
+#include "util_math.h"
+#include "util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* 4x4 matrix, stored as four float4 rows. */
+typedef struct Transform {
+	float4 x; /* row 0 */
+	float4 y; /* row 1 */
+	float4 z; /* row 2 */
+	float4 w; /* row 3 */
+} Transform;
+
+/* Transform the point a by t: a is extended with a fourth component of 1,
+ * multiplied by the first three rows, and the result is divided by the dot
+ * product with the fourth row. */
+__device_inline float3 transform(const Transform *t, const float3 a)
+{
+	float4 point = {a.x, a.y, a.z, 1.0f};
+	float3 result = {dot(t->x, point), dot(t->y, point), dot(t->z, point)};
+	float divisor = dot(t->w, point);
+
+	return result/divisor;
+}
+
+/* Transform the direction a by t: a is extended with a fourth component of 0,
+ * so the last column of the matrix does not contribute, and no division by
+ * the fourth row is done. */
+__device_inline float3 transform_direction(const Transform *t, const float3 a)
+{
+	float4 dir = {a.x, a.y, a.z, 0.0f};
+	float3 result;
+
+	result.x = dot(t->x, dir);
+	result.y = dot(t->y, dir);
+	result.z = dot(t->z, dir);
+
+	return result;
+}
+
+#ifndef __KERNEL_GPU__
+
+/* Print the four rows of t with print_float4, each with the same label,
+ * followed by an empty line. */
+__device_inline void print_transform(const char *label, const Transform& t)
+{
+	const float4 *rows[4] = {&t.x, &t.y, &t.z, &t.w};
+
+	for(int i = 0; i < 4; i++)
+		print_float4(label, *rows[i]);
+
+	printf("\n");
+}
+
+/* Return the transpose of a: row i of the result is column i of a. */
+__device_inline Transform transform_transpose(const Transform a)
+{
+	Transform t;
+
+	t.x = make_float4(a.x.x, a.y.x, a.z.x, a.w.x);
+	t.y = make_float4(a.x.y, a.y.y, a.z.y, a.w.y);
+	t.z = make_float4(a.x.z, a.y.z, a.z.z, a.w.z);
+	t.w = make_float4(a.x.w, a.y.w, a.z.w, a.w.w);
+
+	return t;
+}
+
+/* Matrix product a*b. b is transposed first, so that every element of the
+ * result is the dot product of a row of a with a row of the transposed b. */
+__device_inline Transform operator*(const Transform a, const Transform b)
+{
+	const Transform bt = transform_transpose(b);
+	const float4 *rows[4] = {&a.x, &a.y, &a.z, &a.w};
+	float4 out[4];
+
+	for(int i = 0; i < 4; i++) {
+		const float4 row = *rows[i];
+
+		out[i] = make_float4(dot(row, bt.x), dot(row, bt.y), dot(row, bt.z), dot(row, bt.w));
+	}
+
+	Transform t;
+
+	t.x = out[0];
+	t.y = out[1];
+	t.z = out[2];
+	t.w = out[3];
+
+	return t;
+}
+
+/* Build a transform from 16 values given in row-major order: a-d form the
+ * first row, e-h the second, i-l the third and m-p the fourth. */
+__device_inline Transform make_transform(float a, float b, float c, float d,
+									float e, float f, float g, float h,
+									float i, float j, float k, float l,
+									float m, float n, float o, float p)
+{
+	Transform t;
+
+	t.x = make_float4(a, b, c, d);
+	t.y = make_float4(e, f, g, h);
+	t.z = make_float4(i, j, k, l);
+	t.w = make_float4(m, n, o, p);
+
+	return t;
+}
+
+/* Translation matrix: identity with t placed in the last column. */
+__device_inline Transform transform_translate(float3 t)
+{
+	Transform tfm = make_transform(
+		1.0f, 0.0f, 0.0f, t.x,
+		0.0f, 1.0f, 0.0f, t.y,
+		0.0f, 0.0f, 1.0f, t.z,
+		0.0f, 0.0f, 0.0f, 1.0f);
+
+	return tfm;
+}
+
+/* Translation matrix from separate x, y and z offsets. */
+__device_inline Transform transform_translate(float x, float y, float z)
+{
+	float3 offset = make_float3(x, y, z);
+
+	return transform_translate(offset);
+}
+
+/* Scale matrix: s on the diagonal of the upper 3x3 part, 1 in the corner. */
+__device_inline Transform transform_scale(float3 s)
+{
+	Transform tfm = make_transform(
+		s.x, 0.0f, 0.0f, 0.0f,
+		0.0f, s.y, 0.0f, 0.0f,
+		0.0f, 0.0f, s.z, 0.0f,
+		0.0f, 0.0f, 0.0f, 1.0f);
+
+	return tfm;
+}
+
+/* Scale matrix from separate x, y and z factors. */
+__device_inline Transform transform_scale(float x, float y, float z)
+{
+	float3 factors = make_float3(x, y, z);
+
+	return transform_scale(factors);
+}
+
+/* Perspective projection matrix. fov is the full field of view angle as
+ * passed to tanf (radians), n and f are the near and far distances that
+ * appear in the depth terms of the third row. The x and y axes are scaled
+ * by 1/tan(fov/2). */
+__device_inline Transform transform_perspective(float fov, float n, float f)
+{
+	float depth = f - n;
+
+	Transform persp = make_transform(
+		1.0f, 0.0f, 0.0f, 0.0f,
+		0.0f, 1.0f, 0.0f, 0.0f,
+		0.0f, 0.0f, f / depth, -f*n / depth,
+		0.0f, 0.0f, 1.0f, 0.0f);
+
+	float inv_tan = 1.0f/tanf(0.5f*fov);
+	Transform scale = transform_scale(inv_tan, inv_tan, 1);
+
+	return scale * persp;
+}
+
+/* Rotation matrix for a rotation of angle (as passed to sinf/cosf, so in
+ * radians) around axis. The axis is normalized before use. */
+__device_inline Transform transform_rotate(float angle, float3 axis)
+{
+	const float s = sinf(angle);
+	const float c = cosf(angle);
+	const float t = 1.f - c;
+
+	axis = normalize(axis);
+
+	const float x = axis.x;
+	const float y = axis.y;
+	const float z = axis.z;
+
+	return make_transform(
+		x*x*t + c,   x*y*t - s*z, x*z*t + s*y, 0.0f,
+		y*x*t + s*z, y*y*t + c,   y*z*t - s*x, 0.0f,
+		z*x*t - s*y, z*y*t + s*x, z*z*t + c,   0.0f,
+		0.0f,        0.0f,        0.0f,        1.0f);
+}
+
+/* Rotation matrix from Euler angles: euler.x rotates around the X axis,
+ * euler.y around the Y axis and euler.z around the Z axis. The three
+ * rotations are multiplied in the order X * Y * Z. Angles are passed on to
+ * transform_rotate unchanged. */
+__device_inline Transform transform_euler(float3 euler)
+{
+	return
+		transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)) *
+		transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
+		transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f));
+}
+
+/* Orthographic projection matrix: z is shifted by -znear and then divided
+ * by (zfar - znear), x and y are left unchanged. */
+__device_inline Transform transform_orthographic(float znear, float zfar)
+{
+	Transform scale = transform_scale(1.0f, 1.0f, 1.0f / (zfar-znear));
+	Transform shift = transform_translate(0.0f, 0.0f, -znear);
+
+	return scale * shift;
+}
+
+/* Identity matrix. */
+__device_inline Transform transform_identity()
+{
+	return make_transform(
+		1.0f, 0.0f, 0.0f, 0.0f,
+		0.0f, 1.0f, 0.0f, 0.0f,
+		0.0f, 0.0f, 1.0f, 0.0f,
+		0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+/* Two transforms are equal when their memory is byte for byte identical,
+ * this is a memcmp rather than a per-component float comparison. */
+__device_inline bool operator==(const Transform& A, const Transform& B)
+{
+	int difference = memcmp(&A, &B, sizeof(Transform));
+
+	return (difference == 0);
+}
+
+/* Negation of operator==. */
+__device_inline bool operator!=(const Transform& A, const Transform& B)
+{
+	return (A == B)? false: true;
+}
+
+/* Return the first three entries of the given column, taken from the x, y
+ * and z rows. The column index is passed to float4::operator[] unchecked. */
+__device_inline float3 transform_get_column(const Transform *t, int column)
+{
+	float3 result;
+
+	result.x = t->x[column];
+	result.y = t->y[column];
+	result.z = t->z[column];
+
+	return result;
+}
+
+Transform transform_inverse(const Transform& a);
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TRANSFORM_H__ */
+
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
new file mode 100644
index 00000000000..a0e352128d1
--- /dev/null
+++ b/intern/cycles/util/util_types.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_TYPES_H__
+#define __UTIL_TYPES_H__
+
+#ifndef __KERNEL_OPENCL__
+
+#include <stdlib.h>
+
+#endif
+
+/* Qualifiers for kernel code shared by CPU and GPU */
+
+#ifndef __KERNEL_GPU__
+
+#define __device static inline
+#define __global
+#define __local
+#define __shared
+#define __constant
+#define __device_inline static inline __attribute__((always_inline))
+
+#endif
+
+/* SIMD Types */
+
+#ifndef __KERNEL_GPU__
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#endif
+
+#ifndef _WIN32
+#ifndef __KERNEL_GPU__
+
+#include <stdint.h>
+
+#endif
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Types
+ *
+ * Define simpler unsigned type names, and integer with defined number of bits.
+ * Also vector types, named to be compatible with OpenCL builtin types, while
+ * working for CUDA and C++ too. */
+
+/* Shorter Unsigned Names */
+
+typedef unsigned char uchar;
+typedef unsigned int uint;
+
+#ifndef __KERNEL_GPU__
+
+/* Fixed Bits Types */
+
+#ifdef _WIN32
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+#endif
+
+/* Generic Memory Pointer */
+
+typedef uint64_t device_ptr;
+
+/* Vector Types */
+
+/* Two uchar components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct uchar2 {
+	uchar x, y;
+
+	uchar operator[](int i) const { return (&x)[i]; }
+	uchar& operator[](int i) { return (&x)[i]; }
+};
+
+/* Three uchar components, accessible by name or by index. Indexing offsets
+ * a pointer from x and does no range checking. */
+struct uchar3 {
+	uchar x, y, z;
+
+	uchar operator[](int i) const { return (&x)[i]; }
+	uchar& operator[](int i) { return (&x)[i]; }
+};
+
+/* Four uchar components, accessible by name or by index. Indexing offsets
+ * a pointer from x and does no range checking. */
+struct uchar4 {
+	uchar x, y, z, w;
+
+	uchar operator[](int i) const { return (&x)[i]; }
+	uchar& operator[](int i) { return (&x)[i]; }
+};
+
+/* Two int components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct int2 {
+	int x, y;
+
+	int operator[](int i) const { return (&x)[i]; }
+	int& operator[](int i) { return (&x)[i]; }
+};
+
+/* Three int components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct int3 {
+	int x, y, z;
+
+	int operator[](int i) const { return (&x)[i]; }
+	int& operator[](int i) { return (&x)[i]; }
+};
+
+/* Four int components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct int4 {
+	int x, y, z, w;
+
+	int operator[](int i) const { return (&x)[i]; }
+	int& operator[](int i) { return (&x)[i]; }
+};
+
+/* Two uint components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct uint2 {
+	uint x, y;
+
+	uint operator[](int i) const { return (&x)[i]; }
+	uint& operator[](int i) { return (&x)[i]; }
+};
+
+/* Three uint components, accessible by name or by index. Indexing offsets
+ * a pointer from x and does no range checking. */
+struct uint3 {
+	uint x, y, z;
+
+	uint operator[](int i) const { return (&x)[i]; }
+	uint& operator[](int i) { return (&x)[i]; }
+};
+
+/* Four uint components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct uint4 {
+	uint x, y, z, w;
+
+	uint operator[](int i) const { return (&x)[i]; }
+	uint& operator[](int i) { return (&x)[i]; }
+};
+
+/* Two float components, accessible by name or by index. Indexing offsets a
+ * pointer from x and does no range checking. */
+struct float2 {
+	float x, y;
+
+	float operator[](int i) const { return (&x)[i]; }
+	float& operator[](int i) { return (&x)[i]; }
+};
+
+/* Three float components, accessible by name or by index. Indexing offsets
+ * a pointer from x and does no range checking. */
+struct float3 {
+	float x, y, z;
+
+	float operator[](int i) const { return (&x)[i]; }
+	float& operator[](int i) { return (&x)[i]; }
+};
+
+/* Four float components, accessible by name or by index. Indexing offsets
+ * a pointer from x and does no range checking. */
+struct float4 {
+	float x, y, z, w;
+
+	float operator[](int i) const { return (&x)[i]; }
+	float& operator[](int i) { return (&x)[i]; }
+};
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
+/* Vector Type Constructors
+ *
+ * OpenCL does not support C++ classes, so we use these functions instead
+ * of constructors. */
+
+/* Construct a uchar2 from its components. */
+__device uchar2 make_uchar2(uchar x, uchar y)
+{
+	uchar2 result;
+
+	result.x = x;
+	result.y = y;
+
+	return result;
+}
+
+/* Construct a uchar3 from its components. */
+__device uchar3 make_uchar3(uchar x, uchar y, uchar z)
+{
+	uchar3 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+
+	return result;
+}
+
+/* Construct a uchar4 from its components. */
+__device uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
+{
+	uchar4 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+	result.w = w;
+
+	return result;
+}
+
+/* Construct an int2 from its components. */
+__device int2 make_int2(int x, int y)
+{
+	int2 result;
+
+	result.x = x;
+	result.y = y;
+
+	return result;
+}
+
+/* Construct an int3 from its components. */
+__device int3 make_int3(int x, int y, int z)
+{
+	int3 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+
+	return result;
+}
+
+/* Construct an int4 from its components. */
+__device int4 make_int4(int x, int y, int z, int w)
+{
+	int4 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+	result.w = w;
+
+	return result;
+}
+
+/* Construct a uint2 from its components. */
+__device uint2 make_uint2(uint x, uint y)
+{
+	uint2 result;
+
+	result.x = x;
+	result.y = y;
+
+	return result;
+}
+
+/* Construct a uint3 from its components. */
+__device uint3 make_uint3(uint x, uint y, uint z)
+{
+	uint3 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+
+	return result;
+}
+
+/* Construct a uint4 from its components. */
+__device uint4 make_uint4(uint x, uint y, uint z, uint w)
+{
+	uint4 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+	result.w = w;
+
+	return result;
+}
+
+/* Construct a float2 from its components. */
+__device float2 make_float2(float x, float y)
+{
+	float2 result;
+
+	result.x = x;
+	result.y = y;
+
+	return result;
+}
+
+/* Construct a float3 from its components. */
+__device float3 make_float3(float x, float y, float z)
+{
+	float3 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+
+	return result;
+}
+
+/* Construct a float4 from its components. */
+__device float4 make_float4(float x, float y, float z, float w)
+{
+	float4 result;
+
+	result.x = x;
+	result.y = y;
+	result.z = z;
+	result.w = w;
+
+	return result;
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_H__ */
+
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
new file mode 100644
index 00000000000..fb872936e3f
--- /dev/null
+++ b/intern/cycles/util/util_vector.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_VECTOR_H__
+#define __UTIL_VECTOR_H__
+
+/* Vector */
+
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+using std::vector;
+
+/* Array
+ *
+ * Simplified version of vector, serving two purposes:
+ * - somewhat faster in that it does not clear memory on resize/alloc,
+ *   this was actually showing up in profiles quite significantly
+ * - if this is used, we are not tempted to use inefficient operations */
+
+/* Minimal dynamic array of T.
+ *
+ * Storage is allocated with new[] and contents are copied with memcpy, so
+ * this is only suitable for element types that can be copied bytewise.
+ * Every copy or assignment releases the previously owned buffer. Indexing
+ * with operator[] does no bounds checking. */
+template<typename T>
+class array
+{
+public:
+	/* empty array, owns no memory */
+	array()
+	{
+		data = NULL;
+		datasize = 0;
+	}
+
+	/* array of newsize elements, nothing is allocated for size 0 */
+	array(size_t newsize)
+	{
+		data = (newsize > 0)? new T[newsize]: NULL;
+		datasize = newsize;
+	}
+
+	array(const array& from)
+	{
+		/* start out empty, so that the assignment below has a valid
+		 * (NULL) buffer to release */
+		data = NULL;
+		datasize = 0;
+
+		*this = from;
+	}
+
+	array& operator=(const array& from)
+	{
+		/* on self assignment the source would be freed before the copy */
+		if(this != &from)
+			copy_from(from.data, from.datasize);
+
+		return *this;
+	}
+
+	array& operator=(const vector<T>& from)
+	{
+		/* &from[0] must not be evaluated for an empty vector */
+		copy_from((from.size() > 0)? &from[0]: NULL, from.size());
+
+		return *this;
+	}
+
+	~array()
+	{
+		delete [] data;
+	}
+
+	/* Change the number of elements. Elements that fit in the new size are
+	 * kept, any additional ones are whatever new[] produces for T. Resizing
+	 * to 0 frees the memory. */
+	void resize(size_t newsize)
+	{
+		if(newsize == 0) {
+			clear();
+		}
+		else {
+			T *newdata = new T[newsize];
+			size_t ncopy = (datasize < newsize)? datasize: newsize;
+
+			/* data is NULL while the array is empty, and memcpy must
+			 * not be handed a NULL source even for a zero size */
+			if(ncopy > 0)
+				memcpy(newdata, data, ncopy*sizeof(T));
+
+			delete [] data;
+
+			data = newdata;
+			datasize = newsize;
+		}
+	}
+
+	/* free the memory and become an empty array */
+	void clear()
+	{
+		delete [] data;
+		data = NULL;
+		datasize = 0;
+	}
+
+	size_t size() const
+	{
+		return datasize;
+	}
+
+	/* unchecked element access, elements stay writable through a const array */
+	T& operator[](size_t i) const
+	{
+		return data[i];
+	}
+
+protected:
+	/* Replace the contents with a copy of n elements starting at src. The
+	 * new buffer is filled before the old one is released. */
+	void copy_from(const T *src, size_t n)
+	{
+		T *newdata = NULL;
+
+		if(n > 0) {
+			newdata = new T[n];
+			memcpy(newdata, src, n*sizeof(T));
+		}
+
+		delete [] data;
+
+		data = newdata;
+		datasize = n;
+	}
+
+	T *data;
+	size_t datasize;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_VECTOR_H__ */
+
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
new file mode 100644
index 00000000000..75f0b92e705
--- /dev/null
+++ b/intern/cycles/util/util_view.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util_opengl.h"
+#include "util_time.h"
+#include "util_view.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* structs */
+
+/* State of the single view window, kept in the global V. */
+struct View {
+	/* callbacks supplied to view_main_loop, any of them may be NULL */
+	ViewInitFunc initf;
+	ViewExitFunc exitf;
+	ViewResizeFunc resize;
+	ViewDisplayFunc display;
+	ViewKeyboardFunc keyboard;
+
+	/* true until the first display callback has run */
+	bool first_display;
+	/* set by view_redraw, consumed by view_idle */
+	bool redraw;
+
+	/* current window size */
+	int width;
+	int height;
+} V;
+
+/* public */
+
+/* Draw text at raster position (x, y), one GLUT bitmap character at a time. */
+static void view_display_text(int x, int y, const char *text)
+{
+	glRasterPos3f(x, y, 0);
+
+	const char *c = text;
+
+	while(*c != '\0') {
+		glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c);
+		c++;
+	}
+}
+
+/* Draw a dark, blended bar of 20 pixels high along the top edge of the
+ * window, with the info text in grey on top of it. The current color is
+ * left at white afterwards. */
+void view_display_info(const char *info)
+{
+	const int bar_height = 20;
+	const int bar_bottom = V.height - bar_height;
+
+	/* background bar */
+	glEnable(GL_BLEND);
+	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+	glColor4f(0.1f, 0.1f, 0.1f, 0.8f);
+	glRectf(0.0f, bar_bottom, V.width, V.height);
+	glDisable(GL_BLEND);
+
+	/* text */
+	glColor3f(0.5f, 0.5f, 0.5f);
+	view_display_text(10, 7 + bar_bottom, info);
+
+	glColor3f(1.0f, 1.0f, 1.0f);
+}
+
+/* GLUT display callback. On the first call the init callback is run and the
+ * exit callback is registered with atexit. Every call clears the buffers,
+ * sets up a 2D projection matching the window size in pixels, runs the
+ * display callback and swaps buffers. */
+static void view_display()
+{
+	if(V.first_display) {
+		if(V.initf)
+			V.initf();
+
+		if(V.exitf)
+			atexit(V.exitf);
+
+		V.first_display = false;
+	}
+
+	/* clear */
+	glClearColor(0.05f, 0.05f, 0.05f, 0.0f);
+	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+	/* pixel space projection */
+	glMatrixMode(GL_PROJECTION);
+	glLoadIdentity();
+	gluOrtho2D(0, V.width, 0, V.height);
+
+	glMatrixMode(GL_MODELVIEW);
+	glLoadIdentity();
+
+	glRasterPos3f(0, 0, 0);
+
+	/* draw */
+	if(V.display)
+		V.display();
+
+	glutSwapBuffers();
+}
+
+/* GLUT reshape callback. Sizes that are not strictly positive are ignored,
+ * otherwise the new size is stored, the viewport and matrices are reset and
+ * the resize callback is notified. */
+static void view_reshape(int width, int height)
+{
+	if(width > 0 && height > 0) {
+		V.width = width;
+		V.height = height;
+
+		glViewport(0, 0, width, height);
+
+		glMatrixMode(GL_PROJECTION);
+		glLoadIdentity();
+
+		glMatrixMode(GL_MODELVIEW);
+		glLoadIdentity();
+
+		if(V.resize)
+			V.resize(width, height);
+	}
+}
+
+/* GLUT keyboard callback. Every key is forwarded to the keyboard callback
+ * first. On top of that 'm' prints the mouse position and 'q' quits the
+ * application. */
+static void view_keyboard(unsigned char key, int x, int y)
+{
+	if(V.keyboard)
+		V.keyboard(key);
+
+	if(key == 'm')
+		printf("mouse %d %d\n", x, y);
+	if(key == 'q') {
+		/* After the first display the exit callback is registered with
+		 * atexit, so exit() below already runs it. Only call it directly
+		 * when that registration has not happened yet, otherwise it would
+		 * run twice. */
+		if(V.exitf && V.first_display)
+			V.exitf();
+
+		exit(0);
+	}
+}
+
+/* GLUT idle callback. Posts a redisplay if one was requested through
+ * view_redraw, then sleeps with time_sleep(0.1f) before returning. */
+void view_idle()
+{
+	const bool redraw_requested = V.redraw;
+
+	if(redraw_requested) {
+		V.redraw = false;
+		glutPostRedisplay();
+	}
+
+	time_sleep(0.1f);
+}
+
+/* Open a GLUT window with the given title and size, hook up the callbacks
+ * and enter the GLUT main loop. Any of the callbacks may be NULL, they are
+ * checked before being called. */
+void view_main_loop(const char *title, int width, int height,
+	ViewInitFunc initf, ViewExitFunc exitf,
+	ViewResizeFunc resize, ViewDisplayFunc display,
+	ViewKeyboardFunc keyboard)
+{
+	/* reset global state, then fill in what was passed */
+	memset(&V, 0, sizeof(V));
+
+	V.initf = initf;
+	V.exitf = exitf;
+	V.resize = resize;
+	V.display = display;
+	V.keyboard = keyboard;
+	V.width = width;
+	V.height = height;
+	V.first_display = true;
+	V.redraw = false;
+
+	/* glutInit wants an argc/argv pair, give it a single program name */
+	const char *program = "app";
+	char *fake_argv = (char*)program;
+	int fake_argc = 1;
+
+	glutInit(&fake_argc, &fake_argv);
+	glutInitWindowSize(width, height);
+	glutInitWindowPosition(0, 0);
+	glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
+	glutCreateWindow(title);
+
+#ifndef __APPLE__
+	glewInit();
+#endif
+
+	/* set up viewport and notify the resize callback of the initial size */
+	view_reshape(width, height);
+
+	glutDisplayFunc(view_display);
+	glutIdleFunc(view_idle);
+	glutReshapeFunc(view_reshape);
+	glutKeyboardFunc(view_keyboard);
+
+	glutMainLoop();
+}
+
+/* Request a redraw. This only sets a flag, the actual redisplay is posted
+ * the next time view_idle runs. */
+void view_redraw()
+{
+	V.redraw = true;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h
new file mode 100644
index 00000000000..c6805b5ce7c
--- /dev/null
+++ b/intern/cycles/util/util_view.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_VIEW_H__
+#define __UTIL_VIEW_H__
+
+/* Functions to display a simple OpenGL window using GLUT, simplified to the
+ * bare minimum we need to reduce boilerplate code in test apps. */
+
+CCL_NAMESPACE_BEGIN
+
+typedef void (*ViewInitFunc)(void); /* run once, right before the first frame is drawn */
+typedef void (*ViewExitFunc)(void); /* run when the application exits (registered with atexit() on first display; 'q' quits) */
+typedef void (*ViewResizeFunc)(int width, int height); /* run with the new window size, only for sizes > 0 */
+typedef void (*ViewDisplayFunc)(void); /* run on every redraw, before the buffers are swapped */
+typedef void (*ViewKeyboardFunc)(unsigned char key); /* run for every key press, with the key that was pressed */
+
+void view_main_loop(const char *title, int width, int height,
+	ViewInitFunc initf, ViewExitFunc exitf,
+	ViewResizeFunc resize, ViewDisplayFunc display,
+	ViewKeyboardFunc keyboard);
+
+void view_display_info(const char *info);
+void view_redraw();
+
+CCL_NAMESPACE_END
+
+#endif /*__UTIL_VIEW_H__*/
+
diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h
new file mode 100644
index 00000000000..1e6874b7d77
--- /dev/null
+++ b/intern/cycles/util/util_xml.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __UTIL_XML_H__
+#define __UTIL_XML_H__
+
+/* PugiXML from OpenImageIO is used for XML parsing. */
+
+#include <OpenImageIO/pugixml.hpp>
+
+CCL_NAMESPACE_BEGIN
+
+OIIO_NAMESPACE_USING
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_XML_H__ */
+
author	Ton Roosendaal <ton@blender.org>	2011-04-27 15:58:34 +0400
committer	Ton Roosendaal <ton@blender.org>	2011-04-27 15:58:34 +0400
commit	da376e0237517543aa21740ee2363234ee1c20ae (patch)
tree	014a513ed8d0eccc5e54fef42347781e85bae56a /intern/cycles/util
parent	693780074388111e7b9ef1c3825e462f398dc6c4 (diff)