diff options
author | Antony Riakiotakis <kalast@gmail.com> | 2014-09-01 17:37:21 +0400 |
---|---|---|
committer | Antony Riakiotakis <kalast@gmail.com> | 2014-09-01 17:38:22 +0400 |
commit | f0653c3d3f1b24b45e6f1c5ac659bf130b58af3f (patch) | |
tree | 82721843c274829a1441ad7291809545955db90c /intern | |
parent | a8d2a6faf3beaf8363bd71c65c59e0ed1a1e7b5c (diff) | |
parent | 575cbf0172508eb9d846f1c62027a9c0f4fddd34 (diff) |
Merge branch 'master' into soc-2014-viewport_context
Also fix SCons issues with SDL. There is still a linking problem
somewhere, but I expect this is an issue in master as well.
Conflicts:
intern/cycles/SConscript
intern/ghost/CMakeLists.txt
intern/ghost/intern/GHOST_WindowX11.cpp
source/blender/nodes/CMakeLists.txt
Diffstat (limited to 'intern')
104 files changed, 901 insertions, 5195 deletions
diff --git a/intern/CMakeLists.txt b/intern/CMakeLists.txt index d0d0ef2fd1c..74048c2a4cc 100644 --- a/intern/CMakeLists.txt +++ b/intern/CMakeLists.txt @@ -73,10 +73,6 @@ if(WITH_BULLET) add_subdirectory(rigidbody) endif() -if(WITH_COMPOSITOR) - add_subdirectory(opencl) -endif() - if(WITH_OPENNL) add_subdirectory(opennl) endif() diff --git a/intern/SConscript b/intern/SConscript index 91f507b35fc..c0dafe37855 100644 --- a/intern/SConscript +++ b/intern/SConscript @@ -60,9 +60,6 @@ if env['WITH_BF_INTERNATIONAL']: if env['WITH_BF_BULLET']: SConscript (['rigidbody/SConscript']) -if env['WITH_BF_COMPOSITOR']: - SConscript (['opencl/SConscript']) - if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'win64-mingw', 'linuxcross', 'win64-vc'): SConscript(['utfconv/SConscript']) diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index cbc9ef6c1c6..2d7fbe9c593 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -62,9 +62,12 @@ if env['WITH_BF_CYCLES_OSL']: incs.extend('. 
bvh render device kernel kernel/osl kernel/svm util subd'.split()) incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna #source/blender/blenlib'.split()) incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split()) + incs.append(env['BF_GLEW_INC']) incs.append('#/intern/glew-mx') incs.append('#intern/mikktspace') +incs.extend('#extern/glew/include #extern/clew/include #extern/cuew/include #intern/mikktspace'.split()) + incs.append(cycles['BF_OIIO_INC']) incs.append(cycles['BF_BOOST_INC']) incs.append(cycles['BF_OPENEXR_INC'].split()) diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index 96ac7619cbe..c8464899725 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -29,6 +29,8 @@ set(LIBRARIES ${JPEG_LIBRARIES} ${ZLIB_LIBRARIES} ${TIFF_LIBRARY} + extern_clew + extern_cuew ) add_definitions(${GL_DEFINITIONS}) diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 915ef96a517..6c001f8889b 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -329,6 +329,7 @@ static void xml_read_camera(const XMLReadState& state, pugi::xml_node node) xml_read_float(&cam->aperturesize, node, "aperturesize"); // 0.5*focallength/fstop xml_read_float(&cam->focaldistance, node, "focaldistance"); xml_read_float(&cam->shuttertime, node, "shuttertime"); + xml_read_float(&cam->aperture_ratio, node, "aperture_ratio"); if(xml_equal_string(node, "type", "orthographic")) cam->type = CAMERA_ORTHOGRAPHIC; diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index ad553fb1d62..e97ad2e71f5 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -44,6 +44,7 @@ set(ADDON_FILES addon/presets.py addon/properties.py addon/ui.py + addon/version_update.py ) add_definitions(${GL_DEFINITIONS}) diff --git a/intern/cycles/blender/addon/__init__.py 
b/intern/cycles/blender/addon/__init__.py index 27d986900c8..d1d27df8dc3 100644 --- a/intern/cycles/blender/addon/__init__.py +++ b/intern/cycles/blender/addon/__init__.py @@ -31,7 +31,7 @@ bl_info = { import bpy from . import engine - +from . import version_update class CyclesRender(bpy.types.RenderEngine): bl_idname = 'CYCLES' @@ -100,12 +100,16 @@ def register(): presets.register() bpy.utils.register_module(__name__) + bpy.app.handlers.version_update.append(version_update.do_versions) + def unregister(): from . import ui from . import properties from . import presets + bpy.app.handlers.version_update.remove(version_update.do_versions) + ui.unregister() properties.unregister() presets.unregister() diff --git a/intern/cycles/blender/addon/presets.py b/intern/cycles/blender/addon/presets.py index 9991fdb8e3b..84be09a8ff4 100644 --- a/intern/cycles/blender/addon/presets.py +++ b/intern/cycles/blender/addon/presets.py @@ -37,6 +37,7 @@ class AddPresetIntegrator(AddPresetBase, Operator): "cycles.diffuse_bounces", "cycles.glossy_bounces", "cycles.transmission_bounces", + "cycles.volume_bounces", "cycles.transparent_min_bounces", "cycles.transparent_max_bounces" ] diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index b4a1b10f8b4..59e60a9eef1 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -544,6 +544,13 @@ class CyclesCameraSettings(bpy.types.PropertyGroup): subtype='ANGLE', default=0, ) + cls.aperture_ratio = FloatProperty( + name="Aperture Ratio", + description="Distortion to simulate anamorphic lens bokeh", + min=0.01, soft_min=1.0, soft_max=2.0, + default=1.0, + precision=4, + ) cls.panorama_type = EnumProperty( name="Panorama Type", description="Distortion to use for the calculation", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 4a13f265d14..aab9f83d0ed 100644 --- a/intern/cycles/blender/addon/ui.py +++ 
b/intern/cycles/blender/addon/ui.py @@ -154,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.prop(cscene, "subsurface_samples", text="Subsurface") sub.prop(cscene, "volume_samples", text="Volume") - if cscene.feature_set == 'EXPERIMENTAL' and use_cpu(context): + if use_cpu(context) or cscene.feature_set == 'EXPERIMENTAL': layout.row().prop(cscene, "sampling_pattern", text="Pattern") for rl in scene.render.layers: @@ -468,6 +468,7 @@ class CyclesCamera_PT_dof(CyclesButtonsPanel, Panel): sub = col.column(align=True) sub.prop(ccam, "aperture_blades", text="Blades") sub.prop(ccam, "aperture_rotation", text="Rotation") + sub.prop(ccam, "aperture_ratio", text="Ratio") class Cycles_PT_context_material(CyclesButtonsPanel, Panel): @@ -629,7 +630,8 @@ class CYCLES_OT_use_shading_nodes(Operator): @classmethod def poll(cls, context): - return context.material or context.world or context.lamp + return (getattr(context, "material", False) or getattr(context, "world", False) or + getattr(context, "lamp", False)) def execute(self, context): if context.material: @@ -928,7 +930,9 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel): col = split.column() col.label(text="Volume:") - col.prop(cworld, "volume_sampling", text="") + sub = col.column() + sub.active = use_cpu(context) + sub.prop(cworld, "volume_sampling", text="") col.prop(cworld, "homogeneous_volume", text="Homogeneous") @@ -1030,7 +1034,9 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel): col = split.column() col.label(text="Volume:") - col.prop(cmat, "volume_sampling", text="") + sub = col.column() + sub.active = use_cpu(context) + sub.prop(cmat, "volume_sampling", text="") col.prop(cmat, "homogeneous_volume", text="Homogeneous") diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py new file mode 100644 index 00000000000..b0b4e1d24dd --- /dev/null +++ b/intern/cycles/blender/addon/version_update.py @@ -0,0 +1,35 @@ +# +# 
Copyright 2011-2014 Blender Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License +# + +# <pep8 compliant> + +import bpy + +from bpy.app.handlers import persistent + + +@persistent +def do_versions(self): + # We don't modify startup file because it assumes to + # have all the default values only. + if not bpy.data.is_saved: + return + + if bpy.data.version <= (2, 71, 0): + for scene in bpy.data.scenes: + cscene = scene.cycles + if not cscene.is_property_set("volume_bounces"): + cscene.volume_bounces = 1 diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index 1a85561c6d5..ce8c64c4819 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -46,6 +46,8 @@ struct BlenderCamera { float2 pixelaspect; + float aperture_ratio; + PanoramaType panorama_type; float fisheye_fov; float fisheye_lens; @@ -167,6 +169,7 @@ static void blender_camera_from_object(BlenderCamera *bcam, BL::Object b_ob, boo bcam->apertureblades = RNA_int_get(&ccamera, "aperture_blades"); bcam->aperturerotation = RNA_float_get(&ccamera, "aperture_rotation"); bcam->focaldistance = blender_camera_focal_distance(b_ob, b_camera); + bcam->aperture_ratio = RNA_float_get(&ccamera, "aperture_ratio"); bcam->shift.x = b_camera.shift_x(); bcam->shift.y = b_camera.shift_y(); @@ -328,6 +331,9 @@ static void blender_camera_sync(Camera *cam, BlenderCamera *bcam, int width, int cam->fisheye_fov = bcam->fisheye_fov; 
cam->fisheye_lens = bcam->fisheye_lens; + /* anamorphic lens bokeh */ + cam->aperture_ratio = bcam->aperture_ratio; + /* perspective */ cam->fov = 2.0f * atanf((0.5f * sensor_size) / bcam->lens / aspectratio); cam->focaldistance = bcam->focaldistance; diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 7b1a8ec0b15..8cfaea59a06 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -15,10 +15,11 @@ */ #include "attribute.h" +#include "camera.h" +#include "curves.h" #include "mesh.h" #include "object.h" #include "scene.h" -#include "curves.h" #include "blender_sync.h" #include "blender_util.h" @@ -39,7 +40,8 @@ bool ObtainCacheParticleUV(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Parti bool ObtainCacheParticleVcol(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num); bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background); void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData); -void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, float3 RotCam); +void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, + float3 RotCam, bool is_ortho); void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution); void ExportCurveTriangleUV(Mesh *mesh, ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata); void ExportCurveTriangleVcol(Mesh *mesh, ParticleCurveData *CData, int vert_offset, int resol, uchar4 *cdata); @@ -328,7 +330,8 @@ static void set_resolution(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, BL::S } } -void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, float3 RotCam) +void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, + float3 RotCam, bool is_ortho) { int vertexno = mesh->verts.size(); int vertexindex = vertexno; @@ -362,7 
+365,10 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, float3 RotC float3 ickey_loc = CData->curvekey_co[CData->curve_firstkey[curve]]; float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.0f); v1 = CData->curvekey_co[CData->curve_firstkey[curve] + 1] - CData->curvekey_co[CData->curve_firstkey[curve]]; - xbasis = normalize(cross(RotCam - ickey_loc,v1)); + if(is_ortho) + xbasis = normalize(cross(RotCam, v1)); + else + xbasis = normalize(cross(RotCam - ickey_loc, v1)); float3 ickey_loc_shfl = ickey_loc - radius * xbasis; float3 ickey_loc_shfr = ickey_loc + radius * xbasis; mesh->verts.push_back(ickey_loc_shfl); @@ -386,7 +392,10 @@ void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, float3 RotC if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f); - xbasis = normalize(cross(RotCam - ickey_loc,v1)); + if(is_ortho) + xbasis = normalize(cross(RotCam, v1)); + else + xbasis = normalize(cross(RotCam - ickey_loc, v1)); float3 ickey_loc_shfl = ickey_loc - radius * xbasis; float3 ickey_loc_shfr = ickey_loc + radius * xbasis; mesh->verts.push_back(ickey_loc_shfl); @@ -858,20 +867,26 @@ void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, bool ObtainCacheParticleData(mesh, &b_mesh, &b_ob, &CData, !preview); - /* obtain camera parameters */ - BL::Object b_CamOb = b_scene.camera(); - float3 RotCam = make_float3(0.0f, 0.0f, 0.0f); - if(b_CamOb) { - Transform ctfm = get_transform(b_CamOb.matrix_world()); - Transform tfm = get_transform(b_ob.matrix_world()); - Transform itfm = transform_quick_inverse(tfm); - RotCam = transform_point(&itfm, make_float3(ctfm.x.w, ctfm.y.w, ctfm.z.w)); - } - /* add hair geometry to mesh */ if(primitive == CURVE_TRIANGLES) { - if(triangle_method == CURVE_CAMERA_TRIANGLES) - 
ExportCurveTrianglePlanes(mesh, &CData, RotCam); + if(triangle_method == CURVE_CAMERA_TRIANGLES) { + /* obtain camera parameters */ + float3 RotCam; + Camera *camera = scene->camera; + Transform &ctfm = camera->matrix; + if(camera->type == CAMERA_ORTHOGRAPHIC) { + RotCam = -make_float3(ctfm.x.z, ctfm.y.z, ctfm.z.z); + } + else { + Transform tfm = get_transform(b_ob.matrix_world()); + Transform itfm = transform_quick_inverse(tfm); + RotCam = transform_point(&itfm, make_float3(ctfm.x.w, + ctfm.y.w, + ctfm.z.w)); + } + bool is_ortho = camera->type == CAMERA_ORTHOGRAPHIC; + ExportCurveTrianglePlanes(mesh, &CData, RotCam, is_ortho); + } else { ExportCurveTriangleGeometry(mesh, &CData, resolution); used_res = resolution; diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index f07f0fde888..a5e4b7bd2ae 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -525,15 +525,16 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri } /* test if we need to sync */ + bool use_mesh_geometry = render_layer.use_surfaces || render_layer.use_hair; Mesh *mesh; if(!mesh_map.sync(&mesh, key)) { - /* if transform was applied to mesh, need full update */ if(object_updated && mesh->transform_applied); /* test if shaders changed, these can be object level so mesh * does not get tagged for recalc */ else if(mesh->used_shaders != used_shaders); + else if(use_mesh_geometry != mesh->geometry_synced); else { /* even if not tagged for recalc, we may need to sync anyway * because the shader needs different mesh attributes */ @@ -560,14 +561,14 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri vector<Mesh::Triangle> oldtriangle = mesh->triangles; /* compares curve_keys rather than strands in order to handle quick hair - * adjustsments in dynamic BVH - other methods could probably do this better*/ + * adjustments in dynamic BVH - other methods could 
probably do this better*/ vector<float4> oldcurve_keys = mesh->curve_keys; mesh->clear(); mesh->used_shaders = used_shaders; mesh->name = ustring(b_ob_data.name().c_str()); - if(render_layer.use_surfaces || render_layer.use_hair) { + if(use_mesh_geometry) { /* mesh objects does have special handle in the dependency graph, * they're ensured to have properly updated. * @@ -596,6 +597,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri /* free derived mesh */ b_data.meshes.remove(b_mesh); } + mesh->geometry_synced = true; } /* displacement method */ diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index e82eb67aae5..b756d6acdb2 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -363,7 +363,12 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args) /* find socket socket */ BL::NodeSocket b_sock(PointerRNA_NULL); if (param->isoutput) { +#if OSL_LIBRARY_VERSION_CODE < 10500 b_sock = b_node.outputs[param->name]; +#else + b_sock = b_node.outputs[param->name.string()]; +#endif + /* remove if type no longer matches */ if(b_sock && b_sock.bl_idname() != socket_type) { @@ -372,7 +377,11 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args) } } else { +#if OSL_LIBRARY_VERSION_CODE < 10500 b_sock = b_node.inputs[param->name]; +#else + b_sock = b_node.inputs[param->name.string()]; +#endif /* remove if type no longer matches */ if(b_sock && b_sock.bl_idname() != socket_type) { diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 7e104d5bb41..4ff3d89f9f1 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -116,8 +116,8 @@ void BlenderSession::create_session() if(b_v3d) { if(session_pause == false) { /* full data sync */ - sync->sync_data(b_v3d, b_engine.camera_override(), &python_thread_state); 
sync->sync_view(b_v3d, b_rv3d, width, height); + sync->sync_data(b_v3d, b_engine.camera_override(), &python_thread_state); } } else { @@ -529,6 +529,7 @@ void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::Bake SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height); + scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y()); scene->bake_manager->set_baking(true); /* set number of samples */ diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 19898bfa573..a5d6bdf1fa1 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -184,6 +184,7 @@ void BlenderSync::sync_integrator() integrator->filter_glossy = get_float(cscene, "blur_glossy"); integrator->seed = get_int(cscene, "seed"); + integrator->sampling_pattern = (SamplingPattern)RNA_enum_get(&cscene, "sampling_pattern"); integrator->layer_flag = render_layer.layer; @@ -231,10 +232,6 @@ void BlenderSync::sync_integrator() integrator->subsurface_samples = subsurface_samples; integrator->volume_samples = volume_samples; } - - - if(experimental) - integrator->sampling_pattern = (SamplingPattern)RNA_enum_get(&cscene, "sampling_pattern"); if(integrator->modified(previntegrator)) integrator->tag_update(scene); diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 3c0c5c021c8..15bd814b8d5 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -103,18 +103,30 @@ bool BVH::cache_read(CacheData& key) if(Cache::global.lookup(key, value)) { cache_filename = key.get_filename(); - value.read(pack.root_index); - value.read(pack.SAH); - - value.read(pack.nodes); - value.read(pack.object_node); - value.read(pack.tri_woop); - value.read(pack.prim_type); - 
value.read(pack.prim_visibility); - value.read(pack.prim_index); - value.read(pack.prim_object); - value.read(pack.is_leaf); - + if(!(value.read(pack.root_index) && + value.read(pack.SAH) && + value.read(pack.nodes) && + value.read(pack.object_node) && + value.read(pack.tri_woop) && + value.read(pack.prim_type) && + value.read(pack.prim_visibility) && + value.read(pack.prim_index) && + value.read(pack.prim_object) && + value.read(pack.is_leaf))) + { + /* Clear the pack if load failed. */ + pack.root_index = 0; + pack.SAH = 0.0f; + pack.nodes.clear(); + pack.object_node.clear(); + pack.tri_woop.clear(); + pack.prim_type.clear(); + pack.prim_visibility.clear(); + pack.prim_index.clear(); + pack.prim_object.clear(); + pack.is_leaf.clear(); + return false; + } return true; } diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 318e4467e00..998b35351e3 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -11,6 +11,8 @@ set(INC set(INC_SYS ${GLEW_INCLUDE_PATH} + ../../../extern/cuew/include + ../../../extern/clew/include ) set(SRC diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index fa1f0acadde..efdfa98cfb5 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -20,12 +20,13 @@ #include "device.h" #include "device_intern.h" -#include "util_cuda.h" +#include "cuew.h" +#include "clew.h" + #include "util_debug.h" #include "util_foreach.h" #include "util_half.h" #include "util_math.h" -#include "util_opencl.h" #include "util_opengl.h" #include "util_time.h" #include "util_types.h" @@ -141,7 +142,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background) break; #ifdef WITH_CUDA case DEVICE_CUDA: - if(cuLibraryInit()) + if(device_cuda_init()) device = device_cuda_create(info, stats, background); else device = NULL; @@ -159,7 +160,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background) #endif 
#ifdef WITH_OPENCL case DEVICE_OPENCL: - if(clLibraryInit()) + if(device_opencl_init()) device = device_opencl_create(info, stats, background); else device = NULL; @@ -213,12 +214,12 @@ vector<DeviceType>& Device::available_types() types.push_back(DEVICE_CPU); #ifdef WITH_CUDA - if(cuLibraryInit()) + if(device_cuda_init()) types.push_back(DEVICE_CUDA); #endif #ifdef WITH_OPENCL - if(clLibraryInit()) + if(device_opencl_init()) types.push_back(DEVICE_OPENCL); #endif @@ -242,12 +243,12 @@ vector<DeviceInfo>& Device::available_devices() if(!devices_init) { #ifdef WITH_CUDA - if(cuLibraryInit()) + if(device_cuda_init()) device_cuda_info(devices); #endif #ifdef WITH_OPENCL - if(clLibraryInit()) + if(device_opencl_init()) device_opencl_info(devices); #endif diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4fdeef6bdcb..fd5ae1d7828 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -435,7 +435,8 @@ public: if(system_cpu_support_avx2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -449,7 +450,8 @@ public: if(system_cpu_support_avx()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -463,7 +465,8 @@ public: 
if(system_cpu_support_sse41()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -477,7 +480,8 @@ public: if(system_cpu_support_sse3()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -491,7 +495,8 @@ public: if(system_cpu_support_sse2()) { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; @@ -504,7 +509,8 @@ public: { for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); + kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; diff --git a/intern/cycles/device/device_cuda.cpp 
b/intern/cycles/device/device_cuda.cpp index f0f32f87eed..1ed26717f4b 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -23,7 +23,7 @@ #include "buffers.h" -#include "util_cuda.h" +#include "cuew.h" #include "util_debug.h" #include "util_map.h" #include "util_opengl.h" @@ -61,65 +61,10 @@ public: return (CUdeviceptr)mem; } - static const char *cuda_error_string(CUresult result) + static bool have_precompiled_kernels() { - switch(result) { - case CUDA_SUCCESS: return "No errors"; - case CUDA_ERROR_INVALID_VALUE: return "Invalid value"; - case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory"; - case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized"; - case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized"; - - case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available"; - case CUDA_ERROR_INVALID_DEVICE: return "Invalid device"; - - case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image"; - case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context"; - case CUDA_ERROR_MAP_FAILED: return "Map failed"; - case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed"; - case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped"; - case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped"; - case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU"; - case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired"; - case CUDA_ERROR_NOT_MAPPED: return "Not mapped"; - case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array"; - case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer"; - case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected"; - case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device"; - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use"; - case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported"; - case CUDA_ERROR_INVALID_PTX: return 
"Invalid PTX code"; - - case CUDA_ERROR_INVALID_SOURCE: return "Invalid source"; - case CUDA_ERROR_FILE_NOT_FOUND: return "File not found"; - case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve"; - case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed"; - case CUDA_ERROR_OPERATING_SYSTEM: return "OS call failed"; - - case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle"; - - case CUDA_ERROR_NOT_FOUND: return "Not found"; - - case CUDA_ERROR_NOT_READY: return "CUDA not ready"; - - case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address"; - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources"; - case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded time out"; - case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing"; - case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Stack error"; - case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction"; - case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address"; - case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space"; - case CUDA_ERROR_INVALID_PC: return "Invalid program counter"; - case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed"; - - case CUDA_ERROR_NOT_PERMITTED: return "Operation not permitted"; - case CUDA_ERROR_NOT_SUPPORTED: return "Operation not supported"; - - case CUDA_ERROR_UNKNOWN: return "Unknown error"; - - default: return "Unknown CUDA error value"; - } + string cubins_path = path_get("lib"); + return path_exists(cubins_path); } /*#ifdef NDEBUG @@ -141,7 +86,7 @@ public: CUresult result = stmt; \ \ if(result != CUDA_SUCCESS) { \ - string message = string_printf("CUDA error: %s in %s", cuda_error_string(result), #stmt); \ + string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ if(error_msg == "") \ error_msg = message; \ fprintf(stderr, "%s\n", message.c_str()); \ @@ -155,7 +100,7 @@ public: if(result == 
CUDA_SUCCESS) return false; - string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuda_error_string(result)); + string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result)); if(error_msg == "") error_msg = message; fprintf(stderr, "%s\n", message.c_str()); @@ -252,14 +197,18 @@ public: return true; } - string compile_kernel() + string compile_kernel(bool experimental) { /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); /* attempt to use kernel provided with blender */ - string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); + string cubin; + if(experimental) + cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor)); + else + cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); if(path_exists(cubin)) return cubin; @@ -267,7 +216,10 @@ public: string kernel_path = path_get("kernel"); string md5 = path_files_md5_hash(kernel_path); - cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); + if(experimental) + cubin = string_printf("cycles_kernel_experimental_sm%d%d_%s.cubin", major, minor, md5.c_str()); + else + cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); cubin = path_user_get(path_join("cache", cubin)); /* if exists already, use it */ @@ -275,7 +227,7 @@ public: return cubin; #ifdef _WIN32 - if(cuHavePrecompiledKernels()) { + if(have_precompiled_kernels()) { if(major < 2) cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor)); else @@ -285,25 +237,25 @@ public: #endif /* if not, find CUDA compiler */ - string nvcc = cuCompilerPath(); + const char *nvcc = cuewCompilerPath(); - if(nvcc == "") { + if(nvcc == NULL) { cuda_error_message("CUDA nvcc compiler not found. 
Install CUDA toolkit in default location."); return ""; } - int cuda_version = cuCompilerVersion(); + int cuda_version = cuewCompilerVersion(); if(cuda_version == 0) { cuda_error_message("CUDA nvcc compiler version could not be parsed."); return ""; } - if(cuda_version < 50) { - printf("Unsupported CUDA version %d.%d detected, you need CUDA 6.0.\n", cuda_version/10, cuda_version%10); + if(cuda_version < 60) { + printf("Unsupported CUDA version %d.%d detected, you need CUDA 6.5.\n", cuda_version/10, cuda_version%10); return ""; } - else if(cuda_version != 60) - printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported.\n", cuda_version/10, cuda_version%10); + else if(cuda_version != 65) + printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ string kernel = path_join(kernel_path, "kernel.cu"); @@ -317,7 +269,10 @@ public: string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " "-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d", - nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); + nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); + + if(experimental) + command += " -D__KERNEL_CUDA_EXPERIMENTAL__"; printf("%s\n", command.c_str()); @@ -348,7 +303,7 @@ public: return false; /* get kernel */ - string cubin = compile_kernel(); + string cubin = compile_kernel(experimental); if(cubin == "") return false; @@ -731,6 +686,7 @@ public: const int shader_chunk_size = 65536; const int start = task.shader_x; const int end = task.shader_x + task.shader_w; + int offset = task.offset; bool canceled = false; for(int sample = 0; sample < task.num_samples && !canceled; sample++) { @@ -743,6 +699,7 @@ public: &task.shader_eval_type, &shader_x, &shader_w, + &offset, &sample}; /* launch kernel */ @@ -1050,6 +1007,28 @@ 
public: } }; +bool device_cuda_init(void) +{ + static bool initialized = false; + static bool result = false; + + if (initialized) + return result; + + initialized = true; + + if (cuewInit() == CUEW_SUCCESS) { + if(CUDADevice::have_precompiled_kernels()) + result = true; +#ifndef _WIN32 + else if(cuewCompilerPath() != NULL) + result = true; +#endif + } + + return result; +} + Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background) { return new CUDADevice(info, stats, background); @@ -1063,13 +1042,13 @@ void device_cuda_info(vector<DeviceInfo>& devices) result = cuInit(0); if(result != CUDA_SUCCESS) { if(result != CUDA_ERROR_NO_DEVICE) - fprintf(stderr, "CUDA cuInit: %s\n", CUDADevice::cuda_error_string(result)); + fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result)); return; } result = cuDeviceGetCount(&count); if(result != CUDA_SUCCESS) { - fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", CUDADevice::cuda_error_string(result)); + fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result)); return; } diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h index 7eb66c25a81..80f1e2441a5 100644 --- a/intern/cycles/device/device_intern.h +++ b/intern/cycles/device/device_intern.h @@ -22,7 +22,9 @@ CCL_NAMESPACE_BEGIN class Device; Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background); +bool device_opencl_init(void); Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background); +bool device_cuda_init(void); Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background); Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address); Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 564fbdbadf8..7f055c79491 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ 
-280,7 +280,18 @@ public: int get_split_task_count(DeviceTask& task) { - return 1; + int total_tasks = 0; + list<DeviceTask> tasks; + task.split(tasks, devices.size()); + foreach(SubDevice& sub, devices) { + if(!tasks.empty()) { + DeviceTask subtask = tasks.front(); + tasks.pop_front(); + + total_tasks += sub.device->get_split_task_count(subtask); + } + } + return total_tasks; } void task_add(DeviceTask& task) diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 3abda6a54c1..82419cd62b1 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -25,11 +25,12 @@ #include "buffers.h" +#include "clew.h" + #include "util_foreach.h" #include "util_map.h" #include "util_math.h" #include "util_md5.h" -#include "util_opencl.h" #include "util_opengl.h" #include "util_path.h" #include "util_time.h" @@ -334,63 +335,10 @@ public: bool device_initialized; string platform_name; - const char *opencl_error_string(cl_int err) - { - switch (err) { - case CL_SUCCESS: return "Success!"; - case CL_DEVICE_NOT_FOUND: return "Device not found."; - case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; - case CL_MAP_FAILURE: return "Map failure"; - case CL_INVALID_VALUE: return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; - case CL_INVALID_PLATFORM: return 
"Invalid platform"; - case CL_INVALID_DEVICE: return "Invalid device"; - case CL_INVALID_CONTEXT: return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; - case CL_INVALID_HOST_PTR: return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; - case CL_INVALID_SAMPLER: return "Invalid sampler"; - case CL_INVALID_BINARY: return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; - case CL_INVALID_PROGRAM: return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; - case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; - case CL_INVALID_KERNEL: return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; - case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; - case CL_INVALID_EVENT: return "Invalid event"; - case CL_INVALID_OPERATION: return "Invalid operation"; - case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; - case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; - case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; - default: return "Unknown"; - } - } - bool opencl_error(cl_int err) { if(err != CL_SUCCESS) { - string message = 
string_printf("OpenCL error (%d): %s", err, opencl_error_string(err)); + string message = string_printf("OpenCL error (%d): %s", err, clewErrorString(err)); if(error_msg == "") error_msg = message; fprintf(stderr, "%s\n", message.c_str()); @@ -412,7 +360,7 @@ public: cl_int err = stmt; \ \ if(err != CL_SUCCESS) { \ - string message = string_printf("OpenCL error: %s in %s", opencl_error_string(err), #stmt); \ + string message = string_printf("OpenCL error: %s in %s", clewErrorString(err), #stmt); \ if(error_msg == "") \ error_msg = message; \ fprintf(stderr, "%s\n", message.c_str()); \ @@ -422,7 +370,7 @@ public: void opencl_assert_err(cl_int err, const char* where) { if(err != CL_SUCCESS) { - string message = string_printf("OpenCL error (%d): %s in %s", err, opencl_error_string(err), where); + string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where); if(error_msg == "") error_msg = message; fprintf(stderr, "%s\n", message.c_str()); @@ -552,7 +500,7 @@ public: device_initialized = true; } - static void context_notify_callback(const char *err_info, + static void CL_CALLBACK context_notify_callback(const char *err_info, const void *private_info, size_t cb, void *user_data) { char name[256]; @@ -1056,6 +1004,7 @@ public: cl_int d_shader_eval_type = task.shader_eval_type; cl_int d_shader_x = task.shader_x; cl_int d_shader_w = task.shader_w; + cl_int d_offset = task.offset; /* sample arguments */ cl_uint narg = 0; @@ -1085,6 +1034,7 @@ public: opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); + opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_offset), (void*)&d_offset)); opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample)); enqueue_kernel(kernel, task.shader_w, 1); @@ -1162,6 
+1112,26 @@ Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background) return new OpenCLDevice(info, stats, background); } +bool device_opencl_init(void) { + static bool initialized = false; + static bool result = false; + + if (initialized) + return result; + + initialized = true; + + // OpenCL disabled for now, only works with this environment variable set + if(!getenv("CYCLES_OPENCL_TEST")) { + result = false; + } + else { + result = clewInit() == CLEW_SUCCESS; + } + + return result; +} + void device_opencl_info(vector<DeviceInfo>& devices) { vector<cl_device_id> device_ids; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2ff6b53b0a5..8857f86890c 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -149,48 +149,61 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "60") + if(CUDA_VERSION MATCHES "65") else() message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " - "build may succeed but only CUDA 6.0 is officially supported") + "build may succeed but only CUDA 6.5 is officially supported") endif() # build for each arch set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) - foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - set(cuda_cubin kernel_${arch}.cubin) + macro(CYCLES_CUDA_KERNEL_ADD arch experimental) + if(${experimental}) + set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__") + set(cuda_cubin kernel_experimental_${arch}.cubin) + else() + set(cuda_extra_flags "") + set(cuda_cubin kernel_${arch}.cubin) + endif() set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") - if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50") - message(WARNING "Can't build kernel for CUDA sm_50 architecture, 
skipping") - else() - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} - -arch=${arch} - -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} - --ptxas-options="-v" - ${cuda_arch_flags} - ${cuda_version_flags} - ${cuda_math_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/../util - -I${CMAKE_CURRENT_SOURCE_DIR}/svm - -DCCL_NAMESPACE_BEGIN= - -DCCL_NAMESPACE_END= - -DNVCC - - DEPENDS ${cuda_sources}) - - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) - endif() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + -m${CUDA_BITS} + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} + --ptxas-options="-v" + ${cuda_arch_flags} + ${cuda_version_flags} + ${cuda_math_flags} + ${cuda_extra_flags} + -I${CMAKE_CURRENT_SOURCE_DIR}/../util + -I${CMAKE_CURRENT_SOURCE_DIR}/svm + -DCCL_NAMESPACE_BEGIN= + -DCCL_NAMESPACE_END= + -DNVCC + + DEPENDS ${cuda_sources}) + + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_cubin}) + + unset(cuda_extra_flags) + endmacro() + + foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) + # Compile regular kernel + CYCLES_CUDA_KERNEL_ADD(${arch} FALSE) + + # Compile experimental kernel + CYCLES_CUDA_KERNEL_ADD(${arch} TRUE) endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index cfe12e8533d..5a9e57c5342 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -69,8 +69,8 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0] cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1]) - if cuda_version != 60: - print("CUDA version %d.%d detected, build 
may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10)) + if cuda_version != 65: + print("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported." % (cuda_version/10, cuda_version%10)) # nvcc flags nvcc_flags = "-m%s" % (bits) @@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None + configs = (("kernel_%s.cubin", ''), + ("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__')) + # add command for each cuda architecture for arch in cuda_archs: - if cuda_version < 60 and arch == "sm_50": - print("Can't build kernel for CUDA sm_50 architecture, skipping") - continue - - cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) - - if env['BF_CYCLES_CUDA_ENV']: - MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" - command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file) - else: - command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file) - - kernel.Command(cubin_file, 'kernel.cu', command) - kernel.Depends(cubin_file, dependencies) - - kernel_binaries.append(cubin_file) - - if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: - # trick to compile one kernel at a time to reduce memory usage - if last_cubin_file: - kernel.Depends(cubin_file, last_cubin_file) - last_cubin_file = cubin_file + for config in configs: + # TODO(sergey): Use dict instead ocouple in order to increase readability? 
+ name = config[0] + extra_flags = config[1] + + cubin_file = os.path.join(build_dir, name % arch) + current_flags = nvcc_flags + extra_flags + + if env['BF_CYCLES_CUDA_ENV']: + MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" + command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file) + else: + command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file) + + kernel.Command(cubin_file, 'kernel.cu', command) + kernel.Depends(cubin_file, dependencies) + + kernel_binaries.append(cubin_file) + + if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']: + # trick to compile one kernel at a time to reduce memory usage + if last_cubin_file: + kernel.Depends(cubin_file, last_cubin_file) + last_cubin_file = cubin_file Return('kernel_binaries') diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 81c239ea0c9..9961071c2ac 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -23,9 +23,7 @@ #include "../closure/bsdf_reflection.h" #include "../closure/bsdf_refraction.h" #include "../closure/bsdf_transparent.h" -#ifdef __ANISOTROPIC__ #include "../closure/bsdf_ashikhmin_shirley.h" -#endif #include "../closure/bsdf_westin.h" #include "../closure/bsdf_toon.h" #include "../closure/bsdf_hair.h" @@ -94,13 +92,11 @@ ccl_device int bsdf_sample(KernelGlobals *kg, const ShaderData *sd, const Shader label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: label = 
bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); @@ -190,12 +186,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); break; @@ -260,12 +254,10 @@ ccl_device float3 bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const Shade case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); break; @@ -348,12 +340,10 @@ ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: bsdf_microfacet_beckmann_blur(sc, roughness); break; -#ifdef __ANISOTROPIC__ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: bsdf_ashikhmin_shirley_blur(sc, roughness); break; -#endif case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: bsdf_ashikhmin_velvet_blur(sc, roughness); break; diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 6a5d0410e01..ad7864cb8ea 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -77,7 +77,7 @@ 
ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c NdotI = fmaxf(NdotI, 1e-6f); NdotO = fmaxf(NdotO, 1e-6f); float3 H = normalize(omega_in + I); - float HdotI = fmaxf(dot(H, I), 1e-6f); + float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f); float HdotN = fmaxf(dot(H, N), 1e-6f); float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 5ab0b731bdd..7409aa0d014 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -272,7 +272,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD #endif /* compute face normal */ - float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + float3 Ng; + if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); + else + Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); sd->Ng = Ng; sd->N = Ng; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 27d1351568a..41e9d183a96 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -127,11 +127,14 @@ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); /* return normal */ - return normalize(cross(v1 - v0, v2 - v0)); + if(sd->flag & SD_NEGATIVE_SCALE_APPLIED) + return normalize(cross(v2 - v0, v1 - v0)); + else + return normalize(cross(v1 - v0, v2 - v0)); } /* point and normal on triangle */ -ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) +ccl_device_inline void 
triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) { /* load triangle vertices */ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); @@ -144,8 +147,14 @@ ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float float t = 1.0f - u - v; *P = (u*v0 + v*v1 + t*v2); + /* get object flags, instance-aware */ + int object_flag = kernel_tex_fetch(__object_flag, object >= 0 ? object : ~object); + /* compute normal */ - *Ng = normalize(cross(v1 - v0, v2 - v0)); + if(object_flag & SD_NEGATIVE_SCALE_APPLIED) + *Ng = normalize(cross(v2 - v0, v1 - v0)); + else + *Ng = normalize(cross(v1 - v0, v2 - v0)); /* shader`*/ *shader = __float_as_int(kernel_tex_fetch(__tri_shader, prim)); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 2e0a49435a8..4f20ef9ca15 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake( ccl_global type *name, #include "kernel_textures.h" - int type, int sx, int sw, int sample) + int type, int sx, int sw, int offset, int sample) { KernelGlobals kglobals, *kg = &kglobals; @@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake( int x = sx + get_global_id(0); if(x < sx + sw) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); } diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 42eb9a62518..fa2113fbb46 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, 
float *bu /* Shader Evaluation */ -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 9208acc232e..d5b5293664c 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -146,7 +146,7 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; @@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample) +kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) - kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample); + kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample); } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 264e5e3e4d0..19e06b88797 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -41,7 +41,7 @@ void 
kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, @@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 @@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX @@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff void 
kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 @@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int sample); + int type, int i, int offset, int sample); #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index d612a82b785..e7ff21a6f09 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float /* Shader Evaluate */ -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp index 339421a002b..cb1662bbfbe 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernel_avx2.cpp @@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, 
float4 *output, int type, int i, int sample) +void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index c2d14b7f835..dfbb49db7e6 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u) } #endif -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample) +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, + ShaderEvalType type, int i, int offset, int sample) { ShaderData sd; uint4 in = input[i * 2]; @@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(i, 0); + RNG rng = cmj_hash(offset + i, 0); #if 0 uint rng_state = cmj_hash(i, 0); @@ -215,7 +216,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int shader; float3 P, Ng; - triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); /* dummy initilizations copied from SHADER_EVAL_DISPLACE */ float3 I = Ng; diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 6b03abe9708..5c83358a56d 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -21,16 +21,22 @@ CCL_NAMESPACE_BEGIN ccl_device float2 
camera_sample_aperture(KernelGlobals *kg, float u, float v) { float blades = kernel_data.cam.blades; + float2 bokeh; if(blades == 0.0f) { /* sample disk */ - return concentric_sample_disk(u, v); + bokeh = concentric_sample_disk(u, v); } else { /* sample polygon */ float rotation = kernel_data.cam.bladesrotation; - return regular_polygon_sample(blades, rotation, u, v); + bokeh = regular_polygon_sample(blades, rotation, u, v); } + + /* anamorphic lens bokeh */ + bokeh.x *= kernel_data.cam.inv_aperture_ratio; + + return bokeh; } ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, Ray *ray) @@ -183,7 +189,8 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float /* calculate orthonormal coordinates perpendicular to D */ float3 U, V; - make_orthonormals(D, &U, &V); + U = normalize(make_float3(1.0f, 0.0f, 0.0f) - D.x * D); + V = normalize(cross(D, U)); /* update ray for effect of lens */ ray->P = U * lensuv.x + V * lensuv.y; diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 7a850844bf2..2a5b7689e57 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -14,6 +14,8 @@ * limitations under the License */ +/* TODO(sergey): Consider moving portable ctz/clz stuff to util. 
*/ + CCL_NAMESPACE_BEGIN /* "Correlated Multi-Jittered Sampling" @@ -35,8 +37,16 @@ ccl_device_inline int cmj_fast_mod_pow2(int a, int b) /* a must be > 0 and b must be > 1 */ ccl_device_inline int cmj_fast_div_pow2(int a, int b) { -#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) + kernel_assert(a > 0); + kernel_assert(b > 1); +#if defined(__KERNEL_SSE2__) +# ifdef _MSC_VER + unsigned long ctz; + _BitScanForward(&ctz, b); + return a >> ctz; +# else return a >> __builtin_ctz(b); +# endif #else return a/b; #endif @@ -44,8 +54,15 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b) ccl_device_inline uint cmj_w_mask(uint w) { -#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) + kernel_assert(w > 1); +#if defined(__KERNEL_SSE2__) +# ifdef _MSC_VER + unsigned long leading_zero; + _BitScanReverse(&leading_zero, w); + return ((1 << (1 + leading_zero)) - 1); +# else return ((1 << (32 - __builtin_clz(w))) - 1); +# endif #else w |= w >> 1; w |= w >> 2; @@ -165,7 +182,8 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy) smodm = cmj_fast_mod_pow2(s, m); } else { - sdivm = float_to_int(s * invm); + /* Doing s*inmv gives precision issues here. 
*/ + sdivm = s / m; smodm = s - sdivm*m; } diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index ac432d3fe04..e7f62f230f8 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -27,7 +27,7 @@ typedef struct LightSample { float pdf; /* light sampling probability density function */ float eval_fac; /* intensity multiplier */ int object; /* object id for triangle/curve lights */ - int prim; /* primitive id for triangle/curve ligths */ + int prim; /* primitive id for triangle/curve lights */ int shader; /* shader id */ int lamp; /* lamp id */ LightType type; /* type of light */ @@ -457,7 +457,7 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object, v = randv*randu; /* triangle, so get position, normal, shader */ - triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader); + triangle_point_normal(kg, object, prim, u, v, &ls->P, &ls->Ng, &ls->shader); ls->object = object; ls->prim = prim; ls->lamp = LAMP_NONE; @@ -546,11 +546,6 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index) return __float_as_int(data3.x); } -ccl_device void light_select(KernelGlobals *kg, int index, float randu, float randv, float3 P, LightSample *ls) -{ - lamp_light_sample(kg, index, randu, randv, P, ls); -} - ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt) { /* sample index */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 0c033f6234c..65755f0df12 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -89,6 +89,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, volume_ray.t = (hit)? 
isect.t: FLT_MAX; bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack); bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method); @@ -135,28 +137,32 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, kernel_volume_decoupled_free(kg, &volume_segment); if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray)) continue; else break; } } - else { + else +#endif + { /* integrate along volume segment with distance sampling */ ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, &state, &volume_sd, &volume_ray, L, &throughput, rng); + kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L, 1.0f); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray)) continue; else break; } +#endif } } #endif @@ -470,6 +476,8 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, volume_ray.t = (hit)? 
isect.t: FLT_MAX; bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + +#ifdef __VOLUME_DECOUPLED__ int sampling_method = volume_stack_sampling_method(kg, state.volume_stack); bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method); @@ -516,28 +524,32 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, kernel_volume_decoupled_free(kg, &volume_segment); if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) continue; else break; } } - else { + else +#endif + { /* integrate along volume segment with distance sampling */ ShaderData volume_sd; VolumeIntegrateResult result = kernel_volume_integrate( - kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng); + kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* direct lighting */ kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, 1.0f); /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray, 1.0f)) + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray)) continue; else break; } +#endif } } #endif @@ -803,10 +815,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; volume_ray.t = (hit)? 
isect.t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); -#ifdef __KERNEL_CPU__ +#ifdef __VOLUME_DECOUPLED__ /* decoupled ray marching only supported on CPU */ - bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; @@ -850,16 +863,17 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, &ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { - kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { + kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); } } } @@ -889,14 +903,15 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng); + kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous); +#ifdef __VOLUME_SCATTER__ if(result == VOLUME_PATH_SCATTERED) { /* todo: support equiangular, MIS and all light sampling. 
* alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L, num_samples_inv); + kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L, num_samples_inv); - if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { + if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) { kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); /* for render passes, sum and reset indirect light pass variables @@ -905,6 +920,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_radiance_reset_indirect(&L); } } +#endif } /* todo: avoid this calculation using decoupled ray marching */ @@ -941,7 +957,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* holdout */ #ifdef __HOLDOUT__ - if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK))) { + if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) { if(kernel_data.background.transparent) { float3 holdout_weight; diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 81526b0ba7a..9553c2da0df 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -50,7 +50,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_select(kg, i, light_u, light_v, sd->P, &ls); + lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index b4917f9a3ae..9da1cfe7093 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ 
-16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -#ifdef __VOLUME__ +#ifdef __VOLUME_SCATTER__ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L, @@ -58,8 +58,7 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, } ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray, - float num_samples_adjust) + ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) { /* sample phase function */ float phase_pdf; @@ -134,24 +133,22 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_select(kg, i, light_u, light_v, ray->P, &ls); + lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls); float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); - float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); + float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); - if(result != VOLUME_PATH_SCATTERED) - continue; + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - /* todo: split up light_sample so we don't have to call it again with new position */ - light_select(kg, i, light_u, light_v, sd->P, &ls); - } + /* todo: split up light_sample so we don't have to call it again with new position */ + lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls); if(ls.pdf == 0.0f) continue; @@ -192,19 +189,17 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); - float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); + float rphase = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_PHASE); + float rscatter = path_branched_rng_1D_for_decision(kg, rng, state, j, num_samples, PRNG_SCATTER_DISTANCE); - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - if(result != VOLUME_PATH_SCATTERED) - continue; - - /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); - } + /* todo: split up light_sample so we don't have to call it again with new position */ + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(ls.pdf == 0.0f) continue; @@ -233,19 +228,17 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG float3 tp = throughput; /* sample position on volume segment */ - if(segment) { - float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE); - float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); + float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE); + float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE); - if(result != VOLUME_PATH_SCATTERED) - return; + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); + + (void)result; + kernel_assert(result == VOLUME_PATH_SCATTERED); - /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); - } + /* todo: split up light_sample so we don't have to call it again with new position */ + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); if(ls.pdf == 0.0f) return; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 3b95d7055ac..fc61f1a9c2c 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -340,7 +340,7 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); int shader; - triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 67bd0943b1b..740998e8c92 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -64,10 +64,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 40d621b66f6..da73a3a1c97 100644 --- 
a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa /* Shader Evaluate */ -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index 4b48d10b020..5704f60e138 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo /* Shader Evaluate */ -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample) +void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) { if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); else kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 164df41fc09..81306361ea4 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -66,6 +66,8 @@ CCL_NAMESPACE_BEGIN #define __SUBSURFACE__ #define __CMJ__ #define __VOLUME__ +#define __VOLUME_DECOUPLED__ +#define __VOLUME_SCATTER__ #define __SHADOW_RECORD_ALL__ #endif @@ -73,10 +75,15 @@ CCL_NAMESPACE_BEGIN #define 
__KERNEL_SHADING__ #define __KERNEL_ADV_SHADING__ #define __BRANCHED_PATH__ +#define __VOLUME__ +#define __VOLUME_SCATTER__ /* Experimental on GPU */ -//#define __VOLUME__ -//#define __SUBSURFACE__ +#ifdef __KERNEL_CUDA_EXPERIMENTAL__ +#define __SUBSURFACE__ +#define __CMJ__ +#endif + #endif #ifdef __KERNEL_OPENCL__ @@ -103,7 +110,6 @@ CCL_NAMESPACE_BEGIN #define __BACKGROUND_MIS__ #define __LAMP_MIS__ #define __AO__ -#define __ANISOTROPIC__ //#define __CAMERA_MOTION__ //#define __OBJECT_MOTION__ //#define __HAIR__ @@ -134,11 +140,9 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SHADING__ #define __SVM__ #define __EMISSION__ -#define __PROCEDURAL_TEXTURES__ -#define __IMAGE_TEXTURES__ +#define __TEXTURES__ #define __EXTRA_NODES__ #define __HOLDOUT__ -#define __NORMAL_MAP__ #endif #ifdef __KERNEL_ADV_SHADING__ @@ -148,7 +152,6 @@ CCL_NAMESPACE_BEGIN #define __BACKGROUND_MIS__ #define __LAMP_MIS__ #define __AO__ -#define __ANISOTROPIC__ #define __CAMERA_MOTION__ #define __OBJECT_MOTION__ #define __HAIR__ @@ -223,10 +226,9 @@ enum PathTraceDimension { PRNG_PHASE_V = 9, PRNG_PHASE = 10, PRNG_SCATTER_DISTANCE = 11, - PRNG_BOUNCE_NUM = 12, -#else - PRNG_BOUNCE_NUM = 8, #endif + + PRNG_BOUNCE_NUM = 12, }; enum SamplingPattern { @@ -524,14 +526,14 @@ typedef struct ShaderClosure { ClosureType type; float3 weight; - float sample_weight; - float data0; float data1; float data2; float3 N; float3 T; + + float sample_weight; #ifdef __OSL__ void *prim; @@ -593,6 +595,7 @@ enum ShaderDataFlag { SD_HOLDOUT_MASK = 524288, /* holdout for camera rays */ SD_OBJECT_MOTION = 1048576, /* has object motion blur */ SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */ + SD_NEGATIVE_SCALE_APPLIED = 4194304, /* vertices have negative scale applied */ SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED) }; @@ -758,9 +761,12 @@ typedef struct KernelCamera { /* render size */ float width, height; int resolution; + + /* anamorphic lens bokeh */ + float 
inv_aperture_ratio; + int pad1; int pad2; - int pad3; /* more matrices */ Transform screentoworld; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index cc4b2e3edf1..ea02ede10cd 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -176,6 +176,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute extinction at the start */ float t = 0.0f; + float3 sum = make_float3(0.0f, 0.0f, 0.0f); + for(int i = 0; i < max_steps; i++) { /* advance to new position */ float new_t = min(ray->t, (i+1) * step); @@ -190,20 +192,26 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute attenuation over segment */ if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { - /* todo: we could avoid computing expf() for each step by summing, - * because exp(a)*exp(b) = exp(a+b), but we still want a quick - * tp_eps check too */ - tp *= volume_color_transmittance(sigma_t, new_t - t); - - /* stop if nearly all light blocked */ - if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) - break; + /* Compute expf() only for every Nth step, to save some calculations + * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */ + + sum += (-sigma_t * (new_t - t)); + if((i & 0x07) == 0) { /* ToDo: Other interval? 
*/ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); + + /* stop if nearly all light is blocked */ + if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) + break; + } } /* stop if at the end of the volume */ t = new_t; - if(t == ray->t) + if(t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * make_float3(expf(sum.x), expf(sum.y), expf(sum.z)); break; + } } *throughput = tp; @@ -326,6 +334,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba float t = ray->t; float3 new_tp; +#ifdef __VOLUME_SCATTER__ /* randomly scatter, and if we do t is shortened */ if(closure_flag & SD_SCATTER) { /* extinction coefficient */ @@ -379,7 +388,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba new_tp = *throughput * transmittance / pdf; } } - else if(closure_flag & SD_ABSORPTION) { + else +#endif + if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ float3 transmittance = volume_color_transmittance(coeff.sigma_a, t); new_tp = *throughput * transmittance; @@ -456,6 +467,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( bool scatter = false; /* distance sampling */ +#ifdef __VOLUME_SCATTER__ if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_ABSORPTION))) { has_scatter = true; @@ -491,7 +503,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( xi = 1.0f - (1.0f - xi)/sample_transmittance; } } - else if(closure_flag & SD_ABSORPTION) { + else +#endif + if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ float3 sigma_a = coeff.sigma_a; @@ -546,13 +560,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( * between the endpoints. distance sampling is used to decide if we will * scatter or not. 
*/ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, - PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng) + PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous) { /* workaround to fix correlation bug in T38710, can find better solution * in random number generator later, for now this is done here to not impact * performance of rendering without volumes */ RNG tmp_rng = cmj_hash(*rng, state->rng_offset); - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce); @@ -724,16 +737,15 @@ ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *s * scattering, they always scatter if there is any non-zero scattering * coefficient. * - * these also do not do emission or modify throughput. */ + * these also do not do emission or modify throughput. 
+ * + * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput, float rphase, float rscatter, const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter) { - int closure_flag = segment->closure_flag; - - if(!(closure_flag & SD_SCATTER)) - return VOLUME_PATH_MISSED; + kernel_assert(segment->closure_flag & SD_SCATTER); /* pick random color channel, we use the Veach one-sample * model with balance heuristic for the channels */ @@ -845,15 +857,33 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); if(segment->numsteps > 1) { - /* todo: optimize using binary search */ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < segment->numsteps-1; i++, step++) { - if(sample_t < step->t) + int numsteps = segment->numsteps; + int high = numsteps - 1; + int low = 0; + int mid; + + while(low < high) { + mid = (low + high) >> 1; + + if(sample_t < step[mid].t) + high = mid; + else if(sample_t >= step[mid + 1].t) + low = mid + 1; + else { + /* found our interval in step[mid] .. 
step[mid+1] */ + prev_t = step[mid].t; + prev_cdf_distance = step[mid].cdf_distance; + step += mid+1; break; + } + } - prev_t = step->t; - prev_cdf_distance = step->cdf_distance; + if(low >= numsteps - 1) { + prev_t = step[numsteps - 1].t; + prev_cdf_distance = step[numsteps-1].cdf_distance; + step += numsteps - 1; } /* pdf for picking step with distance sampling */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index d5d995d490b..38cb5061346 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -479,7 +479,7 @@ static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val) { - if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { + if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { ustring *sval = (ustring *)val; sval[0] = str; @@ -758,6 +758,12 @@ bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, return false; ShaderData *sd = (ShaderData *)(sg->renderstate); + return get_attribute(sd, derivatives, object_name, type, name, val); +} + +bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name, + TypeDesc type, ustring name, void *val) +{ KernelGlobals *kg = sd->osl_globals; bool is_curve; int object; @@ -1100,7 +1106,7 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri return set_attribute_float(f, type, derivatives, val); } - return get_attribute(sg, derivatives, u_empty, type, name, val); + return get_attribute(sd, derivatives, u_empty, type, name, val); } } } diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 024a8396060..6f928a0d103 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ 
b/intern/cycles/kernel/osl/osl_services.h @@ -66,6 +66,8 @@ public: int index, void *val); bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object, TypeDesc type, ustring name, void *val); + bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name, + TypeDesc type, ustring name, void *val); bool get_userdata(bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val); diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index d6663aae9db..c13eae813d6 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -236,7 +236,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade if(stack_load_float(stack, node.z) == 1.0f) offset += node.y; break; -#ifdef __IMAGE_TEXTURES__ +#ifdef __TEXTURES__ case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node); break; @@ -246,8 +246,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; -#endif -#ifdef __PROCEDURAL_TEXTURES__ case NODE_TEX_SKY: svm_node_tex_sky(kg, sd, stack, node, &offset); break; @@ -420,17 +418,13 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade case NODE_LIGHT_FALLOFF: svm_node_light_falloff(sd, stack, node); break; -#endif -#ifdef __ANISOTROPIC__ +#endif case NODE_TANGENT: svm_node_tangent(kg, sd, stack, node); break; -#endif -#ifdef __NORMAL_MAP__ case NODE_NORMAL_MAP: svm_node_normal_map(kg, sd, stack, node); - break; -#endif + break; case NODE_END: default: return; diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index f4e2d6ebbf7..cd6d9fc53b5 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -294,7 +294,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(sc) { sc->N = N; -#ifdef 
__ANISOTROPIC__ sc->T = stack_load_float3(stack, data_node.y); /* rotate tangent */ @@ -324,9 +323,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sd->flag |= bsdf_microfacet_ggx_aniso_setup(sc); else sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(sc); -#else - sd->flag |= bsdf_diffuse_setup(sc); -#endif } break; } diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index d7da63dea8a..5723a22dd84 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -95,6 +95,7 @@ BakeManager::BakeManager() m_bake_data = NULL; m_is_baking = false; need_update = true; + m_shader_limit = 512 * 512; } BakeManager::~BakeManager() @@ -119,74 +120,99 @@ BakeData *BakeManager::init(const int object, const size_t tri_offset, const siz return m_bake_data; } +void BakeManager::set_shader_limit(const size_t x, const size_t y) +{ + m_shader_limit = x * y; + m_shader_limit = (size_t)pow(2, ceil(log(m_shader_limit)/log(2))); +} + bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) { - size_t limit = bake_data->size(); + size_t num_pixels = bake_data->size(); - /* setup input for device task */ - device_vector<uint4> d_input; - uint4 *d_input_data = d_input.resize(limit * 2); - size_t d_input_size = 0; + progress.reset_sample(); + this->num_parts = 0; - for(size_t i = 0; i < limit; i++) { - d_input_data[d_input_size++] = bake_data->data(i); - d_input_data[d_input_size++] = bake_data->differentials(i); - } + /* calculate the total parts for the progress bar */ + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { + size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - if(d_input_size == 0) - return false; + DeviceTask task(DeviceTask::SHADER); + task.shader_w = shader_size; - /* run device task */ - device_vector<float4> d_output; - 
d_output.resize(limit); + this->num_parts += device->get_split_task_count(task); + } - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; - device->mem_alloc(d_input, MEM_READ_ONLY); - device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { + size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - DeviceTask task(DeviceTask::SHADER); - task.shader_input = d_input.device_pointer; - task.shader_output = d_output.device_pointer; - task.shader_eval_type = shader_type; - task.shader_x = 0; - task.shader_w = d_output.size(); - task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; - task.get_cancel = function_bind(&Progress::get_cancel, &progress); - task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); + /* setup input for device task */ + device_vector<uint4> d_input; + uint4 *d_input_data = d_input.resize(shader_size * 2); + size_t d_input_size = 0; - this->num_parts = device->get_split_task_count(task); - this->num_samples = task.num_samples; + for(size_t i = shader_offset; i < (shader_offset + shader_size); i++) { + d_input_data[d_input_size++] = bake_data->data(i); + d_input_data[d_input_size++] = bake_data->differentials(i); + } - device->task_add(task); - device->task_wait(); + if(d_input_size == 0) { + m_is_baking = false; + return false; + } + + /* run device task */ + device_vector<float4> d_output; + d_output.resize(shader_size); + + /* needs to be up to data for attribute access */ + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + + device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_copy_to(d_input); + device->mem_alloc(d_output, MEM_WRITE_ONLY); + + DeviceTask 
task(DeviceTask::SHADER); + task.shader_input = d_input.device_pointer; + task.shader_output = d_output.device_pointer; + task.shader_eval_type = shader_type; + task.shader_x = 0; + task.offset = shader_offset; + task.shader_w = d_output.size(); + task.num_samples = this->num_samples; + task.get_cancel = function_bind(&Progress::get_cancel, &progress); + task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); + + device->task_add(task); + device->task_wait(); + + if(progress.get_cancel()) { + device->mem_free(d_input); + device->mem_free(d_output); + m_is_baking = false; + return false; + } - if(progress.get_cancel()) { + device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); device->mem_free(d_input); device->mem_free(d_output); - m_is_baking = false; - return false; - } - - device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); - device->mem_free(d_input); - device->mem_free(d_output); - /* read result */ - int k = 0; + /* read result */ + int k = 0; - float4 *offset = (float4*)d_output.data_pointer; + float4 *offset = (float4*)d_output.data_pointer; - size_t depth = 4; - for(size_t i = 0; i < limit; i++) { - size_t index = i * depth; - float4 out = offset[k++]; + size_t depth = 4; + for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) { + size_t index = i * depth; + float4 out = offset[k++]; - if(bake_data->is_valid(i)) { - for(size_t j=0; j < 4; j++) { - result[index + j] = out[j]; + if(bake_data->is_valid(i)) { + for(size_t j=0; j < 4; j++) { + result[index + j] = out[j]; + } } } } diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 652420c2e19..186fbbeea4d 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -60,6 +60,8 @@ public: BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels); + void set_shader_limit(const size_t x, const size_t y); + bool bake(Device *device, DeviceScene *dscene, Scene 
*scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); @@ -76,6 +78,7 @@ public: private: BakeData *m_bake_data; bool m_is_baking; + size_t m_shader_limit; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 8659fe4f7a3..bb0fec759a9 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -38,6 +38,8 @@ Camera::Camera() motion.post = transform_identity(); use_motion = false; + aperture_ratio = 1.0f; + type = CAMERA_PERSPECTIVE; panorama_type = PANORAMA_EQUIRECTANGULAR; fisheye_fov = M_PI_F; @@ -241,6 +243,9 @@ void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) /* type */ kcam->type = type; + /* anamorphic lens bokeh */ + kcam->inv_aperture_ratio = 1.0f / aperture_ratio; + /* panorama */ kcam->panorama_type = panorama_type; kcam->fisheye_fov = fisheye_fov; @@ -291,6 +296,7 @@ bool Camera::modified(const Camera& cam) (viewplane == cam.viewplane) && (border == cam.border) && (matrix == cam.matrix) && + (aperture_ratio == cam.aperture_ratio) && (panorama_type == cam.panorama_type) && (fisheye_fov == cam.fisheye_fov) && (fisheye_lens == cam.fisheye_lens)); diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index c28670bc55f..50889968a90 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -54,6 +54,9 @@ public: float fisheye_fov; float fisheye_lens; + /* anamorphic lens bokeh */ + float aperture_ratio; + /* sensor */ float sensorwidth; float sensorheight; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index f84396ab6a1..076cc3d8b63 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -135,6 +135,7 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo (colorspace == "" && 
(strcmp(in->format_name(), "png") == 0 || strcmp(in->format_name(), "tiff") == 0 || + strcmp(in->format_name(), "dpx") == 0 || strcmp(in->format_name(), "jpeg2000") == 0))); } else { @@ -332,7 +333,7 @@ void ImageManager::tag_reload_image(const string& filename, void *builtin_data, /* see if it's in a float texture slot */ for(slot = 0; slot < float_images.size(); slot++) { if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) { - images[slot]->need_load = true; + float_images[slot]->need_load = true; break; } } diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 4a8b490b1ad..32e887d48f1 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -86,11 +86,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1; kintegrator->max_glossy_bounce = max_glossy_bounce + 1; kintegrator->max_transmission_bounce = max_transmission_bounce + 1; - - if(kintegrator->use_volumes) - kintegrator->max_volume_bounce = max_volume_bounce + 1; - else - kintegrator->max_volume_bounce = 1; + kintegrator->max_volume_bounce = max_volume_bounce + 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; kintegrator->transparent_min_bounce = transparent_min_bounce + 1; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 9a0a7ead696..94ab82a600e 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -206,8 +206,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen } /* skip motion blurred deforming meshes, not supported yet */ - if(mesh->has_motion_blur()) + if(mesh->has_motion_blur()) { + j++; continue; + } /* skip if we have no emission shaders */ foreach(uint sindex, mesh->used_shaders) { diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 
295c934537a..8299cd02fef 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -132,6 +132,7 @@ void Mesh::clear() transform_applied = false; transform_negative_scaled = false; transform_normal = transform_identity(); + geometry_synced = false; } int Mesh::split_vertex(int vertex) @@ -341,13 +342,6 @@ void Mesh::add_vertex_normals() vN[i] = -vN[i]; } } - else if(flip) { - Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL); - float3 *vN = attr_vN->data_float3(); - for(size_t i = 0; i < verts_size; i++) { - vN[i] = -vN[i]; - } - } /* motion vertex normals */ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); @@ -382,14 +376,6 @@ void Mesh::add_vertex_normals() } } } - else if(has_motion_blur() && attr_mN && flip) { - for(int step = 0; step < motion_steps - 1; step++) { - float3 *mN = attr_mN->data_float3() + step*verts.size(); - for(size_t i = 0; i < verts_size; i++) { - mN[i] = -mN[i]; - } - } - } } void Mesh::pack_normals(Scene *scene, float *tri_shader, float4 *vnormal) diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 5ee774bacc1..d45905611fa 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -71,6 +71,9 @@ public: ustring name; /* Mesh Data */ + bool geometry_synced; /* used to distinguish meshes with no verts + and meshed for which geometry is not created */ + vector<float3> verts; vector<Triangle> triangles; vector<uint> shader; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 027bfd71931..1f148d34ea6 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -449,6 +449,8 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u } object_flag[i] |= SD_TRANSFORM_APPLIED; + if(object->mesh->transform_negative_scaled) + object_flag[i] |= SD_NEGATIVE_SCALE_APPLIED; } else have_instancing = true; diff --git a/intern/cycles/render/osl.cpp 
b/intern/cycles/render/osl.cpp index fa7a242e77b..f57e16471a1 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -248,24 +248,27 @@ void OSLShaderManager::shading_system_free() bool OSLShaderManager::osl_compile(const string& inputfile, const string& outputfile) { -#if OSL_LIBRARY_VERSION_CODE >= 10500 - vector<string_view> options; -#else - vector<string> options; +#if OSL_LIBRARY_VERSION_CODE < 10500 + typedef string string_view; #endif + + vector<string_view> options; string stdosl_path; + string shader_path = path_get("shader"); /* specify output file name */ options.push_back("-o"); options.push_back(outputfile); /* specify standard include path */ - options.push_back("-I" + path_get("shader")); + options.push_back("-I"); + options.push_back(shader_path); + stdosl_path = path_get("shader/stdosl.h"); /* compile */ OSL::OSLCompiler *compiler = OSL::OSLCompiler::create(); - bool ok = compiler->compile(inputfile, options, stdosl_path); + bool ok = compiler->compile(string_view(inputfile), options, string_view(stdosl_path)); delete compiler; return ok; diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 701ba313c79..7b3301e8abe 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -10,10 +10,8 @@ set(INC_SYS set(SRC util_cache.cpp - util_cuda.cpp util_dynlib.cpp util_md5.cpp - util_opencl.cpp util_path.cpp util_string.cpp util_simd.cpp @@ -34,7 +32,6 @@ set(SRC_HEADERS util_args.h util_boundbox.h util_cache.h - util_cuda.h util_debug.h util_dynlib.h util_foreach.h @@ -46,7 +43,6 @@ set(SRC_HEADERS util_map.h util_math.h util_md5.h - util_opencl.h util_opengl.h util_optimization.h util_param.h diff --git a/intern/cycles/util/util_cache.h b/intern/cycles/util/util_cache.h index 417f4a869b6..bfb2877a22b 100644 --- a/intern/cycles/util/util_cache.h +++ b/intern/cycles/util/util_cache.h @@ -25,7 +25,7 @@ * again into the appropriate data structures. 
* * This way we do not need to accurately track changes, compare dates and - * invalidate cache entries, at the cost of exta computation. If everything + * invalidate cache entries, at the cost of extra computation. If everything * is stored in a global cache, computations can perhaps even be shared between * different scenes where it may be hard to detect duplicate work. */ @@ -96,54 +96,70 @@ public: buffers.push_back(buffer); } - template<typename T> void read(array<T>& data) + template<typename T> bool read(array<T>& data) { size_t size; if(!fread(&size, sizeof(size), 1, f)) { fprintf(stderr, "Failed to read vector size from cache.\n"); - return; + return false; } if(!size) - return; + return false; data.resize(size/sizeof(T)); if(!fread(&data[0], size, 1, f)) { fprintf(stderr, "Failed to read vector data from cache (%lu).\n", (unsigned long)size); - return; + return false; } + return true; } - void read(int& data) + bool read(int& data) { size_t size; - if(!fread(&size, sizeof(size), 1, f)) + if(!fread(&size, sizeof(size), 1, f)) { fprintf(stderr, "Failed to read int size from cache.\n"); - if(!fread(&data, sizeof(data), 1, f)) + return false; + } + if(!fread(&data, sizeof(data), 1, f)) { fprintf(stderr, "Failed to read int from cache.\n"); + return false; + } + return true; } - void read(float& data) + bool read(float& data) { size_t size; - if(!fread(&size, sizeof(size), 1, f)) + if(!fread(&size, sizeof(size), 1, f)) { fprintf(stderr, "Failed to read float size from cache.\n"); - if(!fread(&data, sizeof(data), 1, f)) + return false; + } + if(!fread(&data, sizeof(data), 1, f)) { fprintf(stderr, "Failed to read float from cache.\n"); + return false; + } + return true; } - void read(size_t& data) + bool read(size_t& data) { size_t size; - if(!fread(&size, sizeof(size), 1, f)) + if(!fread(&size, sizeof(size), 1, f)) { fprintf(stderr, "Failed to read size_t size from cache.\n"); - if(!fread(&data, sizeof(data), 1, f)) + return false; + } + if(!fread(&data, 
sizeof(data), 1, f)) { fprintf(stderr, "Failed to read size_t from cache.\n"); + return false; + } + return true; } }; diff --git a/intern/cycles/util/util_cuda.cpp b/intern/cycles/util/util_cuda.cpp deleted file mode 100644 index 50690434f03..00000000000 --- a/intern/cycles/util/util_cuda.cpp +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#include <iostream> - -#include <stdlib.h> -#include <stdio.h> - -#include "util_cuda.h" -#include "util_debug.h" -#include "util_dynlib.h" -#include "util_path.h" -#include "util_string.h" - -#ifdef _WIN32 -#define popen _popen -#define pclose _pclose -#endif - -/* function defininitions */ - -tcuInit *cuInit; -tcuDriverGetVersion *cuDriverGetVersion; -tcuDeviceGet *cuDeviceGet; -tcuDeviceGetCount *cuDeviceGetCount; -tcuDeviceGetName *cuDeviceGetName; -tcuDeviceComputeCapability *cuDeviceComputeCapability; -tcuDeviceTotalMem *cuDeviceTotalMem; -tcuDeviceGetProperties *cuDeviceGetProperties; -tcuDeviceGetAttribute *cuDeviceGetAttribute; -tcuCtxCreate *cuCtxCreate; -tcuCtxDestroy *cuCtxDestroy; -tcuCtxAttach *cuCtxAttach; -tcuCtxDetach *cuCtxDetach; -tcuCtxPushCurrent *cuCtxPushCurrent; -tcuCtxPopCurrent *cuCtxPopCurrent; -tcuCtxGetDevice *cuCtxGetDevice; -tcuCtxSynchronize *cuCtxSynchronize; -tcuModuleLoad *cuModuleLoad; -tcuModuleLoadData *cuModuleLoadData; -tcuModuleLoadDataEx *cuModuleLoadDataEx; -tcuModuleLoadFatBinary 
*cuModuleLoadFatBinary; -tcuModuleUnload *cuModuleUnload; -tcuModuleGetFunction *cuModuleGetFunction; -tcuModuleGetGlobal *cuModuleGetGlobal; -tcuModuleGetTexRef *cuModuleGetTexRef; -tcuModuleGetSurfRef *cuModuleGetSurfRef; -tcuMemGetInfo *cuMemGetInfo; -tcuMemAlloc *cuMemAlloc; -tcuMemAllocPitch *cuMemAllocPitch; -tcuMemFree *cuMemFree; -tcuMemGetAddressRange *cuMemGetAddressRange; -tcuMemAllocHost *cuMemAllocHost; -tcuMemFreeHost *cuMemFreeHost; -tcuMemHostAlloc *cuMemHostAlloc; -tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer; -tcuMemHostGetFlags *cuMemHostGetFlags; -tcuMemcpyHtoD *cuMemcpyHtoD; -tcuMemcpyDtoH *cuMemcpyDtoH; -tcuMemcpyDtoD *cuMemcpyDtoD; -tcuMemcpyDtoA *cuMemcpyDtoA; -tcuMemcpyAtoD *cuMemcpyAtoD; -tcuMemcpyHtoA *cuMemcpyHtoA; -tcuMemcpyAtoH *cuMemcpyAtoH; -tcuMemcpyAtoA *cuMemcpyAtoA; -tcuMemcpy2D *cuMemcpy2D; -tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned; -tcuMemcpy3D *cuMemcpy3D; -tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; -tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync; -tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; -tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; -tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; -tcuMemcpy2DAsync *cuMemcpy2DAsync; -tcuMemcpy3DAsync *cuMemcpy3DAsync; -tcuMemsetD8 *cuMemsetD8; -tcuMemsetD16 *cuMemsetD16; -tcuMemsetD32 *cuMemsetD32; -tcuMemsetD2D8 *cuMemsetD2D8; -tcuMemsetD2D16 *cuMemsetD2D16; -tcuMemsetD2D32 *cuMemsetD2D32; -tcuFuncSetBlockShape *cuFuncSetBlockShape; -tcuFuncSetSharedSize *cuFuncSetSharedSize; -tcuFuncGetAttribute *cuFuncGetAttribute; -tcuFuncSetCacheConfig *cuFuncSetCacheConfig; -tcuArrayCreate *cuArrayCreate; -tcuArrayGetDescriptor *cuArrayGetDescriptor; -tcuArrayDestroy *cuArrayDestroy; -tcuArray3DCreate *cuArray3DCreate; -tcuArray3DGetDescriptor *cuArray3DGetDescriptor; -tcuTexRefCreate *cuTexRefCreate; -tcuTexRefDestroy *cuTexRefDestroy; -tcuTexRefSetArray *cuTexRefSetArray; -tcuTexRefSetAddress *cuTexRefSetAddress; -tcuTexRefSetAddress2D *cuTexRefSetAddress2D; -tcuTexRefSetFormat *cuTexRefSetFormat; 
-tcuTexRefSetAddressMode *cuTexRefSetAddressMode; -tcuTexRefSetFilterMode *cuTexRefSetFilterMode; -tcuTexRefSetFlags *cuTexRefSetFlags; -tcuTexRefGetAddress *cuTexRefGetAddress; -tcuTexRefGetArray *cuTexRefGetArray; -tcuTexRefGetAddressMode *cuTexRefGetAddressMode; -tcuTexRefGetFilterMode *cuTexRefGetFilterMode; -tcuTexRefGetFormat *cuTexRefGetFormat; -tcuTexRefGetFlags *cuTexRefGetFlags; -tcuSurfRefSetArray *cuSurfRefSetArray; -tcuSurfRefGetArray *cuSurfRefGetArray; -tcuParamSetSize *cuParamSetSize; -tcuParamSeti *cuParamSeti; -tcuParamSetf *cuParamSetf; -tcuParamSetv *cuParamSetv; -tcuParamSetTexRef *cuParamSetTexRef; -tcuLaunch *cuLaunch; -tcuLaunchGrid *cuLaunchGrid; -tcuLaunchGridAsync *cuLaunchGridAsync; -tcuEventCreate *cuEventCreate; -tcuEventRecord *cuEventRecord; -tcuEventQuery *cuEventQuery; -tcuEventSynchronize *cuEventSynchronize; -tcuEventDestroy *cuEventDestroy; -tcuEventElapsedTime *cuEventElapsedTime; -tcuStreamCreate *cuStreamCreate; -tcuStreamQuery *cuStreamQuery; -tcuStreamSynchronize *cuStreamSynchronize; -tcuStreamDestroy *cuStreamDestroy; -tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; -tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; -tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; -tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; -tcuGraphicsMapResources *cuGraphicsMapResources; -tcuGraphicsUnmapResources *cuGraphicsUnmapResources; -tcuGetExportTable *cuGetExportTable; -tcuCtxSetLimit *cuCtxSetLimit; -tcuCtxGetLimit *cuCtxGetLimit; -tcuGLCtxCreate *cuGLCtxCreate; -tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; -tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; -tcuCtxSetCurrent *cuCtxSetCurrent; -tcuLaunchKernel *cuLaunchKernel; - -CCL_NAMESPACE_BEGIN - -/* utility macros */ -#define CUDA_LIBRARY_FIND_CHECKED(name) \ - name = (t##name*)dynamic_library_find(lib, #name); - -#define CUDA_LIBRARY_FIND(name) \ - name = 
(t##name*)dynamic_library_find(lib, #name); \ - assert(name); - -#define CUDA_LIBRARY_FIND_V2(name) \ - name = (t##name*)dynamic_library_find(lib, #name "_v2"); \ - assert(name); - -/* initialization function */ - -bool cuLibraryInit() -{ - static bool initialized = false; - static bool result = false; - - if(initialized) - return result; - - initialized = true; - - /* library paths */ -#ifdef _WIN32 - /* expected in c:/windows/system or similar, no path needed */ - const char *path = "nvcuda.dll"; - const char *alternative_path = NULL; -#elif defined(__APPLE__) - /* default installation path */ - const char *path = "/usr/local/cuda/lib/libcuda.dylib"; - const char *alternative_path = NULL; -#else - const char *path = "libcuda.so"; - const char *alternative_path = "libcuda.so.1"; -#endif - - /* load library */ - DynamicLibrary *lib = dynamic_library_open(path); - - if(lib == NULL && alternative_path) - lib = dynamic_library_open(alternative_path); - - if(lib == NULL) - return false; - - /* detect driver version */ - int driver_version = 1000; - - CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion); - if(cuDriverGetVersion) - cuDriverGetVersion(&driver_version); - - /* we require version 4.0 */ - if(driver_version < 4000) - return false; - - /* fetch all function pointers */ - CUDA_LIBRARY_FIND(cuInit); - CUDA_LIBRARY_FIND(cuDeviceGet); - CUDA_LIBRARY_FIND(cuDeviceGetCount); - CUDA_LIBRARY_FIND(cuDeviceGetName); - CUDA_LIBRARY_FIND(cuDeviceComputeCapability); - CUDA_LIBRARY_FIND(cuDeviceTotalMem); - CUDA_LIBRARY_FIND(cuDeviceGetProperties); - CUDA_LIBRARY_FIND(cuDeviceGetAttribute); - CUDA_LIBRARY_FIND(cuCtxCreate); - CUDA_LIBRARY_FIND(cuCtxDestroy); - CUDA_LIBRARY_FIND(cuCtxAttach); - CUDA_LIBRARY_FIND(cuCtxDetach); - CUDA_LIBRARY_FIND(cuCtxPushCurrent); - CUDA_LIBRARY_FIND(cuCtxPopCurrent); - CUDA_LIBRARY_FIND(cuCtxGetDevice); - CUDA_LIBRARY_FIND(cuCtxSynchronize); - CUDA_LIBRARY_FIND(cuModuleLoad); - CUDA_LIBRARY_FIND(cuModuleLoadData); - 
CUDA_LIBRARY_FIND(cuModuleUnload); - CUDA_LIBRARY_FIND(cuModuleGetFunction); - CUDA_LIBRARY_FIND(cuModuleGetGlobal); - CUDA_LIBRARY_FIND(cuModuleGetTexRef); - CUDA_LIBRARY_FIND(cuMemGetInfo); - CUDA_LIBRARY_FIND(cuMemAlloc); - CUDA_LIBRARY_FIND(cuMemAllocPitch); - CUDA_LIBRARY_FIND(cuMemFree); - CUDA_LIBRARY_FIND(cuMemGetAddressRange); - CUDA_LIBRARY_FIND(cuMemAllocHost); - CUDA_LIBRARY_FIND(cuMemFreeHost); - CUDA_LIBRARY_FIND(cuMemHostAlloc); - CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer); - CUDA_LIBRARY_FIND(cuMemcpyHtoD); - CUDA_LIBRARY_FIND(cuMemcpyDtoH); - CUDA_LIBRARY_FIND(cuMemcpyDtoD); - CUDA_LIBRARY_FIND(cuMemcpyDtoA); - CUDA_LIBRARY_FIND(cuMemcpyAtoD); - CUDA_LIBRARY_FIND(cuMemcpyHtoA); - CUDA_LIBRARY_FIND(cuMemcpyAtoH); - CUDA_LIBRARY_FIND(cuMemcpyAtoA); - CUDA_LIBRARY_FIND(cuMemcpy2D); - CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned); - CUDA_LIBRARY_FIND(cuMemcpy3D); - CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync); - CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync); - CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync); - CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync); - CUDA_LIBRARY_FIND(cuMemcpy2DAsync); - CUDA_LIBRARY_FIND(cuMemcpy3DAsync); - CUDA_LIBRARY_FIND(cuMemsetD8); - CUDA_LIBRARY_FIND(cuMemsetD16); - CUDA_LIBRARY_FIND(cuMemsetD32); - CUDA_LIBRARY_FIND(cuMemsetD2D8); - CUDA_LIBRARY_FIND(cuMemsetD2D16); - CUDA_LIBRARY_FIND(cuMemsetD2D32); - CUDA_LIBRARY_FIND(cuFuncSetBlockShape); - CUDA_LIBRARY_FIND(cuFuncSetSharedSize); - CUDA_LIBRARY_FIND(cuFuncGetAttribute); - CUDA_LIBRARY_FIND(cuArrayCreate); - CUDA_LIBRARY_FIND(cuArrayGetDescriptor); - CUDA_LIBRARY_FIND(cuArrayDestroy); - CUDA_LIBRARY_FIND(cuArray3DCreate); - CUDA_LIBRARY_FIND(cuArray3DGetDescriptor); - CUDA_LIBRARY_FIND(cuTexRefCreate); - CUDA_LIBRARY_FIND(cuTexRefDestroy); - CUDA_LIBRARY_FIND(cuTexRefSetArray); - CUDA_LIBRARY_FIND(cuTexRefSetAddress); - CUDA_LIBRARY_FIND(cuTexRefSetAddress2D); - CUDA_LIBRARY_FIND(cuTexRefSetFormat); - CUDA_LIBRARY_FIND(cuTexRefSetAddressMode); - CUDA_LIBRARY_FIND(cuTexRefSetFilterMode); - 
CUDA_LIBRARY_FIND(cuTexRefSetFlags); - CUDA_LIBRARY_FIND(cuTexRefGetAddress); - CUDA_LIBRARY_FIND(cuTexRefGetArray); - CUDA_LIBRARY_FIND(cuTexRefGetAddressMode); - CUDA_LIBRARY_FIND(cuTexRefGetFilterMode); - CUDA_LIBRARY_FIND(cuTexRefGetFormat); - CUDA_LIBRARY_FIND(cuTexRefGetFlags); - CUDA_LIBRARY_FIND(cuParamSetSize); - CUDA_LIBRARY_FIND(cuParamSeti); - CUDA_LIBRARY_FIND(cuParamSetf); - CUDA_LIBRARY_FIND(cuParamSetv); - CUDA_LIBRARY_FIND(cuParamSetTexRef); - CUDA_LIBRARY_FIND(cuLaunch); - CUDA_LIBRARY_FIND(cuLaunchGrid); - CUDA_LIBRARY_FIND(cuLaunchGridAsync); - CUDA_LIBRARY_FIND(cuEventCreate); - CUDA_LIBRARY_FIND(cuEventRecord); - CUDA_LIBRARY_FIND(cuEventQuery); - CUDA_LIBRARY_FIND(cuEventSynchronize); - CUDA_LIBRARY_FIND(cuEventDestroy); - CUDA_LIBRARY_FIND(cuEventElapsedTime); - CUDA_LIBRARY_FIND(cuStreamCreate); - CUDA_LIBRARY_FIND(cuStreamQuery); - CUDA_LIBRARY_FIND(cuStreamSynchronize); - CUDA_LIBRARY_FIND(cuStreamDestroy); - - /* cuda 2.1 */ - CUDA_LIBRARY_FIND(cuModuleLoadDataEx); - CUDA_LIBRARY_FIND(cuModuleLoadFatBinary); - CUDA_LIBRARY_FIND(cuGLCtxCreate); - CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); - CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); - - /* cuda 2.3 */ - CUDA_LIBRARY_FIND(cuMemHostGetFlags); - CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer); - CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage); - - /* cuda 3.0 */ - CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync); - CUDA_LIBRARY_FIND(cuFuncSetCacheConfig); - CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource); - CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray); - CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer); - CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags); - CUDA_LIBRARY_FIND(cuGraphicsMapResources); - CUDA_LIBRARY_FIND(cuGraphicsUnmapResources); - CUDA_LIBRARY_FIND(cuGetExportTable); - - /* cuda 3.1 */ - CUDA_LIBRARY_FIND(cuModuleGetSurfRef); - CUDA_LIBRARY_FIND(cuSurfRefSetArray); - CUDA_LIBRARY_FIND(cuSurfRefGetArray); - CUDA_LIBRARY_FIND(cuCtxSetLimit); - 
CUDA_LIBRARY_FIND(cuCtxGetLimit); - - /* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library - * has both the old ones for compatibility and new ones with _v2 postfix, - * we load the _v2 ones here. */ - CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem); - CUDA_LIBRARY_FIND_V2(cuCtxCreate); - CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal); - CUDA_LIBRARY_FIND_V2(cuMemGetInfo); - CUDA_LIBRARY_FIND_V2(cuMemAlloc); - CUDA_LIBRARY_FIND_V2(cuMemAllocPitch); - CUDA_LIBRARY_FIND_V2(cuMemFree); - CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange); - CUDA_LIBRARY_FIND_V2(cuMemAllocHost); - CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer); - CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD); - CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH); - CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD); - CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA); - CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD); - CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA); - CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH); - CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA); - CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpy2D); - CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned); - CUDA_LIBRARY_FIND_V2(cuMemcpy3D); - CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync); - CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync); - CUDA_LIBRARY_FIND_V2(cuMemsetD8); - CUDA_LIBRARY_FIND_V2(cuMemsetD16); - CUDA_LIBRARY_FIND_V2(cuMemsetD32); - CUDA_LIBRARY_FIND_V2(cuMemsetD2D8); - CUDA_LIBRARY_FIND_V2(cuMemsetD2D16); - CUDA_LIBRARY_FIND_V2(cuMemsetD2D32); - CUDA_LIBRARY_FIND_V2(cuArrayCreate); - CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor); - CUDA_LIBRARY_FIND_V2(cuArray3DCreate); - CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor); - CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress); - CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D); - CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress); - CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer); - CUDA_LIBRARY_FIND_V2(cuGLCtxCreate); - - /* cuda 4.0 */ 
- CUDA_LIBRARY_FIND(cuCtxSetCurrent); - CUDA_LIBRARY_FIND(cuLaunchKernel); - - if(cuHavePrecompiledKernels()) - result = true; -#ifndef _WIN32 - else if(cuCompilerPath() != "") - result = true; -#endif - - return result; -} - -bool cuHavePrecompiledKernels() -{ - string cubins_path = path_get("lib"); - - return path_exists(cubins_path); -} - -string cuCompilerPath() -{ -#ifdef _WIN32 - const char *defaultpaths[] = {"C:/CUDA/bin", NULL}; - const char *executable = "nvcc.exe"; -#else - const char *defaultpaths[] = { - "/Developer/NVIDIA/CUDA-5.0/bin", - "/usr/local/cuda-5.0/bin", - "/usr/local/cuda/bin", - "/Developer/NVIDIA/CUDA-6.0/bin", - "/usr/local/cuda-6.0/bin", - "/Developer/NVIDIA/CUDA-5.5/bin", - "/usr/local/cuda-5.5/bin", - NULL}; - const char *executable = "nvcc"; -#endif - - const char *binpath = getenv("CUDA_BIN_PATH"); - - string nvcc; - - if(binpath) { - nvcc = path_join(binpath, executable); - if(path_exists(nvcc)) - return nvcc; - } - - for(int i = 0; defaultpaths[i]; i++) { - nvcc = path_join(defaultpaths[i], executable); - if(path_exists(nvcc)) - return nvcc; - } - -#ifndef _WIN32 - { - FILE *handle = popen("which nvcc", "r"); - if(handle) { - char buffer[4096] = {0}; - int len = fread(buffer, 1, sizeof(buffer) - 1, handle); - buffer[len] = '\0'; - pclose(handle); - - if(buffer[0]) - return "nvcc"; - } - } -#endif - - return ""; -} - -int cuCompilerVersion() -{ - string path = cuCompilerPath(); - if(path == "") - return 0; - - /* get --version output */ - FILE *pipe = popen((path + " --version").c_str(), "r"); - if(!pipe) { - fprintf(stderr, "CUDA: failed to run compiler to retrieve version"); - return 0; - } - - char buf[128]; - string output = ""; - - while(!feof(pipe)) - if(fgets(buf, 128, pipe) != NULL) - output += buf; - - pclose(pipe); - - /* parse version number */ - string marker = "Cuda compilation tools, release "; - size_t offset = output.find(marker); - if(offset == string::npos) { - fprintf(stderr, "CUDA: failed to find version number 
in:\n\n%s\n", output.c_str()); - return 0; - } - - string versionstr = output.substr(offset + marker.size(), string::npos); - int major, minor; - - if(sscanf(versionstr.c_str(), "%d.%d", &major, &minor) < 2) { - fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output.c_str()); - return 0; - } - - return 10*major + minor; -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h deleted file mode 100644 index a633fb21eca..00000000000 --- a/intern/cycles/util/util_cuda.h +++ /dev/null @@ -1,636 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#ifndef __UTIL_CUDA_H__ -#define __UTIL_CUDA_H__ - -#include <stdlib.h> -#include "util_opengl.h" -#include "util_string.h" - -CCL_NAMESPACE_BEGIN - -/* CUDA is linked in dynamically at runtime, so we can start the application - * without requiring a CUDA installation. Code adapted from the example - * matrixMulDynlinkJIT in the CUDA SDK. 
*/ - -bool cuLibraryInit(); -bool cuHavePrecompiledKernels(); -string cuCompilerPath(); -int cuCompilerVersion(); - -CCL_NAMESPACE_END - -/* defines, structs, enums */ - -#define CUDA_VERSION 3020 - -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__LP64__) -typedef unsigned long long CUdeviceptr; -#else -typedef unsigned int CUdeviceptr; -#endif - -typedef int CUdevice; -typedef struct CUctx_st *CUcontext; -typedef struct CUmod_st *CUmodule; -typedef struct CUfunc_st *CUfunction; -typedef struct CUarray_st *CUarray; -typedef struct CUtexref_st *CUtexref; -typedef struct CUsurfref_st *CUsurfref; -typedef struct CUevent_st *CUevent; -typedef struct CUstream_st *CUstream; -typedef struct CUgraphicsResource_st *CUgraphicsResource; - -typedef struct CUuuid_st { - char bytes[16]; -} CUuuid; - -typedef enum CUctx_flags_enum { - CU_CTX_SCHED_AUTO = 0, - CU_CTX_SCHED_SPIN = 1, - CU_CTX_SCHED_YIELD = 2, - CU_CTX_SCHED_MASK = 0x3, - CU_CTX_BLOCKING_SYNC = 4, - CU_CTX_MAP_HOST = 8, - CU_CTX_LMEM_RESIZE_TO_MAX = 16, - CU_CTX_FLAGS_MASK = 0x1f -} CUctx_flags; - -typedef enum CUevent_flags_enum { - CU_EVENT_DEFAULT = 0, - CU_EVENT_BLOCKING_SYNC = 1, - CU_EVENT_DISABLE_TIMING = 2 -} CUevent_flags; - -typedef enum CUarray_format_enum { - CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, - CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, - CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, - CU_AD_FORMAT_SIGNED_INT8 = 0x08, - CU_AD_FORMAT_SIGNED_INT16 = 0x09, - CU_AD_FORMAT_SIGNED_INT32 = 0x0a, - CU_AD_FORMAT_HALF = 0x10, - CU_AD_FORMAT_FLOAT = 0x20 -} CUarray_format; - -typedef enum CUaddress_mode_enum { - CU_TR_ADDRESS_MODE_WRAP = 0, - CU_TR_ADDRESS_MODE_CLAMP = 1, - CU_TR_ADDRESS_MODE_MIRROR = 2, - CU_TR_ADDRESS_MODE_BORDER = 3 -} CUaddress_mode; - -typedef enum CUfilter_mode_enum { - CU_TR_FILTER_MODE_POINT = 0, - CU_TR_FILTER_MODE_LINEAR = 1 -} CUfilter_mode; - -typedef enum CUdevice_attribute_enum { - CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, - CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, - 
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, - CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, - CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, - CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, - CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, - CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, - CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, - CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, - CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, - CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, - CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, - CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, - CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, - CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, - CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, - CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, - CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, - CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, - CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, - CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, - CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, - CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, - CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 -} CUdevice_attribute; - -typedef struct CUdevprop_st { - int maxThreadsPerBlock; - int maxThreadsDim[3]; - int maxGridSize[3]; - int sharedMemPerBlock; - int totalConstantMemory; - int SIMDWidth; - int memPitch; - int regsPerBlock; - int clockRate; - int textureAlign; -} CUdevprop; - -typedef enum CUfunction_attribute_enum { - 
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, - CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, - CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, - CU_FUNC_ATTRIBUTE_NUM_REGS = 4, - CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, - CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, - CU_FUNC_ATTRIBUTE_MAX -} CUfunction_attribute; - -typedef enum CUfunc_cache_enum { - CU_FUNC_CACHE_PREFER_NONE = 0x00, - CU_FUNC_CACHE_PREFER_SHARED = 0x01, - CU_FUNC_CACHE_PREFER_L1 = 0x02 -} CUfunc_cache; - -typedef enum CUmemorytype_enum { - CU_MEMORYTYPE_HOST = 0x01, - CU_MEMORYTYPE_DEVICE = 0x02, - CU_MEMORYTYPE_ARRAY = 0x03 -} CUmemorytype; - -typedef enum CUcomputemode_enum { - CU_COMPUTEMODE_DEFAULT = 0, - CU_COMPUTEMODE_EXCLUSIVE = 1, - CU_COMPUTEMODE_PROHIBITED = 2 -} CUcomputemode; - -typedef enum CUjit_option_enum -{ - CU_JIT_MAX_REGISTERS = 0, - CU_JIT_THREADS_PER_BLOCK, - CU_JIT_WALL_TIME, - CU_JIT_INFO_LOG_BUFFER, - CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, - CU_JIT_ERROR_LOG_BUFFER, - CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, - CU_JIT_OPTIMIZATION_LEVEL, - CU_JIT_TARGET_FROM_CUCONTEXT, - CU_JIT_TARGET, - CU_JIT_FALLBACK_STRATEGY - -} CUjit_option; - -typedef enum CUjit_target_enum -{ - CU_TARGET_COMPUTE_10 = 0, - CU_TARGET_COMPUTE_11, - CU_TARGET_COMPUTE_12, - CU_TARGET_COMPUTE_13, - CU_TARGET_COMPUTE_20, - CU_TARGET_COMPUTE_21, - CU_TARGET_COMPUTE_30, - CU_TARGET_COMPUTE_35, - CU_TARGET_COMPUTE_50 -} CUjit_target; - -typedef enum CUjit_fallback_enum -{ - CU_PREFER_PTX = 0, - CU_PREFER_BINARY - -} CUjit_fallback; - -typedef enum CUgraphicsRegisterFlags_enum { - CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00 -} CUgraphicsRegisterFlags; - -typedef enum CUgraphicsMapResourceFlags_enum { - CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, - CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, - CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 -} CUgraphicsMapResourceFlags; - -typedef enum CUarray_cubemap_face_enum { - CU_CUBEMAP_FACE_POSITIVE_X = 0x00, - CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, - 
CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, - CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, - CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, - CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 -} CUarray_cubemap_face; - -typedef enum CUlimit_enum { - CU_LIMIT_STACK_SIZE = 0x00, - CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, - CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 -} CUlimit; - -typedef enum cudaError_enum { - CUDA_SUCCESS = 0, - CUDA_ERROR_INVALID_VALUE = 1, - CUDA_ERROR_OUT_OF_MEMORY = 2, - CUDA_ERROR_NOT_INITIALIZED = 3, - CUDA_ERROR_DEINITIALIZED = 4, - CUDA_ERROR_NO_DEVICE = 100, - CUDA_ERROR_INVALID_DEVICE = 101, - CUDA_ERROR_INVALID_IMAGE = 200, - CUDA_ERROR_INVALID_CONTEXT = 201, - CUDA_ERROR_MAP_FAILED = 205, - CUDA_ERROR_UNMAP_FAILED = 206, - CUDA_ERROR_ARRAY_IS_MAPPED = 207, - CUDA_ERROR_ALREADY_MAPPED = 208, - CUDA_ERROR_NO_BINARY_FOR_GPU = 209, - CUDA_ERROR_ALREADY_ACQUIRED = 210, - CUDA_ERROR_NOT_MAPPED = 211, - CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, - CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, - CUDA_ERROR_ECC_UNCORRECTABLE = 214, - CUDA_ERROR_UNSUPPORTED_LIMIT = 215, - CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, - CUDA_ERROR_INVALID_PTX = 218, - CUDA_ERROR_INVALID_SOURCE = 300, - CUDA_ERROR_FILE_NOT_FOUND = 301, - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, - CUDA_ERROR_OPERATING_SYSTEM = 304, - CUDA_ERROR_INVALID_HANDLE = 400, - CUDA_ERROR_NOT_FOUND = 500, - CUDA_ERROR_NOT_READY = 600, - CUDA_ERROR_ILLEGAL_ADDRESS = 700, - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, - CUDA_ERROR_LAUNCH_TIMEOUT = 702, - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, - CUDA_ERROR_HARDWARE_STACK_ERROR = 714, - CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, - CUDA_ERROR_MISALIGNED_ADDRESS = 716, - CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, - CUDA_ERROR_INVALID_PC = 718, - CUDA_ERROR_LAUNCH_FAILED = 719, - CUDA_ERROR_NOT_PERMITTED = 800, - CUDA_ERROR_NOT_SUPPORTED = 801, - CUDA_ERROR_UNKNOWN = 999 -} CUresult; - -#define CU_MEMHOSTALLOC_PORTABLE 0x01 -#define 
CU_MEMHOSTALLOC_DEVICEMAP 0x02 -#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 - -typedef struct CUDA_MEMCPY2D_st { - size_t srcXInBytes; - size_t srcY; - - CUmemorytype srcMemoryType; - const void *srcHost; - CUdeviceptr srcDevice; - CUarray srcArray; - size_t srcPitch; - - size_t dstXInBytes; - size_t dstY; - - CUmemorytype dstMemoryType; - void *dstHost; - CUdeviceptr dstDevice; - CUarray dstArray; - size_t dstPitch; - - size_t WidthInBytes; - size_t Height; -} CUDA_MEMCPY2D; - -typedef struct CUDA_MEMCPY3D_st { - size_t srcXInBytes; - size_t srcY; - size_t srcZ; - size_t srcLOD; - CUmemorytype srcMemoryType; - const void *srcHost; - CUdeviceptr srcDevice; - CUarray srcArray; - void *reserved0; - size_t srcPitch; - size_t srcHeight; - - size_t dstXInBytes; - size_t dstY; - size_t dstZ; - size_t dstLOD; - CUmemorytype dstMemoryType; - void *dstHost; - CUdeviceptr dstDevice; - CUarray dstArray; - void *reserved1; - size_t dstPitch; - size_t dstHeight; - - size_t WidthInBytes; - size_t Height; - size_t Depth; -} CUDA_MEMCPY3D; - -typedef struct CUDA_ARRAY_DESCRIPTOR_st -{ - size_t Width; - size_t Height; - - CUarray_format Format; - unsigned int NumChannels; -} CUDA_ARRAY_DESCRIPTOR; - -typedef struct CUDA_ARRAY3D_DESCRIPTOR_st -{ - size_t Width; - size_t Height; - size_t Depth; - - CUarray_format Format; - unsigned int NumChannels; - unsigned int Flags; -} CUDA_ARRAY3D_DESCRIPTOR; - -#define CUDA_ARRAY3D_2DARRAY 0x01 -#define CUDA_ARRAY3D_SURFACE_LDST 0x02 -#define CU_TRSA_OVERRIDE_FORMAT 0x01 -#define CU_TRSF_READ_AS_INTEGER 0x01 -#define CU_TRSF_NORMALIZED_COORDINATES 0x02 -#define CU_TRSF_SRGB 0x10 -#define CU_PARAM_TR_DEFAULT -1 - -#ifdef _WIN32 -#define CUDAAPI __stdcall -#else -#define CUDAAPI -#endif - -/* function types */ - -typedef CUresult CUDAAPI tcuInit(unsigned int Flags); -typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion); -typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal); -typedef CUresult CUDAAPI 
tcuDeviceGetCount(int *count); -typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev); -typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev); -typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev); -typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev); -typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev); -typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev); -typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx); -typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags); -typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx); -typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx ); -typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx); -typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device); -typedef CUresult CUDAAPI tcuCtxSynchronize(void); -typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value); -typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit); -typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig); -typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config); -typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version); -typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname); -typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image); -typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); -typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin); -typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod); -typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name); -typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const 
char *name); -typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name); -typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name); -typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total); -typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize); -typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes); -typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr); -typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr); -typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize); -typedef CUresult CUDAAPI tcuMemFreeHost(void *p); -typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags); -typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags); -typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p); -typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount); -typedef CUresult CUDAAPI tcuMemcpy2D(const 
CUDA_MEMCPY2D *pCopy); -typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); -typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy); -typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream); -typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream); -typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N); -typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N); -typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N); -typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height); -typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height); -typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height); -typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream); -typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream); -typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream); -typedef 
CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream); -typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream); -typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream); -typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); -typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray); -typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray); -typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); -typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray); -typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags); -typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags); -typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream); -typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream); -typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream); -typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags); -typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream); -typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent); -typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent); -typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent); -typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd); -typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); -typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes); -typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, 
CUfunction hfunc); -typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config); -typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes); -typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value); -typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value); -typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes); -typedef CUresult CUDAAPI tcuLaunch(CUfunction f); -typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height); -typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream); -typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); -typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags); -typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); -typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); -typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents); -typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am); -typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm); -typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags); -typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); -typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef); -typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim); -typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef); -typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef); 
-typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef); -typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef); -typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef); -typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags); -typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef); -typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); -typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); -typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource); -typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags); -typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId); -typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device ); -typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags); -typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags); -typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx); -typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra); - -/* function declarations */ - -extern tcuInit *cuInit; -extern tcuDriverGetVersion *cuDriverGetVersion; 
-extern tcuDeviceGet *cuDeviceGet; -extern tcuDeviceGetCount *cuDeviceGetCount; -extern tcuDeviceGetName *cuDeviceGetName; -extern tcuDeviceComputeCapability *cuDeviceComputeCapability; -extern tcuDeviceTotalMem *cuDeviceTotalMem; -extern tcuDeviceGetProperties *cuDeviceGetProperties; -extern tcuDeviceGetAttribute *cuDeviceGetAttribute; -extern tcuCtxCreate *cuCtxCreate; -extern tcuCtxDestroy *cuCtxDestroy; -extern tcuCtxAttach *cuCtxAttach; -extern tcuCtxDetach *cuCtxDetach; -extern tcuCtxPushCurrent *cuCtxPushCurrent; -extern tcuCtxPopCurrent *cuCtxPopCurrent; -extern tcuCtxGetDevice *cuCtxGetDevice; -extern tcuCtxSynchronize *cuCtxSynchronize; -extern tcuModuleLoad *cuModuleLoad; -extern tcuModuleLoadData *cuModuleLoadData; -extern tcuModuleLoadDataEx *cuModuleLoadDataEx; -extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary; -extern tcuModuleUnload *cuModuleUnload; -extern tcuModuleGetFunction *cuModuleGetFunction; -extern tcuModuleGetGlobal *cuModuleGetGlobal; -extern tcuModuleGetTexRef *cuModuleGetTexRef; -extern tcuModuleGetSurfRef *cuModuleGetSurfRef; -extern tcuMemGetInfo *cuMemGetInfo; -extern tcuMemAlloc *cuMemAlloc; -extern tcuMemAllocPitch *cuMemAllocPitch; -extern tcuMemFree *cuMemFree; -extern tcuMemGetAddressRange *cuMemGetAddressRange; -extern tcuMemAllocHost *cuMemAllocHost; -extern tcuMemFreeHost *cuMemFreeHost; -extern tcuMemHostAlloc *cuMemHostAlloc; -extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer; -extern tcuMemHostGetFlags *cuMemHostGetFlags; -extern tcuMemcpyHtoD *cuMemcpyHtoD; -extern tcuMemcpyDtoH *cuMemcpyDtoH; -extern tcuMemcpyDtoD *cuMemcpyDtoD; -extern tcuMemcpyDtoA *cuMemcpyDtoA; -extern tcuMemcpyAtoD *cuMemcpyAtoD; -extern tcuMemcpyHtoA *cuMemcpyHtoA; -extern tcuMemcpyAtoH *cuMemcpyAtoH; -extern tcuMemcpyAtoA *cuMemcpyAtoA; -extern tcuMemcpy2D *cuMemcpy2D; -extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned; -extern tcuMemcpy3D *cuMemcpy3D; -extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; -extern tcuMemcpyDtoHAsync 
*cuMemcpyDtoHAsync; -extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; -extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; -extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; -extern tcuMemcpy2DAsync *cuMemcpy2DAsync; -extern tcuMemcpy3DAsync *cuMemcpy3DAsync; -extern tcuMemsetD8 *cuMemsetD8; -extern tcuMemsetD16 *cuMemsetD16; -extern tcuMemsetD32 *cuMemsetD32; -extern tcuMemsetD2D8 *cuMemsetD2D8; -extern tcuMemsetD2D16 *cuMemsetD2D16; -extern tcuMemsetD2D32 *cuMemsetD2D32; -extern tcuFuncSetBlockShape *cuFuncSetBlockShape; -extern tcuFuncSetSharedSize *cuFuncSetSharedSize; -extern tcuFuncGetAttribute *cuFuncGetAttribute; -extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig; -extern tcuArrayCreate *cuArrayCreate; -extern tcuArrayGetDescriptor *cuArrayGetDescriptor; -extern tcuArrayDestroy *cuArrayDestroy; -extern tcuArray3DCreate *cuArray3DCreate; -extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor; -extern tcuTexRefCreate *cuTexRefCreate; -extern tcuTexRefDestroy *cuTexRefDestroy; -extern tcuTexRefSetArray *cuTexRefSetArray; -extern tcuTexRefSetAddress *cuTexRefSetAddress; -extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D; -extern tcuTexRefSetFormat *cuTexRefSetFormat; -extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode; -extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode; -extern tcuTexRefSetFlags *cuTexRefSetFlags; -extern tcuTexRefGetAddress *cuTexRefGetAddress; -extern tcuTexRefGetArray *cuTexRefGetArray; -extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode; -extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode; -extern tcuTexRefGetFormat *cuTexRefGetFormat; -extern tcuTexRefGetFlags *cuTexRefGetFlags; -extern tcuSurfRefSetArray *cuSurfRefSetArray; -extern tcuSurfRefGetArray *cuSurfRefGetArray; -extern tcuParamSetSize *cuParamSetSize; -extern tcuParamSeti *cuParamSeti; -extern tcuParamSetf *cuParamSetf; -extern tcuParamSetv *cuParamSetv; -extern tcuParamSetTexRef *cuParamSetTexRef; -extern tcuLaunch *cuLaunch; -extern tcuLaunchGrid *cuLaunchGrid; -extern 
tcuLaunchGridAsync *cuLaunchGridAsync; -extern tcuEventCreate *cuEventCreate; -extern tcuEventRecord *cuEventRecord; -extern tcuEventQuery *cuEventQuery; -extern tcuEventSynchronize *cuEventSynchronize; -extern tcuEventDestroy *cuEventDestroy; -extern tcuEventElapsedTime *cuEventElapsedTime; -extern tcuStreamCreate *cuStreamCreate; -extern tcuStreamQuery *cuStreamQuery; -extern tcuStreamSynchronize *cuStreamSynchronize; -extern tcuStreamDestroy *cuStreamDestroy; -extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; -extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; -extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; -extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; -extern tcuGraphicsMapResources *cuGraphicsMapResources; -extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources; -extern tcuGetExportTable *cuGetExportTable; -extern tcuCtxSetLimit *cuCtxSetLimit; -extern tcuCtxGetLimit *cuCtxGetLimit; -extern tcuGLCtxCreate *cuGLCtxCreate; -extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; -extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; -extern tcuCtxSetCurrent *cuCtxSetCurrent; -extern tcuLaunchKernel *cuLaunchKernel; - -#endif /* __UTIL_CUDA_H__ */ - diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 5ea82c65a92..4ccb5f3e51c 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -76,17 +76,6 @@ CCL_NAMESPACE_BEGIN #ifdef _WIN32 -#ifndef __KERNEL_GPU__ - -#if defined(_MSC_VER) && (_MSC_VER < 1800) -# define copysignf(x, y) ((float)_copysign(x, y)) -# define hypotf(x, y) _hypotf(x, y) -# define isnan(x) _isnan(x) -# define isfinite(x) _finite(x) -#endif - -#endif - #ifndef __KERNEL_OPENCL__ ccl_device_inline float fmaxf(float a, float b) diff --git a/intern/cycles/util/util_opencl.cpp b/intern/cycles/util/util_opencl.cpp deleted file mode 100644 index c2d6bc66dc1..00000000000 --- 
a/intern/cycles/util/util_opencl.cpp +++ /dev/null @@ -1,337 +0,0 @@ -////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009 Organic Vectory B.V. -// Written by George van Venrooij -// -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file doc/license/Boost.txt) -// Extracted from the CLCC project - http://clcc.sourceforge.net/ -////////////////////////////////////////////////////////////////////////// - -#include <stdlib.h> - -#include "util_opencl.h" - -#ifndef CLCC_GENERATE_DOCUMENTATION -#ifdef _WIN32 -# define WIN32_LEAN_AND_MEAN -# define VC_EXTRALEAN -# include <windows.h> - - typedef HMODULE CLCC_DYNLIB_HANDLE; - -# define CLCC_DYNLIB_OPEN LoadLibrary -# define CLCC_DYNLIB_CLOSE FreeLibrary -# define CLCC_DYNLIB_IMPORT GetProcAddress -#else -# include <dlfcn.h> - - typedef void* CLCC_DYNLIB_HANDLE; - -# define CLCC_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL) -# define CLCC_DYNLIB_CLOSE dlclose -# define CLCC_DYNLIB_IMPORT dlsym -#endif -#else -// typedef implementation_defined CLCC_DYNLIB_HANDLE; -//# define CLCC_DYNLIB_OPEN(path) implementation_defined -//# define CLCC_DYNLIB_CLOSE implementation_defined -//# define CLCC_DYNLIB_IMPORT implementation_defined -#endif - -CCL_NAMESPACE_BEGIN - -//! 
\brief module handle -static CLCC_DYNLIB_HANDLE module = NULL; - -// Variables holding function entry points -#ifndef CLCC_GENERATE_DOCUMENTATION -PFNCLGETPLATFORMIDS __clewGetPlatformIDs = NULL; -PFNCLGETPLATFORMINFO __clewGetPlatformInfo = NULL; -PFNCLGETDEVICEIDS __clewGetDeviceIDs = NULL; -PFNCLGETDEVICEINFO __clewGetDeviceInfo = NULL; -PFNCLCREATECONTEXT __clewCreateContext = NULL; -PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType = NULL; -PFNCLRETAINCONTEXT __clewRetainContext = NULL; -PFNCLRELEASECONTEXT __clewReleaseContext = NULL; -PFNCLGETCONTEXTINFO __clewGetContextInfo = NULL; -PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue = NULL; -PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue = NULL; -PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue = NULL; -PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo = NULL; -PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty = NULL; -PFNCLCREATEBUFFER __clewCreateBuffer = NULL; -PFNCLCREATEIMAGE2D __clewCreateImage2D = NULL; -PFNCLCREATEIMAGE3D __clewCreateImage3D = NULL; -PFNCLRETAINMEMOBJECT __clewRetainMemObject = NULL; -PFNCLRELEASEMEMOBJECT __clewReleaseMemObject = NULL; -PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats = NULL; -PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo = NULL; -PFNCLGETIMAGEINFO __clewGetImageInfo = NULL; -PFNCLCREATESAMPLER __clewCreateSampler = NULL; -PFNCLRETAINSAMPLER __clewRetainSampler = NULL; -PFNCLRELEASESAMPLER __clewReleaseSampler = NULL; -PFNCLGETSAMPLERINFO __clewGetSamplerInfo = NULL; -PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource = NULL; -PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary = NULL; -PFNCLRETAINPROGRAM __clewRetainProgram = NULL; -PFNCLRELEASEPROGRAM __clewReleaseProgram = NULL; -PFNCLBUILDPROGRAM __clewBuildProgram = NULL; -PFNCLUNLOADCOMPILER __clewUnloadCompiler = NULL; -PFNCLGETPROGRAMINFO __clewGetProgramInfo = NULL; -PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo = NULL; -PFNCLCREATEKERNEL __clewCreateKernel = 
NULL; -PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram = NULL; -PFNCLRETAINKERNEL __clewRetainKernel = NULL; -PFNCLRELEASEKERNEL __clewReleaseKernel = NULL; -PFNCLSETKERNELARG __clewSetKernelArg = NULL; -PFNCLGETKERNELINFO __clewGetKernelInfo = NULL; -PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo = NULL; -PFNCLWAITFOREVENTS __clewWaitForEvents = NULL; -PFNCLGETEVENTINFO __clewGetEventInfo = NULL; -PFNCLRETAINEVENT __clewRetainEvent = NULL; -PFNCLRELEASEEVENT __clewReleaseEvent = NULL; -PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo = NULL; -PFNCLFLUSH __clewFlush = NULL; -PFNCLFINISH __clewFinish = NULL; -PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer = NULL; -PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer = NULL; -PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer = NULL; -PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage = NULL; -PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage = NULL; -PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage = NULL; -PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer = NULL; -PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage = NULL; -PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer = NULL; -PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage = NULL; -PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject = NULL; -PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel = NULL; -PFNCLENQUEUETASK __clewEnqueueTask = NULL; -PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel = NULL; -PFNCLENQUEUEMARKER __clewEnqueueMarker = NULL; -PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents = NULL; -PFNCLENQUEUEBARRIER __clewEnqueueBarrier = NULL; -PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL; -#endif // CLCC_GENERATE_DOCUMENTATION - - -#if 0 -//! \brief Unloads OpenCL dynamic library, should not be called directly -static void clewExit(void) -{ - if (module != NULL) - { - // Ignore errors - CLCC_DYNLIB_CLOSE(module); - module = NULL; - } -} -#endif - -//! 
\param path path to dynamic library to load -//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened -//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed -//! CLEW_SUCCESS when the library was succesfully loaded -int clLibraryInit() -{ -#ifdef _WIN32 - const char *path = "OpenCL.dll"; -#elif defined(__APPLE__) - const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL"; -#else - const char *path = "libOpenCL.so"; -#endif - - // OpenCL disabled for now, only works with this environment variable set - if(!getenv("CYCLES_OPENCL_TEST")) - return 0; - - // Check if already initialized - if (module != NULL) - { - return 1; - } - - // Load library - module = CLCC_DYNLIB_OPEN(path); - - // Check for errors - if (module == NULL) - { - return 0; - } - - // Disabled because we retain OpenCL context and it's difficult to ensure - // this will exit after releasing the context -#if 0 - // Set unloading - int error = atexit(clewExit); - - if (error) - { - // Failure queing atexit, shutdown with error - CLCC_DYNLIB_CLOSE(module); - module = NULL; - - return 0; - } -#endif - - // Determine function entry-points - __clewGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs"); - __clewGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo"); - __clewGetDeviceIDs = (PFNCLGETDEVICEIDS )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs"); - __clewGetDeviceInfo = (PFNCLGETDEVICEINFO )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo"); - __clewCreateContext = (PFNCLCREATECONTEXT )CLCC_DYNLIB_IMPORT(module, "clCreateContext"); - __clewCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE )CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType"); - __clewRetainContext = (PFNCLRETAINCONTEXT )CLCC_DYNLIB_IMPORT(module, "clRetainContext"); - __clewReleaseContext = (PFNCLRELEASECONTEXT )CLCC_DYNLIB_IMPORT(module, "clReleaseContext"); - __clewGetContextInfo = (PFNCLGETCONTEXTINFO )CLCC_DYNLIB_IMPORT(module, 
"clGetContextInfo"); - __clewCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue"); - __clewRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue"); - __clewReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue"); - __clewGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo"); - __clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty"); - __clewCreateBuffer = (PFNCLCREATEBUFFER )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer"); - __clewCreateImage2D = (PFNCLCREATEIMAGE2D )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D"); - __clewCreateImage3D = (PFNCLCREATEIMAGE3D )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D"); - __clewRetainMemObject = (PFNCLRETAINMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject"); - __clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject"); - __clewGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats"); - __clewGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo"); - __clewGetImageInfo = (PFNCLGETIMAGEINFO )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo"); - __clewCreateSampler = (PFNCLCREATESAMPLER )CLCC_DYNLIB_IMPORT(module, "clCreateSampler"); - __clewRetainSampler = (PFNCLRETAINSAMPLER )CLCC_DYNLIB_IMPORT(module, "clRetainSampler"); - __clewReleaseSampler = (PFNCLRELEASESAMPLER )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler"); - __clewGetSamplerInfo = (PFNCLGETSAMPLERINFO )CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo"); - __clewCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource"); - __clewCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary"); - 
__clewRetainProgram = (PFNCLRETAINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clRetainProgram"); - __clewReleaseProgram = (PFNCLRELEASEPROGRAM )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram"); - __clewBuildProgram = (PFNCLBUILDPROGRAM )CLCC_DYNLIB_IMPORT(module, "clBuildProgram"); - __clewUnloadCompiler = (PFNCLUNLOADCOMPILER )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler"); - __clewGetProgramInfo = (PFNCLGETPROGRAMINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo"); - __clewGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo"); - __clewCreateKernel = (PFNCLCREATEKERNEL )CLCC_DYNLIB_IMPORT(module, "clCreateKernel"); - __clewCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram"); - __clewRetainKernel = (PFNCLRETAINKERNEL )CLCC_DYNLIB_IMPORT(module, "clRetainKernel"); - __clewReleaseKernel = (PFNCLRELEASEKERNEL )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel"); - __clewSetKernelArg = (PFNCLSETKERNELARG )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg"); - __clewGetKernelInfo = (PFNCLGETKERNELINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo"); - __clewGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo"); - __clewWaitForEvents = (PFNCLWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents"); - __clewGetEventInfo = (PFNCLGETEVENTINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo"); - __clewRetainEvent = (PFNCLRETAINEVENT )CLCC_DYNLIB_IMPORT(module, "clRetainEvent"); - __clewReleaseEvent = (PFNCLRELEASEEVENT )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent"); - __clewGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventProfilingInfo"); - __clewFlush = (PFNCLFLUSH )CLCC_DYNLIB_IMPORT(module, "clFlush"); - __clewFinish = (PFNCLFINISH )CLCC_DYNLIB_IMPORT(module, "clFinish"); - __clewEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer"); - 
__clewEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer"); - __clewEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer"); - __clewEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage"); - __clewEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage"); - __clewEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImage"); - __clewEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer"); - __clewEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage"); - __clewEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapBuffer"); - __clewEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapImage"); - __clewEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject"); - __clewEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel"); - __clewEnqueueTask = (PFNCLENQUEUETASK )CLCC_DYNLIB_IMPORT(module, "clEnqueueTask"); - __clewEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNativeKernel"); - __clewEnqueueMarker = (PFNCLENQUEUEMARKER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMarker"); - __clewEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents"); - __clewEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLCC_DYNLIB_IMPORT(module, "clEnqueueBarrier"); - __clewGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLCC_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress"); - - if(__clewGetPlatformIDs == NULL) return 0; - if(__clewGetPlatformInfo == NULL) return 0; - if(__clewGetDeviceIDs == NULL) return 0; - if(__clewGetDeviceInfo == NULL) return 
0; - - return 1; -} - -//! \param error CL error code -//! \return a string representation of the error code -const char *clErrorString(cl_int error) -{ - static const char* strings[] = - { - // Error Codes - "CL_SUCCESS" // 0 - , "CL_DEVICE_NOT_FOUND" // -1 - , "CL_DEVICE_NOT_AVAILABLE" // -2 - , "CL_COMPILER_NOT_AVAILABLE" // -3 - , "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4 - , "CL_OUT_OF_RESOURCES" // -5 - , "CL_OUT_OF_HOST_MEMORY" // -6 - , "CL_PROFILING_INFO_NOT_AVAILABLE" // -7 - , "CL_MEM_COPY_OVERLAP" // -8 - , "CL_IMAGE_FORMAT_MISMATCH" // -9 - , "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10 - , "CL_BUILD_PROGRAM_FAILURE" // -11 - , "CL_MAP_FAILURE" // -12 - - , "" // -13 - , "" // -14 - , "" // -15 - , "" // -16 - , "" // -17 - , "" // -18 - , "" // -19 - - , "" // -20 - , "" // -21 - , "" // -22 - , "" // -23 - , "" // -24 - , "" // -25 - , "" // -26 - , "" // -27 - , "" // -28 - , "" // -29 - - , "CL_INVALID_VALUE" // -30 - , "CL_INVALID_DEVICE_TYPE" // -31 - , "CL_INVALID_PLATFORM" // -32 - , "CL_INVALID_DEVICE" // -33 - , "CL_INVALID_CONTEXT" // -34 - , "CL_INVALID_QUEUE_PROPERTIES" // -35 - , "CL_INVALID_COMMAND_QUEUE" // -36 - , "CL_INVALID_HOST_PTR" // -37 - , "CL_INVALID_MEM_OBJECT" // -38 - , "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39 - , "CL_INVALID_IMAGE_SIZE" // -40 - , "CL_INVALID_SAMPLER" // -41 - , "CL_INVALID_BINARY" // -42 - , "CL_INVALID_BUILD_OPTIONS" // -43 - , "CL_INVALID_PROGRAM" // -44 - , "CL_INVALID_PROGRAM_EXECUTABLE" // -45 - , "CL_INVALID_KERNEL_NAME" // -46 - , "CL_INVALID_KERNEL_DEFINITION" // -47 - , "CL_INVALID_KERNEL" // -48 - , "CL_INVALID_ARG_INDEX" // -49 - , "CL_INVALID_ARG_VALUE" // -50 - , "CL_INVALID_ARG_SIZE" // -51 - , "CL_INVALID_KERNEL_ARGS" // -52 - , "CL_INVALID_WORK_DIMENSION" // -53 - , "CL_INVALID_WORK_GROUP_SIZE" // -54 - , "CL_INVALID_WORK_ITEM_SIZE" // -55 - , "CL_INVALID_GLOBAL_OFFSET" // -56 - , "CL_INVALID_EVENT_WAIT_LIST" // -57 - , "CL_INVALID_EVENT" // -58 - , "CL_INVALID_OPERATION" // -59 - , 
"CL_INVALID_GL_OBJECT" // -60 - , "CL_INVALID_BUFFER_SIZE" // -61 - , "CL_INVALID_MIP_LEVEL" // -62 - , "CL_INVALID_GLOBAL_WORK_SIZE" // -63 - }; - - return strings[-error]; -} - -CCL_NAMESPACE_END - -#ifdef CLCC_DYNLIB_CLOSE -#endif diff --git a/intern/cycles/util/util_opencl.h b/intern/cycles/util/util_opencl.h deleted file mode 100644 index 141c5e38273..00000000000 --- a/intern/cycles/util/util_opencl.h +++ /dev/null @@ -1,1313 +0,0 @@ -////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009 Organic Vectory B.V. -// Written by George van Venrooij -// -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file doc/license/Boost.txt) -// Extracted from the CLCC project - http://clcc.sourceforge.net/ -////////////////////////////////////////////////////////////////////////// - -#ifndef __UTIL_OPENCL_H__ -#define __UTIL_OPENCL_H__ - -CCL_NAMESPACE_BEGIN - -//! This file contains a copy of the contents of CL.H and CL_PLATFORM.H from the -//! official OpenCL spec. The purpose of this code is to load the OpenCL dynamic -//! library at run-time and thus allow the executable to function on many -//! platforms regardless of the vendor of the OpenCL driver actually installed. -//! Some of the techniques used here were inspired by work done in the GLEW -//! library (http://glew.sourceforge.net/) - -// Run-time dynamic linking functionality based on concepts used in GLEW -#ifdef __OPENCL_CL_H -#error cl.h included before clew.h -#endif - -#ifdef __OPENCL_CL_PLATFORM_H -#error cl_platform.h included before clew.h -#endif - -#ifndef CLCC_GENERATE_DOCUMENTATION -// Prevent cl.h inclusion -#define __OPENCL_CL_H -// Prevent cl_platform.h inclusion -#define __CL_PLATFORM_H -#endif // CLCC_GENERATE_DOCUMENTATION - -/******************************************************************************* - * Copyright (c) 2008-2009 The Khronos Group Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -#ifndef CLCC_GENERATE_DOCUMENTATION - -#if defined(_WIN32) -#define CL_API_ENTRY -#define CL_API_CALL __stdcall -#else -#define CL_API_ENTRY -#define CL_API_CALL -#endif - -#define CL_API_SUFFIX__VERSION_1_0 - -#if defined(_WIN32) && defined(_MSC_VER) - -/* scalar types */ -typedef signed __int8 cl_char; -typedef unsigned __int8 cl_uchar; -typedef signed __int16 cl_short; -typedef unsigned __int16 cl_ushort; -typedef signed __int32 cl_int; -typedef unsigned __int32 cl_uint; -typedef signed __int64 cl_long; -typedef unsigned __int64 cl_ulong; - -typedef unsigned __int16 cl_half; -typedef float cl_float; -typedef double cl_double; - - -/* - * Vector types - * - * Note: OpenCL requires that all types be naturally aligned. - * This means that vector types must be naturally aligned. 
- * For example, a vector of four floats must be aligned to - * a 16 byte boundary (calculated as 4 * the natural 4-byte - * alignment of the float). The alignment qualifiers here - * will only function properly if your compiler supports them - * and if you don't actively work to defeat them. For example, - * in order for a cl_float4 to be 16 byte aligned in a struct, - * the start of the struct must itself be 16-byte aligned. - * - * Maintaining proper alignment is the user's responsibility. - */ -typedef signed __int8 cl_char2[2]; -typedef signed __int8 cl_char4[4]; -typedef signed __int8 cl_char8[8]; -typedef signed __int8 cl_char16[16]; -typedef unsigned __int8 cl_uchar2[2]; -typedef unsigned __int8 cl_uchar4[4]; -typedef unsigned __int8 cl_uchar8[8]; -typedef unsigned __int8 cl_uchar16[16]; - -typedef signed __int16 cl_short2[2]; -typedef signed __int16 cl_short4[4]; -typedef signed __int16 cl_short8[8]; -typedef signed __int16 cl_short16[16]; -typedef unsigned __int16 cl_ushort2[2]; -typedef unsigned __int16 cl_ushort4[4]; -typedef unsigned __int16 cl_ushort8[8]; -typedef unsigned __int16 cl_ushort16[16]; - -typedef signed __int32 cl_int2[2]; -typedef signed __int32 cl_int4[4]; -typedef signed __int32 cl_int8[8]; -typedef signed __int32 cl_int16[16]; -typedef unsigned __int32 cl_uint2[2]; -typedef unsigned __int32 cl_uint4[4]; -typedef unsigned __int32 cl_uint8[8]; -typedef unsigned __int32 cl_uint16[16]; - -typedef signed __int64 cl_long2[2]; -typedef signed __int64 cl_long4[4]; -typedef signed __int64 cl_long8[8]; -typedef signed __int64 cl_long16[16]; -typedef unsigned __int64 cl_ulong2[2]; -typedef unsigned __int64 cl_ulong4[4]; -typedef unsigned __int64 cl_ulong8[8]; -typedef unsigned __int64 cl_ulong16[16]; - -typedef float cl_float2[2]; -typedef float cl_float4[4]; -typedef float cl_float8[8]; -typedef float cl_float16[16]; - -typedef double cl_double2[2]; -typedef double cl_double4[4]; -typedef double cl_double8[8]; -typedef double cl_double16[16]; 
-/* There are no vector types for half */ - -#else - -#include <stdint.h> - -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short __attribute__((aligned(2))); -typedef uint16_t cl_ushort __attribute__((aligned(2))); -typedef int32_t cl_int __attribute__((aligned(4))); -typedef uint32_t cl_uint __attribute__((aligned(4))); -typedef int64_t cl_long __attribute__((aligned(8))); -typedef uint64_t cl_ulong __attribute__((aligned(8))); - -typedef uint16_t cl_half __attribute__((aligned(2))); -typedef float cl_float __attribute__((aligned(4))); -typedef double cl_double __attribute__((aligned(8))); - -/* - * Vector types - * - * Note: OpenCL requires that all types be naturally aligned. - * This means that vector types must be naturally aligned. - * For example, a vector of four floats must be aligned to - * a 16 byte boundary (calculated as 4 * the natural 4-byte - * alignment of the float). The alignment qualifiers here - * will only function properly if your compiler supports them - * and if you don't actively work to defeat them. For example, - * in order for a cl_float4 to be 16 byte aligned in a struct, - * the start of the struct must itself be 16-byte aligned. - * - * Maintaining proper alignment is the user's responsibility. 
- */ -typedef int8_t cl_char2[2] __attribute__((aligned(2))); -typedef int8_t cl_char4[4] __attribute__((aligned(4))); -typedef int8_t cl_char8[8] __attribute__((aligned(8))); -typedef int8_t cl_char16[16] __attribute__((aligned(16))); -typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); -typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); -typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); -typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); - -typedef int16_t cl_short2[2] __attribute__((aligned(4))); -typedef int16_t cl_short4[4] __attribute__((aligned(8))); -typedef int16_t cl_short8[8] __attribute__((aligned(16))); -typedef int16_t cl_short16[16] __attribute__((aligned(32))); -typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); -typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); -typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); -typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); - -typedef int32_t cl_int2[2] __attribute__((aligned(8))); -typedef int32_t cl_int4[4] __attribute__((aligned(16))); -typedef int32_t cl_int8[8] __attribute__((aligned(32))); -typedef int32_t cl_int16[16] __attribute__((aligned(64))); -typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); -typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); -typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); -typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); - -typedef int64_t cl_long2[2] __attribute__((aligned(16))); -typedef int64_t cl_long4[4] __attribute__((aligned(32))); -typedef int64_t cl_long8[8] __attribute__((aligned(64))); -typedef int64_t cl_long16[16] __attribute__((aligned(128))); -typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); -typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); -typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); -typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); - -typedef float cl_float2[2] __attribute__((aligned(8))); -typedef 
float cl_float4[4] __attribute__((aligned(16))); -typedef float cl_float8[8] __attribute__((aligned(32))); -typedef float cl_float16[16] __attribute__((aligned(64))); - -typedef double cl_double2[2] __attribute__((aligned(16))); -typedef double cl_double4[4] __attribute__((aligned(32))); -typedef double cl_double8[8] __attribute__((aligned(64))); -typedef double cl_double16[16] __attribute__((aligned(128))); - -/* There are no vector types for half */ - -#endif - -/******************************************************************************/ - -// Macro names and corresponding values defined by OpenCL - -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127-1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767-1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647-1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#if defined(_MSC_VER) -// MSVC doesn't understand hex floats -#define CL_FLT_MAX 3.402823466e+38F -#define CL_FLT_MIN 1.175494351e-38F -#define CL_FLT_EPSILON 1.192092896e-07F -#else -#define CL_FLT_MAX 0x1.fffffep127f -#define CL_FLT_MIN 0x1.0p-126f -#define CL_FLT_EPSILON 0x1.0p-23f -#endif - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#if defined(_MSC_VER) -// MSVC doesn't understand hex floats -#define CL_DBL_MAX 1.7976931348623158e+308 -#define CL_DBL_MIN 2.2250738585072014e-308 -#define 
CL_DBL_EPSILON 2.2204460492503131e-016 -#else -#define CL_DBL_MAX 0x1.fffffffffffffp1023 -#define CL_DBL_MIN 0x1.0p-1022 -#define CL_DBL_EPSILON 0x1.0p-52 -#endif - -#include <stddef.h> - - -// CL.h contents -/******************************************************************************/ - -typedef struct _cl_platform_id * cl_platform_id; -typedef struct _cl_device_id * cl_device_id; -typedef struct _cl_context * cl_context; -typedef struct _cl_command_queue * cl_command_queue; -typedef struct _cl_mem * cl_mem; -typedef struct _cl_program * cl_program; -typedef struct _cl_kernel * cl_kernel; -typedef struct _cl_event * cl_event; -typedef struct _cl_sampler * cl_sampler; - -/* WARNING! Unlike cl_ types in cl_platform.h, - * cl_bool is not guaranteed to be the same size as the bool in kernels. */ -typedef cl_uint cl_bool; -typedef cl_ulong cl_bitfield; -typedef cl_bitfield cl_device_type; -typedef cl_uint cl_platform_info; -typedef cl_uint cl_device_info; -typedef cl_bitfield cl_device_address_info; -typedef cl_bitfield cl_device_fp_config; -typedef cl_uint cl_device_mem_cache_type; -typedef cl_uint cl_device_local_mem_type; -typedef cl_bitfield cl_device_exec_capabilities; -typedef cl_bitfield cl_command_queue_properties; - -typedef intptr_t cl_context_properties; -typedef cl_uint cl_context_info; -typedef cl_uint cl_command_queue_info; -typedef cl_uint cl_channel_order; -typedef cl_uint cl_channel_type; -typedef cl_bitfield cl_mem_flags; -typedef cl_uint cl_mem_object_type; -typedef cl_uint cl_mem_info; -typedef cl_uint cl_image_info; -typedef cl_uint cl_addressing_mode; -typedef cl_uint cl_filter_mode; -typedef cl_uint cl_sampler_info; -typedef cl_bitfield cl_map_flags; -typedef cl_uint cl_program_info; -typedef cl_uint cl_program_build_info; -typedef cl_int cl_build_status; -typedef cl_uint cl_kernel_info; -typedef cl_uint cl_kernel_work_group_info; -typedef cl_uint cl_event_info; -typedef cl_uint cl_command_type; -typedef cl_uint cl_profiling_info; - -typedef 
struct _cl_image_format { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; -} cl_image_format; - - - -/******************************************************************************/ - -// Error Codes -#define CL_SUCCESS 0 -#define CL_DEVICE_NOT_FOUND -1 -#define CL_DEVICE_NOT_AVAILABLE -2 -#define CL_COMPILER_NOT_AVAILABLE -3 -#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 -#define CL_OUT_OF_RESOURCES -5 -#define CL_OUT_OF_HOST_MEMORY -6 -#define CL_PROFILING_INFO_NOT_AVAILABLE -7 -#define CL_MEM_COPY_OVERLAP -8 -#define CL_IMAGE_FORMAT_MISMATCH -9 -#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 -#define CL_BUILD_PROGRAM_FAILURE -11 -#define CL_MAP_FAILURE -12 - -#define CL_INVALID_VALUE -30 -#define CL_INVALID_DEVICE_TYPE -31 -#define CL_INVALID_PLATFORM -32 -#define CL_INVALID_DEVICE -33 -#define CL_INVALID_CONTEXT -34 -#define CL_INVALID_QUEUE_PROPERTIES -35 -#define CL_INVALID_COMMAND_QUEUE -36 -#define CL_INVALID_HOST_PTR -37 -#define CL_INVALID_MEM_OBJECT -38 -#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 -#define CL_INVALID_IMAGE_SIZE -40 -#define CL_INVALID_SAMPLER -41 -#define CL_INVALID_BINARY -42 -#define CL_INVALID_BUILD_OPTIONS -43 -#define CL_INVALID_PROGRAM -44 -#define CL_INVALID_PROGRAM_EXECUTABLE -45 -#define CL_INVALID_KERNEL_NAME -46 -#define CL_INVALID_KERNEL_DEFINITION -47 -#define CL_INVALID_KERNEL -48 -#define CL_INVALID_ARG_INDEX -49 -#define CL_INVALID_ARG_VALUE -50 -#define CL_INVALID_ARG_SIZE -51 -#define CL_INVALID_KERNEL_ARGS -52 -#define CL_INVALID_WORK_DIMENSION -53 -#define CL_INVALID_WORK_GROUP_SIZE -54 -#define CL_INVALID_WORK_ITEM_SIZE -55 -#define CL_INVALID_GLOBAL_OFFSET -56 -#define CL_INVALID_EVENT_WAIT_LIST -57 -#define CL_INVALID_EVENT -58 -#define CL_INVALID_OPERATION -59 -#define CL_INVALID_GL_OBJECT -60 -#define CL_INVALID_BUFFER_SIZE -61 -#define CL_INVALID_MIP_LEVEL -62 -#define CL_INVALID_GLOBAL_WORK_SIZE -63 - -// OpenCL Version -#define CL_VERSION_1_0 1 - -// cl_bool -#define CL_FALSE 0 
-#define CL_TRUE 1 - -// cl_platform_info -#define CL_PLATFORM_PROFILE 0x0900 -#define CL_PLATFORM_VERSION 0x0901 -#define CL_PLATFORM_NAME 0x0902 -#define CL_PLATFORM_VENDOR 0x0903 -#define CL_PLATFORM_EXTENSIONS 0x0904 - -// cl_device_type - bitfield -#define CL_DEVICE_TYPE_DEFAULT (1 << 0) -#define CL_DEVICE_TYPE_CPU (1 << 1) -#define CL_DEVICE_TYPE_GPU (1 << 2) -#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) -#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF - -// cl_device_info -#define CL_DEVICE_TYPE 0x1000 -#define CL_DEVICE_VENDOR_ID 0x1001 -#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 -#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 -#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 -#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B -#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C -#define CL_DEVICE_ADDRESS_BITS 0x100D -#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E -#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F -#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 -#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 -#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 -#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 -#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 -#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 -#define CL_DEVICE_IMAGE_SUPPORT 0x1016 -#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 -#define CL_DEVICE_MAX_SAMPLERS 0x1018 -#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 -#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A -#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B -#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C -#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D -#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E -#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F -#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 
0x1020 -#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 -#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 -#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 -#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 -#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 -#define CL_DEVICE_ENDIAN_LITTLE 0x1026 -#define CL_DEVICE_AVAILABLE 0x1027 -#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 -#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 -#define CL_DEVICE_QUEUE_PROPERTIES 0x102A -#define CL_DEVICE_NAME 0x102B -#define CL_DEVICE_VENDOR 0x102C -#define CL_DRIVER_VERSION 0x102D -#define CL_DEVICE_PROFILE 0x102E -#define CL_DEVICE_VERSION 0x102F -#define CL_DEVICE_EXTENSIONS 0x1030 -#define CL_DEVICE_PLATFORM 0x1031 -/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ -/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 -#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C -#define CL_DEVICE_OPENCL_C_VERSION 0x103D - -// cl_device_fp_config - bitfield -#define CL_FP_DENORM (1 << 0) -#define CL_FP_INF_NAN (1 << 1) -#define CL_FP_ROUND_TO_NEAREST (1 << 2) -#define CL_FP_ROUND_TO_ZERO (1 << 3) -#define CL_FP_ROUND_TO_INF (1 << 4) -#define CL_FP_FMA (1 << 5) - -// cl_device_mem_cache_type -#define CL_NONE 0x0 -#define CL_READ_ONLY_CACHE 0x1 -#define CL_READ_WRITE_CACHE 0x2 - -// cl_device_local_mem_type -#define CL_LOCAL 0x1 -#define CL_GLOBAL 0x2 - -// cl_device_exec_capabilities - bitfield -#define CL_EXEC_KERNEL (1 << 0) -#define CL_EXEC_NATIVE_KERNEL (1 << 1) - -// cl_command_queue_properties - bitfield -#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) -#define CL_QUEUE_PROFILING_ENABLE (1 << 1) - 
-// cl_context_info -#define CL_CONTEXT_REFERENCE_COUNT 0x1080 -#define CL_CONTEXT_DEVICES 0x1081 -#define CL_CONTEXT_PROPERTIES 0x1082 - -// cl_context_properties -#define CL_CONTEXT_PLATFORM 0x1084 - -// cl_command_queue_info -#define CL_QUEUE_CONTEXT 0x1090 -#define CL_QUEUE_DEVICE 0x1091 -#define CL_QUEUE_REFERENCE_COUNT 0x1092 -#define CL_QUEUE_PROPERTIES 0x1093 - -// cl_mem_flags - bitfield -#define CL_MEM_READ_WRITE (1 << 0) -#define CL_MEM_WRITE_ONLY (1 << 1) -#define CL_MEM_READ_ONLY (1 << 2) -#define CL_MEM_USE_HOST_PTR (1 << 3) -#define CL_MEM_ALLOC_HOST_PTR (1 << 4) -#define CL_MEM_COPY_HOST_PTR (1 << 5) - -// cl_channel_order -#define CL_R 0x10B0 -#define CL_A 0x10B1 -#define CL_RG 0x10B2 -#define CL_RA 0x10B3 -#define CL_RGB 0x10B4 -#define CL_RGBA 0x10B5 -#define CL_BGRA 0x10B6 -#define CL_ARGB 0x10B7 -#define CL_INTENSITY 0x10B8 -#define CL_LUMINANCE 0x10B9 - -// cl_channel_type -#define CL_SNORM_INT8 0x10D0 -#define CL_SNORM_INT16 0x10D1 -#define CL_UNORM_INT8 0x10D2 -#define CL_UNORM_INT16 0x10D3 -#define CL_UNORM_SHORT_565 0x10D4 -#define CL_UNORM_SHORT_555 0x10D5 -#define CL_UNORM_INT_101010 0x10D6 -#define CL_SIGNED_INT8 0x10D7 -#define CL_SIGNED_INT16 0x10D8 -#define CL_SIGNED_INT32 0x10D9 -#define CL_UNSIGNED_INT8 0x10DA -#define CL_UNSIGNED_INT16 0x10DB -#define CL_UNSIGNED_INT32 0x10DC -#define CL_HALF_FLOAT 0x10DD -#define CL_FLOAT 0x10DE - -// cl_mem_object_type -#define CL_MEM_OBJECT_BUFFER 0x10F0 -#define CL_MEM_OBJECT_IMAGE2D 0x10F1 -#define CL_MEM_OBJECT_IMAGE3D 0x10F2 - -// cl_mem_info -#define CL_MEM_TYPE 0x1100 -#define CL_MEM_FLAGS 0x1101 -#define CL_MEM_SIZE 0x1102 -#define CL_MEM_HOST_PTR 0x1103 -#define CL_MEM_MAP_COUNT 0x1104 -#define CL_MEM_REFERENCE_COUNT 0x1105 -#define CL_MEM_CONTEXT 0x1106 - -// cl_image_info -#define CL_IMAGE_FORMAT 0x1110 -#define CL_IMAGE_ELEMENT_SIZE 0x1111 -#define CL_IMAGE_ROW_PITCH 0x1112 -#define CL_IMAGE_SLICE_PITCH 0x1113 -#define CL_IMAGE_WIDTH 0x1114 -#define CL_IMAGE_HEIGHT 0x1115 -#define 
CL_IMAGE_DEPTH 0x1116 - -// cl_addressing_mode -#define CL_ADDRESS_NONE 0x1130 -#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 -#define CL_ADDRESS_CLAMP 0x1132 -#define CL_ADDRESS_REPEAT 0x1133 - -// cl_filter_mode -#define CL_FILTER_NEAREST 0x1140 -#define CL_FILTER_LINEAR 0x1141 - -// cl_sampler_info -#define CL_SAMPLER_REFERENCE_COUNT 0x1150 -#define CL_SAMPLER_CONTEXT 0x1151 -#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 -#define CL_SAMPLER_ADDRESSING_MODE 0x1153 -#define CL_SAMPLER_FILTER_MODE 0x1154 - -// cl_map_flags - bitfield -#define CL_MAP_READ (1 << 0) -#define CL_MAP_WRITE (1 << 1) - -// cl_program_info -#define CL_PROGRAM_REFERENCE_COUNT 0x1160 -#define CL_PROGRAM_CONTEXT 0x1161 -#define CL_PROGRAM_NUM_DEVICES 0x1162 -#define CL_PROGRAM_DEVICES 0x1163 -#define CL_PROGRAM_SOURCE 0x1164 -#define CL_PROGRAM_BINARY_SIZES 0x1165 -#define CL_PROGRAM_BINARIES 0x1166 - -// cl_program_build_info -#define CL_PROGRAM_BUILD_STATUS 0x1181 -#define CL_PROGRAM_BUILD_OPTIONS 0x1182 -#define CL_PROGRAM_BUILD_LOG 0x1183 - -// cl_build_status -#define CL_BUILD_SUCCESS 0 -#define CL_BUILD_NONE -1 -#define CL_BUILD_ERROR -2 -#define CL_BUILD_IN_PROGRESS -3 - -// cl_kernel_info -#define CL_KERNEL_FUNCTION_NAME 0x1190 -#define CL_KERNEL_NUM_ARGS 0x1191 -#define CL_KERNEL_REFERENCE_COUNT 0x1192 -#define CL_KERNEL_CONTEXT 0x1193 -#define CL_KERNEL_PROGRAM 0x1194 - -// cl_kernel_work_group_info -#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 -#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 -#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 - -// cl_event_info -#define CL_EVENT_COMMAND_QUEUE 0x11D0 -#define CL_EVENT_COMMAND_TYPE 0x11D1 -#define CL_EVENT_REFERENCE_COUNT 0x11D2 -#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 - -// cl_command_type -#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 -#define CL_COMMAND_TASK 0x11F1 -#define CL_COMMAND_NATIVE_KERNEL 0x11F2 -#define CL_COMMAND_READ_BUFFER 0x11F3 -#define CL_COMMAND_WRITE_BUFFER 0x11F4 -#define CL_COMMAND_COPY_BUFFER 0x11F5 -#define 
CL_COMMAND_READ_IMAGE 0x11F6 -#define CL_COMMAND_WRITE_IMAGE 0x11F7 -#define CL_COMMAND_COPY_IMAGE 0x11F8 -#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 -#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA -#define CL_COMMAND_MAP_BUFFER 0x11FB -#define CL_COMMAND_MAP_IMAGE 0x11FC -#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD -#define CL_COMMAND_MARKER 0x11FE -#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF -#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 - -// command execution status -#define CL_COMPLETE 0x0 -#define CL_RUNNING 0x1 -#define CL_SUBMITTED 0x2 -#define CL_QUEUED 0x3 - -// cl_profiling_info -#define CL_PROFILING_COMMAND_QUEUED 0x1280 -#define CL_PROFILING_COMMAND_SUBMIT 0x1281 -#define CL_PROFILING_COMMAND_START 0x1282 -#define CL_PROFILING_COMMAND_END 0x1283 - -/********************************************************************************************************/ - -/********************************************************************************************************/ - -// Function signature typedef's - -// Platform API -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPLATFORMIDS)(cl_uint /* num_entries */, - cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPLATFORMINFO)(cl_platform_id /* platform */, - cl_platform_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Device APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETDEVICEIDS)(cl_platform_id /* platform */, - cl_device_type /* device_type */, - cl_uint /* num_entries */, - cl_device_id * /* devices */, - cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETDEVICEINFO)(cl_device_id /* device */, - cl_device_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) 
CL_API_SUFFIX__VERSION_1_0; - -// Context APIs -typedef CL_API_ENTRY cl_context (CL_API_CALL * -PFNCLCREATECONTEXT)(const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_context (CL_API_CALL * -PFNCLCREATECONTEXTFROMTYPE)(const cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINCONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASECONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETCONTEXTINFO)(cl_context /* context */, - cl_context_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Command Queue APIs -typedef CL_API_ENTRY cl_command_queue (CL_API_CALL * -PFNCLCREATECOMMANDQUEUE)(cl_context /* context */, - cl_device_id /* device */, - cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINCOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASECOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETCOMMANDQUEUEINFO)(cl_command_queue /* command_queue */, - cl_command_queue_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* 
param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLSETCOMMANDQUEUEPROPERTY)(cl_command_queue /* command_queue */, - cl_command_queue_properties /* properties */, - cl_bool /* enable */, - cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; - -// Memory Object APIs -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEBUFFER)(cl_context /* context */, - cl_mem_flags /* flags */, - size_t /* size */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEIMAGE2D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEIMAGE3D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_depth */, - size_t /* image_row_pitch */, - size_t /* image_slice_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETSUPPORTEDIMAGEFORMATS)(cl_context /* context */, - cl_mem_flags /* flags */, - cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, - cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETMEMOBJECTINFO)(cl_mem /* memobj */, - cl_mem_info /* param_name */, - size_t /* 
param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETIMAGEINFO)(cl_mem /* image */, - cl_image_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Sampler APIs -typedef CL_API_ENTRY cl_sampler (CL_API_CALL * -PFNCLCREATESAMPLER)(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINSAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASESAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETSAMPLERINFO)(cl_sampler /* sampler */, - cl_sampler_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Program Object APIs -typedef CL_API_ENTRY cl_program (CL_API_CALL * -PFNCLCREATEPROGRAMWITHSOURCE)(cl_context /* context */, - cl_uint /* count */, - const char ** /* strings */, - const size_t * /* lengths */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_program (CL_API_CALL * -PFNCLCREATEPROGRAMWITHBINARY)(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEPROGRAM)(cl_program /* program */) 
CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLBUILDPROGRAM)(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void (*pfn_notify)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLUNLOADCOMPILER)(void) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPROGRAMINFO)(cl_program /* program */, - cl_program_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPROGRAMBUILDINFO)(cl_program /* program */, - cl_device_id /* device */, - cl_program_build_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Kernel Object APIs -typedef CL_API_ENTRY cl_kernel (CL_API_CALL * -PFNCLCREATEKERNEL)(cl_program /* program */, - const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLCREATEKERNELSINPROGRAM)(cl_program /* program */, - cl_uint /* num_kernels */, - cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLSETKERNELARG)(cl_kernel /* kernel */, - cl_uint /* arg_index */, - size_t /* arg_size */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETKERNELINFO)(cl_kernel /* kernel */, - cl_kernel_info /* param_name 
*/, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETKERNELWORKGROUPINFO)(cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Event Object APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLWAITFOREVENTS)(cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETEVENTINFO)(cl_event /* event */, - cl_event_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - -// Profiling APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETEVENTPROFILINGINFO)(cl_event /* event */, - cl_profiling_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Flush and Finish APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLFLUSH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLFINISH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -// Enqueued Commands APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEREADBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - size_t /* offset */, - size_t /* cb */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - 
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWRITEBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - size_t /* offset */, - size_t /* cb */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - size_t /* src_offset */, - size_t /* dst_offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEREADIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_read */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* row_pitch */, - size_t /* slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWRITEIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_write */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* 
num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYIMAGETOBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, - const size_t * /* region[3] */, - size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYBUFFERTOIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_image */, - size_t /* src_offset */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void * (CL_API_CALL * -PFNCLENQUEUEMAPBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - size_t /* offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void * (CL_API_CALL * -PFNCLENQUEUEMAPIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t * /* image_row_pitch */, - size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEUNMAPMEMOBJECT)(cl_command_queue /* command_queue */, - cl_mem /* memobj */, - void * /* mapped_ptr */, - 
cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUENDRANGEKERNEL)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* work_dim */, - const size_t * /* global_work_offset */, - const size_t * /* global_work_size */, - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUETASK)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUENATIVEKERNEL)(cl_command_queue /* command_queue */, - void (*user_func)(void *), - void * /* args */, - size_t /* cb_args */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, - const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEMARKER)(cl_command_queue /* command_queue */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWAITFOREVENTS)(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEBARRIER)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -// Extension function access -// -// Returns the extension function address for the given function name, -// or NULL if a valid function can not be found. 
The client must -// check to make sure the address is not NULL, before using or -// calling the returned function address. -// -typedef CL_API_ENTRY void * (CL_API_CALL * PFNCLGETEXTENSIONFUNCTIONADDRESS)(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; - - -#define CLEW_STATIC - -#ifdef CLEW_STATIC -# define CLEWAPI extern -#else -# ifdef CLEW_BUILD -# define CLEWAPI extern __declspec(dllexport) -# else -# define CLEWAPI extern __declspec(dllimport) -# endif -#endif - -#if defined(_WIN32) -#define CLEW_FUN_EXPORT extern -#else -#define CLEW_FUN_EXPORT CLEWAPI -#endif - -#define CLEW_GET_FUN(x) x - - -// Variables holding function entry points -CLEW_FUN_EXPORT PFNCLGETPLATFORMIDS __clewGetPlatformIDs ; -CLEW_FUN_EXPORT PFNCLGETPLATFORMINFO __clewGetPlatformInfo ; -CLEW_FUN_EXPORT PFNCLGETDEVICEIDS __clewGetDeviceIDs ; -CLEW_FUN_EXPORT PFNCLGETDEVICEINFO __clewGetDeviceInfo ; -CLEW_FUN_EXPORT PFNCLCREATECONTEXT __clewCreateContext ; -CLEW_FUN_EXPORT PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType ; -CLEW_FUN_EXPORT PFNCLRETAINCONTEXT __clewRetainContext ; -CLEW_FUN_EXPORT PFNCLRELEASECONTEXT __clewReleaseContext ; -CLEW_FUN_EXPORT PFNCLGETCONTEXTINFO __clewGetContextInfo ; -CLEW_FUN_EXPORT PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue ; -CLEW_FUN_EXPORT PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue ; -CLEW_FUN_EXPORT PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue ; -CLEW_FUN_EXPORT PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo ; -CLEW_FUN_EXPORT PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty ; -CLEW_FUN_EXPORT PFNCLCREATEBUFFER __clewCreateBuffer ; -CLEW_FUN_EXPORT PFNCLCREATEIMAGE2D __clewCreateImage2D ; -CLEW_FUN_EXPORT PFNCLCREATEIMAGE3D __clewCreateImage3D ; -CLEW_FUN_EXPORT PFNCLRETAINMEMOBJECT __clewRetainMemObject ; -CLEW_FUN_EXPORT PFNCLRELEASEMEMOBJECT __clewReleaseMemObject ; -CLEW_FUN_EXPORT PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats ; -CLEW_FUN_EXPORT PFNCLGETMEMOBJECTINFO 
__clewGetMemObjectInfo ; -CLEW_FUN_EXPORT PFNCLGETIMAGEINFO __clewGetImageInfo ; -CLEW_FUN_EXPORT PFNCLCREATESAMPLER __clewCreateSampler ; -CLEW_FUN_EXPORT PFNCLRETAINSAMPLER __clewRetainSampler ; -CLEW_FUN_EXPORT PFNCLRELEASESAMPLER __clewReleaseSampler ; -CLEW_FUN_EXPORT PFNCLGETSAMPLERINFO __clewGetSamplerInfo ; -CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource ; -CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary ; -CLEW_FUN_EXPORT PFNCLRETAINPROGRAM __clewRetainProgram ; -CLEW_FUN_EXPORT PFNCLRELEASEPROGRAM __clewReleaseProgram ; -CLEW_FUN_EXPORT PFNCLBUILDPROGRAM __clewBuildProgram ; -CLEW_FUN_EXPORT PFNCLUNLOADCOMPILER __clewUnloadCompiler ; -CLEW_FUN_EXPORT PFNCLGETPROGRAMINFO __clewGetProgramInfo ; -CLEW_FUN_EXPORT PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo ; -CLEW_FUN_EXPORT PFNCLCREATEKERNEL __clewCreateKernel ; -CLEW_FUN_EXPORT PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram ; -CLEW_FUN_EXPORT PFNCLRETAINKERNEL __clewRetainKernel ; -CLEW_FUN_EXPORT PFNCLRELEASEKERNEL __clewReleaseKernel ; -CLEW_FUN_EXPORT PFNCLSETKERNELARG __clewSetKernelArg ; -CLEW_FUN_EXPORT PFNCLGETKERNELINFO __clewGetKernelInfo ; -CLEW_FUN_EXPORT PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo ; -CLEW_FUN_EXPORT PFNCLWAITFOREVENTS __clewWaitForEvents ; -CLEW_FUN_EXPORT PFNCLGETEVENTINFO __clewGetEventInfo ; -CLEW_FUN_EXPORT PFNCLRETAINEVENT __clewRetainEvent ; -CLEW_FUN_EXPORT PFNCLRELEASEEVENT __clewReleaseEvent ; -CLEW_FUN_EXPORT PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo ; -CLEW_FUN_EXPORT PFNCLFLUSH __clewFlush ; -CLEW_FUN_EXPORT PFNCLFINISH __clewFinish ; -CLEW_FUN_EXPORT PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage ; 
-CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject ; -CLEW_FUN_EXPORT PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel ; -CLEW_FUN_EXPORT PFNCLENQUEUETASK __clewEnqueueTask ; -CLEW_FUN_EXPORT PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel ; -CLEW_FUN_EXPORT PFNCLENQUEUEMARKER __clewEnqueueMarker ; -CLEW_FUN_EXPORT PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents ; -CLEW_FUN_EXPORT PFNCLENQUEUEBARRIER __clewEnqueueBarrier ; -CLEW_FUN_EXPORT PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress ; - - -#define clGetPlatformIDs CLEW_GET_FUN(__clewGetPlatformIDs ) -#define clGetPlatformInfo CLEW_GET_FUN(__clewGetPlatformInfo ) -#define clGetDeviceIDs CLEW_GET_FUN(__clewGetDeviceIDs ) -#define clGetDeviceInfo CLEW_GET_FUN(__clewGetDeviceInfo ) -#define clCreateContext CLEW_GET_FUN(__clewCreateContext ) -#define clCreateContextFromType CLEW_GET_FUN(__clewCreateContextFromType ) -#define clRetainContext CLEW_GET_FUN(__clewRetainContext ) -#define clReleaseContext CLEW_GET_FUN(__clewReleaseContext ) -#define clGetContextInfo CLEW_GET_FUN(__clewGetContextInfo ) -#define clCreateCommandQueue CLEW_GET_FUN(__clewCreateCommandQueue ) -#define clRetainCommandQueue CLEW_GET_FUN(__clewRetainCommandQueue ) -#define clReleaseCommandQueue CLEW_GET_FUN(__clewReleaseCommandQueue ) -#define clGetCommandQueueInfo CLEW_GET_FUN(__clewGetCommandQueueInfo ) -#define clSetCommandQueueProperty CLEW_GET_FUN(__clewSetCommandQueueProperty ) -#define clCreateBuffer CLEW_GET_FUN(__clewCreateBuffer ) -#define clCreateImage2D CLEW_GET_FUN(__clewCreateImage2D ) -#define clCreateImage3D 
CLEW_GET_FUN(__clewCreateImage3D ) -#define clRetainMemObject CLEW_GET_FUN(__clewRetainMemObject ) -#define clReleaseMemObject CLEW_GET_FUN(__clewReleaseMemObject ) -#define clGetSupportedImageFormats CLEW_GET_FUN(__clewGetSupportedImageFormats ) -#define clGetMemObjectInfo CLEW_GET_FUN(__clewGetMemObjectInfo ) -#define clGetImageInfo CLEW_GET_FUN(__clewGetImageInfo ) -#define clCreateSampler CLEW_GET_FUN(__clewCreateSampler ) -#define clRetainSampler CLEW_GET_FUN(__clewRetainSampler ) -#define clReleaseSampler CLEW_GET_FUN(__clewReleaseSampler ) -#define clGetSamplerInfo CLEW_GET_FUN(__clewGetSamplerInfo ) -#define clCreateProgramWithSource CLEW_GET_FUN(__clewCreateProgramWithSource ) -#define clCreateProgramWithBinary CLEW_GET_FUN(__clewCreateProgramWithBinary ) -#define clRetainProgram CLEW_GET_FUN(__clewRetainProgram ) -#define clReleaseProgram CLEW_GET_FUN(__clewReleaseProgram ) -#define clBuildProgram CLEW_GET_FUN(__clewBuildProgram ) -#define clUnloadCompiler CLEW_GET_FUN(__clewUnloadCompiler ) -#define clGetProgramInfo CLEW_GET_FUN(__clewGetProgramInfo ) -#define clGetProgramBuildInfo CLEW_GET_FUN(__clewGetProgramBuildInfo ) -#define clCreateKernel CLEW_GET_FUN(__clewCreateKernel ) -#define clCreateKernelsInProgram CLEW_GET_FUN(__clewCreateKernelsInProgram ) -#define clRetainKernel CLEW_GET_FUN(__clewRetainKernel ) -#define clReleaseKernel CLEW_GET_FUN(__clewReleaseKernel ) -#define clSetKernelArg CLEW_GET_FUN(__clewSetKernelArg ) -#define clGetKernelInfo CLEW_GET_FUN(__clewGetKernelInfo ) -#define clGetKernelWorkGroupInfo CLEW_GET_FUN(__clewGetKernelWorkGroupInfo ) -#define clWaitForEvents CLEW_GET_FUN(__clewWaitForEvents ) -#define clGetEventInfo CLEW_GET_FUN(__clewGetEventInfo ) -#define clRetainEvent CLEW_GET_FUN(__clewRetainEvent ) -#define clReleaseEvent CLEW_GET_FUN(__clewReleaseEvent ) -#define clGetEventProfilingInfo CLEW_GET_FUN(__clewGetEventProfilingInfo ) -#define clFlush CLEW_GET_FUN(__clewFlush ) -#define clFinish CLEW_GET_FUN(__clewFinish ) 
-#define clEnqueueReadBuffer CLEW_GET_FUN(__clewEnqueueReadBuffer ) -#define clEnqueueWriteBuffer CLEW_GET_FUN(__clewEnqueueWriteBuffer ) -#define clEnqueueCopyBuffer CLEW_GET_FUN(__clewEnqueueCopyBuffer ) -#define clEnqueueReadImage CLEW_GET_FUN(__clewEnqueueReadImage ) -#define clEnqueueWriteImage CLEW_GET_FUN(__clewEnqueueWriteImage ) -#define clEnqueueCopyImage CLEW_GET_FUN(__clewEnqueueCopyImage ) -#define clEnqueueCopyImageToBuffer CLEW_GET_FUN(__clewEnqueueCopyImageToBuffer ) -#define clEnqueueCopyBufferToImage CLEW_GET_FUN(__clewEnqueueCopyBufferToImage ) -#define clEnqueueMapBuffer CLEW_GET_FUN(__clewEnqueueMapBuffer ) -#define clEnqueueMapImage CLEW_GET_FUN(__clewEnqueueMapImage ) -#define clEnqueueUnmapMemObject CLEW_GET_FUN(__clewEnqueueUnmapMemObject ) -#define clEnqueueNDRangeKernel CLEW_GET_FUN(__clewEnqueueNDRangeKernel ) -#define clEnqueueTask CLEW_GET_FUN(__clewEnqueueTask ) -#define clEnqueueNativeKernel CLEW_GET_FUN(__clewEnqueueNativeKernel ) -#define clEnqueueMarker CLEW_GET_FUN(__clewEnqueueMarker ) -#define clEnqueueWaitForEvents CLEW_GET_FUN(__clewEnqueueWaitForEvents ) -#define clEnqueueBarrier CLEW_GET_FUN(__clewEnqueueBarrier ) -#define clGetExtensionFunctionAddress CLEW_GET_FUN(__clewGetExtensionFunctionAddress ) - -#endif // CLCC_GENERATE_DOCUMENTATION - -#define CLEW_SUCCESS 0 //!< Success error code -#define CLEW_ERROR_OPEN_FAILED -1 //!< Error code for failing to open the dynamic library -#define CLEW_ERROR_ATEXIT_FAILED -2 //!< Error code for failing to queue the closing of the dynamic library to atexit() - -int clLibraryInit(void); -const char *clErrorString(cl_int error); - -CCL_NAMESPACE_END - -#endif /* __UTIL_OPENCL_H__ */ - diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 5d0fea34761..2feb3d6ab7e 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -69,13 +69,6 @@ #define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 #endif -/* MSVC 2008, no 
SSE41 (broken blendv intrinsic) and no AVX support */ -#if defined(_MSC_VER) && (_MSC_VER < 1700) -#undef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -#undef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -#undef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -#endif - #endif /* SSE Experiment diff --git a/intern/ghost/GHOST_C-api.h b/intern/ghost/GHOST_C-api.h index 28b5fdd9edd..7b47f0526a2 100644 --- a/intern/ghost/GHOST_C-api.h +++ b/intern/ghost/GHOST_C-api.h @@ -151,7 +151,7 @@ extern void GHOST_GetMainDisplayDimensions(GHOST_SystemHandle systemhandle, /** * Returns the dimensions of all displays combine * (the current workspace). - * No need to worrky about overlapping monitors. + * No need to worry about overlapping monitors. * \param systemhandle The handle to the system * \param width A pointer the width gets put in * \param height A pointer the height gets put in @@ -401,7 +401,7 @@ extern GHOST_TSuccess GHOST_SetCursorPosition(GHOST_SystemHandle systemhandle, * do this automatically. * \param windowhandle The handle to the window * \param mode The new grab state of the cursor. - * \param bounds The grab ragion (optional) - left,top,right,bottom + * \param bounds The grab region (optional) - left,top,right,bottom * \param mouse_ungrab_xy XY for new mouse location (optional) - x,y * \return Indication of success. */ @@ -759,7 +759,7 @@ extern GHOST_TSuccess GHOST_IsEmptyRectangle(GHOST_RectangleHandle rectanglehand /** * Returns whether this rectangle is valid. - * Valid rectangles are rectangles that have m_l <= m_r and m_t <= m_b. Thus, emapty rectangles are valid. + * Valid rectangles are rectangles that have m_l <= m_r and m_t <= m_b. Thus, empty rectangles are valid. 
* \param rectanglehandle The handle to the rectangle * \return intean value (true == valid rectangle) */ @@ -853,7 +853,7 @@ extern GHOST_TSuccess GHOST_ClipRectangle(GHOST_RectangleHandle rectanglehandle, GHOST_RectangleHandle anotherrectanglehandle); /** - * Return the data from the clipboad + * Return the data from the clipboard * \param selection Boolean to return the selection instead, X11 only feature. * \return clipboard data */ diff --git a/intern/ghost/GHOST_ISystem.h b/intern/ghost/GHOST_ISystem.h index 137926b033c..19f36319949 100644 --- a/intern/ghost/GHOST_ISystem.h +++ b/intern/ghost/GHOST_ISystem.h @@ -48,7 +48,7 @@ class GHOST_IEventConsumer; * * GHOST is yet another acronym. It stands for "Generic Handy Operating System * Toolkit". It has been created to replace the OpenGL utility tool kit - * <a href="http://www.opengl.org/developers/documentation/glut.html">GLUT</a>. + * <a href="http://www.opengl.org/resources/libraries/glut/">GLUT</a>. * GLUT was used in <a href="http://www.blender3d.com">Blender</a> until the * point that Blender needed to be ported to Apple's Mac OSX. Blender needed a * number of modifications in GLUT to work but the GLUT sources for OSX were @@ -60,31 +60,29 @@ class GHOST_IEventConsumer; * In short: everything that Blender needed from GLUT to run on all it's supported * operating systems and some extra's. * This includes : - * <ul> - * <li> Time(r) management.</li> - * <li> Display/window management (windows are only created on the main display). - * <li> Event management.</li> - * <li> Cursor shape management (no custom cursors for now).</li> - * <li> Access to the state of the mouse buttons and the keyboard.</li> - * <li> Menus for windows with events generated when they are accessed (this is - * work in progress).</li> - * <li> Video mode switching.</li> - * <li> Copy/Paste buffers.</li> - * <li> System paths.</li> - * </ul> + * + * - Time(r) management. 
+ * - Display/window management (windows are only created on the main display). + * - Event management. + * - Cursor shape management (no custom cursors for now). + * - Access to the state of the mouse buttons and the keyboard. + * - Menus for windows with events generated when they are accessed (this is + * work in progress). + * - Video mode switching. + * - Copy/Paste buffers. + * - System paths. + * * Font management has been moved to a separate library. * * \section platforms Platforms * * GHOST supports the following platforms: - * <ul> - * <li> OSX Cocoa.</li> - * <li> OSX Carbon.</li> - * <li> Windows.</li> - * <li> X11.</li> - * <li> SDL1.3 (experimental).</li> - * <li> NULL (headless mode).</li> - * </ul> + * + * - OSX Cocoa. + * - Windows. + * - X11. + * - SDL2 (experimental). + * - NULL (headless mode). * * \section Building GHOST * @@ -92,23 +90,23 @@ class GHOST_IEventConsumer; * * \section interface Interface * GHOST has two programming interfaces: - * <ul> - * <li>The C-API. For programs written in C.</li> - * <li>The C++-API. For programs written in C++.</li> - * </ul> - * GHOST itself is writtem in C++ and the C-API is a wrapper around the C++ + * + * - The C-API. For programs written in C. + * - The C++-API. For programs written in C++. + * + * GHOST itself is written in C++ and the C-API is a wrapper around the C++ * API. * * \subsection cplusplus_api The C++ API consists of the following files: - * <ul> - * <li>GHOST_IEvent.h</li> - * <li>GHOST_IEventConsumer.h</li> - * <li>GHOST_ISystem.h</li> - * <li>GHOST_ITimerTask.h</li> - * <li>GHOST_IWindow.h</li> - * <li>GHOST_Rect.h</li> - * <li>GHOST_Types.h</li> - * </ul> + * + * - GHOST_IEvent.h + * - GHOST_IEventConsumer.h + * - GHOST_ISystem.h + * - GHOST_ITimerTask.h + * - GHOST_IWindow.h + * - GHOST_Rect.h + * - GHOST_Types.h + * * For an example of using the C++-API, have a look at the GHOST_C-Test.cpp * program in the ?/ghost/test/gears/ directory. 
* diff --git a/intern/ghost/GHOST_IWindow.h b/intern/ghost/GHOST_IWindow.h index 37406066fc7..71dc193a81b 100644 --- a/intern/ghost/GHOST_IWindow.h +++ b/intern/ghost/GHOST_IWindow.h @@ -46,13 +46,12 @@ * \see GHOST_ISystem#createWindow * * There are two coordinate systems: - * <ul> - * <li>The screen coordinate system. The origin of the screen is located in the - * upper left corner of the screen.</li> - * <li>The client rectangle coordinate system. The client rectangle of a window - * is the area that is drawable by the application (excluding title bars etc.). - * </li> - * </ul> + * + * - The screen coordinate system. The origin of the screen is located in the + * upper left corner of the screen.</li> + * - The client rectangle coordinate system. The client rectangle of a window + * is the area that is drawable by the application (excluding title bars etc.). + * * \author Maarten Gribnau * \date May 31, 2001 */ diff --git a/intern/ghost/GHOST_Rect.h b/intern/ghost/GHOST_Rect.h index a055b6f7f0d..c2ea8db0a66 100644 --- a/intern/ghost/GHOST_Rect.h +++ b/intern/ghost/GHOST_Rect.h @@ -102,7 +102,7 @@ public: /** * Returns whether this rectangle is valid. - * Valid rectangles are rectangles that have m_l <= m_r and m_t <= m_b. Thus, emapty rectangles are valid. + * Valid rectangles are rectangles that have m_l <= m_r and m_t <= m_b. Thus, empty rectangles are valid. * \return boolean value (true==valid rectangle) */ virtual inline bool isValid() const; diff --git a/intern/ghost/SConscript b/intern/ghost/SConscript index e8550753a70..5a4572c164d 100644 --- a/intern/ghost/SConscript +++ b/intern/ghost/SConscript @@ -36,16 +36,17 @@ sources = env.Glob('intern/*.cpp') sources2 = env.Glob('intern/GHOST_NDOFManager3Dconnexion.c') if window_system == 'darwin': sources += env.Glob('intern/*.mm') + #remove, will be readded below if needed. 
+ sources.remove('intern' + os.sep + 'GHOST_ContextCGL.mm') if not env['WITH_BF_GL_EGL']: sources.remove('intern' + os.sep + 'GHOST_ContextEGL.cpp') # seems cleaner to remove these now then add back the one that is needed -sources.remove('intern' + os.sep + 'GHOST_ContextCGL.mm') sources.remove('intern' + os.sep + 'GHOST_ContextGLX.cpp') sources.remove('intern' + os.sep + 'GHOST_ContextWGL.cpp') -pf = ['GHOST_DisplayManager', 'GHOST_System', 'GHOST_SystemPaths', 'GHOST_Window', 'GHOST_DropTarget', 'GHOST_NDOFManager'] +pf = ['GHOST_DisplayManager', 'GHOST_System', 'GHOST_SystemPaths', 'GHOST_Window', 'GHOST_DropTarget', 'GHOST_NDOFManager', 'GHOST_Context'] defs = env['BF_GL_DEFINITIONS'] @@ -75,6 +76,10 @@ elif window_system in ('linux', 'openbsd3', 'sunos5', 'freebsd7', 'freebsd8', 'f for f in pf: try: sources.remove('intern' + os.sep + f + 'Win32.cpp') + except ValueError: + pass + + try: sources.remove('intern' + os.sep + f + 'SDL.cpp') except ValueError: pass @@ -108,6 +113,10 @@ elif window_system in ('win32-vc', 'win32-mingw', 'cygwin', 'linuxcross', 'win64 for f in pf: try: sources.remove('intern' + os.sep + f + 'X11.cpp') + except ValueError: + pass + + try: sources.remove('intern' + os.sep + f + 'SDL.cpp') except ValueError: pass @@ -121,7 +130,14 @@ elif window_system == 'darwin': for f in pf: try: sources.remove('intern' + os.sep + f + 'Win32.cpp') + except ValueError: + pass + + try: sources.remove('intern' + os.sep + f + 'X11.cpp') + except ValueError: + pass + try: sources.remove('intern' + os.sep + f + 'SDL.cpp') except ValueError: pass diff --git a/intern/ghost/intern/GHOST_DisplayManager.h b/intern/ghost/intern/GHOST_DisplayManager.h index 7dc0cf2ca04..afdb11543e9 100644 --- a/intern/ghost/intern/GHOST_DisplayManager.h +++ b/intern/ghost/intern/GHOST_DisplayManager.h @@ -100,7 +100,7 @@ public: /** * Changes the current setting for this display device. - * The setting given to this method is matched againts the available diplay settings. 
+ * The setting given to this method is matched against the available display settings. * The best match is activated (@see findMatch()). * \param display The index of the display to query with 0 <= display < getNumDisplays(). * \param setting The setting of the display device to be matched and activated. diff --git a/intern/ghost/intern/GHOST_DisplayManagerX11.cpp b/intern/ghost/intern/GHOST_DisplayManagerX11.cpp index a5457891ec3..24289e6b006 100644 --- a/intern/ghost/intern/GHOST_DisplayManagerX11.cpp +++ b/intern/ghost/intern/GHOST_DisplayManagerX11.cpp @@ -194,7 +194,7 @@ setCurrentDisplaySetting( fprintf(stderr, "Error: XF86VidMode extension missing!\n"); return GHOST_kFailure; } -# ifdef _DEBUG +# ifdef DEBUG printf("Using XFree86-VidModeExtension Version %d.%d\n", majorVersion, minorVersion); # endif @@ -240,7 +240,7 @@ setCurrentDisplaySetting( } if (best_fit != -1) { -# ifdef _DEBUG +# ifdef DEBUG printf("Switching to video mode %dx%d %dx%d %d\n", vidmodes[best_fit]->hdisplay, vidmodes[best_fit]->vdisplay, vidmodes[best_fit]->htotal, vidmodes[best_fit]->vtotal, diff --git a/intern/ghost/intern/GHOST_DropTargetWin32.cpp b/intern/ghost/intern/GHOST_DropTargetWin32.cpp index 1aaf939e996..fd9abce96b7 100644 --- a/intern/ghost/intern/GHOST_DropTargetWin32.cpp +++ b/intern/ghost/intern/GHOST_DropTargetWin32.cpp @@ -186,7 +186,7 @@ DWORD GHOST_DropTargetWin32::allowedDropEffect(DWORD dwAllowed) GHOST_TDragnDropTypes GHOST_DropTargetWin32::getGhostType(IDataObject *pDataObject) { /* Text - * Note: Unicode text is aviable as CF_TEXT too, the system can do the + * Note: Unicode text is available as CF_TEXT too, the system can do the * conversion, but we do the conversion ourself with WC_NO_BEST_FIT_CHARS. 
*/ FORMATETC fmtetc = { CF_TEXT, 0, DVASPECT_CONTENT, -1, TYMED_HGLOBAL }; diff --git a/intern/ghost/intern/GHOST_DropTargetX11.h b/intern/ghost/intern/GHOST_DropTargetX11.h index 9ac45ba8dfe..e2968844ff0 100644 --- a/intern/ghost/intern/GHOST_DropTargetX11.h +++ b/intern/ghost/intern/GHOST_DropTargetX11.h @@ -78,7 +78,7 @@ private: void Initialize(void); /** - * Uninitiailize XDND and all related X atoms + * Uninitialize XDND and all related X atoms */ void Uninitialize(void); @@ -101,7 +101,7 @@ private: /** * Fully decode file URL (i.e. converts "file:///a%20b/test" to "/a b/test") * \param fileUrl - file path URL to be fully decoded - * \return decoded file path (resutl shold be free-d) + * \return decoded file path (resutl should be free-d) */ char *FileUrlDecode(char *fileUrl); diff --git a/intern/ghost/intern/GHOST_NDOFManager3Dconnexion.c b/intern/ghost/intern/GHOST_NDOFManager3Dconnexion.c index 9df9a56f8b8..01e301f927e 100644 --- a/intern/ghost/intern/GHOST_NDOFManager3Dconnexion.c +++ b/intern/ghost/intern/GHOST_NDOFManager3Dconnexion.c @@ -30,14 +30,14 @@ /* It is to be noted that these implementations are linked in as * 'extern "C"' calls from GHOST_NDOFManagerCocoa. - + * * This is done in order to * preserve weak linking capability (which as of clang-3.3 and xcode5 * breaks weak linking when there is name mangling of c++ libraries.) * * We need to have the weak linked file as pure C. Therefore we build a * compiled bridge from the real weak linked calls and the calls within C++ - + * */ OSErr GHOST_NDOFManager3Dconnexion_available(void) diff --git a/intern/ghost/intern/GHOST_System.h b/intern/ghost/intern/GHOST_System.h index 57aa0a31c94..79230b0f505 100644 --- a/intern/ghost/intern/GHOST_System.h +++ b/intern/ghost/intern/GHOST_System.h @@ -250,27 +250,23 @@ public: virtual GHOST_TSuccess pushEvent(GHOST_IEvent *event); /** - * Returns the timer manager. * \return The timer manager. 
*/ inline virtual GHOST_TimerManager *getTimerManager() const; /** - * Returns a pointer to our event manager. * \return A pointer to our event manager. */ virtual inline GHOST_EventManager *getEventManager() const; /** - * Returns a pointer to our window manager. * \return A pointer to our window manager. */ virtual inline GHOST_WindowManager *getWindowManager() const; #ifdef WITH_INPUT_NDOF /** - * Returns a pointer to our n-degree of freedeom manager. - * \return A pointer to our n-degree of freedeom manager. + * \return A pointer to our n-degree of freedom manager. */ virtual inline GHOST_NDOFManager *getNDOFManager() const; #endif @@ -333,7 +329,7 @@ protected: virtual GHOST_TSuccess createFullScreenWindow(GHOST_Window **window, const GHOST_DisplaySetting &settings, const bool stereoVisual, const GHOST_TUns16 numOfAASamples = 0); - /** The display manager (platform dependant). */ + /** The display manager (platform dependent). */ GHOST_DisplayManager *m_displayManager; /** The timer manager. */ diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm index 1d4c6d5e10b..a2e26574530 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.mm +++ b/intern/ghost/intern/GHOST_SystemCocoa.mm @@ -700,10 +700,10 @@ bool GHOST_SystemCocoa::processEvents(bool waitForEvent) handleKeyEvent(event); } else { - // For some reason NSApp is swallowing the key up events when command + // For some reason NSApp is swallowing the key up events when modifier // key is pressed, even if there seems to be no apparent reason to do // so, as a workaround we always handle these up events. 
- if ([event type] == NSKeyUp && ([event modifierFlags] & NSCommandKeyMask)) + if ([event type] == NSKeyUp && (([event modifierFlags] & NSCommandKeyMask) || ([event modifierFlags] & NSAlternateKeyMask))) handleKeyEvent(event); [NSApp sendEvent:event]; diff --git a/intern/ghost/intern/GHOST_SystemPathsWin32.cpp b/intern/ghost/intern/GHOST_SystemPathsWin32.cpp index 3a313c792d0..2bd380050f1 100644 --- a/intern/ghost/intern/GHOST_SystemPathsWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemPathsWin32.cpp @@ -37,7 +37,7 @@ #include <shlobj.h> #include "utfconv.h" -#if defined(__MINGW32__) || defined(__CYGWIN__) +#ifdef __MINGW32__ #if !defined(SHARD_PIDL) #define SHARD_PIDL 0x00000001L diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index ea5c655a3e2..0767ad5a8f9 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -46,6 +46,8 @@ #include <shlobj.h> #include <tlhelp32.h> +#include <Psapi.h> +#include <windowsx.h> #include "utfconv.h" @@ -114,6 +116,17 @@ #define VK_MEDIA_PLAY_PAUSE 0xB3 #endif // VK_MEDIA_PLAY_PAUSE +/* Workaround for some laptop touchpads, some of which seems to + * have driver issues which makes it so window function receives + * the message, but PeekMessage doesn't pick those messages for + * some reason. + * + * We send a dummy WM_USER message to force PeekMessage to receive + * something, making it so blender's window manager sees the new + * messages coming in. + */ +#define BROKEN_PEEK_TOUCHPAD + static void initRawInput() { #ifdef WITH_INPUT_NDOF @@ -1036,6 +1049,7 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, event = processCursorEvent(GHOST_kEventCursorMove, window); break; case WM_MOUSEWHEEL: + { /* The WM_MOUSEWHEEL message is sent to the focus window * when the mouse wheel is rotated. The DefWindowProc * function propagates the message to the window's parent. 
@@ -1043,8 +1057,28 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, * since DefWindowProc propagates it up the parent chain * until it finds a window that processes it. */ - event = processWheelEvent(window, wParam, lParam); + + /* Get the winow under the mouse and send event to it's queue. */ + POINT mouse_pos = {GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam)}; + HWND mouse_hwnd = WindowFromPoint(mouse_pos); + GHOST_WindowWin32 *mouse_window = (GHOST_WindowWin32 *)::GetWindowLongPtr(mouse_hwnd, GWLP_USERDATA); + if (mouse_window != NULL) { + event = processWheelEvent(mouse_window, wParam, lParam); + } + else { + /* If it happened so window under the mouse is not found (which i'm not + * really sure might happen), then we add event to the focused window + * in order to avoid some possible negative side effects. + * - sergey - + */ + event = processWheelEvent(window, wParam, lParam); + } + +#ifdef BROKEN_PEEK_TOUCHPAD + PostMessage(hwnd, WM_USER, 0, 0); +#endif break; + } case WM_SETCURSOR: /* The WM_SETCURSOR message is sent to a window if the mouse causes the cursor * to move within a window and mouse input is not captured. 
@@ -1375,16 +1409,63 @@ void GHOST_SystemWin32::putClipboard(GHOST_TInt8 *buffer, bool selection) const } } +static DWORD GetParentProcessID(void) +{ + HANDLE snapshot; + PROCESSENTRY32 pe32 = {0}; + DWORD ppid = 0, pid = GetCurrentProcessId(); + snapshot = CreateToolhelp32Snapshot( TH32CS_SNAPPROCESS, 0 ); + if (snapshot == INVALID_HANDLE_VALUE) { + return -1; + } + pe32.dwSize = sizeof( pe32 ); + if (!Process32First(snapshot, &pe32)) { + CloseHandle(snapshot); + return -1; + } + do { + if (pe32.th32ProcessID == pid) { + ppid = pe32.th32ParentProcessID; + break; + } + } while (Process32Next(snapshot, &pe32)); + CloseHandle(snapshot); + return ppid; +} + +static bool getProcessName(int pid, char *buffer, int max_len) +{ + bool result = false; + HANDLE handle = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, + FALSE, pid); + if (handle) { + GetModuleFileNameEx(handle, 0, buffer, max_len); + result = true; + } + CloseHandle(handle); + return result; +} + static bool isStartedFromCommandPrompt() { HWND hwnd = GetConsoleWindow(); if (hwnd) { DWORD pid = (DWORD)-1; + DWORD ppid = GetParentProcessID(); + char parent_name[MAX_PATH]; + bool start_from_launcher = false; GetWindowThreadProcessId(hwnd, &pid); + if (getProcessName(ppid, parent_name, sizeof(parent_name))) { + char *filename = strrchr(parent_name, '\\'); + if (filename != NULL) { + start_from_launcher = strstr(filename, "blender.exe") != NULL; + } + } - if (pid == GetCurrentProcessId()) + /* When we're starting from a wrapper we need to compare with parent process ID. */ + if (pid == (start_from_launcher ? 
ppid : GetCurrentProcessId())) return true; } diff --git a/intern/ghost/intern/GHOST_SystemX11.cpp b/intern/ghost/intern/GHOST_SystemX11.cpp index f0d0ef22782..75a9223d6a3 100644 --- a/intern/ghost/intern/GHOST_SystemX11.cpp +++ b/intern/ghost/intern/GHOST_SystemX11.cpp @@ -141,6 +141,8 @@ GHOST_SystemX11( #undef GHOST_INTERN_ATOM m_last_warp = 0; + m_last_release_keycode = 0; + m_last_release_time = 0; /* compute the initial time */ timeval tv; @@ -244,7 +246,7 @@ getMainDisplayDimensions( { if (m_display) { /* note, for this to work as documented, - * we would need to use Xinerama check r54370 for code that did thia, + * we would need to use Xinerama check r54370 for code that did this, * we've since removed since its not worth the extra dep - campbell */ getAllDisplayDimensions(width, height); } @@ -526,6 +528,16 @@ processEvents( continue; } #endif + /* when using autorepeat, some keypress events can actually come *after* the + * last keyrelease. The next code takes care of that */ + if (xevent.type == KeyRelease) { + m_last_release_keycode = xevent.xkey.keycode; + m_last_release_time = xevent.xkey.time; + } + else if (xevent.type == KeyPress) { + if ((xevent.xkey.keycode == m_last_release_keycode) && ((xevent.xkey.time <= m_last_release_time))) + continue; + } processEvent(&xevent); anyProcessed = true; @@ -659,7 +671,7 @@ GHOST_SystemX11::processEvent(XEvent *xe) #ifdef WITH_X11_XINPUT /* Proximity-Out Events are not reliable, if the tablet is active - check on each event * this adds a little overhead but only while the tablet is in use. 
- * in the futire we could have a ghost call window->CheckTabletProximity() + * in the future we could have a ghost call window->CheckTabletProximity() * but for now enough parts of the code are checking 'Active' * - campbell */ if (window->GetTabletData()->Active != GHOST_kTabletModeNone) { @@ -1240,7 +1252,7 @@ getModifierKeys( XQueryKeymap(m_display, (char *)m_keyboard_vector); - /* now translate key symobols into keycodes and + /* now translate key symbols into keycodes and * test with vector. */ const static KeyCode shift_l = XKeysymToKeycode(m_display, XK_Shift_L); diff --git a/intern/ghost/intern/GHOST_SystemX11.h b/intern/ghost/intern/GHOST_SystemX11.h index 1a055725aac..be149cbb773 100644 --- a/intern/ghost/intern/GHOST_SystemX11.h +++ b/intern/ghost/intern/GHOST_SystemX11.h @@ -355,6 +355,10 @@ private: * and stop accumulating all events generated before that */ Time m_last_warp; + /* detect autorepeat glitch */ + unsigned int m_last_release_keycode; + Time m_last_release_time; + /** * Return the ghost window associated with the * X11 window xwind diff --git a/intern/ghost/intern/GHOST_TimerManager.h b/intern/ghost/intern/GHOST_TimerManager.h index 5a24c56ef10..b94175e9ff8 100644 --- a/intern/ghost/intern/GHOST_TimerManager.h +++ b/intern/ghost/intern/GHOST_TimerManager.h @@ -67,7 +67,7 @@ public: virtual GHOST_TUns32 getNumTimers(); /** - * Returns whther this timer task ins in our list. + * Returns whether this timer task ins in our list. * \return Indication of presence. 
*/ virtual bool getTimerFound(GHOST_TimerTask *timer); diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm index f0aac339546..52436d38e5c 100644 --- a/intern/ghost/intern/GHOST_WindowCocoa.mm +++ b/intern/ghost/intern/GHOST_WindowCocoa.mm @@ -634,15 +634,14 @@ GHOST_WindowCocoa::GHOST_WindowCocoa( if (state == GHOST_kWindowStateFullScreen) setState(GHOST_kWindowStateFullScreen); -//Using lion_fullscreen suffers from an uncovered problem when called from operator, disabled for now -// //Starting with 10.9 (darwin 13.x.x), we always use Lion fullscreen, since it -// //now has proper multi-monitor support for fullscreen -// char darwin_ver[10]; -// size_t len = sizeof(darwin_ver); -// sysctlbyname("kern.osrelease", &darwin_ver, &len, NULL, 0); -// if(darwin_ver[0] == '1' && darwin_ver[1] >= '3') { -// m_lionStyleFullScreen = true; -// } + //Starting with 10.9 (darwin 13.x.x), we always use Lion fullscreen, since it + //now has proper multi-monitor support for fullscreen + char darwin_ver[10]; + size_t len = sizeof(darwin_ver); + sysctlbyname("kern.osrelease", &darwin_ver, &len, NULL, 0); + if(darwin_ver[0] == '1' && darwin_ver[1] >= '3') { + m_lionStyleFullScreen = true; + } [pool drain]; } diff --git a/intern/ghost/intern/GHOST_WindowManager.h b/intern/ghost/intern/GHOST_WindowManager.h index 79438c03702..8297e4d24d2 100644 --- a/intern/ghost/intern/GHOST_WindowManager.h +++ b/intern/ghost/intern/GHOST_WindowManager.h @@ -87,7 +87,7 @@ public: /** * Returns pointer to the full-screen window. - * \return The fll-screen window (0 if not in full-screen). + * \return The full-screen window (NULL if not in full-screen). 
*/ virtual GHOST_IWindow *getFullScreenWindow(void) const; diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp index 76ec050f7d7..64ea7192616 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.cpp +++ b/intern/ghost/intern/GHOST_WindowWin32.cpp @@ -120,7 +120,7 @@ GHOST_WindowWin32::GHOST_WindowWin32( MONITORINFO monitor; GHOST_TUns32 tw, th; -#if !defined(_MSC_VER) || _MSC_VER < 1700 +#ifndef _MSC_VER int cxsizeframe = GetSystemMetrics(SM_CXSIZEFRAME); int cysizeframe = GetSystemMetrics(SM_CYSIZEFRAME); #else diff --git a/intern/ghost/intern/GHOST_WindowX11.cpp b/intern/ghost/intern/GHOST_WindowX11.cpp index 6ff42aaf428..97f8ae73d2d 100644 --- a/intern/ghost/intern/GHOST_WindowX11.cpp +++ b/intern/ghost/intern/GHOST_WindowX11.cpp @@ -60,7 +60,7 @@ #include <algorithm> #include <string> -/* For obscure full screen mode stuuf +/* For obscure full screen mode stuff * lifted verbatim from blut. */ typedef struct { diff --git a/intern/ghost/intern/GHOST_WindowX11.h b/intern/ghost/intern/GHOST_WindowX11.h index 6a012f12769..3255751be93 100644 --- a/intern/ghost/intern/GHOST_WindowX11.h +++ b/intern/ghost/intern/GHOST_WindowX11.h @@ -246,7 +246,7 @@ protected: /** * Sets the cursor grab on the window using * native window system calls. - * \param warp Only used when grab is enabled, hides the mouse and allows gragging outside the screen. + * \param warp Only used when grab is enabled, hides the mouse and allows dragging outside the screen. 
*/ GHOST_TSuccess setWindowCursorGrab( diff --git a/intern/ghost/test/gears/GHOST_Test.cpp b/intern/ghost/test/gears/GHOST_Test.cpp index 78e5257be1f..a81aaa85ecf 100644 --- a/intern/ghost/test/gears/GHOST_Test.cpp +++ b/intern/ghost/test/gears/GHOST_Test.cpp @@ -262,7 +262,8 @@ static void View(GHOST_IWindow *window, bool stereo, int eye = 0) window->activateDrawingContext(); GHOST_Rect bnds; int noOfScanlines = 0, lowerScanline = 0; - int verticalBlankingInterval = 32; // hard coded for testing purposes, display device dependant + /* hard coded for testing purposes, display device dependent */ + int verticalBlankingInterval = 32; float left, right, bottom, top; float nearplane, farplane, zeroPlane, distance; float eyeSeparation = 0.62f; diff --git a/intern/guardedalloc/MEM_guardedalloc.h b/intern/guardedalloc/MEM_guardedalloc.h index 8c5ad77b8b6..3b56362e3d6 100644 --- a/intern/guardedalloc/MEM_guardedalloc.h +++ b/intern/guardedalloc/MEM_guardedalloc.h @@ -76,7 +76,7 @@ extern "C" { extern size_t (*MEM_allocN_len)(const void *vmemh) ATTR_WARN_UNUSED_RESULT; /** - * Release memory previously allocatred by this module. + * Release memory previously allocated by this module. */ extern void (*MEM_freeN)(void *vmemh); diff --git a/intern/opencl/CMakeLists.txt b/intern/opencl/CMakeLists.txt deleted file mode 100644 index 03855cfdf8b..00000000000 --- a/intern/opencl/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# ***** BEGIN GPL LICENSE BLOCK ***** -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# -# The Original Code is Copyright (C) 2006, Blender Foundation -# All rights reserved. -# -# The Original Code is: all of this file. -# -# Contributor(s): Jacques Beaurain. -# -# ***** END GPL LICENSE BLOCK ***** - -set(INC - . -) - -set(INC_SYS - -) - -set(SRC - OCL_opencl.h - intern/clew.h - intern/clew.c - intern/OCL_opencl.c -) - - -blender_add_lib(bf_intern_opencl "${SRC}" "${INC}" "${INC_SYS}") diff --git a/intern/opencl/OCL_opencl.h b/intern/opencl/OCL_opencl.h deleted file mode 100644 index 733e3527197..00000000000 --- a/intern/opencl/OCL_opencl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2011, Blender Foundation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- * - * Contributor: - * Jeroen Bakker - * Monique Dewanchand - */ - -#ifndef OCL_OPENCL_H -#define OCL_OPENCL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "intern/clew.h" -int OCL_init(void); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/intern/opencl/SConscript b/intern/opencl/SConscript deleted file mode 100644 index 41a6d720098..00000000000 --- a/intern/opencl/SConscript +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -# -# ***** BEGIN GPL LICENSE BLOCK ***** -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# -# The Original Code is Copyright (C) 2006, Blender Foundation -# All rights reserved. -# -# The Original Code is: all of this file. -# -# Contributor(s): Nathan Letwory. -# -# ***** END GPL LICENSE BLOCK ***** - -Import ('env') - -sources = env.Glob('intern/*.c') - -incs = '.' - -env.BlenderLib ( 'bf_intern_opencl', sources, Split(incs), libtype=['core','player'], priority = [192,192] ) diff --git a/intern/opencl/intern/OCL_opencl.c b/intern/opencl/intern/OCL_opencl.c deleted file mode 100644 index 33a936896fd..00000000000 --- a/intern/opencl/intern/OCL_opencl.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2011, Blender Foundation. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Contributor: - * Jeroen Bakker - * Monique Dewanchand - */ - -#include "OCL_opencl.h" - -int OCL_init(void) -{ -#ifdef _WIN32 - const char *path = "OpenCL.dll"; -#elif defined(__APPLE__) - const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL"; -#else - const char *path = "libOpenCL.so"; -#endif - - return (clewInit(path) == CLEW_SUCCESS); -} - diff --git a/intern/opencl/intern/clew.c b/intern/opencl/intern/clew.c deleted file mode 100644 index 1e31ebced0a..00000000000 --- a/intern/opencl/intern/clew.c +++ /dev/null @@ -1,316 +0,0 @@ -////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009 Organic Vectory B.V. -// Written by George van Venrooij -// -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file license.txt) -////////////////////////////////////////////////////////////////////////// - -#include "clew.h" - -//! \file clew.c -//! 
\brief OpenCL run-time loader source - -#ifndef CLCC_GENERATE_DOCUMENTATION -#ifdef _WIN32 - #define WIN32_LEAN_AND_MEAN - #define VC_EXTRALEAN - #include <windows.h> - - typedef HMODULE CLCC_DYNLIB_HANDLE; - - #define CLCC_DYNLIB_OPEN LoadLibrary - #define CLCC_DYNLIB_CLOSE FreeLibrary - #define CLCC_DYNLIB_IMPORT GetProcAddress -#else - #include <dlfcn.h> - - typedef void* CLCC_DYNLIB_HANDLE; - - #define CLCC_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL) - #define CLCC_DYNLIB_CLOSE dlclose - #define CLCC_DYNLIB_IMPORT dlsym -#endif -#else - //typedef implementation_defined CLCC_DYNLIB_HANDLE; - //#define CLCC_DYNLIB_OPEN(path) implementation_defined - //#define CLCC_DYNLIB_CLOSE implementation_defined - //#define CLCC_DYNLIB_IMPORT implementation_defined -#endif - -#include <stdlib.h> - -//! \brief module handle -static CLCC_DYNLIB_HANDLE module = NULL; - -// Variables holding function entry points -#ifndef CLCC_GENERATE_DOCUMENTATION -PFNCLGETPLATFORMIDS __oclGetPlatformIDs = NULL; -PFNCLGETPLATFORMINFO __oclGetPlatformInfo = NULL; -PFNCLGETDEVICEIDS __oclGetDeviceIDs = NULL; -PFNCLGETDEVICEINFO __oclGetDeviceInfo = NULL; -PFNCLCREATECONTEXT __oclCreateContext = NULL; -PFNCLCREATECONTEXTFROMTYPE __oclCreateContextFromType = NULL; -PFNCLRETAINCONTEXT __oclRetainContext = NULL; -PFNCLRELEASECONTEXT __oclReleaseContext = NULL; -PFNCLGETCONTEXTINFO __oclGetContextInfo = NULL; -PFNCLCREATECOMMANDQUEUE __oclCreateCommandQueue = NULL; -PFNCLRETAINCOMMANDQUEUE __oclRetainCommandQueue = NULL; -PFNCLRELEASECOMMANDQUEUE __oclReleaseCommandQueue = NULL; -PFNCLGETCOMMANDQUEUEINFO __oclGetCommandQueueInfo = NULL; -PFNCLSETCOMMANDQUEUEPROPERTY __oclSetCommandQueueProperty = NULL; -PFNCLCREATEBUFFER __oclCreateBuffer = NULL; -PFNCLCREATEIMAGE2D __oclCreateImage2D = NULL; -PFNCLCREATEIMAGE3D __oclCreateImage3D = NULL; -PFNCLRETAINMEMOBJECT __oclRetainMemObject = NULL; -PFNCLRELEASEMEMOBJECT __oclReleaseMemObject = NULL; -PFNCLGETSUPPORTEDIMAGEFORMATS 
__oclGetSupportedImageFormats = NULL; -PFNCLGETMEMOBJECTINFO __oclGetMemObjectInfo = NULL; -PFNCLGETIMAGEINFO __oclGetImageInfo = NULL; -PFNCLCREATESAMPLER __oclCreateSampler = NULL; -PFNCLRETAINSAMPLER __oclRetainSampler = NULL; -PFNCLRELEASESAMPLER __oclReleaseSampler = NULL; -PFNCLGETSAMPLERINFO __oclGetSamplerInfo = NULL; -PFNCLCREATEPROGRAMWITHSOURCE __oclCreateProgramWithSource = NULL; -PFNCLCREATEPROGRAMWITHBINARY __oclCreateProgramWithBinary = NULL; -PFNCLRETAINPROGRAM __oclRetainProgram = NULL; -PFNCLRELEASEPROGRAM __oclReleaseProgram = NULL; -PFNCLBUILDPROGRAM __oclBuildProgram = NULL; -PFNCLUNLOADCOMPILER __oclUnloadCompiler = NULL; -PFNCLGETPROGRAMINFO __oclGetProgramInfo = NULL; -PFNCLGETPROGRAMBUILDINFO __oclGetProgramBuildInfo = NULL; -PFNCLCREATEKERNEL __oclCreateKernel = NULL; -PFNCLCREATEKERNELSINPROGRAM __oclCreateKernelsInProgram = NULL; -PFNCLRETAINKERNEL __oclRetainKernel = NULL; -PFNCLRELEASEKERNEL __oclReleaseKernel = NULL; -PFNCLSETKERNELARG __oclSetKernelArg = NULL; -PFNCLGETKERNELINFO __oclGetKernelInfo = NULL; -PFNCLGETKERNELWORKGROUPINFO __oclGetKernelWorkGroupInfo = NULL; -PFNCLWAITFOREVENTS __oclWaitForEvents = NULL; -PFNCLGETEVENTINFO __oclGetEventInfo = NULL; -PFNCLRETAINEVENT __oclRetainEvent = NULL; -PFNCLRELEASEEVENT __oclReleaseEvent = NULL; -PFNCLGETEVENTPROFILINGINFO __oclGetEventProfilingInfo = NULL; -PFNCLFLUSH __oclFlush = NULL; -PFNCLFINISH __oclFinish = NULL; -PFNCLENQUEUEREADBUFFER __oclEnqueueReadBuffer = NULL; -PFNCLENQUEUEWRITEBUFFER __oclEnqueueWriteBuffer = NULL; -PFNCLENQUEUECOPYBUFFER __oclEnqueueCopyBuffer = NULL; -PFNCLENQUEUEREADIMAGE __oclEnqueueReadImage = NULL; -PFNCLENQUEUEWRITEIMAGE __oclEnqueueWriteImage = NULL; -PFNCLENQUEUECOPYIMAGE __oclEnqueueCopyImage = NULL; -PFNCLENQUEUECOPYIMAGETOBUFFER __oclEnqueueCopyImageToBuffer = NULL; -PFNCLENQUEUECOPYBUFFERTOIMAGE __oclEnqueueCopyBufferToImage = NULL; -PFNCLENQUEUEMAPBUFFER __oclEnqueueMapBuffer = NULL; -PFNCLENQUEUEMAPIMAGE __oclEnqueueMapImage = NULL; 
-PFNCLENQUEUEUNMAPMEMOBJECT __oclEnqueueUnmapMemObject = NULL; -PFNCLENQUEUENDRANGEKERNEL __oclEnqueueNDRangeKernel = NULL; -PFNCLENQUEUETASK __oclEnqueueTask = NULL; -PFNCLENQUEUENATIVEKERNEL __oclEnqueueNativeKernel = NULL; -PFNCLENQUEUEMARKER __oclEnqueueMarker = NULL; -PFNCLENQUEUEWAITFOREVENTS __oclEnqueueWaitForEvents = NULL; -PFNCLENQUEUEBARRIER __oclEnqueueBarrier = NULL; -PFNCLGETEXTENSIONFUNCTIONADDRESS __oclGetExtensionFunctionAddress = NULL; -#endif // CLCC_GENERATE_DOCUMENTATION - - -//! \brief Unloads OpenCL dynamic library, should not be called directly -static void clewExit(void) -{ - if (module != NULL) - { - // Ignore errors - CLCC_DYNLIB_CLOSE(module); - module = NULL; - } -} - -//! \param path path to dynamic library to load -//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened -//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed -//! CLEW_SUCCESS when the library was succesfully loaded -int clewInit(const char* path) -{ - int error = 0; - - // Check if already initialized - if (module != NULL) - { - return CLEW_SUCCESS; - } - - // Load library - module = CLCC_DYNLIB_OPEN(path); - - // Check for errors - if (module == NULL) - { - return CLEW_ERROR_OPEN_FAILED; - } - - // Set unloading - error = atexit(clewExit); - - if (error) - { - // Failure queing atexit, shutdown with error - CLCC_DYNLIB_CLOSE(module); - module = NULL; - - return CLEW_ERROR_ATEXIT_FAILED; - } - - // Determine function entry-points - __oclGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs"); - __oclGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo"); - __oclGetDeviceIDs = (PFNCLGETDEVICEIDS )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs"); - __oclGetDeviceInfo = (PFNCLGETDEVICEINFO )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo"); - __oclCreateContext = (PFNCLCREATECONTEXT )CLCC_DYNLIB_IMPORT(module, "clCreateContext"); - __oclCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE 
)CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType"); - __oclRetainContext = (PFNCLRETAINCONTEXT )CLCC_DYNLIB_IMPORT(module, "clRetainContext"); - __oclReleaseContext = (PFNCLRELEASECONTEXT )CLCC_DYNLIB_IMPORT(module, "clReleaseContext"); - __oclGetContextInfo = (PFNCLGETCONTEXTINFO )CLCC_DYNLIB_IMPORT(module, "clGetContextInfo"); - __oclCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue"); - __oclRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue"); - __oclReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue"); - __oclGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo"); - __oclSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty"); - __oclCreateBuffer = (PFNCLCREATEBUFFER )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer"); - __oclCreateImage2D = (PFNCLCREATEIMAGE2D )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D"); - __oclCreateImage3D = (PFNCLCREATEIMAGE3D )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D"); - __oclRetainMemObject = (PFNCLRETAINMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject"); - __oclReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject"); - __oclGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats"); - __oclGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo"); - __oclGetImageInfo = (PFNCLGETIMAGEINFO )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo"); - __oclCreateSampler = (PFNCLCREATESAMPLER )CLCC_DYNLIB_IMPORT(module, "clCreateSampler"); - __oclRetainSampler = (PFNCLRETAINSAMPLER )CLCC_DYNLIB_IMPORT(module, "clRetainSampler"); - __oclReleaseSampler = (PFNCLRELEASESAMPLER )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler"); - __oclGetSamplerInfo = (PFNCLGETSAMPLERINFO 
)CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo"); - __oclCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource"); - __oclCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary"); - __oclRetainProgram = (PFNCLRETAINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clRetainProgram"); - __oclReleaseProgram = (PFNCLRELEASEPROGRAM )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram"); - __oclBuildProgram = (PFNCLBUILDPROGRAM )CLCC_DYNLIB_IMPORT(module, "clBuildProgram"); - __oclUnloadCompiler = (PFNCLUNLOADCOMPILER )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler"); - __oclGetProgramInfo = (PFNCLGETPROGRAMINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo"); - __oclGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo"); - __oclCreateKernel = (PFNCLCREATEKERNEL )CLCC_DYNLIB_IMPORT(module, "clCreateKernel"); - __oclCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram"); - __oclRetainKernel = (PFNCLRETAINKERNEL )CLCC_DYNLIB_IMPORT(module, "clRetainKernel"); - __oclReleaseKernel = (PFNCLRELEASEKERNEL )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel"); - __oclSetKernelArg = (PFNCLSETKERNELARG )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg"); - __oclGetKernelInfo = (PFNCLGETKERNELINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo"); - __oclGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo"); - __oclWaitForEvents = (PFNCLWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents"); - __oclGetEventInfo = (PFNCLGETEVENTINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo"); - __oclRetainEvent = (PFNCLRETAINEVENT )CLCC_DYNLIB_IMPORT(module, "clRetainEvent"); - __oclReleaseEvent = (PFNCLRELEASEEVENT )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent"); - __oclGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLCC_DYNLIB_IMPORT(module, 
"clGetEventProfilingInfo"); - __oclFlush = (PFNCLFLUSH )CLCC_DYNLIB_IMPORT(module, "clFlush"); - __oclFinish = (PFNCLFINISH )CLCC_DYNLIB_IMPORT(module, "clFinish"); - __oclEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer"); - __oclEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer"); - __oclEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer"); - __oclEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage"); - __oclEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage"); - __oclEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImage"); - __oclEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer"); - __oclEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage"); - __oclEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapBuffer"); - __oclEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapImage"); - __oclEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject"); - __oclEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel"); - __oclEnqueueTask = (PFNCLENQUEUETASK )CLCC_DYNLIB_IMPORT(module, "clEnqueueTask"); - __oclEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNativeKernel"); - __oclEnqueueMarker = (PFNCLENQUEUEMARKER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMarker"); - __oclEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents"); - __oclEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLCC_DYNLIB_IMPORT(module, "clEnqueueBarrier"); - __oclGetExtensionFunctionAddress = 
//! \brief Translate an OpenCL status code into its symbolic name.
//!
//! The table below is indexed by the negated status code, so entry 0 is
//! CL_SUCCESS and entry 63 is CL_INVALID_GLOBAL_WORK_SIZE. Codes -13 through
//! -29 have no name in this table and yield an empty string.
//!
//! \param error CL error code
//! \return a string representation of the error code, or
//!         "Unknown OpenCL error" when the code is positive or lies below the
//!         last table entry (previously such codes read outside the table)
const char* clewErrorString(cl_int error)
{
    static const char* strings[] =
    {
        // Error Codes
          "CL_SUCCESS"                          //   0
        , "CL_DEVICE_NOT_FOUND"                 //  -1
        , "CL_DEVICE_NOT_AVAILABLE"             //  -2
        , "CL_COMPILER_NOT_AVAILABLE"           //  -3
        , "CL_MEM_OBJECT_ALLOCATION_FAILURE"    //  -4
        , "CL_OUT_OF_RESOURCES"                 //  -5
        , "CL_OUT_OF_HOST_MEMORY"               //  -6
        , "CL_PROFILING_INFO_NOT_AVAILABLE"     //  -7
        , "CL_MEM_COPY_OVERLAP"                 //  -8
        , "CL_IMAGE_FORMAT_MISMATCH"            //  -9
        , "CL_IMAGE_FORMAT_NOT_SUPPORTED"       // -10
        , "CL_BUILD_PROGRAM_FAILURE"            // -11
        , "CL_MAP_FAILURE"                      // -12

        , ""    // -13
        , ""    // -14
        , ""    // -15
        , ""    // -16
        , ""    // -17
        , ""    // -18
        , ""    // -19

        , ""    // -20
        , ""    // -21
        , ""    // -22
        , ""    // -23
        , ""    // -24
        , ""    // -25
        , ""    // -26
        , ""    // -27
        , ""    // -28
        , ""    // -29

        , "CL_INVALID_VALUE"                    // -30
        , "CL_INVALID_DEVICE_TYPE"              // -31
        , "CL_INVALID_PLATFORM"                 // -32
        , "CL_INVALID_DEVICE"                   // -33
        , "CL_INVALID_CONTEXT"                  // -34
        , "CL_INVALID_QUEUE_PROPERTIES"         // -35
        , "CL_INVALID_COMMAND_QUEUE"            // -36
        , "CL_INVALID_HOST_PTR"                 // -37
        , "CL_INVALID_MEM_OBJECT"               // -38
        , "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"  // -39
        , "CL_INVALID_IMAGE_SIZE"               // -40
        , "CL_INVALID_SAMPLER"                  // -41
        , "CL_INVALID_BINARY"                   // -42
        , "CL_INVALID_BUILD_OPTIONS"            // -43
        , "CL_INVALID_PROGRAM"                  // -44
        , "CL_INVALID_PROGRAM_EXECUTABLE"       // -45
        , "CL_INVALID_KERNEL_NAME"              // -46
        , "CL_INVALID_KERNEL_DEFINITION"        // -47
        , "CL_INVALID_KERNEL"                   // -48
        , "CL_INVALID_ARG_INDEX"                // -49
        , "CL_INVALID_ARG_VALUE"                // -50
        , "CL_INVALID_ARG_SIZE"                 // -51
        , "CL_INVALID_KERNEL_ARGS"              // -52
        , "CL_INVALID_WORK_DIMENSION"           // -53
        , "CL_INVALID_WORK_GROUP_SIZE"          // -54
        , "CL_INVALID_WORK_ITEM_SIZE"           // -55
        , "CL_INVALID_GLOBAL_OFFSET"            // -56
        , "CL_INVALID_EVENT_WAIT_LIST"          // -57
        , "CL_INVALID_EVENT"                    // -58
        , "CL_INVALID_OPERATION"                // -59
        , "CL_INVALID_GL_OBJECT"                // -60
        , "CL_INVALID_BUFFER_SIZE"              // -61
        , "CL_INVALID_MIP_LEVEL"                // -62
        , "CL_INVALID_GLOBAL_WORK_SIZE"         // -63
    };
    static const int num_strings = (int)(sizeof(strings) / sizeof(strings[0]));

    // Validate before negating: a positive code or one below the last entry
    // would index outside the table, and negating the most negative integer
    // would overflow.
    if (error > 0 || error <= -num_strings)
    {
        return "Unknown OpenCL error";
    }

    return strings[-error];
}
library (http://glew.sourceforge.net/) - -// Run-time dynamic linking functionality based on concepts used in GLEW -#ifdef __OPENCL_CL_H -#error cl.h included before clew.h -#endif - -#ifdef __OPENCL_CL_PLATFORM_H -#error cl_platform.h included before clew.h -#endif - -#ifndef CLCC_GENERATE_DOCUMENTATION -// Prevent cl.h inclusion -#define __OPENCL_CL_H -// Prevent cl_platform.h inclusion -#define __CL_PLATFORM_H -#endif // CLCC_GENERATE_DOCUMENTATION - -/******************************************************************************* -* Copyright (c) 2008-2009 The Khronos Group Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and/or associated documentation files (the -* "Materials"), to deal in the Materials without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Materials, and to -* permit persons to whom the Materials are furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be included -* in all copies or substantial portions of the Materials. -* -* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
-******************************************************************************/ -#ifdef __APPLE__ -/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ -#include <AvailabilityMacros.h> -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef CLCC_GENERATE_DOCUMENTATION - -#if defined(_WIN32) -#define CL_API_ENTRY -#define CL_API_CALL __stdcall -#else -#define CL_API_ENTRY -#define CL_API_CALL -#endif - -#if defined(__APPLE__) -//JBKK removed for compatibility with blender trunk #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) -#else -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXTENSION_WEAK_LINK -#endif - -#if defined(_WIN32) && defined(_MSC_VER) - -/* scalar types */ -typedef signed __int8 cl_char; -typedef unsigned __int8 cl_uchar; -typedef signed __int16 cl_short; -typedef unsigned __int16 cl_ushort; -typedef signed __int32 cl_int; -typedef unsigned __int32 cl_uint; -typedef signed __int64 cl_long; -typedef unsigned __int64 cl_ulong; - -typedef unsigned __int16 cl_half; -typedef float cl_float; -typedef double cl_double; - - -/* -* Vector types -* -* Note: OpenCL requires that all types be naturally aligned. -* This means that vector types must be naturally aligned. -* For example, a vector of four floats must be aligned to -* a 16 byte boundary (calculated as 4 * the natural 4-byte -* alignment of the float). The alignment qualifiers here -* will only function properly if your compiler supports them -* and if you don't actively work to defeat them. For example, -* in order for a cl_float4 to be 16 byte aligned in a struct, -* the start of the struct must itself be 16-byte aligned. -* -* Maintaining proper alignment is the user's responsibility. 
-*/ -typedef signed __int8 cl_char2[2]; -typedef signed __int8 cl_char4[4]; -typedef signed __int8 cl_char8[8]; -typedef signed __int8 cl_char16[16]; -typedef unsigned __int8 cl_uchar2[2]; -typedef unsigned __int8 cl_uchar4[4]; -typedef unsigned __int8 cl_uchar8[8]; -typedef unsigned __int8 cl_uchar16[16]; - -typedef signed __int16 cl_short2[2]; -typedef signed __int16 cl_short4[4]; -typedef signed __int16 cl_short8[8]; -typedef signed __int16 cl_short16[16]; -typedef unsigned __int16 cl_ushort2[2]; -typedef unsigned __int16 cl_ushort4[4]; -typedef unsigned __int16 cl_ushort8[8]; -typedef unsigned __int16 cl_ushort16[16]; - -typedef signed __int32 cl_int2[2]; -typedef signed __int32 cl_int4[4]; -typedef signed __int32 cl_int8[8]; -typedef signed __int32 cl_int16[16]; -typedef unsigned __int32 cl_uint2[2]; -typedef unsigned __int32 cl_uint4[4]; -typedef unsigned __int32 cl_uint8[8]; -typedef unsigned __int32 cl_uint16[16]; - -typedef signed __int64 cl_long2[2]; -typedef signed __int64 cl_long4[4]; -typedef signed __int64 cl_long8[8]; -typedef signed __int64 cl_long16[16]; -typedef unsigned __int64 cl_ulong2[2]; -typedef unsigned __int64 cl_ulong4[4]; -typedef unsigned __int64 cl_ulong8[8]; -typedef unsigned __int64 cl_ulong16[16]; - -typedef float cl_float2[2]; -typedef float cl_float4[4]; -typedef float cl_float8[8]; -typedef float cl_float16[16]; - -typedef double cl_double2[2]; -typedef double cl_double4[4]; -typedef double cl_double8[8]; -typedef double cl_double16[16]; -/* There are no vector types for half */ - -#else - -#include <stdint.h> - -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short __attribute__((aligned(2))); -typedef uint16_t cl_ushort __attribute__((aligned(2))); -typedef int32_t cl_int __attribute__((aligned(4))); -typedef uint32_t cl_uint __attribute__((aligned(4))); -typedef int64_t cl_long __attribute__((aligned(8))); -typedef uint64_t cl_ulong __attribute__((aligned(8))); - -typedef uint16_t 
cl_half __attribute__((aligned(2))); -typedef float cl_float __attribute__((aligned(4))); -typedef double cl_double __attribute__((aligned(8))); - -/* -* Vector types -* -* Note: OpenCL requires that all types be naturally aligned. -* This means that vector types must be naturally aligned. -* For example, a vector of four floats must be aligned to -* a 16 byte boundary (calculated as 4 * the natural 4-byte -* alignment of the float). The alignment qualifiers here -* will only function properly if your compiler supports them -* and if you don't actively work to defeat them. For example, -* in order for a cl_float4 to be 16 byte aligned in a struct, -* the start of the struct must itself be 16-byte aligned. -* -* Maintaining proper alignment is the user's responsibility. -*/ -typedef int8_t cl_char2[2] __attribute__((aligned(2))); -typedef int8_t cl_char4[4] __attribute__((aligned(4))); -typedef int8_t cl_char8[8] __attribute__((aligned(8))); -typedef int8_t cl_char16[16] __attribute__((aligned(16))); -typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); -typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); -typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); -typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); - -typedef int16_t cl_short2[2] __attribute__((aligned(4))); -typedef int16_t cl_short4[4] __attribute__((aligned(8))); -typedef int16_t cl_short8[8] __attribute__((aligned(16))); -typedef int16_t cl_short16[16] __attribute__((aligned(32))); -typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); -typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); -typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); -typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); - -typedef int32_t cl_int2[2] __attribute__((aligned(8))); -typedef int32_t cl_int4[4] __attribute__((aligned(16))); -typedef int32_t cl_int8[8] __attribute__((aligned(32))); -typedef int32_t cl_int16[16] __attribute__((aligned(64))); -typedef uint32_t 
cl_uint2[2] __attribute__((aligned(8))); -typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); -typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); -typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); - -typedef int64_t cl_long2[2] __attribute__((aligned(16))); -typedef int64_t cl_long4[4] __attribute__((aligned(32))); -typedef int64_t cl_long8[8] __attribute__((aligned(64))); -typedef int64_t cl_long16[16] __attribute__((aligned(128))); -typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); -typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); -typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); -typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); - -typedef float cl_float2[2] __attribute__((aligned(8))); -typedef float cl_float4[4] __attribute__((aligned(16))); -typedef float cl_float8[8] __attribute__((aligned(32))); -typedef float cl_float16[16] __attribute__((aligned(64))); - -typedef double cl_double2[2] __attribute__((aligned(16))); -typedef double cl_double4[4] __attribute__((aligned(32))); -typedef double cl_double8[8] __attribute__((aligned(64))); -typedef double cl_double16[16] __attribute__((aligned(128))); - -/* There are no vector types for half */ - -#endif - -/******************************************************************************/ - -// Macro names and corresponding values defined by OpenCL - -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127-1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767-1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647-1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define 
CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#if defined(_MSC_VER) -// MSVC doesn't understand hex floats -#define CL_FLT_MAX 3.402823466e+38F -#define CL_FLT_MIN 1.175494351e-38F -#define CL_FLT_EPSILON 1.192092896e-07F -#else -#define CL_FLT_MAX 0x1.fffffep127f -#define CL_FLT_MIN 0x1.0p-126f -#define CL_FLT_EPSILON 0x1.0p-23f -#endif - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#if defined(_MSC_VER) -// MSVC doesn't understand hex floats -#define CL_DBL_MAX 1.7976931348623158e+308 -#define CL_DBL_MIN 2.2250738585072014e-308 -#define CL_DBL_EPSILON 2.2204460492503131e-016 -#else -#define CL_DBL_MAX 0x1.fffffffffffffp1023 -#define CL_DBL_MIN 0x1.0p-1022 -#define CL_DBL_EPSILON 0x1.0p-52 -#endif - -#include <stddef.h> - - -// CL.h contents -/******************************************************************************/ - -typedef struct _cl_platform_id * cl_platform_id; -typedef struct _cl_device_id * cl_device_id; -typedef struct _cl_context * cl_context; -typedef struct _cl_command_queue * cl_command_queue; -typedef struct _cl_mem * cl_mem; -typedef struct _cl_program * cl_program; -typedef struct _cl_kernel * cl_kernel; -typedef struct _cl_event * cl_event; -typedef struct _cl_sampler * cl_sampler; - -typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ -typedef cl_ulong cl_bitfield; -typedef cl_bitfield cl_device_type; -typedef cl_uint cl_platform_info; -typedef cl_uint cl_device_info; -typedef cl_bitfield cl_device_address_info; -typedef cl_bitfield cl_device_fp_config; -typedef cl_uint cl_device_mem_cache_type; -typedef cl_uint cl_device_local_mem_type; -typedef cl_bitfield cl_device_exec_capabilities; -typedef cl_bitfield cl_command_queue_properties; - -typedef intptr_t cl_context_properties; -typedef cl_uint cl_context_info; -typedef cl_uint cl_command_queue_info; -typedef cl_uint cl_channel_order; -typedef cl_uint cl_channel_type; -typedef cl_bitfield cl_mem_flags; -typedef cl_uint cl_mem_object_type; -typedef cl_uint cl_mem_info; -typedef cl_uint cl_image_info; -typedef cl_uint cl_addressing_mode; -typedef cl_uint cl_filter_mode; -typedef cl_uint cl_sampler_info; -typedef cl_bitfield cl_map_flags; -typedef cl_uint cl_program_info; -typedef cl_uint cl_program_build_info; -typedef cl_int cl_build_status; -typedef cl_uint cl_kernel_info; -typedef cl_uint cl_kernel_work_group_info; -typedef cl_uint cl_event_info; -typedef cl_uint cl_command_type; -typedef cl_uint cl_profiling_info; - -typedef struct _cl_image_format { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; -} cl_image_format; - - - -/******************************************************************************/ - -// Error Codes -#define CL_SUCCESS 0 -#define CL_DEVICE_NOT_FOUND -1 -#define CL_DEVICE_NOT_AVAILABLE -2 -#define CL_COMPILER_NOT_AVAILABLE -3 -#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 -#define CL_OUT_OF_RESOURCES -5 -#define CL_OUT_OF_HOST_MEMORY -6 -#define CL_PROFILING_INFO_NOT_AVAILABLE -7 -#define CL_MEM_COPY_OVERLAP -8 -#define CL_IMAGE_FORMAT_MISMATCH -9 -#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 -#define CL_BUILD_PROGRAM_FAILURE -11 -#define CL_MAP_FAILURE -12 - -#define CL_INVALID_VALUE -30 -#define CL_INVALID_DEVICE_TYPE -31 -#define CL_INVALID_PLATFORM -32 -#define CL_INVALID_DEVICE -33 
-#define CL_INVALID_CONTEXT -34 -#define CL_INVALID_QUEUE_PROPERTIES -35 -#define CL_INVALID_COMMAND_QUEUE -36 -#define CL_INVALID_HOST_PTR -37 -#define CL_INVALID_MEM_OBJECT -38 -#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 -#define CL_INVALID_IMAGE_SIZE -40 -#define CL_INVALID_SAMPLER -41 -#define CL_INVALID_BINARY -42 -#define CL_INVALID_BUILD_OPTIONS -43 -#define CL_INVALID_PROGRAM -44 -#define CL_INVALID_PROGRAM_EXECUTABLE -45 -#define CL_INVALID_KERNEL_NAME -46 -#define CL_INVALID_KERNEL_DEFINITION -47 -#define CL_INVALID_KERNEL -48 -#define CL_INVALID_ARG_INDEX -49 -#define CL_INVALID_ARG_VALUE -50 -#define CL_INVALID_ARG_SIZE -51 -#define CL_INVALID_KERNEL_ARGS -52 -#define CL_INVALID_WORK_DIMENSION -53 -#define CL_INVALID_WORK_GROUP_SIZE -54 -#define CL_INVALID_WORK_ITEM_SIZE -55 -#define CL_INVALID_GLOBAL_OFFSET -56 -#define CL_INVALID_EVENT_WAIT_LIST -57 -#define CL_INVALID_EVENT -58 -#define CL_INVALID_OPERATION -59 -#define CL_INVALID_GL_OBJECT -60 -#define CL_INVALID_BUFFER_SIZE -61 -#define CL_INVALID_MIP_LEVEL -62 -#define CL_INVALID_GLOBAL_WORK_SIZE -63 - -// OpenCL Version -#define CL_VERSION_1_0 1 - -// cl_bool -#define CL_FALSE 0 -#define CL_TRUE 1 - -// cl_platform_info -#define CL_PLATFORM_PROFILE 0x0900 -#define CL_PLATFORM_VERSION 0x0901 -#define CL_PLATFORM_NAME 0x0902 -#define CL_PLATFORM_VENDOR 0x0903 -#define CL_PLATFORM_EXTENSIONS 0x0904 - -// cl_device_type - bitfield -#define CL_DEVICE_TYPE_DEFAULT (1 << 0) -#define CL_DEVICE_TYPE_CPU (1 << 1) -#define CL_DEVICE_TYPE_GPU (1 << 2) -#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) -#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF - -// cl_device_info -#define CL_DEVICE_TYPE 0x1000 -#define CL_DEVICE_VENDOR_ID 0x1001 -#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 -#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 -#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 -#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 
0x1007 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B -#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C -#define CL_DEVICE_ADDRESS_BITS 0x100D -#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E -#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F -#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 -#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 -#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 -#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 -#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 -#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 -#define CL_DEVICE_IMAGE_SUPPORT 0x1016 -#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 -#define CL_DEVICE_MAX_SAMPLERS 0x1018 -#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 -#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A -#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B -#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C -#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D -#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E -#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F -#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 -#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 -#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 -#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 -#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 -#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 -#define CL_DEVICE_ENDIAN_LITTLE 0x1026 -#define CL_DEVICE_AVAILABLE 0x1027 -#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 -#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 -#define CL_DEVICE_QUEUE_PROPERTIES 0x102A -#define CL_DEVICE_NAME 0x102B -#define CL_DEVICE_VENDOR 0x102C -#define CL_DRIVER_VERSION 0x102D -#define CL_DEVICE_PROFILE 0x102E -#define CL_DEVICE_VERSION 0x102F -#define CL_DEVICE_EXTENSIONS 0x1030 -#define CL_DEVICE_PLATFORM 0x1031 - -// cl_device_fp_config - bitfield -#define CL_FP_DENORM (1 << 0) -#define CL_FP_INF_NAN (1 << 1) -#define CL_FP_ROUND_TO_NEAREST (1 << 2) 
-#define CL_FP_ROUND_TO_ZERO (1 << 3) -#define CL_FP_ROUND_TO_INF (1 << 4) -#define CL_FP_FMA (1 << 5) - -// cl_device_mem_cache_type -#define CL_NONE 0x0 -#define CL_READ_ONLY_CACHE 0x1 -#define CL_READ_WRITE_CACHE 0x2 - -// cl_device_local_mem_type -#define CL_LOCAL 0x1 -#define CL_GLOBAL 0x2 - -// cl_device_exec_capabilities - bitfield -#define CL_EXEC_KERNEL (1 << 0) -#define CL_EXEC_NATIVE_KERNEL (1 << 1) - -// cl_command_queue_properties - bitfield -#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) -#define CL_QUEUE_PROFILING_ENABLE (1 << 1) - -// cl_context_info -#define CL_CONTEXT_REFERENCE_COUNT 0x1080 -#define CL_CONTEXT_DEVICES 0x1081 -#define CL_CONTEXT_PROPERTIES 0x1082 - -// cl_context_properties -#define CL_CONTEXT_PLATFORM 0x1084 - -// cl_command_queue_info -#define CL_QUEUE_CONTEXT 0x1090 -#define CL_QUEUE_DEVICE 0x1091 -#define CL_QUEUE_REFERENCE_COUNT 0x1092 -#define CL_QUEUE_PROPERTIES 0x1093 - -// cl_mem_flags - bitfield -#define CL_MEM_READ_WRITE (1 << 0) -#define CL_MEM_WRITE_ONLY (1 << 1) -#define CL_MEM_READ_ONLY (1 << 2) -#define CL_MEM_USE_HOST_PTR (1 << 3) -#define CL_MEM_ALLOC_HOST_PTR (1 << 4) -#define CL_MEM_COPY_HOST_PTR (1 << 5) - -// cl_channel_order -#define CL_R 0x10B0 -#define CL_A 0x10B1 -#define CL_RG 0x10B2 -#define CL_RA 0x10B3 -#define CL_RGB 0x10B4 -#define CL_RGBA 0x10B5 -#define CL_BGRA 0x10B6 -#define CL_ARGB 0x10B7 -#define CL_INTENSITY 0x10B8 -#define CL_LUMINANCE 0x10B9 - -// cl_channel_type -#define CL_SNORM_INT8 0x10D0 -#define CL_SNORM_INT16 0x10D1 -#define CL_UNORM_INT8 0x10D2 -#define CL_UNORM_INT16 0x10D3 -#define CL_UNORM_SHORT_565 0x10D4 -#define CL_UNORM_SHORT_555 0x10D5 -#define CL_UNORM_INT_101010 0x10D6 -#define CL_SIGNED_INT8 0x10D7 -#define CL_SIGNED_INT16 0x10D8 -#define CL_SIGNED_INT32 0x10D9 -#define CL_UNSIGNED_INT8 0x10DA -#define CL_UNSIGNED_INT16 0x10DB -#define CL_UNSIGNED_INT32 0x10DC -#define CL_HALF_FLOAT 0x10DD -#define CL_FLOAT 0x10DE - -// cl_mem_object_type -#define 
CL_MEM_OBJECT_BUFFER 0x10F0 -#define CL_MEM_OBJECT_IMAGE2D 0x10F1 -#define CL_MEM_OBJECT_IMAGE3D 0x10F2 - -// cl_mem_info -#define CL_MEM_TYPE 0x1100 -#define CL_MEM_FLAGS 0x1101 -#define CL_MEM_SIZE 0x1102 -#define CL_MEM_HOST_PTR 0x1103 -#define CL_MEM_MAP_COUNT 0x1104 -#define CL_MEM_REFERENCE_COUNT 0x1105 -#define CL_MEM_CONTEXT 0x1106 - -// cl_image_info -#define CL_IMAGE_FORMAT 0x1110 -#define CL_IMAGE_ELEMENT_SIZE 0x1111 -#define CL_IMAGE_ROW_PITCH 0x1112 -#define CL_IMAGE_SLICE_PITCH 0x1113 -#define CL_IMAGE_WIDTH 0x1114 -#define CL_IMAGE_HEIGHT 0x1115 -#define CL_IMAGE_DEPTH 0x1116 - -// cl_addressing_mode -#define CL_ADDRESS_NONE 0x1130 -#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 -#define CL_ADDRESS_CLAMP 0x1132 -#define CL_ADDRESS_REPEAT 0x1133 - -// cl_filter_mode -#define CL_FILTER_NEAREST 0x1140 -#define CL_FILTER_LINEAR 0x1141 - -// cl_sampler_info -#define CL_SAMPLER_REFERENCE_COUNT 0x1150 -#define CL_SAMPLER_CONTEXT 0x1151 -#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 -#define CL_SAMPLER_ADDRESSING_MODE 0x1153 -#define CL_SAMPLER_FILTER_MODE 0x1154 - -// cl_map_flags - bitfield -#define CL_MAP_READ (1 << 0) -#define CL_MAP_WRITE (1 << 1) - -// cl_program_info -#define CL_PROGRAM_REFERENCE_COUNT 0x1160 -#define CL_PROGRAM_CONTEXT 0x1161 -#define CL_PROGRAM_NUM_DEVICES 0x1162 -#define CL_PROGRAM_DEVICES 0x1163 -#define CL_PROGRAM_SOURCE 0x1164 -#define CL_PROGRAM_BINARY_SIZES 0x1165 -#define CL_PROGRAM_BINARIES 0x1166 - -// cl_program_build_info -#define CL_PROGRAM_BUILD_STATUS 0x1181 -#define CL_PROGRAM_BUILD_OPTIONS 0x1182 -#define CL_PROGRAM_BUILD_LOG 0x1183 - -// cl_build_status -#define CL_BUILD_SUCCESS 0 -#define CL_BUILD_NONE -1 -#define CL_BUILD_ERROR -2 -#define CL_BUILD_IN_PROGRESS -3 - -// cl_kernel_info -#define CL_KERNEL_FUNCTION_NAME 0x1190 -#define CL_KERNEL_NUM_ARGS 0x1191 -#define CL_KERNEL_REFERENCE_COUNT 0x1192 -#define CL_KERNEL_CONTEXT 0x1193 -#define CL_KERNEL_PROGRAM 0x1194 - -// cl_kernel_work_group_info -#define 
CL_KERNEL_WORK_GROUP_SIZE 0x11B0 -#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 -#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 - -// cl_event_info -#define CL_EVENT_COMMAND_QUEUE 0x11D0 -#define CL_EVENT_COMMAND_TYPE 0x11D1 -#define CL_EVENT_REFERENCE_COUNT 0x11D2 -#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 - -// cl_command_type -#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 -#define CL_COMMAND_TASK 0x11F1 -#define CL_COMMAND_NATIVE_KERNEL 0x11F2 -#define CL_COMMAND_READ_BUFFER 0x11F3 -#define CL_COMMAND_WRITE_BUFFER 0x11F4 -#define CL_COMMAND_COPY_BUFFER 0x11F5 -#define CL_COMMAND_READ_IMAGE 0x11F6 -#define CL_COMMAND_WRITE_IMAGE 0x11F7 -#define CL_COMMAND_COPY_IMAGE 0x11F8 -#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 -#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA -#define CL_COMMAND_MAP_BUFFER 0x11FB -#define CL_COMMAND_MAP_IMAGE 0x11FC -#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD -#define CL_COMMAND_MARKER 0x11FE -#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF -#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 - -// command execution status -#define CL_COMPLETE 0x0 -#define CL_RUNNING 0x1 -#define CL_SUBMITTED 0x2 -#define CL_QUEUED 0x3 - -// cl_profiling_info -#define CL_PROFILING_COMMAND_QUEUED 0x1280 -#define CL_PROFILING_COMMAND_SUBMIT 0x1281 -#define CL_PROFILING_COMMAND_START 0x1282 -#define CL_PROFILING_COMMAND_END 0x1283 - -/********************************************************************************************************/ - -/********************************************************************************************************/ - -// Function signature typedef's - -// Platform API -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPLATFORMIDS)(cl_uint /* num_entries */, - cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPLATFORMINFO)(cl_platform_id /* platform */, - cl_platform_info /* param_name */, - size_t /* param_value_size */, - void * /* 
param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Device APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETDEVICEIDS)(cl_platform_id /* platform */, - cl_device_type /* device_type */, - cl_uint /* num_entries */, - cl_device_id * /* devices */, - cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETDEVICEINFO)(cl_device_id /* device */, - cl_device_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Context APIs -typedef CL_API_ENTRY cl_context (CL_API_CALL * -PFNCLCREATECONTEXT)(const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_context (CL_API_CALL * -PFNCLCREATECONTEXTFROMTYPE)(const cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINCONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASECONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETCONTEXTINFO)(cl_context /* context */, - cl_context_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Command Queue APIs -typedef CL_API_ENTRY cl_command_queue (CL_API_CALL * -PFNCLCREATECOMMANDQUEUE)(cl_context /* context */, - cl_device_id /* device */, - cl_command_queue_properties 
/* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINCOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASECOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETCOMMANDQUEUEINFO)(cl_command_queue /* command_queue */, - cl_command_queue_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLSETCOMMANDQUEUEPROPERTY)(cl_command_queue /* command_queue */, - cl_command_queue_properties /* properties */, - cl_bool /* enable */, - cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; - -// Memory Object APIs -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEBUFFER)(cl_context /* context */, - cl_mem_flags /* flags */, - size_t /* size */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEIMAGE2D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -PFNCLCREATEIMAGE3D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_depth */, - size_t /* image_row_pitch */, - size_t /* image_slice_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINMEMOBJECT)(cl_mem /* memobj */) 
CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETSUPPORTEDIMAGEFORMATS)(cl_context /* context */, - cl_mem_flags /* flags */, - cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, - cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETMEMOBJECTINFO)(cl_mem /* memobj */, - cl_mem_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETIMAGEINFO)(cl_mem /* image */, - cl_image_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Sampler APIs -typedef CL_API_ENTRY cl_sampler (CL_API_CALL * -PFNCLCREATESAMPLER)(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINSAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASESAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETSAMPLERINFO)(cl_sampler /* sampler */, - cl_sampler_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Program Object APIs -typedef CL_API_ENTRY cl_program (CL_API_CALL * -PFNCLCREATEPROGRAMWITHSOURCE)(cl_context /* context */, - cl_uint /* count */, - const char ** /* strings */, - const size_t * /* lengths */, - cl_int * /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_program (CL_API_CALL * -PFNCLCREATEPROGRAMWITHBINARY)(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLBUILDPROGRAM)(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void (*pfn_notify)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLUNLOADCOMPILER)(void) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPROGRAMINFO)(cl_program /* program */, - cl_program_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETPROGRAMBUILDINFO)(cl_program /* program */, - cl_device_id /* device */, - cl_program_build_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Kernel Object APIs -typedef CL_API_ENTRY cl_kernel (CL_API_CALL * -PFNCLCREATEKERNEL)(cl_program /* program */, - const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLCREATEKERNELSINPROGRAM)(cl_program /* program */, - cl_uint /* num_kernels */, - cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) 
CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLSETKERNELARG)(cl_kernel /* kernel */, - cl_uint /* arg_index */, - size_t /* arg_size */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETKERNELINFO)(cl_kernel /* kernel */, - cl_kernel_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETKERNELWORKGROUPINFO)(cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Event Object APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLWAITFOREVENTS)(cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETEVENTINFO)(cl_event /* event */, - cl_event_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRETAINEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLRELEASEEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - -// Profiling APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLGETEVENTPROFILINGINFO)(cl_event /* event */, - cl_profiling_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -// Flush and Finish APIs 
-typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLFLUSH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLFINISH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -// Enqueued Commands APIs -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEREADBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - size_t /* offset */, - size_t /* cb */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWRITEBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - size_t /* offset */, - size_t /* cb */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - size_t /* src_offset */, - size_t /* dst_offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEREADIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_read */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* row_pitch */, - size_t /* slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWRITEIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_write */, - const size_t 
* /* origin[3] */, - const size_t * /* region[3] */, - size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYIMAGETOBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, - const size_t * /* region[3] */, - size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUECOPYBUFFERTOIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_image */, - size_t /* src_offset */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void * (CL_API_CALL * -PFNCLENQUEUEMAPBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - size_t /* offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void * (CL_API_CALL * -PFNCLENQUEUEMAPIMAGE)(cl_command_queue /* 
command_queue */, - cl_mem /* image */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t * /* image_row_pitch */, - size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEUNMAPMEMOBJECT)(cl_command_queue /* command_queue */, - cl_mem /* memobj */, - void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUENDRANGEKERNEL)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* work_dim */, - const size_t * /* global_work_offset */, - const size_t * /* global_work_size */, - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUETASK)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUENATIVEKERNEL)(cl_command_queue /* command_queue */, - void (*user_func)(void *), - void * /* args */, - size_t /* cb_args */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, - const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEMARKER)(cl_command_queue /* command_queue */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef 
CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEWAITFOREVENTS)(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -PFNCLENQUEUEBARRIER)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -// Extension function access -// -// Returns the extension function address for the given function name, -// or NULL if a valid function can not be found. The client must -// check to make sure the address is not NULL, before using or -// calling the returned function address. -// -typedef CL_API_ENTRY void * (CL_API_CALL * PFNCLGETEXTENSIONFUNCTIONADDRESS)(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; - - -#define CLEW_STATIC - -#ifdef CLEW_STATIC -# define CLEWAPI extern -#else -# ifdef CLEW_BUILD -# define CLEWAPI extern __declspec(dllexport) -# else -# define CLEWAPI extern __declspec(dllimport) -# endif -#endif - -#if defined(_WIN32) -#define CLEW_FUN_EXPORT extern -#else -#define CLEW_FUN_EXPORT CLEWAPI -#endif - -#define CLEW_GET_FUN(x) x - - -// Variables holding function entry points -CLEW_FUN_EXPORT PFNCLGETPLATFORMIDS __oclGetPlatformIDs ; -CLEW_FUN_EXPORT PFNCLGETPLATFORMINFO __oclGetPlatformInfo ; -CLEW_FUN_EXPORT PFNCLGETDEVICEIDS __oclGetDeviceIDs ; -CLEW_FUN_EXPORT PFNCLGETDEVICEINFO __oclGetDeviceInfo ; -CLEW_FUN_EXPORT PFNCLCREATECONTEXT __oclCreateContext ; -CLEW_FUN_EXPORT PFNCLCREATECONTEXTFROMTYPE __oclCreateContextFromType ; -CLEW_FUN_EXPORT PFNCLRETAINCONTEXT __oclRetainContext ; -CLEW_FUN_EXPORT PFNCLRELEASECONTEXT __oclReleaseContext ; -CLEW_FUN_EXPORT PFNCLGETCONTEXTINFO __oclGetContextInfo ; -CLEW_FUN_EXPORT PFNCLCREATECOMMANDQUEUE __oclCreateCommandQueue ; -CLEW_FUN_EXPORT PFNCLRETAINCOMMANDQUEUE __oclRetainCommandQueue ; -CLEW_FUN_EXPORT PFNCLRELEASECOMMANDQUEUE __oclReleaseCommandQueue ; -CLEW_FUN_EXPORT PFNCLGETCOMMANDQUEUEINFO __oclGetCommandQueueInfo ; -CLEW_FUN_EXPORT 
PFNCLSETCOMMANDQUEUEPROPERTY __oclSetCommandQueueProperty ; -CLEW_FUN_EXPORT PFNCLCREATEBUFFER __oclCreateBuffer ; -CLEW_FUN_EXPORT PFNCLCREATEIMAGE2D __oclCreateImage2D ; -CLEW_FUN_EXPORT PFNCLCREATEIMAGE3D __oclCreateImage3D ; -CLEW_FUN_EXPORT PFNCLRETAINMEMOBJECT __oclRetainMemObject ; -CLEW_FUN_EXPORT PFNCLRELEASEMEMOBJECT __oclReleaseMemObject ; -CLEW_FUN_EXPORT PFNCLGETSUPPORTEDIMAGEFORMATS __oclGetSupportedImageFormats ; -CLEW_FUN_EXPORT PFNCLGETMEMOBJECTINFO __oclGetMemObjectInfo ; -CLEW_FUN_EXPORT PFNCLGETIMAGEINFO __oclGetImageInfo ; -CLEW_FUN_EXPORT PFNCLCREATESAMPLER __oclCreateSampler ; -CLEW_FUN_EXPORT PFNCLRETAINSAMPLER __oclRetainSampler ; -CLEW_FUN_EXPORT PFNCLRELEASESAMPLER __oclReleaseSampler ; -CLEW_FUN_EXPORT PFNCLGETSAMPLERINFO __oclGetSamplerInfo ; -CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHSOURCE __oclCreateProgramWithSource ; -CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHBINARY __oclCreateProgramWithBinary ; -CLEW_FUN_EXPORT PFNCLRETAINPROGRAM __oclRetainProgram ; -CLEW_FUN_EXPORT PFNCLRELEASEPROGRAM __oclReleaseProgram ; -CLEW_FUN_EXPORT PFNCLBUILDPROGRAM __oclBuildProgram ; -CLEW_FUN_EXPORT PFNCLUNLOADCOMPILER __oclUnloadCompiler ; -CLEW_FUN_EXPORT PFNCLGETPROGRAMINFO __oclGetProgramInfo ; -CLEW_FUN_EXPORT PFNCLGETPROGRAMBUILDINFO __oclGetProgramBuildInfo ; -CLEW_FUN_EXPORT PFNCLCREATEKERNEL __oclCreateKernel ; -CLEW_FUN_EXPORT PFNCLCREATEKERNELSINPROGRAM __oclCreateKernelsInProgram ; -CLEW_FUN_EXPORT PFNCLRETAINKERNEL __oclRetainKernel ; -CLEW_FUN_EXPORT PFNCLRELEASEKERNEL __oclReleaseKernel ; -CLEW_FUN_EXPORT PFNCLSETKERNELARG __oclSetKernelArg ; -CLEW_FUN_EXPORT PFNCLGETKERNELINFO __oclGetKernelInfo ; -CLEW_FUN_EXPORT PFNCLGETKERNELWORKGROUPINFO __oclGetKernelWorkGroupInfo ; -CLEW_FUN_EXPORT PFNCLWAITFOREVENTS __oclWaitForEvents ; -CLEW_FUN_EXPORT PFNCLGETEVENTINFO __oclGetEventInfo ; -CLEW_FUN_EXPORT PFNCLRETAINEVENT __oclRetainEvent ; -CLEW_FUN_EXPORT PFNCLRELEASEEVENT __oclReleaseEvent ; -CLEW_FUN_EXPORT PFNCLGETEVENTPROFILINGINFO 
__oclGetEventProfilingInfo ; -CLEW_FUN_EXPORT PFNCLFLUSH __oclFlush ; -CLEW_FUN_EXPORT PFNCLFINISH __oclFinish ; -CLEW_FUN_EXPORT PFNCLENQUEUEREADBUFFER __oclEnqueueReadBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEWRITEBUFFER __oclEnqueueWriteBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFER __oclEnqueueCopyBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEREADIMAGE __oclEnqueueReadImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEWRITEIMAGE __oclEnqueueWriteImage ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGE __oclEnqueueCopyImage ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGETOBUFFER __oclEnqueueCopyImageToBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFERTOIMAGE __oclEnqueueCopyBufferToImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEMAPBUFFER __oclEnqueueMapBuffer ; -CLEW_FUN_EXPORT PFNCLENQUEUEMAPIMAGE __oclEnqueueMapImage ; -CLEW_FUN_EXPORT PFNCLENQUEUEUNMAPMEMOBJECT __oclEnqueueUnmapMemObject ; -CLEW_FUN_EXPORT PFNCLENQUEUENDRANGEKERNEL __oclEnqueueNDRangeKernel ; -CLEW_FUN_EXPORT PFNCLENQUEUETASK __oclEnqueueTask ; -CLEW_FUN_EXPORT PFNCLENQUEUENATIVEKERNEL __oclEnqueueNativeKernel ; -CLEW_FUN_EXPORT PFNCLENQUEUEMARKER __oclEnqueueMarker ; -CLEW_FUN_EXPORT PFNCLENQUEUEWAITFOREVENTS __oclEnqueueWaitForEvents ; -CLEW_FUN_EXPORT PFNCLENQUEUEBARRIER __oclEnqueueBarrier ; -CLEW_FUN_EXPORT PFNCLGETEXTENSIONFUNCTIONADDRESS __oclGetExtensionFunctionAddress ; - - -#define clGetPlatformIDs CLEW_GET_FUN(__oclGetPlatformIDs ) -#define clGetPlatformInfo CLEW_GET_FUN(__oclGetPlatformInfo ) -#define clGetDeviceIDs CLEW_GET_FUN(__oclGetDeviceIDs ) -#define clGetDeviceInfo CLEW_GET_FUN(__oclGetDeviceInfo ) -#define clCreateContext CLEW_GET_FUN(__oclCreateContext ) -#define clCreateContextFromType CLEW_GET_FUN(__oclCreateContextFromType ) -#define clRetainContext CLEW_GET_FUN(__oclRetainContext ) -#define clReleaseContext CLEW_GET_FUN(__oclReleaseContext ) -#define clGetContextInfo CLEW_GET_FUN(__oclGetContextInfo ) -#define clCreateCommandQueue CLEW_GET_FUN(__oclCreateCommandQueue ) -#define clRetainCommandQueue 
CLEW_GET_FUN(__oclRetainCommandQueue ) -#define clReleaseCommandQueue CLEW_GET_FUN(__oclReleaseCommandQueue ) -#define clGetCommandQueueInfo CLEW_GET_FUN(__oclGetCommandQueueInfo ) -#define clSetCommandQueueProperty CLEW_GET_FUN(__oclSetCommandQueueProperty ) -#define clCreateBuffer CLEW_GET_FUN(__oclCreateBuffer ) -#define clCreateImage2D CLEW_GET_FUN(__oclCreateImage2D ) -#define clCreateImage3D CLEW_GET_FUN(__oclCreateImage3D ) -#define clRetainMemObject CLEW_GET_FUN(__oclRetainMemObject ) -#define clReleaseMemObject CLEW_GET_FUN(__oclReleaseMemObject ) -#define clGetSupportedImageFormats CLEW_GET_FUN(__oclGetSupportedImageFormats ) -#define clGetMemObjectInfo CLEW_GET_FUN(__oclGetMemObjectInfo ) -#define clGetImageInfo CLEW_GET_FUN(__oclGetImageInfo ) -#define clCreateSampler CLEW_GET_FUN(__oclCreateSampler ) -#define clRetainSampler CLEW_GET_FUN(__oclRetainSampler ) -#define clReleaseSampler CLEW_GET_FUN(__oclReleaseSampler ) -#define clGetSamplerInfo CLEW_GET_FUN(__oclGetSamplerInfo ) -#define clCreateProgramWithSource CLEW_GET_FUN(__oclCreateProgramWithSource ) -#define clCreateProgramWithBinary CLEW_GET_FUN(__oclCreateProgramWithBinary ) -#define clRetainProgram CLEW_GET_FUN(__oclRetainProgram ) -#define clReleaseProgram CLEW_GET_FUN(__oclReleaseProgram ) -#define clBuildProgram CLEW_GET_FUN(__oclBuildProgram ) -#define clUnloadCompiler CLEW_GET_FUN(__oclUnloadCompiler ) -#define clGetProgramInfo CLEW_GET_FUN(__oclGetProgramInfo ) -#define clGetProgramBuildInfo CLEW_GET_FUN(__oclGetProgramBuildInfo ) -#define clCreateKernel CLEW_GET_FUN(__oclCreateKernel ) -#define clCreateKernelsInProgram CLEW_GET_FUN(__oclCreateKernelsInProgram ) -#define clRetainKernel CLEW_GET_FUN(__oclRetainKernel ) -#define clReleaseKernel CLEW_GET_FUN(__oclReleaseKernel ) -#define clSetKernelArg CLEW_GET_FUN(__oclSetKernelArg ) -#define clGetKernelInfo CLEW_GET_FUN(__oclGetKernelInfo ) -#define clGetKernelWorkGroupInfo CLEW_GET_FUN(__oclGetKernelWorkGroupInfo ) -#define 
clWaitForEvents CLEW_GET_FUN(__oclWaitForEvents ) -#define clGetEventInfo CLEW_GET_FUN(__oclGetEventInfo ) -#define clRetainEvent CLEW_GET_FUN(__oclRetainEvent ) -#define clReleaseEvent CLEW_GET_FUN(__oclReleaseEvent ) -#define clGetEventProfilingInfo CLEW_GET_FUN(__oclGetEventProfilingInfo ) -#define clFlush CLEW_GET_FUN(__oclFlush ) -#define clFinish CLEW_GET_FUN(__oclFinish ) -#define clEnqueueReadBuffer CLEW_GET_FUN(__oclEnqueueReadBuffer ) -#define clEnqueueWriteBuffer CLEW_GET_FUN(__oclEnqueueWriteBuffer ) -#define clEnqueueCopyBuffer CLEW_GET_FUN(__oclEnqueueCopyBuffer ) -#define clEnqueueReadImage CLEW_GET_FUN(__oclEnqueueReadImage ) -#define clEnqueueWriteImage CLEW_GET_FUN(__oclEnqueueWriteImage ) -#define clEnqueueCopyImage CLEW_GET_FUN(__oclEnqueueCopyImage ) -#define clEnqueueCopyImageToBuffer CLEW_GET_FUN(__oclEnqueueCopyImageToBuffer ) -#define clEnqueueCopyBufferToImage CLEW_GET_FUN(__oclEnqueueCopyBufferToImage ) -#define clEnqueueMapBuffer CLEW_GET_FUN(__oclEnqueueMapBuffer ) -#define clEnqueueMapImage CLEW_GET_FUN(__oclEnqueueMapImage ) -#define clEnqueueUnmapMemObject CLEW_GET_FUN(__oclEnqueueUnmapMemObject ) -#define clEnqueueNDRangeKernel CLEW_GET_FUN(__oclEnqueueNDRangeKernel ) -#define clEnqueueTask CLEW_GET_FUN(__oclEnqueueTask ) -#define clEnqueueNativeKernel CLEW_GET_FUN(__oclEnqueueNativeKernel ) -#define clEnqueueMarker CLEW_GET_FUN(__oclEnqueueMarker ) -#define clEnqueueWaitForEvents CLEW_GET_FUN(__oclEnqueueWaitForEvents ) -#define clEnqueueBarrier CLEW_GET_FUN(__oclEnqueueBarrier ) -#define clGetExtensionFunctionAddress CLEW_GET_FUN(__oclGetExtensionFunctionAddress ) - -#endif // CLCC_GENERATE_DOCUMENTATION - -#define CLEW_SUCCESS 0 //!< Success error code -#define CLEW_ERROR_OPEN_FAILED -1 //!< Error code for failing to open the dynamic library -#define CLEW_ERROR_ATEXIT_FAILED -2 //!< Error code for failing to queue the closing of the dynamic library to atexit() - -//! 
\brief Load OpenCL dynamic library and set function entry points -int clewInit (const char*); -//! \brief Convert an OpenCL error code to its string equivalent -const char* clewErrorString (cl_int error); - -#ifdef __cplusplus -} -#endif - -#endif // CLCC_CLEW_HPP_INCLUDED diff --git a/intern/smoke/intern/tnt/tnt_math_utils.h b/intern/smoke/intern/tnt/tnt_math_utils.h index 9226bc4e753..9f6bd10e23f 100644 --- a/intern/smoke/intern/tnt/tnt_math_utils.h +++ b/intern/smoke/intern/tnt/tnt_math_utils.h @@ -7,10 +7,6 @@ /* needed for fabs, sqrt() below */ #include <cmath> -#if defined(_MSC_VER) && (_MSC_VER < 1800) -#define hypot _hypot -#endif - namespace TNT { /** |