/* * Copyright 2015-2021 Arm Limited * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * At your option, you may choose to accept this material under either: * 1. The Apache License, Version 2.0, found at , or * 2. The MIT License, found at . */ #include "spirv_cross.hpp" #include "GLSL.std.450.h" #include "spirv_cfg.hpp" #include "spirv_common.hpp" #include "spirv_parser.hpp" #include #include #include using namespace std; using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; Compiler::Compiler(vector ir_) { Parser parser(move(ir_)); parser.parse(); set_ir(move(parser.get_parsed_ir())); } Compiler::Compiler(const uint32_t *ir_, size_t word_count) { Parser parser(ir_, word_count); parser.parse(); set_ir(move(parser.get_parsed_ir())); } Compiler::Compiler(const ParsedIR &ir_) { set_ir(ir_); } Compiler::Compiler(ParsedIR &&ir_) { set_ir(move(ir_)); } void Compiler::set_ir(ParsedIR &&ir_) { ir = move(ir_); parse_fixup(); } void Compiler::set_ir(const ParsedIR &ir_) { ir = ir_; parse_fixup(); } string Compiler::compile() { return ""; } bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) { auto &type = get(v.basetype); bool ssbo = v.storage == StorageClassStorageBuffer || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); bool image = type.basetype == SPIRType::Image; bool counter = type.basetype == SPIRType::AtomicCounter; bool buffer_reference = type.storage == 
StorageClassPhysicalStorageBufferEXT; bool is_restrict; if (ssbo) is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); else is_restrict = has_decoration(v.self, DecorationRestrict); return !is_restrict && (ssbo || image || counter || buffer_reference); } bool Compiler::block_is_pure(const SPIRBlock &block) { // This is a global side effect of the function. if (block.terminator == SPIRBlock::Kill || block.terminator == SPIRBlock::TerminateRay || block.terminator == SPIRBlock::IgnoreIntersection) return false; for (auto &i : block.ops) { auto ops = stream(i); auto op = static_cast(i.op); switch (op) { case OpFunctionCall: { uint32_t func = ops[2]; if (!function_is_pure(get(func))) return false; break; } case OpCopyMemory: case OpStore: { auto &type = expression_type(ops[0]); if (type.storage != StorageClassFunction) return false; break; } case OpImageWrite: return false; // Atomics are impure. case OpAtomicLoad: case OpAtomicStore: case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicCompareExchangeWeak: case OpAtomicIIncrement: case OpAtomicIDecrement: case OpAtomicIAdd: case OpAtomicISub: case OpAtomicSMin: case OpAtomicUMin: case OpAtomicSMax: case OpAtomicUMax: case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: return false; // Geometry shader builtins modify global state. case OpEndPrimitive: case OpEmitStreamVertex: case OpEndStreamPrimitive: case OpEmitVertex: return false; // Barriers disallow any reordering, so we should treat blocks with barrier as writing. case OpControlBarrier: case OpMemoryBarrier: return false; // Ray tracing builtins are impure. 
case OpReportIntersectionKHR: case OpIgnoreIntersectionNV: case OpTerminateRayNV: case OpTraceNV: case OpTraceRayKHR: case OpExecuteCallableNV: case OpExecuteCallableKHR: case OpRayQueryInitializeKHR: case OpRayQueryTerminateKHR: case OpRayQueryGenerateIntersectionKHR: case OpRayQueryConfirmIntersectionKHR: case OpRayQueryProceedKHR: // There are various getters in ray query, but they are considered pure. return false; // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. case OpDemoteToHelperInvocationEXT: // This is a global side effect of the function. return false; case OpExtInst: { uint32_t extension_set = ops[2]; if (get(extension_set).ext == SPIRExtension::GLSL) { auto op_450 = static_cast(ops[3]); switch (op_450) { case GLSLstd450Modf: case GLSLstd450Frexp: { auto &type = expression_type(ops[5]); if (type.storage != StorageClassFunction) return false; break; } default: break; } } break; } default: break; } } return true; } string Compiler::to_name(uint32_t id, bool allow_alias) const { if (allow_alias && ir.ids[id].get_type() == TypeType) { // If this type is a simple alias, emit the // name of the original type instead. // We don't want to override the meta alias // as that can be overridden by the reflection APIs after parse. auto &type = get(id); if (type.type_alias) { // If the alias master has been specially packed, we will have emitted a clean variant as well, // so skip the name aliasing here. 
if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return to_name(type.type_alias); } } auto &alias = ir.get_name(id); if (alias.empty()) return join("_", id); else return alias; } bool Compiler::function_is_pure(const SPIRFunction &func) { for (auto block : func.blocks) { if (!block_is_pure(get(block))) { //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); return false; } } //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); return true; } void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) { for (auto &i : block.ops) { auto ops = stream(i); auto op = static_cast(i.op); switch (op) { case OpFunctionCall: { uint32_t func = ops[2]; register_global_read_dependencies(get(func), id); break; } case OpLoad: case OpImageRead: { // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. auto *var = maybe_get_backing_variable(ops[2]); if (var && var->storage != StorageClassFunction) { auto &type = get(var->basetype); // InputTargets are immutable. if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData) var->dependees.push_back(id); } break; } default: break; } } } void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) { for (auto block : func.blocks) register_global_read_dependencies(get(block), id); } SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) { auto *var = maybe_get(chain); if (!var) { auto *cexpr = maybe_get(chain); if (cexpr) var = maybe_get(cexpr->loaded_from); auto *access_chain = maybe_get(chain); if (access_chain) var = maybe_get(access_chain->loaded_from); } return var; } void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) { auto &e = get(expr); auto *var = maybe_get_backing_variable(chain); if (var) { e.loaded_from = var->self; // If the backing variable is immutable, we do not need to depend on the variable. 
if (forwarded && !is_immutable(var->self)) var->dependees.push_back(e.self); // If we load from a parameter, make sure we create "inout" if we also write to the parameter. // The default is "in" however, so we never invalidate our compilation by reading. if (var && var->parameter) var->parameter->read_count++; } } void Compiler::register_write(uint32_t chain) { auto *var = maybe_get(chain); if (!var) { // If we're storing through an access chain, invalidate the backing variable instead. auto *expr = maybe_get(chain); if (expr && expr->loaded_from) var = maybe_get(expr->loaded_from); auto *access_chain = maybe_get(chain); if (access_chain && access_chain->loaded_from) var = maybe_get(access_chain->loaded_from); } auto &chain_type = expression_type(chain); if (var) { bool check_argument_storage_qualifier = true; auto &type = expression_type(chain); // If our variable is in a storage class which can alias with other buffers, // invalidate all variables which depend on aliased variables. And if this is a // variable pointer, then invalidate all variables regardless. if (get_variable_data_type(*var).pointer) { flush_all_active_variables(); if (type.pointer_depth == 1) { // We have a backing variable which is a pointer-to-pointer type. // We are storing some data through a pointer acquired through that variable, // but we are not writing to the value of the variable itself, // i.e., we are not modifying the pointer directly. // If we are storing a non-pointer type (pointer_depth == 1), // we know that we are storing some unrelated data. // A case here would be // void foo(Foo * const *arg) { // Foo *bar = *arg; // bar->unrelated = 42; // } // arg, the argument is constant. check_argument_storage_qualifier = false; } } if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var)) flush_all_aliased_variables(); else if (var) flush_dependees(*var); // We tried to write to a parameter which is not marked with out qualifier, force a recompile. 
if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0) { var->parameter->write_count++; force_recompile(); } } else if (chain_type.pointer) { // If we stored through a variable pointer, then we don't know which // variable we stored to. So *all* expressions after this point need to // be invalidated. // FIXME: If we can prove that the variable pointer will point to // only certain variables, we can invalidate only those. flush_all_active_variables(); } // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead. // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries. } void Compiler::flush_dependees(SPIRVariable &var) { for (auto expr : var.dependees) invalid_expressions.insert(expr); var.dependees.clear(); } void Compiler::flush_all_aliased_variables() { for (auto aliased : aliased_variables) flush_dependees(get(aliased)); } void Compiler::flush_all_atomic_capable_variables() { for (auto global : global_variables) flush_dependees(get(global)); flush_all_aliased_variables(); } void Compiler::flush_control_dependent_expressions(uint32_t block_id) { auto &block = get(block_id); for (auto &expr : block.invalidate_expressions) invalid_expressions.insert(expr); block.invalidate_expressions.clear(); } void Compiler::flush_all_active_variables() { // Invalidate all temporaries we read from variables in this block since they were forwarded. // Invalidate all temporaries we read from globals. 
for (auto &v : current_function->local_variables) flush_dependees(get(v)); for (auto &arg : current_function->arguments) flush_dependees(get(arg.id)); for (auto global : global_variables) flush_dependees(get(global)); flush_all_aliased_variables(); } uint32_t Compiler::expression_type_id(uint32_t id) const { switch (ir.ids[id].get_type()) { case TypeVariable: return get(id).basetype; case TypeExpression: return get(id).expression_type; case TypeConstant: return get(id).constant_type; case TypeConstantOp: return get(id).basetype; case TypeUndef: return get(id).basetype; case TypeCombinedImageSampler: return get(id).combined_type; case TypeAccessChain: return get(id).basetype; default: SPIRV_CROSS_THROW("Cannot resolve expression type."); } } const SPIRType &Compiler::expression_type(uint32_t id) const { return get(expression_type_id(id)); } bool Compiler::expression_is_lvalue(uint32_t id) const { auto &type = expression_type(id); switch (type.basetype) { case SPIRType::SampledImage: case SPIRType::Image: case SPIRType::Sampler: return false; default: return true; } } bool Compiler::is_immutable(uint32_t id) const { if (ir.ids[id].get_type() == TypeVariable) { auto &var = get(id); // Anything we load from the UniformConstant address space is guaranteed to be immutable. 
bool pointer_to_const = var.storage == StorageClassUniformConstant; return pointer_to_const || var.phi_variable || !expression_is_lvalue(id); } else if (ir.ids[id].get_type() == TypeAccessChain) return get(id).immutable; else if (ir.ids[id].get_type() == TypeExpression) return get(id).immutable; else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp || ir.ids[id].get_type() == TypeUndef) return true; else return false; } static inline bool storage_class_is_interface(spv::StorageClass storage) { switch (storage) { case StorageClassInput: case StorageClassOutput: case StorageClassUniform: case StorageClassUniformConstant: case StorageClassAtomicCounter: case StorageClassPushConstant: case StorageClassStorageBuffer: return true; default: return false; } } bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const { if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable) return true; // Combined image samplers are always considered active as they are "magic" variables. if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { return samp.combined_id == var.self; }) != end(combined_image_samplers)) { return false; } // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables // which are not part of the entry point. if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric && var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self)) { return true; } return check_active_interface_variables && storage_class_is_interface(var.storage) && active_interface_variables.find(var.self) == end(active_interface_variables); } bool Compiler::is_builtin_type(const SPIRType &type) const { auto *type_meta = ir.find_meta(type.self); // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. 
if (type_meta) for (auto &m : type_meta->members) if (m.builtin) return true; return false; } bool Compiler::is_builtin_variable(const SPIRVariable &var) const { auto *m = ir.find_meta(var.self); if (var.compat_builtin || (m && m->decoration.builtin)) return true; else return is_builtin_type(get(var.basetype)); } bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const { auto *type_meta = ir.find_meta(type.self); if (type_meta) { auto &memb = type_meta->members; if (index < memb.size() && memb[index].builtin) { if (builtin) *builtin = memb[index].builtin_type; return true; } } return false; } bool Compiler::is_scalar(const SPIRType &type) const { return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1; } bool Compiler::is_vector(const SPIRType &type) const { return type.vecsize > 1 && type.columns == 1; } bool Compiler::is_matrix(const SPIRType &type) const { return type.vecsize > 1 && type.columns > 1; } bool Compiler::is_array(const SPIRType &type) const { return !type.array.empty(); } ShaderResources Compiler::get_shader_resources() const { return get_shader_resources(nullptr); } ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const { return get_shader_resources(&active_variables); } bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) { uint32_t variable = 0; switch (opcode) { // Need this first, otherwise, GCC complains about unhandled switch statements. default: break; case OpFunctionCall: { // Invalid SPIR-V. if (length < 3) return false; uint32_t count = length - 3; args += 3; for (uint32_t i = 0; i < count; i++) { auto *var = compiler.maybe_get(args[i]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[i]); } break; } case OpSelect: { // Invalid SPIR-V. 
if (length < 5) return false; uint32_t count = length - 3; args += 3; for (uint32_t i = 0; i < count; i++) { auto *var = compiler.maybe_get(args[i]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[i]); } break; } case OpPhi: { // Invalid SPIR-V. if (length < 2) return false; uint32_t count = length - 2; args += 2; for (uint32_t i = 0; i < count; i += 2) { auto *var = compiler.maybe_get(args[i]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[i]); } break; } case OpAtomicStore: case OpStore: // Invalid SPIR-V. if (length < 1) return false; variable = args[0]; break; case OpCopyMemory: { if (length < 2) return false; auto *var = compiler.maybe_get(args[0]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[0]); var = compiler.maybe_get(args[1]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[1]); break; } case OpExtInst: { if (length < 5) return false; auto &extension_set = compiler.get(args[2]); switch (extension_set.ext) { case SPIRExtension::GLSL: { auto op = static_cast(args[3]); switch (op) { case GLSLstd450InterpolateAtCentroid: case GLSLstd450InterpolateAtSample: case GLSLstd450InterpolateAtOffset: { auto *var = compiler.maybe_get(args[4]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[4]); break; } case GLSLstd450Modf: case GLSLstd450Fract: { auto *var = compiler.maybe_get(args[5]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[5]); break; } default: break; } break; } case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter: { enum AMDShaderExplicitVertexParameter { InterpolateAtVertexAMD = 1 }; auto op = static_cast(args[3]); switch (op) { case InterpolateAtVertexAMD: { auto *var = compiler.maybe_get(args[4]); if (var && storage_class_is_interface(var->storage)) variables.insert(args[4]); break; } default: break; } break; } default: break; } break; } case OpAccessChain: case 
OpInBoundsAccessChain: case OpPtrAccessChain: case OpLoad: case OpCopyObject: case OpImageTexelPointer: case OpAtomicLoad: case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicCompareExchangeWeak: case OpAtomicIIncrement: case OpAtomicIDecrement: case OpAtomicIAdd: case OpAtomicISub: case OpAtomicSMin: case OpAtomicUMin: case OpAtomicSMax: case OpAtomicUMax: case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: case OpArrayLength: // Invalid SPIR-V. if (length < 3) return false; variable = args[2]; break; } if (variable) { auto *var = compiler.maybe_get(variable); if (var && storage_class_is_interface(var->storage)) variables.insert(variable); } return true; } unordered_set Compiler::get_active_interface_variables() const { // Traverse the call graph and find all interface variables which are in use. unordered_set variables; InterfaceVariableAccessHandler handler(*this, variables); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { if (var.storage != StorageClassOutput) return; if (!interface_variable_exists_in_entry_point(var.self)) return; // An output variable which is just declared (but uninitialized) might be read by subsequent stages // so we should force-enable these outputs, // since compilation will fail if a subsequent stage attempts to read from the variable in question. // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment) variables.insert(var.self); }); // If we needed to create one, we'll need it. 
if (dummy_sampler_id) variables.insert(dummy_sampler_id); return variables; } void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) { active_interface_variables = move(active_variables); check_active_interface_variables = true; } ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const { ShaderResources res; bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant(); ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { auto &type = this->get(var.basetype); // It is possible for uniform storage classes to be passed as function parameters, so detect // that. To detect function parameters, check of StorageClass of variable is function scope. if (var.storage == StorageClassFunction || !type.pointer) return; if (active_variables && active_variables->find(var.self) == end(*active_variables)) return; // In SPIR-V 1.4 and up, every global must be present in the entry point interface list, // not just IO variables. bool active_in_entry_point = true; if (ir.get_spirv_version() < 0x10400) { if (var.storage == StorageClassInput || var.storage == StorageClassOutput) active_in_entry_point = interface_variable_exists_in_entry_point(var.self); } else active_in_entry_point = interface_variable_exists_in_entry_point(var.self); if (!active_in_entry_point) return; bool is_builtin = is_builtin_variable(var); if (is_builtin) { if (var.storage != StorageClassInput && var.storage != StorageClassOutput) return; auto &list = var.storage == StorageClassInput ? 
res.builtin_inputs : res.builtin_outputs; BuiltInResource resource; if (has_decoration(type.self, DecorationBlock)) { resource.resource = { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }; for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { resource.value_type_id = type.member_types[i]; resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); list.push_back(resource); } } else { bool strip_array = !has_decoration(var.self, DecorationPatch) && ( get_execution_model() == ExecutionModelTessellationControl || (get_execution_model() == ExecutionModelTessellationEvaluation && var.storage == StorageClassInput)); resource.resource = { var.self, var.basetype, type.self, get_name(var.self) }; if (strip_array && !type.array.empty()) resource.value_type_id = get_variable_data_type(var).parent_type; else resource.value_type_id = get_variable_data_type_id(var); assert(resource.value_type_id); resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); list.push_back(std::move(resource)); } return; } // Input if (var.storage == StorageClassInput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_inputs.push_back( { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Subpass inputs else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData) { res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Outputs else if (var.storage == StorageClassOutput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_outputs.push_back( { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // UBOs else if (type.storage == StorageClassUniform && 
has_decoration(type.self, DecorationBlock)) { res.uniform_buffers.push_back( { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } // Old way to declare SSBOs. else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)) { res.storage_buffers.push_back( { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); } // Modern way to declare SSBOs. else if (type.storage == StorageClassStorageBuffer) { res.storage_buffers.push_back( { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); } // Push constant blocks else if (type.storage == StorageClassPushConstant) { // There can only be one push constant block, but keep the vector in case this restriction is lifted // in the future. res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Images else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && type.image.sampled == 2) { res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Separate images else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && type.image.sampled == 1) { res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Separate samplers else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) { res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Textures else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) { res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Atomic counters else if (type.storage == StorageClassAtomicCounter) { res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Acceleration 
structures else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) { res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } }); return res; } bool Compiler::type_is_block_like(const SPIRType &type) const { if (type.basetype != SPIRType::Struct) return false; if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) { return true; } // Block-like types may have Offset decorations. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) if (has_member_decoration(type.self, i, DecorationOffset)) return true; return false; } void Compiler::parse_fixup() { // Figure out specialization constants for work group sizes. for (auto id_ : ir.ids_for_constant_or_variable) { auto &id = ir.ids[id_]; if (id.get_type() == TypeConstant) { auto &c = id.get(); if (has_decoration(c.self, DecorationBuiltIn) && BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) { // In current SPIR-V, there can be just one constant like this. // All entry points will receive the constant value. // WorkgroupSize take precedence over LocalSizeId. 
for (auto &entry : ir.entry_points) { entry.second.workgroup_size.constant = c.self; entry.second.workgroup_size.x = c.scalar(0, 0); entry.second.workgroup_size.y = c.scalar(0, 1); entry.second.workgroup_size.z = c.scalar(0, 2); } } } else if (id.get_type() == TypeVariable) { auto &var = id.get(); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || var.storage == StorageClassOutput) global_variables.push_back(var.self); if (variable_storage_is_aliased(var)) aliased_variables.push_back(var.self); } } } void Compiler::update_name_cache(unordered_set &cache_primary, const unordered_set &cache_secondary, string &name) { if (name.empty()) return; const auto find_name = [&](const string &n) -> bool { if (cache_primary.find(n) != end(cache_primary)) return true; if (&cache_primary != &cache_secondary) if (cache_secondary.find(n) != end(cache_secondary)) return true; return false; }; const auto insert_name = [&](const string &n) { cache_primary.insert(n); }; if (!find_name(name)) { insert_name(name); return; } uint32_t counter = 0; auto tmpname = name; bool use_linked_underscore = true; if (tmpname == "_") { // We cannot just append numbers, as we will end up creating internally reserved names. // Make it like _0_ instead. tmpname += "0"; } else if (tmpname.back() == '_') { // The last_character is an underscore, so we don't need to link in underscore. // This would violate double underscore rules. use_linked_underscore = false; } // If there is a collision (very rare), // keep tacking on extra identifier until it's unique. do { counter++; name = tmpname + (use_linked_underscore ? 
"_" : "") + convert_to_string(counter); } while (find_name(name)); insert_name(name); } void Compiler::update_name_cache(unordered_set &cache, string &name) { update_name_cache(cache, cache, name); } void Compiler::set_name(ID id, const std::string &name) { ir.set_name(id, name); } const SPIRType &Compiler::get_type(TypeID id) const { return get(id); } const SPIRType &Compiler::get_type_from_variable(VariableID id) const { return get(get(id).basetype); } uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const { auto *p_type = &get(type_id); if (p_type->pointer) { assert(p_type->parent_type); type_id = p_type->parent_type; } return type_id; } const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const { auto *p_type = &type; if (p_type->pointer) { assert(p_type->parent_type); p_type = &get(p_type->parent_type); } return *p_type; } const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const { return get_pointee_type(get(type_id)); } uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const { if (var.phi_variable) return var.basetype; return get_pointee_type_id(var.basetype); } SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) { return get(get_variable_data_type_id(var)); } const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const { return get(get_variable_data_type_id(var)); } SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) { SPIRType *type = &get_variable_data_type(var); if (is_array(*type)) type = &get(type->parent_type); return *type; } const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const { const SPIRType *type = &get_variable_data_type(var); if (is_array(*type)) type = &get(type->parent_type); return *type; } bool Compiler::is_sampled_image_type(const SPIRType &type) { return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 && type.image.dim != DimBuffer; } void 
Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument) { ir.set_member_decoration_string(id, index, decoration, argument); } void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { ir.set_member_decoration(id, index, decoration, argument); } void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) { ir.set_member_name(id, index, name); } const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const { return ir.get_member_name(id, index); } void Compiler::set_qualified_name(uint32_t id, const string &name) { ir.meta[id].decoration.qualified_alias = name; } void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name) { ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1)); ir.meta[type_id].members[index].qualified_alias = name; } const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const { auto *m = ir.find_meta(type_id); if (m && index < m->members.size()) return m->members[index].qualified_alias; else return ir.get_empty_string(); } uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration(id, index, decoration); } const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const { return ir.get_member_decoration_bitset(id, index); } bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.has_member_decoration(id, index, decoration); } void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { ir.unset_member_decoration(id, index, decoration); } void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) { ir.set_decoration_string(id, decoration, argument); } void Compiler::set_decoration(ID 
id, Decoration decoration, uint32_t argument) { ir.set_decoration(id, decoration, argument); } void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) { auto &dec = ir.meta[id].decoration; dec.extended.flags.set(decoration); dec.extended.values[decoration] = value; } void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, uint32_t value) { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; dec.extended.flags.set(decoration); dec.extended.values[decoration] = value; } static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) { switch (decoration) { case SPIRVCrossDecorationResourceIndexPrimary: case SPIRVCrossDecorationResourceIndexSecondary: case SPIRVCrossDecorationResourceIndexTertiary: case SPIRVCrossDecorationResourceIndexQuaternary: case SPIRVCrossDecorationInterfaceMemberIndex: return ~(0u); default: return 0; } } uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const { auto *m = ir.find_meta(id); if (!m) return 0; auto &dec = m->decoration; if (!dec.extended.flags.get(decoration)) return get_default_extended_decoration(decoration); return dec.extended.values[decoration]; } uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const { auto *m = ir.find_meta(type); if (!m) return 0; if (index >= m->members.size()) return 0; auto &dec = m->members[index]; if (!dec.extended.flags.get(decoration)) return get_default_extended_decoration(decoration); return dec.extended.values[decoration]; } bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const { auto *m = ir.find_meta(id); if (!m) return false; auto &dec = m->decoration; return dec.extended.flags.get(decoration); } bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, 
ExtendedDecorations decoration) const { auto *m = ir.find_meta(type); if (!m) return false; if (index >= m->members.size()) return false; auto &dec = m->members[index]; return dec.extended.flags.get(decoration); } void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) { auto &dec = ir.meta[id].decoration; dec.extended.flags.clear(decoration); dec.extended.values[decoration] = 0; } void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; dec.extended.flags.clear(decoration); dec.extended.values[decoration] = 0; } StorageClass Compiler::get_storage_class(VariableID id) const { return get(id).storage; } const std::string &Compiler::get_name(ID id) const { return ir.get_name(id); } const std::string Compiler::get_fallback_name(ID id) const { return join("_", id); } const std::string Compiler::get_block_fallback_name(VariableID id) const { auto &var = get(id); if (get_name(id).empty()) return join("_", get(var.basetype).self, "_", id); else return get_name(id); } const Bitset &Compiler::get_decoration_bitset(ID id) const { return ir.get_decoration_bitset(id); } bool Compiler::has_decoration(ID id, Decoration decoration) const { return ir.has_decoration(id, decoration); } const string &Compiler::get_decoration_string(ID id, Decoration decoration) const { return ir.get_decoration_string(id, decoration); } const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration_string(id, index, decoration); } uint32_t Compiler::get_decoration(ID id, Decoration decoration) const { return ir.get_decoration(id, decoration); } void Compiler::unset_decoration(ID id, Decoration decoration) { ir.unset_decoration(id, decoration); } bool Compiler::get_binary_offset_for_decoration(VariableID id, 
spv::Decoration decoration, uint32_t &word_offset) const { auto *m = ir.find_meta(id); if (!m) return false; auto &word_offsets = m->decoration_word_offset; auto itr = word_offsets.find(decoration); if (itr == end(word_offsets)) return false; word_offset = itr->second; return true; } bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const { // Tried and failed. if (block.disable_block_optimization || block.complex_continue) return false; if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) { // Try to detect common for loop pattern // which the code backend can use to create cleaner code. // for(;;) { if (cond) { some_body; } else { break; } } // is the pattern we're looking for. const auto *false_block = maybe_get(block.false_block); const auto *true_block = maybe_get(block.true_block); const auto *merge_block = maybe_get(block.merge_block); bool false_block_is_merge = block.false_block == block.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); bool true_block_is_merge = block.true_block == block.merge_block || (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); bool positive_candidate = block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge; bool negative_candidate = block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge; bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop && (positive_candidate || negative_candidate); if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) ret = block.true_block == block.continue_block; else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) ret = block.false_block == block.continue_block; // If we have OpPhi which depends on branches which came from our own block, // we need to flush phi variables 
in else block instead of a trivial break, // so we cannot assume this is a for loop candidate. if (ret) { for (auto &phi : block.phi_variables) if (phi.parent == block.self) return false; auto *merge = maybe_get(block.merge_block); if (merge) for (auto &phi : merge->phi_variables) if (phi.parent == block.self) return false; } return ret; } else if (method == SPIRBlock::MergeToDirectForLoop) { // Empty loop header that just sets up merge target // and branches to loop body. bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); if (!ret) return false; auto &child = get(block.next_block); const auto *false_block = maybe_get(child.false_block); const auto *true_block = maybe_get(child.true_block); const auto *merge_block = maybe_get(block.merge_block); bool false_block_is_merge = child.false_block == block.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); bool true_block_is_merge = child.true_block == block.merge_block || (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); bool positive_candidate = child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge; bool negative_candidate = child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge; ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && (positive_candidate || negative_candidate); // If we have OpPhi which depends on branches which came from our own block, // we need to flush phi variables in else block instead of a trivial break, // so we cannot assume this is a for loop candidate. 
if (ret) { for (auto &phi : block.phi_variables) if (phi.parent == block.self || phi.parent == child.self) return false; for (auto &phi : child.phi_variables) if (phi.parent == block.self) return false; auto *merge = maybe_get(block.merge_block); if (merge) for (auto &phi : merge->phi_variables) if (phi.parent == block.self || phi.parent == child.false_block) return false; } return ret; } else return false; } bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const { if (!execution_is_branchless(from, to)) return false; auto *start = &from; for (;;) { if (start->self == to.self) return true; if (!start->ops.empty()) return false; auto &next = get(start->next_block); // Flushing phi variables does not count as noop. for (auto &phi : next.phi_variables) if (phi.parent == start->self) return false; start = &next; } } bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const { auto *start = &from; for (;;) { if (start->self == to.self) return true; if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) start = &get(start->next_block); else return false; } } bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const { return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self; } SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const { // The block was deemed too complex during code emit, pick conservative fallback paths. if (block.complex_continue) return SPIRBlock::ComplexLoop; // In older glslang output continue block can be equal to the loop header. // In this case, execution is clearly branchless, so just assume a while loop header here. if (block.merge == SPIRBlock::MergeLoop) return SPIRBlock::WhileLoop; if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) { // Continue block is never reached from CFG. 
return SPIRBlock::ComplexLoop; } auto &dominator = get(block.loop_dominator); if (execution_is_noop(block, dominator)) return SPIRBlock::WhileLoop; else if (execution_is_branchless(block, dominator)) return SPIRBlock::ForLoop; else { const auto *false_block = maybe_get(block.false_block); const auto *true_block = maybe_get(block.true_block); const auto *merge_block = maybe_get(dominator.merge_block); // If we need to flush Phi in this block, we cannot have a DoWhile loop. bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block); bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block); if (flush_phi_to_false || flush_phi_to_true) return SPIRBlock::ComplexLoop; bool positive_do_while = block.true_block == dominator.self && (block.false_block == dominator.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); bool negative_do_while = block.false_block == dominator.self && (block.true_block == dominator.merge_block || (true_block && merge_block && execution_is_noop(*true_block, *merge_block))); if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select && (positive_do_while || negative_do_while)) { return SPIRBlock::DoWhileLoop; } else return SPIRBlock::ComplexLoop; } } const SmallVector &Compiler::get_case_list(const SPIRBlock &block) const { uint32_t width = 0; // First we check if we can get the type directly from the block.condition // since it can be a SPIRConstant or a SPIRVariable. 
if (const auto *constant = maybe_get(block.condition)) { const auto &type = get(constant->constant_type); width = type.width; } else if (const auto *var = maybe_get(block.condition)) { const auto &type = get(var->basetype); width = type.width; } else { auto search = ir.load_type_width.find(block.condition); if (search == ir.load_type_width.end()) { SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement."); } width = search->second; } if (width > 32) return block.cases_64bit; return block.cases_32bit; } bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing // inside dead blocks ... for (auto &i : block.ops) { auto ops = stream(i); auto op = static_cast(i.op); if (!handler.handle(op, ops, i.length)) return false; if (op == OpFunctionCall) { auto &func = get(ops[2]); if (handler.follow_function_call(func)) { if (!handler.begin_function_scope(ops, i.length)) return false; if (!traverse_all_reachable_opcodes(get(ops[2]), handler)) return false; if (!handler.end_function_scope(ops, i.length)) return false; handler.rearm_current_block(block); } } } if (!handler.handle_terminator(block)) return false; return true; } bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const { for (auto block : func.blocks) if (!traverse_all_reachable_opcodes(get(block), handler)) return false; return true; } uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const { auto *type_meta = ir.find_meta(type.self); if (type_meta) { // Decoration must be set in valid SPIR-V, otherwise throw. 
auto &dec = type_meta->members[index]; if (dec.decoration_flags.get(DecorationOffset)) return dec.offset; else SPIRV_CROSS_THROW("Struct member does not have Offset set."); } else SPIRV_CROSS_THROW("Struct member does not have Offset set."); } uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const { auto *type_meta = ir.find_meta(type.member_types[index]); if (type_meta) { // Decoration must be set in valid SPIR-V, otherwise throw. // ArrayStride is part of the array type not OpMemberDecorate. auto &dec = type_meta->decoration; if (dec.decoration_flags.get(DecorationArrayStride)) return dec.array_stride; else SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); } else SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); } uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const { auto *type_meta = ir.find_meta(type.self); if (type_meta) { // Decoration must be set in valid SPIR-V, otherwise throw. // MatrixStride is part of OpMemberDecorate. auto &dec = type_meta->members[index]; if (dec.decoration_flags.get(DecorationMatrixStride)) return dec.matrix_stride; else SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); } else SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); } size_t Compiler::get_declared_struct_size(const SPIRType &type) const { if (type.member_types.empty()) SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); // Offsets can be declared out of order, so we need to deduce the actual size // based on last member instead. 
uint32_t member_index = 0; size_t highest_offset = 0; for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { size_t offset = type_struct_member_offset(type, i); if (offset > highest_offset) { highest_offset = offset; member_index = i; } } size_t size = get_declared_struct_member_size(type, member_index); return highest_offset + size; } size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const { if (type.member_types.empty()) SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); size_t size = get_declared_struct_size(type); auto &last_type = get(type.member_types.back()); if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1)); return size; } uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const { auto &result_type = get(spec.basetype); if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && result_type.basetype != SPIRType::Boolean) { SPIRV_CROSS_THROW( "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); } if (!is_scalar(result_type)) SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); uint32_t value = 0; const auto eval_u32 = [&](uint32_t id) -> uint32_t { auto &type = expression_type(id); if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean) { SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating " "specialization constants.\n"); } if (!is_scalar(type)) SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); if (const auto *c = this->maybe_get(id)) return c->scalar(); else return evaluate_spec_constant_u32(this->get(id)); }; #define binary_spec_op(op, binary_op) \ case Op##op: \ value = eval_u32(spec.arguments[0]) 
binary_op eval_u32(spec.arguments[1]); \ break #define binary_spec_op_cast(op, binary_op, type) \ case Op##op: \ value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \ break // Support the basic opcodes which are typically used when computing array sizes. switch (spec.opcode) { binary_spec_op(IAdd, +); binary_spec_op(ISub, -); binary_spec_op(IMul, *); binary_spec_op(BitwiseAnd, &); binary_spec_op(BitwiseOr, |); binary_spec_op(BitwiseXor, ^); binary_spec_op(LogicalAnd, &); binary_spec_op(LogicalOr, |); binary_spec_op(ShiftLeftLogical, <<); binary_spec_op(ShiftRightLogical, >>); binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); binary_spec_op(LogicalEqual, ==); binary_spec_op(LogicalNotEqual, !=); binary_spec_op(IEqual, ==); binary_spec_op(INotEqual, !=); binary_spec_op(ULessThan, <); binary_spec_op(ULessThanEqual, <=); binary_spec_op(UGreaterThan, >); binary_spec_op(UGreaterThanEqual, >=); binary_spec_op_cast(SLessThan, <, int32_t); binary_spec_op_cast(SLessThanEqual, <=, int32_t); binary_spec_op_cast(SGreaterThan, >, int32_t); binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); #undef binary_spec_op #undef binary_spec_op_cast case OpLogicalNot: value = uint32_t(!eval_u32(spec.arguments[0])); break; case OpNot: value = ~eval_u32(spec.arguments[0]); break; case OpSNegate: value = uint32_t(-int32_t(eval_u32(spec.arguments[0]))); break; case OpSelect: value = eval_u32(spec.arguments[0]) ? 
eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]); break; case OpUMod: { uint32_t a = eval_u32(spec.arguments[0]); uint32_t b = eval_u32(spec.arguments[1]); if (b == 0) SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n"); value = a % b; break; } case OpSRem: { auto a = int32_t(eval_u32(spec.arguments[0])); auto b = int32_t(eval_u32(spec.arguments[1])); if (b == 0) SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n"); value = a % b; break; } case OpSMod: { auto a = int32_t(eval_u32(spec.arguments[0])); auto b = int32_t(eval_u32(spec.arguments[1])); if (b == 0) SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n"); auto v = a % b; // Makes sure we match the sign of b, not a. if ((b < 0 && v > 0) || (b > 0 && v < 0)) v += b; value = v; break; } case OpUDiv: { uint32_t a = eval_u32(spec.arguments[0]); uint32_t b = eval_u32(spec.arguments[1]); if (b == 0) SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n"); value = a / b; break; } case OpSDiv: { auto a = int32_t(eval_u32(spec.arguments[0])); auto b = int32_t(eval_u32(spec.arguments[1])); if (b == 0) SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n"); value = a / b; break; } default: SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n"); } return value; } uint32_t Compiler::evaluate_constant_u32(uint32_t id) const { if (const auto *c = maybe_get(id)) return c->scalar(); else return evaluate_spec_constant_u32(get(id)); } size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const { if (struct_type.member_types.empty()) SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); auto &flags = get_member_decoration_bitset(struct_type.self, index); auto &type = get(struct_type.member_types[index]); switch (type.basetype) { case SPIRType::Unknown: case SPIRType::Void: case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types. 
case SPIRType::AtomicCounter: case SPIRType::Image: case SPIRType::SampledImage: case SPIRType::Sampler: SPIRV_CROSS_THROW("Querying size for object with opaque size."); default: break; } if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) { // Check if this is a top-level pointer type, and not an array of pointers. if (type.pointer_depth > get(type.parent_type).pointer_depth) return 8; } if (!type.array.empty()) { // For arrays, we can use ArrayStride to get an easy check. bool array_size_literal = type.array_size_literal.back(); uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back()); return type_struct_member_array_stride(struct_type, index) * array_size; } else if (type.basetype == SPIRType::Struct) { return get_declared_struct_size(type); } else { unsigned vecsize = type.vecsize; unsigned columns = type.columns; // Vectors. if (columns == 1) { size_t component_size = type.width / 8; return vecsize * component_size; } else { uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index); // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. if (flags.get(DecorationRowMajor)) return matrix_stride * vecsize; else if (flags.get(DecorationColMajor)) return matrix_stride * columns; else SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices."); } } } bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) { if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain) return true; bool ptr_chain = (opcode == OpPtrAccessChain); // Invalid SPIR-V. if (length < (ptr_chain ? 5u : 4u)) return false; if (args[2] != id) return true; // Don't bother traversing the entire access chain tree yet. // If we access a struct member, assume we access the entire member. uint32_t index = compiler.get(args[ptr_chain ? 4 : 3]).scalar(); // Seen this index already. 
if (seen.find(index) != end(seen)) return true; seen.insert(index); auto &type = compiler.expression_type(id); uint32_t offset = compiler.type_struct_member_offset(type, index); size_t range; // If we have another member in the struct, deduce the range by looking at the next member. // This is okay since structs in SPIR-V can have padding, but Offset decoration must be // monotonically increasing. // Of course, this doesn't take into account if the SPIR-V for some reason decided to add // very large amounts of padding, but that's not really a big deal. if (index + 1 < type.member_types.size()) { range = compiler.type_struct_member_offset(type, index + 1) - offset; } else { // No padding, so just deduce it from the size of the member directly. range = compiler.get_declared_struct_member_size(type, index); } ranges.push_back({ index, offset, range }); return true; } SmallVector Compiler::get_active_buffer_ranges(VariableID id) const { SmallVector ranges; BufferAccessHandler handler(*this, ranges, id); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); return ranges; } bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const { if (a.basetype != b.basetype) return false; if (a.width != b.width) return false; if (a.vecsize != b.vecsize) return false; if (a.columns != b.columns) return false; if (a.array.size() != b.array.size()) return false; size_t array_count = a.array.size(); if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) return false; if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) { if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) return false; } if (a.member_types.size() != b.member_types.size()) return false; size_t member_types = a.member_types.size(); for (size_t i = 0; i < member_types; i++) { if (!types_are_logically_equivalent(get(a.member_types[i]), get(b.member_types[i]))) return false; } return true; } const Bitset 
&Compiler::get_execution_mode_bitset() const { return get_entry_point().flags; } void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2) { auto &execution = get_entry_point(); execution.flags.set(mode); switch (mode) { case ExecutionModeLocalSize: execution.workgroup_size.x = arg0; execution.workgroup_size.y = arg1; execution.workgroup_size.z = arg2; break; case ExecutionModeLocalSizeId: execution.workgroup_size.id_x = arg0; execution.workgroup_size.id_y = arg1; execution.workgroup_size.id_z = arg2; break; case ExecutionModeInvocations: execution.invocations = arg0; break; case ExecutionModeOutputVertices: execution.output_vertices = arg0; break; default: break; } } void Compiler::unset_execution_mode(ExecutionMode mode) { auto &execution = get_entry_point(); execution.flags.clear(mode); } uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, SpecializationConstant &z) const { auto &execution = get_entry_point(); x = { 0, 0 }; y = { 0, 0 }; z = { 0, 0 }; // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId. 
if (execution.workgroup_size.constant != 0) { auto &c = get(execution.workgroup_size.constant); if (c.m.c[0].id[0] != ID(0)) { x.id = c.m.c[0].id[0]; x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); } if (c.m.c[0].id[1] != ID(0)) { y.id = c.m.c[0].id[1]; y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); } if (c.m.c[0].id[2] != ID(0)) { z.id = c.m.c[0].id[2]; z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); } } else if (execution.flags.get(ExecutionModeLocalSizeId)) { auto &cx = get(execution.workgroup_size.id_x); if (cx.specialization) { x.id = execution.workgroup_size.id_x; x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId); } auto &cy = get(execution.workgroup_size.id_y); if (cy.specialization) { y.id = execution.workgroup_size.id_y; y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId); } auto &cz = get(execution.workgroup_size.id_z); if (cz.specialization) { z.id = execution.workgroup_size.id_z; z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId); } } return execution.workgroup_size.constant; } uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const { auto &execution = get_entry_point(); switch (mode) { case ExecutionModeLocalSizeId: if (execution.flags.get(ExecutionModeLocalSizeId)) { switch (index) { case 0: return execution.workgroup_size.id_x; case 1: return execution.workgroup_size.id_y; case 2: return execution.workgroup_size.id_z; default: return 0; } } else return 0; case ExecutionModeLocalSize: switch (index) { case 0: if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0) return get(execution.workgroup_size.id_x).scalar(); else return execution.workgroup_size.x; case 1: if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0) return get(execution.workgroup_size.id_y).scalar(); else return execution.workgroup_size.y; 
case 2: if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0) return get(execution.workgroup_size.id_z).scalar(); else return execution.workgroup_size.z; default: return 0; } case ExecutionModeInvocations: return execution.invocations; case ExecutionModeOutputVertices: return execution.output_vertices; default: return 0; } } ExecutionModel Compiler::get_execution_model() const { auto &execution = get_entry_point(); return execution.model; } bool Compiler::is_tessellation_shader(ExecutionModel model) { return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; } bool Compiler::is_vertex_like_shader() const { auto model = get_execution_model(); return model == ExecutionModelVertex || model == ExecutionModelGeometry || model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; } bool Compiler::is_tessellation_shader() const { return is_tessellation_shader(get_execution_model()); } void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) { get(id).remapped_variable = remap_enable; } bool Compiler::get_remapped_variable_state(VariableID id) const { return get(id).remapped_variable; } void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components) { get(id).remapped_components = components; } uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const { return get(id).remapped_components; } void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) { auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) { auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) 
e.implied_read_expressions.push_back(source); } void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) { // Don't inherit any expression dependencies if the expression in dst // is not a forwarded temporary. if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) || forced_temporaries.find(dst) != end(forced_temporaries)) { return; } auto &e = get(dst); auto *phi = maybe_get(source_expression); if (phi && phi->phi_variable) { // We have used a phi variable, which can change at the end of the block, // so make sure we take a dependency on this phi variable. phi->dependees.push_back(dst); } auto *s = maybe_get(source_expression); if (!s) return; auto &e_deps = e.expression_dependencies; auto &s_deps = s->expression_dependencies; // If we depend on a expression, we also depend on all sub-dependencies from source. e_deps.push_back(source_expression); e_deps.insert(end(e_deps), begin(s_deps), end(s_deps)); // Eliminate duplicated dependencies. sort(begin(e_deps), end(e_deps)); e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps)); } SmallVector Compiler::get_entry_points_and_stages() const { SmallVector entries; for (auto &entry : ir.entry_points) entries.push_back({ entry.second.orig_name, entry.second.model }); return entries; } void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model) { auto &entry = get_entry_point(old_name, model); entry.orig_name = new_name; entry.name = new_name; } void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model) { auto &entry = get_entry_point(name, model); ir.default_entry_point = entry.self; } SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) { auto itr = find_if( begin(ir.entry_points), end(ir.entry_points), [&](const std::pair &entry) -> bool { return entry.second.orig_name == name; }); if (itr == end(ir.entry_points)) SPIRV_CROSS_THROW("Entry point does not exist."); return 
itr->second; } const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const { auto itr = find_if( begin(ir.entry_points), end(ir.entry_points), [&](const std::pair &entry) -> bool { return entry.second.orig_name == name; }); if (itr == end(ir.entry_points)) SPIRV_CROSS_THROW("Entry point does not exist."); return itr->second; } SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) { auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), [&](const std::pair &entry) -> bool { return entry.second.orig_name == name && entry.second.model == model; }); if (itr == end(ir.entry_points)) SPIRV_CROSS_THROW("Entry point does not exist."); return itr->second; } const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const { auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), [&](const std::pair &entry) -> bool { return entry.second.orig_name == name && entry.second.model == model; }); if (itr == end(ir.entry_points)) SPIRV_CROSS_THROW("Entry point does not exist."); return itr->second; } const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const { return get_entry_point(name, model).name; } const SPIREntryPoint &Compiler::get_entry_point() const { return ir.entry_points.find(ir.default_entry_point)->second; } SPIREntryPoint &Compiler::get_entry_point() { return ir.entry_points.find(ir.default_entry_point)->second; } bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const { auto &var = get(id); if (ir.get_spirv_version() < 0x10400) { if (var.storage != StorageClassInput && var.storage != StorageClassOutput && var.storage != StorageClassUniformConstant) SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); // This is to avoid potential problems with very old glslang versions which did // not emit input/output interfaces properly. 
// We can assume they only had a single entry point, and single entry point // shaders could easily be assumed to use every interface variable anyways. if (ir.entry_points.size() <= 1) return true; } // In SPIR-V 1.4 and later, all global resource variables must be present. auto &execution = get_entry_point(); return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != end(execution.interface_variables); } void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length) { // If possible, pipe through a remapping table so that parameters know // which variables they actually bind to in this scope. unordered_map remapping; for (uint32_t i = 0; i < length; i++) remapping[func.arguments[i].id] = remap_parameter(args[i]); parameter_remapping.push(move(remapping)); } void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() { parameter_remapping.pop(); } uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id) { auto *var = compiler.maybe_get_backing_variable(id); if (var) id = var->self; if (parameter_remapping.empty()) return id; auto &remapping = parameter_remapping.top(); auto itr = remapping.find(id); if (itr != end(remapping)) return itr->second; else return id; } bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) return false; auto &callee = compiler.get(args[2]); args += 3; length -= 3; push_remap_parameters(callee, args, length); functions.push(&callee); return true; } bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) return false; auto &callee = compiler.get(args[2]); args += 3; // There are two types of cases we have to handle, // a callee might call sampler2D(texture2D, sampler) directly where // one or more parameters originate from parameters. 
// Alternatively, we need to provide combined image samplers to our callees, // and in this case we need to add those as well. pop_remap_parameters(); // Our callee has now been processed at least once. // No point in doing it again. callee.do_combined_parameters = false; auto ¶ms = functions.top()->combined_parameters; functions.pop(); if (functions.empty()) return true; auto &caller = *functions.top(); if (caller.do_combined_parameters) { for (auto ¶m : params) { VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]); auto *i = compiler.maybe_get_backing_variable(image_id); auto *s = compiler.maybe_get_backing_variable(sampler_id); if (i) image_id = i->self; if (s) sampler_id = s->self; register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth); } } return true; } void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, VariableID combined_module_id, VariableID image_id, VariableID sampler_id, bool depth) { // We now have a texture ID and a sampler ID which will either be found as a global // or a parameter in our own function. If both are global, they will not need a parameter, // otherwise, add it to our list. 
SPIRFunction::CombinedImageSamplerParameter param = { 0u, image_id, sampler_id, true, true, depth, }; auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments), [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; }); auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments), [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; }); if (texture_itr != end(caller.arguments)) { param.global_image = false; param.image_id = uint32_t(texture_itr - begin(caller.arguments)); } if (sampler_itr != end(caller.arguments)) { param.global_sampler = false; param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments)); } if (param.global_image && param.global_sampler) return; auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters), [¶m](const SPIRFunction::CombinedImageSamplerParameter &p) { return param.image_id == p.image_id && param.sampler_id == p.sampler_id && param.global_image == p.global_image && param.global_sampler == p.global_sampler; }); if (itr == end(caller.combined_parameters)) { uint32_t id = compiler.ir.increase_bound_by(3); auto type_id = id + 0; auto ptr_type_id = id + 1; auto combined_id = id + 2; auto &base = compiler.expression_type(image_id); auto &type = compiler.set(type_id); auto &ptr_type = compiler.set(ptr_type_id); type = base; type.self = type_id; type.basetype = SPIRType::SampledImage; type.pointer = false; type.storage = StorageClassGeneric; type.image.depth = depth; ptr_type = type; ptr_type.pointer = true; ptr_type.storage = StorageClassUniformConstant; ptr_type.parent_type = type_id; // Build new variable. compiler.set(combined_id, ptr_type_id, StorageClassFunction, 0); // Inherit RelaxedPrecision. // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. 
bool relaxed_precision = compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) || compiler.has_decoration(image_id, DecorationRelaxedPrecision) || (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); if (relaxed_precision) compiler.set_decoration(combined_id, DecorationRelaxedPrecision); param.id = combined_id; compiler.set_name(combined_id, join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id))); caller.combined_parameters.push_back(param); caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true }); } } bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) { if (need_dummy_sampler) { // No need to traverse further, we know the result. return false; } switch (opcode) { case OpLoad: { if (length < 3) return false; uint32_t result_type = args[0]; auto &type = compiler.get(result_type); bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; // If not separate image, don't bother. if (!separate_image) return true; uint32_t id = args[1]; uint32_t ptr = args[2]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); break; } case OpImageFetch: case OpImageQuerySizeLod: case OpImageQuerySize: case OpImageQueryLevels: case OpImageQuerySamples: { // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler. 
auto *var = compiler.maybe_get_backing_variable(args[2]); if (var) { auto &type = compiler.get(var->basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) need_dummy_sampler = true; } break; } case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: { if (length < 3) return false; uint32_t result_type = args[0]; auto &type = compiler.get(result_type); bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; if (!separate_image) return true; uint32_t id = args[1]; uint32_t ptr = args[2]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); // Other backends might use SPIRAccessChain for this later. compiler.ir.ids[id].set_allow_type_rewrite(); break; } default: break; } return true; } bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length) { // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need. bool is_fetch = false; switch (opcode) { case OpLoad: { if (length < 3) return false; uint32_t result_type = args[0]; auto &type = compiler.get(result_type); bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; bool separate_sampler = type.basetype == SPIRType::Sampler; // If not separate image or sampler, don't bother. if (!separate_image && !separate_sampler) return true; uint32_t id = args[1]; uint32_t ptr = args[2]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); return true; } case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: { if (length < 3) return false; // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially // impossible to implement, since we don't know which concrete sampler we are accessing. 
// One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds, // but this seems ridiculously complicated for a problem which is easy to work around. // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense. uint32_t result_type = args[0]; auto &type = compiler.get(result_type); bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; bool separate_sampler = type.basetype == SPIRType::Sampler; if (separate_sampler) SPIRV_CROSS_THROW( "Attempting to use arrays or structs of separate samplers. This is not possible to statically " "remap to plain GLSL."); if (separate_image) { uint32_t id = args[1]; uint32_t ptr = args[2]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); } return true; } case OpImageFetch: case OpImageQuerySizeLod: case OpImageQuerySize: case OpImageQueryLevels: case OpImageQuerySamples: { // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler. auto *var = compiler.maybe_get_backing_variable(args[2]); if (!var) return true; auto &type = compiler.get(var->basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) { if (compiler.dummy_sampler_id == 0) SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with " "build_dummy_sampler_for_combined_images()."); // Do it outside. is_fetch = true; break; } return true; } case OpSampledImage: // Do it outside. break; default: return true; } // Registers sampler2D calls used in case they are parameters so // that their callees know which combined image samplers to propagate down the call stack. 
if (!functions.empty()) { auto &callee = *functions.top(); if (callee.do_combined_parameters) { uint32_t image_id = args[2]; auto *image = compiler.maybe_get_backing_variable(image_id); if (image) image_id = image->self; uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3]; auto *sampler = compiler.maybe_get_backing_variable(sampler_id); if (sampler) sampler_id = sampler->self; uint32_t combined_id = args[1]; auto &combined_type = compiler.get(args[0]); register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth); } } // For function calls, we need to remap IDs which are function parameters into global variables. // This information is statically known from the current place in the call stack. // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know // which backing variable the image/sample came from. VariableID image_id = remap_parameter(args[2]); VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { return combined.image_id == image_id && combined.sampler_id == sampler_id; }); if (itr == end(compiler.combined_image_samplers)) { uint32_t sampled_type; uint32_t combined_module_id; if (is_fetch) { // Have to invent the sampled image type. sampled_type = compiler.ir.increase_bound_by(1); auto &type = compiler.set(sampled_type); type = compiler.expression_type(args[2]); type.self = sampled_type; type.basetype = SPIRType::SampledImage; type.image.depth = false; combined_module_id = 0; } else { sampled_type = args[0]; combined_module_id = args[1]; } auto id = compiler.ir.increase_bound_by(2); auto type_id = id + 0; auto combined_id = id + 1; // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type. 
// We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes. auto &type = compiler.set(type_id); auto &base = compiler.get(sampled_type); type = base; type.pointer = true; type.storage = StorageClassUniformConstant; type.parent_type = type_id; // Build new variable. compiler.set(combined_id, type_id, StorageClassUniformConstant, 0); // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. bool relaxed_precision = (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) || (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) || (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); if (relaxed_precision) compiler.set_decoration(combined_id, DecorationRelaxedPrecision); // Propagate the array type for the original image as well. auto *var = compiler.maybe_get_backing_variable(image_id); if (var) { auto &parent_type = compiler.get(var->basetype); type.array = parent_type.array; type.array_size_literal = parent_type.array_size_literal; } compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id }); } return true; } VariableID Compiler::build_dummy_sampler_for_combined_images() { DummySamplerForCombinedImageHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); if (handler.need_dummy_sampler) { uint32_t offset = ir.increase_bound_by(3); auto type_id = offset + 0; auto ptr_type_id = offset + 1; auto var_id = offset + 2; SPIRType sampler_type; auto &sampler = set(type_id); sampler.basetype = SPIRType::Sampler; auto &ptr_sampler = set(ptr_type_id); ptr_sampler = sampler; ptr_sampler.self = type_id; ptr_sampler.storage = StorageClassUniformConstant; ptr_sampler.pointer = true; ptr_sampler.parent_type = type_id; set(var_id, ptr_type_id, 
StorageClassUniformConstant, 0); set_name(var_id, "SPIRV_Cross_DummySampler"); dummy_sampler_id = var_id; return var_id; } else return 0; } void Compiler::build_combined_image_samplers() { ir.for_each_typed_id([&](uint32_t, SPIRFunction &func) { func.combined_parameters.clear(); func.shadow_arguments.clear(); func.do_combined_parameters = true; }); combined_image_samplers.clear(); CombinedImageSamplerHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); } SmallVector Compiler::get_specialization_constants() const { SmallVector spec_consts; ir.for_each_typed_id([&](uint32_t, const SPIRConstant &c) { if (c.specialization && has_decoration(c.self, DecorationSpecId)) spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) }); }); return spec_consts; } SPIRConstant &Compiler::get_constant(ConstantID id) { return get(id); } const SPIRConstant &Compiler::get_constant(ConstantID id) const { return get(id); } static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks, unordered_set &visit_cache) { // This block accesses the variable. if (blocks.find(block) != end(blocks)) return false; // We are at the end of the CFG. if (cfg.get_succeeding_edges(block).empty()) return true; // If any of our successors have a path to the end, there exists a path from block. for (auto &succ : cfg.get_succeeding_edges(block)) { if (visit_cache.count(succ) == 0) { if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache)) return true; visit_cache.insert(succ); } } return false; } void Compiler::analyze_parameter_preservation( SPIRFunction &entry, const CFG &cfg, const unordered_map> &variable_to_blocks, const unordered_map> &complete_write_blocks) { for (auto &arg : entry.arguments) { // Non-pointers are always inputs. 
auto &type = get(arg.type); if (!type.pointer) continue; // Opaque argument types are always in bool potential_preserve; switch (type.basetype) { case SPIRType::Sampler: case SPIRType::Image: case SPIRType::SampledImage: case SPIRType::AtomicCounter: potential_preserve = false; break; default: potential_preserve = true; break; } if (!potential_preserve) continue; auto itr = variable_to_blocks.find(arg.id); if (itr == end(variable_to_blocks)) { // Variable is never accessed. continue; } // We have accessed a variable, but there was no complete writes to that variable. // We deduce that we must preserve the argument. itr = complete_write_blocks.find(arg.id); if (itr == end(complete_write_blocks)) { arg.read_count++; continue; } // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function. // Major case here is if a function is // void foo(int &var) { if (cond) var = 10; } // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, // because if we don't write anything whatever we put into the function must return back to the caller. unordered_set visit_cache; if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache)) arg.read_count++; } } Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_, SPIRFunction &entry_) : compiler(compiler_) , entry(entry_) { } bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &) { // Only analyze within this function. 
return false; } void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block) { current_block = █ // If we're branching to a block which uses OpPhi, in GLSL // this will be a variable write when we branch, // so we need to track access to these variables as well to // have a complete picture. const auto test_phi = [this, &block](uint32_t to) { auto &next = compiler.get(to); for (auto &phi : next.phi_variables) { if (phi.parent == block.self) { accessed_variables_to_block[phi.function_variable].insert(block.self); // Phi variables are also accessed in our target branch block. accessed_variables_to_block[phi.function_variable].insert(next.self); notify_variable_access(phi.local_variable, block.self); } } }; switch (block.terminator) { case SPIRBlock::Direct: notify_variable_access(block.condition, block.self); test_phi(block.next_block); break; case SPIRBlock::Select: notify_variable_access(block.condition, block.self); test_phi(block.true_block); test_phi(block.false_block); break; case SPIRBlock::MultiSelect: { notify_variable_access(block.condition, block.self); auto &cases = compiler.get_case_list(block); for (auto &target : cases) test_phi(target.block); if (block.default_block) test_phi(block.default_block); break; } default: break; } } void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) { if (id == 0) return; // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. 
auto itr = access_chain_children.find(id); if (itr != end(access_chain_children)) for (auto child_id : itr->second) notify_variable_access(child_id, block); if (id_is_phi_variable(id)) accessed_variables_to_block[id].insert(block); else if (id_is_potential_temporary(id)) accessed_temporaries_to_block[id].insert(block); } bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const { if (id >= compiler.get_current_id_bound()) return false; auto *var = compiler.maybe_get(id); return var && var->phi_variable; } bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const { if (id >= compiler.get_current_id_bound()) return false; // Temporaries are not created before we start emitting code. return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); } bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block) { switch (block.terminator) { case SPIRBlock::Return: if (block.return_value) notify_variable_access(block.return_value, block.self); break; case SPIRBlock::Select: case SPIRBlock::MultiSelect: notify_variable_access(block.condition, block.self); break; default: break; } return true; } bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { // Keep track of the types of temporaries, so we can hoist them out as necessary. uint32_t result_type, result_id; if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) result_id_to_type[result_id] = result_type; switch (op) { case OpStore: { if (length < 2) return false; ID ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); // If we store through an access chain, we have a partial write. 
if (var) { accessed_variables_to_block[var->self].insert(current_block->self); if (var->self == ptr) complete_write_variables_to_block[var->self].insert(current_block->self); else partial_write_variables_to_block[var->self].insert(current_block->self); } // args[0] might be an access chain we have to track use of. notify_variable_access(args[0], current_block->self); // Might try to store a Phi variable here. notify_variable_access(args[1], current_block->self); break; } case OpAccessChain: case OpInBoundsAccessChain: case OpPtrAccessChain: { if (length < 3) return false; // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. uint32_t ptr = args[2]; auto *var = compiler.maybe_get(ptr); if (var) { accessed_variables_to_block[var->self].insert(current_block->self); access_chain_children[args[1]].insert(var->self); } // args[2] might be another access chain we have to track use of. for (uint32_t i = 2; i < length; i++) { notify_variable_access(args[i], current_block->self); access_chain_children[args[1]].insert(args[i]); } // Also keep track of the access chain pointer itself. // In exceptionally rare cases, we can end up with a case where // the access chain is generated in the loop body, but is consumed in continue block. // This means we need complex loop workarounds, and we must detect this via CFG analysis. notify_variable_access(args[1], current_block->self); // The result of an access chain is a fixed expression and is not really considered a temporary. auto &e = compiler.set(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); // Other backends might use SPIRAccessChain for this later. 
compiler.ir.ids[args[1]].set_allow_type_rewrite(); access_chain_expressions.insert(args[1]); break; } case OpCopyMemory: { if (length < 2) return false; ID lhs = args[0]; ID rhs = args[1]; auto *var = compiler.maybe_get_backing_variable(lhs); // If we store through an access chain, we have a partial write. if (var) { accessed_variables_to_block[var->self].insert(current_block->self); if (var->self == lhs) complete_write_variables_to_block[var->self].insert(current_block->self); else partial_write_variables_to_block[var->self].insert(current_block->self); } // args[0:1] might be access chains we have to track use of. for (uint32_t i = 0; i < 2; i++) notify_variable_access(args[i], current_block->self); var = compiler.maybe_get_backing_variable(rhs); if (var) accessed_variables_to_block[var->self].insert(current_block->self); break; } case OpCopyObject: { if (length < 3) return false; auto *var = compiler.maybe_get_backing_variable(args[2]); if (var) accessed_variables_to_block[var->self].insert(current_block->self); // Might be an access chain which we have to keep track of. notify_variable_access(args[1], current_block->self); if (access_chain_expressions.count(args[2])) access_chain_expressions.insert(args[1]); // Might try to copy a Phi variable here. notify_variable_access(args[2], current_block->self); break; } case OpLoad: { if (length < 3) return false; uint32_t ptr = args[2]; auto *var = compiler.maybe_get_backing_variable(ptr); if (var) accessed_variables_to_block[var->self].insert(current_block->self); // Loaded value is a temporary. notify_variable_access(args[1], current_block->self); // Might be an access chain we have to track use of. notify_variable_access(args[2], current_block->self); break; } case OpFunctionCall: { if (length < 3) return false; // Return value may be a temporary. 
if (compiler.get_type(args[0]).basetype != SPIRType::Void) notify_variable_access(args[1], current_block->self); length -= 3; args += 3; for (uint32_t i = 0; i < length; i++) { auto *var = compiler.maybe_get_backing_variable(args[i]); if (var) { accessed_variables_to_block[var->self].insert(current_block->self); // Assume we can get partial writes to this variable. partial_write_variables_to_block[var->self].insert(current_block->self); } // Cannot easily prove if argument we pass to a function is completely written. // Usually, functions write to a dummy variable, // which is then copied to in full to the real argument. // Might try to copy a Phi variable here. notify_variable_access(args[i], current_block->self); } break; } case OpSelect: { // In case of variable pointers, we might access a variable here. // We cannot prove anything about these accesses however. for (uint32_t i = 1; i < length; i++) { if (i >= 3) { auto *var = compiler.maybe_get_backing_variable(args[i]); if (var) { accessed_variables_to_block[var->self].insert(current_block->self); // Assume we can get partial writes to this variable. partial_write_variables_to_block[var->self].insert(current_block->self); } } // Might try to copy a Phi variable here. 
notify_variable_access(args[i], current_block->self); } break; } case OpExtInst: { for (uint32_t i = 4; i < length; i++) notify_variable_access(args[i], current_block->self); notify_variable_access(args[1], current_block->self); uint32_t extension_set = args[2]; if (compiler.get(extension_set).ext == SPIRExtension::GLSL) { auto op_450 = static_cast(args[3]); switch (op_450) { case GLSLstd450Modf: case GLSLstd450Frexp: { uint32_t ptr = args[5]; auto *var = compiler.maybe_get_backing_variable(ptr); if (var) { accessed_variables_to_block[var->self].insert(current_block->self); if (var->self == ptr) complete_write_variables_to_block[var->self].insert(current_block->self); else partial_write_variables_to_block[var->self].insert(current_block->self); } break; } default: break; } } break; } case OpArrayLength: // Only result is a temporary. notify_variable_access(args[1], current_block->self); break; case OpLine: case OpNoLine: // Uses literals, but cannot be a phi variable or temporary, so ignore. break; // Atomics shouldn't be able to access function-local variables. // Some GLSL builtins access a pointer. case OpCompositeInsert: case OpVectorShuffle: // Specialize for opcode which contains literals. for (uint32_t i = 1; i < 4; i++) notify_variable_access(args[i], current_block->self); break; case OpCompositeExtract: // Specialize for opcode which contains literals. for (uint32_t i = 1; i < 3; i++) notify_variable_access(args[i], current_block->self); break; case OpImageWrite: for (uint32_t i = 0; i < length; i++) { // Argument 3 is a literal. 
if (i != 3) notify_variable_access(args[i], current_block->self); } break; case OpImageSampleImplicitLod: case OpImageSampleExplicitLod: case OpImageSparseSampleImplicitLod: case OpImageSparseSampleExplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjExplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseSampleProjExplicitLod: case OpImageFetch: case OpImageSparseFetch: case OpImageRead: case OpImageSparseRead: for (uint32_t i = 1; i < length; i++) { // Argument 4 is a literal. if (i != 4) notify_variable_access(args[i], current_block->self); } break; case OpImageSampleDrefImplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSparseSampleDrefImplicitLod: case OpImageSparseSampleDrefExplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSparseSampleProjDrefImplicitLod: case OpImageSparseSampleProjDrefExplicitLod: case OpImageGather: case OpImageSparseGather: case OpImageDrefGather: case OpImageSparseDrefGather: for (uint32_t i = 1; i < length; i++) { // Argument 5 is a literal. if (i != 5) notify_variable_access(args[i], current_block->self); } break; default: { // Rather dirty way of figuring out where Phi variables are used. // As long as only IDs are used, we can scan through instructions and try to find any evidence that // the ID of a variable has been used. // There are potential false positives here where a literal is used in-place of an ID, // but worst case, it does not affect the correctness of the compile. // Exhaustive analysis would be better here, but it's not worth it for now. 
for (uint32_t i = 0; i < length; i++) notify_variable_access(args[i], current_block->self); break; } } return true; } Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_) : compiler(compiler_) , variable_id(variable_id_) { } bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &) { return false; } bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { switch (op) { case OpStore: if (length < 2) return false; if (args[0] == variable_id) { static_expression = args[1]; write_count++; } break; case OpLoad: if (length < 3) return false; if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. return false; break; case OpAccessChain: case OpInBoundsAccessChain: case OpPtrAccessChain: if (length < 3) return false; if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. return false; break; default: break; } return true; } void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler, bool single_function) { auto &cfg = *function_cfgs.find(entry.self)->second; // For each variable which is statically accessed. for (auto &accessed_var : handler.accessed_variables_to_block) { auto &blocks = accessed_var.second; auto &var = get(accessed_var.first); auto &type = expression_type(accessed_var.first); // Only consider function local variables here. // If we only have a single function in our CFG, private storage is also fine, // since it behaves like a function local variable. bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate); if (!allow_lut) continue; // We cannot be a phi variable. if (var.phi_variable) continue; // Only consider arrays here. 
if (type.array.empty()) continue; // If the variable has an initializer, make sure it is a constant expression. uint32_t static_constant_expression = 0; if (var.initializer) { if (ir.ids[var.initializer].get_type() != TypeConstant) continue; static_constant_expression = var.initializer; // There can be no stores to this variable, we have now proved we have a LUT. if (handler.complete_write_variables_to_block.count(var.self) != 0 || handler.partial_write_variables_to_block.count(var.self) != 0) continue; } else { // We can have one, and only one write to the variable, and that write needs to be a constant. // No partial writes allowed. if (handler.partial_write_variables_to_block.count(var.self) != 0) continue; auto itr = handler.complete_write_variables_to_block.find(var.self); // No writes? if (itr == end(handler.complete_write_variables_to_block)) continue; // We write to the variable in more than one block. auto &write_blocks = itr->second; if (write_blocks.size() != 1) continue; // The write needs to happen in the dominating block. DominatorBuilder builder(cfg); for (auto &block : blocks) builder.add_block(block); uint32_t dominator = builder.get_dominator(); // The complete write happened in a branch or similar, cannot deduce static expression. if (write_blocks.count(dominator) == 0) continue; // Find the static expression for this variable. StaticExpressionAccessHandler static_expression_handler(*this, var.self); traverse_all_reachable_opcodes(get(dominator), static_expression_handler); // We want one, and exactly one write if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0) continue; // Is it a constant expression? if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant) continue; // We found a LUT! 
// Tail of a definition that begins before this span; its code is reproduced unchanged.
			static_constant_expression = static_expression_handler.static_expression;
		}

		get(static_constant_expression).is_used_as_lut = true;
		var.static_expression = static_constant_expression;
		var.statically_assigned = true;
		var.remapped_variable = true;
	}
}

// Runs `handler` over every reachable opcode of `entry` and then uses the function's CFG to:
//  1. record a loop dominator (or SPIRBlock::NoDominator) on every block of the function,
//  2. pick a dominating block for each accessed local variable and register the variable there,
//  3. decide which accessed temporaries must be hoisted / declared in their dominating block,
//  4. register variables touched by exactly one continue block as loop variables on the loop header,
//     provided the initializer is statically known and the variable is unused after the loop merge.
// The CFG for `entry` must already exist in function_cfgs (the lookup result is dereferenced unchecked).
void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
{
	// First, we map out all variable access within a function.
	// Essentially a map of block -> { variables accessed in the basic block }
	traverse_all_reachable_opcodes(entry, handler);

	auto &cfg = *function_cfgs.find(entry.self)->second;

	// Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
	analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
	                               handler.complete_write_variables_to_block);

	// Variable ID -> continue block that touches it. 0 means "none seen yet",
	// ~0u means "seen in more than one continue block" (not a candidate).
	unordered_map potential_loop_variables;

	// Find the loop dominator block for each block.
	for (auto &block_id : entry.blocks)
	{
		auto &block = get(block_id);

		auto itr = ir.continue_block_to_loop_header.find(block_id);
		if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
		{
			// Continue block might be unreachable in the CFG, but we still like to know the loop dominator.
			// Edge case is when continue block is also the loop header, don't set the dominator in this case.
			block.loop_dominator = itr->second;
		}
		else
		{
			uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
			if (loop_dominator != block_id)
				block.loop_dominator = loop_dominator;
			else
				block.loop_dominator = SPIRBlock::NoDominator;
		}
	}

	// For each variable which is statically accessed.
	for (auto &var : handler.accessed_variables_to_block)
	{
		// Only deal with variables which are considered local variables in this function.
		if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
		    end(entry.local_variables))
			continue;

		DominatorBuilder builder(cfg);
		auto &blocks = var.second;
		auto &type = expression_type(var.first);

		// Figure out which block is dominating all accesses of those variables.
		for (auto &block : blocks)
		{
			// If we're accessing a variable inside a continue block, this variable might be a loop variable.
			// We can only use loop variables with scalars, as we cannot track static expressions for vectors.
			if (is_continue(block))
			{
				// Potentially awkward case to check for.
				// We might have a variable inside a loop, which is touched by the continue block,
				// but is not actually a loop variable.
				// The continue block is dominated by the inner part of the loop, which does not make sense in high-level
				// language output because it will be declared before the body,
				// so we will have to lift the dominator up to the relevant loop header instead.
				builder.add_block(ir.continue_block_to_loop_header[block]);

				// Arrays or structs cannot be loop variables.
				if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
				{
					// The variable is used in multiple continue blocks, this is not a loop
					// candidate, signal that by setting block to -1u.
					auto &potential = potential_loop_variables[var.first];

					if (potential == 0)
						potential = block;
					else
						potential = ~(0u);
				}
			}
			builder.add_block(block);
		}

		builder.lift_continue_block_dominator();

		// Add it to a per-block list of variables.
		BlockID dominating_block = builder.get_dominator();

		// For variables whose dominating block is inside a loop, there is a risk that these variables
		// actually need to be preserved across loop iterations. We can express this by adding
		// a "read" access to the loop header.
		// In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
		// Should that fail, we look for the outermost loop header and tack on an access there.
		// Phi nodes cannot have this problem.
		if (dominating_block)
		{
			auto &variable = get(var.first);
			if (!variable.phi_variable)
			{
				auto *block = &get(dominating_block);
				bool preserve = may_read_undefined_variable_in_block(*block, var.first);
				if (preserve)
				{
					// Find the outermost loop scope.
					while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
						block = &get(block->loop_dominator);

					if (block->self != dominating_block)
					{
						builder.add_block(block->self);
						dominating_block = builder.get_dominator();
					}
				}
			}
		}

		// If all blocks here are dead code, this will be 0, so the variable in question
		// will be completely eliminated.
		if (dominating_block)
		{
			auto &block = get(dominating_block);
			block.dominated_variables.push_back(var.first);
			get(var.first).dominator = dominating_block;
		}
	}

	// Same domination analysis, now for SSA temporaries.
	for (auto &var : handler.accessed_temporaries_to_block)
	{
		auto itr = handler.result_id_to_type.find(var.first);

		if (itr == end(handler.result_id_to_type))
		{
			// We found a false positive ID being used, ignore.
			// This should probably be an assert.
			continue;
		}

		// There is no point in doing domination analysis for opaque types.
		auto &type = get(itr->second);
		if (type_is_opaque_value(type))
			continue;

		DominatorBuilder builder(cfg);
		bool force_temporary = false;
		bool used_in_header_hoisted_continue_block = false;

		// Figure out which block is dominating all accesses of those temporaries.
		auto &blocks = var.second;
		for (auto &block : blocks)
		{
			builder.add_block(block);

			if (blocks.size() != 1 && is_continue(block))
			{
				// The risk here is that inner loop can dominate the continue block.
				// Any temporary we access in the continue block must be declared before the loop.
				// This is moot for complex loops however.
				auto &loop_header_block = get(ir.continue_block_to_loop_header[block]);
				assert(loop_header_block.merge == SPIRBlock::MergeLoop);
				builder.add_block(loop_header_block.self);
				used_in_header_hoisted_continue_block = true;
			}
		}

		uint32_t dominating_block = builder.get_dominator();

		if (blocks.size() != 1 && is_single_block_loop(dominating_block))
		{
			// Awkward case, because the loop header is also the continue block,
			// so hoisting to loop header does not help.
			force_temporary = true;
		}

		if (dominating_block)
		{
			// If we touch a variable in the dominating block, this is the expected setup.
			// SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
			bool first_use_is_dominator = blocks.count(dominating_block) != 0;

			if (!first_use_is_dominator || force_temporary)
			{
				if (handler.access_chain_expressions.count(var.first))
				{
					// Exceptionally rare case.
					// We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
					// Rather than do that, we force the indexing expressions to be declared in the right scope by
					// tracking their usage to that end. There is no temporary to hoist.
					// However, we still need to observe declaration order of the access chain.

					if (used_in_header_hoisted_continue_block)
					{
						// For this scenario, we used an access chain inside a continue block where we also registered an access to header block.
						// This is a problem as we need to declare an access chain properly first with full definition.
						// We cannot use temporaries for these expressions,
						// so we must make sure the access chain is declared ahead of time.
						// Force a complex for loop to deal with this.
						// TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
						auto &loop_header_block = get(dominating_block);
						assert(loop_header_block.merge == SPIRBlock::MergeLoop);
						loop_header_block.complex_continue = true;
					}
				}
				else
				{
					// This should be very rare, but if we try to declare a temporary inside a loop,
					// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
					// we should actually emit the temporary outside the loop.
					hoisted_temporaries.insert(var.first);
					forced_temporaries.insert(var.first);

					auto &block_temporaries = get(dominating_block).declare_temporary;
					block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
				}
			}
			else if (blocks.size() > 1)
			{
				// Keep track of the temporary as we might have to declare this temporary.
				// This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
				// In this case, the header is actually inside the for (;;) {} block, and we have problems.
				// What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
				// declares the temporary.
				auto &block_temporaries = get(dominating_block).potential_declare_temporary;
				block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
			}
		}
	}

	// Scratch set reused (cleared) for every CFG walk below.
	unordered_set seen_blocks;

	// Now, try to analyze whether or not these variables are actually loop variables.
	for (auto &loop_variable : potential_loop_variables)
	{
		auto &var = get(loop_variable.first);
		auto dominator = var.dominator;
		BlockID block = loop_variable.second;

		// The variable was accessed in multiple continue blocks, ignore.
		if (block == BlockID(~(0u)) || block == BlockID(0))
			continue;

		// Dead code.
		if (dominator == ID(0))
			continue;

		BlockID header = 0;

		// Find the loop header for this block if we are a continue block.
		{
			auto itr = ir.continue_block_to_loop_header.find(block);
			if (itr != end(ir.continue_block_to_loop_header))
			{
				header = itr->second;
			}
			else if (get(block).continue_block == block)
			{
				// Also check for self-referential continue block.
				header = block;
			}
		}

		assert(header);
		auto &header_block = get(header);
		auto &blocks = handler.accessed_variables_to_block[loop_variable.first];

		// If a loop variable is not used before the loop, it's probably not a loop variable.
		bool has_accessed_variable = blocks.count(header) != 0;

		// Now, there are two conditions we need to meet for the variable to be a loop variable.
		// 1. The dominating block must have a branch-free path to the loop header,
		// this way we statically know which expression should be part of the loop variable initializer.

		// Walk from the dominator, if there is one straight edge connecting
		// dominator and loop header, we statically know the loop initializer.
		bool static_loop_init = true;
		while (dominator != header)
		{
			if (blocks.count(dominator) != 0)
				has_accessed_variable = true;

			auto &succ = cfg.get_succeeding_edges(dominator);
			if (succ.size() != 1)
			{
				static_loop_init = false;
				break;
			}

			auto &pred = cfg.get_preceding_edges(succ.front());
			if (pred.size() != 1 || pred.front() != dominator)
			{
				static_loop_init = false;
				break;
			}

			dominator = succ.front();
		}

		if (!static_loop_init || !has_accessed_variable)
			continue;

		// The second condition we need to meet is that no access after the loop
		// merge can occur. Walk the CFG to see if we find anything.

		seen_blocks.clear();
		cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
			// We found a block which accesses the variable outside the loop.
			if (blocks.find(walk_block) != end(blocks))
				static_loop_init = false;
			return true;
		});

		if (!static_loop_init)
			continue;

		// We have a loop variable.
		header_block.loop_variables.push_back(loop_variable.first);
		// Need to sort here as variables come from an unordered container, and pushing stuff in wrong order
		// will break reproducibility in regression runs.
// Final statements of Compiler::analyze_variable_scope(): re-sort the header's list and tag the variable.
		sort(begin(header_block.loop_variables), end(header_block.loop_variables));
		get(loop_variable.first).loop_variable = true;
	}
}

// Scans the instructions of `block` in order, looking for the first recognised use of `var`.
// Returns false only when that use is an OpStore/OpCopyMemory whose first operand is `var`.
// Returns true when `var` first shows up as an operand of an access chain, select, phi, copy,
// load or function call, and also when no recognised use is found at all (conservative default).
bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
{
	for (auto &op : block.ops)
	{
		auto *ops = stream(op);
		switch (op.op)
		{
		case OpStore:
		case OpCopyMemory:
			// ops[0] is the pointer being written.
			if (ops[0] == var)
				return false;
			break;

		case OpAccessChain:
		case OpInBoundsAccessChain:
		case OpPtrAccessChain:
			// Access chains are generally used to partially read and write. It's too hard to analyze
			// if all constituents are written fully before continuing, so just assume it's preserved.
			// This is the same as the parameter preservation analysis.
			if (ops[2] == var)
				return true;
			break;

		case OpSelect:
			// Variable pointers.
			// We might read before writing.
			if (ops[3] == var || ops[4] == var)
				return true;
			break;

		case OpPhi:
		{
			// Variable pointers.
			// We might read before writing.
			if (op.length < 2)
				break;

			// Operands after the first two words are visited with a stride of two;
			// only the first word of each pair is compared.
			uint32_t count = op.length - 2;
			for (uint32_t i = 0; i < count; i += 2)
				if (ops[i + 2] == var)
					return true;
			break;
		}

		case OpCopyObject:
		case OpLoad:
			if (ops[2] == var)
				return true;
			break;

		case OpFunctionCall:
		{
			if (op.length < 3)
				break;

			// May read before writing.
			// Every word after the first three is checked as a potential argument.
			uint32_t count = op.length - 3;
			for (uint32_t i = 0; i < count; i++)
				if (ops[i + 3] == var)
					return true;
			break;
		}

		default:
			break;
		}
	}

	// Not accessed somehow, at least not in a usual fashion.
	// It's likely accessed in a branch, so assume we must preserve.
// Final statement of Compiler::may_read_undefined_variable_in_block().
	return true;
}

// Returns ir.get_buffer_block_flags() for the object registered under `id`.
Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
	return ir.get_buffer_block_flags(get(id));
}

// Computes the single base type shared by `type`.
// For a struct, every member (recursively) must agree; on disagreement the function returns false.
// A struct with no members yields true with `base_type` left as SPIRType::Unknown.
// For anything else, `base_type` is simply type.basetype and the result is true.
bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
{
	if (type.basetype == SPIRType::Struct)
	{
		base_type = SPIRType::Unknown;
		for (auto &member_type : type.member_types)
		{
			SPIRType::BaseType member_base;
			if (!get_common_basic_type(get(member_type), member_base))
				return false;

			// The first member fixes the candidate type; later members must match it.
			if (base_type == SPIRType::Unknown)
				base_type = member_base;
			else if (base_type != member_base)
				return false;
		}
		return true;
	}
	else
	{
		base_type = type.basetype;
		return true;
	}
}

// Records builtin-specific state on the owning compiler:
//  - ClipDistance / CullDistance: stores type.array[0] as the respective distance count.
//    Throws via SPIRV_CROSS_THROW if array_size_literal[0] is false or the size is 0.
//  - Position: sets compiler.position_invariant when `decoration_flags` has DecorationInvariant.
// Other builtins are ignored here.
// NOTE(review): index 0 of type.array / type.array_size_literal is read without an emptiness check;
// presumably callers only pass array types for Clip/CullDistance - confirm.
void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
                                                    const Bitset &decoration_flags)
{
	// If used, we will need to explicitly declare a new array size for these builtins.

	if (builtin == BuiltInClipDistance)
	{
		if (!type.array_size_literal[0])
			SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
		uint32_t array_size = type.array[0];
		if (array_size == 0)
			SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
		compiler.clip_distance_count = array_size;
	}
	else if (builtin == BuiltInCullDistance)
	{
		if (!type.array_size_literal[0])
			SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
		uint32_t array_size = type.array[0];
		if (array_size == 0)
			SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
		compiler.cull_distance_count = array_size;
	}
	else if (builtin == BuiltInPosition)
	{
		if (decoration_flags.get(DecorationInvariant))
			compiler.position_invariant = true;
	}
}

// Marks the builtin behind `id` as active if `id` is a variable with metadata.
// With `allow_blocks`, builtin members of a Block-decorated type are marked as well.
void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
{
	// Only handle plain variables here.
	// Builtins which are part of a block are handled in AccessChain.
	// If allow_blocks is used however, this is to handle initializers of blocks,
	// which implies that all members are written to.
// Body of Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks).
	auto *var = compiler.maybe_get(id);
	auto *m = compiler.ir.find_meta(id);
	if (var && m)
	{
		auto &type = compiler.get(var->basetype);
		auto &decorations = m->decoration;
		// Input storage feeds the input set; every other storage class feeds the output set.
		auto &flags = type.storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
		if (decorations.builtin)
		{
			flags.set(decorations.builtin_type);
			handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
		}
		else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock))
		{
			uint32_t member_count = uint32_t(type.member_types.size());
			for (uint32_t i = 0; i < member_count; i++)
			{
				if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn))
				{
					auto &member_type = compiler.get(type.member_types[i]);
					BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn));
					flags.set(builtin);
					handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i));
				}
			}
		}
	}
}

// Plain-variable variant: block members are not inspected.
void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
{
	add_if_builtin(id, false);
}

// Variant which also marks builtin members of Block-decorated types.
void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
{
	add_if_builtin(id, true);
}

// Opcode callback. For the opcodes handled below, every pointer/object operand is passed to
// add_if_builtin(); access chains additionally walk the type hierarchy to find builtin struct members.
// Returns false when `length` is too small for the operands the case reads, true otherwise.
bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpStore:
		if (length < 1)
			return false;

		add_if_builtin(args[0]);
		break;

	case OpCopyMemory:
		if (length < 2)
			return false;

		add_if_builtin(args[0]);
		add_if_builtin(args[1]);
		break;

	case OpCopyObject:
	case OpLoad:
		if (length < 3)
			return false;

		add_if_builtin(args[2]);
		break;

	case OpSelect:
		if (length < 5)
			return false;

		add_if_builtin(args[3]);
		add_if_builtin(args[4]);
		break;

	case OpPhi:
	{
		if (length < 2)
			return false;

		// Skip the first two words, then visit every second word.
		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
			add_if_builtin(args[i]);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		// Skip the first three words; all remaining words are checked.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
			add_if_builtin(args[i]);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 4)
			return false;

		// Only consider global variables, cannot consider variables in functions yet, or other
		// access chains as they have not been created yet.
		auto *var = compiler.maybe_get(args[2]);
		if (!var)
			break;

		// Required if we access chain into builtins like gl_GlobalInvocationID.
		add_if_builtin(args[2]);

		// Start traversing type hierarchy at the proper non-pointer types.
		auto *type = &compiler.get_variable_data_type(*var);

		auto &flags = var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

		// From here on args[i] is the i-th index of the chain.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			// Pointers
			if (opcode == OpPtrAccessChain && i == 0)
			{
				type = &compiler.get(type->parent_type);
				continue;
			}

			// Arrays
			if (!type->array.empty())
			{
				type = &compiler.get(type->parent_type);
			}
			// Structs
			else if (type->basetype == SPIRType::Struct)
			{
				uint32_t index = compiler.get(args[i]).scalar();

				// Member metadata may cover fewer members than the struct has.
				if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
				{
					auto &decorations = compiler.ir.meta[type->self].members[index];
					if (decorations.builtin)
					{
						flags.set(decorations.builtin_type);
						handle_builtin(compiler.get(type->member_types[index]), decorations.builtin_type,
						               decorations.decoration_flags);
					}
				}

				// NOTE(review): `index` is not range-checked against member_types here - confirm inputs are validated.
				type = &compiler.get(type->member_types[index]);
			}
			else
			{
				// No point in traversing further. We won't find any extra builtins.
// Final statements of Compiler::ActiveBuiltinHandler::handle().
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	return true;
}

// Recomputes active_input_builtins / active_output_builtins and the clip/cull distance counts
// by running an ActiveBuiltinHandler over all opcodes reachable from the default entry point.
void Compiler::update_active_builtins()
{
	active_input_builtins.reset();
	active_output_builtins.reset();
	cull_distance_count = 0;
	clip_distance_count = 0;
	ActiveBuiltinHandler handler(*this);
	traverse_all_reachable_opcodes(get(ir.default_entry_point), handler);

	ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;

		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0))
			handler.add_if_builtin_or_block(var.self);
	});
}

// Returns whether this shader uses a builtin of the storage class
// Only StorageClassInput and StorageClassOutput are tracked; any other storage class yields false.
bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
{
	const Bitset *flags;
	switch (storage)
	{
	case StorageClassInput:
		flags = &active_input_builtins;
		break;
	case StorageClassOutput:
		flags = &active_output_builtins;
		break;

	default:
		return false;
	}
	return flags->get(builtin);
}

// Fills comparison_ids and need_subpass_input from the opcodes reachable from the default entry point.
// The usage handler is run twice on purpose (see the comment before the second traversal).
void Compiler::analyze_image_and_sampler_usage()
{
	CombinedImageSamplerDrefHandler dref_handler(*this);
	traverse_all_reachable_opcodes(get(ir.default_entry_point), dref_handler);

	CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
	traverse_all_reachable_opcodes(get(ir.default_entry_point), handler);

	// Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
	// down to main().
	// In the second pass, we can propagate up forced depth state coming from main() up into leaf functions.
	handler.dependency_hierarchy.clear();
	traverse_all_reachable_opcodes(get(ir.default_entry_point), handler);

	comparison_ids = move(handler.comparison_ids);
	need_subpass_input = handler.need_subpass_input;

	// Forward information from separate images and samplers into combined image samplers.
for (auto &combined : combined_image_samplers) if (comparison_ids.count(combined.sampler_id)) comparison_ids.insert(combined.combined_id); } bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t) { // Mark all sampled images which are used with Dref. switch (opcode) { case OpImageSampleDrefExplicitLod: case OpImageSampleDrefImplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageSparseSampleProjDrefImplicitLod: case OpImageSparseSampleDrefImplicitLod: case OpImageSparseSampleProjDrefExplicitLod: case OpImageSparseSampleDrefExplicitLod: case OpImageDrefGather: case OpImageSparseDrefGather: dref_combined_samplers.insert(args[2]); return true; default: break; } return true; } const CFG &Compiler::get_cfg_for_current_function() const { assert(current_function); return get_cfg_for_function(current_function->self); } const CFG &Compiler::get_cfg_for_function(uint32_t id) const { auto cfg_itr = function_cfgs.find(id); assert(cfg_itr != end(function_cfgs)); assert(cfg_itr->second); return *cfg_itr->second; } void Compiler::build_function_control_flow_graphs_and_analyze() { CFGBuilder handler(*this); handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get(ir.default_entry_point))); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); function_cfgs = move(handler.function_cfgs); bool single_function = function_cfgs.size() <= 1; for (auto &f : function_cfgs) { auto &func = get(f.first); AnalyzeVariableScopeAccessHandler scope_handler(*this, func); analyze_variable_scope(func, scope_handler); find_function_local_luts(func, scope_handler, single_function); // Check if we can actually use the loop variables we found in analyze_variable_scope. // To use multiple initializers, we need the same type and qualifiers. 
for (auto block : func.blocks) { auto &b = get(block); if (b.loop_variables.size() < 2) continue; auto &flags = get_decoration_bitset(b.loop_variables.front()); uint32_t type = get(b.loop_variables.front()).basetype; bool invalid_initializers = false; for (auto loop_variable : b.loop_variables) { if (flags != get_decoration_bitset(loop_variable) || type != get(b.loop_variables.front()).basetype) { invalid_initializers = true; break; } } if (invalid_initializers) { for (auto loop_variable : b.loop_variables) get(loop_variable).loop_variable = false; b.loop_variables.clear(); } } } } Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_) : compiler(compiler_) { } bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t) { return true; } bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) { if (function_cfgs.find(func.self) == end(function_cfgs)) { function_cfgs[func.self].reset(new CFG(compiler, func)); return true; } else return false; } void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) { dependency_hierarchy[dst].insert(src); // Propagate up any comparison state if we're loading from one such variable. if (comparison_ids.count(src)) comparison_ids.insert(dst); } bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) return false; auto &func = compiler.get(args[2]); const auto *arg = &args[3]; length -= 3; for (uint32_t i = 0; i < length; i++) { auto &argument = func.arguments[i]; add_dependency(argument.id, arg[i]); } return true; } void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id) { // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. 
// Body of Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id).
	comparison_ids.insert(id);
	for (auto &dep_id : dependency_hierarchy[id])
		add_hierarchy_to_comparison_ids(dep_id);
}

// Opcode callback tracking how image/sampler IDs flow through loads, access chains and
// OpSampledImage, so that Dref usage can be propagated to the underlying resources.
// Returns false when the instruction is shorter than the operands read, true otherwise.
bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	{
		if (length < 3)
			return false;

		// args[1] (result) depends on args[2] (source pointer/object).
		add_dependency(args[1], args[2]);

		// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
		auto &type = compiler.get(args[0]);
		if (type.image.dim == DimSubpassData)
			need_subpass_input = true;

		// If we load a SampledImage and it will be used with Dref, propagate the state up.
		if (dref_combined_samplers.count(args[1]) != 0)
			add_hierarchy_to_comparison_ids(args[1]);
		break;
	}

	case OpSampledImage:
	{
		if (length < 4)
			return false;

		// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
		// This image must be a depth image.
		uint32_t result_id = args[1];
		uint32_t image = args[2];
		uint32_t sampler = args[3];

		if (dref_combined_samplers.count(result_id) != 0)
		{
			add_hierarchy_to_comparison_ids(image);

			// This sampler must be a SamplerComparisonState, and not a regular SamplerState.
			add_hierarchy_to_comparison_ids(sampler);

			// Mark the OpSampledImage itself as being comparison state.
			comparison_ids.insert(result_id);
		}
		return true;
	}

	default:
		break;
	}

	return true;
}

// True if metadata exists for `id` and its hlsl_is_magic_counter_buffer flag is set.
bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
{
	auto *m = ir.find_meta(id);
	return m && m->hlsl_is_magic_counter_buffer;
}

// Writes the counter buffer ID associated with `id` to `counter_id` and returns true,
// or returns false (leaving `counter_id` untouched) when none is recorded.
bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
{
	auto *m = ir.find_meta(id);

	// First, check for the proper decoration.
// Final statements of Compiler::buffer_get_hlsl_counter_buffer().
	if (m && m->hlsl_magic_counter_buffer != 0)
	{
		counter_id = m->hlsl_magic_counter_buffer;
		return true;
	}
	else
		return false;
}

// Creates a null constant of type `type` under `id`.
// Pointers and non-composite types are nulled directly. Arrays and structs are built from
// element constants which are created recursively under freshly allocated IDs
// (ir.increase_bound_by). Throws if an array's last size entry is not a literal.
void Compiler::make_constant_null(uint32_t id, uint32_t type)
{
	auto &constant_type = get(type);

	if (constant_type.pointer)
	{
		auto &constant = set(id, type);
		constant.make_null(constant_type);
	}
	else if (!constant_type.array.empty())
	{
		assert(constant_type.parent_type);
		// One null element constant is created and shared by every array slot.
		uint32_t parent_id = ir.increase_bound_by(1);
		make_constant_null(parent_id, constant_type.parent_type);

		if (!constant_type.array_size_literal.back())
			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");

		SmallVector elements(constant_type.array.back());
		for (uint32_t i = 0; i < constant_type.array.back(); i++)
			elements[i] = parent_id;
		set(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else if (!constant_type.member_types.empty())
	{
		// Each struct member gets its own null constant, using consecutive new IDs.
		uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
		SmallVector elements(constant_type.member_types.size());
		for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
		{
			make_constant_null(member_ids + i, constant_type.member_types[i]);
			elements[i] = member_ids + i;
		}
		set(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else
	{
		auto &constant = set(id, type);
		constant.make_null(constant_type);
	}
}

// Returns ir.declared_capabilities by reference.
const SmallVector &Compiler::get_declared_capabilities() const
{
	return ir.declared_capabilities;
}

// Returns ir.declared_extensions by reference.
const SmallVector &Compiler::get_declared_extensions() const
{
	return ir.declared_extensions;
}

// Convenience overload: never prefers the instance name as fallback.
std::string Compiler::get_remapped_declared_block_name(VariableID id) const
{
	return get_remapped_declared_block_name(id, false);
}

// Returns the name recorded in declared_block_names for `id` if there is one. Otherwise falls back
// to the variable's own name (when `fallback_prefer_instance_name` is set) or to the alias stored
// in the block type's metadata, using get_block_fallback_name(id) when that alias is missing or empty.
std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
{
	auto itr = declared_block_names.find(id);
	if (itr != end(declared_block_names))
	{
		return itr->second;
	}
	else
	{
		auto &var = get(id);

		if (fallback_prefer_instance_name)
		{
			return to_name(var.self);
		}
		else
		{
			auto &type = get(var.basetype);
// Final statements of Compiler::get_remapped_declared_block_name(uint32_t, bool).
			auto *type_meta = ir.find_meta(type.self);
			auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
			return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
		}
	}
}

// Decides whether reflection should report SSBO instance names rather than block type names.
// With known source information the answer is simply ir.source.hlsl. Without it, the function
// returns true if at least two storage-buffer variables share the same block type.
bool Compiler::reflection_ssbo_instance_name_is_significant() const
{
	if (ir.source.known)
	{
		// UAVs from HLSL source tend to be declared in a way where the type is reused
		// but the instance name is significant, and that's the name we should report.
		// For GLSL, SSBOs each have their own block type as that's how GLSL is written.
		return ir.source.hlsl;
	}

	unordered_set ssbo_type_ids;
	bool aliased_ssbo_types = false;

	// If we don't have any OpSource information, we need to perform some shaky heuristics.
	ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get(var.basetype);
		if (!type.pointer || var.storage == StorageClassFunction)
			return;

		// SSBO: either StorageBuffer storage, or Uniform storage with a BufferBlock-decorated type.
		bool ssbo = var.storage == StorageClassStorageBuffer ||
		            (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));

		if (ssbo)
		{
			if (ssbo_type_ids.count(type.self))
				aliased_ssbo_types = true;
			else
				ssbo_type_ids.insert(type.self);
		}
	});

	// If the block name is aliased, assume we have HLSL-style UAV declarations.
	return aliased_ssbo_types;
}

// Extracts the result type and result ID of an instruction into `result_type` / `result_id`.
// Returns false (outputs untouched) for opcodes listed as having no result, when fewer than
// two operand words exist, or when args[0] does not resolve through maybe_get().
bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, uint32_t length)
{
	// Most instructions follow the pattern of <result-type> <result-id> <arguments>.
	// There are some exceptions.
// Body of Compiler::instruction_to_result_type(): opcodes without a result come first.
	switch (op)
	{
	case OpStore:
	case OpCopyMemory:
	case OpCopyMemorySized:
	case OpImageWrite:
	case OpAtomicStore:
	case OpAtomicFlagClear:
	case OpEmitStreamVertex:
	case OpEndStreamPrimitive:
	case OpControlBarrier:
	case OpMemoryBarrier:
	case OpGroupWaitEvents:
	case OpRetainEvent:
	case OpReleaseEvent:
	case OpSetUserEventStatus:
	case OpCaptureEventProfilingInfo:
	case OpCommitReadPipe:
	case OpCommitWritePipe:
	case OpGroupCommitReadPipe:
	case OpGroupCommitWritePipe:
	case OpLine:
	case OpNoLine:
		return false;

	default:
		// Only trust the <type, id> layout if the first word resolves through maybe_get().
		if (length > 1 && maybe_get(args[0]) != nullptr)
		{
			result_type = args[0];
			result_id = args[1];
			return true;
		}
		else
			return false;
	}
}

// Returns the union of the decoration flags of member `index` of `type` and, recursively,
// of all non-pointer members nested inside that member. An empty Bitset is returned when the
// type has no metadata or `index` is outside the recorded member metadata.
Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
{
	Bitset flags;
	auto *type_meta = ir.find_meta(type.self);

	if (type_meta)
	{
		auto &members = type_meta->members;
		if (index >= members.size())
			return flags;
		auto &dec = members[index];

		flags.merge_or(dec.decoration_flags);

		auto &member_type = get(type.member_types[index]);

		// If our member type is a struct, traverse all the child members as well recursively.
// Final statements of Compiler::combined_decoration_for_member().
		auto &member_childs = member_type.member_types;
		for (uint32_t i = 0; i < member_childs.size(); i++)
		{
			auto &child_member_type = get(member_childs[i]);
			// Pointer members are not followed.
			if (!child_member_type.pointer)
				flags.merge_or(combined_decoration_for_member(member_type, i));
		}
	}

	return flags;
}

// Returns true for the image formats listed below, false for every other format.
bool Compiler::is_desktop_only_format(spv::ImageFormat format)
{
	switch (format)
	{
	// Desktop-only formats
	case ImageFormatR11fG11fB10f:
	case ImageFormatR16f:
	case ImageFormatRgb10A2:
	case ImageFormatR8:
	case ImageFormatRg8:
	case ImageFormatR16:
	case ImageFormatRg16:
	case ImageFormatRgba16:
	case ImageFormatR16Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatR8Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatR8ui:
	case ImageFormatRg8ui:
	case ImageFormatR16ui:
	case ImageFormatRgb10a2ui:
	case ImageFormatR8i:
	case ImageFormatRg8i:
	case ImageFormatR16i:
		return true;
	default:
		break;
	}

	return false;
}

// An image is determined to be a depth image if it is marked as a depth image and is not also
// explicitly marked with a color format, or if there are any sample/gather compare operations on it.
// "Compare operations on it" is tracked via membership of `id` in comparison_ids.
bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
{
	return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id);
}

// True for non-pointer values whose base type is SampledImage, Image or Sampler.
bool Compiler::type_is_opaque_value(const SPIRType &type) const
{
	return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
	                         type.basetype == SPIRType::Sampler);
}

// Make these member functions so we can easily break on any force_recompile events.
void Compiler::force_recompile() { is_force_recompile = true; } void Compiler::force_recompile_guarantee_forward_progress() { force_recompile(); is_force_recompile_forward_progress = true; } bool Compiler::is_forcing_recompilation() const { return is_force_recompile; } void Compiler::clear_force_recompile() { is_force_recompile = false; is_force_recompile_forward_progress = false; } Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_) : compiler(compiler_) { } Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const { auto chain_itr = access_chain_to_physical_block.find(id); if (chain_itr != access_chain_to_physical_block.end()) return chain_itr->second; else return nullptr; } void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length) { uint32_t mask = *args; args++; length--; if (length && (mask & MemoryAccessVolatileMask) != 0) { args++; length--; } if (length && (mask & MemoryAccessAlignedMask) != 0) { uint32_t alignment = *args; auto *meta = find_block_meta(id); // This makes the assumption that the application does not rely on insane edge cases like: // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment. // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should // actually have alignment of 8 bytes, but this is too theoretical and awkward to support. // We could potentially keep track of any offset in the access chain, but it's // practically impossible for high level compilers to emit code like that, // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine. 
if (meta && alignment > meta->alignment) meta->alignment = alignment; } } bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const { auto &type = compiler.get(type_id); return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type); } uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const { if (type.storage == spv::StorageClassPhysicalStorageBufferEXT) return 8; else if (type.basetype == SPIRType::Struct) { uint32_t alignment = 0; for (auto &member_type : type.member_types) { uint32_t member_align = get_minimum_scalar_alignment(compiler.get(member_type)); if (member_align > alignment) alignment = member_align; } return alignment; } else return type.width / 8; } void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id) { if (type_is_bda_block_entry(type_id)) { auto &meta = physical_block_type_meta[type_id]; access_chain_to_physical_block[var_id] = &meta; auto &type = compiler.get(type_id); if (type.basetype != SPIRType::Struct) non_block_types.insert(type_id); if (meta.alignment == 0) meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type)); } } bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length) { // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type. // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment // requirements. switch (op) { case OpConvertUToPtr: case OpBitcast: case OpCompositeExtract: // Extract can begin a new chain if we had a struct or array of pointers as input. // We don't begin chains before we have a pure scalar pointer. 
// Remaining cases of Compiler::PhysicalStorageBufferPointerHandler::handle().
		setup_meta_chain(args[0], args[1]);
		break;

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpCopyObject:
	{
		// The result (args[1]) inherits the block metadata of its source (args[2]), if tracked.
		auto itr = access_chain_to_physical_block.find(args[2]);
		if (itr != access_chain_to_physical_block.end())
			access_chain_to_physical_block[args[1]] = itr->second;
		break;
	}

	case OpLoad:
	{
		setup_meta_chain(args[0], args[1]);
		// Memory operands, if any, start at word 3.
		if (length >= 4)
			mark_aligned_access(args[2], args + 3, length - 3);
		break;
	}

	case OpStore:
	{
		// Memory operands, if any, start at word 2.
		if (length >= 3)
			mark_aligned_access(args[0], args + 2, length - 2);
		break;
	}

	default:
		break;
	}

	return true;
}

// Follows parent_type links from `type_id` while the type is a PhysicalStorageBuffer pointer that
// is not yet a block-entry pointer, and returns the ID reached. The result is asserted to be a
// block-entry pointer type.
uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
{
	auto *type = &compiler.get(type_id);
	while (type->pointer && type->storage == StorageClassPhysicalStorageBufferEXT && !type_is_bda_block_entry(type_id))
	{
		type_id = type->parent_type;
		type = &compiler.get(type_id);
	}

	assert(type_is_bda_block_entry(type_id));
	return type_id;
}

// Registers in non_block_types every non-struct PhysicalStorageBuffer pointer found among the
// members of `type`, recursing into non-pointer struct members.
void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
{
	for (auto &member : type.member_types)
	{
		auto &subtype = compiler.get(member);
		if (subtype.basetype != SPIRType::Struct && subtype.pointer &&
		    subtype.storage == spv::StorageClassPhysicalStorageBufferEXT)
		{
			non_block_types.insert(get_base_non_block_type_id(member));
		}
		else if (subtype.basetype == SPIRType::Struct && !subtype.pointer)
			analyze_non_block_types_from_block(subtype);
	}
}

// Collects the physical storage buffer pointer types that are used or declared in blocks, and
// stores them (sorted) together with the per-type alignment metadata on the compiler.
void Compiler::analyze_non_block_pointer_types()
{
	PhysicalStorageBufferPointerHandler handler(*this);
	traverse_all_reachable_opcodes(get(ir.default_entry_point), handler);

	// Analyze any block declaration we have to make. It might contain
	// physical pointers to POD types which we never used, and thus never added to the list.
	// We'll need to add those pointer types to the set of types we declare.
// Final statements of Compiler::analyze_non_block_pointer_types().
	ir.for_each_typed_id([&](uint32_t, SPIRType &type) {
		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
			handler.analyze_non_block_types_from_block(type);
	});

	// Copy the set into the member list and sort it so the stored order is by ID.
	physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size());
	for (auto type : handler.non_block_types)
		physical_storage_non_block_pointer_types.push_back(type);
	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
	physical_storage_type_to_alignment = move(handler.physical_block_type_meta);
}

// Opcode callback of the interlock pre-pass. On Begin/EndInvocationInterlock it records which
// function contains the interlock. If begin and end are seen in different functions,
// split_function_case is set and traversal is stopped by returning false.
// NOTE(review): call_stack.back() is read without an emptiness check - presumably the stack is
// seeded elsewhere before traversal starts; confirm.
bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
{
	if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
	{
		if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
		{
			// Most complex case, we have no sensible way of dealing with this
			// other than taking the 100% conservative approach, exit early.
			split_function_case = true;
			return false;
		}
		else
		{
			interlock_function_id = call_stack.back();
			// If this call is performed inside control flow we have a problem.
auto &cfg = compiler.get_cfg_for_function(interlock_function_id); uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); if (!outside_control_flow) control_flow_interlock = true; } } return true; } void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) { current_block_id = block.self; } bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) return false; call_stack.push_back(args[2]); return true; } bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) { call_stack.pop_back(); return true; } bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) return false; if (args[2] == interlock_function_id) call_stack_is_interlocked = true; call_stack.push_back(args[2]); return true; } bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) { if (call_stack.back() == interlock_function_id) call_stack_is_interlocked = false; call_stack.pop_back(); return true; } void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) { if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || split_function_case) { compiler.interlocked_resources.insert(id); } } bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) { // Only care about critical section analysis if we have simple case. if (use_critical_section) { if (opcode == OpBeginInvocationInterlockEXT) { in_crit_sec = true; return true; } if (opcode == OpEndInvocationInterlockEXT) { // End critical section--nothing more to do. 
return false; } } // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. switch (opcode) { case OpLoad: { if (length < 3) return false; uint32_t ptr = args[2]; auto *var = compiler.maybe_get_backing_variable(ptr); // We're only concerned with buffer and image memory here. if (!var) break; switch (var->storage) { default: break; case StorageClassUniformConstant: { uint32_t result_type = args[0]; uint32_t id = args[1]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); break; } case StorageClassUniform: // Must have BufferBlock; we only care about SSBOs. if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) break; // fallthrough case StorageClassStorageBuffer: access_potential_resource(var->self); break; } break; } case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: { if (length < 3) return false; uint32_t result_type = args[0]; auto &type = compiler.get(result_type); if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || type.storage == StorageClassStorageBuffer) { uint32_t id = args[1]; uint32_t ptr = args[2]; compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); compiler.ir.ids[id].set_allow_type_rewrite(); } break; } case OpImageTexelPointer: { if (length < 3) return false; uint32_t result_type = args[0]; uint32_t id = args[1]; uint32_t ptr = args[2]; auto &e = compiler.set(id, "", result_type, true); auto *var = compiler.maybe_get_backing_variable(ptr); if (var) e.loaded_from = var->self; break; } case OpStore: case OpImageWrite: case OpAtomicStore: { if (length < 1) return false; uint32_t ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || var->storage == StorageClassStorageBuffer)) { access_potential_resource(var->self); } break; } case OpCopyMemory: 
{ if (length < 2) return false; uint32_t dst = args[0]; uint32_t src = args[1]; auto *dst_var = compiler.maybe_get_backing_variable(dst); auto *src_var = compiler.maybe_get_backing_variable(src); if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) access_potential_resource(dst_var->self); if (src_var) { if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) break; if (src_var->storage == StorageClassUniform && !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) { break; } access_potential_resource(src_var->self); } break; } case OpImageRead: case OpAtomicLoad: { if (length < 3) return false; uint32_t ptr = args[2]; auto *var = compiler.maybe_get_backing_variable(ptr); // We're only concerned with buffer and image memory here. if (!var) break; switch (var->storage) { default: break; case StorageClassUniform: // Must have BufferBlock; we only care about SSBOs. if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) break; // fallthrough case StorageClassUniformConstant: case StorageClassStorageBuffer: access_potential_resource(var->self); break; } break; } case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicIIncrement: case OpAtomicIDecrement: case OpAtomicIAdd: case OpAtomicISub: case OpAtomicSMin: case OpAtomicUMin: case OpAtomicSMax: case OpAtomicUMax: case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: { if (length < 3) return false; uint32_t ptr = args[2]; auto *var = compiler.maybe_get_backing_variable(ptr); if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || var->storage == StorageClassStorageBuffer)) { access_potential_resource(var->self); } break; } default: break; } return true; } void Compiler::analyze_interlocked_resource_usage() { if (get_execution_model() == ExecutionModelFragment && 
(get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) { InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); handler.interlock_function_id = prepass_handler.interlock_function_id; handler.split_function_case = prepass_handler.split_function_case; handler.control_flow_interlock = prepass_handler.control_flow_interlock; handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); // For GLSL. If we hit any of these cases, we have to fall back to conservative approach. interlocked_is_complex = !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; } } bool Compiler::type_is_array_of_pointers(const SPIRType &type) const { if (!type.pointer) return false; // If parent type has same pointer depth, we must have an array of pointers. return type.pointer_depth == get(type.parent_type).pointer_depth; } bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const { return type.pointer && type.storage == StorageClassPhysicalStorageBuffer && type.pointer_depth > get(type.parent_type).pointer_depth; } bool Compiler::flush_phi_required(BlockID from, BlockID to) const { auto &child = get(to); for (auto &phi : child.phi_variables) if (phi.parent == from) return true; return false; } void Compiler::add_loop_level() { current_loop_level++; }