diff options
author | kobalicek <kobalicek.petr@gmail.com> | 2021-03-15 00:06:54 +0300 |
---|---|---|
committer | kobalicek <kobalicek.petr@gmail.com> | 2021-03-17 20:05:48 +0300 |
commit | e822fba53e00a2a601f253387029fee6cc6c2029 (patch) | |
tree | d319686b6a2aaa2e995da0e4b3b95903a3ab9c83 | |
parent | 7836449c3099dd564c8cc7666f640a1532151af2 (diff) |
[ABI] Added the possibility to use AVX512 in Compiler and FuncFrame
-rw-r--r-- | .github/workflows/build.yml | 52 | ||||
-rw-r--r-- | src/asmjit/core/emitter.cpp | 6 | ||||
-rw-r--r-- | src/asmjit/core/func.h | 55 | ||||
-rw-r--r-- | src/asmjit/core/ralocal.cpp | 2 | ||||
-rw-r--r-- | src/asmjit/core/rapass.cpp | 88 | ||||
-rw-r--r-- | src/asmjit/core/rapass_p.h | 8 | ||||
-rw-r--r-- | src/asmjit/x86/x86compiler.h | 59 | ||||
-rw-r--r-- | src/asmjit/x86/x86emithelper.cpp | 6 | ||||
-rw-r--r-- | src/asmjit/x86/x86emithelper_p.h | 6 | ||||
-rw-r--r-- | src/asmjit/x86/x86instdb.cpp | 832 | ||||
-rw-r--r-- | src/asmjit/x86/x86instdb.h | 297 | ||||
-rw-r--r-- | src/asmjit/x86/x86rapass.cpp | 180 | ||||
-rw-r--r-- | src/asmjit/x86/x86rapass_p.h | 7 | ||||
-rw-r--r-- | tools/tablegen-x86.js | 108 |
14 files changed, 980 insertions, 726 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4b3a5a2..b322364 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,36 +48,36 @@ jobs: - { title: "linux" , os: "ubuntu-latest" , cc: "gcc" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "gcc" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "gcc" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-4.8" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-4.8" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-4.8" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-4.8" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-5" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-5" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-6" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-6" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-7" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-7" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-8" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-8" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: 
"linux" , os: "ubuntu-latest" , cc: "gcc-9" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-10" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-10" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-10" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "gcc-10" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-4.8" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-4.8" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-4.8" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-4.8" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-5" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-5" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-6" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-18.04" , cc: "gcc-6" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-7" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-7" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-8" , arch: "x86", build_type: "Debug" , defs: 
"ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-8" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-9" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-10" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-10" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-10" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "gcc-10" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "linux" , os: "ubuntu-latest" , cc: "clang" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-9" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-9" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: 
"x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - - { title: "linux" , os: "ubuntu-latest" , cc: "clang-10", arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-9" , arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-9" , arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-10", arch: "x86", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-10", arch: "x86", build_type: "Release", defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-10", arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } + - { title: "linux" , os: "ubuntu-20.04" , cc: "clang-10", arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Debug" , defs: "ASMJIT_TEST=ON" } - { title: "macos-10.15" , os: "macos-10.15" , cc: "gcc-9" , arch: "x64", build_type: "Release", defs: "ASMJIT_TEST=ON" } diff --git a/src/asmjit/core/emitter.cpp b/src/asmjit/core/emitter.cpp index f684140..2d5f1a7 100644 --- a/src/asmjit/core/emitter.cpp +++ b/src/asmjit/core/emitter.cpp @@ -257,7 +257,7 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitProlog(const FuncFrame& frame) { #ifdef ASMJIT_BUILD_X86 if (environment().isFamilyX86()) { - x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + x86::EmitHelper emitHelper(this, frame.isAvxEnabled(), frame.isAvx512Enabled()); return 
emitHelper.emitProlog(frame); } #endif @@ -278,7 +278,7 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitEpilog(const FuncFrame& frame) { #ifdef ASMJIT_BUILD_X86 if (environment().isFamilyX86()) { - x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + x86::EmitHelper emitHelper(this, frame.isAvxEnabled(), frame.isAvx512Enabled()); return emitHelper.emitEpilog(frame); } #endif @@ -299,7 +299,7 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitArgsAssignment(const FuncFrame& frame, #ifdef ASMJIT_BUILD_X86 if (environment().isFamilyX86()) { - x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + x86::EmitHelper emitHelper(this, frame.isAvxEnabled(), frame.isAvx512Enabled()); return emitHelper.emitArgsAssignment(frame, args); } #endif diff --git a/src/asmjit/core/func.h b/src/asmjit/core/func.h index 6cfd044..9f63096 100644 --- a/src/asmjit/core/func.h +++ b/src/asmjit/core/func.h @@ -881,6 +881,14 @@ public: //! prolog and epilog. Function frame calculation is based on `CallConv` and //! other function attributes. //! +//! SSE vs AVX vs AVX-512 +//! --------------------- +//! +//! Function frame provides a way to tell prolog/epilog inserter to use AVX +//! instructions instead of SSE. Use `setAvxEnabled()` and `setAvx512Enabled()` +//! to enable AVX and/or AVX-512, respectively. Enabling AVX-512 is mostly for +//! Compiler as it would use 32 SIMD registers instead of 16 when enabled. +//! //! Function Frame Structure //! ------------------------ //! @@ -922,13 +930,23 @@ public: //! Function calls other functions (is not leaf). kAttrHasFuncCalls = 0x00000020u, - //! Use AVX instead of SSE for all operations (X86). + //! Function uses AVX (X86). + //! + //! This flag instructs prolog and epilog emitter to use AVX instead of SSE for manipulating + //! XMM registers. kAttrX86AvxEnabled = 0x00010000u, - //! Emit VZEROUPPER instruction in epilog (X86). - kAttrX86AvxCleanup = 0x00020000u, - //! Emit EMMS instruction in epilog (X86). + //! Function uses AVX-512 (X86). + //! 
+ //! This flag instructs Compiler register allocator to use additional 16 registers introduced + //! by AVX-512. + kAttrX86Avx512Enabled = 0x00020000u, + + //! This flag instructs epilog writer to emit EMMS instruction before RET (X86). kAttrX86MmxCleanup = 0x00040000u, + //! This flag instructs epilog writer to emit VZEROUPPER instruction before RET (X86). + kAttrX86AvxCleanup = 0x00080000u, + //! Function has aligned save/restore of vector registers. kAttrAlignedVecSR = 0x40000000u, //! FuncFrame is finalized and can be used by PEI. @@ -1054,27 +1072,34 @@ public: //! Sets `kFlagHasCalls` to false. inline void resetFuncCalls() noexcept { clearAttributes(kAttrHasFuncCalls); } - //! Tests whether the function contains AVX cleanup - 'vzeroupper' instruction in epilog. - inline bool hasAvxCleanup() const noexcept { return hasAttribute(kAttrX86AvxCleanup); } - //! Enables AVX cleanup. - inline void setAvxCleanup() noexcept { addAttributes(kAttrX86AvxCleanup); } - //! Disables AVX cleanup. - inline void resetAvxCleanup() noexcept { clearAttributes(kAttrX86AvxCleanup); } - - //! Tests whether the function contains AVX cleanup - 'vzeroupper' instruction in epilog. + //! Tests whether the function has AVX enabled. inline bool isAvxEnabled() const noexcept { return hasAttribute(kAttrX86AvxEnabled); } - //! Enables AVX cleanup. + //! Enables AVX use. inline void setAvxEnabled() noexcept { addAttributes(kAttrX86AvxEnabled); } - //! Disables AVX cleanup. + //! Disables AVX use. inline void resetAvxEnabled() noexcept { clearAttributes(kAttrX86AvxEnabled); } - //! Tests whether the function contains MMX cleanup - 'emms' instruction in epilog. + //! Tests whether the function has AVX-512 enabled. + inline bool isAvx512Enabled() const noexcept { return hasAttribute(kAttrX86Avx512Enabled); } + //! Enables AVX-512 use. + inline void setAvx512Enabled() noexcept { addAttributes(kAttrX86Avx512Enabled); } + //! Disables AVX-512 use. 
+ inline void resetAvx512Enabled() noexcept { clearAttributes(kAttrX86Avx512Enabled); } + + //! Tests whether the function has MMX cleanup - 'emms' instruction in epilog. inline bool hasMmxCleanup() const noexcept { return hasAttribute(kAttrX86MmxCleanup); } //! Enables MMX cleanup. inline void setMmxCleanup() noexcept { addAttributes(kAttrX86MmxCleanup); } //! Disables MMX cleanup. inline void resetMmxCleanup() noexcept { clearAttributes(kAttrX86MmxCleanup); } + //! Tests whether the function has AVX cleanup - 'vzeroupper' instruction in epilog. + inline bool hasAvxCleanup() const noexcept { return hasAttribute(kAttrX86AvxCleanup); } + //! Enables AVX cleanup. + inline void setAvxCleanup() noexcept { addAttributes(kAttrX86AvxCleanup); } + //! Disables AVX cleanup. + inline void resetAvxCleanup() noexcept { clearAttributes(kAttrX86AvxCleanup); } + //! Tests whether the function uses call stack. inline bool hasCallStack() const noexcept { return _callStackSize != 0; } //! Tests whether the function uses local stack. diff --git a/src/asmjit/core/ralocal.cpp b/src/asmjit/core/ralocal.cpp index 35f6560..5e06d5b 100644 --- a/src/asmjit/core/ralocal.cpp +++ b/src/asmjit/core/ralocal.cpp @@ -801,7 +801,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { uint32_t physId = tiedReg->outId(); if (physId == RAAssignment::kPhysNone) { - uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs); + uint32_t allocableRegs = tiedReg->_allocableRegs & ~(outRegs | avoidRegs); if (!(allocableRegs & ~liveRegs)) { // There are no more registers, decide which one to spill. 
diff --git a/src/asmjit/core/rapass.cpp b/src/asmjit/core/rapass.cpp index 270027a..5e55584 100644 --- a/src/asmjit/core/rapass.cpp +++ b/src/asmjit/core/rapass.cpp @@ -1737,94 +1737,6 @@ Error BaseRAPass::rewrite() noexcept { return _rewrite(_func, _stop); } -ASMJIT_FAVOR_SPEED Error BaseRAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept { - uint32_t virtCount = cc()->_vRegArray.size(); - - BaseNode* node = first; - while (node != stop) { - BaseNode* next = node->next(); - if (node->isInst()) { - InstNode* inst = node->as<InstNode>(); - RAInst* raInst = node->passData<RAInst>(); - - Operand* operands = inst->operands(); - uint32_t opCount = inst->opCount(); - uint32_t i; - - // Rewrite virtual registers into physical registers. - if (ASMJIT_LIKELY(raInst)) { - // If the instruction contains pass data (raInst) then it was a subject - // for register allocation and must be rewritten to use physical regs. - RATiedReg* tiedRegs = raInst->tiedRegs(); - uint32_t tiedCount = raInst->tiedCount(); - - for (i = 0; i < tiedCount; i++) { - RATiedReg* tiedReg = &tiedRegs[i]; - - Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask()); - uint32_t useId = tiedReg->useId(); - while (useIt.hasNext()) inst->rewriteIdAtIndex(useIt.next(), useId); - - Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask()); - uint32_t outId = tiedReg->outId(); - while (outIt.hasNext()) inst->rewriteIdAtIndex(outIt.next(), outId); - } - - // This data is allocated by Zone passed to `runOnFunction()`, which - // will be reset after the RA pass finishes. So reset this data to - // prevent having a dead pointer after RA pass is complete. - node->resetPassData(); - - if (ASMJIT_UNLIKELY(node->type() != BaseNode::kNodeInst)) { - // FuncRet terminates the flow, it must either be removed if the exit - // label is next to it (optimization) or patched to an architecture - // dependent jump instruction that jumps to the function's exit before - // the epilog. 
- if (node->type() == BaseNode::kNodeFuncRet) { - RABlock* block = raInst->block(); - if (!isNextTo(node, _func->exitNode())) { - cc()->_setCursor(node->prev()); - ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label())); - } - - BaseNode* prev = node->prev(); - cc()->removeNode(node); - block->setLast(prev); - } - } - } - - // Rewrite stack slot addresses. - for (i = 0; i < opCount; i++) { - Operand& op = operands[i]; - if (op.isMem()) { - BaseMem& mem = op.as<BaseMem>(); - if (mem.isRegHome()) { - uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId()); - if (ASMJIT_UNLIKELY(virtIndex >= virtCount)) - return DebugUtils::errored(kErrorInvalidVirtId); - - VirtReg* virtReg = cc()->virtRegByIndex(virtIndex); - RAWorkReg* workReg = virtReg->workReg(); - ASMJIT_ASSERT(workReg != nullptr); - - RAStackSlot* slot = workReg->stackSlot(); - int32_t offset = slot->offset(); - - mem._setBase(_sp.type(), slot->baseRegId()); - mem.clearRegHome(); - mem.addOffsetLo32(offset); - } - } - } - } - - node = next; - } - - return kErrorOk; -} - // ============================================================================ // [asmjit::BaseRAPass - Logging] // ============================================================================ diff --git a/src/asmjit/core/rapass_p.h b/src/asmjit/core/rapass_p.h index d2fe505..034ec07 100644 --- a/src/asmjit/core/rapass_p.h +++ b/src/asmjit/core/rapass_p.h @@ -305,7 +305,7 @@ public: RATiedReg _tiedRegs[1]; enum Flags : uint32_t { - kFlagIsTerminator = 0x00000001u + kFlagIsTransformable = 0x80000000u }; //! \name Construction & Destruction @@ -338,8 +338,8 @@ public: //! Clears instruction `flags` from this RAInst. inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; } - //! Returns whether the RAInst represents an instruction that terminates this basic block. - inline bool isTerminator() const noexcept { return hasFlag(kFlagIsTerminator); } + //! 
Tests whether this instruction can be transformed to another instruction if necessary. + inline bool isTransformable() const noexcept { return hasFlag(kFlagIsTransformable); } //! Returns the associated block with this RAInst. inline RABlock* block() const noexcept { return _block; } @@ -1125,7 +1125,7 @@ public: //! \{ Error rewrite() noexcept; - Error _rewrite(BaseNode* first, BaseNode* stop) noexcept; + virtual Error _rewrite(BaseNode* first, BaseNode* stop) noexcept = 0; //! \} diff --git a/src/asmjit/x86/x86compiler.h b/src/asmjit/x86/x86compiler.h index 4c64b3b..1dad575 100644 --- a/src/asmjit/x86/x86compiler.h +++ b/src/asmjit/x86/x86compiler.h @@ -170,6 +170,65 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! +//! ### AVX and AVX-512 +//! +//! AVX and AVX-512 code generation must be explicitly enabled via \ref FuncFrame +//! to work properly. If it's not setup correctly then Prolog & Epilog would use +//! SSE instead of AVX instructions to work with SIMD registers. In addition, Compiler +//! requires explicitly enable AVX-512 via \ref FuncFrame in order to use all 32 SIMD +//! registers. +//! +//! ``` +//! #include <asmjit/x86.h> +//! #include <stdio.h> +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef void (*Func)(void*); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! CodeHolder code; // Holds code and relocation information. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code. +//! +//! cc.addFunc(FuncSignatureT<void, void*>()); +//! +//! // Use the following to enable AVX and/or AVX-512. +//! cc.func()->frame().setAvxEnabled(); +//! cc.func()->frame().setAvx512Enabled(); +//! +//! // Do something with the input pointer. +//! x86::Gp addr = cc.newIntPtr("addr"); +//! x86::Zmm vreg = cc.newZmm("vreg"); +//! +//! cc.setArg(0, addr); +//! +//! 
cc.vmovdqu32(vreg, x86::ptr(addr)); +//! cc.vpaddq(vreg, vreg, vreg); +//! cc.vmovdqu32(x86::ptr(addr), vreg); +//! +//! cc.endFunc(); // End of the function body. +//! cc.finalize(); // Translate and assemble the whole 'cc' content. +//! // ----> x86::Compiler is no longer needed from here and can be destroyed <---- +//! +//! Func fn; +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! // Execute the generated code and print some output. +//! uint64_t data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; +//! fn(data); +//! printf("%llu\n", (unsigned long long)data[0]); +//! +//! rt.release(fn); // Explicitly remove the function from the runtime. +//! return 0; +//! } +//! ``` +//! //! ### Recursive Functions //! //! It's possible to create more functions by using the same \ref x86::Compiler diff --git a/src/asmjit/x86/x86emithelper.cpp b/src/asmjit/x86/x86emithelper.cpp index 1b21402..eab35f4 100644 --- a/src/asmjit/x86/x86emithelper.cpp +++ b/src/asmjit/x86/x86emithelper.cpp @@ -148,12 +148,10 @@ ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove( instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps; else if (elementTypeId == Type::kIdF64) instId = _avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd; - else if (typeId <= Type::_kIdVec256End) + else if (!_avx512Enabled) instId = _avxEnabled ? 
Inst::kIdVmovdqa : Inst::kIdMovdqa; - else if (elementTypeId <= Type::kIdU32) - instId = Inst::kIdVmovdqa32; else - instId = Inst::kIdVmovdqa64; + instId = Inst::kIdVmovdqa32; break; } } diff --git a/src/asmjit/x86/x86emithelper_p.h b/src/asmjit/x86/x86emithelper_p.h index 0fb8abc..dd3b0c9 100644 --- a/src/asmjit/x86/x86emithelper_p.h +++ b/src/asmjit/x86/x86emithelper_p.h @@ -49,10 +49,12 @@ static ASMJIT_INLINE uint32_t vecTypeIdToRegType(uint32_t typeId) noexcept { class EmitHelper : public BaseEmitHelper { public: bool _avxEnabled; + bool _avx512Enabled; - inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false) noexcept + inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false, bool avx512Enabled = false) noexcept : BaseEmitHelper(emitter), - _avxEnabled(avxEnabled) {} + _avxEnabled(avxEnabled || avx512Enabled), + _avx512Enabled(avx512Enabled) {} Error emitRegMove( const Operand_& dst_, diff --git a/src/asmjit/x86/x86instdb.cpp b/src/asmjit/x86/x86instdb.cpp index 4391f3b..49c4738 100644 --- a/src/asmjit/x86/x86instdb.cpp +++ b/src/asmjit/x86/x86instdb.cpp @@ -2055,427 +2055,429 @@ const uint32_t InstDB::_altOpcodeTable[] = { // ${InstCommonTable:Begin} // ------------------- Automatically generated, do not edit ------------------- #define F(VAL) InstDB::kFlag##VAL +#define X(VAL) InstDB::kAvx512Flag##VAL #define CONTROL(VAL) Inst::kControl##VAL #define SINGLE_REG(VAL) InstDB::kSingleReg##VAL const InstDB::CommonInfo InstDB::_commonInfoTable[] = { - { 0 , 0 , 0 , CONTROL(None) , SINGLE_REG(None), 0 }, // #0 [ref=1x] - { 0 , 376, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #1 [ref=4x] - { 0 , 377, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #2 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #3 [ref=2x] - { 0 , 180, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #4 [ref=2x] - { F(Vec) , 79 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #5 [ref=54x] - { 
F(Vec) , 106, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #6 [ref=19x] - { F(Vec) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #7 [ref=16x] - { F(Vec) , 215, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #8 [ref=20x] - { F(Lock)|F(XAcquire)|F(XRelease) , 28 , 11, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #9 [ref=1x] - { F(Vex) , 272, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #10 [ref=3x] - { F(Vec) , 79 , 1 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #11 [ref=12x] - { 0 , 378, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #12 [ref=1x] - { F(Vex) , 274, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #13 [ref=5x] - { F(Vex) , 180, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #14 [ref=12x] - { F(Vec) , 379, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #15 [ref=4x] - { 0 , 276, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #16 [ref=3x] - { F(Mib) , 380, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #17 [ref=1x] - { 0 , 381, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #18 [ref=1x] - { 0 , 278, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #19 [ref=1x] - { F(Mib) , 382, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #20 [ref=1x] - { 0 , 280, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #21 [ref=1x] - { 0 , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #22 [ref=35x] - { 0 , 383, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #23 [ref=3x] - { 0 , 123, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #24 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 123, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #25 [ref=3x] - { F(Rep)|F(RepIgnored) , 282, 2 , CONTROL(Call) , SINGLE_REG(None), 0 }, // #26 [ref=1x] - { 0 , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #27 [ref=1x] - { 0 , 385, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #28 [ref=2x] - { 0 , 359, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #29 [ref=1x] - { 0 , 108, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #30 [ref=83x] - { 0 , 386, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #31 [ref=24x] 
- { 0 , 387, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #32 [ref=6x] - { 0 , 388, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #33 [ref=13x] - { 0 , 389, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #34 [ref=1x] - { 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #35 [ref=1x] - { F(Rep) , 127, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #36 [ref=1x] - { F(Vec) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #37 [ref=2x] - { F(Vec) , 391, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #38 [ref=3x] - { F(Lock)|F(XAcquire)|F(XRelease) , 131, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #39 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 392, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #40 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 393, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #41 [ref=1x] - { 0 , 394, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #42 [ref=1x] - { 0 , 395, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #43 [ref=1x] - { 0 , 284, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #44 [ref=1x] - { F(Mmx)|F(Vec) , 396, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #45 [ref=2x] - { F(Mmx)|F(Vec) , 397, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #46 [ref=2x] - { F(Mmx)|F(Vec) , 398, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #47 [ref=2x] - { F(Vec) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #48 [ref=2x] - { F(Vec) , 400, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #49 [ref=2x] - { F(Vec) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #50 [ref=2x] - { 0 , 402, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #51 [ref=1x] - { 0 , 403, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #52 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 286, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #53 [ref=2x] - { 0 , 39 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #54 [ref=3x] - { F(Mmx) , 108, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #55 [ref=1x] - { 0 , 288, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #56 [ref=2x] - { 0 , 
404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #57 [ref=1x] - { F(Vec) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #58 [ref=2x] - { F(Vec) , 290, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #59 [ref=1x] - { F(FpuM32)|F(FpuM64) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #60 [ref=6x] - { 0 , 292, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #61 [ref=9x] - { F(FpuM80) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #62 [ref=2x] - { 0 , 293, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #63 [ref=13x] - { F(FpuM32)|F(FpuM64) , 294, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #64 [ref=2x] - { F(FpuM16)|F(FpuM32) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #65 [ref=9x] - { F(FpuM16)|F(FpuM32)|F(FpuM64) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #66 [ref=3x] - { F(FpuM32)|F(FpuM64)|F(FpuM80) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #67 [ref=2x] - { F(FpuM16) , 410, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #68 [ref=3x] - { F(FpuM16) , 411, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #69 [ref=2x] - { F(FpuM32)|F(FpuM64) , 295, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #70 [ref=1x] - { 0 , 412, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #71 [ref=2x] - { 0 , 413, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #72 [ref=1x] - { 0 , 39 , 10, CONTROL(None) , SINGLE_REG(None), 0 }, // #73 [ref=1x] - { 0 , 414, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #74 [ref=1x] - { 0 , 415, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #75 [ref=2x] - { 0 , 343, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #76 [ref=3x] - { F(Rep) , 416, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #77 [ref=1x] - { F(Vec) , 296, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #78 [ref=1x] - { 0 , 417, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #79 [ref=2x] - { 0 , 418, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #80 [ref=8x] - { 0 , 298, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #81 [ref=3x] - { 0 , 300, 2 , CONTROL(None) , 
SINGLE_REG(None), 0 }, // #82 [ref=1x] - { 0 , 108, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #83 [ref=2x] - { 0 , 388, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #84 [ref=1x] - { F(Rep)|F(RepIgnored) , 302, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #85 [ref=30x] - { F(Rep)|F(RepIgnored) , 304, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #86 [ref=1x] - { F(Rep)|F(RepIgnored) , 306, 2 , CONTROL(Jump) , SINGLE_REG(None), 0 }, // #87 [ref=1x] - { F(Vec)|F(Vex) , 419, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #88 [ref=27x] - { F(Vec)|F(Vex) , 308, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #89 [ref=1x] - { F(Vec)|F(Vex) , 310, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #90 [ref=1x] - { F(Vec)|F(Vex) , 312, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #91 [ref=1x] - { F(Vec)|F(Vex) , 314, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #92 [ref=1x] - { F(Vec)|F(Vex) , 420, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #93 [ref=12x] - { F(Vec)|F(Vex) , 421, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #94 [ref=8x] - { 0 , 422, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #95 [ref=2x] - { 0 , 316, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #96 [ref=1x] - { 0 , 318, 2 , CONTROL(Call) , SINGLE_REG(None), 0 }, // #97 [ref=1x] - { F(Vec) , 224, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #98 [ref=2x] - { 0 , 423, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #99 [ref=2x] - { 0 , 320, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #100 [ref=2x] - { F(Vex) , 424, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #101 [ref=2x] - { 0 , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #102 [ref=1x] - { 0 , 185, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #103 [ref=3x] - { 0 , 318, 2 , CONTROL(Jump) , SINGLE_REG(None), 0 }, // #104 [ref=1x] - { 0 , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #105 [ref=5x] - { F(Vex) , 427, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #106 [ref=2x] - { F(Rep) , 135, 4 , CONTROL(None) , SINGLE_REG(None), 0 
}, // #107 [ref=1x] - { 0 , 304, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #108 [ref=3x] - { 0 , 322, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #109 [ref=1x] - { F(Vex) , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #110 [ref=2x] - { F(Vec) , 429, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #111 [ref=1x] - { F(Mmx) , 430, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #112 [ref=1x] - { 0 , 431, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #113 [ref=2x] - { F(XRelease) , 0 , 16, CONTROL(None) , SINGLE_REG(None), 0 }, // #114 [ref=1x] - { 0 , 49 , 9 , CONTROL(None) , SINGLE_REG(None), 0 }, // #115 [ref=1x] - { F(Vec) , 79 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #116 [ref=6x] - { 0 , 73 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #117 [ref=1x] - { F(Mmx)|F(Vec) , 324, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #118 [ref=1x] - { 0 , 432, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #119 [ref=1x] - { 0 , 77 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #120 [ref=2x] - { F(Mmx)|F(Vec) , 433, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #121 [ref=1x] - { F(Vec) , 291, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #122 [ref=2x] - { F(Vec) , 230, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #123 [ref=4x] - { F(Vec) , 434, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #124 [ref=2x] - { F(Vec) , 80 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #125 [ref=3x] - { F(Mmx) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #126 [ref=1x] - { F(Vec) , 107, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #127 [ref=1x] - { F(Vec) , 233, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #128 [ref=1x] - { F(Mmx)|F(Vec) , 103, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #129 [ref=1x] - { F(Mmx)|F(Vec) , 436, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #130 [ref=1x] - { F(Rep) , 139, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #131 [ref=1x] - { F(Vec) , 106, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #132 [ref=1x] - { F(Vec) , 326, 2 , 
CONTROL(None) , SINGLE_REG(None), 0 }, // #133 [ref=1x] - { 0 , 328, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #134 [ref=2x] - { 0 , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #135 [ref=1x] - { F(Vex) , 330, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #136 [ref=1x] - { 0 , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #137 [ref=1x] - { 0 , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #138 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 287, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #139 [ref=2x] - { 0 , 108, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #140 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #141 [ref=1x] - { 0 , 440, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #142 [ref=1x] - { F(Rep) , 441, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #143 [ref=1x] - { F(Mmx)|F(Vec) , 332, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #144 [ref=37x] - { F(Mmx)|F(Vec) , 334, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #145 [ref=1x] - { F(Mmx)|F(Vec) , 332, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #146 [ref=6x] - { F(Mmx)|F(Vec) , 332, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #147 [ref=16x] - { F(Mmx) , 332, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #148 [ref=26x] - { F(Vec) , 79 , 1 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #149 [ref=4x] - { F(Vec) , 442, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #150 [ref=1x] - { F(Vec) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #151 [ref=1x] - { F(Vec) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #152 [ref=1x] - { F(Vec) , 445, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #153 [ref=1x] - { F(Vec) , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #154 [ref=1x] - { F(Vec) , 447, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #155 [ref=1x] - { F(Mmx)|F(Vec) , 336, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #156 [ref=1x] - { F(Vec) , 448, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #157 [ref=1x] - { F(Vec) , 449, 
1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #158 [ref=1x] - { F(Vec) , 450, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #159 [ref=1x] - { F(Mmx)|F(Vec) , 451, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #160 [ref=1x] - { F(Mmx)|F(Vec) , 452, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #161 [ref=1x] - { F(Vec) , 260, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #162 [ref=2x] - { 0 , 143, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #163 [ref=1x] - { F(Mmx) , 334, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #164 [ref=1x] - { F(Mmx)|F(Vec) , 338, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #165 [ref=8x] - { F(Vec) , 453, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #166 [ref=2x] - { 0 , 454, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #167 [ref=1x] - { F(Mmx)|F(Vec) , 340, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #168 [ref=3x] - { 0 , 147, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #169 [ref=1x] - { 0 , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #170 [ref=8x] - { 0 , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #171 [ref=4x] - { 0 , 457, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #172 [ref=8x] - { 0 , 342, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #173 [ref=1x] - { F(Rep)|F(RepIgnored) , 344, 2 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #174 [ref=1x] - { 0 , 344, 2 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #175 [ref=1x] - { F(Vex) , 346, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #176 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(WO) , 0 }, // #177 [ref=3x] - { F(Rep) , 151, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #178 [ref=1x] - { 0 , 458, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #179 [ref=30x] - { 0 , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #180 [ref=2x] - { 0 , 459, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #181 [ref=3x] - { F(Rep) , 155, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #182 [ref=1x] - { F(Vex) , 460, 1 , CONTROL(None) , 
SINGLE_REG(None), 0 }, // #183 [ref=5x] - { 0 , 66 , 7 , CONTROL(None) , SINGLE_REG(None), 0 }, // #184 [ref=1x] - { F(Tsib)|F(Vex) , 461, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #185 [ref=2x] - { F(Vex) , 388, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #186 [ref=1x] - { F(Tsib)|F(Vex) , 462, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #187 [ref=1x] - { F(Vex) , 463, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #188 [ref=1x] - { 0 , 464, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #189 [ref=2x] - { 0 , 180, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #190 [ref=2x] - { 0 , 465, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #191 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 466, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #192 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 467, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #193 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #194 [ref=22x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #195 [ref=22x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #196 [ref=18x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #197 [ref=17x] - { F(Vec)|F(Vex) , 191, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #198 [ref=15x] - { F(Vec)|F(Vex)|F(Evex) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #199 [ref=5x] - { F(Vec)|F(Vex) , 79 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #200 [ref=17x] - { F(Vec)|F(Vex) , 215, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #201 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #202 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #203 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #204 
[ref=10x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #205 [ref=12x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #206 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #207 [ref=6x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #208 [ref=19x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #209 [ref=12x] - { F(Vec)|F(Vex) , 194, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #210 [ref=6x] - { F(Vec)|F(Vex) , 348, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #211 [ref=3x] - { F(Vec)|F(Vex) , 470, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #212 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 471, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #213 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 472, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #214 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 473, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #215 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 474, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #216 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 471, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #217 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 475, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #218 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 197, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #219 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 197, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #220 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 476, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #221 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 477, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #222 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 106, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #223 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 257, 1 , 
CONTROL(None) , SINGLE_REG(None), 0 }, // #224 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 200, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #225 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #226 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #227 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 350, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #228 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 350, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #229 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #230 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 350, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #231 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #232 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #233 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 209, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #234 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #235 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #236 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #237 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #238 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 478, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #239 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #240 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #241 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #242 
[ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 350, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #243 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #244 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 350, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #245 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #246 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #247 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #248 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #249 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #250 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #251 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #252 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #253 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 478, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #254 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #255 [ref=3x] - { F(Vec)|F(Vex) , 194, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #256 [ref=9x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 83 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #257 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 83 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #258 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #259 [ref=9x] - { F(Vec)|F(Vex) , 210, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #260 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 479, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #261 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 211, 1 , 
CONTROL(None) , SINGLE_REG(None), 0 }, // #262 [ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #263 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #264 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #265 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 480, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #266 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 481, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #267 [ref=4x] - { F(Vec)|F(Vex) , 159, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #268 [ref=13x] - { F(Vec)|F(Vex) , 352, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #269 [ref=4x] - { F(Vec)|F(Vex) , 354, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #270 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 482, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #271 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 482, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #272 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 483, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #273 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 484, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #274 [ref=1x] - { F(Vec)|F(Vex) , 206, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #275 [ref=7x] - { F(Vec)|F(Vex) , 106, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #276 [ref=1x] - { F(Vec)|F(Vex) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #277 [ref=1x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 163, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #278 [ref=2x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 113, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #279 [ref=2x] - { F(Vsib)|F(Evex)|F(Avx512K) , 485, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #280 [ref=4x] - { F(Vsib)|F(Evex)|F(Avx512K) , 486, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #281 [ref=4x] - { F(Vsib)|F(Evex)|F(Avx512K) , 487, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #282 [ref=8x] - { 
F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 118, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #283 [ref=2x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 212, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #284 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #285 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #286 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #287 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #288 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #289 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #290 [ref=22x] - { F(Vec)|F(Vex) , 356, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #291 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 356, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #292 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 488, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #293 [ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 481, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #294 [ref=1x] - { F(Vec)|F(Vex) , 224, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #295 [ref=1x] - { F(Vex) , 423, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #296 [ref=2x] - { F(Vec)|F(Vex) , 429, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #297 [ref=1x] - { F(Vec)|F(Vex) , 167, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #298 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #299 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #300 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #301 [ref=2x] - { 0 , 358, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #302 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) 
, 79 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #303 [ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 360, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #304 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 218, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #305 [ref=1x] - { F(Vec)|F(Vex) , 79 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #306 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 79 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #307 [ref=6x] - { F(Vec)|F(Vex)|F(Evex) , 232, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #308 [ref=2x] - { F(Vec)|F(Vex)|F(Evex) , 362, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #309 [ref=4x] - { F(Vec)|F(Vex) , 489, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #310 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 221, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #311 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 224, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #312 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 227, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #313 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 230, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #314 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #315 [ref=5x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 233, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #316 [ref=1x] - { 0 , 364, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #317 [ref=1x] - { 0 , 366, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #318 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512B32) , 236, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #319 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512B64) , 236, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #320 [ref=1x] - { F(Vec)|F(Vex) , 191, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #321 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #322 [ref=2x] - { F(Vec)|F(Vex) , 191, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #323 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , 
SINGLE_REG(WO) , 0 }, // #324 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #325 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #326 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #327 [ref=13x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 490, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #328 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 491, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #329 [ref=1x] - { F(Vec)|F(Evex) , 492, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #330 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 239, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #331 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 493, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #332 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #333 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #334 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #335 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #336 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B32) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #337 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B64) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #338 [ref=2x] - { F(Vec)|F(Vex) , 442, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #339 [ref=1x] - { F(Vec)|F(Vex) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #340 [ref=1x] - { F(Vec)|F(Vex) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #341 [ref=1x] - { F(Vec)|F(Vex) , 445, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #342 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #343 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #344 [ref=6x] - { 
F(Vec)|F(Vex)|F(Evex)|F(PreferEvex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #345 [ref=4x] - { F(Vec)|F(Vex) , 195, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #346 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 192, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #347 [ref=2x] - { F(Vec)|F(Vex) , 171, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #348 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 85 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #349 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 175, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #350 [ref=2x] - { F(Vec)|F(Vex)|F(Evex) , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #351 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 447, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #352 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 494, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #353 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 495, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #354 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 496, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #355 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 497, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #356 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 498, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #357 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #358 [ref=4x] - { F(Vec)|F(Vex) , 348, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #359 [ref=12x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #360 [ref=8x] - { F(Vec)|F(Evex) , 499, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #361 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 248, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #362 [ref=6x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 251, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #363 [ref=9x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 254, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #364 [ref=3x] - { 
F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 257, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #365 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 260, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #366 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #367 [ref=6x] - { F(Vec)|F(Vex) , 159, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #368 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #369 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #370 [ref=3x] - { F(Vec)|F(Vex) , 368, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #371 [ref=4x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 263, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #372 [ref=2x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 370, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #373 [ref=2x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 372, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #374 [ref=2x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 266, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #375 [ref=2x] - { F(Vec)|F(Vex) , 374, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #376 [ref=8x] - { F(Vec)|F(Evex)|F(Avx512K) , 269, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #377 [ref=5x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #378 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #379 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 91 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #380 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 215, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #381 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 91 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #382 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 91 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #383 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 97 , 6 , CONTROL(None) , SINGLE_REG(None), 0 
}, // #384 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #385 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #386 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #387 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 269, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #388 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 269, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #389 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #390 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #391 [ref=2x] - { F(Vec)|F(Vex) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #392 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 480, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #393 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 481, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #394 [ref=1x] - { F(Vec)|F(Vex) , 215, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #395 [ref=2x] - { F(Vec)|F(Vex) , 480, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #396 [ref=1x] - { F(Vec)|F(Vex) , 481, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #397 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #398 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #399 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #400 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #401 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 195, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #402 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 195, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #403 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, 
// #404 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #405 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #406 [ref=1x] - { F(Vec)|F(Vex) , 108, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #407 [ref=2x] - { 0 , 23 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #408 [ref=2x] - { 0 , 61 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #409 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 58 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #410 [ref=1x] - { 0 , 500, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #411 [ref=1x] - { F(Lock)|F(XAcquire) , 58 , 8 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #412 [ref=1x] - { 0 , 501, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #413 [ref=6x] - { 0 , 502, 1 , CONTROL(None) , SINGLE_REG(None), 0 } // #414 [ref=6x] + { 0 , 0 , 0 , 0 , CONTROL(None) , SINGLE_REG(None)}, // #0 [ref=1x] + { 0 , 0 , 376, 1 , CONTROL(None) , SINGLE_REG(None)}, // #1 [ref=4x] + { 0 , 0 , 377, 1 , CONTROL(None) , SINGLE_REG(None)}, // #2 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None)}, // #3 [ref=2x] + { 0 , 0 , 180, 2 , CONTROL(None) , SINGLE_REG(None)}, // #4 [ref=2x] + { F(Vec) , 0 , 79 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #5 [ref=54x] + { F(Vec) , 0 , 106, 1 , CONTROL(None) , SINGLE_REG(None)}, // #6 [ref=19x] + { F(Vec) , 0 , 257, 1 , CONTROL(None) , SINGLE_REG(None)}, // #7 [ref=16x] + { F(Vec) , 0 , 215, 1 , CONTROL(None) , SINGLE_REG(None)}, // #8 [ref=20x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 28 , 11, CONTROL(None) , SINGLE_REG(RO) }, // #9 [ref=1x] + { F(Vex) , 0 , 272, 2 , CONTROL(None) , SINGLE_REG(None)}, // #10 [ref=3x] + { F(Vec) , 0 , 79 , 1 , CONTROL(None) , SINGLE_REG(RO) }, // #11 [ref=12x] + { 0 , 0 , 378, 1 , CONTROL(None) , SINGLE_REG(None)}, // #12 [ref=1x] + { F(Vex) , 0 , 274, 2 , CONTROL(None) , SINGLE_REG(None)}, // #13 [ref=5x] + { F(Vex) , 0 , 
180, 2 , CONTROL(None) , SINGLE_REG(None)}, // #14 [ref=12x] + { F(Vec) , 0 , 379, 1 , CONTROL(None) , SINGLE_REG(None)}, // #15 [ref=4x] + { 0 , 0 , 276, 2 , CONTROL(None) , SINGLE_REG(None)}, // #16 [ref=3x] + { F(Mib) , 0 , 380, 1 , CONTROL(None) , SINGLE_REG(None)}, // #17 [ref=1x] + { 0 , 0 , 381, 1 , CONTROL(None) , SINGLE_REG(None)}, // #18 [ref=1x] + { 0 , 0 , 278, 2 , CONTROL(None) , SINGLE_REG(None)}, // #19 [ref=1x] + { F(Mib) , 0 , 382, 1 , CONTROL(None) , SINGLE_REG(None)}, // #20 [ref=1x] + { 0 , 0 , 280, 2 , CONTROL(None) , SINGLE_REG(None)}, // #21 [ref=1x] + { 0 , 0 , 179, 3 , CONTROL(None) , SINGLE_REG(None)}, // #22 [ref=35x] + { 0 , 0 , 383, 1 , CONTROL(None) , SINGLE_REG(None)}, // #23 [ref=3x] + { 0 , 0 , 123, 4 , CONTROL(None) , SINGLE_REG(None)}, // #24 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 123, 4 , CONTROL(None) , SINGLE_REG(None)}, // #25 [ref=3x] + { F(Rep)|F(RepIgnored) , 0 , 282, 2 , CONTROL(Call) , SINGLE_REG(None)}, // #26 [ref=1x] + { 0 , 0 , 384, 1 , CONTROL(None) , SINGLE_REG(None)}, // #27 [ref=1x] + { 0 , 0 , 385, 1 , CONTROL(None) , SINGLE_REG(None)}, // #28 [ref=2x] + { 0 , 0 , 359, 1 , CONTROL(None) , SINGLE_REG(None)}, // #29 [ref=1x] + { 0 , 0 , 108, 1 , CONTROL(None) , SINGLE_REG(None)}, // #30 [ref=83x] + { 0 , 0 , 386, 1 , CONTROL(None) , SINGLE_REG(None)}, // #31 [ref=24x] + { 0 , 0 , 387, 1 , CONTROL(None) , SINGLE_REG(None)}, // #32 [ref=6x] + { 0 , 0 , 388, 1 , CONTROL(None) , SINGLE_REG(None)}, // #33 [ref=13x] + { 0 , 0 , 389, 1 , CONTROL(None) , SINGLE_REG(None)}, // #34 [ref=1x] + { 0 , 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None)}, // #35 [ref=1x] + { F(Rep) , 0 , 127, 4 , CONTROL(None) , SINGLE_REG(None)}, // #36 [ref=1x] + { F(Vec) , 0 , 390, 1 , CONTROL(None) , SINGLE_REG(None)}, // #37 [ref=2x] + { F(Vec) , 0 , 391, 1 , CONTROL(None) , SINGLE_REG(None)}, // #38 [ref=3x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 131, 4 , CONTROL(None) , SINGLE_REG(None)}, // #39 [ref=1x] + { 
F(Lock)|F(XAcquire)|F(XRelease) , 0 , 392, 1 , CONTROL(None) , SINGLE_REG(None)}, // #40 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 393, 1 , CONTROL(None) , SINGLE_REG(None)}, // #41 [ref=1x] + { 0 , 0 , 394, 1 , CONTROL(None) , SINGLE_REG(None)}, // #42 [ref=1x] + { 0 , 0 , 395, 1 , CONTROL(None) , SINGLE_REG(None)}, // #43 [ref=1x] + { 0 , 0 , 284, 2 , CONTROL(None) , SINGLE_REG(None)}, // #44 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 396, 1 , CONTROL(None) , SINGLE_REG(None)}, // #45 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 397, 1 , CONTROL(None) , SINGLE_REG(None)}, // #46 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 398, 1 , CONTROL(None) , SINGLE_REG(None)}, // #47 [ref=2x] + { F(Vec) , 0 , 399, 1 , CONTROL(None) , SINGLE_REG(None)}, // #48 [ref=2x] + { F(Vec) , 0 , 400, 1 , CONTROL(None) , SINGLE_REG(None)}, // #49 [ref=2x] + { F(Vec) , 0 , 401, 1 , CONTROL(None) , SINGLE_REG(None)}, // #50 [ref=2x] + { 0 , 0 , 402, 1 , CONTROL(None) , SINGLE_REG(None)}, // #51 [ref=1x] + { 0 , 0 , 403, 1 , CONTROL(None) , SINGLE_REG(None)}, // #52 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 286, 2 , CONTROL(None) , SINGLE_REG(None)}, // #53 [ref=2x] + { 0 , 0 , 39 , 4 , CONTROL(None) , SINGLE_REG(None)}, // #54 [ref=3x] + { F(Mmx) , 0 , 108, 1 , CONTROL(None) , SINGLE_REG(None)}, // #55 [ref=1x] + { 0 , 0 , 288, 2 , CONTROL(None) , SINGLE_REG(None)}, // #56 [ref=2x] + { 0 , 0 , 404, 1 , CONTROL(None) , SINGLE_REG(None)}, // #57 [ref=1x] + { F(Vec) , 0 , 405, 1 , CONTROL(None) , SINGLE_REG(None)}, // #58 [ref=2x] + { F(Vec) , 0 , 290, 2 , CONTROL(None) , SINGLE_REG(None)}, // #59 [ref=1x] + { F(FpuM32)|F(FpuM64) , 0 , 182, 3 , CONTROL(None) , SINGLE_REG(None)}, // #60 [ref=6x] + { 0 , 0 , 292, 2 , CONTROL(None) , SINGLE_REG(None)}, // #61 [ref=9x] + { F(FpuM80) , 0 , 406, 1 , CONTROL(None) , SINGLE_REG(None)}, // #62 [ref=2x] + { 0 , 0 , 293, 1 , CONTROL(None) , SINGLE_REG(None)}, // #63 [ref=13x] + { F(FpuM32)|F(FpuM64) , 0 , 294, 2 , CONTROL(None) , SINGLE_REG(None)}, // #64 [ref=2x] + 
{ F(FpuM16)|F(FpuM32) , 0 , 407, 1 , CONTROL(None) , SINGLE_REG(None)}, // #65 [ref=9x] + { F(FpuM16)|F(FpuM32)|F(FpuM64) , 0 , 408, 1 , CONTROL(None) , SINGLE_REG(None)}, // #66 [ref=3x] + { F(FpuM32)|F(FpuM64)|F(FpuM80) , 0 , 409, 1 , CONTROL(None) , SINGLE_REG(None)}, // #67 [ref=2x] + { F(FpuM16) , 0 , 410, 1 , CONTROL(None) , SINGLE_REG(None)}, // #68 [ref=3x] + { F(FpuM16) , 0 , 411, 1 , CONTROL(None) , SINGLE_REG(None)}, // #69 [ref=2x] + { F(FpuM32)|F(FpuM64) , 0 , 295, 1 , CONTROL(None) , SINGLE_REG(None)}, // #70 [ref=1x] + { 0 , 0 , 412, 1 , CONTROL(None) , SINGLE_REG(None)}, // #71 [ref=2x] + { 0 , 0 , 413, 1 , CONTROL(None) , SINGLE_REG(None)}, // #72 [ref=1x] + { 0 , 0 , 39 , 10, CONTROL(None) , SINGLE_REG(None)}, // #73 [ref=1x] + { 0 , 0 , 414, 1 , CONTROL(None) , SINGLE_REG(None)}, // #74 [ref=1x] + { 0 , 0 , 415, 1 , CONTROL(None) , SINGLE_REG(None)}, // #75 [ref=2x] + { 0 , 0 , 343, 1 , CONTROL(None) , SINGLE_REG(None)}, // #76 [ref=3x] + { F(Rep) , 0 , 416, 1 , CONTROL(None) , SINGLE_REG(None)}, // #77 [ref=1x] + { F(Vec) , 0 , 296, 2 , CONTROL(None) , SINGLE_REG(None)}, // #78 [ref=1x] + { 0 , 0 , 417, 1 , CONTROL(None) , SINGLE_REG(None)}, // #79 [ref=2x] + { 0 , 0 , 418, 1 , CONTROL(None) , SINGLE_REG(None)}, // #80 [ref=8x] + { 0 , 0 , 298, 2 , CONTROL(None) , SINGLE_REG(None)}, // #81 [ref=3x] + { 0 , 0 , 300, 2 , CONTROL(None) , SINGLE_REG(None)}, // #82 [ref=1x] + { 0 , 0 , 108, 1 , CONTROL(Return) , SINGLE_REG(None)}, // #83 [ref=2x] + { 0 , 0 , 388, 1 , CONTROL(Return) , SINGLE_REG(None)}, // #84 [ref=1x] + { F(Rep)|F(RepIgnored) , 0 , 302, 2 , CONTROL(Branch) , SINGLE_REG(None)}, // #85 [ref=30x] + { F(Rep)|F(RepIgnored) , 0 , 304, 2 , CONTROL(Branch) , SINGLE_REG(None)}, // #86 [ref=1x] + { F(Rep)|F(RepIgnored) , 0 , 306, 2 , CONTROL(Jump) , SINGLE_REG(None)}, // #87 [ref=1x] + { F(Vex) , 0 , 419, 1 , CONTROL(None) , SINGLE_REG(None)}, // #88 [ref=27x] + { F(Vex) , 0 , 308, 2 , CONTROL(None) , SINGLE_REG(None)}, // #89 [ref=1x] + { 
F(Vex) , 0 , 310, 2 , CONTROL(None) , SINGLE_REG(None)}, // #90 [ref=1x] + { F(Vex) , 0 , 312, 2 , CONTROL(None) , SINGLE_REG(None)}, // #91 [ref=1x] + { F(Vex) , 0 , 314, 2 , CONTROL(None) , SINGLE_REG(None)}, // #92 [ref=1x] + { F(Vex) , 0 , 420, 1 , CONTROL(None) , SINGLE_REG(None)}, // #93 [ref=12x] + { F(Vex) , 0 , 421, 1 , CONTROL(None) , SINGLE_REG(None)}, // #94 [ref=8x] + { 0 , 0 , 422, 1 , CONTROL(None) , SINGLE_REG(None)}, // #95 [ref=2x] + { 0 , 0 , 316, 2 , CONTROL(None) , SINGLE_REG(None)}, // #96 [ref=1x] + { 0 , 0 , 318, 2 , CONTROL(Call) , SINGLE_REG(None)}, // #97 [ref=1x] + { F(Vec) , 0 , 224, 1 , CONTROL(None) , SINGLE_REG(None)}, // #98 [ref=2x] + { 0 , 0 , 423, 1 , CONTROL(None) , SINGLE_REG(None)}, // #99 [ref=2x] + { 0 , 0 , 320, 2 , CONTROL(None) , SINGLE_REG(None)}, // #100 [ref=2x] + { F(Vex) , 0 , 424, 1 , CONTROL(None) , SINGLE_REG(None)}, // #101 [ref=2x] + { 0 , 0 , 425, 1 , CONTROL(None) , SINGLE_REG(None)}, // #102 [ref=1x] + { 0 , 0 , 185, 3 , CONTROL(None) , SINGLE_REG(None)}, // #103 [ref=3x] + { 0 , 0 , 318, 2 , CONTROL(Jump) , SINGLE_REG(None)}, // #104 [ref=1x] + { 0 , 0 , 426, 1 , CONTROL(None) , SINGLE_REG(None)}, // #105 [ref=5x] + { F(Vex) , 0 , 427, 1 , CONTROL(None) , SINGLE_REG(None)}, // #106 [ref=2x] + { F(Rep) , 0 , 135, 4 , CONTROL(None) , SINGLE_REG(None)}, // #107 [ref=1x] + { 0 , 0 , 304, 2 , CONTROL(Branch) , SINGLE_REG(None)}, // #108 [ref=3x] + { 0 , 0 , 322, 2 , CONTROL(None) , SINGLE_REG(None)}, // #109 [ref=1x] + { F(Vex) , 0 , 428, 1 , CONTROL(None) , SINGLE_REG(None)}, // #110 [ref=2x] + { F(Vec) , 0 , 429, 1 , CONTROL(None) , SINGLE_REG(None)}, // #111 [ref=1x] + { F(Mmx) , 0 , 430, 1 , CONTROL(None) , SINGLE_REG(None)}, // #112 [ref=1x] + { 0 , 0 , 431, 1 , CONTROL(None) , SINGLE_REG(None)}, // #113 [ref=2x] + { F(XRelease) , 0 , 0 , 16, CONTROL(None) , SINGLE_REG(None)}, // #114 [ref=1x] + { 0 , 0 , 49 , 9 , CONTROL(None) , SINGLE_REG(None)}, // #115 [ref=1x] + { F(Vec) , 0 , 79 , 2 , CONTROL(None) , 
SINGLE_REG(None)}, // #116 [ref=6x] + { 0 , 0 , 73 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #117 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 324, 2 , CONTROL(None) , SINGLE_REG(None)}, // #118 [ref=1x] + { 0 , 0 , 432, 1 , CONTROL(None) , SINGLE_REG(None)}, // #119 [ref=1x] + { 0 , 0 , 77 , 2 , CONTROL(None) , SINGLE_REG(None)}, // #120 [ref=2x] + { F(Mmx)|F(Vec) , 0 , 433, 1 , CONTROL(None) , SINGLE_REG(None)}, // #121 [ref=1x] + { F(Vec) , 0 , 291, 1 , CONTROL(None) , SINGLE_REG(None)}, // #122 [ref=2x] + { F(Vec) , 0 , 230, 2 , CONTROL(None) , SINGLE_REG(None)}, // #123 [ref=4x] + { F(Vec) , 0 , 434, 1 , CONTROL(None) , SINGLE_REG(None)}, // #124 [ref=2x] + { F(Vec) , 0 , 80 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #125 [ref=3x] + { F(Mmx) , 0 , 435, 1 , CONTROL(None) , SINGLE_REG(None)}, // #126 [ref=1x] + { F(Vec) , 0 , 107, 1 , CONTROL(None) , SINGLE_REG(None)}, // #127 [ref=1x] + { F(Vec) , 0 , 233, 1 , CONTROL(None) , SINGLE_REG(None)}, // #128 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 103, 5 , CONTROL(None) , SINGLE_REG(None)}, // #129 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 436, 1 , CONTROL(None) , SINGLE_REG(None)}, // #130 [ref=1x] + { F(Rep) , 0 , 139, 4 , CONTROL(None) , SINGLE_REG(None)}, // #131 [ref=1x] + { F(Vec) , 0 , 106, 2 , CONTROL(None) , SINGLE_REG(None)}, // #132 [ref=1x] + { F(Vec) , 0 , 326, 2 , CONTROL(None) , SINGLE_REG(None)}, // #133 [ref=1x] + { 0 , 0 , 328, 2 , CONTROL(None) , SINGLE_REG(None)}, // #134 [ref=2x] + { 0 , 0 , 437, 1 , CONTROL(None) , SINGLE_REG(None)}, // #135 [ref=1x] + { F(Vex) , 0 , 330, 2 , CONTROL(None) , SINGLE_REG(None)}, // #136 [ref=1x] + { 0 , 0 , 438, 1 , CONTROL(None) , SINGLE_REG(None)}, // #137 [ref=1x] + { 0 , 0 , 439, 1 , CONTROL(None) , SINGLE_REG(None)}, // #138 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 287, 1 , CONTROL(None) , SINGLE_REG(None)}, // #139 [ref=2x] + { 0 , 0 , 108, 5 , CONTROL(None) , SINGLE_REG(None)}, // #140 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 16 , 12, CONTROL(None) , 
SINGLE_REG(RO) }, // #141 [ref=1x] + { 0 , 0 , 440, 1 , CONTROL(None) , SINGLE_REG(None)}, // #142 [ref=1x] + { F(Rep) , 0 , 441, 1 , CONTROL(None) , SINGLE_REG(None)}, // #143 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 332, 2 , CONTROL(None) , SINGLE_REG(None)}, // #144 [ref=37x] + { F(Mmx)|F(Vec) , 0 , 334, 2 , CONTROL(None) , SINGLE_REG(None)}, // #145 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 332, 2 , CONTROL(None) , SINGLE_REG(RO) }, // #146 [ref=6x] + { F(Mmx)|F(Vec) , 0 , 332, 2 , CONTROL(None) , SINGLE_REG(WO) }, // #147 [ref=16x] + { F(Mmx) , 0 , 332, 1 , CONTROL(None) , SINGLE_REG(None)}, // #148 [ref=26x] + { F(Vec) , 0 , 79 , 1 , CONTROL(None) , SINGLE_REG(WO) }, // #149 [ref=4x] + { F(Vec) , 0 , 442, 1 , CONTROL(None) , SINGLE_REG(None)}, // #150 [ref=1x] + { F(Vec) , 0 , 443, 1 , CONTROL(None) , SINGLE_REG(None)}, // #151 [ref=1x] + { F(Vec) , 0 , 444, 1 , CONTROL(None) , SINGLE_REG(None)}, // #152 [ref=1x] + { F(Vec) , 0 , 445, 1 , CONTROL(None) , SINGLE_REG(None)}, // #153 [ref=1x] + { F(Vec) , 0 , 446, 1 , CONTROL(None) , SINGLE_REG(None)}, // #154 [ref=1x] + { F(Vec) , 0 , 447, 1 , CONTROL(None) , SINGLE_REG(None)}, // #155 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 336, 2 , CONTROL(None) , SINGLE_REG(None)}, // #156 [ref=1x] + { F(Vec) , 0 , 448, 1 , CONTROL(None) , SINGLE_REG(None)}, // #157 [ref=1x] + { F(Vec) , 0 , 449, 1 , CONTROL(None) , SINGLE_REG(None)}, // #158 [ref=1x] + { F(Vec) , 0 , 450, 1 , CONTROL(None) , SINGLE_REG(None)}, // #159 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 451, 1 , CONTROL(None) , SINGLE_REG(None)}, // #160 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 452, 1 , CONTROL(None) , SINGLE_REG(None)}, // #161 [ref=1x] + { F(Vec) , 0 , 260, 1 , CONTROL(None) , SINGLE_REG(None)}, // #162 [ref=2x] + { 0 , 0 , 143, 4 , CONTROL(None) , SINGLE_REG(None)}, // #163 [ref=1x] + { F(Mmx) , 0 , 334, 1 , CONTROL(None) , SINGLE_REG(None)}, // #164 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 338, 2 , CONTROL(None) , SINGLE_REG(None)}, // #165 [ref=8x] + { F(Vec) , 0 , 453, 1 , CONTROL(None) , 
SINGLE_REG(None)}, // #166 [ref=2x] + { 0 , 0 , 454, 1 , CONTROL(None) , SINGLE_REG(None)}, // #167 [ref=1x] + { F(Mmx)|F(Vec) , 0 , 340, 2 , CONTROL(None) , SINGLE_REG(None)}, // #168 [ref=3x] + { 0 , 0 , 147, 4 , CONTROL(None) , SINGLE_REG(None)}, // #169 [ref=1x] + { 0 , 0 , 455, 1 , CONTROL(None) , SINGLE_REG(None)}, // #170 [ref=8x] + { 0 , 0 , 456, 1 , CONTROL(None) , SINGLE_REG(None)}, // #171 [ref=4x] + { 0 , 0 , 457, 1 , CONTROL(None) , SINGLE_REG(None)}, // #172 [ref=8x] + { 0 , 0 , 342, 2 , CONTROL(None) , SINGLE_REG(None)}, // #173 [ref=1x] + { F(Rep)|F(RepIgnored) , 0 , 344, 2 , CONTROL(Return) , SINGLE_REG(None)}, // #174 [ref=1x] + { 0 , 0 , 344, 2 , CONTROL(Return) , SINGLE_REG(None)}, // #175 [ref=1x] + { F(Vex) , 0 , 346, 2 , CONTROL(None) , SINGLE_REG(None)}, // #176 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 16 , 12, CONTROL(None) , SINGLE_REG(WO) }, // #177 [ref=3x] + { F(Rep) , 0 , 151, 4 , CONTROL(None) , SINGLE_REG(None)}, // #178 [ref=1x] + { 0 , 0 , 458, 1 , CONTROL(None) , SINGLE_REG(None)}, // #179 [ref=30x] + { 0 , 0 , 188, 3 , CONTROL(None) , SINGLE_REG(None)}, // #180 [ref=2x] + { 0 , 0 , 459, 1 , CONTROL(None) , SINGLE_REG(None)}, // #181 [ref=3x] + { F(Rep) , 0 , 155, 4 , CONTROL(None) , SINGLE_REG(None)}, // #182 [ref=1x] + { F(Vex) , 0 , 460, 1 , CONTROL(None) , SINGLE_REG(None)}, // #183 [ref=5x] + { 0 , 0 , 66 , 7 , CONTROL(None) , SINGLE_REG(None)}, // #184 [ref=1x] + { F(Tsib)|F(Vex) , 0 , 461, 1 , CONTROL(None) , SINGLE_REG(None)}, // #185 [ref=2x] + { F(Vex) , 0 , 388, 1 , CONTROL(None) , SINGLE_REG(None)}, // #186 [ref=1x] + { F(Tsib)|F(Vex) , 0 , 462, 1 , CONTROL(None) , SINGLE_REG(None)}, // #187 [ref=1x] + { F(Vex) , 0 , 463, 1 , CONTROL(None) , SINGLE_REG(None)}, // #188 [ref=1x] + { 0 , 0 , 464, 1 , CONTROL(None) , SINGLE_REG(None)}, // #189 [ref=2x] + { 0 , 0 , 180, 1 , CONTROL(None) , SINGLE_REG(None)}, // #190 [ref=2x] + { 0 , 0 , 465, 1 , CONTROL(None) , SINGLE_REG(None)}, // #191 [ref=1x] + { 
F(Vec)|F(Evex) , X(K)|X(Z)|X(T4X) , 466, 1 , CONTROL(None) , SINGLE_REG(None)}, // #192 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(T4X) , 467, 1 , CONTROL(None) , SINGLE_REG(None)}, // #193 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #194 [ref=22x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #195 [ref=22x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(ER)|X(SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None)}, // #196 [ref=18x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(ER)|X(SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #197 [ref=17x] + { F(Vec)|F(Vex) , 0 , 191, 2 , CONTROL(None) , SINGLE_REG(None)}, // #198 [ref=15x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #199 [ref=5x] + { F(Vec)|F(Vex) , 0 , 79 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #200 [ref=17x] + { F(Vec)|F(Vex) , 0 , 215, 1 , CONTROL(None) , SINGLE_REG(None)}, // #201 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #202 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #203 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #204 [ref=10x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #205 [ref=12x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) }, // #206 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) }, // #207 [ref=6x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #208 [ref=19x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #209 [ref=12x] + { 
F(Vec)|F(Vex) , 0 , 194, 2 , CONTROL(None) , SINGLE_REG(None)}, // #210 [ref=6x] + { F(Vec)|F(Vex) , 0 , 348, 2 , CONTROL(None) , SINGLE_REG(None)}, // #211 [ref=3x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 470, 1 , CONTROL(None) , SINGLE_REG(None)}, // #212 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 471, 1 , CONTROL(None) , SINGLE_REG(None)}, // #213 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 472, 1 , CONTROL(None) , SINGLE_REG(None)}, // #214 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 473, 1 , CONTROL(None) , SINGLE_REG(None)}, // #215 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 474, 1 , CONTROL(None) , SINGLE_REG(None)}, // #216 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 471, 1 , CONTROL(None) , SINGLE_REG(None)}, // #217 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 475, 1 , CONTROL(None) , SINGLE_REG(None)}, // #218 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(Z)|X(B64)|X(SAE) , 197, 3 , CONTROL(None) , SINGLE_REG(None)}, // #219 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(Z)|X(B32)|X(SAE) , 197, 3 , CONTROL(None) , SINGLE_REG(None)}, // #220 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(Z)|X(SAE) , 476, 1 , CONTROL(None) , SINGLE_REG(None)}, // #221 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(Z)|X(SAE) , 477, 1 , CONTROL(None) , SINGLE_REG(None)}, // #222 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(SAE) , 106, 1 , CONTROL(None) , SINGLE_REG(None)}, // #223 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(SAE) , 257, 1 , CONTROL(None) , SINGLE_REG(None)}, // #224 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 200, 3 , CONTROL(None) , SINGLE_REG(None)}, // #225 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #226 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #227 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 350, 2 , 
CONTROL(None) , SINGLE_REG(None)}, // #228 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 350, 2 , CONTROL(None) , SINGLE_REG(None)}, // #229 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #230 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 350, 2 , CONTROL(None) , SINGLE_REG(None)}, // #231 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(SAE) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #232 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #233 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(SAE) , 209, 3 , CONTROL(None) , SINGLE_REG(None)}, // #234 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #235 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #236 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(ER)|X(SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None)}, // #237 [ref=1x] + { F(Vec)|F(Evex) , X(ER)|X(SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None)}, // #238 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(ER)|X(SAE) , 478, 1 , CONTROL(None) , SINGLE_REG(None)}, // #239 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #240 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(ER)|X(SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None)}, // #241 [ref=1x] + { F(Vec)|F(Evex) , X(ER)|X(SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None)}, // #242 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64)|X(SAE) , 350, 2 , CONTROL(None) , SINGLE_REG(None)}, // #243 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #244 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(SAE) , 350, 2 , CONTROL(None) 
, SINGLE_REG(None)}, // #245 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #246 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(SAE) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #247 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #248 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None)}, // #249 [ref=1x] + { F(Vec)|F(Evex) , X(SAE) , 399, 1 , CONTROL(None) , SINGLE_REG(None)}, // #250 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None)}, // #251 [ref=1x] + { F(Vec)|F(Evex) , X(SAE) , 401, 1 , CONTROL(None) , SINGLE_REG(None)}, // #252 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #253 [ref=1x] + { F(Vec)|F(Evex) , X(ER)|X(SAE) , 478, 1 , CONTROL(None) , SINGLE_REG(None)}, // #254 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #255 [ref=3x] + { F(Vec)|F(Vex) , 0 , 194, 1 , CONTROL(None) , SINGLE_REG(None)}, // #256 [ref=9x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE)|X(B64) , 83 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #257 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE)|X(B32) , 83 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #258 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #259 [ref=9x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 210, 1 , CONTROL(None) , SINGLE_REG(None)}, // #260 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 479, 1 , CONTROL(None) , SINGLE_REG(None)}, // #261 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 211, 1 , CONTROL(None) , SINGLE_REG(None)}, // #262 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 405, 1 , CONTROL(None) , SINGLE_REG(None)}, // #263 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(SAE) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #264 [ref=2x] + { 
F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(SAE) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #265 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE) , 480, 1 , CONTROL(None) , SINGLE_REG(None)}, // #266 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE) , 481, 1 , CONTROL(None) , SINGLE_REG(None)}, // #267 [ref=4x] + { F(Vec)|F(Vex) , 0 , 159, 4 , CONTROL(None) , SINGLE_REG(None)}, // #268 [ref=13x] + { F(Vec)|F(Vex) , 0 , 352, 2 , CONTROL(None) , SINGLE_REG(None)}, // #269 [ref=4x] + { F(Vec)|F(Vex) , 0 , 354, 2 , CONTROL(None) , SINGLE_REG(None)}, // #270 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(B64) , 482, 1 , CONTROL(None) , SINGLE_REG(None)}, // #271 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(B32) , 482, 1 , CONTROL(None) , SINGLE_REG(None)}, // #272 [ref=1x] + { F(Vec)|F(Evex) , X(K) , 483, 1 , CONTROL(None) , SINGLE_REG(None)}, // #273 [ref=1x] + { F(Vec)|F(Evex) , X(K) , 484, 1 , CONTROL(None) , SINGLE_REG(None)}, // #274 [ref=1x] + { F(Vec)|F(Vex) , 0 , 206, 2 , CONTROL(None) , SINGLE_REG(None)}, // #275 [ref=7x] + { F(Vec)|F(Vex) , 0 , 106, 1 , CONTROL(None) , SINGLE_REG(None)}, // #276 [ref=1x] + { F(Vec)|F(Vex) , 0 , 257, 1 , CONTROL(None) , SINGLE_REG(None)}, // #277 [ref=1x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(EvexTwoOp) , X(K) , 163, 4 , CONTROL(None) , SINGLE_REG(None)}, // #278 [ref=2x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(EvexTwoOp) , X(K) , 113, 5 , CONTROL(None) , SINGLE_REG(None)}, // #279 [ref=2x] + { F(Vsib)|F(Evex) , X(K) , 485, 1 , CONTROL(None) , SINGLE_REG(None)}, // #280 [ref=4x] + { F(Vsib)|F(Evex) , X(K) , 486, 1 , CONTROL(None) , SINGLE_REG(None)}, // #281 [ref=4x] + { F(Vsib)|F(Evex) , X(K) , 487, 1 , CONTROL(None) , SINGLE_REG(None)}, // #282 [ref=8x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(EvexTwoOp) , X(K) , 118, 5 , CONTROL(None) , SINGLE_REG(None)}, // #283 [ref=2x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(EvexTwoOp) , X(K) , 212, 3 , CONTROL(None) , SINGLE_REG(None)}, // #284 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE) , 468, 1 , CONTROL(None) , 
SINGLE_REG(None)}, // #285 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #286 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(SAE) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #287 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(SAE) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #288 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #289 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #290 [ref=22x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 356, 1 , CONTROL(None) , SINGLE_REG(None)}, // #291 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 356, 2 , CONTROL(None) , SINGLE_REG(None)}, // #292 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 488, 1 , CONTROL(None) , SINGLE_REG(None)}, // #293 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 481, 1 , CONTROL(None) , SINGLE_REG(None)}, // #294 [ref=1x] + { F(Vec)|F(Vex) , 0 , 224, 2 , CONTROL(None) , SINGLE_REG(None)}, // #295 [ref=1x] + { F(Vex) , 0 , 423, 1 , CONTROL(None) , SINGLE_REG(None)}, // #296 [ref=2x] + { F(Vec)|F(Vex) , 0 , 429, 1 , CONTROL(None) , SINGLE_REG(None)}, // #297 [ref=1x] + { F(Vec)|F(Vex) , 0 , 167, 4 , CONTROL(None) , SINGLE_REG(None)}, // #298 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #299 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #300 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None)}, // #301 [ref=2x] + { 0 , 0 , 358, 2 , CONTROL(None) , SINGLE_REG(None)}, // #302 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 79 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #303 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 360, 2 , CONTROL(None) , SINGLE_REG(None)}, // #304 [ref=1x] + { 
F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 218, 3 , CONTROL(None) , SINGLE_REG(None)}, // #305 [ref=1x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 79 , 4 , CONTROL(None) , SINGLE_REG(None)}, // #306 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 79 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #307 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 232, 1 , CONTROL(None) , SINGLE_REG(None)}, // #308 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 362, 2 , CONTROL(None) , SINGLE_REG(None)}, // #309 [ref=4x] + { F(Vec)|F(Vex) , 0 , 489, 1 , CONTROL(None) , SINGLE_REG(None)}, // #310 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 221, 3 , CONTROL(None) , SINGLE_REG(None)}, // #311 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 224, 3 , CONTROL(None) , SINGLE_REG(None)}, // #312 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 227, 3 , CONTROL(None) , SINGLE_REG(None)}, // #313 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 230, 3 , CONTROL(None) , SINGLE_REG(None)}, // #314 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #315 [ref=5x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 233, 3 , CONTROL(None) , SINGLE_REG(None)}, // #316 [ref=1x] + { 0 , 0 , 364, 2 , CONTROL(None) , SINGLE_REG(None)}, // #317 [ref=1x] + { 0 , 0 , 366, 2 , CONTROL(None) , SINGLE_REG(None)}, // #318 [ref=1x] + { F(Vec)|F(Evex) , X(B32) , 236, 3 , CONTROL(None) , SINGLE_REG(None)}, // #319 [ref=1x] + { F(Vec)|F(Evex) , X(B64) , 236, 3 , CONTROL(None) , SINGLE_REG(None)}, // #320 [ref=1x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 191, 2 , CONTROL(None) , SINGLE_REG(RO) }, // #321 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) }, // #322 [ref=2x] + { F(Vec)|F(Vex)|F(EvexTransformable) , 0 , 191, 2 , CONTROL(None) , SINGLE_REG(WO) }, // #323 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) }, // 
#324 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #325 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) }, // #326 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #327 [ref=13x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 490, 1 , CONTROL(None) , SINGLE_REG(None)}, // #328 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 491, 1 , CONTROL(None) , SINGLE_REG(None)}, // #329 [ref=1x] + { F(Vec)|F(Evex) , 0 , 492, 1 , CONTROL(None) , SINGLE_REG(None)}, // #330 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 239, 3 , CONTROL(None) , SINGLE_REG(None)}, // #331 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 493, 1 , CONTROL(None) , SINGLE_REG(None)}, // #332 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #333 [ref=1x] + { F(Vec)|F(Evex) , X(K) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #334 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(B32) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #335 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #336 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(B32) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #337 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexKReg) , X(K)|X(B64) , 245, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #338 [ref=2x] + { F(Vec)|F(Vex) , 0 , 442, 1 , CONTROL(None) , SINGLE_REG(None)}, // #339 [ref=1x] + { F(Vec)|F(Vex) , 0 , 443, 1 , CONTROL(None) , SINGLE_REG(None)}, // #340 [ref=1x] + { F(Vec)|F(Vex) , 0 , 444, 1 , CONTROL(None) , SINGLE_REG(None)}, // #341 [ref=1x] + { F(Vec)|F(Vex) , 0 , 445, 1 , CONTROL(None) , SINGLE_REG(None)}, // #342 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(B64) , 242, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #343 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, 
// #344 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(PreferEvex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #345 [ref=4x] + { F(Vec)|F(Vex) , 0 , 195, 1 , CONTROL(None) , SINGLE_REG(None)}, // #346 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 192, 2 , CONTROL(None) , SINGLE_REG(None)}, // #347 [ref=2x] + { F(Vec)|F(Vex) , 0 , 171, 4 , CONTROL(None) , SINGLE_REG(None)}, // #348 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 85 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #349 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 175, 4 , CONTROL(None) , SINGLE_REG(None)}, // #350 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 446, 1 , CONTROL(None) , SINGLE_REG(None)}, // #351 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 447, 1 , CONTROL(None) , SINGLE_REG(None)}, // #352 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 494, 1 , CONTROL(None) , SINGLE_REG(None)}, // #353 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 495, 1 , CONTROL(None) , SINGLE_REG(None)}, // #354 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 496, 1 , CONTROL(None) , SINGLE_REG(None)}, // #355 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 497, 1 , CONTROL(None) , SINGLE_REG(None)}, // #356 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 498, 1 , CONTROL(None) , SINGLE_REG(None)}, // #357 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #358 [ref=4x] + { F(Vec)|F(Vex) , 0 , 348, 1 , CONTROL(None) , SINGLE_REG(None)}, // #359 [ref=12x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 191, 3 , CONTROL(None) , SINGLE_REG(RO) }, // #360 [ref=8x] + { F(Vec)|F(Evex) , 0 , 499, 1 , CONTROL(None) , SINGLE_REG(None)}, // #361 [ref=4x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 248, 3 , CONTROL(None) , SINGLE_REG(None)}, // #362 [ref=6x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 251, 3 , 
CONTROL(None) , SINGLE_REG(None)}, // #363 [ref=9x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 254, 3 , CONTROL(None) , SINGLE_REG(None)}, // #364 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 257, 3 , CONTROL(None) , SINGLE_REG(None)}, // #365 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 260, 3 , CONTROL(None) , SINGLE_REG(None)}, // #366 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 203, 3 , CONTROL(None) , SINGLE_REG(None)}, // #367 [ref=6x] + { F(Vec)|F(Vex) , 0 , 159, 2 , CONTROL(None) , SINGLE_REG(None)}, // #368 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #369 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #370 [ref=3x] + { F(Vec)|F(Vex) , 0 , 368, 2 , CONTROL(None) , SINGLE_REG(None)}, // #371 [ref=4x] + { F(Vec)|F(Vsib)|F(Evex) , X(K) , 263, 3 , CONTROL(None) , SINGLE_REG(None)}, // #372 [ref=2x] + { F(Vec)|F(Vsib)|F(Evex) , X(K) , 370, 2 , CONTROL(None) , SINGLE_REG(None)}, // #373 [ref=2x] + { F(Vec)|F(Vsib)|F(Evex) , X(K) , 372, 2 , CONTROL(None) , SINGLE_REG(None)}, // #374 [ref=2x] + { F(Vec)|F(Vsib)|F(Evex) , X(K) , 266, 3 , CONTROL(None) , SINGLE_REG(None)}, // #375 [ref=2x] + { F(Vec)|F(Vex) , 0 , 374, 2 , CONTROL(None) , SINGLE_REG(None)}, // #376 [ref=8x] + { F(Vec)|F(Evex) , X(K) , 269, 3 , CONTROL(None) , SINGLE_REG(None)}, // #377 [ref=5x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #378 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #379 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 91 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #380 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , 0 , 215, 3 , CONTROL(None) , SINGLE_REG(None)}, // #381 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 91 , 6 , CONTROL(None) , SINGLE_REG(None)}, // 
#382 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 91 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #383 [ref=3x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 97 , 6 , CONTROL(None) , SINGLE_REG(None)}, // #384 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #385 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #386 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 191, 3 , CONTROL(None) , SINGLE_REG(WO) }, // #387 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(B32) , 269, 3 , CONTROL(None) , SINGLE_REG(None)}, // #388 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(B64) , 269, 3 , CONTROL(None) , SINGLE_REG(None)}, // #389 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 468, 1 , CONTROL(None) , SINGLE_REG(None)}, // #390 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #391 [ref=2x] + { F(Vec)|F(Vex) , 0 , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #392 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 480, 1 , CONTROL(None) , SINGLE_REG(None)}, // #393 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z) , 481, 1 , CONTROL(None) , SINGLE_REG(None)}, // #394 [ref=1x] + { F(Vec)|F(Vex) , 0 , 215, 2 , CONTROL(None) , SINGLE_REG(None)}, // #395 [ref=2x] + { F(Vec)|F(Vex) , 0 , 480, 1 , CONTROL(None) , SINGLE_REG(None)}, // #396 [ref=1x] + { F(Vec)|F(Vex) , 0 , 481, 1 , CONTROL(None) , SINGLE_REG(None)}, // #397 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #398 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32)|X(ER)|X(SAE) , 191, 3 , CONTROL(None) , SINGLE_REG(None)}, // #399 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(ER)|X(SAE) , 468, 1 , CONTROL(None) , SINGLE_REG(None)}, // #400 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(ER)|X(SAE) , 469, 1 , CONTROL(None) , SINGLE_REG(None)}, // #401 [ref=1x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B32) , 195, 2 , CONTROL(None) 
, SINGLE_REG(None)}, // #402 [ref=2x] + { F(Vec)|F(Evex) , X(K)|X(Z)|X(B64) , 195, 2 , CONTROL(None) , SINGLE_REG(None)}, // #403 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B32) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #404 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64) , 194, 3 , CONTROL(None) , SINGLE_REG(None)}, // #405 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(EvexCompat) , X(K)|X(Z)|X(B64)|X(ER)|X(SAE) , 206, 3 , CONTROL(None) , SINGLE_REG(None)}, // #406 [ref=1x] + { F(Vec)|F(Vex) , 0 , 108, 1 , CONTROL(None) , SINGLE_REG(None)}, // #407 [ref=2x] + { 0 , 0 , 23 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #408 [ref=2x] + { 0 , 0 , 61 , 1 , CONTROL(None) , SINGLE_REG(None)}, // #409 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 0 , 58 , 4 , CONTROL(None) , SINGLE_REG(None)}, // #410 [ref=1x] + { 0 , 0 , 500, 1 , CONTROL(None) , SINGLE_REG(None)}, // #411 [ref=1x] + { F(Lock)|F(XAcquire) , 0 , 58 , 8 , CONTROL(None) , SINGLE_REG(RO) }, // #412 [ref=1x] + { 0 , 0 , 501, 1 , CONTROL(None) , SINGLE_REG(None)}, // #413 [ref=6x] + { 0 , 0 , 502, 1 , CONTROL(None) , SINGLE_REG(None)} // #414 [ref=6x] }; #undef SINGLE_REG #undef CONTROL +#undef X #undef F // ---------------------------------------------------------------------------- // ${InstCommonTable:End} diff --git a/src/asmjit/x86/x86instdb.h b/src/asmjit/x86/x86instdb.h index 6de7527..4c86c9f 100644 --- a/src/asmjit/x86/x86instdb.h +++ b/src/asmjit/x86/x86instdb.h @@ -40,10 +40,14 @@ namespace InstDB { //! Describes which mode is supported by an instruction or instruction signature. enum Mode : uint32_t { - kModeNone = 0x00u, //!< Invalid. - kModeX86 = 0x01u, //!< X86 mode supported. - kModeX64 = 0x02u, //!< X64 mode supported. - kModeAny = 0x03u //!< Both X86 and X64 modes supported. + //! Invalid mode. + kModeNone = 0x00u, + //! X86 mode supported. + kModeX86 = 0x01u, + //! X64 mode supported. + kModeX64 = 0x02u, + //! Both X86 and X64 modes supported. 
+ kModeAny = 0x03u }; static constexpr uint32_t modeFromArch(uint32_t arch) noexcept { @@ -57,45 +61,45 @@ static constexpr uint32_t modeFromArch(uint32_t arch) noexcept { //! Operand flags (X86). enum OpFlags : uint32_t { - kOpNone = 0x00000000u, //!< No flags. - - kOpGpbLo = 0x00000001u, //!< Operand can be low 8-bit GPB register. - kOpGpbHi = 0x00000002u, //!< Operand can be high 8-bit GPB register. - kOpGpw = 0x00000004u, //!< Operand can be 16-bit GPW register. - kOpGpd = 0x00000008u, //!< Operand can be 32-bit GPD register. - kOpGpq = 0x00000010u, //!< Operand can be 64-bit GPQ register. - kOpXmm = 0x00000020u, //!< Operand can be 128-bit XMM register. - kOpYmm = 0x00000040u, //!< Operand can be 256-bit YMM register. - kOpZmm = 0x00000080u, //!< Operand can be 512-bit ZMM register. - kOpMm = 0x00000100u, //!< Operand can be 64-bit MM register. - kOpKReg = 0x00000200u, //!< Operand can be 64-bit K register. - kOpSReg = 0x00000400u, //!< Operand can be SReg (segment register). - kOpCReg = 0x00000800u, //!< Operand can be CReg (control register). - kOpDReg = 0x00001000u, //!< Operand can be DReg (debug register). - kOpSt = 0x00002000u, //!< Operand can be 80-bit ST register (X87). - kOpBnd = 0x00004000u, //!< Operand can be 128-bit BND register. - kOpTmm = 0x00008000u, //!< Operand can be 0..8192-bit TMM register. - kOpAllRegs = 0x0000FFFFu, //!< Combination of all possible registers. - - kOpI4 = 0x00010000u, //!< Operand can be unsigned 4-bit immediate. - kOpU4 = 0x00020000u, //!< Operand can be unsigned 4-bit immediate. - kOpI8 = 0x00040000u, //!< Operand can be signed 8-bit immediate. - kOpU8 = 0x00080000u, //!< Operand can be unsigned 8-bit immediate. - kOpI16 = 0x00100000u, //!< Operand can be signed 16-bit immediate. - kOpU16 = 0x00200000u, //!< Operand can be unsigned 16-bit immediate. - kOpI32 = 0x00400000u, //!< Operand can be signed 32-bit immediate. - kOpU32 = 0x00800000u, //!< Operand can be unsigned 32-bit immediate. 
- kOpI64 = 0x01000000u, //!< Operand can be signed 64-bit immediate. - kOpU64 = 0x02000000u, //!< Operand can be unsigned 64-bit immediate. - kOpAllImm = 0x03FF0000u, //!< Operand can be any immediate. - - kOpMem = 0x04000000u, //!< Operand can be a scalar memory pointer. - kOpVm = 0x08000000u, //!< Operand can be a vector memory pointer. - - kOpRel8 = 0x10000000u, //!< Operand can be relative 8-bit displacement. - kOpRel32 = 0x20000000u, //!< Operand can be relative 32-bit displacement. - - kOpImplicit = 0x80000000u //!< Operand is implicit. + kOpNone = 0x00000000u, //!< No flags. + + kOpGpbLo = 0x00000001u, //!< Operand can be low 8-bit GPB register. + kOpGpbHi = 0x00000002u, //!< Operand can be high 8-bit GPB register. + kOpGpw = 0x00000004u, //!< Operand can be 16-bit GPW register. + kOpGpd = 0x00000008u, //!< Operand can be 32-bit GPD register. + kOpGpq = 0x00000010u, //!< Operand can be 64-bit GPQ register. + kOpXmm = 0x00000020u, //!< Operand can be 128-bit XMM register. + kOpYmm = 0x00000040u, //!< Operand can be 256-bit YMM register. + kOpZmm = 0x00000080u, //!< Operand can be 512-bit ZMM register. + kOpMm = 0x00000100u, //!< Operand can be 64-bit MM register. + kOpKReg = 0x00000200u, //!< Operand can be 64-bit K register. + kOpSReg = 0x00000400u, //!< Operand can be SReg (segment register). + kOpCReg = 0x00000800u, //!< Operand can be CReg (control register). + kOpDReg = 0x00001000u, //!< Operand can be DReg (debug register). + kOpSt = 0x00002000u, //!< Operand can be 80-bit ST register (X87). + kOpBnd = 0x00004000u, //!< Operand can be 128-bit BND register. + kOpTmm = 0x00008000u, //!< Operand can be 0..8192-bit TMM register. + kOpAllRegs = 0x0000FFFFu, //!< Combination of all possible registers. + + kOpI4 = 0x00010000u, //!< Operand can be signed 4-bit immediate. + kOpU4 = 0x00020000u, //!< Operand can be unsigned 4-bit immediate. + kOpI8 = 0x00040000u, //!< Operand can be signed 8-bit immediate.
+ kOpU8 = 0x00080000u, //!< Operand can be unsigned 8-bit immediate. + kOpI16 = 0x00100000u, //!< Operand can be signed 16-bit immediate. + kOpU16 = 0x00200000u, //!< Operand can be unsigned 16-bit immediate. + kOpI32 = 0x00400000u, //!< Operand can be signed 32-bit immediate. + kOpU32 = 0x00800000u, //!< Operand can be unsigned 32-bit immediate. + kOpI64 = 0x01000000u, //!< Operand can be signed 64-bit immediate. + kOpU64 = 0x02000000u, //!< Operand can be unsigned 64-bit immediate. + kOpAllImm = 0x03FF0000u, //!< Operand can be any immediate. + + kOpMem = 0x04000000u, //!< Operand can be a scalar memory pointer. + kOpVm = 0x08000000u, //!< Operand can be a vector memory pointer. + + kOpRel8 = 0x10000000u, //!< Operand can be relative 8-bit displacement. + kOpRel32 = 0x20000000u, //!< Operand can be relative 32-bit displacement. + + kOpImplicit = 0x80000000u //!< Operand is implicit. }; // ============================================================================ @@ -107,31 +111,31 @@ enum MemFlags : uint32_t { // NOTE: Instruction uses either scalar or vector memory operands, they never // collide. This allows us to share bits between "M" and "Vm" enums. - kMemOpAny = 0x0001u, //!< Operand can be any scalar memory pointer. - kMemOpM8 = 0x0002u, //!< Operand can be an 8-bit memory pointer. - kMemOpM16 = 0x0004u, //!< Operand can be a 16-bit memory pointer. - kMemOpM32 = 0x0008u, //!< Operand can be a 32-bit memory pointer. - kMemOpM48 = 0x0010u, //!< Operand can be a 48-bit memory pointer (FAR pointers only). - kMemOpM64 = 0x0020u, //!< Operand can be a 64-bit memory pointer. - kMemOpM80 = 0x0040u, //!< Operand can be an 80-bit memory pointer. - kMemOpM128 = 0x0080u, //!< Operand can be a 128-bit memory pointer. - kMemOpM256 = 0x0100u, //!< Operand can be a 256-bit memory pointer. - kMemOpM512 = 0x0200u, //!< Operand can be a 512-bit memory pointer. - kMemOpM1024 = 0x0400u, //!< Operand can be a 1024-bit memory pointer. 
- - kMemOpVm32x = 0x0002u, //!< Operand can be a vm32x (vector) pointer. - kMemOpVm32y = 0x0004u, //!< Operand can be a vm32y (vector) pointer. - kMemOpVm32z = 0x0008u, //!< Operand can be a vm32z (vector) pointer. - kMemOpVm64x = 0x0020u, //!< Operand can be a vm64x (vector) pointer. - kMemOpVm64y = 0x0040u, //!< Operand can be a vm64y (vector) pointer. - kMemOpVm64z = 0x0080u, //!< Operand can be a vm64z (vector) pointer. - - kMemOpBaseOnly = 0x0800u, //!< Only memory base is allowed (no index, no offset). - kMemOpDs = 0x1000u, //!< Implicit memory operand's DS segment. - kMemOpEs = 0x2000u, //!< Implicit memory operand's ES segment. - - kMemOpMib = 0x4000u, //!< Operand must be MIB (base+index) pointer. - kMemOpTMem = 0x8000u //!< Operand is a sib_mem (ADX memory operand). + kMemOpAny = 0x0001u, //!< Operand can be any scalar memory pointer. + kMemOpM8 = 0x0002u, //!< Operand can be an 8-bit memory pointer. + kMemOpM16 = 0x0004u, //!< Operand can be a 16-bit memory pointer. + kMemOpM32 = 0x0008u, //!< Operand can be a 32-bit memory pointer. + kMemOpM48 = 0x0010u, //!< Operand can be a 48-bit memory pointer (FAR pointers only). + kMemOpM64 = 0x0020u, //!< Operand can be a 64-bit memory pointer. + kMemOpM80 = 0x0040u, //!< Operand can be an 80-bit memory pointer. + kMemOpM128 = 0x0080u, //!< Operand can be a 128-bit memory pointer. + kMemOpM256 = 0x0100u, //!< Operand can be a 256-bit memory pointer. + kMemOpM512 = 0x0200u, //!< Operand can be a 512-bit memory pointer. + kMemOpM1024 = 0x0400u, //!< Operand can be a 1024-bit memory pointer. + + kMemOpVm32x = 0x0002u, //!< Operand can be a vm32x (vector) pointer. + kMemOpVm32y = 0x0004u, //!< Operand can be a vm32y (vector) pointer. + kMemOpVm32z = 0x0008u, //!< Operand can be a vm32z (vector) pointer. + kMemOpVm64x = 0x0020u, //!< Operand can be a vm64x (vector) pointer. + kMemOpVm64y = 0x0040u, //!< Operand can be a vm64y (vector) pointer. + kMemOpVm64z = 0x0080u, //!< Operand can be a vm64z (vector) pointer. 
+ + kMemOpBaseOnly = 0x0800u, //!< Only memory base is allowed (no index, no offset). + kMemOpDs = 0x1000u, //!< Implicit memory operand's DS segment. + kMemOpEs = 0x2000u, //!< Implicit memory operand's ES segment. + + kMemOpMib = 0x4000u, //!< Operand must be MIB (base+index) pointer. + kMemOpTMem = 0x8000u //!< Operand is a sib_mem (AMX memory operand). }; // ============================================================================ @@ -142,76 +146,69 @@ enum MemFlags : uint32_t { //! //! Details about instruction encoding, operation, features, and some limitations. enum Flags : uint32_t { - kFlagNone = 0x00000000u, //!< No flags. + kFlagNone = 0x00000000u, //!< No flags. // Instruction Family // ------------------ // // Instruction family information. - kFlagFpu = 0x00000100u, //!< Instruction that accesses FPU registers. - kFlagMmx = 0x00000200u, //!< Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS. - kFlagVec = 0x00000400u, //!< Instruction that accesses XMM registers (SSE, AVX, AVX512). - - // Prefixes and Encoding Flags - // --------------------------- - // - // These describe optional X86 prefixes that can be used to change the instruction's operation. - - kFlagTsib = 0x00000800u, //!< Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB). - kFlagRep = 0x00001000u, //!< Instruction can be prefixed with using the REP(REPE) or REPNE prefix. - kFlagRepIgnored = 0x00002000u, //!< Instruction ignores REP|REPNE prefixes, but they are accepted. - kFlagLock = 0x00004000u, //!< Instruction can be prefixed with using the LOCK prefix. - kFlagXAcquire = 0x00008000u, //!< Instruction can be prefixed with using the XACQUIRE prefix. - kFlagXRelease = 0x00010000u, //!< Instruction can be prefixed with using the XRELEASE prefix. - kFlagMib = 0x00020000u, //!< Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers. - kFlagVsib = 0x00040000u, //!< Instruction uses VSIB instead of legacy SIB.
- kFlagVex = 0x00080000u, //!< Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...). - kFlagEvex = 0x00100000u, //!< Instruction can be encoded by EVEX (AVX512). - kFlagPreferEvex = 0x00200000u, //!< EVEX encoding is preferred over VEX encoding (AVX515_VNNI vs AVX_VNNI). + kFlagFpu = 0x00000100u, //!< Instruction that accesses FPU registers. + kFlagMmx = 0x00000200u, //!< Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS. + kFlagVec = 0x00000400u, //!< Instruction that accesses XMM registers (SSE, AVX, AVX512). // FPU Flags // --------- // // Used to tell the encoder which memory operand sizes are encodable. - kFlagFpuM16 = 0x00200000u, //!< FPU instruction can address `word_ptr` (shared with M80). - kFlagFpuM32 = 0x00400000u, //!< FPU instruction can address `dword_ptr`. - kFlagFpuM64 = 0x00800000u, //!< FPU instruction can address `qword_ptr`. - kFlagFpuM80 = 0x00200000u, //!< FPU instruction can address `tword_ptr` (shared with M16). + kFlagFpuM16 = 0x00000800u, //!< FPU instruction can address `word_ptr` (shared with M80). + kFlagFpuM32 = 0x00001000u, //!< FPU instruction can address `dword_ptr`. + kFlagFpuM64 = 0x00002000u, //!< FPU instruction can address `qword_ptr`. + kFlagFpuM80 = 0x00000800u, //!< FPU instruction can address `tword_ptr` (shared with M16). - // AVX and AVX515 Flags - // -------------------- + // Prefixes and Encoding Flags + // --------------------------- // + // These describe optional X86 prefixes that can be used to change the instruction's operation. + + kFlagTsib = 0x00004000u, //!< Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB). + kFlagRep = 0x00008000u, //!< Instruction can be prefixed with using the REP(REPE) or REPNE prefix. + kFlagRepIgnored = 0x00010000u, //!< Instruction ignores REP|REPNE prefixes, but they are accepted. + kFlagLock = 0x00020000u, //!< Instruction can be prefixed with using the LOCK prefix. 
+ kFlagXAcquire = 0x00040000u, //!< Instruction can be prefixed with using the XACQUIRE prefix. + kFlagXRelease = 0x00080000u, //!< Instruction can be prefixed with using the XRELEASE prefix. + kFlagMib = 0x00100000u, //!< Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers. + kFlagVsib = 0x00200000u, //!< Instruction uses VSIB instead of legacy SIB. + // If both `kFlagPrefixVex` and `kFlagPrefixEvex` flags are specified it // means that the instructions can be encoded by either VEX or EVEX prefix. // In that case AsmJit checks global options and also instruction options // to decide whether to emit VEX or EVEX prefix. - kFlagAvx512_ = 0x00000000u, //!< Internally used in tables, has no meaning. - kFlagAvx512K = 0x01000000u, //!< Supports masking {k1..k7}. - kFlagAvx512Z = 0x02000000u, //!< Supports zeroing {z}, must be used together with `kAvx512k`. - kFlagAvx512ER = 0x04000000u, //!< Supports 'embedded-rounding' {er} with implicit {sae}, - kFlagAvx512SAE = 0x08000000u, //!< Supports 'suppress-all-exceptions' {sae}. - kFlagAvx512B32 = 0x10000000u, //!< Supports 32-bit broadcast 'b32'. - kFlagAvx512B64 = 0x20000000u, //!< Supports 64-bit broadcast 'b64'. - kFlagAvx512T4X = 0x80000000u, //!< Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW). - - // Combinations used by instruction tables to make AVX512 definitions more compact. 
- kFlagAvx512KZ = kFlagAvx512K | kFlagAvx512Z, - kFlagAvx512ER_SAE = kFlagAvx512ER | kFlagAvx512SAE, - kFlagAvx512KZ_SAE = kFlagAvx512KZ | kFlagAvx512SAE, - kFlagAvx512KZ_SAE_B32 = kFlagAvx512KZ_SAE | kFlagAvx512B32, - kFlagAvx512KZ_SAE_B64 = kFlagAvx512KZ_SAE | kFlagAvx512B64, - - kFlagAvx512KZ_ER_SAE = kFlagAvx512KZ | kFlagAvx512ER_SAE, - kFlagAvx512KZ_ER_SAE_B32 = kFlagAvx512KZ_ER_SAE | kFlagAvx512B32, - kFlagAvx512KZ_ER_SAE_B64 = kFlagAvx512KZ_ER_SAE | kFlagAvx512B64, - - kFlagAvx512K_B32 = kFlagAvx512K | kFlagAvx512B32, - kFlagAvx512K_B64 = kFlagAvx512K | kFlagAvx512B64, - kFlagAvx512KZ_B32 = kFlagAvx512KZ | kFlagAvx512B32, - kFlagAvx512KZ_B64 = kFlagAvx512KZ | kFlagAvx512B64 + kFlagVex = 0x00400000u, //!< Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...). + kFlagEvex = 0x00800000u, //!< Instruction can be encoded by EVEX (AVX512). + kFlagPreferEvex = 0x01000000u, //!< EVEX encoding is preferred over VEX encoding (AVX512_VNNI vs AVX_VNNI). + kFlagEvexCompat = 0x02000000u, //!< EVEX and VEX signatures are compatible. + kFlagEvexKReg = 0x04000000u, //!< EVEX instruction requires K register in the first operand (compare instructions). + kFlagEvexTwoOp = 0x08000000u, //!< EVEX instruction requires two operands and K register as a selector (gather instructions). + kFlagEvexTransformable = 0x10000000u //!< VEX instruction that can be transformed to a compatible EVEX instruction. +}; + +// ============================================================================ +// [asmjit::x86::InstDB::Avx512Flags] +// ============================================================================ + +//! AVX512 flags. +enum Avx512Flags : uint32_t { + kAvx512Flag_ = 0x00000000u, //!< Internally used in tables, has no meaning. + kAvx512FlagK = 0x00000001u, //!< Supports masking {k1..k7}. + kAvx512FlagZ = 0x00000002u, //!< Supports zeroing {z}, must be used together with `kAvx512FlagK`.
+ kAvx512FlagER = 0x00000004u, //!< Supports 'embedded-rounding' {er} with implicit {sae}. + kAvx512FlagSAE = 0x00000008u, //!< Supports 'suppress-all-exceptions' {sae}. + kAvx512FlagB32 = 0x00000010u, //!< Supports 32-bit broadcast 'b32'. + kAvx512FlagB64 = 0x00000020u, //!< Supports 64-bit broadcast 'b64'. + kAvx512FlagT4X = 0x00000080u //!< Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW). }; // ============================================================================ @@ -277,6 +274,8 @@ ASMJIT_VARAPI const InstSignature _instSignatureTable[]; struct CommonInfo { //! Instruction flags. uint32_t _flags; + //! AVX-512 flags, see \ref Avx512Flags. + uint32_t _avx512Flags : 11; //! First `InstSignature` entry in the database. uint32_t _iSignatureIndex : 11; //! Number of relevant `ISignature` entries. @@ -285,18 +284,21 @@ struct CommonInfo { uint32_t _controlType : 3; //! Specifies what happens if all source operands share the same register. uint32_t _singleRegCase : 2; - //! Reserved for future use. - uint32_t _reserved : 11; // -------------------------------------------------------------------------- // [Accessors] // -------------------------------------------------------------------------- - //! Returns instruction flags, see `InstInfo::Flags`. + //! Returns instruction flags, see \ref Flags. inline uint32_t flags() const noexcept { return _flags; } - //! Tests whether the instruction has a `flag`, see `InstInfo::Flags`. + //! Tests whether the instruction has a `flag`, see \ref Flags. inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + //! Returns instruction AVX-512 flags, see \ref Avx512Flags. + inline uint32_t avx512Flags() const noexcept { return _avx512Flags; } + //! Tests whether the instruction has an AVX-512 `flag`, see \ref Avx512Flags. + inline bool hasAvx512Flag(uint32_t flag) const noexcept { return (_avx512Flags & flag) != 0; } + //! Tests whether the instruction is FPU instruction.
inline bool isFpu() const noexcept { return hasFlag(kFlagFpu); } //! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS). @@ -335,20 +337,25 @@ struct CommonInfo { //! Tests whether the instruction should prefer EVEX prefix instead of VEX prefix. inline bool preferEvex() const noexcept { return hasFlag(kFlagPreferEvex); } + inline bool isEvexCompatible() const noexcept { return hasFlag(kFlagEvexCompat); } + inline bool isEvexKRegOnly() const noexcept { return hasFlag(kFlagEvexKReg); } + inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(kFlagEvexTwoOp); } + inline bool isEvexTransformable() const noexcept { return hasFlag(kFlagEvexTransformable); } + //! Tests whether the instruction supports AVX512 masking {k}. - inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); } + inline bool hasAvx512K() const noexcept { return hasAvx512Flag(kAvx512FlagK); } //! Tests whether the instruction supports AVX512 zeroing {k}{z}. - inline bool hasAvx512Z() const noexcept { return hasFlag(kFlagAvx512Z); } + inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(kAvx512FlagZ); } //! Tests whether the instruction supports AVX512 embedded-rounding {er}. - inline bool hasAvx512ER() const noexcept { return hasFlag(kFlagAvx512ER); } + inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(kAvx512FlagER); } //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}. - inline bool hasAvx512SAE() const noexcept { return hasFlag(kFlagAvx512SAE); } + inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(kAvx512FlagSAE); } //! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit). - inline bool hasAvx512B() const noexcept { return hasFlag(kFlagAvx512B32 | kFlagAvx512B64); } + inline bool hasAvx512B() const noexcept { return hasAvx512Flag(kAvx512FlagB32 | kAvx512FlagB64); } //! 
Tests whether the instruction supports AVX512 broadcast (32-bit). - inline bool hasAvx512B32() const noexcept { return hasFlag(kFlagAvx512B32); } + inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(kAvx512FlagB32); } //! Tests whether the instruction supports AVX512 broadcast (64-bit). - inline bool hasAvx512B64() const noexcept { return hasFlag(kFlagAvx512B64); } + inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(kAvx512FlagB64); } inline uint32_t signatureIndex() const noexcept { return _iSignatureIndex; } inline uint32_t signatureCount() const noexcept { return _iSignatureCount; } @@ -394,10 +401,15 @@ struct InstInfo { //! Returns common information, see `CommonInfo`. inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; } - //! Tests whether the instruction has flag `flag`, see `Flags`. - inline bool hasFlag(uint32_t flag) const noexcept { return commonInfo().hasFlag(flag); } - //! Returns instruction flags, see `Flags`. + //! Returns instruction flags, see \ref Flags. inline uint32_t flags() const noexcept { return commonInfo().flags(); } + //! Tests whether the instruction has flag `flag`, see \ref Flags. + inline bool hasFlag(uint32_t flag) const noexcept { return commonInfo().hasFlag(flag); } + + //! Returns instruction AVX-512 flags, see \ref Avx512Flags. + inline uint32_t avx512Flags() const noexcept { return commonInfo().avx512Flags(); } + //! Tests whether the instruction has an AVX-512 `flag`, see \ref Avx512Flags. + inline bool hasAvx512Flag(uint32_t flag) const noexcept { return commonInfo().hasAvx512Flag(flag); } //! Tests whether the instruction is FPU instruction. inline bool isFpu() const noexcept { return commonInfo().isFpu(); } @@ -432,20 +444,25 @@ struct InstInfo { //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable). 
inline bool isVexOrEvex() const noexcept { return hasFlag(kFlagVex | kFlagEvex); } + inline bool isEvexCompatible() const noexcept { return hasFlag(kFlagEvexCompat); } + inline bool isEvexKRegOnly() const noexcept { return hasFlag(kFlagEvexKReg); } + inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(kFlagEvexTwoOp); } + inline bool isEvexTransformable() const noexcept { return hasFlag(kFlagEvexTransformable); } + //! Tests whether the instruction supports AVX512 masking {k}. - inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); } + inline bool hasAvx512K() const noexcept { return hasAvx512Flag(kAvx512FlagK); } //! Tests whether the instruction supports AVX512 zeroing {k}{z}. - inline bool hasAvx512Z() const noexcept { return hasFlag(kFlagAvx512Z); } + inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(kAvx512FlagZ); } //! Tests whether the instruction supports AVX512 embedded-rounding {er}. - inline bool hasAvx512ER() const noexcept { return hasFlag(kFlagAvx512ER); } + inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(kAvx512FlagER); } //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}. - inline bool hasAvx512SAE() const noexcept { return hasFlag(kFlagAvx512SAE); } + inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(kAvx512FlagSAE); } //! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit). - inline bool hasAvx512B() const noexcept { return hasFlag(kFlagAvx512B32 | kFlagAvx512B64); } + inline bool hasAvx512B() const noexcept { return hasAvx512Flag(kAvx512FlagB32 | kAvx512FlagB64); } //! Tests whether the instruction supports AVX512 broadcast (32-bit). - inline bool hasAvx512B32() const noexcept { return hasFlag(kFlagAvx512B32); } + inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(kAvx512FlagB32); } //! Tests whether the instruction supports AVX512 broadcast (64-bit). 
- inline bool hasAvx512B64() const noexcept { return hasFlag(kFlagAvx512B64); } + inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(kAvx512FlagB64); } //! Gets the control-flow type of the instruction. inline uint32_t controlType() const noexcept { return commonInfo().controlType(); } diff --git a/src/asmjit/x86/x86rapass.cpp b/src/asmjit/x86/x86rapass.cpp index eac546f..9e46ce0 100644 --- a/src/asmjit/x86/x86rapass.cpp +++ b/src/asmjit/x86/x86rapass.cpp @@ -140,6 +140,39 @@ Error RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& uint32_t singleRegOps = 0; if (opCount) { + // The mask is for all registers, but we are mostly interested in AVX-512 + // registers at the moment. The mask will be combined with all available + // registers of the Compiler at the end so it never uses more registers + // than available. + uint32_t instructionAllowedRegs = 0xFFFFFFFFu; + + if (instInfo.isEvex()) { + // EVEX instructions and VEX instructions that can be encoded with EVEX + // have the possibility to use 32 SIMD registers (XMM/YMM/ZMM). + if (instInfo.isVex() && !instInfo.isEvexCompatible()) { + if (instInfo.isEvexKRegOnly()) { + // EVEX encodable only if the first operand is K register (compare instructions). + if (!Reg::isKReg(opArray[0])) + instructionAllowedRegs = 0xFFFFu; + } + else if (instInfo.isEvexTwoOpOnly()) { + // EVEX encodable only if the instruction has two operands (gather instructions). + if (opCount != 2) + instructionAllowedRegs = 0xFFFFu; + } + else { + instructionAllowedRegs = 0xFFFFu; + } + } + } + else if (instInfo.isEvexTransformable()) { + ib.addAggregatedFlags(RAInst::kFlagIsTransformable); + } + else { + // Not EVEX, restrict everything to [0-15] registers.
+ instructionAllowedRegs = 0xFFFFu; + } + for (uint32_t i = 0; i < opCount; i++) { const Operand& op = opArray[i]; const OpRWInfo& opRwInfo = rwInfo.operand(i); @@ -150,7 +183,7 @@ Error RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& const Reg& reg = op.as<Reg>(); uint32_t flags = raRegRwFlags(opRwInfo.opFlags()); - uint32_t allowedRegs = 0xFFFFFFFFu; + uint32_t allowedRegs = instructionAllowedRegs; // X86-specific constraints related to LO|HI general purpose registers. // This is only required when the register is part of the encoding. If @@ -281,7 +314,7 @@ Error RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& uint32_t flags = raMemIndexRwFlags(opRwInfo.opFlags()); uint32_t group = workReg->group(); - uint32_t allocable = _pass->_availableRegs[group]; + uint32_t allocable = _pass->_availableRegs[group] & instructionAllowedRegs; // Index registers have never fixed id on X86/x64. const uint32_t useId = BaseReg::kIdBad; @@ -314,7 +347,7 @@ Error RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& if (group == Gp::kGroupKReg) { // AVX-512 mask selector {k} register - read-only, allocable to any register except {k0}. - uint32_t allocableRegs= _pass->_availableRegs[group] & ~Support::bitMask(0); + uint32_t allocableRegs = _pass->_availableRegs[group]; ASMJIT_PROPAGATE(ib.add(workReg, RATiedReg::kUse | RATiedReg::kRead, allocableRegs, BaseReg::kIdBad, rewriteMask, BaseReg::kIdBad, 0)); singleRegOps = 0; } @@ -1106,13 +1139,21 @@ X86RAPass::~X86RAPass() noexcept {} void X86RAPass::onInit() noexcept { uint32_t arch = cc()->arch(); uint32_t baseRegCount = Environment::is32Bit(arch) ? 
8u : 16u; + uint32_t simdRegCount = baseRegCount; + + if (Environment::is64Bit(arch) && _func->frame().isAvx512Enabled()) + simdRegCount = 32u; + + bool avxEnabled = _func->frame().isAvxEnabled(); + bool avx512Enabled = _func->frame().isAvx512Enabled(); _emitHelper._emitter = _cb; - _emitHelper._avxEnabled = _func->frame().isAvxEnabled(); + _emitHelper._avxEnabled = avxEnabled || avx512Enabled; + _emitHelper._avx512Enabled = avx512Enabled; _archTraits = &ArchTraits::byArch(arch); _physRegCount.set(Reg::kGroupGp , baseRegCount); - _physRegCount.set(Reg::kGroupVec , baseRegCount); + _physRegCount.set(Reg::kGroupVec , simdRegCount); _physRegCount.set(Reg::kGroupMm , 8); _physRegCount.set(Reg::kGroupKReg, 8); _buildPhysIndex(); @@ -1148,6 +1189,135 @@ Error X86RAPass::buildCFG() noexcept { } // ============================================================================ +// [asmjit::x86::X86RAPass - Rewrite] +// ============================================================================ + +static uint32_t transformVexToEvex(uint32_t instId) { + switch (instId) { + case Inst::kIdVbroadcastf128: return Inst::kIdVbroadcastf32x4; + case Inst::kIdVbroadcasti128: return Inst::kIdVbroadcasti32x4; + case Inst::kIdVextractf128: return Inst::kIdVextractf32x4; + case Inst::kIdVextracti128: return Inst::kIdVextracti32x4; + case Inst::kIdVinsertf128: return Inst::kIdVinsertf32x4; + case Inst::kIdVinserti128: return Inst::kIdVinserti32x4; + case Inst::kIdVmovdqa: return Inst::kIdVmovdqa32; + case Inst::kIdVmovdqu: return Inst::kIdVmovdqu32; + case Inst::kIdVpand: return Inst::kIdVpandd; + case Inst::kIdVpandn: return Inst::kIdVpandnd; + case Inst::kIdVpor: return Inst::kIdVpord; + case Inst::kIdVpxor: return Inst::kIdVpxord; + + default: + // This should never happen as only transformable instructions should go this path. 
+ ASMJIT_ASSERT(false); + return 0; + } +} + +ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept { + uint32_t virtCount = cc()->_vRegArray.size(); + + BaseNode* node = first; + while (node != stop) { + BaseNode* next = node->next(); + if (node->isInst()) { + InstNode* inst = node->as<InstNode>(); + RAInst* raInst = node->passData<RAInst>(); + + Operand* operands = inst->operands(); + uint32_t opCount = inst->opCount(); + uint32_t maxRegId = 0; + + uint32_t i; + + // Rewrite virtual registers into physical registers. + if (raInst) { + // If the instruction contains pass data (raInst) then it was a subject + // for register allocation and must be rewritten to use physical regs. + RATiedReg* tiedRegs = raInst->tiedRegs(); + uint32_t tiedCount = raInst->tiedCount(); + + for (i = 0; i < tiedCount; i++) { + RATiedReg* tiedReg = &tiedRegs[i]; + + Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask()); + uint32_t useId = tiedReg->useId(); + while (useIt.hasNext()) { + maxRegId = Support::max(maxRegId, useId); + inst->rewriteIdAtIndex(useIt.next(), useId); + } + + Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask()); + uint32_t outId = tiedReg->outId(); + while (outIt.hasNext()) { + maxRegId = Support::max(maxRegId, outId); + inst->rewriteIdAtIndex(outIt.next(), outId); + } + } + + if (raInst->isTransformable()) { + if (maxRegId > 15) { + // Transform VEX instruction to EVEX. + inst->setId(transformVexToEvex(inst->id())); + } + } + + // This data is allocated by Zone passed to `runOnFunction()`, which + // will be reset after the RA pass finishes. So reset this data to + // prevent having a dead pointer after the RA pass is complete. 
+ node->resetPassData(); + + if (ASMJIT_UNLIKELY(node->type() != BaseNode::kNodeInst)) { + // FuncRet terminates the flow, it must either be removed if the exit + // label is next to it (optimization) or patched to an architecture + // dependent jump instruction that jumps to the function's exit before + // the epilog. + if (node->type() == BaseNode::kNodeFuncRet) { + RABlock* block = raInst->block(); + if (!isNextTo(node, _func->exitNode())) { + cc()->_setCursor(node->prev()); + ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label())); + } + + BaseNode* prev = node->prev(); + cc()->removeNode(node); + block->setLast(prev); + } + } + } + + // Rewrite stack slot addresses. + for (i = 0; i < opCount; i++) { + Operand& op = operands[i]; + if (op.isMem()) { + BaseMem& mem = op.as<BaseMem>(); + if (mem.isRegHome()) { + uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId()); + if (ASMJIT_UNLIKELY(virtIndex >= virtCount)) + return DebugUtils::errored(kErrorInvalidVirtId); + + VirtReg* virtReg = cc()->virtRegByIndex(virtIndex); + RAWorkReg* workReg = virtReg->workReg(); + ASMJIT_ASSERT(workReg != nullptr); + + RAStackSlot* slot = workReg->stackSlot(); + int32_t offset = slot->offset(); + + mem._setBase(_sp.type(), slot->baseRegId()); + mem.clearRegHome(); + mem.addOffsetLo32(offset); + } + } + } + } + + node = next; + } + + return kErrorOk; +} + +// ============================================================================ // [asmjit::x86::X86RAPass - OnEmit] // ============================================================================ diff --git a/src/asmjit/x86/x86rapass_p.h b/src/asmjit/x86/x86rapass_p.h index efcfd3c..5faa04f 100644 --- a/src/asmjit/x86/x86rapass_p.h +++ b/src/asmjit/x86/x86rapass_p.h @@ -77,6 +77,7 @@ public: // -------------------------------------------------------------------------- inline bool avxEnabled() const noexcept { return _emitHelper._avxEnabled; } + inline bool avx512Enabled() const noexcept { return _emitHelper._avx512Enabled; } 
inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept { return avxEnabled() ? avxInstId : sseInstId; @@ -96,6 +97,12 @@ public: Error buildCFG() noexcept override; // -------------------------------------------------------------------------- + // [Rewrite] + // -------------------------------------------------------------------------- + + Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override; + + // -------------------------------------------------------------------------- // [Emit] // -------------------------------------------------------------------------- diff --git a/tools/tablegen-x86.js b/tools/tablegen-x86.js index 21fc1b6..d80f382 100644 --- a/tools/tablegen-x86.js +++ b/tools/tablegen-x86.js @@ -149,6 +149,21 @@ class Filter { // [tablegen.x86.GenUtils] // ============================================================================ +const VexToEvexMap = { + "vbroadcastf128": "vbroadcastf32x4", + "vbroadcasti128": "vbroadcasti32x4", + "vextractf128": "vextractf32x4", + "vextracti128": "vextracti32x4", + "vinsertf128": "vinsertf32x4", + "vinserti128": "vinserti32x4", + "vmovdqa": "vmovdqa32", + "vmovdqu": "vmovdqu32", + "vpand": "vpandd", + "vpandn": "vpandnd", + "vpor": "vpord", + "vpxor": "vpxord" +}; + class GenUtils { static cpuArchOf(dbInsts) { var anyArch = false; @@ -169,15 +184,60 @@ class GenUtils { return ArrayUtils.sorted(dbInsts.unionCpuFeatures()); } - static flagsOf(dbInsts) { - function replace(map, a, b, c) { - if (map[a] && map[b]) { - delete map[a]; - delete map[b]; - map[c] = true; + static assignVexEvexCompatibilityFlags(f, dbInsts) { + const vexInsts = dbInsts.filter((inst) => { return inst.prefix === "VEX"; }); + const evexInsts = dbInsts.filter((inst) => { return inst.prefix === "EVEX"; }); + + function isCompatible(vexInst, evexInst) { + if (vexInst.operands.length !== evexInst.operands.length) + return false; + + for (let i = 0; i < vexInst.operands.length; i++) { + const vexOp = vexInst.operands[i]; + 
const evexOp = evexInst.operands[i]; + + if (vexOp.data === evexOp.data) + continue; + + if (vexOp.reg && vexOp.reg === evexOp.reg) + continue; + if (vexOp.mem && vexOp.mem === evexOp.mem) + continue; + + return false; } + return true; } + let compatible = 0; + for (const vexInst of vexInsts) { + for (const evexInst of evexInsts) { + if (isCompatible(vexInst, evexInst)) { + compatible++; + break; + } + } + } + + if (compatible == vexInsts.length) { + f.EvexCompat = true; + return true; + } + + if (evexInsts[0].operands[0].reg === "k") { + f.EvexKReg = true; + return true; + } + + if (evexInsts[0].operands.length == 2 && vexInsts[0].operands.length === 3) { + f.EvexTwoOp = true; + return true; + } + + return false; + } + + static flagsOf(dbInsts) { const f = Object.create(null); var i, j; @@ -201,7 +261,7 @@ class GenUtils { const op = operands[j]; if (op.reg === "mm") mmx = true; - else if (/^(k|xmm|ymm|zmm)$/.test(op.reg)) { + else if (/^(xmm|ymm|zmm)$/.test(op.reg)) { vec = true; } } @@ -256,20 +316,14 @@ class GenUtils { if (dbInst.broadcast) f["Avx512B" + String(dbInst.elementSize)] = true; if (dbInst.tupleType === "T1_4X") f.Avx512T4X = true; } + + if (VexToEvexMap[dbInst.name]) + f.EvexTransformable = true; } - replace(f, "Avx512K" , "Avx512Z" , "Avx512KZ"); - replace(f, "Avx512ER" , "Avx512SAE" , "Avx512ER_SAE"); - replace(f, "Avx512KZ" , "Avx512SAE" , "Avx512KZ_SAE"); - replace(f, "Avx512KZ" , "Avx512ER_SAE", "Avx512KZ_ER_SAE"); - replace(f, "Avx512K" , "Avx512B32" , "Avx512K_B32"); - replace(f, "Avx512K" , "Avx512B64" , "Avx512K_B64"); - replace(f, "Avx512KZ" , "Avx512B32" , "Avx512KZ_B32"); - replace(f, "Avx512KZ" , "Avx512B64" , "Avx512KZ_B64"); - replace(f, "Avx512KZ_SAE" , "Avx512B32" , "Avx512KZ_SAE_B32"); - replace(f, "Avx512KZ_SAE" , "Avx512B64" , "Avx512KZ_SAE_B64"); - replace(f, "Avx512KZ_ER_SAE", "Avx512B32" , "Avx512KZ_ER_SAE_B32"); - replace(f, "Avx512KZ_ER_SAE", "Avx512B64" , "Avx512KZ_ER_SAE_B64"); + if (f.Vex && f.Evex) { + 
GenUtils.assignVexEvexCompatibilityFlags(f, dbInsts) + } return Object.getOwnPropertyNames(f); } @@ -2382,25 +2436,33 @@ class InstCommonTable extends core.Task { const table = new IndexedArray(); insts.forEach((inst) => { - const flags = inst.flags.map(function(flag) { return `F(${flag})`; }).join("|") || "0"; + const commonFlagsArray = inst.flags.filter((flag) => { return !flag.startsWith("Avx512"); }); + const avx512FlagsArray = inst.flags.filter((flag) => { return flag.startsWith("Avx512"); }); + + const commonFlags = commonFlagsArray.map(function(flag) { return `F(${flag })`; }).join("|") || "0"; + const avx512Flags = avx512FlagsArray.map(function(flag) { return `X(${flag.substr(6)})`; }).join("|") || "0"; + const singleRegCase = `SINGLE_REG(${inst.singleRegCase})`; - const controlType = `CONTROL(${inst.controlType})`; + const controlType = `CONTROL(${inst.controlType})`; const row = "{ " + - String(flags ).padEnd(54) + ", " + + String(commonFlags ).padEnd(50) + ", " + + String(avx512Flags ).padEnd(30) + ", " + String(inst.signatureIndex).padEnd( 3) + ", " + String(inst.signatureCount).padEnd( 2) + ", " + String(controlType ).padEnd(16) + ", " + - String(singleRegCase ).padEnd(16) + ", " + "0 }"; + String(singleRegCase ).padEnd(16) + "}"; inst.commonInfoIndexA = table.addIndexed(row); }); var s = `#define F(VAL) InstDB::kFlag##VAL\n` + + `#define X(VAL) InstDB::kAvx512Flag##VAL\n` + `#define CONTROL(VAL) Inst::kControl##VAL\n` + `#define SINGLE_REG(VAL) InstDB::kSingleReg##VAL\n` + `const InstDB::CommonInfo InstDB::_commonInfoTable[] = {\n${StringUtils.format(table, kIndent, true)}\n};\n` + `#undef SINGLE_REG\n` + `#undef CONTROL\n` + + `#undef X\n` + `#undef F\n`; this.inject("InstCommonTable", disclaimer(s), table.length * 8); } |