Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/asmjit/asmjit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkobalicek <kobalicek.petr@gmail.com>2021-03-14 00:25:43 +0300
committerkobalicek <kobalicek.petr@gmail.com>2021-03-14 01:05:48 +0300
commit7836449c3099dd564c8cc7666f640a1532151af2 (patch)
tree06c6c0f7a0e3dd6709aa25bf6337e09b279dff0e
parentc9cebc67bc195060f56f0a4f7fa9f066a0901e25 (diff)
Added asmjit_test_perf, which replaces asmjit_bench and provides much better performance overview
Removed asmjit_test_opcode (not needed anymore as we have asmjit_test_assembler and asmjit_test_perf)
-rw-r--r--.github/workflows/build-config.json8
-rw-r--r--CMakeLists.txt38
-rw-r--r--test/asmjit_bench_x86.cpp177
-rw-r--r--test/asmjit_test_compiler.cpp23
-rw-r--r--test/asmjit_test_compiler_x86.cpp2
-rw-r--r--test/asmjit_test_misc.h179
-rw-r--r--test/asmjit_test_opcode.cpp115
-rw-r--r--test/asmjit_test_opcode.h6055
-rw-r--r--test/asmjit_test_perf.cpp69
-rw-r--r--test/asmjit_test_perf.h81
-rw-r--r--test/asmjit_test_perf_x86.cpp5049
-rw-r--r--test/cmdline.h6
-rw-r--r--test/performancetimer.h59
13 files changed, 5421 insertions, 6440 deletions
diff --git a/.github/workflows/build-config.json b/.github/workflows/build-config.json
index 857ddeb..3c76bd0 100644
--- a/.github/workflows/build-config.json
+++ b/.github/workflows/build-config.json
@@ -16,15 +16,15 @@
"optional": true
},
{
- "cmd": ["asmjit_test_opcode", "--quiet"],
+ "cmd": ["asmjit_test_assembler", "--quiet"],
"optional": true
},
{
- "cmd": ["asmjit_test_assembler", "--quiet"],
+ "cmd": ["asmjit_test_emitters"],
"optional": true
},
{
- "cmd": ["asmjit_test_emitters"],
+ "cmd": ["asmjit_test_compiler"],
"optional": true
},
{
@@ -36,7 +36,7 @@
"optional": true
},
{
- "cmd": ["asmjit_test_compiler"],
+ "cmd": ["asmjit_test_perf", "--quick"],
"optional": true
}
]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2153f3f..5ba57ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -497,17 +497,6 @@ if (NOT ASMJIT_EMBED)
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
target_include_directories(asmjit_test_unit BEFORE PRIVATE ${ASMJIT_INCLUDE_DIRS})
- foreach(_target asmjit_test_opcode
- asmjit_test_emitters
- asmjit_test_x86_sections)
- asmjit_add_target(${_target} TEST
- SOURCES test/${_target}.cpp
- LIBRARIES asmjit::asmjit
- CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
- CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
- CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
- endforeach()
-
asmjit_add_target(asmjit_test_assembler TEST
SOURCES test/asmjit_test_assembler.cpp
test/asmjit_test_assembler_x64.cpp
@@ -518,6 +507,25 @@ if (NOT ASMJIT_EMBED)
CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
+ asmjit_add_target(asmjit_test_perf EXECUTABLE
+ SOURCES test/asmjit_test_perf.cpp
+ test/asmjit_test_perf_x86.cpp
+ SOURCES test/asmjit_test_perf.h
+ LIBRARIES asmjit::asmjit
+ CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
+ CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
+ CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
+
+ foreach(_target asmjit_test_emitters
+ asmjit_test_x86_sections)
+ asmjit_add_target(${_target} TEST
+ SOURCES test/${_target}.cpp
+ LIBRARIES asmjit::asmjit
+ CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
+ CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
+ CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
+ endforeach()
+
if (NOT ASMJIT_NO_INTROSPECTION)
asmjit_add_target(asmjit_test_instinfo TEST
SOURCES test/asmjit_test_instinfo.cpp
@@ -545,14 +553,6 @@ if (NOT ASMJIT_EMBED)
CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
endif()
- foreach(_target asmjit_bench_x86)
- asmjit_add_target(${_target} EXECUTABLE
- SOURCES test/${_target}.cpp
- LIBRARIES asmjit::asmjit
- CFLAGS ${ASMJIT_PRIVATE_CFLAGS}
- CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG}
- CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL})
- endforeach()
endif()
endif()
diff --git a/test/asmjit_bench_x86.cpp b/test/asmjit_bench_x86.cpp
deleted file mode 100644
index 6f75916..0000000
--- a/test/asmjit_bench_x86.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-// AsmJit - Machine code generation for C++
-//
-// * Official AsmJit Home Page: https://asmjit.com
-// * Official Github Repository: https://github.com/asmjit/asmjit
-//
-// Copyright (c) 2008-2020 The AsmJit Authors
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-// 3. This notice may not be removed or altered from any source distribution.
-
-#include <asmjit/core.h>
-
-#ifdef ASMJIT_BUILD_X86
-#include <asmjit/x86.h>
-#endif
-
-#include <stdio.h>
-#include <string.h>
-
-#include "asmjit_test_opcode.h"
-
-#ifndef ASMJIT_NO_COMPILER
- #include "asmjit_test_misc.h"
-#endif
-
-using namespace asmjit;
-
-// ============================================================================
-// [Configuration]
-// ============================================================================
-
-static constexpr uint32_t kNumRepeats = 20;
-static constexpr uint32_t kNumIterations = 1000;
-
-// ============================================================================
-// [BenchUtils]
-// ============================================================================
-
-namespace BenchUtils {
- class Performance {
- public:
- inline Performance() noexcept { reset(); }
-
- inline void reset() noexcept {
- tick = 0u;
- best = 0xFFFFFFFFu;
- }
-
- inline uint32_t start() noexcept { return (tick = now()); }
- inline uint32_t diff() const noexcept { return now() - tick; }
-
- inline uint32_t end() noexcept {
- tick = diff();
- if (best > tick)
- best = tick;
- return tick;
- }
-
- static inline uint32_t now() noexcept {
- return OSUtils::getTickCount();
- }
-
- uint32_t tick;
- uint32_t best;
- };
-
- static double mbps(uint32_t time, uint64_t outputSize) noexcept {
- if (!time) return 0.0;
-
- double bytesTotal = double(outputSize);
- return (bytesTotal * 1000) / (double(time) * 1024 * 1024);
- }
-
- template<typename EmitterT, typename FuncT>
- static void bench(CodeHolder& code, uint32_t arch, const char* testName, const FuncT& func) noexcept {
- EmitterT emitter;
-
- const char* archName =
- arch == Environment::kArchX86 ? "X86" :
- arch == Environment::kArchX64 ? "X64" : "???";
-
- const char* emitterName =
- emitter.isAssembler() ? "Assembler" :
- emitter.isCompiler() ? "Compiler" :
- emitter.isBuilder() ? "Builder" : "Unknown";
-
- Performance perf;
- uint64_t codeSize = 0;
-
- Environment env(arch);
-
- for (uint32_t r = 0; r < kNumRepeats; r++) {
- perf.start();
- codeSize = 0;
- for (uint32_t i = 0; i < kNumIterations; i++) {
- code.init(env);
- code.attach(&emitter);
-
- func(emitter);
- codeSize += code.codeSize();
-
- code.reset();
- }
- perf.end();
- }
-
- printf("[%s] %-9s %-10s | Time:%6u [ms] | ", archName, emitterName, testName, perf.best);
- if (codeSize)
- printf("Speed: %7.3f [MB/s]", mbps(perf.best, codeSize));
- else
- printf("Speed: N/A");
- printf("\n");
- }
-}
-
-// ============================================================================
-// [Main]
-// ============================================================================
-
-#ifdef ASMJIT_BUILD_X86
-static void benchX86(uint32_t arch) noexcept {
- CodeHolder code;
-
- BenchUtils::bench<x86::Assembler>(code, arch, "[fast]", [](x86::Assembler& a) {
- asmtest::generateOpcodes(a.as<x86::Emitter>());
- });
-
- BenchUtils::bench<x86::Assembler>(code, arch, "[validate]", [](x86::Assembler& a) {
- a.addValidationOptions(BaseEmitter::kValidationOptionAssembler);
- asmtest::generateOpcodes(a.as<x86::Emitter>());
- });
-
-#ifndef ASMJIT_NO_BUILDER
- BenchUtils::bench<x86::Builder>(code, arch, "[no-asm]", [](x86::Builder& cb) {
- asmtest::generateOpcodes(cb.as<x86::Emitter>());
- });
-
- BenchUtils::bench<x86::Builder>(code, arch, "[asm]", [](x86::Builder& cb) {
- asmtest::generateOpcodes(cb.as<x86::Emitter>());
- cb.finalize();
- });
-#endif
-
-#ifndef ASMJIT_NO_COMPILER
- BenchUtils::bench<x86::Compiler>(code, arch, "[no-asm]", [](x86::Compiler& cc) {
- asmtest::generateAlphaBlend(cc);
- });
-
- BenchUtils::bench<x86::Compiler>(code, arch, "[asm]", [](x86::Compiler& cc) {
- asmtest::generateAlphaBlend(cc);
- cc.finalize();
- });
-#endif
-}
-#endif
-
-int main() {
-#ifdef ASMJIT_BUILD_X86
- benchX86(Environment::kArchX86);
- benchX86(Environment::kArchX64);
-#endif
-
- return 0;
-}
diff --git a/test/asmjit_test_compiler.cpp b/test/asmjit_test_compiler.cpp
index c13ba18..e7a5eeb 100644
--- a/test/asmjit_test_compiler.cpp
+++ b/test/asmjit_test_compiler.cpp
@@ -32,6 +32,8 @@
#include <chrono>
#include "cmdline.h"
+#include "performancetimer.h"
+
#include "asmjit_test_compiler.h"
#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
@@ -54,27 +56,6 @@ void compiler_add_a64_tests(TestApp& app);
using namespace asmjit;
-class PerformanceTimer {
-public:
- typedef std::chrono::high_resolution_clock::time_point TimePoint;
-
- TimePoint _startTime {};
- TimePoint _endTime {};
-
- inline void start() {
- _startTime = std::chrono::high_resolution_clock::now();
- }
-
- inline void stop() {
- _endTime = std::chrono::high_resolution_clock::now();
- }
-
- inline double duration() const {
- std::chrono::duration<double> elapsed = _endTime - _startTime;
- return elapsed.count() * 1000;
- }
-};
-
// ============================================================================
// [TestApp]
// ============================================================================
diff --git a/test/asmjit_test_compiler_x86.cpp b/test/asmjit_test_compiler_x86.cpp
index 75b465e..1c82e72 100644
--- a/test/asmjit_test_compiler_x86.cpp
+++ b/test/asmjit_test_compiler_x86.cpp
@@ -2291,7 +2291,7 @@ public:
}
virtual void compile(x86::Compiler& cc) {
- asmtest::generateAlphaBlend(cc);
+ asmtest::generateSseAlphaBlend(cc, true);
}
virtual bool run(void* _func, String& result, String& expect) {
diff --git a/test/asmjit_test_misc.h b/test/asmjit_test_misc.h
index 0327327..f0d156f 100644
--- a/test/asmjit_test_misc.h
+++ b/test/asmjit_test_misc.h
@@ -28,42 +28,42 @@
namespace asmtest {
-// Generate a typical alpha blend function using SSE2 instruction set. Used
-// for benchmarking and also in test86. The generated code should be stable
-// and fully functional.
-static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
- using namespace asmjit;
- using namespace asmjit::x86;
-
- Gp dst = cc.newIntPtr("dst");
- Gp src = cc.newIntPtr("src");
-
- Gp i = cc.newIntPtr("i");
- Gp j = cc.newIntPtr("j");
- Gp t = cc.newIntPtr("t");
-
- Xmm vzero = cc.newXmm("vzero");
- Xmm v0080 = cc.newXmm("v0080");
- Xmm v0101 = cc.newXmm("v0101");
+using namespace asmjit;
+
+// Generates a typical alpha blend function that uses SSE2 instruction set.
+// This function combines emitting instructions with control flow constructs
+// like binding Labels and jumping to them. This should be pretty representative.
+template<typename Emitter>
+static void generateSseAlphaBlendInternal(
+ Emitter& cc,
+ const x86::Gp& dst, const x86::Gp& src, const x86::Gp& n,
+ const x86::Gp& gp0,
+ const x86::Xmm& simd0, const x86::Xmm& simd1, const x86::Xmm& simd2, const x86::Xmm& simd3,
+ const x86::Xmm& simd4, const x86::Xmm& simd5, const x86::Xmm& simd6, const x86::Xmm& simd7) {
+
+ x86::Gp i = n;
+ x86::Gp j = gp0;
+
+ x86::Xmm vzero = simd0;
+ x86::Xmm v0080 = simd1;
+ x86::Xmm v0101 = simd2;
Label L_SmallLoop = cc.newLabel();
Label L_SmallEnd = cc.newLabel();
Label L_LargeLoop = cc.newLabel();
Label L_LargeEnd = cc.newLabel();
- Label L_DataPool = cc.newLabel();
+ Label L_Done = cc.newLabel();
- cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost));
-
- cc.setArg(0, dst);
- cc.setArg(1, src);
- cc.setArg(2, i);
-
- // How many pixels have to be processed to make the loop aligned.
- cc.lea(t, x86::ptr(L_DataPool));
+ // Load SIMD Constants.
cc.xorps(vzero, vzero);
- cc.movaps(v0080, x86::ptr(t, 0));
- cc.movaps(v0101, x86::ptr(t, 16));
+ cc.mov(gp0.r32(), 0x00800080);
+ cc.movd(v0080, gp0.r32());
+ cc.mov(gp0.r32(), 0x01010101);
+ cc.movd(v0101, gp0.r32());
+ cc.pshufd(v0080, v0080, x86::Predicate::shuf(0, 0, 0, 0));
+ cc.pshufd(v0101, v0101, x86::Predicate::shuf(0, 0, 0, 0));
+ // How many pixels have to be processed to make the loop aligned.
cc.xor_(j, j);
cc.sub(j, dst);
cc.and_(j, 15);
@@ -71,15 +71,15 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
cc.jz(L_SmallEnd);
cc.cmp(j, i);
- cc.cmovg(j, i); // j = min(i, j).
- cc.sub(i, j); // i -= j.
+ cc.cmovg(j, i); // j = min(i, j)
+ cc.sub(i, j); // i -= j
// Small loop.
cc.bind(L_SmallLoop);
{
- Xmm x0 = cc.newXmm("x0");
- Xmm y0 = cc.newXmm("y0");
- Xmm a0 = cc.newXmm("a0");
+ x86::Xmm x0 = simd3;
+ x86::Xmm y0 = simd4;
+ x86::Xmm a0 = simd5;
cc.movd(y0, x86::ptr(src));
cc.movd(x0, x86::ptr(dst));
@@ -113,7 +113,7 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
cc.test(i, i);
cc.mov(j, i);
- cc.jz(cc.func()->exitLabel());
+ cc.jz(L_Done);
cc.and_(j, 3);
cc.shr(i, 2);
@@ -122,11 +122,11 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
// Aligned loop.
cc.bind(L_LargeLoop);
{
- Xmm x0 = cc.newXmm("x0");
- Xmm x1 = cc.newXmm("x1");
- Xmm y0 = cc.newXmm("y0");
- Xmm a0 = cc.newXmm("a0");
- Xmm a1 = cc.newXmm("a1");
+ x86::Xmm x0 = simd3;
+ x86::Xmm x1 = simd4;
+ x86::Xmm y0 = simd5;
+ x86::Xmm a0 = simd6;
+ x86::Xmm a1 = simd7;
cc.movups(y0, x86::ptr(src));
cc.movaps(x0, x86::ptr(dst));
@@ -172,13 +172,102 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
cc.test(j, j);
cc.jnz(L_SmallLoop);
- cc.endFunc();
+ cc.bind(L_Done);
+}
+
+static void generateSseAlphaBlend(asmjit::BaseEmitter& emitter, bool emitPrologEpilog) {
+ using namespace asmjit::x86;
- // Data.
- cc.align(kAlignData, 16);
- cc.bind(L_DataPool);
- cc.embedUInt16(uint16_t(0x0080u), 8);
- cc.embedUInt16(uint16_t(0x0101u), 8);
+ if (emitter.isAssembler()) {
+ Assembler& cc = *emitter.as<Assembler>();
+
+ x86::Gp dst = cc.zax();
+ x86::Gp src = cc.zcx();
+ x86::Gp i = cc.zdx();
+ x86::Gp j = cc.zdi();
+
+ if (emitPrologEpilog) {
+ FuncDetail func;
+ func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+ FuncFrame frame;
+ frame.init(func);
+ frame.addDirtyRegs(dst, src, i, j);
+ frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+
+ FuncArgsAssignment args(&func);
+ args.assignAll(dst, src, i);
+ args.updateFuncFrame(frame);
+ frame.finalize();
+
+ cc.emitProlog(frame);
+ cc.emitArgsAssignment(frame, args);
+ generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+ cc.emitEpilog(frame);
+ }
+ else {
+ generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+ }
+ }
+#ifndef ASMJIT_NO_BUILDER
+ else if (emitter.isBuilder()) {
+ Builder& cc = *emitter.as<Builder>();
+
+ x86::Gp dst = cc.zax();
+ x86::Gp src = cc.zcx();
+ x86::Gp i = cc.zdx();
+ x86::Gp j = cc.zdi();
+
+ if (emitPrologEpilog) {
+ FuncDetail func;
+ func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+ FuncFrame frame;
+ frame.init(func);
+ frame.addDirtyRegs(dst, src, i, j);
+ frame.addDirtyRegs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+
+ FuncArgsAssignment args(&func);
+ args.assignAll(dst, src, i);
+ args.updateFuncFrame(frame);
+ frame.finalize();
+
+ cc.emitProlog(frame);
+ cc.emitArgsAssignment(frame, args);
+ generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+ cc.emitEpilog(frame);
+ }
+ else {
+ generateSseAlphaBlendInternal(cc, dst, src, i, j, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
+ }
+ }
+#endif
+#ifndef ASMJIT_NO_COMPILER
+ else if (emitter.isCompiler()) {
+ Compiler& cc = *emitter.as<Compiler>();
+
+ Gp dst = cc.newIntPtr("dst");
+ Gp src = cc.newIntPtr("src");
+ Gp i = cc.newIntPtr("i");
+ Gp j = cc.newIntPtr("j");
+
+ Xmm v0 = cc.newXmm("v0");
+ Xmm v1 = cc.newXmm("v1");
+ Xmm v2 = cc.newXmm("v2");
+ Xmm v3 = cc.newXmm("v3");
+ Xmm v4 = cc.newXmm("v4");
+ Xmm v5 = cc.newXmm("v5");
+ Xmm v6 = cc.newXmm("v6");
+ Xmm v7 = cc.newXmm("v7");
+
+ cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost));
+ cc.setArg(0, dst);
+ cc.setArg(1, src);
+ cc.setArg(2, i);
+ generateSseAlphaBlendInternal(cc, dst, src, i, j, v0, v1, v2, v3, v4, v5, v6, v7);
+ cc.endFunc();
+ }
+#endif
}
} // {asmtest}
diff --git a/test/asmjit_test_opcode.cpp b/test/asmjit_test_opcode.cpp
deleted file mode 100644
index 1cd67e2..0000000
--- a/test/asmjit_test_opcode.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// AsmJit - Machine code generation for C++
-//
-// * Official AsmJit Home Page: https://asmjit.com
-// * Official Github Repository: https://github.com/asmjit/asmjit
-//
-// Copyright (c) 2008-2020 The AsmJit Authors
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-// 3. This notice may not be removed or altered from any source distribution.
-
-// ----------------------------------------------------------------------------
-// This file is used to test opcodes generated by AsmJit. Output can be
-// disassembled in your IDE or by your favorite disassembler. Instructions
-// are grouped by category and then sorted alphabetically.
-// ----------------------------------------------------------------------------
-
-#include <asmjit/x86.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "cmdline.h"
-#include "asmjit_test_opcode.h"
-
-using namespace asmjit;
-
-struct OpcodeDumpInfo {
- uint32_t arch;
- bool useRex1;
- bool useRex2;
-};
-
-static const char* archToString(uint32_t arch) noexcept {
- switch (arch & ~Environment::kArchBigEndianMask) {
- case Environment::kArchX86 : return "X86";
- case Environment::kArchX64 : return "X64";
- case Environment::kArchARM : return "ARM";
- case Environment::kArchThumb : return "Thumb";
- case Environment::kArchAArch64 : return "AArch64";
- case Environment::kArchMIPS32_LE: return "MIPS32";
- case Environment::kArchMIPS64_LE: return "MIPS64";
- default: return "Unknown";
- }
-}
-
-struct TestErrorHandler : public ErrorHandler {
- virtual void handleError(Error err, const char* message, BaseEmitter* origin) {
- (void)origin;
- printf("ERROR 0x%08X: %s\n", err, message);
- }
-};
-
-typedef void (*VoidFunc)(void);
-
-int main(int argc, char* argv[]) {
- CmdLine cmdLine(argc, argv);
- TestErrorHandler eh;
-
- OpcodeDumpInfo infoList[] = {
- { Environment::kArchX86, false, false },
- { Environment::kArchX64, false, false },
- { Environment::kArchX64, false, true },
- { Environment::kArchX64, true , false },
- { Environment::kArchX64, true , true }
- };
-
- bool quiet = cmdLine.hasArg("--quiet");
-
- for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) {
- const OpcodeDumpInfo& info = infoList[i];
-
- printf("Opcodes [ARCH=%s REX1=%s REX2=%s]\n",
- archToString(info.arch),
- info.useRex1 ? "true" : "false",
- info.useRex2 ? "true" : "false");
-
- CodeHolder code;
- code.init(Environment(info.arch));
- code.setErrorHandler(&eh);
-
-#ifndef ASMJIT_NO_LOGGING
- FileLogger logger(stdout);
- logger.addFlags(FormatOptions::kFlagMachineCode);
- if (!quiet)
- code.setLogger(&logger);
-#endif
-
- x86::Assembler a(&code);
- asmtest::generateOpcodes(a.as<x86::Emitter>(), info.useRex1, info.useRex2);
-
- // If this is the host architecture the code generated can be executed
- // for debugging purposes (the first instruction is ret anyway).
- if (code.arch() == Environment::kArchHost) {
- JitRuntime runtime;
- VoidFunc p;
-
- Error err = runtime.add(&p, &code);
- if (err == kErrorOk) p();
- }
- }
-
- return 0;
-}
diff --git a/test/asmjit_test_opcode.h b/test/asmjit_test_opcode.h
deleted file mode 100644
index 3768cb1..0000000
--- a/test/asmjit_test_opcode.h
+++ /dev/null
@@ -1,6055 +0,0 @@
-// AsmJit - Machine code generation for C++
-//
-// * Official AsmJit Home Page: https://asmjit.com
-// * Official Github Repository: https://github.com/asmjit/asmjit
-//
-// Copyright (c) 2008-2020 The AsmJit Authors
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-// 3. This notice may not be removed or altered from any source distribution.
-
-#ifndef ASMJIT_TEST_OPCODE_H_INCLUDED
-#define ASMJIT_TEST_OPCODE_H_INCLUDED
-
-#include <asmjit/x86.h>
-
-namespace asmtest {
-
-// Generate all instructions asmjit can emit.
-static void generateOpcodes(asmjit::x86::Emitter* e, bool useRex1 = false, bool useRex2 = false) {
- using namespace asmjit;
- using namespace asmjit::x86;
-
- bool isX64 = e->is64Bit();
-
- // Prevent a crash when the generated function is called to see the disassembly.
- e->ret();
-
- // All instructions use the following register that can be changed to see if
- // `x86::Assembler` can properly encode all possible combinations. If the given
- // `useRexRegs` argument is true the `A` version will in most cases contain
- // a register having index 8 or greater to force REX prefix.
- Gp gLoA = useRex1 ? r8b : al;
- Gp gLoB = useRex2 ? r9b : bl;
-
- Gp gHiA = ah;
- Gp gHiB = bh;
-
- Gp gwA = useRex1 ? r8w : ax;
- Gp gwB = useRex2 ? r9w : bx;
-
- Gp gdA = useRex1 ? r8d : eax;
- Gp gdB = useRex2 ? r9d : ebx;
- Gp gdC = useRex2 ? r10d : ecx;
-
- Gp gzA = useRex1 ? r8.as<Gp>() : e->zax();
- Gp gzB = useRex2 ? r9.as<Gp>() : e->zbx();
- Gp gzC = useRex2 ? r10.as<Gp>() : e->zcx();
- Gp gzD = useRex2 ? r11.as<Gp>() : e->zdx();
-
- KReg kA = k1;
- KReg kB = k2;
- KReg kC = k3;
-
- Mem anyptr_gpA = ptr(gzA);
- Mem anyptr_gpB = ptr(gzB);
- Mem anyptr_gpC = ptr(gzC);
- Mem anyptr_gpD = ptr(gzD);
-
- Mem intptr_gpA = e->intptr_ptr(gzA);
- Mem intptr_gpB = e->intptr_ptr(gzB);
-
- St stA = st0;
- St stB = st7;
-
- Mm mmA = mm0;
- Mm mmB = mm1;
-
- Xmm xmmA = useRex1 ? xmm8 : xmm0;
- Xmm xmmB = useRex2 ? xmm9 : xmm1;
- Xmm xmmC = useRex2 ? xmm10 : xmm2;
- Xmm xmmD = useRex2 ? xmm11 : xmm3;
-
- Ymm ymmA = useRex1 ? ymm8 : ymm0;
- Ymm ymmB = useRex2 ? ymm9 : ymm1;
- Ymm ymmC = useRex2 ? ymm10 : ymm2;
- Ymm ymmD = useRex2 ? ymm11 : ymm3;
-
- Zmm zmmA = useRex1 ? zmm8 : zmm0;
- Zmm zmmB = useRex2 ? zmm9 : zmm1;
- Zmm zmmC = useRex2 ? zmm10 : zmm2;
-
- Mem vx_ptr = ptr(gzB, xmmB);
- Mem vy_ptr = ptr(gzB, ymmB);
- Mem vz_ptr = ptr(gzB, zmmB);
-
- Label L;
-
- // Base.
- e->adc(gLoA, 1);
- e->adc(gLoB, 1);
- e->adc(gHiA, 1);
- e->adc(gHiB, 1);
- e->adc(gwA, 1);
- e->adc(gwB, 1);
- e->adc(gdA, 1);
- e->adc(gdB, 1);
- e->adc(gzA, 1);
- e->adc(gzA, gzB);
- e->adc(gzA, intptr_gpB);
- e->adc(intptr_gpA, 1);
- e->adc(intptr_gpA, gzB);
- e->add(gLoA, 1);
- e->add(gLoB, 1);
- e->add(gHiA, 1);
- e->add(gHiB, 1);
- e->add(gwA, 1);
- e->add(gwB, 1);
- e->add(gdA, 1);
- e->add(gdB, 1);
- e->add(gzA, 1);
- e->add(gzA, gzB);
- e->add(gzA, intptr_gpB);
- e->add(intptr_gpA, 1);
- e->add(intptr_gpA, gzB);
- e->and_(gLoA, 1);
- e->and_(gLoB, 1);
- e->and_(gHiA, 1);
- e->and_(gHiB, 1);
- e->and_(gwA, 1);
- e->and_(gwB, 1);
- e->and_(gdA, 1);
- e->and_(gdB, 1);
- e->and_(gzA, 1);
- e->and_(gzA, gzB);
- e->and_(gzA, intptr_gpB);
- e->and_(intptr_gpA, 1);
- e->and_(intptr_gpA, gzB);
- e->bswap(gzA);
- e->bt(gdA, 1);
- e->bt(gzA, 1);
- e->bt(gdA, gdB);
- e->bt(gzA, gzB);
- e->bt(intptr_gpA, 1);
- e->bt(anyptr_gpA, gdB);
- e->bt(intptr_gpA, gzB);
- e->btc(gdA, 1);
- e->btc(gzA, 1);
- e->btc(gdA, gdB);
- e->btc(gzA, gzB);
- e->btc(intptr_gpA, 1);
- e->btc(anyptr_gpA, gdB);
- e->btc(intptr_gpA, gzB);
- e->btr(gdA, 1);
- e->btr(gzA, 1);
- e->btr(gdA, gdB);
- e->btr(gzA, gzB);
- e->btr(intptr_gpA, 1);
- e->btr(anyptr_gpA, gdB);
- e->btr(intptr_gpA, gzB);
- e->bts(gdA, 1);
- e->bts(gzA, 1);
- e->bts(gdA, gdB);
- e->bts(gzA, gzB);
- e->bts(intptr_gpA, 1);
- e->bts(anyptr_gpA, gdB);
- e->bts(intptr_gpA, gzB);
- e->call(gzA);
- e->call(intptr_gpA);
- e->cbw(); // Implicit AX <- Sign Extend AL.
- e->cbw(ax); // Explicit AX <- Sign Extend AL.
- e->cdq(); // Implicit EDX:EAX <- Sign Extend EAX.
- e->cdq(edx, eax); // Explicit EDX:EAX <- Sign Extend EAX.
- if (isX64) e->cdqe(); // Implicit RAX <- Sign Extend EAX.
- if (isX64) e->cdqe(eax); // Explicit RAX <- Sign Extend EAX.
- e->cwd(); // Implicit DX:AX <- Sign Extend AX.
- e->cwd(dx, ax); // Explicit DX:AX <- Sign Extend AX.
- e->cwde(); // Implicit EAX <- Sign Extend AX.
- e->cwde(eax); // Explicit EAX <- Sign Extend AX.
- if (isX64) e->cqo(); // Implicit RDX:RAX <- Sign Extend RAX.
- if (isX64) e->cqo(rdx, rax); // Explicit RDX:RAX <- Sign Extend RAX.
- e->clc();
- e->cld();
- e->cmc();
- e->cmp(gLoA, 1);
- e->cmp(gLoB, 1);
- e->cmp(gHiA, 1);
- e->cmp(gHiB, 1);
- e->cmp(gwA, 1);
- e->cmp(gwB, 1);
- e->cmp(gdA, 1);
- e->cmp(gdB, 1);
- e->cmp(gzA, 1);
- e->cmp(gLoA, gLoB);
- e->cmp(gHiA, gHiB);
- e->cmp(gwA, gwB);
- e->cmp(gdA, gdB);
- e->cmp(gzA, gzB);
- e->cmp(gdA, anyptr_gpB);
- e->cmp(gzA, intptr_gpB);
- e->cmp(intptr_gpA, 1);
- e->cmp(anyptr_gpA, gdB);
- e->cmp(intptr_gpA, gzB);
- e->cmpxchg(gdA, gdB); // Implicit regA, regB, <EAX>
- e->cmpxchg(gzA, gzB); // Implicit regA, regB, <ZAX>
- e->cmpxchg(gdA, gdB, eax); // Explicit regA, regB, <EAX>
- e->cmpxchg(gzA, gzB, e->zax()); // Explicit regA, regB, <ZAX>
- e->cmpxchg(anyptr_gpA, gdB); // Implicit mem , regB, <EAX>
- e->cmpxchg(anyptr_gpA, gzB); // Implicit mem , regB, <ZAX>
- e->cmpxchg(anyptr_gpA, gdB, eax); // Explicit mem , regB, <EAX>
- e->cmpxchg(anyptr_gpA, gzB, e->zax()); // Explicit mem , regB, <ZAX>
- e->cmpxchg8b(anyptr_gpA); // Implicit mem , <EDX>, <EAX>, <ECX>, <EBX>
- e->cmpxchg8b(anyptr_gpA,
- x86::edx, x86::eax,
- x86::ecx, x86::ebx); // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
- if (isX64) e->cmpxchg16b(anyptr_gpA); // Implicit mem , <RDX>, <RAX>, <RCX>, <RBX>
- if (isX64) e->cmpxchg16b(anyptr_gpA,
- x86::rdx, x86::rax,
- x86::rcx, x86::rbx); // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
- e->cpuid(); // Implicit <EAX>, <EBX>, <ECX>, <EDX>
- e->cpuid(eax, ebx, ecx, edx); // Explicit <EAX>, <EBX>, <ECX>, <EDX>
- e->crc32(gdA, byte_ptr(gzB));
- e->crc32(gdA, word_ptr(gzB));
- e->crc32(gdA, dword_ptr(gzB));
- if (isX64) e->crc32(gdA, qword_ptr(gzB));
- if (isX64) e->crc32(gzA, qword_ptr(gzB));
- e->dec(gLoA);
- e->dec(gHiA);
- e->dec(gwA);
- e->dec(gdA);
- e->dec(gzA);
- e->dec(intptr_gpA);
- e->inc(gLoA);
- e->inc(gwA);
- e->inc(gdA);
- e->inc(gzA);
- e->inc(intptr_gpA);
- e->int_(13);
- e->int3();
- e->into();
- e->lea(gzA, intptr_gpB);
- e->mov(gLoA, 1);
- e->mov(gHiA, 1);
- e->mov(gwA, 1);
- e->mov(gdA, 1);
- e->mov(gzA, 1);
- e->mov(gLoA, gLoB);
- e->mov(gHiA, gHiB);
- e->mov(gwA, gwB);
- e->mov(gdA, gdB);
- e->mov(gzA, gzB);
- e->mov(gLoA, anyptr_gpB);
- e->mov(gwA, anyptr_gpB);
- e->mov(gdA, anyptr_gpB);
- e->mov(gzA, intptr_gpB);
- e->mov(anyptr_gpA, gLoB);
- e->mov(anyptr_gpA, gwB);
- e->mov(anyptr_gpA, gdB);
- e->mov(intptr_gpA, 1);
- e->mov(intptr_gpA, gzB);
- e->movsx(gzA, gLoB);
- e->movsx(gzA, byte_ptr(gzB));
- e->movzx(gzA, gLoB);
- e->movzx(gzA, byte_ptr(gzB));
- e->movbe(gzA, anyptr_gpB);
- e->movbe(anyptr_gpA, gzB);
- e->neg(gzA);
- e->neg(intptr_gpA);
- e->nop();
- e->not_(gzA);
- e->not_(intptr_gpA);
- e->or_(gLoA, 1);
- e->or_(gLoB, 1);
- e->or_(gHiA, 1);
- e->or_(gHiB, 1);
- e->or_(gwA, 1);
- e->or_(gwB, 1);
- e->or_(gdA, 1);
- e->or_(gdB, 1);
- e->or_(gzA, 1);
- e->or_(gzA, gzB);
- e->or_(gzA, intptr_gpB);
- e->or_(intptr_gpA, 1);
- e->or_(intptr_gpA, gzB);
- e->pop(gzA);
- e->pop(intptr_gpA);
- if (!isX64) e->popa();
- if (!isX64) e->popad();
- e->popf();
- if (!isX64) e->popfd();
- if ( isX64) e->popfq();
- e->push(gzA);
- e->push(intptr_gpA);
- e->push(0);
- if (!isX64) e->pusha();
- if (!isX64) e->pushad();
- e->pushf();
- if (!isX64) e->pushfd();
- if ( isX64) e->pushfq();
- e->rcl(gdA, 0);
- e->rcl(gzA, 0);
- e->rcl(gdA, 1);
- e->rcl(gzA, 1);
- e->rcl(gdA, cl);
- e->rcl(gzA, cl);
- e->rcl(intptr_gpA, 0);
- e->rcl(intptr_gpA, 1);
- e->rcl(intptr_gpA, cl);
- e->rcr(gdA, 0);
- e->rcr(gzA, 0);
- e->rcr(gdA, 1);
- e->rcr(gzA, 1);
- e->rcr(gdA, cl);
- e->rcr(gzA, cl);
- e->rcr(intptr_gpA, 0);
- e->rcr(intptr_gpA, 1);
- e->rcr(intptr_gpA, cl);
- e->rdtsc(); // Implicit <EDX:EAX>
- e->rdtsc(edx, eax); // Explicit <EDX:EAX>
- e->rdtscp(); // Implicit <EDX:EAX>, <ECX>
- e->rdtscp(edx, eax, ecx); // Implicit <EDX:EAX>, <ECX>
- e->ret();
- e->ret(0);
- e->rol(gdA, 0);
- e->rol(gzA, 0);
- e->rol(gdA, 1);
- e->rol(gzA, 1);
- e->rol(gdA, cl);
- e->rol(gzA, cl);
- e->rol(intptr_gpA, 0);
- e->rol(intptr_gpA, 1);
- e->rol(intptr_gpA, cl);
- e->ror(gdA, 0);
- e->ror(gzA, 0);
- e->ror(gdA, 1);
- e->ror(gzA, 1);
- e->ror(gdA, cl);
- e->ror(gzA, cl);
- e->ror(intptr_gpA, 0);
- e->ror(intptr_gpA, 1);
- e->ror(intptr_gpA, cl);
- e->sbb(gLoA, 1);
- e->sbb(gLoB, 1);
- e->sbb(gHiA, 1);
- e->sbb(gHiB, 1);
- e->sbb(gwA, 1);
- e->sbb(gwB, 1);
- e->sbb(gdA, 1);
- e->sbb(gdB, 1);
- e->sbb(gzA, 1);
- e->sbb(gzA, gzB);
- e->sbb(gzA, intptr_gpB);
- e->sbb(intptr_gpA, 1);
- e->sbb(intptr_gpA, gzB);
- e->sal(gdA, 0);
- e->sal(gzA, 0);
- e->sal(gdA, 1);
- e->sal(gzA, 1);
- e->sal(gdA, cl);
- e->sal(gzA, cl);
- e->sal(intptr_gpA, 0);
- e->sal(intptr_gpA, 1);
- e->sal(intptr_gpA, cl);
- e->sar(gdA, 0);
- e->sar(gzA, 0);
- e->sar(gdA, 1);
- e->sar(gzA, 1);
- e->sar(gdA, cl);
- e->sar(gzA, cl);
- e->sar(intptr_gpA, 0);
- e->sar(intptr_gpA, 1);
- e->sar(intptr_gpA, cl);
- e->shl(gdA, 0);
- e->shl(gzA, 0);
- e->shl(gdA, 1);
- e->shl(gzA, 1);
- e->shl(gdA, cl);
- e->shl(gzA, cl);
- e->shl(intptr_gpA, 0);
- e->shl(intptr_gpA, 1);
- e->shl(intptr_gpA, cl);
- e->shr(gdA, 0);
- e->shr(gzA, 0);
- e->shr(gdA, 1);
- e->shr(gzA, 1);
- e->shr(gdA, cl);
- e->shr(gzA, cl);
- e->shr(intptr_gpA, 0);
- e->shr(intptr_gpA, 1);
- e->shr(intptr_gpA, cl);
- e->shld(gdA, gdB, 0);
- e->shld(gzA, gzB, 0);
- e->shld(gdA, gdB, cl);
- e->shld(gzA, gzB, cl);
- e->shld(anyptr_gpA, gdB, 0);
- e->shld(intptr_gpA, gzB, 0);
- e->shld(anyptr_gpA, gdB, cl);
- e->shld(intptr_gpA, gzB, cl);
- e->shrd(gdA, gdB, 0);
- e->shrd(gzA, gzB, 0);
- e->shrd(gdA, gdB, cl);
- e->shrd(gzA, gzB, cl);
- e->shrd(anyptr_gpA, gdB, 0);
- e->shrd(intptr_gpA, gzB, 0);
- e->shrd(anyptr_gpA, gdB, cl);
- e->shrd(intptr_gpA, gzB, cl);
- e->stc();
- e->std();
- e->sti();
- e->sub(gLoA, 1);
- e->sub(gLoB, 1);
- e->sub(gHiA, 1);
- e->sub(gHiB, 1);
- e->sub(gwA, 1);
- e->sub(gwB, 1);
- e->sub(gdA, 1);
- e->sub(gdB, 1);
- e->sub(gzA, 1);
- e->sub(gzA, gzB);
- e->sub(gzA, intptr_gpB);
- e->sub(intptr_gpA, 1);
- e->sub(intptr_gpA, gzB);
- e->swapgs();
- e->test(gzA, 1);
- e->test(gzA, gzB);
- e->test(intptr_gpA, 1);
- e->test(intptr_gpA, gzB);
- e->ud2();
- e->xadd(gzA, gzB);
- e->xadd(intptr_gpA, gzB);
- e->xchg(gzA, gzB);
- e->xchg(intptr_gpA, gzB);
- e->xchg(gzA, intptr_gpB);
- e->xor_(gLoA, 1);
- e->xor_(gLoB, 1);
- e->xor_(gHiA, 1);
- e->xor_(gHiB, 1);
- e->xor_(gwA, 1);
- e->xor_(gwB, 1);
- e->xor_(gdA, 1);
- e->xor_(gdB, 1);
- e->xor_(gzA, 1);
- e->xor_(gzA, gzB);
- e->xor_(gzA, intptr_gpB);
- e->xor_(intptr_gpA, 1);
- e->xor_(intptr_gpA, gzB);
-
- // Special case - div|mul.
- e->div(cl); // Implicit AH:AL <- AX / r8
- e->div(byte_ptr(gzA)); // Implicit AH:AL <- AX / m8
- e->div(ax, cl); // Explicit AH:AL <- AX / r8
- e->div(ax, anyptr_gpA); // Explicit AH:AL <- AX / m8
-
- e->div(cx); // Implicit DX:AX <- DX:AX / r16
- e->div(word_ptr(gzA)); // Implicit DX:AX <- DX:AX / m16
- e->div(dx, ax, cx); // Explicit DX:AX <- DX:AX / r16
- e->div(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX / m16
-
- e->div(ecx); // Implicit EDX:EAX <- EDX:EAX / r32
- e->div(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX / m32
- e->div(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX / r32
- e->div(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX / m32
-
- if (isX64) e->div(rcx); // Implicit RDX:RAX <- RDX:RAX / r64
- if (isX64) e->div(qword_ptr(gzA)); // Implicit RDX:RAX <- RDX:RAX / m64
- if (isX64) e->div(rdx, rax, rcx); // Explicit RDX:RAX <- RDX:RAX / r64
- if (isX64) e->div(rdx, rax, anyptr_gpA); // Explicit RDX:RAX <- RDX:RAX / m64
-
- e->idiv(cl); // Implicit AH:AL <- AX / r8
- e->idiv(byte_ptr(gzA)); // Implicit AH:AL <- AX / m8
- e->idiv(ax, cl); // Explicit AH:AL <- AX / r8
- e->idiv(ax, anyptr_gpA); // Explicit AH:AL <- AX / m8
-
- e->idiv(cx); // Implicit DX:AX <- DX:AX / r16
- e->idiv(word_ptr(gzA)); // Implicit DX:AX <- DX:AX / m16
- e->idiv(dx, ax, cx); // Explicit DX:AX <- DX:AX / r16
- e->idiv(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX / m16
-
- e->idiv(ecx); // Implicit EDX:EAX <- EDX:EAX / r32
- e->idiv(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX / m32
- e->idiv(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX / r32
- e->idiv(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX / m32
-
- if (isX64) e->idiv(rcx); // Implicit RDX:RAX <- RDX:RAX / r64
- if (isX64) e->idiv(qword_ptr(gzA)); // Implicit RDX:RAX <- RDX:RAX / m64
- if (isX64) e->idiv(rdx, rax, rcx); // Explicit RDX:RAX <- RDX:RAX / r64
- if (isX64) e->idiv(rdx, rax, anyptr_gpA); // Explicit RDX:RAX <- RDX:RAX / m64
-
- e->mul(cl); // Implicit AX <- AL * r8
- e->mul(byte_ptr(gzA)); // Implicit AX <- AL * m8
- e->mul(ax, cl); // Explicit AX <- AL * r8
- e->mul(ax, anyptr_gpA); // Explicit AX <- AL * m8
-
- e->mul(cx); // Implicit DX:AX <- AX * r16
- e->mul(word_ptr(gzA)); // Implicit DX:AX <- AX * m16
- e->mul(dx, ax, cx); // Explicit DX:AX <- AX * r16
- e->mul(dx, ax, anyptr_gpA); // Explicit DX:AX <- AX * m16
-
- e->mul(ecx); // Implicit EDX:EAX <- EAX * r32
- e->mul(dword_ptr(gzA)); // Implicit EDX:EAX <- EAX * m32
- e->mul(edx, eax, ecx); // Explicit EDX:EAX <- EAX * r32
- e->mul(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EAX * m32
-
- if (isX64) e->mul(rcx); // Implicit RDX:RAX <- RAX * r64
- if (isX64) e->mul(qword_ptr(gzA)); // Implicit RDX:RAX <- RAX * m64
- if (isX64) e->mul(rdx, rax, rcx); // Explicit RDX:RAX <- RAX * r64
- if (isX64) e->mul(rdx, rax, anyptr_gpA); // Explicit RDX:RAX <- RAX * m64
-
- e->imul(gdA);
- e->imul(gzA);
- e->imul(intptr_gpA);
- e->imul(gdA, 1);
- e->imul(gzA, 1);
- e->imul(gdA, gdB);
- e->imul(gzA, gzB);
- e->imul(gdA, gdB, 1);
- e->imul(gzA, gzB, 1);
- e->imul(gdA, anyptr_gpB);
- e->imul(gzA, intptr_gpB);
- e->imul(gdA, anyptr_gpB, 1);
- e->imul(gzA, intptr_gpB, 1);
-
- // Special case - zero-extend 32-bit immediate instead of sign-extend:
- if (isX64) e->mov(gzA, uint32_t(0xFEEDFEED));
- if (isX64) e->and_(gzA, uint32_t(0xFEEDFEED));
-
- // Special case - mov with absolute 32-bit address.
- e->mov(al , ptr(0x01020304u));
- e->mov(ax , ptr(0x01020304u));
- e->mov(eax, ptr(0x01020304u));
- e->mov(ptr(0x01020304u), al );
- e->mov(ptr(0x01020304u), ax );
- e->mov(ptr(0x01020304u), eax);
-
- // Special case - mov with absolute 64-bit address.
- if (isX64) e->mov(al , ptr(0x0102030405060708u));
- if (isX64) e->mov(ax , ptr(0x0102030405060708u));
- if (isX64) e->mov(eax, ptr(0x0102030405060708u));
- if (isX64) e->mov(rax, ptr(0x0102030405060708u));
- if (isX64) e->mov(ptr(0x0102030405060708u), al );
- if (isX64) e->mov(ptr(0x0102030405060708u), ax );
- if (isX64) e->mov(ptr(0x0102030405060708u), eax);
- if (isX64) e->mov(ptr(0x0102030405060708u), rax);
-
- // Control registers.
- e->nop();
-
- e->mov(gzA, cr0);
- e->mov(cr0, gzA);
- if (isX64) e->mov(gzA, cr8);
- if (isX64) e->mov(cr8, gzA);
-
- // Debug registers.
- e->nop();
-
- e->mov(gzA, dr0);
- e->mov(dr0, gzA);
-
- // Segment registers.
- e->nop();
-
- if (!isX64) e->mov(es, ax);
- if (!isX64) e->mov(es, bx);
- if (!isX64) e->mov(ax, es);
- if (!isX64) e->mov(bx, es);
-
- if (!isX64) e->mov(cs, ax);
- if (!isX64) e->mov(cs, bx);
- if (!isX64) e->mov(ax, cs);
- if (!isX64) e->mov(bx, cs);
-
- if (!isX64) e->mov(ss, ax);
- if (!isX64) e->mov(ss, bx);
- if (!isX64) e->mov(ax, ss);
- if (!isX64) e->mov(bx, ss);
-
- if (!isX64) e->mov(ds, ax);
- if (!isX64) e->mov(ds, bx);
- if (!isX64) e->mov(ax, ds);
- if (!isX64) e->mov(bx, ds);
-
- e->mov(fs, ax);
- e->mov(fs, bx);
- e->mov(ax, fs);
- e->mov(bx, fs);
-
- e->mov(gs, ax);
- e->mov(gs, bx);
- e->mov(ax, gs);
- e->mov(bx, gs);
-
- // Instructions using REP prefix.
- e->nop();
-
- e->in(al, 0);
- e->in(al, dx);
- e->in(ax, 0);
- e->in(ax, dx);
- e->in(eax, 0);
- e->in(eax, dx);
- e->rep().ins(byte_ptr(e->zdi()), dx);
- e->rep().ins(word_ptr(e->zdi()), dx);
- e->rep().ins(dword_ptr(e->zdi()), dx);
-
- e->out(imm(0), al);
- e->out(dx, al);
- e->out(imm(0), ax);
- e->out(dx, ax);
- e->out(imm(0), eax);
- e->out(dx, eax);
- e->rep().outs(dx, byte_ptr(e->zsi()));
- e->rep().outs(dx, word_ptr(e->zsi()));
- e->rep().outs(dx, dword_ptr(e->zsi()));
-
- e->lodsb();
- e->lodsd();
- e->lodsw();
- e->rep().lodsb();
- e->rep().lodsd();
- e->rep().lodsw();
- if (isX64) e->rep().lodsq();
-
- e->movsb();
- e->movsd();
- e->movsw();
- e->rep().movsb();
- e->rep().movsd();
- e->rep().movsw();
- if (isX64) e->rep().movsq();
-
- e->stosb();
- e->stosd();
- e->stosw();
- e->rep().stosb();
- e->rep().stosd();
- e->rep().stosw();
- if (isX64) e->rep().stosq();
-
- e->cmpsb();
- e->cmpsd();
- e->cmpsw();
- e->repz().cmpsb();
- e->repz().cmpsd();
- e->repz().cmpsw();
- if (isX64) e->repz().cmpsq();
- e->repnz().cmpsb();
- e->repnz().cmpsd();
- e->repnz().cmpsw();
- if (isX64) e->repnz().cmpsq();
-
- e->scasb();
- e->scasd();
- e->scasw();
- e->repz().scasb();
- e->repz().scasd();
- e->repz().scasw();
- if (isX64) e->repz().scasq();
- e->repnz().scasb();
- e->repnz().scasd();
- e->repnz().scasw();
- if (isX64) e->repnz().scasq();
-
- // Label...Jcc/Jecxz/Jmp.
- e->nop();
-
- L = e->newLabel();
- e->bind(L);
- e->ja(L);
- e->jae(L);
- e->jb(L);
- e->jbe(L);
- e->jc(L);
- e->je(L);
- e->jg(L);
- e->jge(L);
- e->jl(L);
- e->jle(L);
- e->jna(L);
- e->jnae(L);
- e->jnb(L);
- e->jnbe(L);
- e->jnc(L);
- e->jne(L);
- e->jng(L);
- e->jnge(L);
- e->jnl(L);
- e->jnle(L);
- e->jno(L);
- e->jnp(L);
- e->jns(L);
- e->jnz(L);
- e->jo(L);
- e->jp(L);
- e->jpe(L);
- e->jpo(L);
- e->js(L);
- e->jz(L);
- e->jecxz(ecx, L);
- e->jmp(L);
-
- // Jcc/Jecxz/Jmp...Label.
- e->nop();
-
- L = e->newLabel();
- e->ja(L);
- e->jae(L);
- e->jb(L);
- e->jbe(L);
- e->jc(L);
- e->je(L);
- e->jg(L);
- e->jge(L);
- e->jl(L);
- e->jle(L);
- e->jna(L);
- e->jnae(L);
- e->jnb(L);
- e->jnbe(L);
- e->jnc(L);
- e->jne(L);
- e->jng(L);
- e->jnge(L);
- e->jnl(L);
- e->jnle(L);
- e->jno(L);
- e->jnp(L);
- e->jns(L);
- e->jnz(L);
- e->jo(L);
- e->jp(L);
- e->jpe(L);
- e->jpo(L);
- e->js(L);
- e->jz(L);
- e->jecxz(ecx, L);
- e->jmp(L);
- e->bind(L);
-
- // FPU.
- e->nop();
-
- e->f2xm1();
- e->fabs();
- e->fadd(stA, stB);
- e->fadd(stB, stA);
- e->fadd(dword_ptr(gzA));
- e->fadd(qword_ptr(gzA));
- e->faddp(stB);
- e->faddp();
- e->fbld(dword_ptr(gzA));
- e->fbstp(dword_ptr(gzA));
- e->fchs();
- e->fclex();
- e->fcom(stB);
- e->fcom();
- e->fcom(dword_ptr(gzA));
- e->fcom(qword_ptr(gzA));
- e->fcomp(stB);
- e->fcomp();
- e->fcomp(dword_ptr(gzA));
- e->fcomp(qword_ptr(gzA));
- e->fcompp();
- e->fcos();
- e->fdecstp();
- e->fdiv(stA, stB);
- e->fdiv(stB, stA);
- e->fdiv(dword_ptr(gzA));
- e->fdiv(qword_ptr(gzA));
- e->fdivp(stB);
- e->fdivp();
- e->fdivr(stA, stB);
- e->fdivr(stB, stA);
- e->fdivr(dword_ptr(gzA));
- e->fdivr(qword_ptr(gzA));
- e->fdivrp(stB);
- e->fdivrp();
- e->fiadd(dword_ptr(gzA));
- e->ficom(word_ptr(gzA));
- e->ficom(dword_ptr(gzA));
- e->ficomp(word_ptr(gzA));
- e->ficomp(dword_ptr(gzA));
- e->fidiv(word_ptr(gzA));
- e->fidiv(dword_ptr(gzA));
- e->fidivr(word_ptr(gzA));
- e->fidivr(dword_ptr(gzA));
- e->fild(word_ptr(gzA));
- e->fild(dword_ptr(gzA));
- e->fild(qword_ptr(gzA));
- e->fimul(word_ptr(gzA));
- e->fimul(dword_ptr(gzA));
- e->fincstp();
- e->finit();
- e->fninit();
- e->fisub(word_ptr(gzA));
- e->fisub(dword_ptr(gzA));
- e->fisubr(word_ptr(gzA));
- e->fisubr(dword_ptr(gzA));
- e->fist(word_ptr(gzA));
- e->fist(dword_ptr(gzA));
- e->fistp(word_ptr(gzA));
- e->fistp(dword_ptr(gzA));
- e->fistp(qword_ptr(gzA));
- e->fld(dword_ptr(gzA));
- e->fld(qword_ptr(gzA));
- e->fld(tword_ptr(gzA));
- e->fld1();
- e->fldl2t();
- e->fldl2e();
- e->fldpi();
- e->fldlg2();
- e->fldln2();
- e->fldz();
- e->fldcw(anyptr_gpA);
- e->fldenv(anyptr_gpA);
- e->fmul(stA, stB);
- e->fmul(stB, stA);
- e->fmul(dword_ptr(gzA));
- e->fmul(qword_ptr(gzA));
- e->fmulp(stB);
- e->fmulp();
- e->fnclex();
- e->fnop();
- e->fnsave(anyptr_gpA);
- e->fnstenv(anyptr_gpA);
- e->fnstcw(anyptr_gpA);
- e->fpatan();
- e->fprem();
- e->fprem1();
- e->fptan();
- e->frndint();
- e->frstor(anyptr_gpA);
- e->fsave(anyptr_gpA);
- e->fscale();
- e->fsin();
- e->fsincos();
- e->fsqrt();
- e->fst(dword_ptr(gzA));
- e->fst(qword_ptr(gzA));
- e->fstp(dword_ptr(gzA));
- e->fstp(qword_ptr(gzA));
- e->fstp(tword_ptr(gzA));
- e->fstcw(anyptr_gpA);
- e->fstenv(anyptr_gpA);
- e->fsub(stA, stB);
- e->fsub(stB, stA);
- e->fsub(dword_ptr(gzA));
- e->fsub(qword_ptr(gzA));
- e->fsubp(stB);
- e->fsubp();
- e->fsubr(stA, stB);
- e->fsubr(stB, stA);
- e->fsubr(dword_ptr(gzA));
- e->fsubr(qword_ptr(gzA));
- e->fsubrp(stB);
- e->fsubrp();
- e->ftst();
- e->fucom(stB);
- e->fucom();
- e->fucom(stB);
- e->fucomi(stB);
- e->fucomip(stB);
- e->fucomp(stB);
- e->fucompp();
- e->fxam();
- e->fxtract();
- e->fyl2x();
- e->fyl2xp1();
-
- // LAHF/SAHF
- e->lahf(); // Implicit <AH>
- e->lahf(ah); // Explicit <AH>
- e->sahf(); // Implicit <AH>
- e->sahf(ah); // Explicit <AH>
-
- // FXSR.
- e->fxrstor(anyptr_gpA);
- e->fxsave(anyptr_gpA);
-
- // XSAVE.
- e->nop();
-
- e->xgetbv(); // Implicit <EDX:EAX>, <ECX>
- e->xgetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
-
- e->xsetbv(); // Implicit <EDX:EAX>, <ECX>
- e->xsetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
-
- e->xrstor(anyptr_gpA); // Implicit <EDX:EAX>
- e->xrstors(anyptr_gpA); // Implicit <EDX:EAX>
- e->xsave(anyptr_gpA); // Implicit <EDX:EAX>
- e->xsavec(anyptr_gpA); // Implicit <EDX:EAX>
- e->xsaveopt(anyptr_gpA); // Implicit <EDX:EAX>
- e->xsaves(anyptr_gpA); // Implicit <EDX:EAX>
-
- if (isX64) e->xrstor64(anyptr_gpA); // Implicit <EDX:EAX>
- if (isX64) e->xrstors64(anyptr_gpA); // Implicit <EDX:EAX>
- if (isX64) e->xsave64(anyptr_gpA); // Implicit <EDX:EAX>
- if (isX64) e->xsavec64(anyptr_gpA); // Implicit <EDX:EAX>
- if (isX64) e->xsaveopt64(anyptr_gpA); // Implicit <EDX:EAX>
- if (isX64) e->xsaves64(anyptr_gpA); // Implicit <EDX:EAX>
-
- // POPCNT.
- e->nop();
-
- e->popcnt(gdA, gdB);
- e->popcnt(gzA, gzB);
- e->popcnt(gdA, anyptr_gpB);
- e->popcnt(gzA, anyptr_gpB);
-
- // LZCNT.
- e->nop();
-
- e->lzcnt(gdA, gdB);
- e->lzcnt(gzA, gzB);
- e->lzcnt(gdA, anyptr_gpB);
- e->lzcnt(gzA, anyptr_gpB);
-
- // BMI.
- e->nop();
-
- e->andn(gdA, gdB, gdC);
- e->andn(gzA, gzB, gzC);
- e->andn(gdA, gdB, anyptr_gpC);
- e->andn(gzA, gzB, anyptr_gpC);
- e->bextr(gdA, gdB, gdC);
- e->bextr(gzA, gzB, gzC);
- e->bextr(gdA, anyptr_gpB, gdC);
- e->bextr(gzA, anyptr_gpB, gzC);
- e->blsi(gdA, gdB);
- e->blsi(gzA, gzB);
- e->blsi(gdA, anyptr_gpB);
- e->blsi(gzA, anyptr_gpB);
- e->blsmsk(gdA, gdB);
- e->blsmsk(gzA, gzB);
- e->blsmsk(gdA, anyptr_gpB);
- e->blsmsk(gzA, anyptr_gpB);
- e->blsr(gdA, gdB);
- e->blsr(gzA, gzB);
- e->blsr(gdA, anyptr_gpB);
- e->blsr(gzA, anyptr_gpB);
- e->tzcnt(gdA, gdB);
- e->tzcnt(gzA, gzB);
- e->tzcnt(gdA, anyptr_gpB);
- e->tzcnt(gzA, anyptr_gpB);
-
- // BMI2.
- e->nop();
-
- e->bzhi(gdA, gdB, gdC);
- e->bzhi(gzA, gzB, gzC);
- e->bzhi(gdA, anyptr_gpB, gdC);
- e->bzhi(gzA, anyptr_gpB, gzC);
- e->mulx(gdA, gdB, gdC); // Implicit gpA, gpB, gpC, <EDX>
- e->mulx(gdA, gdB, gdC, edx); // Explicit gpA, gpB, gpC, <EDX>
- e->mulx(gzA, gzB, gzC); // Implicit gpA, gpB, gpC, <EDX|RDX>
- e->mulx(gzA, gzB, gzC, e->zdx()); // Explicit gpA, gpB, gpC, <EDX|RDX>
- e->mulx(gdA, gdB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX>
- e->mulx(gdA, gdB, anyptr_gpC, edx); // Explicit gpA, gpB, mem, <EDX>
- e->mulx(gzA, gzB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX|RDX>
- e->mulx(gzA, gzB, anyptr_gpC, e->zdx()); // Explicit gpA, gpB, mem, <EDX|RDX>
- e->pdep(gdA, gdB, gdC);
- e->pdep(gzA, gzB, gzC);
- e->pdep(gdA, gdB, anyptr_gpC);
- e->pdep(gzA, gzB, anyptr_gpC);
- e->pext(gdA, gdB, gdC);
- e->pext(gzA, gzB, gzC);
- e->pext(gdA, gdB, anyptr_gpC);
- e->pext(gzA, gzB, anyptr_gpC);
- e->rorx(gdA, gdB, 0);
- e->rorx(gzA, gzB, 0);
- e->rorx(gdA, anyptr_gpB, 0);
- e->rorx(gzA, anyptr_gpB, 0);
- e->sarx(gdA, gdB, gdC);
- e->sarx(gzA, gzB, gzC);
- e->sarx(gdA, anyptr_gpB, gdC);
- e->sarx(gzA, anyptr_gpB, gzC);
- e->shlx(gdA, gdB, gdC);
- e->shlx(gzA, gzB, gzC);
- e->shlx(gdA, anyptr_gpB, gdC);
- e->shlx(gzA, anyptr_gpB, gzC);
- e->shrx(gdA, gdB, gdC);
- e->shrx(gzA, gzB, gzC);
- e->shrx(gdA, anyptr_gpB, gdC);
- e->shrx(gzA, anyptr_gpB, gzC);
-
- // ADX.
- e->nop();
-
- e->adcx(gdA, gdB);
- e->adcx(gzA, gzB);
- e->adcx(gdA, anyptr_gpB);
- e->adcx(gzA, anyptr_gpB);
- e->adox(gdA, gdB);
- e->adox(gzA, gzB);
- e->adox(gdA, anyptr_gpB);
- e->adox(gzA, anyptr_gpB);
-
- // TBM.
- e->nop();
-
- e->blcfill(gdA, gdB);
- e->blcfill(gzA, gzB);
- e->blcfill(gdA, anyptr_gpB);
- e->blcfill(gzA, anyptr_gpB);
-
- e->blci(gdA, gdB);
- e->blci(gzA, gzB);
- e->blci(gdA, anyptr_gpB);
- e->blci(gzA, anyptr_gpB);
-
- e->blcic(gdA, gdB);
- e->blcic(gzA, gzB);
- e->blcic(gdA, anyptr_gpB);
- e->blcic(gzA, anyptr_gpB);
-
- e->blcmsk(gdA, gdB);
- e->blcmsk(gzA, gzB);
- e->blcmsk(gdA, anyptr_gpB);
- e->blcmsk(gzA, anyptr_gpB);
-
- e->blcs(gdA, gdB);
- e->blcs(gzA, gzB);
- e->blcs(gdA, anyptr_gpB);
- e->blcs(gzA, anyptr_gpB);
-
- e->blsfill(gdA, gdB);
- e->blsfill(gzA, gzB);
- e->blsfill(gdA, anyptr_gpB);
- e->blsfill(gzA, anyptr_gpB);
-
- e->blsic(gdA, gdB);
- e->blsic(gzA, gzB);
- e->blsic(gdA, anyptr_gpB);
- e->blsic(gzA, anyptr_gpB);
-
- e->t1mskc(gdA, gdB);
- e->t1mskc(gzA, gzB);
- e->t1mskc(gdA, anyptr_gpB);
- e->t1mskc(gzA, anyptr_gpB);
-
- e->tzmsk(gdA, gdB);
- e->tzmsk(gzA, gzB);
- e->tzmsk(gdA, anyptr_gpB);
- e->tzmsk(gzA, anyptr_gpB);
-
- // CLFLUSH / CLFLUSH_OPT.
- e->nop();
- e->clflush(anyptr_gpA);
- e->clflushopt(anyptr_gpA);
-
- // CLWB.
- e->nop();
- e->clwb(anyptr_gpA);
-
- // CLZERO.
- e->nop();
- e->clzero(); // Implicit <ds:[EAX|RAX]>
- e->clzero(ptr(e->zax())); // Explicit <ds:[EAX|RAX]>
-
- // MONITOR[X] / MWAIT[X].
- e->nop();
- e->monitor(); // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
- e->monitorx(); // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
- e->mwait(); // Implicit <EAX>, <ECX>
- e->mwaitx(); // Implicit <EAX>, <ECX>, <EBX>
-
- // PREFETCH / PREFETCHW / PREFETCHWT1.
- e->nop();
- e->prefetch(anyptr_gpA); // 3DNOW.
- e->prefetchnta(anyptr_gpA); // MMX+SSE.
- e->prefetcht0(anyptr_gpA); // MMX+SSE.
- e->prefetcht1(anyptr_gpA); // MMX+SSE.
- e->prefetcht2(anyptr_gpA); // MMX+SSE.
- e->prefetchw(anyptr_gpA); // PREFETCHW.
- e->prefetchwt1(anyptr_gpA); // PREFETCHWT1.
-
- // RDRAND / RDSEED.
- e->nop();
-
- e->rdrand(gdA);
- e->rdrand(gzA);
- e->rdseed(gdA);
- e->rdseed(gzA);
-
- // MMX/MMX2.
- e->nop();
-
- e->movd(anyptr_gpA, mmB);
- e->movd(gdA, mmB);
- e->movd(mmA, anyptr_gpB);
- e->movd(mmA, gdB);
- e->movq(mmA, mmB);
- e->movq(anyptr_gpA, mmB);
- e->movq(mmA, anyptr_gpB);
- e->packuswb(mmA, mmB);
- e->packuswb(mmA, anyptr_gpB);
- e->paddb(mmA, mmB);
- e->paddb(mmA, anyptr_gpB);
- e->paddw(mmA, mmB);
- e->paddw(mmA, anyptr_gpB);
- e->paddd(mmA, mmB);
- e->paddd(mmA, anyptr_gpB);
- e->paddsb(mmA, mmB);
- e->paddsb(mmA, anyptr_gpB);
- e->paddsw(mmA, mmB);
- e->paddsw(mmA, anyptr_gpB);
- e->paddusb(mmA, mmB);
- e->paddusb(mmA, anyptr_gpB);
- e->paddusw(mmA, mmB);
- e->paddusw(mmA, anyptr_gpB);
- e->pand(mmA, mmB);
- e->pand(mmA, anyptr_gpB);
- e->pandn(mmA, mmB);
- e->pandn(mmA, anyptr_gpB);
- e->pcmpeqb(mmA, mmB);
- e->pcmpeqb(mmA, anyptr_gpB);
- e->pcmpeqw(mmA, mmB);
- e->pcmpeqw(mmA, anyptr_gpB);
- e->pcmpeqd(mmA, mmB);
- e->pcmpeqd(mmA, anyptr_gpB);
- e->pcmpgtb(mmA, mmB);
- e->pcmpgtb(mmA, anyptr_gpB);
- e->pcmpgtw(mmA, mmB);
- e->pcmpgtw(mmA, anyptr_gpB);
- e->pcmpgtd(mmA, mmB);
- e->pcmpgtd(mmA, anyptr_gpB);
- e->pmulhw(mmA, mmB);
- e->pmulhw(mmA, anyptr_gpB);
- e->pmullw(mmA, mmB);
- e->pmullw(mmA, anyptr_gpB);
- e->por(mmA, mmB);
- e->por(mmA, anyptr_gpB);
- e->pmaddwd(mmA, mmB);
- e->pmaddwd(mmA, anyptr_gpB);
- e->pslld(mmA, mmB);
- e->pslld(mmA, anyptr_gpB);
- e->pslld(mmA, 0);
- e->psllq(mmA, mmB);
- e->psllq(mmA, anyptr_gpB);
- e->psllq(mmA, 0);
- e->psllw(mmA, mmB);
- e->psllw(mmA, anyptr_gpB);
- e->psllw(mmA, 0);
- e->psrad(mmA, mmB);
- e->psrad(mmA, anyptr_gpB);
- e->psrad(mmA, 0);
- e->psraw(mmA, mmB);
- e->psraw(mmA, anyptr_gpB);
- e->psraw(mmA, 0);
- e->psrld(mmA, mmB);
- e->psrld(mmA, anyptr_gpB);
- e->psrld(mmA, 0);
- e->psrlq(mmA, mmB);
- e->psrlq(mmA, anyptr_gpB);
- e->psrlq(mmA, 0);
- e->psrlw(mmA, mmB);
- e->psrlw(mmA, anyptr_gpB);
- e->psrlw(mmA, 0);
- e->psubb(mmA, mmB);
- e->psubb(mmA, anyptr_gpB);
- e->psubw(mmA, mmB);
- e->psubw(mmA, anyptr_gpB);
- e->psubd(mmA, mmB);
- e->psubd(mmA, anyptr_gpB);
- e->psubsb(mmA, mmB);
- e->psubsb(mmA, anyptr_gpB);
- e->psubsw(mmA, mmB);
- e->psubsw(mmA, anyptr_gpB);
- e->psubusb(mmA, mmB);
- e->psubusb(mmA, anyptr_gpB);
- e->psubusw(mmA, mmB);
- e->psubusw(mmA, anyptr_gpB);
- e->punpckhbw(mmA, mmB);
- e->punpckhbw(mmA, anyptr_gpB);
- e->punpckhwd(mmA, mmB);
- e->punpckhwd(mmA, anyptr_gpB);
- e->punpckhdq(mmA, mmB);
- e->punpckhdq(mmA, anyptr_gpB);
- e->punpcklbw(mmA, mmB);
- e->punpcklbw(mmA, anyptr_gpB);
- e->punpcklwd(mmA, mmB);
- e->punpcklwd(mmA, anyptr_gpB);
- e->punpckldq(mmA, mmB);
- e->punpckldq(mmA, anyptr_gpB);
- e->pxor(mmA, mmB);
- e->pxor(mmA, anyptr_gpB);
- e->emms();
-
- // 3DNOW.
- e->nop();
-
- e->pavgusb(mmA, mmB);
- e->pavgusb(mmA, anyptr_gpB);
- e->pf2id(mmA, mmB);
- e->pf2id(mmA, anyptr_gpB);
- e->pf2iw(mmA, mmB);
- e->pf2iw(mmA, anyptr_gpB);
- e->pfacc(mmA, mmB);
- e->pfacc(mmA, anyptr_gpB);
- e->pfadd(mmA, mmB);
- e->pfadd(mmA, anyptr_gpB);
- e->pfcmpeq(mmA, mmB);
- e->pfcmpeq(mmA, anyptr_gpB);
- e->pfcmpge(mmA, mmB);
- e->pfcmpge(mmA, anyptr_gpB);
- e->pfcmpgt(mmA, mmB);
- e->pfcmpgt(mmA, anyptr_gpB);
- e->pfmax(mmA, mmB);
- e->pfmax(mmA, anyptr_gpB);
- e->pfmin(mmA, mmB);
- e->pfmin(mmA, anyptr_gpB);
- e->pfmul(mmA, mmB);
- e->pfmul(mmA, anyptr_gpB);
- e->pfnacc(mmA, mmB);
- e->pfnacc(mmA, anyptr_gpB);
- e->pfpnacc(mmA, mmB);
- e->pfpnacc(mmA, anyptr_gpB);
- e->pfrcp(mmA, mmB);
- e->pfrcp(mmA, anyptr_gpB);
- e->pfrcpit1(mmA, mmB);
- e->pfrcpit1(mmA, anyptr_gpB);
- e->pfrcpit2(mmA, mmB);
- e->pfrcpit2(mmA, anyptr_gpB);
- e->pfrcpv(mmA, mmB);
- e->pfrcpv(mmA, anyptr_gpB);
- e->pfrsqit1(mmA, mmB);
- e->pfrsqit1(mmA, anyptr_gpB);
- e->pfrsqrt(mmA, mmB);
- e->pfrsqrt(mmA, anyptr_gpB);
- e->pfrsqrtv(mmA, mmB);
- e->pfrsqrtv(mmA, anyptr_gpB);
- e->pfsub(mmA, mmB);
- e->pfsub(mmA, anyptr_gpB);
- e->pfsubr(mmA, mmB);
- e->pfsubr(mmA, anyptr_gpB);
- e->pi2fd(mmA, mmB);
- e->pi2fd(mmA, anyptr_gpB);
- e->pi2fw(mmA, mmB);
- e->pi2fw(mmA, anyptr_gpB);
- e->pmulhrw(mmA, mmB);
- e->pmulhrw(mmA, anyptr_gpB);
- e->pswapd(mmA, mmB);
- e->pswapd(mmA, anyptr_gpB);
- e->femms();
-
- // SSE.
- e->nop();
-
- e->addps(xmmA, xmmB);
- e->addps(xmmA, anyptr_gpB);
- e->addss(xmmA, xmmB);
- e->addss(xmmA, anyptr_gpB);
- e->andnps(xmmA, xmmB);
- e->andnps(xmmA, anyptr_gpB);
- e->andps(xmmA, xmmB);
- e->andps(xmmA, anyptr_gpB);
- e->cmpps(xmmA, xmmB, 0);
- e->cmpps(xmmA, anyptr_gpB, 0);
- e->cmpss(xmmA, xmmB, 0);
- e->cmpss(xmmA, anyptr_gpB, 0);
- e->comiss(xmmA, xmmB);
- e->comiss(xmmA, anyptr_gpB);
- e->cvtpi2ps(xmmA, mmB);
- e->cvtpi2ps(xmmA, anyptr_gpB);
- e->cvtps2pi(mmA, xmmB);
- e->cvtps2pi(mmA, anyptr_gpB);
- e->cvtsi2ss(xmmA, gdB);
- e->cvtsi2ss(xmmA, gzB);
- e->cvtsi2ss(xmmA, anyptr_gpB);
- e->cvtss2si(gdA, xmmB);
- e->cvtss2si(gzA, xmmB);
- e->cvtss2si(gdA, anyptr_gpB);
- e->cvtss2si(gzA, anyptr_gpB);
- e->cvttps2pi(mmA, xmmB);
- e->cvttps2pi(mmA, anyptr_gpB);
- e->cvttss2si(gdA, xmmB);
- e->cvttss2si(gzA, xmmB);
- e->cvttss2si(gdA, anyptr_gpB);
- e->cvttss2si(gzA, anyptr_gpB);
- e->divps(xmmA, xmmB);
- e->divps(xmmA, anyptr_gpB);
- e->divss(xmmA, xmmB);
- e->divss(xmmA, anyptr_gpB);
- e->ldmxcsr(anyptr_gpA);
- e->maskmovq(mmA, mmB); // Implicit mmA, mmB, <ds:[EDI|RDI]>
- e->maskmovq(mmA, mmB, ptr(e->zdi())); // Explicit mmA, mmB, <ds:[EDI|RDI]>
- e->maxps(xmmA, xmmB);
- e->maxps(xmmA, anyptr_gpB);
- e->maxss(xmmA, xmmB);
- e->maxss(xmmA, anyptr_gpB);
- e->minps(xmmA, xmmB);
- e->minps(xmmA, anyptr_gpB);
- e->minss(xmmA, xmmB);
- e->minss(xmmA, anyptr_gpB);
- e->movaps(xmmA, xmmB);
- e->movaps(xmmA, anyptr_gpB);
- e->movaps(anyptr_gpA, xmmB);
- e->movd(anyptr_gpA, xmmB);
- e->movd(gdA, xmmB);
- e->movd(gzA, xmmB);
- e->movd(xmmA, anyptr_gpB);
- e->movd(xmmA, gdB);
- e->movd(xmmA, gzB);
- e->movq(mmA, mmB);
- e->movq(xmmA, xmmB);
- e->movq(anyptr_gpA, xmmB);
- e->movq(xmmA, anyptr_gpB);
- e->movntq(anyptr_gpA, mmB);
- e->movhlps(xmmA, xmmB);
- e->movhps(xmmA, anyptr_gpB);
- e->movhps(anyptr_gpA, xmmB);
- e->movlhps(xmmA, xmmB);
- e->movlps(xmmA, anyptr_gpB);
- e->movlps(anyptr_gpA, xmmB);
- e->movntps(anyptr_gpA, xmmB);
- e->movss(xmmA, anyptr_gpB);
- e->movss(anyptr_gpA, xmmB);
- e->movups(xmmA, xmmB);
- e->movups(xmmA, anyptr_gpB);
- e->movups(anyptr_gpA, xmmB);
- e->mulps(xmmA, xmmB);
- e->mulps(xmmA, anyptr_gpB);
- e->mulss(xmmA, xmmB);
- e->mulss(xmmA, anyptr_gpB);
- e->orps(xmmA, xmmB);
- e->orps(xmmA, anyptr_gpB);
- e->pavgb(mmA, mmB);
- e->pavgb(mmA, anyptr_gpB);
- e->pavgw(mmA, mmB);
- e->pavgw(mmA, anyptr_gpB);
- e->pextrw(gdA, mmB, 0);
- e->pextrw(gzA, mmB, 0);
- e->pinsrw(mmA, gdB, 0);
- e->pinsrw(mmA, gzB, 0);
- e->pinsrw(mmA, anyptr_gpB, 0);
- e->pmaxsw(mmA, mmB);
- e->pmaxsw(mmA, anyptr_gpB);
- e->pmaxub(mmA, mmB);
- e->pmaxub(mmA, anyptr_gpB);
- e->pminsw(mmA, mmB);
- e->pminsw(mmA, anyptr_gpB);
- e->pminub(mmA, mmB);
- e->pminub(mmA, anyptr_gpB);
- e->pmovmskb(gdA, mmB);
- e->pmovmskb(gzA, mmB);
- e->pmulhuw(mmA, mmB);
- e->pmulhuw(mmA, anyptr_gpB);
- e->psadbw(mmA, mmB);
- e->psadbw(mmA, anyptr_gpB);
- e->pshufw(mmA, mmB, 0);
- e->pshufw(mmA, anyptr_gpB, 0);
- e->rcpps(xmmA, xmmB);
- e->rcpps(xmmA, anyptr_gpB);
- e->rcpss(xmmA, xmmB);
- e->rcpss(xmmA, anyptr_gpB);
- e->psadbw(xmmA, xmmB);
- e->psadbw(xmmA, anyptr_gpB);
- e->rsqrtps(xmmA, xmmB);
- e->rsqrtps(xmmA, anyptr_gpB);
- e->rsqrtss(xmmA, xmmB);
- e->rsqrtss(xmmA, anyptr_gpB);
- e->sfence();
- e->shufps(xmmA, xmmB, 0);
- e->shufps(xmmA, anyptr_gpB, 0);
- e->sqrtps(xmmA, xmmB);
- e->sqrtps(xmmA, anyptr_gpB);
- e->sqrtss(xmmA, xmmB);
- e->sqrtss(xmmA, anyptr_gpB);
- e->stmxcsr(anyptr_gpA);
- e->subps(xmmA, xmmB);
- e->subps(xmmA, anyptr_gpB);
- e->subss(xmmA, xmmB);
- e->subss(xmmA, anyptr_gpB);
- e->ucomiss(xmmA, xmmB);
- e->ucomiss(xmmA, anyptr_gpB);
- e->unpckhps(xmmA, xmmB);
- e->unpckhps(xmmA, anyptr_gpB);
- e->unpcklps(xmmA, xmmB);
- e->unpcklps(xmmA, anyptr_gpB);
- e->xorps(xmmA, xmmB);
- e->xorps(xmmA, anyptr_gpB);
-
- // SSE2.
- e->nop();
-
- e->addpd(xmmA, xmmB);
- e->addpd(xmmA, anyptr_gpB);
- e->addsd(xmmA, xmmB);
- e->addsd(xmmA, anyptr_gpB);
- e->andnpd(xmmA, xmmB);
- e->andnpd(xmmA, anyptr_gpB);
- e->andpd(xmmA, xmmB);
- e->andpd(xmmA, anyptr_gpB);
- e->cmppd(xmmA, xmmB, 0);
- e->cmppd(xmmA, anyptr_gpB, 0);
- e->cmpsd(xmmA, xmmB, 0);
- e->cmpsd(xmmA, anyptr_gpB, 0);
- e->comisd(xmmA, xmmB);
- e->comisd(xmmA, anyptr_gpB);
- e->cvtdq2pd(xmmA, xmmB);
- e->cvtdq2pd(xmmA, anyptr_gpB);
- e->cvtdq2ps(xmmA, xmmB);
- e->cvtdq2ps(xmmA, anyptr_gpB);
- e->cvtpd2dq(xmmA, xmmB);
- e->cvtpd2dq(xmmA, anyptr_gpB);
- e->cvtpd2pi(mmA, xmmB);
- e->cvtpd2pi(mmA, anyptr_gpB);
- e->cvtpd2ps(xmmA, xmmB);
- e->cvtpd2ps(xmmA, anyptr_gpB);
- e->cvtpi2pd(xmmA, mmB);
- e->cvtpi2pd(xmmA, anyptr_gpB);
- e->cvtps2dq(xmmA, xmmB);
- e->cvtps2dq(xmmA, anyptr_gpB);
- e->cvtps2pd(xmmA, xmmB);
- e->cvtps2pd(xmmA, anyptr_gpB);
- e->cvtsd2si(gdA, xmmB);
- e->cvtsd2si(gzA, xmmB);
- e->cvtsd2si(gdA, anyptr_gpB);
- e->cvtsd2si(gzA, anyptr_gpB);
- e->cvtsd2ss(xmmA, xmmB);
- e->cvtsd2ss(xmmA, anyptr_gpB);
- e->cvtsi2sd(xmmA, gdB);
- e->cvtsi2sd(xmmA, gzB);
- e->cvtsi2sd(xmmA, anyptr_gpB);
- e->cvtss2sd(xmmA, xmmB);
- e->cvtss2sd(xmmA, anyptr_gpB);
- e->cvtss2si(gdA, xmmB);
- e->cvtss2si(gzA, xmmB);
- e->cvtss2si(gdA, anyptr_gpB);
- e->cvtss2si(gzA, anyptr_gpB);
- e->cvttpd2pi(mmA, xmmB);
- e->cvttpd2pi(mmA, anyptr_gpB);
- e->cvttpd2dq(xmmA, xmmB);
- e->cvttpd2dq(xmmA, anyptr_gpB);
- e->cvttps2dq(xmmA, xmmB);
- e->cvttps2dq(xmmA, anyptr_gpB);
- e->cvttsd2si(gdA, xmmB);
- e->cvttsd2si(gzA, xmmB);
- e->cvttsd2si(gdA, anyptr_gpB);
- e->cvttsd2si(gzA, anyptr_gpB);
- e->divpd(xmmA, xmmB);
- e->divpd(xmmA, anyptr_gpB);
- e->divsd(xmmA, xmmB);
- e->divsd(xmmA, anyptr_gpB);
- e->lfence();
- e->maskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
- e->maskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
- e->maxpd(xmmA, xmmB);
- e->maxpd(xmmA, anyptr_gpB);
- e->maxsd(xmmA, xmmB);
- e->maxsd(xmmA, anyptr_gpB);
- e->mfence();
- e->minpd(xmmA, xmmB);
- e->minpd(xmmA, anyptr_gpB);
- e->minsd(xmmA, xmmB);
- e->minsd(xmmA, anyptr_gpB);
- e->movdqa(xmmA, xmmB);
- e->movdqa(xmmA, anyptr_gpB);
- e->movdqa(anyptr_gpA, xmmB);
- e->movdqu(xmmA, xmmB);
- e->movdqu(xmmA, anyptr_gpB);
- e->movdqu(anyptr_gpA, xmmB);
- e->movmskps(gdA, xmmB);
- e->movmskps(gzA, xmmB);
- e->movmskpd(gdA, xmmB);
- e->movmskpd(gzA, xmmB);
- e->movsd(xmmA, xmmB);
- e->movsd(xmmA, anyptr_gpB);
- e->movsd(anyptr_gpA, xmmB);
- e->movapd(xmmA, anyptr_gpB);
- e->movapd(anyptr_gpA, xmmB);
- e->movdq2q(mmA, xmmB);
- e->movq2dq(xmmA, mmB);
- e->movhpd(xmmA, anyptr_gpB);
- e->movhpd(anyptr_gpA, xmmB);
- e->movlpd(xmmA, anyptr_gpB);
- e->movlpd(anyptr_gpA, xmmB);
- e->movntdq(anyptr_gpA, xmmB);
- e->movnti(anyptr_gpA, gdB);
- e->movnti(anyptr_gpA, gzB);
- e->movntpd(anyptr_gpA, xmmB);
- e->movupd(xmmA, anyptr_gpB);
- e->movupd(anyptr_gpA, xmmB);
- e->mulpd(xmmA, xmmB);
- e->mulpd(xmmA, anyptr_gpB);
- e->mulsd(xmmA, xmmB);
- e->mulsd(xmmA, anyptr_gpB);
- e->orpd(xmmA, xmmB);
- e->orpd(xmmA, anyptr_gpB);
- e->packsswb(xmmA, xmmB);
- e->packsswb(xmmA, anyptr_gpB);
- e->packssdw(xmmA, xmmB);
- e->packssdw(xmmA, anyptr_gpB);
- e->packuswb(xmmA, xmmB);
- e->packuswb(xmmA, anyptr_gpB);
- e->paddb(xmmA, xmmB);
- e->paddb(xmmA, anyptr_gpB);
- e->paddw(xmmA, xmmB);
- e->paddw(xmmA, anyptr_gpB);
- e->paddd(xmmA, xmmB);
- e->paddd(xmmA, anyptr_gpB);
- e->paddq(mmA, mmB);
- e->paddq(mmA, anyptr_gpB);
- e->paddq(xmmA, xmmB);
- e->paddq(xmmA, anyptr_gpB);
- e->paddsb(xmmA, xmmB);
- e->paddsb(xmmA, anyptr_gpB);
- e->paddsw(xmmA, xmmB);
- e->paddsw(xmmA, anyptr_gpB);
- e->paddusb(xmmA, xmmB);
- e->paddusb(xmmA, anyptr_gpB);
- e->paddusw(xmmA, xmmB);
- e->paddusw(xmmA, anyptr_gpB);
- e->pand(xmmA, xmmB);
- e->pand(xmmA, anyptr_gpB);
- e->pandn(xmmA, xmmB);
- e->pandn(xmmA, anyptr_gpB);
- e->pause();
- e->pavgb(xmmA, xmmB);
- e->pavgb(xmmA, anyptr_gpB);
- e->pavgw(xmmA, xmmB);
- e->pavgw(xmmA, anyptr_gpB);
- e->pcmpeqb(xmmA, xmmB);
- e->pcmpeqb(xmmA, anyptr_gpB);
- e->pcmpeqw(xmmA, xmmB);
- e->pcmpeqw(xmmA, anyptr_gpB);
- e->pcmpeqd(xmmA, xmmB);
- e->pcmpeqd(xmmA, anyptr_gpB);
- e->pcmpgtb(xmmA, xmmB);
- e->pcmpgtb(xmmA, anyptr_gpB);
- e->pcmpgtw(xmmA, xmmB);
- e->pcmpgtw(xmmA, anyptr_gpB);
- e->pcmpgtd(xmmA, xmmB);
- e->pcmpgtd(xmmA, anyptr_gpB);
- e->pmaxsw(xmmA, xmmB);
- e->pmaxsw(xmmA, anyptr_gpB);
- e->pmaxub(xmmA, xmmB);
- e->pmaxub(xmmA, anyptr_gpB);
- e->pminsw(xmmA, xmmB);
- e->pminsw(xmmA, anyptr_gpB);
- e->pminub(xmmA, xmmB);
- e->pminub(xmmA, anyptr_gpB);
- e->pmovmskb(gdA, xmmB);
- e->pmovmskb(gzA, xmmB);
- e->pmulhw(xmmA, xmmB);
- e->pmulhw(xmmA, anyptr_gpB);
- e->pmulhuw(xmmA, xmmB);
- e->pmulhuw(xmmA, anyptr_gpB);
- e->pmullw(xmmA, xmmB);
- e->pmullw(xmmA, anyptr_gpB);
- e->pmuludq(mmA, mmB);
- e->pmuludq(mmA, anyptr_gpB);
- e->pmuludq(xmmA, xmmB);
- e->pmuludq(xmmA, anyptr_gpB);
- e->por(xmmA, xmmB);
- e->por(xmmA, anyptr_gpB);
- e->pslld(xmmA, xmmB);
- e->pslld(xmmA, anyptr_gpB);
- e->pslld(xmmA, 0);
- e->psllq(xmmA, xmmB);
- e->psllq(xmmA, anyptr_gpB);
- e->psllq(xmmA, 0);
- e->psllw(xmmA, xmmB);
- e->psllw(xmmA, anyptr_gpB);
- e->psllw(xmmA, 0);
- e->pslldq(xmmA, 0);
- e->psrad(xmmA, xmmB);
- e->psrad(xmmA, anyptr_gpB);
- e->psrad(xmmA, 0);
- e->psraw(xmmA, xmmB);
- e->psraw(xmmA, anyptr_gpB);
- e->psraw(xmmA, 0);
- e->psubb(xmmA, xmmB);
- e->psubb(xmmA, anyptr_gpB);
- e->psubw(xmmA, xmmB);
- e->psubw(xmmA, anyptr_gpB);
- e->psubd(xmmA, xmmB);
- e->psubd(xmmA, anyptr_gpB);
- e->psubq(mmA, mmB);
- e->psubq(mmA, anyptr_gpB);
- e->psubq(xmmA, xmmB);
- e->psubq(xmmA, anyptr_gpB);
- e->pmaddwd(xmmA, xmmB);
- e->pmaddwd(xmmA, anyptr_gpB);
- e->pshufd(xmmA, xmmB, 0);
- e->pshufd(xmmA, anyptr_gpB, 0);
- e->pshufhw(xmmA, xmmB, 0);
- e->pshufhw(xmmA, anyptr_gpB, 0);
- e->pshuflw(xmmA, xmmB, 0);
- e->pshuflw(xmmA, anyptr_gpB, 0);
- e->psrld(xmmA, xmmB);
- e->psrld(xmmA, anyptr_gpB);
- e->psrld(xmmA, 0);
- e->psrlq(xmmA, xmmB);
- e->psrlq(xmmA, anyptr_gpB);
- e->psrlq(xmmA, 0);
- e->psrldq(xmmA, 0);
- e->psrlw(xmmA, xmmB);
- e->psrlw(xmmA, anyptr_gpB);
- e->psrlw(xmmA, 0);
- e->psubsb(xmmA, xmmB);
- e->psubsb(xmmA, anyptr_gpB);
- e->psubsw(xmmA, xmmB);
- e->psubsw(xmmA, anyptr_gpB);
- e->psubusb(xmmA, xmmB);
- e->psubusb(xmmA, anyptr_gpB);
- e->psubusw(xmmA, xmmB);
- e->psubusw(xmmA, anyptr_gpB);
- e->punpckhbw(xmmA, xmmB);
- e->punpckhbw(xmmA, anyptr_gpB);
- e->punpckhwd(xmmA, xmmB);
- e->punpckhwd(xmmA, anyptr_gpB);
- e->punpckhdq(xmmA, xmmB);
- e->punpckhdq(xmmA, anyptr_gpB);
- e->punpckhqdq(xmmA, xmmB);
- e->punpckhqdq(xmmA, anyptr_gpB);
- e->punpcklbw(xmmA, xmmB);
- e->punpcklbw(xmmA, anyptr_gpB);
- e->punpcklwd(xmmA, xmmB);
- e->punpcklwd(xmmA, anyptr_gpB);
- e->punpckldq(xmmA, xmmB);
- e->punpckldq(xmmA, anyptr_gpB);
- e->punpcklqdq(xmmA, xmmB);
- e->punpcklqdq(xmmA, anyptr_gpB);
- e->pxor(xmmA, xmmB);
- e->pxor(xmmA, anyptr_gpB);
- e->sqrtpd(xmmA, xmmB);
- e->sqrtpd(xmmA, anyptr_gpB);
- e->sqrtsd(xmmA, xmmB);
- e->sqrtsd(xmmA, anyptr_gpB);
- e->subpd(xmmA, xmmB);
- e->subpd(xmmA, anyptr_gpB);
- e->subsd(xmmA, xmmB);
- e->subsd(xmmA, anyptr_gpB);
- e->ucomisd(xmmA, xmmB);
- e->ucomisd(xmmA, anyptr_gpB);
- e->unpckhpd(xmmA, xmmB);
- e->unpckhpd(xmmA, anyptr_gpB);
- e->unpcklpd(xmmA, xmmB);
- e->unpcklpd(xmmA, anyptr_gpB);
- e->xorpd(xmmA, xmmB);
- e->xorpd(xmmA, anyptr_gpB);
-
- // SSE3.
- e->nop();
-
- e->addsubpd(xmmA, xmmB);
- e->addsubpd(xmmA, anyptr_gpB);
- e->addsubps(xmmA, xmmB);
- e->addsubps(xmmA, anyptr_gpB);
- e->fisttp(dword_ptr(gzA));
- e->haddpd(xmmA, xmmB);
- e->haddpd(xmmA, anyptr_gpB);
- e->haddps(xmmA, xmmB);
- e->haddps(xmmA, anyptr_gpB);
- e->hsubpd(xmmA, xmmB);
- e->hsubpd(xmmA, anyptr_gpB);
- e->hsubps(xmmA, xmmB);
- e->hsubps(xmmA, anyptr_gpB);
- e->lddqu(xmmA, anyptr_gpB);
- e->movddup(xmmA, xmmB);
- e->movddup(xmmA, anyptr_gpB);
- e->movshdup(xmmA, xmmB);
- e->movshdup(xmmA, anyptr_gpB);
- e->movsldup(xmmA, xmmB);
- e->movsldup(xmmA, anyptr_gpB);
-
- // SSSE3.
- e->nop();
-
- e->psignb(mmA, mmB);
- e->psignb(mmA, anyptr_gpB);
- e->psignb(xmmA, xmmB);
- e->psignb(xmmA, anyptr_gpB);
- e->psignw(mmA, mmB);
- e->psignw(mmA, anyptr_gpB);
- e->psignw(xmmA, xmmB);
- e->psignw(xmmA, anyptr_gpB);
- e->psignd(mmA, mmB);
- e->psignd(mmA, anyptr_gpB);
- e->psignd(xmmA, xmmB);
- e->psignd(xmmA, anyptr_gpB);
- e->phaddw(mmA, mmB);
- e->phaddw(mmA, anyptr_gpB);
- e->phaddw(xmmA, xmmB);
- e->phaddw(xmmA, anyptr_gpB);
- e->phaddd(mmA, mmB);
- e->phaddd(mmA, anyptr_gpB);
- e->phaddd(xmmA, xmmB);
- e->phaddd(xmmA, anyptr_gpB);
- e->phaddsw(mmA, mmB);
- e->phaddsw(mmA, anyptr_gpB);
- e->phaddsw(xmmA, xmmB);
- e->phaddsw(xmmA, anyptr_gpB);
- e->phsubw(mmA, mmB);
- e->phsubw(mmA, anyptr_gpB);
- e->phsubw(xmmA, xmmB);
- e->phsubw(xmmA, anyptr_gpB);
- e->phsubd(mmA, mmB);
- e->phsubd(mmA, anyptr_gpB);
- e->phsubd(xmmA, xmmB);
- e->phsubd(xmmA, anyptr_gpB);
- e->phsubsw(mmA, mmB);
- e->phsubsw(mmA, anyptr_gpB);
- e->phsubsw(xmmA, xmmB);
- e->phsubsw(xmmA, anyptr_gpB);
- e->pmaddubsw(mmA, mmB);
- e->pmaddubsw(mmA, anyptr_gpB);
- e->pmaddubsw(xmmA, xmmB);
- e->pmaddubsw(xmmA, anyptr_gpB);
- e->pabsb(mmA, mmB);
- e->pabsb(mmA, anyptr_gpB);
- e->pabsb(xmmA, xmmB);
- e->pabsb(xmmA, anyptr_gpB);
- e->pabsw(mmA, mmB);
- e->pabsw(mmA, anyptr_gpB);
- e->pabsw(xmmA, xmmB);
- e->pabsw(xmmA, anyptr_gpB);
- e->pabsd(mmA, mmB);
- e->pabsd(mmA, anyptr_gpB);
- e->pabsd(xmmA, xmmB);
- e->pabsd(xmmA, anyptr_gpB);
- e->pmulhrsw(mmA, mmB);
- e->pmulhrsw(mmA, anyptr_gpB);
- e->pmulhrsw(xmmA, xmmB);
- e->pmulhrsw(xmmA, anyptr_gpB);
- e->pshufb(mmA, mmB);
- e->pshufb(mmA, anyptr_gpB);
- e->pshufb(xmmA, xmmB);
- e->pshufb(xmmA, anyptr_gpB);
- e->palignr(mmA, mmB, 0);
- e->palignr(mmA, anyptr_gpB, 0);
- e->palignr(xmmA, xmmB, 0);
- e->palignr(xmmA, anyptr_gpB, 0);
-
- // SSE4.1.
- e->nop();
-
- e->blendpd(xmmA, xmmB, 0);
- e->blendpd(xmmA, anyptr_gpB, 0);
- e->blendps(xmmA, xmmB, 0);
- e->blendps(xmmA, anyptr_gpB, 0);
- e->blendvpd(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
- e->blendvpd(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
- e->blendvpd(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
- e->blendvpd(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
- e->blendvps(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
- e->blendvps(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
- e->blendvps(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
- e->blendvps(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
-
- e->dppd(xmmA, xmmB, 0);
- e->dppd(xmmA, anyptr_gpB, 0);
- e->dpps(xmmA, xmmB, 0);
- e->dpps(xmmA, anyptr_gpB, 0);
- e->extractps(gdA, xmmB, 0);
- e->extractps(gzA, xmmB, 0);
- e->extractps(anyptr_gpA, xmmB, 0);
- e->insertps(xmmA, xmmB, 0);
- e->insertps(xmmA, anyptr_gpB, 0);
- e->movntdqa(xmmA, anyptr_gpB);
- e->mpsadbw(xmmA, xmmB, 0);
- e->mpsadbw(xmmA, anyptr_gpB, 0);
- e->packusdw(xmmA, xmmB);
- e->packusdw(xmmA, anyptr_gpB);
- e->pblendvb(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
- e->pblendvb(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
- e->pblendvb(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
- e->pblendvb(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
- e->pblendw(xmmA, xmmB, 0);
- e->pblendw(xmmA, anyptr_gpB, 0);
- e->pcmpeqq(xmmA, xmmB);
- e->pcmpeqq(xmmA, anyptr_gpB);
- e->pextrb(gdA, xmmB, 0);
- e->pextrb(gzA, xmmB, 0);
- e->pextrb(anyptr_gpA, xmmB, 0);
- e->pextrd(gdA, xmmB, 0);
- e->pextrd(gzA, xmmB, 0);
- e->pextrd(anyptr_gpA, xmmB, 0);
- if (isX64) e->pextrq(gzA, xmmB, 0);
- if (isX64) e->pextrq(anyptr_gpA, xmmB, 0);
- e->pextrw(gdA, xmmB, 0);
- e->pextrw(gzA, xmmB, 0);
- e->pextrw(anyptr_gpA, xmmB, 0);
- e->phminposuw(xmmA, xmmB);
- e->phminposuw(xmmA, anyptr_gpB);
- e->pinsrb(xmmA, gdB, 0);
- e->pinsrb(xmmA, gzB, 0);
- e->pinsrb(xmmA, anyptr_gpB, 0);
- e->pinsrd(xmmA, gdB, 0);
- e->pinsrd(xmmA, gzB, 0);
- e->pinsrd(xmmA, anyptr_gpB, 0);
- e->pinsrw(xmmA, gdB, 0);
- e->pinsrw(xmmA, gzB, 0);
- e->pinsrw(xmmA, anyptr_gpB, 0);
- e->pmaxuw(xmmA, xmmB);
- e->pmaxuw(xmmA, anyptr_gpB);
- e->pmaxsb(xmmA, xmmB);
- e->pmaxsb(xmmA, anyptr_gpB);
- e->pmaxsd(xmmA, xmmB);
- e->pmaxsd(xmmA, anyptr_gpB);
- e->pmaxud(xmmA, xmmB);
- e->pmaxud(xmmA, anyptr_gpB);
- e->pminsb(xmmA, xmmB);
- e->pminsb(xmmA, anyptr_gpB);
- e->pminuw(xmmA, xmmB);
- e->pminuw(xmmA, anyptr_gpB);
- e->pminud(xmmA, xmmB);
- e->pminud(xmmA, anyptr_gpB);
- e->pminsd(xmmA, xmmB);
- e->pminsd(xmmA, anyptr_gpB);
- e->pmovsxbw(xmmA, xmmB);
- e->pmovsxbw(xmmA, anyptr_gpB);
- e->pmovsxbd(xmmA, xmmB);
- e->pmovsxbd(xmmA, anyptr_gpB);
- e->pmovsxbq(xmmA, xmmB);
- e->pmovsxbq(xmmA, anyptr_gpB);
- e->pmovsxwd(xmmA, xmmB);
- e->pmovsxwd(xmmA, anyptr_gpB);
- e->pmovsxwq(xmmA, xmmB);
- e->pmovsxwq(xmmA, anyptr_gpB);
- e->pmovsxdq(xmmA, xmmB);
- e->pmovsxdq(xmmA, anyptr_gpB);
- e->pmovzxbw(xmmA, xmmB);
- e->pmovzxbw(xmmA, anyptr_gpB);
- e->pmovzxbd(xmmA, xmmB);
- e->pmovzxbd(xmmA, anyptr_gpB);
- e->pmovzxbq(xmmA, xmmB);
- e->pmovzxbq(xmmA, anyptr_gpB);
- e->pmovzxwd(xmmA, xmmB);
- e->pmovzxwd(xmmA, anyptr_gpB);
- e->pmovzxwq(xmmA, xmmB);
- e->pmovzxwq(xmmA, anyptr_gpB);
- e->pmovzxdq(xmmA, xmmB);
- e->pmovzxdq(xmmA, anyptr_gpB);
- e->pmuldq(xmmA, xmmB);
- e->pmuldq(xmmA, anyptr_gpB);
- e->pmulld(xmmA, xmmB);
- e->pmulld(xmmA, anyptr_gpB);
- e->ptest(xmmA, xmmB);
- e->ptest(xmmA, anyptr_gpB);
- e->roundps(xmmA, xmmB, 0);
- e->roundps(xmmA, anyptr_gpB, 0);
- e->roundss(xmmA, xmmB, 0);
- e->roundss(xmmA, anyptr_gpB, 0);
- e->roundpd(xmmA, xmmB, 0);
- e->roundpd(xmmA, anyptr_gpB, 0);
- e->roundsd(xmmA, xmmB, 0);
- e->roundsd(xmmA, anyptr_gpB, 0);
-
- // SSE4.2.
- e->nop();
-
- e->pcmpestri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
- e->pcmpestri(xmmA, xmmB , imm(0), ecx, eax, edx); // Explicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
- e->pcmpestri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
- e->pcmpestri(xmmA, anyptr_gpB, imm(0), ecx, eax, edx); // Explicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
- e->pcmpestrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
- e->pcmpestrm(xmmA, xmmB , imm(0), xmm0, eax, edx); // Explicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
- e->pcmpestrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
- e->pcmpestrm(xmmA, anyptr_gpB, imm(0), xmm0, eax, edx); // Explicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
- e->pcmpistri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>
- e->pcmpistri(xmmA, xmmB , imm(0), ecx); // Explicit xmmA, xmmB, imm, <ECX>
- e->pcmpistri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>
- e->pcmpistri(xmmA, anyptr_gpB, imm(0), ecx); // Explicit xmmA, mem , imm, <ECX>
- e->pcmpistrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>
- e->pcmpistrm(xmmA, xmmB , imm(0), xmm0); // Explicit xmmA, xmmB, imm, <XMM0>
- e->pcmpistrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>
- e->pcmpistrm(xmmA, anyptr_gpB, imm(0), xmm0); // Explicit xmmA, mem , imm, <XMM0>
-
- e->pcmpgtq(xmmA, xmmB);
- e->pcmpgtq(xmmA, anyptr_gpB);
-
- // SSE4A.
- e->nop();
-
- e->extrq(xmmA, xmmB);
- e->extrq(xmmA, 0x1, 0x2);
- e->extrq(xmmB, 0x1, 0x2);
- e->insertq(xmmA, xmmB);
- e->insertq(xmmA, xmmB, 0x1, 0x2);
- e->movntsd(anyptr_gpA, xmmB);
- e->movntss(anyptr_gpA, xmmB);
-
- // AESNI.
- e->nop();
-
- e->aesdec(xmmA, xmmB);
- e->aesdec(xmmA, anyptr_gpB);
- e->aesdeclast(xmmA, xmmB);
- e->aesdeclast(xmmA, anyptr_gpB);
- e->aesenc(xmmA, xmmB);
- e->aesenc(xmmA, anyptr_gpB);
- e->aesenclast(xmmA, xmmB);
- e->aesenclast(xmmA, anyptr_gpB);
- e->aesimc(xmmA, xmmB);
- e->aesimc(xmmA, anyptr_gpB);
- e->aeskeygenassist(xmmA, xmmB, 0);
- e->aeskeygenassist(xmmA, anyptr_gpB, 0);
-
- // SHA.
- e->nop();
-
- e->sha1msg1(xmmA, xmmB);
- e->sha1msg1(xmmA, anyptr_gpB);
- e->sha1msg2(xmmA, xmmB);
- e->sha1msg2(xmmA, anyptr_gpB);
- e->sha1nexte(xmmA, xmmB);
- e->sha1nexte(xmmA, anyptr_gpB);
- e->sha1rnds4(xmmA, xmmB, 0);
- e->sha1rnds4(xmmA, anyptr_gpB, 0);
- e->sha256msg1(xmmA, xmmB);
- e->sha256msg1(xmmA, anyptr_gpB);
- e->sha256msg2(xmmA, xmmB);
- e->sha256msg2(xmmA, anyptr_gpB);
- e->sha256rnds2(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
- e->sha256rnds2(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
- e->sha256rnds2(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
- e->sha256rnds2(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
-
- // PCLMULQDQ.
- e->nop();
-
- e->pclmulqdq(xmmA, xmmB, 0);
- e->pclmulqdq(xmmA, anyptr_gpB, 0);
-
- // AVX.
- e->nop();
-
- e->vaddpd(xmmA, xmmB, xmmC);
- e->vaddpd(xmmA, xmmB, anyptr_gpC);
- e->vaddpd(ymmA, ymmB, ymmC);
- e->vaddpd(ymmA, ymmB, anyptr_gpC);
- e->vaddps(xmmA, xmmB, xmmC);
- e->vaddps(xmmA, xmmB, anyptr_gpC);
- e->vaddps(ymmA, ymmB, ymmC);
- e->vaddps(ymmA, ymmB, anyptr_gpC);
- e->vaddsd(xmmA, xmmB, xmmC);
- e->vaddsd(xmmA, xmmB, anyptr_gpC);
- e->vaddss(xmmA, xmmB, xmmC);
- e->vaddss(xmmA, xmmB, anyptr_gpC);
- e->vaddsubpd(xmmA, xmmB, xmmC);
- e->vaddsubpd(xmmA, xmmB, anyptr_gpC);
- e->vaddsubpd(ymmA, ymmB, ymmC);
- e->vaddsubpd(ymmA, ymmB, anyptr_gpC);
- e->vaddsubps(xmmA, xmmB, xmmC);
- e->vaddsubps(xmmA, xmmB, anyptr_gpC);
- e->vaddsubps(ymmA, ymmB, ymmC);
- e->vaddsubps(ymmA, ymmB, anyptr_gpC);
- e->vandpd(xmmA, xmmB, xmmC);
- e->vandpd(xmmA, xmmB, anyptr_gpC);
- e->vandpd(ymmA, ymmB, ymmC);
- e->vandpd(ymmA, ymmB, anyptr_gpC);
- e->vandps(xmmA, xmmB, xmmC);
- e->vandps(xmmA, xmmB, anyptr_gpC);
- e->vandps(ymmA, ymmB, ymmC);
- e->vandps(ymmA, ymmB, anyptr_gpC);
- e->vandnpd(xmmA, xmmB, xmmC);
- e->vandnpd(xmmA, xmmB, anyptr_gpC);
- e->vandnpd(ymmA, ymmB, ymmC);
- e->vandnpd(ymmA, ymmB, anyptr_gpC);
- e->vandnps(xmmA, xmmB, xmmC);
- e->vandnps(xmmA, xmmB, anyptr_gpC);
- e->vandnps(ymmA, ymmB, ymmC);
- e->vandnps(ymmA, ymmB, anyptr_gpC);
- e->vblendpd(xmmA, xmmB, xmmC, 0);
- e->vblendpd(xmmA, xmmB, anyptr_gpC, 0);
- e->vblendpd(ymmA, ymmB, ymmC, 0);
- e->vblendpd(ymmA, ymmB, anyptr_gpC, 0);
- e->vblendps(xmmA, xmmB, xmmC, 0);
- e->vblendps(xmmA, xmmB, anyptr_gpC, 0);
- e->vblendps(ymmA, ymmB, ymmC, 0);
- e->vblendps(ymmA, ymmB, anyptr_gpC, 0);
- e->vblendvpd(xmmA, xmmB, xmmC, xmmD);
- e->vblendvpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vblendvpd(ymmA, ymmB, ymmC, ymmD);
- e->vblendvpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vbroadcastf128(ymmA, anyptr_gpB);
- e->vbroadcastsd(ymmA, anyptr_gpB);
- e->vbroadcastss(xmmA, anyptr_gpB);
- e->vbroadcastss(ymmA, anyptr_gpB);
- e->vcmppd(xmmA, xmmB, xmmC, 0);
- e->vcmppd(xmmA, xmmB, anyptr_gpC, 0);
- e->vcmppd(ymmA, ymmB, ymmC, 0);
- e->vcmppd(ymmA, ymmB, anyptr_gpC, 0);
- e->vcmpps(xmmA, xmmB, xmmC, 0);
- e->vcmpps(xmmA, xmmB, anyptr_gpC, 0);
- e->vcmpps(ymmA, ymmB, ymmC, 0);
- e->vcmpps(ymmA, ymmB, anyptr_gpC, 0);
- e->vcmpsd(xmmA, xmmB, xmmC, 0);
- e->vcmpsd(xmmA, xmmB, anyptr_gpC, 0);
- e->vcmpss(xmmA, xmmB, xmmC, 0);
- e->vcmpss(xmmA, xmmB, anyptr_gpC, 0);
- e->vcomisd(xmmA, xmmB);
- e->vcomisd(xmmA, anyptr_gpB);
- e->vcomiss(xmmA, xmmB);
- e->vcomiss(xmmA, anyptr_gpB);
- e->vcvtdq2pd(xmmA, xmmB);
- e->vcvtdq2pd(xmmA, anyptr_gpB);
- e->vcvtdq2pd(ymmA, xmmB);
- e->vcvtdq2pd(ymmA, anyptr_gpB);
- e->vcvtdq2ps(xmmA, xmmB);
- e->vcvtdq2ps(xmmA, anyptr_gpB);
- e->vcvtdq2ps(ymmA, ymmB);
- e->vcvtdq2ps(ymmA, anyptr_gpB);
- e->vcvtpd2dq(xmmA, xmmB);
- e->vcvtpd2dq(xmmA, ymmB);
- e->vcvtpd2dq(xmmA, anyptr_gpB);
- e->vcvtpd2ps(xmmA, xmmB);
- e->vcvtpd2ps(xmmA, ymmB);
- e->vcvtpd2ps(xmmA, anyptr_gpB);
- e->vcvtps2dq(xmmA, xmmB);
- e->vcvtps2dq(xmmA, anyptr_gpB);
- e->vcvtps2dq(ymmA, ymmB);
- e->vcvtps2dq(ymmA, anyptr_gpB);
- e->vcvtps2pd(xmmA, xmmB);
- e->vcvtps2pd(xmmA, anyptr_gpB);
- e->vcvtps2pd(ymmA, xmmB);
- e->vcvtps2pd(ymmA, anyptr_gpB);
- e->vcvtsd2si(gzA, xmmB);
- e->vcvtsd2si(gzA, anyptr_gpB);
- e->vcvtsd2ss(xmmA, xmmB, xmmC);
- e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
- e->vcvtsi2sd(xmmA, xmmB, gzC);
- e->vcvtsi2sd(xmmA, xmmB, anyptr_gpC);
- e->vcvtsi2ss(xmmA, xmmB, gzC);
- e->vcvtsi2ss(xmmA, xmmB, anyptr_gpC);
- e->vcvtss2sd(xmmA, xmmB, xmmC);
- e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
- e->vcvtss2si(gzA, xmmB);
- e->vcvtss2si(gzA, anyptr_gpB);
- e->vcvttpd2dq(xmmA, xmmB);
- e->vcvttpd2dq(xmmA, ymmB);
- e->vcvttpd2dq(xmmA, anyptr_gpB);
- e->vcvttps2dq(xmmA, xmmB);
- e->vcvttps2dq(xmmA, anyptr_gpB);
- e->vcvttps2dq(ymmA, ymmB);
- e->vcvttps2dq(ymmA, anyptr_gpB);
- e->vcvttsd2si(gzA, xmmB);
- e->vcvttsd2si(gzA, anyptr_gpB);
- e->vcvttss2si(gzA, xmmB);
- e->vcvttss2si(gzA, anyptr_gpB);
- e->vdivpd(xmmA, xmmB, xmmC);
- e->vdivpd(xmmA, xmmB, anyptr_gpC);
- e->vdivpd(ymmA, ymmB, ymmC);
- e->vdivpd(ymmA, ymmB, anyptr_gpC);
- e->vdivps(xmmA, xmmB, xmmC);
- e->vdivps(xmmA, xmmB, anyptr_gpC);
- e->vdivps(ymmA, ymmB, ymmC);
- e->vdivps(ymmA, ymmB, anyptr_gpC);
- e->vdivsd(xmmA, xmmB, xmmC);
- e->vdivsd(xmmA, xmmB, anyptr_gpC);
- e->vdivss(xmmA, xmmB, xmmC);
- e->vdivss(xmmA, xmmB, anyptr_gpC);
- e->vdppd(xmmA, xmmB, xmmC, 0);
- e->vdppd(xmmA, xmmB, anyptr_gpC, 0);
- e->vdpps(xmmA, xmmB, xmmC, 0);
- e->vdpps(xmmA, xmmB, anyptr_gpC, 0);
- e->vdpps(ymmA, ymmB, ymmC, 0);
- e->vdpps(ymmA, ymmB, anyptr_gpC, 0);
- e->vextractf128(xmmA, ymmB, 0);
- e->vextractf128(anyptr_gpA, ymmB, 0);
- e->vextractps(gzA, xmmB, 0);
- e->vextractps(anyptr_gpA, xmmB, 0);
- e->vhaddpd(xmmA, xmmB, xmmC);
- e->vhaddpd(xmmA, xmmB, anyptr_gpC);
- e->vhaddpd(ymmA, ymmB, ymmC);
- e->vhaddpd(ymmA, ymmB, anyptr_gpC);
- e->vhaddps(xmmA, xmmB, xmmC);
- e->vhaddps(xmmA, xmmB, anyptr_gpC);
- e->vhaddps(ymmA, ymmB, ymmC);
- e->vhaddps(ymmA, ymmB, anyptr_gpC);
- e->vhsubpd(xmmA, xmmB, xmmC);
- e->vhsubpd(xmmA, xmmB, anyptr_gpC);
- e->vhsubpd(ymmA, ymmB, ymmC);
- e->vhsubpd(ymmA, ymmB, anyptr_gpC);
- e->vhsubps(xmmA, xmmB, xmmC);
- e->vhsubps(xmmA, xmmB, anyptr_gpC);
- e->vhsubps(ymmA, ymmB, ymmC);
- e->vhsubps(ymmA, ymmB, anyptr_gpC);
- e->vinsertf128(ymmA, ymmB, xmmC, 0);
- e->vinsertf128(ymmA, ymmB, anyptr_gpC, 0);
- e->vinsertps(xmmA, xmmB, xmmC, 0);
- e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
- e->vlddqu(xmmA, anyptr_gpB);
- e->vlddqu(ymmA, anyptr_gpB);
- e->vldmxcsr(anyptr_gpA);
- e->vmaskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
- e->vmaskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
- e->vmaskmovps(xmmA, xmmB, anyptr_gpC);
- e->vmaskmovps(ymmA, ymmB, anyptr_gpC);
- e->vmaskmovps(anyptr_gpA, xmmB, xmmC);
- e->vmaskmovps(anyptr_gpA, ymmB, ymmC);
- e->vmaskmovpd(xmmA, xmmB, anyptr_gpC);
- e->vmaskmovpd(ymmA, ymmB, anyptr_gpC);
- e->vmaskmovpd(anyptr_gpA, xmmB, xmmC);
- e->vmaskmovpd(anyptr_gpA, ymmB, ymmC);
- e->vmaxpd(xmmA, xmmB, xmmC);
- e->vmaxpd(xmmA, xmmB, anyptr_gpC);
- e->vmaxpd(ymmA, ymmB, ymmC);
- e->vmaxpd(ymmA, ymmB, anyptr_gpC);
- e->vmaxps(xmmA, xmmB, xmmC);
- e->vmaxps(xmmA, xmmB, anyptr_gpC);
- e->vmaxps(ymmA, ymmB, ymmC);
- e->vmaxps(ymmA, ymmB, anyptr_gpC);
- e->vmaxsd(xmmA, xmmB, xmmC);
- e->vmaxsd(xmmA, xmmB, anyptr_gpC);
- e->vmaxss(xmmA, xmmB, xmmC);
- e->vmaxss(xmmA, xmmB, anyptr_gpC);
- e->vminpd(xmmA, xmmB, xmmC);
- e->vminpd(xmmA, xmmB, anyptr_gpC);
- e->vminpd(ymmA, ymmB, ymmC);
- e->vminpd(ymmA, ymmB, anyptr_gpC);
- e->vminps(xmmA, xmmB, xmmC);
- e->vminps(xmmA, xmmB, anyptr_gpC);
- e->vminps(ymmA, ymmB, ymmC);
- e->vminps(ymmA, ymmB, anyptr_gpC);
- e->vminsd(xmmA, xmmB, xmmC);
- e->vminsd(xmmA, xmmB, anyptr_gpC);
- e->vminss(xmmA, xmmB, xmmC);
- e->vminss(xmmA, xmmB, anyptr_gpC);
- e->vmovapd(xmmA, xmmB);
- e->vmovapd(xmmA, anyptr_gpB);
- e->vmovapd(anyptr_gpA, xmmB);
- e->vmovapd(ymmA, ymmB);
- e->vmovapd(ymmA, anyptr_gpB);
- e->vmovapd(anyptr_gpA, ymmB);
- e->vmovaps(xmmA, xmmB);
- e->vmovaps(xmmA, anyptr_gpB);
- e->vmovaps(anyptr_gpA, xmmB);
- e->vmovaps(ymmA, ymmB);
- e->vmovaps(ymmA, anyptr_gpB);
- e->vmovaps(anyptr_gpA, ymmB);
- e->vmovd(xmmA, gzB);
- e->vmovd(xmmA, anyptr_gpB);
- e->vmovd(gzA, xmmB);
- e->vmovd(anyptr_gpA, xmmB);
- e->vmovddup(xmmA, xmmB);
- e->vmovddup(xmmA, anyptr_gpB);
- e->vmovddup(ymmA, ymmB);
- e->vmovddup(ymmA, anyptr_gpB);
- e->vmovdqa(xmmA, xmmB);
- e->vmovdqa(xmmA, anyptr_gpB);
- e->vmovdqa(anyptr_gpA, xmmB);
- e->vmovdqa(ymmA, ymmB);
- e->vmovdqa(ymmA, anyptr_gpB);
- e->vmovdqa(anyptr_gpA, ymmB);
- e->vmovdqu(xmmA, xmmB);
- e->vmovdqu(xmmA, anyptr_gpB);
- e->vmovdqu(anyptr_gpA, xmmB);
- e->vmovdqu(ymmA, ymmB);
- e->vmovdqu(ymmA, anyptr_gpB);
- e->vmovdqu(anyptr_gpA, ymmB);
- e->vmovhlps(xmmA, xmmB, xmmC);
- e->vmovhpd(xmmA, xmmB, anyptr_gpC);
- e->vmovhpd(anyptr_gpA, xmmB);
- e->vmovhps(xmmA, xmmB, anyptr_gpC);
- e->vmovhps(anyptr_gpA, xmmB);
- e->vmovlhps(xmmA, xmmB, xmmC);
- e->vmovlpd(xmmA, xmmB, anyptr_gpC);
- e->vmovlpd(anyptr_gpA, xmmB);
- e->vmovlps(xmmA, xmmB, anyptr_gpC);
- e->vmovlps(anyptr_gpA, xmmB);
- e->vmovmskpd(gzA, xmmB);
- e->vmovmskpd(gzA, ymmB);
- e->vmovmskps(gzA, xmmB);
- e->vmovmskps(gzA, ymmB);
- e->vmovntdq(anyptr_gpA, xmmB);
- e->vmovntdq(anyptr_gpA, ymmB);
- e->vmovntdqa(xmmA, anyptr_gpB);
- e->vmovntpd(anyptr_gpA, xmmB);
- e->vmovntpd(anyptr_gpA, ymmB);
- e->vmovntps(anyptr_gpA, xmmB);
- e->vmovntps(anyptr_gpA, ymmB);
- e->vmovsd(xmmA, xmmB, xmmC);
- e->vmovsd(xmmA, anyptr_gpB);
- e->vmovsd(anyptr_gpA, xmmB);
- e->vmovshdup(xmmA, xmmB);
- e->vmovshdup(xmmA, anyptr_gpB);
- e->vmovshdup(ymmA, ymmB);
- e->vmovshdup(ymmA, anyptr_gpB);
- e->vmovsldup(xmmA, xmmB);
- e->vmovsldup(xmmA, anyptr_gpB);
- e->vmovsldup(ymmA, ymmB);
- e->vmovsldup(ymmA, anyptr_gpB);
- e->vmovss(xmmA, xmmB, xmmC);
- e->vmovss(xmmA, anyptr_gpB);
- e->vmovss(anyptr_gpA, xmmB);
- e->vmovupd(xmmA, xmmB);
- e->vmovupd(xmmA, anyptr_gpB);
- e->vmovupd(anyptr_gpA, xmmB);
- e->vmovupd(ymmA, ymmB);
- e->vmovupd(ymmA, anyptr_gpB);
- e->vmovupd(anyptr_gpA, ymmB);
- e->vmovups(xmmA, xmmB);
- e->vmovups(xmmA, anyptr_gpB);
- e->vmovups(anyptr_gpA, xmmB);
- e->vmovups(ymmA, ymmB);
- e->vmovups(ymmA, anyptr_gpB);
- e->vmovups(anyptr_gpA, ymmB);
- e->vmpsadbw(xmmA, xmmB, xmmC, 0);
- e->vmpsadbw(xmmA, xmmB, anyptr_gpC, 0);
- e->vmulpd(xmmA, xmmB, xmmC);
- e->vmulpd(xmmA, xmmB, anyptr_gpC);
- e->vmulpd(ymmA, ymmB, ymmC);
- e->vmulpd(ymmA, ymmB, anyptr_gpC);
- e->vmulps(xmmA, xmmB, xmmC);
- e->vmulps(xmmA, xmmB, anyptr_gpC);
- e->vmulps(ymmA, ymmB, ymmC);
- e->vmulps(ymmA, ymmB, anyptr_gpC);
- e->vmulsd(xmmA, xmmB, xmmC);
- e->vmulsd(xmmA, xmmB, anyptr_gpC);
- e->vmulss(xmmA, xmmB, xmmC);
- e->vmulss(xmmA, xmmB, anyptr_gpC);
- e->vorpd(xmmA, xmmB, xmmC);
- e->vorpd(xmmA, xmmB, anyptr_gpC);
- e->vorpd(ymmA, ymmB, ymmC);
- e->vorpd(ymmA, ymmB, anyptr_gpC);
- e->vorps(xmmA, xmmB, xmmC);
- e->vorps(xmmA, xmmB, anyptr_gpC);
- e->vorps(ymmA, ymmB, ymmC);
- e->vorps(ymmA, ymmB, anyptr_gpC);
- e->vpabsb(xmmA, xmmB);
- e->vpabsb(xmmA, anyptr_gpB);
- e->vpabsd(xmmA, xmmB);
- e->vpabsd(xmmA, anyptr_gpB);
- e->vpabsw(xmmA, xmmB);
- e->vpabsw(xmmA, anyptr_gpB);
- e->vpackssdw(xmmA, xmmB, xmmC);
- e->vpackssdw(xmmA, xmmB, anyptr_gpC);
- e->vpacksswb(xmmA, xmmB, xmmC);
- e->vpacksswb(xmmA, xmmB, anyptr_gpC);
- e->vpackusdw(xmmA, xmmB, xmmC);
- e->vpackusdw(xmmA, xmmB, anyptr_gpC);
- e->vpackuswb(xmmA, xmmB, xmmC);
- e->vpackuswb(xmmA, xmmB, anyptr_gpC);
- e->vpaddb(xmmA, xmmB, xmmC);
- e->vpaddb(xmmA, xmmB, anyptr_gpC);
- e->vpaddd(xmmA, xmmB, xmmC);
- e->vpaddd(xmmA, xmmB, anyptr_gpC);
- e->vpaddq(xmmA, xmmB, xmmC);
- e->vpaddq(xmmA, xmmB, anyptr_gpC);
- e->vpaddw(xmmA, xmmB, xmmC);
- e->vpaddw(xmmA, xmmB, anyptr_gpC);
- e->vpaddsb(xmmA, xmmB, xmmC);
- e->vpaddsb(xmmA, xmmB, anyptr_gpC);
- e->vpaddsw(xmmA, xmmB, xmmC);
- e->vpaddsw(xmmA, xmmB, anyptr_gpC);
- e->vpaddusb(xmmA, xmmB, xmmC);
- e->vpaddusb(xmmA, xmmB, anyptr_gpC);
- e->vpaddusw(xmmA, xmmB, xmmC);
- e->vpaddusw(xmmA, xmmB, anyptr_gpC);
- e->vpalignr(xmmA, xmmB, xmmC, 0);
- e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
- e->vpand(xmmA, xmmB, xmmC);
- e->vpand(xmmA, xmmB, anyptr_gpC);
- e->vpandn(xmmA, xmmB, xmmC);
- e->vpandn(xmmA, xmmB, anyptr_gpC);
- e->vpavgb(xmmA, xmmB, xmmC);
- e->vpavgb(xmmA, xmmB, anyptr_gpC);
- e->vpavgw(xmmA, xmmB, xmmC);
- e->vpavgw(xmmA, xmmB, anyptr_gpC);
- e->vpblendvb(xmmA, xmmB, xmmC, xmmD);
- e->vpblendvb(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpblendw(xmmA, xmmB, xmmC, 0);
- e->vpblendw(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcmpeqb(xmmA, xmmB, xmmC);
- e->vpcmpeqb(xmmA, xmmB, anyptr_gpC);
- e->vpcmpeqd(xmmA, xmmB, xmmC);
- e->vpcmpeqd(xmmA, xmmB, anyptr_gpC);
- e->vpcmpeqq(xmmA, xmmB, xmmC);
- e->vpcmpeqq(xmmA, xmmB, anyptr_gpC);
- e->vpcmpeqw(xmmA, xmmB, xmmC);
- e->vpcmpeqw(xmmA, xmmB, anyptr_gpC);
- e->vpcmpgtb(xmmA, xmmB, xmmC);
- e->vpcmpgtb(xmmA, xmmB, anyptr_gpC);
- e->vpcmpgtd(xmmA, xmmB, xmmC);
- e->vpcmpgtd(xmmA, xmmB, anyptr_gpC);
- e->vpcmpgtq(xmmA, xmmB, xmmC);
- e->vpcmpgtq(xmmA, xmmB, anyptr_gpC);
- e->vpcmpgtw(xmmA, xmmB, xmmC);
- e->vpcmpgtw(xmmA, xmmB, anyptr_gpC);
- e->vpcmpestri(xmmA, xmmB, 0);
- e->vpcmpestri(xmmA, anyptr_gpB, 0);
- e->vpcmpestrm(xmmA, xmmB, 0);
- e->vpcmpestrm(xmmA, anyptr_gpB, 0);
- e->vpcmpistri(xmmA, xmmB, 0);
- e->vpcmpistri(xmmA, anyptr_gpB, 0);
- e->vpcmpistrm(xmmA, xmmB, 0);
- e->vpcmpistrm(xmmA, anyptr_gpB, 0);
- e->vpermilpd(xmmA, xmmB, xmmC);
- e->vpermilpd(xmmA, xmmB, anyptr_gpC);
- e->vpermilpd(ymmA, ymmB, ymmC);
- e->vpermilpd(ymmA, ymmB, anyptr_gpC);
- e->vpermilpd(xmmA, xmmB, 0);
- e->vpermilpd(xmmA, anyptr_gpB, 0);
- e->vpermilpd(ymmA, ymmB, 0);
- e->vpermilpd(ymmA, anyptr_gpB, 0);
- e->vpermilps(xmmA, xmmB, xmmC);
- e->vpermilps(xmmA, xmmB, anyptr_gpC);
- e->vpermilps(ymmA, ymmB, ymmC);
- e->vpermilps(ymmA, ymmB, anyptr_gpC);
- e->vpermilps(xmmA, xmmB, 0);
- e->vpermilps(xmmA, anyptr_gpB, 0);
- e->vpermilps(ymmA, ymmB, 0);
- e->vpermilps(ymmA, anyptr_gpB, 0);
- e->vperm2f128(ymmA, ymmB, ymmC, 0);
- e->vperm2f128(ymmA, ymmB, anyptr_gpC, 0);
- e->vpextrb(gzA, xmmB, 0);
- e->vpextrb(anyptr_gpA, xmmB, 0);
- e->vpextrd(gzA, xmmB, 0);
- e->vpextrd(anyptr_gpA, xmmB, 0);
- if (isX64) e->vpextrq(gzA, xmmB, 0);
- if (isX64) e->vpextrq(anyptr_gpA, xmmB, 0);
- e->vpextrw(gzA, xmmB, 0);
- e->vpextrw(anyptr_gpA, xmmB, 0);
- e->vphaddd(xmmA, xmmB, xmmC);
- e->vphaddd(xmmA, xmmB, anyptr_gpC);
- e->vphaddsw(xmmA, xmmB, xmmC);
- e->vphaddsw(xmmA, xmmB, anyptr_gpC);
- e->vphaddw(xmmA, xmmB, xmmC);
- e->vphaddw(xmmA, xmmB, anyptr_gpC);
- e->vphminposuw(xmmA, xmmB);
- e->vphminposuw(xmmA, anyptr_gpB);
- e->vphsubd(xmmA, xmmB, xmmC);
- e->vphsubd(xmmA, xmmB, anyptr_gpC);
- e->vphsubsw(xmmA, xmmB, xmmC);
- e->vphsubsw(xmmA, xmmB, anyptr_gpC);
- e->vphsubw(xmmA, xmmB, xmmC);
- e->vphsubw(xmmA, xmmB, anyptr_gpC);
- e->vpinsrb(xmmA, xmmB, gzC, 0);
- e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrd(xmmA, xmmB, gzC, 0);
- e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrw(xmmA, xmmB, gzC, 0);
- e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
- e->vpmaddubsw(xmmA, xmmB, xmmC);
- e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
- e->vpmaddwd(xmmA, xmmB, xmmC);
- e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsb(xmmA, xmmB, xmmC);
- e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsd(xmmA, xmmB, xmmC);
- e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsw(xmmA, xmmB, xmmC);
- e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
- e->vpmaxub(xmmA, xmmB, xmmC);
- e->vpmaxub(xmmA, xmmB, anyptr_gpC);
- e->vpmaxud(xmmA, xmmB, xmmC);
- e->vpmaxud(xmmA, xmmB, anyptr_gpC);
- e->vpmaxuw(xmmA, xmmB, xmmC);
- e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
- e->vpminsb(xmmA, xmmB, xmmC);
- e->vpminsb(xmmA, xmmB, anyptr_gpC);
- e->vpminsd(xmmA, xmmB, xmmC);
- e->vpminsd(xmmA, xmmB, anyptr_gpC);
- e->vpminsw(xmmA, xmmB, xmmC);
- e->vpminsw(xmmA, xmmB, anyptr_gpC);
- e->vpminub(xmmA, xmmB, xmmC);
- e->vpminub(xmmA, xmmB, anyptr_gpC);
- e->vpminud(xmmA, xmmB, xmmC);
- e->vpminud(xmmA, xmmB, anyptr_gpC);
- e->vpminuw(xmmA, xmmB, xmmC);
- e->vpminuw(xmmA, xmmB, anyptr_gpC);
- e->vpmovmskb(gzA, xmmB);
- e->vpmovsxbd(xmmA, xmmB);
- e->vpmovsxbd(xmmA, anyptr_gpB);
- e->vpmovsxbq(xmmA, xmmB);
- e->vpmovsxbq(xmmA, anyptr_gpB);
- e->vpmovsxbw(xmmA, xmmB);
- e->vpmovsxbw(xmmA, anyptr_gpB);
- e->vpmovsxdq(xmmA, xmmB);
- e->vpmovsxdq(xmmA, anyptr_gpB);
- e->vpmovsxwd(xmmA, xmmB);
- e->vpmovsxwd(xmmA, anyptr_gpB);
- e->vpmovsxwq(xmmA, xmmB);
- e->vpmovsxwq(xmmA, anyptr_gpB);
- e->vpmovzxbd(xmmA, xmmB);
- e->vpmovzxbd(xmmA, anyptr_gpB);
- e->vpmovzxbq(xmmA, xmmB);
- e->vpmovzxbq(xmmA, anyptr_gpB);
- e->vpmovzxbw(xmmA, xmmB);
- e->vpmovzxbw(xmmA, anyptr_gpB);
- e->vpmovzxdq(xmmA, xmmB);
- e->vpmovzxdq(xmmA, anyptr_gpB);
- e->vpmovzxwd(xmmA, xmmB);
- e->vpmovzxwd(xmmA, anyptr_gpB);
- e->vpmovzxwq(xmmA, xmmB);
- e->vpmovzxwq(xmmA, anyptr_gpB);
- e->vpmuldq(xmmA, xmmB, xmmC);
- e->vpmuldq(xmmA, xmmB, anyptr_gpC);
- e->vpmulhrsw(xmmA, xmmB, xmmC);
- e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
- e->vpmulhuw(xmmA, xmmB, xmmC);
- e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
- e->vpmulhw(xmmA, xmmB, xmmC);
- e->vpmulhw(xmmA, xmmB, anyptr_gpC);
- e->vpmulld(xmmA, xmmB, xmmC);
- e->vpmulld(xmmA, xmmB, anyptr_gpC);
- e->vpmullw(xmmA, xmmB, xmmC);
- e->vpmullw(xmmA, xmmB, anyptr_gpC);
- e->vpmuludq(xmmA, xmmB, xmmC);
- e->vpmuludq(xmmA, xmmB, anyptr_gpC);
- e->vpor(xmmA, xmmB, xmmC);
- e->vpor(xmmA, xmmB, anyptr_gpC);
- e->vpsadbw(xmmA, xmmB, xmmC);
- e->vpsadbw(xmmA, xmmB, anyptr_gpC);
- e->vpshufb(xmmA, xmmB, xmmC);
- e->vpshufb(xmmA, xmmB, anyptr_gpC);
- e->vpshufd(xmmA, xmmB, 0);
- e->vpshufd(xmmA, anyptr_gpB, 0);
- e->vpshufhw(xmmA, xmmB, 0);
- e->vpshufhw(xmmA, anyptr_gpB, 0);
- e->vpshuflw(xmmA, xmmB, 0);
- e->vpshuflw(xmmA, anyptr_gpB, 0);
- e->vpsignb(xmmA, xmmB, xmmC);
- e->vpsignb(xmmA, xmmB, anyptr_gpC);
- e->vpsignd(xmmA, xmmB, xmmC);
- e->vpsignd(xmmA, xmmB, anyptr_gpC);
- e->vpsignw(xmmA, xmmB, xmmC);
- e->vpsignw(xmmA, xmmB, anyptr_gpC);
- e->vpslld(xmmA, xmmB, xmmC);
- e->vpslld(xmmA, xmmB, anyptr_gpC);
- e->vpslld(xmmA, xmmB, 0);
- e->vpslldq(xmmA, xmmB, 0);
- e->vpsllq(xmmA, xmmB, xmmC);
- e->vpsllq(xmmA, xmmB, anyptr_gpC);
- e->vpsllq(xmmA, xmmB, 0);
- e->vpsllw(xmmA, xmmB, xmmC);
- e->vpsllw(xmmA, xmmB, anyptr_gpC);
- e->vpsllw(xmmA, xmmB, 0);
- e->vpsrad(xmmA, xmmB, xmmC);
- e->vpsrad(xmmA, xmmB, anyptr_gpC);
- e->vpsrad(xmmA, xmmB, 0);
- e->vpsraw(xmmA, xmmB, xmmC);
- e->vpsraw(xmmA, xmmB, anyptr_gpC);
- e->vpsraw(xmmA, xmmB, 0);
- e->vpsrld(xmmA, xmmB, xmmC);
- e->vpsrld(xmmA, xmmB, anyptr_gpC);
- e->vpsrld(xmmA, xmmB, 0);
- e->vpsrldq(xmmA, xmmB, 0);
- e->vpsrlq(xmmA, xmmB, xmmC);
- e->vpsrlq(xmmA, xmmB, anyptr_gpC);
- e->vpsrlq(xmmA, xmmB, 0);
- e->vpsrlw(xmmA, xmmB, xmmC);
- e->vpsrlw(xmmA, xmmB, anyptr_gpC);
- e->vpsrlw(xmmA, xmmB, 0);
- e->vpsubb(xmmA, xmmB, xmmC);
- e->vpsubb(xmmA, xmmB, anyptr_gpC);
- e->vpsubd(xmmA, xmmB, xmmC);
- e->vpsubd(xmmA, xmmB, anyptr_gpC);
- e->vpsubq(xmmA, xmmB, xmmC);
- e->vpsubq(xmmA, xmmB, anyptr_gpC);
- e->vpsubw(xmmA, xmmB, xmmC);
- e->vpsubw(xmmA, xmmB, anyptr_gpC);
- e->vpsubsb(xmmA, xmmB, xmmC);
- e->vpsubsb(xmmA, xmmB, anyptr_gpC);
- e->vpsubsw(xmmA, xmmB, xmmC);
- e->vpsubsw(xmmA, xmmB, anyptr_gpC);
- e->vpsubusb(xmmA, xmmB, xmmC);
- e->vpsubusb(xmmA, xmmB, anyptr_gpC);
- e->vpsubusw(xmmA, xmmB, xmmC);
- e->vpsubusw(xmmA, xmmB, anyptr_gpC);
- e->vptest(xmmA, xmmB);
- e->vptest(xmmA, anyptr_gpB);
- e->vptest(ymmA, ymmB);
- e->vptest(ymmA, anyptr_gpB);
- e->vpunpckhbw(xmmA, xmmB, xmmC);
- e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhdq(xmmA, xmmB, xmmC);
- e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhqdq(xmmA, xmmB, xmmC);
- e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhwd(xmmA, xmmB, xmmC);
- e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklbw(xmmA, xmmB, xmmC);
- e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
- e->vpunpckldq(xmmA, xmmB, xmmC);
- e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklqdq(xmmA, xmmB, xmmC);
- e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklwd(xmmA, xmmB, xmmC);
- e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
- e->vpxor(xmmA, xmmB, xmmC);
- e->vpxor(xmmA, xmmB, anyptr_gpC);
- e->vrcpps(xmmA, xmmB);
- e->vrcpps(xmmA, anyptr_gpB);
- e->vrcpps(ymmA, ymmB);
- e->vrcpps(ymmA, anyptr_gpB);
- e->vrcpss(xmmA, xmmB, xmmC);
- e->vrcpss(xmmA, xmmB, anyptr_gpC);
- e->vrsqrtps(xmmA, xmmB);
- e->vrsqrtps(xmmA, anyptr_gpB);
- e->vrsqrtps(ymmA, ymmB);
- e->vrsqrtps(ymmA, anyptr_gpB);
- e->vrsqrtss(xmmA, xmmB, xmmC);
- e->vrsqrtss(xmmA, xmmB, anyptr_gpC);
- e->vroundpd(xmmA, xmmB, 0);
- e->vroundpd(xmmA, anyptr_gpB, 0);
- e->vroundpd(ymmA, ymmB, 0);
- e->vroundpd(ymmA, anyptr_gpB, 0);
- e->vroundps(xmmA, xmmB, 0);
- e->vroundps(xmmA, anyptr_gpB, 0);
- e->vroundps(ymmA, ymmB, 0);
- e->vroundps(ymmA, anyptr_gpB, 0);
- e->vroundsd(xmmA, xmmB, xmmC, 0);
- e->vroundsd(xmmA, xmmB, anyptr_gpC, 0);
- e->vroundss(xmmA, xmmB, xmmC, 0);
- e->vroundss(xmmA, xmmB, anyptr_gpC, 0);
- e->vshufpd(xmmA, xmmB, xmmC, 0);
- e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
- e->vshufpd(ymmA, ymmB, ymmC, 0);
- e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
- e->vshufps(xmmA, xmmB, xmmC, 0);
- e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
- e->vshufps(ymmA, ymmB, ymmC, 0);
- e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
- e->vsqrtpd(xmmA, xmmB);
- e->vsqrtpd(xmmA, anyptr_gpB);
- e->vsqrtpd(ymmA, ymmB);
- e->vsqrtpd(ymmA, anyptr_gpB);
- e->vsqrtps(xmmA, xmmB);
- e->vsqrtps(xmmA, anyptr_gpB);
- e->vsqrtps(ymmA, ymmB);
- e->vsqrtps(ymmA, anyptr_gpB);
- e->vsqrtsd(xmmA, xmmB, xmmC);
- e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
- e->vsqrtss(xmmA, xmmB, xmmC);
- e->vsqrtss(xmmA, xmmB, anyptr_gpC);
- e->vstmxcsr(anyptr_gpA);
- e->vsubpd(xmmA, xmmB, xmmC);
- e->vsubpd(xmmA, xmmB, anyptr_gpC);
- e->vsubpd(ymmA, ymmB, ymmC);
- e->vsubpd(ymmA, ymmB, anyptr_gpC);
- e->vsubps(xmmA, xmmB, xmmC);
- e->vsubps(xmmA, xmmB, anyptr_gpC);
- e->vsubps(ymmA, ymmB, ymmC);
- e->vsubps(ymmA, ymmB, anyptr_gpC);
- e->vsubsd(xmmA, xmmB, xmmC);
- e->vsubsd(xmmA, xmmB, anyptr_gpC);
- e->vsubss(xmmA, xmmB, xmmC);
- e->vsubss(xmmA, xmmB, anyptr_gpC);
- e->vtestps(xmmA, xmmB);
- e->vtestps(xmmA, anyptr_gpB);
- e->vtestps(ymmA, ymmB);
- e->vtestps(ymmA, anyptr_gpB);
- e->vtestpd(xmmA, xmmB);
- e->vtestpd(xmmA, anyptr_gpB);
- e->vtestpd(ymmA, ymmB);
- e->vtestpd(ymmA, anyptr_gpB);
- e->vucomisd(xmmA, xmmB);
- e->vucomisd(xmmA, anyptr_gpB);
- e->vucomiss(xmmA, xmmB);
- e->vucomiss(xmmA, anyptr_gpB);
- e->vunpckhpd(xmmA, xmmB, xmmC);
- e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
- e->vunpckhpd(ymmA, ymmB, ymmC);
- e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
- e->vunpckhps(xmmA, xmmB, xmmC);
- e->vunpckhps(xmmA, xmmB, anyptr_gpC);
- e->vunpckhps(ymmA, ymmB, ymmC);
- e->vunpckhps(ymmA, ymmB, anyptr_gpC);
- e->vunpcklpd(xmmA, xmmB, xmmC);
- e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
- e->vunpcklpd(ymmA, ymmB, ymmC);
- e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
- e->vunpcklps(xmmA, xmmB, xmmC);
- e->vunpcklps(xmmA, xmmB, anyptr_gpC);
- e->vunpcklps(ymmA, ymmB, ymmC);
- e->vunpcklps(ymmA, ymmB, anyptr_gpC);
- e->vxorpd(xmmA, xmmB, xmmC);
- e->vxorpd(xmmA, xmmB, anyptr_gpC);
- e->vxorpd(ymmA, ymmB, ymmC);
- e->vxorpd(ymmA, ymmB, anyptr_gpC);
- e->vxorps(xmmA, xmmB, xmmC);
- e->vxorps(xmmA, xmmB, anyptr_gpC);
- e->vxorps(ymmA, ymmB, ymmC);
- e->vxorps(ymmA, ymmB, anyptr_gpC);
- e->vzeroall();
- e->vex3().vzeroall();
- e->vzeroupper();
- e->vex3().vzeroupper();
-
- // AVX+AESNI.
- e->nop();
-
- e->vaesdec(xmmA, xmmB, xmmC);
- e->vaesdec(xmmA, xmmB, anyptr_gpC);
- e->vaesdeclast(xmmA, xmmB, xmmC);
- e->vaesdeclast(xmmA, xmmB, anyptr_gpC);
- e->vaesenc(xmmA, xmmB, xmmC);
- e->vaesenc(xmmA, xmmB, anyptr_gpC);
- e->vaesenclast(xmmA, xmmB, xmmC);
- e->vaesenclast(xmmA, xmmB, anyptr_gpC);
- e->vaesimc(xmmA, xmmB);
- e->vaesimc(xmmA, anyptr_gpB);
- e->vaeskeygenassist(xmmA, xmmB, 0);
- e->vaeskeygenassist(xmmA, anyptr_gpB, 0);
-
- // AVX+PCLMULQDQ.
- e->nop();
-
- e->vpclmulqdq(xmmA, xmmB, xmmC, 0);
- e->vpclmulqdq(xmmA, xmmB, anyptr_gpC, 0);
-
- // AVX2.
- e->nop();
-
- e->vbroadcasti128(ymmA, anyptr_gpB);
- e->vbroadcastsd(ymmA, xmmB);
- e->vbroadcastss(xmmA, xmmB);
- e->vbroadcastss(ymmA, xmmB);
- e->vextracti128(xmmA, ymmB, 0);
- e->vextracti128(anyptr_gpA, ymmB, 0);
- e->vgatherdpd(xmmA, vx_ptr, xmmC);
- e->vgatherdpd(ymmA, vx_ptr, ymmC);
- e->vgatherdps(xmmA, vx_ptr, xmmC);
- e->vgatherdps(ymmA, vy_ptr, ymmC);
- e->vgatherqpd(xmmA, vx_ptr, xmmC);
- e->vgatherqpd(ymmA, vy_ptr, ymmC);
- e->vgatherqps(xmmA, vx_ptr, xmmC);
- e->vgatherqps(xmmA, vy_ptr, xmmC);
- e->vinserti128(ymmA, ymmB, xmmC, 0);
- e->vinserti128(ymmA, ymmB, anyptr_gpC, 0);
- e->vmovntdqa(ymmA, anyptr_gpB);
- e->vmpsadbw(ymmA, ymmB, ymmC, 0);
- e->vmpsadbw(ymmA, ymmB, anyptr_gpC, 0);
- e->vpabsb(ymmA, ymmB);
- e->vpabsb(ymmA, anyptr_gpB);
- e->vpabsd(ymmA, ymmB);
- e->vpabsd(ymmA, anyptr_gpB);
- e->vpabsw(ymmA, ymmB);
- e->vpabsw(ymmA, anyptr_gpB);
- e->vpackssdw(ymmA, ymmB, ymmC);
- e->vpackssdw(ymmA, ymmB, anyptr_gpC);
- e->vpacksswb(ymmA, ymmB, ymmC);
- e->vpacksswb(ymmA, ymmB, anyptr_gpC);
- e->vpackusdw(ymmA, ymmB, ymmC);
- e->vpackusdw(ymmA, ymmB, anyptr_gpC);
- e->vpackuswb(ymmA, ymmB, ymmC);
- e->vpackuswb(ymmA, ymmB, anyptr_gpC);
- e->vpaddb(ymmA, ymmB, ymmC);
- e->vpaddb(ymmA, ymmB, anyptr_gpC);
- e->vpaddd(ymmA, ymmB, ymmC);
- e->vpaddd(ymmA, ymmB, anyptr_gpC);
- e->vpaddq(ymmA, ymmB, ymmC);
- e->vpaddq(ymmA, ymmB, anyptr_gpC);
- e->vpaddw(ymmA, ymmB, ymmC);
- e->vpaddw(ymmA, ymmB, anyptr_gpC);
- e->vpaddsb(ymmA, ymmB, ymmC);
- e->vpaddsb(ymmA, ymmB, anyptr_gpC);
- e->vpaddsw(ymmA, ymmB, ymmC);
- e->vpaddsw(ymmA, ymmB, anyptr_gpC);
- e->vpaddusb(ymmA, ymmB, ymmC);
- e->vpaddusb(ymmA, ymmB, anyptr_gpC);
- e->vpaddusw(ymmA, ymmB, ymmC);
- e->vpaddusw(ymmA, ymmB, anyptr_gpC);
- e->vpalignr(ymmA, ymmB, ymmC, 0);
- e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
- e->vpand(ymmA, ymmB, ymmC);
- e->vpand(ymmA, ymmB, anyptr_gpC);
- e->vpandn(ymmA, ymmB, ymmC);
- e->vpandn(ymmA, ymmB, anyptr_gpC);
- e->vpavgb(ymmA, ymmB, ymmC);
- e->vpavgb(ymmA, ymmB, anyptr_gpC);
- e->vpavgw(ymmA, ymmB, ymmC);
- e->vpavgw(ymmA, ymmB, anyptr_gpC);
- e->vpblendd(xmmA, xmmB, xmmC, 0);
- e->vpblendd(xmmA, xmmB, anyptr_gpC, 0);
- e->vpblendd(ymmA, ymmB, ymmC, 0);
- e->vpblendd(ymmA, ymmB, anyptr_gpC, 0);
- e->vpblendvb(ymmA, ymmB, ymmC, ymmD);
- e->vpblendvb(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vpblendw(ymmA, ymmB, ymmC, 0);
- e->vpblendw(ymmA, ymmB, anyptr_gpC, 0);
- e->vpbroadcastb(xmmA, xmmB);
- e->vpbroadcastb(xmmA, anyptr_gpB);
- e->vpbroadcastb(ymmA, xmmB);
- e->vpbroadcastb(ymmA, anyptr_gpB);
- e->vpbroadcastd(xmmA, xmmB);
- e->vpbroadcastd(xmmA, anyptr_gpB);
- e->vpbroadcastd(ymmA, xmmB);
- e->vpbroadcastd(ymmA, anyptr_gpB);
- e->vpbroadcastq(xmmA, xmmB);
- e->vpbroadcastq(xmmA, anyptr_gpB);
- e->vpbroadcastq(ymmA, xmmB);
- e->vpbroadcastq(ymmA, anyptr_gpB);
- e->vpbroadcastw(xmmA, xmmB);
- e->vpbroadcastw(xmmA, anyptr_gpB);
- e->vpbroadcastw(ymmA, xmmB);
- e->vpbroadcastw(ymmA, anyptr_gpB);
- e->vpcmpeqb(ymmA, ymmB, ymmC);
- e->vpcmpeqb(ymmA, ymmB, anyptr_gpC);
- e->vpcmpeqd(ymmA, ymmB, ymmC);
- e->vpcmpeqd(ymmA, ymmB, anyptr_gpC);
- e->vpcmpeqq(ymmA, ymmB, ymmC);
- e->vpcmpeqq(ymmA, ymmB, anyptr_gpC);
- e->vpcmpeqw(ymmA, ymmB, ymmC);
- e->vpcmpeqw(ymmA, ymmB, anyptr_gpC);
- e->vpcmpgtb(ymmA, ymmB, ymmC);
- e->vpcmpgtb(ymmA, ymmB, anyptr_gpC);
- e->vpcmpgtd(ymmA, ymmB, ymmC);
- e->vpcmpgtd(ymmA, ymmB, anyptr_gpC);
- e->vpcmpgtq(ymmA, ymmB, ymmC);
- e->vpcmpgtq(ymmA, ymmB, anyptr_gpC);
- e->vpcmpgtw(ymmA, ymmB, ymmC);
- e->vpcmpgtw(ymmA, ymmB, anyptr_gpC);
- e->vperm2i128(ymmA, ymmB, ymmC, 0);
- e->vperm2i128(ymmA, ymmB, anyptr_gpC, 0);
- e->vpermd(ymmA, ymmB, ymmC);
- e->vpermd(ymmA, ymmB, anyptr_gpC);
- e->vpermps(ymmA, ymmB, ymmC);
- e->vpermps(ymmA, ymmB, anyptr_gpC);
- e->vpermpd(ymmA, ymmB, 0);
- e->vpermpd(ymmA, anyptr_gpB, 0);
- e->vpermq(ymmA, ymmB, 0);
- e->vpermq(ymmA, anyptr_gpB, 0);
- e->vpgatherdd(xmmA, vx_ptr, xmmC);
- e->vpgatherdd(ymmA, vy_ptr, ymmC);
- e->vpgatherdq(xmmA, vx_ptr, xmmC);
- e->vpgatherdq(ymmA, vx_ptr, ymmC);
- e->vpgatherqd(xmmA, vx_ptr, xmmC);
- e->vpgatherqd(xmmA, vy_ptr, xmmC);
- e->vpgatherqq(xmmA, vx_ptr, xmmC);
- e->vpgatherqq(ymmA, vy_ptr, ymmC);
- e->vpmovmskb(gzA, ymmB);
- e->vpmovsxbd(ymmA, anyptr_gpB);
- e->vpmovsxbd(ymmA, xmmB);
- e->vpmovsxbq(ymmA, anyptr_gpB);
- e->vpmovsxbq(ymmA, xmmB);
- e->vpmovsxbw(ymmA, anyptr_gpB);
- e->vpmovsxbw(ymmA, xmmB);
- e->vpmovsxdq(ymmA, anyptr_gpB);
- e->vpmovsxdq(ymmA, xmmB);
- e->vpmovsxwd(ymmA, anyptr_gpB);
- e->vpmovsxwd(ymmA, xmmB);
- e->vpmovsxwq(ymmA, anyptr_gpB);
- e->vpmovsxwq(ymmA, xmmB);
- e->vpmovzxbd(ymmA, anyptr_gpB);
- e->vpmovzxbd(ymmA, xmmB);
- e->vpmovzxbq(ymmA, anyptr_gpB);
- e->vpmovzxbq(ymmA, xmmB);
- e->vpmovzxbw(ymmA, anyptr_gpB);
- e->vpmovzxbw(ymmA, xmmB);
- e->vpmovzxdq(ymmA, anyptr_gpB);
- e->vpmovzxdq(ymmA, xmmB);
- e->vpmovzxwd(ymmA, anyptr_gpB);
- e->vpmovzxwd(ymmA, xmmB);
- e->vpmovzxwq(ymmA, anyptr_gpB);
- e->vpmovzxwq(ymmA, xmmB);
- e->vpshufd(ymmA, anyptr_gpB, 0);
- e->vpshufd(ymmA, ymmB, 0);
- e->vpshufhw(ymmA, anyptr_gpB, 0);
- e->vpshufhw(ymmA, ymmB, 0);
- e->vpshuflw(ymmA, anyptr_gpB, 0);
- e->vpshuflw(ymmA, ymmB, 0);
- e->vpslld(ymmA, ymmB, 0);
- e->vpslldq(ymmA, ymmB, 0);
- e->vpsllq(ymmA, ymmB, 0);
- e->vpsllw(ymmA, ymmB, 0);
- e->vpsrad(ymmA, ymmB, 0);
- e->vpsraw(ymmA, ymmB, 0);
- e->vpsrld(ymmA, ymmB, 0);
- e->vpsrldq(ymmA, ymmB, 0);
- e->vpsrlq(ymmA, ymmB, 0);
- e->vpsrlw(ymmA, ymmB, 0);
- e->vphaddd(ymmA, ymmB, anyptr_gpC);
- e->vphaddd(ymmA, ymmB, ymmC);
- e->vphaddsw(ymmA, ymmB, anyptr_gpC);
- e->vphaddsw(ymmA, ymmB, ymmC);
- e->vphaddw(ymmA, ymmB, anyptr_gpC);
- e->vphaddw(ymmA, ymmB, ymmC);
- e->vphsubd(ymmA, ymmB, anyptr_gpC);
- e->vphsubd(ymmA, ymmB, ymmC);
- e->vphsubsw(ymmA, ymmB, anyptr_gpC);
- e->vphsubsw(ymmA, ymmB, ymmC);
- e->vphsubw(ymmA, ymmB, anyptr_gpC);
- e->vphsubw(ymmA, ymmB, ymmC);
- e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
- e->vpmaddubsw(ymmA, ymmB, ymmC);
- e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
- e->vpmaddwd(ymmA, ymmB, ymmC);
- e->vpmaskmovd(anyptr_gpA, xmmB, xmmC);
- e->vpmaskmovd(anyptr_gpA, ymmB, ymmC);
- e->vpmaskmovd(xmmA, xmmB, anyptr_gpC);
- e->vpmaskmovd(ymmA, ymmB, anyptr_gpC);
- e->vpmaskmovq(anyptr_gpA, xmmB, xmmC);
- e->vpmaskmovq(anyptr_gpA, ymmB, ymmC);
- e->vpmaskmovq(xmmA, xmmB, anyptr_gpC);
- e->vpmaskmovq(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsb(ymmA, ymmB, ymmC);
- e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsd(ymmA, ymmB, ymmC);
- e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsw(ymmA, ymmB, ymmC);
- e->vpmaxub(ymmA, ymmB, anyptr_gpC);
- e->vpmaxub(ymmA, ymmB, ymmC);
- e->vpmaxud(ymmA, ymmB, anyptr_gpC);
- e->vpmaxud(ymmA, ymmB, ymmC);
- e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
- e->vpmaxuw(ymmA, ymmB, ymmC);
- e->vpminsb(ymmA, ymmB, anyptr_gpC);
- e->vpminsb(ymmA, ymmB, ymmC);
- e->vpminsd(ymmA, ymmB, anyptr_gpC);
- e->vpminsd(ymmA, ymmB, ymmC);
- e->vpminsw(ymmA, ymmB, anyptr_gpC);
- e->vpminsw(ymmA, ymmB, ymmC);
- e->vpminub(ymmA, ymmB, anyptr_gpC);
- e->vpminub(ymmA, ymmB, ymmC);
- e->vpminud(ymmA, ymmB, anyptr_gpC);
- e->vpminud(ymmA, ymmB, ymmC);
- e->vpminuw(ymmA, ymmB, anyptr_gpC);
- e->vpminuw(ymmA, ymmB, ymmC);
- e->vpmuldq(ymmA, ymmB, anyptr_gpC);
- e->vpmuldq(ymmA, ymmB, ymmC);
- e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhrsw(ymmA, ymmB, ymmC);
- e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhuw(ymmA, ymmB, ymmC);
- e->vpmulhw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhw(ymmA, ymmB, ymmC);
- e->vpmulld(ymmA, ymmB, anyptr_gpC);
- e->vpmulld(ymmA, ymmB, ymmC);
- e->vpmullw(ymmA, ymmB, anyptr_gpC);
- e->vpmullw(ymmA, ymmB, ymmC);
- e->vpmuludq(ymmA, ymmB, anyptr_gpC);
- e->vpmuludq(ymmA, ymmB, ymmC);
- e->vpor(ymmA, ymmB, anyptr_gpC);
- e->vpor(ymmA, ymmB, ymmC);
- e->vpsadbw(ymmA, ymmB, anyptr_gpC);
- e->vpsadbw(ymmA, ymmB, ymmC);
- e->vpshufb(ymmA, ymmB, anyptr_gpC);
- e->vpshufb(ymmA, ymmB, ymmC);
- e->vpsignb(ymmA, ymmB, anyptr_gpC);
- e->vpsignb(ymmA, ymmB, ymmC);
- e->vpsignd(ymmA, ymmB, anyptr_gpC);
- e->vpsignd(ymmA, ymmB, ymmC);
- e->vpsignw(ymmA, ymmB, anyptr_gpC);
- e->vpsignw(ymmA, ymmB, ymmC);
- e->vpslld(ymmA, ymmB, anyptr_gpC);
- e->vpslld(ymmA, ymmB, xmmC);
- e->vpsllq(ymmA, ymmB, anyptr_gpC);
- e->vpsllq(ymmA, ymmB, xmmC);
- e->vpsllvd(xmmA, xmmB, anyptr_gpC);
- e->vpsllvd(xmmA, xmmB, xmmC);
- e->vpsllvd(ymmA, ymmB, anyptr_gpC);
- e->vpsllvd(ymmA, ymmB, ymmC);
- e->vpsllvq(xmmA, xmmB, anyptr_gpC);
- e->vpsllvq(xmmA, xmmB, xmmC);
- e->vpsllvq(ymmA, ymmB, anyptr_gpC);
- e->vpsllvq(ymmA, ymmB, ymmC);
- e->vpsllw(ymmA, ymmB, anyptr_gpC);
- e->vpsllw(ymmA, ymmB, xmmC);
- e->vpsrad(ymmA, ymmB, anyptr_gpC);
- e->vpsrad(ymmA, ymmB, xmmC);
- e->vpsravd(xmmA, xmmB, anyptr_gpC);
- e->vpsravd(xmmA, xmmB, xmmC);
- e->vpsravd(ymmA, ymmB, anyptr_gpC);
- e->vpsravd(ymmA, ymmB, ymmC);
- e->vpsraw(ymmA, ymmB, anyptr_gpC);
- e->vpsraw(ymmA, ymmB, xmmC);
- e->vpsrld(ymmA, ymmB, anyptr_gpC);
- e->vpsrld(ymmA, ymmB, xmmC);
- e->vpsrlq(ymmA, ymmB, anyptr_gpC);
- e->vpsrlq(ymmA, ymmB, xmmC);
- e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
- e->vpsrlvd(xmmA, xmmB, xmmC);
- e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
- e->vpsrlvd(ymmA, ymmB, ymmC);
- e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
- e->vpsrlvq(xmmA, xmmB, xmmC);
- e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
- e->vpsrlvq(ymmA, ymmB, ymmC);
- e->vpsrlw(ymmA, ymmB, anyptr_gpC);
- e->vpsrlw(ymmA, ymmB, xmmC);
- e->vpsubb(ymmA, ymmB, anyptr_gpC);
- e->vpsubb(ymmA, ymmB, ymmC);
- e->vpsubd(ymmA, ymmB, anyptr_gpC);
- e->vpsubd(ymmA, ymmB, ymmC);
- e->vpsubq(ymmA, ymmB, anyptr_gpC);
- e->vpsubq(ymmA, ymmB, ymmC);
- e->vpsubsb(ymmA, ymmB, anyptr_gpC);
- e->vpsubsb(ymmA, ymmB, ymmC);
- e->vpsubsw(ymmA, ymmB, anyptr_gpC);
- e->vpsubsw(ymmA, ymmB, ymmC);
- e->vpsubusb(ymmA, ymmB, anyptr_gpC);
- e->vpsubusb(ymmA, ymmB, ymmC);
- e->vpsubusw(ymmA, ymmB, anyptr_gpC);
- e->vpsubusw(ymmA, ymmB, ymmC);
- e->vpsubw(ymmA, ymmB, anyptr_gpC);
- e->vpsubw(ymmA, ymmB, ymmC);
- e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhbw(ymmA, ymmB, ymmC);
- e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhdq(ymmA, ymmB, ymmC);
- e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhqdq(ymmA, ymmB, ymmC);
- e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhwd(ymmA, ymmB, ymmC);
- e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklbw(ymmA, ymmB, ymmC);
- e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckldq(ymmA, ymmB, ymmC);
- e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklqdq(ymmA, ymmB, ymmC);
- e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklwd(ymmA, ymmB, ymmC);
- e->vpxor(ymmA, ymmB, anyptr_gpC);
- e->vpxor(ymmA, ymmB, ymmC);
-
- // FMA.
- e->nop();
-
- e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132pd(xmmA, xmmB, xmmC);
- e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd132pd(ymmA, ymmB, ymmC);
- e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132ps(xmmA, xmmB, xmmC);
- e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd132ps(ymmA, ymmB, ymmC);
- e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132sd(xmmA, xmmB, xmmC);
- e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132ss(xmmA, xmmB, xmmC);
- e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213pd(xmmA, xmmB, xmmC);
- e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd213pd(ymmA, ymmB, ymmC);
- e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213ps(xmmA, xmmB, xmmC);
- e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd213ps(ymmA, ymmB, ymmC);
- e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213sd(xmmA, xmmB, xmmC);
- e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213ss(xmmA, xmmB, xmmC);
- e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231pd(xmmA, xmmB, xmmC);
- e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd231pd(ymmA, ymmB, ymmC);
- e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231ps(xmmA, xmmB, xmmC);
- e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd231ps(ymmA, ymmB, ymmC);
- e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231sd(xmmA, xmmB, xmmC);
- e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231ss(xmmA, xmmB, xmmC);
- e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub132pd(xmmA, xmmB, xmmC);
- e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub132pd(ymmA, ymmB, ymmC);
- e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub132ps(xmmA, xmmB, xmmC);
- e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub132ps(ymmA, ymmB, ymmC);
- e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub213pd(xmmA, xmmB, xmmC);
- e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub213pd(ymmA, ymmB, ymmC);
- e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub213ps(xmmA, xmmB, xmmC);
- e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub213ps(ymmA, ymmB, ymmC);
- e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub231pd(xmmA, xmmB, xmmC);
- e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub231pd(ymmA, ymmB, ymmC);
- e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub231ps(xmmA, xmmB, xmmC);
- e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub231ps(ymmA, ymmB, ymmC);
- e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132pd(xmmA, xmmB, xmmC);
- e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub132pd(ymmA, ymmB, ymmC);
- e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132ps(xmmA, xmmB, xmmC);
- e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub132ps(ymmA, ymmB, ymmC);
- e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132sd(xmmA, xmmB, xmmC);
- e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132ss(xmmA, xmmB, xmmC);
- e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213pd(xmmA, xmmB, xmmC);
- e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub213pd(ymmA, ymmB, ymmC);
- e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213ps(xmmA, xmmB, xmmC);
- e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub213ps(ymmA, ymmB, ymmC);
- e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213sd(xmmA, xmmB, xmmC);
- e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213ss(xmmA, xmmB, xmmC);
- e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231pd(xmmA, xmmB, xmmC);
- e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub231pd(ymmA, ymmB, ymmC);
- e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231ps(xmmA, xmmB, xmmC);
- e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub231ps(ymmA, ymmB, ymmC);
- e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231sd(xmmA, xmmB, xmmC);
- e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231ss(xmmA, xmmB, xmmC);
- e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd132pd(xmmA, xmmB, xmmC);
- e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd132pd(ymmA, ymmB, ymmC);
- e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd132ps(xmmA, xmmB, xmmC);
- e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd132ps(ymmA, ymmB, ymmC);
- e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd213pd(xmmA, xmmB, xmmC);
- e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd213pd(ymmA, ymmB, ymmC);
- e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd213ps(xmmA, xmmB, xmmC);
- e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd213ps(ymmA, ymmB, ymmC);
- e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd231pd(xmmA, xmmB, xmmC);
- e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd231pd(ymmA, ymmB, ymmC);
- e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd231ps(xmmA, xmmB, xmmC);
- e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd231ps(ymmA, ymmB, ymmC);
- e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132pd(xmmA, xmmB, xmmC);
- e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd132pd(ymmA, ymmB, ymmC);
- e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132ps(xmmA, xmmB, xmmC);
- e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd132ps(ymmA, ymmB, ymmC);
- e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132sd(xmmA, xmmB, xmmC);
- e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132ss(xmmA, xmmB, xmmC);
- e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213pd(xmmA, xmmB, xmmC);
- e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd213pd(ymmA, ymmB, ymmC);
- e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213ps(xmmA, xmmB, xmmC);
- e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd213ps(ymmA, ymmB, ymmC);
- e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213sd(xmmA, xmmB, xmmC);
- e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213ss(xmmA, xmmB, xmmC);
- e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231pd(xmmA, xmmB, xmmC);
- e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd231pd(ymmA, ymmB, ymmC);
- e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231ps(xmmA, xmmB, xmmC);
- e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd231ps(ymmA, ymmB, ymmC);
- e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231sd(xmmA, xmmB, xmmC);
- e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231ss(xmmA, xmmB, xmmC);
- e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132pd(xmmA, xmmB, xmmC);
- e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub132pd(ymmA, ymmB, ymmC);
- e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132ps(xmmA, xmmB, xmmC);
- e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub132ps(ymmA, ymmB, ymmC);
- e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132sd(xmmA, xmmB, xmmC);
- e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132ss(xmmA, xmmB, xmmC);
- e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213pd(xmmA, xmmB, xmmC);
- e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub213pd(ymmA, ymmB, ymmC);
- e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213ps(xmmA, xmmB, xmmC);
- e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub213ps(ymmA, ymmB, ymmC);
- e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213sd(xmmA, xmmB, xmmC);
- e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213ss(xmmA, xmmB, xmmC);
- e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231pd(xmmA, xmmB, xmmC);
- e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub231pd(ymmA, ymmB, ymmC);
- e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231ps(xmmA, xmmB, xmmC);
- e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub231ps(ymmA, ymmB, ymmC);
- e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231sd(xmmA, xmmB, xmmC);
- e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231ss(xmmA, xmmB, xmmC);
-
- // FMA4.
- e->nop();
-
- e->vfmaddpd(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddpd(ymmA, ymmB, ymmC, ymmD);
- e->vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmaddps(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddps(ymmA, ymmB, ymmC, ymmD);
- e->vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmaddsd(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddss(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddsubpd(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddsubpd(ymmA, ymmB, ymmC, ymmD);
- e->vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmaddsubps(xmmA, xmmB, xmmC, xmmD);
- e->vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmaddsubps(ymmA, ymmB, ymmC, ymmD);
- e->vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmsubaddpd(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmsubaddpd(ymmA, ymmB, ymmC, ymmD);
- e->vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmsubaddps(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmsubaddps(ymmA, ymmB, ymmC, ymmD);
- e->vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmsubpd(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmsubpd(ymmA, ymmB, ymmC, ymmD);
- e->vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmsubps(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmsubps(ymmA, ymmB, ymmC, ymmD);
- e->vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfmsubsd(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfmsubss(xmmA, xmmB, xmmC, xmmD);
- e->vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmaddpd(xmmA, xmmB, xmmC, xmmD);
- e->vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmaddpd(ymmA, ymmB, ymmC, ymmD);
- e->vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfnmaddps(xmmA, xmmB, xmmC, xmmD);
- e->vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmaddps(ymmA, ymmB, ymmC, ymmD);
- e->vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfnmaddsd(xmmA, xmmB, xmmC, xmmD);
- e->vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmaddss(xmmA, xmmB, xmmC, xmmD);
- e->vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmsubpd(xmmA, xmmB, xmmC, xmmD);
- e->vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmsubpd(ymmA, ymmB, ymmC, ymmD);
- e->vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfnmsubps(xmmA, xmmB, xmmC, xmmD);
- e->vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmsubps(ymmA, ymmB, ymmC, ymmD);
- e->vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vfnmsubsd(xmmA, xmmB, xmmC, xmmD);
- e->vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vfnmsubss(xmmA, xmmB, xmmC, xmmD);
- e->vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
-
- // XOP.
- e->nop();
-
- e->vfrczpd(xmmA, xmmB);
- e->vfrczpd(xmmA, anyptr_gpB);
- e->vfrczpd(ymmA, ymmB);
- e->vfrczpd(ymmA, anyptr_gpB);
- e->vfrczps(xmmA, xmmB);
- e->vfrczps(xmmA, anyptr_gpB);
- e->vfrczps(ymmA, ymmB);
- e->vfrczps(ymmA, anyptr_gpB);
- e->vfrczsd(xmmA, xmmB);
- e->vfrczsd(xmmA, anyptr_gpB);
- e->vfrczss(xmmA, xmmB);
- e->vfrczss(xmmA, anyptr_gpB);
- e->vpcmov(xmmA, xmmB, xmmC, xmmD);
- e->vpcmov(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpcmov(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vpcmov(ymmA, ymmB, ymmC, ymmD);
- e->vpcmov(ymmA, ymmB, anyptr_gpC, ymmD);
- e->vpcmov(ymmA, ymmB, ymmC, anyptr_gpD);
- e->vpcomb(xmmA, xmmB, xmmC, 0);
- e->vpcomb(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomd(xmmA, xmmB, xmmC, 0);
- e->vpcomd(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomq(xmmA, xmmB, xmmC, 0);
- e->vpcomq(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomw(xmmA, xmmB, xmmC, 0);
- e->vpcomw(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomub(xmmA, xmmB, xmmC, 0);
- e->vpcomub(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomud(xmmA, xmmB, xmmC, 0);
- e->vpcomud(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomuq(xmmA, xmmB, xmmC, 0);
- e->vpcomuq(xmmA, xmmB, anyptr_gpC, 0);
- e->vpcomuw(xmmA, xmmB, xmmC, 0);
- e->vpcomuw(xmmA, xmmB, anyptr_gpC, 0);
- e->vpermil2pd(xmmA, xmmB, xmmC, xmmD, 0);
- e->vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD, 0);
- e->vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpD, 0);
- e->vpermil2pd(ymmA, ymmB, ymmC, ymmD, 0);
- e->vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD, 0);
- e->vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpD, 0);
- e->vpermil2ps(xmmA, xmmB, xmmC, xmmD, 0);
- e->vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD, 0);
- e->vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpD, 0);
- e->vpermil2ps(ymmA, ymmB, ymmC, ymmD, 0);
- e->vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD, 0);
- e->vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpD, 0);
- e->vphaddbd(xmmA, xmmB);
- e->vphaddbd(xmmA, anyptr_gpB);
- e->vphaddbq(xmmA, xmmB);
- e->vphaddbq(xmmA, anyptr_gpB);
- e->vphaddbw(xmmA, xmmB);
- e->vphaddbw(xmmA, anyptr_gpB);
- e->vphadddq(xmmA, xmmB);
- e->vphadddq(xmmA, anyptr_gpB);
- e->vphaddwd(xmmA, xmmB);
- e->vphaddwd(xmmA, anyptr_gpB);
- e->vphaddwq(xmmA, xmmB);
- e->vphaddwq(xmmA, anyptr_gpB);
- e->vphaddubd(xmmA, xmmB);
- e->vphaddubd(xmmA, anyptr_gpB);
- e->vphaddubq(xmmA, xmmB);
- e->vphaddubq(xmmA, anyptr_gpB);
- e->vphaddubw(xmmA, xmmB);
- e->vphaddubw(xmmA, anyptr_gpB);
- e->vphaddudq(xmmA, xmmB);
- e->vphaddudq(xmmA, anyptr_gpB);
- e->vphadduwd(xmmA, xmmB);
- e->vphadduwd(xmmA, anyptr_gpB);
- e->vphadduwq(xmmA, xmmB);
- e->vphadduwq(xmmA, anyptr_gpB);
- e->vphsubbw(xmmA, xmmB);
- e->vphsubbw(xmmA, anyptr_gpB);
- e->vphsubdq(xmmA, xmmB);
- e->vphsubdq(xmmA, anyptr_gpB);
- e->vphsubwd(xmmA, xmmB);
- e->vphsubwd(xmmA, anyptr_gpB);
- e->vpmacsdd(xmmA, xmmB, xmmC, xmmD);
- e->vpmacsdd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacsdqh(xmmA, xmmB, xmmC, xmmD);
- e->vpmacsdqh(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacsdql(xmmA, xmmB, xmmC, xmmD);
- e->vpmacsdql(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacswd(xmmA, xmmB, xmmC, xmmD);
- e->vpmacswd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacsww(xmmA, xmmB, xmmC, xmmD);
- e->vpmacsww(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacssdd(xmmA, xmmB, xmmC, xmmD);
- e->vpmacssdd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacssdqh(xmmA, xmmB, xmmC, xmmD);
- e->vpmacssdqh(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacssdql(xmmA, xmmB, xmmC, xmmD);
- e->vpmacssdql(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacsswd(xmmA, xmmB, xmmC, xmmD);
- e->vpmacsswd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmacssww(xmmA, xmmB, xmmC, xmmD);
- e->vpmacssww(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmadcsswd(xmmA, xmmB, xmmC, xmmD);
- e->vpmadcsswd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpmadcswd(xmmA, xmmB, xmmC, xmmD);
- e->vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpperm(xmmA, xmmB, xmmC, xmmD);
- e->vpperm(xmmA, xmmB, anyptr_gpC, xmmD);
- e->vpperm(xmmA, xmmB, xmmC, anyptr_gpD);
- e->vprotb(xmmA, xmmB, xmmC);
- e->vprotb(xmmA, anyptr_gpB, xmmC);
- e->vprotb(xmmA, xmmB, anyptr_gpC);
- e->vprotb(xmmA, xmmB, 0);
- e->vprotb(xmmA, anyptr_gpB, 0);
- e->vprotd(xmmA, xmmB, xmmC);
- e->vprotd(xmmA, anyptr_gpB, xmmC);
- e->vprotd(xmmA, xmmB, anyptr_gpC);
- e->vprotd(xmmA, xmmB, 0);
- e->vprotd(xmmA, anyptr_gpB, 0);
- e->vprotq(xmmA, xmmB, xmmC);
- e->vprotq(xmmA, anyptr_gpB, xmmC);
- e->vprotq(xmmA, xmmB, anyptr_gpC);
- e->vprotq(xmmA, xmmB, 0);
- e->vprotq(xmmA, anyptr_gpB, 0);
- e->vprotw(xmmA, xmmB, xmmC);
- e->vprotw(xmmA, anyptr_gpB, xmmC);
- e->vprotw(xmmA, xmmB, anyptr_gpC);
- e->vprotw(xmmA, xmmB, 0);
- e->vprotw(xmmA, anyptr_gpB, 0);
- e->vpshab(xmmA, xmmB, xmmC);
- e->vpshab(xmmA, anyptr_gpB, xmmC);
- e->vpshab(xmmA, xmmB, anyptr_gpC);
- e->vpshad(xmmA, xmmB, xmmC);
- e->vpshad(xmmA, anyptr_gpB, xmmC);
- e->vpshad(xmmA, xmmB, anyptr_gpC);
- e->vpshaq(xmmA, xmmB, xmmC);
- e->vpshaq(xmmA, anyptr_gpB, xmmC);
- e->vpshaq(xmmA, xmmB, anyptr_gpC);
- e->vpshaw(xmmA, xmmB, xmmC);
- e->vpshaw(xmmA, anyptr_gpB, xmmC);
- e->vpshaw(xmmA, xmmB, anyptr_gpC);
- e->vpshlb(xmmA, xmmB, xmmC);
- e->vpshlb(xmmA, anyptr_gpB, xmmC);
- e->vpshlb(xmmA, xmmB, anyptr_gpC);
- e->vpshld(xmmA, xmmB, xmmC);
- e->vpshld(xmmA, anyptr_gpB, xmmC);
- e->vpshld(xmmA, xmmB, anyptr_gpC);
- e->vpshlq(xmmA, xmmB, xmmC);
- e->vpshlq(xmmA, anyptr_gpB, xmmC);
- e->vpshlq(xmmA, xmmB, anyptr_gpC);
- e->vpshlw(xmmA, xmmB, xmmC);
- e->vpshlw(xmmA, anyptr_gpB, xmmC);
- e->vpshlw(xmmA, xmmB, anyptr_gpC);
-
- // F16C.
- e->nop();
-
- e->vcvtph2ps(xmmA, xmmB);
- e->vcvtph2ps(xmmA, anyptr_gpB);
- e->vcvtph2ps(ymmA, xmmB);
- e->vcvtph2ps(ymmA, anyptr_gpB);
- e->vcvtps2ph(xmmA, xmmB, 0);
- e->vcvtps2ph(anyptr_gpA, xmmB, 0);
- e->vcvtps2ph(xmmA, ymmB, 0);
- e->vcvtps2ph(anyptr_gpA, ymmB, 0);
-
- // AVX512.
- e->nop();
-
- e->kaddb(kA, kB, kC);
- e->kaddd(kA, kB, kC);
- e->kaddq(kA, kB, kC);
- e->kaddw(kA, kB, kC);
- e->kandb(kA, kB, kC);
- e->kandd(kA, kB, kC);
- e->kandnb(kA, kB, kC);
- e->kandnd(kA, kB, kC);
- e->kandnq(kA, kB, kC);
- e->kandnw(kA, kB, kC);
- e->kandq(kA, kB, kC);
- e->kandw(kA, kB, kC);
- e->kmovb(kA, kB);
- e->kmovb(kA, anyptr_gpB);
- e->kmovb(kA, gdB);
- if (isX64) e->kmovb(kA, gzB);
- e->kmovb(anyptr_gpA, kB);
- e->kmovb(gdA, kB);
- if (isX64) e->kmovb(gzA, kB);
- e->kmovd(kA, kB);
- e->kmovd(kA, anyptr_gpB);
- e->kmovd(kA, gdB);
- if (isX64) e->kmovd(kA, gzB);
- e->kmovd(anyptr_gpA, kB);
- e->kmovd(gdA, kB);
- if (isX64) e->kmovd(gzA, kB);
- e->kmovq(kA, kB);
- e->kmovq(kA, anyptr_gpB);
- if (isX64) e->kmovq(kA, gzB);
- e->kmovq(anyptr_gpA, kB);
- if (isX64) e->kmovq(gzA, kB);
- e->kmovw(kA, kB);
- e->kmovw(kA, anyptr_gpB);
- e->kmovw(kA, gdB);
- if (isX64) e->kmovw(kA, gzB);
- e->kmovw(anyptr_gpA, kB);
- e->kmovw(gdA, kB);
- if (isX64) e->kmovw(gzA, kB);
- e->knotb(kA, kB);
- e->knotd(kA, kB);
- e->knotq(kA, kB);
- e->knotw(kA, kB);
- e->korb(kA, kB, kC);
- e->kord(kA, kB, kC);
- e->korq(kA, kB, kC);
- e->kortestb(kA, kB);
- e->kortestd(kA, kB);
- e->kortestq(kA, kB);
- e->kortestw(kA, kB);
- e->korw(kA, kB, kC);
- e->kshiftlb(kA, kB, 0);
- e->kshiftld(kA, kB, 0);
- e->kshiftlq(kA, kB, 0);
- e->kshiftlw(kA, kB, 0);
- e->kshiftrb(kA, kB, 0);
- e->kshiftrd(kA, kB, 0);
- e->kshiftrq(kA, kB, 0);
- e->kshiftrw(kA, kB, 0);
- e->ktestb(kA, kB);
- e->ktestd(kA, kB);
- e->ktestq(kA, kB);
- e->ktestw(kA, kB);
- e->kunpckbw(kA, kB, kC);
- e->kunpckdq(kA, kB, kC);
- e->kunpckwd(kA, kB, kC);
- e->kxnorb(kA, kB, kC);
- e->kxnord(kA, kB, kC);
- e->kxnorq(kA, kB, kC);
- e->kxnorw(kA, kB, kC);
- e->kxorb(kA, kB, kC);
- e->kxord(kA, kB, kC);
- e->kxorq(kA, kB, kC);
- e->kxorw(kA, kB, kC);
- e->nop();
-
- e->vaddpd(xmmA, xmmB, xmmC);
- e->vaddpd(xmmA, xmmB, anyptr_gpC);
- e->vaddpd(ymmA, ymmB, ymmC);
- e->vaddpd(ymmA, ymmB, anyptr_gpC);
- e->vaddpd(zmmA, zmmB, zmmC);
- e->vaddpd(zmmA, zmmB, anyptr_gpC);
- e->vaddps(xmmA, xmmB, xmmC);
- e->vaddps(xmmA, xmmB, anyptr_gpC);
- e->vaddps(ymmA, ymmB, ymmC);
- e->vaddps(ymmA, ymmB, anyptr_gpC);
- e->vaddps(zmmA, zmmB, zmmC);
- e->vaddps(zmmA, zmmB, anyptr_gpC);
- e->vaddsd(xmmA, xmmB, xmmC);
- e->vaddsd(xmmA, xmmB, anyptr_gpC);
- e->vaddss(xmmA, xmmB, xmmC);
- e->vaddss(xmmA, xmmB, anyptr_gpC);
- e->valignd(xmmA, xmmB, xmmC, 0);
- e->valignd(xmmA, xmmB, anyptr_gpC, 0);
- e->valignd(ymmA, ymmB, ymmC, 0);
- e->valignd(ymmA, ymmB, anyptr_gpC, 0);
- e->valignd(zmmA, zmmB, zmmC, 0);
- e->valignd(zmmA, zmmB, anyptr_gpC, 0);
- e->valignq(xmmA, xmmB, xmmC, 0);
- e->valignq(xmmA, xmmB, anyptr_gpC, 0);
- e->valignq(ymmA, ymmB, ymmC, 0);
- e->valignq(ymmA, ymmB, anyptr_gpC, 0);
- e->valignq(zmmA, zmmB, zmmC, 0);
- e->valignq(zmmA, zmmB, anyptr_gpC, 0);
- e->vandnpd(xmmA, xmmB, xmmC);
- e->vandnpd(xmmA, xmmB, anyptr_gpC);
- e->vandnpd(ymmA, ymmB, ymmC);
- e->vandnpd(ymmA, ymmB, anyptr_gpC);
- e->vandnpd(zmmA, zmmB, zmmC);
- e->vandnpd(zmmA, zmmB, anyptr_gpC);
- e->vandnps(xmmA, xmmB, xmmC);
- e->vandnps(xmmA, xmmB, anyptr_gpC);
- e->vandnps(ymmA, ymmB, ymmC);
- e->vandnps(ymmA, ymmB, anyptr_gpC);
- e->vandnps(zmmA, zmmB, zmmC);
- e->vandnps(zmmA, zmmB, anyptr_gpC);
- e->vandpd(xmmA, xmmB, xmmC);
- e->vandpd(xmmA, xmmB, anyptr_gpC);
- e->vandpd(ymmA, ymmB, ymmC);
- e->vandpd(ymmA, ymmB, anyptr_gpC);
- e->vandpd(zmmA, zmmB, zmmC);
- e->vandpd(zmmA, zmmB, anyptr_gpC);
- e->vandps(xmmA, xmmB, xmmC);
- e->vandps(xmmA, xmmB, anyptr_gpC);
- e->vandps(ymmA, ymmB, ymmC);
- e->vandps(ymmA, ymmB, anyptr_gpC);
- e->vandps(zmmA, zmmB, zmmC);
- e->vandps(zmmA, zmmB, anyptr_gpC);
- e->vblendmpd(xmmA, xmmB, xmmC);
- e->vblendmpd(xmmA, xmmB, anyptr_gpC);
- e->vblendmpd(ymmA, ymmB, ymmC);
- e->vblendmpd(ymmA, ymmB, anyptr_gpC);
- e->vblendmpd(zmmA, zmmB, zmmC);
- e->vblendmpd(zmmA, zmmB, anyptr_gpC);
- e->vblendmps(xmmA, xmmB, xmmC);
- e->vblendmps(xmmA, xmmB, anyptr_gpC);
- e->vblendmps(ymmA, ymmB, ymmC);
- e->vblendmps(ymmA, ymmB, anyptr_gpC);
- e->vblendmps(zmmA, zmmB, zmmC);
- e->vblendmps(zmmA, zmmB, anyptr_gpC);
- e->vbroadcastf32x2(ymmA, xmmB);
- e->vbroadcastf32x2(ymmA, anyptr_gpB);
- e->vbroadcastf32x2(zmmA, xmmB);
- e->vbroadcastf32x2(zmmA, anyptr_gpB);
- e->vbroadcastf32x4(ymmA, anyptr_gpB);
- e->vbroadcastf32x4(zmmA, anyptr_gpB);
- e->vbroadcastf32x8(zmmA, anyptr_gpB);
- e->vbroadcastf64x2(ymmA, anyptr_gpB);
- e->vbroadcastf64x2(zmmA, anyptr_gpB);
- e->vbroadcastf64x4(zmmA, anyptr_gpB);
- e->vbroadcasti32x2(xmmA, xmmB);
- e->vbroadcasti32x2(xmmA, anyptr_gpB);
- e->vbroadcasti32x2(ymmA, xmmB);
- e->vbroadcasti32x2(ymmA, anyptr_gpB);
- e->vbroadcasti32x2(zmmA, xmmB);
- e->vbroadcasti32x2(zmmA, anyptr_gpB);
- e->vbroadcasti32x4(ymmA, anyptr_gpB);
- e->vbroadcasti32x4(zmmA, anyptr_gpB);
- e->vbroadcasti32x8(zmmA, anyptr_gpB);
- e->vbroadcasti64x2(ymmA, anyptr_gpB);
- e->vbroadcasti64x2(zmmA, anyptr_gpB);
- e->vbroadcasti64x4(zmmA, anyptr_gpB);
- e->vbroadcastsd(ymmA, xmmB);
- e->vbroadcastsd(ymmA, anyptr_gpB);
- e->vbroadcastsd(zmmA, xmmB);
- e->vbroadcastsd(zmmA, anyptr_gpB);
- e->vbroadcastss(xmmA, xmmB);
- e->vbroadcastss(xmmA, anyptr_gpB);
- e->vbroadcastss(ymmA, xmmB);
- e->vbroadcastss(ymmA, anyptr_gpB);
- e->vbroadcastss(zmmA, xmmB);
- e->vbroadcastss(zmmA, anyptr_gpB);
- e->vcmppd(kA, xmmB, xmmC, 0);
- e->vcmppd(kA, xmmB, anyptr_gpC, 0);
- e->vcmppd(kA, ymmB, ymmC, 0);
- e->vcmppd(kA, ymmB, anyptr_gpC, 0);
- e->vcmppd(kA, zmmB, zmmC, 0);
- e->vcmppd(kA, zmmB, anyptr_gpC, 0);
- e->vcmpps(kA, xmmB, xmmC, 0);
- e->vcmpps(kA, xmmB, anyptr_gpC, 0);
- e->vcmpps(kA, ymmB, ymmC, 0);
- e->vcmpps(kA, ymmB, anyptr_gpC, 0);
- e->vcmpps(kA, zmmB, zmmC, 0);
- e->vcmpps(kA, zmmB, anyptr_gpC, 0);
- e->vcmpsd(kA, xmmB, xmmC, 0);
- e->vcmpsd(kA, xmmB, anyptr_gpC, 0);
- e->vcmpss(kA, xmmB, xmmC, 0);
- e->vcmpss(kA, xmmB, anyptr_gpC, 0);
- e->vcomisd(xmmA, xmmB);
- e->vcomisd(xmmA, anyptr_gpB);
- e->vcomiss(xmmA, xmmB);
- e->vcomiss(xmmA, anyptr_gpB);
- e->vcompresspd(xmmA, xmmB);
- e->vcompresspd(anyptr_gpA, xmmB);
- e->vcompresspd(ymmA, ymmB);
- e->vcompresspd(anyptr_gpA, ymmB);
- e->vcompresspd(zmmA, zmmB);
- e->vcompresspd(anyptr_gpA, zmmB);
- e->vcompressps(xmmA, xmmB);
- e->vcompressps(anyptr_gpA, xmmB);
- e->vcompressps(ymmA, ymmB);
- e->vcompressps(anyptr_gpA, ymmB);
- e->vcompressps(zmmA, zmmB);
- e->vcompressps(anyptr_gpA, zmmB);
- e->vcvtdq2pd(xmmA, xmmB);
- e->vcvtdq2pd(xmmA, anyptr_gpB);
- e->vcvtdq2pd(ymmA, xmmB);
- e->vcvtdq2pd(ymmA, anyptr_gpB);
- e->vcvtdq2pd(zmmA, ymmB);
- e->vcvtdq2pd(zmmA, anyptr_gpB);
- e->vcvtdq2ps(xmmA, xmmB);
- e->vcvtdq2ps(xmmA, anyptr_gpB);
- e->vcvtdq2ps(ymmA, ymmB);
- e->vcvtdq2ps(ymmA, anyptr_gpB);
- e->vcvtdq2ps(zmmA, zmmB);
- e->vcvtdq2ps(zmmA, anyptr_gpB);
- e->vcvtpd2dq(xmmA, xmmB);
- e->vcvtpd2dq(xmmA, anyptr_gpB);
- e->vcvtpd2dq(xmmA, ymmB);
- e->vcvtpd2dq(xmmA, anyptr_gpB);
- e->vcvtpd2dq(ymmA, zmmB);
- e->vcvtpd2dq(ymmA, anyptr_gpB);
- e->vcvtpd2qq(xmmA, xmmB);
- e->vcvtpd2qq(xmmA, anyptr_gpB);
- e->vcvtpd2qq(ymmA, ymmB);
- e->vcvtpd2qq(ymmA, anyptr_gpB);
- e->vcvtpd2qq(zmmA, zmmB);
- e->vcvtpd2qq(zmmA, anyptr_gpB);
- e->vcvtpd2udq(xmmA, xmmB);
- e->vcvtpd2udq(xmmA, anyptr_gpB);
- e->vcvtpd2udq(xmmA, ymmB);
- e->vcvtpd2udq(xmmA, anyptr_gpB);
- e->vcvtpd2udq(ymmA, zmmB);
- e->vcvtpd2udq(ymmA, anyptr_gpB);
- e->vcvtpd2uqq(xmmA, xmmB);
- e->vcvtpd2uqq(xmmA, anyptr_gpB);
- e->vcvtpd2uqq(ymmA, ymmB);
- e->vcvtpd2uqq(ymmA, anyptr_gpB);
- e->vcvtpd2uqq(zmmA, zmmB);
- e->vcvtpd2uqq(zmmA, anyptr_gpB);
- e->vcvtph2ps(xmmA, xmmB);
- e->vcvtph2ps(xmmA, anyptr_gpB);
- e->vcvtph2ps(ymmA, xmmB);
- e->vcvtph2ps(ymmA, anyptr_gpB);
- e->vcvtph2ps(zmmA, ymmB);
- e->vcvtph2ps(zmmA, anyptr_gpB);
- e->vcvtps2dq(xmmA, xmmB);
- e->vcvtps2dq(xmmA, anyptr_gpB);
- e->vcvtps2dq(ymmA, ymmB);
- e->vcvtps2dq(ymmA, anyptr_gpB);
- e->vcvtps2dq(zmmA, zmmB);
- e->vcvtps2dq(zmmA, anyptr_gpB);
- e->vcvtps2pd(xmmA, xmmB);
- e->vcvtps2pd(xmmA, anyptr_gpB);
- e->vcvtps2pd(ymmA, xmmB);
- e->vcvtps2pd(ymmA, anyptr_gpB);
- e->vcvtps2pd(zmmA, ymmB);
- e->vcvtps2pd(zmmA, anyptr_gpB);
- e->vcvtps2ph(xmmA, xmmB, 0);
- e->vcvtps2ph(anyptr_gpA, xmmB, 0);
- e->vcvtps2ph(xmmA, ymmB, 0);
- e->vcvtps2ph(anyptr_gpA, ymmB, 0);
- e->vcvtps2ph(ymmA, zmmB, 0);
- e->vcvtps2ph(anyptr_gpA, zmmB, 0);
- e->vcvtps2qq(xmmA, xmmB);
- e->vcvtps2qq(xmmA, anyptr_gpB);
- e->vcvtps2qq(ymmA, xmmB);
- e->vcvtps2qq(ymmA, anyptr_gpB);
- e->vcvtps2qq(zmmA, ymmB);
- e->vcvtps2qq(zmmA, anyptr_gpB);
- e->vcvtps2udq(xmmA, xmmB);
- e->vcvtps2udq(xmmA, anyptr_gpB);
- e->vcvtps2udq(ymmA, ymmB);
- e->vcvtps2udq(ymmA, anyptr_gpB);
- e->vcvtps2udq(zmmA, zmmB);
- e->vcvtps2udq(zmmA, anyptr_gpB);
- e->vcvtps2uqq(xmmA, xmmB);
- e->vcvtps2uqq(xmmA, anyptr_gpB);
- e->vcvtps2uqq(ymmA, xmmB);
- e->vcvtps2uqq(ymmA, anyptr_gpB);
- e->vcvtps2uqq(zmmA, ymmB);
- e->vcvtps2uqq(zmmA, anyptr_gpB);
- e->vcvtqq2pd(xmmA, xmmB);
- e->vcvtqq2pd(xmmA, anyptr_gpB);
- e->vcvtqq2pd(ymmA, ymmB);
- e->vcvtqq2pd(ymmA, anyptr_gpB);
- e->vcvtqq2pd(zmmA, zmmB);
- e->vcvtqq2pd(zmmA, anyptr_gpB);
- e->vcvtqq2ps(xmmA, xmmB);
- e->vcvtqq2ps(xmmA, anyptr_gpB);
- e->vcvtqq2ps(xmmA, ymmB);
- e->vcvtqq2ps(xmmA, anyptr_gpB);
- e->vcvtqq2ps(ymmA, zmmB);
- e->vcvtqq2ps(ymmA, anyptr_gpB);
- e->vcvtsd2si(gdA, xmmB);
- e->vcvtsd2si(gdA, anyptr_gpB);
- if (isX64) e->vcvtsd2si(gzA, xmmB);
- if (isX64) e->vcvtsd2si(gzA, anyptr_gpB);
- e->vcvtsd2ss(xmmA, xmmB, xmmC);
- e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
- e->vcvtsd2usi(gdA, xmmB);
- e->vcvtsd2usi(gdA, anyptr_gpB);
- if (isX64) e->vcvtsd2usi(gzA, xmmB);
- if (isX64) e->vcvtsd2usi(gzA, anyptr_gpB);
- e->vcvtsi2sd(xmmA, xmmB, gdC);
- e->vcvtsi2sd(xmmA, xmmB, dword_ptr(gzC));
- if (isX64) e->vcvtsi2sd(xmmA, xmmB, gzC);
- if (isX64) e->vcvtsi2sd(xmmA, xmmB, qword_ptr(gzC));
- e->vcvtsi2ss(xmmA, xmmB, gdC);
- e->vcvtsi2ss(xmmA, xmmB, dword_ptr(gzC));
- if (isX64) e->vcvtsi2ss(xmmA, xmmB, gzC);
- if (isX64) e->vcvtsi2ss(xmmA, xmmB, qword_ptr(gzC));
- e->vcvtss2sd(xmmA, xmmB, xmmC);
- e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
- e->vcvtss2si(gdA, xmmB);
- e->vcvtss2si(gdA, anyptr_gpB);
- if (isX64) e->vcvtss2si(gzA, xmmB);
- if (isX64) e->vcvtss2si(gzA, anyptr_gpB);
- e->vcvtss2usi(gdA, xmmB);
- e->vcvtss2usi(gdA, anyptr_gpB);
- if (isX64) e->vcvtss2usi(gzA, xmmB);
- if (isX64) e->vcvtss2usi(gzA, anyptr_gpB);
- e->vcvttpd2dq(xmmA, xmmB);
- e->vcvttpd2dq(xmmA, anyptr_gpB);
- e->vcvttpd2dq(xmmA, ymmB);
- e->vcvttpd2dq(xmmA, anyptr_gpB);
- e->vcvttpd2dq(ymmA, zmmB);
- e->vcvttpd2dq(ymmA, anyptr_gpB);
- e->vcvttpd2qq(xmmA, xmmB);
- e->vcvttpd2qq(xmmA, anyptr_gpB);
- e->vcvttpd2qq(ymmA, ymmB);
- e->vcvttpd2qq(ymmA, anyptr_gpB);
- e->vcvttpd2qq(zmmA, zmmB);
- e->vcvttpd2qq(zmmA, anyptr_gpB);
- e->vcvttpd2udq(xmmA, xmmB);
- e->vcvttpd2udq(xmmA, anyptr_gpB);
- e->vcvttpd2udq(xmmA, ymmB);
- e->vcvttpd2udq(xmmA, anyptr_gpB);
- e->vcvttpd2udq(ymmA, zmmB);
- e->vcvttpd2udq(ymmA, anyptr_gpB);
- e->vcvttpd2uqq(xmmA, xmmB);
- e->vcvttpd2uqq(xmmA, anyptr_gpB);
- e->vcvttpd2uqq(ymmA, ymmB);
- e->vcvttpd2uqq(ymmA, anyptr_gpB);
- e->vcvttpd2uqq(zmmA, zmmB);
- e->vcvttpd2uqq(zmmA, anyptr_gpB);
- e->vcvttps2dq(xmmA, xmmB);
- e->vcvttps2dq(xmmA, anyptr_gpB);
- e->vcvttps2dq(ymmA, ymmB);
- e->vcvttps2dq(ymmA, anyptr_gpB);
- e->vcvttps2dq(zmmA, zmmB);
- e->vcvttps2dq(zmmA, anyptr_gpB);
- e->vcvttps2qq(xmmA, xmmB);
- e->vcvttps2qq(xmmA, anyptr_gpB);
- e->vcvttps2qq(ymmA, xmmB);
- e->vcvttps2qq(ymmA, anyptr_gpB);
- e->vcvttps2qq(zmmA, ymmB);
- e->vcvttps2qq(zmmA, anyptr_gpB);
- e->vcvttps2udq(xmmA, xmmB);
- e->vcvttps2udq(xmmA, anyptr_gpB);
- e->vcvttps2udq(ymmA, ymmB);
- e->vcvttps2udq(ymmA, anyptr_gpB);
- e->vcvttps2udq(zmmA, zmmB);
- e->vcvttps2udq(zmmA, anyptr_gpB);
- e->vcvttps2uqq(xmmA, xmmB);
- e->vcvttps2uqq(xmmA, anyptr_gpB);
- e->vcvttps2uqq(ymmA, xmmB);
- e->vcvttps2uqq(ymmA, anyptr_gpB);
- e->vcvttps2uqq(zmmA, ymmB);
- e->vcvttps2uqq(zmmA, anyptr_gpB);
- e->vcvttsd2si(gdA, xmmB);
- e->vcvttsd2si(gdA, anyptr_gpB);
- if (isX64) e->vcvttsd2si(gzA, xmmB);
- if (isX64) e->vcvttsd2si(gzA, anyptr_gpB);
- e->vcvttsd2usi(gdA, xmmB);
- e->vcvttsd2usi(gdA, anyptr_gpB);
- if (isX64) e->vcvttsd2usi(gzA, xmmB);
- if (isX64) e->vcvttsd2usi(gzA, anyptr_gpB);
- e->vcvttss2si(gdA, xmmB);
- e->vcvttss2si(gdA, anyptr_gpB);
- if (isX64) e->vcvttss2si(gzA, xmmB);
- if (isX64) e->vcvttss2si(gzA, anyptr_gpB);
- e->vcvttss2usi(gdA, xmmB);
- e->vcvttss2usi(gdA, anyptr_gpB);
- if (isX64) e->vcvttss2usi(gzA, xmmB);
- if (isX64) e->vcvttss2usi(gzA, anyptr_gpB);
- e->vcvtudq2pd(xmmA, xmmB);
- e->vcvtudq2pd(xmmA, anyptr_gpB);
- e->vcvtudq2pd(ymmA, xmmB);
- e->vcvtudq2pd(ymmA, anyptr_gpB);
- e->vcvtudq2pd(zmmA, ymmB);
- e->vcvtudq2pd(zmmA, anyptr_gpB);
- e->vcvtudq2ps(xmmA, xmmB);
- e->vcvtudq2ps(xmmA, anyptr_gpB);
- e->vcvtudq2ps(ymmA, ymmB);
- e->vcvtudq2ps(ymmA, anyptr_gpB);
- e->vcvtudq2ps(zmmA, zmmB);
- e->vcvtudq2ps(zmmA, anyptr_gpB);
- e->vcvtuqq2pd(xmmA, xmmB);
- e->vcvtuqq2pd(xmmA, anyptr_gpB);
- e->vcvtuqq2pd(ymmA, ymmB);
- e->vcvtuqq2pd(ymmA, anyptr_gpB);
- e->vcvtuqq2pd(zmmA, zmmB);
- e->vcvtuqq2pd(zmmA, anyptr_gpB);
- e->vcvtuqq2ps(xmmA, xmmB);
- e->vcvtuqq2ps(xmmA, anyptr_gpB);
- e->vcvtuqq2ps(xmmA, ymmB);
- e->vcvtuqq2ps(xmmA, anyptr_gpB);
- e->vcvtuqq2ps(ymmA, zmmB);
- e->vcvtuqq2ps(ymmA, anyptr_gpB);
- e->vcvtusi2sd(xmmA, xmmB, gdC);
- e->vcvtusi2sd(xmmA, xmmB, dword_ptr(gzC));
- if (isX64) e->vcvtusi2sd(xmmA, xmmB, gzC);
- if (isX64) e->vcvtusi2sd(xmmA, xmmB, qword_ptr(gzC));
- e->vcvtusi2ss(xmmA, xmmB, gdC);
- e->vcvtusi2ss(xmmA, xmmB, dword_ptr(gzC));
- if (isX64) e->vcvtusi2ss(xmmA, xmmB, gzC);
- if (isX64) e->vcvtusi2ss(xmmA, xmmB, qword_ptr(gzC));
- e->vdbpsadbw(xmmA, xmmB, xmmC, 0);
- e->vdbpsadbw(xmmA, xmmB, anyptr_gpC, 0);
- e->vdbpsadbw(ymmA, ymmB, ymmC, 0);
- e->vdbpsadbw(ymmA, ymmB, anyptr_gpC, 0);
- e->vdbpsadbw(zmmA, zmmB, zmmC, 0);
- e->vdbpsadbw(zmmA, zmmB, anyptr_gpC, 0);
- e->vdivpd(xmmA, xmmB, xmmC);
- e->vdivpd(xmmA, xmmB, anyptr_gpC);
- e->vdivpd(ymmA, ymmB, ymmC);
- e->vdivpd(ymmA, ymmB, anyptr_gpC);
- e->vdivpd(zmmA, zmmB, zmmC);
- e->vdivpd(zmmA, zmmB, anyptr_gpC);
- e->vdivps(xmmA, xmmB, xmmC);
- e->vdivps(xmmA, xmmB, anyptr_gpC);
- e->vdivps(ymmA, ymmB, ymmC);
- e->vdivps(ymmA, ymmB, anyptr_gpC);
- e->vdivps(zmmA, zmmB, zmmC);
- e->vdivps(zmmA, zmmB, anyptr_gpC);
- e->vdivsd(xmmA, xmmB, xmmC);
- e->vdivsd(xmmA, xmmB, anyptr_gpC);
- e->vdivss(xmmA, xmmB, xmmC);
- e->vdivss(xmmA, xmmB, anyptr_gpC);
- e->vexp2pd(zmmA, zmmB);
- e->vexp2pd(zmmA, anyptr_gpB);
- e->vexp2ps(zmmA, zmmB);
- e->vexp2ps(zmmA, anyptr_gpB);
- e->vexpandpd(xmmA, xmmB);
- e->vexpandpd(xmmA, anyptr_gpB);
- e->vexpandpd(ymmA, ymmB);
- e->vexpandpd(ymmA, anyptr_gpB);
- e->vexpandpd(zmmA, zmmB);
- e->vexpandpd(zmmA, anyptr_gpB);
- e->vexpandps(xmmA, xmmB);
- e->vexpandps(xmmA, anyptr_gpB);
- e->vexpandps(ymmA, ymmB);
- e->vexpandps(ymmA, anyptr_gpB);
- e->vexpandps(zmmA, zmmB);
- e->vexpandps(zmmA, anyptr_gpB);
- e->vextractf32x4(xmmA, ymmB, 0);
- e->vextractf32x4(anyptr_gpA, ymmB, 0);
- e->vextractf32x4(xmmA, zmmB, 0);
- e->vextractf32x4(anyptr_gpA, zmmB, 0);
- e->vextractf32x8(ymmA, zmmB, 0);
- e->vextractf32x8(anyptr_gpA, zmmB, 0);
- e->vextractf64x2(xmmA, ymmB, 0);
- e->vextractf64x2(anyptr_gpA, ymmB, 0);
- e->vextractf64x2(xmmA, zmmB, 0);
- e->vextractf64x2(anyptr_gpA, zmmB, 0);
- e->vextractf64x4(ymmA, zmmB, 0);
- e->vextractf64x4(anyptr_gpA, zmmB, 0);
- e->vextracti32x4(xmmA, ymmB, 0);
- e->vextracti32x4(anyptr_gpA, ymmB, 0);
- e->vextracti32x4(xmmA, zmmB, 0);
- e->vextracti32x4(anyptr_gpA, zmmB, 0);
- e->vextracti32x8(ymmA, zmmB, 0);
- e->vextracti32x8(anyptr_gpA, zmmB, 0);
- e->vextracti64x2(xmmA, ymmB, 0);
- e->vextracti64x2(anyptr_gpA, ymmB, 0);
- e->vextracti64x2(xmmA, zmmB, 0);
- e->vextracti64x2(anyptr_gpA, zmmB, 0);
- e->vextracti64x4(ymmA, zmmB, 0);
- e->vextracti64x4(anyptr_gpA, zmmB, 0);
- e->vextractps(gdA, xmmB, 0);
- e->vextractps(gzA, xmmB, 0);
- e->vextractps(anyptr_gpA, xmmB, 0);
- e->vfixupimmpd(xmmA, xmmB, xmmC, 0);
- e->vfixupimmpd(xmmA, xmmB, anyptr_gpC, 0);
- e->vfixupimmpd(ymmA, ymmB, ymmC, 0);
- e->vfixupimmpd(ymmA, ymmB, anyptr_gpC, 0);
- e->vfixupimmpd(zmmA, zmmB, zmmC, 0);
- e->vfixupimmpd(zmmA, zmmB, anyptr_gpC, 0);
- e->vfixupimmps(xmmA, xmmB, xmmC, 0);
- e->vfixupimmps(xmmA, xmmB, anyptr_gpC, 0);
- e->vfixupimmps(ymmA, ymmB, ymmC, 0);
- e->vfixupimmps(ymmA, ymmB, anyptr_gpC, 0);
- e->vfixupimmps(zmmA, zmmB, zmmC, 0);
- e->vfixupimmps(zmmA, zmmB, anyptr_gpC, 0);
- e->vfixupimmsd(xmmA, xmmB, xmmC, 0);
- e->vfixupimmsd(xmmA, xmmB, anyptr_gpC, 0);
- e->vfixupimmss(xmmA, xmmB, xmmC, 0);
- e->vfixupimmss(xmmA, xmmB, anyptr_gpC, 0);
- e->vfmadd132pd(xmmA, xmmB, xmmC);
- e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132pd(ymmA, ymmB, ymmC);
- e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd132pd(zmmA, zmmB, zmmC);
- e->vfmadd132pd(zmmA, zmmB, anyptr_gpC);
- e->vfmadd132ps(xmmA, xmmB, xmmC);
- e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132ps(ymmA, ymmB, ymmC);
- e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd132ps(zmmA, zmmB, zmmC);
- e->vfmadd132ps(zmmA, zmmB, anyptr_gpC);
- e->vfmadd132sd(xmmA, xmmB, xmmC);
- e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd132ss(xmmA, xmmB, xmmC);
- e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213pd(xmmA, xmmB, xmmC);
- e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213pd(ymmA, ymmB, ymmC);
- e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd213pd(zmmA, zmmB, zmmC);
- e->vfmadd213pd(zmmA, zmmB, anyptr_gpC);
- e->vfmadd213ps(xmmA, xmmB, xmmC);
- e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213ps(ymmA, ymmB, ymmC);
- e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd213ps(zmmA, zmmB, zmmC);
- e->vfmadd213ps(zmmA, zmmB, anyptr_gpC);
- e->vfmadd213sd(xmmA, xmmB, xmmC);
- e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd213ss(xmmA, xmmB, xmmC);
- e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231pd(xmmA, xmmB, xmmC);
- e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231pd(ymmA, ymmB, ymmC);
- e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmadd231pd(zmmA, zmmB, zmmC);
- e->vfmadd231pd(zmmA, zmmB, anyptr_gpC);
- e->vfmadd231ps(xmmA, xmmB, xmmC);
- e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231ps(ymmA, ymmB, ymmC);
- e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmadd231ps(zmmA, zmmB, zmmC);
- e->vfmadd231ps(zmmA, zmmB, anyptr_gpC);
- e->vfmadd231sd(xmmA, xmmB, xmmC);
- e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
- e->vfmadd231ss(xmmA, xmmB, xmmC);
- e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub132pd(xmmA, xmmB, xmmC);
- e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub132pd(ymmA, ymmB, ymmC);
- e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub132pd(zmmA, zmmB, zmmC);
- e->vfmaddsub132pd(zmmA, zmmB, anyptr_gpC);
- e->vfmaddsub132ps(xmmA, xmmB, xmmC);
- e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub132ps(ymmA, ymmB, ymmC);
- e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub132ps(zmmA, zmmB, zmmC);
- e->vfmaddsub132ps(zmmA, zmmB, anyptr_gpC);
- e->vfmaddsub213pd(xmmA, xmmB, xmmC);
- e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub213pd(ymmA, ymmB, ymmC);
- e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub213pd(zmmA, zmmB, zmmC);
- e->vfmaddsub213pd(zmmA, zmmB, anyptr_gpC);
- e->vfmaddsub213ps(xmmA, xmmB, xmmC);
- e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub213ps(ymmA, ymmB, ymmC);
- e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub213ps(zmmA, zmmB, zmmC);
- e->vfmaddsub213ps(zmmA, zmmB, anyptr_gpC);
- e->vfmaddsub231pd(xmmA, xmmB, xmmC);
- e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub231pd(ymmA, ymmB, ymmC);
- e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub231pd(zmmA, zmmB, zmmC);
- e->vfmaddsub231pd(zmmA, zmmB, anyptr_gpC);
- e->vfmaddsub231ps(xmmA, xmmB, xmmC);
- e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmaddsub231ps(ymmA, ymmB, ymmC);
- e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmaddsub231ps(zmmA, zmmB, zmmC);
- e->vfmaddsub231ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsub132pd(xmmA, xmmB, xmmC);
- e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132pd(ymmA, ymmB, ymmC);
- e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub132pd(zmmA, zmmB, zmmC);
- e->vfmsub132pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsub132ps(xmmA, xmmB, xmmC);
- e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132ps(ymmA, ymmB, ymmC);
- e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub132ps(zmmA, zmmB, zmmC);
- e->vfmsub132ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsub132sd(xmmA, xmmB, xmmC);
- e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub132ss(xmmA, xmmB, xmmC);
- e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213pd(xmmA, xmmB, xmmC);
- e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213pd(ymmA, ymmB, ymmC);
- e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub213pd(zmmA, zmmB, zmmC);
- e->vfmsub213pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsub213ps(xmmA, xmmB, xmmC);
- e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213ps(ymmA, ymmB, ymmC);
- e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub213ps(zmmA, zmmB, zmmC);
- e->vfmsub213ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsub213sd(xmmA, xmmB, xmmC);
- e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub213ss(xmmA, xmmB, xmmC);
- e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231pd(xmmA, xmmB, xmmC);
- e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231pd(ymmA, ymmB, ymmC);
- e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsub231pd(zmmA, zmmB, zmmC);
- e->vfmsub231pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsub231ps(xmmA, xmmB, xmmC);
- e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231ps(ymmA, ymmB, ymmC);
- e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsub231ps(zmmA, zmmB, zmmC);
- e->vfmsub231ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsub231sd(xmmA, xmmB, xmmC);
- e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
- e->vfmsub231ss(xmmA, xmmB, xmmC);
- e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd132pd(xmmA, xmmB, xmmC);
- e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd132pd(ymmA, ymmB, ymmC);
- e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd132pd(zmmA, zmmB, zmmC);
- e->vfmsubadd132pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsubadd132ps(xmmA, xmmB, xmmC);
- e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd132ps(ymmA, ymmB, ymmC);
- e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd132ps(zmmA, zmmB, zmmC);
- e->vfmsubadd132ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsubadd213pd(xmmA, xmmB, xmmC);
- e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd213pd(ymmA, ymmB, ymmC);
- e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd213pd(zmmA, zmmB, zmmC);
- e->vfmsubadd213pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsubadd213ps(xmmA, xmmB, xmmC);
- e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd213ps(ymmA, ymmB, ymmC);
- e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd213ps(zmmA, zmmB, zmmC);
- e->vfmsubadd213ps(zmmA, zmmB, anyptr_gpC);
- e->vfmsubadd231pd(xmmA, xmmB, xmmC);
- e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd231pd(ymmA, ymmB, ymmC);
- e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd231pd(zmmA, zmmB, zmmC);
- e->vfmsubadd231pd(zmmA, zmmB, anyptr_gpC);
- e->vfmsubadd231ps(xmmA, xmmB, xmmC);
- e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfmsubadd231ps(ymmA, ymmB, ymmC);
- e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfmsubadd231ps(zmmA, zmmB, zmmC);
- e->vfmsubadd231ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd132pd(xmmA, xmmB, xmmC);
- e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132pd(ymmA, ymmB, ymmC);
- e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd132pd(zmmA, zmmB, zmmC);
- e->vfnmadd132pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd132ps(xmmA, xmmB, xmmC);
- e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132ps(ymmA, ymmB, ymmC);
- e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd132ps(zmmA, zmmB, zmmC);
- e->vfnmadd132ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd132sd(xmmA, xmmB, xmmC);
- e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd132ss(xmmA, xmmB, xmmC);
- e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213pd(xmmA, xmmB, xmmC);
- e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213pd(ymmA, ymmB, ymmC);
- e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd213pd(zmmA, zmmB, zmmC);
- e->vfnmadd213pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd213ps(xmmA, xmmB, xmmC);
- e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213ps(ymmA, ymmB, ymmC);
- e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd213ps(zmmA, zmmB, zmmC);
- e->vfnmadd213ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd213sd(xmmA, xmmB, xmmC);
- e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd213ss(xmmA, xmmB, xmmC);
- e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231pd(xmmA, xmmB, xmmC);
- e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231pd(ymmA, ymmB, ymmC);
- e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd231pd(zmmA, zmmB, zmmC);
- e->vfnmadd231pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd231ps(xmmA, xmmB, xmmC);
- e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231ps(ymmA, ymmB, ymmC);
- e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmadd231ps(zmmA, zmmB, zmmC);
- e->vfnmadd231ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmadd231sd(xmmA, xmmB, xmmC);
- e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmadd231ss(xmmA, xmmB, xmmC);
- e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132pd(xmmA, xmmB, xmmC);
- e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132pd(ymmA, ymmB, ymmC);
- e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub132pd(zmmA, zmmB, zmmC);
- e->vfnmsub132pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub132ps(xmmA, xmmB, xmmC);
- e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132ps(ymmA, ymmB, ymmC);
- e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub132ps(zmmA, zmmB, zmmC);
- e->vfnmsub132ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub132sd(xmmA, xmmB, xmmC);
- e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub132ss(xmmA, xmmB, xmmC);
- e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213pd(xmmA, xmmB, xmmC);
- e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213pd(ymmA, ymmB, ymmC);
- e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub213pd(zmmA, zmmB, zmmC);
- e->vfnmsub213pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub213ps(xmmA, xmmB, xmmC);
- e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213ps(ymmA, ymmB, ymmC);
- e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub213ps(zmmA, zmmB, zmmC);
- e->vfnmsub213ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub213sd(xmmA, xmmB, xmmC);
- e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub213ss(xmmA, xmmB, xmmC);
- e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231pd(xmmA, xmmB, xmmC);
- e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231pd(ymmA, ymmB, ymmC);
- e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub231pd(zmmA, zmmB, zmmC);
- e->vfnmsub231pd(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub231ps(xmmA, xmmB, xmmC);
- e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231ps(ymmA, ymmB, ymmC);
- e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
- e->vfnmsub231ps(zmmA, zmmB, zmmC);
- e->vfnmsub231ps(zmmA, zmmB, anyptr_gpC);
- e->vfnmsub231sd(xmmA, xmmB, xmmC);
- e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
- e->vfnmsub231ss(xmmA, xmmB, xmmC);
- e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
- e->vfpclasspd(kA, xmmB, 0);
- e->vfpclasspd(kA, anyptr_gpB, 0);
- e->vfpclasspd(kA, ymmB, 0);
- e->vfpclasspd(kA, anyptr_gpB, 0);
- e->vfpclasspd(kA, zmmB, 0);
- e->vfpclasspd(kA, anyptr_gpB, 0);
- e->vfpclassps(kA, xmmB, 0);
- e->vfpclassps(kA, anyptr_gpB, 0);
- e->vfpclassps(kA, ymmB, 0);
- e->vfpclassps(kA, anyptr_gpB, 0);
- e->vfpclassps(kA, zmmB, 0);
- e->vfpclassps(kA, anyptr_gpB, 0);
- e->vfpclasssd(kA, xmmB, 0);
- e->vfpclasssd(kA, anyptr_gpB, 0);
- e->vfpclassss(kA, xmmB, 0);
- e->vfpclassss(kA, anyptr_gpB, 0);
- e->vgatherdpd(xmmA, vx_ptr);
- e->vgatherdpd(ymmA, vy_ptr);
- e->vgatherdpd(zmmA, vz_ptr);
- e->vgatherdps(xmmA, vx_ptr);
- e->vgatherdps(ymmA, vy_ptr);
- e->vgatherdps(zmmA, vz_ptr);
- e->vgatherpf0dpd(vy_ptr);
- e->vgatherpf0dps(vz_ptr);
- e->vgatherpf0qpd(vz_ptr);
- e->vgatherpf0qps(vz_ptr);
- e->vgatherpf1dpd(vy_ptr);
- e->vgatherpf1dps(vz_ptr);
- e->vgatherpf1qpd(vz_ptr);
- e->vgatherpf1qps(vz_ptr);
- e->vgatherqpd(xmmA, vx_ptr);
- e->vgatherqpd(ymmA, vy_ptr);
- e->vgatherqpd(zmmA, vz_ptr);
- e->vgatherqps(xmmA, vx_ptr);
- e->vgatherqps(ymmA, vy_ptr);
- e->vgatherqps(zmmA, vz_ptr);
- e->vgetexppd(xmmA, xmmB);
- e->vgetexppd(xmmA, anyptr_gpB);
- e->vgetexppd(ymmA, ymmB);
- e->vgetexppd(ymmA, anyptr_gpB);
- e->vgetexppd(zmmA, zmmB);
- e->vgetexppd(zmmA, anyptr_gpB);
- e->vgetexpps(xmmA, xmmB);
- e->vgetexpps(xmmA, anyptr_gpB);
- e->vgetexpps(ymmA, ymmB);
- e->vgetexpps(ymmA, anyptr_gpB);
- e->vgetexpps(zmmA, zmmB);
- e->vgetexpps(zmmA, anyptr_gpB);
- e->vgetexpsd(xmmA, xmmB, xmmC);
- e->vgetexpsd(xmmA, xmmB, anyptr_gpB);
- e->vgetexpss(xmmA, xmmB, xmmC);
- e->vgetexpss(xmmA, xmmB, anyptr_gpB);
- e->vgetmantpd(xmmA, xmmB, 0);
- e->vgetmantpd(xmmA, anyptr_gpB, 0);
- e->vgetmantpd(ymmA, ymmB, 0);
- e->vgetmantpd(ymmA, anyptr_gpB, 0);
- e->vgetmantpd(zmmA, zmmB, 0);
- e->vgetmantpd(zmmA, anyptr_gpB, 0);
- e->vgetmantps(xmmA, xmmB, 0);
- e->vgetmantps(xmmA, anyptr_gpB, 0);
- e->vgetmantps(ymmA, ymmB, 0);
- e->vgetmantps(ymmA, anyptr_gpB, 0);
- e->vgetmantps(zmmA, zmmB, 0);
- e->vgetmantps(zmmA, anyptr_gpB, 0);
- e->vgetmantsd(xmmA, xmmB, xmmC, 0);
- e->vgetmantsd(xmmA, xmmB, anyptr_gpB, 0);
- e->vgetmantss(xmmA, xmmB, xmmC, 0);
- e->vgetmantss(xmmA, xmmB, anyptr_gpB, 0);
- e->vinsertf32x4(ymmA, ymmB, xmmC, 0);
- e->vinsertf32x4(ymmA, ymmB, anyptr_gpC, 0);
- e->vinsertf32x4(zmmA, zmmB, xmmC, 0);
- e->vinsertf32x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vinsertf32x8(zmmA, zmmB, ymmC, 0);
- e->vinsertf32x8(zmmA, zmmB, anyptr_gpC, 0);
- e->vinsertf64x2(ymmA, ymmB, xmmC, 0);
- e->vinsertf64x2(ymmA, ymmB, anyptr_gpC, 0);
- e->vinsertf64x2(zmmA, zmmB, xmmC, 0);
- e->vinsertf64x2(zmmA, zmmB, anyptr_gpC, 0);
- e->vinsertf64x4(zmmA, zmmB, ymmC, 0);
- e->vinsertf64x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vinserti32x4(ymmA, ymmB, xmmC, 0);
- e->vinserti32x4(ymmA, ymmB, anyptr_gpC, 0);
- e->vinserti32x4(zmmA, zmmB, xmmC, 0);
- e->vinserti32x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vinserti32x8(zmmA, zmmB, ymmC, 0);
- e->vinserti32x8(zmmA, zmmB, anyptr_gpC, 0);
- e->vinserti64x2(ymmA, ymmB, xmmC, 0);
- e->vinserti64x2(ymmA, ymmB, anyptr_gpC, 0);
- e->vinserti64x2(zmmA, zmmB, xmmC, 0);
- e->vinserti64x2(zmmA, zmmB, anyptr_gpC, 0);
- e->vinserti64x4(zmmA, zmmB, ymmC, 0);
- e->vinserti64x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vinsertps(xmmA, xmmB, xmmC, 0);
- e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
- e->vmaxpd(xmmA, xmmB, xmmC);
- e->vmaxpd(xmmA, xmmB, anyptr_gpC);
- e->vmaxpd(ymmA, ymmB, ymmC);
- e->vmaxpd(ymmA, ymmB, anyptr_gpC);
- e->vmaxpd(zmmA, zmmB, zmmC);
- e->vmaxpd(zmmA, zmmB, anyptr_gpC);
- e->vmaxps(xmmA, xmmB, xmmC);
- e->vmaxps(xmmA, xmmB, anyptr_gpC);
- e->vmaxps(ymmA, ymmB, ymmC);
- e->vmaxps(ymmA, ymmB, anyptr_gpC);
- e->vmaxps(zmmA, zmmB, zmmC);
- e->vmaxps(zmmA, zmmB, anyptr_gpC);
- e->vmaxsd(xmmA, xmmB, xmmC);
- e->vmaxsd(xmmA, xmmB, anyptr_gpC);
- e->vmaxss(xmmA, xmmB, xmmC);
- e->vmaxss(xmmA, xmmB, anyptr_gpC);
- e->vminpd(xmmA, xmmB, xmmC);
- e->vminpd(xmmA, xmmB, anyptr_gpC);
- e->vminpd(ymmA, ymmB, ymmC);
- e->vminpd(ymmA, ymmB, anyptr_gpC);
- e->vminpd(zmmA, zmmB, zmmC);
- e->vminpd(zmmA, zmmB, anyptr_gpC);
- e->vminps(xmmA, xmmB, xmmC);
- e->vminps(xmmA, xmmB, anyptr_gpC);
- e->vminps(ymmA, ymmB, ymmC);
- e->vminps(ymmA, ymmB, anyptr_gpC);
- e->vminps(zmmA, zmmB, zmmC);
- e->vminps(zmmA, zmmB, anyptr_gpC);
- e->vminsd(xmmA, xmmB, xmmC);
- e->vminsd(xmmA, xmmB, anyptr_gpC);
- e->vminss(xmmA, xmmB, xmmC);
- e->vminss(xmmA, xmmB, anyptr_gpC);
- e->vmovapd(xmmA, xmmB);
- e->vmovapd(xmmA, anyptr_gpB);
- e->vmovapd(xmmA, xmmB);
- e->vmovapd(anyptr_gpA, xmmB);
- e->vmovapd(ymmA, ymmB);
- e->vmovapd(ymmA, anyptr_gpB);
- e->vmovapd(ymmA, ymmB);
- e->vmovapd(anyptr_gpA, ymmB);
- e->vmovapd(zmmA, zmmB);
- e->vmovapd(zmmA, anyptr_gpB);
- e->vmovapd(zmmA, zmmB);
- e->vmovapd(anyptr_gpA, zmmB);
- e->vmovaps(xmmA, xmmB);
- e->vmovaps(xmmA, anyptr_gpB);
- e->vmovaps(xmmA, xmmB);
- e->vmovaps(anyptr_gpA, xmmB);
- e->vmovaps(ymmA, ymmB);
- e->vmovaps(ymmA, anyptr_gpB);
- e->vmovaps(ymmA, ymmB);
- e->vmovaps(anyptr_gpA, ymmB);
- e->vmovaps(zmmA, zmmB);
- e->vmovaps(zmmA, anyptr_gpB);
- e->vmovaps(zmmA, zmmB);
- e->vmovaps(anyptr_gpA, zmmB);
- e->vmovd(gdA, xmmB);
- e->vmovd(gzA, xmmB);
- e->vmovd(anyptr_gpA, xmmB);
- e->vmovd(xmmA, gdB);
- e->vmovd(xmmA, gzB);
- e->vmovd(xmmA, anyptr_gpB);
- e->vmovddup(xmmA, xmmB);
- e->vmovddup(xmmA, anyptr_gpB);
- e->vmovddup(ymmA, ymmB);
- e->vmovddup(ymmA, anyptr_gpB);
- e->vmovddup(zmmA, zmmB);
- e->vmovddup(zmmA, anyptr_gpB);
- e->vmovdqa32(xmmA, xmmB);
- e->vmovdqa32(xmmA, anyptr_gpB);
- e->vmovdqa32(xmmA, xmmB);
- e->vmovdqa32(anyptr_gpA, xmmB);
- e->vmovdqa32(ymmA, ymmB);
- e->vmovdqa32(ymmA, anyptr_gpB);
- e->vmovdqa32(ymmA, ymmB);
- e->vmovdqa32(anyptr_gpA, ymmB);
- e->vmovdqa32(zmmA, zmmB);
- e->vmovdqa32(zmmA, anyptr_gpB);
- e->vmovdqa32(zmmA, zmmB);
- e->vmovdqa32(anyptr_gpA, zmmB);
- e->vmovdqa64(xmmA, xmmB);
- e->vmovdqa64(xmmA, anyptr_gpB);
- e->vmovdqa64(xmmA, xmmB);
- e->vmovdqa64(anyptr_gpA, xmmB);
- e->vmovdqa64(ymmA, ymmB);
- e->vmovdqa64(ymmA, anyptr_gpB);
- e->vmovdqa64(ymmA, ymmB);
- e->vmovdqa64(anyptr_gpA, ymmB);
- e->vmovdqa64(zmmA, zmmB);
- e->vmovdqa64(zmmA, anyptr_gpB);
- e->vmovdqa64(zmmA, zmmB);
- e->vmovdqa64(anyptr_gpA, zmmB);
- e->vmovdqu16(xmmA, xmmB);
- e->vmovdqu16(xmmA, anyptr_gpB);
- e->vmovdqu16(xmmA, xmmB);
- e->vmovdqu16(anyptr_gpA, xmmB);
- e->vmovdqu16(ymmA, ymmB);
- e->vmovdqu16(ymmA, anyptr_gpB);
- e->vmovdqu16(ymmA, ymmB);
- e->vmovdqu16(anyptr_gpA, ymmB);
- e->vmovdqu16(zmmA, zmmB);
- e->vmovdqu16(zmmA, anyptr_gpB);
- e->vmovdqu16(zmmA, zmmB);
- e->vmovdqu16(anyptr_gpA, zmmB);
- e->vmovdqu32(xmmA, xmmB);
- e->vmovdqu32(xmmA, anyptr_gpB);
- e->vmovdqu32(xmmA, xmmB);
- e->vmovdqu32(anyptr_gpA, xmmB);
- e->vmovdqu32(ymmA, ymmB);
- e->vmovdqu32(ymmA, anyptr_gpB);
- e->vmovdqu32(ymmA, ymmB);
- e->vmovdqu32(anyptr_gpA, ymmB);
- e->vmovdqu32(zmmA, zmmB);
- e->vmovdqu32(zmmA, anyptr_gpB);
- e->vmovdqu32(zmmA, zmmB);
- e->vmovdqu32(anyptr_gpA, zmmB);
- e->vmovdqu64(xmmA, xmmB);
- e->vmovdqu64(xmmA, anyptr_gpB);
- e->vmovdqu64(xmmA, xmmB);
- e->vmovdqu64(anyptr_gpA, xmmB);
- e->vmovdqu64(ymmA, ymmB);
- e->vmovdqu64(ymmA, anyptr_gpB);
- e->vmovdqu64(ymmA, ymmB);
- e->vmovdqu64(anyptr_gpA, ymmB);
- e->vmovdqu64(zmmA, zmmB);
- e->vmovdqu64(zmmA, anyptr_gpB);
- e->vmovdqu64(zmmA, zmmB);
- e->vmovdqu64(anyptr_gpA, zmmB);
- e->vmovdqu8(xmmA, xmmB);
- e->vmovdqu8(xmmA, anyptr_gpB);
- e->vmovdqu8(xmmA, xmmB);
- e->vmovdqu8(anyptr_gpA, xmmB);
- e->vmovdqu8(ymmA, ymmB);
- e->vmovdqu8(ymmA, anyptr_gpB);
- e->vmovdqu8(ymmA, ymmB);
- e->vmovdqu8(anyptr_gpA, ymmB);
- e->vmovdqu8(zmmA, zmmB);
- e->vmovdqu8(zmmA, anyptr_gpB);
- e->vmovdqu8(zmmA, zmmB);
- e->vmovdqu8(anyptr_gpA, zmmB);
- e->vmovhlps(xmmA, xmmB, xmmC);
- e->vmovhpd(anyptr_gpA, xmmB);
- e->vmovhpd(xmmA, xmmB, anyptr_gpC);
- e->vmovhps(anyptr_gpA, xmmB);
- e->vmovhps(xmmA, xmmB, anyptr_gpC);
- e->vmovlhps(xmmA, xmmB, xmmC);
- e->vmovlpd(anyptr_gpA, xmmB);
- e->vmovlpd(xmmA, xmmB, anyptr_gpC);
- e->vmovlps(anyptr_gpA, xmmB);
- e->vmovlps(xmmA, xmmB, anyptr_gpC);
- e->vmovntdq(anyptr_gpA, xmmB);
- e->vmovntdq(anyptr_gpA, ymmB);
- e->vmovntdq(anyptr_gpA, zmmB);
- e->vmovntdqa(xmmA, anyptr_gpB);
- e->vmovntdqa(ymmA, anyptr_gpB);
- e->vmovntdqa(zmmA, anyptr_gpB);
- e->vmovntpd(anyptr_gpA, xmmB);
- e->vmovntpd(anyptr_gpA, ymmB);
- e->vmovntpd(anyptr_gpA, zmmB);
- e->vmovntps(anyptr_gpA, xmmB);
- e->vmovntps(anyptr_gpA, ymmB);
- e->vmovntps(anyptr_gpA, zmmB);
- if (isX64) e->vmovq(gzA, xmmB);
- if (isX64) e->vmovq(xmmA, gzB);
- e->vmovq(anyptr_gpA, xmmB);
- e->vmovq(xmmA, anyptr_gpB);
- e->vmovq(xmmA, xmmB);
- e->vmovq(xmmA, anyptr_gpB);
- e->vmovq(xmmA, xmmB);
- e->vmovq(anyptr_gpA, xmmB);
- e->vmovsd(anyptr_gpA, xmmB);
- e->vmovsd(xmmA, anyptr_gpB);
- e->vmovsd(xmmA, xmmB, xmmC);
- e->vmovsd(xmmA, xmmB, xmmC);
- e->vmovshdup(xmmA, xmmB);
- e->vmovshdup(xmmA, anyptr_gpB);
- e->vmovshdup(ymmA, ymmB);
- e->vmovshdup(ymmA, anyptr_gpB);
- e->vmovshdup(zmmA, zmmB);
- e->vmovshdup(zmmA, anyptr_gpB);
- e->vmovsldup(xmmA, xmmB);
- e->vmovsldup(xmmA, anyptr_gpB);
- e->vmovsldup(ymmA, ymmB);
- e->vmovsldup(ymmA, anyptr_gpB);
- e->vmovsldup(zmmA, zmmB);
- e->vmovsldup(zmmA, anyptr_gpB);
- e->vmovss(anyptr_gpA, xmmB);
- e->vmovss(xmmA, anyptr_gpB);
- e->vmovss(xmmA, xmmB, xmmC);
- e->vmovss(xmmA, xmmB, xmmC);
- e->vmovupd(xmmA, xmmB);
- e->vmovupd(xmmA, anyptr_gpB);
- e->vmovupd(xmmA, xmmB);
- e->vmovupd(anyptr_gpA, xmmB);
- e->vmovupd(ymmA, ymmB);
- e->vmovupd(ymmA, anyptr_gpB);
- e->vmovupd(ymmA, ymmB);
- e->vmovupd(anyptr_gpA, ymmB);
- e->vmovupd(zmmA, zmmB);
- e->vmovupd(zmmA, anyptr_gpB);
- e->vmovupd(zmmA, zmmB);
- e->vmovupd(anyptr_gpA, zmmB);
- e->vmovups(xmmA, xmmB);
- e->vmovups(xmmA, anyptr_gpB);
- e->vmovups(xmmA, xmmB);
- e->vmovups(anyptr_gpA, xmmB);
- e->vmovups(ymmA, ymmB);
- e->vmovups(ymmA, anyptr_gpB);
- e->vmovups(ymmA, ymmB);
- e->vmovups(anyptr_gpA, ymmB);
- e->vmovups(zmmA, zmmB);
- e->vmovups(zmmA, anyptr_gpB);
- e->vmovups(zmmA, zmmB);
- e->vmovups(anyptr_gpA, zmmB);
- e->vmulpd(xmmA, xmmB, xmmC);
- e->vmulpd(xmmA, xmmB, anyptr_gpC);
- e->vmulpd(ymmA, ymmB, ymmC);
- e->vmulpd(ymmA, ymmB, anyptr_gpC);
- e->vmulpd(zmmA, zmmB, zmmC);
- e->vmulpd(zmmA, zmmB, anyptr_gpC);
- e->vmulps(xmmA, xmmB, xmmC);
- e->vmulps(xmmA, xmmB, anyptr_gpC);
- e->vmulps(ymmA, ymmB, ymmC);
- e->vmulps(ymmA, ymmB, anyptr_gpC);
- e->vmulps(zmmA, zmmB, zmmC);
- e->vmulps(zmmA, zmmB, anyptr_gpC);
- e->vmulsd(xmmA, xmmB, xmmC);
- e->vmulsd(xmmA, xmmB, anyptr_gpC);
- e->vmulss(xmmA, xmmB, xmmC);
- e->vmulss(xmmA, xmmB, anyptr_gpC);
- e->vorpd(xmmA, xmmB, xmmC);
- e->vorpd(xmmA, xmmB, anyptr_gpC);
- e->vorpd(ymmA, ymmB, ymmC);
- e->vorpd(ymmA, ymmB, anyptr_gpC);
- e->vorpd(zmmA, zmmB, zmmC);
- e->vorpd(zmmA, zmmB, anyptr_gpC);
- e->vorps(xmmA, xmmB, xmmC);
- e->vorps(xmmA, xmmB, anyptr_gpC);
- e->vorps(ymmA, ymmB, ymmC);
- e->vorps(ymmA, ymmB, anyptr_gpC);
- e->vorps(zmmA, zmmB, zmmC);
- e->vorps(zmmA, zmmB, anyptr_gpC);
- e->vpabsb(xmmA, xmmB);
- e->vpabsb(xmmA, anyptr_gpB);
- e->vpabsb(ymmA, ymmB);
- e->vpabsb(ymmA, anyptr_gpB);
- e->vpabsb(zmmA, zmmB);
- e->vpabsb(zmmA, anyptr_gpB);
- e->vpabsd(xmmA, xmmB);
- e->vpabsd(xmmA, anyptr_gpB);
- e->vpabsd(ymmA, ymmB);
- e->vpabsd(ymmA, anyptr_gpB);
- e->vpabsd(zmmA, zmmB);
- e->vpabsd(zmmA, anyptr_gpB);
- e->vpabsq(xmmA, xmmB);
- e->vpabsq(xmmA, anyptr_gpB);
- e->vpabsq(ymmA, ymmB);
- e->vpabsq(ymmA, anyptr_gpB);
- e->vpabsq(zmmA, zmmB);
- e->vpabsq(zmmA, anyptr_gpB);
- e->vpabsw(xmmA, xmmB);
- e->vpabsw(xmmA, anyptr_gpB);
- e->vpabsw(ymmA, ymmB);
- e->vpabsw(ymmA, anyptr_gpB);
- e->vpabsw(zmmA, zmmB);
- e->vpabsw(zmmA, anyptr_gpB);
- e->vpackssdw(xmmA, xmmB, xmmC);
- e->vpackssdw(xmmA, xmmB, anyptr_gpC);
- e->vpackssdw(ymmA, ymmB, ymmC);
- e->vpackssdw(ymmA, ymmB, anyptr_gpC);
- e->vpackssdw(zmmA, zmmB, zmmC);
- e->vpackssdw(zmmA, zmmB, anyptr_gpC);
- e->vpacksswb(xmmA, xmmB, xmmC);
- e->vpacksswb(xmmA, xmmB, anyptr_gpC);
- e->vpacksswb(ymmA, ymmB, ymmC);
- e->vpacksswb(ymmA, ymmB, anyptr_gpC);
- e->vpacksswb(zmmA, zmmB, zmmC);
- e->vpacksswb(zmmA, zmmB, anyptr_gpC);
- e->vpackusdw(xmmA, xmmB, xmmC);
- e->vpackusdw(xmmA, xmmB, anyptr_gpC);
- e->vpackusdw(ymmA, ymmB, ymmC);
- e->vpackusdw(ymmA, ymmB, anyptr_gpC);
- e->vpackusdw(zmmA, zmmB, zmmC);
- e->vpackusdw(zmmA, zmmB, anyptr_gpC);
- e->vpackuswb(xmmA, xmmB, xmmC);
- e->vpackuswb(xmmA, xmmB, anyptr_gpC);
- e->vpackuswb(ymmA, ymmB, ymmC);
- e->vpackuswb(ymmA, ymmB, anyptr_gpC);
- e->vpackuswb(zmmA, zmmB, zmmC);
- e->vpackuswb(zmmA, zmmB, anyptr_gpC);
- e->vpaddb(xmmA, xmmB, xmmC);
- e->vpaddb(xmmA, xmmB, anyptr_gpC);
- e->vpaddb(ymmA, ymmB, ymmC);
- e->vpaddb(ymmA, ymmB, anyptr_gpC);
- e->vpaddb(zmmA, zmmB, zmmC);
- e->vpaddb(zmmA, zmmB, anyptr_gpC);
- e->vpaddd(xmmA, xmmB, xmmC);
- e->vpaddd(xmmA, xmmB, anyptr_gpC);
- e->vpaddd(ymmA, ymmB, ymmC);
- e->vpaddd(ymmA, ymmB, anyptr_gpC);
- e->vpaddd(zmmA, zmmB, zmmC);
- e->vpaddd(zmmA, zmmB, anyptr_gpC);
- e->vpaddq(xmmA, xmmB, xmmC);
- e->vpaddq(xmmA, xmmB, anyptr_gpC);
- e->vpaddq(ymmA, ymmB, ymmC);
- e->vpaddq(ymmA, ymmB, anyptr_gpC);
- e->vpaddq(zmmA, zmmB, zmmC);
- e->vpaddq(zmmA, zmmB, anyptr_gpC);
- e->vpaddsb(xmmA, xmmB, xmmC);
- e->vpaddsb(xmmA, xmmB, anyptr_gpC);
- e->vpaddsb(ymmA, ymmB, ymmC);
- e->vpaddsb(ymmA, ymmB, anyptr_gpC);
- e->vpaddsb(zmmA, zmmB, zmmC);
- e->vpaddsb(zmmA, zmmB, anyptr_gpC);
- e->vpaddsw(xmmA, xmmB, xmmC);
- e->vpaddsw(xmmA, xmmB, anyptr_gpC);
- e->vpaddsw(ymmA, ymmB, ymmC);
- e->vpaddsw(ymmA, ymmB, anyptr_gpC);
- e->vpaddsw(zmmA, zmmB, zmmC);
- e->vpaddsw(zmmA, zmmB, anyptr_gpC);
- e->vpaddusb(xmmA, xmmB, xmmC);
- e->vpaddusb(xmmA, xmmB, anyptr_gpC);
- e->vpaddusb(ymmA, ymmB, ymmC);
- e->vpaddusb(ymmA, ymmB, anyptr_gpC);
- e->vpaddusb(zmmA, zmmB, zmmC);
- e->vpaddusb(zmmA, zmmB, anyptr_gpC);
- e->vpaddusw(xmmA, xmmB, xmmC);
- e->vpaddusw(xmmA, xmmB, anyptr_gpC);
- e->vpaddusw(ymmA, ymmB, ymmC);
- e->vpaddusw(ymmA, ymmB, anyptr_gpC);
- e->vpaddusw(zmmA, zmmB, zmmC);
- e->vpaddusw(zmmA, zmmB, anyptr_gpC);
- e->vpaddw(xmmA, xmmB, xmmC);
- e->vpaddw(xmmA, xmmB, anyptr_gpC);
- e->vpaddw(ymmA, ymmB, ymmC);
- e->vpaddw(ymmA, ymmB, anyptr_gpC);
- e->vpaddw(zmmA, zmmB, zmmC);
- e->vpaddw(zmmA, zmmB, anyptr_gpC);
- e->vpalignr(xmmA, xmmB, xmmC, 0);
- e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
- e->vpalignr(ymmA, ymmB, ymmC, 0);
- e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
- e->vpalignr(zmmA, zmmB, zmmC, 0);
- e->vpalignr(zmmA, zmmB, anyptr_gpC, 0);
- e->vpandd(xmmA, xmmB, xmmC);
- e->vpandd(xmmA, xmmB, anyptr_gpC);
- e->vpandd(ymmA, ymmB, ymmC);
- e->vpandd(ymmA, ymmB, anyptr_gpC);
- e->vpandd(zmmA, zmmB, zmmC);
- e->vpandd(zmmA, zmmB, anyptr_gpC);
- e->vpandnd(xmmA, xmmB, xmmC);
- e->vpandnd(xmmA, xmmB, anyptr_gpC);
- e->vpandnd(ymmA, ymmB, ymmC);
- e->vpandnd(ymmA, ymmB, anyptr_gpC);
- e->vpandnd(zmmA, zmmB, zmmC);
- e->vpandnd(zmmA, zmmB, anyptr_gpC);
- e->vpandnq(xmmA, xmmB, xmmC);
- e->vpandnq(xmmA, xmmB, anyptr_gpC);
- e->vpandnq(ymmA, ymmB, ymmC);
- e->vpandnq(ymmA, ymmB, anyptr_gpC);
- e->vpandnq(zmmA, zmmB, zmmC);
- e->vpandnq(zmmA, zmmB, anyptr_gpC);
- e->vpandq(xmmA, xmmB, xmmC);
- e->vpandq(xmmA, xmmB, anyptr_gpC);
- e->vpandq(ymmA, ymmB, ymmC);
- e->vpandq(ymmA, ymmB, anyptr_gpC);
- e->vpandq(zmmA, zmmB, zmmC);
- e->vpandq(zmmA, zmmB, anyptr_gpC);
- e->vpavgb(xmmA, xmmB, xmmC);
- e->vpavgb(xmmA, xmmB, anyptr_gpC);
- e->vpavgb(ymmA, ymmB, ymmC);
- e->vpavgb(ymmA, ymmB, anyptr_gpC);
- e->vpavgb(zmmA, zmmB, zmmC);
- e->vpavgb(zmmA, zmmB, anyptr_gpC);
- e->vpavgw(xmmA, xmmB, xmmC);
- e->vpavgw(xmmA, xmmB, anyptr_gpC);
- e->vpavgw(ymmA, ymmB, ymmC);
- e->vpavgw(ymmA, ymmB, anyptr_gpC);
- e->vpavgw(zmmA, zmmB, zmmC);
- e->vpavgw(zmmA, zmmB, anyptr_gpC);
- e->vpblendmb(xmmA, xmmB, xmmC);
- e->vpblendmb(xmmA, xmmB, anyptr_gpC);
- e->vpblendmb(ymmA, ymmB, ymmC);
- e->vpblendmb(ymmA, ymmB, anyptr_gpC);
- e->vpblendmb(zmmA, zmmB, zmmC);
- e->vpblendmb(zmmA, zmmB, anyptr_gpC);
- e->vpblendmd(xmmA, xmmB, xmmC);
- e->vpblendmd(xmmA, xmmB, anyptr_gpC);
- e->vpblendmd(ymmA, ymmB, ymmC);
- e->vpblendmd(ymmA, ymmB, anyptr_gpC);
- e->vpblendmd(zmmA, zmmB, zmmC);
- e->vpblendmd(zmmA, zmmB, anyptr_gpC);
- e->vpblendmq(xmmA, xmmB, xmmC);
- e->vpblendmq(xmmA, xmmB, anyptr_gpC);
- e->vpblendmq(ymmA, ymmB, ymmC);
- e->vpblendmq(ymmA, ymmB, anyptr_gpC);
- e->vpblendmq(zmmA, zmmB, zmmC);
- e->vpblendmq(zmmA, zmmB, anyptr_gpC);
- e->vpblendmw(xmmA, xmmB, xmmC);
- e->vpblendmw(xmmA, xmmB, anyptr_gpC);
- e->vpblendmw(ymmA, ymmB, ymmC);
- e->vpblendmw(ymmA, ymmB, anyptr_gpC);
- e->vpblendmw(zmmA, zmmB, zmmC);
- e->vpblendmw(zmmA, zmmB, anyptr_gpC);
- e->vpbroadcastb(xmmA, gdB);
- e->vpbroadcastb(xmmA, gzB);
- e->vpbroadcastb(xmmA, xmmB);
- e->vpbroadcastb(xmmA, anyptr_gpB);
- e->vpbroadcastb(ymmA, gdB);
- e->vpbroadcastb(ymmA, gzB);
- e->vpbroadcastb(ymmA, xmmB);
- e->vpbroadcastb(ymmA, anyptr_gpB);
- e->vpbroadcastb(zmmA, gdB);
- e->vpbroadcastb(zmmA, gzB);
- e->vpbroadcastb(zmmA, xmmB);
- e->vpbroadcastb(zmmA, anyptr_gpB);
- e->vpbroadcastd(xmmA, gdB);
- e->vpbroadcastd(xmmA, gzB);
- e->vpbroadcastd(xmmA, xmmB);
- e->vpbroadcastd(xmmA, anyptr_gpB);
- e->vpbroadcastd(ymmA, gdB);
- e->vpbroadcastd(ymmA, gzB);
- e->vpbroadcastd(ymmA, xmmB);
- e->vpbroadcastd(ymmA, anyptr_gpB);
- e->vpbroadcastd(zmmA, gdB);
- e->vpbroadcastd(zmmA, gzB);
- e->vpbroadcastd(zmmA, xmmB);
- e->vpbroadcastd(zmmA, anyptr_gpB);
- e->vpbroadcastmb2q(xmmA, kB);
- e->vpbroadcastmb2q(ymmA, kB);
- e->vpbroadcastmb2q(zmmA, kB);
- e->vpbroadcastmw2d(xmmA, kB);
- e->vpbroadcastmw2d(ymmA, kB);
- e->vpbroadcastmw2d(zmmA, kB);
- if (isX64) e->vpbroadcastq(xmmA, gzB);
- e->vpbroadcastq(xmmA, xmmB);
- e->vpbroadcastq(xmmA, anyptr_gpB);
- if (isX64) e->vpbroadcastq(ymmA, gzB);
- e->vpbroadcastq(ymmA, xmmB);
- e->vpbroadcastq(ymmA, anyptr_gpB);
- if (isX64) e->vpbroadcastq(zmmA, gzB);
- e->vpbroadcastq(zmmA, xmmB);
- e->vpbroadcastq(zmmA, anyptr_gpB);
- e->vpbroadcastw(xmmA, gdB);
- e->vpbroadcastw(xmmA, gzB);
- e->vpbroadcastw(xmmA, xmmB);
- e->vpbroadcastw(xmmA, anyptr_gpB);
- e->vpbroadcastw(ymmA, gdB);
- e->vpbroadcastw(ymmA, gzB);
- e->vpbroadcastw(ymmA, xmmB);
- e->vpbroadcastw(ymmA, anyptr_gpB);
- e->vpbroadcastw(zmmA, gdB);
- e->vpbroadcastw(zmmA, gzB);
- e->vpbroadcastw(zmmA, xmmB);
- e->vpbroadcastw(zmmA, anyptr_gpB);
- e->vpcmpb(kA, xmmB, xmmC, 0);
- e->vpcmpb(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpb(kA, ymmB, ymmC, 0);
- e->vpcmpb(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpb(kA, zmmB, zmmC, 0);
- e->vpcmpb(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpd(kA, xmmB, xmmC, 0);
- e->vpcmpd(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpd(kA, ymmB, ymmC, 0);
- e->vpcmpd(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpd(kA, zmmB, zmmC, 0);
- e->vpcmpd(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpeqb(kA, xmmB, xmmC);
- e->vpcmpeqb(kA, xmmB, anyptr_gpC);
- e->vpcmpeqb(kA, ymmB, ymmC);
- e->vpcmpeqb(kA, ymmB, anyptr_gpC);
- e->vpcmpeqb(kA, zmmB, zmmC);
- e->vpcmpeqb(kA, zmmB, anyptr_gpC);
- e->vpcmpeqd(kA, xmmB, xmmC);
- e->vpcmpeqd(kA, xmmB, anyptr_gpC);
- e->vpcmpeqd(kA, ymmB, ymmC);
- e->vpcmpeqd(kA, ymmB, anyptr_gpC);
- e->vpcmpeqd(kA, zmmB, zmmC);
- e->vpcmpeqd(kA, zmmB, anyptr_gpC);
- e->vpcmpeqq(kA, xmmB, xmmC);
- e->vpcmpeqq(kA, xmmB, anyptr_gpC);
- e->vpcmpeqq(kA, ymmB, ymmC);
- e->vpcmpeqq(kA, ymmB, anyptr_gpC);
- e->vpcmpeqq(kA, zmmB, zmmC);
- e->vpcmpeqq(kA, zmmB, anyptr_gpC);
- e->vpcmpeqw(kA, xmmB, xmmC);
- e->vpcmpeqw(kA, xmmB, anyptr_gpC);
- e->vpcmpeqw(kA, ymmB, ymmC);
- e->vpcmpeqw(kA, ymmB, anyptr_gpC);
- e->vpcmpeqw(kA, zmmB, zmmC);
- e->vpcmpeqw(kA, zmmB, anyptr_gpC);
- e->vpcmpgtb(kA, xmmB, xmmC);
- e->vpcmpgtb(kA, xmmB, anyptr_gpC);
- e->vpcmpgtb(kA, ymmB, ymmC);
- e->vpcmpgtb(kA, ymmB, anyptr_gpC);
- e->vpcmpgtb(kA, zmmB, zmmC);
- e->vpcmpgtb(kA, zmmB, anyptr_gpC);
- e->vpcmpgtd(kA, xmmB, xmmC);
- e->vpcmpgtd(kA, xmmB, anyptr_gpC);
- e->vpcmpgtd(kA, ymmB, ymmC);
- e->vpcmpgtd(kA, ymmB, anyptr_gpC);
- e->vpcmpgtd(kA, zmmB, zmmC);
- e->vpcmpgtd(kA, zmmB, anyptr_gpC);
- e->vpcmpgtq(kA, xmmB, xmmC);
- e->vpcmpgtq(kA, xmmB, anyptr_gpC);
- e->vpcmpgtq(kA, ymmB, ymmC);
- e->vpcmpgtq(kA, ymmB, anyptr_gpC);
- e->vpcmpgtq(kA, zmmB, zmmC);
- e->vpcmpgtq(kA, zmmB, anyptr_gpC);
- e->vpcmpgtw(kA, xmmB, xmmC);
- e->vpcmpgtw(kA, xmmB, anyptr_gpC);
- e->vpcmpgtw(kA, ymmB, ymmC);
- e->vpcmpgtw(kA, ymmB, anyptr_gpC);
- e->vpcmpgtw(kA, zmmB, zmmC);
- e->vpcmpgtw(kA, zmmB, anyptr_gpC);
- e->vpcmpq(kA, xmmB, xmmC, 0);
- e->vpcmpq(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpq(kA, ymmB, ymmC, 0);
- e->vpcmpq(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpq(kA, zmmB, zmmC, 0);
- e->vpcmpq(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpub(kA, xmmB, xmmC, 0);
- e->vpcmpub(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpub(kA, ymmB, ymmC, 0);
- e->vpcmpub(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpub(kA, zmmB, zmmC, 0);
- e->vpcmpub(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpud(kA, xmmB, xmmC, 0);
- e->vpcmpud(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpud(kA, ymmB, ymmC, 0);
- e->vpcmpud(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpud(kA, zmmB, zmmC, 0);
- e->vpcmpud(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpuq(kA, xmmB, xmmC, 0);
- e->vpcmpuq(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpuq(kA, ymmB, ymmC, 0);
- e->vpcmpuq(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpuq(kA, zmmB, zmmC, 0);
- e->vpcmpuq(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpuw(kA, xmmB, xmmC, 0);
- e->vpcmpuw(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpuw(kA, ymmB, ymmC, 0);
- e->vpcmpuw(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpuw(kA, zmmB, zmmC, 0);
- e->vpcmpuw(kA, zmmB, anyptr_gpC, 0);
- e->vpcmpw(kA, xmmB, xmmC, 0);
- e->vpcmpw(kA, xmmB, anyptr_gpC, 0);
- e->vpcmpw(kA, ymmB, ymmC, 0);
- e->vpcmpw(kA, ymmB, anyptr_gpC, 0);
- e->vpcmpw(kA, zmmB, zmmC, 0);
- e->vpcmpw(kA, zmmB, anyptr_gpC, 0);
- e->vpcompressd(xmmA, xmmB);
- e->vpcompressd(anyptr_gpA, xmmB);
- e->vpcompressd(ymmA, ymmB);
- e->vpcompressd(anyptr_gpA, ymmB);
- e->vpcompressd(zmmA, zmmB);
- e->vpcompressd(anyptr_gpA, zmmB);
- e->vpcompressq(xmmA, xmmB);
- e->vpcompressq(anyptr_gpA, xmmB);
- e->vpcompressq(ymmA, ymmB);
- e->vpcompressq(anyptr_gpA, ymmB);
- e->vpcompressq(zmmA, zmmB);
- e->vpcompressq(anyptr_gpA, zmmB);
- e->vpconflictd(xmmA, xmmB);
- e->vpconflictd(xmmA, anyptr_gpB);
- e->vpconflictd(ymmA, ymmB);
- e->vpconflictd(ymmA, anyptr_gpB);
- e->vpconflictd(zmmA, zmmB);
- e->vpconflictd(zmmA, anyptr_gpB);
- e->vpconflictq(xmmA, xmmB);
- e->vpconflictq(xmmA, anyptr_gpB);
- e->vpconflictq(ymmA, ymmB);
- e->vpconflictq(ymmA, anyptr_gpB);
- e->vpconflictq(zmmA, zmmB);
- e->vpconflictq(zmmA, anyptr_gpB);
- e->vpermb(xmmA, xmmB, xmmC);
- e->vpermb(xmmA, xmmB, anyptr_gpC);
- e->vpermb(ymmA, ymmB, ymmC);
- e->vpermb(ymmA, ymmB, anyptr_gpC);
- e->vpermb(zmmA, zmmB, zmmC);
- e->vpermb(zmmA, zmmB, anyptr_gpC);
- e->vpermd(ymmA, ymmB, ymmC);
- e->vpermd(ymmA, ymmB, anyptr_gpC);
- e->vpermd(zmmA, zmmB, zmmC);
- e->vpermd(zmmA, zmmB, anyptr_gpC);
- e->vpermi2b(xmmA, xmmB, xmmC);
- e->vpermi2b(xmmA, xmmB, anyptr_gpC);
- e->vpermi2b(ymmA, ymmB, ymmC);
- e->vpermi2b(ymmA, ymmB, anyptr_gpC);
- e->vpermi2b(zmmA, zmmB, zmmC);
- e->vpermi2b(zmmA, zmmB, anyptr_gpC);
- e->vpermi2d(xmmA, xmmB, xmmC);
- e->vpermi2d(xmmA, xmmB, anyptr_gpC);
- e->vpermi2d(ymmA, ymmB, ymmC);
- e->vpermi2d(ymmA, ymmB, anyptr_gpC);
- e->vpermi2d(zmmA, zmmB, zmmC);
- e->vpermi2d(zmmA, zmmB, anyptr_gpC);
- e->vpermi2pd(xmmA, xmmB, xmmC);
- e->vpermi2pd(xmmA, xmmB, anyptr_gpC);
- e->vpermi2pd(ymmA, ymmB, ymmC);
- e->vpermi2pd(ymmA, ymmB, anyptr_gpC);
- e->vpermi2pd(zmmA, zmmB, zmmC);
- e->vpermi2pd(zmmA, zmmB, anyptr_gpC);
- e->vpermi2ps(xmmA, xmmB, xmmC);
- e->vpermi2ps(xmmA, xmmB, anyptr_gpC);
- e->vpermi2ps(ymmA, ymmB, ymmC);
- e->vpermi2ps(ymmA, ymmB, anyptr_gpC);
- e->vpermi2ps(zmmA, zmmB, zmmC);
- e->vpermi2ps(zmmA, zmmB, anyptr_gpC);
- e->vpermi2q(xmmA, xmmB, xmmC);
- e->vpermi2q(xmmA, xmmB, anyptr_gpC);
- e->vpermi2q(ymmA, ymmB, ymmC);
- e->vpermi2q(ymmA, ymmB, anyptr_gpC);
- e->vpermi2q(zmmA, zmmB, zmmC);
- e->vpermi2q(zmmA, zmmB, anyptr_gpC);
- e->vpermi2w(xmmA, xmmB, xmmC);
- e->vpermi2w(xmmA, xmmB, anyptr_gpC);
- e->vpermi2w(ymmA, ymmB, ymmC);
- e->vpermi2w(ymmA, ymmB, anyptr_gpC);
- e->vpermi2w(zmmA, zmmB, zmmC);
- e->vpermi2w(zmmA, zmmB, anyptr_gpC);
- e->vpermilpd(xmmA, xmmB, xmmC);
- e->vpermilpd(xmmA, xmmB, anyptr_gpC);
- e->vpermilpd(ymmA, ymmB, ymmC);
- e->vpermilpd(ymmA, ymmB, anyptr_gpC);
- e->vpermilpd(zmmA, zmmB, zmmC);
- e->vpermilpd(zmmA, zmmB, anyptr_gpC);
- e->vpermilpd(xmmA, xmmB, 0);
- e->vpermilpd(xmmA, anyptr_gpB, 0);
- e->vpermilpd(ymmA, ymmB, 0);
- e->vpermilpd(ymmA, anyptr_gpB, 0);
- e->vpermilpd(zmmA, zmmB, 0);
- e->vpermilpd(zmmA, anyptr_gpB, 0);
- e->vpermilps(xmmA, xmmB, xmmC);
- e->vpermilps(xmmA, xmmB, anyptr_gpC);
- e->vpermilps(ymmA, ymmB, ymmC);
- e->vpermilps(ymmA, ymmB, anyptr_gpC);
- e->vpermilps(zmmA, zmmB, zmmC);
- e->vpermilps(zmmA, zmmB, anyptr_gpC);
- e->vpermilps(xmmA, xmmB, 0);
- e->vpermilps(xmmA, anyptr_gpB, 0);
- e->vpermilps(ymmA, ymmB, 0);
- e->vpermilps(ymmA, anyptr_gpB, 0);
- e->vpermilps(zmmA, zmmB, 0);
- e->vpermilps(zmmA, anyptr_gpB, 0);
- e->vpermq(ymmA, ymmB, ymmC);
- e->vpermq(ymmA, ymmB, anyptr_gpC);
- e->vpermq(zmmA, zmmB, zmmC);
- e->vpermq(zmmA, zmmB, anyptr_gpC);
- e->vpermq(ymmA, ymmB, 0);
- e->vpermq(ymmA, anyptr_gpB, 0);
- e->vpermq(zmmA, zmmB, 0);
- e->vpermq(zmmA, anyptr_gpB, 0);
- e->vpermt2b(xmmA, xmmB, xmmC);
- e->vpermt2b(xmmA, xmmB, anyptr_gpC);
- e->vpermt2b(ymmA, ymmB, ymmC);
- e->vpermt2b(ymmA, ymmB, anyptr_gpC);
- e->vpermt2b(zmmA, zmmB, zmmC);
- e->vpermt2b(zmmA, zmmB, anyptr_gpC);
- e->vpermt2d(xmmA, xmmB, xmmC);
- e->vpermt2d(xmmA, xmmB, anyptr_gpC);
- e->vpermt2d(ymmA, ymmB, ymmC);
- e->vpermt2d(ymmA, ymmB, anyptr_gpC);
- e->vpermt2d(zmmA, zmmB, zmmC);
- e->vpermt2d(zmmA, zmmB, anyptr_gpC);
- e->vpermt2pd(xmmA, xmmB, xmmC);
- e->vpermt2pd(xmmA, xmmB, anyptr_gpC);
- e->vpermt2pd(ymmA, ymmB, ymmC);
- e->vpermt2pd(ymmA, ymmB, anyptr_gpC);
- e->vpermt2pd(zmmA, zmmB, zmmC);
- e->vpermt2pd(zmmA, zmmB, anyptr_gpC);
- e->vpermt2ps(xmmA, xmmB, xmmC);
- e->vpermt2ps(xmmA, xmmB, anyptr_gpC);
- e->vpermt2ps(ymmA, ymmB, ymmC);
- e->vpermt2ps(ymmA, ymmB, anyptr_gpC);
- e->vpermt2ps(zmmA, zmmB, zmmC);
- e->vpermt2ps(zmmA, zmmB, anyptr_gpC);
- e->vpermt2q(xmmA, xmmB, xmmC);
- e->vpermt2q(xmmA, xmmB, anyptr_gpC);
- e->vpermt2q(ymmA, ymmB, ymmC);
- e->vpermt2q(ymmA, ymmB, anyptr_gpC);
- e->vpermt2q(zmmA, zmmB, zmmC);
- e->vpermt2q(zmmA, zmmB, anyptr_gpC);
- e->vpermt2w(xmmA, xmmB, xmmC);
- e->vpermt2w(xmmA, xmmB, anyptr_gpC);
- e->vpermt2w(ymmA, ymmB, ymmC);
- e->vpermt2w(ymmA, ymmB, anyptr_gpC);
- e->vpermt2w(zmmA, zmmB, zmmC);
- e->vpermt2w(zmmA, zmmB, anyptr_gpC);
- e->vpermw(xmmA, xmmB, xmmC);
- e->vpermw(xmmA, xmmB, anyptr_gpC);
- e->vpermw(ymmA, ymmB, ymmC);
- e->vpermw(ymmA, ymmB, anyptr_gpC);
- e->vpermw(zmmA, zmmB, zmmC);
- e->vpermw(zmmA, zmmB, anyptr_gpC);
- e->vpexpandd(xmmA, xmmB);
- e->vpexpandd(xmmA, anyptr_gpB);
- e->vpexpandd(ymmA, ymmB);
- e->vpexpandd(ymmA, anyptr_gpB);
- e->vpexpandd(zmmA, zmmB);
- e->vpexpandd(zmmA, anyptr_gpB);
- e->vpexpandq(xmmA, xmmB);
- e->vpexpandq(xmmA, anyptr_gpB);
- e->vpexpandq(ymmA, ymmB);
- e->vpexpandq(ymmA, anyptr_gpB);
- e->vpexpandq(zmmA, zmmB);
- e->vpexpandq(zmmA, anyptr_gpB);
- e->vpextrb(gdA, xmmB, 0);
- e->vpextrb(anyptr_gpA, xmmB, 0);
- e->vpextrb(gzA, xmmB, 0);
- e->vpextrd(gdA, xmmB, 0);
- e->vpextrd(anyptr_gpA, xmmB, 0);
- if (isX64) e->vpextrd(gzA, xmmB, 0);
- if (isX64) e->vpextrq(gzA, xmmB, 0);
- e->vpextrq(anyptr_gpA, xmmB, 0);
- e->vpextrw(gdA, xmmB, 0);
- e->vpextrw(gzA, xmmB, 0);
- e->vpextrw(gdA, xmmB, 0);
- e->vpextrw(anyptr_gpA, xmmB, 0);
- e->vpextrw(gzA, xmmB, 0);
- e->vpgatherdd(xmmA, vx_ptr);
- e->vpgatherdd(ymmA, vy_ptr);
- e->vpgatherdd(zmmA, vz_ptr);
- e->vpgatherdq(xmmA, vx_ptr);
- e->vpgatherdq(ymmA, vy_ptr);
- e->vpgatherdq(zmmA, vz_ptr);
- e->vpgatherqd(xmmA, vx_ptr);
- e->vpgatherqd(ymmA, vy_ptr);
- e->vpgatherqd(zmmA, vz_ptr);
- e->vpgatherqq(xmmA, vx_ptr);
- e->vpgatherqq(ymmA, vy_ptr);
- e->vpgatherqq(zmmA, vz_ptr);
- e->vpinsrb(xmmA, xmmB, gdC, 0);
- e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrb(xmmA, xmmB, gzC, 0);
- e->vpinsrd(xmmA, xmmB, gdC, 0);
- e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrd(xmmA, xmmB, gzC, 0);
- if (isX64) e->vpinsrq(xmmA, xmmB, gzC, 0);
- e->vpinsrq(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrw(xmmA, xmmB, gdC, 0);
- e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
- e->vpinsrw(xmmA, xmmB, gzC, 0);
- e->vplzcntd(xmmA, xmmB);
- e->vplzcntd(xmmA, anyptr_gpB);
- e->vplzcntd(ymmA, ymmB);
- e->vplzcntd(ymmA, anyptr_gpB);
- e->vplzcntd(zmmA, zmmB);
- e->vplzcntd(zmmA, anyptr_gpB);
- e->vplzcntq(xmmA, xmmB);
- e->vplzcntq(xmmA, anyptr_gpB);
- e->vplzcntq(ymmA, ymmB);
- e->vplzcntq(ymmA, anyptr_gpB);
- e->vplzcntq(zmmA, zmmB);
- e->vplzcntq(zmmA, anyptr_gpB);
- e->vpmadd52huq(xmmA, xmmB, xmmC);
- e->vpmadd52huq(xmmA, xmmB, anyptr_gpC);
- e->vpmadd52huq(ymmA, ymmB, ymmC);
- e->vpmadd52huq(ymmA, ymmB, anyptr_gpC);
- e->vpmadd52huq(zmmA, zmmB, zmmC);
- e->vpmadd52huq(zmmA, zmmB, anyptr_gpC);
- e->vpmadd52luq(xmmA, xmmB, xmmC);
- e->vpmadd52luq(xmmA, xmmB, anyptr_gpC);
- e->vpmadd52luq(ymmA, ymmB, ymmC);
- e->vpmadd52luq(ymmA, ymmB, anyptr_gpC);
- e->vpmadd52luq(zmmA, zmmB, zmmC);
- e->vpmadd52luq(zmmA, zmmB, anyptr_gpC);
- e->vpmaddubsw(xmmA, xmmB, xmmC);
- e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
- e->vpmaddubsw(ymmA, ymmB, ymmC);
- e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
- e->vpmaddubsw(zmmA, zmmB, zmmC);
- e->vpmaddubsw(zmmA, zmmB, anyptr_gpC);
- e->vpmaddwd(xmmA, xmmB, xmmC);
- e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
- e->vpmaddwd(ymmA, ymmB, ymmC);
- e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
- e->vpmaddwd(zmmA, zmmB, zmmC);
- e->vpmaddwd(zmmA, zmmB, anyptr_gpC);
- e->vpmaxsb(xmmA, xmmB, xmmC);
- e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsb(ymmA, ymmB, ymmC);
- e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsb(zmmA, zmmB, zmmC);
- e->vpmaxsb(zmmA, zmmB, anyptr_gpC);
- e->vpmaxsd(xmmA, xmmB, xmmC);
- e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsd(ymmA, ymmB, ymmC);
- e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsd(zmmA, zmmB, zmmC);
- e->vpmaxsd(zmmA, zmmB, anyptr_gpC);
- e->vpmaxsq(xmmA, xmmB, xmmC);
- e->vpmaxsq(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsq(ymmA, ymmB, ymmC);
- e->vpmaxsq(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsq(zmmA, zmmB, zmmC);
- e->vpmaxsq(zmmA, zmmB, anyptr_gpC);
- e->vpmaxsw(xmmA, xmmB, xmmC);
- e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
- e->vpmaxsw(ymmA, ymmB, ymmC);
- e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
- e->vpmaxsw(zmmA, zmmB, zmmC);
- e->vpmaxsw(zmmA, zmmB, anyptr_gpC);
- e->vpmaxub(xmmA, xmmB, xmmC);
- e->vpmaxub(xmmA, xmmB, anyptr_gpC);
- e->vpmaxub(ymmA, ymmB, ymmC);
- e->vpmaxub(ymmA, ymmB, anyptr_gpC);
- e->vpmaxub(zmmA, zmmB, zmmC);
- e->vpmaxub(zmmA, zmmB, anyptr_gpC);
- e->vpmaxud(xmmA, xmmB, xmmC);
- e->vpmaxud(xmmA, xmmB, anyptr_gpC);
- e->vpmaxud(ymmA, ymmB, ymmC);
- e->vpmaxud(ymmA, ymmB, anyptr_gpC);
- e->vpmaxud(zmmA, zmmB, zmmC);
- e->vpmaxud(zmmA, zmmB, anyptr_gpC);
- e->vpmaxuq(xmmA, xmmB, xmmC);
- e->vpmaxuq(xmmA, xmmB, anyptr_gpC);
- e->vpmaxuq(ymmA, ymmB, ymmC);
- e->vpmaxuq(ymmA, ymmB, anyptr_gpC);
- e->vpmaxuq(zmmA, zmmB, zmmC);
- e->vpmaxuq(zmmA, zmmB, anyptr_gpC);
- e->vpmaxuw(xmmA, xmmB, xmmC);
- e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
- e->vpmaxuw(ymmA, ymmB, ymmC);
- e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
- e->vpmaxuw(zmmA, zmmB, zmmC);
- e->vpmaxuw(zmmA, zmmB, anyptr_gpC);
- e->vpminsb(xmmA, xmmB, xmmC);
- e->vpminsb(xmmA, xmmB, anyptr_gpC);
- e->vpminsb(ymmA, ymmB, ymmC);
- e->vpminsb(ymmA, ymmB, anyptr_gpC);
- e->vpminsb(zmmA, zmmB, zmmC);
- e->vpminsb(zmmA, zmmB, anyptr_gpC);
- e->vpminsd(xmmA, xmmB, xmmC);
- e->vpminsd(xmmA, xmmB, anyptr_gpC);
- e->vpminsd(ymmA, ymmB, ymmC);
- e->vpminsd(ymmA, ymmB, anyptr_gpC);
- e->vpminsd(zmmA, zmmB, zmmC);
- e->vpminsd(zmmA, zmmB, anyptr_gpC);
- e->vpminsq(xmmA, xmmB, xmmC);
- e->vpminsq(xmmA, xmmB, anyptr_gpC);
- e->vpminsq(ymmA, ymmB, ymmC);
- e->vpminsq(ymmA, ymmB, anyptr_gpC);
- e->vpminsq(zmmA, zmmB, zmmC);
- e->vpminsq(zmmA, zmmB, anyptr_gpC);
- e->vpminsw(xmmA, xmmB, xmmC);
- e->vpminsw(xmmA, xmmB, anyptr_gpC);
- e->vpminsw(ymmA, ymmB, ymmC);
- e->vpminsw(ymmA, ymmB, anyptr_gpC);
- e->vpminsw(zmmA, zmmB, zmmC);
- e->vpminsw(zmmA, zmmB, anyptr_gpC);
- e->vpminub(xmmA, xmmB, xmmC);
- e->vpminub(xmmA, xmmB, anyptr_gpC);
- e->vpminub(ymmA, ymmB, ymmC);
- e->vpminub(ymmA, ymmB, anyptr_gpC);
- e->vpminub(zmmA, zmmB, zmmC);
- e->vpminub(zmmA, zmmB, anyptr_gpC);
- e->vpminud(xmmA, xmmB, xmmC);
- e->vpminud(xmmA, xmmB, anyptr_gpC);
- e->vpminud(ymmA, ymmB, ymmC);
- e->vpminud(ymmA, ymmB, anyptr_gpC);
- e->vpminud(zmmA, zmmB, zmmC);
- e->vpminud(zmmA, zmmB, anyptr_gpC);
- e->vpminuq(xmmA, xmmB, xmmC);
- e->vpminuq(xmmA, xmmB, anyptr_gpC);
- e->vpminuq(ymmA, ymmB, ymmC);
- e->vpminuq(ymmA, ymmB, anyptr_gpC);
- e->vpminuq(zmmA, zmmB, zmmC);
- e->vpminuq(zmmA, zmmB, anyptr_gpC);
- e->vpminuw(xmmA, xmmB, xmmC);
- e->vpminuw(xmmA, xmmB, anyptr_gpC);
- e->vpminuw(ymmA, ymmB, ymmC);
- e->vpminuw(ymmA, ymmB, anyptr_gpC);
- e->vpminuw(zmmA, zmmB, zmmC);
- e->vpminuw(zmmA, zmmB, anyptr_gpC);
- e->vpmovb2m(kA, xmmB);
- e->vpmovb2m(kA, ymmB);
- e->vpmovb2m(kA, zmmB);
- e->vpmovd2m(kA, xmmB);
- e->vpmovd2m(kA, ymmB);
- e->vpmovd2m(kA, zmmB);
- e->vpmovdb(xmmA, xmmB);
- e->vpmovdb(anyptr_gpA, xmmB);
- e->vpmovdb(xmmA, ymmB);
- e->vpmovdb(anyptr_gpA, ymmB);
- e->vpmovdb(xmmA, zmmB);
- e->vpmovdb(anyptr_gpA, zmmB);
- e->vpmovdw(xmmA, xmmB);
- e->vpmovdw(anyptr_gpA, xmmB);
- e->vpmovdw(xmmA, ymmB);
- e->vpmovdw(anyptr_gpA, ymmB);
- e->vpmovdw(ymmA, zmmB);
- e->vpmovdw(anyptr_gpA, zmmB);
- e->vpmovm2b(xmmA, kB);
- e->vpmovm2b(ymmA, kB);
- e->vpmovm2b(zmmA, kB);
- e->vpmovm2d(xmmA, kB);
- e->vpmovm2d(ymmA, kB);
- e->vpmovm2d(zmmA, kB);
- e->vpmovm2q(xmmA, kB);
- e->vpmovm2q(ymmA, kB);
- e->vpmovm2q(zmmA, kB);
- e->vpmovm2w(xmmA, kB);
- e->vpmovm2w(ymmA, kB);
- e->vpmovm2w(zmmA, kB);
- e->vpmovq2m(kA, xmmB);
- e->vpmovq2m(kA, ymmB);
- e->vpmovq2m(kA, zmmB);
- e->vpmovqb(xmmA, xmmB);
- e->vpmovqb(anyptr_gpA, xmmB);
- e->vpmovqb(xmmA, ymmB);
- e->vpmovqb(anyptr_gpA, ymmB);
- e->vpmovqb(xmmA, zmmB);
- e->vpmovqb(anyptr_gpA, zmmB);
- e->vpmovqd(xmmA, xmmB);
- e->vpmovqd(anyptr_gpA, xmmB);
- e->vpmovqd(xmmA, ymmB);
- e->vpmovqd(anyptr_gpA, ymmB);
- e->vpmovqd(ymmA, zmmB);
- e->vpmovqd(anyptr_gpA, zmmB);
- e->vpmovqw(xmmA, xmmB);
- e->vpmovqw(anyptr_gpA, xmmB);
- e->vpmovqw(xmmA, ymmB);
- e->vpmovqw(anyptr_gpA, ymmB);
- e->vpmovqw(xmmA, zmmB);
- e->vpmovqw(anyptr_gpA, zmmB);
- e->vpmovsdb(xmmA, xmmB);
- e->vpmovsdb(anyptr_gpA, xmmB);
- e->vpmovsdb(xmmA, ymmB);
- e->vpmovsdb(anyptr_gpA, ymmB);
- e->vpmovsdb(xmmA, zmmB);
- e->vpmovsdb(anyptr_gpA, zmmB);
- e->vpmovsdw(xmmA, xmmB);
- e->vpmovsdw(anyptr_gpA, xmmB);
- e->vpmovsdw(xmmA, ymmB);
- e->vpmovsdw(anyptr_gpA, ymmB);
- e->vpmovsdw(ymmA, zmmB);
- e->vpmovsdw(anyptr_gpA, zmmB);
- e->vpmovsqb(xmmA, xmmB);
- e->vpmovsqb(anyptr_gpA, xmmB);
- e->vpmovsqb(xmmA, ymmB);
- e->vpmovsqb(anyptr_gpA, ymmB);
- e->vpmovsqb(xmmA, zmmB);
- e->vpmovsqb(anyptr_gpA, zmmB);
- e->vpmovsqd(xmmA, xmmB);
- e->vpmovsqd(anyptr_gpA, xmmB);
- e->vpmovsqd(xmmA, ymmB);
- e->vpmovsqd(anyptr_gpA, ymmB);
- e->vpmovsqd(ymmA, zmmB);
- e->vpmovsqd(anyptr_gpA, zmmB);
- e->vpmovsqw(xmmA, xmmB);
- e->vpmovsqw(anyptr_gpA, xmmB);
- e->vpmovsqw(xmmA, ymmB);
- e->vpmovsqw(anyptr_gpA, ymmB);
- e->vpmovsqw(xmmA, zmmB);
- e->vpmovsqw(anyptr_gpA, zmmB);
- e->vpmovswb(xmmA, xmmB);
- e->vpmovswb(anyptr_gpA, xmmB);
- e->vpmovswb(xmmA, ymmB);
- e->vpmovswb(anyptr_gpA, ymmB);
- e->vpmovswb(ymmA, zmmB);
- e->vpmovswb(anyptr_gpA, zmmB);
- e->vpmovsxbd(xmmA, xmmB);
- e->vpmovsxbd(xmmA, anyptr_gpB);
- e->vpmovsxbd(ymmA, xmmB);
- e->vpmovsxbd(ymmA, anyptr_gpB);
- e->vpmovsxbd(zmmA, xmmB);
- e->vpmovsxbd(zmmA, anyptr_gpB);
- e->vpmovsxbq(xmmA, xmmB);
- e->vpmovsxbq(xmmA, anyptr_gpB);
- e->vpmovsxbq(ymmA, xmmB);
- e->vpmovsxbq(ymmA, anyptr_gpB);
- e->vpmovsxbq(zmmA, xmmB);
- e->vpmovsxbq(zmmA, anyptr_gpB);
- e->vpmovsxbw(xmmA, xmmB);
- e->vpmovsxbw(xmmA, anyptr_gpB);
- e->vpmovsxbw(ymmA, xmmB);
- e->vpmovsxbw(ymmA, anyptr_gpB);
- e->vpmovsxbw(zmmA, ymmB);
- e->vpmovsxbw(zmmA, anyptr_gpB);
- e->vpmovsxdq(xmmA, xmmB);
- e->vpmovsxdq(xmmA, anyptr_gpB);
- e->vpmovsxdq(ymmA, xmmB);
- e->vpmovsxdq(ymmA, anyptr_gpB);
- e->vpmovsxdq(zmmA, ymmB);
- e->vpmovsxdq(zmmA, anyptr_gpB);
- e->vpmovsxwd(xmmA, xmmB);
- e->vpmovsxwd(xmmA, anyptr_gpB);
- e->vpmovsxwd(ymmA, xmmB);
- e->vpmovsxwd(ymmA, anyptr_gpB);
- e->vpmovsxwd(zmmA, ymmB);
- e->vpmovsxwd(zmmA, anyptr_gpB);
- e->vpmovsxwq(xmmA, xmmB);
- e->vpmovsxwq(xmmA, anyptr_gpB);
- e->vpmovsxwq(ymmA, xmmB);
- e->vpmovsxwq(ymmA, anyptr_gpB);
- e->vpmovsxwq(zmmA, xmmB);
- e->vpmovsxwq(zmmA, anyptr_gpB);
- e->vpmovusdb(xmmA, xmmB);
- e->vpmovusdb(anyptr_gpA, xmmB);
- e->vpmovusdb(xmmA, ymmB);
- e->vpmovusdb(anyptr_gpA, ymmB);
- e->vpmovusdb(xmmA, zmmB);
- e->vpmovusdb(anyptr_gpA, zmmB);
- e->vpmovusdw(xmmA, xmmB);
- e->vpmovusdw(anyptr_gpA, xmmB);
- e->vpmovusdw(xmmA, ymmB);
- e->vpmovusdw(anyptr_gpA, ymmB);
- e->vpmovusdw(ymmA, zmmB);
- e->vpmovusdw(anyptr_gpA, zmmB);
- e->vpmovusqb(xmmA, xmmB);
- e->vpmovusqb(anyptr_gpA, xmmB);
- e->vpmovusqb(xmmA, ymmB);
- e->vpmovusqb(anyptr_gpA, ymmB);
- e->vpmovusqb(xmmA, zmmB);
- e->vpmovusqb(anyptr_gpA, zmmB);
- e->vpmovusqd(xmmA, xmmB);
- e->vpmovusqd(anyptr_gpA, xmmB);
- e->vpmovusqd(xmmA, ymmB);
- e->vpmovusqd(anyptr_gpA, ymmB);
- e->vpmovusqd(ymmA, zmmB);
- e->vpmovusqd(anyptr_gpA, zmmB);
- e->vpmovusqw(xmmA, xmmB);
- e->vpmovusqw(anyptr_gpA, xmmB);
- e->vpmovusqw(xmmA, ymmB);
- e->vpmovusqw(anyptr_gpA, ymmB);
- e->vpmovusqw(xmmA, zmmB);
- e->vpmovusqw(anyptr_gpA, zmmB);
- e->vpmovuswb(xmmA, xmmB);
- e->vpmovuswb(anyptr_gpA, xmmB);
- e->vpmovuswb(xmmA, ymmB);
- e->vpmovuswb(anyptr_gpA, ymmB);
- e->vpmovuswb(ymmA, zmmB);
- e->vpmovuswb(anyptr_gpA, zmmB);
- e->vpmovw2m(kA, xmmB);
- e->vpmovw2m(kA, ymmB);
- e->vpmovw2m(kA, zmmB);
- e->vpmovwb(xmmA, xmmB);
- e->vpmovwb(anyptr_gpA, xmmB);
- e->vpmovwb(xmmA, ymmB);
- e->vpmovwb(anyptr_gpA, ymmB);
- e->vpmovwb(ymmA, zmmB);
- e->vpmovwb(anyptr_gpA, zmmB);
- e->vpmovzxbd(xmmA, xmmB);
- e->vpmovzxbd(xmmA, anyptr_gpB);
- e->vpmovzxbd(ymmA, xmmB);
- e->vpmovzxbd(ymmA, anyptr_gpB);
- e->vpmovzxbd(zmmA, xmmB);
- e->vpmovzxbd(zmmA, anyptr_gpB);
- e->vpmovzxbq(xmmA, xmmB);
- e->vpmovzxbq(xmmA, anyptr_gpB);
- e->vpmovzxbq(ymmA, xmmB);
- e->vpmovzxbq(ymmA, anyptr_gpB);
- e->vpmovzxbq(zmmA, xmmB);
- e->vpmovzxbq(zmmA, anyptr_gpB);
- e->vpmovzxbw(xmmA, xmmB);
- e->vpmovzxbw(xmmA, anyptr_gpB);
- e->vpmovzxbw(ymmA, xmmB);
- e->vpmovzxbw(ymmA, anyptr_gpB);
- e->vpmovzxbw(zmmA, ymmB);
- e->vpmovzxbw(zmmA, anyptr_gpB);
- e->vpmovzxdq(xmmA, xmmB);
- e->vpmovzxdq(xmmA, anyptr_gpB);
- e->vpmovzxdq(ymmA, xmmB);
- e->vpmovzxdq(ymmA, anyptr_gpB);
- e->vpmovzxdq(zmmA, ymmB);
- e->vpmovzxdq(zmmA, anyptr_gpB);
- e->vpmovzxwd(xmmA, xmmB);
- e->vpmovzxwd(xmmA, anyptr_gpB);
- e->vpmovzxwd(ymmA, xmmB);
- e->vpmovzxwd(ymmA, anyptr_gpB);
- e->vpmovzxwd(zmmA, ymmB);
- e->vpmovzxwd(zmmA, anyptr_gpB);
- e->vpmovzxwq(xmmA, xmmB);
- e->vpmovzxwq(xmmA, anyptr_gpB);
- e->vpmovzxwq(ymmA, xmmB);
- e->vpmovzxwq(ymmA, anyptr_gpB);
- e->vpmovzxwq(zmmA, xmmB);
- e->vpmovzxwq(zmmA, anyptr_gpB);
- e->vpmuldq(xmmA, xmmB, xmmC);
- e->vpmuldq(xmmA, xmmB, anyptr_gpC);
- e->vpmuldq(ymmA, ymmB, ymmC);
- e->vpmuldq(ymmA, ymmB, anyptr_gpC);
- e->vpmuldq(zmmA, zmmB, zmmC);
- e->vpmuldq(zmmA, zmmB, anyptr_gpC);
- e->vpmulhrsw(xmmA, xmmB, xmmC);
- e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
- e->vpmulhrsw(ymmA, ymmB, ymmC);
- e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhrsw(zmmA, zmmB, zmmC);
- e->vpmulhrsw(zmmA, zmmB, anyptr_gpC);
- e->vpmulhuw(xmmA, xmmB, xmmC);
- e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
- e->vpmulhuw(ymmA, ymmB, ymmC);
- e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhuw(zmmA, zmmB, zmmC);
- e->vpmulhuw(zmmA, zmmB, anyptr_gpC);
- e->vpmulhw(xmmA, xmmB, xmmC);
- e->vpmulhw(xmmA, xmmB, anyptr_gpC);
- e->vpmulhw(ymmA, ymmB, ymmC);
- e->vpmulhw(ymmA, ymmB, anyptr_gpC);
- e->vpmulhw(zmmA, zmmB, zmmC);
- e->vpmulhw(zmmA, zmmB, anyptr_gpC);
- e->vpmulld(xmmA, xmmB, xmmC);
- e->vpmulld(xmmA, xmmB, anyptr_gpC);
- e->vpmulld(ymmA, ymmB, ymmC);
- e->vpmulld(ymmA, ymmB, anyptr_gpC);
- e->vpmulld(zmmA, zmmB, zmmC);
- e->vpmulld(zmmA, zmmB, anyptr_gpC);
- e->vpmullq(xmmA, xmmB, xmmC);
- e->vpmullq(xmmA, xmmB, anyptr_gpC);
- e->vpmullq(ymmA, ymmB, ymmC);
- e->vpmullq(ymmA, ymmB, anyptr_gpC);
- e->vpmullq(zmmA, zmmB, zmmC);
- e->vpmullq(zmmA, zmmB, anyptr_gpC);
- e->vpmullw(xmmA, xmmB, xmmC);
- e->vpmullw(xmmA, xmmB, anyptr_gpC);
- e->vpmullw(ymmA, ymmB, ymmC);
- e->vpmullw(ymmA, ymmB, anyptr_gpC);
- e->vpmullw(zmmA, zmmB, zmmC);
- e->vpmullw(zmmA, zmmB, anyptr_gpC);
- e->vpmultishiftqb(xmmA, xmmB, xmmC);
- e->vpmultishiftqb(xmmA, xmmB, anyptr_gpC);
- e->vpmultishiftqb(ymmA, ymmB, ymmC);
- e->vpmultishiftqb(ymmA, ymmB, anyptr_gpC);
- e->vpmultishiftqb(zmmA, zmmB, zmmC);
- e->vpmultishiftqb(zmmA, zmmB, anyptr_gpC);
- e->vpmuludq(xmmA, xmmB, xmmC);
- e->vpmuludq(xmmA, xmmB, anyptr_gpC);
- e->vpmuludq(ymmA, ymmB, ymmC);
- e->vpmuludq(ymmA, ymmB, anyptr_gpC);
- e->vpmuludq(zmmA, zmmB, zmmC);
- e->vpmuludq(zmmA, zmmB, anyptr_gpC);
- e->vpopcntd(zmmA, zmmB);
- e->vpopcntd(zmmA, anyptr_gpB);
- e->vpopcntq(zmmA, zmmB);
- e->vpopcntq(zmmA, anyptr_gpB);
- e->vpord(xmmA, xmmB, xmmC);
- e->vpord(xmmA, xmmB, anyptr_gpC);
- e->vpord(ymmA, ymmB, ymmC);
- e->vpord(ymmA, ymmB, anyptr_gpC);
- e->vpord(zmmA, zmmB, zmmC);
- e->vpord(zmmA, zmmB, anyptr_gpC);
- e->vporq(xmmA, xmmB, xmmC);
- e->vporq(xmmA, xmmB, anyptr_gpC);
- e->vporq(ymmA, ymmB, ymmC);
- e->vporq(ymmA, ymmB, anyptr_gpC);
- e->vporq(zmmA, zmmB, zmmC);
- e->vporq(zmmA, zmmB, anyptr_gpC);
- e->vprold(xmmA, xmmB, 0);
- e->vprold(xmmA, anyptr_gpB, 0);
- e->vprold(ymmA, ymmB, 0);
- e->vprold(ymmA, anyptr_gpB, 0);
- e->vprold(zmmA, zmmB, 0);
- e->vprold(zmmA, anyptr_gpB, 0);
- e->vprolq(xmmA, xmmB, 0);
- e->vprolq(xmmA, anyptr_gpB, 0);
- e->vprolq(ymmA, ymmB, 0);
- e->vprolq(ymmA, anyptr_gpB, 0);
- e->vprolq(zmmA, zmmB, 0);
- e->vprolq(zmmA, anyptr_gpB, 0);
- e->vprolvd(xmmA, xmmB, xmmC);
- e->vprolvd(xmmA, xmmB, anyptr_gpC);
- e->vprolvd(ymmA, ymmB, ymmC);
- e->vprolvd(ymmA, ymmB, anyptr_gpC);
- e->vprolvd(zmmA, zmmB, zmmC);
- e->vprolvd(zmmA, zmmB, anyptr_gpC);
- e->vprolvq(xmmA, xmmB, xmmC);
- e->vprolvq(xmmA, xmmB, anyptr_gpC);
- e->vprolvq(ymmA, ymmB, ymmC);
- e->vprolvq(ymmA, ymmB, anyptr_gpC);
- e->vprolvq(zmmA, zmmB, zmmC);
- e->vprolvq(zmmA, zmmB, anyptr_gpC);
- e->vprord(xmmA, xmmB, 0);
- e->vprord(xmmA, anyptr_gpB, 0);
- e->vprord(ymmA, ymmB, 0);
- e->vprord(ymmA, anyptr_gpB, 0);
- e->vprord(zmmA, zmmB, 0);
- e->vprord(zmmA, anyptr_gpB, 0);
- e->vprorq(xmmA, xmmB, 0);
- e->vprorq(xmmA, anyptr_gpB, 0);
- e->vprorq(ymmA, ymmB, 0);
- e->vprorq(ymmA, anyptr_gpB, 0);
- e->vprorq(zmmA, zmmB, 0);
- e->vprorq(zmmA, anyptr_gpB, 0);
- e->vprorvd(xmmA, xmmB, xmmC);
- e->vprorvd(xmmA, xmmB, anyptr_gpC);
- e->vprorvd(ymmA, ymmB, ymmC);
- e->vprorvd(ymmA, ymmB, anyptr_gpC);
- e->vprorvd(zmmA, zmmB, zmmC);
- e->vprorvd(zmmA, zmmB, anyptr_gpC);
- e->vprorvq(xmmA, xmmB, xmmC);
- e->vprorvq(xmmA, xmmB, anyptr_gpC);
- e->vprorvq(ymmA, ymmB, ymmC);
- e->vprorvq(ymmA, ymmB, anyptr_gpC);
- e->vprorvq(zmmA, zmmB, zmmC);
- e->vprorvq(zmmA, zmmB, anyptr_gpC);
- e->vpsadbw(xmmA, xmmB, xmmC);
- e->vpsadbw(xmmA, xmmB, anyptr_gpC);
- e->vpsadbw(ymmA, ymmB, ymmC);
- e->vpsadbw(ymmA, ymmB, anyptr_gpC);
- e->vpsadbw(zmmA, zmmB, zmmC);
- e->vpsadbw(zmmA, zmmB, anyptr_gpC);
- e->vpscatterdd(vx_ptr, xmmB);
- e->vpscatterdd(vy_ptr, ymmB);
- e->vpscatterdd(vz_ptr, zmmB);
- e->vpscatterdq(vx_ptr, xmmB);
- e->vpscatterdq(vy_ptr, ymmB);
- e->vpscatterdq(vz_ptr, zmmB);
- e->vpscatterqd(vx_ptr, xmmB);
- e->vpscatterqd(vy_ptr, xmmB);
- e->vpscatterqd(vz_ptr, ymmB);
- e->vpscatterqq(vx_ptr, xmmB);
- e->vpscatterqq(vy_ptr, ymmB);
- e->vpscatterqq(vz_ptr, zmmB);
- e->vpshufb(xmmA, xmmB, xmmC);
- e->vpshufb(xmmA, xmmB, anyptr_gpC);
- e->vpshufb(ymmA, ymmB, ymmC);
- e->vpshufb(ymmA, ymmB, anyptr_gpC);
- e->vpshufb(zmmA, zmmB, zmmC);
- e->vpshufb(zmmA, zmmB, anyptr_gpC);
- e->vpshufd(xmmA, xmmB, 0);
- e->vpshufd(xmmA, anyptr_gpB, 0);
- e->vpshufd(ymmA, ymmB, 0);
- e->vpshufd(ymmA, anyptr_gpB, 0);
- e->vpshufd(zmmA, zmmB, 0);
- e->vpshufd(zmmA, anyptr_gpB, 0);
- e->vpshufhw(xmmA, xmmB, 0);
- e->vpshufhw(xmmA, anyptr_gpB, 0);
- e->vpshufhw(ymmA, ymmB, 0);
- e->vpshufhw(ymmA, anyptr_gpB, 0);
- e->vpshufhw(zmmA, zmmB, 0);
- e->vpshufhw(zmmA, anyptr_gpB, 0);
- e->vpshuflw(xmmA, xmmB, 0);
- e->vpshuflw(xmmA, anyptr_gpB, 0);
- e->vpshuflw(ymmA, ymmB, 0);
- e->vpshuflw(ymmA, anyptr_gpB, 0);
- e->vpshuflw(zmmA, zmmB, 0);
- e->vpshuflw(zmmA, anyptr_gpB, 0);
- e->vpslld(xmmA, xmmB, xmmC);
- e->vpslld(xmmA, xmmB, anyptr_gpC);
- e->vpslld(xmmA, xmmB, 0);
- e->vpslld(xmmA, anyptr_gpB, 0);
- e->vpslld(ymmA, ymmB, xmmC);
- e->vpslld(ymmA, ymmB, anyptr_gpC);
- e->vpslld(ymmA, ymmB, 0);
- e->vpslld(ymmA, anyptr_gpB, 0);
- e->vpslld(zmmA, zmmB, xmmC);
- e->vpslld(zmmA, zmmB, anyptr_gpC);
- e->vpslld(zmmA, zmmB, 0);
- e->vpslld(zmmA, anyptr_gpB, 0);
- e->vpslldq(xmmA, xmmB, 0);
- e->vpslldq(xmmA, anyptr_gpB, 0);
- e->vpslldq(ymmA, ymmB, 0);
- e->vpslldq(ymmA, anyptr_gpB, 0);
- e->vpslldq(zmmA, zmmB, 0);
- e->vpslldq(zmmA, anyptr_gpB, 0);
- e->vpsllq(xmmA, xmmB, xmmC);
- e->vpsllq(xmmA, xmmB, anyptr_gpC);
- e->vpsllq(xmmA, xmmB, 0);
- e->vpsllq(xmmA, anyptr_gpB, 0);
- e->vpsllq(ymmA, ymmB, xmmC);
- e->vpsllq(ymmA, ymmB, anyptr_gpC);
- e->vpsllq(ymmA, ymmB, 0);
- e->vpsllq(ymmA, anyptr_gpB, 0);
- e->vpsllq(zmmA, zmmB, xmmC);
- e->vpsllq(zmmA, zmmB, anyptr_gpC);
- e->vpsllq(zmmA, zmmB, 0);
- e->vpsllq(zmmA, anyptr_gpB, 0);
- e->vpsllvd(xmmA, xmmB, xmmC);
- e->vpsllvd(xmmA, xmmB, anyptr_gpC);
- e->vpsllvd(ymmA, ymmB, ymmC);
- e->vpsllvd(ymmA, ymmB, anyptr_gpC);
- e->vpsllvd(zmmA, zmmB, zmmC);
- e->vpsllvd(zmmA, zmmB, anyptr_gpC);
- e->vpsllvq(xmmA, xmmB, xmmC);
- e->vpsllvq(xmmA, xmmB, anyptr_gpC);
- e->vpsllvq(ymmA, ymmB, ymmC);
- e->vpsllvq(ymmA, ymmB, anyptr_gpC);
- e->vpsllvq(zmmA, zmmB, zmmC);
- e->vpsllvq(zmmA, zmmB, anyptr_gpC);
- e->vpsllvw(xmmA, xmmB, xmmC);
- e->vpsllvw(xmmA, xmmB, anyptr_gpC);
- e->vpsllvw(ymmA, ymmB, ymmC);
- e->vpsllvw(ymmA, ymmB, anyptr_gpC);
- e->vpsllvw(zmmA, zmmB, zmmC);
- e->vpsllvw(zmmA, zmmB, anyptr_gpC);
- e->vpsllw(xmmA, xmmB, xmmC);
- e->vpsllw(xmmA, xmmB, anyptr_gpC);
- e->vpsllw(xmmA, xmmB, 0);
- e->vpsllw(xmmA, anyptr_gpB, 0);
- e->vpsllw(ymmA, ymmB, xmmC);
- e->vpsllw(ymmA, ymmB, anyptr_gpC);
- e->vpsllw(ymmA, ymmB, 0);
- e->vpsllw(ymmA, anyptr_gpB, 0);
- e->vpsllw(zmmA, zmmB, xmmC);
- e->vpsllw(zmmA, zmmB, anyptr_gpC);
- e->vpsllw(zmmA, zmmB, 0);
- e->vpsllw(zmmA, anyptr_gpB, 0);
- e->vpsrad(xmmA, xmmB, xmmC);
- e->vpsrad(xmmA, xmmB, anyptr_gpC);
- e->vpsrad(xmmA, xmmB, 0);
- e->vpsrad(xmmA, anyptr_gpB, 0);
- e->vpsrad(ymmA, ymmB, xmmC);
- e->vpsrad(ymmA, ymmB, anyptr_gpC);
- e->vpsrad(ymmA, ymmB, 0);
- e->vpsrad(ymmA, anyptr_gpB, 0);
- e->vpsrad(zmmA, zmmB, xmmC);
- e->vpsrad(zmmA, zmmB, anyptr_gpC);
- e->vpsrad(zmmA, zmmB, 0);
- e->vpsrad(zmmA, anyptr_gpB, 0);
- e->vpsraq(xmmA, xmmB, xmmC);
- e->vpsraq(xmmA, xmmB, anyptr_gpC);
- e->vpsraq(xmmA, xmmB, 0);
- e->vpsraq(xmmA, anyptr_gpB, 0);
- e->vpsraq(ymmA, ymmB, xmmC);
- e->vpsraq(ymmA, ymmB, anyptr_gpC);
- e->vpsraq(ymmA, ymmB, 0);
- e->vpsraq(ymmA, anyptr_gpB, 0);
- e->vpsraq(zmmA, zmmB, xmmC);
- e->vpsraq(zmmA, zmmB, anyptr_gpC);
- e->vpsraq(zmmA, zmmB, 0);
- e->vpsraq(zmmA, anyptr_gpB, 0);
- e->vpsravd(xmmA, xmmB, xmmC);
- e->vpsravd(xmmA, xmmB, anyptr_gpC);
- e->vpsravd(ymmA, ymmB, ymmC);
- e->vpsravd(ymmA, ymmB, anyptr_gpC);
- e->vpsravd(zmmA, zmmB, zmmC);
- e->vpsravd(zmmA, zmmB, anyptr_gpC);
- e->vpsravq(xmmA, xmmB, xmmC);
- e->vpsravq(xmmA, xmmB, anyptr_gpC);
- e->vpsravq(ymmA, ymmB, ymmC);
- e->vpsravq(ymmA, ymmB, anyptr_gpC);
- e->vpsravq(zmmA, zmmB, zmmC);
- e->vpsravq(zmmA, zmmB, anyptr_gpC);
- e->vpsravw(xmmA, xmmB, xmmC);
- e->vpsravw(xmmA, xmmB, anyptr_gpC);
- e->vpsravw(ymmA, ymmB, ymmC);
- e->vpsravw(ymmA, ymmB, anyptr_gpC);
- e->vpsravw(zmmA, zmmB, zmmC);
- e->vpsravw(zmmA, zmmB, anyptr_gpC);
- e->vpsraw(xmmA, xmmB, xmmC);
- e->vpsraw(xmmA, xmmB, anyptr_gpC);
- e->vpsraw(xmmA, xmmB, 0);
- e->vpsraw(xmmA, anyptr_gpB, 0);
- e->vpsraw(ymmA, ymmB, xmmC);
- e->vpsraw(ymmA, ymmB, anyptr_gpC);
- e->vpsraw(ymmA, ymmB, 0);
- e->vpsraw(ymmA, anyptr_gpB, 0);
- e->vpsraw(zmmA, zmmB, xmmC);
- e->vpsraw(zmmA, zmmB, anyptr_gpC);
- e->vpsraw(zmmA, zmmB, 0);
- e->vpsraw(zmmA, anyptr_gpB, 0);
- e->vpsrld(xmmA, xmmB, xmmC);
- e->vpsrld(xmmA, xmmB, anyptr_gpC);
- e->vpsrld(xmmA, xmmB, 0);
- e->vpsrld(xmmA, anyptr_gpB, 0);
- e->vpsrld(ymmA, ymmB, xmmC);
- e->vpsrld(ymmA, ymmB, anyptr_gpC);
- e->vpsrld(ymmA, ymmB, 0);
- e->vpsrld(ymmA, anyptr_gpB, 0);
- e->vpsrld(zmmA, zmmB, xmmC);
- e->vpsrld(zmmA, zmmB, anyptr_gpC);
- e->vpsrld(zmmA, zmmB, 0);
- e->vpsrld(zmmA, anyptr_gpB, 0);
- e->vpsrldq(xmmA, xmmB, 0);
- e->vpsrldq(xmmA, anyptr_gpB, 0);
- e->vpsrldq(ymmA, ymmB, 0);
- e->vpsrldq(ymmA, anyptr_gpB, 0);
- e->vpsrldq(zmmA, zmmB, 0);
- e->vpsrldq(zmmA, anyptr_gpB, 0);
- e->vpsrlq(xmmA, xmmB, xmmC);
- e->vpsrlq(xmmA, xmmB, anyptr_gpC);
- e->vpsrlq(xmmA, xmmB, 0);
- e->vpsrlq(xmmA, anyptr_gpB, 0);
- e->vpsrlq(ymmA, ymmB, xmmC);
- e->vpsrlq(ymmA, ymmB, anyptr_gpC);
- e->vpsrlq(ymmA, ymmB, 0);
- e->vpsrlq(ymmA, anyptr_gpB, 0);
- e->vpsrlq(zmmA, zmmB, xmmC);
- e->vpsrlq(zmmA, zmmB, anyptr_gpC);
- e->vpsrlq(zmmA, zmmB, 0);
- e->vpsrlq(zmmA, anyptr_gpB, 0);
- e->vpsrlvd(xmmA, xmmB, xmmC);
- e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
- e->vpsrlvd(ymmA, ymmB, ymmC);
- e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
- e->vpsrlvd(zmmA, zmmB, zmmC);
- e->vpsrlvd(zmmA, zmmB, anyptr_gpC);
- e->vpsrlvq(xmmA, xmmB, xmmC);
- e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
- e->vpsrlvq(ymmA, ymmB, ymmC);
- e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
- e->vpsrlvq(zmmA, zmmB, zmmC);
- e->vpsrlvq(zmmA, zmmB, anyptr_gpC);
- e->vpsrlvw(xmmA, xmmB, xmmC);
- e->vpsrlvw(xmmA, xmmB, anyptr_gpC);
- e->vpsrlvw(ymmA, ymmB, ymmC);
- e->vpsrlvw(ymmA, ymmB, anyptr_gpC);
- e->vpsrlvw(zmmA, zmmB, zmmC);
- e->vpsrlvw(zmmA, zmmB, anyptr_gpC);
- e->vpsrlw(xmmA, xmmB, xmmC);
- e->vpsrlw(xmmA, xmmB, anyptr_gpC);
- e->vpsrlw(xmmA, xmmB, 0);
- e->vpsrlw(xmmA, anyptr_gpB, 0);
- e->vpsrlw(ymmA, ymmB, xmmC);
- e->vpsrlw(ymmA, ymmB, anyptr_gpC);
- e->vpsrlw(ymmA, ymmB, 0);
- e->vpsrlw(ymmA, anyptr_gpB, 0);
- e->vpsrlw(zmmA, zmmB, xmmC);
- e->vpsrlw(zmmA, zmmB, anyptr_gpC);
- e->vpsrlw(zmmA, zmmB, 0);
- e->vpsrlw(zmmA, anyptr_gpB, 0);
- e->vpsubb(xmmA, xmmB, xmmC);
- e->vpsubb(xmmA, xmmB, anyptr_gpC);
- e->vpsubb(ymmA, ymmB, ymmC);
- e->vpsubb(ymmA, ymmB, anyptr_gpC);
- e->vpsubb(zmmA, zmmB, zmmC);
- e->vpsubb(zmmA, zmmB, anyptr_gpC);
- e->vpsubd(xmmA, xmmB, xmmC);
- e->vpsubd(xmmA, xmmB, anyptr_gpC);
- e->vpsubd(ymmA, ymmB, ymmC);
- e->vpsubd(ymmA, ymmB, anyptr_gpC);
- e->vpsubd(zmmA, zmmB, zmmC);
- e->vpsubd(zmmA, zmmB, anyptr_gpC);
- e->vpsubq(xmmA, xmmB, xmmC);
- e->vpsubq(xmmA, xmmB, anyptr_gpC);
- e->vpsubq(ymmA, ymmB, ymmC);
- e->vpsubq(ymmA, ymmB, anyptr_gpC);
- e->vpsubq(zmmA, zmmB, zmmC);
- e->vpsubq(zmmA, zmmB, anyptr_gpC);
- e->vpsubsb(xmmA, xmmB, xmmC);
- e->vpsubsb(xmmA, xmmB, anyptr_gpC);
- e->vpsubsb(ymmA, ymmB, ymmC);
- e->vpsubsb(ymmA, ymmB, anyptr_gpC);
- e->vpsubsb(zmmA, zmmB, zmmC);
- e->vpsubsb(zmmA, zmmB, anyptr_gpC);
- e->vpsubsw(xmmA, xmmB, xmmC);
- e->vpsubsw(xmmA, xmmB, anyptr_gpC);
- e->vpsubsw(ymmA, ymmB, ymmC);
- e->vpsubsw(ymmA, ymmB, anyptr_gpC);
- e->vpsubsw(zmmA, zmmB, zmmC);
- e->vpsubsw(zmmA, zmmB, anyptr_gpC);
- e->vpsubusb(xmmA, xmmB, xmmC);
- e->vpsubusb(xmmA, xmmB, anyptr_gpC);
- e->vpsubusb(ymmA, ymmB, ymmC);
- e->vpsubusb(ymmA, ymmB, anyptr_gpC);
- e->vpsubusb(zmmA, zmmB, zmmC);
- e->vpsubusb(zmmA, zmmB, anyptr_gpC);
- e->vpsubusw(xmmA, xmmB, xmmC);
- e->vpsubusw(xmmA, xmmB, anyptr_gpC);
- e->vpsubusw(ymmA, ymmB, ymmC);
- e->vpsubusw(ymmA, ymmB, anyptr_gpC);
- e->vpsubusw(zmmA, zmmB, zmmC);
- e->vpsubusw(zmmA, zmmB, anyptr_gpC);
- e->vpsubw(xmmA, xmmB, xmmC);
- e->vpsubw(xmmA, xmmB, anyptr_gpC);
- e->vpsubw(ymmA, ymmB, ymmC);
- e->vpsubw(ymmA, ymmB, anyptr_gpC);
- e->vpsubw(zmmA, zmmB, zmmC);
- e->vpsubw(zmmA, zmmB, anyptr_gpC);
- e->vpternlogd(xmmA, xmmB, xmmC, 0);
- e->vpternlogd(xmmA, xmmB, anyptr_gpC, 0);
- e->vpternlogd(ymmA, ymmB, ymmC, 0);
- e->vpternlogd(ymmA, ymmB, anyptr_gpC, 0);
- e->vpternlogd(zmmA, zmmB, zmmC, 0);
- e->vpternlogd(zmmA, zmmB, anyptr_gpC, 0);
- e->vpternlogq(xmmA, xmmB, xmmC, 0);
- e->vpternlogq(xmmA, xmmB, anyptr_gpC, 0);
- e->vpternlogq(ymmA, ymmB, ymmC, 0);
- e->vpternlogq(ymmA, ymmB, anyptr_gpC, 0);
- e->vpternlogq(zmmA, zmmB, zmmC, 0);
- e->vpternlogq(zmmA, zmmB, anyptr_gpC, 0);
- e->vptestmb(kA, xmmB, xmmC);
- e->vptestmb(kA, xmmB, anyptr_gpC);
- e->vptestmb(kA, ymmB, ymmC);
- e->vptestmb(kA, ymmB, anyptr_gpC);
- e->vptestmb(kA, zmmB, zmmC);
- e->vptestmb(kA, zmmB, anyptr_gpC);
- e->vptestmd(kA, xmmB, xmmC);
- e->vptestmd(kA, xmmB, anyptr_gpC);
- e->vptestmd(kA, ymmB, ymmC);
- e->vptestmd(kA, ymmB, anyptr_gpC);
- e->vptestmd(kA, zmmB, zmmC);
- e->vptestmd(kA, zmmB, anyptr_gpC);
- e->vptestmq(kA, xmmB, xmmC);
- e->vptestmq(kA, xmmB, anyptr_gpC);
- e->vptestmq(kA, ymmB, ymmC);
- e->vptestmq(kA, ymmB, anyptr_gpC);
- e->vptestmq(kA, zmmB, zmmC);
- e->vptestmq(kA, zmmB, anyptr_gpC);
- e->vptestmw(kA, xmmB, xmmC);
- e->vptestmw(kA, xmmB, anyptr_gpC);
- e->vptestmw(kA, ymmB, ymmC);
- e->vptestmw(kA, ymmB, anyptr_gpC);
- e->vptestmw(kA, zmmB, zmmC);
- e->vptestmw(kA, zmmB, anyptr_gpC);
- e->vptestnmb(kA, xmmB, xmmC);
- e->vptestnmb(kA, xmmB, anyptr_gpC);
- e->vptestnmb(kA, ymmB, ymmC);
- e->vptestnmb(kA, ymmB, anyptr_gpC);
- e->vptestnmb(kA, zmmB, zmmC);
- e->vptestnmb(kA, zmmB, anyptr_gpC);
- e->vptestnmd(kA, xmmB, xmmC);
- e->vptestnmd(kA, xmmB, anyptr_gpC);
- e->vptestnmd(kA, ymmB, ymmC);
- e->vptestnmd(kA, ymmB, anyptr_gpC);
- e->vptestnmd(kA, zmmB, zmmC);
- e->vptestnmd(kA, zmmB, anyptr_gpC);
- e->vptestnmq(kA, xmmB, xmmC);
- e->vptestnmq(kA, xmmB, anyptr_gpC);
- e->vptestnmq(kA, ymmB, ymmC);
- e->vptestnmq(kA, ymmB, anyptr_gpC);
- e->vptestnmq(kA, zmmB, zmmC);
- e->vptestnmq(kA, zmmB, anyptr_gpC);
- e->vptestnmw(kA, xmmB, xmmC);
- e->vptestnmw(kA, xmmB, anyptr_gpC);
- e->vptestnmw(kA, ymmB, ymmC);
- e->vptestnmw(kA, ymmB, anyptr_gpC);
- e->vptestnmw(kA, zmmB, zmmC);
- e->vptestnmw(kA, zmmB, anyptr_gpC);
- e->vpunpckhbw(xmmA, xmmB, xmmC);
- e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhbw(ymmA, ymmB, ymmC);
- e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhbw(zmmA, zmmB, zmmC);
- e->vpunpckhbw(zmmA, zmmB, anyptr_gpC);
- e->vpunpckhdq(xmmA, xmmB, xmmC);
- e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhdq(ymmA, ymmB, ymmC);
- e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhdq(zmmA, zmmB, zmmC);
- e->vpunpckhdq(zmmA, zmmB, anyptr_gpC);
- e->vpunpckhqdq(xmmA, xmmB, xmmC);
- e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhqdq(ymmA, ymmB, ymmC);
- e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhqdq(zmmA, zmmB, zmmC);
- e->vpunpckhqdq(zmmA, zmmB, anyptr_gpC);
- e->vpunpckhwd(xmmA, xmmB, xmmC);
- e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
- e->vpunpckhwd(ymmA, ymmB, ymmC);
- e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
- e->vpunpckhwd(zmmA, zmmB, zmmC);
- e->vpunpckhwd(zmmA, zmmB, anyptr_gpC);
- e->vpunpcklbw(xmmA, xmmB, xmmC);
- e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklbw(ymmA, ymmB, ymmC);
- e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklbw(zmmA, zmmB, zmmC);
- e->vpunpcklbw(zmmA, zmmB, anyptr_gpC);
- e->vpunpckldq(xmmA, xmmB, xmmC);
- e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
- e->vpunpckldq(ymmA, ymmB, ymmC);
- e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
- e->vpunpckldq(zmmA, zmmB, zmmC);
- e->vpunpckldq(zmmA, zmmB, anyptr_gpC);
- e->vpunpcklqdq(xmmA, xmmB, xmmC);
- e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklqdq(ymmA, ymmB, ymmC);
- e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklqdq(zmmA, zmmB, zmmC);
- e->vpunpcklqdq(zmmA, zmmB, anyptr_gpC);
- e->vpunpcklwd(xmmA, xmmB, xmmC);
- e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
- e->vpunpcklwd(ymmA, ymmB, ymmC);
- e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
- e->vpunpcklwd(zmmA, zmmB, zmmC);
- e->vpunpcklwd(zmmA, zmmB, anyptr_gpC);
- e->vpxord(xmmA, xmmB, xmmC);
- e->vpxord(xmmA, xmmB, anyptr_gpC);
- e->vpxord(ymmA, ymmB, ymmC);
- e->vpxord(ymmA, ymmB, anyptr_gpC);
- e->vpxord(zmmA, zmmB, zmmC);
- e->vpxord(zmmA, zmmB, anyptr_gpC);
- e->vpxorq(xmmA, xmmB, xmmC);
- e->vpxorq(xmmA, xmmB, anyptr_gpC);
- e->vpxorq(ymmA, ymmB, ymmC);
- e->vpxorq(ymmA, ymmB, anyptr_gpC);
- e->vpxorq(zmmA, zmmB, zmmC);
- e->vpxorq(zmmA, zmmB, anyptr_gpC);
- e->vrangepd(xmmA, xmmB, xmmC, 0);
- e->vrangepd(xmmA, xmmB, anyptr_gpC, 0);
- e->vrangepd(ymmA, ymmB, ymmC, 0);
- e->vrangepd(ymmA, ymmB, anyptr_gpC, 0);
- e->vrangepd(zmmA, zmmB, zmmC, 0);
- e->vrangepd(zmmA, zmmB, anyptr_gpC, 0);
- e->vrangeps(xmmA, xmmB, xmmC, 0);
- e->vrangeps(xmmA, xmmB, anyptr_gpC, 0);
- e->vrangeps(ymmA, ymmB, ymmC, 0);
- e->vrangeps(ymmA, ymmB, anyptr_gpC, 0);
- e->vrangeps(zmmA, zmmB, zmmC, 0);
- e->vrangeps(zmmA, zmmB, anyptr_gpC, 0);
- e->vrangesd(xmmA, xmmB, xmmC, 0);
- e->vrangesd(xmmA, xmmB, anyptr_gpC, 0);
- e->vrangess(xmmA, xmmB, xmmC, 0);
- e->vrangess(xmmA, xmmB, anyptr_gpC, 0);
- e->vrcp14pd(xmmA, xmmB);
- e->vrcp14pd(xmmA, anyptr_gpB);
- e->vrcp14pd(ymmA, ymmB);
- e->vrcp14pd(ymmA, anyptr_gpB);
- e->vrcp14pd(zmmA, zmmB);
- e->vrcp14pd(zmmA, anyptr_gpB);
- e->vrcp14ps(xmmA, xmmB);
- e->vrcp14ps(xmmA, anyptr_gpB);
- e->vrcp14ps(ymmA, ymmB);
- e->vrcp14ps(ymmA, anyptr_gpB);
- e->vrcp14ps(zmmA, zmmB);
- e->vrcp14ps(zmmA, anyptr_gpB);
- e->vrcp14sd(xmmA, xmmB, xmmC);
- e->vrcp14sd(xmmA, xmmB, anyptr_gpC);
- e->vrcp14ss(xmmA, xmmB, xmmC);
- e->vrcp14ss(xmmA, xmmB, anyptr_gpC);
- e->vrcp28pd(zmmA, zmmB);
- e->vrcp28pd(zmmA, anyptr_gpB);
- e->vrcp28ps(zmmA, zmmB);
- e->vrcp28ps(zmmA, anyptr_gpB);
- e->vrcp28sd(xmmA, xmmB, xmmC);
- e->vrcp28sd(xmmA, xmmB, anyptr_gpC);
- e->vrcp28ss(xmmA, xmmB, xmmC);
- e->vrcp28ss(xmmA, xmmB, anyptr_gpC);
- e->vreducepd(xmmA, xmmB, 0);
- e->vreducepd(xmmA, anyptr_gpB, 0);
- e->vreducepd(ymmA, ymmB, 0);
- e->vreducepd(ymmA, anyptr_gpB, 0);
- e->vreducepd(zmmA, zmmB, 0);
- e->vreducepd(zmmA, anyptr_gpB, 0);
- e->vreduceps(xmmA, xmmB, 0);
- e->vreduceps(xmmA, anyptr_gpB, 0);
- e->vreduceps(ymmA, ymmB, 0);
- e->vreduceps(ymmA, anyptr_gpB, 0);
- e->vreduceps(zmmA, zmmB, 0);
- e->vreduceps(zmmA, anyptr_gpB, 0);
- e->vreducesd(xmmA, xmmB, xmmC, 0);
- e->vreducesd(xmmA, xmmB, anyptr_gpC, 0);
- e->vreducess(xmmA, xmmB, xmmC, 0);
- e->vreducess(xmmA, xmmB, anyptr_gpC, 0);
- e->vrndscalepd(xmmA, xmmB, 0);
- e->vrndscalepd(xmmA, anyptr_gpB, 0);
- e->vrndscalepd(ymmA, ymmB, 0);
- e->vrndscalepd(ymmA, anyptr_gpB, 0);
- e->vrndscalepd(zmmA, zmmB, 0);
- e->vrndscalepd(zmmA, anyptr_gpB, 0);
- e->vrndscaleps(xmmA, xmmB, 0);
- e->vrndscaleps(xmmA, anyptr_gpB, 0);
- e->vrndscaleps(ymmA, ymmB, 0);
- e->vrndscaleps(ymmA, anyptr_gpB, 0);
- e->vrndscaleps(zmmA, zmmB, 0);
- e->vrndscaleps(zmmA, anyptr_gpB, 0);
- e->vrndscalesd(xmmA, xmmB, xmmC, 0);
- e->vrndscalesd(xmmA, xmmB, anyptr_gpC, 0);
- e->vrndscaless(xmmA, xmmB, xmmC, 0);
- e->vrndscaless(xmmA, xmmB, anyptr_gpC, 0);
- e->vrsqrt14pd(xmmA, xmmB);
- e->vrsqrt14pd(xmmA, anyptr_gpB);
- e->vrsqrt14pd(ymmA, ymmB);
- e->vrsqrt14pd(ymmA, anyptr_gpB);
- e->vrsqrt14pd(zmmA, zmmB);
- e->vrsqrt14pd(zmmA, anyptr_gpB);
- e->vrsqrt14ps(xmmA, xmmB);
- e->vrsqrt14ps(xmmA, anyptr_gpB);
- e->vrsqrt14ps(ymmA, ymmB);
- e->vrsqrt14ps(ymmA, anyptr_gpB);
- e->vrsqrt14ps(zmmA, zmmB);
- e->vrsqrt14ps(zmmA, anyptr_gpB);
- e->vrsqrt14sd(xmmA, xmmB, xmmC);
- e->vrsqrt14sd(xmmA, xmmB, anyptr_gpC);
- e->vrsqrt14ss(xmmA, xmmB, xmmC);
- e->vrsqrt14ss(xmmA, xmmB, anyptr_gpC);
- e->vrsqrt28pd(zmmA, zmmB);
- e->vrsqrt28pd(zmmA, anyptr_gpB);
- e->vrsqrt28ps(zmmA, zmmB);
- e->vrsqrt28ps(zmmA, anyptr_gpB);
- e->vrsqrt28sd(xmmA, xmmB, xmmC);
- e->vrsqrt28sd(xmmA, xmmB, anyptr_gpC);
- e->vrsqrt28ss(xmmA, xmmB, xmmC);
- e->vrsqrt28ss(xmmA, xmmB, anyptr_gpC);
- e->vscalefpd(xmmA, xmmB, xmmC);
- e->vscalefpd(xmmA, xmmB, anyptr_gpC);
- e->vscalefpd(ymmA, ymmB, ymmC);
- e->vscalefpd(ymmA, ymmB, anyptr_gpC);
- e->vscalefpd(zmmA, zmmB, zmmC);
- e->vscalefpd(zmmA, zmmB, anyptr_gpC);
- e->vscalefps(xmmA, xmmB, xmmC);
- e->vscalefps(xmmA, xmmB, anyptr_gpC);
- e->vscalefps(ymmA, ymmB, ymmC);
- e->vscalefps(ymmA, ymmB, anyptr_gpC);
- e->vscalefps(zmmA, zmmB, zmmC);
- e->vscalefps(zmmA, zmmB, anyptr_gpC);
- e->vscalefsd(xmmA, xmmB, xmmC);
- e->vscalefsd(xmmA, xmmB, anyptr_gpC);
- e->vscalefss(xmmA, xmmB, xmmC);
- e->vscalefss(xmmA, xmmB, anyptr_gpC);
- e->vscatterdpd(vx_ptr, xmmB);
- e->vscatterdpd(vx_ptr, ymmB);
- e->vscatterdpd(vy_ptr, zmmB);
- e->vscatterdps(vx_ptr, xmmB);
- e->vscatterdps(vy_ptr, ymmB);
- e->vscatterdps(vz_ptr, zmmB);
- e->vscatterpf0dpd(vy_ptr);
- e->vscatterpf0dps(vz_ptr);
- e->vscatterpf0qpd(vz_ptr);
- e->vscatterpf0qps(vz_ptr);
- e->vscatterpf1dpd(vy_ptr);
- e->vscatterpf1dps(vz_ptr);
- e->vscatterpf1qpd(vz_ptr);
- e->vscatterpf1qps(vz_ptr);
- e->vscatterqpd(vx_ptr, xmmB);
- e->vscatterqpd(vy_ptr, ymmB);
- e->vscatterqpd(vz_ptr, zmmB);
- e->vscatterqps(vx_ptr, xmmB);
- e->vscatterqps(vy_ptr, xmmB);
- e->vscatterqps(vz_ptr, ymmB);
- e->vshuff32x4(ymmA, ymmB, ymmC, 0);
- e->vshuff32x4(ymmA, ymmB, anyptr_gpC, 0);
- e->vshuff32x4(zmmA, zmmB, zmmC, 0);
- e->vshuff32x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vshuff64x2(ymmA, ymmB, ymmC, 0);
- e->vshuff64x2(ymmA, ymmB, anyptr_gpC, 0);
- e->vshuff64x2(zmmA, zmmB, zmmC, 0);
- e->vshuff64x2(zmmA, zmmB, anyptr_gpC, 0);
- e->vshufi32x4(ymmA, ymmB, ymmC, 0);
- e->vshufi32x4(ymmA, ymmB, anyptr_gpC, 0);
- e->vshufi32x4(zmmA, zmmB, zmmC, 0);
- e->vshufi32x4(zmmA, zmmB, anyptr_gpC, 0);
- e->vshufi64x2(ymmA, ymmB, ymmC, 0);
- e->vshufi64x2(ymmA, ymmB, anyptr_gpC, 0);
- e->vshufi64x2(zmmA, zmmB, zmmC, 0);
- e->vshufi64x2(zmmA, zmmB, anyptr_gpC, 0);
- e->vshufpd(xmmA, xmmB, xmmC, 0);
- e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
- e->vshufpd(ymmA, ymmB, ymmC, 0);
- e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
- e->vshufpd(zmmA, zmmB, zmmC, 0);
- e->vshufpd(zmmA, zmmB, anyptr_gpC, 0);
- e->vshufps(xmmA, xmmB, xmmC, 0);
- e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
- e->vshufps(ymmA, ymmB, ymmC, 0);
- e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
- e->vshufps(zmmA, zmmB, zmmC, 0);
- e->vshufps(zmmA, zmmB, anyptr_gpC, 0);
- e->vsqrtpd(xmmA, xmmB);
- e->vsqrtpd(xmmA, anyptr_gpB);
- e->vsqrtpd(ymmA, ymmB);
- e->vsqrtpd(ymmA, anyptr_gpB);
- e->vsqrtpd(zmmA, zmmB);
- e->vsqrtpd(zmmA, anyptr_gpB);
- e->vsqrtps(xmmA, xmmB);
- e->vsqrtps(xmmA, anyptr_gpB);
- e->vsqrtps(ymmA, ymmB);
- e->vsqrtps(ymmA, anyptr_gpB);
- e->vsqrtps(zmmA, zmmB);
- e->vsqrtps(zmmA, anyptr_gpB);
- e->vsqrtsd(xmmA, xmmB, xmmC);
- e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
- e->vsqrtss(xmmA, xmmB, xmmC);
- e->vsqrtss(xmmA, xmmB, anyptr_gpC);
- e->vsubpd(xmmA, xmmB, xmmC);
- e->vsubpd(xmmA, xmmB, anyptr_gpC);
- e->vsubpd(ymmA, ymmB, ymmC);
- e->vsubpd(ymmA, ymmB, anyptr_gpC);
- e->vsubpd(zmmA, zmmB, zmmC);
- e->vsubpd(zmmA, zmmB, anyptr_gpC);
- e->vsubps(xmmA, xmmB, xmmC);
- e->vsubps(xmmA, xmmB, anyptr_gpC);
- e->vsubps(ymmA, ymmB, ymmC);
- e->vsubps(ymmA, ymmB, anyptr_gpC);
- e->vsubps(zmmA, zmmB, zmmC);
- e->vsubps(zmmA, zmmB, anyptr_gpC);
- e->vsubsd(xmmA, xmmB, xmmC);
- e->vsubsd(xmmA, xmmB, anyptr_gpC);
- e->vsubss(xmmA, xmmB, xmmC);
- e->vsubss(xmmA, xmmB, anyptr_gpC);
- e->vucomisd(xmmA, xmmB);
- e->vucomisd(xmmA, anyptr_gpB);
- e->vucomiss(xmmA, xmmB);
- e->vucomiss(xmmA, anyptr_gpB);
- e->vunpckhpd(xmmA, xmmB, xmmC);
- e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
- e->vunpckhpd(ymmA, ymmB, ymmC);
- e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
- e->vunpckhpd(zmmA, zmmB, zmmC);
- e->vunpckhpd(zmmA, zmmB, anyptr_gpC);
- e->vunpckhps(xmmA, xmmB, xmmC);
- e->vunpckhps(xmmA, xmmB, anyptr_gpC);
- e->vunpckhps(ymmA, ymmB, ymmC);
- e->vunpckhps(ymmA, ymmB, anyptr_gpC);
- e->vunpckhps(zmmA, zmmB, zmmC);
- e->vunpckhps(zmmA, zmmB, anyptr_gpC);
- e->vunpcklpd(xmmA, xmmB, xmmC);
- e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
- e->vunpcklpd(ymmA, ymmB, ymmC);
- e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
- e->vunpcklpd(zmmA, zmmB, zmmC);
- e->vunpcklpd(zmmA, zmmB, anyptr_gpC);
- e->vunpcklps(xmmA, xmmB, xmmC);
- e->vunpcklps(xmmA, xmmB, anyptr_gpC);
- e->vunpcklps(ymmA, ymmB, ymmC);
- e->vunpcklps(ymmA, ymmB, anyptr_gpC);
- e->vunpcklps(zmmA, zmmB, zmmC);
- e->vunpcklps(zmmA, zmmB, anyptr_gpC);
- e->vxorpd(xmmA, xmmB, xmmC);
- e->vxorpd(xmmA, xmmB, anyptr_gpC);
- e->vxorpd(ymmA, ymmB, ymmC);
- e->vxorpd(ymmA, ymmB, anyptr_gpC);
- e->vxorpd(zmmA, zmmB, zmmC);
- e->vxorpd(zmmA, zmmB, anyptr_gpC);
- e->vxorps(xmmA, xmmB, xmmC);
- e->vxorps(xmmA, xmmB, anyptr_gpC);
- e->vxorps(ymmA, ymmB, ymmC);
- e->vxorps(ymmA, ymmB, anyptr_gpC);
- e->vxorps(zmmA, zmmB, zmmC);
- e->vxorps(zmmA, zmmB, anyptr_gpC);
-
- // Mark the end.
- e->nop();
- e->nop();
- e->nop();
- e->nop();
-}
-
-} // {asmtest}
-
-#endif // ASMJIT_TEST_OPCODE_H_INCLUDED
diff --git a/test/asmjit_test_perf.cpp b/test/asmjit_test_perf.cpp
new file mode 100644
index 0000000..a1b638e
--- /dev/null
+++ b/test/asmjit_test_perf.cpp
@@ -0,0 +1,69 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include <asmjit/core.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cmdline.h"
+
+using namespace asmjit;
+
+#if defined(ASMJIT_BUILD_X86)
+void benchmarkX86Emitters(uint32_t numIterations, bool testX86, bool testX64) noexcept;
+#endif
+
+int main(int argc, char* argv[]) { // Entry point of the performance suite: prints a banner and usage text, then runs the selected benchmarks.
+ CmdLine cmdLine(argc, argv);
+ uint32_t numIterations = 20000; // Default iteration count; replaced below when --quick is given.
+
+ printf("AsmJit Performance Suite v%u.%u.%u:\n\n",
+ unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), // Three version components are unpacked from bits 16 and up, 8..15 and 0..7.
+ unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF),
+ unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF));
+
+ printf("Usage:\n"); // The usage text is printed on every run, not only for --help.
+ printf(" --help Show usage only\n");
+ printf(" --quick Decrease the number of iterations to make tests quicker\n");
+ printf(" --arch=<ARCH> Select architecture to run ('all' by default)\n");
+ printf("\n");
+
+ if (cmdLine.hasArg("--help"))
+ return 0; // --help exits here, after the usage text above has been shown.
+
+ if (cmdLine.hasArg("--quick"))
+ numIterations = 1000;
+
+ const char* arch = cmdLine.valueOf("--arch", "all"); // NOTE(review): second argument is presumably the value returned when --arch is absent - confirm in cmdline.h.
+
+#if defined(ASMJIT_BUILD_X86)
+ bool testX86 = strcmp(arch, "all") == 0 || strcmp(arch, "x86") == 0; // "all" enables both the X86 and the X64 runs.
+ bool testX64 = strcmp(arch, "all") == 0 || strcmp(arch, "x64") == 0;
+
+ if (testX86 || testX64) // Any other --arch value selects nothing here and is silently ignored.
+ benchmarkX86Emitters(numIterations, testX86, testX64);
+#endif
+
+ return 0;
+}
diff --git a/test/asmjit_test_perf.h b/test/asmjit_test_perf.h
new file mode 100644
index 0000000..565c18a
--- /dev/null
+++ b/test/asmjit_test_perf.h
@@ -0,0 +1,81 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef ASMJIT_TEST_PERF_H_INCLUDED
+#define ASMJIT_TEST_PERF_H_INCLUDED
+
+#include <asmjit/core.h>
+#include "performancetimer.h"
+
+class MyErrorHandler : public asmjit::ErrorHandler {
+ void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) {
+ (void)err;
+ (void)origin;
+ printf("ERROR: %s\n", message);
+ abort();
+ }
+};
+
+template<typename EmitterT, typename FuncT> // EmitterT: emitter type constructed locally; FuncT: callable invoked as func(emitter).
+static void bench(asmjit::CodeHolder& code, uint32_t arch, uint32_t numIterations, const char* testName, const FuncT& func) noexcept { // Runs func numIterations times and prints the code size plus the shortest measured time.
+ EmitterT emitter;
+ MyErrorHandler eh;
+
+ const char* archName = // Label used in the report line; anything other than X86/X64 is shown as "???".
+ arch == asmjit::Environment::kArchX86 ? "X86" :
+ arch == asmjit::Environment::kArchX64 ? "X64" : "???";
+
+ const char* emitterName = // Label chosen from the emitter's own isAssembler()/isCompiler()/isBuilder() answers.
+ emitter.isAssembler() ? "Assembler" :
+ emitter.isCompiler() ? "Compiler" :
+ emitter.isBuilder() ? "Builder" : "Unknown";
+
+ uint64_t codeSize = 0;
+ asmjit::Environment env(arch);
+
+ PerformanceTimer timer;
+ double duration = std::numeric_limits<double>::infinity(); // Starts at +inf so the first measured run always becomes the minimum.
+
+ for (uint32_t r = 0; r < numIterations; r++) {
+ codeSize = 0; // Reset on every pass, so the reported size is that of the last iteration only.
+ code.init(env);
+ code.setErrorHandler(&eh);
+ code.attach(&emitter);
+
+ timer.start(); // Only the func(emitter) call is timed; the CodeHolder init/attach/reset calls are outside the timed region.
+ func(emitter);
+ timer.stop();
+
+ codeSize += code.codeSize();
+
+ code.reset();
+ duration = asmjit::Support::min(duration, timer.duration()); // Keep the smallest duration seen so far.
+ }
+
+ printf(" [%s] %-9s %-16s | CodeSize:%5llu [B] | Time:%8.4f [ms]", archName, emitterName, testName, (unsigned long long)codeSize, duration); // With numIterations == 0 this reports size 0 and an infinite duration.
+ if (codeSize) // The throughput column is omitted when the code size is zero.
+ printf(" | Speed:%8.3f [MB/s]", mbps(duration, codeSize)); // NOTE(review): mbps() is not defined in this header; presumably provided by performancetimer.h - confirm.
+ printf("\n");
+}
+
+#endif // ASMJIT_TEST_PERF_H_INCLUDED
diff --git a/test/asmjit_test_perf_x86.cpp b/test/asmjit_test_perf_x86.cpp
new file mode 100644
index 0000000..cfbaa1f
--- /dev/null
+++ b/test/asmjit_test_perf_x86.cpp
@@ -0,0 +1,5049 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include <asmjit/core.h>
+
+#ifdef ASMJIT_BUILD_X86
+#include <asmjit/x86.h>
+
+#include <limits>
+#include <stdio.h>
+#include <string.h>
+
+#include "asmjit_test_misc.h"
+#include "asmjit_test_perf.h"
+
+using namespace asmjit;
+
+// Operand form requested from the instruction sequence generators in this file.
+enum class InstForm {
+  kReg = 0, // Emit the register-operand variant of a sequence.
+  kMem = 1  // Emit the memory-operand variant of a sequence.
+};
+
+// Generates a long sequence of GP instructions through `cc`, using register or memory operands depending on `form`.
+template<typename Emitter>
+static void generateGpSequenceInternal(
+ Emitter& cc, // Emitter that receives every instruction of the sequence.
+ InstForm form, // kReg selects the register-only sequence, any other value the memory sequence.
+ const x86::Gp& a, const x86::Gp& b, const x86::Gp& c, const x86::Gp& d) { // Registers the sequence operates on.
+ // Load an immediate into each of the four registers before the sequence starts.
+ cc.mov(a, 0xAAAAAAAA);
+ cc.mov(b, 0xBBBBBBBB);
+ cc.mov(c, 0xCCCCCCCC);
+ cc.mov(d, 0xFFFFFFFF);
+
+ if (form == InstForm::kReg) { // Register form: all operands are taken from a, b, c and d.
+ cc.adc(a, b);
+ cc.adc(b, c);
+ cc.adc(c, d);
+ cc.add(a, b);
+ cc.add(b, c);
+ cc.add(c, d);
+ cc.and_(a, b);
+ cc.and_(b, c);
+ cc.and_(c, d);
+ cc.bsf(a, b);
+ cc.bsf(b, c);
+ cc.bsf(c, d);
+ cc.bsr(a, b);
+ cc.bsr(b, c);
+ cc.bsr(c, d);
+ cc.bswap(a);
+ cc.bswap(b);
+ cc.bswap(c);
+ cc.bt(a, b);
+ cc.bt(b, c);
+ cc.bt(c, d);
+ cc.btc(a, b);
+ cc.btc(b, c);
+ cc.btc(c, d);
+ cc.btr(a, b);
+ cc.btr(b, c);
+ cc.btr(c, d);
+ cc.bts(a, b);
+ cc.bts(b, c);
+ cc.bts(c, d);
+ cc.cmp(a, b);
+ cc.cmovc(a, b);
+ cc.cmp(b, c);
+ cc.cmovc(b, c);
+ cc.cmp(c, d);
+ cc.cmovc(c, d);
+ cc.dec(a);
+ cc.dec(b);
+ cc.dec(c);
+ cc.imul(a, b);
+ cc.imul(b, c);
+ cc.imul(c, d);
+ cc.movsx(a, b.r8Lo());
+ cc.movsx(b, c.r8Lo());
+ cc.movsx(c, d.r8Lo());
+ cc.movzx(a, b.r8Lo());
+ cc.movzx(b, c.r8Lo());
+ cc.movzx(c, d.r8Lo());
+ cc.neg(a);
+ cc.neg(b);
+ cc.neg(c);
+ cc.not_(a);
+ cc.not_(b);
+ cc.not_(c);
+ cc.or_(a, b);
+ cc.or_(b, c);
+ cc.or_(c, d);
+ cc.sbb(a, b);
+ cc.sbb(b, c);
+ cc.sbb(c, d);
+ cc.sub(a, b);
+ cc.sub(b, c);
+ cc.sub(c, d);
+ cc.test(a, b);
+ cc.test(b, c);
+ cc.test(c, d);
+ cc.xchg(a, b);
+ cc.xchg(b, c);
+ cc.xchg(c, d);
+ cc.xor_(a, b);
+ cc.xor_(b, c);
+ cc.xor_(c, d);
+ // Rotates and shifts: the count operand is always the low 8-bit part of `c`.
+ cc.rcl(a, c.r8Lo());
+ cc.rcl(b, c.r8Lo());
+ cc.rcl(d, c.r8Lo());
+ cc.rcr(a, c.r8Lo());
+ cc.rcr(b, c.r8Lo());
+ cc.rcr(d, c.r8Lo());
+ cc.rol(a, c.r8Lo());
+ cc.rol(b, c.r8Lo());
+ cc.rol(d, c.r8Lo());
+ cc.ror(a, c.r8Lo());
+ cc.ror(b, c.r8Lo());
+ cc.ror(d, c.r8Lo());
+ cc.shl(a, c.r8Lo());
+ cc.shl(b, c.r8Lo());
+ cc.shl(d, c.r8Lo());
+ cc.shr(a, c.r8Lo());
+ cc.shr(b, c.r8Lo());
+ cc.shr(d, c.r8Lo());
+ cc.sar(a, c.r8Lo());
+ cc.sar(b, c.r8Lo());
+ cc.sar(d, c.r8Lo());
+ cc.shld(a, b, c.r8Lo());
+ cc.shld(b, d, c.r8Lo());
+ cc.shld(d, a, c.r8Lo());
+ cc.shrd(a, b, c.r8Lo());
+ cc.shrd(b, d, c.r8Lo());
+ cc.shrd(d, a, c.r8Lo());
+ // Remaining group: adcx/adox, andn, bextr, bls*, bzhi, lzcnt, pdep/pext, popcnt, rorx, sarx/shlx/shrx, tzcnt.
+ cc.adcx(a, b);
+ cc.adox(a, b);
+ cc.adcx(b, c);
+ cc.adox(b, c);
+ cc.adcx(c, d);
+ cc.adox(c, d);
+ cc.andn(a, b, c);
+ cc.andn(b, c, d);
+ cc.andn(c, d, a);
+ cc.bextr(a, b, c);
+ cc.bextr(b, c, d);
+ cc.bextr(c, d, a);
+ cc.blsi(a, b);
+ cc.blsi(b, c);
+ cc.blsi(c, d);
+ cc.blsmsk(a, b);
+ cc.blsmsk(b, c);
+ cc.blsmsk(c, d);
+ cc.blsr(a, b);
+ cc.blsr(b, c);
+ cc.blsr(c, d);
+ cc.bzhi(a, b, c);
+ cc.bzhi(b, c, d);
+ cc.bzhi(c, d, a);
+ cc.lzcnt(a, b);
+ cc.lzcnt(b, c);
+ cc.lzcnt(c, d);
+ cc.pdep(a, b, c);
+ cc.pdep(b, c, d);
+ cc.pdep(c, d, a);
+ cc.pext(a, b, c);
+ cc.pext(b, c, d);
+ cc.pext(c, d, a);
+ cc.popcnt(a, b);
+ cc.popcnt(b, c);
+ cc.popcnt(c, d);
+ cc.rorx(a, b, 8);
+ cc.rorx(b, c, 8);
+ cc.rorx(c, d, 8);
+ cc.sarx(a, b, c);
+ cc.sarx(b, c, d);
+ cc.sarx(c, d, a);
+ cc.shlx(a, b, c);
+ cc.shlx(b, c, d);
+ cc.shlx(c, d, a);
+ cc.shrx(a, b, c);
+ cc.shrx(b, c, d);
+ cc.shrx(c, d, a);
+ cc.tzcnt(a, b);
+ cc.tzcnt(b, c);
+ cc.tzcnt(c, d);
+ }
+ else { // Memory form: one operand of each instruction is a memory location based on `c`.
+ uint32_t regSize = cc.registerSize();
+ x86::Mem m = x86::ptr(c, 0, regSize); // Memory operand based on `c`, sized to the emitter's register size.
+ x86::Mem m8 = x86::byte_ptr(c); // Byte-sized memory operand based on `c`, used by movsx/movzx below.
+
+ cc.adc(a, m);
+ cc.adc(b, m);
+ cc.adc(c, m);
+ cc.add(a, m);
+ cc.add(b, m);
+ cc.add(c, m);
+ cc.and_(a, m);
+ cc.and_(b, m);
+ cc.and_(c, m);
+ cc.bsf(a, m);
+ cc.bsf(b, m);
+ cc.bsf(c, m);
+ cc.bsr(a, m);
+ cc.bsr(b, m);
+ cc.bsr(c, m);
+ cc.bt(m, a);
+ cc.bt(m, b);
+ cc.bt(m, c);
+ cc.btc(m, a);
+ cc.btc(m, b);
+ cc.btc(m, c);
+ cc.btr(m, a);
+ cc.btr(m, b);
+ cc.btr(m, c);
+ cc.bts(m, a);
+ cc.bts(m, b);
+ cc.bts(m, c);
+ cc.cmp(a, m);
+ cc.cmovc(a, m);
+ cc.cmp(b, m);
+ cc.cmovc(b, m);
+ cc.cmp(c, m);
+ cc.cmovc(c, m);
+ cc.dec(m);
+ cc.movsx(a, m8);
+ cc.movsx(b, m8);
+ cc.movsx(c, m8);
+ cc.movzx(a, m8);
+ cc.movzx(b, m8);
+ cc.movzx(c, m8);
+ cc.neg(m);
+ cc.not_(m);
+ cc.or_(a, m);
+ cc.or_(b, m);
+ cc.or_(c, m);
+ cc.sbb(a, m);
+ cc.sbb(b, m);
+ cc.sbb(c, m);
+ cc.sub(a, m);
+ cc.sub(b, m);
+ cc.sub(c, m);
+ cc.test(m, a);
+ cc.test(m, b);
+ cc.test(m, c);
+ cc.xchg(a, m);
+ cc.xchg(b, m);
+ cc.xchg(c, m);
+ cc.xor_(a, m);
+ cc.xor_(b, m);
+ cc.xor_(c, m);
+ // Rotates and shifts of the memory operand: the count is the low 8-bit part of `c`.
+ cc.rcl(m, c.r8Lo());
+ cc.rcr(m, c.r8Lo());
+ cc.rol(m, c.r8Lo());
+ cc.ror(m, c.r8Lo());
+ cc.shl(m, c.r8Lo());
+ cc.shr(m, c.r8Lo());
+ cc.sar(m, c.r8Lo());
+ cc.shld(m, b, c.r8Lo());
+ cc.shld(m, d, c.r8Lo());
+ cc.shld(m, a, c.r8Lo());
+ cc.shrd(m, b, c.r8Lo());
+ cc.shrd(m, d, c.r8Lo());
+ cc.shrd(m, a, c.r8Lo());
+ // Remaining group, same instructions as in the register form but with a memory source operand.
+ cc.adcx(a, m);
+ cc.adox(a, m);
+ cc.adcx(b, m);
+ cc.adox(b, m);
+ cc.adcx(c, m);
+ cc.adox(c, m);
+ cc.andn(a, b, m);
+ cc.andn(b, c, m);
+ cc.andn(c, d, m);
+ cc.bextr(a, m, c);
+ cc.bextr(b, m, d);
+ cc.bextr(c, m, a);
+ cc.blsi(a, m);
+ cc.blsi(b, m);
+ cc.blsi(c, m);
+ cc.blsmsk(a, m);
+ cc.blsmsk(b, m);
+ cc.blsmsk(c, m);
+ cc.blsr(a, m);
+ cc.blsr(b, m);
+ cc.blsr(c, m);
+ cc.bzhi(a, m, c);
+ cc.bzhi(b, m, d);
+ cc.bzhi(c, m, a);
+ cc.lzcnt(a, m);
+ cc.lzcnt(b, m);
+ cc.lzcnt(c, m);
+ cc.pdep(a, b, m);
+ cc.pdep(b, c, m);
+ cc.pdep(c, d, m);
+ cc.pext(a, b, m);
+ cc.pext(b, c, m);
+ cc.pext(c, d, m);
+ cc.popcnt(a, m);
+ cc.popcnt(b, m);
+ cc.popcnt(c, m);
+ cc.rorx(a, m, 8);
+ cc.rorx(b, m, 8);
+ cc.rorx(c, m, 8);
+ cc.sarx(a, m, c);
+ cc.sarx(b, m, d);
+ cc.sarx(c, m, a);
+ cc.shlx(a, m, c);
+ cc.shlx(b, m, d);
+ cc.shlx(c, m, a);
+ cc.shrx(a, m, c);
+ cc.shrx(b, m, d);
+ cc.shrx(c, m, a);
+ cc.tzcnt(a, m);
+ cc.tzcnt(b, m);
+ cc.tzcnt(c, m);
+ }
+}
+
+// Emits the GP instruction sequence through `emitter`, dispatching on the emitter's concrete kind.
+//
+//   - Assembler and Builder use the physical zax/zbx/zcx/zdx registers. When `emitPrologEpilog` is
+//     true the sequence is wrapped in a prolog/epilog generated from a FuncFrame that marks these
+//     four registers as dirty.
+//   - Compiler uses four virtual registers and wraps the sequence in a function node;
+//     `emitPrologEpilog` is not used in this case.
+//
+// `form` is forwarded to generateGpSequenceInternal() unchanged.
+static void generateGpSequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) {
+  using namespace asmjit::x86;
+
+  if (emitter.isAssembler()) {
+    Assembler& cc = *emitter.as<Assembler>();
+
+    x86::Gp a = cc.zax();
+    x86::Gp b = cc.zbx();
+    x86::Gp c = cc.zcx();
+    x86::Gp d = cc.zdx();
+
+    if (emitPrologEpilog) {
+      FuncDetail func;
+      func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+      FuncFrame frame;
+      frame.init(func);
+      frame.addDirtyRegs(a, b, c, d);
+      frame.finalize();
+
+      cc.emitProlog(frame);
+      generateGpSequenceInternal(cc, form, a, b, c, d);
+      cc.emitEpilog(frame);
+    }
+    else {
+      generateGpSequenceInternal(cc, form, a, b, c, d);
+    }
+  }
+#ifndef ASMJIT_NO_COMPILER
+  // The Compiler check has to come before the Builder check: BaseEmitter::isBuilder() is documented
+  // to return true for Compiler emitters as well, so testing isBuilder() first would make this
+  // branch unreachable and route Compiler runs through the physical-register Builder path.
+  else if (emitter.isCompiler()) {
+    Compiler& cc = *emitter.as<Compiler>();
+
+    Gp a = cc.newIntPtr("a");
+    Gp b = cc.newIntPtr("b");
+    Gp c = cc.newIntPtr("c");
+    Gp d = cc.newIntPtr("d");
+
+    cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));
+    generateGpSequenceInternal(cc, form, a, b, c, d);
+    cc.endFunc();
+  }
+#endif
+#ifndef ASMJIT_NO_BUILDER
+  else if (emitter.isBuilder()) {
+    Builder& cc = *emitter.as<Builder>();
+
+    x86::Gp a = cc.zax();
+    x86::Gp b = cc.zbx();
+    x86::Gp c = cc.zcx();
+    x86::Gp d = cc.zdx();
+
+    if (emitPrologEpilog) {
+      FuncDetail func;
+      func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+      FuncFrame frame;
+      frame.init(func);
+      frame.addDirtyRegs(a, b, c, d);
+      frame.finalize();
+
+      cc.emitProlog(frame);
+      generateGpSequenceInternal(cc, form, a, b, c, d);
+      cc.emitEpilog(frame);
+    }
+    else {
+      generateGpSequenceInternal(cc, form, a, b, c, d);
+    }
+  }
+#endif
+}
+
+// Generates a long sequence of SSE instructions (grouped below as SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2) through `cc`, using register or memory operands depending on `form`.
+template<typename Emitter>
+static void generateSseSequenceInternal(
+ Emitter& cc, // Emitter that receives every instruction of the sequence.
+ InstForm form, // kReg selects the register-only sequence, any other value the memory sequence.
+ const x86::Gp& gp, // GP register used by conversions/extracts/inserts and as the memory base.
+ const x86::Xmm& xmmA, const x86::Xmm& xmmB, const x86::Xmm& xmmC, const x86::Xmm& xmmD) { // xmmC and xmmD are only cleared, never used as operands afterwards.
+ // gpd/gpq are the r32()/r64() views of `gp`; gpz is gpd when the emitter is 32-bit, gpq otherwise.
+ x86::Gp gpd = gp.r32();
+ x86::Gp gpq = gp.r64();
+ x86::Gp gpz = cc.is32Bit() ? gpd : gpq;
+ // XOR every register with itself before the sequence starts.
+ cc.xor_(gpd, gpd);
+ cc.xorps(xmmA, xmmA);
+ cc.xorps(xmmB, xmmB);
+ cc.xorps(xmmC, xmmC);
+ cc.xorps(xmmD, xmmD);
+
+ if (form == InstForm::kReg) { // Register form.
+ // SSE.
+ cc.addps(xmmA, xmmB);
+ cc.addss(xmmA, xmmB);
+ cc.andnps(xmmA, xmmB);
+ cc.andps(xmmA, xmmB);
+ cc.cmpps(xmmA, xmmB, 0);
+ cc.cmpss(xmmA, xmmB, 0);
+ cc.comiss(xmmA, xmmB);
+ cc.cvtsi2ss(xmmA, gpd);
+ cc.cvtsi2ss(xmmA, gpz);
+ cc.cvtss2si(gpd, xmmB);
+ cc.cvtss2si(gpz, xmmB);
+ cc.cvttss2si(gpd, xmmB);
+ cc.cvttss2si(gpz, xmmB);
+ cc.divps(xmmA, xmmB);
+ cc.divss(xmmA, xmmB);
+ cc.maxps(xmmA, xmmB);
+ cc.maxss(xmmA, xmmB);
+ cc.minps(xmmA, xmmB);
+ cc.minss(xmmA, xmmB);
+ cc.movaps(xmmA, xmmB);
+ cc.movd(gpd, xmmB);
+ cc.movd(xmmA, gpd);
+ cc.movq(xmmA, xmmB);
+ cc.movhlps(xmmA, xmmB);
+ cc.movlhps(xmmA, xmmB);
+ cc.movups(xmmA, xmmB);
+ cc.mulps(xmmA, xmmB);
+ cc.mulss(xmmA, xmmB);
+ cc.orps(xmmA, xmmB);
+ cc.rcpps(xmmA, xmmB);
+ cc.rcpss(xmmA, xmmB);
+ cc.psadbw(xmmA, xmmB);
+ cc.rsqrtps(xmmA, xmmB);
+ cc.rsqrtss(xmmA, xmmB);
+ cc.sfence();
+ cc.shufps(xmmA, xmmB, 0);
+ cc.sqrtps(xmmA, xmmB);
+ cc.sqrtss(xmmA, xmmB);
+ cc.subps(xmmA, xmmB);
+ cc.subss(xmmA, xmmB);
+ cc.ucomiss(xmmA, xmmB);
+ cc.unpckhps(xmmA, xmmB);
+ cc.unpcklps(xmmA, xmmB);
+ cc.xorps(xmmA, xmmB);
+
+ // SSE2.
+ cc.addpd(xmmA, xmmB);
+ cc.addsd(xmmA, xmmB);
+ cc.andnpd(xmmA, xmmB);
+ cc.andpd(xmmA, xmmB);
+ cc.cmppd(xmmA, xmmB, 0);
+ cc.cmpsd(xmmA, xmmB, 0);
+ cc.comisd(xmmA, xmmB);
+ cc.cvtdq2pd(xmmA, xmmB);
+ cc.cvtdq2ps(xmmA, xmmB);
+ cc.cvtpd2dq(xmmA, xmmB);
+ cc.cvtpd2ps(xmmA, xmmB);
+ cc.cvtps2dq(xmmA, xmmB);
+ cc.cvtps2pd(xmmA, xmmB);
+ cc.cvtsd2si(gpd, xmmB);
+ cc.cvtsd2si(gpz, xmmB);
+ cc.cvtsd2ss(xmmA, xmmB);
+ cc.cvtsi2sd(xmmA, gpd);
+ cc.cvtsi2sd(xmmA, gpz);
+ cc.cvtss2sd(xmmA, xmmB);
+ cc.cvtss2si(gpd, xmmB); // Also emitted in the SSE group above.
+ cc.cvtss2si(gpz, xmmB);
+ cc.cvttpd2dq(xmmA, xmmB);
+ cc.cvttps2dq(xmmA, xmmB);
+ cc.cvttsd2si(gpd, xmmB);
+ cc.cvttsd2si(gpz, xmmB);
+ cc.divpd(xmmA, xmmB);
+ cc.divsd(xmmA, xmmB);
+ cc.maxpd(xmmA, xmmB);
+ cc.maxsd(xmmA, xmmB);
+ cc.minpd(xmmA, xmmB);
+ cc.minsd(xmmA, xmmB);
+ cc.movdqa(xmmA, xmmB);
+ cc.movdqu(xmmA, xmmB);
+ cc.movmskps(gpd, xmmB);
+ cc.movmskpd(gpd, xmmB);
+ cc.movsd(xmmA, xmmB);
+ cc.mulpd(xmmA, xmmB);
+ cc.mulsd(xmmA, xmmB);
+ cc.orpd(xmmA, xmmB);
+ cc.packsswb(xmmA, xmmB);
+ cc.packssdw(xmmA, xmmB);
+ cc.packuswb(xmmA, xmmB);
+ cc.paddb(xmmA, xmmB);
+ cc.paddw(xmmA, xmmB);
+ cc.paddd(xmmA, xmmB);
+ cc.paddq(xmmA, xmmB);
+ cc.paddsb(xmmA, xmmB);
+ cc.paddsw(xmmA, xmmB);
+ cc.paddusb(xmmA, xmmB);
+ cc.paddusw(xmmA, xmmB);
+ cc.pand(xmmA, xmmB);
+ cc.pandn(xmmA, xmmB);
+ cc.pavgb(xmmA, xmmB);
+ cc.pavgw(xmmA, xmmB);
+ cc.pcmpeqb(xmmA, xmmB);
+ cc.pcmpeqw(xmmA, xmmB);
+ cc.pcmpeqd(xmmA, xmmB);
+ cc.pcmpgtb(xmmA, xmmB);
+ cc.pcmpgtw(xmmA, xmmB);
+ cc.pcmpgtd(xmmA, xmmB);
+ cc.pmaxsw(xmmA, xmmB);
+ cc.pmaxub(xmmA, xmmB);
+ cc.pminsw(xmmA, xmmB);
+ cc.pminub(xmmA, xmmB);
+ cc.pmovmskb(gpd, xmmB);
+ cc.pmulhw(xmmA, xmmB);
+ cc.pmulhuw(xmmA, xmmB);
+ cc.pmullw(xmmA, xmmB);
+ cc.pmuludq(xmmA, xmmB);
+ cc.por(xmmA, xmmB);
+ cc.pslld(xmmA, xmmB);
+ cc.pslld(xmmA, 0);
+ cc.psllq(xmmA, xmmB);
+ cc.psllq(xmmA, 0);
+ cc.psllw(xmmA, xmmB);
+ cc.psllw(xmmA, 0);
+ cc.pslldq(xmmA, 0);
+ cc.psrad(xmmA, xmmB);
+ cc.psrad(xmmA, 0);
+ cc.psraw(xmmA, xmmB);
+ cc.psraw(xmmA, 0);
+ cc.psubb(xmmA, xmmB);
+ cc.psubw(xmmA, xmmB);
+ cc.psubd(xmmA, xmmB);
+ cc.psubq(xmmA, xmmB);
+ cc.pmaddwd(xmmA, xmmB);
+ cc.pshufd(xmmA, xmmB, 0);
+ cc.pshufhw(xmmA, xmmB, 0);
+ cc.pshuflw(xmmA, xmmB, 0);
+ cc.psrld(xmmA, xmmB);
+ cc.psrld(xmmA, 0);
+ cc.psrlq(xmmA, xmmB);
+ cc.psrlq(xmmA, 0);
+ cc.psrldq(xmmA, 0);
+ cc.psrlw(xmmA, xmmB);
+ cc.psrlw(xmmA, 0);
+ cc.psubsb(xmmA, xmmB);
+ cc.psubsw(xmmA, xmmB);
+ cc.psubusb(xmmA, xmmB);
+ cc.psubusw(xmmA, xmmB);
+ cc.punpckhbw(xmmA, xmmB);
+ cc.punpckhwd(xmmA, xmmB);
+ cc.punpckhdq(xmmA, xmmB);
+ cc.punpckhqdq(xmmA, xmmB);
+ cc.punpcklbw(xmmA, xmmB);
+ cc.punpcklwd(xmmA, xmmB);
+ cc.punpckldq(xmmA, xmmB);
+ cc.punpcklqdq(xmmA, xmmB);
+ cc.pxor(xmmA, xmmB);
+ cc.sqrtpd(xmmA, xmmB);
+ cc.sqrtsd(xmmA, xmmB);
+ cc.subpd(xmmA, xmmB);
+ cc.subsd(xmmA, xmmB);
+ cc.ucomisd(xmmA, xmmB);
+ cc.unpckhpd(xmmA, xmmB);
+ cc.unpcklpd(xmmA, xmmB);
+ cc.xorpd(xmmA, xmmB);
+
+ // SSE3.
+ cc.addsubpd(xmmA, xmmB);
+ cc.addsubps(xmmA, xmmB);
+ cc.haddpd(xmmA, xmmB);
+ cc.haddps(xmmA, xmmB);
+ cc.hsubpd(xmmA, xmmB);
+ cc.hsubps(xmmA, xmmB);
+ cc.movddup(xmmA, xmmB);
+ cc.movshdup(xmmA, xmmB);
+ cc.movsldup(xmmA, xmmB);
+
+ // SSSE3.
+ cc.psignb(xmmA, xmmB);
+ cc.psignw(xmmA, xmmB);
+ cc.psignd(xmmA, xmmB);
+ cc.phaddw(xmmA, xmmB);
+ cc.phaddd(xmmA, xmmB);
+ cc.phaddsw(xmmA, xmmB);
+ cc.phsubw(xmmA, xmmB);
+ cc.phsubd(xmmA, xmmB);
+ cc.phsubsw(xmmA, xmmB);
+ cc.pmaddubsw(xmmA, xmmB);
+ cc.pabsb(xmmA, xmmB);
+ cc.pabsw(xmmA, xmmB);
+ cc.pabsd(xmmA, xmmB);
+ cc.pmulhrsw(xmmA, xmmB);
+ cc.pshufb(xmmA, xmmB);
+ cc.palignr(xmmA, xmmB, 0);
+
+ // SSE4.1.
+ cc.blendpd(xmmA, xmmB, 0);
+ cc.blendps(xmmA, xmmB, 0);
+ cc.blendvpd(xmmA, xmmB, xmmA);
+ cc.blendvps(xmmA, xmmB, xmmA);
+
+ cc.dppd(xmmA, xmmB, 0);
+ cc.dpps(xmmA, xmmB, 0);
+ cc.extractps(gpd, xmmB, 0);
+ cc.insertps(xmmA, xmmB, 0);
+ cc.mpsadbw(xmmA, xmmB, 0);
+ cc.packusdw(xmmA, xmmB);
+ cc.pblendvb(xmmA, xmmB, xmmA);
+ cc.pblendw(xmmA, xmmB, 0);
+ cc.pcmpeqq(xmmA, xmmB);
+ cc.pextrb(gpd, xmmB, 0);
+ cc.pextrd(gpd, xmmB, 0);
+ if (cc.is64Bit()) cc.pextrq(gpq, xmmB, 0); // Only emitted when the emitter is 64-bit.
+ cc.pextrw(gpd, xmmB, 0);
+ cc.phminposuw(xmmA, xmmB);
+ cc.pinsrb(xmmA, gpd, 0);
+ cc.pinsrd(xmmA, gpd, 0);
+ cc.pinsrw(xmmA, gpd, 0);
+ cc.pmaxuw(xmmA, xmmB);
+ cc.pmaxsb(xmmA, xmmB);
+ cc.pmaxsd(xmmA, xmmB);
+ cc.pmaxud(xmmA, xmmB);
+ cc.pminsb(xmmA, xmmB);
+ cc.pminuw(xmmA, xmmB);
+ cc.pminud(xmmA, xmmB);
+ cc.pminsd(xmmA, xmmB);
+ cc.pmovsxbw(xmmA, xmmB);
+ cc.pmovsxbd(xmmA, xmmB);
+ cc.pmovsxbq(xmmA, xmmB);
+ cc.pmovsxwd(xmmA, xmmB);
+ cc.pmovsxwq(xmmA, xmmB);
+ cc.pmovsxdq(xmmA, xmmB);
+ cc.pmovzxbw(xmmA, xmmB);
+ cc.pmovzxbd(xmmA, xmmB);
+ cc.pmovzxbq(xmmA, xmmB);
+ cc.pmovzxwd(xmmA, xmmB);
+ cc.pmovzxwq(xmmA, xmmB);
+ cc.pmovzxdq(xmmA, xmmB);
+ cc.pmuldq(xmmA, xmmB);
+ cc.pmulld(xmmA, xmmB);
+ cc.ptest(xmmA, xmmB);
+ cc.roundps(xmmA, xmmB, 0);
+ cc.roundss(xmmA, xmmB, 0);
+ cc.roundpd(xmmA, xmmB, 0);
+ cc.roundsd(xmmA, xmmB, 0);
+ }
+ else { // Memory form: the memory operand `m` is based on gpz.
+ x86::Mem m = x86::ptr(gpz);
+ // First group (corresponds to the "SSE." group of the register form).
+ cc.addps(xmmA, m);
+ cc.addss(xmmA, m);
+ cc.andnps(xmmA, m);
+ cc.andps(xmmA, m);
+ cc.cmpps(xmmA, m, 0);
+ cc.cmpss(xmmA, m, 0);
+ cc.comiss(xmmA, m);
+ cc.cvtpi2ps(xmmA, m);
+ cc.cvtsi2ss(xmmA, m);
+ cc.cvtss2si(gpd, m);
+ if (cc.is64Bit()) cc.cvtss2si(gpq, m); // 64-bit destination forms are only emitted for 64-bit emitters.
+ cc.cvttss2si(gpd, m);
+ if (cc.is64Bit()) cc.cvttss2si(gpq, m);
+ cc.divps(xmmA, m);
+ cc.divss(xmmA, m);
+ cc.maxps(xmmA, m);
+ cc.maxss(xmmA, m);
+ cc.minps(xmmA, m);
+ cc.minss(xmmA, m);
+ cc.movaps(xmmA, m);
+ cc.movaps(m, xmmB);
+ cc.movd(m, xmmB);
+ cc.movd(xmmA, m);
+ cc.movq(m, xmmB);
+ cc.movq(xmmA, m);
+ cc.movhps(xmmA, m);
+ cc.movhps(m, xmmB);
+ cc.movlps(xmmA, m);
+ cc.movlps(m, xmmB);
+ cc.movntps(m, xmmB);
+ cc.movss(xmmA, m);
+ cc.movss(m, xmmB);
+ cc.movups(xmmA, m);
+ cc.movups(m, xmmB);
+ cc.mulps(xmmA, m);
+ cc.mulss(xmmA, m);
+ cc.orps(xmmA, m);
+ cc.rcpps(xmmA, m);
+ cc.rcpss(xmmA, m);
+ cc.psadbw(xmmA, m);
+ cc.rsqrtps(xmmA, m);
+ cc.rsqrtss(xmmA, m);
+ cc.shufps(xmmA, m, 0);
+ cc.sqrtps(xmmA, m);
+ cc.sqrtss(xmmA, m);
+ cc.stmxcsr(m);
+ cc.subps(xmmA, m);
+ cc.subss(xmmA, m);
+ cc.ucomiss(xmmA, m);
+ cc.unpckhps(xmmA, m);
+ cc.unpcklps(xmmA, m);
+ cc.xorps(xmmA, m);
+
+ // SSE2.
+ cc.addpd(xmmA, m);
+ cc.addsd(xmmA, m);
+ cc.andnpd(xmmA, m);
+ cc.andpd(xmmA, m);
+ cc.cmppd(xmmA, m, 0);
+ cc.cmpsd(xmmA, m, 0);
+ cc.comisd(xmmA, m);
+ cc.cvtdq2pd(xmmA, m);
+ cc.cvtdq2ps(xmmA, m);
+ cc.cvtpd2dq(xmmA, m);
+ cc.cvtpd2ps(xmmA, m);
+ cc.cvtpi2pd(xmmA, m);
+ cc.cvtps2dq(xmmA, m);
+ cc.cvtps2pd(xmmA, m);
+ cc.cvtsd2si(gpd, m);
+ if (cc.is64Bit()) cc.cvtsd2si(gpq, m);
+ cc.cvtsd2ss(xmmA, m);
+ cc.cvtsi2sd(xmmA, m);
+ cc.cvtss2sd(xmmA, m);
+ cc.cvtss2si(gpd, m); // Also emitted in the first group above.
+ if (cc.is64Bit()) cc.cvtss2si(gpq, m);
+ cc.cvttpd2dq(xmmA, m);
+ cc.cvttps2dq(xmmA, m);
+ cc.cvttsd2si(gpd, m);
+ if (cc.is64Bit()) cc.cvttsd2si(gpq, m);
+ cc.divpd(xmmA, m);
+ cc.divsd(xmmA, m);
+ cc.maxpd(xmmA, m);
+ cc.maxsd(xmmA, m);
+ cc.minpd(xmmA, m);
+ cc.minsd(xmmA, m);
+ cc.movdqa(xmmA, m);
+ cc.movdqa(m, xmmB);
+ cc.movdqu(xmmA, m);
+ cc.movdqu(m, xmmB);
+ cc.movsd(xmmA, m);
+ cc.movsd(m, xmmB);
+ cc.movapd(xmmA, m);
+ cc.movapd(m, xmmB);
+ cc.movhpd(xmmA, m);
+ cc.movhpd(m, xmmB);
+ cc.movlpd(xmmA, m);
+ cc.movlpd(m, xmmB);
+ cc.movntdq(m, xmmB);
+ cc.movntpd(m, xmmB);
+ cc.movupd(xmmA, m);
+ cc.movupd(m, xmmB);
+ cc.mulpd(xmmA, m);
+ cc.mulsd(xmmA, m);
+ cc.orpd(xmmA, m);
+ cc.packsswb(xmmA, m);
+ cc.packssdw(xmmA, m);
+ cc.packuswb(xmmA, m);
+ cc.paddb(xmmA, m);
+ cc.paddw(xmmA, m);
+ cc.paddd(xmmA, m);
+ cc.paddq(xmmA, m);
+ cc.paddsb(xmmA, m);
+ cc.paddsw(xmmA, m);
+ cc.paddusb(xmmA, m);
+ cc.paddusw(xmmA, m);
+ cc.pand(xmmA, m);
+ cc.pandn(xmmA, m);
+ cc.pavgb(xmmA, m);
+ cc.pavgw(xmmA, m);
+ cc.pcmpeqb(xmmA, m);
+ cc.pcmpeqw(xmmA, m);
+ cc.pcmpeqd(xmmA, m);
+ cc.pcmpgtb(xmmA, m);
+ cc.pcmpgtw(xmmA, m);
+ cc.pcmpgtd(xmmA, m);
+ cc.pmaxsw(xmmA, m);
+ cc.pmaxub(xmmA, m);
+ cc.pminsw(xmmA, m);
+ cc.pminub(xmmA, m);
+ cc.pmulhw(xmmA, m);
+ cc.pmulhuw(xmmA, m);
+ cc.pmullw(xmmA, m);
+ cc.pmuludq(xmmA, m);
+ cc.por(xmmA, m);
+ cc.pslld(xmmA, m);
+ cc.psllq(xmmA, m);
+ cc.psllw(xmmA, m);
+ cc.psrad(xmmA, m);
+ cc.psraw(xmmA, m);
+ cc.psubb(xmmA, m);
+ cc.psubw(xmmA, m);
+ cc.psubd(xmmA, m);
+ cc.psubq(xmmA, m);
+ cc.pmaddwd(xmmA, m);
+ cc.pshufd(xmmA, m, 0);
+ cc.pshufhw(xmmA, m, 0);
+ cc.pshuflw(xmmA, m, 0);
+ cc.psrld(xmmA, m);
+ cc.psrlq(xmmA, m);
+ cc.psrlw(xmmA, m);
+ cc.psubsb(xmmA, m);
+ cc.psubsw(xmmA, m);
+ cc.psubusb(xmmA, m);
+ cc.psubusw(xmmA, m);
+ cc.punpckhbw(xmmA, m);
+ cc.punpckhwd(xmmA, m);
+ cc.punpckhdq(xmmA, m);
+ cc.punpckhqdq(xmmA, m);
+ cc.punpcklbw(xmmA, m);
+ cc.punpcklwd(xmmA, m);
+ cc.punpckldq(xmmA, m);
+ cc.punpcklqdq(xmmA, m);
+ cc.pxor(xmmA, m);
+ cc.sqrtpd(xmmA, m);
+ cc.sqrtsd(xmmA, m);
+ cc.subpd(xmmA, m);
+ cc.subsd(xmmA, m);
+ cc.ucomisd(xmmA, m);
+ cc.unpckhpd(xmmA, m);
+ cc.unpcklpd(xmmA, m);
+ cc.xorpd(xmmA, m);
+
+ // SSE3.
+ cc.addsubpd(xmmA, m);
+ cc.addsubps(xmmA, m);
+ cc.haddpd(xmmA, m);
+ cc.haddps(xmmA, m);
+ cc.hsubpd(xmmA, m);
+ cc.hsubps(xmmA, m);
+ cc.lddqu(xmmA, m);
+ cc.movddup(xmmA, m);
+ cc.movshdup(xmmA, m);
+ cc.movsldup(xmmA, m);
+
+ // SSSE3.
+ cc.psignb(xmmA, m);
+ cc.psignw(xmmA, m);
+ cc.psignd(xmmA, m);
+ cc.phaddw(xmmA, m);
+ cc.phaddd(xmmA, m);
+ cc.phaddsw(xmmA, m);
+ cc.phsubw(xmmA, m);
+ cc.phsubd(xmmA, m);
+ cc.phsubsw(xmmA, m);
+ cc.pmaddubsw(xmmA, m);
+ cc.pabsb(xmmA, m);
+ cc.pabsw(xmmA, m);
+ cc.pabsd(xmmA, m);
+ cc.pmulhrsw(xmmA, m);
+ cc.pshufb(xmmA, m);
+ cc.palignr(xmmA, m, 0);
+
+ // SSE4.1.
+ cc.blendpd(xmmA, m, 0);
+ cc.blendps(xmmA, m, 0);
+ cc.blendvpd(xmmA, m, xmmA);
+ cc.blendvps(xmmA, m, xmmA);
+
+ cc.dppd(xmmA, m, 0);
+ cc.dpps(xmmA, m, 0);
+ cc.extractps(m, xmmB, 0);
+ cc.insertps(xmmA, m, 0);
+ cc.movntdqa(xmmA, m);
+ cc.mpsadbw(xmmA, m, 0);
+ cc.packusdw(xmmA, m);
+ cc.pblendvb(xmmA, m, xmmA);
+ cc.pblendw(xmmA, m, 0);
+ cc.pcmpeqq(xmmA, m);
+ cc.pextrb(m, xmmB, 0);
+ cc.pextrd(m, xmmB, 0);
+ if (cc.is64Bit()) cc.pextrq(m, xmmB, 0); // Only emitted when the emitter is 64-bit.
+ cc.pextrw(m, xmmB, 0);
+ cc.phminposuw(xmmA, m);
+ cc.pinsrb(xmmA, m, 0);
+ cc.pinsrd(xmmA, m, 0);
+ cc.pinsrw(xmmA, m, 0);
+ cc.pmaxuw(xmmA, m);
+ cc.pmaxsb(xmmA, m);
+ cc.pmaxsd(xmmA, m);
+ cc.pmaxud(xmmA, m);
+ cc.pminsb(xmmA, m);
+ cc.pminuw(xmmA, m);
+ cc.pminud(xmmA, m);
+ cc.pminsd(xmmA, m);
+ cc.pmovsxbw(xmmA, m);
+ cc.pmovsxbd(xmmA, m);
+ cc.pmovsxbq(xmmA, m);
+ cc.pmovsxwd(xmmA, m);
+ cc.pmovsxwq(xmmA, m);
+ cc.pmovsxdq(xmmA, m);
+ cc.pmovzxbw(xmmA, m);
+ cc.pmovzxbd(xmmA, m);
+ cc.pmovzxbq(xmmA, m);
+ cc.pmovzxwd(xmmA, m);
+ cc.pmovzxwq(xmmA, m);
+ cc.pmovzxdq(xmmA, m);
+ cc.pmuldq(xmmA, m);
+ cc.pmulld(xmmA, m);
+ cc.ptest(xmmA, m);
+ cc.roundps(xmmA, m, 0);
+ cc.roundss(xmmA, m, 0);
+ cc.roundpd(xmmA, m, 0);
+ cc.roundsd(xmmA, m, 0);
+
+ // SSE4.2.
+ cc.pcmpgtq(xmmA, m); // Present only in the memory form; the register form has no SSE4.2 group.
+ }
+}
+
+// Emits the SSE instruction sequence through `emitter`, dispatching on the emitter's concrete kind.
+//
+//   - Assembler and Builder use the physical registers eax and xmm0..xmm3. When `emitPrologEpilog`
+//     is true the sequence is wrapped in a prolog/epilog generated from a FuncFrame that marks
+//     these registers as dirty.
+//   - Compiler uses virtual registers (one GP, four XMM) and wraps the sequence in a function node;
+//     `emitPrologEpilog` is not used in this case.
+//
+// `form` is forwarded to generateSseSequenceInternal() unchanged.
+static void generateSseSequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) {
+  using namespace asmjit::x86;
+
+  if (emitter.isAssembler()) {
+    Assembler& cc = *emitter.as<Assembler>();
+
+    if (emitPrologEpilog) {
+      FuncDetail func;
+      func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+      FuncFrame frame;
+      frame.init(func);
+      frame.addDirtyRegs(eax, xmm0, xmm1, xmm2, xmm3);
+      frame.finalize();
+
+      cc.emitProlog(frame);
+      generateSseSequenceInternal(cc, form, eax, xmm0, xmm1, xmm2, xmm3);
+      cc.emitEpilog(frame);
+    }
+    else {
+      generateSseSequenceInternal(cc, form, eax, xmm0, xmm1, xmm2, xmm3);
+    }
+  }
+#ifndef ASMJIT_NO_COMPILER
+  // The Compiler check has to come before the Builder check: BaseEmitter::isBuilder() is documented
+  // to return true for Compiler emitters as well, so testing isBuilder() first would make this
+  // branch unreachable and route Compiler runs through the physical-register Builder path.
+  else if (emitter.isCompiler()) {
+    Compiler& cc = *emitter.as<Compiler>();
+
+    Gp gp = cc.newGpz("gp");
+    Xmm a = cc.newXmm("a");
+    Xmm b = cc.newXmm("b");
+    Xmm c = cc.newXmm("c");
+    Xmm d = cc.newXmm("d");
+
+    cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));
+    generateSseSequenceInternal(cc, form, gp, a, b, c, d);
+    cc.endFunc();
+  }
+#endif
+#ifndef ASMJIT_NO_BUILDER
+  else if (emitter.isBuilder()) {
+    Builder& cc = *emitter.as<Builder>();
+
+    if (emitPrologEpilog) {
+      FuncDetail func;
+      func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+      FuncFrame frame;
+      frame.init(func);
+      frame.addDirtyRegs(eax, xmm0, xmm1, xmm2, xmm3);
+      frame.finalize();
+
+      cc.emitProlog(frame);
+      generateSseSequenceInternal(cc, form, eax, xmm0, xmm1, xmm2, xmm3);
+      cc.emitEpilog(frame);
+    }
+    else {
+      generateSseSequenceInternal(cc, form, eax, xmm0, xmm1, xmm2, xmm3);
+    }
+  }
+#endif
+}
+
+// Generates a long sequence of AVX instructions.
+template<typename Emitter>
+static void generateAvxSequenceInternal(
+ Emitter& cc,
+ InstForm form,
+ const x86::Gp& gp,
+ const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+ x86::Gp gpd = gp.r32();
+ x86::Gp gpq = gp.r64();
+ x86::Gp gpz = cc.is32Bit() ? gpd : gpq;
+
+ x86::Xmm xmmA = vecA.xmm();
+ x86::Xmm xmmB = vecB.xmm();
+ x86::Xmm xmmC = vecC.xmm();
+ x86::Xmm xmmD = vecD.xmm();
+
+ x86::Ymm ymmA = vecA.ymm();
+ x86::Ymm ymmB = vecB.ymm();
+ x86::Ymm ymmC = vecC.ymm();
+ x86::Ymm ymmD = vecD.ymm();
+
+ cc.xor_(gpd, gpd);
+ cc.vxorps(xmmA, xmmA, xmmA);
+ cc.vxorps(xmmB, xmmB, xmmB);
+ cc.vxorps(xmmC, xmmC, xmmC);
+ cc.vxorps(xmmD, xmmD, xmmD);
+
+ if (form == InstForm::kReg) {
+ cc.vaddpd(xmmA, xmmB, xmmC);
+ cc.vaddpd(ymmA, ymmB, ymmC);
+ cc.vaddps(xmmA, xmmB, xmmC);
+ cc.vaddps(ymmA, ymmB, ymmC);
+ cc.vaddsd(xmmA, xmmB, xmmC);
+ cc.vaddss(xmmA, xmmB, xmmC);
+ cc.vaddsubpd(xmmA, xmmB, xmmC);
+ cc.vaddsubpd(ymmA, ymmB, ymmC);
+ cc.vaddsubps(xmmA, xmmB, xmmC);
+ cc.vaddsubps(ymmA, ymmB, ymmC);
+ cc.vandpd(xmmA, xmmB, xmmC);
+ cc.vandpd(ymmA, ymmB, ymmC);
+ cc.vandps(xmmA, xmmB, xmmC);
+ cc.vandps(ymmA, ymmB, ymmC);
+ cc.vandnpd(xmmA, xmmB, xmmC);
+ cc.vandnpd(ymmA, ymmB, ymmC);
+ cc.vandnps(xmmA, xmmB, xmmC);
+ cc.vandnps(ymmA, ymmB, ymmC);
+ cc.vblendpd(xmmA, xmmB, xmmC, 0);
+ cc.vblendpd(ymmA, ymmB, ymmC, 0);
+ cc.vblendps(xmmA, xmmB, xmmC, 0);
+ cc.vblendps(ymmA, ymmB, ymmC, 0);
+ cc.vblendvpd(xmmA, xmmB, xmmC, xmmA);
+ cc.vblendvpd(ymmA, ymmB, ymmC, ymmA);
+ cc.vcmppd(xmmA, xmmB, xmmC, 0);
+ cc.vcmppd(ymmA, ymmB, ymmC, 0);
+ cc.vcmpps(xmmA, xmmB, xmmC, 0);
+ cc.vcmpps(ymmA, ymmB, ymmC, 0);
+ cc.vcmpsd(xmmA, xmmB, xmmC, 0);
+ cc.vcmpss(xmmA, xmmB, xmmC, 0);
+ cc.vcomisd(xmmA, xmmB);
+ cc.vcomiss(xmmA, xmmB);
+ cc.vcvtdq2pd(xmmA, xmmB);
+ cc.vcvtdq2pd(ymmA, xmmB);
+ cc.vcvtdq2ps(xmmA, xmmB);
+ cc.vcvtdq2ps(ymmA, ymmB);
+ cc.vcvtpd2dq(xmmA, xmmB);
+ cc.vcvtpd2dq(xmmA, ymmB);
+ cc.vcvtpd2ps(xmmA, xmmB);
+ cc.vcvtpd2ps(xmmA, ymmB);
+ cc.vcvtps2dq(xmmA, xmmB);
+ cc.vcvtps2dq(ymmA, ymmB);
+ cc.vcvtps2pd(xmmA, xmmB);
+ cc.vcvtps2pd(ymmA, xmmB);
+ cc.vcvtsd2si(gpd, xmmB);
+ cc.vcvtsd2si(gpz, xmmB);
+ cc.vcvtsd2ss(xmmA, xmmB, xmmC);
+ cc.vcvtsi2sd(xmmA, xmmB, gpd);
+ cc.vcvtsi2sd(xmmA, xmmB, gpz);
+ cc.vcvtsi2ss(xmmA, xmmB, gpd);
+ cc.vcvtsi2ss(xmmA, xmmB, gpz);
+ cc.vcvtss2sd(xmmA, xmmB, xmmC);
+ cc.vcvtss2si(gpd, xmmB);
+ cc.vcvttpd2dq(xmmA, xmmB);
+ cc.vcvttpd2dq(xmmA, ymmB);
+ cc.vcvttps2dq(xmmA, xmmB);
+ cc.vcvttps2dq(ymmA, ymmB);
+ cc.vcvttsd2si(gpd, xmmB);
+ cc.vcvttss2si(gpz, xmmB);
+ cc.vdivpd(xmmA, xmmB, xmmC);
+ cc.vdivpd(ymmA, ymmB, ymmC);
+ cc.vdivps(xmmA, xmmB, xmmC);
+ cc.vdivps(ymmA, ymmB, ymmC);
+ cc.vdivsd(xmmA, xmmB, xmmC);
+ cc.vdivss(xmmA, xmmB, xmmC);
+ cc.vdppd(xmmA, xmmB, xmmC, 0);
+ cc.vdpps(xmmA, xmmB, xmmC, 0);
+ cc.vdpps(ymmA, ymmB, ymmC, 0);
+ cc.vextractf128(xmmA, ymmB, 0);
+ cc.vextractps(gpd, xmmB, 0);
+ cc.vhaddpd(xmmA, xmmB, xmmC);
+ cc.vhaddpd(ymmA, ymmB, ymmC);
+ cc.vhaddps(xmmA, xmmB, xmmC);
+ cc.vhaddps(ymmA, ymmB, ymmC);
+ cc.vhsubpd(xmmA, xmmB, xmmC);
+ cc.vhsubpd(ymmA, ymmB, ymmC);
+ cc.vhsubps(xmmA, xmmB, xmmC);
+ cc.vhsubps(ymmA, ymmB, ymmC);
+ cc.vinsertf128(ymmA, ymmB, xmmC, 0);
+ cc.vinsertps(xmmA, xmmB, xmmC, 0);
+ cc.vmaxpd(xmmA, xmmB, xmmC);
+ cc.vmaxpd(ymmA, ymmB, ymmC);
+ cc.vmaxps(xmmA, xmmB, xmmC);
+ cc.vmaxps(ymmA, ymmB, ymmC);
+ cc.vmaxsd(xmmA, xmmB, xmmC);
+ cc.vmaxss(xmmA, xmmB, xmmC);
+ cc.vminpd(xmmA, xmmB, xmmC);
+ cc.vminpd(ymmA, ymmB, ymmC);
+ cc.vminps(xmmA, xmmB, xmmC);
+ cc.vminps(ymmA, ymmB, ymmC);
+ cc.vminsd(xmmA, xmmB, xmmC);
+ cc.vminss(xmmA, xmmB, xmmC);
+ cc.vmovapd(xmmA, xmmB);
+ cc.vmovapd(ymmA, ymmB);
+ cc.vmovaps(xmmA, xmmB);
+ cc.vmovaps(ymmA, ymmB);
+ cc.vmovd(xmmA, gpd);
+ cc.vmovd(gpd, xmmB);
+ cc.vmovddup(xmmA, xmmB);
+ cc.vmovddup(ymmA, ymmB);
+ cc.vmovdqa(xmmA, xmmB);
+ cc.vmovdqa(ymmA, ymmB);
+ cc.vmovdqu(xmmA, xmmB);
+ cc.vmovdqu(ymmA, ymmB);
+ cc.vmovhlps(xmmA, xmmB, xmmC);
+ cc.vmovlhps(xmmA, xmmB, xmmC);
+ cc.vmovmskpd(gpd, xmmB);
+ cc.vmovmskpd(gpd, ymmB);
+ cc.vmovmskps(gpd, xmmB);
+ cc.vmovmskps(gpd, ymmB);
+ cc.vmovsd(xmmA, xmmB, xmmC);
+ cc.vmovshdup(xmmA, xmmB);
+ cc.vmovshdup(ymmA, ymmB);
+ cc.vmovsldup(xmmA, xmmB);
+ cc.vmovsldup(ymmA, ymmB);
+ cc.vmovss(xmmA, xmmB, xmmC);
+ cc.vmovupd(xmmA, xmmB);
+ cc.vmovupd(ymmA, ymmB);
+ cc.vmovups(xmmA, xmmB);
+ cc.vmovups(ymmA, ymmB);
+ cc.vmpsadbw(xmmA, xmmB, xmmC, 0);
+ cc.vmulpd(xmmA, xmmB, xmmC);
+ cc.vmulpd(ymmA, ymmB, ymmC);
+ cc.vmulps(xmmA, xmmB, xmmC);
+ cc.vmulps(ymmA, ymmB, ymmC);
+ cc.vmulsd(xmmA, xmmB, xmmC);
+ cc.vmulss(xmmA, xmmB, xmmC);
+ cc.vorpd(xmmA, xmmB, xmmC);
+ cc.vorpd(ymmA, ymmB, ymmC);
+ cc.vorps(xmmA, xmmB, xmmC);
+ cc.vorps(ymmA, ymmB, ymmC);
+ cc.vpabsb(xmmA, xmmB);
+ cc.vpabsd(xmmA, xmmB);
+ cc.vpabsw(xmmA, xmmB);
+ cc.vpackssdw(xmmA, xmmB, xmmC);
+ cc.vpacksswb(xmmA, xmmB, xmmC);
+ cc.vpackusdw(xmmA, xmmB, xmmC);
+ cc.vpackuswb(xmmA, xmmB, xmmC);
+ cc.vpaddb(xmmA, xmmB, xmmC);
+ cc.vpaddd(xmmA, xmmB, xmmC);
+ cc.vpaddq(xmmA, xmmB, xmmC);
+ cc.vpaddw(xmmA, xmmB, xmmC);
+ cc.vpaddsb(xmmA, xmmB, xmmC);
+ cc.vpaddsw(xmmA, xmmB, xmmC);
+ cc.vpaddusb(xmmA, xmmB, xmmC);
+ cc.vpaddusw(xmmA, xmmB, xmmC);
+ cc.vpalignr(xmmA, xmmB, xmmC, 0);
+ cc.vpand(xmmA, xmmB, xmmC);
+ cc.vpandn(xmmA, xmmB, xmmC);
+ cc.vpavgb(xmmA, xmmB, xmmC);
+ cc.vpavgw(xmmA, xmmB, xmmC);
+ cc.vpblendvb(xmmA, xmmB, xmmC, xmmA);
+ cc.vpblendw(xmmA, xmmB, xmmC, 0);
+ cc.vpcmpeqb(xmmA, xmmB, xmmC);
+ cc.vpcmpeqd(xmmA, xmmB, xmmC);
+ cc.vpcmpeqq(xmmA, xmmB, xmmC);
+ cc.vpcmpeqw(xmmA, xmmB, xmmC);
+ cc.vpcmpgtb(xmmA, xmmB, xmmC);
+ cc.vpcmpgtd(xmmA, xmmB, xmmC);
+ cc.vpcmpgtq(xmmA, xmmB, xmmC);
+ cc.vpcmpgtw(xmmA, xmmB, xmmC);
+ cc.vpermilpd(xmmA, xmmB, xmmC);
+ cc.vpermilpd(ymmA, ymmB, ymmC);
+ cc.vpermilpd(xmmA, xmmB, 0);
+ cc.vpermilpd(ymmA, ymmB, 0);
+ cc.vpermilps(xmmA, xmmB, xmmC);
+ cc.vpermilps(ymmA, ymmB, ymmC);
+ cc.vpermilps(xmmA, xmmB, 0);
+ cc.vpermilps(ymmA, ymmB, 0);
+ cc.vperm2f128(ymmA, ymmB, ymmC, 0);
+ cc.vpextrb(gpd, xmmB, 0);
+ cc.vpextrd(gpd, xmmB, 0);
+ if (cc.is64Bit()) cc.vpextrq(gpq, xmmB, 0);
+ cc.vpextrw(gpd, xmmB, 0);
+ cc.vphaddd(xmmA, xmmB, xmmC);
+ cc.vphaddsw(xmmA, xmmB, xmmC);
+ cc.vphaddw(xmmA, xmmB, xmmC);
+ cc.vphminposuw(xmmA, xmmB);
+ cc.vphsubd(xmmA, xmmB, xmmC);
+ cc.vphsubsw(xmmA, xmmB, xmmC);
+ cc.vphsubw(xmmA, xmmB, xmmC);
+ cc.vpinsrb(xmmA, xmmB, gpd, 0);
+ cc.vpinsrd(xmmA, xmmB, gpd, 0);
+ cc.vpinsrw(xmmA, xmmB, gpd, 0);
+ cc.vpmaddubsw(xmmA, xmmB, xmmC);
+ cc.vpmaddwd(xmmA, xmmB, xmmC);
+ cc.vpmaxsb(xmmA, xmmB, xmmC);
+ cc.vpmaxsd(xmmA, xmmB, xmmC);
+ cc.vpmaxsw(xmmA, xmmB, xmmC);
+ cc.vpmaxub(xmmA, xmmB, xmmC);
+ cc.vpmaxud(xmmA, xmmB, xmmC);
+ cc.vpmaxuw(xmmA, xmmB, xmmC);
+ cc.vpminsb(xmmA, xmmB, xmmC);
+ cc.vpminsd(xmmA, xmmB, xmmC);
+ cc.vpminsw(xmmA, xmmB, xmmC);
+ cc.vpminub(xmmA, xmmB, xmmC);
+ cc.vpminud(xmmA, xmmB, xmmC);
+ cc.vpminuw(xmmA, xmmB, xmmC);
+ cc.vpmovmskb(gpd, xmmB);
+ cc.vpmovsxbd(xmmA, xmmB);
+ cc.vpmovsxbq(xmmA, xmmB);
+ cc.vpmovsxbw(xmmA, xmmB);
+ cc.vpmovsxdq(xmmA, xmmB);
+ cc.vpmovsxwd(xmmA, xmmB);
+ cc.vpmovsxwq(xmmA, xmmB);
+ cc.vpmovzxbd(xmmA, xmmB);
+ cc.vpmovzxbq(xmmA, xmmB);
+ cc.vpmovzxbw(xmmA, xmmB);
+ cc.vpmovzxdq(xmmA, xmmB);
+ cc.vpmovzxwd(xmmA, xmmB);
+ cc.vpmovzxwq(xmmA, xmmB);
+ cc.vpmuldq(xmmA, xmmB, xmmC);
+ cc.vpmulhrsw(xmmA, xmmB, xmmC);
+ cc.vpmulhuw(xmmA, xmmB, xmmC);
+ cc.vpmulhw(xmmA, xmmB, xmmC);
+ cc.vpmulld(xmmA, xmmB, xmmC);
+ cc.vpmullw(xmmA, xmmB, xmmC);
+ cc.vpmuludq(xmmA, xmmB, xmmC);
+ cc.vpor(xmmA, xmmB, xmmC);
+ cc.vpsadbw(xmmA, xmmB, xmmC);
+ cc.vpshufb(xmmA, xmmB, xmmC);
+ cc.vpshufd(xmmA, xmmB, 0);
+ cc.vpshufhw(xmmA, xmmB, 0);
+ cc.vpshuflw(xmmA, xmmB, 0);
+ cc.vpsignb(xmmA, xmmB, xmmC);
+ cc.vpsignd(xmmA, xmmB, xmmC);
+ cc.vpsignw(xmmA, xmmB, xmmC);
+ cc.vpslld(xmmA, xmmB, xmmC);
+ cc.vpslld(xmmA, xmmB, 0);
+ cc.vpslldq(xmmA, xmmB, 0);
+ cc.vpsllq(xmmA, xmmB, xmmC);
+ cc.vpsllq(xmmA, xmmB, 0);
+ cc.vpsllw(xmmA, xmmB, xmmC);
+ cc.vpsllw(xmmA, xmmB, 0);
+ cc.vpsrad(xmmA, xmmB, xmmC);
+ cc.vpsrad(xmmA, xmmB, 0);
+ cc.vpsraw(xmmA, xmmB, xmmC);
+ cc.vpsraw(xmmA, xmmB, 0);
+ cc.vpsrld(xmmA, xmmB, xmmC);
+ cc.vpsrld(xmmA, xmmB, 0);
+ cc.vpsrldq(xmmA, xmmB, 0);
+ cc.vpsrlq(xmmA, xmmB, xmmC);
+ cc.vpsrlq(xmmA, xmmB, 0);
+ cc.vpsrlw(xmmA, xmmB, xmmC);
+ cc.vpsrlw(xmmA, xmmB, 0);
+ cc.vpsubb(xmmA, xmmB, xmmC);
+ cc.vpsubd(xmmA, xmmB, xmmC);
+ cc.vpsubq(xmmA, xmmB, xmmC);
+ cc.vpsubw(xmmA, xmmB, xmmC);
+ cc.vpsubsb(xmmA, xmmB, xmmC);
+ cc.vpsubsw(xmmA, xmmB, xmmC);
+ cc.vpsubusb(xmmA, xmmB, xmmC);
+ cc.vpsubusw(xmmA, xmmB, xmmC);
+ cc.vptest(xmmA, xmmB);
+ cc.vptest(ymmA, ymmB);
+ cc.vpunpckhbw(xmmA, xmmB, xmmC);
+ cc.vpunpckhdq(xmmA, xmmB, xmmC);
+ cc.vpunpckhqdq(xmmA, xmmB, xmmC);
+ cc.vpunpckhwd(xmmA, xmmB, xmmC);
+ cc.vpunpcklbw(xmmA, xmmB, xmmC);
+ cc.vpunpckldq(xmmA, xmmB, xmmC);
+ cc.vpunpcklqdq(xmmA, xmmB, xmmC);
+ cc.vpunpcklwd(xmmA, xmmB, xmmC);
+ cc.vpxor(xmmA, xmmB, xmmC);
+ cc.vrcpps(xmmA, xmmB);
+ cc.vrcpps(ymmA, ymmB);
+ cc.vrcpss(xmmA, xmmB, xmmC);
+ cc.vrsqrtps(xmmA, xmmB);
+ cc.vrsqrtps(ymmA, ymmB);
+ cc.vrsqrtss(xmmA, xmmB, xmmC);
+ cc.vroundpd(xmmA, xmmB, 0);
+ cc.vroundpd(ymmA, ymmB, 0);
+ cc.vroundps(xmmA, xmmB, 0);
+ cc.vroundps(ymmA, ymmB, 0);
+ cc.vroundsd(xmmA, xmmB, xmmC, 0);
+ cc.vroundss(xmmA, xmmB, xmmC, 0);
+ cc.vshufpd(xmmA, xmmB, xmmC, 0);
+ cc.vshufpd(ymmA, ymmB, ymmC, 0);
+ cc.vshufps(xmmA, xmmB, xmmC, 0);
+ cc.vshufps(ymmA, ymmB, ymmC, 0);
+ cc.vsqrtpd(xmmA, xmmB);
+ cc.vsqrtpd(ymmA, ymmB);
+ cc.vsqrtps(xmmA, xmmB);
+ cc.vsqrtps(ymmA, ymmB);
+ cc.vsqrtsd(xmmA, xmmB, xmmC);
+ cc.vsqrtss(xmmA, xmmB, xmmC);
+ cc.vsubpd(xmmA, xmmB, xmmC);
+ cc.vsubpd(ymmA, ymmB, ymmC);
+ cc.vsubps(xmmA, xmmB, xmmC);
+ cc.vsubps(ymmA, ymmB, ymmC);
+ cc.vsubsd(xmmA, xmmB, xmmC);
+ cc.vsubss(xmmA, xmmB, xmmC);
+ cc.vtestps(xmmA, xmmB);
+ cc.vtestps(ymmA, ymmB);
+ cc.vtestpd(xmmA, xmmB);
+ cc.vtestpd(ymmA, ymmB);
+ cc.vucomisd(xmmA, xmmB);
+ cc.vucomiss(xmmA, xmmB);
+ cc.vunpckhpd(xmmA, xmmB, xmmC);
+ cc.vunpckhpd(ymmA, ymmB, ymmC);
+ cc.vunpckhps(xmmA, xmmB, xmmC);
+ cc.vunpckhps(ymmA, ymmB, ymmC);
+ cc.vunpcklpd(xmmA, xmmB, xmmC);
+ cc.vunpcklpd(ymmA, ymmB, ymmC);
+ cc.vunpcklps(xmmA, xmmB, xmmC);
+ cc.vunpcklps(ymmA, ymmB, ymmC);
+ cc.vxorpd(xmmA, xmmB, xmmC);
+ cc.vxorpd(ymmA, ymmB, ymmC);
+ cc.vxorps(xmmA, xmmB, xmmC);
+ cc.vxorps(ymmA, ymmB, ymmC);
+
+ // AVX+AESNI.
+ cc.vaesdec(xmmA, xmmB, xmmC);
+ cc.vaesdeclast(xmmA, xmmB, xmmC);
+ cc.vaesenc(xmmA, xmmB, xmmC);
+ cc.vaesenclast(xmmA, xmmB, xmmC);
+ cc.vaesimc(xmmA, xmmB);
+ cc.vaeskeygenassist(xmmA, xmmB, 0);
+
+ // AVX+PCLMULQDQ.
+ cc.vpclmulqdq(xmmA, xmmB, xmmC, 0);
+
+ // AVX2.
+ cc.vbroadcastsd(ymmA, xmmB);
+ cc.vbroadcastss(xmmA, xmmB);
+ cc.vbroadcastss(ymmA, xmmB);
+ cc.vextracti128(xmmA, ymmB, 0);
+ cc.vinserti128(ymmA, ymmB, xmmC, 0);
+ cc.vmpsadbw(ymmA, ymmB, ymmC, 0);
+ cc.vpabsb(ymmA, ymmB);
+ cc.vpabsd(ymmA, ymmB);
+ cc.vpabsw(ymmA, ymmB);
+ cc.vpackssdw(ymmA, ymmB, ymmC);
+ cc.vpacksswb(ymmA, ymmB, ymmC);
+ cc.vpackusdw(ymmA, ymmB, ymmC);
+ cc.vpackuswb(ymmA, ymmB, ymmC);
+ cc.vpaddb(ymmA, ymmB, ymmC);
+ cc.vpaddd(ymmA, ymmB, ymmC);
+ cc.vpaddq(ymmA, ymmB, ymmC);
+ cc.vpaddw(ymmA, ymmB, ymmC);
+ cc.vpaddsb(ymmA, ymmB, ymmC);
+ cc.vpaddsw(ymmA, ymmB, ymmC);
+ cc.vpaddusb(ymmA, ymmB, ymmC);
+ cc.vpaddusw(ymmA, ymmB, ymmC);
+ cc.vpalignr(ymmA, ymmB, ymmC, 0);
+ cc.vpand(ymmA, ymmB, ymmC);
+ cc.vpandn(ymmA, ymmB, ymmC);
+ cc.vpavgb(ymmA, ymmB, ymmC);
+ cc.vpavgw(ymmA, ymmB, ymmC);
+ cc.vpblendd(xmmA, xmmB, xmmC, 0);
+ cc.vpblendd(ymmA, ymmB, ymmC, 0);
+ cc.vpblendvb(ymmA, ymmB, ymmC, ymmA);
+ cc.vpblendw(ymmA, ymmB, ymmC, 0);
+ cc.vpbroadcastb(xmmA, xmmB);
+ cc.vpbroadcastb(ymmA, xmmB);
+ cc.vpbroadcastd(xmmA, xmmB);
+ cc.vpbroadcastd(ymmA, xmmB);
+ cc.vpbroadcastq(xmmA, xmmB);
+ cc.vpbroadcastq(ymmA, xmmB);
+ cc.vpbroadcastw(xmmA, xmmB);
+ cc.vpbroadcastw(ymmA, xmmB);
+ cc.vpcmpeqb(ymmA, ymmB, ymmC);
+ cc.vpcmpeqd(ymmA, ymmB, ymmC);
+ cc.vpcmpeqq(ymmA, ymmB, ymmC);
+ cc.vpcmpeqw(ymmA, ymmB, ymmC);
+ cc.vpcmpgtb(ymmA, ymmB, ymmC);
+ cc.vpcmpgtd(ymmA, ymmB, ymmC);
+ cc.vpcmpgtq(ymmA, ymmB, ymmC);
+ cc.vpcmpgtw(ymmA, ymmB, ymmC);
+ cc.vperm2i128(ymmA, ymmB, ymmC, 0);
+ cc.vpermd(ymmA, ymmB, ymmC);
+ cc.vpermps(ymmA, ymmB, ymmC);
+ cc.vpermpd(ymmA, ymmB, 0);
+ cc.vpermq(ymmA, ymmB, 0);
+ cc.vpmovmskb(gpd, ymmB);
+ cc.vpmovsxbd(ymmA, xmmB);
+ cc.vpmovsxbq(ymmA, xmmB);
+ cc.vpmovsxbw(ymmA, xmmB);
+ cc.vpmovsxdq(ymmA, xmmB);
+ cc.vpmovsxwd(ymmA, xmmB);
+ cc.vpmovsxwq(ymmA, xmmB);
+ cc.vpmovzxbd(ymmA, xmmB);
+ cc.vpmovzxbq(ymmA, xmmB);
+ cc.vpmovzxbw(ymmA, xmmB);
+ cc.vpmovzxdq(ymmA, xmmB);
+ cc.vpmovzxwd(ymmA, xmmB);
+ cc.vpmovzxwq(ymmA, xmmB);
+ cc.vpshufd(ymmA, ymmB, 0);
+ cc.vpshufhw(ymmA, ymmB, 0);
+ cc.vpshuflw(ymmA, ymmB, 0);
+ cc.vpslld(ymmA, ymmB, 0);
+ cc.vpslldq(ymmA, ymmB, 0);
+ cc.vpsllq(ymmA, ymmB, 0);
+ cc.vpsllw(ymmA, ymmB, 0);
+ cc.vpsrad(ymmA, ymmB, 0);
+ cc.vpsraw(ymmA, ymmB, 0);
+ cc.vpsrld(ymmA, ymmB, 0);
+ cc.vpsrldq(ymmA, ymmB, 0);
+ cc.vpsrlq(ymmA, ymmB, 0);
+ cc.vpsrlw(ymmA, ymmB, 0);
+ cc.vphaddd(ymmA, ymmB, ymmC);
+ cc.vphaddsw(ymmA, ymmB, ymmC);
+ cc.vphaddw(ymmA, ymmB, ymmC);
+ cc.vphsubd(ymmA, ymmB, ymmC);
+ cc.vphsubsw(ymmA, ymmB, ymmC);
+ cc.vphsubw(ymmA, ymmB, ymmC);
+ cc.vpmaddubsw(ymmA, ymmB, ymmC);
+ cc.vpmaddwd(ymmA, ymmB, ymmC);
+ cc.vpmaxsb(ymmA, ymmB, ymmC);
+ cc.vpmaxsd(ymmA, ymmB, ymmC);
+ cc.vpmaxsw(ymmA, ymmB, ymmC);
+ cc.vpmaxub(ymmA, ymmB, ymmC);
+ cc.vpmaxud(ymmA, ymmB, ymmC);
+ cc.vpmaxuw(ymmA, ymmB, ymmC);
+ cc.vpminsb(ymmA, ymmB, ymmC);
+ cc.vpminsd(ymmA, ymmB, ymmC);
+ cc.vpminsw(ymmA, ymmB, ymmC);
+ cc.vpminub(ymmA, ymmB, ymmC);
+ cc.vpminud(ymmA, ymmB, ymmC);
+ cc.vpminuw(ymmA, ymmB, ymmC);
+ cc.vpmuldq(ymmA, ymmB, ymmC);
+ cc.vpmulhrsw(ymmA, ymmB, ymmC);
+ cc.vpmulhuw(ymmA, ymmB, ymmC);
+ cc.vpmulhw(ymmA, ymmB, ymmC);
+ cc.vpmulld(ymmA, ymmB, ymmC);
+ cc.vpmullw(ymmA, ymmB, ymmC);
+ cc.vpmuludq(ymmA, ymmB, ymmC);
+ cc.vpor(ymmA, ymmB, ymmC);
+ cc.vpsadbw(ymmA, ymmB, ymmC);
+ cc.vpshufb(ymmA, ymmB, ymmC);
+ cc.vpsignb(ymmA, ymmB, ymmC);
+ cc.vpsignd(ymmA, ymmB, ymmC);
+ cc.vpsignw(ymmA, ymmB, ymmC);
+ cc.vpslld(ymmA, ymmB, xmmC);
+ cc.vpsllq(ymmA, ymmB, xmmC);
+ cc.vpsllvd(xmmA, xmmB, xmmC);
+ cc.vpsllvd(ymmA, ymmB, ymmC);
+ cc.vpsllvq(xmmA, xmmB, xmmC);
+ cc.vpsllvq(ymmA, ymmB, ymmC);
+ cc.vpsllw(ymmA, ymmB, xmmC);
+ cc.vpsrad(ymmA, ymmB, xmmC);
+ cc.vpsravd(xmmA, xmmB, xmmC);
+ cc.vpsravd(ymmA, ymmB, ymmC);
+ cc.vpsraw(ymmA, ymmB, xmmC);
+ cc.vpsrld(ymmA, ymmB, xmmC);
+ cc.vpsrlq(ymmA, ymmB, xmmC);
+ cc.vpsrlvd(xmmA, xmmB, xmmC);
+ cc.vpsrlvd(ymmA, ymmB, ymmC);
+ cc.vpsrlvq(xmmA, xmmB, xmmC);
+ cc.vpsrlvq(ymmA, ymmB, ymmC);
+ cc.vpsrlw(ymmA, ymmB, xmmC);
+ cc.vpsubb(ymmA, ymmB, ymmC);
+ cc.vpsubd(ymmA, ymmB, ymmC);
+ cc.vpsubq(ymmA, ymmB, ymmC);
+ cc.vpsubsb(ymmA, ymmB, ymmC);
+ cc.vpsubsw(ymmA, ymmB, ymmC);
+ cc.vpsubusb(ymmA, ymmB, ymmC);
+ cc.vpsubusw(ymmA, ymmB, ymmC);
+ cc.vpsubw(ymmA, ymmB, ymmC);
+ cc.vpunpckhbw(ymmA, ymmB, ymmC);
+ cc.vpunpckhdq(ymmA, ymmB, ymmC);
+ cc.vpunpckhqdq(ymmA, ymmB, ymmC);
+ cc.vpunpckhwd(ymmA, ymmB, ymmC);
+ cc.vpunpcklbw(ymmA, ymmB, ymmC);
+ cc.vpunpckldq(ymmA, ymmB, ymmC);
+ cc.vpunpcklqdq(ymmA, ymmB, ymmC);
+ cc.vpunpcklwd(ymmA, ymmB, ymmC);
+ cc.vpxor(ymmA, ymmB, ymmC);
+
+ // FMA.
+ cc.vfmadd132pd(xmmA, xmmB, xmmC);
+ cc.vfmadd132pd(ymmA, ymmB, ymmC);
+ cc.vfmadd132ps(xmmA, xmmB, xmmC);
+ cc.vfmadd132ps(ymmA, ymmB, ymmC);
+ cc.vfmadd132sd(xmmA, xmmB, xmmC);
+ cc.vfmadd132ss(xmmA, xmmB, xmmC);
+ cc.vfmadd213pd(xmmA, xmmB, xmmC);
+ cc.vfmadd213pd(ymmA, ymmB, ymmC);
+ cc.vfmadd213ps(xmmA, xmmB, xmmC);
+ cc.vfmadd213ps(ymmA, ymmB, ymmC);
+ cc.vfmadd213sd(xmmA, xmmB, xmmC);
+ cc.vfmadd213ss(xmmA, xmmB, xmmC);
+ cc.vfmadd231pd(xmmA, xmmB, xmmC);
+ cc.vfmadd231pd(ymmA, ymmB, ymmC);
+ cc.vfmadd231ps(xmmA, xmmB, xmmC);
+ cc.vfmadd231ps(ymmA, ymmB, ymmC);
+ cc.vfmadd231sd(xmmA, xmmB, xmmC);
+ cc.vfmadd231ss(xmmA, xmmB, xmmC);
+ cc.vfmaddsub132pd(xmmA, xmmB, xmmC);
+ cc.vfmaddsub132pd(ymmA, ymmB, ymmC);
+ cc.vfmaddsub132ps(xmmA, xmmB, xmmC);
+ cc.vfmaddsub132ps(ymmA, ymmB, ymmC);
+ cc.vfmaddsub213pd(xmmA, xmmB, xmmC);
+ cc.vfmaddsub213pd(ymmA, ymmB, ymmC);
+ cc.vfmaddsub213ps(xmmA, xmmB, xmmC);
+ cc.vfmaddsub213ps(ymmA, ymmB, ymmC);
+ cc.vfmaddsub231pd(xmmA, xmmB, xmmC);
+ cc.vfmaddsub231pd(ymmA, ymmB, ymmC);
+ cc.vfmaddsub231ps(xmmA, xmmB, xmmC);
+ cc.vfmaddsub231ps(ymmA, ymmB, ymmC);
+ cc.vfmsub132pd(xmmA, xmmB, xmmC);
+ cc.vfmsub132pd(ymmA, ymmB, ymmC);
+ cc.vfmsub132ps(xmmA, xmmB, xmmC);
+ cc.vfmsub132ps(ymmA, ymmB, ymmC);
+ cc.vfmsub132sd(xmmA, xmmB, xmmC);
+ cc.vfmsub132ss(xmmA, xmmB, xmmC);
+ cc.vfmsub213pd(xmmA, xmmB, xmmC);
+ cc.vfmsub213pd(ymmA, ymmB, ymmC);
+ cc.vfmsub213ps(xmmA, xmmB, xmmC);
+ cc.vfmsub213ps(ymmA, ymmB, ymmC);
+ cc.vfmsub213sd(xmmA, xmmB, xmmC);
+ cc.vfmsub213ss(xmmA, xmmB, xmmC);
+ cc.vfmsub231pd(xmmA, xmmB, xmmC);
+ cc.vfmsub231pd(ymmA, ymmB, ymmC);
+ cc.vfmsub231ps(xmmA, xmmB, xmmC);
+ cc.vfmsub231ps(ymmA, ymmB, ymmC);
+ cc.vfmsub231sd(xmmA, xmmB, xmmC);
+ cc.vfmsub231ss(xmmA, xmmB, xmmC);
+ cc.vfmsubadd132pd(xmmA, xmmB, xmmC);
+ cc.vfmsubadd132pd(ymmA, ymmB, ymmC);
+ cc.vfmsubadd132ps(xmmA, xmmB, xmmC);
+ cc.vfmsubadd132ps(ymmA, ymmB, ymmC);
+ cc.vfmsubadd213pd(xmmA, xmmB, xmmC);
+ cc.vfmsubadd213pd(ymmA, ymmB, ymmC);
+ cc.vfmsubadd213ps(xmmA, xmmB, xmmC);
+ cc.vfmsubadd213ps(ymmA, ymmB, ymmC);
+ cc.vfmsubadd231pd(xmmA, xmmB, xmmC);
+ cc.vfmsubadd231pd(ymmA, ymmB, ymmC);
+ cc.vfmsubadd231ps(xmmA, xmmB, xmmC);
+ cc.vfmsubadd231ps(ymmA, ymmB, ymmC);
+ cc.vfnmadd132pd(xmmA, xmmB, xmmC);
+ cc.vfnmadd132pd(ymmA, ymmB, ymmC);
+ cc.vfnmadd132ps(xmmA, xmmB, xmmC);
+ cc.vfnmadd132ps(ymmA, ymmB, ymmC);
+ cc.vfnmadd132sd(xmmA, xmmB, xmmC);
+ cc.vfnmadd132ss(xmmA, xmmB, xmmC);
+ cc.vfnmadd213pd(xmmA, xmmB, xmmC);
+ cc.vfnmadd213pd(ymmA, ymmB, ymmC);
+ cc.vfnmadd213ps(xmmA, xmmB, xmmC);
+ cc.vfnmadd213ps(ymmA, ymmB, ymmC);
+ cc.vfnmadd213sd(xmmA, xmmB, xmmC);
+ cc.vfnmadd213ss(xmmA, xmmB, xmmC);
+ cc.vfnmadd231pd(xmmA, xmmB, xmmC);
+ cc.vfnmadd231pd(ymmA, ymmB, ymmC);
+ cc.vfnmadd231ps(xmmA, xmmB, xmmC);
+ cc.vfnmadd231ps(ymmA, ymmB, ymmC);
+ cc.vfnmadd231sd(xmmA, xmmB, xmmC);
+ cc.vfnmadd231ss(xmmA, xmmB, xmmC);
+ cc.vfnmsub132pd(xmmA, xmmB, xmmC);
+ cc.vfnmsub132pd(ymmA, ymmB, ymmC);
+ cc.vfnmsub132ps(xmmA, xmmB, xmmC);
+ cc.vfnmsub132ps(ymmA, ymmB, ymmC);
+ cc.vfnmsub132sd(xmmA, xmmB, xmmC);
+ cc.vfnmsub132ss(xmmA, xmmB, xmmC);
+ cc.vfnmsub213pd(xmmA, xmmB, xmmC);
+ cc.vfnmsub213pd(ymmA, ymmB, ymmC);
+ cc.vfnmsub213ps(xmmA, xmmB, xmmC);
+ cc.vfnmsub213ps(ymmA, ymmB, ymmC);
+ cc.vfnmsub213sd(xmmA, xmmB, xmmC);
+ cc.vfnmsub213ss(xmmA, xmmB, xmmC);
+ cc.vfnmsub231pd(xmmA, xmmB, xmmC);
+ cc.vfnmsub231pd(ymmA, ymmB, ymmC);
+ cc.vfnmsub231ps(xmmA, xmmB, xmmC);
+ cc.vfnmsub231ps(ymmA, ymmB, ymmC);
+ cc.vfnmsub231sd(xmmA, xmmB, xmmC);
+ cc.vfnmsub231ss(xmmA, xmmB, xmmC);
+ }
+ else {
+ x86::Mem m = x86::ptr(gpz);
+ x86::Mem m128 = x86::xmmword_ptr(gpz);
+ x86::Mem m256 = x86::xmmword_ptr(gpz);
+ x86::Mem vx_ptr = x86::ptr(gpz, xmmD);
+ x86::Mem vy_ptr = x86::ptr(gpz, ymmD);
+
+ cc.vaddpd(xmmA, xmmB, m);
+ cc.vaddpd(ymmA, ymmB, m);
+ cc.vaddps(xmmA, xmmB, m);
+ cc.vaddps(ymmA, ymmB, m);
+ cc.vaddsd(xmmA, xmmB, m);
+ cc.vaddss(xmmA, xmmB, m);
+ cc.vaddsubpd(xmmA, xmmB, m);
+ cc.vaddsubpd(ymmA, ymmB, m);
+ cc.vaddsubps(xmmA, xmmB, m);
+ cc.vaddsubps(ymmA, ymmB, m);
+ cc.vandpd(xmmA, xmmB, m);
+ cc.vandpd(ymmA, ymmB, m);
+ cc.vandps(xmmA, xmmB, m);
+ cc.vandps(ymmA, ymmB, m);
+ cc.vandnpd(xmmA, xmmB, m);
+ cc.vandnpd(ymmA, ymmB, m);
+ cc.vandnps(xmmA, xmmB, m);
+ cc.vandnps(ymmA, ymmB, m);
+ cc.vblendpd(xmmA, xmmB, m, 0);
+ cc.vblendpd(ymmA, ymmB, m, 0);
+ cc.vblendps(xmmA, xmmB, m, 0);
+ cc.vblendps(ymmA, ymmB, m, 0);
+ cc.vblendvpd(xmmA, xmmB, m, xmmA);
+ cc.vblendvpd(ymmA, ymmB, m, ymmA);
+ cc.vbroadcastf128(ymmA, m);
+ cc.vbroadcastsd(ymmA, m);
+ cc.vbroadcastss(xmmA, m);
+ cc.vbroadcastss(ymmA, m);
+ cc.vcmppd(xmmA, xmmB, m, 0);
+ cc.vcmppd(ymmA, ymmB, m, 0);
+ cc.vcmpps(xmmA, xmmB, m, 0);
+ cc.vcmpps(ymmA, ymmB, m, 0);
+ cc.vcmpsd(xmmA, xmmB, m, 0);
+ cc.vcmpss(xmmA, xmmB, m, 0);
+ cc.vcomisd(xmmA, m);
+ cc.vcomiss(xmmA, m);
+ cc.vcvtdq2pd(xmmA, m);
+ cc.vcvtdq2pd(ymmA, m);
+ cc.vcvtdq2ps(xmmA, m);
+ cc.vcvtdq2ps(ymmA, m);
+ cc.vcvtpd2dq(xmmA, m128);
+ cc.vcvtpd2dq(xmmA, m256);
+ cc.vcvtpd2ps(xmmA, m128);
+ cc.vcvtpd2ps(xmmA, m256);
+ cc.vcvtps2dq(xmmA, m);
+ cc.vcvtps2dq(ymmA, m);
+ cc.vcvtps2pd(xmmA, m);
+ cc.vcvtps2pd(ymmA, m);
+ cc.vcvtsd2si(gpd, m);
+ cc.vcvtsd2ss(xmmA, xmmB, m);
+ cc.vcvtsi2sd(xmmA, xmmB, m);
+ cc.vcvtsi2ss(xmmA, xmmB, m);
+ cc.vcvtss2sd(xmmA, xmmB, m);
+ cc.vcvtss2si(gpd, m);
+ cc.vcvttpd2dq(xmmA, m128);
+ cc.vcvttpd2dq(xmmA, m256);
+ cc.vcvttps2dq(xmmA, m);
+ cc.vcvttps2dq(ymmA, m);
+ cc.vcvttsd2si(gpd, m);
+ cc.vcvttss2si(gpd, m);
+ cc.vdivpd(xmmA, xmmB, m);
+ cc.vdivpd(ymmA, ymmB, m);
+ cc.vdivps(xmmA, xmmB, m);
+ cc.vdivps(ymmA, ymmB, m);
+ cc.vdivsd(xmmA, xmmB, m);
+ cc.vdivss(xmmA, xmmB, m);
+ cc.vdppd(xmmA, xmmB, m, 0);
+ cc.vdpps(xmmA, xmmB, m, 0);
+ cc.vdpps(ymmA, ymmB, m, 0);
+ cc.vextractf128(m, ymmB, 0);
+ cc.vextractps(m, xmmB, 0);
+ cc.vhaddpd(xmmA, xmmB, m);
+ cc.vhaddpd(ymmA, ymmB, m);
+ cc.vhaddps(xmmA, xmmB, m);
+ cc.vhaddps(ymmA, ymmB, m);
+ cc.vhsubpd(xmmA, xmmB, m);
+ cc.vhsubpd(ymmA, ymmB, m);
+ cc.vhsubps(xmmA, xmmB, m);
+ cc.vhsubps(ymmA, ymmB, m);
+ cc.vinsertf128(ymmA, ymmB, m, 0);
+ cc.vinsertps(xmmA, xmmB, m, 0);
+ cc.vlddqu(xmmA, m);
+ cc.vlddqu(ymmA, m);
+ cc.vmaskmovps(xmmA, xmmB, m);
+ cc.vmaskmovps(ymmA, ymmB, m);
+ cc.vmaskmovps(m, xmmB, xmmC);
+ cc.vmaskmovps(m, ymmB, ymmC);
+ cc.vmaskmovpd(xmmA, xmmB, m);
+ cc.vmaskmovpd(ymmA, ymmB, m);
+ cc.vmaskmovpd(m, xmmB, xmmC);
+ cc.vmaskmovpd(m, ymmB, ymmC);
+ cc.vmaxpd(xmmA, xmmB, m);
+ cc.vmaxpd(ymmA, ymmB, m);
+ cc.vmaxps(xmmA, xmmB, m);
+ cc.vmaxps(ymmA, ymmB, m);
+ cc.vmaxsd(xmmA, xmmB, m);
+ cc.vmaxss(xmmA, xmmB, m);
+ cc.vminpd(xmmA, xmmB, m);
+ cc.vminpd(ymmA, ymmB, m);
+ cc.vminps(xmmA, xmmB, m);
+ cc.vminps(ymmA, ymmB, m);
+ cc.vminsd(xmmA, xmmB, m);
+ cc.vminss(xmmA, xmmB, m);
+ cc.vmovapd(xmmA, m);
+ cc.vmovapd(m, xmmB);
+ cc.vmovapd(ymmA, m);
+ cc.vmovapd(m, ymmB);
+ cc.vmovaps(xmmA, m);
+ cc.vmovaps(m, xmmB);
+ cc.vmovaps(ymmA, m);
+ cc.vmovaps(m, ymmB);
+ cc.vmovd(xmmA, m);
+ cc.vmovd(m, xmmB);
+ cc.vmovddup(xmmA, m);
+ cc.vmovddup(ymmA, m);
+ cc.vmovdqa(xmmA, m);
+ cc.vmovdqa(m, xmmB);
+ cc.vmovdqa(ymmA, m);
+ cc.vmovdqa(m, ymmB);
+ cc.vmovdqu(xmmA, m);
+ cc.vmovdqu(m, xmmB);
+ cc.vmovdqu(ymmA, m);
+ cc.vmovdqu(m, ymmB);
+ cc.vmovhpd(xmmA, xmmB, m);
+ cc.vmovhps(xmmA, xmmB, m);
+ cc.vmovhps(m, xmmB);
+ cc.vmovlpd(xmmA, xmmB, m);
+ cc.vmovlpd(m, xmmB);
+ cc.vmovlps(xmmA, xmmB, m);
+ cc.vmovlps(m, xmmB);
+ cc.vmovntdq(m, xmmB);
+ cc.vmovntdq(m, ymmB);
+ cc.vmovntdqa(xmmA, m);
+ cc.vmovntpd(m, xmmB);
+ cc.vmovntpd(m, ymmB);
+ cc.vmovntps(m, xmmB);
+ cc.vmovntps(m, ymmB);
+ cc.vmovsd(xmmA, m);
+ cc.vmovsd(m, xmmB);
+ cc.vmovshdup(xmmA, m);
+ cc.vmovshdup(ymmA, m);
+ cc.vmovsldup(xmmA, m);
+ cc.vmovsldup(ymmA, m);
+ cc.vmovss(xmmA, m);
+ cc.vmovss(m, xmmB);
+ cc.vmovupd(xmmA, m);
+ cc.vmovupd(m, xmmB);
+ cc.vmovupd(ymmA, m);
+ cc.vmovupd(m, ymmB);
+ cc.vmovups(xmmA, m);
+ cc.vmovups(m, xmmB);
+ cc.vmovups(ymmA, m);
+ cc.vmovups(m, ymmB);
+ cc.vmpsadbw(xmmA, xmmB, m, 0);
+ cc.vmulpd(xmmA, xmmB, m);
+ cc.vmulpd(ymmA, ymmB, m);
+ cc.vmulps(xmmA, xmmB, m);
+ cc.vmulps(ymmA, ymmB, m);
+ cc.vmulsd(xmmA, xmmB, m);
+ cc.vmulss(xmmA, xmmB, m);
+ cc.vorpd(xmmA, xmmB, m);
+ cc.vorpd(ymmA, ymmB, m);
+ cc.vorps(xmmA, xmmB, m);
+ cc.vorps(ymmA, ymmB, m);
+ cc.vpabsb(xmmA, m);
+ cc.vpabsd(xmmA, m);
+ cc.vpabsw(xmmA, m);
+ cc.vpackssdw(xmmA, xmmB, m);
+ cc.vpacksswb(xmmA, xmmB, m);
+ cc.vpackusdw(xmmA, xmmB, m);
+ cc.vpackuswb(xmmA, xmmB, m);
+ cc.vpaddb(xmmA, xmmB, m);
+ cc.vpaddd(xmmA, xmmB, m);
+ cc.vpaddq(xmmA, xmmB, m);
+ cc.vpaddw(xmmA, xmmB, m);
+ cc.vpaddsb(xmmA, xmmB, m);
+ cc.vpaddsw(xmmA, xmmB, m);
+ cc.vpaddusb(xmmA, xmmB, m);
+ cc.vpaddusw(xmmA, xmmB, m);
+ cc.vpalignr(xmmA, xmmB, m, 0);
+ cc.vpand(xmmA, xmmB, m);
+ cc.vpandn(xmmA, xmmB, m);
+ cc.vpavgb(xmmA, xmmB, m);
+ cc.vpavgw(xmmA, xmmB, m);
+ cc.vpblendvb(xmmA, xmmB, m, xmmA);
+ cc.vpblendw(xmmA, xmmB, m, 0);
+ cc.vpcmpeqb(xmmA, xmmB, m);
+ cc.vpcmpeqd(xmmA, xmmB, m);
+ cc.vpcmpeqq(xmmA, xmmB, m);
+ cc.vpcmpeqw(xmmA, xmmB, m);
+ cc.vpcmpgtb(xmmA, xmmB, m);
+ cc.vpcmpgtd(xmmA, xmmB, m);
+ cc.vpcmpgtq(xmmA, xmmB, m);
+ cc.vpcmpgtw(xmmA, xmmB, m);
+ cc.vpermilpd(xmmA, xmmB, m);
+ cc.vpermilpd(ymmA, ymmB, m);
+ cc.vpermilpd(xmmA, m, 0);
+ cc.vpermilpd(ymmA, m, 0);
+ cc.vpermilps(xmmA, xmmB, m);
+ cc.vpermilps(ymmA, ymmB, m);
+ cc.vpermilps(xmmA, m, 0);
+ cc.vpermilps(ymmA, m, 0);
+ cc.vperm2f128(ymmA, ymmB, m, 0);
+ cc.vpextrb(m, xmmB, 0);
+ cc.vpextrd(m, xmmB, 0);
+ if (cc.is64Bit()) cc.vpextrq(m, xmmB, 0);
+ cc.vpextrw(m, xmmB, 0);
+ cc.vphaddd(xmmA, xmmB, m);
+ cc.vphaddsw(xmmA, xmmB, m);
+ cc.vphaddw(xmmA, xmmB, m);
+ cc.vphminposuw(xmmA, m);
+ cc.vphsubd(xmmA, xmmB, m);
+ cc.vphsubsw(xmmA, xmmB, m);
+ cc.vphsubw(xmmA, xmmB, m);
+ cc.vpinsrb(xmmA, xmmB, m, 0);
+ cc.vpinsrd(xmmA, xmmB, m, 0);
+ cc.vpinsrw(xmmA, xmmB, m, 0);
+ cc.vpmaddubsw(xmmA, xmmB, m);
+ cc.vpmaddwd(xmmA, xmmB, m);
+ cc.vpmaxsb(xmmA, xmmB, m);
+ cc.vpmaxsd(xmmA, xmmB, m);
+ cc.vpmaxsw(xmmA, xmmB, m);
+ cc.vpmaxub(xmmA, xmmB, m);
+ cc.vpmaxud(xmmA, xmmB, m);
+ cc.vpmaxuw(xmmA, xmmB, m);
+ cc.vpminsb(xmmA, xmmB, m);
+ cc.vpminsd(xmmA, xmmB, m);
+ cc.vpminsw(xmmA, xmmB, m);
+ cc.vpminub(xmmA, xmmB, m);
+ cc.vpminud(xmmA, xmmB, m);
+ cc.vpminuw(xmmA, xmmB, m);
+ cc.vpmovsxbd(xmmA, m);
+ cc.vpmovsxbq(xmmA, m);
+ cc.vpmovsxbw(xmmA, m);
+ cc.vpmovsxdq(xmmA, m);
+ cc.vpmovsxwd(xmmA, m);
+ cc.vpmovsxwq(xmmA, m);
+ cc.vpmovzxbd(xmmA, m);
+ cc.vpmovzxbq(xmmA, m);
+ cc.vpmovzxbw(xmmA, m);
+ cc.vpmovzxdq(xmmA, m);
+ cc.vpmovzxwd(xmmA, m);
+ cc.vpmovzxwq(xmmA, m);
+ cc.vpmuldq(xmmA, xmmB, m);
+ cc.vpmulhrsw(xmmA, xmmB, m);
+ cc.vpmulhuw(xmmA, xmmB, m);
+ cc.vpmulhw(xmmA, xmmB, m);
+ cc.vpmulld(xmmA, xmmB, m);
+ cc.vpmullw(xmmA, xmmB, m);
+ cc.vpmuludq(xmmA, xmmB, m);
+ cc.vpor(xmmA, xmmB, m);
+ cc.vpsadbw(xmmA, xmmB, m);
+ cc.vpshufb(xmmA, xmmB, m);
+ cc.vpshufd(xmmA, m, 0);
+ cc.vpshufhw(xmmA, m, 0);
+ cc.vpshuflw(xmmA, m, 0);
+ cc.vpsignb(xmmA, xmmB, m);
+ cc.vpsignd(xmmA, xmmB, m);
+ cc.vpsignw(xmmA, xmmB, m);
+ cc.vpslld(xmmA, xmmB, m);
+ cc.vpsllq(xmmA, xmmB, m);
+ cc.vpsllw(xmmA, xmmB, m);
+ cc.vpsrad(xmmA, xmmB, m);
+ cc.vpsraw(xmmA, xmmB, m);
+ cc.vpsrld(xmmA, xmmB, m);
+ cc.vpsrlq(xmmA, xmmB, m);
+ cc.vpsrlw(xmmA, xmmB, m);
+ cc.vpsubb(xmmA, xmmB, m);
+ cc.vpsubd(xmmA, xmmB, m);
+ cc.vpsubq(xmmA, xmmB, m);
+ cc.vpsubw(xmmA, xmmB, m);
+ cc.vpsubsb(xmmA, xmmB, m);
+ cc.vpsubsw(xmmA, xmmB, m);
+ cc.vpsubusb(xmmA, xmmB, m);
+ cc.vpsubusw(xmmA, xmmB, m);
+ cc.vptest(xmmA, m);
+ cc.vptest(ymmA, m);
+ cc.vpunpckhbw(xmmA, xmmB, m);
+ cc.vpunpckhdq(xmmA, xmmB, m);
+ cc.vpunpckhqdq(xmmA, xmmB, m);
+ cc.vpunpckhwd(xmmA, xmmB, m);
+ cc.vpunpcklbw(xmmA, xmmB, m);
+ cc.vpunpckldq(xmmA, xmmB, m);
+ cc.vpunpcklqdq(xmmA, xmmB, m);
+ cc.vpunpcklwd(xmmA, xmmB, m);
+ cc.vpxor(xmmA, xmmB, m);
+ cc.vrcpps(xmmA, m);
+ cc.vrcpps(ymmA, m);
+ cc.vrcpss(xmmA, xmmB, m);
+ cc.vrsqrtps(xmmA, m);
+ cc.vrsqrtps(ymmA, m);
+ cc.vrsqrtss(xmmA, xmmB, m);
+ cc.vroundpd(xmmA, m, 0);
+ cc.vroundpd(ymmA, m, 0);
+ cc.vroundps(xmmA, m, 0);
+ cc.vroundps(ymmA, m, 0);
+ cc.vroundsd(xmmA, xmmB, m, 0);
+ cc.vroundss(xmmA, xmmB, m, 0);
+ cc.vshufpd(xmmA, xmmB, m, 0);
+ cc.vshufpd(ymmA, ymmB, m, 0);
+ cc.vshufps(xmmA, xmmB, m, 0);
+ cc.vshufps(ymmA, ymmB, m, 0);
+ cc.vsqrtpd(xmmA, m);
+ cc.vsqrtpd(ymmA, m);
+ cc.vsqrtps(xmmA, m);
+ cc.vsqrtps(ymmA, m);
+ cc.vsqrtsd(xmmA, xmmB, m);
+ cc.vsqrtss(xmmA, xmmB, m);
+ cc.vsubpd(xmmA, xmmB, m);
+ cc.vsubpd(ymmA, ymmB, m);
+ cc.vsubps(xmmA, xmmB, m);
+ cc.vsubps(ymmA, ymmB, m);
+ cc.vsubsd(xmmA, xmmB, m);
+ cc.vsubss(xmmA, xmmB, m);
+ cc.vtestps(xmmA, m);
+ cc.vtestps(ymmA, m);
+ cc.vtestpd(xmmA, m);
+ cc.vtestpd(ymmA, m);
+ cc.vucomisd(xmmA, m);
+ cc.vucomiss(xmmA, m);
+ cc.vunpckhpd(xmmA, xmmB, m);
+ cc.vunpckhpd(ymmA, ymmB, m);
+ cc.vunpckhps(xmmA, xmmB, m);
+ cc.vunpckhps(ymmA, ymmB, m);
+ cc.vunpcklpd(xmmA, xmmB, m);
+ cc.vunpcklpd(ymmA, ymmB, m);
+ cc.vunpcklps(xmmA, xmmB, m);
+ cc.vunpcklps(ymmA, ymmB, m);
+ cc.vxorpd(xmmA, xmmB, m);
+ cc.vxorpd(ymmA, ymmB, m);
+ cc.vxorps(xmmA, xmmB, m);
+ cc.vxorps(ymmA, ymmB, m);
+
+ // AVX+AESNI.
+ cc.vaesdec(xmmA, xmmB, m);
+ cc.vaesdeclast(xmmA, xmmB, m);
+ cc.vaesenc(xmmA, xmmB, m);
+ cc.vaesenclast(xmmA, xmmB, m);
+ cc.vaesimc(xmmA, m);
+ cc.vaeskeygenassist(xmmA, m, 0);
+
+ // AVX+PCLMULQDQ.
+ cc.vpclmulqdq(xmmA, xmmB, m, 0);
+
+ // AVX2.
+ cc.vbroadcasti128(ymmA, m);
+ cc.vextracti128(m, ymmB, 0);
+ cc.vgatherdpd(xmmA, vx_ptr, xmmC);
+ cc.vgatherdpd(ymmA, vx_ptr, ymmC);
+ cc.vgatherdps(xmmA, vx_ptr, xmmC);
+ cc.vgatherdps(ymmA, vy_ptr, ymmC);
+ cc.vgatherqpd(xmmA, vx_ptr, xmmC);
+ cc.vgatherqpd(ymmA, vy_ptr, ymmC);
+ cc.vgatherqps(xmmA, vx_ptr, xmmC);
+ cc.vgatherqps(xmmA, vy_ptr, xmmC);
+ cc.vinserti128(ymmA, ymmB, m, 0);
+ cc.vmovntdqa(ymmA, m);
+ cc.vmpsadbw(ymmA, ymmB, m, 0);
+ cc.vpabsb(ymmA, m);
+ cc.vpabsd(ymmA, m);
+ cc.vpabsw(ymmA, m);
+ cc.vpackssdw(ymmA, ymmB, m);
+ cc.vpacksswb(ymmA, ymmB, m);
+ cc.vpackusdw(ymmA, ymmB, m);
+ cc.vpackuswb(ymmA, ymmB, m);
+ cc.vpaddb(ymmA, ymmB, m);
+ cc.vpaddd(ymmA, ymmB, m);
+ cc.vpaddq(ymmA, ymmB, m);
+ cc.vpaddw(ymmA, ymmB, m);
+ cc.vpaddsb(ymmA, ymmB, m);
+ cc.vpaddsw(ymmA, ymmB, m);
+ cc.vpaddusb(ymmA, ymmB, m);
+ cc.vpaddusw(ymmA, ymmB, m);
+ cc.vpalignr(ymmA, ymmB, m, 0);
+ cc.vpand(ymmA, ymmB, m);
+ cc.vpandn(ymmA, ymmB, m);
+ cc.vpavgb(ymmA, ymmB, m);
+ cc.vpavgw(ymmA, ymmB, m);
+ cc.vpblendd(xmmA, xmmB, m, 0);
+ cc.vpblendd(ymmA, ymmB, m, 0);
+ cc.vpblendvb(ymmA, ymmB, m, ymmA);
+ cc.vpblendw(ymmA, ymmB, m, 0);
+ cc.vpbroadcastb(xmmA, m);
+ cc.vpbroadcastb(ymmA, m);
+ cc.vpbroadcastd(xmmA, m);
+ cc.vpbroadcastd(ymmA, m);
+ cc.vpbroadcastq(xmmA, m);
+ cc.vpbroadcastq(ymmA, m);
+ cc.vpbroadcastw(xmmA, m);
+ cc.vpbroadcastw(ymmA, m);
+ cc.vpcmpeqb(ymmA, ymmB, m);
+ cc.vpcmpeqd(ymmA, ymmB, m);
+ cc.vpcmpeqq(ymmA, ymmB, m);
+ cc.vpcmpeqw(ymmA, ymmB, m);
+ cc.vpcmpgtb(ymmA, ymmB, m);
+ cc.vpcmpgtd(ymmA, ymmB, m);
+ cc.vpcmpgtq(ymmA, ymmB, m);
+ cc.vpcmpgtw(ymmA, ymmB, m);
+ cc.vperm2i128(ymmA, ymmB, m, 0);
+ cc.vpermd(ymmA, ymmB, m);
+ cc.vpermps(ymmA, ymmB, m);
+ cc.vpermpd(ymmA, m, 0);
+ cc.vpermq(ymmA, m, 0);
+ cc.vpgatherdd(xmmA, vx_ptr, xmmC);
+ cc.vpgatherdd(ymmA, vy_ptr, ymmC);
+ cc.vpgatherdq(xmmA, vx_ptr, xmmC);
+ cc.vpgatherdq(ymmA, vx_ptr, ymmC);
+ cc.vpgatherqd(xmmA, vx_ptr, xmmC);
+ cc.vpgatherqd(xmmA, vy_ptr, xmmC);
+ cc.vpgatherqq(xmmA, vx_ptr, xmmC);
+ cc.vpgatherqq(ymmA, vy_ptr, ymmC);
+ cc.vpmovsxbd(ymmA, m);
+ cc.vpmovsxbq(ymmA, m);
+ cc.vpmovsxbw(ymmA, m);
+ cc.vpmovsxdq(ymmA, m);
+ cc.vpmovsxwd(ymmA, m);
+ cc.vpmovsxwq(ymmA, m);
+ cc.vpmovzxbd(ymmA, m);
+ cc.vpmovzxbq(ymmA, m);
+ cc.vpmovzxbw(ymmA, m);
+ cc.vpmovzxdq(ymmA, m);
+ cc.vpmovzxwd(ymmA, m);
+ cc.vpmovzxwq(ymmA, m);
+ cc.vpshufd(ymmA, m, 0);
+ cc.vpshufhw(ymmA, m, 0);
+ cc.vpshuflw(ymmA, m, 0);
+ cc.vphaddd(ymmA, ymmB, m);
+ cc.vphaddsw(ymmA, ymmB, m);
+ cc.vphaddw(ymmA, ymmB, m);
+ cc.vphsubd(ymmA, ymmB, m);
+ cc.vphsubsw(ymmA, ymmB, m);
+ cc.vphsubw(ymmA, ymmB, m);
+ cc.vpmaddubsw(ymmA, ymmB, m);
+ cc.vpmaddwd(ymmA, ymmB, m);
+ cc.vpmaskmovd(m, xmmB, xmmC);
+ cc.vpmaskmovd(m, ymmB, ymmC);
+ cc.vpmaskmovd(xmmA, xmmB, m);
+ cc.vpmaskmovd(ymmA, ymmB, m);
+ cc.vpmaskmovq(m, xmmB, xmmC);
+ cc.vpmaskmovq(m, ymmB, ymmC);
+ cc.vpmaskmovq(xmmA, xmmB, m);
+ cc.vpmaskmovq(ymmA, ymmB, m);
+ cc.vpmaxsb(ymmA, ymmB, m);
+ cc.vpmaxsd(ymmA, ymmB, m);
+ cc.vpmaxsw(ymmA, ymmB, m);
+ cc.vpmaxub(ymmA, ymmB, m);
+ cc.vpmaxud(ymmA, ymmB, m);
+ cc.vpmaxuw(ymmA, ymmB, m);
+ cc.vpminsb(ymmA, ymmB, m);
+ cc.vpminsd(ymmA, ymmB, m);
+ cc.vpminsw(ymmA, ymmB, m);
+ cc.vpminub(ymmA, ymmB, m);
+ cc.vpminud(ymmA, ymmB, m);
+ cc.vpminuw(ymmA, ymmB, m);
+ cc.vpmuldq(ymmA, ymmB, m);
+ cc.vpmulhrsw(ymmA, ymmB, m);
+ cc.vpmulhuw(ymmA, ymmB, m);
+ cc.vpmulhw(ymmA, ymmB, m);
+ cc.vpmulld(ymmA, ymmB, m);
+ cc.vpmullw(ymmA, ymmB, m);
+ cc.vpmuludq(ymmA, ymmB, m);
+ cc.vpor(ymmA, ymmB, m);
+ cc.vpsadbw(ymmA, ymmB, m);
+ cc.vpshufb(ymmA, ymmB, m);
+ cc.vpsignb(ymmA, ymmB, m);
+ cc.vpsignd(ymmA, ymmB, m);
+ cc.vpsignw(ymmA, ymmB, m);
+ cc.vpslld(ymmA, ymmB, m);
+ cc.vpsllq(ymmA, ymmB, m);
+ cc.vpsllvd(xmmA, xmmB, m);
+ cc.vpsllvd(ymmA, ymmB, m);
+ cc.vpsllvq(xmmA, xmmB, m);
+ cc.vpsllvq(ymmA, ymmB, m);
+ cc.vpsllw(ymmA, ymmB, m);
+ cc.vpsrad(ymmA, ymmB, m);
+ cc.vpsravd(xmmA, xmmB, m);
+ cc.vpsravd(ymmA, ymmB, m);
+ cc.vpsraw(ymmA, ymmB, m);
+ cc.vpsrld(ymmA, ymmB, m);
+ cc.vpsrlq(ymmA, ymmB, m);
+ cc.vpsrlvd(xmmA, xmmB, m);
+ cc.vpsrlvd(ymmA, ymmB, m);
+ cc.vpsrlvq(xmmA, xmmB, m);
+ cc.vpsrlvq(ymmA, ymmB, m);
+ cc.vpsrlw(ymmA, ymmB, m);
+ cc.vpsubb(ymmA, ymmB, m);
+ cc.vpsubd(ymmA, ymmB, m);
+ cc.vpsubq(ymmA, ymmB, m);
+ cc.vpsubsb(ymmA, ymmB, m);
+ cc.vpsubsw(ymmA, ymmB, m);
+ cc.vpsubusb(ymmA, ymmB, m);
+ cc.vpsubusw(ymmA, ymmB, m);
+ cc.vpsubw(ymmA, ymmB, m);
+ cc.vpunpckhbw(ymmA, ymmB, m);
+ cc.vpunpckhdq(ymmA, ymmB, m);
+ cc.vpunpckhqdq(ymmA, ymmB, m);
+ cc.vpunpckhwd(ymmA, ymmB, m);
+ cc.vpunpcklbw(ymmA, ymmB, m);
+ cc.vpunpckldq(ymmA, ymmB, m);
+ cc.vpunpcklqdq(ymmA, ymmB, m);
+ cc.vpunpcklwd(ymmA, ymmB, m);
+ cc.vpxor(ymmA, ymmB, m);
+ }
+}
+
+// Shared path for emitters that are driven with fixed physical registers
+// (instantiated below for Assembler and Builder). Always passes eax and
+// ymm0..ymm3 to generateAvxSequenceInternal(); when `emitPrologEpilog` is set
+// the sequence is additionally bracketed by a prolog/epilog built from a
+// host-calling-convention `void(void*, const void*, size_t)` signature with
+// those registers marked dirty.
+template<typename RawEmitter>
+static void generateAvxSequenceFixedRegs(RawEmitter& cc, InstForm form, bool emitPrologEpilog) {
+  using namespace asmjit::x86;
+
+  // No frame requested - emit just the instruction sequence.
+  if (!emitPrologEpilog) {
+    generateAvxSequenceInternal(cc, form, eax, ymm0, ymm1, ymm2, ymm3);
+    return;
+  }
+
+  FuncDetail detail;
+  detail.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+  FuncFrame funcFrame;
+  funcFrame.init(detail);
+  funcFrame.addDirtyRegs(eax, ymm0, ymm1, ymm2, ymm3);
+  funcFrame.finalize();
+
+  cc.emitProlog(funcFrame);
+  generateAvxSequenceInternal(cc, form, eax, ymm0, ymm1, ymm2, ymm3);
+  cc.emitEpilog(funcFrame);
+}
+
+// Emits the AVX instruction sequence through `emitter`, dispatching on its
+// concrete kind:
+//
+//   - Assembler / Builder - fixed registers (eax, ymm0..ymm3); a prolog/epilog
+//     is emitted around the sequence only when `emitPrologEpilog` is true.
+//   - Compiler            - virtual registers, with the sequence placed between
+//     addFunc() and endFunc(); `emitPrologEpilog` is not consulted here.
+//
+// Builder and Compiler support are compiled out by ASMJIT_NO_BUILDER and
+// ASMJIT_NO_COMPILER respectively. Nothing is emitted if `emitter` matches
+// none of the compiled-in kinds.
+static void generateAvxSequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) {
+  using namespace asmjit::x86;
+
+  if (emitter.isAssembler()) {
+    generateAvxSequenceFixedRegs(*emitter.as<Assembler>(), form, emitPrologEpilog);
+    return;
+  }
+
+#ifndef ASMJIT_NO_BUILDER
+  if (emitter.isBuilder()) {
+    generateAvxSequenceFixedRegs(*emitter.as<Builder>(), form, emitPrologEpilog);
+    return;
+  }
+#endif
+
+#ifndef ASMJIT_NO_COMPILER
+  if (emitter.isCompiler()) {
+    Compiler& compiler = *emitter.as<Compiler>();
+
+    // Virtual registers are created before the function node is added,
+    // in the same order as before: gp, a, b, c, d.
+    Gp base = compiler.newGpz("gp");
+    Ymm v0 = compiler.newYmm("a");
+    Ymm v1 = compiler.newYmm("b");
+    Ymm v2 = compiler.newYmm("c");
+    Ymm v3 = compiler.newYmm("d");
+
+    compiler.addFunc(FuncSignatureT<void>(CallConv::kIdHost));
+    generateAvxSequenceInternal(compiler, form, base, v0, v1, v2, v3);
+    compiler.endFunc();
+  }
+#endif
+}
+
+// Generates a long sequence of AVX512 instructions.
+template<typename Emitter>
+static void generateAvx512SequenceInternal(
+ Emitter& cc,
+ InstForm form,
+ const x86::Gp& gp,
+ const x86::KReg& kA, const x86::KReg& kB, const x86::KReg& kC,
+ const x86::Vec& vecA, const x86::Vec& vecB, const x86::Vec& vecC, const x86::Vec& vecD) {
+
+ x86::Gp gpd = gp.r32();
+ x86::Gp gpq = gp.r64();
+ x86::Gp gpz = cc.is32Bit() ? gpd : gpq;
+
+ x86::Xmm xmmA = vecA.xmm();
+ x86::Xmm xmmB = vecB.xmm();
+ x86::Xmm xmmC = vecC.xmm();
+ x86::Xmm xmmD = vecD.xmm();
+
+ x86::Ymm ymmA = vecA.ymm();
+ x86::Ymm ymmB = vecB.ymm();
+ x86::Ymm ymmC = vecC.ymm();
+ x86::Ymm ymmD = vecD.ymm();
+
+ x86::Zmm zmmA = vecA.zmm();
+ x86::Zmm zmmB = vecB.zmm();
+ x86::Zmm zmmC = vecC.zmm();
+ x86::Zmm zmmD = vecD.zmm();
+
+ cc.xor_(gpd, gpd);
+ cc.vxorps(xmmA, xmmA, xmmA);
+ cc.vxorps(xmmB, xmmB, xmmB);
+ cc.vxorps(xmmC, xmmC, xmmC);
+ cc.vxorps(xmmD, xmmD, xmmD);
+
+ if (form == InstForm::kReg) {
+ cc.kaddb(kA, kB, kC);
+ cc.kaddd(kA, kB, kC);
+ cc.kaddq(kA, kB, kC);
+ cc.kaddw(kA, kB, kC);
+ cc.kandb(kA, kB, kC);
+ cc.kandd(kA, kB, kC);
+ cc.kandnb(kA, kB, kC);
+ cc.kandnd(kA, kB, kC);
+ cc.kandnq(kA, kB, kC);
+ cc.kandnw(kA, kB, kC);
+ cc.kandq(kA, kB, kC);
+ cc.kandw(kA, kB, kC);
+ cc.kmovb(kA, kB);
+ cc.kmovb(kA, gpd);
+ cc.kmovb(gpd, kB);
+ cc.kmovd(kA, kB);
+ cc.kmovd(kA, gpd);
+ cc.kmovd(gpd, kB);
+ cc.kmovq(kA, kB);
+ if (cc.is64Bit()) cc.kmovq(kA, gpq);
+ if (cc.is64Bit()) cc.kmovq(gpq, kB);
+ cc.kmovw(kA, kB);
+ cc.kmovw(kA, gpd);
+ cc.kmovw(gpd, kB);
+ cc.knotb(kA, kB);
+ cc.knotd(kA, kB);
+ cc.knotq(kA, kB);
+ cc.knotw(kA, kB);
+ cc.korb(kA, kB, kC);
+ cc.kord(kA, kB, kC);
+ cc.korq(kA, kB, kC);
+ cc.kortestb(kA, kB);
+ cc.kortestd(kA, kB);
+ cc.kortestq(kA, kB);
+ cc.kortestw(kA, kB);
+ cc.korw(kA, kB, kC);
+ cc.kshiftlb(kA, kB, 0);
+ cc.kshiftld(kA, kB, 0);
+ cc.kshiftlq(kA, kB, 0);
+ cc.kshiftlw(kA, kB, 0);
+ cc.kshiftrb(kA, kB, 0);
+ cc.kshiftrd(kA, kB, 0);
+ cc.kshiftrq(kA, kB, 0);
+ cc.kshiftrw(kA, kB, 0);
+ cc.ktestb(kA, kB);
+ cc.ktestd(kA, kB);
+ cc.ktestq(kA, kB);
+ cc.ktestw(kA, kB);
+ cc.kunpckbw(kA, kB, kC);
+ cc.kunpckdq(kA, kB, kC);
+ cc.kunpckwd(kA, kB, kC);
+ cc.kxnorb(kA, kB, kC);
+ cc.kxnord(kA, kB, kC);
+ cc.kxnorq(kA, kB, kC);
+ cc.kxnorw(kA, kB, kC);
+ cc.kxorb(kA, kB, kC);
+ cc.kxord(kA, kB, kC);
+ cc.kxorq(kA, kB, kC);
+ cc.kxorw(kA, kB, kC);
+ cc.nop();
+
+ cc.evex().vaddpd(xmmA, xmmB, xmmC);
+ cc.evex().vaddpd(ymmA, ymmB, ymmC);
+ cc.evex().vaddpd(zmmA, zmmB, zmmC);
+ cc.evex().vaddps(xmmA, xmmB, xmmC);
+ cc.evex().vaddps(ymmA, ymmB, ymmC);
+ cc.evex().vaddps(zmmA, zmmB, zmmC);
+ cc.evex().vaddsd(xmmA, xmmB, xmmC);
+ cc.evex().vaddss(xmmA, xmmB, xmmC);
+ cc.evex().valignd(xmmA, xmmB, xmmC, 0);
+ cc.evex().valignd(ymmA, ymmB, ymmC, 0);
+ cc.evex().valignd(zmmA, zmmB, zmmC, 0);
+ cc.evex().valignq(xmmA, xmmB, xmmC, 0);
+ cc.evex().valignq(ymmA, ymmB, ymmC, 0);
+ cc.evex().valignq(zmmA, zmmB, zmmC, 0);
+ cc.evex().vandnpd(xmmA, xmmB, xmmC);
+ cc.evex().vandnpd(ymmA, ymmB, ymmC);
+ cc.evex().vandnpd(zmmA, zmmB, zmmC);
+ cc.evex().vandnps(xmmA, xmmB, xmmC);
+ cc.evex().vandnps(ymmA, ymmB, ymmC);
+ cc.evex().vandnps(zmmA, zmmB, zmmC);
+ cc.evex().vandpd(xmmA, xmmB, xmmC);
+ cc.evex().vandpd(ymmA, ymmB, ymmC);
+ cc.evex().vandpd(zmmA, zmmB, zmmC);
+ cc.evex().vandps(xmmA, xmmB, xmmC);
+ cc.evex().vandps(ymmA, ymmB, ymmC);
+ cc.evex().vandps(zmmA, zmmB, zmmC);
+ cc.evex().vblendmpd(xmmA, xmmB, xmmC);
+ cc.evex().vblendmpd(ymmA, ymmB, ymmC);
+ cc.evex().vblendmpd(zmmA, zmmB, zmmC);
+ cc.evex().vblendmps(xmmA, xmmB, xmmC);
+ cc.evex().vblendmps(ymmA, ymmB, ymmC);
+ cc.evex().vblendmps(zmmA, zmmB, zmmC);
+ cc.evex().vbroadcastf32x2(ymmA, xmmB);
+ cc.evex().vbroadcastf32x2(zmmA, xmmB);
+ cc.evex().vbroadcasti32x2(xmmA, xmmB);
+ cc.evex().vbroadcasti32x2(ymmA, xmmB);
+ cc.evex().vbroadcasti32x2(zmmA, xmmB);
+ cc.evex().vbroadcastsd(ymmA, xmmB);
+ cc.evex().vbroadcastsd(zmmA, xmmB);
+ cc.evex().vbroadcastss(xmmA, xmmB);
+ cc.evex().vbroadcastss(ymmA, xmmB);
+ cc.evex().vbroadcastss(zmmA, xmmB);
+ cc.evex().vcmppd(kA, xmmB, xmmC, 0);
+ cc.evex().vcmppd(kA, ymmB, ymmC, 0);
+ cc.evex().vcmppd(kA, zmmB, zmmC, 0);
+ cc.evex().vcmpps(kA, xmmB, xmmC, 0);
+ cc.evex().vcmpps(kA, ymmB, ymmC, 0);
+ cc.evex().vcmpps(kA, zmmB, zmmC, 0);
+ cc.evex().vcmpsd(kA, xmmB, xmmC, 0);
+ cc.evex().vcmpss(kA, xmmB, xmmC, 0);
+ cc.evex().vcomisd(xmmA, xmmB);
+ cc.evex().vcomiss(xmmA, xmmB);
+ cc.evex().vcompresspd(xmmA, xmmB);
+ cc.evex().vcompresspd(ymmA, ymmB);
+ cc.evex().vcompresspd(zmmA, zmmB);
+ cc.evex().vcompressps(xmmA, xmmB);
+ cc.evex().vcompressps(ymmA, ymmB);
+ cc.evex().vcompressps(zmmA, zmmB);
+ cc.evex().vcvtdq2pd(xmmA, xmmB);
+ cc.evex().vcvtdq2pd(ymmA, xmmB);
+ cc.evex().vcvtdq2pd(zmmA, ymmB);
+ cc.evex().vcvtdq2ps(xmmA, xmmB);
+ cc.evex().vcvtdq2ps(ymmA, ymmB);
+ cc.evex().vcvtdq2ps(zmmA, zmmB);
+ cc.evex().vcvtpd2dq(xmmA, xmmB);
+ cc.evex().vcvtpd2dq(xmmA, ymmB);
+ cc.evex().vcvtpd2dq(ymmA, zmmB);
+ cc.evex().vcvtpd2qq(xmmA, xmmB);
+ cc.evex().vcvtpd2qq(ymmA, ymmB);
+ cc.evex().vcvtpd2qq(zmmA, zmmB);
+ cc.evex().vcvtpd2udq(xmmA, xmmB);
+ cc.evex().vcvtpd2udq(xmmA, ymmB);
+ cc.evex().vcvtpd2udq(ymmA, zmmB);
+ cc.evex().vcvtpd2uqq(xmmA, xmmB);
+ cc.evex().vcvtpd2uqq(ymmA, ymmB);
+ cc.evex().vcvtpd2uqq(zmmA, zmmB);
+ cc.evex().vcvtph2ps(xmmA, xmmB);
+ cc.evex().vcvtph2ps(ymmA, xmmB);
+ cc.evex().vcvtph2ps(zmmA, ymmB);
+ cc.evex().vcvtps2dq(xmmA, xmmB);
+ cc.evex().vcvtps2dq(ymmA, ymmB);
+ cc.evex().vcvtps2dq(zmmA, zmmB);
+ cc.evex().vcvtps2pd(xmmA, xmmB);
+ cc.evex().vcvtps2pd(ymmA, xmmB);
+ cc.evex().vcvtps2pd(zmmA, ymmB);
+ cc.evex().vcvtps2ph(xmmA, xmmB, 0);
+ cc.evex().vcvtps2ph(xmmA, ymmB, 0);
+ cc.evex().vcvtps2ph(ymmA, zmmB, 0);
+ cc.evex().vcvtps2qq(xmmA, xmmB);
+ cc.evex().vcvtps2qq(ymmA, xmmB);
+ cc.evex().vcvtps2qq(zmmA, ymmB);
+ cc.evex().vcvtps2udq(xmmA, xmmB);
+ cc.evex().vcvtps2udq(ymmA, ymmB);
+ cc.evex().vcvtps2udq(zmmA, zmmB);
+ cc.evex().vcvtps2uqq(xmmA, xmmB);
+ cc.evex().vcvtps2uqq(ymmA, xmmB);
+ cc.evex().vcvtps2uqq(zmmA, ymmB);
+ cc.evex().vcvtqq2pd(xmmA, xmmB);
+ cc.evex().vcvtqq2pd(ymmA, ymmB);
+ cc.evex().vcvtqq2pd(zmmA, zmmB);
+ cc.evex().vcvtqq2ps(xmmA, xmmB);
+ cc.evex().vcvtqq2ps(xmmA, ymmB);
+ cc.evex().vcvtqq2ps(ymmA, zmmB);
+ cc.evex().vcvtsd2si(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvtsd2si(gpq, xmmB);
+ cc.evex().vcvtsd2ss(xmmA, xmmB, xmmC);
+ cc.evex().vcvtsd2usi(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvtsd2usi(gpq, xmmB);
+ cc.evex().vcvtsi2sd(xmmA, xmmB, gpd);
+ if (cc.is64Bit()) cc.evex().vcvtsi2sd(xmmA, xmmB, gpq);
+ cc.evex().vcvtsi2ss(xmmA, xmmB, gpd);
+ if (cc.is64Bit()) cc.evex().vcvtsi2ss(xmmA, xmmB, gpq);
+ cc.evex().vcvtss2sd(xmmA, xmmB, xmmC);
+ cc.evex().vcvtss2si(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvtss2si(gpq, xmmB);
+ cc.evex().vcvtss2usi(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvtss2usi(gpq, xmmB);
+ cc.evex().vcvttpd2dq(xmmA, xmmB);
+ cc.evex().vcvttpd2dq(xmmA, ymmB);
+ cc.evex().vcvttpd2dq(ymmA, zmmB);
+ cc.evex().vcvttpd2qq(xmmA, xmmB);
+ cc.evex().vcvttpd2qq(ymmA, ymmB);
+ cc.evex().vcvttpd2qq(zmmA, zmmB);
+ cc.evex().vcvttpd2udq(xmmA, xmmB);
+ cc.evex().vcvttpd2udq(xmmA, ymmB);
+ cc.evex().vcvttpd2udq(ymmA, zmmB);
+ cc.evex().vcvttpd2uqq(xmmA, xmmB);
+ cc.evex().vcvttpd2uqq(ymmA, ymmB);
+ cc.evex().vcvttpd2uqq(zmmA, zmmB);
+ cc.evex().vcvttps2dq(xmmA, xmmB);
+ cc.evex().vcvttps2dq(ymmA, ymmB);
+ cc.evex().vcvttps2dq(zmmA, zmmB);
+ cc.evex().vcvttps2qq(xmmA, xmmB);
+ cc.evex().vcvttps2qq(ymmA, xmmB);
+ cc.evex().vcvttps2qq(zmmA, ymmB);
+ cc.evex().vcvttps2udq(xmmA, xmmB);
+ cc.evex().vcvttps2udq(ymmA, ymmB);
+ cc.evex().vcvttps2udq(zmmA, zmmB);
+ cc.evex().vcvttps2uqq(xmmA, xmmB);
+ cc.evex().vcvttps2uqq(ymmA, xmmB);
+ cc.evex().vcvttps2uqq(zmmA, ymmB);
+ cc.evex().vcvttsd2si(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvttsd2si(gpq, xmmB);
+ cc.evex().vcvttsd2usi(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvttsd2usi(gpq, xmmB);
+ cc.evex().vcvttss2si(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvttss2si(gpq, xmmB);
+ cc.evex().vcvttss2usi(gpd, xmmB);
+ if (cc.is64Bit()) cc.evex().vcvttss2usi(gpq, xmmB);
+ cc.evex().vcvtudq2pd(xmmA, xmmB);
+ cc.evex().vcvtudq2pd(ymmA, xmmB);
+ cc.evex().vcvtudq2pd(zmmA, ymmB);
+ cc.evex().vcvtudq2ps(xmmA, xmmB);
+ cc.evex().vcvtudq2ps(ymmA, ymmB);
+ cc.evex().vcvtudq2ps(zmmA, zmmB);
+ cc.evex().vcvtuqq2pd(xmmA, xmmB);
+ cc.evex().vcvtuqq2pd(ymmA, ymmB);
+ cc.evex().vcvtuqq2pd(zmmA, zmmB);
+ cc.evex().vcvtuqq2ps(xmmA, xmmB);
+ cc.evex().vcvtuqq2ps(xmmA, ymmB);
+ cc.evex().vcvtuqq2ps(ymmA, zmmB);
+ cc.evex().vcvtusi2sd(xmmA, xmmB, gpd);
+ if (cc.is64Bit()) cc.evex().vcvtusi2sd(xmmA, xmmB, gpq);
+ cc.evex().vcvtusi2ss(xmmA, xmmB, gpd);
+ if (cc.is64Bit()) cc.evex().vcvtusi2ss(xmmA, xmmB, gpq);
+ cc.evex().vdbpsadbw(xmmA, xmmB, xmmC, 0);
+ cc.evex().vdbpsadbw(ymmA, ymmB, ymmC, 0);
+ cc.evex().vdbpsadbw(zmmA, zmmB, zmmC, 0);
+ cc.evex().vdivpd(xmmA, xmmB, xmmC);
+ cc.evex().vdivpd(ymmA, ymmB, ymmC);
+ cc.evex().vdivpd(zmmA, zmmB, zmmC);
+ cc.evex().vdivps(xmmA, xmmB, xmmC);
+ cc.evex().vdivps(ymmA, ymmB, ymmC);
+ cc.evex().vdivps(zmmA, zmmB, zmmC);
+ cc.evex().vdivsd(xmmA, xmmB, xmmC);
+ cc.evex().vdivss(xmmA, xmmB, xmmC);
+ cc.evex().vexp2pd(zmmA, zmmB);
+ cc.evex().vexp2ps(zmmA, zmmB);
+ cc.evex().vexpandpd(xmmA, xmmB);
+ cc.evex().vexpandpd(ymmA, ymmB);
+ cc.evex().vexpandpd(zmmA, zmmB);
+ cc.evex().vexpandps(xmmA, xmmB);
+ cc.evex().vexpandps(ymmA, ymmB);
+ cc.evex().vexpandps(zmmA, zmmB);
+ cc.evex().vextractf32x4(xmmA, ymmB, 0);
+ cc.evex().vextractf32x4(xmmA, zmmB, 0);
+ cc.evex().vextractf32x8(ymmA, zmmB, 0);
+ cc.evex().vextractf64x2(xmmA, ymmB, 0);
+ cc.evex().vextractf64x2(xmmA, zmmB, 0);
+ cc.evex().vextractf64x4(ymmA, zmmB, 0);
+ cc.evex().vextracti32x4(xmmA, ymmB, 0);
+ cc.evex().vextracti32x4(xmmA, zmmB, 0);
+ cc.evex().vextracti32x8(ymmA, zmmB, 0);
+ cc.evex().vextracti64x2(xmmA, ymmB, 0);
+ cc.evex().vextracti64x2(xmmA, zmmB, 0);
+ cc.evex().vextracti64x4(ymmA, zmmB, 0);
+ cc.evex().vextractps(gpd, xmmB, 0);
+ cc.evex().vfixupimmpd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vfixupimmpd(ymmA, ymmB, ymmC, 0);
+ cc.evex().vfixupimmpd(zmmA, zmmB, zmmC, 0);
+ cc.evex().vfixupimmps(xmmA, xmmB, xmmC, 0);
+ cc.evex().vfixupimmps(ymmA, ymmB, ymmC, 0);
+ cc.evex().vfixupimmps(zmmA, zmmB, zmmC, 0);
+ cc.evex().vfixupimmsd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vfixupimmss(xmmA, xmmB, xmmC, 0);
+ cc.evex().vfmadd132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd132sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd132ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd213sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd213ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmadd231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmadd231sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmadd231ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmaddsub132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmaddsub213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmaddsub213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmaddsub231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmaddsub231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmaddsub231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmaddsub231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub132sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub132ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub213sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub213ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsub231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsub231sd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsub231ss(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsubadd132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsubadd213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsubadd213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfmsubadd231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfmsubadd231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfmsubadd231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfmsubadd231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd132sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd132ss(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd213sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd213ss(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmadd231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmadd231sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmadd231ss(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub132pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub132pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub132pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub132ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub132ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub132ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub132sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub132ss(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub213pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub213pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub213pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub213ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub213ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub213ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub213sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub213ss(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub231pd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub231pd(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub231pd(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub231ps(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub231ps(ymmA, ymmB, ymmC);
+ cc.evex().vfnmsub231ps(zmmA, zmmB, zmmC);
+ cc.evex().vfnmsub231sd(xmmA, xmmB, xmmC);
+ cc.evex().vfnmsub231ss(xmmA, xmmB, xmmC);
+ cc.evex().vfpclasspd(kA, xmmB, 0);
+ cc.evex().vfpclasspd(kA, ymmB, 0);
+ cc.evex().vfpclasspd(kA, zmmB, 0);
+ cc.evex().vfpclassps(kA, xmmB, 0);
+ cc.evex().vfpclassps(kA, ymmB, 0);
+ cc.evex().vfpclassps(kA, zmmB, 0);
+ cc.evex().vfpclasssd(kA, xmmB, 0);
+ cc.evex().vfpclassss(kA, xmmB, 0);
+ cc.evex().vgetexppd(xmmA, xmmB);
+ cc.evex().vgetexppd(ymmA, ymmB);
+ cc.evex().vgetexppd(zmmA, zmmB);
+ cc.evex().vgetexpps(xmmA, xmmB);
+ cc.evex().vgetexpps(ymmA, ymmB);
+ cc.evex().vgetexpps(zmmA, zmmB);
+ cc.evex().vgetexpsd(xmmA, xmmB, xmmC);
+ cc.evex().vgetexpss(xmmA, xmmB, xmmC);
+ cc.evex().vgetmantpd(xmmA, xmmB, 0);
+ cc.evex().vgetmantpd(ymmA, ymmB, 0);
+ cc.evex().vgetmantpd(zmmA, zmmB, 0);
+ cc.evex().vgetmantps(xmmA, xmmB, 0);
+ cc.evex().vgetmantps(ymmA, ymmB, 0);
+ cc.evex().vgetmantps(zmmA, zmmB, 0);
+ cc.evex().vgetmantsd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vgetmantss(xmmA, xmmB, xmmC, 0);
+ cc.evex().vinsertf32x4(ymmA, ymmB, xmmC, 0);
+ cc.evex().vinsertf32x4(zmmA, zmmB, xmmC, 0);
+ cc.evex().vinsertf32x8(zmmA, zmmB, ymmC, 0);
+ cc.evex().vinsertf64x2(ymmA, ymmB, xmmC, 0);
+ cc.evex().vinsertf64x2(zmmA, zmmB, xmmC, 0);
+ cc.evex().vinsertf64x4(zmmA, zmmB, ymmC, 0);
+ cc.evex().vinserti32x4(ymmA, ymmB, xmmC, 0);
+ cc.evex().vinserti32x4(zmmA, zmmB, xmmC, 0);
+ cc.evex().vinserti32x8(zmmA, zmmB, ymmC, 0);
+ cc.evex().vinserti64x2(ymmA, ymmB, xmmC, 0);
+ cc.evex().vinserti64x2(zmmA, zmmB, xmmC, 0);
+ cc.evex().vinserti64x4(zmmA, zmmB, ymmC, 0);
+ cc.evex().vinsertps(xmmA, xmmB, xmmC, 0);
+ cc.evex().vmaxpd(xmmA, xmmB, xmmC);
+ cc.evex().vmaxpd(ymmA, ymmB, ymmC);
+ cc.evex().vmaxpd(zmmA, zmmB, zmmC);
+ cc.evex().vmaxps(xmmA, xmmB, xmmC);
+ cc.evex().vmaxps(ymmA, ymmB, ymmC);
+ cc.evex().vmaxps(zmmA, zmmB, zmmC);
+ cc.evex().vmaxsd(xmmA, xmmB, xmmC);
+ cc.evex().vmaxss(xmmA, xmmB, xmmC);
+ cc.evex().vminpd(xmmA, xmmB, xmmC);
+ cc.evex().vminpd(ymmA, ymmB, ymmC);
+ cc.evex().vminpd(zmmA, zmmB, zmmC);
+ cc.evex().vminps(xmmA, xmmB, xmmC);
+ cc.evex().vminps(ymmA, ymmB, ymmC);
+ cc.evex().vminps(zmmA, zmmB, zmmC);
+ cc.evex().vminsd(xmmA, xmmB, xmmC);
+ cc.evex().vminss(xmmA, xmmB, xmmC);
+ cc.evex().vmovapd(xmmA, xmmB);
+ cc.evex().vmovapd(xmmA, xmmB);
+ cc.evex().vmovapd(ymmA, ymmB);
+ cc.evex().vmovapd(ymmA, ymmB);
+ cc.evex().vmovapd(zmmA, zmmB);
+ cc.evex().vmovapd(zmmA, zmmB);
+ cc.evex().vmovaps(xmmA, xmmB);
+ cc.evex().vmovaps(xmmA, xmmB);
+ cc.evex().vmovaps(ymmA, ymmB);
+ cc.evex().vmovaps(ymmA, ymmB);
+ cc.evex().vmovaps(zmmA, zmmB);
+ cc.evex().vmovaps(zmmA, zmmB);
+ cc.evex().vmovd(gpd, xmmB);
+ cc.evex().vmovd(xmmA, gpd);
+ cc.evex().vmovddup(xmmA, xmmB);
+ cc.evex().vmovddup(ymmA, ymmB);
+ cc.evex().vmovddup(zmmA, zmmB);
+ cc.evex().vmovdqa32(xmmA, xmmB);
+ cc.evex().vmovdqa32(xmmA, xmmB);
+ cc.evex().vmovdqa32(ymmA, ymmB);
+ cc.evex().vmovdqa32(ymmA, ymmB);
+ cc.evex().vmovdqa32(zmmA, zmmB);
+ cc.evex().vmovdqa32(zmmA, zmmB);
+ cc.evex().vmovdqa64(xmmA, xmmB);
+ cc.evex().vmovdqa64(xmmA, xmmB);
+ cc.evex().vmovdqa64(ymmA, ymmB);
+ cc.evex().vmovdqa64(ymmA, ymmB);
+ cc.evex().vmovdqa64(zmmA, zmmB);
+ cc.evex().vmovdqa64(zmmA, zmmB);
+ cc.evex().vmovdqu16(xmmA, xmmB);
+ cc.evex().vmovdqu16(xmmA, xmmB);
+ cc.evex().vmovdqu16(ymmA, ymmB);
+ cc.evex().vmovdqu16(ymmA, ymmB);
+ cc.evex().vmovdqu16(zmmA, zmmB);
+ cc.evex().vmovdqu16(zmmA, zmmB);
+ cc.evex().vmovdqu32(xmmA, xmmB);
+ cc.evex().vmovdqu32(xmmA, xmmB);
+ cc.evex().vmovdqu32(ymmA, ymmB);
+ cc.evex().vmovdqu32(ymmA, ymmB);
+ cc.evex().vmovdqu32(zmmA, zmmB);
+ cc.evex().vmovdqu32(zmmA, zmmB);
+ cc.evex().vmovdqu64(xmmA, xmmB);
+ cc.evex().vmovdqu64(xmmA, xmmB);
+ cc.evex().vmovdqu64(ymmA, ymmB);
+ cc.evex().vmovdqu64(ymmA, ymmB);
+ cc.evex().vmovdqu64(zmmA, zmmB);
+ cc.evex().vmovdqu64(zmmA, zmmB);
+ cc.evex().vmovdqu8(xmmA, xmmB);
+ cc.evex().vmovdqu8(xmmA, xmmB);
+ cc.evex().vmovdqu8(ymmA, ymmB);
+ cc.evex().vmovdqu8(ymmA, ymmB);
+ cc.evex().vmovdqu8(zmmA, zmmB);
+ cc.evex().vmovdqu8(zmmA, zmmB);
+ cc.evex().vmovhlps(xmmA, xmmB, xmmC);
+ if (cc.is64Bit()) cc.evex().vmovq(gpq, xmmB);
+ if (cc.is64Bit()) cc.evex().vmovq(xmmA, gpq);
+ cc.evex().vmovq(xmmA, xmmB);
+ cc.evex().vmovsd(xmmA, xmmB, xmmC);
+ cc.evex().vmovshdup(xmmA, xmmB);
+ cc.evex().vmovshdup(ymmA, ymmB);
+ cc.evex().vmovshdup(zmmA, zmmB);
+ cc.evex().vmovsldup(xmmA, xmmB);
+ cc.evex().vmovsldup(ymmA, ymmB);
+ cc.evex().vmovsldup(zmmA, zmmB);
+ cc.evex().vmovss(xmmA, xmmB, xmmC);
+ cc.evex().vmovupd(xmmA, xmmB);
+ cc.evex().vmovupd(xmmA, xmmB);
+ cc.evex().vmovupd(ymmA, ymmB);
+ cc.evex().vmovupd(ymmA, ymmB);
+ cc.evex().vmovupd(zmmA, zmmB);
+ cc.evex().vmovupd(zmmA, zmmB);
+ cc.evex().vmovups(xmmA, xmmB);
+ cc.evex().vmovups(xmmA, xmmB);
+ cc.evex().vmovups(ymmA, ymmB);
+ cc.evex().vmovups(ymmA, ymmB);
+ cc.evex().vmovups(zmmA, zmmB);
+ cc.evex().vmovups(zmmA, zmmB);
+ cc.evex().vmulpd(xmmA, xmmB, xmmC);
+ cc.evex().vmulpd(ymmA, ymmB, ymmC);
+ cc.evex().vmulpd(zmmA, zmmB, zmmC);
+ cc.evex().vmulps(xmmA, xmmB, xmmC);
+ cc.evex().vmulps(ymmA, ymmB, ymmC);
+ cc.evex().vmulps(zmmA, zmmB, zmmC);
+ cc.evex().vmulsd(xmmA, xmmB, xmmC);
+ cc.evex().vmulss(xmmA, xmmB, xmmC);
+ cc.evex().vorpd(xmmA, xmmB, xmmC);
+ cc.evex().vorpd(ymmA, ymmB, ymmC);
+ cc.evex().vorpd(zmmA, zmmB, zmmC);
+ cc.evex().vorps(xmmA, xmmB, xmmC);
+ cc.evex().vorps(ymmA, ymmB, ymmC);
+ cc.evex().vorps(zmmA, zmmB, zmmC);
+ cc.evex().vpabsb(xmmA, xmmB);
+ cc.evex().vpabsb(ymmA, ymmB);
+ cc.evex().vpabsb(zmmA, zmmB);
+ cc.evex().vpabsd(xmmA, xmmB);
+ cc.evex().vpabsd(ymmA, ymmB);
+ cc.evex().vpabsd(zmmA, zmmB);
+ cc.evex().vpabsq(xmmA, xmmB);
+ cc.evex().vpabsq(ymmA, ymmB);
+ cc.evex().vpabsq(zmmA, zmmB);
+ cc.evex().vpabsw(xmmA, xmmB);
+ cc.evex().vpabsw(ymmA, ymmB);
+ cc.evex().vpabsw(zmmA, zmmB);
+ cc.evex().vpackssdw(xmmA, xmmB, xmmC);
+ cc.evex().vpackssdw(ymmA, ymmB, ymmC);
+ cc.evex().vpackssdw(zmmA, zmmB, zmmC);
+ cc.evex().vpacksswb(xmmA, xmmB, xmmC);
+ cc.evex().vpacksswb(ymmA, ymmB, ymmC);
+ cc.evex().vpacksswb(zmmA, zmmB, zmmC);
+ cc.evex().vpackusdw(xmmA, xmmB, xmmC);
+ cc.evex().vpackusdw(ymmA, ymmB, ymmC);
+ cc.evex().vpackusdw(zmmA, zmmB, zmmC);
+ cc.evex().vpackuswb(xmmA, xmmB, xmmC);
+ cc.evex().vpackuswb(ymmA, ymmB, ymmC);
+ cc.evex().vpackuswb(zmmA, zmmB, zmmC);
+ cc.evex().vpaddb(xmmA, xmmB, xmmC);
+ cc.evex().vpaddb(ymmA, ymmB, ymmC);
+ cc.evex().vpaddb(zmmA, zmmB, zmmC);
+ cc.evex().vpaddd(xmmA, xmmB, xmmC);
+ cc.evex().vpaddd(ymmA, ymmB, ymmC);
+ cc.evex().vpaddd(zmmA, zmmB, zmmC);
+ cc.evex().vpaddq(xmmA, xmmB, xmmC);
+ cc.evex().vpaddq(ymmA, ymmB, ymmC);
+ cc.evex().vpaddq(zmmA, zmmB, zmmC);
+ cc.evex().vpaddsb(xmmA, xmmB, xmmC);
+ cc.evex().vpaddsb(ymmA, ymmB, ymmC);
+ cc.evex().vpaddsb(zmmA, zmmB, zmmC);
+ cc.evex().vpaddsw(xmmA, xmmB, xmmC);
+ cc.evex().vpaddsw(ymmA, ymmB, ymmC);
+ cc.evex().vpaddsw(zmmA, zmmB, zmmC);
+ cc.evex().vpaddusb(xmmA, xmmB, xmmC);
+ cc.evex().vpaddusb(ymmA, ymmB, ymmC);
+ cc.evex().vpaddusb(zmmA, zmmB, zmmC);
+ cc.evex().vpaddusw(xmmA, xmmB, xmmC);
+ cc.evex().vpaddusw(ymmA, ymmB, ymmC);
+ cc.evex().vpaddusw(zmmA, zmmB, zmmC);
+ cc.evex().vpaddw(xmmA, xmmB, xmmC);
+ cc.evex().vpaddw(ymmA, ymmB, ymmC);
+ cc.evex().vpaddw(zmmA, zmmB, zmmC);
+ cc.evex().vpalignr(xmmA, xmmB, xmmC, 0);
+ cc.evex().vpalignr(ymmA, ymmB, ymmC, 0);
+ cc.evex().vpalignr(zmmA, zmmB, zmmC, 0);
+ cc.evex().vpandd(xmmA, xmmB, xmmC);
+ cc.evex().vpandd(ymmA, ymmB, ymmC);
+ cc.evex().vpandd(zmmA, zmmB, zmmC);
+ cc.evex().vpandnd(xmmA, xmmB, xmmC);
+ cc.evex().vpandnd(ymmA, ymmB, ymmC);
+ cc.evex().vpandnd(zmmA, zmmB, zmmC);
+ cc.evex().vpandnq(xmmA, xmmB, xmmC);
+ cc.evex().vpandnq(ymmA, ymmB, ymmC);
+ cc.evex().vpandnq(zmmA, zmmB, zmmC);
+ cc.evex().vpandq(xmmA, xmmB, xmmC);
+ cc.evex().vpandq(ymmA, ymmB, ymmC);
+ cc.evex().vpandq(zmmA, zmmB, zmmC);
+ cc.evex().vpavgb(xmmA, xmmB, xmmC);
+ cc.evex().vpavgb(ymmA, ymmB, ymmC);
+ cc.evex().vpavgb(zmmA, zmmB, zmmC);
+ cc.evex().vpavgw(xmmA, xmmB, xmmC);
+ cc.evex().vpavgw(ymmA, ymmB, ymmC);
+ cc.evex().vpavgw(zmmA, zmmB, zmmC);
+ cc.evex().vpblendmb(xmmA, xmmB, xmmC);
+ cc.evex().vpblendmb(ymmA, ymmB, ymmC);
+ cc.evex().vpblendmb(zmmA, zmmB, zmmC);
+ cc.evex().vpblendmd(xmmA, xmmB, xmmC);
+ cc.evex().vpblendmd(ymmA, ymmB, ymmC);
+ cc.evex().vpblendmd(zmmA, zmmB, zmmC);
+ cc.evex().vpblendmq(xmmA, xmmB, xmmC);
+ cc.evex().vpblendmq(ymmA, ymmB, ymmC);
+ cc.evex().vpblendmq(zmmA, zmmB, zmmC);
+ cc.evex().vpblendmw(xmmA, xmmB, xmmC);
+ cc.evex().vpblendmw(ymmA, ymmB, ymmC);
+ cc.evex().vpblendmw(zmmA, zmmB, zmmC);
+ cc.evex().vpbroadcastb(xmmA, gpd);
+ cc.evex().vpbroadcastb(xmmA, xmmB);
+ cc.evex().vpbroadcastb(ymmA, gpd);
+ cc.evex().vpbroadcastb(ymmA, xmmB);
+ cc.evex().vpbroadcastb(zmmA, gpd);
+ cc.evex().vpbroadcastb(zmmA, xmmB);
+ cc.evex().vpbroadcastd(xmmA, gpd);
+ cc.evex().vpbroadcastd(xmmA, xmmB);
+ cc.evex().vpbroadcastd(ymmA, gpd);
+ cc.evex().vpbroadcastd(ymmA, xmmB);
+ cc.evex().vpbroadcastd(zmmA, gpd);
+ cc.evex().vpbroadcastd(zmmA, xmmB);
+ cc.evex().vpbroadcastmb2q(xmmA, kB);
+ cc.evex().vpbroadcastmb2q(ymmA, kB);
+ cc.evex().vpbroadcastmb2q(zmmA, kB);
+ cc.evex().vpbroadcastmw2d(xmmA, kB);
+ cc.evex().vpbroadcastmw2d(ymmA, kB);
+ cc.evex().vpbroadcastmw2d(zmmA, kB);
+ if (cc.is64Bit()) cc.evex().vpbroadcastq(xmmA, gpq);
+ cc.evex().vpbroadcastq(xmmA, xmmB);
+ if (cc.is64Bit()) cc.evex().vpbroadcastq(ymmA, gpq);
+ cc.evex().vpbroadcastq(ymmA, xmmB);
+ if (cc.is64Bit()) cc.evex().vpbroadcastq(zmmA, gpq);
+ cc.evex().vpbroadcastq(zmmA, xmmB);
+ cc.evex().vpbroadcastw(xmmA, gpd);
+ cc.evex().vpbroadcastw(xmmA, xmmB);
+ cc.evex().vpbroadcastw(ymmA, gpd);
+ cc.evex().vpbroadcastw(ymmA, xmmB);
+ cc.evex().vpbroadcastw(zmmA, gpd);
+ cc.evex().vpbroadcastw(zmmA, xmmB);
+ cc.evex().vpcmpb(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpb(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpb(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpd(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpd(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpd(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpeqb(kA, xmmB, xmmC);
+ cc.evex().vpcmpeqb(kA, ymmB, ymmC);
+ cc.evex().vpcmpeqb(kA, zmmB, zmmC);
+ cc.evex().vpcmpeqd(kA, xmmB, xmmC);
+ cc.evex().vpcmpeqd(kA, ymmB, ymmC);
+ cc.evex().vpcmpeqd(kA, zmmB, zmmC);
+ cc.evex().vpcmpeqq(kA, xmmB, xmmC);
+ cc.evex().vpcmpeqq(kA, ymmB, ymmC);
+ cc.evex().vpcmpeqq(kA, zmmB, zmmC);
+ cc.evex().vpcmpeqw(kA, xmmB, xmmC);
+ cc.evex().vpcmpeqw(kA, ymmB, ymmC);
+ cc.evex().vpcmpeqw(kA, zmmB, zmmC);
+ cc.evex().vpcmpgtb(kA, xmmB, xmmC);
+ cc.evex().vpcmpgtb(kA, ymmB, ymmC);
+ cc.evex().vpcmpgtb(kA, zmmB, zmmC);
+ cc.evex().vpcmpgtd(kA, xmmB, xmmC);
+ cc.evex().vpcmpgtd(kA, ymmB, ymmC);
+ cc.evex().vpcmpgtd(kA, zmmB, zmmC);
+ cc.evex().vpcmpgtq(kA, xmmB, xmmC);
+ cc.evex().vpcmpgtq(kA, ymmB, ymmC);
+ cc.evex().vpcmpgtq(kA, zmmB, zmmC);
+ cc.evex().vpcmpgtw(kA, xmmB, xmmC);
+ cc.evex().vpcmpgtw(kA, ymmB, ymmC);
+ cc.evex().vpcmpgtw(kA, zmmB, zmmC);
+ cc.evex().vpcmpq(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpq(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpq(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpub(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpub(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpub(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpud(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpud(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpud(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpuq(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpuq(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpuq(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpuw(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpuw(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpuw(kA, zmmB, zmmC, 0);
+ cc.evex().vpcmpw(kA, xmmB, xmmC, 0);
+ cc.evex().vpcmpw(kA, ymmB, ymmC, 0);
+ cc.evex().vpcmpw(kA, zmmB, zmmC, 0);
+ cc.evex().vpcompressd(xmmA, xmmB);
+ cc.evex().vpcompressd(ymmA, ymmB);
+ cc.evex().vpcompressd(zmmA, zmmB);
+ cc.evex().vpcompressq(xmmA, xmmB);
+ cc.evex().vpcompressq(ymmA, ymmB);
+ cc.evex().vpcompressq(zmmA, zmmB);
+ cc.evex().vpconflictd(xmmA, xmmB);
+ cc.evex().vpconflictd(ymmA, ymmB);
+ cc.evex().vpconflictd(zmmA, zmmB);
+ cc.evex().vpconflictq(xmmA, xmmB);
+ cc.evex().vpconflictq(ymmA, ymmB);
+ cc.evex().vpconflictq(zmmA, zmmB);
+ cc.evex().vpermb(xmmA, xmmB, xmmC);
+ cc.evex().vpermb(ymmA, ymmB, ymmC);
+ cc.evex().vpermb(zmmA, zmmB, zmmC);
+ cc.evex().vpermd(ymmA, ymmB, ymmC);
+ cc.evex().vpermd(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2b(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2b(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2b(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2d(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2d(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2d(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2pd(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2pd(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2pd(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2ps(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2ps(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2ps(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2q(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2q(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2q(zmmA, zmmB, zmmC);
+ cc.evex().vpermi2w(xmmA, xmmB, xmmC);
+ cc.evex().vpermi2w(ymmA, ymmB, ymmC);
+ cc.evex().vpermi2w(zmmA, zmmB, zmmC);
+ cc.evex().vpermilpd(xmmA, xmmB, xmmC);
+ cc.evex().vpermilpd(ymmA, ymmB, ymmC);
+ cc.evex().vpermilpd(zmmA, zmmB, zmmC);
+ cc.evex().vpermilpd(xmmA, xmmB, 0);
+ cc.evex().vpermilpd(ymmA, ymmB, 0);
+ cc.evex().vpermilpd(zmmA, zmmB, 0);
+ cc.evex().vpermilps(xmmA, xmmB, xmmC);
+ cc.evex().vpermilps(ymmA, ymmB, ymmC);
+ cc.evex().vpermilps(zmmA, zmmB, zmmC);
+ cc.evex().vpermilps(xmmA, xmmB, 0);
+ cc.evex().vpermilps(ymmA, ymmB, 0);
+ cc.evex().vpermilps(zmmA, zmmB, 0);
+ cc.evex().vpermq(ymmA, ymmB, ymmC);
+ cc.evex().vpermq(zmmA, zmmB, zmmC);
+ cc.evex().vpermq(ymmA, ymmB, 0);
+ cc.evex().vpermq(zmmA, zmmB, 0);
+ cc.evex().vpermt2b(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2b(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2b(zmmA, zmmB, zmmC);
+ cc.evex().vpermt2d(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2d(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2d(zmmA, zmmB, zmmC);
+ cc.evex().vpermt2pd(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2pd(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2pd(zmmA, zmmB, zmmC);
+ cc.evex().vpermt2ps(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2ps(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2ps(zmmA, zmmB, zmmC);
+ cc.evex().vpermt2q(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2q(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2q(zmmA, zmmB, zmmC);
+ cc.evex().vpermt2w(xmmA, xmmB, xmmC);
+ cc.evex().vpermt2w(ymmA, ymmB, ymmC);
+ cc.evex().vpermt2w(zmmA, zmmB, zmmC);
+ cc.evex().vpermw(xmmA, xmmB, xmmC);
+ cc.evex().vpermw(ymmA, ymmB, ymmC);
+ cc.evex().vpermw(zmmA, zmmB, zmmC);
+ cc.evex().vpexpandd(xmmA, xmmB);
+ cc.evex().vpexpandd(ymmA, ymmB);
+ cc.evex().vpexpandd(zmmA, zmmB);
+ cc.evex().vpexpandq(xmmA, xmmB);
+ cc.evex().vpexpandq(ymmA, ymmB);
+ cc.evex().vpexpandq(zmmA, zmmB);
+ cc.evex().vpextrb(gpd, xmmB, 0);
+ cc.evex().vpextrd(gpd, xmmB, 0);
+ if (cc.is64Bit()) cc.evex().vpextrq(gpq, xmmB, 0);
+ cc.evex().vpextrw(gpd, xmmB, 0);
+ cc.evex().vpinsrb(xmmA, xmmB, gpd, 0);
+ cc.evex().vpinsrd(xmmA, xmmB, gpd, 0);
+ if (cc.is64Bit()) cc.evex().vpinsrq(xmmA, xmmB, gpq, 0);
+ cc.evex().vpinsrw(xmmA, xmmB, gpd, 0);
+ cc.evex().vplzcntd(xmmA, xmmB);
+ cc.evex().vplzcntd(ymmA, ymmB);
+ cc.evex().vplzcntd(zmmA, zmmB);
+ cc.evex().vplzcntq(xmmA, xmmB);
+ cc.evex().vplzcntq(ymmA, ymmB);
+ cc.evex().vplzcntq(zmmA, zmmB);
+ cc.evex().vpmadd52huq(xmmA, xmmB, xmmC);
+ cc.evex().vpmadd52huq(ymmA, ymmB, ymmC);
+ cc.evex().vpmadd52huq(zmmA, zmmB, zmmC);
+ cc.evex().vpmadd52luq(xmmA, xmmB, xmmC);
+ cc.evex().vpmadd52luq(ymmA, ymmB, ymmC);
+ cc.evex().vpmadd52luq(zmmA, zmmB, zmmC);
+ cc.evex().vpmaddubsw(xmmA, xmmB, xmmC);
+ cc.evex().vpmaddubsw(ymmA, ymmB, ymmC);
+ cc.evex().vpmaddubsw(zmmA, zmmB, zmmC);
+ cc.evex().vpmaddwd(xmmA, xmmB, xmmC);
+ cc.evex().vpmaddwd(ymmA, ymmB, ymmC);
+ cc.evex().vpmaddwd(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxsb(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxsb(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxsb(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxsd(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxsd(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxsd(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxsq(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxsq(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxsq(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxsw(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxsw(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxsw(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxub(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxub(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxub(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxud(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxud(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxud(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxuq(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxuq(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxuq(zmmA, zmmB, zmmC);
+ cc.evex().vpmaxuw(xmmA, xmmB, xmmC);
+ cc.evex().vpmaxuw(ymmA, ymmB, ymmC);
+ cc.evex().vpmaxuw(zmmA, zmmB, zmmC);
+ cc.evex().vpminsb(xmmA, xmmB, xmmC);
+ cc.evex().vpminsb(ymmA, ymmB, ymmC);
+ cc.evex().vpminsb(zmmA, zmmB, zmmC);
+ cc.evex().vpminsd(xmmA, xmmB, xmmC);
+ cc.evex().vpminsd(ymmA, ymmB, ymmC);
+ cc.evex().vpminsd(zmmA, zmmB, zmmC);
+ cc.evex().vpminsq(xmmA, xmmB, xmmC);
+ cc.evex().vpminsq(ymmA, ymmB, ymmC);
+ cc.evex().vpminsq(zmmA, zmmB, zmmC);
+ cc.evex().vpminsw(xmmA, xmmB, xmmC);
+ cc.evex().vpminsw(ymmA, ymmB, ymmC);
+ cc.evex().vpminsw(zmmA, zmmB, zmmC);
+ cc.evex().vpminub(xmmA, xmmB, xmmC);
+ cc.evex().vpminub(ymmA, ymmB, ymmC);
+ cc.evex().vpminub(zmmA, zmmB, zmmC);
+ cc.evex().vpminud(xmmA, xmmB, xmmC);
+ cc.evex().vpminud(ymmA, ymmB, ymmC);
+ cc.evex().vpminud(zmmA, zmmB, zmmC);
+ cc.evex().vpminuq(xmmA, xmmB, xmmC);
+ cc.evex().vpminuq(ymmA, ymmB, ymmC);
+ cc.evex().vpminuq(zmmA, zmmB, zmmC);
+ cc.evex().vpminuw(xmmA, xmmB, xmmC);
+ cc.evex().vpminuw(ymmA, ymmB, ymmC);
+ cc.evex().vpminuw(zmmA, zmmB, zmmC);
+ cc.evex().vpmovb2m(kA, xmmB);
+ cc.evex().vpmovb2m(kA, ymmB);
+ cc.evex().vpmovb2m(kA, zmmB);
+ cc.evex().vpmovd2m(kA, xmmB);
+ cc.evex().vpmovd2m(kA, ymmB);
+ cc.evex().vpmovd2m(kA, zmmB);
+ cc.evex().vpmovdb(xmmA, xmmB);
+ cc.evex().vpmovdb(xmmA, ymmB);
+ cc.evex().vpmovdb(xmmA, zmmB);
+ cc.evex().vpmovdw(xmmA, xmmB);
+ cc.evex().vpmovdw(xmmA, ymmB);
+ cc.evex().vpmovdw(ymmA, zmmB);
+ cc.evex().vpmovm2b(xmmA, kB);
+ cc.evex().vpmovm2b(ymmA, kB);
+ cc.evex().vpmovm2b(zmmA, kB);
+ cc.evex().vpmovm2d(xmmA, kB);
+ cc.evex().vpmovm2d(ymmA, kB);
+ cc.evex().vpmovm2d(zmmA, kB);
+ cc.evex().vpmovm2q(xmmA, kB);
+ cc.evex().vpmovm2q(ymmA, kB);
+ cc.evex().vpmovm2q(zmmA, kB);
+ cc.evex().vpmovm2w(xmmA, kB);
+ cc.evex().vpmovm2w(ymmA, kB);
+ cc.evex().vpmovm2w(zmmA, kB);
+ cc.evex().vpmovq2m(kA, xmmB);
+ cc.evex().vpmovq2m(kA, ymmB);
+ cc.evex().vpmovq2m(kA, zmmB);
+ cc.evex().vpmovqb(xmmA, xmmB);
+ cc.evex().vpmovqb(xmmA, ymmB);
+ cc.evex().vpmovqb(xmmA, zmmB);
+ cc.evex().vpmovqd(xmmA, xmmB);
+ cc.evex().vpmovqd(xmmA, ymmB);
+ cc.evex().vpmovqd(ymmA, zmmB);
+ cc.evex().vpmovqw(xmmA, xmmB);
+ cc.evex().vpmovqw(xmmA, ymmB);
+ cc.evex().vpmovqw(xmmA, zmmB);
+ cc.evex().vpmovsdb(xmmA, xmmB);
+ cc.evex().vpmovsdb(xmmA, ymmB);
+ cc.evex().vpmovsdb(xmmA, zmmB);
+ cc.evex().vpmovsdw(xmmA, xmmB);
+ cc.evex().vpmovsdw(xmmA, ymmB);
+ cc.evex().vpmovsdw(ymmA, zmmB);
+ cc.evex().vpmovsqb(xmmA, xmmB);
+ cc.evex().vpmovsqb(xmmA, ymmB);
+ cc.evex().vpmovsqb(xmmA, zmmB);
+ cc.evex().vpmovsqd(xmmA, xmmB);
+ cc.evex().vpmovsqd(xmmA, ymmB);
+ cc.evex().vpmovsqd(ymmA, zmmB);
+ cc.evex().vpmovsqw(xmmA, xmmB);
+ cc.evex().vpmovsqw(xmmA, ymmB);
+ cc.evex().vpmovsqw(xmmA, zmmB);
+ cc.evex().vpmovswb(xmmA, xmmB);
+ cc.evex().vpmovswb(xmmA, ymmB);
+ cc.evex().vpmovswb(ymmA, zmmB);
+ cc.evex().vpmovsxbd(xmmA, xmmB);
+ cc.evex().vpmovsxbd(ymmA, xmmB);
+ cc.evex().vpmovsxbd(zmmA, xmmB);
+ cc.evex().vpmovsxbq(xmmA, xmmB);
+ cc.evex().vpmovsxbq(ymmA, xmmB);
+ cc.evex().vpmovsxbq(zmmA, xmmB);
+ cc.evex().vpmovsxbw(xmmA, xmmB);
+ cc.evex().vpmovsxbw(ymmA, xmmB);
+ cc.evex().vpmovsxbw(zmmA, ymmB);
+ cc.evex().vpmovsxdq(xmmA, xmmB);
+ cc.evex().vpmovsxdq(ymmA, xmmB);
+ cc.evex().vpmovsxdq(zmmA, ymmB);
+ cc.evex().vpmovsxwd(xmmA, xmmB);
+ cc.evex().vpmovsxwd(ymmA, xmmB);
+ cc.evex().vpmovsxwd(zmmA, ymmB);
+ cc.evex().vpmovsxwq(xmmA, xmmB);
+ cc.evex().vpmovsxwq(ymmA, xmmB);
+ cc.evex().vpmovsxwq(zmmA, xmmB);
+ cc.evex().vpmovusdb(xmmA, xmmB);
+ cc.evex().vpmovusdb(xmmA, ymmB);
+ cc.evex().vpmovusdb(xmmA, zmmB);
+ cc.evex().vpmovusdw(xmmA, xmmB);
+ cc.evex().vpmovusdw(xmmA, ymmB);
+ cc.evex().vpmovusdw(ymmA, zmmB);
+ cc.evex().vpmovusqb(xmmA, xmmB);
+ cc.evex().vpmovusqb(xmmA, ymmB);
+ cc.evex().vpmovusqb(xmmA, zmmB);
+ cc.evex().vpmovusqd(xmmA, xmmB);
+ cc.evex().vpmovusqd(xmmA, ymmB);
+ cc.evex().vpmovusqd(ymmA, zmmB);
+ cc.evex().vpmovusqw(xmmA, xmmB);
+ cc.evex().vpmovusqw(xmmA, ymmB);
+ cc.evex().vpmovusqw(xmmA, zmmB);
+ cc.evex().vpmovuswb(xmmA, xmmB);
+ cc.evex().vpmovuswb(xmmA, ymmB);
+ cc.evex().vpmovuswb(ymmA, zmmB);
+ cc.evex().vpmovw2m(kA, xmmB);
+ cc.evex().vpmovw2m(kA, ymmB);
+ cc.evex().vpmovw2m(kA, zmmB);
+ cc.evex().vpmovwb(xmmA, xmmB);
+ cc.evex().vpmovwb(xmmA, ymmB);
+ cc.evex().vpmovwb(ymmA, zmmB);
+ cc.evex().vpmovzxbd(xmmA, xmmB);
+ cc.evex().vpmovzxbd(ymmA, xmmB);
+ cc.evex().vpmovzxbd(zmmA, xmmB);
+ cc.evex().vpmovzxbq(xmmA, xmmB);
+ cc.evex().vpmovzxbq(ymmA, xmmB);
+ cc.evex().vpmovzxbq(zmmA, xmmB);
+ cc.evex().vpmovzxbw(xmmA, xmmB);
+ cc.evex().vpmovzxbw(ymmA, xmmB);
+ cc.evex().vpmovzxbw(zmmA, ymmB);
+ cc.evex().vpmovzxdq(xmmA, xmmB);
+ cc.evex().vpmovzxdq(ymmA, xmmB);
+ cc.evex().vpmovzxdq(zmmA, ymmB);
+ cc.evex().vpmovzxwd(xmmA, xmmB);
+ cc.evex().vpmovzxwd(ymmA, xmmB);
+ cc.evex().vpmovzxwd(zmmA, ymmB);
+ cc.evex().vpmovzxwq(xmmA, xmmB);
+ cc.evex().vpmovzxwq(ymmA, xmmB);
+ cc.evex().vpmovzxwq(zmmA, xmmB);
+ cc.evex().vpmuldq(xmmA, xmmB, xmmC);
+ cc.evex().vpmuldq(ymmA, ymmB, ymmC);
+ cc.evex().vpmuldq(zmmA, zmmB, zmmC);
+ cc.evex().vpmulhrsw(xmmA, xmmB, xmmC);
+ cc.evex().vpmulhrsw(ymmA, ymmB, ymmC);
+ cc.evex().vpmulhrsw(zmmA, zmmB, zmmC);
+ cc.evex().vpmulhuw(xmmA, xmmB, xmmC);
+ cc.evex().vpmulhuw(ymmA, ymmB, ymmC);
+ cc.evex().vpmulhuw(zmmA, zmmB, zmmC);
+ cc.evex().vpmulhw(xmmA, xmmB, xmmC);
+ cc.evex().vpmulhw(ymmA, ymmB, ymmC);
+ cc.evex().vpmulhw(zmmA, zmmB, zmmC);
+ cc.evex().vpmulld(xmmA, xmmB, xmmC);
+ cc.evex().vpmulld(ymmA, ymmB, ymmC);
+ cc.evex().vpmulld(zmmA, zmmB, zmmC);
+ cc.evex().vpmullq(xmmA, xmmB, xmmC);
+ cc.evex().vpmullq(ymmA, ymmB, ymmC);
+ cc.evex().vpmullq(zmmA, zmmB, zmmC);
+ cc.evex().vpmullw(xmmA, xmmB, xmmC);
+ cc.evex().vpmullw(ymmA, ymmB, ymmC);
+ cc.evex().vpmullw(zmmA, zmmB, zmmC);
+ cc.evex().vpmultishiftqb(xmmA, xmmB, xmmC);
+ cc.evex().vpmultishiftqb(ymmA, ymmB, ymmC);
+ cc.evex().vpmultishiftqb(zmmA, zmmB, zmmC);
+ cc.evex().vpmuludq(xmmA, xmmB, xmmC);
+ cc.evex().vpmuludq(ymmA, ymmB, ymmC);
+ cc.evex().vpmuludq(zmmA, zmmB, zmmC);
+ cc.evex().vpopcntd(zmmA, zmmB);
+ cc.evex().vpopcntq(zmmA, zmmB);
+ cc.evex().vpord(xmmA, xmmB, xmmC);
+ cc.evex().vpord(ymmA, ymmB, ymmC);
+ cc.evex().vpord(zmmA, zmmB, zmmC);
+ cc.evex().vporq(xmmA, xmmB, xmmC);
+ cc.evex().vporq(ymmA, ymmB, ymmC);
+ cc.evex().vporq(zmmA, zmmB, zmmC);
+ cc.evex().vprold(xmmA, xmmB, 0);
+ cc.evex().vprold(ymmA, ymmB, 0);
+ cc.evex().vprold(zmmA, zmmB, 0);
+ cc.evex().vprolq(xmmA, xmmB, 0);
+ cc.evex().vprolq(ymmA, ymmB, 0);
+ cc.evex().vprolq(zmmA, zmmB, 0);
+ cc.evex().vprolvd(xmmA, xmmB, xmmC);
+ cc.evex().vprolvd(ymmA, ymmB, ymmC);
+ cc.evex().vprolvd(zmmA, zmmB, zmmC);
+ cc.evex().vprolvq(xmmA, xmmB, xmmC);
+ cc.evex().vprolvq(ymmA, ymmB, ymmC);
+ cc.evex().vprolvq(zmmA, zmmB, zmmC);
+ cc.evex().vprord(xmmA, xmmB, 0);
+ cc.evex().vprord(ymmA, ymmB, 0);
+ cc.evex().vprord(zmmA, zmmB, 0);
+ cc.evex().vprorq(xmmA, xmmB, 0);
+ cc.evex().vprorq(ymmA, ymmB, 0);
+ cc.evex().vprorq(zmmA, zmmB, 0);
+ cc.evex().vprorvd(xmmA, xmmB, xmmC);
+ cc.evex().vprorvd(ymmA, ymmB, ymmC);
+ cc.evex().vprorvd(zmmA, zmmB, zmmC);
+ cc.evex().vprorvq(xmmA, xmmB, xmmC);
+ cc.evex().vprorvq(ymmA, ymmB, ymmC);
+ cc.evex().vprorvq(zmmA, zmmB, zmmC);
+ cc.evex().vpsadbw(xmmA, xmmB, xmmC);
+ cc.evex().vpsadbw(ymmA, ymmB, ymmC);
+ cc.evex().vpsadbw(zmmA, zmmB, zmmC);
+ cc.evex().vpshufb(xmmA, xmmB, xmmC);
+ cc.evex().vpshufb(ymmA, ymmB, ymmC);
+ cc.evex().vpshufb(zmmA, zmmB, zmmC);
+ cc.evex().vpshufd(xmmA, xmmB, 0);
+ cc.evex().vpshufd(ymmA, ymmB, 0);
+ cc.evex().vpshufd(zmmA, zmmB, 0);
+ cc.evex().vpshufhw(xmmA, xmmB, 0);
+ cc.evex().vpshufhw(ymmA, ymmB, 0);
+ cc.evex().vpshufhw(zmmA, zmmB, 0);
+ cc.evex().vpshuflw(xmmA, xmmB, 0);
+ cc.evex().vpshuflw(ymmA, ymmB, 0);
+ cc.evex().vpshuflw(zmmA, zmmB, 0);
+ cc.evex().vpslld(xmmA, xmmB, xmmC);
+ cc.evex().vpslld(xmmA, xmmB, 0);
+ cc.evex().vpslld(ymmA, ymmB, xmmC);
+ cc.evex().vpslld(ymmA, ymmB, 0);
+ cc.evex().vpslld(zmmA, zmmB, xmmC);
+ cc.evex().vpslld(zmmA, zmmB, 0);
+ cc.evex().vpslldq(xmmA, xmmB, 0);
+ cc.evex().vpslldq(ymmA, ymmB, 0);
+ cc.evex().vpslldq(zmmA, zmmB, 0);
+ cc.evex().vpsllq(xmmA, xmmB, xmmC);
+ cc.evex().vpsllq(xmmA, xmmB, 0);
+ cc.evex().vpsllq(ymmA, ymmB, xmmC);
+ cc.evex().vpsllq(ymmA, ymmB, 0);
+ cc.evex().vpsllq(zmmA, zmmB, xmmC);
+ cc.evex().vpsllq(zmmA, zmmB, 0);
+ cc.evex().vpsllvd(xmmA, xmmB, xmmC);
+ cc.evex().vpsllvd(ymmA, ymmB, ymmC);
+ cc.evex().vpsllvd(zmmA, zmmB, zmmC);
+ cc.evex().vpsllvq(xmmA, xmmB, xmmC);
+ cc.evex().vpsllvq(ymmA, ymmB, ymmC);
+ cc.evex().vpsllvq(zmmA, zmmB, zmmC);
+ cc.evex().vpsllvw(xmmA, xmmB, xmmC);
+ cc.evex().vpsllvw(ymmA, ymmB, ymmC);
+ cc.evex().vpsllvw(zmmA, zmmB, zmmC);
+ cc.evex().vpsllw(xmmA, xmmB, xmmC);
+ cc.evex().vpsllw(xmmA, xmmB, 0);
+ cc.evex().vpsllw(ymmA, ymmB, xmmC);
+ cc.evex().vpsllw(ymmA, ymmB, 0);
+ cc.evex().vpsllw(zmmA, zmmB, xmmC);
+ cc.evex().vpsllw(zmmA, zmmB, 0);
+ cc.evex().vpsrad(xmmA, xmmB, xmmC);
+ cc.evex().vpsrad(xmmA, xmmB, 0);
+ cc.evex().vpsrad(ymmA, ymmB, xmmC);
+ cc.evex().vpsrad(ymmA, ymmB, 0);
+ cc.evex().vpsrad(zmmA, zmmB, xmmC);
+ cc.evex().vpsrad(zmmA, zmmB, 0);
+ cc.evex().vpsraq(xmmA, xmmB, xmmC);
+ cc.evex().vpsraq(xmmA, xmmB, 0);
+ cc.evex().vpsraq(ymmA, ymmB, xmmC);
+ cc.evex().vpsraq(ymmA, ymmB, 0);
+ cc.evex().vpsraq(zmmA, zmmB, xmmC);
+ cc.evex().vpsraq(zmmA, zmmB, 0);
+ cc.evex().vpsravd(xmmA, xmmB, xmmC);
+ cc.evex().vpsravd(ymmA, ymmB, ymmC);
+ cc.evex().vpsravd(zmmA, zmmB, zmmC);
+ cc.evex().vpsravq(xmmA, xmmB, xmmC);
+ cc.evex().vpsravq(ymmA, ymmB, ymmC);
+ cc.evex().vpsravq(zmmA, zmmB, zmmC);
+ cc.evex().vpsravw(xmmA, xmmB, xmmC);
+ cc.evex().vpsravw(ymmA, ymmB, ymmC);
+ cc.evex().vpsravw(zmmA, zmmB, zmmC);
+ cc.evex().vpsraw(xmmA, xmmB, xmmC);
+ cc.evex().vpsraw(xmmA, xmmB, 0);
+ cc.evex().vpsraw(ymmA, ymmB, xmmC);
+ cc.evex().vpsraw(ymmA, ymmB, 0);
+ cc.evex().vpsraw(zmmA, zmmB, xmmC);
+ cc.evex().vpsraw(zmmA, zmmB, 0);
+ cc.evex().vpsrld(xmmA, xmmB, xmmC);
+ cc.evex().vpsrld(xmmA, xmmB, 0);
+ cc.evex().vpsrld(ymmA, ymmB, xmmC);
+ cc.evex().vpsrld(ymmA, ymmB, 0);
+ cc.evex().vpsrld(zmmA, zmmB, xmmC);
+ cc.evex().vpsrld(zmmA, zmmB, 0);
+ cc.evex().vpsrldq(xmmA, xmmB, 0);
+ cc.evex().vpsrldq(ymmA, ymmB, 0);
+ cc.evex().vpsrldq(zmmA, zmmB, 0);
+ cc.evex().vpsrlq(xmmA, xmmB, xmmC);
+ cc.evex().vpsrlq(xmmA, xmmB, 0);
+ cc.evex().vpsrlq(ymmA, ymmB, xmmC);
+ cc.evex().vpsrlq(ymmA, ymmB, 0);
+ cc.evex().vpsrlq(zmmA, zmmB, xmmC);
+ cc.evex().vpsrlq(zmmA, zmmB, 0);
+ cc.evex().vpsrlvd(xmmA, xmmB, xmmC);
+ cc.evex().vpsrlvd(ymmA, ymmB, ymmC);
+ cc.evex().vpsrlvd(zmmA, zmmB, zmmC);
+ cc.evex().vpsrlvq(xmmA, xmmB, xmmC);
+ cc.evex().vpsrlvq(ymmA, ymmB, ymmC);
+ cc.evex().vpsrlvq(zmmA, zmmB, zmmC);
+ cc.evex().vpsrlvw(xmmA, xmmB, xmmC);
+ cc.evex().vpsrlvw(ymmA, ymmB, ymmC);
+ cc.evex().vpsrlvw(zmmA, zmmB, zmmC);
+ cc.evex().vpsrlw(xmmA, xmmB, xmmC);
+ cc.evex().vpsrlw(xmmA, xmmB, 0);
+ cc.evex().vpsrlw(ymmA, ymmB, xmmC);
+ cc.evex().vpsrlw(ymmA, ymmB, 0);
+ cc.evex().vpsrlw(zmmA, zmmB, xmmC);
+ cc.evex().vpsrlw(zmmA, zmmB, 0);
+ cc.evex().vpsubb(xmmA, xmmB, xmmC);
+ cc.evex().vpsubb(ymmA, ymmB, ymmC);
+ cc.evex().vpsubb(zmmA, zmmB, zmmC);
+ cc.evex().vpsubd(xmmA, xmmB, xmmC);
+ cc.evex().vpsubd(ymmA, ymmB, ymmC);
+ cc.evex().vpsubd(zmmA, zmmB, zmmC);
+ cc.evex().vpsubq(xmmA, xmmB, xmmC);
+ cc.evex().vpsubq(ymmA, ymmB, ymmC);
+ cc.evex().vpsubq(zmmA, zmmB, zmmC);
+ cc.evex().vpsubsb(xmmA, xmmB, xmmC);
+ cc.evex().vpsubsb(ymmA, ymmB, ymmC);
+ cc.evex().vpsubsb(zmmA, zmmB, zmmC);
+ cc.evex().vpsubsw(xmmA, xmmB, xmmC);
+ cc.evex().vpsubsw(ymmA, ymmB, ymmC);
+ cc.evex().vpsubsw(zmmA, zmmB, zmmC);
+ cc.evex().vpsubusb(xmmA, xmmB, xmmC);
+ cc.evex().vpsubusb(ymmA, ymmB, ymmC);
+ cc.evex().vpsubusb(zmmA, zmmB, zmmC);
+ cc.evex().vpsubusw(xmmA, xmmB, xmmC);
+ cc.evex().vpsubusw(ymmA, ymmB, ymmC);
+ cc.evex().vpsubusw(zmmA, zmmB, zmmC);
+ cc.evex().vpsubw(xmmA, xmmB, xmmC);
+ cc.evex().vpsubw(ymmA, ymmB, ymmC);
+ cc.evex().vpsubw(zmmA, zmmB, zmmC);
+ cc.evex().vpternlogd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vpternlogd(ymmA, ymmB, ymmC, 0);
+ cc.evex().vpternlogd(zmmA, zmmB, zmmC, 0);
+ cc.evex().vpternlogq(xmmA, xmmB, xmmC, 0);
+ cc.evex().vpternlogq(ymmA, ymmB, ymmC, 0);
+ cc.evex().vpternlogq(zmmA, zmmB, zmmC, 0);
+ cc.evex().vptestmb(kA, xmmB, xmmC);
+ cc.evex().vptestmb(kA, ymmB, ymmC);
+ cc.evex().vptestmb(kA, zmmB, zmmC);
+ cc.evex().vptestmd(kA, xmmB, xmmC);
+ cc.evex().vptestmd(kA, ymmB, ymmC);
+ cc.evex().vptestmd(kA, zmmB, zmmC);
+ cc.evex().vptestmq(kA, xmmB, xmmC);
+ cc.evex().vptestmq(kA, ymmB, ymmC);
+ cc.evex().vptestmq(kA, zmmB, zmmC);
+ cc.evex().vptestmw(kA, xmmB, xmmC);
+ cc.evex().vptestmw(kA, ymmB, ymmC);
+ cc.evex().vptestmw(kA, zmmB, zmmC);
+ cc.evex().vptestnmb(kA, xmmB, xmmC);
+ cc.evex().vptestnmb(kA, ymmB, ymmC);
+ cc.evex().vptestnmb(kA, zmmB, zmmC);
+ cc.evex().vptestnmd(kA, xmmB, xmmC);
+ cc.evex().vptestnmd(kA, ymmB, ymmC);
+ cc.evex().vptestnmd(kA, zmmB, zmmC);
+ cc.evex().vptestnmq(kA, xmmB, xmmC);
+ cc.evex().vptestnmq(kA, ymmB, ymmC);
+ cc.evex().vptestnmq(kA, zmmB, zmmC);
+ cc.evex().vptestnmw(kA, xmmB, xmmC);
+ cc.evex().vptestnmw(kA, ymmB, ymmC);
+ cc.evex().vptestnmw(kA, zmmB, zmmC);
+ cc.evex().vpunpckhbw(xmmA, xmmB, xmmC);
+ cc.evex().vpunpckhbw(ymmA, ymmB, ymmC);
+ cc.evex().vpunpckhbw(zmmA, zmmB, zmmC);
+ cc.evex().vpunpckhdq(xmmA, xmmB, xmmC);
+ cc.evex().vpunpckhdq(ymmA, ymmB, ymmC);
+ cc.evex().vpunpckhdq(zmmA, zmmB, zmmC);
+ cc.evex().vpunpckhqdq(xmmA, xmmB, xmmC);
+ cc.evex().vpunpckhqdq(ymmA, ymmB, ymmC);
+ cc.evex().vpunpckhqdq(zmmA, zmmB, zmmC);
+ cc.evex().vpunpckhwd(xmmA, xmmB, xmmC);
+ cc.evex().vpunpckhwd(ymmA, ymmB, ymmC);
+ cc.evex().vpunpckhwd(zmmA, zmmB, zmmC);
+ cc.evex().vpunpcklbw(xmmA, xmmB, xmmC);
+ cc.evex().vpunpcklbw(ymmA, ymmB, ymmC);
+ cc.evex().vpunpcklbw(zmmA, zmmB, zmmC);
+ cc.evex().vpunpckldq(xmmA, xmmB, xmmC);
+ cc.evex().vpunpckldq(ymmA, ymmB, ymmC);
+ cc.evex().vpunpckldq(zmmA, zmmB, zmmC);
+ cc.evex().vpunpcklqdq(xmmA, xmmB, xmmC);
+ cc.evex().vpunpcklqdq(ymmA, ymmB, ymmC);
+ cc.evex().vpunpcklqdq(zmmA, zmmB, zmmC);
+ cc.evex().vpunpcklwd(xmmA, xmmB, xmmC);
+ cc.evex().vpunpcklwd(ymmA, ymmB, ymmC);
+ cc.evex().vpunpcklwd(zmmA, zmmB, zmmC);
+ cc.evex().vpxord(xmmA, xmmB, xmmC);
+ cc.evex().vpxord(ymmA, ymmB, ymmC);
+ cc.evex().vpxord(zmmA, zmmB, zmmC);
+ cc.evex().vpxorq(xmmA, xmmB, xmmC);
+ cc.evex().vpxorq(ymmA, ymmB, ymmC);
+ cc.evex().vpxorq(zmmA, zmmB, zmmC);
+ cc.evex().vrangepd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrangepd(ymmA, ymmB, ymmC, 0);
+ cc.evex().vrangepd(zmmA, zmmB, zmmC, 0);
+ cc.evex().vrangeps(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrangeps(ymmA, ymmB, ymmC, 0);
+ cc.evex().vrangeps(zmmA, zmmB, zmmC, 0);
+ cc.evex().vrangesd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrangess(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrcp14pd(xmmA, xmmB);
+ cc.evex().vrcp14pd(ymmA, ymmB);
+ cc.evex().vrcp14pd(zmmA, zmmB);
+ cc.evex().vrcp14ps(xmmA, xmmB);
+ cc.evex().vrcp14ps(ymmA, ymmB);
+ cc.evex().vrcp14ps(zmmA, zmmB);
+ cc.evex().vrcp14sd(xmmA, xmmB, xmmC);
+ cc.evex().vrcp14ss(xmmA, xmmB, xmmC);
+ cc.evex().vrcp28pd(zmmA, zmmB);
+ cc.evex().vrcp28ps(zmmA, zmmB);
+ cc.evex().vrcp28sd(xmmA, xmmB, xmmC);
+ cc.evex().vrcp28ss(xmmA, xmmB, xmmC);
+ cc.evex().vreducepd(xmmA, xmmB, 0);
+ cc.evex().vreducepd(ymmA, ymmB, 0);
+ cc.evex().vreducepd(zmmA, zmmB, 0);
+ cc.evex().vreduceps(xmmA, xmmB, 0);
+ cc.evex().vreduceps(ymmA, ymmB, 0);
+ cc.evex().vreduceps(zmmA, zmmB, 0);
+ cc.evex().vreducesd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vreducess(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrndscalepd(xmmA, xmmB, 0);
+ cc.evex().vrndscalepd(ymmA, ymmB, 0);
+ cc.evex().vrndscalepd(zmmA, zmmB, 0);
+ cc.evex().vrndscaleps(xmmA, xmmB, 0);
+ cc.evex().vrndscaleps(ymmA, ymmB, 0);
+ cc.evex().vrndscaleps(zmmA, zmmB, 0);
+ cc.evex().vrndscalesd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrndscaless(xmmA, xmmB, xmmC, 0);
+ cc.evex().vrsqrt14pd(xmmA, xmmB);
+ cc.evex().vrsqrt14pd(ymmA, ymmB);
+ cc.evex().vrsqrt14pd(zmmA, zmmB);
+ cc.evex().vrsqrt14ps(xmmA, xmmB);
+ cc.evex().vrsqrt14ps(ymmA, ymmB);
+ cc.evex().vrsqrt14ps(zmmA, zmmB);
+ cc.evex().vrsqrt14sd(xmmA, xmmB, xmmC);
+ cc.evex().vrsqrt14ss(xmmA, xmmB, xmmC);
+ cc.evex().vrsqrt28pd(zmmA, zmmB);
+ cc.evex().vrsqrt28ps(zmmA, zmmB);
+ cc.evex().vrsqrt28sd(xmmA, xmmB, xmmC);
+ cc.evex().vrsqrt28ss(xmmA, xmmB, xmmC);
+ cc.evex().vscalefpd(xmmA, xmmB, xmmC);
+ cc.evex().vscalefpd(ymmA, ymmB, ymmC);
+ cc.evex().vscalefpd(zmmA, zmmB, zmmC);
+ cc.evex().vscalefps(xmmA, xmmB, xmmC);
+ cc.evex().vscalefps(ymmA, ymmB, ymmC);
+ cc.evex().vscalefps(zmmA, zmmB, zmmC);
+ cc.evex().vscalefsd(xmmA, xmmB, xmmC);
+ cc.evex().vscalefss(xmmA, xmmB, xmmC);
+ cc.evex().vshuff32x4(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshuff32x4(zmmA, zmmB, zmmC, 0);
+ cc.evex().vshuff64x2(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshuff64x2(zmmA, zmmB, zmmC, 0);
+ cc.evex().vshufi32x4(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshufi32x4(zmmA, zmmB, zmmC, 0);
+ cc.evex().vshufi64x2(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshufi64x2(zmmA, zmmB, zmmC, 0);
+ cc.evex().vshufpd(xmmA, xmmB, xmmC, 0);
+ cc.evex().vshufpd(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshufpd(zmmA, zmmB, zmmC, 0);
+ cc.evex().vshufps(xmmA, xmmB, xmmC, 0);
+ cc.evex().vshufps(ymmA, ymmB, ymmC, 0);
+ cc.evex().vshufps(zmmA, zmmB, zmmC, 0);
+ cc.evex().vsqrtpd(xmmA, xmmB);
+ cc.evex().vsqrtpd(ymmA, ymmB);
+ cc.evex().vsqrtpd(zmmA, zmmB);
+ cc.evex().vsqrtps(xmmA, xmmB);
+ cc.evex().vsqrtps(ymmA, ymmB);
+ cc.evex().vsqrtps(zmmA, zmmB);
+ cc.evex().vsqrtsd(xmmA, xmmB, xmmC);
+ cc.evex().vsqrtss(xmmA, xmmB, xmmC);
+ cc.evex().vsubpd(xmmA, xmmB, xmmC);
+ cc.evex().vsubpd(ymmA, ymmB, ymmC);
+ cc.evex().vsubpd(zmmA, zmmB, zmmC);
+ cc.evex().vsubps(xmmA, xmmB, xmmC);
+ cc.evex().vsubps(ymmA, ymmB, ymmC);
+ cc.evex().vsubps(zmmA, zmmB, zmmC);
+ cc.evex().vsubsd(xmmA, xmmB, xmmC);
+ cc.evex().vsubss(xmmA, xmmB, xmmC);
+ cc.evex().vucomisd(xmmA, xmmB);
+ cc.evex().vucomiss(xmmA, xmmB);
+ cc.evex().vunpckhpd(xmmA, xmmB, xmmC);
+ cc.evex().vunpckhpd(ymmA, ymmB, ymmC);
+ cc.evex().vunpckhpd(zmmA, zmmB, zmmC);
+ cc.evex().vunpckhps(xmmA, xmmB, xmmC);
+ cc.evex().vunpckhps(ymmA, ymmB, ymmC);
+ cc.evex().vunpckhps(zmmA, zmmB, zmmC);
+ cc.evex().vunpcklpd(xmmA, xmmB, xmmC);
+ cc.evex().vunpcklpd(ymmA, ymmB, ymmC);
+ cc.evex().vunpcklpd(zmmA, zmmB, zmmC);
+ cc.evex().vunpcklps(xmmA, xmmB, xmmC);
+ cc.evex().vunpcklps(ymmA, ymmB, ymmC);
+ cc.evex().vunpcklps(zmmA, zmmB, zmmC);
+ cc.evex().vxorpd(xmmA, xmmB, xmmC);
+ cc.evex().vxorpd(ymmA, ymmB, ymmC);
+ cc.evex().vxorpd(zmmA, zmmB, zmmC);
+ cc.evex().vxorps(xmmA, xmmB, xmmC);
+ cc.evex().vxorps(ymmA, ymmB, ymmC);
+ cc.evex().vxorps(zmmA, zmmB, zmmC);
+ }
+ else {
+ x86::Mem m = x86::ptr(gpz);
+ x86::Mem m32 = x86::dword_ptr(gpz);
+ x86::Mem m64 = x86::qword_ptr(gpz);
+ x86::Mem m128 = x86::xmmword_ptr(gpz);
+ x86::Mem m256 = x86::ymmword_ptr(gpz);
+ x86::Mem m512 = x86::zmmword_ptr(gpz);
+ x86::Mem vx_ptr = x86::ptr(gpz, xmmD);
+ x86::Mem vy_ptr = x86::ptr(gpz, ymmD);
+ x86::Mem vz_ptr = x86::ptr(gpz, zmmD);
+
+ cc.kmovb(kA, m);
+ cc.kmovb(m, kB);
+ cc.kmovd(kA, m);
+ cc.kmovd(m, kB);
+ cc.kmovq(kA, m);
+ cc.kmovq(m, kB);
+ cc.kmovw(kA, m);
+ cc.kmovw(m, kB);
+
+ cc.evex().vaddpd(xmmA, xmmB, m);
+ cc.evex().vaddpd(ymmA, ymmB, m);
+ cc.evex().vaddpd(zmmA, zmmB, m);
+ cc.evex().vaddps(xmmA, xmmB, m);
+ cc.evex().vaddps(ymmA, ymmB, m);
+ cc.evex().vaddps(zmmA, zmmB, m);
+ cc.evex().vaddsd(xmmA, xmmB, m);
+ cc.evex().vaddss(xmmA, xmmB, m);
+ cc.evex().valignd(xmmA, xmmB, m, 0);
+ cc.evex().valignd(ymmA, ymmB, m, 0);
+ cc.evex().valignd(zmmA, zmmB, m, 0);
+ cc.evex().valignq(xmmA, xmmB, m, 0);
+ cc.evex().valignq(ymmA, ymmB, m, 0);
+ cc.evex().valignq(zmmA, zmmB, m, 0);
+ cc.evex().vandnpd(xmmA, xmmB, m);
+ cc.evex().vandnpd(ymmA, ymmB, m);
+ cc.evex().vandnpd(zmmA, zmmB, m);
+ cc.evex().vandnps(xmmA, xmmB, m);
+ cc.evex().vandnps(ymmA, ymmB, m);
+ cc.evex().vandnps(zmmA, zmmB, m);
+ cc.evex().vandpd(xmmA, xmmB, m);
+ cc.evex().vandpd(ymmA, ymmB, m);
+ cc.evex().vandpd(zmmA, zmmB, m);
+ cc.evex().vandps(xmmA, xmmB, m);
+ cc.evex().vandps(ymmA, ymmB, m);
+ cc.evex().vandps(zmmA, zmmB, m);
+ cc.evex().vblendmpd(xmmA, xmmB, m);
+ cc.evex().vblendmpd(ymmA, ymmB, m);
+ cc.evex().vblendmpd(zmmA, zmmB, m);
+ cc.evex().vblendmps(xmmA, xmmB, m);
+ cc.evex().vblendmps(ymmA, ymmB, m);
+ cc.evex().vblendmps(zmmA, zmmB, m);
+ cc.evex().vbroadcastf32x2(ymmA, m);
+ cc.evex().vbroadcastf32x2(zmmA, m);
+ cc.evex().vbroadcastf32x4(ymmA, m);
+ cc.evex().vbroadcastf32x4(zmmA, m);
+ cc.evex().vbroadcastf32x8(zmmA, m);
+ cc.evex().vbroadcastf64x2(ymmA, m);
+ cc.evex().vbroadcastf64x2(zmmA, m);
+ cc.evex().vbroadcastf64x4(zmmA, m);
+ cc.evex().vbroadcasti32x2(xmmA, m);
+ cc.evex().vbroadcasti32x2(ymmA, m);
+ cc.evex().vbroadcasti32x2(zmmA, m);
+ cc.evex().vbroadcasti32x4(ymmA, m);
+ cc.evex().vbroadcasti32x4(zmmA, m);
+ cc.evex().vbroadcasti32x8(zmmA, m);
+ cc.evex().vbroadcasti64x2(ymmA, m);
+ cc.evex().vbroadcasti64x2(zmmA, m);
+ cc.evex().vbroadcasti64x4(zmmA, m);
+ cc.evex().vbroadcastsd(ymmA, m);
+ cc.evex().vbroadcastsd(zmmA, m);
+ cc.evex().vbroadcastss(xmmA, m);
+ cc.evex().vbroadcastss(ymmA, m);
+ cc.evex().vbroadcastss(zmmA, m);
+ cc.evex().vcmppd(kA, xmmB, m, 0);
+ cc.evex().vcmppd(kA, ymmB, m, 0);
+ cc.evex().vcmppd(kA, zmmB, m, 0);
+ cc.evex().vcmpps(kA, xmmB, m, 0);
+ cc.evex().vcmpps(kA, ymmB, m, 0);
+ cc.evex().vcmpps(kA, zmmB, m, 0);
+ cc.evex().vcmpsd(kA, xmmB, m, 0);
+ cc.evex().vcmpss(kA, xmmB, m, 0);
+ cc.evex().vcomisd(xmmA, m);
+ cc.evex().vcomiss(xmmA, m);
+ cc.evex().vcompresspd(m, xmmB);
+ cc.evex().vcompresspd(m, ymmB);
+ cc.evex().vcompresspd(m, zmmB);
+ cc.evex().vcompressps(m, xmmB);
+ cc.evex().vcompressps(m, ymmB);
+ cc.evex().vcompressps(m, zmmB);
+ cc.evex().vcvtdq2pd(xmmA, m);
+ cc.evex().vcvtdq2pd(ymmA, m);
+ cc.evex().vcvtdq2pd(zmmA, m);
+ cc.evex().vcvtdq2ps(xmmA, m);
+ cc.evex().vcvtdq2ps(ymmA, m);
+ cc.evex().vcvtdq2ps(zmmA, m);
+ cc.evex().vcvtpd2dq(xmmA, m128);
+ cc.evex().vcvtpd2dq(xmmA, m256);
+ cc.evex().vcvtpd2dq(ymmA, m512);
+ cc.evex().vcvtpd2qq(xmmA, m);
+ cc.evex().vcvtpd2qq(ymmA, m);
+ cc.evex().vcvtpd2qq(zmmA, m);
+ cc.evex().vcvtpd2udq(xmmA, m128);
+ cc.evex().vcvtpd2udq(xmmA, m256);
+ cc.evex().vcvtpd2udq(ymmA, m512);
+ cc.evex().vcvtpd2uqq(xmmA, m);
+ cc.evex().vcvtpd2uqq(ymmA, m);
+ cc.evex().vcvtpd2uqq(zmmA, m);
+ cc.evex().vcvtph2ps(xmmA, m);
+ cc.evex().vcvtph2ps(ymmA, m);
+ cc.evex().vcvtph2ps(zmmA, m);
+ cc.evex().vcvtps2dq(xmmA, m);
+ cc.evex().vcvtps2dq(ymmA, m);
+ cc.evex().vcvtps2dq(zmmA, m);
+ cc.evex().vcvtps2pd(xmmA, m);
+ cc.evex().vcvtps2pd(ymmA, m);
+ cc.evex().vcvtps2pd(zmmA, m);
+ cc.evex().vcvtps2ph(m, xmmB, 0);
+ cc.evex().vcvtps2ph(m, ymmB, 0);
+ cc.evex().vcvtps2ph(m, zmmB, 0);
+ cc.evex().vcvtps2qq(xmmA, m);
+ cc.evex().vcvtps2qq(ymmA, m);
+ cc.evex().vcvtps2qq(zmmA, m);
+ cc.evex().vcvtps2udq(xmmA, m);
+ cc.evex().vcvtps2udq(ymmA, m);
+ cc.evex().vcvtps2udq(zmmA, m);
+ cc.evex().vcvtps2uqq(xmmA, m);
+ cc.evex().vcvtps2uqq(ymmA, m);
+ cc.evex().vcvtps2uqq(zmmA, m);
+ cc.evex().vcvtqq2pd(xmmA, m);
+ cc.evex().vcvtqq2pd(ymmA, m);
+ cc.evex().vcvtqq2pd(zmmA, m);
+ cc.evex().vcvtqq2ps(xmmA, m128);
+ cc.evex().vcvtqq2ps(xmmA, m256);
+ cc.evex().vcvtqq2ps(ymmA, m512);
+ cc.evex().vcvtsd2si(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvtsd2si(gpq, m);
+ cc.evex().vcvtsd2ss(xmmA, xmmB, m);
+ cc.evex().vcvtsd2usi(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvtsd2usi(gpq, m);
+ cc.evex().vcvtsi2sd(xmmA, xmmB, m32);
+ if (cc.is64Bit()) cc.evex().vcvtsi2sd(xmmA, xmmB, m64);
+ cc.evex().vcvtsi2ss(xmmA, xmmB, m32);
+ if (cc.is64Bit()) cc.evex().vcvtsi2ss(xmmA, xmmB, m64);
+ cc.evex().vcvtss2sd(xmmA, xmmB, m);
+ cc.evex().vcvtss2si(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvtss2si(gpq, m);
+ cc.evex().vcvtss2usi(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvtss2usi(gpq, m);
+ cc.evex().vcvttpd2dq(xmmA, m128);
+ cc.evex().vcvttpd2dq(xmmA, m256);
+ cc.evex().vcvttpd2dq(ymmA, m512);
+ cc.evex().vcvttpd2qq(xmmA, m);
+ cc.evex().vcvttpd2qq(ymmA, m);
+ cc.evex().vcvttpd2qq(zmmA, m);
+ cc.evex().vcvttpd2udq(xmmA, m128);
+ cc.evex().vcvttpd2udq(xmmA, m256);
+ cc.evex().vcvttpd2udq(ymmA, m512);
+ cc.evex().vcvttpd2uqq(xmmA, m);
+ cc.evex().vcvttpd2uqq(ymmA, m);
+ cc.evex().vcvttpd2uqq(zmmA, m);
+ cc.evex().vcvttps2dq(xmmA, m);
+ cc.evex().vcvttps2dq(ymmA, m);
+ cc.evex().vcvttps2dq(zmmA, m);
+ cc.evex().vcvttps2qq(xmmA, m);
+ cc.evex().vcvttps2qq(ymmA, m);
+ cc.evex().vcvttps2qq(zmmA, m);
+ cc.evex().vcvttps2udq(xmmA, m);
+ cc.evex().vcvttps2udq(ymmA, m);
+ cc.evex().vcvttps2udq(zmmA, m);
+ cc.evex().vcvttps2uqq(xmmA, m);
+ cc.evex().vcvttps2uqq(ymmA, m);
+ cc.evex().vcvttps2uqq(zmmA, m);
+ cc.evex().vcvttsd2si(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvttsd2si(gpq, m);
+ cc.evex().vcvttsd2usi(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvttsd2usi(gpq, m);
+ cc.evex().vcvttss2si(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvttss2si(gpq, m);
+ cc.evex().vcvttss2usi(gpd, m);
+ if (cc.is64Bit()) cc.evex().vcvttss2usi(gpq, m);
+ cc.evex().vcvtudq2pd(xmmA, m);
+ cc.evex().vcvtudq2pd(ymmA, m);
+ cc.evex().vcvtudq2pd(zmmA, m);
+ cc.evex().vcvtudq2ps(xmmA, m);
+ cc.evex().vcvtudq2ps(ymmA, m);
+ cc.evex().vcvtudq2ps(zmmA, m);
+ cc.evex().vcvtuqq2pd(xmmA, m);
+ cc.evex().vcvtuqq2pd(ymmA, m);
+ cc.evex().vcvtuqq2pd(zmmA, m);
+ cc.evex().vcvtuqq2ps(xmmA, m128);
+ cc.evex().vcvtuqq2ps(xmmA, m256);
+ cc.evex().vcvtuqq2ps(ymmA, m512);
+ cc.evex().vcvtusi2sd(xmmA, xmmB, m32);
+ if (cc.is64Bit()) cc.evex().vcvtusi2sd(xmmA, xmmB, m64);
+ cc.evex().vcvtusi2ss(xmmA, xmmB, m32);
+ if (cc.is64Bit()) cc.evex().vcvtusi2ss(xmmA, xmmB, m64);
+ cc.evex().vdbpsadbw(xmmA, xmmB, m, 0);
+ cc.evex().vdbpsadbw(ymmA, ymmB, m, 0);
+ cc.evex().vdbpsadbw(zmmA, zmmB, m, 0);
+ cc.evex().vdivpd(xmmA, xmmB, m);
+ cc.evex().vdivpd(ymmA, ymmB, m);
+ cc.evex().vdivpd(zmmA, zmmB, m);
+ cc.evex().vdivps(xmmA, xmmB, m);
+ cc.evex().vdivps(ymmA, ymmB, m);
+ cc.evex().vdivps(zmmA, zmmB, m);
+ cc.evex().vdivsd(xmmA, xmmB, m);
+ cc.evex().vdivss(xmmA, xmmB, m);
+ cc.evex().vexp2pd(zmmA, m);
+ cc.evex().vexp2ps(zmmA, m);
+ cc.evex().vexpandpd(xmmA, m);
+ cc.evex().vexpandpd(ymmA, m);
+ cc.evex().vexpandpd(zmmA, m);
+ cc.evex().vexpandps(xmmA, m);
+ cc.evex().vexpandps(ymmA, m);
+ cc.evex().vexpandps(zmmA, m);
+ cc.evex().vextractf32x4(m, ymmB, 0);
+ cc.evex().vextractf32x4(m, zmmB, 0);
+ cc.evex().vextractf32x8(m, zmmB, 0);
+ cc.evex().vextractf64x2(m, ymmB, 0);
+ cc.evex().vextractf64x2(m, zmmB, 0);
+ cc.evex().vextractf64x4(m, zmmB, 0);
+ cc.evex().vextracti32x4(m, ymmB, 0);
+ cc.evex().vextracti32x4(m, zmmB, 0);
+ cc.evex().vextracti32x8(m, zmmB, 0);
+ cc.evex().vextracti64x2(m, ymmB, 0);
+ cc.evex().vextracti64x2(m, zmmB, 0);
+ cc.evex().vextracti64x4(m, zmmB, 0);
+ cc.evex().vextractps(m, xmmB, 0);
+ cc.evex().vfixupimmpd(xmmA, xmmB, m, 0);
+ cc.evex().vfixupimmpd(ymmA, ymmB, m, 0);
+ cc.evex().vfixupimmpd(zmmA, zmmB, m, 0);
+ cc.evex().vfixupimmps(xmmA, xmmB, m, 0);
+ cc.evex().vfixupimmps(ymmA, ymmB, m, 0);
+ cc.evex().vfixupimmps(zmmA, zmmB, m, 0);
+ cc.evex().vfixupimmsd(xmmA, xmmB, m, 0);
+ cc.evex().vfixupimmss(xmmA, xmmB, m, 0);
+ cc.evex().vfmadd132pd(xmmA, xmmB, m);
+ cc.evex().vfmadd132pd(ymmA, ymmB, m);
+ cc.evex().vfmadd132pd(zmmA, zmmB, m);
+ cc.evex().vfmadd132ps(xmmA, xmmB, m);
+ cc.evex().vfmadd132ps(ymmA, ymmB, m);
+ cc.evex().vfmadd132ps(zmmA, zmmB, m);
+ cc.evex().vfmadd132sd(xmmA, xmmB, m);
+ cc.evex().vfmadd132ss(xmmA, xmmB, m);
+ cc.evex().vfmadd213pd(xmmA, xmmB, m);
+ cc.evex().vfmadd213pd(ymmA, ymmB, m);
+ cc.evex().vfmadd213pd(zmmA, zmmB, m);
+ cc.evex().vfmadd213ps(xmmA, xmmB, m);
+ cc.evex().vfmadd213ps(ymmA, ymmB, m);
+ cc.evex().vfmadd213ps(zmmA, zmmB, m);
+ cc.evex().vfmadd213sd(xmmA, xmmB, m);
+ cc.evex().vfmadd213ss(xmmA, xmmB, m);
+ cc.evex().vfmadd231pd(xmmA, xmmB, m);
+ cc.evex().vfmadd231pd(ymmA, ymmB, m);
+ cc.evex().vfmadd231pd(zmmA, zmmB, m);
+ cc.evex().vfmadd231ps(xmmA, xmmB, m);
+ cc.evex().vfmadd231ps(ymmA, ymmB, m);
+ cc.evex().vfmadd231ps(zmmA, zmmB, m);
+ cc.evex().vfmadd231sd(xmmA, xmmB, m);
+ cc.evex().vfmadd231ss(xmmA, xmmB, m);
+ cc.evex().vfmaddsub132pd(xmmA, xmmB, m);
+ cc.evex().vfmaddsub132pd(ymmA, ymmB, m);
+ cc.evex().vfmaddsub132pd(zmmA, zmmB, m);
+ cc.evex().vfmaddsub132ps(xmmA, xmmB, m);
+ cc.evex().vfmaddsub132ps(ymmA, ymmB, m);
+ cc.evex().vfmaddsub132ps(zmmA, zmmB, m);
+ cc.evex().vfmaddsub213pd(xmmA, xmmB, m);
+ cc.evex().vfmaddsub213pd(ymmA, ymmB, m);
+ cc.evex().vfmaddsub213pd(zmmA, zmmB, m);
+ cc.evex().vfmaddsub213ps(xmmA, xmmB, m);
+ cc.evex().vfmaddsub213ps(ymmA, ymmB, m);
+ cc.evex().vfmaddsub213ps(zmmA, zmmB, m);
+ cc.evex().vfmaddsub231pd(xmmA, xmmB, m);
+ cc.evex().vfmaddsub231pd(ymmA, ymmB, m);
+ cc.evex().vfmaddsub231pd(zmmA, zmmB, m);
+ cc.evex().vfmaddsub231ps(xmmA, xmmB, m);
+ cc.evex().vfmaddsub231ps(ymmA, ymmB, m);
+ cc.evex().vfmaddsub231ps(zmmA, zmmB, m);
+ cc.evex().vfmsub132pd(xmmA, xmmB, m);
+ cc.evex().vfmsub132pd(ymmA, ymmB, m);
+ cc.evex().vfmsub132pd(zmmA, zmmB, m);
+ cc.evex().vfmsub132ps(xmmA, xmmB, m);
+ cc.evex().vfmsub132ps(ymmA, ymmB, m);
+ cc.evex().vfmsub132ps(zmmA, zmmB, m);
+ cc.evex().vfmsub132sd(xmmA, xmmB, m);
+ cc.evex().vfmsub132ss(xmmA, xmmB, m);
+ cc.evex().vfmsub213pd(xmmA, xmmB, m);
+ cc.evex().vfmsub213pd(ymmA, ymmB, m);
+ cc.evex().vfmsub213pd(zmmA, zmmB, m);
+ cc.evex().vfmsub213ps(xmmA, xmmB, m);
+ cc.evex().vfmsub213ps(ymmA, ymmB, m);
+ cc.evex().vfmsub213ps(zmmA, zmmB, m);
+ cc.evex().vfmsub213sd(xmmA, xmmB, m);
+ cc.evex().vfmsub213ss(xmmA, xmmB, m);
+ cc.evex().vfmsub231pd(xmmA, xmmB, m);
+ cc.evex().vfmsub231pd(ymmA, ymmB, m);
+ cc.evex().vfmsub231pd(zmmA, zmmB, m);
+ cc.evex().vfmsub231ps(xmmA, xmmB, m);
+ cc.evex().vfmsub231ps(ymmA, ymmB, m);
+ cc.evex().vfmsub231ps(zmmA, zmmB, m);
+ cc.evex().vfmsub231sd(xmmA, xmmB, m);
+ cc.evex().vfmsub231ss(xmmA, xmmB, m);
+ cc.evex().vfmsubadd132pd(xmmA, xmmB, m);
+ cc.evex().vfmsubadd132pd(ymmA, ymmB, m);
+ cc.evex().vfmsubadd132pd(zmmA, zmmB, m);
+ cc.evex().vfmsubadd132ps(xmmA, xmmB, m);
+ cc.evex().vfmsubadd132ps(ymmA, ymmB, m);
+ cc.evex().vfmsubadd132ps(zmmA, zmmB, m);
+ cc.evex().vfmsubadd213pd(xmmA, xmmB, m);
+ cc.evex().vfmsubadd213pd(ymmA, ymmB, m);
+ cc.evex().vfmsubadd213pd(zmmA, zmmB, m);
+ cc.evex().vfmsubadd213ps(xmmA, xmmB, m);
+ cc.evex().vfmsubadd213ps(ymmA, ymmB, m);
+ cc.evex().vfmsubadd213ps(zmmA, zmmB, m);
+ cc.evex().vfmsubadd231pd(xmmA, xmmB, m);
+ cc.evex().vfmsubadd231pd(ymmA, ymmB, m);
+ cc.evex().vfmsubadd231pd(zmmA, zmmB, m);
+ cc.evex().vfmsubadd231ps(xmmA, xmmB, m);
+ cc.evex().vfmsubadd231ps(ymmA, ymmB, m);
+ cc.evex().vfmsubadd231ps(zmmA, zmmB, m);
+ cc.evex().vfnmadd132pd(xmmA, xmmB, m);
+ cc.evex().vfnmadd132pd(ymmA, ymmB, m);
+ cc.evex().vfnmadd132pd(zmmA, zmmB, m);
+ cc.evex().vfnmadd132ps(xmmA, xmmB, m);
+ cc.evex().vfnmadd132ps(ymmA, ymmB, m);
+ cc.evex().vfnmadd132ps(zmmA, zmmB, m);
+ cc.evex().vfnmadd132sd(xmmA, xmmB, m);
+ cc.evex().vfnmadd132ss(xmmA, xmmB, m);
+ cc.evex().vfnmadd213pd(xmmA, xmmB, m);
+ cc.evex().vfnmadd213pd(ymmA, ymmB, m);
+ cc.evex().vfnmadd213pd(zmmA, zmmB, m);
+ cc.evex().vfnmadd213ps(xmmA, xmmB, m);
+ cc.evex().vfnmadd213ps(ymmA, ymmB, m);
+ cc.evex().vfnmadd213ps(zmmA, zmmB, m);
+ cc.evex().vfnmadd213sd(xmmA, xmmB, m);
+ cc.evex().vfnmadd213ss(xmmA, xmmB, m);
+ cc.evex().vfnmadd231pd(xmmA, xmmB, m);
+ cc.evex().vfnmadd231pd(ymmA, ymmB, m);
+ cc.evex().vfnmadd231pd(zmmA, zmmB, m);
+ cc.evex().vfnmadd231ps(xmmA, xmmB, m);
+ cc.evex().vfnmadd231ps(ymmA, ymmB, m);
+ cc.evex().vfnmadd231ps(zmmA, zmmB, m);
+ cc.evex().vfnmadd231sd(xmmA, xmmB, m);
+ cc.evex().vfnmadd231ss(xmmA, xmmB, m);
+ cc.evex().vfnmsub132pd(xmmA, xmmB, m);
+ cc.evex().vfnmsub132pd(ymmA, ymmB, m);
+ cc.evex().vfnmsub132pd(zmmA, zmmB, m);
+ cc.evex().vfnmsub132ps(xmmA, xmmB, m);
+ cc.evex().vfnmsub132ps(ymmA, ymmB, m);
+ cc.evex().vfnmsub132ps(zmmA, zmmB, m);
+ cc.evex().vfnmsub132sd(xmmA, xmmB, m);
+ cc.evex().vfnmsub132ss(xmmA, xmmB, m);
+ cc.evex().vfnmsub213pd(xmmA, xmmB, m);
+ cc.evex().vfnmsub213pd(ymmA, ymmB, m);
+ cc.evex().vfnmsub213pd(zmmA, zmmB, m);
+ cc.evex().vfnmsub213ps(xmmA, xmmB, m);
+ cc.evex().vfnmsub213ps(ymmA, ymmB, m);
+ cc.evex().vfnmsub213ps(zmmA, zmmB, m);
+ cc.evex().vfnmsub213sd(xmmA, xmmB, m);
+ cc.evex().vfnmsub213ss(xmmA, xmmB, m);
+ cc.evex().vfnmsub231pd(xmmA, xmmB, m);
+ cc.evex().vfnmsub231pd(ymmA, ymmB, m);
+ cc.evex().vfnmsub231pd(zmmA, zmmB, m);
+ cc.evex().vfnmsub231ps(xmmA, xmmB, m);
+ cc.evex().vfnmsub231ps(ymmA, ymmB, m);
+ cc.evex().vfnmsub231ps(zmmA, zmmB, m);
+ cc.evex().vfnmsub231sd(xmmA, xmmB, m);
+ cc.evex().vfnmsub231ss(xmmA, xmmB, m);
+ cc.evex().vfpclasspd(kA, m128, 0);
+ cc.evex().vfpclasspd(kA, m256, 0);
+ cc.evex().vfpclasspd(kA, m512, 0);
+ cc.evex().vfpclassps(kA, m128, 0);
+ cc.evex().vfpclassps(kA, m256, 0);
+ cc.evex().vfpclassps(kA, m512, 0);
+ cc.evex().vfpclasssd(kA, m, 0);
+ cc.evex().vfpclassss(kA, m, 0);
+ cc.evex().k(kA).vgatherdpd(xmmA, vx_ptr);
+ cc.evex().k(kA).vgatherdpd(ymmA, vx_ptr);
+ cc.evex().k(kA).vgatherdpd(zmmA, vy_ptr);
+ cc.evex().k(kA).vgatherdps(xmmA, vx_ptr);
+ cc.evex().k(kA).vgatherdps(ymmA, vy_ptr);
+ cc.evex().k(kA).vgatherdps(zmmA, vz_ptr);
+ cc.evex().k(kA).vgatherpf0dpd(vy_ptr);
+ cc.evex().k(kA).vgatherpf0dps(vz_ptr);
+ cc.evex().k(kA).vgatherpf0qpd(vz_ptr);
+ cc.evex().k(kA).vgatherpf0qps(vz_ptr);
+ cc.evex().k(kA).vgatherpf1dpd(vy_ptr);
+ cc.evex().k(kA).vgatherpf1dps(vz_ptr);
+ cc.evex().k(kA).vgatherpf1qpd(vz_ptr);
+ cc.evex().k(kA).vgatherpf1qps(vz_ptr);
+ cc.evex().k(kA).vgatherqpd(xmmA, vx_ptr);
+ cc.evex().k(kA).vgatherqpd(ymmA, vy_ptr);
+ cc.evex().k(kA).vgatherqpd(zmmA, vz_ptr);
+ cc.evex().k(kA).vgatherqps(xmmA, vx_ptr);
+ cc.evex().k(kA).vgatherqps(xmmA, vy_ptr);
+ cc.evex().k(kA).vgatherqps(ymmA, vz_ptr);
+ cc.evex().vgetexppd(xmmA, m);
+ cc.evex().vgetexppd(ymmA, m);
+ cc.evex().vgetexppd(zmmA, m);
+ cc.evex().vgetexpps(xmmA, m);
+ cc.evex().vgetexpps(ymmA, m);
+ cc.evex().vgetexpps(zmmA, m);
+ cc.evex().vgetexpsd(xmmA, xmmB, m);
+ cc.evex().vgetexpss(xmmA, xmmB, m);
+ cc.evex().vgetmantpd(xmmA, m, 0);
+ cc.evex().vgetmantpd(ymmA, m, 0);
+ cc.evex().vgetmantpd(zmmA, m, 0);
+ cc.evex().vgetmantps(xmmA, m, 0);
+ cc.evex().vgetmantps(ymmA, m, 0);
+ cc.evex().vgetmantps(zmmA, m, 0);
+ cc.evex().vgetmantsd(xmmA, xmmB, m, 0);
+ cc.evex().vgetmantss(xmmA, xmmB, m, 0);
+ cc.evex().vinsertf32x4(ymmA, ymmB, m, 0);
+ cc.evex().vinsertf32x4(zmmA, zmmB, m, 0);
+ cc.evex().vinsertf32x8(zmmA, zmmB, m, 0);
+ cc.evex().vinsertf64x2(ymmA, ymmB, m, 0);
+ cc.evex().vinsertf64x2(zmmA, zmmB, m, 0);
+ cc.evex().vinsertf64x4(zmmA, zmmB, m, 0);
+ cc.evex().vinserti32x4(ymmA, ymmB, m, 0);
+ cc.evex().vinserti32x4(zmmA, zmmB, m, 0);
+ cc.evex().vinserti32x8(zmmA, zmmB, m, 0);
+ cc.evex().vinserti64x2(ymmA, ymmB, m, 0);
+ cc.evex().vinserti64x2(zmmA, zmmB, m, 0);
+ cc.evex().vinserti64x4(zmmA, zmmB, m, 0);
+ cc.evex().vinsertps(xmmA, xmmB, m, 0);
+ cc.evex().vmaxpd(xmmA, xmmB, m);
+ cc.evex().vmaxpd(ymmA, ymmB, m);
+ cc.evex().vmaxpd(zmmA, zmmB, m);
+ cc.evex().vmaxps(xmmA, xmmB, m);
+ cc.evex().vmaxps(ymmA, ymmB, m);
+ cc.evex().vmaxps(zmmA, zmmB, m);
+ cc.evex().vmaxsd(xmmA, xmmB, m);
+ cc.evex().vmaxss(xmmA, xmmB, m);
+ cc.evex().vminpd(xmmA, xmmB, m);
+ cc.evex().vminpd(ymmA, ymmB, m);
+ cc.evex().vminpd(zmmA, zmmB, m);
+ cc.evex().vminps(xmmA, xmmB, m);
+ cc.evex().vminps(ymmA, ymmB, m);
+ cc.evex().vminps(zmmA, zmmB, m);
+ cc.evex().vminsd(xmmA, xmmB, m);
+ cc.evex().vminss(xmmA, xmmB, m);
+ cc.evex().vmovapd(xmmA, m);
+ cc.evex().vmovapd(m, xmmB);
+ cc.evex().vmovapd(ymmA, m);
+ cc.evex().vmovapd(m, ymmB);
+ cc.evex().vmovapd(zmmA, m);
+ cc.evex().vmovapd(m, zmmB);
+ cc.evex().vmovaps(xmmA, m);
+ cc.evex().vmovaps(m, xmmB);
+ cc.evex().vmovaps(ymmA, m);
+ cc.evex().vmovaps(m, ymmB);
+ cc.evex().vmovaps(zmmA, m);
+ cc.evex().vmovaps(m, zmmB);
+ cc.evex().vmovd(m, xmmB);
+ cc.evex().vmovd(xmmA, m);
+ cc.evex().vmovddup(xmmA, m);
+ cc.evex().vmovddup(ymmA, m);
+ cc.evex().vmovddup(zmmA, m);
+ cc.evex().vmovdqa32(xmmA, m);
+ cc.evex().vmovdqa32(m, xmmB);
+ cc.evex().vmovdqa32(ymmA, m);
+ cc.evex().vmovdqa32(m, ymmB);
+ cc.evex().vmovdqa32(zmmA, m);
+ cc.evex().vmovdqa32(m, zmmB);
+ cc.evex().vmovdqa64(xmmA, m);
+ cc.evex().vmovdqa64(m, xmmB);
+ cc.evex().vmovdqa64(ymmA, m);
+ cc.evex().vmovdqa64(m, ymmB);
+ cc.evex().vmovdqa64(zmmA, m);
+ cc.evex().vmovdqa64(m, zmmB);
+ cc.evex().vmovdqu16(xmmA, m);
+ cc.evex().vmovdqu16(m, xmmB);
+ cc.evex().vmovdqu16(ymmA, m);
+ cc.evex().vmovdqu16(m, ymmB);
+ cc.evex().vmovdqu16(zmmA, m);
+ cc.evex().vmovdqu16(m, zmmB);
+ cc.evex().vmovdqu32(xmmA, m);
+ cc.evex().vmovdqu32(m, xmmB);
+ cc.evex().vmovdqu32(ymmA, m);
+ cc.evex().vmovdqu32(m, ymmB);
+ cc.evex().vmovdqu32(zmmA, m);
+ cc.evex().vmovdqu32(m, zmmB);
+ cc.evex().vmovdqu64(xmmA, m);
+ cc.evex().vmovdqu64(m, xmmB);
+ cc.evex().vmovdqu64(ymmA, m);
+ cc.evex().vmovdqu64(m, ymmB);
+ cc.evex().vmovdqu64(zmmA, m);
+ cc.evex().vmovdqu64(m, zmmB);
+ cc.evex().vmovdqu8(xmmA, m);
+ cc.evex().vmovdqu8(m, xmmB);
+ cc.evex().vmovdqu8(ymmA, m);
+ cc.evex().vmovdqu8(m, ymmB);
+ cc.evex().vmovdqu8(zmmA, m);
+ cc.evex().vmovdqu8(m, zmmB);
+ cc.evex().vmovhpd(m, xmmB);
+ cc.evex().vmovhpd(xmmA, xmmB, m);
+ cc.evex().vmovhps(m, xmmB);
+ cc.evex().vmovhps(xmmA, xmmB, m);
+ cc.evex().vmovlpd(m, xmmB);
+ cc.evex().vmovlpd(xmmA, xmmB, m);
+ cc.evex().vmovlps(m, xmmB);
+ cc.evex().vmovlps(xmmA, xmmB, m);
+ cc.evex().vmovntdq(m, xmmB);
+ cc.evex().vmovntdq(m, ymmB);
+ cc.evex().vmovntdq(m, zmmB);
+ cc.evex().vmovntdqa(xmmA, m);
+ cc.evex().vmovntdqa(ymmA, m);
+ cc.evex().vmovntdqa(zmmA, m);
+ cc.evex().vmovntpd(m, xmmB);
+ cc.evex().vmovntpd(m, ymmB);
+ cc.evex().vmovntpd(m, zmmB);
+ cc.evex().vmovntps(m, xmmB);
+ cc.evex().vmovntps(m, ymmB);
+ cc.evex().vmovntps(m, zmmB);
+ cc.evex().vmovq(m, xmmB);
+ cc.evex().vmovq(xmmA, m);
+ cc.evex().vmovq(xmmA, m);
+ cc.evex().vmovq(m, xmmB);
+ cc.evex().vmovsd(m, xmmB);
+ cc.evex().vmovsd(xmmA, m);
+ cc.evex().vmovshdup(xmmA, m);
+ cc.evex().vmovshdup(ymmA, m);
+ cc.evex().vmovshdup(zmmA, m);
+ cc.evex().vmovsldup(xmmA, m);
+ cc.evex().vmovsldup(ymmA, m);
+ cc.evex().vmovsldup(zmmA, m);
+ cc.evex().vmovss(m, xmmB);
+ cc.evex().vmovss(xmmA, m);
+ cc.evex().vmovupd(xmmA, m);
+ cc.evex().vmovupd(m, xmmB);
+ cc.evex().vmovupd(ymmA, m);
+ cc.evex().vmovupd(m, ymmB);
+ cc.evex().vmovupd(zmmA, m);
+ cc.evex().vmovupd(m, zmmB);
+ cc.evex().vmovups(xmmA, m);
+ cc.evex().vmovups(m, xmmB);
+ cc.evex().vmovups(ymmA, m);
+ cc.evex().vmovups(m, ymmB);
+ cc.evex().vmovups(zmmA, m);
+ cc.evex().vmovups(m, zmmB);
+ cc.evex().vmulpd(xmmA, xmmB, m);
+ cc.evex().vmulpd(ymmA, ymmB, m);
+ cc.evex().vmulpd(zmmA, zmmB, m);
+ cc.evex().vmulps(xmmA, xmmB, m);
+ cc.evex().vmulps(ymmA, ymmB, m);
+ cc.evex().vmulps(zmmA, zmmB, m);
+ cc.evex().vmulsd(xmmA, xmmB, m);
+ cc.evex().vmulss(xmmA, xmmB, m);
+ cc.evex().vorpd(xmmA, xmmB, m);
+ cc.evex().vorpd(ymmA, ymmB, m);
+ cc.evex().vorpd(zmmA, zmmB, m);
+ cc.evex().vorps(xmmA, xmmB, m);
+ cc.evex().vorps(ymmA, ymmB, m);
+ cc.evex().vorps(zmmA, zmmB, m);
+ cc.evex().vpabsb(xmmA, m);
+ cc.evex().vpabsb(ymmA, m);
+ cc.evex().vpabsb(zmmA, m);
+ cc.evex().vpabsd(xmmA, m);
+ cc.evex().vpabsd(ymmA, m);
+ cc.evex().vpabsd(zmmA, m);
+ cc.evex().vpabsq(xmmA, m);
+ cc.evex().vpabsq(ymmA, m);
+ cc.evex().vpabsq(zmmA, m);
+ cc.evex().vpabsw(xmmA, m);
+ cc.evex().vpabsw(ymmA, m);
+ cc.evex().vpabsw(zmmA, m);
+ cc.evex().vpackssdw(xmmA, xmmB, m);
+ cc.evex().vpackssdw(ymmA, ymmB, m);
+ cc.evex().vpackssdw(zmmA, zmmB, m);
+ cc.evex().vpacksswb(xmmA, xmmB, m);
+ cc.evex().vpacksswb(ymmA, ymmB, m);
+ cc.evex().vpacksswb(zmmA, zmmB, m);
+ cc.evex().vpackusdw(xmmA, xmmB, m);
+ cc.evex().vpackusdw(ymmA, ymmB, m);
+ cc.evex().vpackusdw(zmmA, zmmB, m);
+ cc.evex().vpackuswb(xmmA, xmmB, m);
+ cc.evex().vpackuswb(ymmA, ymmB, m);
+ cc.evex().vpackuswb(zmmA, zmmB, m);
+ cc.evex().vpaddb(xmmA, xmmB, m);
+ cc.evex().vpaddb(ymmA, ymmB, m);
+ cc.evex().vpaddb(zmmA, zmmB, m);
+ cc.evex().vpaddd(xmmA, xmmB, m);
+ cc.evex().vpaddd(ymmA, ymmB, m);
+ cc.evex().vpaddd(zmmA, zmmB, m);
+ cc.evex().vpaddq(xmmA, xmmB, m);
+ cc.evex().vpaddq(ymmA, ymmB, m);
+ cc.evex().vpaddq(zmmA, zmmB, m);
+ cc.evex().vpaddsb(xmmA, xmmB, m);
+ cc.evex().vpaddsb(ymmA, ymmB, m);
+ cc.evex().vpaddsb(zmmA, zmmB, m);
+ cc.evex().vpaddsw(xmmA, xmmB, m);
+ cc.evex().vpaddsw(ymmA, ymmB, m);
+ cc.evex().vpaddsw(zmmA, zmmB, m);
+ cc.evex().vpaddusb(xmmA, xmmB, m);
+ cc.evex().vpaddusb(ymmA, ymmB, m);
+ cc.evex().vpaddusb(zmmA, zmmB, m);
+ cc.evex().vpaddusw(xmmA, xmmB, m);
+ cc.evex().vpaddusw(ymmA, ymmB, m);
+ cc.evex().vpaddusw(zmmA, zmmB, m);
+ cc.evex().vpaddw(xmmA, xmmB, m);
+ cc.evex().vpaddw(ymmA, ymmB, m);
+ cc.evex().vpaddw(zmmA, zmmB, m);
+ cc.evex().vpalignr(xmmA, xmmB, m, 0);
+ cc.evex().vpalignr(ymmA, ymmB, m, 0);
+ cc.evex().vpalignr(zmmA, zmmB, m, 0);
+ cc.evex().vpandd(xmmA, xmmB, m);
+ cc.evex().vpandd(ymmA, ymmB, m);
+ cc.evex().vpandd(zmmA, zmmB, m);
+ cc.evex().vpandnd(xmmA, xmmB, m);
+ cc.evex().vpandnd(ymmA, ymmB, m);
+ cc.evex().vpandnd(zmmA, zmmB, m);
+ cc.evex().vpandnq(xmmA, xmmB, m);
+ cc.evex().vpandnq(ymmA, ymmB, m);
+ cc.evex().vpandnq(zmmA, zmmB, m);
+ cc.evex().vpandq(xmmA, xmmB, m);
+ cc.evex().vpandq(ymmA, ymmB, m);
+ cc.evex().vpandq(zmmA, zmmB, m);
+ cc.evex().vpavgb(xmmA, xmmB, m);
+ cc.evex().vpavgb(ymmA, ymmB, m);
+ cc.evex().vpavgb(zmmA, zmmB, m);
+ cc.evex().vpavgw(xmmA, xmmB, m);
+ cc.evex().vpavgw(ymmA, ymmB, m);
+ cc.evex().vpavgw(zmmA, zmmB, m);
+ cc.evex().vpblendmb(xmmA, xmmB, m);
+ cc.evex().vpblendmb(ymmA, ymmB, m);
+ cc.evex().vpblendmb(zmmA, zmmB, m);
+ cc.evex().vpblendmd(xmmA, xmmB, m);
+ cc.evex().vpblendmd(ymmA, ymmB, m);
+ cc.evex().vpblendmd(zmmA, zmmB, m);
+ cc.evex().vpblendmq(xmmA, xmmB, m);
+ cc.evex().vpblendmq(ymmA, ymmB, m);
+ cc.evex().vpblendmq(zmmA, zmmB, m);
+ cc.evex().vpblendmw(xmmA, xmmB, m);
+ cc.evex().vpblendmw(ymmA, ymmB, m);
+ cc.evex().vpblendmw(zmmA, zmmB, m);
+ cc.evex().vpbroadcastb(xmmA, m);
+ cc.evex().vpbroadcastb(ymmA, m);
+ cc.evex().vpbroadcastb(zmmA, m);
+ cc.evex().vpbroadcastd(xmmA, m);
+ cc.evex().vpbroadcastd(ymmA, m);
+ cc.evex().vpbroadcastd(zmmA, m);
+ cc.evex().vpbroadcastq(xmmA, m);
+ cc.evex().vpbroadcastq(ymmA, m);
+ cc.evex().vpbroadcastq(zmmA, m);
+ cc.evex().vpbroadcastw(xmmA, m);
+ cc.evex().vpbroadcastw(ymmA, m);
+ cc.evex().vpbroadcastw(zmmA, m);
+ cc.evex().vpcmpb(kA, xmmB, m, 0);
+ cc.evex().vpcmpb(kA, ymmB, m, 0);
+ cc.evex().vpcmpb(kA, zmmB, m, 0);
+ cc.evex().vpcmpd(kA, xmmB, m, 0);
+ cc.evex().vpcmpd(kA, ymmB, m, 0);
+ cc.evex().vpcmpd(kA, zmmB, m, 0);
+ cc.evex().vpcmpeqb(kA, xmmB, m);
+ cc.evex().vpcmpeqb(kA, ymmB, m);
+ cc.evex().vpcmpeqb(kA, zmmB, m);
+ cc.evex().vpcmpeqd(kA, xmmB, m);
+ cc.evex().vpcmpeqd(kA, ymmB, m);
+ cc.evex().vpcmpeqd(kA, zmmB, m);
+ cc.evex().vpcmpeqq(kA, xmmB, m);
+ cc.evex().vpcmpeqq(kA, ymmB, m);
+ cc.evex().vpcmpeqq(kA, zmmB, m);
+ cc.evex().vpcmpeqw(kA, xmmB, m);
+ cc.evex().vpcmpeqw(kA, ymmB, m);
+ cc.evex().vpcmpeqw(kA, zmmB, m);
+ cc.evex().vpcmpgtb(kA, xmmB, m);
+ cc.evex().vpcmpgtb(kA, ymmB, m);
+ cc.evex().vpcmpgtb(kA, zmmB, m);
+ cc.evex().vpcmpgtd(kA, xmmB, m);
+ cc.evex().vpcmpgtd(kA, ymmB, m);
+ cc.evex().vpcmpgtd(kA, zmmB, m);
+ cc.evex().vpcmpgtq(kA, xmmB, m);
+ cc.evex().vpcmpgtq(kA, ymmB, m);
+ cc.evex().vpcmpgtq(kA, zmmB, m);
+ cc.evex().vpcmpgtw(kA, xmmB, m);
+ cc.evex().vpcmpgtw(kA, ymmB, m);
+ cc.evex().vpcmpgtw(kA, zmmB, m);
+ cc.evex().vpcmpq(kA, xmmB, m, 0);
+ cc.evex().vpcmpq(kA, ymmB, m, 0);
+ cc.evex().vpcmpq(kA, zmmB, m, 0);
+ cc.evex().vpcmpub(kA, xmmB, m, 0);
+ cc.evex().vpcmpub(kA, ymmB, m, 0);
+ cc.evex().vpcmpub(kA, zmmB, m, 0);
+ cc.evex().vpcmpud(kA, xmmB, m, 0);
+ cc.evex().vpcmpud(kA, ymmB, m, 0);
+ cc.evex().vpcmpud(kA, zmmB, m, 0);
+ cc.evex().vpcmpuq(kA, xmmB, m, 0);
+ cc.evex().vpcmpuq(kA, ymmB, m, 0);
+ cc.evex().vpcmpuq(kA, zmmB, m, 0);
+ cc.evex().vpcmpuw(kA, xmmB, m, 0);
+ cc.evex().vpcmpuw(kA, ymmB, m, 0);
+ cc.evex().vpcmpuw(kA, zmmB, m, 0);
+ cc.evex().vpcmpw(kA, xmmB, m, 0);
+ cc.evex().vpcmpw(kA, ymmB, m, 0);
+ cc.evex().vpcmpw(kA, zmmB, m, 0);
+ cc.evex().vpcompressd(m, xmmB);
+ cc.evex().vpcompressd(m, ymmB);
+ cc.evex().vpcompressd(m, zmmB);
+ cc.evex().vpcompressq(m, xmmB);
+ cc.evex().vpcompressq(m, ymmB);
+ cc.evex().vpcompressq(m, zmmB);
+ cc.evex().vpconflictd(xmmA, m);
+ cc.evex().vpconflictd(ymmA, m);
+ cc.evex().vpconflictd(zmmA, m);
+ cc.evex().vpconflictq(xmmA, m);
+ cc.evex().vpconflictq(ymmA, m);
+ cc.evex().vpconflictq(zmmA, m);
+ cc.evex().vpermb(xmmA, xmmB, m);
+ cc.evex().vpermb(ymmA, ymmB, m);
+ cc.evex().vpermb(zmmA, zmmB, m);
+ cc.evex().vpermd(ymmA, ymmB, m);
+ cc.evex().vpermd(zmmA, zmmB, m);
+ cc.evex().vpermi2b(xmmA, xmmB, m);
+ cc.evex().vpermi2b(ymmA, ymmB, m);
+ cc.evex().vpermi2b(zmmA, zmmB, m);
+ cc.evex().vpermi2d(xmmA, xmmB, m);
+ cc.evex().vpermi2d(ymmA, ymmB, m);
+ cc.evex().vpermi2d(zmmA, zmmB, m);
+ cc.evex().vpermi2pd(xmmA, xmmB, m);
+ cc.evex().vpermi2pd(ymmA, ymmB, m);
+ cc.evex().vpermi2pd(zmmA, zmmB, m);
+ cc.evex().vpermi2ps(xmmA, xmmB, m);
+ cc.evex().vpermi2ps(ymmA, ymmB, m);
+ cc.evex().vpermi2ps(zmmA, zmmB, m);
+ cc.evex().vpermi2q(xmmA, xmmB, m);
+ cc.evex().vpermi2q(ymmA, ymmB, m);
+ cc.evex().vpermi2q(zmmA, zmmB, m);
+ cc.evex().vpermi2w(xmmA, xmmB, m);
+ cc.evex().vpermi2w(ymmA, ymmB, m);
+ cc.evex().vpermi2w(zmmA, zmmB, m);
+ cc.evex().vpermilpd(xmmA, xmmB, m);
+ cc.evex().vpermilpd(ymmA, ymmB, m);
+ cc.evex().vpermilpd(zmmA, zmmB, m);
+ cc.evex().vpermilpd(xmmA, m, 0);
+ cc.evex().vpermilpd(ymmA, m, 0);
+ cc.evex().vpermilpd(zmmA, m, 0);
+ cc.evex().vpermilps(xmmA, xmmB, m);
+ cc.evex().vpermilps(ymmA, ymmB, m);
+ cc.evex().vpermilps(zmmA, zmmB, m);
+ cc.evex().vpermilps(xmmA, m, 0);
+ cc.evex().vpermilps(ymmA, m, 0);
+ cc.evex().vpermilps(zmmA, m, 0);
+ cc.evex().vpermq(ymmA, ymmB, m);
+ cc.evex().vpermq(zmmA, zmmB, m);
+ cc.evex().vpermq(ymmA, m, 0);
+ cc.evex().vpermq(zmmA, m, 0);
+ cc.evex().vpermt2b(xmmA, xmmB, m);
+ cc.evex().vpermt2b(ymmA, ymmB, m);
+ cc.evex().vpermt2b(zmmA, zmmB, m);
+ cc.evex().vpermt2d(xmmA, xmmB, m);
+ cc.evex().vpermt2d(ymmA, ymmB, m);
+ cc.evex().vpermt2d(zmmA, zmmB, m);
+ cc.evex().vpermt2pd(xmmA, xmmB, m);
+ cc.evex().vpermt2pd(ymmA, ymmB, m);
+ cc.evex().vpermt2pd(zmmA, zmmB, m);
+ cc.evex().vpermt2ps(xmmA, xmmB, m);
+ cc.evex().vpermt2ps(ymmA, ymmB, m);
+ cc.evex().vpermt2ps(zmmA, zmmB, m);
+ cc.evex().vpermt2q(xmmA, xmmB, m);
+ cc.evex().vpermt2q(ymmA, ymmB, m);
+ cc.evex().vpermt2q(zmmA, zmmB, m);
+ cc.evex().vpermt2w(xmmA, xmmB, m);
+ cc.evex().vpermt2w(ymmA, ymmB, m);
+ cc.evex().vpermt2w(zmmA, zmmB, m);
+ cc.evex().vpermw(xmmA, xmmB, m);
+ cc.evex().vpermw(ymmA, ymmB, m);
+ cc.evex().vpermw(zmmA, zmmB, m);
+ cc.evex().vpexpandd(xmmA, m);
+ cc.evex().vpexpandd(ymmA, m);
+ cc.evex().vpexpandd(zmmA, m);
+ cc.evex().vpexpandq(xmmA, m);
+ cc.evex().vpexpandq(ymmA, m);
+ cc.evex().vpexpandq(zmmA, m);
+ cc.evex().vpextrb(m, xmmB, 0);
+ cc.evex().vpextrd(m, xmmB, 0);
+ if (cc.is64Bit()) cc.evex().vpextrq(m, xmmB, 0);
+ cc.evex().vpextrw(m, xmmB, 0);
+ cc.evex().k(kA).vpgatherdd(xmmA, vx_ptr);
+ cc.evex().k(kA).vpgatherdd(ymmA, vy_ptr);
+ cc.evex().k(kA).vpgatherdd(zmmA, vz_ptr);
+ cc.evex().k(kA).vpgatherdq(xmmA, vx_ptr);
+ cc.evex().k(kA).vpgatherdq(ymmA, vx_ptr);
+ cc.evex().k(kA).vpgatherdq(zmmA, vy_ptr);
+ cc.evex().k(kA).vpgatherqd(xmmA, vx_ptr);
+ cc.evex().k(kA).vpgatherqd(xmmA, vy_ptr);
+ cc.evex().k(kA).vpgatherqd(ymmA, vz_ptr);
+ cc.evex().k(kA).vpgatherqq(xmmA, vx_ptr);
+ cc.evex().k(kA).vpgatherqq(ymmA, vy_ptr);
+ cc.evex().k(kA).vpgatherqq(zmmA, vz_ptr);
+ cc.evex().vpinsrb(xmmA, xmmB, m, 0);
+ cc.evex().vpinsrd(xmmA, xmmB, m, 0);
+ if (cc.is64Bit()) cc.evex().vpinsrq(xmmA, xmmB, m, 0);
+ cc.evex().vpinsrw(xmmA, xmmB, m, 0);
+ cc.evex().vplzcntd(xmmA, m);
+ cc.evex().vplzcntd(ymmA, m);
+ cc.evex().vplzcntd(zmmA, m);
+ cc.evex().vplzcntq(xmmA, m);
+ cc.evex().vplzcntq(ymmA, m);
+ cc.evex().vplzcntq(zmmA, m);
+ cc.evex().vpmadd52huq(xmmA, xmmB, m);
+ cc.evex().vpmadd52huq(ymmA, ymmB, m);
+ cc.evex().vpmadd52huq(zmmA, zmmB, m);
+ cc.evex().vpmadd52luq(xmmA, xmmB, m);
+ cc.evex().vpmadd52luq(ymmA, ymmB, m);
+ cc.evex().vpmadd52luq(zmmA, zmmB, m);
+ cc.evex().vpmaddubsw(xmmA, xmmB, m);
+ cc.evex().vpmaddubsw(ymmA, ymmB, m);
+ cc.evex().vpmaddubsw(zmmA, zmmB, m);
+ cc.evex().vpmaddwd(xmmA, xmmB, m);
+ cc.evex().vpmaddwd(ymmA, ymmB, m);
+ cc.evex().vpmaddwd(zmmA, zmmB, m);
+ cc.evex().vpmaxsb(xmmA, xmmB, m);
+ cc.evex().vpmaxsb(ymmA, ymmB, m);
+ cc.evex().vpmaxsb(zmmA, zmmB, m);
+ cc.evex().vpmaxsd(xmmA, xmmB, m);
+ cc.evex().vpmaxsd(ymmA, ymmB, m);
+ cc.evex().vpmaxsd(zmmA, zmmB, m);
+ cc.evex().vpmaxsq(xmmA, xmmB, m);
+ cc.evex().vpmaxsq(ymmA, ymmB, m);
+ cc.evex().vpmaxsq(zmmA, zmmB, m);
+ cc.evex().vpmaxsw(xmmA, xmmB, m);
+ cc.evex().vpmaxsw(ymmA, ymmB, m);
+ cc.evex().vpmaxsw(zmmA, zmmB, m);
+ cc.evex().vpmaxub(xmmA, xmmB, m);
+ cc.evex().vpmaxub(ymmA, ymmB, m);
+ cc.evex().vpmaxub(zmmA, zmmB, m);
+ cc.evex().vpmaxud(xmmA, xmmB, m);
+ cc.evex().vpmaxud(ymmA, ymmB, m);
+ cc.evex().vpmaxud(zmmA, zmmB, m);
+ cc.evex().vpmaxuq(xmmA, xmmB, m);
+ cc.evex().vpmaxuq(ymmA, ymmB, m);
+ cc.evex().vpmaxuq(zmmA, zmmB, m);
+ cc.evex().vpmaxuw(xmmA, xmmB, m);
+ cc.evex().vpmaxuw(ymmA, ymmB, m);
+ cc.evex().vpmaxuw(zmmA, zmmB, m);
+ cc.evex().vpminsb(xmmA, xmmB, m);
+ cc.evex().vpminsb(ymmA, ymmB, m);
+ cc.evex().vpminsb(zmmA, zmmB, m);
+ cc.evex().vpminsd(xmmA, xmmB, m);
+ cc.evex().vpminsd(ymmA, ymmB, m);
+ cc.evex().vpminsd(zmmA, zmmB, m);
+ cc.evex().vpminsq(xmmA, xmmB, m);
+ cc.evex().vpminsq(ymmA, ymmB, m);
+ cc.evex().vpminsq(zmmA, zmmB, m);
+ cc.evex().vpminsw(xmmA, xmmB, m);
+ cc.evex().vpminsw(ymmA, ymmB, m);
+ cc.evex().vpminsw(zmmA, zmmB, m);
+ cc.evex().vpminub(xmmA, xmmB, m);
+ cc.evex().vpminub(ymmA, ymmB, m);
+ cc.evex().vpminub(zmmA, zmmB, m);
+ cc.evex().vpminud(xmmA, xmmB, m);
+ cc.evex().vpminud(ymmA, ymmB, m);
+ cc.evex().vpminud(zmmA, zmmB, m);
+ cc.evex().vpminuq(xmmA, xmmB, m);
+ cc.evex().vpminuq(ymmA, ymmB, m);
+ cc.evex().vpminuq(zmmA, zmmB, m);
+ cc.evex().vpminuw(xmmA, xmmB, m);
+ cc.evex().vpminuw(ymmA, ymmB, m);
+ cc.evex().vpminuw(zmmA, zmmB, m);
+ cc.evex().vpmovdb(m, xmmB);
+ cc.evex().vpmovdb(m, ymmB);
+ cc.evex().vpmovdb(m, zmmB);
+ cc.evex().vpmovdw(m, xmmB);
+ cc.evex().vpmovdw(m, ymmB);
+ cc.evex().vpmovdw(m, zmmB);
+ cc.evex().vpmovqb(m, xmmB);
+ cc.evex().vpmovqb(m, ymmB);
+ cc.evex().vpmovqb(m, zmmB);
+ cc.evex().vpmovqd(m, xmmB);
+ cc.evex().vpmovqd(m, ymmB);
+ cc.evex().vpmovqd(m, zmmB);
+ cc.evex().vpmovqw(m, xmmB);
+ cc.evex().vpmovqw(m, ymmB);
+ cc.evex().vpmovqw(m, zmmB);
+ cc.evex().vpmovsdb(m, xmmB);
+ cc.evex().vpmovsdb(m, ymmB);
+ cc.evex().vpmovsdb(m, zmmB);
+ cc.evex().vpmovsdw(m, xmmB);
+ cc.evex().vpmovsdw(m, ymmB);
+ cc.evex().vpmovsdw(m, zmmB);
+ cc.evex().vpmovsqb(m, xmmB);
+ cc.evex().vpmovsqb(m, ymmB);
+ cc.evex().vpmovsqb(m, zmmB);
+ cc.evex().vpmovsqd(m, xmmB);
+ cc.evex().vpmovsqd(m, ymmB);
+ cc.evex().vpmovsqd(m, zmmB);
+ cc.evex().vpmovsqw(m, xmmB);
+ cc.evex().vpmovsqw(m, ymmB);
+ cc.evex().vpmovsqw(m, zmmB);
+ cc.evex().vpmovswb(m, xmmB);
+ cc.evex().vpmovswb(m, ymmB);
+ cc.evex().vpmovswb(m, zmmB);
+ cc.evex().vpmovsxbd(xmmA, m);
+ cc.evex().vpmovsxbd(ymmA, m);
+ cc.evex().vpmovsxbd(zmmA, m);
+ cc.evex().vpmovsxbq(xmmA, m);
+ cc.evex().vpmovsxbq(ymmA, m);
+ cc.evex().vpmovsxbq(zmmA, m);
+ cc.evex().vpmovsxbw(xmmA, m);
+ cc.evex().vpmovsxbw(ymmA, m);
+ cc.evex().vpmovsxbw(zmmA, m);
+ cc.evex().vpmovsxdq(xmmA, m);
+ cc.evex().vpmovsxdq(ymmA, m);
+ cc.evex().vpmovsxdq(zmmA, m);
+ cc.evex().vpmovsxwd(xmmA, m);
+ cc.evex().vpmovsxwd(ymmA, m);
+ cc.evex().vpmovsxwd(zmmA, m);
+ cc.evex().vpmovsxwq(xmmA, m);
+ cc.evex().vpmovsxwq(ymmA, m);
+ cc.evex().vpmovsxwq(zmmA, m);
+ cc.evex().vpmovusdb(m, xmmB);
+ cc.evex().vpmovusdb(m, ymmB);
+ cc.evex().vpmovusdb(m, zmmB);
+ cc.evex().vpmovusdw(m, xmmB);
+ cc.evex().vpmovusdw(m, ymmB);
+ cc.evex().vpmovusdw(m, zmmB);
+ cc.evex().vpmovusqb(m, xmmB);
+ cc.evex().vpmovusqb(m, ymmB);
+ cc.evex().vpmovusqb(m, zmmB);
+ cc.evex().vpmovusqd(m, xmmB);
+ cc.evex().vpmovusqd(m, ymmB);
+ cc.evex().vpmovusqd(m, zmmB);
+ cc.evex().vpmovusqw(m, xmmB);
+ cc.evex().vpmovusqw(m, ymmB);
+ cc.evex().vpmovusqw(m, zmmB);
+ cc.evex().vpmovuswb(m, xmmB);
+ cc.evex().vpmovuswb(m, ymmB);
+ cc.evex().vpmovuswb(m, zmmB);
+ cc.evex().vpmovwb(m, xmmB);
+ cc.evex().vpmovwb(m, ymmB);
+ cc.evex().vpmovwb(m, zmmB);
+ cc.evex().vpmovzxbd(xmmA, m);
+ cc.evex().vpmovzxbd(ymmA, m);
+ cc.evex().vpmovzxbd(zmmA, m);
+ cc.evex().vpmovzxbq(xmmA, m);
+ cc.evex().vpmovzxbq(ymmA, m);
+ cc.evex().vpmovzxbq(zmmA, m);
+ cc.evex().vpmovzxbw(xmmA, m);
+ cc.evex().vpmovzxbw(ymmA, m);
+ cc.evex().vpmovzxbw(zmmA, m);
+ cc.evex().vpmovzxdq(xmmA, m);
+ cc.evex().vpmovzxdq(ymmA, m);
+ cc.evex().vpmovzxdq(zmmA, m);
+ cc.evex().vpmovzxwd(xmmA, m);
+ cc.evex().vpmovzxwd(ymmA, m);
+ cc.evex().vpmovzxwd(zmmA, m);
+ cc.evex().vpmovzxwq(xmmA, m);
+ cc.evex().vpmovzxwq(ymmA, m);
+ cc.evex().vpmovzxwq(zmmA, m);
+ cc.evex().vpmuldq(xmmA, xmmB, m);
+ cc.evex().vpmuldq(ymmA, ymmB, m);
+ cc.evex().vpmuldq(zmmA, zmmB, m);
+ cc.evex().vpmulhrsw(xmmA, xmmB, m);
+ cc.evex().vpmulhrsw(ymmA, ymmB, m);
+ cc.evex().vpmulhrsw(zmmA, zmmB, m);
+ cc.evex().vpmulhuw(xmmA, xmmB, m);
+ cc.evex().vpmulhuw(ymmA, ymmB, m);
+ cc.evex().vpmulhuw(zmmA, zmmB, m);
+ cc.evex().vpmulhw(xmmA, xmmB, m);
+ cc.evex().vpmulhw(ymmA, ymmB, m);
+ cc.evex().vpmulhw(zmmA, zmmB, m);
+ cc.evex().vpmulld(xmmA, xmmB, m);
+ cc.evex().vpmulld(ymmA, ymmB, m);
+ cc.evex().vpmulld(zmmA, zmmB, m);
+ cc.evex().vpmullq(xmmA, xmmB, m);
+ cc.evex().vpmullq(ymmA, ymmB, m);
+ cc.evex().vpmullq(zmmA, zmmB, m);
+ cc.evex().vpmullw(xmmA, xmmB, m);
+ cc.evex().vpmullw(ymmA, ymmB, m);
+ cc.evex().vpmullw(zmmA, zmmB, m);
+ cc.evex().vpmultishiftqb(xmmA, xmmB, m);
+ cc.evex().vpmultishiftqb(ymmA, ymmB, m);
+ cc.evex().vpmultishiftqb(zmmA, zmmB, m);
+ cc.evex().vpmuludq(xmmA, xmmB, m);
+ cc.evex().vpmuludq(ymmA, ymmB, m);
+ cc.evex().vpmuludq(zmmA, zmmB, m);
+ cc.evex().vpopcntd(zmmA, m);
+ cc.evex().vpopcntq(zmmA, m);
+ cc.evex().vpord(xmmA, xmmB, m);
+ cc.evex().vpord(ymmA, ymmB, m);
+ cc.evex().vpord(zmmA, zmmB, m);
+ cc.evex().vporq(xmmA, xmmB, m);
+ cc.evex().vporq(ymmA, ymmB, m);
+ cc.evex().vporq(zmmA, zmmB, m);
+ cc.evex().vprold(xmmA, m, 0);
+ cc.evex().vprold(ymmA, m, 0);
+ cc.evex().vprold(zmmA, m, 0);
+ cc.evex().vprolq(xmmA, m, 0);
+ cc.evex().vprolq(ymmA, m, 0);
+ cc.evex().vprolq(zmmA, m, 0);
+ cc.evex().vprolvd(xmmA, xmmB, m);
+ cc.evex().vprolvd(ymmA, ymmB, m);
+ cc.evex().vprolvd(zmmA, zmmB, m);
+ cc.evex().vprolvq(xmmA, xmmB, m);
+ cc.evex().vprolvq(ymmA, ymmB, m);
+ cc.evex().vprolvq(zmmA, zmmB, m);
+ cc.evex().vprord(xmmA, m, 0);
+ cc.evex().vprord(ymmA, m, 0);
+ cc.evex().vprord(zmmA, m, 0);
+ cc.evex().vprorq(xmmA, m, 0);
+ cc.evex().vprorq(ymmA, m, 0);
+ cc.evex().vprorq(zmmA, m, 0);
+ cc.evex().vprorvd(xmmA, xmmB, m);
+ cc.evex().vprorvd(ymmA, ymmB, m);
+ cc.evex().vprorvd(zmmA, zmmB, m);
+ cc.evex().vprorvq(xmmA, xmmB, m);
+ cc.evex().vprorvq(ymmA, ymmB, m);
+ cc.evex().vprorvq(zmmA, zmmB, m);
+ cc.evex().vpsadbw(xmmA, xmmB, m);
+ cc.evex().vpsadbw(ymmA, ymmB, m);
+ cc.evex().vpsadbw(zmmA, zmmB, m);
+ cc.evex().k(kA).vpscatterdd(vx_ptr, xmmB);
+ cc.evex().k(kA).vpscatterdd(vy_ptr, ymmB);
+ cc.evex().k(kA).vpscatterdd(vz_ptr, zmmB);
+ cc.evex().k(kA).vpscatterdq(vx_ptr, xmmB);
+ cc.evex().k(kA).vpscatterdq(vx_ptr, ymmB);
+ cc.evex().k(kA).vpscatterdq(vy_ptr, zmmB);
+ cc.evex().k(kA).vpscatterqd(vx_ptr, xmmB);
+ cc.evex().k(kA).vpscatterqd(vy_ptr, xmmB);
+ cc.evex().k(kA).vpscatterqd(vz_ptr, ymmB);
+ cc.evex().k(kA).vpscatterqq(vx_ptr, xmmB);
+ cc.evex().k(kA).vpscatterqq(vy_ptr, ymmB);
+ cc.evex().k(kA).vpscatterqq(vz_ptr, zmmB);
+ cc.evex().vpshufb(xmmA, xmmB, m);
+ cc.evex().vpshufb(ymmA, ymmB, m);
+ cc.evex().vpshufb(zmmA, zmmB, m);
+ cc.evex().vpshufd(xmmA, m, 0);
+ cc.evex().vpshufd(ymmA, m, 0);
+ cc.evex().vpshufd(zmmA, m, 0);
+ cc.evex().vpshufhw(xmmA, m, 0);
+ cc.evex().vpshufhw(ymmA, m, 0);
+ cc.evex().vpshufhw(zmmA, m, 0);
+ cc.evex().vpshuflw(xmmA, m, 0);
+ cc.evex().vpshuflw(ymmA, m, 0);
+ cc.evex().vpshuflw(zmmA, m, 0);
+ cc.evex().vpslld(xmmA, xmmB, m);
+ cc.evex().vpslld(xmmA, m, 0);
+ cc.evex().vpslld(ymmA, ymmB, m);
+ cc.evex().vpslld(ymmA, m, 0);
+ cc.evex().vpslld(zmmA, zmmB, m);
+ cc.evex().vpslld(zmmA, m, 0);
+ cc.evex().vpslldq(xmmA, m, 0);
+ cc.evex().vpslldq(ymmA, m, 0);
+ cc.evex().vpslldq(zmmA, m, 0);
+ cc.evex().vpsllq(xmmA, xmmB, m);
+ cc.evex().vpsllq(xmmA, m, 0);
+ cc.evex().vpsllq(ymmA, ymmB, m);
+ cc.evex().vpsllq(ymmA, m, 0);
+ cc.evex().vpsllq(zmmA, zmmB, m);
+ cc.evex().vpsllq(zmmA, m, 0);
+ cc.evex().vpsllvd(xmmA, xmmB, m);
+ cc.evex().vpsllvd(ymmA, ymmB, m);
+ cc.evex().vpsllvd(zmmA, zmmB, m);
+ cc.evex().vpsllvq(xmmA, xmmB, m);
+ cc.evex().vpsllvq(ymmA, ymmB, m);
+ cc.evex().vpsllvq(zmmA, zmmB, m);
+ cc.evex().vpsllvw(xmmA, xmmB, m);
+ cc.evex().vpsllvw(ymmA, ymmB, m);
+ cc.evex().vpsllvw(zmmA, zmmB, m);
+ cc.evex().vpsllw(xmmA, xmmB, m);
+ cc.evex().vpsllw(xmmA, m, 0);
+ cc.evex().vpsllw(ymmA, ymmB, m);
+ cc.evex().vpsllw(ymmA, m, 0);
+ cc.evex().vpsllw(zmmA, zmmB, m);
+ cc.evex().vpsllw(zmmA, m, 0);
+ cc.evex().vpsrad(xmmA, xmmB, m);
+ cc.evex().vpsrad(xmmA, m, 0);
+ cc.evex().vpsrad(ymmA, ymmB, m);
+ cc.evex().vpsrad(ymmA, m, 0);
+ cc.evex().vpsrad(zmmA, zmmB, m);
+ cc.evex().vpsrad(zmmA, m, 0);
+ cc.evex().vpsraq(xmmA, xmmB, m);
+ cc.evex().vpsraq(xmmA, m, 0);
+ cc.evex().vpsraq(ymmA, ymmB, m);
+ cc.evex().vpsraq(ymmA, m, 0);
+ cc.evex().vpsraq(zmmA, zmmB, m);
+ cc.evex().vpsraq(zmmA, m, 0);
+ cc.evex().vpsravd(xmmA, xmmB, m);
+ cc.evex().vpsravd(ymmA, ymmB, m);
+ cc.evex().vpsravd(zmmA, zmmB, m);
+ cc.evex().vpsravq(xmmA, xmmB, m);
+ cc.evex().vpsravq(ymmA, ymmB, m);
+ cc.evex().vpsravq(zmmA, zmmB, m);
+ cc.evex().vpsravw(xmmA, xmmB, m);
+ cc.evex().vpsravw(ymmA, ymmB, m);
+ cc.evex().vpsravw(zmmA, zmmB, m);
+ cc.evex().vpsraw(xmmA, xmmB, m);
+ cc.evex().vpsraw(xmmA, m, 0);
+ cc.evex().vpsraw(ymmA, ymmB, m);
+ cc.evex().vpsraw(ymmA, m, 0);
+ cc.evex().vpsraw(zmmA, zmmB, m);
+ cc.evex().vpsraw(zmmA, m, 0);
+ cc.evex().vpsrld(xmmA, xmmB, m);
+ cc.evex().vpsrld(xmmA, m, 0);
+ cc.evex().vpsrld(ymmA, ymmB, m);
+ cc.evex().vpsrld(ymmA, m, 0);
+ cc.evex().vpsrld(zmmA, zmmB, m);
+ cc.evex().vpsrld(zmmA, m, 0);
+ cc.evex().vpsrldq(xmmA, m, 0);
+ cc.evex().vpsrldq(ymmA, m, 0);
+ cc.evex().vpsrldq(zmmA, m, 0);
+ cc.evex().vpsrlq(xmmA, xmmB, m);
+ cc.evex().vpsrlq(xmmA, m, 0);
+ cc.evex().vpsrlq(ymmA, ymmB, m);
+ cc.evex().vpsrlq(ymmA, m, 0);
+ cc.evex().vpsrlq(zmmA, zmmB, m);
+ cc.evex().vpsrlq(zmmA, m, 0);
+ cc.evex().vpsrlvd(xmmA, xmmB, m);
+ cc.evex().vpsrlvd(ymmA, ymmB, m);
+ cc.evex().vpsrlvd(zmmA, zmmB, m);
+ cc.evex().vpsrlvq(xmmA, xmmB, m);
+ cc.evex().vpsrlvq(ymmA, ymmB, m);
+ cc.evex().vpsrlvq(zmmA, zmmB, m);
+ cc.evex().vpsrlvw(xmmA, xmmB, m);
+ cc.evex().vpsrlvw(ymmA, ymmB, m);
+ cc.evex().vpsrlvw(zmmA, zmmB, m);
+ cc.evex().vpsrlw(xmmA, xmmB, m);
+ cc.evex().vpsrlw(xmmA, m, 0);
+ cc.evex().vpsrlw(ymmA, ymmB, m);
+ cc.evex().vpsrlw(ymmA, m, 0);
+ cc.evex().vpsrlw(zmmA, zmmB, m);
+ cc.evex().vpsrlw(zmmA, m, 0);
+ cc.evex().vpsubb(xmmA, xmmB, m);
+ cc.evex().vpsubb(ymmA, ymmB, m);
+ cc.evex().vpsubb(zmmA, zmmB, m);
+ cc.evex().vpsubd(xmmA, xmmB, m);
+ cc.evex().vpsubd(ymmA, ymmB, m);
+ cc.evex().vpsubd(zmmA, zmmB, m);
+ cc.evex().vpsubq(xmmA, xmmB, m);
+ cc.evex().vpsubq(ymmA, ymmB, m);
+ cc.evex().vpsubq(zmmA, zmmB, m);
+ cc.evex().vpsubsb(xmmA, xmmB, m);
+ cc.evex().vpsubsb(ymmA, ymmB, m);
+ cc.evex().vpsubsb(zmmA, zmmB, m);
+ cc.evex().vpsubsw(xmmA, xmmB, m);
+ cc.evex().vpsubsw(ymmA, ymmB, m);
+ cc.evex().vpsubsw(zmmA, zmmB, m);
+ cc.evex().vpsubusb(xmmA, xmmB, m);
+ cc.evex().vpsubusb(ymmA, ymmB, m);
+ cc.evex().vpsubusb(zmmA, zmmB, m);
+ cc.evex().vpsubusw(xmmA, xmmB, m);
+ cc.evex().vpsubusw(ymmA, ymmB, m);
+ cc.evex().vpsubusw(zmmA, zmmB, m);
+ cc.evex().vpsubw(xmmA, xmmB, m);
+ cc.evex().vpsubw(ymmA, ymmB, m);
+ cc.evex().vpsubw(zmmA, zmmB, m);
+ cc.evex().vpternlogd(xmmA, xmmB, m, 0);
+ cc.evex().vpternlogd(ymmA, ymmB, m, 0);
+ cc.evex().vpternlogd(zmmA, zmmB, m, 0);
+ cc.evex().vpternlogq(xmmA, xmmB, m, 0);
+ cc.evex().vpternlogq(ymmA, ymmB, m, 0);
+ cc.evex().vpternlogq(zmmA, zmmB, m, 0);
+ cc.evex().vptestmb(kA, xmmB, m);
+ cc.evex().vptestmb(kA, ymmB, m);
+ cc.evex().vptestmb(kA, zmmB, m);
+ cc.evex().vptestmd(kA, xmmB, m);
+ cc.evex().vptestmd(kA, ymmB, m);
+ cc.evex().vptestmd(kA, zmmB, m);
+ cc.evex().vptestmq(kA, xmmB, m);
+ cc.evex().vptestmq(kA, ymmB, m);
+ cc.evex().vptestmq(kA, zmmB, m);
+ cc.evex().vptestmw(kA, xmmB, m);
+ cc.evex().vptestmw(kA, ymmB, m);
+ cc.evex().vptestmw(kA, zmmB, m);
+ cc.evex().vptestnmb(kA, xmmB, m);
+ cc.evex().vptestnmb(kA, ymmB, m);
+ cc.evex().vptestnmb(kA, zmmB, m);
+ cc.evex().vptestnmd(kA, xmmB, m);
+ cc.evex().vptestnmd(kA, ymmB, m);
+ cc.evex().vptestnmd(kA, zmmB, m);
+ cc.evex().vptestnmq(kA, xmmB, m);
+ cc.evex().vptestnmq(kA, ymmB, m);
+ cc.evex().vptestnmq(kA, zmmB, m);
+ cc.evex().vptestnmw(kA, xmmB, m);
+ cc.evex().vptestnmw(kA, ymmB, m);
+ cc.evex().vptestnmw(kA, zmmB, m);
+ cc.evex().vpunpckhbw(xmmA, xmmB, m);
+ cc.evex().vpunpckhbw(ymmA, ymmB, m);
+ cc.evex().vpunpckhbw(zmmA, zmmB, m);
+ cc.evex().vpunpckhdq(xmmA, xmmB, m);
+ cc.evex().vpunpckhdq(ymmA, ymmB, m);
+ cc.evex().vpunpckhdq(zmmA, zmmB, m);
+ cc.evex().vpunpckhqdq(xmmA, xmmB, m);
+ cc.evex().vpunpckhqdq(ymmA, ymmB, m);
+ cc.evex().vpunpckhqdq(zmmA, zmmB, m);
+ cc.evex().vpunpckhwd(xmmA, xmmB, m);
+ cc.evex().vpunpckhwd(ymmA, ymmB, m);
+ cc.evex().vpunpckhwd(zmmA, zmmB, m);
+ cc.evex().vpunpcklbw(xmmA, xmmB, m);
+ cc.evex().vpunpcklbw(ymmA, ymmB, m);
+ cc.evex().vpunpcklbw(zmmA, zmmB, m);
+ cc.evex().vpunpckldq(xmmA, xmmB, m);
+ cc.evex().vpunpckldq(ymmA, ymmB, m);
+ cc.evex().vpunpckldq(zmmA, zmmB, m);
+ cc.evex().vpunpcklqdq(xmmA, xmmB, m);
+ cc.evex().vpunpcklqdq(ymmA, ymmB, m);
+ cc.evex().vpunpcklqdq(zmmA, zmmB, m);
+ cc.evex().vpunpcklwd(xmmA, xmmB, m);
+ cc.evex().vpunpcklwd(ymmA, ymmB, m);
+ cc.evex().vpunpcklwd(zmmA, zmmB, m);
+ cc.evex().vpxord(xmmA, xmmB, m);
+ cc.evex().vpxord(ymmA, ymmB, m);
+ cc.evex().vpxord(zmmA, zmmB, m);
+ cc.evex().vpxorq(xmmA, xmmB, m);
+ cc.evex().vpxorq(ymmA, ymmB, m);
+ cc.evex().vpxorq(zmmA, zmmB, m);
+ cc.evex().vrangepd(xmmA, xmmB, m, 0);
+ cc.evex().vrangepd(ymmA, ymmB, m, 0);
+ cc.evex().vrangepd(zmmA, zmmB, m, 0);
+ cc.evex().vrangeps(xmmA, xmmB, m, 0);
+ cc.evex().vrangeps(ymmA, ymmB, m, 0);
+ cc.evex().vrangeps(zmmA, zmmB, m, 0);
+ cc.evex().vrangesd(xmmA, xmmB, m, 0);
+ cc.evex().vrangess(xmmA, xmmB, m, 0);
+ cc.evex().vrcp14pd(xmmA, m);
+ cc.evex().vrcp14pd(ymmA, m);
+ cc.evex().vrcp14pd(zmmA, m);
+ cc.evex().vrcp14ps(xmmA, m);
+ cc.evex().vrcp14ps(ymmA, m);
+ cc.evex().vrcp14ps(zmmA, m);
+ cc.evex().vrcp14sd(xmmA, xmmB, m);
+ cc.evex().vrcp14ss(xmmA, xmmB, m);
+ cc.evex().vrcp28pd(zmmA, m);
+ cc.evex().vrcp28ps(zmmA, m);
+ cc.evex().vrcp28sd(xmmA, xmmB, m);
+ cc.evex().vrcp28ss(xmmA, xmmB, m);
+ cc.evex().vreducepd(xmmA, m, 0);
+ cc.evex().vreducepd(ymmA, m, 0);
+ cc.evex().vreducepd(zmmA, m, 0);
+ cc.evex().vreduceps(xmmA, m, 0);
+ cc.evex().vreduceps(ymmA, m, 0);
+ cc.evex().vreduceps(zmmA, m, 0);
+ cc.evex().vreducesd(xmmA, xmmB, m, 0);
+ cc.evex().vreducess(xmmA, xmmB, m, 0);
+ cc.evex().vrndscalepd(xmmA, m, 0);
+ cc.evex().vrndscalepd(ymmA, m, 0);
+ cc.evex().vrndscalepd(zmmA, m, 0);
+ cc.evex().vrndscaleps(xmmA, m, 0);
+ cc.evex().vrndscaleps(ymmA, m, 0);
+ cc.evex().vrndscaleps(zmmA, m, 0);
+ cc.evex().vrndscalesd(xmmA, xmmB, m, 0);
+ cc.evex().vrndscaless(xmmA, xmmB, m, 0);
+ cc.evex().vrsqrt14pd(xmmA, m);
+ cc.evex().vrsqrt14pd(ymmA, m);
+ cc.evex().vrsqrt14pd(zmmA, m);
+ cc.evex().vrsqrt14ps(xmmA, m);
+ cc.evex().vrsqrt14ps(ymmA, m);
+ cc.evex().vrsqrt14ps(zmmA, m);
+ cc.evex().vrsqrt14sd(xmmA, xmmB, m);
+ cc.evex().vrsqrt14ss(xmmA, xmmB, m);
+ cc.evex().vrsqrt28pd(zmmA, m);
+ cc.evex().vrsqrt28ps(zmmA, m);
+ cc.evex().vrsqrt28sd(xmmA, xmmB, m);
+ cc.evex().vrsqrt28ss(xmmA, xmmB, m);
+ cc.evex().vscalefpd(xmmA, xmmB, m);
+ cc.evex().vscalefpd(ymmA, ymmB, m);
+ cc.evex().vscalefpd(zmmA, zmmB, m);
+ cc.evex().vscalefps(xmmA, xmmB, m);
+ cc.evex().vscalefps(ymmA, ymmB, m);
+ cc.evex().vscalefps(zmmA, zmmB, m);
+ cc.evex().vscalefsd(xmmA, xmmB, m);
+ cc.evex().vscalefss(xmmA, xmmB, m);
+ cc.evex().k(kA).vscatterdpd(vx_ptr, xmmB);
+ cc.evex().k(kA).vscatterdpd(vx_ptr, ymmB);
+ cc.evex().k(kA).vscatterdpd(vy_ptr, zmmB);
+ cc.evex().k(kA).vscatterdps(vx_ptr, xmmB);
+ cc.evex().k(kA).vscatterdps(vy_ptr, ymmB);
+ cc.evex().k(kA).vscatterdps(vz_ptr, zmmB);
+ cc.evex().k(kA).vscatterpf0dpd(vy_ptr);
+ cc.evex().k(kA).vscatterpf0dps(vz_ptr);
+ cc.evex().k(kA).vscatterpf0qpd(vz_ptr);
+ cc.evex().k(kA).vscatterpf0qps(vz_ptr);
+ cc.evex().k(kA).vscatterpf1dpd(vy_ptr);
+ cc.evex().k(kA).vscatterpf1dps(vz_ptr);
+ cc.evex().k(kA).vscatterpf1qpd(vz_ptr);
+ cc.evex().k(kA).vscatterpf1qps(vz_ptr);
+ cc.evex().k(kA).vscatterqpd(vx_ptr, xmmB);
+ cc.evex().k(kA).vscatterqpd(vy_ptr, ymmB);
+ cc.evex().k(kA).vscatterqpd(vz_ptr, zmmB);
+ cc.evex().k(kA).vscatterqps(vx_ptr, xmmB);
+ cc.evex().k(kA).vscatterqps(vy_ptr, xmmB);
+ cc.evex().k(kA).vscatterqps(vz_ptr, ymmB);
+ cc.evex().vshuff32x4(ymmA, ymmB, m, 0);
+ cc.evex().vshuff32x4(zmmA, zmmB, m, 0);
+ cc.evex().vshuff64x2(ymmA, ymmB, m, 0);
+ cc.evex().vshuff64x2(zmmA, zmmB, m, 0);
+ cc.evex().vshufi32x4(ymmA, ymmB, m, 0);
+ cc.evex().vshufi32x4(zmmA, zmmB, m, 0);
+ cc.evex().vshufi64x2(ymmA, ymmB, m, 0);
+ cc.evex().vshufi64x2(zmmA, zmmB, m, 0);
+ cc.evex().vshufpd(xmmA, xmmB, m, 0);
+ cc.evex().vshufpd(ymmA, ymmB, m, 0);
+ cc.evex().vshufpd(zmmA, zmmB, m, 0);
+ cc.evex().vshufps(xmmA, xmmB, m, 0);
+ cc.evex().vshufps(ymmA, ymmB, m, 0);
+ cc.evex().vshufps(zmmA, zmmB, m, 0);
+ cc.evex().vsqrtpd(xmmA, m);
+ cc.evex().vsqrtpd(ymmA, m);
+ cc.evex().vsqrtpd(zmmA, m);
+ cc.evex().vsqrtps(xmmA, m);
+ cc.evex().vsqrtps(ymmA, m);
+ cc.evex().vsqrtps(zmmA, m);
+ cc.evex().vsqrtsd(xmmA, xmmB, m);
+ cc.evex().vsqrtss(xmmA, xmmB, m);
+ cc.evex().vsubpd(xmmA, xmmB, m);
+ cc.evex().vsubpd(ymmA, ymmB, m);
+ cc.evex().vsubpd(zmmA, zmmB, m);
+ cc.evex().vsubps(xmmA, xmmB, m);
+ cc.evex().vsubps(ymmA, ymmB, m);
+ cc.evex().vsubps(zmmA, zmmB, m);
+ cc.evex().vsubsd(xmmA, xmmB, m);
+ cc.evex().vsubss(xmmA, xmmB, m);
+ cc.evex().vucomisd(xmmA, m);
+ cc.evex().vucomiss(xmmA, m);
+ cc.evex().vunpckhpd(xmmA, xmmB, m);
+ cc.evex().vunpckhpd(ymmA, ymmB, m);
+ cc.evex().vunpckhpd(zmmA, zmmB, m);
+ cc.evex().vunpckhps(xmmA, xmmB, m);
+ cc.evex().vunpckhps(ymmA, ymmB, m);
+ cc.evex().vunpckhps(zmmA, zmmB, m);
+ cc.evex().vunpcklpd(xmmA, xmmB, m);
+ cc.evex().vunpcklpd(ymmA, ymmB, m);
+ cc.evex().vunpcklpd(zmmA, zmmB, m);
+ cc.evex().vunpcklps(xmmA, xmmB, m);
+ cc.evex().vunpcklps(ymmA, ymmB, m);
+ cc.evex().vunpcklps(zmmA, zmmB, m);
+ cc.evex().vxorpd(xmmA, xmmB, m);
+ cc.evex().vxorpd(ymmA, ymmB, m);
+ cc.evex().vxorpd(zmmA, zmmB, m);
+ cc.evex().vxorps(xmmA, xmmB, m);
+ cc.evex().vxorps(ymmA, ymmB, m);
+ cc.evex().vxorps(zmmA, zmmB, m);
+ }
+}
+
+static void generateAvx512Sequence(BaseEmitter& emitter, InstForm form, bool emitPrologEpilog) { // Emits the AVX-512 sequence through whichever emitter kind (Assembler, Builder or Compiler) `emitter` reports to be.
+ using namespace asmjit::x86; // For the unqualified register names (eax, k1, zmm0, ...) and emitter/operand types used below.
+
+ if (emitter.isAssembler()) { // Assembler path: the sequence is given the fixed registers eax, k1-k3 and zmm0-zmm3.
+ Assembler& cc = *emitter.as<Assembler>();
+
+ if (emitPrologEpilog) { // Surround the sequence with a prolog/epilog built from a FuncFrame.
+ FuncDetail func;
+ func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment()); // Signature void(void*, const void*, size_t) with the host calling convention.
+
+ FuncFrame frame;
+ frame.init(func);
+ frame.addDirtyRegs(eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3); // Mark every register handed to the sequence as dirty in the frame.
+ frame.finalize();
+
+ cc.emitProlog(frame);
+ generateAvx512SequenceInternal(cc, form, eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3);
+ cc.emitEpilog(frame);
+ }
+ else { // No frame requested: emit the bare sequence only.
+ generateAvx512SequenceInternal(cc, form, eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3);
+ }
+ }
+#ifndef ASMJIT_NO_BUILDER
+ else if (emitter.isBuilder()) { // Builder path: same steps as the Assembler path; compiled only when ASMJIT_NO_BUILDER is not defined.
+ Builder& cc = *emitter.as<Builder>();
+
+ if (emitPrologEpilog) {
+ FuncDetail func;
+ func.init(FuncSignatureT<void, void*, const void*, size_t>(CallConv::kIdHost), cc.environment());
+
+ FuncFrame frame;
+ frame.init(func);
+ frame.addDirtyRegs(eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3);
+ frame.finalize();
+
+ cc.emitProlog(frame);
+ generateAvx512SequenceInternal(cc, form, eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3);
+ cc.emitEpilog(frame);
+ }
+ else {
+ generateAvx512SequenceInternal(cc, form, eax, k1, k2, k3, zmm0, zmm1, zmm2, zmm3);
+ }
+ }
+#endif
+#ifndef ASMJIT_NO_COMPILER
+ else if (emitter.isCompiler()) { // Compiler path: compiled only when ASMJIT_NO_COMPILER is not defined; emitPrologEpilog is not consulted here.
+ Compiler& cc = *emitter.as<Compiler>();
+
+ Gp gp = cc.newGpz("gp"); // Registers are obtained from the Compiler instead of naming eax/k1/zmm0 directly.
+ Zmm vecA = cc.newZmm("vecA");
+ Zmm vecB = cc.newZmm("vecB");
+ Zmm vecC = cc.newZmm("vecC");
+ Zmm vecD = cc.newZmm("vecD");
+
+ KReg kA = cc.newKq("kA");
+ KReg kB = cc.newKq("kB");
+ KReg kC = cc.newKq("kC");
+
+ cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost)); // The sequence is always wrapped in a void() function with the host calling convention.
+ generateAvx512SequenceInternal(cc, form, gp, kA, kB, kC, vecA, vecB, vecC, vecD);
+ cc.endFunc();
+ }
+#endif
+}
+
+template<typename EmitterFn>
+static void benchmarkX86Function(uint32_t arch, uint32_t numIterations, const char* description, const EmitterFn& emitterFn) noexcept { // Runs one code generator (`emitterFn(emitter, emitPrologEpilog)`) through bench<>() once per emitter configuration listed below.
+ CodeHolder code; // The same CodeHolder is handed to every bench<>() call.
+ printf("%s:\n", description); // Heading line for this group of results.
+
+ bench<x86::Assembler>(code, arch, numIterations, "[raw]", [&](x86::Assembler& cc) { // Assembler, no validation, no prolog/epilog.
+ emitterFn(cc, false);
+ });
+
+ bench<x86::Assembler>(code, arch, numIterations, "[validated]", [&](x86::Assembler& cc) { // Assembler with kValidationOptionAssembler enabled.
+ cc.addValidationOptions(BaseEmitter::kValidationOptionAssembler);
+ emitterFn(cc, false);
+ });
+
+ bench<x86::Assembler>(code, arch, numIterations, "[prolog/epilog]", [&](x86::Assembler& cc) { // Assembler with prolog/epilog requested.
+ cc.addValidationOptions(BaseEmitter::kValidationOptionAssembler); // NOTE(review): validation is enabled here too, so this case builds on "[validated]" rather than "[raw]" - confirm intended.
+ emitterFn(cc, true);
+ });
+
+#ifndef ASMJIT_NO_BUILDER
+ bench<x86::Builder>(code, arch, numIterations, "[no-asm]", [&](x86::Builder& cc) { // Builder without calling finalize().
+ emitterFn(cc, false);
+ });
+
+ bench<x86::Builder>(code, arch, numIterations, "[finalized]", [&](x86::Builder& cc) { // Builder followed by finalize().
+ emitterFn(cc, false);
+ cc.finalize();
+ });
+
+ bench<x86::Builder>(code, arch, numIterations, "[prolog/epilog]", [&](x86::Builder& cc) { // Builder with prolog/epilog requested, followed by finalize().
+ emitterFn(cc, true);
+ cc.finalize();
+ });
+#endif
+
+#ifndef ASMJIT_NO_COMPILER
+ bench<x86::Compiler>(code, arch, numIterations, "[no-asm]", [&](x86::Compiler& cc) { // Compiler without calling finalize(); the flag passed is always true.
+ emitterFn(cc, true);
+ });
+
+ bench<x86::Compiler>(code, arch, numIterations, "[finalized]", [&](x86::Compiler& cc) { // Compiler followed by finalize().
+ emitterFn(cc, true);
+ cc.finalize();
+ });
+#endif
+
+ printf("\n"); // Blank line separating this group from the next.
+}
+
+void benchmarkX86Emitters(uint32_t numIterations, bool testX86, bool testX64) { // Runs every benchmark below for each architecture selected by testX86 / testX64.
+ uint32_t i = 0; // Loop index shared by all loops below.
+ uint32_t n = 0; // Number of entries filled in `archs`; stays 0 (nothing runs) when both flags are false.
+
+ uint32_t archs[2] {}; // Selected architectures: X86 first, then X64.
+
+ if (testX86) archs[n++] = Environment::kArchX86;
+ if (testX64) archs[n++] = Environment::kArchX64;
+
+ for (i = 0; i < n; i++) { // One loop per benchmark: a benchmark is run for all selected architectures before the next one starts.
+ static const char description[] = "GpSequence<Reg> (Sequence of GP instructions - reg-only)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateGpSequence(emitter, InstForm::kReg, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "GpSequence<Mem> (Sequence of GP instructions - reg/mem)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateGpSequence(emitter, InstForm::kMem, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "SseSequence<Reg> (sequence of SSE+ instructions - reg-only)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateSseSequence(emitter, InstForm::kReg, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "SseSequence<Mem> (sequence of SSE+ instructions - reg/mem)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateSseSequence(emitter, InstForm::kMem, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "AvxSequence<Reg> (sequence of AVX+ instructions - reg-only)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateAvxSequence(emitter, InstForm::kReg, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "AvxSequence<Mem> (sequence of AVX+ instructions - reg/mem)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateAvxSequence(emitter, InstForm::kMem, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "Avx512Sequence<Reg> (sequence of AVX512+ instructions - reg-only)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateAvx512Sequence(emitter, InstForm::kReg, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) {
+ static const char description[] = "Avx512Sequence<Mem> (sequence of AVX512+ instructions - reg/mem)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ generateAvx512Sequence(emitter, InstForm::kMem, emitPrologEpilog);
+ });
+ }
+
+ for (i = 0; i < n; i++) { // Unlike the sequences above, this generator takes no InstForm and comes from the asmtest namespace.
+ static const char description[] = "SseAlphaBlend (alpha-blend function with labels and jumps)";
+ benchmarkX86Function(archs[i], numIterations, description, [](BaseEmitter& emitter, bool emitPrologEpilog) {
+ asmtest::generateSseAlphaBlend(emitter, emitPrologEpilog);
+ });
+ }
+}
+#endif
diff --git a/test/cmdline.h b/test/cmdline.h
index caafa6c..effedd1 100644
--- a/test/cmdline.h
+++ b/test/cmdline.h
@@ -21,8 +21,8 @@
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
-#ifndef ASMJIT_TEST_CMDLINE_H_INCLUDED
-#define ASMJIT_TEST_CMDLINE_H_INCLUDED
+#ifndef CMDLINE_H_INCLUDED
+#define CMDLINE_H_INCLUDED
#include <stdint.h>
#include <stdlib.h>
@@ -80,4 +80,4 @@ public:
}
};
-#endif // ASMJIT_TEST_CMDLINE_H_INCLUDED
+#endif // CMDLINE_H_INCLUDED
diff --git a/test/performancetimer.h b/test/performancetimer.h
new file mode 100644
index 0000000..ebbaca3
--- /dev/null
+++ b/test/performancetimer.h
@@ -0,0 +1,59 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef PERFORMANCETIMER_H_INCLUDED
+#define PERFORMANCETIMER_H_INCLUDED
+
+#include <asmjit/core.h>
+#include <chrono>
+
+// Minimal stopwatch for tests and benchmarks.
+//
+// Usage: call start(), run the measured code, call stop(), then read
+// duration(). A default-constructed timer reports a duration of zero because
+// both time points are value-initialized.
+class PerformanceTimer {
+public:
+  // Intervals are measured with a monotonic clock. high_resolution_clock is
+  // not guaranteed to be steady (it may alias system_clock), in which case a
+  // wall-clock adjustment during a run would corrupt the measurement.
+  typedef std::chrono::steady_clock Clock;
+  typedef Clock::time_point TimePoint;
+
+  TimePoint _startTime {};
+  TimePoint _endTime {};
+
+  // Records the beginning of the measured interval.
+  inline void start() {
+    _startTime = Clock::now();
+  }
+
+  // Records the end of the measured interval.
+  inline void stop() {
+    _endTime = Clock::now();
+  }
+
+  // Returns the time between the recorded start and stop points in
+  // milliseconds (seconds as a double, scaled by 1000).
+  inline double duration() const {
+    std::chrono::duration<double> elapsed = _endTime - _startTime;
+    return elapsed.count() * 1000;
+  }
+};
+
+// Converts an amount of produced output into a throughput figure.
+//
+// `duration` is scaled as milliseconds (the unit PerformanceTimer::duration()
+// returns) and `outputSize` is a byte count; the result is expressed in units
+// of 1024 * 1024 bytes per second. A duration of exactly zero yields 0.0
+// instead of dividing by zero.
+static inline double mbps(double duration, uint64_t outputSize) noexcept {
+  return duration == 0
+    ? 0.0
+    : (double(outputSize) * 1000) / (duration * 1024 * 1024);
+}
+
+#endif // PERFORMANCETIMER_H_INCLUDED