github.com/nodejs/node.git

Diffstat (limited to 'deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h')
-rw-r--r--  deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h | 1479
1 file changed, 1193 insertions(+), 286 deletions(-)
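
Note on the patch below: the atomic read-modify-write emitters (AtomicAdd, AtomicSub, AtomicAnd, AtomicOr, AtomicXor, AtomicExchange) now take a dedicated {result} register instead of returning the old memory value in the clobbered {value} register, the SpillRegisters/ClearRegister helpers move from this platform file onto LiftoffAssembler itself, the immediate-operand emitters gain an "i" suffix (emit_i32_addi, emit_i64_shri, ...), and SIMD (S128) support is expanded substantially. The sketch that follows is not V8 code; it only illustrates, with std::atomic, the value the generated "lock xadd" sequence is expected to leave in {result} (the old memory contents, zero-extended for the narrow store types).

// Assumed illustration, not V8 code: semantics of the new AtomicAdd lowering.
// {result} receives the *old* value at the address, zero-extended per store
// type, while memory is updated to old + value. On x64 this corresponds to
// "lock xaddl" / "lock xaddb" followed by a zero-extending move into result.
#include <atomic>
#include <cstdint>
#include <cstdio>

uint64_t atomic_add_i32(std::atomic<uint32_t>& slot, uint32_t value) {
  uint32_t old = slot.fetch_add(value);  // lock xaddl: memory += value
  return static_cast<uint64_t>(old);     // result = old value, zero-extended
}

uint64_t atomic_add_i8(std::atomic<uint8_t>& slot, uint8_t value) {
  uint8_t old = slot.fetch_add(value);   // lock xaddb
  return static_cast<uint64_t>(old);     // movzxbq into the result register
}

int main() {
  std::atomic<uint32_t> cell{40};
  uint64_t previous = atomic_add_i32(cell, 2);
  std::printf("old=%llu new=%u\n",
              static_cast<unsigned long long>(previous), cell.load());
  return 0;
}
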
diff --git a/deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h b/deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h
index 6c586255366..7638c4f9cc0 100644
--- a/deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h
+++ b/deps/v8/src/wasm/baseline/x64/liftoff-assembler-x64.h
@@ -36,7 +36,9 @@ static_assert((kLiftoffAssemblerFpCacheRegs &
// rbp-8 holds the stack marker, rbp-16 is the instance parameter.
constexpr int kInstanceOffset = 16;
-inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }
+inline Operand GetStackSlot(int offset) {
+ return Operand(offset > 0 ? rbp : rsp, -offset);
+}
// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }
@@ -122,13 +124,6 @@ inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) {
}
}
-template <typename... Regs>
-inline void SpillRegisters(LiftoffAssembler* assm, Regs... regs) {
- for (LiftoffRegister r : {LiftoffRegister(regs)...}) {
- if (assm->cache_state()->is_used(r)) assm->SpillRegister(r);
- }
-}
-
constexpr int kSubSpSize = 7; // 7 bytes for "subq rsp, <imm32>"
} // namespace liftoff
@@ -390,8 +385,15 @@ void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
- DCHECK(!cache_state()->is_used(value));
+ LiftoffRegister result, StoreType type) {
+ DCHECK(!cache_state()->is_used(result));
+ if (cache_state()->is_used(value)) {
+ // We cannot overwrite {value}, but the {value} register is changed in the
+ // code we generate. Therefore we copy {value} to {result} and use the
+ // {result} register in the code below.
+ movq(result.gp(), value.gp());
+ value = result;
+ }
if (emit_debug_code() && offset_reg != no_reg) {
AssertZeroExtended(offset_reg);
}
@@ -401,19 +403,25 @@ void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
case StoreType::kI32Store8:
case StoreType::kI64Store8:
xaddb(dst_op, value.gp());
- movzxbq(value.gp(), value.gp());
+ movzxbq(result.gp(), value.gp());
break;
case StoreType::kI32Store16:
case StoreType::kI64Store16:
xaddw(dst_op, value.gp());
- movzxwq(value.gp(), value.gp());
+ movzxwq(result.gp(), value.gp());
break;
case StoreType::kI32Store:
case StoreType::kI64Store32:
xaddl(dst_op, value.gp());
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
case StoreType::kI64Store:
xaddq(dst_op, value.gp());
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
default:
UNREACHABLE();
@@ -422,8 +430,15 @@ void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
- DCHECK(!cache_state()->is_used(value));
+ LiftoffRegister result, StoreType type) {
+ DCHECK(!cache_state()->is_used(result));
+ if (cache_state()->is_used(value)) {
+ // We cannot overwrite {value}, but the {value} register is changed in the
+ // code we generate. Therefore we copy {value} to {result} and use the
+ // {result} register in the code below.
+ movq(result.gp(), value.gp());
+ value = result;
+ }
if (emit_debug_code() && offset_reg != no_reg) {
AssertZeroExtended(offset_reg);
}
@@ -434,25 +449,31 @@ void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
negb(value.gp());
lock();
xaddb(dst_op, value.gp());
- movzxbq(value.gp(), value.gp());
+ movzxbq(result.gp(), value.gp());
break;
case StoreType::kI32Store16:
case StoreType::kI64Store16:
negw(value.gp());
lock();
xaddw(dst_op, value.gp());
- movzxwq(value.gp(), value.gp());
+ movzxwq(result.gp(), value.gp());
break;
case StoreType::kI32Store:
case StoreType::kI64Store32:
negl(value.gp());
lock();
xaddl(dst_op, value.gp());
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
case StoreType::kI64Store:
negq(value.gp());
lock();
xaddq(dst_op, value.gp());
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
default:
UNREACHABLE();
@@ -461,40 +482,21 @@ void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
namespace liftoff {
#define __ lasm->
-// Checks if a register in {possible_uses} uses {reg}. If so, it allocates a
-// replacement register for that use, and moves the content of {reg} to {use}.
-// The replacement register is written into the pointer stored in
-// {possible_uses}.
-inline void ClearRegister(LiftoffAssembler* lasm, Register reg,
- std::initializer_list<Register*> possible_uses,
- LiftoffRegList pinned) {
- liftoff::SpillRegisters(lasm, reg);
- Register replacement = no_reg;
- for (Register* use : possible_uses) {
- if (reg != *use) continue;
- if (replacement == no_reg) {
- replacement = __ GetUnusedRegister(kGpReg, pinned).gp();
- __ movq(replacement, reg);
- }
- // We cannot leave this loop early. There may be multiple uses of {reg}.
- *use = replacement;
- }
-}
inline void AtomicBinop(LiftoffAssembler* lasm,
void (Assembler::*opl)(Register, Register),
void (Assembler::*opq)(Register, Register),
Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
- DCHECK(!__ cache_state()->is_used(value));
+ LiftoffRegister result, StoreType type) {
+ DCHECK(!__ cache_state()->is_used(result));
Register value_reg = value.gp();
// The cmpxchg instruction uses rax to store the old value of the
// compare-exchange primitive. Therefore we have to spill the register and
// move any use to another register.
LiftoffRegList pinned =
LiftoffRegList::ForRegs(dst_addr, offset_reg, value_reg);
- ClearRegister(lasm, rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
+ __ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
if (__ emit_debug_code() && offset_reg != no_reg) {
__ AssertZeroExtended(offset_reg);
}
@@ -554,8 +556,8 @@ inline void AtomicBinop(LiftoffAssembler* lasm,
UNREACHABLE();
}
- if (value.gp() != rax) {
- __ movq(value.gp(), rax);
+ if (result.gp() != rax) {
+ __ movq(result.gp(), rax);
}
}
#undef __
@@ -563,29 +565,37 @@ inline void AtomicBinop(LiftoffAssembler* lasm,
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
+ LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, &Assembler::andl, &Assembler::andq, dst_addr,
- offset_reg, offset_imm, value, type);
+ offset_reg, offset_imm, value, result, type);
}
void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
+ LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, &Assembler::orl, &Assembler::orq, dst_addr,
- offset_reg, offset_imm, value, type);
+ offset_reg, offset_imm, value, result, type);
}
void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
- StoreType type) {
+ LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, &Assembler::xorl, &Assembler::xorq, dst_addr,
- offset_reg, offset_imm, value, type);
+ offset_reg, offset_imm, value, result, type);
}
void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
uint32_t offset_imm,
- LiftoffRegister value, StoreType type) {
- DCHECK(!cache_state()->is_used(value));
+ LiftoffRegister value,
+ LiftoffRegister result, StoreType type) {
+ DCHECK(!cache_state()->is_used(result));
+ if (cache_state()->is_used(value)) {
+ // We cannot overwrite {value}, but the {value} register is changed in the
+ // code we generate. Therefore we copy {value} to {result} and use the
+ // {result} register in the code below.
+ movq(result.gp(), value.gp());
+ value = result;
+ }
if (emit_debug_code() && offset_reg != no_reg) {
AssertZeroExtended(offset_reg);
}
@@ -594,19 +604,25 @@ void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
case StoreType::kI32Store8:
case StoreType::kI64Store8:
xchgb(value.gp(), dst_op);
- movzxbq(value.gp(), value.gp());
+ movzxbq(result.gp(), value.gp());
break;
case StoreType::kI32Store16:
case StoreType::kI64Store16:
xchgw(value.gp(), dst_op);
- movzxwq(value.gp(), value.gp());
+ movzxwq(result.gp(), value.gp());
break;
case StoreType::kI32Store:
case StoreType::kI64Store32:
xchgl(value.gp(), dst_op);
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
case StoreType::kI64Store:
xchgq(value.gp(), dst_op);
+ if (value != result) {
+ movq(result.gp(), value.gp());
+ }
break;
default:
UNREACHABLE();
@@ -623,8 +639,7 @@ void LiftoffAssembler::AtomicCompareExchange(
// move any use to another register.
LiftoffRegList pinned =
LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, value_reg);
- liftoff::ClearRegister(this, rax, {&dst_addr, &offset_reg, &value_reg},
- pinned);
+ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
if (expected.gp() != rax) {
movq(rax, expected.gp());
}
@@ -682,6 +697,13 @@ void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
liftoff::Load(this, dst, src, type);
}
+void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
+ uint32_t caller_slot_idx,
+ ValueType type) {
+ Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
+ liftoff::Store(this, dst, src, type);
+}
+
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
DCHECK_NE(dst_offset, src_offset);
@@ -840,7 +862,7 @@ void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
}
}
-void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, int32_t imm) {
+void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
if (lhs != dst) {
leal(dst, Operand(lhs, imm));
} else {
@@ -916,7 +938,7 @@ void EmitIntDivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
// another temporary register.
// Do all this before any branch, such that the code is executed
// unconditionally, as the cache state will also be modified unconditionally.
- liftoff::SpillRegisters(assm, rdx, rax);
+ assm->SpillRegisters(rdx, rax);
if (rhs == rax || rhs == rdx) {
iop(mov, kScratchRegister, rhs);
rhs = kScratchRegister;
@@ -1006,7 +1028,7 @@ void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
lhs, rhs);
}
-void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, int32_t imm) {
+void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::andl, &Assembler::movl>(
this, dst, lhs, imm);
}
@@ -1016,7 +1038,7 @@ void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
lhs, rhs);
}
-void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, int32_t imm) {
+void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::orl, &Assembler::movl>(this, dst,
lhs, imm);
}
@@ -1026,7 +1048,7 @@ void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
lhs, rhs);
}
-void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, int32_t imm) {
+void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::xorl, &Assembler::movl>(
this, dst, lhs, imm);
}
@@ -1071,8 +1093,8 @@ void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
&Assembler::shll_cl);
}
-void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
- int32_t amount) {
+void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
+ int32_t amount) {
if (dst != src) movl(dst, src);
shll(dst, Immediate(amount & 31));
}
@@ -1083,8 +1105,8 @@ void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
&Assembler::sarl_cl);
}
-void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
- int32_t amount) {
+void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
+ int32_t amount) {
if (dst != src) movl(dst, src);
sarl(dst, Immediate(amount & 31));
}
@@ -1095,8 +1117,8 @@ void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
&Assembler::shrl_cl);
}
-void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
- int32_t amount) {
+void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
+ int32_t amount) {
if (dst != src) movl(dst, src);
shrl(dst, Immediate(amount & 31));
}
@@ -1125,8 +1147,8 @@ void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
}
}
-void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
- int32_t imm) {
+void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t imm) {
if (lhs.gp() != dst.gp()) {
leaq(dst.gp(), Operand(lhs.gp(), imm));
} else {
@@ -1191,8 +1213,8 @@ void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
this, dst.gp(), lhs.gp(), rhs.gp());
}
-void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
- int32_t imm) {
+void LiftoffAssembler::emit_i64_andi(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::andq, &Assembler::movq>(
this, dst.gp(), lhs.gp(), imm);
}
@@ -1203,8 +1225,8 @@ void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
this, dst.gp(), lhs.gp(), rhs.gp());
}
-void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
- int32_t imm) {
+void LiftoffAssembler::emit_i64_ori(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::orq, &Assembler::movq>(
this, dst.gp(), lhs.gp(), imm);
}
@@ -1215,8 +1237,8 @@ void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
this, dst.gp(), lhs.gp(), rhs.gp());
}
-void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
- int32_t imm) {
+void LiftoffAssembler::emit_i64_xori(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t imm) {
liftoff::EmitCommutativeBinOpImm<&Assembler::xorq, &Assembler::movq>(
this, dst.gp(), lhs.gp(), imm);
}
@@ -1227,8 +1249,8 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
&Assembler::shlq_cl);
}
-void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
- int32_t amount) {
+void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
+ int32_t amount) {
if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
shlq(dst.gp(), Immediate(amount & 63));
}
@@ -1239,8 +1261,8 @@ void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
&Assembler::sarq_cl);
}
-void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
- int32_t amount) {
+void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
+ int32_t amount) {
if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
sarq(dst.gp(), Immediate(amount & 63));
}
@@ -1251,8 +1273,8 @@ void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
&Assembler::shrq_cl);
}
-void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
- int32_t amount) {
+void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
+ int32_t amount) {
if (dst != src) movq(dst.gp(), src.gp());
shrq(dst.gp(), Immediate(amount & 63));
}
@@ -1900,14 +1922,19 @@ void EmitSimdCommutativeBinOp(
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
-void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
- LiftoffRegister lhs, LiftoffRegister rhs) {
+void EmitSimdNonCommutativeBinOp(
+ LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
- } else if (lhs.fp() == rhs.fp()) {
- assm->pxor(dst.fp(), dst.fp());
- } else if (dst.fp() == rhs.fp()) {
+ return;
+ }
+
+ base::Optional<CpuFeatureScope> sse_scope;
+ if (feature.has_value()) sse_scope.emplace(assm, *feature);
+
+ if (dst.fp() == rhs.fp()) {
assm->movaps(kScratchDoubleReg, rhs.fp());
assm->movaps(dst.fp(), lhs.fp());
(assm->*sse_op)(dst.fp(), kScratchDoubleReg);
@@ -1916,138 +1943,733 @@ void EmitSimdSub(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
+
+template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
+ void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
+void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister operand, LiftoffRegister count) {
+ constexpr int mask = (1 << width) - 1;
+ assm->movq(kScratchRegister, count.gp());
+ assm->andq(kScratchRegister, Immediate(mask));
+ assm->Movq(kScratchDoubleReg, kScratchRegister);
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(assm, AVX);
+ (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
+ } else {
+ if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
+ (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
+ }
+}
+
+template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
+ void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
+void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
+ LiftoffRegister operand, int32_t count) {
+ constexpr int mask = (1 << width) - 1;
+ byte shift = static_cast<byte>(count & mask);
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(assm, AVX);
+ (assm->*avx_op)(dst.fp(), operand.fp(), shift);
+ } else {
+ if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
+ (assm->*sse_op)(dst.fp(), shift);
+ }
+}
} // namespace liftoff
+void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Movd(dst.fp(), src.gp());
+ Pxor(kScratchDoubleReg, kScratchDoubleReg);
+ Pshufb(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Movd(dst.fp(), src.gp());
+ Pshuflw(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
+ Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
+}
+
+void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Movd(dst.fp(), src.gp());
+ Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
+}
+
+void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Movq(dst.fp(), src.gp());
+ Movddup(dst.fp(), dst.fp());
+}
+
+void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() != src.fp()) {
+ Movss(dst.fp(), src.fp());
+ }
+ Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
+}
+
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Movddup(dst.fp(), src.fp());
}
-void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
- Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx));
- Movq(dst.fp(), kScratchRegister);
+void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
- LiftoffRegister src1,
- LiftoffRegister src2,
- uint8_t imm_lane_idx) {
+void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
+ this, dst, lhs, rhs);
+ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
+ &Assembler::pcmpgtb>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
+ this, dst, lhs, rhs);
+ Pcmpeqb(dst.fp(), ref);
+ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
+ this, dst, lhs, rhs, SSE4_1);
+ Pcmpeqb(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
+ this, dst, lhs, rhs);
+ Pcmpeqb(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
+ this, dst, lhs, rhs);
+ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
+ &Assembler::pcmpgtw>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
+ this, dst, lhs, rhs);
+ Pcmpeqw(dst.fp(), ref);
+ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
+ this, dst, lhs, rhs);
+ Pcmpeqw(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
+ this, dst, lhs, rhs, SSE4_1);
+ Pcmpeqw(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
+ this, dst, lhs, rhs);
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
+ &Assembler::pcmpgtd>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
+ this, dst, lhs, rhs);
+ Pcmpeqd(dst.fp(), ref);
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
+ this, dst, lhs, rhs, SSE4_1);
+ Pcmpeqd(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ DoubleRegister ref = rhs.fp();
+ if (dst == rhs) {
+ Movaps(kScratchDoubleReg, rhs.fp());
+ ref = kScratchDoubleReg;
+ }
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
+ this, dst, lhs, rhs, SSE4_1);
+ Pcmpeqd(dst.fp(), ref);
+}
+
+void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
+ &Assembler::cmpneqps>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
+ &Assembler::cmpltps>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
+ &Assembler::cmpleps>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
+ &Assembler::cmpneqpd>(this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
+ &Assembler::cmpltpd>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
+ &Assembler::cmplepd>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
+ if (dst.fp() != src.fp()) {
+ Pcmpeqd(dst.fp(), dst.fp());
+ Pxor(dst.fp(), src.fp());
+ } else {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Pxor(dst.fp(), kScratchDoubleReg);
+ }
+}
+
+void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ LiftoffRegister mask) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- if (imm_lane_idx == 0) {
- vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111);
- } else {
- vmovlhps(dst.fp(), src1.fp(), src2.fp());
- }
+ vxorps(kScratchDoubleReg, src1.fp(), src2.fp());
+ vandps(kScratchDoubleReg, kScratchDoubleReg, mask.fp());
+ vxorps(dst.fp(), kScratchDoubleReg, src2.fp());
} else {
- CpuFeatureScope scope(this, SSE4_1);
- if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
- if (imm_lane_idx == 0) {
- pblendw(dst.fp(), src2.fp(), 0b00001111);
- } else {
- movlhps(dst.fp(), src2.fp());
- }
+ movaps(kScratchDoubleReg, src1.fp());
+ xorps(kScratchDoubleReg, src2.fp());
+ andps(kScratchDoubleReg, mask.fp());
+ if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
+ xorps(dst.fp(), kScratchDoubleReg);
}
}
-void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psignb(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pxor(dst.fp(), dst.fp());
+ Psubb(dst.fp(), src.fp());
+ }
+}
+
+void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
- this, dst, lhs, rhs);
+ static constexpr RegClass tmp_simd_rc = reg_class_for(ValueType::kS128);
+ LiftoffRegister tmp_simd =
+ GetUnusedRegister(tmp_simd_rc, LiftoffRegList::ForRegs(dst, lhs));
+ // Mask off the unwanted bits before word-shifting.
+ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+ movq(kScratchRegister, rhs.gp());
+ andq(kScratchRegister, Immediate(7));
+ addq(kScratchRegister, Immediate(8));
+ Movq(tmp_simd.fp(), kScratchRegister);
+ Psrlw(kScratchDoubleReg, tmp_simd.fp());
+ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
+
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vpand(dst.fp(), lhs.fp(), kScratchDoubleReg);
+ } else {
+ if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
+ pand(dst.fp(), kScratchDoubleReg);
+ }
+ subq(kScratchRegister, Immediate(8));
+ Movq(tmp_simd.fp(), kScratchRegister);
+ Psllw(dst.fp(), tmp_simd.fp());
}
-void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t rhs) {
+ byte shift = static_cast<byte>(rhs & 0x7);
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vpsllw(dst.fp(), lhs.fp(), shift);
+ } else {
+ if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
+ psllw(dst.fp(), shift);
+ }
+
+ uint8_t bmask = static_cast<uint8_t>(0xff << shift);
+ uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+ movl(kScratchRegister, Immediate(mask));
+ Movd(kScratchDoubleReg, kScratchRegister);
+ Pshufd(kScratchDoubleReg, kScratchDoubleReg, uint8_t{0});
+ Pand(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vsubpd, &Assembler::subpd>(this, dst, lhs,
- rhs);
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i8x16_add_saturate_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_add_saturate_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
- LiftoffRegister src) {
- if (dst.fp() != src.fp()) {
- Movss(dst.fp(), src.fp());
- }
- Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
+void LiftoffAssembler::emit_i8x16_sub_saturate_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
+void LiftoffAssembler::emit_i8x16_sub_saturate_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
+ &Assembler::psubusb>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
+ LiftoffRegister tmp =
+ GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
+ // I16x8 view of I8x16
+ // left = AAaa AAaa ... AAaa AAaa
+ // right= BBbb BBbb ... BBbb BBbb
+ // t = 00AA 00AA ... 00AA 00AA
+ // s = 00BB 00BB ... 00BB 00BB
+ vpsrlw(tmp.fp(), lhs.fp(), 8);
+ vpsrlw(kScratchDoubleReg, rhs.fp(), 8);
+ // t = I16x8Mul(t0, t1)
+ //    => __PP __PP ...  __PP  __PP
+ vpmullw(tmp.fp(), tmp.fp(), kScratchDoubleReg);
+ // s = left * 256
+ vpsllw(kScratchDoubleReg, lhs.fp(), 8);
+ // dst = I16x8Mul(left * 256, right)
+ //    => pp__ pp__ ...  pp__  pp__
+ vpmullw(dst.fp(), kScratchDoubleReg, rhs.fp());
+ // dst = I16x8Shr(dst, 8)
+ //    => 00pp 00pp ...  00pp  00pp
+ vpsrlw(dst.fp(), dst.fp(), 8);
+ // t = I16x8Shl(t, 8)
+ //    => PP00 PP00 ...  PP00  PP00
+ vpsllw(tmp.fp(), tmp.fp(), 8);
+ // dst = I16x8Or(dst, t)
+ //    => PPpp PPpp ...  PPpp  PPpp
+ vpor(dst.fp(), dst.fp(), tmp.fp());
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
- if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
+ // I16x8 view of I8x16
+ // left = AAaa AAaa ... AAaa AAaa
+ // right= BBbb BBbb ... BBbb BBbb
+ // t = 00AA 00AA ... 00AA 00AA
+ // s = 00BB 00BB ... 00BB 00BB
+ movaps(tmp.fp(), dst.fp());
+ movaps(kScratchDoubleReg, rhs.fp());
+ psrlw(tmp.fp(), 8);
+ psrlw(kScratchDoubleReg, 8);
+ // dst = left * 256
+ psllw(dst.fp(), 8);
+ // t = I16x8Mul(t, s)
+ //    => __PP __PP ...  __PP  __PP
+ pmullw(tmp.fp(), kScratchDoubleReg);
+ // dst = I16x8Mul(left * 256, right)
+ //    => pp__ pp__ ...  pp__  pp__
+ pmullw(dst.fp(), rhs.fp());
+ // t = I16x8Shl(t, 8)
+ //    => PP00 PP00 ...  PP00  PP00
+ psllw(tmp.fp(), 8);
+ // dst = I16x8Shr(dst, 8)
+ //    => 00pp 00pp ...  00pp  00pp
+ psrlw(dst.fp(), 8);
+ // dst = I16x8Or(dst, t)
+ //    => PPpp PPpp ...  PPpp  PPpp
+ por(dst.fp(), tmp.fp());
}
}
-void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
- LiftoffRegister src1,
- LiftoffRegister src2,
- uint8_t imm_lane_idx) {
- if (CpuFeatures::IsSupported(AVX)) {
- CpuFeatureScope scope(this, AVX);
- vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
+void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psignw(dst.fp(), kScratchDoubleReg);
} else {
- CpuFeatureScope scope(this, SSE4_1);
- if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
- insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
+ Pxor(dst.fp(), dst.fp());
+ Psubw(dst.fp(), src.fp());
}
}
-void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
+ liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vsubps, &Assembler::subps>(this, dst, lhs,
- rhs);
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_i16x8_add_saturate_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_add_saturate_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
- LiftoffRegister src) {
- Movq(dst.fp(), src.gp());
- Movddup(dst.fp(), dst.fp());
+void LiftoffAssembler::emit_i16x8_sub_saturate_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
- Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
+void LiftoffAssembler::emit_i16x8_sub_saturate_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
+ &Assembler::psubusw>(this, dst, lhs,
+ rhs);
}
-void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
- LiftoffRegister src1,
- LiftoffRegister src2,
- uint8_t imm_lane_idx) {
+void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psignd(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pxor(dst.fp(), dst.fp());
+ Psubd(dst.fp(), src.fp());
+ }
+}
+
+void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
+ this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+}
+
+void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ DoubleRegister reg = dst.fp() == src.fp() ? kScratchDoubleReg : dst.fp();
+ Pxor(reg, reg);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
+ vpsubq(dst.fp(), reg, src.fp());
} else {
- CpuFeatureScope scope(this, SSE4_1);
- if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
- pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
+ psubq(reg, src.fp());
+ if (dst.fp() != reg) movapd(dst.fp(), reg);
}
}
+void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
+ lhs, rhs);
+}
+
+void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
+ int32_t rhs) {
+ liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
+ this, dst, lhs, rhs);
+}
+
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
@@ -2056,8 +2678,8 @@ void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vpsubq, &Assembler::psubq>(this, dst, lhs,
- rhs);
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
+ this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2087,119 +2709,401 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Paddq(dst.fp(), tmp2.fp());
}
-void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
- LiftoffRegister src) {
- Movd(dst.fp(), src.gp());
- Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
+void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psrld(kScratchDoubleReg, static_cast<byte>(1));
+ Andps(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pcmpeqd(dst.fp(), dst.fp());
+ Psrld(dst.fp(), static_cast<byte>(1));
+ Andps(dst.fp(), src.fp());
+ }
}
-void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
- Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
+void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Pslld(kScratchDoubleReg, static_cast<byte>(31));
+ Xorps(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pcmpeqd(dst.fp(), dst.fp());
+ Pslld(dst.fp(), static_cast<byte>(31));
+ Xorps(dst.fp(), src.fp());
+ }
}
-void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
- LiftoffRegister src1,
- LiftoffRegister src2,
- uint8_t imm_lane_idx) {
+void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Sqrtps(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
+ this, dst, lhs, rhs);
+}
+
+void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ // The minps instruction doesn't propagate NaNs and +0's in its first
+ // operand. Perform minps in both orders, merge the results, and adjust.
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
+ vminps(kScratchDoubleReg, lhs.fp(), rhs.fp());
+ vminps(dst.fp(), rhs.fp(), lhs.fp());
+ } else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
+ XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
+ movaps(kScratchDoubleReg, src);
+ minps(kScratchDoubleReg, dst.fp());
+ minps(dst.fp(), src);
} else {
- CpuFeatureScope scope(this, SSE4_1);
- if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
- pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
+ movaps(kScratchDoubleReg, lhs.fp());
+ minps(kScratchDoubleReg, rhs.fp());
+ movaps(dst.fp(), rhs.fp());
+ minps(dst.fp(), lhs.fp());
}
+ // propagate -0's and NaNs, which may be non-canonical.
+ Orps(kScratchDoubleReg, dst.fp());
+ // Canonicalize NaNs by quieting and clearing the payload.
+ Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
+ Orps(kScratchDoubleReg, dst.fp());
+ Psrld(dst.fp(), byte{10});
+ Andnps(dst.fp(), kScratchDoubleReg);
}
-void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
- this, dst, lhs, rhs);
+ // The maxps instruction doesn't propagate NaNs and +0's in its first
+ // operand. Perform maxps in both orders, merge the results, and adjust.
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmaxps(kScratchDoubleReg, lhs.fp(), rhs.fp());
+ vmaxps(dst.fp(), rhs.fp(), lhs.fp());
+ } else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
+ XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
+ movaps(kScratchDoubleReg, src);
+ maxps(kScratchDoubleReg, dst.fp());
+ maxps(dst.fp(), src);
+ } else {
+ movaps(kScratchDoubleReg, lhs.fp());
+ maxps(kScratchDoubleReg, rhs.fp());
+ movaps(dst.fp(), rhs.fp());
+ maxps(dst.fp(), lhs.fp());
+ }
+ // Find discrepancies.
+ Xorps(dst.fp(), kScratchDoubleReg);
+ // Propagate NaNs, which may be non-canonical.
+ Orps(kScratchDoubleReg, dst.fp());
+ // Propagate sign discrepancy and (subtle) quiet NaNs.
+ Subps(kScratchDoubleReg, dst.fp());
+ // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ Cmpps(dst.fp(), kScratchDoubleReg, int8_t{3});
+ Psrld(dst.fp(), byte{10});
+ Andnps(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psrlq(kScratchDoubleReg, static_cast<byte>(1));
+ Andpd(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pcmpeqd(dst.fp(), dst.fp());
+ Psrlq(dst.fp(), static_cast<byte>(1));
+ Andpd(dst.fp(), src.fp());
+ }
}
-void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vpsubd, &Assembler::psubd>(this, dst, lhs,
- rhs);
+void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
+ LiftoffRegister src) {
+ if (dst.fp() == src.fp()) {
+ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+ Psllq(kScratchDoubleReg, static_cast<byte>(63));
+ Xorpd(dst.fp(), kScratchDoubleReg);
+ } else {
+ Pcmpeqd(dst.fp(), dst.fp());
+ Psllq(dst.fp(), static_cast<byte>(63));
+ Xorpd(dst.fp(), src.fp());
+ }
}
-void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Sqrtpd(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
- this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
- LiftoffRegister src) {
- Movd(dst.fp(), src.gp());
- Pshuflw(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
- Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
+void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
- Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
+void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
- LiftoffRegister lhs,
- uint8_t imm_lane_idx) {
- Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
- movsxwl(dst.gp(), dst.gp());
+void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
- LiftoffRegister src1,
- LiftoffRegister src2,
- uint8_t imm_lane_idx) {
+void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ // The minpd instruction doesn't propagate NaNs and +0's in its first
+ // operand. Perform minpd in both orders, merge the results, and adjust.
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
+ vminpd(kScratchDoubleReg, lhs.fp(), rhs.fp());
+ vminpd(dst.fp(), rhs.fp(), lhs.fp());
+ } else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
+ XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
+ movapd(kScratchDoubleReg, src);
+ minpd(kScratchDoubleReg, dst.fp());
+ minpd(dst.fp(), src);
} else {
- if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
- pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
+ movapd(kScratchDoubleReg, lhs.fp());
+ minpd(kScratchDoubleReg, rhs.fp());
+ movapd(dst.fp(), rhs.fp());
+ minpd(dst.fp(), lhs.fp());
}
+ // propagate -0's and NaNs, which may be non-canonical.
+ Orpd(kScratchDoubleReg, dst.fp());
+ // Canonicalize NaNs by quieting and clearing the payload.
+ Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
+ Orpd(kScratchDoubleReg, dst.fp());
+ Psrlq(dst.fp(), 13);
+ Andnpd(dst.fp(), kScratchDoubleReg);
}
-void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
+void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
- this, dst, lhs, rhs);
+ // The maxpd instruction doesn't propagate NaNs and +0's in its first
+ // operand. Perform maxpd in both orders, merge the results, and adjust.
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vmaxpd(kScratchDoubleReg, lhs.fp(), rhs.fp());
+ vmaxpd(dst.fp(), rhs.fp(), lhs.fp());
+ } else if (dst.fp() == lhs.fp() || dst.fp() == rhs.fp()) {
+ XMMRegister src = dst.fp() == lhs.fp() ? rhs.fp() : lhs.fp();
+ movapd(kScratchDoubleReg, src);
+ maxpd(kScratchDoubleReg, dst.fp());
+ maxpd(dst.fp(), src);
+ } else {
+ movapd(kScratchDoubleReg, lhs.fp());
+ maxpd(kScratchDoubleReg, rhs.fp());
+ movapd(dst.fp(), rhs.fp());
+ maxpd(dst.fp(), lhs.fp());
+ }
+ // Find discrepancies.
+ Xorpd(dst.fp(), kScratchDoubleReg);
+ // Propagate NaNs, which may be non-canonical.
+ Orpd(kScratchDoubleReg, dst.fp());
+ // Propagate sign discrepancy and (subtle) quiet NaNs.
+ Subpd(kScratchDoubleReg, dst.fp());
+ // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ Cmppd(dst.fp(), kScratchDoubleReg, int8_t{3});
+ Psrlq(dst.fp(), 13);
+ Andnpd(dst.fp(), kScratchDoubleReg);
+}
+
+void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
+ &Assembler::packsswb>(this, dst, lhs,
+ rhs);
}
-void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vpsubw, &Assembler::psubw>(this, dst, lhs,
- rhs);
+void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
+ &Assembler::packuswb>(this, dst, lhs,
+ rhs);
}
-void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
+void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
+ &Assembler::packssdw>(this, dst, lhs,
+ rhs);
+}
+
+void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
+ &Assembler::packusdw>(this, dst, lhs,
+ rhs, SSE4_1);
+}
+
+void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pmovsxbw(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Palignr(dst.fp(), src.fp(), static_cast<uint8_t>(8));
+ Pmovsxbw(dst.fp(), dst.fp());
+}
+
+void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pmovzxbw(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Palignr(dst.fp(), src.fp(), static_cast<uint8_t>(8));
+ Pmovzxbw(dst.fp(), dst.fp());
+}
+
+void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pmovsxwd(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Palignr(dst.fp(), src.fp(), static_cast<uint8_t>(8));
+ Pmovsxwd(dst.fp(), dst.fp());
+}
+
+void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pmovzxwd(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Palignr(dst.fp(), src.fp(), static_cast<uint8_t>(8));
+ Pmovzxwd(dst.fp(), dst.fp());
+}
+
+void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
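+ // andnps computes ~first & second, so pass the operands as (rhs, lhs) to
+ // get lhs & ~rhs.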
+ liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
+ this, dst, rhs, lhs);
+}
+
+void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
- LiftoffRegister src) {
- Movd(dst.fp(), src.gp());
- Pxor(kScratchDoubleReg, kScratchDoubleReg);
- Pshufb(dst.fp(), kScratchDoubleReg);
+void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ LiftoffRegister rhs) {
+ liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
+ this, dst, lhs, rhs);
}
-void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
+void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pabsb(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pabsw(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
+ LiftoffRegister src) {
+ Pabsd(dst.fp(), src.fp());
+}
+
+void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
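+ // Pextrb zero-extends the byte into the 32-bit register, so sign-extend
+ // afterwards for the signed variant.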
Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
+ movsxbl(dst.gp(), dst.gp());
}
-void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
+void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
- movsxbl(dst.gp(), dst.gp());
+}
+
+void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
+ Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
+ movsxwl(dst.gp(), dst.gp());
+}
+
+void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
+ Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
+}
+
+void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
+ Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
+}
+
+void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
+ Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
+}
+
+void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
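+ // Shuffle the requested lane into element 0; the upper elements are
+ // irrelevant for the extracted scalar.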
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vshufps(dst.fp(), lhs.fp(), lhs.fp(), imm_lane_idx);
+ } else {
+ if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
+ if (imm_lane_idx != 0) shufps(dst.fp(), dst.fp(), imm_lane_idx);
+ }
+}
+
+void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
+ LiftoffRegister lhs,
+ uint8_t imm_lane_idx) {
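+ // Extract the lane through a general-purpose scratch register, then move
+ // it back into an XMM register.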
+ Pextrq(kScratchRegister, lhs.fp(), static_cast<int8_t>(imm_lane_idx));
+ Movq(dst.fp(), kScratchRegister);
}
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
@@ -2216,77 +3120,80 @@ void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
}
}
-void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
- this, dst, lhs, rhs);
+void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ uint8_t imm_lane_idx) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
+ } else {
+ if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
+ pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
+ }
}
-void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- liftoff::EmitSimdSub<&Assembler::vpsubb, &Assembler::psubb>(this, dst, lhs,
- rhs);
+void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ uint8_t imm_lane_idx) {
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
+ } else {
+ CpuFeatureScope scope(this, SSE4_1);
+ if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
+ pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
+ }
}
-void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
- LiftoffRegister rhs) {
- static constexpr RegClass tmp_rc = reg_class_for(ValueType::kS128);
- LiftoffRegister tmp =
- GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(dst, lhs, rhs));
+void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ uint8_t imm_lane_idx) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
- // I16x8 view of I8x16
- // left = AAaa AAaa ... AAaa AAaa
- // right= BBbb BBbb ... BBbb BBbb
- // t = 00AA 00AA ... 00AA 00AA
- // s = 00BB 00BB ... 00BB 00BB
- vpsrlw(tmp.fp(), lhs.fp(), 8);
- vpsrlw(kScratchDoubleReg, rhs.fp(), 8);
- // t = I16x8Mul(t0, t1)
- //    => __PP __PP ...  __PP  __PP
- vpmullw(tmp.fp(), tmp.fp(), kScratchDoubleReg);
- // s = left * 256
- vpsllw(kScratchDoubleReg, lhs.fp(), 8);
- // dst = I16x8Mul(left * 256, right)
- //    => pp__ pp__ ...  pp__  pp__
- vpmullw(dst.fp(), kScratchDoubleReg, rhs.fp());
- // dst = I16x8Shr(dst, 8)
- //    => 00pp 00pp ...  00pp  00pp
- vpsrlw(dst.fp(), dst.fp(), 8);
- // t = I16x8Shl(t, 8)
- //    => PP00 PP00 ...  PP00  PP00
- vpsllw(tmp.fp(), tmp.fp(), 8);
- // dst = I16x8Or(dst, t)
- //    => PPpp PPpp ...  PPpp  PPpp
- vpor(dst.fp(), dst.fp(), tmp.fp());
+ vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
} else {
- if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
- // I16x8 view of I8x16
- // left = AAaa AAaa ... AAaa AAaa
- // right= BBbb BBbb ... BBbb BBbb
- // t = 00AA 00AA ... 00AA 00AA
- // s = 00BB 00BB ... 00BB 00BB
- movaps(tmp.fp(), dst.fp());
- movaps(kScratchDoubleReg, rhs.fp());
- psrlw(tmp.fp(), 8);
- psrlw(kScratchDoubleReg, 8);
- // dst = left * 256
- psllw(dst.fp(), 8);
- // t = I16x8Mul(t, s)
- //    => __PP __PP ...  __PP  __PP
- pmullw(tmp.fp(), kScratchDoubleReg);
- // dst = I16x8Mul(left * 256, right)
- //    => pp__ pp__ ...  pp__  pp__
- pmullw(dst.fp(), rhs.fp());
- // t = I16x8Shl(t, 8)
- //    => PP00 PP00 ...  PP00  PP00
- psllw(tmp.fp(), 8);
- // dst = I16x8Shr(dst, 8)
- //    => 00pp 00pp ...  00pp  00pp
- psrlw(dst.fp(), 8);
- // dst = I16x8Or(dst, t)
- //    => PPpp PPpp ...  PPpp  PPpp
- por(dst.fp(), tmp.fp());
+ CpuFeatureScope scope(this, SSE4_1);
+ if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
+ pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
+ }
+}
+
+void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ uint8_t imm_lane_idx) {
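+ // Bits [5:4] of the insertps immediate select the destination element;
+ // the source element and zero mask stay 0.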
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
+ } else {
+ CpuFeatureScope scope(this, SSE4_1);
+ if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
+ insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
+ }
+}
+
+void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
+ LiftoffRegister src1,
+ LiftoffRegister src2,
+ uint8_t imm_lane_idx) {
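+ // Lane 0 is replaced by blending in the low four words of {src2}; lane 1
+ // by movlhps, which copies the low 64 bits of {src2} into the high half.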
+ if (CpuFeatures::IsSupported(AVX)) {
+ CpuFeatureScope scope(this, AVX);
+ if (imm_lane_idx == 0) {
+ vpblendw(dst.fp(), src1.fp(), src2.fp(), 0b00001111);
+ } else {
+ vmovlhps(dst.fp(), src1.fp(), src2.fp());
+ }
+ } else {
+ CpuFeatureScope scope(this, SSE4_1);
+ if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
+ if (imm_lane_idx == 0) {
+ pblendw(dst.fp(), src2.fp(), 0b00001111);
+ } else {
+ movlhps(dst.fp(), src2.fp());
+ }
}
}