Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2022-11-12 20:57:07 +0300
committer Matt Arsenault <arsenm2@gmail.com> 2022-11-12 21:36:55 +0300
commit 0a376d1034d3dfc54a3486d687bd56a0e00caa6b (patch)
tree 2916948b0127c96bd4bd216f40f3fb04d48c5f6d
parent 5247ae9de5122d2e77ffe03099a090749324d781 (diff)
AMDGPU: Add some tests for i1 sitofp/uitofp-like selects
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll 157
-rw-r--r-- llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll 154
2 files changed, 309 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 7b820ab1e717..f4ff6c13150c 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -225,6 +225,159 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) {
; VI-NEXT: s_setpc_b64 s[30:31]
%fp = sitofp i8 %in to double
ret double %fp
+ }
+
+; Kernel variant: stores -1.0 to %out when %in == 0 and 0.0 otherwise, i.e. the
+; two values a sitofp of the i1 compare result to double would produce.
+; The expected code selects only the high dword (0xbff00000 or 0) with a scalar
+; compare/select; the low dword is a constant 0.
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s2, s[4:5], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s2, 0
+; CI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v1, s2
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double -1.0, double 0.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Non-kernel variant: stores -1.0 to %out when %in == 0 and 0.0 otherwise.
+; The expected code compares v2 against 0 and uses v_cndmask_b32 to pick the
+; high dword (0xbff00000 or 0); the low dword is a constant 0.
+define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double -1.0, double 0.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Integer kernel variant: selects the i64 constant 0xbff0000000000000 (the bit
+; pattern of double -1.0) when %in == 0 and 0 otherwise, then stores it.
+; The expected instructions are the same as for the f64 kernel test of this
+; select; presumably that equivalence is the point of the test -- confirm.
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s2, s[4:5], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s2, 0
+; CI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v1, s2
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
+ store i64 %select, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Integer non-kernel variant: selects the i64 constant 0xbff0000000000000 (the
+; bit pattern of double -1.0) when %in == 0 and 0 otherwise, then stores it.
+; The expected code uses v_cmp_eq_u32 + v_cndmask_b32 on the high dword only.
+define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
+ store i64 %select, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Swapped-operand non-kernel variant: stores 0.0 when %in == 0 and -1.0
+; otherwise. The expected code keeps the equality compare and uses the _e64
+; encoding of v_cndmask_b32 with the constant 0 as the value taken when vcc is
+; set.
+; TODO: This should swap the selected order / invert the compare and do it.
+; NOTE(review): "do it" presumably means treating this like the unswapped
+; select tests -- confirm against the change that resolves the TODO.
+define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 0.0, double -1.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Swapped-operand kernel variant: stores 0.0 when %in == 0 and -1.0 otherwise.
+; The expected code keeps the equality compare and lets s_cselect_b32 choose
+; between 0 and 0xbff00000 for the high dword.
+; TODO: This should swap the selected order / invert the compare and do it.
+; NOTE(review): "do it" presumably means treating this like the unswapped
+; select tests -- confirm against the change that resolves the TODO.
+define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s2, s[4:5], 0x2
+; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s2, 0
+; CI-NEXT: s_cselect_b32 s2, 0, 0xbff00000
+; CI-NEXT: v_mov_b32_e32 v3, s1
+; CI-NEXT: v_mov_b32_e32 v1, s2
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: s_endpgm
+;
+; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0, 0xbff00000
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 0.0, double -1.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index d4286b8ab95a..ddb9aef95603 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -396,3 +396,157 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) {
%fp = uitofp i8 %in to double
ret double %fp
}
+
+; Kernel variant: stores 1.0 to %out when %in == 0 and 0.0 otherwise, i.e. the
+; two values a uitofp of the i1 compare result to double would produce.
+; The expected code selects only the high dword (0x3ff00000 or 0) with a scalar
+; compare/select; the low dword is a constant 0.
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_select_uint_to_fp_i1_vals_f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s2, s[4:5], 0x2
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_cmp_eq_u32 s2, 0
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: v_mov_b32_e32 v1, s2
+; SI-NEXT: v_mov_b32_e32 v2, s0
+; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_select_uint_to_fp_i1_vals_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 1.0, double 0.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Non-kernel variant: stores 1.0 to %out when %in == 0 and 0.0 otherwise.
+; The expected code compares v2 against 0 and uses v_cndmask_b32 to pick the
+; high dword (0x3ff00000 or 0); the low dword is a constant 0.
+define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 1.0, double 0.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Integer kernel variant: selects the i64 constant 0x3ff0000000000000 (the bit
+; pattern of double 1.0) when %in == 0 and 0 otherwise, then stores it.
+; The expected instructions are the same as for the f64 kernel test of this
+; select; presumably that equivalence is the point of the test -- confirm.
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_select_uint_to_fp_i1_vals_i64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s2, s[4:5], 0x2
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_cmp_eq_u32 s2, 0
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: v_mov_b32_e32 v1, s2
+; SI-NEXT: v_mov_b32_e32 v2, s0
+; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_select_uint_to_fp_i1_vals_i64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
+ store i64 %select, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Integer non-kernel variant: selects the i64 constant 0x3ff0000000000000 (the
+; bit pattern of double 1.0) when %in == 0 and 0 otherwise, then stores it.
+; The expected code uses v_cmp_eq_u32 + v_cndmask_b32 on the high dword only.
+define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
+ store i64 %select, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Swapped-operand kernel variant: stores 0.0 when %in == 0 and 1.0 otherwise.
+; The expected code keeps the equality compare and lets s_cselect_b32 choose
+; between 0 and 0x3ff00000 for the high dword.
+; TODO: This should swap the selected order / invert the compare and do it.
+; NOTE(review): "do it" presumably means treating this like the unswapped
+; select tests -- confirm against the change that resolves the TODO.
+define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s2, s[4:5], 0x2
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_cmp_eq_u32 s2, 0
+; SI-NEXT: s_cselect_b32 s2, 0, 0x3ff00000
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: v_mov_b32_e32 v1, s2
+; SI-NEXT: v_mov_b32_e32 v2, s0
+; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[4:5], 0x8
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_cmp_eq_u32 s2, 0
+; VI-NEXT: s_cselect_b32 s2, 0, 0x3ff00000
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 0.0, double 1.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; Swapped-operand non-kernel variant: stores 0.0 when %in == 0 and 1.0
+; otherwise. The expected code keeps the equality compare and uses the _e64
+; encoding of v_cndmask_b32 with the constant 0 as the value taken when vcc is
+; set.
+; NOTE(review): the other swapped-operand select tests carry a TODO about
+; swapping the select order / inverting the compare; it presumably applies
+; here as well -- confirm.
+define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq i32 %in, 0
+ %select = select i1 %cmp, double 0.0, double 1.0
+ store double %select, double addrspace(1)* %out, align 8
+ ret void
+}