Merging r324581:

------------------------------------------------------------------------ r324581 | sjoerdmeijer | 2018-02-08 00:39:05 -0800 (Thu, 08 Feb 2018) | 12 lines [AArch64] Don't materialize 0 with "fmov h0, .." when FullFP16 is not supported We were generating "fmov h0, wzr" instructions when FullFP16 is not enabled. I've not added any tests, because the problem was visible in: test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll, which I had to change: I don't think Cyclone has FullFP16 enabled by default, so it shouldn't be using this v8.2a instruction. I've also removed these rdar tags, please shout if there are any objections. Differential Revision: https://reviews.llvm.org/D43020 ------------------------------------------------------------------------ llvm-svn: 332655
author: Tom Stellard <tstellar@redhat.com> 2018-05-17 22:34:15 +0300
committer: Tom Stellard <tstellar@redhat.com> 2018-05-17 22:34:15 +0300
commit: c3fd54c8b40d168329df6c1123a77b13af500b01 (patch)
tree: dc55a8ddc6db7e8a39cceb82398d2ea8d7efcbc2
parent: 8af7f5b6e31e0cf2879dfa03798440b1b5798508 (diff)
3 files changed, 11 insertions, 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6b46f94abe40..233d6be247c2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
   // FIXME: We should be able to handle f128 as well with a clever lowering.
-  if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
+  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
+                          (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
     DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
     return true;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 79826ca2ed8d..040011d858e7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">;
 // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
 let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
 def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
-    Sched<[WriteF]>;
+    Sched<[WriteF]>, Requires<[HasFullFP16]>;
 def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
     Sched<[WriteF]>;
 def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 3acb1892dd96..664078fb7e94 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,12 +1,10 @@
 ; RUN: llc -mtriple=arm64-apple-ios   -mcpu=cyclone   < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
+; RUN: llc -mtriple=arm64-apple-ios   -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo      < %s | FileCheck %s -check-prefixes=ALL,OTHERS
 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor    < %s | FileCheck %s -check-prefixes=ALL,OTHERS
 
-; rdar://11481771
-; rdar://13713797
-
 declare void @bar(half, float, double, <2 x double>)
 declare void @bari(i32, i32)
 declare void @barl(i64, i64)
@@ -16,11 +14,14 @@ define void @t1() nounwind ssp {
 entry:
 ; ALL-LABEL: t1:
 ; ALL-NOT: fmov
-; CYCLONE: fmov h0, wzr
+; ALL:     ldr h0,{{.*}}
 ; CYCLONE: fmov s1, wzr
 ; CYCLONE: fmov d2, xzr
 ; CYCLONE: movi.16b v3, #0
-; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; CYCLONE-FULLFP16: fmov h0, wzr
+; CYCLONE-FULLFP16: fmov s1, wzr
+; CYCLONE-FULLFP16: fmov d2, xzr
+; CYCLONE-FULLFP16: movi.16b v3, #0
 ; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
 ; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
 ; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
@@ -53,6 +54,8 @@ define void @t4() nounwind ssp {
 ; ALL-NOT: fmov
 ; CYCLONE: fmov s{{[0-3]+}}, wzr
 ; CYCLONE: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
 ; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
 ; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
   tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
author	Tom Stellard <tstellar@redhat.com>	2018-05-17 22:34:15 +0300
committer	Tom Stellard <tstellar@redhat.com>	2018-05-17 22:34:15 +0300
commit	c3fd54c8b40d168329df6c1123a77b13af500b01 (patch)
tree	dc55a8ddc6db7e8a39cceb82398d2ea8d7efcbc2
parent	8af7f5b6e31e0cf2879dfa03798440b1b5798508 (diff)