Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2017-09-29 23:30:55 +0300
committerTom Stellard <tstellar@redhat.com>2017-09-29 23:30:55 +0300
commit7507fd1c7198e0ff961d4359040b63e64e073a31 (patch)
treefa9c71195279e6d4df4ba0e44da2643d640df060
parent4041da7a87b3ed891fc0021082b72fb3e95dc374 (diff)
Merging r314251:
------------------------------------------------------------------------ r314251 | gberry | 2017-09-26 14:40:41 -0700 (Tue, 26 Sep 2017) | 16 lines [AArch64][Falkor] Fix correctness bug in falkor prefetcher fix pass and correct some opcode tag computations. Summary: This addresses a correctness bug for LD[1234]*_POST opcodes that have the prefetcher fix applied to them: the base register was not being written back from the temp after being incremented, so it would appear to never be incremented. Also, fix some opcode tag computations based on some updated HW details to get better tag avoidance and thus better prefetcher performance. Reviewers: mcrosier Subscribers: aemerson, rengolin, javed.absar, kristof.beyls Differential Revision: https://reviews.llvm.org/D38256 ------------------------------------------------------------------------ llvm-svn: 314554
-rw-r--r--llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp101
-rw-r--r--llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir277
2 files changed, 318 insertions, 60 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 0627976c9146..49f5c51d4120 100644
--- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -220,27 +220,27 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
default:
return None;
+ case AArch64::LD1i64:
+ case AArch64::LD2i64:
+ DestRegIdx = 0;
+ BaseRegIdx = 3;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
case AArch64::LD1i8:
case AArch64::LD1i16:
case AArch64::LD1i32:
- case AArch64::LD1i64:
case AArch64::LD2i8:
case AArch64::LD2i16:
case AArch64::LD2i32:
- case AArch64::LD2i64:
case AArch64::LD3i8:
case AArch64::LD3i16:
case AArch64::LD3i32:
+ case AArch64::LD3i64:
case AArch64::LD4i8:
case AArch64::LD4i16:
case AArch64::LD4i32:
- DestRegIdx = 0;
- BaseRegIdx = 3;
- OffsetIdx = -1;
- IsPrePost = false;
- break;
-
- case AArch64::LD3i64:
case AArch64::LD4i64:
DestRegIdx = -1;
BaseRegIdx = 3;
@@ -264,23 +264,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Rv4s:
case AArch64::LD1Rv8h:
case AArch64::LD1Rv16b:
- case AArch64::LD1Twov1d:
- case AArch64::LD1Twov2s:
- case AArch64::LD1Twov4h:
- case AArch64::LD1Twov8b:
- case AArch64::LD2Twov2s:
- case AArch64::LD2Twov4s:
- case AArch64::LD2Twov8b:
- case AArch64::LD2Rv1d:
- case AArch64::LD2Rv2s:
- case AArch64::LD2Rv4s:
- case AArch64::LD2Rv8b:
DestRegIdx = 0;
BaseRegIdx = 1;
OffsetIdx = -1;
IsPrePost = false;
break;
+ case AArch64::LD1Twov1d:
+ case AArch64::LD1Twov2s:
+ case AArch64::LD1Twov4h:
+ case AArch64::LD1Twov8b:
case AArch64::LD1Twov2d:
case AArch64::LD1Twov4s:
case AArch64::LD1Twov8h:
@@ -301,10 +294,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Fourv4s:
case AArch64::LD1Fourv8h:
case AArch64::LD1Fourv16b:
+ case AArch64::LD2Twov2s:
+ case AArch64::LD2Twov4s:
+ case AArch64::LD2Twov8b:
case AArch64::LD2Twov2d:
case AArch64::LD2Twov4h:
case AArch64::LD2Twov8h:
case AArch64::LD2Twov16b:
+ case AArch64::LD2Rv1d:
+ case AArch64::LD2Rv2s:
+ case AArch64::LD2Rv4s:
+ case AArch64::LD2Rv8b:
case AArch64::LD2Rv2d:
case AArch64::LD2Rv4h:
case AArch64::LD2Rv8h:
@@ -345,32 +345,32 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
IsPrePost = false;
break;
+ case AArch64::LD1i64_POST:
+ case AArch64::LD2i64_POST:
+ DestRegIdx = 1;
+ BaseRegIdx = 4;
+ OffsetIdx = 5;
+ IsPrePost = true;
+ break;
+
case AArch64::LD1i8_POST:
case AArch64::LD1i16_POST:
case AArch64::LD1i32_POST:
- case AArch64::LD1i64_POST:
case AArch64::LD2i8_POST:
case AArch64::LD2i16_POST:
case AArch64::LD2i32_POST:
- case AArch64::LD2i64_POST:
case AArch64::LD3i8_POST:
case AArch64::LD3i16_POST:
case AArch64::LD3i32_POST:
+ case AArch64::LD3i64_POST:
case AArch64::LD4i8_POST:
case AArch64::LD4i16_POST:
case AArch64::LD4i32_POST:
- DestRegIdx = 1;
- BaseRegIdx = 4;
- OffsetIdx = 5;
- IsPrePost = false;
- break;
-
- case AArch64::LD3i64_POST:
case AArch64::LD4i64_POST:
DestRegIdx = -1;
BaseRegIdx = 4;
OffsetIdx = 5;
- IsPrePost = false;
+ IsPrePost = true;
break;
case AArch64::LD1Onev1d_POST:
@@ -389,23 +389,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Rv4s_POST:
case AArch64::LD1Rv8h_POST:
case AArch64::LD1Rv16b_POST:
- case AArch64::LD1Twov1d_POST:
- case AArch64::LD1Twov2s_POST:
- case AArch64::LD1Twov4h_POST:
- case AArch64::LD1Twov8b_POST:
- case AArch64::LD2Twov2s_POST:
- case AArch64::LD2Twov4s_POST:
- case AArch64::LD2Twov8b_POST:
- case AArch64::LD2Rv1d_POST:
- case AArch64::LD2Rv2s_POST:
- case AArch64::LD2Rv4s_POST:
- case AArch64::LD2Rv8b_POST:
DestRegIdx = 1;
BaseRegIdx = 2;
OffsetIdx = 3;
- IsPrePost = false;
+ IsPrePost = true;
break;
+ case AArch64::LD1Twov1d_POST:
+ case AArch64::LD1Twov2s_POST:
+ case AArch64::LD1Twov4h_POST:
+ case AArch64::LD1Twov8b_POST:
case AArch64::LD1Twov2d_POST:
case AArch64::LD1Twov4s_POST:
case AArch64::LD1Twov8h_POST:
@@ -426,10 +419,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Fourv4s_POST:
case AArch64::LD1Fourv8h_POST:
case AArch64::LD1Fourv16b_POST:
+ case AArch64::LD2Twov2s_POST:
+ case AArch64::LD2Twov4s_POST:
+ case AArch64::LD2Twov8b_POST:
case AArch64::LD2Twov2d_POST:
case AArch64::LD2Twov4h_POST:
case AArch64::LD2Twov8h_POST:
case AArch64::LD2Twov16b_POST:
+ case AArch64::LD2Rv1d_POST:
+ case AArch64::LD2Rv2s_POST:
+ case AArch64::LD2Rv4s_POST:
+ case AArch64::LD2Rv8b_POST:
case AArch64::LD2Rv2d_POST:
case AArch64::LD2Rv4h_POST:
case AArch64::LD2Rv8h_POST:
@@ -467,7 +467,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
DestRegIdx = -1;
BaseRegIdx = 2;
OffsetIdx = 3;
- IsPrePost = false;
+ IsPrePost = true;
break;
case AArch64::LDRBBroW:
@@ -572,16 +572,19 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
IsPrePost = true;
break;
+ case AArch64::LDNPDi:
+ case AArch64::LDNPQi:
+ case AArch64::LDNPSi:
case AArch64::LDPQi:
+ case AArch64::LDPDi:
+ case AArch64::LDPSi:
DestRegIdx = -1;
BaseRegIdx = 2;
OffsetIdx = 3;
IsPrePost = false;
break;
- case AArch64::LDPDi:
case AArch64::LDPSWi:
- case AArch64::LDPSi:
case AArch64::LDPWi:
case AArch64::LDPXi:
DestRegIdx = 0;
@@ -592,18 +595,18 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LDPQpost:
case AArch64::LDPQpre:
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
DestRegIdx = -1;
BaseRegIdx = 3;
OffsetIdx = 4;
IsPrePost = true;
break;
- case AArch64::LDPDpost:
- case AArch64::LDPDpre:
case AArch64::LDPSWpost:
case AArch64::LDPSWpre:
- case AArch64::LDPSpost:
- case AArch64::LDPSpre:
case AArch64::LDPWpost:
case AArch64::LDPWpre:
case AArch64::LDPXpost:
diff --git a/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir
index 54c8b16a9b43..298e8a0c6d7b 100644
--- a/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ b/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -1,12 +1,7 @@
# RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s
---- |
- @g = external global i32
-
- define void @hwpf1() { ret void }
- define void @hwpf2() { ret void }
-...
---
-# Verify that the tag collision between the loads is resolved.
+# Verify that the tag collision between the loads is resolved for various load opcodes.
+
# CHECK-LABEL: name: hwpf1
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWui %[[BASE]], 0
@@ -17,7 +12,7 @@ body: |
bb.0:
liveins: %w0, %x1
- %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -28,19 +23,147 @@ body: |
RET_ReallyLR
...
---
-# Verify that the tag collision between the loads is resolved and written back for post increment addressing.
# CHECK-LABEL: name: hwpf2
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Onev1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Twov1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPQi %[[BASE]]
+# CHECK: LDRWui %x1, 3
+name: hwpf6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 3
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXi %[[BASE]]
+# CHECK: LDRWui %x1, 2
+name: hwpf7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# Verify that the tag collision between the loads is resolved and written back
+# for post increment addressing for various load opcodes.
+
+# CHECK-LABEL: name: hwpfinc1
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWpost %[[BASE]], 0
# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
# CHECK: LDRWui %x1, 1
-name: hwpf2
+name: hwpfinc1
tracksRegLiveness: true
body: |
bb.0:
liveins: %w0, %x1
- %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -50,3 +173,135 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+# CHECK-LABEL: name: hwpfinc2
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 1
+name: hwpfinc2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 132
+name: hwpfinc3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Rv1d_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 252
+name: hwpfinc4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 252
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD3Threev2s_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWroX %x17, %x0
+name: hwpfinc5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWroX %x17, %x0, 0, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPDpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w16 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w18 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...