Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/clang
diff options
context:
space:
mode:
author Erich Keane <erich.keane@intel.com> 2022-03-11 00:31:52 +0300
committer Erich Keane <erich.keane@intel.com> 2022-03-14 16:14:30 +0300
commit dc152659b4527cc2e5f75cc33f36df67c7d5db26 (patch)
tree 04caf53b672571f91c0a267a109b95b8e3315cf9 /clang
parent 975284ab4b18232fc10dccd14527981ebcb8663e (diff)
Have cpu-specific variants set 'tune-cpu' as an optimization hint
Due to various implementation constraints, despite the programmer choosing a 'processor', cpu_dispatch/cpu_specific needs to use the 'feature' list of a processor to identify it. This results in the identified processor in source-code not being propagated to the optimizer, and thus, not able to be tuned for. This patch changes this to use the actual cpu as written for tune-cpu so that opt can make decisions based on the cpu-as-spelled, which should better match the behavior expected by the programmer. Note that the 'valid' list of processors for x86 is in llvm/include/llvm/Support/X86TargetParser.def. At the moment, this list contains only Intel processors, but other vendors may wish to add their own entries as 'alias'es (or with different feature lists!). If this is not done, there are two potential performance issues with the patch, but I believe them to be worth it in light of the improvements to behavior and performance. 1- In the event that the user spelled "ProcessorB", but we only have the features available to test for "ProcessorA" (where A is B minus features), AND there is an optimization opportunity for "B" that negatively affects "A", the optimizer will likely choose to do so. 2- In the event that the user spelled VendorI's processor, and the feature list allows it to run on VendorA's processor of similar features, AND there is an optimization opportunity for VendorI's that negatively affects "A"s, the optimizer will likely choose to do so. This can be fixed by adding an alias to X86TargetParser.def. Differential Revision: https://reviews.llvm.org/D121410
Diffstat (limited to 'clang')
-rw-r--r-- clang/include/clang/Basic/TargetInfo.h 7
-rw-r--r-- clang/lib/Basic/Targets/X86.cpp 18
-rw-r--r-- clang/lib/Basic/Targets/X86.h 2
-rw-r--r-- clang/lib/CodeGen/CodeGenModule.cpp 7
-rw-r--r-- clang/test/CodeGen/attr-cpuspecific-avx-abi.c 2
-rw-r--r-- clang/test/CodeGen/attr-cpuspecific.c 3
6 files changed, 34 insertions, 5 deletions
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 34884f621e77..94da7ac593a0 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1375,6 +1375,13 @@ public:
"cpu_specific Multiversioning not implemented on this target");
}
+ // Get the value for the 'tune-cpu' flag for a cpu_specific variant with the
+ // programmer-specified 'Name'.
+ virtual StringRef getCPUSpecificTuneName(StringRef Name) const {
+ llvm_unreachable(
+ "cpu_specific Multiversioning not implemented on this target");
+ }
+
// Get a list of the features that make up the CPU option for
// cpu_specific/cpu_dispatch so that it can be passed to llvm as optimization
// options.
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5c4bd364b06a..1ec2bb9c249f 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1095,22 +1095,22 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
-#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true)
-#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true)
+#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true)
#include "llvm/Support/X86TargetParser.def"
.Default(false);
}
static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) {
return llvm::StringSwitch<StringRef>(Name)
-#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME)
#include "llvm/Support/X86TargetParser.def"
.Default(Name);
}
char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name))
-#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
+#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
#include "llvm/Support/X86TargetParser.def"
.Default(0);
}
@@ -1119,12 +1119,20 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
StringRef WholeList =
llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name))
-#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
+#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
#include "llvm/Support/X86TargetParser.def"
.Default("");
WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
}
+StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const {
+ return llvm::StringSwitch<StringRef>(Name)
+#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME)
+#include "llvm/Support/X86TargetParser.def"
+ .Default("");
+}
+
// We can't use a generic validation scheme for the cpus accepted here
// versus subtarget cpus accepted in the target attribute because the
// variables intitialized by the runtime only support the below currently
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index e0bb3c344c5b..6b49b0f28bfc 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -201,6 +201,8 @@ public:
StringRef Name,
llvm::SmallVectorImpl<StringRef> &Features) const override;
+ StringRef getCPUSpecificTuneName(StringRef Name) const override;
+
Optional<unsigned> getCPUCacheLineSize() const override;
bool validateAsmConstraint(const char *&Name,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9c76648f5f19..ad52c0df9b7e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2060,6 +2060,13 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
getTarget().isValidCPUName(ParsedAttr.Tune))
TuneCPU = ParsedAttr.Tune;
}
+
+ if (SD) {
+ // Apply the given CPU name as the 'tune-cpu' so that the optimizer can
+ // favor this processor.
+ TuneCPU = getTarget().getCPUSpecificTuneName(
+ SD->getCPUName(GD.getMultiVersionIndex())->getName());
+ }
} else {
// Otherwise just add the existing target cpu and target features to the
// function.
diff --git a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c
index ad9c82b5dbc3..9089035a7099 100644
--- a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c
+++ b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c
@@ -23,4 +23,6 @@ __m256d foo(void) { return bar_avx2(); }
// CHECK: define{{.*}} @foo.V() #[[V:[0-9]+]]
// CHECK: attributes #[[A]] = {{.*}}"target-features"="+avx,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "tune-cpu"="generic"
// CHECK: attributes #[[V]] = {{.*}}"target-features"="+avx,+avx2,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "tune-cpu"="haswell"
diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c
index 15ecd77de2a6..90fb1307523e 100644
--- a/clang/test/CodeGen/attr-cpuspecific.c
+++ b/clang/test/CodeGen/attr-cpuspecific.c
@@ -340,5 +340,8 @@ ATTR(cpu_specific(knl))
void OrderDispatchUsageSpecific(void) {}
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "tune-cpu"="ivybridge"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "tune-cpu"="knl"
// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
+// CHECK-SAME: "tune-cpu"="atom"