diff options
author | Erich Keane <erich.keane@intel.com> | 2022-03-11 00:31:52 +0300 |
---|---|---|
committer | Erich Keane <erich.keane@intel.com> | 2022-03-14 16:14:30 +0300 |
commit | dc152659b4527cc2e5f75cc33f36df67c7d5db26 (patch) | |
tree | 04caf53b672571f91c0a267a109b95b8e3315cf9 /clang | |
parent | 975284ab4b18232fc10dccd14527981ebcb8663e (diff) |
Have cpu-specific variants set 'tune-cpu' as an optimization hint
Due to various implementation constraints, despite the programmer
choosing a 'processor', cpu_dispatch/cpu_specific needs to use the
'feature' list of a processor to identify it. As a result, the
processor identified in source code is not propagated to the
optimizer, and thus cannot be tuned for.
This patch changes to use the actual cpu as written for tune-cpu so that
opt can make decisions based on the cpu-as-spelled, which should better
match the behavior expected by the programmer.
Note that the 'valid' list of processors for x86 is in
llvm/include/llvm/Support/X86TargetParser.def. At the moment, this list
contains only Intel processors, but other vendors may wish to add their
own entries as 'alias'es (or with different feature lists!).
If this is not done, there are two potential performance issues with the
patch, but I believe them to be worth it in light of the improvements to
behavior and performance.
1- In the event that the user spelled "ProcessorB", but we only have the
features available to test for "ProcessorA" (where A is B minus features),
AND there is an optimization opportunity for "B" that negatively affects
"A", the optimizer will likely choose to apply that optimization.
2- In the event that the user spelled VendorI's processor, and the feature
list allows it to run on VendorA's processor of similar features, AND there
is an optimization opportunity for VendorI's processor that negatively
affects VendorA's, the optimizer will likely choose to apply that
optimization. This can be fixed by adding an alias to X86TargetParser.def.
Differential Revision: https://reviews.llvm.org/D121410
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/TargetInfo.h | 7 | ||||
-rw-r--r-- | clang/lib/Basic/Targets/X86.cpp | 18 | ||||
-rw-r--r-- | clang/lib/Basic/Targets/X86.h | 2 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 7 | ||||
-rw-r--r-- | clang/test/CodeGen/attr-cpuspecific-avx-abi.c | 2 | ||||
-rw-r--r-- | clang/test/CodeGen/attr-cpuspecific.c | 3 |
6 files changed, 34 insertions, 5 deletions
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 34884f621e77..94da7ac593a0 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1375,6 +1375,13 @@ public: "cpu_specific Multiversioning not implemented on this target"); } + // Get the value for the 'tune-cpu' flag for a cpu_specific variant with the + // programmer-specified 'Name'. + virtual StringRef getCPUSpecificTuneName(StringRef Name) const { + llvm_unreachable( + "cpu_specific Multiversioning not implemented on this target"); + } + // Get a list of the features that make up the CPU option for // cpu_specific/cpu_dispatch so that it can be passed to llvm as optimization // options. diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 5c4bd364b06a..1ec2bb9c249f 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1095,22 +1095,22 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const { return llvm::StringSwitch<bool>(Name) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) { return llvm::StringSwitch<StringRef>(Name) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME) #include "llvm/Support/X86TargetParser.def" .Default(Name); } char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const { return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, 
FEATURES) .Case(NAME, MANGLING) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) #include "llvm/Support/X86TargetParser.def" .Default(0); } @@ -1119,12 +1119,20 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures( StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const { StringRef WholeList = llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) #include "llvm/Support/X86TargetParser.def" .Default(""); WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); } +StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const { + return llvm::StringSwitch<StringRef>(Name) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME) +#include "llvm/Support/X86TargetParser.def" + .Default(""); +} + // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index e0bb3c344c5b..6b49b0f28bfc 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -201,6 +201,8 @@ public: StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const override; + StringRef getCPUSpecificTuneName(StringRef Name) const override; + Optional<unsigned> getCPUCacheLineSize() const override; bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9c76648f5f19..ad52c0df9b7e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2060,6 +2060,13 @@ bool 
CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, getTarget().isValidCPUName(ParsedAttr.Tune)) TuneCPU = ParsedAttr.Tune; } + + if (SD) { + // Apply the given CPU name as the 'tune-cpu' so that the optimizer can + // favor this processor. + TuneCPU = getTarget().getCPUSpecificTuneName( + SD->getCPUName(GD.getMultiVersionIndex())->getName()); + } } else { // Otherwise just add the existing target cpu and target features to the // function. diff --git a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c index ad9c82b5dbc3..9089035a7099 100644 --- a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c +++ b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c @@ -23,4 +23,6 @@ __m256d foo(void) { return bar_avx2(); } // CHECK: define{{.*}} @foo.V() #[[V:[0-9]+]] // CHECK: attributes #[[A]] = {{.*}}"target-features"="+avx,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="generic" // CHECK: attributes #[[V]] = {{.*}}"target-features"="+avx,+avx2,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="haswell" diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c index 15ecd77de2a6..90fb1307523e 100644 --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -340,5 +340,8 @@ ATTR(cpu_specific(knl)) void OrderDispatchUsageSpecific(void) {} // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="ivybridge" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="knl" // CHECK: attributes #[[O]] = 
{{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" +// CHECK-SAME: "tune-cpu"="atom" |