Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp12
-rw-r--r--llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll99
-rw-r--r--openmp/libomptarget/DeviceRTL/include/Synchronization.h6
-rw-r--r--openmp/libomptarget/DeviceRTL/src/Mapping.cpp6
-rw-r--r--openmp/libomptarget/DeviceRTL/src/Parallelism.cpp5
-rw-r--r--openmp/libomptarget/DeviceRTL/src/State.cpp4
-rw-r--r--openmp/libomptarget/DeviceRTL/src/Synchronization.cpp6
7 files changed, 127 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index ef2384faa273..0b42fc151991 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -499,6 +499,18 @@ struct OMPInformationCache : public InformationCache {
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
+ // functions, except if `optnone` is present.
+ if (isOpenMPDevice(M)) {
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.hasFnAttribute(Attribute::NoInline) &&
+ F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
+ }
+
// TODO: We should attach the attributes defined in OMPKinds.def.
}
diff --git a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll
new file mode 100644
index 000000000000..349e2799de27
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
+; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s
+; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s
+
+declare void @unknown()
+
+; __kmpc functions
+define void @__kmpc_noinline() noinline nounwind {
+; CHECK: Function Attrs: nounwind
+; CHECK-LABEL: @__kmpc_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+; omp_X functions
+define void @omp_noinline() noinline nounwind {
+; CHECK: Function Attrs: nounwind
+; CHECK-LABEL: @omp_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+; _OMP namespace
+define void @_ZN4_OMP_noinline() noinline nounwind {
+; CHECK: Function Attrs: nounwind
+; CHECK-LABEL: @_ZN4_OMP_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+
+; Negative tests:
+
+define void @__kmpc_noinline_optnone() noinline optnone nounwind {
+; CHECK: Function Attrs: noinline nounwind optnone
+; CHECK-LABEL: @__kmpc_noinline_optnone(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+define void @omp_noinline_optnone() noinline optnone nounwind {
+; CHECK: Function Attrs: noinline nounwind optnone
+; CHECK-LABEL: @omp_noinline_optnone(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+; _OMP namespace
+define void @_ZN4_OMP_noinline_optnone() noinline optnone nounwind {
+; CHECK: Function Attrs: noinline nounwind optnone
+; CHECK-LABEL: @_ZN4_OMP_noinline_optnone(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+define void @a___kmpc_noinline() noinline nounwind {
+; CHECK: Function Attrs: noinline nounwind
+; CHECK-LABEL: @a___kmpc_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+define void @a_omp_noinline() noinline nounwind {
+; CHECK: Function Attrs: noinline nounwind
+; CHECK-LABEL: @a_omp_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+define void @a__ZN4_OMP_noinline() noinline nounwind {
+; CHECK: Function Attrs: noinline nounwind
+; CHECK-LABEL: @a__ZN4_OMP_noinline(
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
index 4b8898f2ffb7..e33f37a659af 100644
--- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h
+++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
@@ -29,13 +29,15 @@ void threads();
/// Synchronizing threads is allowed even if they all hit different instances of
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
-/// restrictive in that it requires all threads to hit the same instance.
+/// restrictive in that it requires all threads to hit the same instance. The
+/// `noinline` attribute preserves this information until the openmp-opt pass
+/// runs; that pass then removes the attribute.
///{
#pragma omp begin assumes ext_aligned_barrier
/// Synchronize all threads in a block; they are reaching the same
/// instruction (hence all threads in the block are "aligned").
-void threadsAligned();
+__attribute__((noinline)) void threadsAligned();
#pragma omp end assumes
///}
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index b161c5538223..172bbbff68f8 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -289,17 +289,17 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
///}
extern "C" {
-uint32_t __kmpc_get_hardware_thread_id_in_block() {
+__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
FunctionTracingRAII();
return mapping::getThreadIdInBlock();
}
-uint32_t __kmpc_get_hardware_num_threads_in_block() {
+__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
FunctionTracingRAII();
return impl::getNumHardwareThreadsInBlock();
}
-uint32_t __kmpc_get_warp_size() {
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
FunctionTracingRAII();
return impl::getWarpSize();
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 5b133b009a7f..27d1ff2e5a55 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -243,7 +243,8 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
__kmpc_end_sharing_variables();
}
-bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
+__attribute__((noinline)) bool
+__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
FunctionTracingRAII();
// Work function and arguments for L1 parallel region.
*WorkFn = state::ParallelRegionFn;
@@ -258,7 +259,7 @@ bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
return ThreadIsActive;
}
-void __kmpc_kernel_end_parallel() {
+__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
FunctionTracingRAII();
// In case we have modified an ICV for this thread before a ThreadState was
// created. We drop it now to not contaminate the next parallel region.
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 92847f79829d..7a73330aa4cc 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -393,12 +393,12 @@ int omp_get_initial_device(void) { return -1; }
}
extern "C" {
-void *__kmpc_alloc_shared(uint64_t Bytes) {
+__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
FunctionTracingRAII();
return memory::allocShared(Bytes, "Frontend alloc shared");
}
-void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
+__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
FunctionTracingRAII();
memory::freeShared(Ptr, Bytes, "Frontend free shared");
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 350da0b460f1..43278715be8d 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -358,12 +358,14 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
impl::namedBarrier();
}
-void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
+__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
+ int32_t TId) {
FunctionTracingRAII();
synchronize::threadsAligned();
}
-void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) {
+__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
+ int32_t TId) {
FunctionTracingRAII();
synchronize::threads();
}