[OpenMP][mlir] Lowering for omp.atomic.update

This patch adds lowering from omp.atomic.update to LLVM IR. Whenever a special LLVM IR instruction is available for the operation, `atomicrmw` instruction is emitted, otherwise a compare-exchange loop based update is emitted. Depends on D119522 Reviewed By: ftynse, peixin Differential Revision: https://reviews.llvm.org/D119657
author: Shraiysh Vaishay <Shraiysh.Vaishay@amd.com> 2022-03-10 15:58:08 +0300
committer: Shraiysh Vaishay <Shraiysh.Vaishay@amd.com> 2022-03-10 15:58:51 +0300
commit: 6dd54da5a51db6ec674d69366ab883b3057c73a6 (patch)
tree: 96ce7845b5ccd955f731199d4c93cbacb69d36a9 /mlir
parent: 697f55e36823dbd91ca94a666d99f3c4ba11cacb (diff)
6 files changed, 234 insertions, 0 deletions
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 2ab5a3b0e784..08867e00d4c8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -650,6 +650,15 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update",
     atomic. Generally the region must have only one instruction, but can
     potentially have more than one instructions too. The update is sematically
     similar to a compare-exchange loop based atomic update.
+
+    The syntax of atomic update operation is different from atomic read and
+    atomic write operations. This is because only the host dialect knows how to
+    appropriately update a value. For example, while generating LLVM IR, if
+    there are no special `atomicrmw` instructions for the operation-type
+    combination in atomic update, a compare-exchange loop is generated, where
+    the core update operation is directly translated like regular operations by
+    the host dialect. The front-end must handle semantic checks for allowed
+    operations.
   }];
 
   let arguments = (ins OpenMP_PointerLikeType:$x,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 87098b9f7421..b05fa8a92f83 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1047,6 +1047,10 @@ LogicalResult AtomicUpdateOp::verify() {
                      "element type is the same as that of the region argument");
   }
 
+  if (region().front().getOperations().size() < 2)
+    return emitError() << "the update region must have at least two operations "
+                          "(binop and terminator)";
+
   YieldOp yieldOp = *region().getOps<YieldOp>().begin();
 
   if (yieldOp.results().size() != 1)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index feaa75c0bc21..21f2d0176957 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -942,6 +942,103 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Converts an LLVM dialect binary operation to the corresponding enum value
+/// for `atomicrmw` supported binary operation.
+llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
+  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
+      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
+      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
+      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
+      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
+      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
+      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
+      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
+      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
+      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
+      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
+}
+
+/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
+static LogicalResult
+convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
+                       llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+  // Convert values and types.
+  auto &innerOpList = opInst.region().front().getOperations();
+  if (innerOpList.size() != 2)
+    return opInst.emitError("exactly two operations are allowed inside an "
+                            "atomic update region while lowering to LLVM IR");
+
+  Operation &innerUpdateOp = innerOpList.front();
+
+  if (innerUpdateOp.getNumOperands() != 2 ||
+      !llvm::is_contained(innerUpdateOp.getOperands(),
+                          opInst.getRegion().getArgument(0)))
+    return opInst.emitError(
+        "the update operation inside the region must be a binary operation and "
+        "that update operation must have the region argument as an operand");
+
+  llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
+
+  bool isXBinopExpr =
+      innerUpdateOp.getNumOperands() > 0 &&
+      innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
+
+  mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
+                                       : innerUpdateOp.getOperand(0));
+  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
+  llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x());
+  LLVM::LLVMPointerType mlirXType =
+      opInst.x().getType().cast<LLVM::LLVMPointerType>();
+  llvm::Type *llvmXElementType =
+      moduleTranslation.convertType(mlirXType.getElementType());
+  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
+                                                      /*isSigned=*/false,
+                                                      /*isVolatile=*/false};
+
+  llvm::AtomicOrdering atomicOrdering =
+      convertAtomicOrdering(opInst.memory_order_val());
+
+  // Generate update code.
+  LogicalResult updateGenStatus = success();
+  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
+                      llvm::Value *atomicx,
+                      llvm::IRBuilder<> &builder) -> llvm::Value * {
+    Block &bb = *opInst.region().begin();
+    moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx);
+    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
+    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
+      updateGenStatus = (opInst.emitError()
+                         << "unable to convert update operation to llvm IR");
+      return nullptr;
+    }
+    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
+    assert(yieldop && yieldop.results().size() == 1 &&
+           "terminator must be omp.yield op and it must have exactly one "
+           "argument");
+    return moduleTranslation.lookupValue(yieldop.results()[0]);
+  };
+
+  // Handle ambiguous alloca, if any.
+  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::UnreachableInst *unreachableInst;
+  if (allocaIP.getPoint() == ompLoc.IP.getPoint()) {
+    // Same point => split basic block and make them unambigous.
+    unreachableInst = builder.CreateUnreachable();
+    builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock(
+        unreachableInst, "alloca_split"));
+    ompLoc.IP = builder.saveIP();
+    unreachableInst->removeFromParent();
+  }
+  builder.restoreIP(ompBuilder->createAtomicUpdate(
+      ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX,
+      llvmExpr, atomicOrdering, binop, updateFn, isXBinopExpr));
+  return updateGenStatus;
+}
+
 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
 /// mapping between reduction variables and their private equivalents to have
 /// been stored on the ModuleTranslation stack. Currently only supports
@@ -1069,6 +1166,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
       .Case([&](omp::AtomicWriteOp) {
         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::AtomicUpdateOp op) {
+        return convertOmpAtomicUpdate(op, builder, moduleTranslation);
+      })
       .Case([&](omp::SectionsOp) {
         return convertOmpSections(*op, builder, moduleTranslation);
       })
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2ac177f4da2c..eefc6f8f2dbb 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -621,6 +621,17 @@ func @omp_atomic_update8(%x: memref<i32>, %expr: i32) {
 
 // -----
 
+func @omp_atomic_update9(%x: memref<i32>, %expr: i32) {
+  // expected-error @below {{the update region must have at least two operations (binop and terminator)}}
+  omp.atomic.update %x : memref<i32> {
+  ^bb0(%xval: i32):
+    omp.yield (%xval : i32)
+  }
+  return
+}
+
+// -----
+
 func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
   // expected-error @below {{expected three operations in omp.atomic.capture region}}
   omp.atomic.capture {
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
new file mode 100644
index 000000000000..232df2e699c6
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file -verify-diagnostics
+
+// Checking translation when the update is carried out by using more than one op
+// in the region.
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+2 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}}
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.update}}
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %t1 = llvm.mul %xval, %expr : i32
+    %t2 = llvm.sdiv %t1, %expr : i32
+    %newval = llvm.add %xval, %t2 : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking translation when the captured variable is not used in the inner
+// update operation
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+2 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}}
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.update}}
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.mul %expr, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a5d8300454ff..3b707b157ef9 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -933,6 +933,85 @@ llvm.func @omp_atomic_write(%x: !llvm.ptr<i32>, %expr: i32) -> () {
 
 // -----
 
+// Checking simple atomicrmw and cmpxchg based translation. This also checks for
+// ambigous alloca insert point by putting llvm.mul as the first update operation.
+// CHECK-LABEL: @omp_atomic_update
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]], i1* %[[xbool:.*]], i1 %[[exprbool:.*]])
+llvm.func @omp_atomic_update(%x:!llvm.ptr<i32>, %expr: i32, %xbool: !llvm.ptr<i1>, %exprbool: i1) {
+  // CHECK: %[[t1:.*]] = mul i32 %[[x_old:.*]], %[[expr]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.mul %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  // CHECK: atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.add %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking an order-dependent operation when the order is `expr binop x`
+// CHECK-LABEL: @omp_atomic_update_ordering
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_ordering(%x:!llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = shl i32 %[[expr]], %[[x_old:[^ ,]*]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.shl %expr, %xval : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking an order-dependent operation when the order is `x binop expr`
+// CHECK-LABEL: @omp_atomic_update_ordering
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_ordering(%x:!llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = shl i32 %[[x_old:.*]], %[[expr]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]] monotonic
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.shl %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking intrinsic translation.
+// CHECK-LABEL: @omp_atomic_update_intrinsic
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_intrinsic(%x:!llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = call i32 @llvm.smax.i32(i32 %[[x_old:.*]], i32 %[[expr]])
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
 // CHECK-LABEL: @omp_sections_empty
 llvm.func @omp_sections_empty() -> () {
   omp.sections {
author	Shraiysh Vaishay <Shraiysh.Vaishay@amd.com>	2022-03-10 15:58:08 +0300
committer	Shraiysh Vaishay <Shraiysh.Vaishay@amd.com>	2022-03-10 15:58:51 +0300
commit	6dd54da5a51db6ec674d69366ab883b3057c73a6 (patch)
tree	96ce7845b5ccd955f731199d4c93cbacb69d36a9 /mlir
parent	697f55e36823dbd91ca94a666d99f3c4ba11cacb (diff)