Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2022-02-08 07:16:13 +0300
committerAmir Ayupov <aaupov@fb.com>2022-03-08 21:44:31 +0300
commit687e4af1c05ae36af88900d41150e260d8f273c0 (patch)
tree3e742ec4ac36a1daf0dfe7d19f43b2391331052d /bolt
parent151f809c558d3d7e67e4d4b7efe84218c3b8cfa7 (diff)
[BOLT] CMOVConversion pass
Convert simple hammocks into cmov based on misprediction rate.

Test Plan:
- Assembly test: `cmov-conversion.s`
- Testing on a binary:
  # Bootstrap clang with `-x86-cmov-converter-force-all` and `-Wl,--emit-relocs` (Release build)
  # Collect perf.data:
    - `clang++ <opts> bolt/lib/Core/BinaryFunction.cpp -E > bf.cpp`
    - `perf record -e cycles:u -j any,u -- clang-15 bf.cpp -O2 -std=c++14 -c -o bf.o`
  # Optimize clang-15 with and w/o -cmov-conversion:
    - `llvm-bolt clang-15 -p perf.data -o clang-15.bolt`
    - `llvm-bolt clang-15 -p perf.data -cmov-conversion -o clang-15.bolt.cmovconv`
  # Run perf experiment:
    - test: `clang-15.bolt.cmovconv`,
    - control: `clang-15.bolt`,
    - workload (clang options): `bf.cpp -O2 -std=c++14 -c -o bf.o`

Results:
```
task-clock [delta: -360.21 ± 356.75, delta(%): -1.7760 ± 1.7589, p-value: 0.047951, balance: -6]
instructions [delta: 44061118 ± 13246382, delta(%): 0.0690 ± 0.0207, p-value: 0.000001, balance: 50]
icache-misses [delta: -5534468 ± 2779620, delta(%): -0.4331 ± 0.2175, p-value: 0.028014, balance: -28]
branch-misses [delta: -1624270 ± 1113244, delta(%): -0.3456 ± 0.2368, p-value: 0.030300, balance: -22]
```

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D120177
Diffstat (limited to 'bolt')
-rw-r--r--bolt/include/bolt/Core/MCPlusBuilder.h10
-rw-r--r--bolt/include/bolt/Passes/CMOVConversion.h85
-rw-r--r--bolt/lib/Passes/CMOVConversion.cpp287
-rw-r--r--bolt/lib/Passes/CMakeLists.txt1
-rw-r--r--bolt/lib/Rewrite/BinaryPassManager.cpp9
-rw-r--r--bolt/lib/Target/X86/X86MCPlusBuilder.cpp67
-rw-r--r--bolt/test/X86/cmov-conversion.s567
7 files changed, 1026 insertions, 0 deletions
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e6f4222ce9d3..2378f2b63ac1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1292,6 +1292,16 @@ public:
return false;
}
+ /// Convert the move instruction \p Inst in place into a conditional move predicated on the condition code \p CC; a target override returns true when the conversion was done.
+ /// \p AllowStackMemOp and \p AllowBasePtrStackMemOp are passed through to the target override; this base version is an unreachable placeholder for targets without one.
+ virtual bool
+ convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
+ bool AllowStackMemOp = false,
+ bool AllowBasePtrStackMemOp = false) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
/// Lower a tail call instruction \p Inst if required by target.
virtual bool lowerTailCall(MCInst &Inst) {
llvm_unreachable("not implemented");
diff --git a/bolt/include/bolt/Passes/CMOVConversion.h b/bolt/include/bolt/Passes/CMOVConversion.h
new file mode 100644
index 000000000000..77ce2235001a
--- /dev/null
+++ b/bolt/include/bolt/Passes/CMOVConversion.h
@@ -0,0 +1,85 @@
+//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds the following patterns:
+// jcc
+// / \
+// (empty) mov src, dst
+// \ /
+//
+// and replaces them with:
+//
+// cmovcc src, dst
+//
+// The advantage of performing this conversion in BOLT (compared to compiler
+// heuristic driven instruction selection) is that BOLT can use LBR
+// misprediction information and only convert poorly predictable branches.
+// Note that branch misprediction rate is different from branch bias.
+// For well-predictable branches, it might be beneficial to leave jcc+mov as is
+// from microarchitectural perspective to avoid unneeded dependencies (CMOV
+// instruction has a dataflow dependence on flags and both operands).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_CMOVCONVERSION_H
+#define BOLT_PASSES_CMOVCONVERSION_H
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Pass for folding eligible hammocks into CMOV's if profitable.
+class CMOVConversion : public BinaryFunctionPass {
+  /// Conversion counters. One instance is filled per function and the results
+  /// are accumulated into a binary-wide instance.
+  struct Stats {
+    /// Record how many possible cases there are.
+    uint64_t StaticPossible = 0;
+    uint64_t DynamicPossible = 0;
+
+    /// Record how many cases were converted.
+    uint64_t StaticPerformed = 0;
+    uint64_t DynamicPerformed = 0;
+
+    /// Record how many mispredictions were eliminated.
+    uint64_t PossibleMP = 0;
+    uint64_t RemovedMP = 0;
+
+    /// Return the field-wise sum of this object and \p O. Neither operand is
+    /// modified, so `A = A + B` and `C = A + B` both behave as expected.
+    Stats operator+(const Stats &O) const {
+      Stats Sum(*this);
+      Sum.StaticPossible += O.StaticPossible;
+      Sum.DynamicPossible += O.DynamicPossible;
+      Sum.StaticPerformed += O.StaticPerformed;
+      Sum.DynamicPerformed += O.DynamicPerformed;
+      Sum.PossibleMP += O.PossibleMP;
+      Sum.RemovedMP += O.RemovedMP;
+      return Sum;
+    }
+
+    /// Return \p Performed / \p Possible, or 0 when \p Possible is zero, so
+    /// that an empty tally is reported as 0% rather than NaN.
+    static double getRatio(uint64_t Performed, uint64_t Possible) {
+      return Possible ? static_cast<double>(Performed) / Possible : 0.0;
+    }
+
+    /// Fraction of statically possible conversions that were performed.
+    double getStaticRatio() const {
+      return getRatio(StaticPerformed, StaticPossible);
+    }
+    /// Fraction of the possible dynamic execution count that was converted.
+    double getDynamicRatio() const {
+      return getRatio(DynamicPerformed, DynamicPossible);
+    }
+    /// Fraction of the possible mispredictions that were removed.
+    double getMPRatio() const { return getRatio(RemovedMP, PossibleMP); }
+
+    /// Print the counters and ratios as a single line.
+    void dump();
+  };
+  // BinaryContext-wide stats
+  Stats Global;
+
+  /// Convert the eligible hammocks of a single function and update Global.
+  void runOnFunction(BinaryFunction &Function);
+
+public:
+  explicit CMOVConversion() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "CMOV conversion"; }
+
+  void runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CMOVConversion.cpp b/bolt/lib/Passes/CMOVConversion.cpp
new file mode 100644
index 000000000000..6213479a5090
--- /dev/null
+++ b/bolt/lib/Passes/CMOVConversion.cpp
@@ -0,0 +1,287 @@
+//===- bolt/Passes/CMOVConversion.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CMOV conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/CMOVConversion.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <numeric>
+
+#define DEBUG_TYPE "cmov"
+
+using namespace llvm;
+
+namespace opts {
+
+extern cl::OptionCategory BoltOptCategory;
+
+// Lower bound (in percent) on the bias towards the conditional block.
+static cl::opt<int>
+    BiasThreshold("cmov-conversion-bias-threshold",
+                  cl::desc("minimum condition bias (pct) to perform a CMOV "
+                           "conversion, -1 to not account bias"),
+                  cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
+
+// Lower bound (in percent) on the branch misprediction rate.
+static cl::opt<int> MispredictionThreshold(
+    "cmov-conversion-misprediction-threshold",
+    cl::desc("minimum misprediction rate (pct) to perform a CMOV "
+             "conversion, -1 to not account misprediction rate"),
+    cl::init(5), cl::ReallyHidden, cl::cat(BoltOptCategory));
+
+// Permit converting loads that have a stack memory operand.
+static cl::opt<bool> ConvertStackMemOperand(
+    "cmov-conversion-convert-stack-mem-operand",
+    cl::desc("convert moves with stack memory operand (potentially unsafe)"),
+    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
+
+// Permit converting stack loads that are addressed through rbp.
+static cl::opt<bool> ConvertBasePtrStackMemOperand(
+    "cmov-conversion-convert-rbp-stack-mem-operand",
+    cl::desc("convert moves with rbp stack memory operand (unsafe, "
+             "must be off for binaries compiled with -fomit-frame-pointer)"),
+    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
+
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+// Check whether LHS and RHS form the "if-then" triangle below, where RHS is
+// the side block and LHS is the join block:
+//       Predecessor
+//         /     |
+//        |     RHS
+//         |     /
+//          LHS
+// The caller must ensure that LHS and RHS have a predecessor in common.
+bool isIfThenSubgraph(const BinaryBasicBlock &LHS,
+                      const BinaryBasicBlock &RHS) {
+  // The join block is entered from exactly two blocks, the side block from
+  // exactly one.
+  const bool HasExpectedPredCounts =
+      LHS.pred_size() == 2 && RHS.pred_size() == 1;
+  if (!HasExpectedPredCounts)
+    return false;
+
+  // Sanity check: the only predecessor of RHS also leads to LHS.
+  assert(*RHS.pred_begin() && LHS.isPredecessor(*RHS.pred_begin()) &&
+         "invalid subgraph");
+
+  // RHS has to flow into LHS and nowhere else.
+  return LHS.isPredecessor(&RHS) && RHS.succ_size() == 1;
+}
+
+// Try to match an if-then hammock rooted at BB. On success, return true and
+// set ConditionalSucc to the side block, UnconditionalSucc to the join block
+// and IsConditionalTaken to whether the side block is the taken target of the
+// branch. On failure, return false and leave the outputs untouched.
+bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc,
+                      BinaryBasicBlock *&UnconditionalSucc,
+                      bool &IsConditionalTaken) {
+  BinaryBasicBlock *const Taken = BB.getConditionalSuccessor(true);
+  BinaryBasicBlock *const Fallthrough = BB.getConditionalSuccessor(false);
+
+  // Try both orientations: the side block may sit on either edge.
+  const bool ThenOnTaken = isIfThenSubgraph(*Fallthrough, *Taken);
+  const bool ThenOnFallthrough = isIfThenSubgraph(*Taken, *Fallthrough);
+  if (!(ThenOnFallthrough || ThenOnTaken))
+    return false;
+  assert(!(ThenOnFallthrough && ThenOnTaken) && "Invalid subgraph");
+
+  // Output parameters
+  if (ThenOnTaken) {
+    ConditionalSucc = Taken;
+    UnconditionalSucc = Fallthrough;
+  } else {
+    ConditionalSucc = Fallthrough;
+    UnconditionalSucc = Taken;
+  }
+  IsConditionalTaken = ThenOnTaken;
+  return true;
+}
+
+// Return true if every instruction of BB can be turned into a cmov on
+// condition CC. Pseudo instructions and a trailing unconditional branch are
+// ignored; a block without any real instruction is rejected.
+bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB,
+                            unsigned CC) {
+  // A block that is empty or holds only pseudo instructions has nothing to
+  // convert into a CMOV.
+  const MCInst *const Tail = BB.empty() ? nullptr : BB.getLastNonPseudoInstr();
+  if (!Tail)
+    return false;
+
+  for (const MCInst &Candidate : BB) {
+    // Skip pseudos, and tolerate an unconditional branch in the last position.
+    if (BC.MIB->isPseudo(Candidate) ||
+        (&Candidate == Tail && BC.MIB->isUnconditionalBranch(Candidate)))
+      continue;
+
+    // Attempt the conversion on a scratch copy so BB stays untouched.
+    MCInst Trial(Candidate);
+    const bool Convertible = BC.MIB->convertMoveToConditionalMove(
+        Trial, CC, opts::ConvertStackMemOperand,
+        opts::ConvertBasePtrStackMemOperand);
+    if (Convertible)
+      continue;
+
+    LLVM_DEBUG({
+      dbgs() << BB.getName() << ": can't convert instruction ";
+      BC.printInstruction(dbgs(), Trial);
+    });
+    return false;
+  }
+  return true;
+}
+
+// Rewrite the moves of BB in place into cmovs on condition CC. Pseudo
+// instructions are left alone. The first unconditional branch encountered is
+// erased and ends the rewrite.
+void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) {
+  for (auto It = BB.begin(), End = BB.end(); It != End; ++It) {
+    MCInst &Inst = *It;
+    if (BC.MIB->isPseudo(Inst))
+      continue;
+
+    if (!BC.MIB->isUnconditionalBranch(Inst)) {
+      const bool Converted = BC.MIB->convertMoveToConditionalMove(
+          Inst, CC, opts::ConvertStackMemOperand,
+          opts::ConvertBasePtrStackMemOperand);
+      assert(Converted && "unexpected instruction");
+      (void)Converted;
+      continue;
+    }
+
+    // Erasing invalidates the iterator, which is fine as we stop right here.
+    BB.eraseInstruction(It);
+    return;
+  }
+}
+
+// Returns {misprediction rate in percent, total misprediction count} summed
+// over the outgoing branches of BB. The rate is -1 if the total execution
+// count of those branches is zero. Inferred misprediction counts are ignored.
+std::pair<int, uint64_t>
+calculateMispredictionRate(const BinaryBasicBlock &BB) {
+  uint64_t Executions = 0;
+  uint64_t Mispredictions = 0;
+  for (const auto &Edge : BB.branch_info()) {
+    Executions += Edge.Count;
+    const bool IsInferred =
+        Edge.MispredictedCount == BinaryBasicBlock::COUNT_INFERRED;
+    if (!IsInferred)
+      Mispredictions += Edge.MispredictedCount;
+  }
+
+  if (Executions == 0)
+    return {-1, Mispredictions};
+  return {100.0f * Mispredictions / Executions, Mispredictions};
+}
+
+// Returns the first component of the branch stats for the BB->ConditionalSucc
+// edge, truncated to int, or -1 when no branch stats are available.
+int calculateConditionBias(const BinaryBasicBlock &BB,
+                           const BinaryBasicBlock &ConditionalSucc) {
+  auto EdgeStats = BB.getBranchStats(&ConditionalSucc);
+  if (!EdgeStats)
+    return -1;
+  return EdgeStats->first;
+}
+
+// Print the counters as one line: static conversions, dynamic execution count
+// covered, and mispredictions saved, each with its ratio as a percentage.
+void CMOVConversion::Stats::dump() {
+  auto &OS = outs();
+  // Static conversions
+  OS << "converted static " << StaticPerformed << "/" << StaticPossible
+     << formatv(" ({0:P}) ", getStaticRatio());
+  // Dynamic execution count
+  OS << "hammock(s) into CMOV sequences, with dynamic execution count "
+     << DynamicPerformed << "/" << DynamicPossible
+     << formatv(" ({0:P}), ", getDynamicRatio());
+  // Mispredictions
+  OS << "saving " << RemovedMP << "/" << PossibleMP
+     << formatv(" ({0:P}) ", getMPRatio()) << "mispredictions\n";
+}
+
+// Fold the eligible if-then hammocks of Function into cmov sequences, print
+// per-function stats at verbosity above 1, and add the stats to Global.
+void CMOVConversion::runOnFunction(BinaryFunction &Function) {
+  BinaryContext &BC = Function.getBinaryContext();
+  // Function-local stats
+  Stats Local;
+  bool Modified = false;
+
+  // Walk the blocks in post-order, merging each block that received converted
+  // cmovs with its successor.
+  for (BinaryBasicBlock *BB : post_order(&Function)) {
+    const uint64_t BBExecCount = BB->getKnownExecutionCount();
+    // Candidates are non-empty blocks with a non-zero known execution count,
+    // exactly two successors and no jump table.
+    const bool IsCandidate = !BB->empty() && BBExecCount != 0 &&
+                             BB->succ_size() == 2 && !BB->hasJumpTable();
+    if (!IsCandidate)
+      continue;
+
+    assert(BB->isValid() && "traversal internal error");
+
+    // The last non-pseudo instruction has to be a conditional branch.
+    auto BranchIt = BB->getLastNonPseudo();
+    const bool EndsInCondBranch =
+        BranchIt != BB->rend() && BC.MIB->isConditionalBranch(*BranchIt);
+    if (!EndsInCondBranch)
+      continue;
+
+    // The successors have to form an if-then hammock.
+    BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc;
+    bool IsConditionalTaken;
+    const bool IsHammock = matchCFGSubgraph(
+        *BB, ConditionalSucc, UnconditionalSucc, IsConditionalTaken);
+    if (!IsHammock) {
+      LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n");
+      continue;
+    }
+
+    // Use the branch condition when the conditional block is the taken
+    // target, and the inverted condition when it is the fall-through.
+    const unsigned BranchCC = BC.MIB->getCondCode(*BranchIt);
+    const unsigned CC =
+        IsConditionalTaken ? BranchCC : BC.MIB->getInvertedCondCode(BranchCC);
+
+    // Every instruction of the conditional block has to be convertible.
+    if (!canConvertInstructions(BC, *ConditionalSucc, CC))
+      continue;
+
+    const int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc);
+    const std::pair<int, uint64_t> Misprediction =
+        calculateMispredictionRate(*BB);
+    const int MispredictionRate = Misprediction.first;
+    const uint64_t MispredictionCount = Misprediction.second;
+
+    // From here on the hammock counts as a possible conversion.
+    Local.StaticPossible++;
+    Local.DynamicPossible += BBExecCount;
+    Local.PossibleMP += MispredictionCount;
+
+    // Skip hammocks whose conditional successor is below the bias threshold.
+    if (ConditionBias < opts::BiasThreshold) {
+      LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName()
+                        << " bias = " << ConditionBias
+                        << ", less than threshold " << opts::BiasThreshold
+                        << '\n');
+      continue;
+    }
+
+    // Skip branches whose misprediction rate is below the threshold.
+    if (MispredictionRate < opts::MispredictionThreshold) {
+      LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = "
+                        << MispredictionRate << ", less than threshold "
+                        << opts::MispredictionThreshold << '\n');
+      continue;
+    }
+
+    // Drop the conditional branch together with the CFG edges of BB.
+    BB->eraseInstruction(std::prev(BranchIt.base()));
+    BB->removeAllSuccessors();
+    // Turn the conditional block's moves into cmovs and pull them into BB.
+    convertMoves(BC, *ConditionalSucc, CC);
+    BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end());
+    ConditionalSucc->markValid(false);
+
+    // The traversal order is relied upon to have visited (and, if needed,
+    // merged) the successor already. Append the unconditional successor to BB
+    // and take over its outgoing edges.
+    BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end());
+    UnconditionalSucc->moveAllSuccessorsTo(BB);
+    UnconditionalSucc->markValid(false);
+
+    Local.StaticPerformed++;
+    Local.DynamicPerformed += BBExecCount;
+    Local.RemovedMP += MispredictionCount;
+    Modified = true;
+  }
+
+  // Blocks that were folded away have been marked invalid; remove them.
+  if (Modified)
+    Function.eraseInvalidBBs();
+  if (opts::Verbosity > 1) {
+    outs() << "BOLT-INFO: CMOVConversion: " << Function << ", ";
+    Local.dump();
+  }
+  Global = Global + Local;
+}
+
+// Run the conversion on every function that shouldOptimize() accepts, then
+// print the accumulated stats.
+void CMOVConversion::runOnFunctions(BinaryContext &BC) {
+  for (auto &Entry : BC.getBinaryFunctions()) {
+    BinaryFunction &Function = Entry.second;
+    if (shouldOptimize(Function))
+      runOnFunction(Function);
+  }
+
+  outs() << "BOLT-INFO: CMOVConversion total: ";
+  Global.dump();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index c97f203d350e..95d872d0d168 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMBOLTPasses
AsmDump.cpp
BinaryPasses.cpp
BinaryFunctionCallGraph.cpp
+ CMOVConversion.cpp
CacheMetrics.cpp
CallGraph.cpp
CallGraphWalker.cpp
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 199ed5fcd9cd..929b73420479 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -11,6 +11,7 @@
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
+#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/FrameOptimizer.h"
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
@@ -247,6 +248,11 @@ ThreeWayBranchFlag("three-way-branch",
cl::desc("reorder three way branches"),
cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
+// Command-line switch gating the CMOVConversion pass.
+static cl::opt<bool>
+    CMOVConversionFlag("cmov-conversion", cl::desc("fold jcc+mov into cmov"),
+                       cl::ReallyHidden, cl::ZeroOrMore,
+                       cl::cat(BoltOptCategory));
+
} // namespace opts
namespace llvm {
@@ -393,6 +399,9 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(std::make_unique<TailDuplication>(),
opts::TailDuplicationFlag);
+ Manager.registerPass(std::make_unique<CMOVConversion>(),
+ opts::CMOVConversionFlag);
+
// This pass syncs local branches with CFG. If any of the following
// passes breaks the sync - they either need to re-run the pass or
// fix branches consistency internally.
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index aca88e561e1e..fa6f3700059a 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -12,12 +12,15 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "bolt/Core/MCPlus.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
@@ -2134,6 +2137,70 @@ public:
return true;
}
+  /// Rewrite the 16/32/64-bit register move or load \p Inst in place into the
+  /// matching CMOV on condition \p CC. Returns false, leaving \p Inst
+  /// untouched, when the instruction is not eligible:
+  ///  - register-register moves are always eligible;
+  ///  - stores are rejected by opcode (there is no store CMOV);
+  ///  - non-stack loads are rejected (generally unsafe);
+  ///  - stack loads need \p AllowStackMemOp;
+  ///  - RBP-based stack loads additionally need \p AllowBasePtrStackMemOp.
+  bool
+  convertMoveToConditionalMove(MCInst &Inst, unsigned CC, bool AllowStackMemOp,
+                               bool AllowBasePtrStackMemOp) const override {
+    if (isLoad(Inst)) {
+      // With stack memory operands disallowed, no load is acceptable.
+      if (!AllowStackMemOp)
+        return false;
+
+      // Otherwise only accept loads that are recognized as stack accesses.
+      bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
+      MCPhysReg Reg;
+      int32_t SrcImm;
+      uint16_t StackPtrReg;
+      int64_t StackOffset;
+      uint8_t Size;
+      if (!isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
+                         StackPtrReg, StackOffset, Size, IsSimple, IsIndexed))
+        return false;
+
+      // RBP-based accesses need their own opt-in.
+      if (!AllowBasePtrStackMemOp &&
+          RegInfo->isSubRegisterEq(X86::RBP, StackPtrReg))
+        return false;
+    }
+
+    // Map a MOV opcode onto its CMOV counterpart; 0 means "no counterpart".
+    const auto getCMOVOpcode = [](unsigned MoveOpcode) -> unsigned {
+      switch (MoveOpcode) {
+      case X86::MOV16rr:
+        return X86::CMOV16rr;
+      case X86::MOV16rm:
+        return X86::CMOV16rm;
+      case X86::MOV32rr:
+        return X86::CMOV32rr;
+      case X86::MOV32rm:
+        return X86::CMOV32rm;
+      case X86::MOV64rr:
+        return X86::CMOV64rr;
+      case X86::MOV64rm:
+        return X86::CMOV64rm;
+      default:
+        return 0;
+      }
+    };
+    const unsigned NewOpcode = getCMOVOpcode(Inst.getOpcode());
+    if (NewOpcode == 0)
+      return false;
+
+    Inst.setOpcode(NewOpcode);
+    // The condition code goes after the prime operands, before annotations.
+    const auto CCPos = Inst.begin() + MCPlus::getNumPrimeOperands(Inst);
+    Inst.insert(CCPos, MCOperand::createImm(CC));
+    // CMOV is a 3-operand MCInst, so the destination is repeated as src1.
+    const MCOperand Dest = Inst.getOperand(0);
+    Inst.insert(Inst.begin(), Dest);
+    return true;
+  }
+
bool lowerTailCall(MCInst &Inst) override {
if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) {
Inst.setOpcode(X86::JMP_1);
diff --git a/bolt/test/X86/cmov-conversion.s b/bolt/test/X86/cmov-conversion.s
new file mode 100644
index 000000000000..b8766002b151
--- /dev/null
+++ b/bolt/test/X86/cmov-conversion.s
@@ -0,0 +1,567 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -data %t.fdata -o %t -lite=0 -v=2 \
+# RUN: -cmov-conversion -cmov-conversion-misprediction-threshold=-1 \
+# RUN: -cmov-conversion-bias-threshold=-1 -print-all | FileCheck %s
+# CHECK: BOLT-INFO: CMOVConversion: CmovInHotPath, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: CmovNotInHotPath, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: MaxIndex, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: MaxIndex_unpredictable, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: MaxValue, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: BinarySearch, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: Transform, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_unpredictable, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group2, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_conflicting_dir, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr2, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_cmov_memoperand_in_group_reuse_for_addr3, converted static 0/0
+# CHECK: BOLT-INFO: CMOVConversion: test_memoperand_loop, converted static 1/1
+# CHECK: BOLT-INFO: CMOVConversion: CmovBackToBack, converted static 2/2
+# CHECK: BOLT-INFO: CMOVConversion total: converted static 8/8
+
+ .globl _start
+_start:
+ .globl CmovInHotPath # -- Begin function CmovInHotPath
+ .p2align 4, 0x90
+ .type CmovInHotPath,@function
+CmovInHotPath: # @CmovInHotPath
+# CHECK-LABEL: Binary Function "CmovInHotPath" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 CmovInHotPath 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ testl %edi, %edi
+ jle LBB0_5
+# %bb.1: # %for.body.preheader
+ movl %edi, %r8d
+ xorl %edi, %edi
+# FDATA: 0 [unknown] 0 1 CmovInHotPath #LBB0_2# 1 2
+LBB0_2: # %for.body
+ movl (%rcx,%rdi,4), %eax
+ leal 1(%rax), %r9d
+ imull %esi, %eax
+ movl $10, %r10d
+ cmpl %edx, %eax
+# CHECK: cmpl %edx, %eax
+# CHECK-NEXT: cmovlel %r9d, %r10d
+LBB0_2_br:
+ jg LBB0_4
+# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_3# 1 2
+# FDATA: 1 CmovInHotPath #LBB0_2_br# 1 CmovInHotPath #LBB0_4# 1 2
+# %bb.3: # %for.body
+LBB0_3:
+ movl %r9d, %r10d
+LBB0_4: # %for.body
+ imull %r9d, %r10d
+ movl %r10d, (%rcx,%rdi,4)
+ addq $1, %rdi
+ cmpq %rdi, %r8
+ jne LBB0_2
+LBB0_5: # %for.cond.cleanup
+ retq
+Lfunc_end0:
+ .size CmovInHotPath, Lfunc_end0-CmovInHotPath
+ .cfi_endproc
+ # -- End function
+ .globl CmovNotInHotPath # -- Begin function CmovNotInHotPath
+ .p2align 4, 0x90
+ .type CmovNotInHotPath,@function
+CmovNotInHotPath: # @CmovNotInHotPath
+# CHECK-LABEL: Binary Function "CmovNotInHotPath" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 CmovNotInHotPath 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ testl %edi, %edi
+ jle LBB1_5
+# %bb.1: # %for.body.preheader
+ movl %edx, %r9d
+ movl %edi, %r10d
+ xorl %edi, %edi
+# FDATA: 0 [unknown] 0 1 CmovNotInHotPath #LBB1_2# 1 2
+LBB1_2: # %for.body
+ movl (%rcx,%rdi,4), %r11d
+ movl %r11d, %eax
+ imull %esi, %eax
+ movl $10, %edx
+ cmpl %r9d, %eax
+# CHECK: cmpl %r9d, %eax
+# CHECK-NEXT: cmovlel %r11d, %edx
+LBB1_4_br:
+ jg LBB1_4
+# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_3# 1 2
+# FDATA: 1 CmovNotInHotPath #LBB1_4_br# 1 CmovNotInHotPath #LBB1_4# 1 2
+# %bb.3: # %for.body
+LBB1_3:
+ movl %r11d, %edx
+LBB1_4: # %for.body
+ movl %edx, (%rcx,%rdi,4)
+ movl (%r8,%rdi,4), %eax
+ cltd
+ idivl %r9d
+ movl %eax, (%r8,%rdi,4)
+ addq $1, %rdi
+ cmpq %rdi, %r10
+ jne LBB1_2
+LBB1_5: # %for.cond.cleanup
+ retq
+Lfunc_end1:
+ .size CmovNotInHotPath, Lfunc_end1-CmovNotInHotPath
+ .cfi_endproc
+ # -- End function
+ .globl MaxIndex # -- Begin function MaxIndex
+ .p2align 4, 0x90
+ .type MaxIndex,@function
+MaxIndex: # @MaxIndex
+# CHECK-LABEL: Binary Function "MaxIndex" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 MaxIndex 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ xorl %eax, %eax
+ cmpl $2, %edi
+ jl LBB2_5
+# %bb.1: # %for.body.preheader
+ movl %edi, %r8d
+ xorl %edi, %edi
+ movl $1, %edx
+# FDATA: 0 [unknown] 0 1 MaxIndex #LBB2_2# 1 2
+LBB2_2: # %for.body
+ movl (%rsi,%rdx,4), %r9d
+ movslq %edi, %rcx
+ movl %edx, %eax
+ cmpl (%rsi,%rcx,4), %r9d
+# CHECK: cmpl (%rsi,%rcx,4), %r9d
+# CHECK-NEXT: cmovlel %edi, %eax
+LBB2_2_br:
+ jg LBB2_4
+# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_3# 1 2
+# FDATA: 1 MaxIndex #LBB2_2_br# 1 MaxIndex #LBB2_4# 1 2
+# %bb.3: # %for.body
+LBB2_3:
+ movl %edi, %eax
+LBB2_4: # %for.body
+ addq $1, %rdx
+ movl %eax, %edi
+ cmpq %rdx, %r8
+ jne LBB2_2
+LBB2_5: # %for.cond.cleanup
+ retq
+Lfunc_end2:
+ .size MaxIndex, Lfunc_end2-MaxIndex
+ .cfi_endproc
+ # -- End function
+ .globl MaxIndex_unpredictable # -- Begin function MaxIndex_unpredictable
+ .p2align 4, 0x90
+ .type MaxIndex_unpredictable,@function
+MaxIndex_unpredictable: # @MaxIndex_unpredictable
+# CHECK-LABEL: Binary Function "MaxIndex_unpredictable" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ xorl %eax, %eax
+ cmpl $2, %edi
+ jl LBB3_5
+# %bb.1: # %for.body.preheader
+ movl %edi, %r8d
+ xorl %edi, %edi
+ movl $1, %edx
+# FDATA: 0 [unknown] 0 1 MaxIndex_unpredictable #LBB3_2# 1 2
+LBB3_2: # %for.body
+ movl (%rsi,%rdx,4), %r9d
+ movslq %edi, %rcx
+ movl %edx, %eax
+ cmpl (%rsi,%rcx,4), %r9d
+# CHECK: cmpl (%rsi,%rcx,4), %r9d
+# CHECK-NEXT: cmovlel %edi, %eax
+LBB3_2_br:
+ jg LBB3_4
+# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_3# 1 2
+# FDATA: 1 MaxIndex_unpredictable #LBB3_2_br# 1 MaxIndex_unpredictable #LBB3_4# 1 2
+# %bb.3: # %for.body
+LBB3_3:
+ movl %edi, %eax
+LBB3_4: # %for.body
+ addq $1, %rdx
+ movl %eax, %edi
+ cmpq %rdx, %r8
+ jne LBB3_2
+LBB3_5: # %for.cond.cleanup
+ retq
+Lfunc_end3:
+ .size MaxIndex_unpredictable, Lfunc_end3-MaxIndex_unpredictable
+ .cfi_endproc
+ # -- End function
+ .globl MaxValue # -- Begin function MaxValue
+ .p2align 4, 0x90
+ .type MaxValue,@function
+MaxValue: # @MaxValue
+# CHECK-LABEL: Binary Function "MaxValue" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 MaxValue 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movl (%rsi), %ecx
+ cmpl $2, %edi
+ jge LBB4_3
+# %bb.1:
+LBB4_1:
+ movl %ecx, %eax
+LBB4_2: # %for.cond.cleanup
+ retq
+LBB4_3: # %for.body.preheader
+ movl %edi, %edi
+ movl $1, %edx
+LBB4_4: # %for.body
+ movl (%rsi,%rdx,4), %eax
+ cmpl %ecx, %eax
+# CHECK: cmpl %ecx, %eax
+# CHECK-NEXT: cmovlel %ecx, %eax
+LBB4_4_br:
+ jg LBB4_6
+# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_5# 1 2
+# FDATA: 1 MaxValue #LBB4_4_br# 1 MaxValue #LBB4_6# 1 2
+# %bb.5: # %for.body
+LBB4_5:
+ movl %ecx, %eax
+LBB4_6: # %for.body
+ addq $1, %rdx
+ movl %eax, %ecx
+ cmpq %rdx, %rdi
+ je LBB4_2
+ jmp LBB4_4
+Lfunc_end4:
+ .size MaxValue, Lfunc_end4-MaxValue
+ .cfi_endproc
+ # -- End function
+ .globl BinarySearch # -- Begin function BinarySearch
+ .p2align 4, 0x90
+ .type BinarySearch,@function
+BinarySearch: # @BinarySearch
+# CHECK-LABEL: Binary Function "BinarySearch" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 BinarySearch 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movl (%rsi), %eax
+ jmp LBB5_2
+LBB5_1: # %while.body
+ movl %ecx, %eax
+ xorl %ecx, %ecx
+ btl %eax, %edi
+ setae %cl
+ movq 8(%rdx,%rcx,8), %rdx
+LBB5_2: # %while.body
+ movl (%rdx), %ecx
+ cmpl %ecx, %eax
+ ja LBB5_1
+# %bb.3: # %while.end
+ retq
+Lfunc_end5:
+ .size BinarySearch, Lfunc_end5-BinarySearch
+ .cfi_endproc
+ # -- End function
+ .globl Transform # -- Begin function Transform
+ .p2align 4, 0x90
+ .type Transform,@function
+Transform: # @Transform
+# CHECK-LABEL: Binary Function "Transform" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 Transform 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movb $1, %al
+ testb %al, %al
+ jne LBB6_5
+# %bb.1: # %while.body.preheader
+ movl %edx, %r8d
+ xorl %esi, %esi
+LBB6_2: # %while.body
+ movslq %esi, %rsi
+ movl (%rdi,%rsi,4), %eax
+ xorl %edx, %edx
+ divl %r8d
+ movl %eax, %edx
+ movl $11, %eax
+ movl %r8d, %ecx
+ cmpl %r8d, %edx
+ ja LBB6_4
+# %bb.3: # %while.body
+ movl $22, %eax
+ movl $22, %ecx
+LBB6_4: # %while.body
+ xorl %edx, %edx
+ divl %ecx
+ movl %edx, (%rdi,%rsi,4)
+ addl $1, %esi
+ cmpl %r9d, %esi
+ ja LBB6_2
+LBB6_5: # %while.end
+ retq
+Lfunc_end6:
+ .size Transform, Lfunc_end6-Transform
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand # -- Begin function test_cmov_memoperand
+ .p2align 4, 0x90
+ .type test_cmov_memoperand,@function
+test_cmov_memoperand: # @test_cmov_memoperand
+# CHECK-LABEL: Binary Function "test_cmov_memoperand" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movl %edx, %eax
+ cmpl %esi, %edi
+ ja LBB7_2
+# %bb.1: # %entry
+ movl (%rcx), %eax
+LBB7_2: # %entry
+ retq
+Lfunc_end7:
+ .size test_cmov_memoperand, Lfunc_end7-test_cmov_memoperand
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_unpredictable # -- Begin function test_cmov_memoperand_unpredictable
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_unpredictable,@function
+test_cmov_memoperand_unpredictable: # @test_cmov_memoperand_unpredictable
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_unpredictable" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_unpredictable 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movl %edx, %eax
+ cmpl %esi, %edi
+ ja LBB8_2
+# %bb.1: # %entry
+ movl (%rcx), %eax
+LBB8_2: # %entry
+ retq
+Lfunc_end8:
+ .size test_cmov_memoperand_unpredictable, Lfunc_end8-test_cmov_memoperand_unpredictable
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_in_group # -- Begin function test_cmov_memoperand_in_group
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_in_group,@function
+test_cmov_memoperand_in_group: # @test_cmov_memoperand_in_group
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry
+ movl %edx, %eax
+ movl %edx, %r8d
+ cmpl %esi, %edi
+ ja LBB9_2
+# %bb.1: # %entry
+ movl (%rcx), %r8d
+ movl %edi, %eax
+ movl %esi, %edx
+LBB9_2: # %entry
+ addl %r8d, %eax
+ addl %edx, %eax
+ retq
+Lfunc_end9:
+ .size test_cmov_memoperand_in_group, Lfunc_end9-test_cmov_memoperand_in_group
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_in_group2 # -- Begin function test_cmov_memoperand_in_group2
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_in_group2,@function
+test_cmov_memoperand_in_group2: # @test_cmov_memoperand_in_group2
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group2" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group2 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- one branch guards three register updates; the branch here is jbe
+ movl %edx, %eax # default for %eax
+ movl %edx, %r8d # default for %r8d
+ cmpl %esi, %edi
+ jbe LBB10_2 # %edi <= %esi (unsigned): keep all defaults
+# %bb.1: # %entry -- fall-through path: overwrite %r8d, %eax and %edx together
+ movl (%rcx), %r8d # the only memory read in the group
+ movl %edi, %eax
+ movl %esi, %edx
+LBB10_2: # %entry -- join point: sum the three selected values into %eax
+ addl %r8d, %eax
+ addl %edx, %eax
+ retq
+Lfunc_end10:
+ .size test_cmov_memoperand_in_group2, Lfunc_end10-test_cmov_memoperand_in_group2
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_conflicting_dir # -- Begin function test_cmov_memoperand_conflicting_dir
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_conflicting_dir,@function
+test_cmov_memoperand_conflicting_dir: # @test_cmov_memoperand_conflicting_dir
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_conflicting_dir" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_conflicting_dir 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- no branch in this function: both selections are already written as cmova
+ cmpl %esi, %edi
+ movl (%rcx), %eax # unconditional load into %eax
+ cmoval %edx, %eax # %edi > %esi (unsigned): %eax = %edx, replacing the loaded value
+ cmoval (%r8), %edx # same condition: %edx = 32-bit value at (%r8)
+ addl %edx, %eax
+ retq
+Lfunc_end11:
+ .size test_cmov_memoperand_conflicting_dir, Lfunc_end11-test_cmov_memoperand_conflicting_dir
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_in_group_reuse_for_addr # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_in_group_reuse_for_addr,@function
+test_cmov_memoperand_in_group_reuse_for_addr: # @test_cmov_memoperand_in_group_reuse_for_addr
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- returns %edi when %edi > %esi (unsigned), otherwise the 32-bit value loaded from (%rcx)
+ movl %edi, %eax # result defaults to %edi
+ cmpl %esi, %edi
+ ja LBB12_2 # %edi > %esi (unsigned): keep the default and skip the load
+# %bb.1: # %entry -- fall-through path: a single load that overwrites the result
+ movl (%rcx), %eax
+LBB12_2: # %entry -- join point
+ retq
+Lfunc_end12:
+ .size test_cmov_memoperand_in_group_reuse_for_addr, Lfunc_end12-test_cmov_memoperand_in_group_reuse_for_addr
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_in_group_reuse_for_addr2 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr2
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_in_group_reuse_for_addr2,@function
+test_cmov_memoperand_in_group_reuse_for_addr2: # @test_cmov_memoperand_in_group_reuse_for_addr2
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr2" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr2 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- returns %edi when %edi > %esi (unsigned), otherwise a value reached through two chained loads
+ movl %edi, %eax # result defaults to %edi
+ cmpl %esi, %edi
+ ja LBB13_2 # %edi > %esi (unsigned): keep the default and skip both loads
+# %bb.1: # %entry -- fall-through path: two dependent loads
+ movq (%rcx), %rax # 64-bit load from (%rcx) into %rax
+ movl (%rax), %eax # the value just loaded is used as the address of the second load
+LBB13_2: # %entry -- join point
+ retq
+Lfunc_end13:
+ .size test_cmov_memoperand_in_group_reuse_for_addr2, Lfunc_end13-test_cmov_memoperand_in_group_reuse_for_addr2
+ .cfi_endproc
+ # -- End function
+ .globl test_cmov_memoperand_in_group_reuse_for_addr3 # -- Begin function test_cmov_memoperand_in_group_reuse_for_addr3
+ .p2align 4, 0x90
+ .type test_cmov_memoperand_in_group_reuse_for_addr3,@function
+test_cmov_memoperand_in_group_reuse_for_addr3: # @test_cmov_memoperand_in_group_reuse_for_addr3
+# CHECK-LABEL: Binary Function "test_cmov_memoperand_in_group_reuse_for_addr3" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_cmov_memoperand_in_group_reuse_for_addr3 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- returns %edi when %edi > %esi (unsigned), otherwise the 32-bit value loaded from (%rcx)
+ movl %edi, %eax # result defaults to %edi
+ cmpl %esi, %edi
+ ja LBB14_2 # %edi > %esi (unsigned): keep the default and skip the load
+# %bb.1: # %entry -- fall-through path: a single load that overwrites the result
+ movl (%rcx), %eax
+LBB14_2: # %entry -- join point
+ retq
+Lfunc_end14:
+ .size test_cmov_memoperand_in_group_reuse_for_addr3, Lfunc_end14-test_cmov_memoperand_in_group_reuse_for_addr3
+ .cfi_endproc
+ # -- End function
+ .globl test_memoperand_loop # -- Begin function test_memoperand_loop
+ .p2align 4, 0x90
+ .type test_memoperand_loop,@function
+test_memoperand_loop: # @test_memoperand_loop
+# CHECK-LABEL: Binary Function "test_memoperand_loop" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 test_memoperand_loop 0 1 2
+ .cfi_startproc
+# %bb.0: # %entry -- loop setup: %rax and %rdx receive the values stored at the begin and end symbols
+ movq begin@GOTPCREL(%rip), %r8 # %r8 = address of begin, read from its GOT entry
+ movq (%r8), %rax # %rax = 64-bit value stored at begin
+ movq end@GOTPCREL(%rip), %rcx # %rcx = address of end, read from its GOT entry
+ movq (%rcx), %rdx # %rdx = 64-bit value stored at end
+ xorl %esi, %esi # iteration counter starts at 0
+ movq %rax, %rcx # %rcx is the store pointer, starting at the begin value
+LBB15_1: # %loop.body -- first advance of the pointer
+ addq $8, %rcx
+ cmpq %rdx, %rcx
+ ja LBB15_3 # %rcx > %rdx (unsigned): keep the advanced pointer
+# %bb.2: # %loop.body -- otherwise reload the pointer from memory at (%r8)
+ movq (%r8), %rcx
+LBB15_3: # %loop.body -- store, then second advance of the pointer
+ movl %edi, (%rcx)
+ addq $8, %rcx
+ cmpq %rdx, %rcx
+# CHECK: movl %edi, (%rcx)
+# CHECK-NEXT: addq $0x8, %rcx
+# CHECK-NEXT: cmpq %rdx, %rcx
+# CHECK-NEXT: cmovbeq %rax, %rcx
+LBB15_3_br: # this branch and the register move it guards are what the expected output above shows as a single cmovbeq
+ ja LBB15_5 # %rcx > %rdx (unsigned): skip the reset below
+# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_4# 1 2
+# FDATA: 1 test_memoperand_loop #LBB15_3_br# 1 test_memoperand_loop #LBB15_5# 1 2
+# %bb.4: # %loop.body
+LBB15_4: # reset the pointer to the begin value held in %rax (register source, no memory read)
+ movq %rax, %rcx
+LBB15_5: # %loop.body -- second store, then the loop counter test
+ movl %edi, (%rcx)
+ addl $1, %esi
+ cmpl $1024, %esi # imm = 0x400
+ jl LBB15_1 # repeat while %esi < 1024 (signed)
+# %bb.6: # %exit
+ retq
+Lfunc_end15:
+ .size test_memoperand_loop, Lfunc_end15-test_memoperand_loop
+ .cfi_endproc
+ # -- End function
+ .globl CmovBackToBack # -- Begin function CmovBackToBack
+ .p2align 4, 0x90
+ .type CmovBackToBack,@function
+CmovBackToBack: # @CmovBackToBack -- loop whose body holds two consecutive branch-over-move sequences after one compare
+# CHECK-LABEL: Binary Function "CmovBackToBack" after CMOV conversion
+# FDATA: 0 [unknown] 0 1 CmovBackToBack 0 1 2
+ .cfi_startproc
+ testl %edi, %edi
+ jle LBB16_5 # %edi <= 0 (signed): go straight to the return
+ movl %edi, %r8d # %r8d keeps the original %edi as the loop bound
+ xorl %edi, %edi # %edi is reused as the element index, starting at 0
+# FDATA: 0 [unknown] 0 1 CmovBackToBack #LBB16_2# 1 2
+LBB16_2: # %for.body
+ movl (%rcx,%rdi,4), %eax # load the 32-bit element at index %rdi
+ leal 1(%rax), %r9d # %r9d = element + 1
+ imull %esi, %eax # %eax = element * %esi
+ movl $10, %r10d # default value for %r10d
+ cmpl %edx, %eax
+# CHECK: cmpl %edx, %eax
+# CHECK-NEXT: cmovlel %r9d, %r10d
+LBB16_2_br:
+ jg LBB16_4 # %eax > %edx (signed): keep the current %r10d
+# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_3# 1 2
+# FDATA: 1 CmovBackToBack #LBB16_2_br# 1 CmovBackToBack #LBB16_4# 1 2
+LBB16_3:
+ movl %r9d, %r10d
+LBB16_4: # %for.body
+# CHECK-NEXT: cmovlel %r9d, %r10d
+LBB16_6_br: # second branch; no compare instruction sits between it and the first one
+ jg LBB16_8
+# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_7# 1 2
+# FDATA: 1 CmovBackToBack #LBB16_6_br# 1 CmovBackToBack #LBB16_8# 1 2
+LBB16_7:
+ movl %r9d, %r10d
+LBB16_8: # %for.body
+ imull %r9d, %r10d # %r10d *= %r9d
+ movl %r10d, (%rcx,%rdi,4) # write the result back over the element that was loaded
+ addq $1, %rdi
+ cmpq %rdi, %r8
+ jne LBB16_2 # loop until the index equals the bound in %r8
+LBB16_5: # %for.cond.cleanup
+ retq
+Lfunc_end16:
+ .size CmovBackToBack, Lfunc_end16-CmovBackToBack
+ .cfi_endproc
+ # -- End function
+ .data
+ .globl begin
+begin: # 8-byte global that test_memoperand_loop reads through begin@GOTPCREL
+ .quad 0xdeadbeef
+ .globl end
+end: # 8-byte global that test_memoperand_loop reads through end@GOTPCREL
+ .quad 0xfaceb00c
+