1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
12 // Roughly, the following rules are recognized:
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
25 // The pass works in 4 steps:
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
30 // a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33 // as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 // replacing them will actually simplify the final code) are replaced
36 // with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
44 //===----------------------------------------------------------------------===//
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/CodeGen/TargetPassConfig.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/IntrinsicsS390.h"
56 #include "llvm/IR/LegacyPassManager.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/Target/TargetMachine.h"
65 void initializeSystemZTDCPassPass(PassRegistry&);
70 class SystemZTDCPass : public FunctionPass {
73 SystemZTDCPass() : FunctionPass(ID) {
74 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
77 bool runOnFunction(Function &F) override;
79 void getAnalysisUsage(AnalysisUsage &AU) const override {
80 AU.addRequired<TargetPassConfig>();
84 // Maps seen instructions that can be mapped to a TDC, values are
85 // (TDC operand, TDC mask, worthy flag) triples.
86 MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
87 // The queue of and/or/xor i1 instructions to be potentially folded.
88 std::vector<BinaryOperator *> LogicOpsWorklist;
89 // Instructions matched while folding, to be removed at the end if unused.
90 std::set<Instruction *> PossibleJunk;
92 // Tries to convert a fcmp instruction.
93 void convertFCmp(CmpInst &I);
95 // Tries to convert an icmp instruction.
96 void convertICmp(CmpInst &I);
98 // Tries to convert an i1 and/or/xor instruction, whose both operands
99 // have been already converted.
100 void convertLogicOp(BinaryOperator &I);
102 // Marks an instruction as converted - adds it to ConvertedInsts and adds
103 // any and/or/xor i1 users to the queue.
104 void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
105 ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
106 auto &M = *I->getFunction()->getParent();
107 auto &Ctx = M.getContext();
108 for (auto *U : I->users()) {
109 auto *LI = dyn_cast<BinaryOperator>(U);
110 if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
111 (LI->getOpcode() == Instruction::And ||
112 LI->getOpcode() == Instruction::Or ||
113 LI->getOpcode() == Instruction::Xor)) {
114 LogicOpsWorklist.push_back(LI);
120 } // end anonymous namespace
122 char SystemZTDCPass::ID = 0;
123 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
124 "SystemZ Test Data Class optimization", false, false)
126 FunctionPass *llvm::createSystemZTDCPass() {
127 return new SystemZTDCPass();
130 void SystemZTDCPass::convertFCmp(CmpInst &I) {
131 Value *Op0 = I.getOperand(0);
132 auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
133 auto Pred = I.getPredicate();
134 // Only comparisons with consts are interesting.
137 // Compute the smallest normal number (and its negation).
138 auto &Sem = Op0->getType()->getFltSemantics();
139 APFloat Smallest = APFloat::getSmallestNormalized(Sem);
140 APFloat NegSmallest = Smallest;
141 NegSmallest.changeSign();
142 // Check if Const is one of our recognized consts.
144 if (Const->isZero()) {
145 // All comparisons with 0 can be converted.
147 } else if (Const->isInfinity()) {
148 // Likewise for infinities.
149 WhichConst = Const->isNegative() ? 2 : 1;
150 } else if (Const->isExactlyValue(Smallest)) {
151 // For Smallest, we cannot do EQ separately from GT.
152 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
153 (Pred & CmpInst::FCMP_OGE) != 0)
156 } else if (Const->isExactlyValue(NegSmallest)) {
157 // Likewise for NegSmallest, we cannot do EQ separately from LT.
158 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
159 (Pred & CmpInst::FCMP_OLE) != 0)
163 // Not one of our special constants.
166 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
167 static const int Masks[][4] = {
169 SystemZ::TDCMASK_ZERO, // eq
170 SystemZ::TDCMASK_POSITIVE, // gt
171 SystemZ::TDCMASK_NEGATIVE, // lt
172 SystemZ::TDCMASK_NAN, // un
175 SystemZ::TDCMASK_INFINITY_PLUS, // eq
177 (SystemZ::TDCMASK_ZERO |
178 SystemZ::TDCMASK_NEGATIVE |
179 SystemZ::TDCMASK_NORMAL_PLUS |
180 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
181 SystemZ::TDCMASK_NAN, // un
184 SystemZ::TDCMASK_INFINITY_MINUS, // eq
185 (SystemZ::TDCMASK_ZERO |
186 SystemZ::TDCMASK_POSITIVE |
187 SystemZ::TDCMASK_NORMAL_MINUS |
188 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
190 SystemZ::TDCMASK_NAN, // un
193 0, // eq (unsupported)
194 (SystemZ::TDCMASK_NORMAL_PLUS |
195 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
196 (SystemZ::TDCMASK_ZERO |
197 SystemZ::TDCMASK_NEGATIVE |
198 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
199 SystemZ::TDCMASK_NAN, // un
202 0, // eq (unsupported)
203 (SystemZ::TDCMASK_ZERO |
204 SystemZ::TDCMASK_POSITIVE |
205 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
206 (SystemZ::TDCMASK_NORMAL_MINUS |
207 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
208 SystemZ::TDCMASK_NAN, // un
211 // Construct the mask as a combination of the partial masks.
213 if (Pred & CmpInst::FCMP_OEQ)
214 Mask |= Masks[WhichConst][0];
215 if (Pred & CmpInst::FCMP_OGT)
216 Mask |= Masks[WhichConst][1];
217 if (Pred & CmpInst::FCMP_OLT)
218 Mask |= Masks[WhichConst][2];
219 if (Pred & CmpInst::FCMP_UNO)
220 Mask |= Masks[WhichConst][3];
221 // A lone fcmp is unworthy of tdc conversion on its own, but may become
222 // worthy if combined with fabs.
224 if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
225 Function *F = CI->getCalledFunction();
226 if (F && F->getIntrinsicID() == Intrinsic::fabs) {
227 // Fold with fabs - adjust the mask appropriately.
228 Mask &= SystemZ::TDCMASK_PLUS;
230 Op0 = CI->getArgOperand(0);
231 // A combination of fcmp with fabs is a win, unless the constant
232 // involved is 0 (which is handled by later passes).
233 Worthy = WhichConst != 0;
234 PossibleJunk.insert(CI);
237 converted(&I, Op0, Mask, Worthy);
240 void SystemZTDCPass::convertICmp(CmpInst &I) {
241 Value *Op0 = I.getOperand(0);
242 auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
243 auto Pred = I.getPredicate();
244 // All our icmp rules involve comparisons with consts.
247 if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
248 // Check for icmp+bitcast used for signbit.
249 if (!Cast->getSrcTy()->isFloatTy() &&
250 !Cast->getSrcTy()->isDoubleTy() &&
251 !Cast->getSrcTy()->isFP128Ty())
253 Value *V = Cast->getOperand(0);
255 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
256 // icmp slt (bitcast X), 0 - set if sign bit true
257 Mask = SystemZ::TDCMASK_MINUS;
258 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
259 // icmp sgt (bitcast X), -1 - set if sign bit false
260 Mask = SystemZ::TDCMASK_PLUS;
262 // Not a sign bit check.
265 PossibleJunk.insert(Cast);
266 converted(&I, V, Mask, true);
267 } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
268 // Check if this is a pre-existing call of our tdc intrinsic.
269 Function *F = CI->getCalledFunction();
270 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
272 if (!Const->isZero())
274 Value *V = CI->getArgOperand(0);
275 auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
276 // Bail if the mask is not a constant.
279 int Mask = MaskC->getZExtValue();
280 Mask &= SystemZ::TDCMASK_ALL;
281 if (Pred == CmpInst::ICMP_NE) {
282 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
283 } else if (Pred == CmpInst::ICMP_EQ) {
284 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
285 Mask ^= SystemZ::TDCMASK_ALL;
287 // An unknown comparison - ignore.
290 PossibleJunk.insert(CI);
291 converted(&I, V, Mask, false);
295 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
298 bool Worthy0, Worthy1;
299 std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
300 std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
304 switch (I.getOpcode()) {
305 case Instruction::And:
306 Mask = Mask0 & Mask1;
308 case Instruction::Or:
309 Mask = Mask0 | Mask1;
311 case Instruction::Xor:
312 Mask = Mask0 ^ Mask1;
315 llvm_unreachable("Unknown op in convertLogicOp");
317 converted(&I, Op0, Mask, true);
320 bool SystemZTDCPass::runOnFunction(Function &F) {
321 auto &TPC = getAnalysis<TargetPassConfig>();
322 if (TPC.getTM<TargetMachine>()
323 .getSubtarget<SystemZSubtarget>(F)
327 ConvertedInsts.clear();
328 LogicOpsWorklist.clear();
329 PossibleJunk.clear();
331 // Look for icmp+fcmp instructions.
332 for (auto &I : instructions(F)) {
333 if (I.getOpcode() == Instruction::FCmp)
334 convertFCmp(cast<CmpInst>(I));
335 else if (I.getOpcode() == Instruction::ICmp)
336 convertICmp(cast<CmpInst>(I));
339 // If none found, bail already.
340 if (ConvertedInsts.empty())
343 // Process the queue of logic instructions.
344 while (!LogicOpsWorklist.empty()) {
345 BinaryOperator *Op = LogicOpsWorklist.back();
346 LogicOpsWorklist.pop_back();
347 // If both operands mapped, and the instruction itself not yet mapped,
349 if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
350 ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
351 !ConvertedInsts.count(Op))
355 // Time to actually replace the instructions. Do it in the reverse order
356 // of finding them, since there's a good chance the earlier ones will be
357 // unused (due to being folded into later ones).
358 Module &M = *F.getParent();
359 auto &Ctx = M.getContext();
360 Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
361 bool MadeChange = false;
362 for (auto &It : reverse(ConvertedInsts)) {
363 Instruction *I = It.first;
367 std::tie(V, Mask, Worthy) = It.second;
368 if (!I->user_empty()) {
369 // If used and unworthy of conversion, skip it.
372 // Call the intrinsic, compare result with 0.
374 Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
376 Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
377 Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
378 Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
379 I->replaceAllUsesWith(ICmp);
381 // If unused, or used and converted, remove it.
382 I->eraseFromParent();
389 // We've actually done something - now clear misc accumulated junk (fabs,
391 for (auto *I : PossibleJunk)
393 I->eraseFromParent();