//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// This file implements the Localizer class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "localizer" using namespace llvm; char Localizer::ID = 0; INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE, "Move/duplicate certain instructions close to their use", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, "Move/duplicate certain instructions close to their use", false, false) Localizer::Localizer() : MachineFunctionPass(ID) { } void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); TTI = &getAnalysis().getTTI(MF.getFunction()); } bool Localizer::shouldLocalize(const MachineInstr &MI) { // Assuming a spill and reload of a value has a cost of 1 instruction each, // this helper function computes the maximum number of uses we should consider // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We // break even in terms of code size when the original MI has 2 users vs // choosing to potentially spill. Any more than 2 users we we have a net code // size increase. This doesn't take into account register pressure though. auto maxUses = [](unsigned RematCost) { // A cost of 1 means remats are basically free. if (RematCost == 1) return UINT_MAX; if (RematCost == 2) return 2U; // Remat is too expensive, only sink if there's one user. if (RematCost > 2) return 1U; llvm_unreachable("Unexpected remat cost"); }; // Helper to walk through uses and terminate if we've reached a limit. Saves // us spending time traversing uses if all we want to know is if it's >= min. auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) { unsigned NumUses = 0; auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end(); for (; UI != UE && NumUses < MaxUses; ++UI) { NumUses++; } // If we haven't reached the end yet then there are more than MaxUses users. return UI == UE; }; switch (MI.getOpcode()) { default: return false; // Constants-like instructions should be close to their users. // We don't want long live-ranges for them. case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: case TargetOpcode::G_FRAME_INDEX: case TargetOpcode::G_INTTOPTR: return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); unsigned Reg = MI.getOperand(0).getReg(); unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. bool B = isUsesAtMost(Reg, MaxUses); return B; } } } void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, MachineBasicBlock *&InsertMBB) { MachineInstr &MIUse = *MOUse.getParent(); InsertMBB = MIUse.getParent(); if (MIUse.isPHI()) InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB(); return InsertMBB == Def.getParent(); } bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; DenseMap, unsigned> MBBWithLocalDef; // Since the IRTranslator only emits constants into the entry block, and the // rest of the GISel pipeline generally emits constants close to their users, // we only localize instructions in the entry block here. This might change if // we start doing CSE across blocks. auto &MBB = MF.front(); for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) { MachineInstr &MI = *RI; if (!shouldLocalize(MI)) continue; LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); unsigned Reg = MI.getOperand(0).getReg(); // Check if all the users of MI are local. // We are going to invalidation the list of use operands, so we // can't use range iterator. for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); MOIt != MOItEnd;) { MachineOperand &MOUse = *MOIt++; // Check if the use is already local. MachineBasicBlock *InsertMBB; LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); dbgs() << "Checking use: " << MIUse << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); if (isLocalUse(MOUse, MI, InsertMBB)) continue; LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); if (NewVRegIt == MBBWithLocalDef.end()) { // Create the localized instruction. MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); LocalizedInstrs.insert(LocalizedMI); MachineInstr &UseMI = *MOUse.getParent(); if (MRI->hasOneUse(Reg) && !UseMI.isPHI()) InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI); else InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), LocalizedMI); // Set a new register for the definition. unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); } LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) << '\n'); // Update the user reg. MOUse.setReg(NewVRegIt->second); } } return Changed; } bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { bool Changed = false; // For each already-localized instruction which has multiple users, then we // scan the block top down from the current position until we hit one of them. // FIXME: Consider doing inst duplication if live ranges are very long due to // many users, but this case may be better served by regalloc improvements. for (MachineInstr *MI : LocalizedInstrs) { unsigned Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); // All of the user MIs of this reg. SmallPtrSet Users; for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { if (!UseMI.isPHI()) Users.insert(&UseMI); } // If all the users were PHIs then they're not going to be in our block, // don't try to move this instruction. if (Users.empty()) continue; MachineBasicBlock::iterator II(MI); ++II; while (II != MBB.end() && !Users.count(&*II)) ++II; LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II << "\n"); assert(II != MBB.end() && "Didn't find the user in the MBB"); MI->removeFromParent(); MBB.insert(II, MI); Changed = true; } return Changed; } bool Localizer::runOnMachineFunction(MachineFunction &MF) { // If the ISel pipeline failed, do not bother running that pass. if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) return false; LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); init(MF); // Keep track of the instructions we localized. We'll do a second pass of // intra-block localization to further reduce live ranges. LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); return Changed |= localizeIntraBlock(LocalizedInstrs); }