//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"
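
// Pull in the tablegen-generated subtarget description: the CPU/feature tables
// and the generated parts of AArch64GenSubtargetInfo, including
// ParseSubtargetFeatures().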
#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);
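
// On AArch64, Top-Byte Ignore (TBI) makes the hardware disregard bits 56-63 of
// a data address, so software pointer tags stored in the top byte do not have
// to be masked off before loads and stores.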
// If OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
UseNonLazyBind("aarch64-enable-nonlazybind",
               cl::desc("Call nonlazybind functions via direct GOT load"),
               cl::init(false), cl::Hidden);

static cl::opt<unsigned> SVEVectorBitsMax(
    "aarch64-sve-vector-bits-max",
    cl::desc("Assume SVE vector registers are at most this big, "
             "with zero meaning no maximum size is assumed."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> SVEVectorBitsMin(
    "aarch64-sve-vector-bits-min",
    cl::desc("Assume SVE vector registers are at least this big, "
             "with zero meaning no minimum size is assumed."),
    cl::init(0), cl::Hidden);
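
// These are hidden codegen flags rather than user-facing options; for example,
// passing -aarch64-sve-vector-bits-min=256 to llc (or via -mllvm) lets the
// backend assume every SVE register is at least 256 bits wide.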

AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  ParseSubtargetFeatures(CPUString, FS);
  initializeProperties();

  return *this;
}

void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
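  // Note that the *LogAlignment values are log2 of the byte alignment, e.g.
  // PrefFunctionLogAlignment = 4 requests 16-byte function alignment.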
  switch (ARMProcFamily) {
    PrefFunctionLogAlignment = 3;

    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;

    PrefFunctionLogAlignment = 3;

    PrefFunctionLogAlignment = 4;

    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 5;

    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;

    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;

    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;

    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;

    PrefFunctionLogAlignment = 3;

    PrefFunctionLogAlignment = 4;

    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;

    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;

    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;

    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;

    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
  }
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian)
    : AArch64GenSubtargetInfo(TT, CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      TargetTriple(TT), FrameLowering(),
      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);
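
  // Set up the GlobalISel interfaces for this subtarget. The instruction
  // selector needs the register bank info, so RBI is created first, passed to
  // the selector factory, and only then handed over to the owning unique_ptr.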
  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass())
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a PC-relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}
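
/// Find the target operand flags for a reference to a global function,
/// additionally taking nonlazybind and Windows import/stub semantics into
/// account.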
unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
  if (getTargetTriple().isOSWindows())
    return ClassifyGlobalReference(GV, TM);

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // An LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures, but on the other
  // hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isiOS()) {
    unsigned Major, Minor, Micro;
    TargetTriple.getiOSVersion(Major, Minor, Micro);
    return Major >= 8;
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}
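
// The helpers below translate the -aarch64-sve-vector-bits-min/-max flags into
// the SVE register width (in bits) the code generator may assume. For example,
// with SVEVectorBitsMin=256 and SVEVectorBitsMax=512, getMinSVEVectorSizeInBits()
// returns 256 and getMaxSVEVectorSizeInBits() returns 512.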
unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
  assert(SVEVectorBitsMax % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
         "Minimum SVE vector size should not be larger than its maximum!");
  if (SVEVectorBitsMax == 0)
    return 0;
  return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}

unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
  assert(SVEVectorBitsMin % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
         "Minimum SVE vector size should not be larger than its maximum!");
  if (SVEVectorBitsMax == 0)
    return (SVEVectorBitsMin / 128) * 128;
  return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}