//===- FunctionComparator.cpp - Function Comparator ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionComparator and GlobalNumberState classes
// which are used by the MergeFunctions pass for comparing functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/FunctionComparator.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "functioncomparator"

int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}
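
// For illustration: these helpers follow a strcmp-style three-way convention,
// so cmpNumbers(2, 5) is -1, cmpNumbers(5, 2) is 1, and cmpNumbers(7, 7) is 0.
// Every other cmp* routine below returns its first nonzero sub-result, which
// is what lets the comparisons be chained.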

int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
  if ((int)L < (int)R) return -1;
  if ((int)L > (int)R) return 1;
  return 0;
}

int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}

int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
  // then by value interpreted as a bitstring (aka APInt).
  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                           APFloat::semanticsPrecision(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
                           APFloat::semanticsMaxExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
                           APFloat::semanticsMinExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
                           APFloat::semanticsSizeInBits(SR)))
    return Res;
  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
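
// Illustrative consequence of the ordering above: because semantics are
// compared before values, every IEEE half constant (precision 11) orders
// before any float (precision 24), which in turn orders before any double
// (precision 53); only constants with identical semantics are compared
// bit-for-bit via their APInt image.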

int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
  // Prevent heavy comparison, compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare strings lexicographically only when it is necessary: only when
  // strings are equal in size.
  return L.compare(R);
}

int FunctionComparator::cmpAttrs(const AttributeList L,
                                 const AttributeList R) const {
  if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
    return Res;

  for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
    AttributeSet LAS = L.getAttributes(i);
    AttributeSet RAS = R.getAttributes(i);
    AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
    AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}

int FunctionComparator::cmpRangeMetadata(const MDNode *L,
                                         const MDNode *R) const {
  if (L == R)
    return 0;
  if (!L)
    return -1;
  if (!R)
    return 1;
  // Range metadata is a sequence of numbers. Make sure they are the same
  // sequence.
  // TODO: Note that as this is metadata, it is possible to drop and/or merge
  // this data when considering functions to merge. Thus this comparison would
  // return 0 (i.e. equivalent), but merging would become more complicated
  // because the ranges would need to be unioned. It is not likely that
  // functions differ ONLY in this metadata if they are actually the same
  // function semantically.
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;
  for (size_t I = 0; I < L->getNumOperands(); ++I) {
    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
      return Res;
  }
  return 0;
}

int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                const Instruction *R) const {
  ImmutableCallSite LCS(L);
  ImmutableCallSite RCS(R);

  assert(LCS && RCS && "Must be calls or invokes!");
  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");

  if (int Res =
          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
    return Res;

  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
    auto OBL = LCS.getOperandBundleAt(i);
    auto OBR = RCS.getOperandBundleAt(i);

    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
      return Res;

    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
      return Res;
  }

  return 0;
}

/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
///    type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the host
    // endianness. This isn't a problem though, because the endianness of a
    // module will affect the order of the constants, but this order is the same
    // for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }

  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function *F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for (BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB)
          return -1;
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block in "
                       "its function.");
      return -1;
    } else {
      // cmpValues said the functions are the same. So because they aren't
      // literally the same pointer, they must respectively be the left and
      // right functions.
      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
      // cmpValues will tell us if these are equivalent BasicBlocks, in the
      // context of their respective functions.
      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
    }
  }
  default: // Unknown constant, abort.
    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}

int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
  uint64_t LNumber = GlobalNumbers->getNumber(L);
  uint64_t RNumber = GlobalNumbers->getNumber(R);
  return cmpNumbers(LNumber, RNumber);
}
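
// Note: globals are not compared structurally here. Both sides are mapped to a
// slot in the shared GlobalNumberState, so two references compare equal exactly
// when they resolve to the same numbered global; keeping the numbers in one
// shared state is what keeps this ordering stable while MergeFunctions rewrites
// references between comparisons.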

/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
    LLVM_FALLTHROUGH;
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned true earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;

  case Type::PointerTyID:
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}
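
// Worked example of the pointer canonicalization at the top of cmpTypes: with
// a 64-bit DataLayout, i8* and i32* in address space 0 are both rewritten to
// i64 before comparison, so they compare equal here even though their pointee
// types differ; pointers in non-zero address spaces are instead ordered by
// their address space.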

// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperations(const Instruction *L,
                                      const Instruction *R,
                                      bool &needToCmpOperands) const {
  needToCmpOperands = true;
  if (int Res = cmpValues(L, R))
    return Res;

  // Differences from Instruction::isSameOperationAs:
  //  * replace type comparison with calls to cmpTypes.
  //  * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
  //  * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
    needToCmpOperands = false;
    const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
    if (int Res =
            cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
      return Res;
    return cmpGEPs(GEPL, GEPR);
  }

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpTypes(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
    if (int Res = cmpTypes(AI->getAllocatedType(),
                           cast<AllocaInst>(R)->getAllocatedType()))
      return Res;
    return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
  }
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    if (int Res = cmpNumbers(LI->getSyncScopeID(),
                             cast<LoadInst>(R)->getSyncScopeID()))
      return Res;
    return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSyncScopeID(),
                      cast<StoreInst>(R)->getSyncScopeID());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<CallInst>(R)->getCallingConv()))
      return Res;
    if (int Res =
            cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(CI, R))
      return Res;
    return cmpRangeMetadata(
        CI->getMetadata(LLVMContext::MD_range),
        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
    if (int Res = cmpNumbers(II->getCallingConv(),
                             cast<InvokeInst>(R)->getCallingConv()))
      return Res;
    if (int Res =
            cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(II, R))
      return Res;
    return cmpRangeMetadata(
        II->getMetadata(LLVMContext::MD_range),
        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSyncScopeID(),
                      cast<FenceInst>(R)->getSyncScopeID());
  }
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(CXI->isWeak(),
                             cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getSuccessOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getFailureOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSyncScopeID(),
                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpOrderings(RMWI->getOrdering(),
                               cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSyncScopeID(),
                      cast<AtomicRMWInst>(R)->getSyncScopeID());
  }
  if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
    const PHINode *PNR = cast<PHINode>(R);
    // Ensure that in addition to the incoming values being identical
    // (checked by the caller of this function), the incoming blocks
    // are also identical.
    for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
      if (int Res =
              cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
        return Res;
    }
  }
  return 0;
}
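
// A practical consequence of the special-state checks above: two loads that
// differ only in alignment (say align 4 vs. align 8) or atomic ordering
// compare unequal, while instructions that differ only in the names of the
// values they use still compare equal, because operands go through cmpValues
// rather than pointer identity.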

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
                                const GEPOperator *GEPR) const {
  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  const DataLayout &DL = FnL->getParent()->getDataLayout();
  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
      GEPR->accumulateConstantOffset(DL, OffsetR))
    return cmpAPInts(OffsetL, OffsetR);
  if (int Res = cmpTypes(GEPL->getSourceElementType(),
                         GEPR->getSourceElementType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}
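
// Example of the constant-offset fast path above: over the same pointer, a GEP
// to element 1 of an i32 array and a GEP to element 4 of an i8 array both fold
// to a constant offset of 4 bytes, so they compare equal here despite having
// different source element types and indices.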

int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
                                     const InlineAsm *R) const {
  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
  // the same, otherwise compare the fields.
  if (L == R)
    return 0;
  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
    return Res;
  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
    return Res;
  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
    return Res;
  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
    return Res;
  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
    return Res;
  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
    return Res;
  assert(L->getFunctionType() != R->getFunctionType());
  return 0;
}

/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
  // Catch self-reference case.
  if (L == FnL)
    return R == FnR ? 0 : -1;
  if (R == FnR)
    return 1;

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }
  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpInlineAsm(InlineAsmL, InlineAsmR);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
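
// The serial numbers assigned above mean two values compare equal exactly when
// they were first encountered at the same point in their respective functions:
// roughly, the third fresh value seen while walking FnL matches the third
// fresh value seen while walking FnR, regardless of names.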

// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                       const BasicBlock *BBR) const {
  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();

  do {
    bool needToCmpOperands = true;
    if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
      return Res;
    if (needToCmpOperands) {
      assert(InstL->getNumOperands() == InstR->getNumOperands());

      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
        Value *OpL = InstL->getOperand(i);
        Value *OpR = InstR->getOperand(i);
        if (int Res = cmpValues(OpL, OpR))
          return Res;
        // cmpValues should ensure this is true.
        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
      }
    }

    ++InstL;
    ++InstR;
  } while (InstL != InstLE && InstR != InstRE);

  if (InstL != InstLE && InstR == InstRE)
    return 1;
  if (InstL == InstLE && InstR != InstRE)
    return -1;
  return 0;
}

int FunctionComparator::compareSignature() const {
  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
    return Res;

  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
    return Res;

  if (FnL->hasGC()) {
    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
    return Res;

  if (FnL->hasSection()) {
    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
    return Res;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
    return Res;

  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
    return Res;

  assert(FnL->arg_size() == FnR->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
                                    ArgRI = FnR->arg_begin(),
                                    ArgLE = FnL->arg_end();
       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
      llvm_unreachable("Arguments repeat!");
  }
  return 0;
}

// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
  beginCompare();

  if (int Res = compareSignature())
    return Res;

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.

  FnLBBs.push_back(&FnL->getEntryBlock());
  FnRBBs.push_back(&FnR->getEntryBlock());

  VisitedBBs.insert(FnLBBs[0]);
  while (!FnLBBs.empty()) {
    const BasicBlock *BBL = FnLBBs.pop_back_val();
    const BasicBlock *BBR = FnRBBs.pop_back_val();

    if (int Res = cmpValues(BBL, BBR))
      return Res;

    if (int Res = cmpBasicBlocks(BBL, BBR))
      return Res;

    const TerminatorInst *TermL = BBL->getTerminator();
    const TerminatorInst *TermR = BBR->getTerminator();

    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
        continue;

      FnLBBs.push_back(TermL->getSuccessor(i));
      FnRBBs.push_back(TermR->getSuccessor(i));
    }
  }
  return 0;
}
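
// Note on the walk above: successors are paired positionally, so the i-th
// successor of a terminator in FnL is always compared against the i-th
// successor of the corresponding terminator in FnR, and cmpValues(BBL, BBR)
// ties each block pair together for later uses (e.g. PHI incoming blocks).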

namespace {

// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
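//
// Typical use, as in functionHash() below: construct one accumulator, feed it
// a stream of 64-bit values with add(), and read the final value with
// getHash(); there is no separate finalization step.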
class HashAccumulator64 {
  uint64_t Hash;

public:
  // Initialize to random constant, so the state isn't zero.
  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }

  void add(uint64_t V) {
    Hash = hashing::detail::hash_16_bytes(Hash, V);
  }

  // No finishing is required, because the entire hash value is used.
  uint64_t getHash() { return Hash; }
};

} // end anonymous namespace

// A function hash is calculated by considering only the number of arguments and
// whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it useful
// when possibly merging functions which are the same modulo constants and call
// sites.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
  HashAccumulator64 H;
  H.add(F.isVarArg());
  H.add(F.arg_size());

  SmallVector<const BasicBlock *, 8> BBs;
  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
  // accumulating the hash of the function "structure." (BB and opcode sequence)
  BBs.push_back(&F.getEntryBlock());
  VisitedBBs.insert(BBs[0]);
  while (!BBs.empty()) {
    const BasicBlock *BB = BBs.pop_back_val();
    // This random value acts as a block header, as otherwise the partition of
    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
    H.add(45798);
    for (auto &Inst : *BB) {
      H.add(Inst.getOpcode());
    }
    const TerminatorInst *Term = BB->getTerminator();
    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
        continue;
      BBs.push_back(Term->getSuccessor(i));
    }
  }
  return H.getHash();
}
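
// Because the hash ignores operands, equal hashes only make two functions
// candidates for merging; callers still need the full compare() above to
// confirm equivalence. Differing hashes, however, are enough to rule a pair
// out, since everything hashed here is also checked by compare().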