contrib/llvm/include/llvm/Bitcode/BitstreamReader.h

   1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This header defines the BitstreamReader class.  This class can be used to
  11 // read an arbitrary bitstream, regardless of its contents.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
  16 #define LLVM_BITCODE_BITSTREAMREADER_H
  17
  18 #include "llvm/ADT/ArrayRef.h"
  19 #include "llvm/ADT/SmallVector.h"
  20 #include "llvm/Bitcode/BitCodes.h"
  21 #include "llvm/Support/Endian.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include "llvm/Support/MathExtras.h"
  24 #include "llvm/Support/MemoryBuffer.h"
  25 #include <algorithm>
  26 #include <cassert>
  27 #include <climits>
  28 #include <cstddef>
  29 #include <cstdint>
  30 #include <memory>
  31 #include <string>
  32 #include <utility>
  33 #include <vector>
  34
  35 namespace llvm {
  36
  37 /// This class maintains the abbreviations read from a block info block.
  38 class BitstreamBlockInfo {
  39 public:
  40   /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
  41   /// describe abbreviations that all blocks of the specified ID inherit.
  42   struct BlockInfo {
  43     unsigned BlockID;
  44     std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
  45     std::string Name;
  46     std::vector<std::pair<unsigned, std::string> > RecordNames;
  47   };
  48
  49 private:
  50   std::vector<BlockInfo> BlockInfoRecords;
  51
  52 public:
  53   /// If there is block info for the specified ID, return it, otherwise return
  54   /// null.
  55   const BlockInfo *getBlockInfo(unsigned BlockID) const {
  56     // Common case, the most recent entry matches BlockID.
  57     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
  58       return &BlockInfoRecords.back();
  59
  60     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
  61          i != e; ++i)
  62       if (BlockInfoRecords[i].BlockID == BlockID)
  63         return &BlockInfoRecords[i];
  64     return nullptr;
  65   }
  66
  67   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
  68     if (const BlockInfo *BI = getBlockInfo(BlockID))
  69       return *const_cast<BlockInfo*>(BI);
  70
  71     // Otherwise, add a new record.
  72     BlockInfoRecords.emplace_back();
  73     BlockInfoRecords.back().BlockID = BlockID;
  74     return BlockInfoRecords.back();
  75   }
  76 };
  77
  78 /// This represents a position within a bitstream. There may be multiple
  79 /// independent cursors reading within one bitstream, each maintaining their
  80 /// own local state.
  81 class SimpleBitstreamCursor {
  82   ArrayRef<uint8_t> BitcodeBytes;
  83   size_t NextChar = 0;
  84
  85 public:
  86   /// This is the current data we have pulled from the stream but have not
  87   /// returned to the client. This is specifically and intentionally defined to
  88   /// follow the word size of the host machine for efficiency. We use word_t in
  89   /// places that are aware of this to make it perfectly explicit what is going
  90   /// on.
  91   typedef size_t word_t;
  92
  93 private:
  94   word_t CurWord = 0;
  95
  96   /// This is the number of bits in CurWord that are valid. This is always from
  97   /// [0...bits_of(size_t)-1] inclusive.
  98   unsigned BitsInCurWord = 0;
  99
 100 public:
 101   static const size_t MaxChunkSize = sizeof(word_t) * 8;
 102
 103   SimpleBitstreamCursor() = default;
 104   explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
 105       : BitcodeBytes(BitcodeBytes) {}
 106   explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
 107       : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
 108                      BitcodeBytes.size()) {}
 109   explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
 110       : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
 111
 112   bool canSkipToPos(size_t pos) const {
 113     // pos can be skipped to if it is a valid address or one byte past the end.
 114     return pos <= BitcodeBytes.size();
 115   }
 116
 117   bool AtEndOfStream() {
 118     return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
 119   }
 120
 121   /// Return the bit # of the bit we are reading.
 122   uint64_t GetCurrentBitNo() const {
 123     return NextChar*CHAR_BIT - BitsInCurWord;
 124   }
 125
 126   // Return the byte # of the current bit.
 127   uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
 128
 129   ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
 130
 131   /// Reset the stream to the specified bit number.
 132   void JumpToBit(uint64_t BitNo) {
 133     size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
 134     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
 135     assert(canSkipToPos(ByteNo) && "Invalid location");
 136
 137     // Move the cursor to the right word.
 138     NextChar = ByteNo;
 139     BitsInCurWord = 0;
 140
 141     // Skip over any bits that are already consumed.
 142     if (WordBitNo)
 143       Read(WordBitNo);
 144   }
 145
 146   /// Get a pointer into the bitstream at the specified byte offset.
 147   const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
 148     return BitcodeBytes.data() + ByteNo;
 149   }
 150
 151   /// Get a pointer into the bitstream at the specified bit offset.
 152   ///
 153   /// The bit offset must be on a byte boundary.
 154   const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
 155     assert(!(BitNo % 8) && "Expected bit on byte boundary");
 156     return getPointerToByte(BitNo / 8, NumBytes);
 157   }
 158
 159   void fillCurWord() {
 160     if (NextChar >= BitcodeBytes.size())
 161       report_fatal_error("Unexpected end of file");
 162
 163     // Read the next word from the stream.
 164     const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
 165     unsigned BytesRead;
 166     if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
 167       BytesRead = sizeof(word_t);
 168       CurWord =
 169           support::endian::read<word_t, support::little, support::unaligned>(
 170               NextCharPtr);
 171     } else {
 172       // Short read.
 173       BytesRead = BitcodeBytes.size() - NextChar;
 174       CurWord = 0;
 175       for (unsigned B = 0; B != BytesRead; ++B)
 176         CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
 177     }
 178     NextChar += BytesRead;
 179     BitsInCurWord = BytesRead * 8;
 180   }
 181
 182   word_t Read(unsigned NumBits) {
 183     static const unsigned BitsInWord = MaxChunkSize;
 184
 185     assert(NumBits && NumBits <= BitsInWord &&
 186            "Cannot return zero or more than BitsInWord bits!");
 187
 188     static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
 189
 190     // If the field is fully contained by CurWord, return it quickly.
 191     if (BitsInCurWord >= NumBits) {
 192       word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
 193
 194       // Use a mask to avoid undefined behavior.
 195       CurWord >>= (NumBits & Mask);
 196
 197       BitsInCurWord -= NumBits;
 198       return R;
 199     }
 200
 201     word_t R = BitsInCurWord ? CurWord : 0;
 202     unsigned BitsLeft = NumBits - BitsInCurWord;
 203
 204     fillCurWord();
 205
 206     // If we run out of data, abort.
 207     if (BitsLeft > BitsInCurWord)
 208       report_fatal_error("Unexpected end of file");
 209
 210     word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
 211
 212     // Use a mask to avoid undefined behavior.
 213     CurWord >>= (BitsLeft & Mask);
 214
 215     BitsInCurWord -= BitsLeft;
 216
 217     R |= R2 << (NumBits - BitsLeft);
 218
 219     return R;
 220   }
 221
 222   uint32_t ReadVBR(unsigned NumBits) {
 223     uint32_t Piece = Read(NumBits);
 224     if ((Piece & (1U << (NumBits-1))) == 0)
 225       return Piece;
 226
 227     uint32_t Result = 0;
 228     unsigned NextBit = 0;
 229     while (true) {
 230       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
 231
 232       if ((Piece & (1U << (NumBits-1))) == 0)
 233         return Result;
 234
 235       NextBit += NumBits-1;
 236       Piece = Read(NumBits);
 237     }
 238   }
 239
 240   // Read a VBR that may have a value up to 64-bits in size. The chunk size of
 241   // the VBR must still be <= 32 bits though.
 242   uint64_t ReadVBR64(unsigned NumBits) {
 243     uint32_t Piece = Read(NumBits);
 244     if ((Piece & (1U << (NumBits-1))) == 0)
 245       return uint64_t(Piece);
 246
 247     uint64_t Result = 0;
 248     unsigned NextBit = 0;
 249     while (true) {
 250       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
 251
 252       if ((Piece & (1U << (NumBits-1))) == 0)
 253         return Result;
 254
 255       NextBit += NumBits-1;
 256       Piece = Read(NumBits);
 257     }
 258   }
 259
 260   void SkipToFourByteBoundary() {
 261     // If word_t is 64-bits and if we've read less than 32 bits, just dump
 262     // the bits we have up to the next 32-bit boundary.
 263     if (sizeof(word_t) > 4 &&
 264         BitsInCurWord >= 32) {
 265       CurWord >>= BitsInCurWord-32;
 266       BitsInCurWord = 32;
 267       return;
 268     }
 269
 270     BitsInCurWord = 0;
 271   }
 272
 273   /// Skip to the end of the file.
 274   void skipToEnd() { NextChar = BitcodeBytes.size(); }
 275 };
 276
 277 /// When advancing through a bitstream cursor, each advance can discover a few
 278 /// different kinds of entries:
 279 struct BitstreamEntry {
 280   enum {
 281     Error,    // Malformed bitcode was found.
 282     EndBlock, // We've reached the end of the current block, (or the end of the
 283               // file, which is treated like a series of EndBlock records.
 284     SubBlock, // This is the start of a new subblock of a specific ID.
 285     Record    // This is a record with a specific AbbrevID.
 286   } Kind;
 287
 288   unsigned ID;
 289
 290   static BitstreamEntry getError() {
 291     BitstreamEntry E; E.Kind = Error; return E;
 292   }
 293
 294   static BitstreamEntry getEndBlock() {
 295     BitstreamEntry E; E.Kind = EndBlock; return E;
 296   }
 297
 298   static BitstreamEntry getSubBlock(unsigned ID) {
 299     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
 300   }
 301
 302   static BitstreamEntry getRecord(unsigned AbbrevID) {
 303     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
 304   }
 305 };
 306
 307 /// This represents a position within a bitcode file, implemented on top of a
 308 /// SimpleBitstreamCursor.
 309 ///
 310 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
 311 /// be passed by value.
 312 class BitstreamCursor : SimpleBitstreamCursor {
 313   // This is the declared size of code values used for the current block, in
 314   // bits.
 315   unsigned CurCodeSize = 2;
 316
 317   /// Abbrevs installed at in this block.
 318   std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
 319
 320   struct Block {
 321     unsigned PrevCodeSize;
 322     std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
 323
 324     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
 325   };
 326
 327   /// This tracks the codesize of parent blocks.
 328   SmallVector<Block, 8> BlockScope;
 329
 330   BitstreamBlockInfo *BlockInfo = nullptr;
 331
 332 public:
 333   static const size_t MaxChunkSize = sizeof(word_t) * 8;
 334
 335   BitstreamCursor() = default;
 336   explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
 337       : SimpleBitstreamCursor(BitcodeBytes) {}
 338   explicit BitstreamCursor(StringRef BitcodeBytes)
 339       : SimpleBitstreamCursor(BitcodeBytes) {}
 340   explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
 341       : SimpleBitstreamCursor(BitcodeBytes) {}
 342
 343   using SimpleBitstreamCursor::canSkipToPos;
 344   using SimpleBitstreamCursor::AtEndOfStream;
 345   using SimpleBitstreamCursor::getBitcodeBytes;
 346   using SimpleBitstreamCursor::GetCurrentBitNo;
 347   using SimpleBitstreamCursor::getCurrentByteNo;
 348   using SimpleBitstreamCursor::getPointerToByte;
 349   using SimpleBitstreamCursor::JumpToBit;
 350   using SimpleBitstreamCursor::fillCurWord;
 351   using SimpleBitstreamCursor::Read;
 352   using SimpleBitstreamCursor::ReadVBR;
 353   using SimpleBitstreamCursor::ReadVBR64;
 354
 355   /// Return the number of bits used to encode an abbrev #.
 356   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
 357
 358   /// Flags that modify the behavior of advance().
 359   enum {
 360     /// If this flag is used, the advance() method does not automatically pop
 361     /// the block scope when the end of a block is reached.
 362     AF_DontPopBlockAtEnd = 1,
 363
 364     /// If this flag is used, abbrev entries are returned just like normal
 365     /// records.
 366     AF_DontAutoprocessAbbrevs = 2
 367   };
 368
 369   /// Advance the current bitstream, returning the next entry in the stream.
 370   BitstreamEntry advance(unsigned Flags = 0) {
 371     while (true) {
 372       if (AtEndOfStream())
 373         return BitstreamEntry::getError();
 374
 375       unsigned Code = ReadCode();
 376       if (Code == bitc::END_BLOCK) {
 377         // Pop the end of the block unless Flags tells us not to.
 378         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
 379           return BitstreamEntry::getError();
 380         return BitstreamEntry::getEndBlock();
 381       }
 382
 383       if (Code == bitc::ENTER_SUBBLOCK)
 384         return BitstreamEntry::getSubBlock(ReadSubBlockID());
 385
 386       if (Code == bitc::DEFINE_ABBREV &&
 387           !(Flags & AF_DontAutoprocessAbbrevs)) {
 388         // We read and accumulate abbrev's, the client can't do anything with
 389         // them anyway.
 390         ReadAbbrevRecord();
 391         continue;
 392       }
 393
 394       return BitstreamEntry::getRecord(Code);
 395     }
 396   }
 397
 398   /// This is a convenience function for clients that don't expect any
 399   /// subblocks. This just skips over them automatically.
 400   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
 401     while (true) {
 402       // If we found a normal entry, return it.
 403       BitstreamEntry Entry = advance(Flags);
 404       if (Entry.Kind != BitstreamEntry::SubBlock)
 405         return Entry;
 406
 407       // If we found a sub-block, just skip over it and check the next entry.
 408       if (SkipBlock())
 409         return BitstreamEntry::getError();
 410     }
 411   }
 412
 413   unsigned ReadCode() {
 414     return Read(CurCodeSize);
 415   }
 416
 417   // Block header:
 418   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
 419
 420   /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
 421   unsigned ReadSubBlockID() {
 422     return ReadVBR(bitc::BlockIDWidth);
 423   }
 424
 425   /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
 426   /// of this block. If the block record is malformed, return true.
 427   bool SkipBlock() {
 428     // Read and ignore the codelen value.  Since we are skipping this block, we
 429     // don't care what code widths are used inside of it.
 430     ReadVBR(bitc::CodeLenWidth);
 431     SkipToFourByteBoundary();
 432     unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
 433
 434     // Check that the block wasn't partially defined, and that the offset isn't
 435     // bogus.
 436     size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
 437     if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
 438       return true;
 439
 440     JumpToBit(SkipTo);
 441     return false;
 442   }
 443
 444   /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
 445   /// if the block has an error.
 446   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
 447
 448   bool ReadBlockEnd() {
 449     if (BlockScope.empty()) return true;
 450
 451     // Block tail:
 452     //    [END_BLOCK, <align4bytes>]
 453     SkipToFourByteBoundary();
 454
 455     popBlockScope();
 456     return false;
 457   }
 458
 459 private:
 460   void popBlockScope() {
 461     CurCodeSize = BlockScope.back().PrevCodeSize;
 462
 463     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
 464     BlockScope.pop_back();
 465   }
 466
 467   //===--------------------------------------------------------------------===//
 468   // Record Processing
 469   //===--------------------------------------------------------------------===//
 470
 471 public:
 472   /// Return the abbreviation for the specified AbbrevId.
 473   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
 474     unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
 475     if (AbbrevNo >= CurAbbrevs.size())
 476       report_fatal_error("Invalid abbrev number");
 477     return CurAbbrevs[AbbrevNo].get();
 478   }
 479
 480   /// Read the current record and discard it, returning the code for the record.
 481   unsigned skipRecord(unsigned AbbrevID);
 482
 483   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
 484                       StringRef *Blob = nullptr);
 485
 486   //===--------------------------------------------------------------------===//
 487   // Abbrev Processing
 488   //===--------------------------------------------------------------------===//
 489   void ReadAbbrevRecord();
 490
 491   /// Read and return a block info block from the bitstream. If an error was
 492   /// encountered, return None.
 493   ///
 494   /// \param ReadBlockInfoNames Whether to read block/record name information in
 495   /// the BlockInfo block. Only llvm-bcanalyzer uses this.
 496   Optional<BitstreamBlockInfo>
 497   ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
 498
 499   /// Set the block info to be used by this BitstreamCursor to interpret
 500   /// abbreviated records.
 501   void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
 502 };
 503
 504 } // end llvm namespace
 505
 506 #endif // LLVM_BITCODE_BITSTREAMREADER_H