contrib/llvm/tools/clang/lib/Lex/PTHLexer.cpp

   1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PTHLexer interface.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "clang/Lex/PTHLexer.h"
  15 #include "clang/Basic/FileManager.h"
  16 #include "clang/Basic/FileSystemStatCache.h"
  17 #include "clang/Basic/IdentifierTable.h"
  18 #include "clang/Basic/TokenKinds.h"
  19 #include "clang/Lex/LexDiagnostic.h"
  20 #include "clang/Lex/PTHManager.h"
  21 #include "clang/Lex/Preprocessor.h"
  22 #include "clang/Lex/Token.h"
  23 #include "llvm/ADT/StringExtras.h"
  24 #include "llvm/ADT/StringMap.h"
  25 #include "llvm/Support/EndianStream.h"
  26 #include "llvm/Support/MemoryBuffer.h"
  27 #include <memory>
  28 #include <system_error>
  29 using namespace clang;
  30
  31 static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4;
  32
  33 //===----------------------------------------------------------------------===//
  34 // PTHLexer methods.
  35 //===----------------------------------------------------------------------===//
  36
  37 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
  38                    const unsigned char *ppcond, PTHManager &PM)
  39   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
  40     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
  41
  42   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
  43 }
  44
  45 bool PTHLexer::Lex(Token& Tok) {
  46   //===--------------------------------------==//
  47   // Read the raw token data.
  48   //===--------------------------------------==//
  49   using namespace llvm::support;
  50
  51   // Shadow CurPtr into an automatic variable.
  52   const unsigned char *CurPtrShadow = CurPtr;
  53
  54   // Read in the data for the token.
  55   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
  56   uint32_t IdentifierID =
  57       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
  58   uint32_t FileOffset =
  59       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
  60
  61   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
  62   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
  63   uint32_t Len = Word0 >> 16;
  64
  65   CurPtr = CurPtrShadow;
  66
  67   //===--------------------------------------==//
  68   // Construct the token itself.
  69   //===--------------------------------------==//
  70
  71   Tok.startToken();
  72   Tok.setKind(TKind);
  73   Tok.setFlag(TFlags);
  74   assert(!LexingRawMode);
  75   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
  76   Tok.setLength(Len);
  77
  78   // Handle identifiers.
  79   if (Tok.isLiteral()) {
  80     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
  81   }
  82   else if (IdentifierID) {
  83     MIOpt.ReadToken();
  84     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
  85
  86     Tok.setIdentifierInfo(II);
  87
  88     // Change the kind of this identifier to the appropriate token kind, e.g.
  89     // turning "for" into a keyword.
  90     Tok.setKind(II->getTokenID());
  91
  92     if (II->isHandleIdentifierCase())
  93       return PP->HandleIdentifier(Tok);
  94
  95     return true;
  96   }
  97
  98   //===--------------------------------------==//
  99   // Process the token.
 100   //===--------------------------------------==//
 101   if (TKind == tok::eof) {
 102     // Save the end-of-file token.
 103     EofToken = Tok;
 104
 105     assert(!ParsingPreprocessorDirective);
 106     assert(!LexingRawMode);
 107
 108     return LexEndOfFile(Tok);
 109   }
 110
 111   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
 112     LastHashTokPtr = CurPtr - StoredTokenSize;
 113     assert(!LexingRawMode);
 114     PP->HandleDirective(Tok);
 115
 116     return false;
 117   }
 118
 119   if (TKind == tok::eod) {
 120     assert(ParsingPreprocessorDirective);
 121     ParsingPreprocessorDirective = false;
 122     return true;
 123   }
 124
 125   MIOpt.ReadToken();
 126   return true;
 127 }
 128
 129 bool PTHLexer::LexEndOfFile(Token &Result) {
 130   // If we hit the end of the file while parsing a preprocessor directive,
 131   // end the preprocessor directive first.  The next token returned will
 132   // then be the end of file.
 133   if (ParsingPreprocessorDirective) {
 134     ParsingPreprocessorDirective = false; // Done parsing the "line".
 135     return true;  // Have a token.
 136   }
 137
 138   assert(!LexingRawMode);
 139
 140   // If we are in a #if directive, emit an error.
 141   while (!ConditionalStack.empty()) {
 142     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
 143       PP->Diag(ConditionalStack.back().IfLoc,
 144                diag::err_pp_unterminated_conditional);
 145     ConditionalStack.pop_back();
 146   }
 147
 148   // Finally, let the preprocessor handle this.
 149   return PP->HandleEndOfFile(Result);
 150 }
 151
 152 // FIXME: We can just grab the last token instead of storing a copy
 153 // into EofToken.
 154 void PTHLexer::getEOF(Token& Tok) {
 155   assert(EofToken.is(tok::eof));
 156   Tok = EofToken;
 157 }
 158
 159 void PTHLexer::DiscardToEndOfLine() {
 160   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
 161          "Must be in a preprocessing directive!");
 162
 163   // We assume that if the preprocessor wishes to discard to the end of
 164   // the line that it also means to end the current preprocessor directive.
 165   ParsingPreprocessorDirective = false;
 166
 167   // Skip tokens by only peeking at their token kind and the flags.
 168   // We don't need to actually reconstruct full tokens from the token buffer.
 169   // This saves some copies and it also reduces IdentifierInfo* lookup.
 170   const unsigned char* p = CurPtr;
 171   while (1) {
 172     // Read the token kind.  Are we at the end of the file?
 173     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
 174     if (x == tok::eof) break;
 175
 176     // Read the token flags.  Are we at the start of the next line?
 177     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
 178     if (y & Token::StartOfLine) break;
 179
 180     // Skip to the next token.
 181     p += StoredTokenSize;
 182   }
 183
 184   CurPtr = p;
 185 }
 186
 187 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
 188 bool PTHLexer::SkipBlock() {
 189   using namespace llvm::support;
 190   assert(CurPPCondPtr && "No cached PP conditional information.");
 191   assert(LastHashTokPtr && "No known '#' token.");
 192
 193   const unsigned char *HashEntryI = nullptr;
 194   uint32_t TableIdx;
 195
 196   do {
 197     // Read the token offset from the side-table.
 198     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
 199
 200     // Read the target table index from the side-table.
 201     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
 202
 203     // Compute the actual memory address of the '#' token data for this entry.
 204     HashEntryI = TokBuf + Offset;
 205
 206     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
 207     //  contain nested blocks.  In the side-table we can jump over these
 208     //  nested blocks instead of doing a linear search if the next "sibling"
 209     //  entry is not at a location greater than LastHashTokPtr.
 210     if (HashEntryI < LastHashTokPtr && TableIdx) {
 211       // In the side-table we are still at an entry for a '#' token that
 212       // is earlier than the last one we saw.  Check if the location we would
 213       // stride gets us closer.
 214       const unsigned char* NextPPCondPtr =
 215         PPCond + TableIdx*(sizeof(uint32_t)*2);
 216       assert(NextPPCondPtr >= CurPPCondPtr);
 217       // Read where we should jump to.
 218       const unsigned char *HashEntryJ =
 219           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
 220
 221       if (HashEntryJ <= LastHashTokPtr) {
 222         // Jump directly to the next entry in the side table.
 223         HashEntryI = HashEntryJ;
 224         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
 225         CurPPCondPtr = NextPPCondPtr;
 226       }
 227     }
 228   }
 229   while (HashEntryI < LastHashTokPtr);
 230   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
 231   assert(TableIdx && "No jumping from #endifs.");
 232
 233   // Update our side-table iterator.
 234   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
 235   assert(NextPPCondPtr >= CurPPCondPtr);
 236   CurPPCondPtr = NextPPCondPtr;
 237
 238   // Read where we should jump to.
 239   HashEntryI =
 240       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
 241   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
 242
 243   // By construction NextIdx will be zero if this is a #endif.  This is useful
 244   // to know to obviate lexing another token.
 245   bool isEndif = NextIdx == 0;
 246
 247   // This case can occur when we see something like this:
 248   //
 249   //  #if ...
 250   //   /* a comment or nothing */
 251   //  #elif
 252   //
 253   // If we are skipping the first #if block it will be the case that CurPtr
 254   // already points 'elif'.  Just return.
 255
 256   if (CurPtr > HashEntryI) {
 257     assert(CurPtr == HashEntryI + StoredTokenSize);
 258     // Did we reach a #endif?  If so, go ahead and consume that token as well.
 259     if (isEndif)
 260       CurPtr += StoredTokenSize * 2;
 261     else
 262       LastHashTokPtr = HashEntryI;
 263
 264     return isEndif;
 265   }
 266
 267   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
 268   CurPtr = HashEntryI;
 269
 270   // Update the location of the last observed '#'.  This is useful if we
 271   // are skipping multiple blocks.
 272   LastHashTokPtr = CurPtr;
 273
 274   // Skip the '#' token.
 275   assert(((tok::TokenKind)*CurPtr) == tok::hash);
 276   CurPtr += StoredTokenSize;
 277
 278   // Did we reach a #endif?  If so, go ahead and consume that token as well.
 279   if (isEndif) {
 280     CurPtr += StoredTokenSize * 2;
 281   }
 282
 283   return isEndif;
 284 }
 285
 286 SourceLocation PTHLexer::getSourceLocation() {
 287   // getSourceLocation is not on the hot path.  It is used to get the location
 288   // of the next token when transitioning back to this lexer when done
 289   // handling a #included file.  Just read the necessary data from the token
 290   // data buffer to construct the SourceLocation object.
 291   // NOTE: This is a virtual function; hence it is defined out-of-line.
 292   using namespace llvm::support;
 293
 294   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
 295   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
 296   return FileStartLoc.getLocWithOffset(Offset);
 297 }
 298
 299 //===----------------------------------------------------------------------===//
 300 // PTH file lookup: map from strings to file data.
 301 //===----------------------------------------------------------------------===//
 302
 303 /// PTHFileLookup - This internal data structure is used by the PTHManager
 304 ///  to map from FileEntry objects managed by FileManager to offsets within
 305 ///  the PTH file.
 306 namespace {
 307 class PTHFileData {
 308   const uint32_t TokenOff;
 309   const uint32_t PPCondOff;
 310 public:
 311   PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
 312     : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
 313
 314   uint32_t getTokenOffset() const { return TokenOff; }
 315   uint32_t getPPCondOffset() const { return PPCondOff; }
 316 };
 317
 318
 319 class PTHFileLookupCommonTrait {
 320 public:
 321   typedef std::pair<unsigned char, const char*> internal_key_type;
 322   typedef unsigned hash_value_type;
 323   typedef unsigned offset_type;
 324
 325   static hash_value_type ComputeHash(internal_key_type x) {
 326     return llvm::HashString(x.second);
 327   }
 328
 329   static std::pair<unsigned, unsigned>
 330   ReadKeyDataLength(const unsigned char*& d) {
 331     using namespace llvm::support;
 332     unsigned keyLen =
 333         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
 334     unsigned dataLen = (unsigned) *(d++);
 335     return std::make_pair(keyLen, dataLen);
 336   }
 337
 338   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
 339     unsigned char k = *(d++); // Read the entry kind.
 340     return std::make_pair(k, (const char*) d);
 341   }
 342 };
 343
 344 } // end anonymous namespace
 345
 346 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
 347 public:
 348   typedef const FileEntry* external_key_type;
 349   typedef PTHFileData      data_type;
 350
 351   static internal_key_type GetInternalKey(const FileEntry* FE) {
 352     return std::make_pair((unsigned char) 0x1, FE->getName());
 353   }
 354
 355   static bool EqualKey(internal_key_type a, internal_key_type b) {
 356     return a.first == b.first && strcmp(a.second, b.second) == 0;
 357   }
 358
 359   static PTHFileData ReadData(const internal_key_type& k,
 360                               const unsigned char* d, unsigned) {
 361     assert(k.first == 0x1 && "Only file lookups can match!");
 362     using namespace llvm::support;
 363     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
 364     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
 365     return PTHFileData(x, y);
 366   }
 367 };
 368
 369 class PTHManager::PTHStringLookupTrait {
 370 public:
 371   typedef uint32_t data_type;
 372   typedef const std::pair<const char*, unsigned> external_key_type;
 373   typedef external_key_type internal_key_type;
 374   typedef uint32_t hash_value_type;
 375   typedef unsigned offset_type;
 376
 377   static bool EqualKey(const internal_key_type& a,
 378                        const internal_key_type& b) {
 379     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
 380                                   : false;
 381   }
 382
 383   static hash_value_type ComputeHash(const internal_key_type& a) {
 384     return llvm::HashString(StringRef(a.first, a.second));
 385   }
 386
 387   // This hopefully will just get inlined and removed by the optimizer.
 388   static const internal_key_type&
 389   GetInternalKey(const external_key_type& x) { return x; }
 390
 391   static std::pair<unsigned, unsigned>
 392   ReadKeyDataLength(const unsigned char*& d) {
 393     using namespace llvm::support;
 394     return std::make_pair(
 395         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
 396         sizeof(uint32_t));
 397   }
 398
 399   static std::pair<const char*, unsigned>
 400   ReadKey(const unsigned char* d, unsigned n) {
 401       assert(n >= 2 && d[n-1] == '\0');
 402       return std::make_pair((const char*) d, n-1);
 403     }
 404
 405   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
 406                            unsigned) {
 407     using namespace llvm::support;
 408     return endian::readNext<uint32_t, little, unaligned>(d);
 409   }
 410 };
 411
 412 //===----------------------------------------------------------------------===//
 413 // PTHManager methods.
 414 //===----------------------------------------------------------------------===//
 415
 416 PTHManager::PTHManager(
 417     std::unique_ptr<const llvm::MemoryBuffer> buf,
 418     std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
 419     std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
 420     std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
 421     const unsigned char *spellingBase, const char *originalSourceFile)
 422     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
 423       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
 424       StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
 425       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
 426
 427 PTHManager::~PTHManager() {
 428 }
 429
 430 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
 431   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
 432 }
 433
 434 PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
 435   // Memory map the PTH file.
 436   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
 437       llvm::MemoryBuffer::getFile(file);
 438
 439   if (!FileOrErr) {
 440     // FIXME: Add ec.message() to this diag.
 441     Diags.Report(diag::err_invalid_pth_file) << file;
 442     return nullptr;
 443   }
 444   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
 445
 446   using namespace llvm::support;
 447
 448   // Get the buffer ranges and check if there are at least three 32-bit
 449   // words at the end of the file.
 450   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
 451   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
 452
 453   // Check the prologue of the file.
 454   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
 455       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
 456     Diags.Report(diag::err_invalid_pth_file) << file;
 457     return nullptr;
 458   }
 459
 460   // Read the PTH version.
 461   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
 462   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
 463
 464   if (Version < PTHManager::Version) {
 465     InvalidPTH(Diags,
 466         Version < PTHManager::Version
 467         ? "PTH file uses an older PTH format that is no longer supported"
 468         : "PTH file uses a newer PTH format that cannot be read");
 469     return nullptr;
 470   }
 471
 472   // Compute the address of the index table at the end of the PTH file.
 473   const unsigned char *PrologueOffset = p;
 474
 475   if (PrologueOffset >= BufEnd) {
 476     Diags.Report(diag::err_invalid_pth_file) << file;
 477     return nullptr;
 478   }
 479
 480   // Construct the file lookup table.  This will be used for mapping from
 481   // FileEntry*'s to cached tokens.
 482   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
 483   const unsigned char *FileTable =
 484       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
 485
 486   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
 487     Diags.Report(diag::err_invalid_pth_file) << file;
 488     return nullptr; // FIXME: Proper error diagnostic?
 489   }
 490
 491   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
 492
 493   // Warn if the PTH file is empty.  We still want to create a PTHManager
 494   // as the PTH could be used with -include-pth.
 495   if (FL->isEmpty())
 496     InvalidPTH(Diags, "PTH file contains no cached source data");
 497
 498   // Get the location of the table mapping from persistent ids to the
 499   // data needed to reconstruct identifiers.
 500   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
 501   const unsigned char *IData =
 502       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
 503
 504   if (!(IData >= BufBeg && IData < BufEnd)) {
 505     Diags.Report(diag::err_invalid_pth_file) << file;
 506     return nullptr;
 507   }
 508
 509   // Get the location of the hashtable mapping between strings and
 510   // persistent IDs.
 511   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
 512   const unsigned char *StringIdTable =
 513       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
 514   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
 515     Diags.Report(diag::err_invalid_pth_file) << file;
 516     return nullptr;
 517   }
 518
 519   std::unique_ptr<PTHStringIdLookup> SL(
 520       PTHStringIdLookup::Create(StringIdTable, BufBeg));
 521
 522   // Get the location of the spelling cache.
 523   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
 524   const unsigned char *spellingBase =
 525       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
 526   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
 527     Diags.Report(diag::err_invalid_pth_file) << file;
 528     return nullptr;
 529   }
 530
 531   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
 532   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
 533
 534   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
 535   // so that we in the best case only zero out memory once when the OS returns
 536   // us new pages.
 537   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
 538
 539   if (NumIds) {
 540     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
 541     if (!PerIDCache) {
 542       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
 543       return nullptr;
 544     }
 545   }
 546
 547   // Compute the address of the original source file.
 548   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
 549   unsigned len =
 550       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
 551   if (!len) originalSourceBase = nullptr;
 552
 553   // Create the new PTHManager.
 554   return new PTHManager(std::move(File), std::move(FL), IData,
 555                         std::move(PerIDCache), std::move(SL), NumIds,
 556                         spellingBase, (const char *)originalSourceBase);
 557 }
 558
 559 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
 560   using namespace llvm::support;
 561   // Look in the PTH file for the string data for the IdentifierInfo object.
 562   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
 563   const unsigned char *IDData =
 564       (const unsigned char *)Buf->getBufferStart() +
 565       endian::readNext<uint32_t, little, aligned>(TableEntry);
 566   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
 567
 568   // Allocate the object.
 569   std::pair<IdentifierInfo,const unsigned char*> *Mem =
 570     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
 571
 572   Mem->second = IDData;
 573   assert(IDData[0] != '\0');
 574   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
 575
 576   // Store the new IdentifierInfo in the cache.
 577   PerIDCache[PersistentID] = II;
 578   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
 579   return II;
 580 }
 581
 582 IdentifierInfo* PTHManager::get(StringRef Name) {
 583   // Double check our assumption that the last character isn't '\0'.
 584   assert(Name.empty() || Name.back() != '\0');
 585   PTHStringIdLookup::iterator I =
 586       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
 587   if (I == StringIdLookup->end()) // No identifier found?
 588     return nullptr;
 589
 590   // Match found.  Return the identifier!
 591   assert(*I > 0);
 592   return GetIdentifierInfo(*I-1);
 593 }
 594
 595 PTHLexer *PTHManager::CreateLexer(FileID FID) {
 596   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
 597   if (!FE)
 598     return nullptr;
 599
 600   using namespace llvm::support;
 601
 602   // Lookup the FileEntry object in our file lookup data structure.  It will
 603   // return a variant that indicates whether or not there is an offset within
 604   // the PTH file that contains cached tokens.
 605   PTHFileLookup::iterator I = FileLookup->find(FE);
 606
 607   if (I == FileLookup->end()) // No tokens available?
 608     return nullptr;
 609
 610   const PTHFileData& FileData = *I;
 611
 612   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
 613   // Compute the offset of the token data within the buffer.
 614   const unsigned char* data = BufStart + FileData.getTokenOffset();
 615
 616   // Get the location of pp-conditional table.
 617   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
 618   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
 619   if (Len == 0) ppcond = nullptr;
 620
 621   assert(PP && "No preprocessor set yet!");
 622   return new PTHLexer(*PP, FID, data, ppcond, *this);
 623 }
 624
 625 //===----------------------------------------------------------------------===//
 626 // 'stat' caching.
 627 //===----------------------------------------------------------------------===//
 628
 629 namespace {
 630 class PTHStatData {
 631 public:
 632   const bool HasData;
 633   uint64_t Size;
 634   time_t ModTime;
 635   llvm::sys::fs::UniqueID UniqueID;
 636   bool IsDirectory;
 637
 638   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
 639               bool IsDirectory)
 640       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
 641         IsDirectory(IsDirectory) {}
 642
 643   PTHStatData() : HasData(false) {}
 644 };
 645
 646 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
 647 public:
 648   typedef const char* external_key_type;  // const char*
 649   typedef PTHStatData data_type;
 650
 651   static internal_key_type GetInternalKey(const char *path) {
 652     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
 653     return std::make_pair((unsigned char) 0x0, path);
 654   }
 655
 656   static bool EqualKey(internal_key_type a, internal_key_type b) {
 657     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
 658     // just the paths.
 659     return strcmp(a.second, b.second) == 0;
 660   }
 661
 662   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
 663                             unsigned) {
 664
 665     if (k.first /* File or Directory */) {
 666       bool IsDirectory = true;
 667       if (k.first == 0x1 /* File */) {
 668         IsDirectory = false;
 669         d += 4 * 2; // Skip the first 2 words.
 670       }
 671
 672       using namespace llvm::support;
 673
 674       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
 675       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
 676       llvm::sys::fs::UniqueID UniqueID(Device, File);
 677       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
 678       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
 679       return data_type(Size, ModTime, UniqueID, IsDirectory);
 680     }
 681
 682     // Negative stat.  Don't read anything.
 683     return data_type();
 684   }
 685 };
 686 } // end anonymous namespace
 687
 688 namespace clang {
 689 class PTHStatCache : public FileSystemStatCache {
 690   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
 691   CacheTy Cache;
 692
 693 public:
 694   PTHStatCache(PTHManager::PTHFileLookup &FL)
 695       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
 696               FL.getBase()) {}
 697
 698   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
 699                        std::unique_ptr<vfs::File> *F,
 700                        vfs::FileSystem &FS) override {
 701     // Do the lookup for the file's data in the PTH file.
 702     CacheTy::iterator I = Cache.find(Path);
 703
 704     // If we don't get a hit in the PTH file just forward to 'stat'.
 705     if (I == Cache.end())
 706       return statChained(Path, Data, isFile, F, FS);
 707
 708     const PTHStatData &D = *I;
 709
 710     if (!D.HasData)
 711       return CacheMissing;
 712
 713     Data.Name = Path;
 714     Data.Size = D.Size;
 715     Data.ModTime = D.ModTime;
 716     Data.UniqueID = D.UniqueID;
 717     Data.IsDirectory = D.IsDirectory;
 718     Data.IsNamedPipe = false;
 719     Data.InPCH = true;
 720
 721     return CacheExists;
 722   }
 723 };
 724 }
 725
 726 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
 727   return llvm::make_unique<PTHStatCache>(*FileLookup);
 728 }