1 //===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the IdentifierInfo, IdentifierTable, and Selector
// interfaces.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/OwningPtr.h"
23 #include "llvm/Support/PointerLikeTypeTraits.h"
// Forward declaration of the DenseMapInfo traits template; a specialization
// for clang::Selector appears near the end of this file.
28 template <typename T> struct DenseMapInfo;
// Forward declarations of class types that are referred to before (or
// without) their full definitions.
34 class IdentifierTable;
36 class MultiKeywordSelector; // private class used by Selector
37 class DeclarationName; // AST class that stores declaration names
39 /// IdentifierLocPair - A simple pair of identifier info and location:
/// `first` is the IdentifierInfo pointer and `second` is the SourceLocation.
40 typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
43 /// IdentifierInfo - One of these records is kept for each identifier that
44 /// is lexed. This contains information about whether the token was #define'd,
45 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
46 /// variable or function name). The preprocessor keeps this information in a
47 /// set, and all tok::identifier tokens have a pointer to one of these.
///
/// The per-identifier state is packed into bit-fields. The spelling itself is
/// not stored in this object: it is reached through Entry, or, when Entry is
/// null, through a pointer stored next to the object (see getNameStart).
48 class IdentifierInfo {
49 // Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a
50 // signed char and TokenKinds > 127 won't be handled correctly.
51 unsigned TokenID : 8; // Front-end token ID or tok::identifier.
52 // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
53 // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
// are for builtins, stored offset by NUM_OBJC_KEYWORDS (see
// getBuiltinID/setBuiltinID).
55 unsigned ObjCOrBuiltinID :10;
56 bool HasMacro : 1; // True if there is a #define for this.
57 bool IsExtension : 1; // True if identifier is a lang extension.
58 bool IsPoisoned : 1; // True if identifier is poisoned.
59 bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword.
60 bool NeedsHandleIdentifier : 1; // See "RecomputeNeedsHandleIdentifier".
61 // 9 bits left in 32-bit word.
// (8 + 10 + 5 one-bit flags = 23 bits used.)
62 void *FETokenInfo; // Managed by the language front-end.
// String-map entry whose key is this identifier's spelling. May be null, in
// which case getNameStart/getLength read the name from external storage.
63 llvm::StringMapEntry<IdentifierInfo*> *Entry;
// Copy construction and assignment are declared without a body here so that
// IdentifierInfo objects are not copied.
65 IdentifierInfo(const IdentifierInfo&); // NONCOPYABLE.
66 void operator=(const IdentifierInfo&); // NONASSIGNABLE.
// IdentifierTable creates these objects (see IdentifierTable::get).
68 friend class IdentifierTable;
74 /// isStr - Return true if this is the identifier for the specified string.
75 /// This is intended to be used for string literals only: II->isStr("foo").
/// StrLen is the deduced array size; for a string literal that includes the
/// terminating NUL, which is why StrLen-1 is used as the string length.
76 template <std::size_t StrLen>
77 bool isStr(const char (&Str)[StrLen]) const {
// Check the length first; memcmp only runs when the lengths already match.
78 return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1);
81 /// getNameStart - Return the beginning of the actual string for this
82 /// identifier. The returned string is properly null terminated.
/// If Entry is non-null the key data of that string-map entry is returned;
/// otherwise the name pointer stored alongside this object is returned.
84 const char *getNameStart() const {
// Common case: the spelling is the key of our string-map entry.
85 if (Entry) return Entry->getKeyData();
86 // FIXME: This is gross. It would be best not to embed specific details
87 // of the PTH file format here.
88 // The 'this' pointer really points to a
89 // std::pair<IdentifierInfo, const char*>, where internal pointer
90 // points to the external string data.
// The cast below relies on that layout: `second` is the external name.
91 return ((std::pair<IdentifierInfo, const char*>*) this)->second;
94 /// getLength - Efficiently return the length of this identifier info.
/// If Entry is non-null the key length of that string-map entry is returned;
/// otherwise the length is decoded from the two bytes that precede the
/// externally stored name.
96 unsigned getLength() const {
97 if (Entry) return Entry->getKeyLength();
98 // FIXME: This is gross. It would be best not to embed specific details
99 // of the PTH file format here.
100 // The 'this' pointer really points to a
101 // std::pair<IdentifierInfo, const char*>, where internal pointer
102 // points to the external string data.
// Step back two bytes from the name to reach the stored length prefix.
103 const char* p = ((std::pair<IdentifierInfo, const char*>*) this)->second-2;
// The two bytes are combined low byte first (p[0] | p[1] << 8) and 1 is
// subtracted; presumably the stored value counts the terminating NUL - TODO
// confirm against the PTH writer.
// NOTE(review): p[0]/p[1] are plain char. Where char is signed, a byte value
// >= 0x80 would sign-extend in the (unsigned) cast and corrupt the result;
// confirm stored lengths never reach that range or read via unsigned char.
104 return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
107 /// getName - Return the actual identifier string.
/// The result is a StringRef built from getNameStart() and getLength(); it
/// refers to the existing character data rather than copying it.
108 llvm::StringRef getName() const {
109 return llvm::StringRef(getNameStart(), getLength());
112 /// hasMacroDefinition - Return true if this identifier is #defined to some
/// value (tracked by the HasMacro bit).
114 bool hasMacroDefinition() const {
/// setHasMacroDefinition - Update the macro-definition flag and keep
/// NeedsHandleIdentifier consistent with it.
117 void setHasMacroDefinition(bool Val) {
// Nothing to do when the flag already has the requested value.
118 if (HasMacro == Val) return;
// NeedsHandleIdentifier is then either forced on or recomputed from all
// flags; presumably forced on when Val is true - TODO confirm.
122 NeedsHandleIdentifier = 1;
124 RecomputeNeedsHandleIdentifier();
127 /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API
128 /// can be used to cause the lexer to map identifiers to source-language
/// tokens.
/// TokenID is held in an 8-bit unsigned bit-field, so the getter casts the
/// stored value back to tok::TokenKind.
130 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
131 void setTokenID(tok::TokenKind ID) { TokenID = ID; }
133 /// getPPKeywordID - Return the preprocessor keyword ID for this identifier.
134 /// For example, "define" will return tok::pp_define.
/// Only declared here; no body is provided at this point.
135 tok::PPKeywordKind getPPKeywordID() const;
137 /// getObjCKeywordID - Return the Objective-C keyword ID for this
138 /// identifier. For example, 'class' will return tok::objc_class if ObjC is
/// enabled.
/// ObjCOrBuiltinID is shared with builtin IDs: only values below
/// tok::NUM_OBJC_KEYWORDS denote Objective-C keywords.
140 tok::ObjCKeywordKind getObjCKeywordID() const {
141 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
142 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
// Value is in the builtin range, so this is not an Objective-C keyword.
144 return tok::objc_not_keyword;
/// setObjCKeywordID - Store ID directly in the shared ObjCOrBuiltinID field.
146 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
148 /// getBuiltinID - Return a value indicating whether this is a builtin
149 /// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target.
150 /// 2+ are specific builtin functions.
/// Builtin IDs are stored in ObjCOrBuiltinID offset by
/// tok::NUM_OBJC_KEYWORDS; the offset is removed again here.
151 unsigned getBuiltinID() const {
152 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
153 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
/// setBuiltinID - Store a builtin ID, offset by tok::NUM_OBJC_KEYWORDS so it
/// does not collide with the Objective-C keyword range.
157 void setBuiltinID(unsigned ID) {
158 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
// Read the bit-field back: if the sum did not fit in the 10-bit field the
// stored value was truncated and no longer round-trips to ID.
159 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
160 && "ID too large for field!");
/// get/setObjCOrBuiltinID - Raw access to the shared field, with no offset
/// applied and no range check.
163 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
164 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
166 /// get/setExtension - Initialize information about whether or not this
167 /// language token is an extension. This controls extension warnings, and is
168 /// only valid if a custom token ID is set.
169 bool isExtensionToken() const { return IsExtension; }
/// setIsExtensionToken - Besides the flag itself, NeedsHandleIdentifier is
/// refreshed: it is either forced on or recomputed from all flags
/// (presumably forced on when Val is true - TODO confirm).
170 void setIsExtensionToken(bool Val) {
173 NeedsHandleIdentifier = 1;
175 RecomputeNeedsHandleIdentifier();
178 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
179 /// Preprocessor will emit an error every time this token is used.
/// Value defaults to true. NeedsHandleIdentifier is refreshed as well: it is
/// either forced on or recomputed from all flags (presumably forced on when
/// Value is true - TODO confirm).
180 void setIsPoisoned(bool Value = true) {
183 NeedsHandleIdentifier = 1;
185 RecomputeNeedsHandleIdentifier();
188 /// isPoisoned - Return true if this token has been poisoned.
189 bool isPoisoned() const { return IsPoisoned; }
191 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
192 /// this identifier is a C++ alternate representation of an operator.
/// Val defaults to true.
193 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
194 IsCPPOperatorKeyword = Val;
// Keep NeedsHandleIdentifier in step with the new flag value: either force
// it on or recompute it (presumably forced on when Val is true - TODO
// confirm).
196 NeedsHandleIdentifier = 1;
198 RecomputeNeedsHandleIdentifier();
200 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
202 /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
203 /// associate arbitrary metadata with this token.
/// The pointer is stored untyped (void*). The getter static_casts it to the
/// requested T*, so callers are expected to request the type that was
/// originally stored.
205 T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
206 void setFETokenInfo(void *T) { FETokenInfo = T; }
208 /// isHandleIdentifierCase - Return true if the Preprocessor::HandleIdentifier
209 /// must be called on a token of this identifier. If this returns false, we
210 /// know that HandleIdentifier will not affect the token.
/// This simply reports the cached NeedsHandleIdentifier bit.
211 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
214 /// RecomputeNeedsHandleIdentifier - The Preprocessor::HandleIdentifier does
215 /// several special (but rare) things to identifiers of various sorts. For
216 /// example, it changes the "for" keyword token from tok::identifier to
/// its keyword token kind.
219 /// This method is very tied to the definition of HandleIdentifier. Any
220 /// change to it should be reflected here.
///
/// The cached bit is rebuilt by combining the individual flag queries with
/// bitwise OR.
221 void RecomputeNeedsHandleIdentifier() {
222 NeedsHandleIdentifier =
223 (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
228 /// IdentifierInfoLookup - An abstract class used by IdentifierTable that
229 /// provides an interface for performing lookups from strings
230 /// (const char *) to IdentifierInfo objects.
231 class IdentifierInfoLookup {
// Virtual destructor, declared here without a body.
233 virtual ~IdentifierInfoLookup();
235 /// get - Return the identifier token info for the specified named identifier.
236 /// Unlike the version in IdentifierTable, this returns a pointer instead
237 /// of a reference. If the pointer is NULL then the IdentifierInfo cannot
/// be found.
/// The name is passed as a [NameStart, NameEnd) character range. Pure
/// virtual: concrete lookups must implement it.
240 // FIXME: Move to StringRef API.
241 virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd) = 0;
244 /// \brief An abstract class used to resolve numerical identifier
245 /// references (meaningful only to some external source) into
246 /// IdentifierInfo pointers.
247 class ExternalIdentifierLookup {
// Virtual destructor, declared here without a body.
249 virtual ~ExternalIdentifierLookup();
251 /// \brief Return the identifier associated with the given ID number.
253 /// The ID 0 is associated with the NULL identifier.
/// Pure virtual: concrete external sources must implement it.
254 virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
257 /// IdentifierTable - This table implements an efficient mapping from strings to
258 /// IdentifierInfo nodes. It has no other purpose, but this is an
259 /// extremely performance-critical piece of the code, as each occurrence of
260 /// every identifier goes through here when lexed.
261 class IdentifierTable {
262 // Shark shows that using MallocAllocator is *much* slower than using this
// BumpPtrAllocator.
264 typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
// Maps each spelling to its IdentifierInfo pointer.
265 HashTableTy HashTable;
// Optional secondary source that get() consults before creating a new
// IdentifierInfo; may be null.
267 IdentifierInfoLookup* ExternalLookup;
270 /// IdentifierTable ctor - Create the identifier table, populating it with
271 /// info about the language keywords for the language specified by LangOpts.
/// externalLookup defaults to 0 (no external lookup).
272 IdentifierTable(const LangOptions &LangOpts,
273 IdentifierInfoLookup* externalLookup = 0);
275 /// \brief Set the external identifier lookup mechanism.
276 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
277 ExternalLookup = IILookup;
/// getAllocator - Expose the allocator owned by the hash table; get() and
/// CreateIdentifierInfo() allocate IdentifierInfo objects from it.
280 llvm::BumpPtrAllocator& getAllocator() {
281 return HashTable.getAllocator();
284 /// get - Return the identifier token info for the specified named identifier.
/// The name is given as the character range [NameStart, NameEnd). The
/// visible steps are: find or create the string-map entry, consult
/// ExternalLookup when one is set, and otherwise allocate a new
/// IdentifierInfo from the table's allocator.
286 IdentifierInfo &get(const char *NameStart, const char *NameEnd) {
// Find the entry for this spelling, creating an empty one if needed.
287 llvm::StringMapEntry<IdentifierInfo*> &Entry =
288 HashTable.GetOrCreateValue(NameStart, NameEnd);
// A non-null value presumably means the identifier is already known.
290 IdentifierInfo *II = Entry.getValue();
293 // No entry; if we have an external lookup, look there first.
294 if (ExternalLookup) {
295 II = ExternalLookup->get(NameStart, NameEnd);
297 // Cache in the StringMap for subsequent lookups.
303 // Lookups failed, make a new IdentifierInfo.
// Placement-new into storage taken from the hash table's allocator.
304 void *Mem = getAllocator().Allocate<IdentifierInfo>();
305 II = new (Mem) IdentifierInfo();
308 // Make sure getName() knows how to find the IdentifierInfo
315 /// \brief Creates a new IdentifierInfo from the given string.
317 /// This is a lower-level version of get() that requires that this
318 /// identifier not be known previously and that does not consult an
319 /// external source for identifiers. In particular, external
320 /// identifier sources can use this routine to build IdentifierInfo
321 /// nodes and then introduce additional information about those
/// identifiers.
/// The name is given as the character range [NameStart, NameEnd).
323 IdentifierInfo &CreateIdentifierInfo(const char *NameStart,
324 const char *NameEnd) {
// Find the entry for this spelling, creating an empty one if needed.
325 llvm::StringMapEntry<IdentifierInfo*> &Entry =
326 HashTable.GetOrCreateValue(NameStart, NameEnd);
// Precondition check: the entry must not already carry an IdentifierInfo.
328 IdentifierInfo *II = Entry.getValue();
329 assert(!II && "IdentifierInfo already exists");
331 // Lookups failed, make a new IdentifierInfo.
// Placement-new into storage taken from the hash table's allocator.
332 void *Mem = getAllocator().Allocate<IdentifierInfo>();
333 II = new (Mem) IdentifierInfo();
336 // Make sure getName() knows how to find the IdentifierInfo
/// StringRef convenience overload: forwards to the pointer-range version of
/// CreateIdentifierInfo using Name.begin()/Name.end().
342 IdentifierInfo &CreateIdentifierInfo(llvm::StringRef Name) {
343 return CreateIdentifierInfo(Name.begin(), Name.end());
/// StringRef convenience overload: forwards to the pointer-range version of
/// get using Name.begin()/Name.end().
346 IdentifierInfo &get(llvm::StringRef Name) {
347 return get(Name.begin(), Name.end());
// Iteration over the table is read-only: both iterator typedefs name the
// hash table's const_iterator.
350 typedef HashTableTy::const_iterator iterator;
351 typedef HashTableTy::const_iterator const_iterator;
// begin/end/size forward directly to the underlying hash table.
353 iterator begin() const { return HashTable.begin(); }
354 iterator end() const { return HashTable.end(); }
355 unsigned size() const { return HashTable.size(); }
357 /// PrintStats - Print some statistics to stderr that indicate how well the
358 /// hashing is doing.
359 void PrintStats() const;
/// AddKeywords - Declared here only; judging by the name and the constructor
/// comment it presumably registers the keywords for LangOpts - TODO confirm.
361 void AddKeywords(const LangOptions &LangOpts);
364 /// Selector - This smart pointer class efficiently represents Objective-C
365 /// method names. This class will either point to an IdentifierInfo or a
366 /// MultiKeywordSelector (which is private). This enables us to optimize
367 /// selectors that take no arguments and selectors that take 1 argument, which
368 /// accounts for 78% of all selectors in Cocoa.h.
///
/// The kind of pointee is encoded in the low bits of InfoPtr (the ArgFlags
/// mask); see getIdentifierInfoFlag and getAsIdentifierInfo.
370 friend class DiagnosticInfo;
// Tag values kept in the low bits of InfoPtr.
372 enum IdentifierInfoFlag {
373 // MultiKeywordSelector = 0.
// Mask covering both argument-count flag bits.
376 ArgFlags = ZeroArg|OneArg
378 uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
/// Build a selector that refers to a single IdentifierInfo. nArgs must be 0
/// or 1 (asserted below).
380 Selector(IdentifierInfo *II, unsigned nArgs) {
381 InfoPtr = reinterpret_cast<uintptr_t>(II);
// The flag bits live in the low bits of the pointer, so the pointer itself
// must have them clear.
382 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
383 assert(nArgs < 2 && "nArgs not equal to 0/1");
/// Build a selector that refers to a MultiKeywordSelector. The same
/// low-bit alignment requirement applies.
386 Selector(MultiKeywordSelector *SI) {
387 InfoPtr = reinterpret_cast<uintptr_t>(SI);
// NOTE(review): the message below says "IdentifierInfo" although the pointer
// checked here is a MultiKeywordSelector; looks like a copy of the message
// above.
388 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
/// getAsIdentifierInfo - When any flag bit is set, the selector holds an
/// IdentifierInfo: strip the flag bits and return the pointer.
391 IdentifierInfo *getAsIdentifierInfo() const {
392 if (getIdentifierInfoFlag())
393 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
/// getIdentifierInfoFlag - Return the low flag bits of InfoPtr; a result of 0
/// corresponds to the MultiKeywordSelector case noted in IdentifierInfoFlag.
396 unsigned getIdentifierInfoFlag() const {
397 return InfoPtr & ArgFlags;
401 friend class SelectorTable; // only the SelectorTable can create these
402 friend class DeclarationName; // and the AST's DeclarationName.
404 /// The default ctor should only be used when creating data structures that
405 /// will contain selectors.
/// It produces the null selector (InfoPtr == 0, see isNull).
406 Selector() : InfoPtr(0) {}
/// Construct directly from a raw InfoPtr value, e.g. one obtained from
/// getAsOpaquePtr or one of the marker values.
407 Selector(uintptr_t V) : InfoPtr(V) {}
409 /// operator==/!= - Indicate whether the specified selectors are identical.
/// Identity is decided purely by comparing the InfoPtr values.
410 bool operator==(Selector RHS) const {
411 return InfoPtr == RHS.InfoPtr;
413 bool operator!=(Selector RHS) const {
414 return InfoPtr != RHS.InfoPtr;
/// getAsOpaquePtr - Return InfoPtr (flag bits included) as a void pointer.
416 void *getAsOpaquePtr() const {
417 return reinterpret_cast<void*>(InfoPtr);
420 /// \brief Determine whether this is the empty selector.
421 bool isNull() const { return InfoPtr == 0; }
423 // Predicates to identify the selector type.
// A selector is "unary" exactly when its flag equals ZeroArg; every other
// flag value (including the multi-keyword value 0) counts as a keyword
// selector.
424 bool isKeywordSelector() const {
425 return getIdentifierInfoFlag() != ZeroArg;
427 bool isUnarySelector() const {
428 return getIdentifierInfoFlag() == ZeroArg;
// Declared here only; no bodies are provided at this point.
430 unsigned getNumArgs() const;
431 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
433 /// getAsString - Derive the full selector name (e.g. "foo:bar:") and return
434 /// it as an std::string.
435 std::string getAsString() const;
/// getEmptyMarker/getTombstoneMarker - Sentinel selectors built from the raw
/// values uintptr_t(-1) and uintptr_t(-2). They are used as the empty and
/// tombstone keys by DenseMapInfo<clang::Selector>.
437 static Selector getEmptyMarker() {
438 return Selector(uintptr_t(-1));
440 static Selector getTombstoneMarker() {
441 return Selector(uintptr_t(-2));
445 /// SelectorTable - This table allows us to fully hide how we implement
446 /// multi-keyword caching.
447 class SelectorTable {
448 void *Impl; // Actually a SelectorTableImpl
// Copying is disabled by declaring the copy operations without a body.
449 SelectorTable(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
450 void operator=(const SelectorTable&); // DISABLED: DO NOT IMPLEMENT
455 /// getSelector - This can create any sort of selector. NumArgs indicates
456 /// whether this is a no argument selector "foo", a single argument selector
457 /// "foo:" or multi-argument "foo:bar:".
/// IIV points to the identifier(s) making up the selector; presumably an
/// array with one entry per keyword - TODO confirm against the definition.
458 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
/// getUnarySelector - Selector for ID constructed with an argument count
/// of 1.
460 Selector getUnarySelector(IdentifierInfo *ID) {
461 return Selector(ID, 1);
/// getNullarySelector - Selector for ID constructed with an argument count
/// of 0.
463 Selector getNullarySelector(IdentifierInfo *ID) {
464 return Selector(ID, 0);
467 /// constructSetterName - Return the setter name for the given
468 /// identifier, i.e. "set" + Name where the initial character of Name
469 /// has been capitalized.
/// The resulting identifier is looked up (or created) in Idents and turned
/// into a one-argument selector through SelTable.
470 static Selector constructSetterName(IdentifierTable &Idents,
471 SelectorTable &SelTable,
472 const IdentifierInfo *Name) {
473 llvm::SmallString<100> SelectorName;
474 SelectorName = "set";
475 SelectorName += Name->getName();
// "set" occupies indices 0-2, so index 3 is the first character of Name.
// NOTE(review): if Name is empty, index 3 is past the end of the buffer.
// Also, toupper receives a plain char; a negative value (non-ASCII byte where
// char is signed) is outside toupper's defined input range. Confirm callers
// only pass non-empty ASCII names, or cast through unsigned char.
476 SelectorName[3] = toupper(SelectorName[3]);
477 IdentifierInfo *SetterName =
478 &Idents.get(SelectorName.data(),
479 SelectorName.data() + SelectorName.size());
480 return SelTable.getUnarySelector(SetterName);
484 /// DeclarationNameExtra - Common base of the MultiKeywordSelector,
485 /// CXXSpecialName, and CXXOperatorIdName classes, all of which are
486 /// private classes that describe different kinds of names.
487 class DeclarationNameExtra {
489 /// ExtraKind - The kind of "extra" information stored in the
490 /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
491 /// how these enumerator values are used.
495 CXXConversionFunction,
// Enumerators for the overloaded operators are generated by expanding
// OVERLOADED_OPERATOR over the entries of OperatorKinds.def.
496 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
498 #include "clang/Basic/OperatorKinds.def"
504 /// ExtraKindOrNumArgs - Either the kind of C++ special name or
505 /// operator-id (if the value is one of the CXX* enumerators of
506 /// ExtraKind), in which case the DeclarationNameExtra is also a
507 /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or
508 /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName,
509 /// it may be also name common to C++ using-directives (CXXUsingDirective),
510 /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
511 /// arguments in the Objective-C selector, in which case the
512 /// DeclarationNameExtra is also a MultiKeywordSelector.
513 unsigned ExtraKindOrNumArgs;
516 } // end namespace clang
519 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
/// other containers that rely on DenseMapInfo.
/// The empty and tombstone keys are the two marker selectors provided by
/// clang::Selector.
522 struct DenseMapInfo<clang::Selector> {
523 static inline clang::Selector getEmptyKey() {
524 return clang::Selector::getEmptyMarker();
526 static inline clang::Selector getTombstoneKey() {
527 return clang::Selector::getTombstoneMarker();
// Declared here only; no body is provided at this point.
530 static unsigned getHashValue(clang::Selector S);
532 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
// Mark clang::Selector as POD-like for LLVM's type traits.
538 struct isPodLike<clang::Selector> { static const bool value = true; };
541 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
542 // are not guaranteed to be 8-byte aligned.
// Accordingly only one low bit is advertised as available
// (NumLowBitsAvailable = 1).
544 class PointerLikeTypeTraits<clang::IdentifierInfo*> {
546 static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
549 static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
550 return static_cast<clang::IdentifierInfo*>(P);
552 enum { NumLowBitsAvailable = 1 };
// Same traits for pointers to const IdentifierInfo.
556 class PointerLikeTypeTraits<const clang::IdentifierInfo*> {
558 static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
561 static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
562 return static_cast<const clang::IdentifierInfo*>(P);
564 enum { NumLowBitsAvailable = 1 };
567 } // end namespace llvm