1 //===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file \brief This file provides a way to read an import library member in a
13 /// Archive Files in Windows
14 /// ========================
16 /// In Windows, archive files with .lib file extension serve two different
19 /// - For static linking: An archive file in this use case contains multiple
20 /// regular .obj files and is used for static linking. This is the same
21 /// usage as .a file in Unix.
23 /// - For dynamic linking: An archive file in this use case contains pseudo
24 /// .obj files to describe exported symbols of a DLL. Each pseudo .obj file
25 /// in an archive has a name of an exported symbol and a DLL filename from
26 /// which the symbol can be imported. When you link a DLL on Windows, you
27 /// pass the name of the .lib file for the DLL instead of the DLL filename
28 /// itself. That is the Windows way of linking against a shared library.
30 /// This file contains a function to handle the pseudo object file.
32 /// Windows Loader and Import Address Table
33 /// =======================================
35 /// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs
36 /// contains a list of DLL names and list of symbols that need to be resolved by
37 /// the loader. Windows loader maps the executable and all the DLLs to memory,
38 /// resolves the symbols referencing items in DLLs, and updates the import
39 /// address table (IAT) in memory. The IAT is an array of pointers to all of the
40 /// data or functions in DLL referenced by the executable. You cannot access
41 /// items in DLLs directly. They have to be accessed through an extra level of
44 /// So, if you want to access an item in DLL, you have to go through a
45 /// pointer. How do you actually do that? You need a symbol for a pointer in the
46 /// IAT. For each symbol defined in a DLL, a symbol with "__imp_" prefix is
47 /// exported from the DLL for an IAT entry. For example, if you have a global
48 /// variable "foo" in a DLL, a pointer to the variable is available as
49 /// "_imp__foo". The IAT is an array of _imp__ symbols.
51 /// Is this OK? That's not that complicated. Because items in a DLL are not
52 /// directly accessible, you need to access through a pointer, and the pointer
53 /// is available as a symbol with _imp__ prefix.
55 /// Note 1: Although you can write code with _imp__ prefix, today's compiler and
56 /// linker let you write code as if there's no extra level of indirection.
57 /// That's why you haven't seen lots of _imp__ in your code. A variable or a
58 /// function declared with "dllimport" attribute is treated as an item in a DLL,
59 /// and the compiler automatically mangles its name and inserts the extra level
60 /// of indirection when accessing the item. Here are some examples:
62 /// __declspec(dllimport) int var_in_dll;
63 /// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3;
65 /// __declspec(dllimport) int fn_in_dll(void);
66 /// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)();
68 /// It's just that the compiler rewrites the code for you so that you don't need to
69 /// handle the indirection yourself.
71 /// Note 2: __declspec(dllimport) is mandatory for data but optional for
72 /// function. For a function, the linker creates a jump table with the original
73 /// symbol name, so that the function is accessible without _imp__ prefix. The
74 /// same function in a DLL can be called through two different symbols if it's
80 /// The above functions do the same thing. fn's content is a JMP instruction to
81 /// branch to the address pointed by _imp__fn. The latter may be a little bit
82 /// slower than the former because it will execute the extra JMP instruction,
83 /// but that's usually negligible.
85 /// If a function is dllimport'ed, which is usually done in a header file,
86 /// mangled name will be used at compile time so the jump table will not be
89 /// Because there's no way to hide the indirection for data access at link time,
90 /// data has to be accessed through dllimport'ed symbols or explicit _imp__
93 /// Idata Sections in the Pseudo Object File
94 /// ========================================
96 /// The object file created by cl.exe has several sections whose name starts
97 /// with ".idata$" followed by a number. The contents of the sections seem to be the
98 /// fragments of a complete ".idata" section. These sections have relocations for
99 /// the data referenced from the idata section. Generally, the linker discards
100 /// "$" and all characters that follow from the section name and merges their
101 /// contents to one section. So, it looks like if everything would work fine,
102 /// the idata section would naturally be constructed without having any special
103 /// code for doing that.
105 /// However, the LLD linker cannot do that. An idata section constructed in that
106 /// way was never in a valid format. We don't know the reason yet. Our
107 /// assumption on the idata fragment could simply be wrong, or the LLD linker is
108 /// not powerful enough to do the job. Meanwhile, we construct the idata section
109 /// ourselves. All the "idata$" sections in the pseudo object file are currently
112 /// Creating Atoms for the Import Address Table
113 /// ===========================================
115 /// The function in this file reads a pseudo object file and creates at most two
116 /// atoms. One is a shared library atom for the _imp__ symbol. The other is a
117 /// defined atom for the JMP instruction if the symbol is for a function.
119 //===----------------------------------------------------------------------===//
122 #include "lld/Core/Error.h"
123 #include "lld/Core/File.h"
124 #include "lld/Core/SharedLibraryAtom.h"
125 #include "lld/ReaderWriter/PECOFFLinkingContext.h"
126 #include "llvm/ADT/ArrayRef.h"
127 #include "llvm/ADT/STLExtras.h"
128 #include "llvm/Object/COFF.h"
129 #include "llvm/Support/COFF.h"
130 #include "llvm/Support/Casting.h"
131 #include "llvm/Support/Debug.h"
132 #include "llvm/Support/Endian.h"
133 #include "llvm/Support/ErrorHandling.h"
134 #include "llvm/Support/Memory.h"
135 #include "llvm/Support/MemoryBuffer.h"
136 #include "llvm/Support/raw_ostream.h"
139 #include <system_error>
143 using namespace lld::pecoff;
144 using namespace llvm;
145 using namespace llvm::support::endian;
147 #define DEBUG_TYPE "ReaderImportHeader"
153 // This code is valid both in x86 and x64.
// Jump-table stub bytes: an indirect JMP whose 32-bit operand is emitted as
// zero, followed by two INT 3 bytes.
// NOTE(review): the zero operand is presumably filled in through the reference
// added by setJumpInstTarget(); the offset used is not visible here -- confirm.
154 const uint8_t FuncAtomContentX86[] = {
155 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
156 0xcc, 0xcc // INT 3; INT 3
// Jump-table stub bytes for ARM NT, as annotated per line below: a mov.w/movt
// pair builds a value in ip (both immediates are emitted as zero), then pc is
// loaded from [ip].
159 const uint8_t FuncAtomContentARMNT[] = {
160 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
161 0xc0, 0xf2, 0x00, 0x0c, // movt ip, #0
162 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
/// Adds to src a newly created SimpleReference in the COFF kind namespace,
/// with the architecture and relocation kind chosen by machine:
///   - IMAGE_FILE_MACHINE_I386  -> x86,    IMAGE_REL_I386_DIR32
///   - IMAGE_FILE_MACHINE_AMD64 -> x86_64, IMAGE_REL_AMD64_REL32
///   - IMAGE_FILE_MACHINE_ARMNT -> ARM,    IMAGE_REL_ARM_MOV32T
/// Any other machine type is reported as a fatal error.
/// NOTE(review): dst is presumably the reference target and off the offset in
/// src's content where the relocation applies; the constructor arguments that
/// would show this are outside the visible lines -- confirm.
165 static void setJumpInstTarget(COFFLinkerInternalAtom *src, const Atom *dst,
166 int off, MachineTypes machine) {
167 SimpleReference *ref;
170 default: llvm::report_fatal_error("unsupported machine type");
171 case llvm::COFF::IMAGE_FILE_MACHINE_I386:
172 ref = new SimpleReference(Reference::KindNamespace::COFF,
173 Reference::KindArch::x86,
174 llvm::COFF::IMAGE_REL_I386_DIR32,
177 case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
178 ref = new SimpleReference(Reference::KindNamespace::COFF,
179 Reference::KindArch::x86_64,
180 llvm::COFF::IMAGE_REL_AMD64_REL32,
183 case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
184 ref = new SimpleReference(Reference::KindNamespace::COFF,
185 Reference::KindArch::ARM,
186 llvm::COFF::IMAGE_REL_ARM_MOV32T,
// The raw pointer is wrapped in a unique_ptr and handed over to the atom.
190 src->addReference(std::unique_ptr<SimpleReference>(ref));
193 /// The defined atom for a jump table entry.
/// Its content is the machine-specific stub produced by createContent(), and
/// the constructor calls setJumpInstTarget() with the import atom (impAtom)
/// and a machine-dependent Offset.
194 class FuncAtom : public COFFLinkerInternalAtom {
196 FuncAtom(const File &file, StringRef symbolName,
197 const COFFSharedLibraryAtom *impAtom, MachineTypes machine)
198 : COFFLinkerInternalAtom(file, /*ordinal*/ 0, createContent(machine),
// Offset is selected per machine type; i386 and AMD64 share a case. The
// values themselves are outside the visible lines.
203 default: llvm::report_fatal_error("unsupported machine type");
204 case llvm::COFF::IMAGE_FILE_MACHINE_I386:
205 case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
208 case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
213 setJumpInstTarget(this, impAtom, Offset, machine);
// Fixed properties of every stub: ordinal 0, global scope, code content,
// Alignment(1), and read/execute permissions.
216 uint64_t ordinal() const override { return 0; }
217 Scope scope() const override { return scopeGlobal; }
218 ContentType contentType() const override { return typeCode; }
219 Alignment alignment() const override { return Alignment(1); }
220 ContentPermissions permissions() const override { return permR_X; }
// Returns a copy of the stub bytes for the given machine type:
// FuncAtomContentX86 for i386 and AMD64, FuncAtomContentARMNT for ARM NT.
// Any other machine type is reported as a fatal error.
223 std::vector<uint8_t> createContent(MachineTypes machine) const {
228 default: llvm::report_fatal_error("unsupported machine type");
229 case llvm::COFF::IMAGE_FILE_MACHINE_I386:
230 case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
231 Data = FuncAtomContentX86;
232 Size = sizeof(FuncAtomContentX86);
234 case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
235 Data = FuncAtomContentARMNT;
236 Size = sizeof(FuncAtomContentARMNT);
240 return std::vector<uint8_t>(Data, Data + Size);
/// A File of kind kindSharedLibrary backed by a single memory buffer that
/// holds a COFF::ImportHeader followed by its string data. doParse() decodes
/// the buffer and creates one shared library atom, plus a FuncAtom when the
/// import type is IMPORT_CODE.
244 class FileImportLibrary : public File {
// Takes ownership of the buffer; the file is named after the buffer
// identifier. machine later selects the stub used by FuncAtom.
246 FileImportLibrary(std::unique_ptr<MemoryBuffer> mb, MachineTypes machine)
247 : File(mb->getBufferIdentifier(), kindSharedLibrary),
248 _mb(std::move(mb)), _machine(machine) {}
// Decodes the import header and populates the atom collections. Returns
// unknown_file_format when the buffer size does not match the header, and a
// default-constructed (success) error_code otherwise.
250 std::error_code doParse() override {
251 const char *buf = _mb->getBufferStart();
252 const char *end = _mb->getBufferEnd();
254 // The size of the string that follows the header.
256 = read32le(buf + offsetof(COFF::ImportHeader, SizeOfData));
258 // Check if the total size is valid.
// The buffer must be exactly the header plus dataSize bytes.
259 if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize)
260 return make_error_code(NativeReaderError::unknown_file_format);
262 uint16_t hint = read16le(buf + offsetof(COFF::ImportHeader, OrdinalHint));
// Two consecutive NUL-terminated strings follow the header: the symbol
// name, then the DLL name.
// NOTE(review): both StringRefs are built from a bare char pointer, so
// their length comes from scanning for a NUL and is not bounded by `end`
// or dataSize here -- confirm malformed input cannot cause an over-read.
263 StringRef symbolName(buf + sizeof(COFF::ImportHeader));
264 StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1);
266 // TypeInfo is a bitfield. The least significant 2 bits are import
267 // type, followed by 3 bit import name type.
268 uint16_t typeInfo = read16le(buf + offsetof(COFF::ImportHeader, TypeInfo));
269 int type = typeInfo & 0x3;
270 int nameType = (typeInfo >> 2) & 0x7;
272 // Symbol name used by the linker may be different from the symbol name used
273 // by the loader. The latter may lack symbol decorations, or may not even
274 // have name if it's imported by ordinal.
275 StringRef importName = symbolNameToImportName(symbolName, nameType);
// Every import gets a shared library atom; only code imports also get a
// jump-table stub (FuncAtom) that refers to it.
277 const COFFSharedLibraryAtom *dataAtom =
278 addSharedLibraryAtom(hint, symbolName, importName, dllName);
279 if (type == llvm::COFF::IMPORT_CODE)
280 addFuncAtom(symbolName, dllName, dataAtom);
282 return std::error_code();
// Atom collection accessors. Defined and shared library atoms come from the
// collections filled in by doParse(); the undefined and absolute accessors
// return _noUndefinedAtoms and _noAbsoluteAtoms.
285 const atom_collection<DefinedAtom> &defined() const override {
286 return _definedAtoms;
289 const atom_collection<UndefinedAtom> &undefined() const override {
290 return _noUndefinedAtoms;
293 const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
294 return _sharedLibraryAtoms;
297 const atom_collection<AbsoluteAtom> &absolute() const override {
298 return _noAbsoluteAtoms;
// Creates a COFFSharedLibraryAtom in _alloc from the given hint and names and
// appends it to _sharedLibraryAtoms.
302 const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint,
303 StringRef symbolName,
304 StringRef importName,
306 auto *atom = new (_alloc)
307 COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName);
308 _sharedLibraryAtoms._atoms.push_back(atom);
// Creates a FuncAtom in _alloc for symbolName that refers to impAtom, and
// appends it to _definedAtoms.
// NOTE(review): dllName is not used in the lines visible here.
312 void addFuncAtom(StringRef symbolName, StringRef dllName,
313 const COFFSharedLibraryAtom *impAtom) {
314 auto *atom = new (_alloc) FuncAtom(*this, symbolName, impAtom, _machine);
315 _definedAtoms._atoms.push_back(atom);
318 atom_collection_vector<DefinedAtom> _definedAtoms;
319 atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
// Backing storage for the atoms and import-name strings created above.
320 mutable llvm::BumpPtrAllocator _alloc;
322 // Does the same thing as StringRef::ltrim() but removes at most one
// leading character: the first one, and only if it occurs in chars.
324 StringRef ltrim1(StringRef str, const char *chars) const {
325 if (!str.empty() && strchr(chars, str[0]))
326 return str.substr(1);
330 // Convert the given symbol name to the import symbol name exported by the
// DLL. nameType selects the rule; one case per IMPORT_* constant below.
332 StringRef symbolNameToImportName(StringRef symbolName, int nameType) const {
335 case llvm::COFF::IMPORT_ORDINAL:
336 // The import is by ordinal. No symbol name will be used to identify the
337 // item in the DLL. Only its ordinal will be used.
339 case llvm::COFF::IMPORT_NAME:
340 // The import name in this case is identical to the symbol name.
342 case llvm::COFF::IMPORT_NAME_NOPREFIX:
343 // The import name is the symbol name without leading ?, @ or _.
344 ret = ltrim1(symbolName, "?@_");
346 case llvm::COFF::IMPORT_NAME_UNDECORATE:
347 // Similar to NOPREFIX, but we also need to truncate at the first @.
348 ret = ltrim1(symbolName, "?@_");
349 ret = ret.substr(0, ret.find('@'));
// Copy the result into a std::string placed in _alloc.
// NOTE(review): a bump allocator normally does not run destructors, so any
// heap buffer owned by this std::string is presumably never released --
// confirm this is acceptable.
352 std::string *str = new (_alloc) std::string(ret);
// The buffer being parsed, and the target machine type passed to FuncAtom.
356 std::unique_ptr<MemoryBuffer> _mb;
357 MachineTypes _machine;
/// Reader that recognizes buffers whose file magic is coff_import_library and
/// wraps each one in a FileImportLibrary for the linking context's machine
/// type.
360 class COFFImportLibraryReader : public Reader {
362 COFFImportLibraryReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {}
// A buffer shorter than the fixed-size COFF::ImportHeader is tested for
// first; after that the decision depends only on the file magic.
364 bool canParse(file_magic magic, StringRef,
365 const MemoryBuffer &mb) const override {
366 if (mb.getBufferSize() < sizeof(COFF::ImportHeader))
368 return (magic == llvm::sys::fs::file_magic::coff_import_library);
// Transfers the buffer into a new FileImportLibrary, appends that file to
// result, and returns a default-constructed (success) error_code.
372 loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &,
373 std::vector<std::unique_ptr<File> > &result) const override {
374 auto *file = new FileImportLibrary(std::move(mb), _ctx.getMachineType());
375 result.push_back(std::unique_ptr<File>(file));
376 return std::error_code();
// Linking context; queried for the machine type in loadFile().
380 PECOFFLinkingContext &_ctx;
383 } // end anonymous namespace
/// Adds a COFFImportLibraryReader bound to ctx to this Registry.
385 void Registry::addSupportCOFFImportLibraries(PECOFFLinkingContext &ctx) {
386 add(llvm::make_unique<COFFImportLibraryReader>(ctx));
389 } // end namespace lld