1 #define JEMALLOC_PAGES_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
4 #include "jemalloc/internal/pages.h"
6 #include "jemalloc/internal/jemalloc_internal_includes.h"
8 #include "jemalloc/internal/assert.h"
9 #include "jemalloc/internal/malloc_io.h"
11 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
12 #include <sys/sysctl.h>
14 #include <vm/vm_param.h>
18 /******************************************************************************/
21 /* Actual operating system page size, detected during bootstrap, <= PAGE. */
22 static size_t os_page;
/*
 * Protection bits used for committed and decommitted mappings respectively;
 * they are selected by the prot computations in os_pages_map(), pages_map()
 * and pages_commit_impl().
 */
25 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
26 # define PAGES_PROT_DECOMMIT (PROT_NONE)
/*
 * Base flags for the mmap() calls visible in this file.  Assigned during boot
 * (MAP_PRIVATE | MAP_ANON, optionally with MAP_NORESERVE).
 */
27 static int mmap_flags;
/*
 * Result of the boot-time overcommit probe (sysctl or /proc), or false when
 * neither probe is configured.  Consulted by pages_map() and
 * pages_commit_impl().
 */
29 static bool os_overcommits;
/*
 * NOTE(review): presumably the printable names of the thp_mode_t values; the
 * initializer entries are not visible in this excerpt -- confirm.
 */
31 const char *thp_mode_names[] = {
/*
 * opt_thp starts as THP_MODE_DEFAULT.  init_system_thp_mode is filled in by
 * init_thp_state() from the system THP setting.
 */
37 thp_mode_t opt_thp = THP_MODE_DEFAULT;
38 thp_mode_t init_system_thp_mode;
40 /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
41 static bool pages_can_purge_lazy_runtime = true;
43 /******************************************************************************/
45 * Function prototypes for static functions that are referenced prior to
49 static void os_pages_unmap(void *addr, size_t size);
51 /******************************************************************************/
/*
 * Create a mapping of size bytes, optionally at addr, through VirtualAlloc()
 * or mmap().  The preprocessor lines selecting between the two are not
 * visible in this excerpt.
 *
 * *commit selects MEM_COMMIT (VirtualAlloc) or PAGES_PROT_COMMIT versus
 * PAGES_PROT_DECOMMIT (mmap).  addr and size must both be multiples of
 * os_page.  alignment is not referenced in the visible lines.
 *
 * On the mmap() path, a mapping that succeeds but lands somewhere other than
 * a requested non-NULL addr is handed back with os_pages_unmap().
 */
54 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
55 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
56 assert(ALIGNMENT_CEILING(size, os_page) == size);
66 * If VirtualAlloc can't allocate at the given address when one is
67 * given, it fails and returns NULL.
69 ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
73 * We don't use MAP_FIXED here, because it can cause the *replacement*
74 * of existing mappings, and we only want to create new mappings.
77 int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
79 ret = mmap(addr, size, prot, mmap_flags, -1, 0);
83 if (ret == MAP_FAILED) {
85 } else if (addr != NULL && ret != addr) {
87 * We succeeded in mapping memory, but not in the right place.
89 os_pages_unmap(ret, size);
93 assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
/*
 * Reduce an over-sized mapping [addr, addr + alloc_size) to the size bytes
 * that begin leadsize bytes into it.  The wanted start address is
 * addr + leadsize, and alloc_size must cover leadsize + size.
 *
 * Two strategies are visible:
 *  - unmap the whole region and map exactly the wanted range again, which may
 *    land elsewhere or fail; a mapping at the wrong place is unmapped;
 *  - unmap the leading excess and, when non-empty, the trailing excess.
 *
 * NOTE(review): the preprocessor lines that choose between the strategies,
 * the remaining parameter(s) and the return statements are missing from this
 * excerpt -- confirm against the full file.
 */
99 os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
101 void *ret = (void *)((uintptr_t)addr + leadsize);
103 assert(alloc_size >= leadsize + size);
105 os_pages_unmap(addr, alloc_size);
106 void *new_addr = os_pages_map(ret, size, PAGE, commit);
107 if (new_addr == ret) {
110 if (new_addr != NULL) {
111 os_pages_unmap(new_addr, size);
115 size_t trailsize = alloc_size - leadsize - size;
118 os_pages_unmap(addr, leadsize);
120 if (trailsize != 0) {
121 os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
/*
 * Release a mapping with VirtualFree(MEM_RELEASE) or munmap(); the selecting
 * preprocessor lines are not visible here.  addr and size must both be
 * multiples of os_page.
 *
 * When the release call reports failure, the errno text is formatted with
 * buferror() and reported through malloc_printf().  What happens after the
 * report is not visible in this excerpt.
 */
128 os_pages_unmap(void *addr, size_t size) {
129 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
130 assert(ALIGNMENT_CEILING(size, os_page) == size);
133 if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
135 if (munmap(addr, size) == -1)
138 char buf[BUFERROR_BUF];
140 buferror(get_errno(), buf, sizeof(buf));
141 malloc_printf("<jemalloc>: Error in "
/*
 * Fallback for aligned mapping: request size + alignment - os_page bytes,
 * compute the lead needed to reach an aligned address inside that mapping,
 * and trim the excess with os_pages_trim().  The map-and-trim step repeats
 * while the trim returns NULL.  The final result is asserted to be
 * PAGE-aligned.
 *
 * The handling of the wrap-around case and of a failed os_pages_map() call is
 * not visible in this excerpt.
 */
155 pages_map_slow(size_t size, size_t alignment, bool *commit) {
156 size_t alloc_size = size + alignment - os_page;
157 /* Beware size_t wrap-around. */
158 if (alloc_size < size) {
164 void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
168 size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
170 ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
171 } while (ret == NULL);
174 assert(PAGE_ADDR2BASE(ret) == ret);
/*
 * Map size bytes aligned to alignment, optionally at addr.  alignment must be
 * at least PAGE and addr must already be aligned to it.
 *
 * On FreeBSD with MAP_EXCL, one mmap() call is issued with flags extended by
 * MAP_FIXED | MAP_EXCL or by MAP_ALIGNED(log2 of alignment); the conditions
 * choosing between the two are not visible in this excerpt.
 *
 * Otherwise an exact-size os_pages_map() is tried first.  If that fails or
 * lands at addr, nothing more is attempted in the visible code.  If the
 * result is misaligned, it is unmapped and pages_map_slow() takes over.
 */
179 pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
180 assert(alignment >= PAGE);
181 assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
183 #if defined(__FreeBSD__) && defined(MAP_EXCL)
185 * FreeBSD has mechanisms both to mmap at specific address without
186 * touching existing mappings, and to mmap with specific alignment.
189 if (os_overcommits) {
193 int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
194 int flags = mmap_flags;
197 flags |= MAP_FIXED | MAP_EXCL;
199 unsigned alignment_bits = ffs_zu(alignment);
200 assert(alignment_bits > 1);
201 flags |= MAP_ALIGNED(alignment_bits - 1);
204 void *ret = mmap(addr, size, prot, flags, -1, 0);
205 if (ret == MAP_FAILED) {
213 * Ideally, there would be a way to specify alignment to mmap() (like
214 * NetBSD has), but in the absence of such a feature, we have to work
215 * hard to efficiently create aligned mappings. The reliable, but
216 * slow method is to create a mapping that is over-sized, then trim the
217 * excess. However, that always results in one or two calls to
218 * os_pages_unmap(), and it can leave holes in the process's virtual
219 * memory map if memory grows downward.
221 * Optimistically try mapping precisely the right amount before falling
222 * back to the slow method, with the expectation that the optimistic
223 * approach works most of the time.
226 void *ret = os_pages_map(addr, size, os_page, commit);
/* Either the mapping failed or it landed exactly at the requested address. */
227 if (ret == NULL || ret == addr) {
230 assert(addr == NULL);
/* Misaligned result: give it back and use the over-allocate-and-trim path. */
231 if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
232 os_pages_unmap(ret, size);
233 return pages_map_slow(size, alignment, commit);
236 assert(PAGE_ADDR2BASE(ret) == ret);
/*
 * Unmap a region through os_pages_unmap().  addr must be PAGE-aligned and
 * size must be a multiple of PAGE.
 */
241 pages_unmap(void *addr, size_t size) {
242 assert(PAGE_ADDR2BASE(addr) == addr);
243 assert(PAGE_CEILING(size) == size);
245 os_pages_unmap(addr, size);
/*
 * Commit (commit is true) or decommit (commit is false) a PAGE-aligned range
 * whose size is a multiple of PAGE.
 *
 * os_overcommits is checked first; the body of that branch is not visible in
 * this excerpt.  The VirtualAlloc()/VirtualFree() variant evaluates to true
 * when the call fails.  The mmap() variant maps over the range in place with
 * MAP_FIXED and the matching protection; a result at a different address is
 * unmapped again.
 */
249 pages_commit_impl(void *addr, size_t size, bool commit) {
250 assert(PAGE_ADDR2BASE(addr) == addr);
251 assert(PAGE_CEILING(size) == size);
253 if (os_overcommits) {
258 return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
259 PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
262 int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
263 void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
265 if (result == MAP_FAILED) {
268 if (result != addr) {
270 * We succeeded in mapping memory, but not in the right
273 os_pages_unmap(result, size);
/* Commit the range; forwards to pages_commit_impl() with commit set to true. */
282 pages_commit(void *addr, size_t size) {
283 return pages_commit_impl(addr, size, true);
/* Decommit the range; forwards to pages_commit_impl() with commit set to false. */
287 pages_decommit(void *addr, size_t size) {
288 return pages_commit_impl(addr, size, false);
/*
 * Lazily purge a PAGE-aligned range whose size is a multiple of PAGE.
 *
 * The build-time capability (pages_can_purge_lazy) and the boot-time probe
 * result (pages_can_purge_lazy_runtime) are both checked before any purge
 * call; the bodies of those checks are not visible in this excerpt.
 *
 * The purge mechanism depends on the build: VirtualAlloc(MEM_RESET), a
 * madvise() call under JEMALLOC_PURGE_MADVISE_FREE (its advice argument is
 * not visible here), or madvise(MADV_DONTNEED) when
 * JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is not defined.  The MADV_DONTNEED
 * branch returns true when madvise() fails.
 */
292 pages_purge_lazy(void *addr, size_t size) {
293 assert(PAGE_ADDR2BASE(addr) == addr);
294 assert(PAGE_CEILING(size) == size);
296 if (!pages_can_purge_lazy) {
299 if (!pages_can_purge_lazy_runtime) {
301 * Built with lazy purge enabled, but detected it was not
302 * supported on the current system.
308 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
310 #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
311 return (madvise(addr, size,
318 #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
319 !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
320 return (madvise(addr, size, MADV_DONTNEED) != 0);
/*
 * Forcibly purge a PAGE-aligned range whose size is a multiple of PAGE.
 *
 * pages_can_purge_forced is checked first (branch body not visible here).
 * When both JEMALLOC_PURGE_MADVISE_DONTNEED and
 * JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS are defined, madvise(MADV_DONTNEED)
 * is used and true is returned on failure.  Under JEMALLOC_MAPS_COALESCE the
 * range is overlaid through pages_commit() and its result is returned.
 */
327 pages_purge_forced(void *addr, size_t size) {
328 assert(PAGE_ADDR2BASE(addr) == addr);
329 assert(PAGE_CEILING(size) == size);
331 if (!pages_can_purge_forced) {
335 #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
336 defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
337 return (madvise(addr, size, MADV_DONTNEED) != 0);
338 #elif defined(JEMALLOC_MAPS_COALESCE)
339 /* Try to overlay a new demand-zeroed mapping. */
340 return pages_commit(addr, size);
/*
 * Request huge pages for a range.  With JEMALLOC_HAVE_MADVISE_HUGE defined,
 * returns true when madvise(MADV_HUGEPAGE) fails.
 *
 * NOTE(review): the HUGEPAGE alignment asserts are presumably guarded by the
 * aligned parameter; the guarding line is not visible in this excerpt --
 * confirm.
 */
347 pages_huge_impl(void *addr, size_t size, bool aligned) {
349 assert(HUGEPAGE_ADDR2BASE(addr) == addr);
350 assert(HUGEPAGE_CEILING(size) == size);
352 #ifdef JEMALLOC_HAVE_MADVISE_HUGE
353 return (madvise(addr, size, MADV_HUGEPAGE) != 0);
/* Forwards to pages_huge_impl() with aligned set to true. */
360 pages_huge(void *addr, size_t size) {
361 return pages_huge_impl(addr, size, true);
/* Forwards to pages_huge_impl() with aligned set to false. */
365 pages_huge_unaligned(void *addr, size_t size) {
366 return pages_huge_impl(addr, size, false);
/*
 * Opt a range out of huge pages.  With JEMALLOC_HAVE_MADVISE_HUGE defined,
 * returns true when madvise(MADV_NOHUGEPAGE) fails.
 *
 * NOTE(review): the HUGEPAGE alignment asserts are presumably guarded by the
 * aligned parameter; the guarding line is not visible in this excerpt --
 * confirm.
 */
370 pages_nohuge_impl(void *addr, size_t size, bool aligned) {
372 assert(HUGEPAGE_ADDR2BASE(addr) == addr);
373 assert(HUGEPAGE_CEILING(size) == size);
376 #ifdef JEMALLOC_HAVE_MADVISE_HUGE
377 return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
/* Forwards to pages_nohuge_impl() with aligned set to true. */
384 pages_nohuge(void *addr, size_t size) {
385 return pages_nohuge_impl(addr, size, true);
/* Forwards to pages_nohuge_impl() with aligned set to false. */
389 pages_nohuge_unaligned(void *addr, size_t size) {
390 return pages_nohuge_impl(addr, size, false);
/*
 * Apply MADV_DONTDUMP to a PAGE-aligned range whose size is a multiple of
 * PAGE.  With JEMALLOC_MADVISE_DONTDUMP defined, returns true when madvise()
 * fails; the alternative branch is not visible in this excerpt.
 */
394 pages_dontdump(void *addr, size_t size) {
395 assert(PAGE_ADDR2BASE(addr) == addr);
396 assert(PAGE_CEILING(size) == size);
397 #ifdef JEMALLOC_MADVISE_DONTDUMP
398 return madvise(addr, size, MADV_DONTDUMP) != 0;
/*
 * Apply MADV_DODUMP to a PAGE-aligned range whose size is a multiple of PAGE.
 * With JEMALLOC_MADVISE_DONTDUMP defined, returns true when madvise() fails;
 * the alternative branch is not visible in this excerpt.
 */
405 pages_dodump(void *addr, size_t size) {
406 assert(PAGE_ADDR2BASE(addr) == addr);
407 assert(PAGE_CEILING(size) == size);
408 #ifdef JEMALLOC_MADVISE_DONTDUMP
409 return madvise(addr, size, MADV_DODUMP) != 0;
/*
 * Return the operating system page size.  Three sources are visible: the
 * dwPageSize field of si (its setup is not visible in this excerpt),
 * getpagesize() on FreeBSD, and otherwise sysconf(_SC_PAGESIZE) converted to
 * size_t.  Any handling between the sysconf() call and the return is not
 * visible here.
 */
417 os_page_detect(void) {
421 return si.dwPageSize;
422 #elif defined(__FreeBSD__)
423 return getpagesize();
425 long result = sysconf(_SC_PAGESIZE);
429 return (size_t)result;
433 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
/*
 * Probe overcommit through sysctl.  On FreeBSD with VM_OVERCOMMIT defined the
 * value is read with sysctl() and a two-element MIB; the other visible query
 * is sysctlbyname() on vm.overcommit.  Returns false when the query fails,
 * and otherwise true exactly when the low two bits of the value are zero.
 */
435 os_overcommits_sysctl(void) {
439 sz = sizeof(vm_overcommit);
440 #if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
444 mib[1] = VM_OVERCOMMIT;
445 if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
446 return false; /* Error. */
449 if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
450 return false; /* Error. */
454 return ((vm_overcommit & 0x3) == 0);
458 #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
460 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
461 * reentry during bootstrapping if another library has interposed system call
/*
 * Probe overcommit by reading /proc/sys/vm/overcommit_memory.
 *
 * The file is opened read-only through a raw SYS_open or SYS_openat syscall
 * when JEMALLOC_USE_SYSCALL and the syscall number are defined, and through
 * open() otherwise.  O_CLOEXEC is passed when it is defined; the other
 * variants set FD_CLOEXEC afterwards with fcntl().  The descriptor is read
 * with malloc_read_fd() and then closed.
 *
 * Returns false on the visible error paths, and otherwise true when the first
 * byte read is the character 0 or 1 (heuristic or always overcommit).
 */
465 os_overcommits_proc(void) {
469 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
470 #if defined(O_CLOEXEC)
471 fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
474 fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
476 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
479 #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
480 #if defined(O_CLOEXEC)
481 fd = (int)syscall(SYS_openat,
482 AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
484 fd = (int)syscall(SYS_openat,
485 AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
487 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
491 #if defined(O_CLOEXEC)
492 fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
494 fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
496 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
502 return false; /* Error. */
505 ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
506 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
507 syscall(SYS_close, fd);
513 return false; /* Error. */
516 * /proc/sys/vm/overcommit_memory meanings:
517 * 0: Heuristic overcommit.
518 * 1: Always overcommit.
519 * 2: Never overcommit.
521 return (buf[0] == '0' || buf[0] == '1');
/*
 * Apply the configured THP mode (opt_thp) to the range [ptr, ptr + size).
 *
 * The first check covers opt_thp being thp_mode_default or equal to
 * init_system_thp_mode; its body is not visible in this excerpt.  Otherwise
 * thp_mode_always requests huge pages through pages_huge_unaligned() unless
 * the system mode is thp_mode_never, and thp_mode_never opts out through
 * pages_nohuge_unaligned().  The results of those calls are ignored.
 */
526 pages_set_thp_state (void *ptr, size_t size) {
527 if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
530 assert(opt_thp != thp_mode_not_supported &&
531 init_system_thp_mode != thp_mode_not_supported);
533 if (opt_thp == thp_mode_always
534 && init_system_thp_mode != thp_mode_never) {
535 assert(init_system_thp_mode == thp_mode_default);
536 pages_huge_unaligned(ptr, size);
537 } else if (opt_thp == thp_mode_never) {
538 assert(init_system_thp_mode == thp_mode_default ||
539 init_system_thp_mode == thp_mode_always);
540 pages_nohuge_unaligned(ptr, size);
/*
 * Determine init_system_thp_mode from
 * /sys/kernel/mm/transparent_hugepage/enabled.
 *
 * Without have_madvise_huge, a message is written when metadata THP is
 * enabled and opt_abort is set; what follows is not visible in this excerpt.
 * Otherwise the file is opened (raw SYS_open syscall or open()), read with
 * malloc_read_fd() into a buffer sized for the longest known state string,
 * closed, and compared against the three known state strings:
 *   madvise selected -> thp_mode_default
 *   always selected  -> thp_mode_always
 *   never selected   -> thp_mode_never
 * The final assignment sets both opt_thp and init_system_thp_mode to
 * thp_mode_not_supported; presumably this is the shared error path.
 *
 * NOTE(review): the comparisons use nread as the strncmp() length.  A length
 * of zero compares equal, and a negative nread converts to a huge size_t.
 * Whether nread is validated between the read and the comparisons is not
 * visible in this excerpt -- confirm against the full file.
 */
545 init_thp_state(void) {
546 if (!have_madvise_huge) {
547 if (metadata_thp_enabled() && opt_abort) {
548 malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
554 static const char sys_state_madvise[] = "always [madvise] never\n";
555 static const char sys_state_always[] = "[always] madvise never\n";
556 static const char sys_state_never[] = "always madvise [never]\n";
557 char buf[sizeof(sys_state_madvise)];
559 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
560 int fd = (int)syscall(SYS_open,
561 "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
563 int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
569 ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
570 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
571 syscall(SYS_close, fd);
576 if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
577 init_system_thp_mode = thp_mode_default;
578 } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
579 init_system_thp_mode = thp_mode_always;
580 } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
581 init_system_thp_mode = thp_mode_never;
587 opt_thp = init_system_thp_mode = thp_mode_not_supported;
/*
 * Boot-time setup.  NOTE(review): the header of the enclosing function is not
 * visible in this excerpt.
 *
 * Visible steps, in order:
 *  1. detect the OS page size and report one larger than PAGE as unsupported;
 *  2. set the base mmap flags to MAP_PRIVATE | MAP_ANON;
 *  3. determine os_overcommits from the sysctl probe, the /proc probe, or
 *     false, adding MAP_NORESERVE to the flags when the /proc probe reports
 *     overcommit and MAP_NORESERVE is defined;
 *  4. probe lazy purge on a scratch page.
 */
592 os_page = os_page_detect();
593 if (os_page > PAGE) {
594 malloc_write("<jemalloc>: Unsupported system page size\n");
602 mmap_flags = MAP_PRIVATE | MAP_ANON;
605 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
606 os_overcommits = os_overcommits_sysctl();
607 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
608 os_overcommits = os_overcommits_proc();
609 # ifdef MAP_NORESERVE
610 if (os_overcommits) {
611 mmap_flags |= MAP_NORESERVE;
615 os_overcommits = false;
622 * FreeBSD doesn't need the check; madvise(2) is known to work.
625 /* Detect lazy purge runtime support. */
626 if (pages_can_purge_lazy) {
627 bool committed = false;
628 void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
629 if (madv_free_page == NULL) {
632 assert(pages_can_purge_lazy_runtime);
/* A purge that reports failure on the scratch page turns lazy purge off. */
633 if (pages_purge_lazy(madv_free_page, PAGE)) {
634 pages_can_purge_lazy_runtime = false;
636 os_pages_unmap(madv_free_page, PAGE);