2 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
5 //===----------------------------------------------------------------------===//
7 // The LLVM Compiler Infrastructure
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
12 //===----------------------------------------------------------------------===//
16 #include "kmp_wrapper_malloc.h"
18 // Disable bget when it is not used
21 /* Thread private buffer management code */
/* Function-pointer types for BGET's automatic pool-expansion hooks
   (installed via bectl()): compaction callback, block-acquire callback
   (e.g. malloc), and block-release callback (e.g. free). */
23 typedef int (*bget_compact_t)(size_t, int);
24 typedef void *(*bget_acquire_t)(size_t);
25 typedef void (*bget_release_t)(void *);
27 /* NOTE: bufsize must be a signed datatype */
30 #if KMP_ARCH_X86 || KMP_ARCH_ARM
31 typedef kmp_int32 bufsize;
33 typedef kmp_int64 bufsize;
36 typedef ssize_t bufsize;
39 /* The three modes of operation are fifo search, lifo search, and best-fit */
41 typedef enum bget_mode {
/* Forward declarations of the core BGET allocator entry points, all keyed by
   the owning thread descriptor (each thread has its own pool). */
47 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
48 static void *bget(kmp_info_t *th, bufsize size);
49 static void *bgetz(kmp_info_t *th, bufsize size);
50 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
51 static void brel(kmp_info_t *th, void *buf);
52 static void bectl(kmp_info_t *th, bget_compact_t compact,
53 bget_acquire_t acquire, bget_release_t release,
56 /* BGET CONFIGURATION */
57 /* Buffer allocation size quantum: all buffers allocated are a
58 multiple of this size. This MUST be a power of two. */
60 /* On IA-32 architecture with Linux* OS, malloc() does not
61 ensure 16 byte alignment */
63 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
66 #define AlignType double
71 #define AlignType _Quad
75 // Define this symbol to enable the bstats() function which calculates the
76 // total free space in the buffer pool, the largest available buffer, and the
77 // total space currently allocated.
82 // Define this symbol to enable the bpoold() function which dumps the buffers
86 // Define this symbol to enable the bpoolv() function for validating a buffer
90 // Define this symbol to enable the bufdump() function which allows dumping the
91 // contents of an allocated or free buffer.
96 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
97 // who attempt to use pointers into released buffers.
100 // Use a best fit algorithm when searching for space for an allocation request.
101 // This uses memory more efficiently, but allocation will be much slower.
104 #endif /* NOT_USED_NOW */
105 #endif /* KMP_DEBUG */
/* Free-list bin size thresholds, in bytes, ascending powers of two.
   bget_get_bin() binary-searches this table to map a buffer size to a
   free-list bin; MAX_BGET_BINS is derived from the table length. */
107 static bufsize bget_bin_size[] = {
109 // 1 << 6, /* .5 Cache line */
110 1 << 7, /* 1 Cache line, new */
111 1 << 8, /* 2 Cache lines */
112 1 << 9, /* 4 Cache lines, new */
113 1 << 10, /* 8 Cache lines */
114 1 << 11, /* 16 Cache lines, new */
115 1 << 12, 1 << 13, /* new */
116 1 << 14, 1 << 15, /* new */
117 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
/* Number of bins = number of entries in bget_bin_size[]. */
125 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
129 // Declare the interface, including the requested buffer size type, bufsize.
/* Doubly-linked free-list links embedded in every free buffer. */
132 typedef struct qlinks {
133 struct bfhead *flink; /* Forward link */
134 struct bfhead *blink; /* Backward link */
137 /* Header in allocated and free buffers */
138 typedef struct bhead2 {
139 kmp_info_t *bthr; /* The thread which owns the buffer pool */
140 bufsize prevfree; /* Relative link back to previous free buffer in memory or
141 0 if previous buffer is allocated. */
142 bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
145 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
146 typedef union bhead {
149 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
152 #define BH(p) ((bhead_t *)(p))
154 /* Header in directly allocated buffers (by acqfcn) */
155 typedef struct bdhead {
156 bufsize tsize; /* Total size, including overhead */
157 bhead_t bh; /* Common header */
159 #define BDH(p) ((bdhead_t *)(p))
161 /* Header in free buffers */
162 typedef struct bfhead {
163 bhead_t bh; /* Common allocated/free header */
164 qlinks_t ql; /* Links on free list */
166 #define BFH(p) ((bfhead_t *)(p))
/* Per-thread allocator state, stored in th->th.th_local.bget_data. */
168 typedef struct thr_data {
169 bfhead_t freelist[MAX_BGET_BINS];
171 size_t totalloc; /* Total space currently allocated */
172 long numget, numrel; /* Number of bget() and brel() calls */
173 long numpblk; /* Number of pool blocks */
174 long numpget, numprel; /* Number of block gets and rels */
175 long numdget, numdrel; /* Number of direct gets and rels */
176 #endif /* BufStats */
178 /* Automatic expansion block management functions */
179 bget_compact_t compfcn;
180 bget_acquire_t acqfcn;
181 bget_release_t relfcn;
183 bget_mode_t mode; /* what allocation mode to use? */
185 bufsize exp_incr; /* Expansion block size */
186 bufsize pool_len; /* 0: no bpool calls have been made
187 -1: not all pool blocks are the same size
188 >0: (common) block size for all bpool calls made so far
190 bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
193 /* Minimum allocation quantum: */
194 #define QLSize (sizeof(qlinks_t))
195 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
/* Largest representable request: all bits except the sign bit and the
   low (SizeQuant-1) alignment bits. */
198 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
199 // Maximum for the requested size.
201 /* End sentinel: value placed in bsize field of dummy block delimiting
202 end of pool block. The most negative number which will fit in a
203 bufsize, defined in a way that the compiler will accept. */
206 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
208 /* Thread Data management routines */
/* Map a (positive) buffer size to a free-list bin index via binary search
   over bget_bin_size[]. Returns a value in [0, MAX_BGET_BINS). */
209 static int bget_get_bin(bufsize size) {
211 int lo = 0, hi = MAX_BGET_BINS - 1;
213 KMP_DEBUG_ASSERT(size > 0);
215 while ((hi - lo) > 1) {
216 int mid = (lo + hi) >> 1;
217 if (size < bget_bin_size[mid])
223 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
/* Allocate (or reuse) and zero-initialize this thread's BGET bookkeeping:
   each free-list bin becomes an empty circular list pointing at itself,
   and the cross-thread deferred-release list/lock is reset. */
228 static void set_thr_data(kmp_info_t *th) {
232 data = (thr_data_t *)((!th->th.th_local.bget_data)
233 ? __kmp_allocate(sizeof(*data))
234 : th->th.th_local.bget_data);
236 memset(data, '\0', sizeof(*data));
238 for (i = 0; i < MAX_BGET_BINS; ++i) {
239 data->freelist[i].ql.flink = &data->freelist[i];
240 data->freelist[i].ql.blink = &data->freelist[i];
243 th->th.th_local.bget_data = data;
244 th->th.th_local.bget_list = 0;
/* When compare-and-swap is not used for the deferred-release list, a lock
   protects it instead; initialize whichever lock flavor is configured. */
245 #if !USE_CMP_XCHG_FOR_BGET
246 #ifdef USE_QUEUING_LOCK_FOR_BGET
247 __kmp_init_lock(&th->th.th_local.bget_lock);
249 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
250 #endif /* USE_LOCK_FOR_BGET */
251 #endif /* ! USE_CMP_XCHG_FOR_BGET */
/* Fetch this thread's BGET bookkeeping; must have been set up already
   (asserted non-null in debug builds). */
254 static thr_data_t *get_thr_data(kmp_info_t *th) {
257 data = (thr_data_t *)th->th.th_local.bget_data;
259 KMP_DEBUG_ASSERT(data != 0);
264 /* Walk the free list and release the enqueued buffers */
/* Buffers freed by OTHER threads are parked on th->th.th_local.bget_list;
   the owner drains that list here (atomically via CAS, or under a lock)
   and releases each buffer back into its own pool. */
265 static void __kmp_bget_dequeue(kmp_info_t *th) {
266 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
269 #if USE_CMP_XCHG_FOR_BGET
/* Atomically swap the whole list head with nullptr, retrying on contention. */
271 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
272 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
273 CCAST(void *, old_value), nullptr)) {
275 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
277 p = CCAST(void *, old_value);
279 #else /* ! USE_CMP_XCHG_FOR_BGET */
280 #ifdef USE_QUEUING_LOCK_FOR_BGET
281 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
283 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
284 #endif /* USE_QUEUING_LOCK_FOR_BGET */
286 p = (void *)th->th.th_local.bget_list;
287 th->th.th_local.bget_list = 0;
289 #ifdef USE_QUEUING_LOCK_FOR_BGET
290 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
292 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
294 #endif /* USE_CMP_XCHG_FOR_BGET */
296 /* Check again to make sure the list is not empty */
/* Each list node is a buffer; step back over its header and verify (debug)
   that this thread really owns it. The low bit of bthr may be a mark and is
   cleared before the comparison. */
299 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
301 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
302 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
303 (kmp_uintptr_t)th); // clear possible mark
304 KMP_DEBUG_ASSERT(b->ql.blink == 0);
306 p = (void *)b->ql.flink;
313 /* Chain together the free buffers by using the thread owner field */
/* Called by a NON-owner thread freeing a buffer: push the buffer onto the
   owner thread's deferred-release list (lock-free CAS push, or under the
   owner's bget lock), to be drained later by __kmp_bget_dequeue(). */
314 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
315 #ifdef USE_QUEUING_LOCK_FOR_BGET
320 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
322 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
323 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
324 (kmp_uintptr_t)th); // clear possible mark
328 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
329 __kmp_gtid_from_thread(th)));
331 #if USE_CMP_XCHG_FOR_BGET
333 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
334 /* the next pointer must be set before setting bget_list to buf to avoid
335 exposing a broken list to other threads, even for an instant. */
336 b->ql.flink = BFH(CCAST(void *, old_value));
338 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
339 CCAST(void *, old_value), buf)) {
341 old_value = TCR_PTR(th->th.th_local.bget_list);
342 /* the next pointer must be set before setting bget_list to buf to avoid
343 exposing a broken list to other threads, even for an instant. */
344 b->ql.flink = BFH(CCAST(void *, old_value));
347 #else /* ! USE_CMP_XCHG_FOR_BGET */
348 #ifdef USE_QUEUING_LOCK_FOR_BGET
349 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
351 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
354 b->ql.flink = BFH(th->th.th_local.bget_list);
355 th->th.th_local.bget_list = (void *)buf;
357 #ifdef USE_QUEUING_LOCK_FOR_BGET
358 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
360 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
362 #endif /* USE_CMP_XCHG_FOR_BGET */
365 /* insert buffer back onto a new freelist */
/* Pick the bin for b's size and splice b onto the tail of that bin's
   circular doubly-linked list. b must be SizeQuant-aligned with a
   SizeQuant-multiple size (debug-asserted). */
366 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
369 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
370 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
372 bin = bget_get_bin(b->bh.bb.bsize);
/* Sanity: the bin's sentinel links must form a consistent ring. */
374 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
375 &thr->freelist[bin]);
376 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
377 &thr->freelist[bin]);
379 b->ql.flink = &thr->freelist[bin];
380 b->ql.blink = thr->freelist[bin].ql.blink;
382 thr->freelist[bin].ql.blink = b;
383 b->ql.blink->ql.flink = b;
386 /* unlink the buffer from the old freelist */
/* Standard doubly-linked-list removal; ring consistency is debug-asserted
   before the pointers are rewired. */
387 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
388 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
389 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
391 b->ql.blink->ql.flink = b->ql.flink;
392 b->ql.flink->ql.blink = b->ql.blink;
395 /* GET STATS -- check info on free list */
/* Scan every bin, summing usable free space into *total_free and tracking
   the largest single free buffer in *max_free (both net of header overhead
   at the end). */
396 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
397 thr_data_t *thr = get_thr_data(th);
400 *total_free = *max_free = 0;
402 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
405 best = &thr->freelist[bin];
408 while (b != &thr->freelist[bin]) {
409 *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
410 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
413 /* Link to next buffer */
417 if (*max_free < best->bh.bb.bsize)
418 *max_free = best->bh.bb.bsize;
/* Report the largest buffer net of its header. */
421 if (*max_free > (bufsize)sizeof(bhead_t))
422 *max_free -= sizeof(bhead_t);
425 /* BGET -- Allocate a buffer. */
/* Core allocation routine. Strategy:
   1) drain deferred releases, round the request up to SizeQuant and add
      header overhead;
   2) search free-list bins (fifo/lifo/best-fit per thr->mode) for a fit,
      splitting the found buffer when enough room remains for a header plus
      the minimum quantum;
   3) on failure, optionally let the compaction callback reclaim space and
      retry;
   4) otherwise fall back to the acquire callback: a direct allocation for
      oversized requests, or a new expansion block added via bpool() followed
      by a single recursive retry. */
426 static void *bget(kmp_info_t *th, bufsize requested_size) {
427 thr_data_t *thr = get_thr_data(th);
428 bufsize size = requested_size;
/* Reject negative sizes and requests that would overflow once the header
   is added. */
436 if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
440 __kmp_bget_dequeue(th); /* Release any queued buffers */
442 if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
445 #if defined(SizeQuant) && (SizeQuant > 1)
446 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
449 size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
450 KMP_DEBUG_ASSERT(size >= 0);
451 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
453 use_blink = (thr->mode == bget_mode_lifo);
455 /* If a compact function was provided in the call to bectl(), wrap
456 a loop around the allocation process to allow compaction to
457 intervene in case we don't find a suitable buffer in the chain. */
462 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
463 /* Link to next buffer */
464 b = (use_blink ? thr->freelist[bin].ql.blink
465 : thr->freelist[bin].ql.flink);
467 if (thr->mode == bget_mode_best) {
468 best = &thr->freelist[bin];
470 /* Scan the free list searching for the first buffer big enough
471 to hold the requested size buffer. */
472 while (b != &thr->freelist[bin]) {
473 if (b->bh.bb.bsize >= (bufsize)size) {
474 if ((best == &thr->freelist[bin]) ||
475 (b->bh.bb.bsize < best->bh.bb.bsize)) {
480 /* Link to next buffer */
481 b = (use_blink ? b->ql.blink : b->ql.flink);
486 while (b != &thr->freelist[bin]) {
487 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
489 // Buffer is big enough to satisfy the request. Allocate it to the
490 // caller. We must decide whether the buffer is large enough to split
491 // into the part given to the caller and a free buffer that remains
492 // on the free list, or whether the entire buffer should be removed
493 // from the free list and given to the caller in its entirety. We
494 // only split the buffer if enough room remains for a header plus the
495 // minimum quantum of allocation.
496 if ((b->bh.bb.bsize - (bufsize)size) >
497 (bufsize)(SizeQ + (sizeof(bhead_t)))) {
/* Split: the allocated piece is carved from the TAIL of the free
   buffer, so the free remainder keeps its position and links. */
500 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
501 bn = BH(((char *)ba) + size);
503 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
505 /* Subtract size from length of free block. */
506 b->bh.bb.bsize -= (bufsize)size;
508 /* Link allocated buffer to the previous free buffer. */
509 ba->bb.prevfree = b->bh.bb.bsize;
511 /* Plug negative size into user buffer. */
512 ba->bb.bsize = -size;
514 /* Mark this buffer as owned by this thread. */
516 th); // not an allocated address (do not mark it)
517 /* Mark buffer after this one not preceded by free block. */
/* The shrunken remainder may now belong in a smaller bin. */
520 // unlink buffer from old freelist, and reinsert into new freelist
521 __kmp_bget_remove_from_freelist(b);
522 __kmp_bget_insert_into_freelist(thr, b);
524 thr->totalloc += (size_t)size;
525 thr->numget++; /* Increment number of bget() calls */
527 buf = (void *)((((char *)ba) + sizeof(bhead_t)));
528 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
/* No-split path: hand the whole buffer to the caller. */
533 ba = BH(((char *)b) + b->bh.bb.bsize);
535 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
537 /* The buffer isn't big enough to split. Give the whole
538 shebang to the caller and remove it from the free list. */
540 __kmp_bget_remove_from_freelist(b);
542 thr->totalloc += (size_t)b->bh.bb.bsize;
543 thr->numget++; /* Increment number of bget() calls */
545 /* Negate size to mark buffer allocated. */
546 b->bh.bb.bsize = -(b->bh.bb.bsize);
548 /* Mark this buffer as owned by this thread. */
549 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
550 /* Zero the back pointer in the next buffer in memory
551 to indicate that this buffer is allocated. */
554 /* Give user buffer starting at queue links. */
555 buf = (void *)&(b->ql);
556 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
561 /* Link to next buffer */
562 b = (use_blink ? b->ql.blink : b->ql.flink);
566 /* We failed to find a buffer. If there's a compact function defined,
567 notify it of the size requested. If it returns TRUE, try the allocation
570 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
575 /* No buffer available with requested size free. */
577 /* Don't give up yet -- look in the reserve supply. */
578 if (thr->acqfcn != 0) {
579 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
580 /* Request is too large to fit in a single expansion block.
581 Try to satisfy it by a direct buffer acquisition. */
/* Direct buffers use the larger bdhead_t header and never enter the
   free lists; they are released straight back via relfcn in brel(). */
584 size += sizeof(bdhead_t) - sizeof(bhead_t);
586 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
589 bdh = BDH((*thr->acqfcn)((bufsize)size));
592 // Mark the buffer special by setting size field of its header to zero.
593 bdh->bh.bb.bsize = 0;
595 /* Mark this buffer as owned by this thread. */
596 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
597 // because direct buffer never goes to free list
598 bdh->bh.bb.prevfree = 0;
601 thr->totalloc += (size_t)size;
602 thr->numget++; /* Increment number of bget() calls */
603 thr->numdget++; /* Direct bget() call count */
605 buf = (void *)(bdh + 1);
606 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
612 /* Try to obtain a new expansion block */
615 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
618 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
619 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
620 if (newpool != NULL) {
621 bpool(th, newpool, thr->exp_incr);
/* Retry with the original request; the fresh pool block is guaranteed
   large enough, so this recursion terminates. */
623 th, requested_size); /* This can't, I say, can't get into a loop. */
629 /* Still no buffer available */
634 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
635 the entire contents of the buffer to zero, not just the
636 region requested by the caller. */
638 static void *bgetz(kmp_info_t *th, bufsize size) {
639 char *buf = (char *)bget(th, size);
/* Recover the buffer's real usable size from its header: a negative bsize
   means a pool buffer; a direct (acqfcn) buffer stores its total size in
   the bdhead_t instead. */
645 b = BH(buf - sizeof(bhead_t));
646 rsize = -(b->bb.bsize);
650 bd = BDH(buf - sizeof(bdhead_t));
651 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
653 rsize -= sizeof(bhead_t);
656 KMP_DEBUG_ASSERT(rsize >= size);
658 (void)memset(buf, 0, (bufsize)rsize);
660 return ((void *)buf);
663 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
664 simply in terms of brel() and bget(). It could be
665 enhanced to allow the buffer to grow into adjacent free
666 blocks and to avoid moving data unnecessarily. */
668 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
670 bufsize osize; /* Old size of buffer */
673 nbuf = bget(th, size);
674 if (nbuf == NULL) { /* Acquire new buffer */
/* Determine the old usable size from the header, distinguishing pool
   buffers (negative bsize) from direct acqfcn buffers (bdhead_t). */
680 b = BH(((char *)buf) - sizeof(bhead_t));
681 osize = -b->bb.bsize;
683 /* Buffer acquired directly through acqfcn. */
686 bd = BDH(((char *)buf) - sizeof(bdhead_t));
687 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
689 osize -= sizeof(bhead_t);
692 KMP_DEBUG_ASSERT(osize > 0);
/* Copy the smaller of old/new sizes, then (presumably) release the old
   buffer — release not visible in this excerpt. */
694 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
695 (size_t)((size < osize) ? size : osize));
701 /* BREL -- Release a buffer. */
/* Release path handles three cases:
   1) direct (acqfcn) buffer -> release straight back via relfcn;
   2) buffer owned by ANOTHER thread -> push onto that thread's deferred
      list via __kmp_bget_enqueue();
   3) own pool buffer -> coalesce with free neighbors on both sides,
      reinsert into the proper bin, and release a whole expansion block
      back via relfcn when it becomes entirely free. */
702 static void brel(kmp_info_t *th, void *buf) {
703 thr_data_t *thr = get_thr_data(th);
707 KMP_DEBUG_ASSERT(buf != NULL);
708 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
710 b = BFH(((char *)buf) - sizeof(bhead_t));
712 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
715 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
716 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
718 thr->totalloc -= (size_t)bdh->tsize;
719 thr->numdrel++; /* Number of direct releases */
720 thr->numrel++; /* Increment number of brel() calls */
721 #endif /* BufStats */
/* FreeWipe: poison the released memory to catch use-after-free. */
723 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
724 #endif /* FreeWipe */
726 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
728 KMP_DEBUG_ASSERT(thr->relfcn != 0);
729 (*thr->relfcn)((void *)bdh); /* Release it directly. */
/* Cross-thread free: the owner (low mark bit cleared) is not us. */
733 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
734 ~1); // clear possible mark before comparison
736 /* Add this buffer to be released by the owning thread later */
737 __kmp_bget_enqueue(bth, buf
738 #ifdef USE_QUEUING_LOCK_FOR_BGET
740 __kmp_gtid_from_thread(th)
746 /* Buffer size must be negative, indicating that the buffer is allocated. */
747 if (b->bh.bb.bsize >= 0) {
750 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
752 /* Back pointer in next buffer must be zero, indicating the same thing: */
754 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
757 thr->numrel++; /* Increment number of brel() calls */
758 thr->totalloc += (size_t)b->bh.bb.bsize;
761 /* If the back link is nonzero, the previous buffer is free. */
763 if (b->bh.bb.prevfree != 0) {
764 /* The previous buffer is free. Consolidate this buffer with it by adding
765 the length of this buffer to the previous free buffer. Note that we
766 subtract the size in the buffer being released, since it's negative to
767 indicate that the buffer is allocated. */
768 bufsize size = b->bh.bb.bsize;
770 /* Make the previous buffer the one we're working on. */
771 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
773 b = BFH(((char *)b) - b->bh.bb.prevfree);
774 b->bh.bb.bsize -= size;
776 /* unlink the buffer from the old freelist */
777 __kmp_bget_remove_from_freelist(b);
779 /* The previous buffer isn't allocated. Mark this buffer size as positive
780 (i.e. free) and fall through to place the buffer on the free list as an
781 isolated free block. */
782 b->bh.bb.bsize = -b->bh.bb.bsize;
785 /* insert buffer back onto a new freelist */
786 __kmp_bget_insert_into_freelist(thr, b);
788 /* Now we look at the next buffer in memory, located by advancing from
789 the start of this buffer by its size, to see if that buffer is
790 free. If it is, we combine this buffer with the next one in
791 memory, dechaining the second buffer from the free list. */
792 bn = BFH(((char *)b) + b->bh.bb.bsize);
793 if (bn->bh.bb.bsize > 0) {
795 /* The buffer is free. Remove it from the free list and add
796 its size to that of our buffer. */
797 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
800 __kmp_bget_remove_from_freelist(bn);
802 b->bh.bb.bsize += bn->bh.bb.bsize;
804 /* unlink the buffer from the old freelist, and reinsert it into the new
806 __kmp_bget_remove_from_freelist(b);
807 __kmp_bget_insert_into_freelist(thr, b);
809 /* Finally, advance to the buffer that follows the newly
810 consolidated free block. We must set its backpointer to the
811 head of the consolidated free block. We know the next block
812 must be an allocated block because the process of recombination
813 guarantees that two free blocks will never be contiguous in
815 bn = BFH(((char *)b) + b->bh.bb.bsize);
818 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
819 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
821 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
823 /* The next buffer is allocated. Set the backpointer in it to point
824 to this buffer; the previous free buffer in memory. */
826 bn->bh.bb.prevfree = b->bh.bb.bsize;
828 /* If a block-release function is defined, and this free buffer
829 constitutes the entire block, release it. Note that pool_len
830 is defined in such a way that the test will fail unless all
831 pool blocks are the same size. */
832 if (thr->relfcn != 0 &&
833 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
/* The last remaining pool block is retained until finalization (see
   __kmp_finalize_bget) rather than released here. */
836 1) { /* Do not release the last buffer until finalization time */
839 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
840 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
841 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
844 /* Unlink the buffer from the free list */
845 __kmp_bget_remove_from_freelist(b);
847 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
851 thr->numprel++; /* Nr of expansion block releases */
852 thr->numpblk--; /* Total number of blocks */
853 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
855 // avoid leaving stale last_pool pointer around if it is being dealloced
856 if (thr->last_pool == b)
861 #endif /* BufStats */
865 /* BECTL -- Establish automatic pool expansion control */
/* Install the compaction/acquire/release callbacks and the expansion block
   size used by bget() when the free lists cannot satisfy a request. */
866 static void bectl(kmp_info_t *th, bget_compact_t compact,
867 bget_acquire_t acquire, bget_release_t release,
869 thr_data_t *thr = get_thr_data(th);
871 thr->compfcn = compact;
872 thr->acqfcn = acquire;
873 thr->relfcn = release;
874 thr->exp_incr = pool_incr;
877 /* BPOOL -- Add a region of memory to the buffer pool. */
/* The region becomes one big free buffer: a zeroed prevfree at the front
   blocks backward coalescing, an ESent-sized dummy "allocated" header at
   the end blocks forward coalescing, and the free span in between is put
   on the appropriate free list. */
878 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
880 thr_data_t *thr = get_thr_data(th);
881 bfhead_t *b = BFH(buf);
884 __kmp_bget_dequeue(th); /* Release any queued buffers */
/* Round the usable length down to a SizeQuant multiple. */
887 len &= ~(SizeQuant - 1);
/* Track whether all pool blocks share a common size (pool_len semantics:
   0 = none yet, -1 = mixed sizes, >0 = the common size). */
889 if (thr->pool_len == 0) {
891 } else if (len != thr->pool_len) {
895 thr->numpget++; /* Number of block acquisitions */
896 thr->numpblk++; /* Number of blocks total */
897 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
898 #endif /* BufStats */
900 /* Since the block is initially occupied by a single free buffer,
901 it had better not be (much) larger than the largest buffer
902 whose size we can store in bhead.bb.bsize. */
903 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
905 /* Clear the backpointer at the start of the block to indicate that
906 there is no free block prior to this one. That blocks
907 recombination when the first block in memory is released. */
908 b->bh.bb.prevfree = 0;
910 /* Create a dummy allocated buffer at the end of the pool. This dummy
911 buffer is seen when a buffer at the end of the pool is released and
912 blocks recombination of the last buffer with the dummy buffer at
913 the end. The length in the dummy buffer is set to the largest
914 negative number to denote the end of the pool for diagnostic
915 routines (this specific value is not counted on by the actual
916 allocation and release functions). */
917 len -= sizeof(bhead_t);
918 b->bh.bb.bsize = (bufsize)len;
919 /* Set the owner of this buffer */
920 TCW_PTR(b->bh.bb.bthr,
921 (kmp_info_t *)((kmp_uintptr_t)th |
922 1)); // mark the buffer as allocated address
924 /* Chain the new block to the free list. */
925 __kmp_bget_insert_into_freelist(thr, b);
/* FreeWipe: poison the fresh free space to catch stray reads. */
928 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
929 (size_t)(len - sizeof(bfhead_t)));
931 bn = BH(((char *)b) + len);
932 bn->bb.prevfree = (bufsize)len;
933 /* Definition of ESent assumes two's complement! */
934 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
936 bn->bb.bsize = ESent;
939 /* BFREED -- Dump the free lists for this thread. */
/* Diagnostic printout: overall BufStats counters, then each free buffer per
   bin with list-consistency asserts, plus a FreeWipe-pattern scan that
   reports buffers whose freed contents were overwritten. */
940 static void bfreed(kmp_info_t *th) {
941 int bin = 0, count = 0;
942 int gtid = __kmp_gtid_from_thread(th);
943 thr_data_t *thr = get_thr_data(th);
946 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
947 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
948 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
949 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
950 " drel=%" KMP_INT64_SPEC "\n",
951 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
952 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
953 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
954 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
957 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
960 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
962 bufsize bs = b->bh.bb.bsize;
964 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
965 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
966 KMP_DEBUG_ASSERT(bs > 0);
970 __kmp_printf_no_lock(
971 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
/* FreeWipe check: a free buffer should be a uniform 0x55 pattern; a
   mismatch between adjacent bytes means someone wrote after free. */
975 char *lerr = ((char *)b) + sizeof(bfhead_t);
976 if ((bs > sizeof(bfhead_t)) &&
978 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
980 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
981 "free block have been overstored.)\n",
990 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
/* Set up BGET for a thread: default expansion control uses plain
   malloc/free with no compaction and __kmp_malloc_pool_incr-sized blocks. */
993 void __kmp_initialize_bget(kmp_info_t *th) {
994 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
998 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
999 (bufsize)__kmp_malloc_pool_incr);
/* Tear down a thread's BGET state: release the retained last pool block
   (brel() deliberately keeps one back — see its "finalization time"
   comment), then free the bookkeeping structure itself. */
1002 void __kmp_finalize_bget(kmp_info_t *th) {
1006 KMP_DEBUG_ASSERT(th != 0);
1009 thr = (thr_data_t *)th->th.th_local.bget_data;
1010 KMP_DEBUG_ASSERT(thr != NULL);
1013 /* If a block-release function is defined, and this free buffer constitutes
1014 the entire block, release it. Note that pool_len is defined in such a way
1015 that the test will fail unless all pool blocks are the same size. */
1017 // Deallocate the last pool if one exists because we no longer do it in brel()
1018 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1019 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1020 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1021 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1022 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1025 /* Unlink the buffer from the free list */
1026 __kmp_bget_remove_from_freelist(b);
1028 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1031 thr->numprel++; /* Nr of expansion block releases */
1032 thr->numpblk--; /* Total number of blocks */
1033 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1035 #endif /* BufStats */
1037 /* Deallocate bget_data */
1038 if (th->th.th_local.bget_data != NULL) {
1039 __kmp_free(th->th.th_local.bget_data);
1040 th->th.th_local.bget_data = NULL;
/* Public kmpc_* pool-control API: thin wrappers over bectl()/get_thr_data()
   operating on the calling thread's pool. */

/* Reinstall expansion control with a caller-chosen expansion block size. */
1044 void kmpc_set_poolsize(size_t size) {
1045 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1046 (bget_release_t)free, (bufsize)size);

/* Report the current expansion block size (exp_incr). */
1049 size_t kmpc_get_poolsize(void) {
1052 p = get_thr_data(__kmp_get_thread());

/* Select the free-list search mode; invalid values are silently ignored. */
1057 void kmpc_set_poolmode(int mode) {
1060 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1061 mode == bget_mode_best) {
1062 p = get_thr_data(__kmp_get_thread());
1063 p->mode = (bget_mode_t)mode;

/* Report the current free-list search mode. */
1067 int kmpc_get_poolmode(void) {
1070 p = get_thr_data(__kmp_get_thread());

/* Report allocation statistics after draining deferred releases. */
1075 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1076 kmp_info_t *th = __kmp_get_thread();
1079 __kmp_bget_dequeue(th); /* Release any queued buffers */

/* Print this thread's pool state (see bfreed). */
1087 void kmpc_poolprint(void) {
1088 kmp_info_t *th = __kmp_get_thread();
1090 __kmp_bget_dequeue(th); /* Release any queued buffers */
1095 #endif // #if KMP_USE_BGET
/* malloc-style entry point. Over-allocates by sizeof(void *) and stashes
   the raw bget pointer immediately before the address returned to the
   user, so kmpc_free/kmpc_realloc can recover it. */
1097 void *kmpc_malloc(size_t size) {
1099 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1101 // save allocated pointer just before one returned to user
1102 *(void **)ptr = ptr;
1103 ptr = (void **)ptr + 1;

/* True for zero and powers of two (n assumed non-negative). */
1108 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
/* Aligned malloc-style entry point: alignment must be a power of two
   (and, by assertion, under 32 KiB). Over-allocates by alignment plus a
   pointer slot, rounds the user address up to the alignment, and stores
   the raw allocation pointer in the slot just below it for kmpc_free. */
1110 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1112 void *ptr_allocated;
1113 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1114 if (!IS_POWER_OF_TWO(alignment)) {
1115 // AC: do we need to issue a warning here?
1119 size = size + sizeof(void *) + alignment;
1120 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1121 if (ptr_allocated != NULL) {
1122 // save allocated pointer just before one returned to user
1123 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1125 *((void **)ptr - 1) = ptr_allocated;
/* calloc-style entry point: zeroed allocation via bgetz(), with the same
   hidden-pointer header as kmpc_malloc.
   NOTE(review): nelem * elsize is not checked for overflow before the
   addition — confirm callers guarantee sane sizes. */
1132 void *kmpc_calloc(size_t nelem, size_t elsize) {
1134 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1136 // save allocated pointer just before one returned to user
1137 *(void **)ptr = ptr;
1138 ptr = (void **)ptr + 1;
/* realloc-style entry point following C realloc semantics:
   NULL ptr -> malloc; size 0 -> free; otherwise bgetr() on the hidden
   raw pointer, re-stashing the header in the result. */
1143 void *kmpc_realloc(void *ptr, size_t size) {
1144 void *result = NULL;
1146 // If pointer is NULL, realloc behaves like malloc.
1147 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1148 // save allocated pointer just before one returned to user
1149 if (result != NULL) {
1150 *(void **)result = result;
1151 result = (void **)result + 1;
1153 } else if (size == 0) {
1154 // If size is 0, realloc behaves like free.
1155 // The thread must be registered by the call to kmpc_malloc() or
1156 // kmpc_calloc() before.
1157 // So it should be safe to call __kmp_get_thread(), not
1158 // __kmp_entry_thread().
1159 KMP_ASSERT(*((void **)ptr - 1));
1160 brel(__kmp_get_thread(), *((void **)ptr - 1));
1162 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1163 (bufsize)(size + sizeof(ptr)));
1164 if (result != NULL) {
1165 *(void **)result = result;
1166 result = (void **)result + 1;
1172 // NOTE: the library must have already been initialized by a previous allocate
/* free-style entry point: no-op before serial init; otherwise drain the
   thread's deferred releases and brel() the hidden raw pointer stored just
   before the user address. */
1173 void kmpc_free(void *ptr) {
1174 if (!__kmp_init_serial) {
1178 kmp_info_t *th = __kmp_get_thread();
1179 __kmp_bget_dequeue(th); /* Release any queued buffers */
1180 // extract allocated pointer and free it
1181 KMP_ASSERT(*((void **)ptr - 1));
1182 brel(th, *((void **)ptr - 1));
/* Internal per-thread allocation wrappers: trace-logged pass-throughs to
   bget/bgetz/bgetr/brel for an explicitly supplied thread. KMP_SRC_LOC_*
   threads caller file/line into the trace output. */

1186 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1188 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1189 (int)size KMP_SRC_LOC_PARM));
1190 ptr = bget(th, (bufsize)size);
1191 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));

1195 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1196 size_t elsize KMP_SRC_LOC_DECL) {
1198 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1199 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1200 ptr = bgetz(th, (bufsize)(nelem * elsize));
1201 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));

1205 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1206 size_t size KMP_SRC_LOC_DECL) {
1207 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1208 ptr, (int)size KMP_SRC_LOC_PARM));
1209 ptr = bgetr(th, ptr, (bufsize)size);
1210 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));

1214 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1215 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1216 ptr KMP_SRC_LOC_PARM));
1218 __kmp_bget_dequeue(th); /* Release any queued buffers */
1221 KE_TRACE(30, ("<- __kmp_thread_free()\n"));
/* OMP 5.0 Memory Management support */
// Hooks into the memkind library, resolved at runtime by
// __kmp_init_memkind() via dlsym(); all remain NULL when the library
// cannot be loaded.
static int (*p_hbw_check)(void); // hbw_check_available()
static void *(*p_hbw_malloc)(size_t); // hbw_malloc()
static void (*p_hbw_free)(void *); // hbw_free()
static int (*p_hbw_set_policy)(int); // hbw_set_policy()
static const char *kmp_mk_lib_name; // name of the memkind shared library
static void *h_memkind; // dlopen() handle; NULL if not loaded
1233 void __kmp_init_memkind() {
1234 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1235 kmp_mk_lib_name = "libmemkind.so";
1236 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1238 p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available");
1239 p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc");
1240 p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free");
1241 p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy");
1242 if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) {
1243 __kmp_memkind_available = 1;
1244 if (p_hbw_check() == 0) {
1245 p_hbw_set_policy(1); // return NULL is not enough memory
1246 __kmp_hbw_mem_available = 1; // found HBW memory available
1248 return; // success - all symbols resolved
1250 dlclose(h_memkind); // failure
1254 p_hbw_malloc = NULL;
1256 p_hbw_set_policy = NULL;
1258 kmp_mk_lib_name = "";
1261 p_hbw_malloc = NULL;
1263 p_hbw_set_policy = NULL;
// Undo __kmp_init_memkind(): close the library handle (if any) and reset
// all resolved hooks to NULL.
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  p_hbw_check = NULL;
  p_hbw_malloc = NULL;
  p_hbw_free = NULL;
  p_hbw_set_policy = NULL;
#endif
}
1280 void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) {
1281 if (allocator == OMP_NULL_ALLOCATOR)
1282 allocator = omp_default_mem_alloc;
1284 allocator == omp_default_mem_alloc ||
1285 allocator == omp_large_cap_mem_alloc ||
1286 allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc ||
1287 allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc ||
1288 allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc);
1289 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1291 const omp_allocator_t *__kmpc_get_default_allocator(int gtid) {
1292 return __kmp_threads[gtid]->th.th_def_allocator;
1295 typedef struct kmp_mem_desc { // Memory block descriptor
1296 void *ptr_alloc; // Pointer returned by allocator
1297 size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1298 void *ptr_align; // Pointer to aligned memory, returned
1299 const omp_allocator_t *allocator; // allocator
1301 static int alignment = sizeof(void *); // let's align to pointer size
1303 void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) {
1304 KMP_DEBUG_ASSERT(__kmp_init_serial);
1305 if (allocator == OMP_NULL_ALLOCATOR)
1306 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1308 int sz_desc = sizeof(kmp_mem_desc_t);
1310 kmp_mem_desc_t desc;
1311 kmp_uintptr_t addr; // address returned by allocator
1312 kmp_uintptr_t addr_align; // address to return to caller
1313 kmp_uintptr_t addr_descr; // address of memory block descriptor
1315 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1317 desc.size_a = size + sz_desc + alignment;
1318 if (allocator == omp_default_mem_alloc)
1319 ptr = __kmp_allocate(desc.size_a);
1320 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1321 KMP_DEBUG_ASSERT(p_hbw_malloc != NULL);
1322 ptr = p_hbw_malloc(desc.size_a);
1325 KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr,
1326 desc.size_a, __kmp_hbw_mem_available));
1330 addr = (kmp_uintptr_t)ptr;
1331 addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1);
1332 addr_descr = addr_align - sz_desc;
1334 desc.ptr_alloc = ptr;
1335 desc.ptr_align = (void *)addr_align;
1336 desc.allocator = allocator;
1337 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
1340 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
1341 return desc.ptr_align;
1344 void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) {
1345 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1349 kmp_mem_desc_t desc;
1350 kmp_uintptr_t addr_align; // address to return to caller
1351 kmp_uintptr_t addr_descr; // address of memory block descriptor
1353 addr_align = (kmp_uintptr_t)ptr;
1354 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1355 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1357 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1359 KMP_DEBUG_ASSERT(desc.allocator == allocator);
1361 allocator = desc.allocator;
1363 KMP_DEBUG_ASSERT(allocator);
1365 if (allocator == omp_default_mem_alloc)
1366 __kmp_free(desc.ptr_alloc);
1367 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1368 KMP_DEBUG_ASSERT(p_hbw_free != NULL);
1369 p_hbw_free(desc.ptr_alloc);
1371 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruption and
   use of freed pointers, etc. */
1380 /* #define LEAK_MEMORY */
1381 struct kmp_mem_descr { // Memory block descriptor.
1382 void *ptr_allocated; // Pointer returned by malloc(), subject for free().
1383 size_t size_allocated; // Size of allocated memory block.
1384 void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
1385 size_t size_aligned; // Size of aligned memory block.
1387 typedef struct kmp_mem_descr kmp_mem_descr_t;
1389 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1390 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1391 error. Must use __kmp_free when freeing memory allocated by this routine! */
1392 static void *___kmp_allocate_align(size_t size,
1393 size_t alignment KMP_SRC_LOC_DECL) {
1394 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
1395 requested to return properly aligned pointer. Original pointer returned
1396 by malloc() and size of allocated block is saved in descriptor just
1397 before the aligned pointer. This information used by __kmp_free() -- it
1398 has to pass to free() original pointer, not aligned one.
1400 +---------+------------+-----------------------------------+---------+
1401 | padding | descriptor | aligned block | padding |
1402 +---------+------------+-----------------------------------+---------+
1405 | +- Aligned pointer returned to caller
1406 +- Pointer returned by malloc()
1408 Aligned block is filled with zeros, paddings are filled with 0xEF. */
1410 kmp_mem_descr_t descr;
1411 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1412 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1413 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1415 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1416 (int)size, (int)alignment KMP_SRC_LOC_PARM));
1418 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
1419 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1420 // Make sure kmp_uintptr_t is enough to store addresses.
1422 descr.size_aligned = size;
1423 descr.size_allocated =
1424 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1427 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1429 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1431 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1432 descr.ptr_allocated));
1433 if (descr.ptr_allocated == NULL) {
1434 KMP_FATAL(OutOfHeapMemory);
1437 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1439 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1440 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1442 descr.ptr_aligned = (void *)addr_aligned;
1444 KE_TRACE(26, (" ___kmp_allocate_align: "
1445 "ptr_allocated=%p, size_allocated=%d, "
1446 "ptr_aligned=%p, size_aligned=%d\n",
1447 descr.ptr_allocated, (int)descr.size_allocated,
1448 descr.ptr_aligned, (int)descr.size_aligned));
1450 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1451 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1452 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1453 addr_allocated + descr.size_allocated);
1454 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1456 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1457 // Fill allocated memory block with 0xEF.
1459 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1460 // Fill the aligned memory block (which is intended for using by caller) with
1462 // put this filling under KMP_DEBUG condition! Many callers expect zeroed
1464 // bytes remain filled with 0xEF in debugging library.)
1465 *((kmp_mem_descr_t *)addr_descr) = descr;
1469 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1470 return descr.ptr_aligned;
1471 } // func ___kmp_allocate_align
1473 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1474 Do not call this func directly! Use __kmp_allocate macro instead.
1475 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1476 error. Must use __kmp_free when freeing memory allocated by this routine! */
1477 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1479 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
1480 (int)size KMP_SRC_LOC_PARM));
1481 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1482 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
1484 } // func ___kmp_allocate
1486 /* Allocate memory on page boundary, fill allocated memory with 0x00.
1487 Does not call this func directly! Use __kmp_page_allocate macro instead.
1488 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1489 error. Must use __kmp_free when freeing memory allocated by this routine! */
1490 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
1491 int page_size = 8 * 1024;
1494 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
1495 (int)size KMP_SRC_LOC_PARM));
1496 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1497 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
1499 } // ___kmp_page_allocate
1501 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1502 In debug mode, fill the memory block with 0xEF before call to free(). */
1503 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
1504 kmp_mem_descr_t descr;
1505 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1506 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1509 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1510 KMP_ASSERT(ptr != NULL);
1512 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
1514 KE_TRACE(26, (" __kmp_free: "
1515 "ptr_allocated=%p, size_allocated=%d, "
1516 "ptr_aligned=%p, size_aligned=%d\n",
1517 descr.ptr_allocated, (int)descr.size_allocated,
1518 descr.ptr_aligned, (int)descr.size_aligned));
1520 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1521 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
1523 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1524 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1525 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
1526 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1527 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1528 addr_allocated + descr.size_allocated);
1531 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1532 // Fill memory block with 0xEF, it helps catch using freed memory.
1536 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
1538 _free_src_loc(descr.ptr_allocated, _file_, _line_);
1540 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1544 KE_TRACE(25, ("<- __kmp_free() returns\n"));
1545 } // func ___kmp_free
1547 #if USE_FAST_MEMORY == 3
1548 // Allocate fast memory by first scanning the thread's free lists
1549 // If a chunk the right size exists, grab it off the free list.
1550 // Otherwise allocate normally using kmp_thread_malloc.
1552 // AC: How to choose the limit? Just get 16 for now...
1553 #define KMP_FREE_LIST_LIMIT 16
1555 // Always use 128 bytes for determining buckets for caching memory blocks
1556 #define DCACHE_LINE 128
1558 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
1565 kmp_mem_descr_t *descr;
1567 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1568 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
1570 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1571 idx = num_lines - 1;
1572 KMP_DEBUG_ASSERT(idx >= 0);
1574 index = 0; // idx is [ 0, 1 ], use first free list
1575 num_lines = 2; // 1, 2 cache lines or less than cache line
1576 } else if ((idx >>= 2) == 0) {
1577 index = 1; // idx is [ 2, 3 ], use second free list
1578 num_lines = 4; // 3, 4 cache lines
1579 } else if ((idx >>= 2) == 0) {
1580 index = 2; // idx is [ 4, 15 ], use third free list
1581 num_lines = 16; // 5, 6, ..., 16 cache lines
1582 } else if ((idx >>= 2) == 0) {
1583 index = 3; // idx is [ 16, 63 ], use fourth free list
1584 num_lines = 64; // 17, 18, ..., 64 cache lines
1586 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1589 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1591 // pop the head of no-sync free list
1592 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1595 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1599 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1601 // no-sync free list is empty, use sync free list (filled in by other
1603 // pop the head of the sync free list, push NULL instead
1604 while (!KMP_COMPARE_AND_STORE_PTR(
1605 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
1607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1609 // push the rest of chain into no-sync free list (can be NULL if there was
1611 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1614 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1620 // haven't found block in the free lists, thus allocate it
1621 size = num_lines * DCACHE_LINE;
1623 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1624 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
1626 __kmp_gtid_from_thread(this_thr), alloc_size));
1627 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1629 // align ptr to DCACHE_LINE
1630 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
1632 ~(DCACHE_LINE - 1));
1633 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1635 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1636 // we don't need size_allocated
1637 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1638 // (it is already saved in bget buffer,
1639 // but we may want to use another allocator in future)
1640 descr->size_aligned = size;
1643 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
1644 __kmp_gtid_from_thread(this_thr), ptr));
1646 } // func __kmp_fast_allocate
1648 // Free fast memory and place it on the thread's free list if it is of
1649 // the correct size.
1650 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
1651 kmp_mem_descr_t *descr;
1652 kmp_info_t *alloc_thr;
1657 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1658 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1659 KMP_ASSERT(ptr != NULL);
1661 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1663 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1664 (int)descr->size_aligned));
1666 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1668 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1670 index = 0; // 2 cache lines
1671 } else if ((idx <<= 1) == size) {
1672 index = 1; // 4 cache lines
1673 } else if ((idx <<= 2) == size) {
1674 index = 2; // 16 cache lines
1675 } else if ((idx <<= 2) == size) {
1676 index = 3; // 64 cache lines
1678 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1679 goto free_call; // 65 or more cache lines ( > 8KB )
1682 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1683 if (alloc_thr == this_thr) {
1684 // push block to self no-sync free list, linking previous head (LIFO)
1685 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1686 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1688 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1690 // Create new free list
1691 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1692 *((void **)ptr) = NULL; // mark the tail of the list
1693 descr->size_allocated = (size_t)1; // head of the list keeps its length
1695 // need to check existed "other" list's owner thread and size of queue
1696 kmp_mem_descr_t *dsc =
1697 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
1698 // allocating thread, same for all queue nodes
1699 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1701 dsc->size_allocated + 1; // new size in case we add current task
1702 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1703 // we can add current task to "other" list, no sync needed
1704 *((void **)ptr) = head;
1705 descr->size_allocated = q_sz;
1706 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1708 // either queue blocks owner is changing or size limit exceeded
1709 // return old queue to allocating thread (q_th) synchroneously,
1710 // and start new list for alloc_thr's tasks
1713 void *next = *((void **)head);
1714 while (next != NULL) {
1716 // queue size should decrease by 1 each step through the list
1717 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
1720 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
1722 tail = next; // remember tail node
1723 next = *((void **)next);
1725 KMP_DEBUG_ASSERT(q_th != NULL);
1726 // push block to owner's sync free list
1727 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1728 /* the next pointer must be set before setting free_list to ptr to avoid
1729 exposing a broken list to other threads, even for an instant. */
1730 *((void **)tail) = old_ptr;
1732 while (!KMP_COMPARE_AND_STORE_PTR(
1733 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1735 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1736 *((void **)tail) = old_ptr;
1739 // start new list of not-selt tasks
1740 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1741 *((void **)ptr) = NULL;
1742 descr->size_allocated = (size_t)1; // head of queue keeps its length
1749 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1750 __kmp_gtid_from_thread(this_thr), size));
1751 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
1752 brel(this_thr, descr->ptr_allocated);
1755 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
1757 } // func __kmp_fast_free
1759 // Initialize the thread free lists related to fast memory
1760 // Only do this when a thread is initially created.
1761 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1762 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1764 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
1767 // Free the memory in the thread free lists related to fast memory
1768 // Only do this when a thread is being reaped (destroyed).
1769 void __kmp_free_fast_memory(kmp_info_t *th) {
1770 // Suppose we use BGET underlying allocator, walk through its structures...
1772 thr_data_t *thr = get_thr_data(th);
1776 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
1778 __kmp_bget_dequeue(th); // Release any queued buffers
1780 // Dig through free lists and extract all allocated blocks
1781 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1782 bfhead_t *b = thr->freelist[bin].ql.flink;
1783 while (b != &thr->freelist[bin]) {
1784 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
1786 lst; // link the list (override bthr, but keep flink yet)
1787 lst = (void **)b; // push b into lst
1789 b = b->ql.flink; // get next buffer
1792 while (lst != NULL) {
1794 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1795 lst, next, th, __kmp_gtid_from_thread(th)));
1796 (*thr->relfcn)(lst);
1798 // count blocks to prevent problems in __kmp_finalize_bget()
1799 thr->numprel++; /* Nr of expansion block releases */
1800 thr->numpblk--; /* Total number of blocks */
1802 lst = (void **)next;
1806 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1809 #endif // USE_FAST_MEMORY