2 * kmp_str.cpp -- String manipulation routines.
5 //===----------------------------------------------------------------------===//
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
15 #include <stdarg.h> // va_*
16 #include <stdio.h> // vsnprintf()
17 #include <stdlib.h> // malloc(), realloc()
26 // Declare buffer and initialize it.
28 __kmp_str_buf_init( & buffer );
31 __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
32 __kmp_str_buf_print(& buffer, " <%s>\n", line);
34 // Use buffer contents. buffer.str is a pointer to the data, buffer.used is the
35 // number of printed characters (not including the terminating zero).
36 write( fd, buffer.str, buffer.used );
39 __kmp_str_buf_free( & buffer );
41 // Alternatively, you can detach allocated memory from buffer:
42 __kmp_str_buf_detach( & buffer );
43 return buffer.str; // That memory should be freed eventually.
47 * Buffer users may use buffer.str and buffer.used. Users should not change
48 any fields of buffer directly.
49 * buffer.str is never NULL. If buffer is empty, buffer.str points to empty
51 * For performance reasons, buffer uses stack memory (buffer.bulk) first. If
52 stack memory is exhausted, buffer allocates memory on heap by malloc(), and
53 reallocates it by realloc() as amount of used memory grows.
54 * Buffer doubles amount of allocated memory each time it is exhausted.
57 // TODO: __kmp_str_buf_print() can use thread local memory allocator.
// Consistency checks on a kmp_str_buf_t, expressed as KMP_DEBUG_ASSERTs:
//  - str is never NULL;
//  - size is at least sizeof(bulk) and a whole multiple of it;
//  - used is strictly smaller than size;
//  - while size equals sizeof(bulk), str points at the embedded bulk array,
//    and once size exceeds sizeof(bulk), str no longer points at bulk.
59 #define KMP_STR_BUF_INVARIANT(b) \
61 KMP_DEBUG_ASSERT((b)->str != NULL); \
62 KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \
63 KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \
64 KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \
66 (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \
67 KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \
// Checks the buffer invariants on entry and on exit. Any work in between is
// only performed when the buffer currently holds characters (used > 0).
71 void __kmp_str_buf_clear(kmp_str_buf_t *buffer) {
72 KMP_STR_BUF_INVARIANT(buffer);
73 if (buffer->used > 0) {
77 KMP_STR_BUF_INVARIANT(buffer);
78 } // __kmp_str_buf_clear
// Grows the buffer so that it can hold at least `size` bytes.
// Nothing is reallocated while the current capacity already suffices. On the
// first growth the contents move from the embedded bulk array into memory from
// KMP_INTERNAL_MALLOC (used + 1 bytes are copied, i.e. text plus terminator);
// afterwards the heap block is resized with KMP_INTERNAL_REALLOC. A failed
// allocation ends in KMP_FATAL(MemoryAllocFailed).
80 void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) {
81 KMP_STR_BUF_INVARIANT(buffer);
82 KMP_DEBUG_ASSERT(size >= 0);
84 if (buffer->size < (unsigned int)size) {
85 // Calculate buffer size.
88 } while (buffer->size < (unsigned int)size);
// str still pointing at bulk means no heap block has been allocated yet.
91 if (buffer->str == &buffer->bulk[0]) {
92 buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
93 if (buffer->str == NULL) {
94 KMP_FATAL(MemoryAllocFailed);
96 KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
98 buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size);
99 if (buffer->str == NULL) {
100 KMP_FATAL(MemoryAllocFailed);
105 KMP_DEBUG_ASSERT(buffer->size > 0);
106 KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size);
107 KMP_STR_BUF_INVARIANT(buffer);
108 } // __kmp_str_buf_reserve
// Makes buffer->str point at heap memory the caller can keep.
// When the capacity is still bulk-sized (size <= sizeof(bulk)), a block of
// `size` bytes is obtained from KMP_INTERNAL_MALLOC and the text plus its
// terminator (used + 1 bytes) is copied into it from bulk. Allocation failure
// ends in KMP_FATAL(MemoryAllocFailed).
110 void __kmp_str_buf_detach(kmp_str_buf_t *buffer) {
111 KMP_STR_BUF_INVARIANT(buffer);
113 // If internal bulk is used, allocate memory and copy it.
114 if (buffer->size <= sizeof(buffer->bulk)) {
115 buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
116 if (buffer->str == NULL) {
117 KMP_FATAL(MemoryAllocFailed);
119 KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
121 } // __kmp_str_buf_detach
// Releases the buffer's heap storage and returns it to its bulk-backed state.
// KMP_INTERNAL_FREE is only called when size exceeds sizeof(bulk), i.e. when
// str refers to heap memory; str and size are then reset to the bulk array.
123 void __kmp_str_buf_free(kmp_str_buf_t *buffer) {
124 KMP_STR_BUF_INVARIANT(buffer);
125 if (buffer->size > sizeof(buffer->bulk)) {
126 KMP_INTERNAL_FREE(buffer->str);
128 buffer->str = buffer->bulk;
129 buffer->size = sizeof(buffer->bulk);
131 KMP_STR_BUF_INVARIANT(buffer);
132 } // __kmp_str_buf_free
// Appends the first `len` bytes of `str` to the buffer.
// Room for the existing text, the new bytes and a terminator
// (used + len + 1) is reserved first; the bytes are copied to the current end
// of the text and a terminating zero is stored right after them.
// `str` must be non-NULL and `len` non-negative (debug-asserted).
134 void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) {
135 KMP_STR_BUF_INVARIANT(buffer);
136 KMP_DEBUG_ASSERT(str != NULL);
137 KMP_DEBUG_ASSERT(len >= 0);
138 __kmp_str_buf_reserve(buffer, buffer->used + len + 1);
139 KMP_MEMCPY(buffer->str + buffer->used, str, len);
140 buffer->str[buffer->used + len] = 0;
142 KMP_STR_BUF_INVARIANT(buffer);
143 } // __kmp_str_buf_cat
// Appends the contents of buffer `src` to buffer `dest`.
// Both pointers must be non-NULL and both buffers must satisfy the buffer
// invariants. After reserving dest->used + src->used + 1 bytes, src->used
// bytes are copied to the end of dest, a terminating zero is stored and
// dest->used is advanced by src->used.
145 void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) {
146 KMP_DEBUG_ASSERT(dest);
147 KMP_DEBUG_ASSERT(src);
148 KMP_STR_BUF_INVARIANT(dest);
149 KMP_STR_BUF_INVARIANT(src);
// NOTE(review): guard for an empty source; presumably nothing is appended in
// that case -- confirm the statement controlled by this condition.
150 if (!src->str || !src->used)
152 __kmp_str_buf_reserve(dest, dest->used + src->used + 1);
153 KMP_MEMCPY(dest->str + dest->used, src->str, src->used);
154 dest->str[dest->used + src->used] = 0;
155 dest->used += src->used;
156 KMP_STR_BUF_INVARIANT(dest);
157 } // __kmp_str_buf_catbuf
// Formats `format` with `args` into the unused tail of the buffer
// (size - used bytes starting at str + used) using KMP_VSNPRINTF.
// A result rc with 0 <= rc < free means the text fit. Otherwise a new
// capacity is chosen -- used + rc + 1 when the C99-style required length was
// returned, or twice the current size when an older implementation returned
// -1 -- and handed to __kmp_str_buf_reserve().
159 // Return the number of characters written
160 int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
163 KMP_STR_BUF_INVARIANT(buffer);
166 int const free = buffer->size - buffer->used;
169 // Try to format string.
171 /* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf()
172 crashes if it is called for the second time with the same args. To prevent
173 the crash, we have to pass a fresh intact copy of args to vsnprintf() on each
176 Unfortunately, standard va_copy() macro is not available on Windows* OS.
177 However, it seems vsnprintf() does not modify args argument on Windows* OS.
182 va_copy(_args, args); // Make copy of args.
183 #define args _args // Substitute args with its copy, _args.
184 #endif // KMP_OS_WINDOWS
185 rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args);
187 #undef args // Remove substitution.
189 #endif // KMP_OS_WINDOWS
192 // No errors, string has been formatted.
193 if (rc >= 0 && rc < free) {
198 // Error occurred, buffer is too small.
200 // C99-conforming implementation of vsnprintf returns required buffer size
201 size = buffer->used + rc + 1;
203 // Older implementations just return -1. Double buffer size.
204 size = buffer->size * 2;
208 __kmp_str_buf_reserve(buffer, size);
213 KMP_DEBUG_ASSERT(buffer->size > 0);
214 KMP_STR_BUF_INVARIANT(buffer);
216 } // __kmp_str_buf_vprint
// printf-style front end for __kmp_str_buf_vprint(): collects the variadic
// arguments following `format` with va_start and forwards them.
218 // Return the number of characters written
219 int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
222 va_start(args, format);
223 rc = __kmp_str_buf_vprint(buffer, format, args);
226 } // __kmp_str_buf_print
228 /* The function prints specified size to buffer. Size is expressed using biggest
229 possible unit, for example 1024 is printed as "1k". */
// A larger unit is only considered while the size is an exact multiple of 1024
// and another unit name remains in the table; the final value and unit name
// are printed with __kmp_str_buf_print().
230 void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) {
231 char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
232 int const units = sizeof(names) / sizeof(char const *);
235 while ((size % 1024 == 0) && (u + 1 < units)) {
241 __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]);
242 } // __kmp_str_buf_print_size
// Fills `fname` with three freshly allocated strings derived from `path`:
//  - path: a copy of the input (backslashes turned into '/' on Windows);
//  - dir:  the copy truncated right after the last '/', or, on Windows when
//          there is no '/', right after a leading "<drive letter>:";
//  - base: everything that followed that split point.
// All three are created with __kmp_str_format().
244 void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
250 char *slash = NULL; // Pointer to the last character of dir.
251 char *base = NULL; // Pointer to the beginning of basename.
252 fname->path = __kmp_str_format("%s", path);
253 // Original code used strdup() function to copy a string, but on Windows* OS
254 // Intel(R) 64 it causes an assertion in the debug heap, so I had to replace
255 // strdup with __kmp_str_format().
256 if (KMP_OS_WINDOWS) {
257 __kmp_str_replace(fname->path, '\\', '/');
259 fname->dir = __kmp_str_format("%s", fname->path);
260 slash = strrchr(fname->dir, '/');
261 if (KMP_OS_WINDOWS &&
262 slash == NULL) { // On Windows* OS, if slash not found,
263 char first = TOLOWER(fname->dir[0]); // look for drive.
264 if ('a' <= first && first <= 'z' && fname->dir[1] == ':') {
265 slash = &fname->dir[1];
// With no split point the whole string is the basename and dir becomes empty.
268 base = (slash == NULL ? fname->dir : slash + 1);
269 fname->base = __kmp_str_format("%s", base); // Copy basename
270 *base = 0; // and truncate dir.
273 } // __kmp_str_fname_init
275 void __kmp_str_fname_free(kmp_str_fname_t *fname) {
276 __kmp_str_free(&fname->path);
277 __kmp_str_free(&fname->dir);
278 __kmp_str_free(&fname->base);
279 } // kmp_str_fname_free
// Tests a parsed file name against `pattern` and returns the conjunction of
// the directory match and the basename match.
// The pattern is itself parsed with __kmp_str_fname_init(). A pattern
// directory of "*/" matches any directory and a pattern basename of "*"
// matches any basename; otherwise the parts are compared with
// __kmp_str_eqf(), and a NULL part in `fname` never matches. The temporary
// parsed pattern is released before returning.
281 int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) {
285 if (pattern != NULL) {
286 kmp_str_fname_t ptrn;
287 __kmp_str_fname_init(&ptrn, pattern);
288 dir_match = strcmp(ptrn.dir, "*/") == 0 ||
289 (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir));
290 base_match = strcmp(ptrn.base, "*") == 0 ||
291 (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base));
292 __kmp_str_fname_free(&ptrn);
295 return dir_match && base_match;
296 } // __kmp_str_fname_match
// Builds a kmp_str_loc_t from a source-location string of the form
// ";file;func;line;col;;".
// The string is duplicated into loc._bulk and then cut at ';' with
// __kmp_str_split(), so the input itself is left untouched. The file name is
// additionally parsed into loc.fname only when `init_fname` is non-zero;
// otherwise __kmp_str_fname_init() receives NULL.
298 kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) {
307 if (psource != NULL) {
313 // Copy psource to keep it intact.
314 loc._bulk = __kmp_str_format("%s", psource);
316 // Parse psource string: ";file;func;line;col;;"
318 __kmp_str_split(str, ';', &dummy, &str);
319 __kmp_str_split(str, ';', &loc.file, &str);
320 __kmp_str_split(str, ';', &loc.func, &str);
321 __kmp_str_split(str, ';', &line, &str);
322 __kmp_str_split(str, ';', &col, &str);
324 // Convert line and col into numeric values.
// atoi() offers no error reporting, so malformed text is not detected here.
326 loc.line = atoi(line);
339 __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL);
342 } // __kmp_str_loc_init
// Releases what a kmp_str_loc_t owns: the parsed file name (loc->fname) and
// the private copy of the location string (loc->_bulk).
344 void __kmp_str_loc_free(kmp_str_loc_t *loc) {
345 __kmp_str_fname_free(&loc->fname);
346 __kmp_str_free(&(loc->_bulk));
349 } // __kmp_str_loc_free
351 /* This function is intended to compare file names. On Windows* OS file names
352 are case-insensitive, so the function performs a case-insensitive comparison.
353 On Linux* OS it performs a case-sensitive comparison. Note: The function
354 returns *true* if strings are *equal*. */
355 int __kmp_str_eqf( // True, if strings are equal, false otherwise.
356 char const *lhs, // First string.
357 char const *rhs // Second string.
// Case-insensitive variant (_stricmp).
361 result = (_stricmp(lhs, rhs) == 0);
// Case-sensitive variant (strcmp).
363 result = (strcmp(lhs, rhs) == 0);
368 /* This function is like sprintf, but it *allocates* new buffer, which must be
369 freed eventually by __kmp_str_free(). The function is very convenient for
370 constructing strings, it successfully replaces strdup(), strcat(), it frees
371 programmer from buffer allocations and helps to avoid buffer overflows.
374 str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size
375 __kmp_str_free( & str );
376 str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care
377 // about buffer size.
378 __kmp_str_free( & str );
379 str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
380 __kmp_str_free( & str );
383 This function allocates memory with malloc() calls, so do not call it from
384 performance-critical code. In performance-critical code consider using
385 kmp_str_buf_t instead, since it uses stack-allocated buffer for short
388 Why does this function use malloc()?
389 1. __kmp_allocate() returns cache-aligned memory allocated with malloc().
390 There are no reasons in using __kmp_allocate() for strings due to extra
391 overhead while cache-aligned memory is not necessary.
392 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread
393 structure. We need to perform string operations during library startup
394 (for example, in __kmp_register_library_startup()) when no thread
395 structures are allocated yet.
396 So standard malloc() is the only available option.
399 char *__kmp_str_format( // Allocated string.
400 char const *format, // Format string.
401 ... // Other parameters.
// Initial allocation; running out of memory is fatal.
409 buffer = (char *)KMP_INTERNAL_MALLOC(size);
410 if (buffer == NULL) {
411 KMP_FATAL(MemoryAllocFailed);
415 // Try to format string.
416 va_start(args, format);
417 rc = KMP_VSNPRINTF(buffer, size, format, args);
420 // No errors, string has been formatted.
// rc == size would mean the terminator did not fit, hence the strict "<".
421 if (rc >= 0 && rc < size) {
425 // Error occurred, buffer is too small.
427 // C99-conforming implementation of vsnprintf returns required buffer
431 // Older implementations just return -1.
435 // Enlarge buffer and try again.
436 buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size);
437 if (buffer == NULL) {
438 KMP_FATAL(MemoryAllocFailed);
443 } // func __kmp_str_format
// Frees the string that `*str` points to with KMP_INTERNAL_FREE. The argument
// is the address of the caller's pointer and must itself be non-NULL.
445 void __kmp_str_free(char **str) {
446 KMP_DEBUG_ASSERT(str != NULL);
447 KMP_INTERNAL_FREE(*str);
449 } // func __kmp_str_free
// The two strings are compared case-insensitively in lockstep from their
// first characters, so every match described below is a *prefix* match.
451 /* If len is zero, returns true iff target and data have exact case-insensitive
452 match. If len is negative, returns true iff target is a case-insensitive
453 prefix of data. If len is positive, returns true iff target is a
454 case-insensitive prefix of data or vice versa, and neither is shorter than
456 int __kmp_str_match(char const *target, int len, char const *data) {
458 if (target == NULL || data == NULL) {
461 for (i = 0; target[i] && data[i]; ++i) {
462 if (TOLOWER(target[i]) != TOLOWER(data[i])) {
// i is the number of leading characters that were compared equal.
466 return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
// Checks `data` against the accepted spellings of a false value using
// __kmp_str_match(): "false", "off", "0", ".false.", ".f.", "no" (each with
// the minimum length given in the call) and "disabled" (exact match).
469 int __kmp_str_match_false(char const *data) {
471 __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
472 __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) ||
473 __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data) ||
474 __kmp_str_match("disabled", 0, data);
476 } // __kmp_str_match_false
// Checks `data` against the accepted spellings of a true value using
// __kmp_str_match(): "true", "on", "1", ".true.", ".t.", "yes" (each with
// the minimum length given in the call) and "enabled" (exact match).
478 int __kmp_str_match_true(char const *data) {
480 __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) ||
481 __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) ||
482 __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data) ||
483 __kmp_str_match("enabled", 0, data);
485 } // __kmp_str_match_true
// Replaces, in place, every occurrence of `search_for` in the NUL-terminated
// string `str` with `replace_with`.
//   str          - string to modify; NULL is accepted and ignored.
//   search_for   - character to look for; '\0' is ignored (see below).
//   replace_with - character written over each occurrence.
void __kmp_str_replace(char *str, char search_for, char replace_with) {
  // strchr() treats the terminator as part of the string, so a search for
  // '\0' would "find" the terminator, overwrite it and then continue scanning
  // beyond the end of the string. There is nothing sensible to replace.
  if (str == NULL || search_for == '\0')
    return;

  for (char *found = strchr(str, search_for); found != NULL;
       found = strchr(found + 1, search_for)) {
    *found = replace_with;
  }
} // __kmp_str_replace
// Splits `str` at a `delim` character located with strchr(), i.e. at the
// first occurrence. The head and tail results are optional outputs.
497 void __kmp_str_split(char *str, // I: String to split.
498 char delim, // I: Character to split on.
499 char **head, // O: Pointer to head (may be NULL).
500 char **tail // O: Pointer to tail (may be NULL).
505 char *ptr = strchr(str, delim);
519 /* strtok_r() is not available on Windows* OS. This function reimplements
521 char *__kmp_str_token(
522 char *str, // String to split into tokens. Note: String *is* modified!
523 char const *delim, // Delimiters.
524 char **buf // Internal buffer.
528 // On Windows* OS there is no strtok_r() function. Let us implement it.
// *buf carries the scan position from one call to the next.
530 *buf = str; // First call, initialize buf.
532 *buf += strspn(*buf, delim); // Skip leading delimiters.
533 if (**buf != 0) { // Rest of the string is not yet empty.
534 token = *buf; // Use it as result.
535 *buf += strcspn(*buf, delim); // Skip non-delimiters.
536 if (**buf != 0) { // Rest of the string is not yet empty.
537 **buf = 0; // Terminate token here.
538 *buf += 1; // Advance buf to start with the next token next time.
542 // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
543 token = strtok_r(str, delim, buf);
// Parses a non-negative decimal number with an optional size suffix.
//   str      - text to parse: digits, optionally followed by one of
//              'b'/'B' (bytes), 'k'/'K' (x1024) or 'm'/'M' (x1024*1024).
//   sentinel - additional character that may legally follow the digits; the
//              text after it is ignored.
// Returns the scaled value, saturated at INT_MAX when it does not fit in an
// int; -1 when the digits are followed by a character that is neither a
// known suffix nor the sentinel; 0 when anything follows a suffix.
int __kmp_str_to_int(char const *str, char sentinel) {
  int result = 0;
  int factor;
  char const *t;

  for (t = str; *t != '\0'; ++t) {
    if (*t < '0' || *t > '9')
      break;
    int const digit = *t - '0';
    // Saturate instead of letting result * 10 + digit overflow: signed
    // overflow is undefined behavior. Once saturated, result stays INT_MAX.
    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = (result * 10) + digit;
  }

  switch (*t) {
  case '\0': /* the current default for no suffix is bytes */
    factor = 1;
    break;
  case 'b':
  case 'B': /* bytes */
    ++t;
    factor = 1;
    break;
  case 'k':
  case 'K': /* kilo-bytes */
    ++t;
    factor = 1024;
    break;
  case 'm':
  case 'M': /* mega-bytes */
    ++t;
    factor = (1024 * 1024);
    break;
  default:
    if (*t != sentinel)
      return (-1);
    t = ""; // Sentinel reached: treat the rest of the input as consumed.
    factor = 1;
  }

  // Scale with saturation so the multiplication cannot overflow either.
  if (result > (INT_MAX / factor))
    result = INT_MAX;
  else
    result *= factor;

  // Trailing characters after a suffix make the whole value invalid (0).
  return (*t != 0 ? 0 : result);
} // __kmp_str_to_int
594 /* The routine parses input string. It is expected it is an unsigned integer
595 with optional unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes,
596 "mb" or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) Unit name is
597 case-insensitive. The routine returns nothing; the outcome is reported through
598 *error, which receives a message (NotANumber, BadUnit, IllegalCharacters or
599 ValueTooLarge) when parsing fails. In case of overflow *out is set to
600 KMP_SIZE_T_MAX. */
// NOTE(review): the earlier wording also promised that the output is set to
// zero for an unknown unit -- confirm against the BadUnit path.
601 void __kmp_str_to_size( // R: Nothing; status is reported through *error.
602 char const *str, // I: String of characters, unsigned number and unit ("b",
604 size_t *out, // O: Parsed number.
605 size_t dfactor, // I: The factor if none of the letters specified.
606 char const **error // O: Null if everything is ok, error message otherwise.
615 KMP_DEBUG_ASSERT(str != NULL);
// Leading blanks and tabs are skipped.
618 while (str[i] == ' ' || str[i] == '\t') {
623 if (str[i] < '0' || str[i] > '9') {
624 *error = KMP_I18N_STR(NotANumber);
628 digit = str[i] - '0';
// Overflow is detected before the multiply-and-add, never after it.
629 overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
630 value = (value * 10) + digit;
632 } while (str[i] >= '0' && str[i] <= '9');
635 while (str[i] == ' ' || str[i] == '\t') {
640 #define _case(ch, exp) \
642 case ch - ('a' - 'A'): { \
643 size_t shift = (exp)*10; \
645 if (shift < sizeof(size_t) * 8) { \
646 factor = (size_t)(1) << shift; \
652 _case('k', 1); // Kilo
653 _case('m', 2); // Mega
654 _case('g', 3); // Giga
655 _case('t', 4); // Tera
656 _case('p', 5); // Peta
657 _case('e', 6); // Exa
658 _case('z', 7); // Zetta
659 _case('y', 8); // Yotta
660 // Oops. No more units...
663 if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b".
669 if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit
670 *error = KMP_I18N_STR(BadUnit);
// Scaling by the unit factor may overflow as well.
679 overflow = overflow || (value > (KMP_SIZE_T_MAX / factor));
683 while (str[i] == ' ' || str[i] == '\t') {
688 *error = KMP_I18N_STR(IllegalCharacters);
693 *error = KMP_I18N_STR(ValueTooLarge);
694 *out = KMP_SIZE_T_MAX;
700 } // __kmp_str_to_size
// Parses an unsigned decimal number surrounded by optional blanks/tabs.
// Nothing is returned; failures are reported through *error (NotANumber,
// IllegalCharacters, ValueTooLarge). On overflow *out receives the largest
// kmp_uint64 value.
// NOTE(review): the overflow bound is KMP_SIZE_T_MAX although the output type
// is kmp_uint64 -- confirm this is intended where size_t is narrower.
702 void __kmp_str_to_uint( // R: Nothing; status is reported through *error.
703 char const *str, // I: String of characters, unsigned number.
704 kmp_uint64 *out, // O: Parsed number.
705 char const **error // O: Null if everything is ok, error message otherwise.
712 KMP_DEBUG_ASSERT(str != NULL);
715 while (str[i] == ' ' || str[i] == '\t') {
720 if (str[i] < '0' || str[i] > '9') {
721 *error = KMP_I18N_STR(NotANumber);
725 digit = str[i] - '0';
// Overflow is detected before the multiply-and-add, never after it.
726 overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
727 value = (value * 10) + digit;
729 } while (str[i] >= '0' && str[i] <= '9');
732 while (str[i] == ' ' || str[i] == '\t') {
737 *error = KMP_I18N_STR(IllegalCharacters);
742 *error = KMP_I18N_STR(ValueTooLarge);
743 *out = (kmp_uint64)-1;
749 } // __kmp_str_to_uint