2 * kmp_str.cpp -- String manipulation routines.
5 //===----------------------------------------------------------------------===//
7 // The LLVM Compiler Infrastructure
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
12 //===----------------------------------------------------------------------===//
16 #include <stdarg.h> // va_*
17 #include <stdio.h> // vsnprintf()
18 #include <stdlib.h> // malloc(), realloc()
27 // Declare buffer and initialize it.
29 __kmp_str_buf_init( & buffer );
32 __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
33 __kmp_str_buf_print(& buffer, " <%s>\n", line);
35 // Use buffer contents. buffer.str is a pointer to data, buffer.used is a
36 // number of printed characters (not including terminating zero).
37 write( fd, buffer.str, buffer.used );
40 __kmp_str_buf_free( & buffer );
42 // Alternatively, you can detach allocated memory from buffer:
43 __kmp_str_buf_detach( & buffer );
44 return buffer.str; // That memory should be freed eventually.
48 * Buffer users may use buffer.str and buffer.used. Users should not change
49 any fields of buffer directly.
50 * buffer.str is never NULL. If buffer is empty, buffer.str points to an empty string ("").
52 * For performance reasons, buffer uses stack memory (buffer.bulk) first. If
53 stack memory is exhausted, buffer allocates memory on heap by malloc(), and
54 reallocates it by realloc() as amount of used memory grows.
55 * Buffer doubles amount of allocated memory each time it is exhausted.
58 // TODO: __kmp_str_buf_print() can use thread local memory allocator.
// Consistency checks for a kmp_str_buf_t `b`, expressed with KMP_DEBUG_ASSERT:
//  - str is non-NULL;
//  - size is at least sizeof(bulk) and a multiple of it;
//  - used is strictly smaller than size;
//  - str points at the embedded bulk array when size == sizeof(bulk), and
//    somewhere else once size has grown beyond it.
// NOTE(review): some continuation lines of this macro are missing from this
// view, so the list above covers only the visible assertions.
60 #define KMP_STR_BUF_INVARIANT(b) \
62 KMP_DEBUG_ASSERT((b)->str != NULL); \
63 KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \
64 KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \
65 KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \
67 (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \
68 KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \
// Clears the buffer (per the function name). The invariant is checked on entry
// and on exit; the reset work is only done when used > 0.
// NOTE(review): the statements inside the `if` are not visible in this view.
72 void __kmp_str_buf_clear(kmp_str_buf_t *buffer) {
73 KMP_STR_BUF_INVARIANT(buffer);
74 if (buffer->used > 0) {
78 KMP_STR_BUF_INVARIANT(buffer);
79 } // __kmp_str_buf_clear
// Makes sure the buffer capacity (buffer->size) is at least `size` bytes.
//
// buffer: buffer to grow; must satisfy KMP_STR_BUF_INVARIANT.
// size:   requested capacity in bytes; asserted to be non-negative.
//
// When the capacity is too small, buffer->size is increased in a do/while loop
// until it reaches `size`, then the storage is either moved off the embedded
// bulk array onto the heap or reallocated. Allocation failure is reported
// through KMP_FATAL(MemoryAllocFailed).
// NOTE(review): the loop body and several closing braces are not visible in
// this view; the file header states that capacity doubles on each growth.
81 void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) {
82 KMP_STR_BUF_INVARIANT(buffer);
83 KMP_DEBUG_ASSERT(size >= 0);
85 if (buffer->size < (unsigned int)size) {
86 // Calculate buffer size.
89 } while (buffer->size < (unsigned int)size);
// Still on the embedded array: allocate heap storage and copy the current
// contents, including the terminating zero (used + 1 bytes).
92 if (buffer->str == &buffer->bulk[0]) {
93 buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
94 if (buffer->str == NULL) {
95 KMP_FATAL(MemoryAllocFailed);
97 KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
// Already on the heap: reallocate. The old pointer is overwritten directly,
// which is only safe if KMP_FATAL does not return -- TODO confirm.
99 buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size);
100 if (buffer->str == NULL) {
101 KMP_FATAL(MemoryAllocFailed);
// Post-conditions: capacity is non-zero and covers the request.
106 KMP_DEBUG_ASSERT(buffer->size > 0);
107 KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size);
108 KMP_STR_BUF_INVARIANT(buffer);
109 } // __kmp_str_buf_reserve
// Detaches the character storage from the buffer so that the caller can keep
// using buffer->str after the buffer itself goes away (see the usage example
// in the file header: the detached memory must be freed eventually).
//
// If the contents still live in the embedded bulk array, a heap block of
// buffer->size bytes is allocated and the contents plus the terminating zero
// (used + 1 bytes) are copied into it. Allocation failure goes through
// KMP_FATAL(MemoryAllocFailed).
// NOTE(review): closing braces and any statements after the copy are not
// visible in this view.
111 void __kmp_str_buf_detach(kmp_str_buf_t *buffer) {
112 KMP_STR_BUF_INVARIANT(buffer);
114 // If internal bulk is used, allocate memory and copy it.
115 if (buffer->size <= sizeof(buffer->bulk)) {
116 buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
117 if (buffer->str == NULL) {
118 KMP_FATAL(MemoryAllocFailed);
120 KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
122 } // __kmp_str_buf_detach
// Releases any heap storage held by the buffer and returns it to its initial
// layout: str points at the embedded bulk array and size is sizeof(bulk).
// Heap storage is recognised by size > sizeof(bulk).
// NOTE(review): at least one statement of this function is not visible in
// this view (presumably the reset of `used`) -- verify against the full file.
124 void __kmp_str_buf_free(kmp_str_buf_t *buffer) {
125 KMP_STR_BUF_INVARIANT(buffer);
126 if (buffer->size > sizeof(buffer->bulk)) {
127 KMP_INTERNAL_FREE(buffer->str);
129 buffer->str = buffer->bulk;
130 buffer->size = sizeof(buffer->bulk);
132 KMP_STR_BUF_INVARIANT(buffer);
133 } // __kmp_str_buf_free
// Appends `len` bytes taken from `str` to the buffer.
//
// buffer: destination buffer; must satisfy KMP_STR_BUF_INVARIANT.
// str:    source bytes; asserted non-NULL. It does not need to be
//         zero-terminated, because exactly `len` bytes are copied.
// len:    number of bytes to append; asserted non-negative.
//
// NOTE(review): the statement that advances buffer->used is not visible in
// this view.
135 void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) {
136 KMP_STR_BUF_INVARIANT(buffer);
137 KMP_DEBUG_ASSERT(str != NULL);
138 KMP_DEBUG_ASSERT(len >= 0);
// Room for the existing text, the new bytes and the terminating zero.
139 __kmp_str_buf_reserve(buffer, buffer->used + len + 1);
140 KMP_MEMCPY(buffer->str + buffer->used, str, len);
141 buffer->str[buffer->used + len] = 0;
143 KMP_STR_BUF_INVARIANT(buffer);
144 } // __kmp_str_buf_cat
// Appends the contents of buffer `src` to buffer `dest`.
//
// dest: destination buffer; asserted non-NULL and invariant-checked.
// src:  source buffer; asserted non-NULL and invariant-checked. Not modified.
//
// An empty source (no string or used == 0) is tested for up front.
// NOTE(review): the statement controlled by that test is not visible in this
// view (presumably an early return) -- verify against the full file.
146 void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) {
147 KMP_DEBUG_ASSERT(dest);
148 KMP_DEBUG_ASSERT(src);
149 KMP_STR_BUF_INVARIANT(dest);
150 KMP_STR_BUF_INVARIANT(src);
151 if (!src->str || !src->used)
// Room for both texts and the terminating zero, then copy and re-terminate.
153 __kmp_str_buf_reserve(dest, dest->used + src->used + 1);
154 KMP_MEMCPY(dest->str + dest->used, src->str, src->used);
155 dest->str[dest->used + src->used] = 0;
156 dest->used += src->used;
157 KMP_STR_BUF_INVARIANT(dest);
158 } // __kmp_str_buf_catbuf
160 // Return the number of characters written
//
// Formats `format` with the va_list `args` through KMP_VSNPRINTF and writes
// the result at buffer->str + buffer->used, i.e. after the current contents.
//
// The call succeeds when 0 <= rc < free. Otherwise the buffer is too small:
// if rc is non-negative (C99 behaviour) the required capacity is
// used + rc + 1; if rc is negative (older implementations) the capacity is
// doubled. The buffer is then enlarged with __kmp_str_buf_reserve.
// NOTE(review): the retry loop, the declarations of rc/size/_args and the
// return statement are not visible in this view.
// NOTE(review): the local constant named `free` shadows the C library
// function of the same name inside this function.
161 int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
164 KMP_STR_BUF_INVARIANT(buffer);
167 int const free = buffer->size - buffer->used;
170 // Try to format string.
172 /* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf()
173 crashes if it is called for the second time with the same args. To prevent
174 the crash, we have to pass a fresh intact copy of args to vsnprintf() on each
177 Unfortunately, standard va_copy() macro is not available on Windows* OS.
178 However, it seems vsnprintf() does not modify args argument on Windows* OS.
183 va_copy(_args, args); // Make copy of args.
184 #define args _args // Substitute args with its copy, _args.
185 #endif // KMP_OS_WINDOWS
186 rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args);
188 #undef args // Remove substitution.
190 #endif // KMP_OS_WINDOWS
193 // No errors, string has been formatted.
194 if (rc >= 0 && rc < free) {
199 // Error occurred, buffer is too small.
201 // C99-conforming implementation of vsnprintf returns required buffer size
202 size = buffer->used + rc + 1;
204 // Older implementations just return -1. Double buffer size.
205 size = buffer->size * 2;
209 __kmp_str_buf_reserve(buffer, size);
214 KMP_DEBUG_ASSERT(buffer->size > 0);
215 KMP_STR_BUF_INVARIANT(buffer);
217 } // __kmp_str_buf_vprint
219 // Return the number of characters written
//
// printf-style front end: captures the variadic arguments with va_start and
// forwards them, together with `buffer` and `format`, to
// __kmp_str_buf_vprint, keeping its result in rc.
// NOTE(review): the declarations, the matching va_end and the return
// statement are not visible in this view.
220 int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
223 va_start(args, format);
224 rc = __kmp_str_buf_vprint(buffer, format, args);
227 } // __kmp_str_buf_print
229 /* The function prints specified size to buffer. Size is expressed using biggest
230 possible unit, for example 1024 is printed as "1k". */
// buf:  destination buffer; the text is appended with __kmp_str_buf_print.
// size: value to print.
//
// The unit is only advanced while size is an exact multiple of 1024 and a
// larger unit name still exists in `names`, so a value that is not a multiple
// of 1024 keeps its current unit.
// NOTE(review): the declaration of `u` and the loop body are not visible in
// this view.
231 void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) {
232 char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
// Number of entries in names[].
233 int const units = sizeof(names) / sizeof(char const *);
236 while ((size % 1024 == 0) && (u + 1 < units)) {
242 __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]);
243 } // __kmp_str_buf_print_size
// Fills `fname` with three freshly allocated strings derived from `path`:
//   path - a copy of the argument (backslashes turned into '/' on Windows* OS);
//   dir  - everything up to and including the last '/' (or the "X:" drive
//          prefix on Windows* OS when there is no slash); empty if neither;
//   base - everything after that point.
// All three are created with __kmp_str_format and are released by
// __kmp_str_fname_free.
// NOTE(review): several lines are missing from this view. __kmp_str_loc_init
// calls this function with path == NULL; whether that case is guarded here
// cannot be told from the visible lines -- verify against the full file.
245 void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
251 char *slash = NULL; // Pointer to the last character of dir.
252 char *base = NULL; // Pointer to the beginning of basename.
253 fname->path = __kmp_str_format("%s", path);
254 // Original code used strdup() function to copy a string, but on Windows* OS
255 // Intel(R) 64 it causes an assertion in the debug heap, so I had to replace
256 // strdup with __kmp_str_format().
257 if (KMP_OS_WINDOWS) {
258 __kmp_str_replace(fname->path, '\\', '/');
// dir starts as a full copy of path and is truncated in place further down.
260 fname->dir = __kmp_str_format("%s", fname->path);
261 slash = strrchr(fname->dir, '/');
262 if (KMP_OS_WINDOWS &&
263 slash == NULL) { // On Windows* OS, if slash not found,
264 char first = TOLOWER(fname->dir[0]); // look for drive.
265 if ('a' <= first && first <= 'z' && fname->dir[1] == ':') {
// Treat the ':' of the drive prefix as the end of the directory part.
266 slash = &fname->dir[1];
269 base = (slash == NULL ? fname->dir : slash + 1);
270 fname->base = __kmp_str_format("%s", base); // Copy basename
271 *base = 0; // and truncate dir.
274 } // __kmp_str_fname_init
276 void __kmp_str_fname_free(kmp_str_fname_t *fname) {
277 __kmp_str_free(&fname->path);
278 __kmp_str_free(&fname->dir);
279 __kmp_str_free(&fname->base);
280 } // kmp_str_fname_free
// Tests whether the file name `fname` matches `pattern`.
//
// The pattern is split into dir and base with __kmp_str_fname_init. The
// directory part matches if the pattern directory is "*/" or equals
// fname->dir; the base part matches if the pattern base is "*" or equals
// fname->base. Equality is decided by __kmp_str_eqf. The temporary parsed
// pattern is released before returning.
//
// Returns non-zero only when both the directory and the base match.
// NOTE(review): the declarations and initial values of dir_match and
// base_match are not visible in this view, so the result for
// pattern == NULL cannot be stated from here.
282 int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) {
286 if (pattern != NULL) {
287 kmp_str_fname_t ptrn;
288 __kmp_str_fname_init(&ptrn, pattern);
289 dir_match = strcmp(ptrn.dir, "*/") == 0 ||
290 (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir));
291 base_match = strcmp(ptrn.base, "*") == 0 ||
292 (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base));
293 __kmp_str_fname_free(&ptrn);
296 return dir_match && base_match;
297 } // __kmp_str_fname_match
// Parses a source-location string of the form ";file;func;line;col;;" into a
// kmp_str_loc_t.
//
// psource:    location string, may be NULL. It is copied into loc._bulk
//             first, so the caller's string is left untouched.
// init_fname: when non-zero, loc.fname is initialised from loc.file;
//             otherwise __kmp_str_fname_init is called with NULL.
//
// The copy is cut at ';' with __kmp_str_split; loc.file and loc.func are
// filled directly by those calls, and the line field is converted with atoi.
// NOTE(review): atoi cannot report conversion errors, and many lines of this
// function (declarations, the col conversion, the return) are not visible in
// this view.
299 kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) {
308 if (psource != NULL) {
314 // Copy psource to keep it intact.
315 loc._bulk = __kmp_str_format("%s", psource);
317 // Parse psource string: ";file;func;line;col;;"
// The first split consumes the empty field in front of the leading ';'.
319 __kmp_str_split(str, ';', &dummy, &str);
320 __kmp_str_split(str, ';', &loc.file, &str);
321 __kmp_str_split(str, ';', &loc.func, &str);
322 __kmp_str_split(str, ';', &line, &str);
323 __kmp_str_split(str, ';', &col, &str);
325 // Convert line and col into numeric values.
327 loc.line = atoi(line);
340 __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL);
343 } // __kmp_str_loc_init
// Releases what __kmp_str_loc_init allocated: the parsed file name
// (loc->fname) and the private copy of the source string (loc->_bulk).
// NOTE(review): two lines of this function are not visible in this view.
345 void __kmp_str_loc_free(kmp_str_loc_t *loc) {
346 __kmp_str_fname_free(&loc->fname);
347 __kmp_str_free(&(loc->_bulk));
350 } // __kmp_str_loc_free
352 /* This function is intended to compare file names. On Windows* OS file names
353 are case-insensitive, so functions performs case-insensitive comparison. On
354 Linux* OS it performs case-sensitive comparison. Note: The function returns
355 *true* if strings are *equal*. */
// Two comparisons are present: _stricmp (case-insensitive) and strcmp
// (case-sensitive); each result is turned into a 0/1 "equal" flag.
// NOTE(review): the preprocessor conditional that selects between them, the
// declaration of `result` and the return statement are not visible in this
// view.
356 int __kmp_str_eqf( // True, if strings are equal, false otherwise.
357 char const *lhs, // First string.
358 char const *rhs // Second string.
362 result = (_stricmp(lhs, rhs) == 0);
364 result = (strcmp(lhs, rhs) == 0);
369 /* This function is like sprintf, but it *allocates* new buffer, which must be
370 freed eventually by __kmp_str_free(). The function is very convenient for
371 constructing strings, it successfully replaces strdup(), strcat(), it frees
372 programmer from buffer allocations and helps to avoid buffer overflows.
375 str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size
376 __kmp_str_free( & str );
377 str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care
378 // about buffer size.
379 __kmp_str_free( & str );
380 str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
381 __kmp_str_free( & str );
384 This function allocates memory with malloc() calls, so do not call it from
385 performance-critical code. In performance-critical code consider using
386 kmp_str_buf_t instead, since it uses stack-allocated buffer for short strings.
389 Why does this function use malloc()?
390 1. __kmp_allocate() returns cache-aligned memory allocated with malloc().
391 There are no reasons in using __kmp_allocate() for strings due to extra
392 overhead while cache-aligned memory is not necessary.
393 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread
394 structure. We need to perform string operations during library startup
395 (for example, in __kmp_register_library_startup()) when no thread
396 structures are allocated yet.
397 So standard malloc() is the only available option. */
// sprintf-like helper that returns a newly allocated string; the caller
// releases it with __kmp_str_free.
//
// A buffer of `size` bytes is allocated with KMP_INTERNAL_MALLOC and the text
// is produced with KMP_VSNPRINTF. The attempt succeeds when 0 <= rc < size;
// otherwise the buffer is enlarged with KMP_INTERNAL_REALLOC. Allocation
// failure goes through KMP_FATAL(MemoryAllocFailed).
// NOTE(review): the initial value of `size`, how the new size is computed,
// the retry loop, va_end and the return statement are not visible in this
// view.
400 char *__kmp_str_format( // Allocated string.
401 char const *format, // Format string.
402 ... // Other parameters.
410 buffer = (char *)KMP_INTERNAL_MALLOC(size);
411 if (buffer == NULL) {
412 KMP_FATAL(MemoryAllocFailed);
416 // Try to format string.
417 va_start(args, format);
418 rc = KMP_VSNPRINTF(buffer, size, format, args);
421 // No errors, string has been formatted.
422 if (rc >= 0 && rc < size) {
426 // Error occurred, buffer is too small.
428 // C99-conforming implementation of vsnprintf returns required buffer
432 // Older implementations just return -1.
436 // Enlarge buffer and try again.
437 buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size);
438 if (buffer == NULL) {
439 KMP_FATAL(MemoryAllocFailed);
444 } // func __kmp_str_format
// Frees the string that `str` points to (for example one created by
// __kmp_str_format). `str` itself is asserted to be non-NULL; the string is
// released with KMP_INTERNAL_FREE.
// NOTE(review): one line of this function is not visible in this view
// (presumably the reset of *str) -- verify against the full file.
446 void __kmp_str_free(char **str) {
447 KMP_DEBUG_ASSERT(str != NULL);
448 KMP_INTERNAL_FREE(*str);
450 } // func __kmp_str_free
452 /* If len is zero, returns true iff target and data have exact case-insensitive
453 match. If len is negative, returns true iff target is a case-insensitive
454 substring of data. If len is positive, returns true iff target is a
455 case-insensitive substring of data or vice versa, and neither is shorter than len. */
// Case-insensitive comparison of `target` against `data`, controlled by `len`.
//
// The loop walks both strings from index 0 while neither has ended, comparing
// characters through TOLOWER; this is a common-prefix comparison, not a search
// for target anywhere inside data. After the loop, i is the number of leading
// characters that compared equal, and the result is:
//   len > 0  : true when at least len characters matched;
//   len == 0 : true when both strings ended together (exact match);
//   len < 0  : true when target was consumed completely (target is a prefix
//              of data).
// NOTE(review): the handling of NULL arguments and of a mismatching character
// is not visible in this view.
457 int __kmp_str_match(char const *target, int len, char const *data) {
459 if (target == NULL || data == NULL) {
462 for (i = 0; target[i] && data[i]; ++i) {
463 if (TOLOWER(target[i]) != TOLOWER(data[i])) {
467 return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
// Recognises the textual spellings of a "false" setting in `data`, using
// __kmp_str_match (case-insensitive). Accepted words and the minimum number
// of matching leading characters: "false" (1), "off" (2), "0" (1),
// ".false." (2), ".f." (2), "no" (1); "disabled" must match exactly (len 0).
// NOTE(review): the lines that store and return the combined result are not
// visible in this view.
470 int __kmp_str_match_false(char const *data) {
472 __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
473 __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) ||
474 __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data) ||
475 __kmp_str_match("disabled", 0, data);
477 } // __kmp_str_match_false
// Recognises the textual spellings of a "true" setting in `data`, using
// __kmp_str_match (case-insensitive). Accepted words and the minimum number
// of matching leading characters: "true" (1), "on" (2), "1" (1),
// ".true." (2), ".t." (2), "yes" (1); "enabled" must match exactly (len 0).
// NOTE(review): the lines that store and return the combined result are not
// visible in this view.
479 int __kmp_str_match_true(char const *data) {
481 __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) ||
482 __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) ||
483 __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data) ||
484 __kmp_str_match("enabled", 0, data);
486 } // __kmp_str_match_true
// Replaces, in place, occurrences of the character `search_for` in `str` with
// `replace_with`. Each hit is located with strchr, overwritten, and the search
// resumes one character past it.
// NOTE(review): the declaration of `found` and the loop construct around the
// replacement are not visible in this view.
488 void __kmp_str_replace(char *str, char search_for, char replace_with) {
491 found = strchr(str, search_for);
493 *found = replace_with;
494 found = strchr(found + 1, search_for);
496 } // __kmp_str_replace
// Splits `str` at the first occurrence of `delim`, which is located with
// strchr. The two parts are reported through the optional output pointers
// `head` and `tail` (either may be NULL, per the parameter comments).
// NOTE(review): everything after the strchr call is missing from this view,
// including how head/tail are assigned and whether `str` is modified.
498 void __kmp_str_split(char *str, // I: String to split.
499 char delim, // I: Character to split on.
500 char **head, // O: Pointer to head (may be NULL).
501 char **tail // O: Pointer to tail (may be NULL).
506 char *ptr = strchr(str, delim);
520 /* strtok_r() is not available on Windows* OS. This function reimplements strtok_r(). */
// Tokenizer with the strtok_r calling convention: `str` is the string to cut
// (it is modified), `delim` is the set of delimiter characters and `buf`
// carries the scan position between calls.
//
// Two implementations are present. The hand-written one skips leading
// delimiters with strspn, takes the run of non-delimiters found by strcspn as
// the token, overwrites the delimiter that follows it with zero and leaves
// *buf just past it. The other one simply forwards to strtok_r.
// NOTE(review): the preprocessor conditional selecting between the two, the
// declaration of `token`, the test that detects the first call and the return
// statement are not visible in this view.
522 char *__kmp_str_token(
523 char *str, // String to split into tokens. Note: String *is* modified!
524 char const *delim, // Delimiters.
525 char **buf // Internal buffer.
529 // On Windows* OS there is no strtok_r() function. Let us implement it.
531 *buf = str; // First call, initialize buf.
533 *buf += strspn(*buf, delim); // Skip leading delimiters.
534 if (**buf != 0) { // Rest of the string is not yet empty.
535 token = *buf; // Use it as result.
536 *buf += strcspn(*buf, delim); // Skip non-delimiters.
537 if (**buf != 0) { // Rest of the string is not yet empty.
538 **buf = 0; // Terminate token here.
539 *buf += 1; // Advance buf to start with the next token next time.
543 // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
544 token = strtok_r(str, delim, buf);
// Converts a decimal string with an optional size suffix to int.
//
// Leading decimal digits are accumulated into `result`; the scan stops at the
// first non-digit. The character found there is dispatched over the cases
// '\0' (no suffix, treated as bytes), 'B', 'K' and 'M', which select a
// multiplication factor (1024 * 1024 for 'M'). The value is compared against
// INT_MAX / factor before scaling, and 0 is returned when characters remain
// unconsumed (*t != 0).
// NOTE(review): the switch scaffold, the other factor assignments, the role
// of `sentinel` and the action taken on overflow are not visible in this view.
// NOTE(review): in the visible lines the digit accumulation itself
// (result * 10 + digit) is not range-checked.
549 int __kmp_str_to_int(char const *str, char sentinel) {
555 for (t = str; *t != '\0'; ++t) {
556 if (*t < '0' || *t > '9')
558 result = (result * 10) + (*t - '0');
562 case '\0': /* the current default for no suffix is bytes */
566 case 'B': /* bytes */
571 case 'K': /* kilo-bytes */
576 case 'M': /* mega-bytes */
578 factor = (1024 * 1024);
587 if (result > (INT_MAX / factor))
592 return (*t != 0 ? 0 : result);
593 } // __kmp_str_to_int
595 /* The routine parses input string. It is expected it is a unsigned integer with
596 optional unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb"
597 or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) Unit name is
598 case-insensitive. The routine returns 0 if everything is ok, or error code:
599 -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed
600 value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown
601 unit *size is set to zero. */
// NOTE(review): the comment above speaks of numeric return codes and of a
// parameter called "size", but the function is declared void, reports
// problems through `error` (NotANumber, BadUnit, IllegalCharacters,
// ValueTooLarge) and stores the value through `out`.
//
// Visible parsing steps: skip blanks and tabs; require a digit; accumulate
// decimal digits with an overflow check against KMP_SIZE_T_MAX; skip blanks;
// read the unit letter; accept an optional "b"/"B"; require blank, tab or end
// of string after the unit; check value against KMP_SIZE_T_MAX / factor;
// skip trailing blanks. On overflow `out` receives KMP_SIZE_T_MAX.
// NOTE(review): many lines are missing from this view, including the use of
// `dfactor`, the final multiplication and the success path.
602 void __kmp_str_to_size( // R: Error code.
603 char const *str, // I: String of characters, unsigned number and unit ("b",
605 size_t *out, // O: Parsed number.
606 size_t dfactor, // I: The factor if none of the letters specified.
607 char const **error // O: Null if everything is ok, error message otherwise.
616 KMP_DEBUG_ASSERT(str != NULL);
619 while (str[i] == ' ' || str[i] == '\t') {
624 if (str[i] < '0' || str[i] > '9') {
625 *error = KMP_I18N_STR(NotANumber);
629 digit = str[i] - '0';
630 overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
631 value = (value * 10) + digit;
633 } while (str[i] >= '0' && str[i] <= '9');
636 while (str[i] == ' ' || str[i] == '\t') {
// Helper macro producing the switch case for one unit letter. The visible
// case label is the upper-case form, ch - ('a' - 'A'). The factor is
// 2^(exp * 10), computed only when that shift is narrower than size_t.
641 #define _case(ch, exp) \
643 case ch - ('a' - 'A'): { \
644 size_t shift = (exp)*10; \
646 if (shift < sizeof(size_t) * 8) { \
647 factor = (size_t)(1) << shift; \
653 _case('k', 1); // Kilo
654 _case('m', 2); // Mega
655 _case('g', 3); // Giga
656 _case('t', 4); // Tera
657 _case('p', 5); // Peta
658 _case('e', 6); // Exa
659 _case('z', 7); // Zetta
660 _case('y', 8); // Yotta
661 // Oops. No more units...
664 if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b".
670 if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit
671 *error = KMP_I18N_STR(BadUnit);
680 overflow = overflow || (value > (KMP_SIZE_T_MAX / factor));
684 while (str[i] == ' ' || str[i] == '\t') {
689 *error = KMP_I18N_STR(IllegalCharacters);
694 *error = KMP_I18N_STR(ValueTooLarge);
695 *out = KMP_SIZE_T_MAX;
701 } // __kmp_str_to_size
// Parses an unsigned decimal number from `str` into `out`.
//
// str:   input text, asserted non-NULL; blanks and tabs around the number
//        are skipped.
// out:   receives the parsed value; on overflow it is set to (kmp_uint64)-1.
// error: receives an error message (NotANumber, IllegalCharacters or
//        ValueTooLarge) when parsing fails.
//
// NOTE(review): the "R: Error code" remark on the signature does not match
// the void return type; errors are reported through `error` only.
// NOTE(review): the overflow check uses KMP_SIZE_T_MAX although the output
// type is kmp_uint64. The type of `value` is not visible here; if size_t is
// narrower than 64 bits the accepted range is smaller than the output type
// allows -- verify against the full file.
703 void __kmp_str_to_uint( // R: Error code.
704 char const *str, // I: String of characters, unsigned number.
705 kmp_uint64 *out, // O: Parsed number.
706 char const **error // O: Null if everything is ok, error message otherwise.
713 KMP_DEBUG_ASSERT(str != NULL);
716 while (str[i] == ' ' || str[i] == '\t') {
721 if (str[i] < '0' || str[i] > '9') {
722 *error = KMP_I18N_STR(NotANumber);
726 digit = str[i] - '0';
727 overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
728 value = (value * 10) + digit;
730 } while (str[i] >= '0' && str[i] <= '9');
733 while (str[i] == ' ' || str[i] == '\t') {
738 *error = KMP_I18N_STR(IllegalCharacters);
743 *error = KMP_I18N_STR(ValueTooLarge);
744 *out = (kmp_uint64)-1;
750 } // __kmp_str_to_uint