1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */
3 * tc.str.c: Short string package
4 * This has been a lesson of how to write buggy code!
7 * Copyright (c) 1980, 1991 The Regents of the University of California.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $")
/*
 * MALLOC_INCR: initial size, and growth step, in chars of the static
 * conversion buffers used by the short2str()-style functions in this file.
 */
41 #define MALLOC_INCR 128
/*
 * MALLOC_SURPLUS: extra chars added to every allocation of those buffers,
 * on top of the nominal buffer size.
 * NOTE(review): two alternative definitions appear below; presumably a
 * preprocessor conditional that is not visible in this view selects
 * exactly one of them -- confirm against the full source.
 */
43 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
45 #define MALLOC_SURPLUS 0
/*
 * one_mbtowc: convert the multibyte sequence at s (at most n bytes) into a
 * Char stored through pwc, by calling rt_mbtowc().
 *
 * Only part of the body is visible in this view.  On one path the first
 * byte of s, OR-ed with INVALID_BYTE, is stored in *pwc instead.
 * NOTE(review): presumably that path handles a failed conversion.  The
 * caller in str2short() adds the return value to its source pointer, so
 * the result is presumably the number of bytes consumed -- confirm against
 * the full source.
 */
50 one_mbtowc(Char *pwc, const char *s, size_t n)
54 len = rt_mbtowc(pwc, s, n);
57 *pwc = (unsigned char)*s | INVALID_BYTE;
/*
 * one_wctomb: store the multibyte representation of wchar in s.
 *
 * Only fragments of the body are visible in this view:
 *  - a wchar with every INVALID_BYTE bit set is handled separately; on one
 *    of its paths the single byte wchar & (CHAR & 0xFF) is stored in s[0];
 *  - a wchar >= 0x10000 is turned into a UTF-16 surrogate pair in ws[] and
 *    converted with wcstombs();
 *  - another path converts wchar directly with wctomb().
 *
 * NOTE(review): the conditionals that select between these paths and the
 * return statement are not visible here.  Callers in this file add the
 * return value to their output pointer, so it is presumably the number of
 * bytes written -- confirm against the full source.
 */
65 one_wctomb(char *s, Char wchar)
70 if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */
73 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
77 if (wchar & (CHAR & INVALID_BYTE)) {
78 s[0] = wchar & (CHAR & 0xFF);
88 if (wchar >= 0x10000) {
89 /* UTF-16 systems can't handle these values directly in calls to
90 wctomb. Convert value to UTF-16 surrogate and call wcstombs to
91 convert the "string" to the correct multibyte representation,
95 ws[0] = 0xd800 | (wchar >> 10);
96 ws[1] = 0xdc00 | (wchar & 0x3ff);
98 /* The return value of wcstombs excludes the trailing 0, so len is
99 the correct number of multibytes for the Unicode char. */
100 len = wcstombs (s, ws, MB_CUR_MAX + 1);
103 len = wctomb(s, (wchar_t) wchar);
/*
 * rt_mbtowc: convert the multibyte sequence at s (at most n bytes) into a
 * Char stored through pwc, using mbrtowc() or mbtowc().
 *
 * Visible in this view:
 *  - when UTF16_STRINGS and HAVE_MBRTOWC are defined, a static mbstate_t
 *    (mb) is used with mbrtowc(); with AUTOSET_KANJI a special case is
 *    tested for input starting with a backslash or '~' while mb still
 *    equals the all-zero state mb_zero;
 *  - a converted value in 0xd800..0xdbff is treated as the first half of a
 *    UTF-16 surrogate pair: a second mbrtowc() call fetches the other half
 *    and the two are combined into one value of at least 0x10000;
 *  - wctomb() converts *pwc back into bytes, which are compared with the
 *    input; the comment on the surrogate path calls this the inverse test.
 *
 * NOTE(review): the return statements, and what happens when the inverse
 * test fails or when ret is -2, are not visible in this view.
 */
113 rt_mbtowc(Char *pwc, const char *s, size_t n)
116 char back[MB_LEN_MAX];
118 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
119 # if defined(AUTOSET_KANJI)
120 static mbstate_t mb_zero, mb;
122 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
124 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
125 !memcmp(&mb, &mb_zero, sizeof(mb)))
134 memset (&mb, 0, sizeof mb);
135 ret = mbrtowc(&tmp, s, n, &mb);
137 ret = mbtowc(&tmp, s, n);
141 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
142 if (tmp >= 0xd800 && tmp <= 0xdbff) {
143 /* UTF-16 surrogate pair. Fetch second half and compute
144 UTF-32 value. Dispense with the inverse test in this case. */
145 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
146 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
149 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
154 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
157 } else if (ret == -2)
/*
 * blk2short: build a newly allocated vector of Char strings from the
 * NULL-terminated vector of char strings src.
 *
 * The first loop counts the n entries of src so that the result can be
 * obtained with a single xmalloc() of n + 1 pointers; the second loop
 * walks src again to fill it.
 * NOTE(review): the per-element conversion, the use of the extra slot
 * (presumably a terminating NULL) and the return statement are not
 * visible in this view.
 */
168 blk2short(char **src)
176 for (n = 0; src[n] != NULL; n++)
178 sdst = dst = xmalloc((n + 1) * sizeof(Char *));
180 for (; *src != NULL; src++)
/*
 * short2blk: build a newly allocated vector of char strings from the
 * NULL-terminated vector of Char strings src.
 *
 * The entries are counted first, a vector of n + 1 pointers is obtained
 * from xmalloc(), and each element is converted with short2str() and
 * duplicated with strsave().
 * NOTE(review): the use of the extra slot (presumably a terminating NULL)
 * and the return statement are not visible in this view.
 */
187 short2blk(Char **src)
195 for (n = 0; src[n] != NULL; n++)
197 sdst = dst = xmalloc((n + 1) * sizeof(char *));
199 for (; *src != NULL; src++)
200 *dst++ = strsave(short2str(*src)); /* copy: short2str() reuses a static buffer */
/*
 * str2short: convert the multibyte string src to a Char string.
 *
 * Characters are decoded one at a time with one_mbtowc(), which is offered
 * up to MB_LEN_MAX bytes, and appended to a function-static Strbuf that is
 * terminated afterwards.
 * NOTE(review): the loop condition, any reset of the buffer between calls
 * and the return statement are not visible here.  Because the buffer is
 * static, the result is presumably only valid until the next call --
 * confirm against the full source.
 */
206 str2short(const char *src)
208 static struct Strbuf buf; /* = Strbuf_INIT; */
217 src += one_mbtowc(&wc, src, MB_LEN_MAX);
218 Strbuf_append1(&buf, wc);
220 Strbuf_terminate(&buf);
/*
 * short2str: convert the Char string src to multibyte form in a
 * function-static buffer.
 *
 * The buffer (sdst) is created with room for MALLOC_INCR chars plus
 * MALLOC_SURPLUS and is enlarged by MALLOC_INCR chars at a time with
 * xrealloc().  Each Char is written with one_wctomb(), whose return value
 * advances dst.  After growing, edst and dst are recomputed from the new
 * sdst rather than reused.
 * NOTE(review): the conditions that trigger allocation and growth, the
 * purpose of the trailing wdst/wedst loop and the return statement are not
 * visible in this view.  Since sdst is static, the result is presumably
 * overwritten by the next call.
 */
225 short2str(const Char *src)
227 static char *sdst = NULL;
228 static size_t dstsize = 0;
235 dstsize = MALLOC_INCR;
236 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
239 edst = &dst[dstsize];
241 dst += one_wctomb(dst, *src);
247 dstsize += MALLOC_INCR;
248 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
249 edst = &sdst[dstsize];
250 dst = &edst[-MALLOC_INCR]; /* offset of the old dstsize in the new buffer */
251 while (wdst > wedst) {
261 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
/*
 * s_strcpy: copy the NUL-terminated Char string src into dst, terminator
 * included.
 * NOTE(review): the return value is not visible in this view.
 */
263 s_strcpy(Char *dst, const Char *src)
268 while ((*dst++ = *src++) != '\0')
/*
 * s_strncpy: copy Chars from src to dst, with a count n supplied by the
 * caller.
 * The visible test detects the moment the terminating NUL has been copied.
 * NOTE(review): the loop bounds, what is done once the NUL has been copied
 * and the return value are not visible in this view.
 */
274 s_strncpy(Char *dst, const Char *src, size_t n)
283 if ((*dst++ = *src++) == '\0') {
/*
 * s_strcat: append the Char string src to dst by copying it, with
 * Strcpy(), to the position returned by Strend(dst).
 * NOTE(review): the return value is not visible in this view.
 */
293 s_strcat(Char *dst, const Char *src)
295 Strcpy(Strend(dst), src);
/*
 * s_strncat: append Chars from src to dst, with a count n supplied by the
 * caller.
 * The visible test detects the moment the terminating NUL has been copied.
 * NOTE(review): how the end of dst is located, the loop bounds, the
 * termination of the result and the return value are not visible in this
 * view.
 */
301 s_strncat(Char *dst, const Char *src, size_t n)
314 if ((*dst++ = *src++) == '\0')
/*
 * s_strchr: search the Char string str for the character ch.
 * The visible return yields the current position in str as a non-const
 * Char *; the cast goes through intptr_t to drop the const qualifier.
 * NOTE(review): the search loop and the not-found return value are not
 * visible in this view.
 */
326 s_strchr(const Char *str, int ch)
330 return ((Char *)(intptr_t)str);
/*
 * s_strrchr: search the Char string str for the character ch.
 * The visible return yields the remembered position rstr as a non-const
 * Char *; the cast goes through intptr_t to drop the const qualifier.
 * NOTE(review): the loop that sets rstr (presumably to the last match) is
 * not visible in this view.
 */
336 s_strrchr(const Char *str, int ch)
345 return ((Char *)(intptr_t)rstr);
/*
 * s_strlen: count the Chars in str that precede the terminating NUL.
 * NOTE(review): the return statement is not visible; presumably n.
 */
349 s_strlen(const Char *str)
353 for (n = 0; *str++; n++)
/*
 * s_strcmp: compare the Char strings str1 and str2.
 *
 * The loop skips the common prefix, stopping at the first difference or at
 * the end of str1.  The result is then chosen by a case analysis on which
 * string, if any, has ended; when neither has, the difference of the two
 * differing Chars is returned.
 * NOTE(review): the values returned by the first three cases are not
 * visible in this view.
 */
359 s_strcmp(const Char *str1, const Char *str2)
361 for (; *str1 && *str1 == *str2; str1++, str2++)
364 * The following case analysis is necessary so that characters which look
365 * negative collate low against normal characters but high against the
368 if (*str1 == '\0' && *str2 == '\0')
370 else if (*str1 == '\0')
372 else if (*str2 == '\0')
375 return (*str1 - *str2);
/*
 * s_strncmp: compare the Char strings str1 and str2, with a count n
 * supplied by the caller.
 *
 * When the current Chars differ, the result is chosen by a case analysis
 * on whether a string has ended; when neither has, the difference of the
 * two Chars is returned.
 * NOTE(review): the loop over n, the other cases and the return value for
 * equal strings are not visible in this view.
 */
379 s_strncmp(const Char *str1, const Char *str2, size_t n)
384 if (*str1 != *str2) {
386 * The following case analysis is necessary so that characters
387 * which look negative collate low against normal characters
388 * but high against the end-of-string NUL.
392 else if (*str2 == '\0')
395 return (*str1 - *str2);
403 #endif /* not WIDE_STRINGS */
/*
 * s_strcasecmp: compare the Char strings str1 and str2, folding case.
 *
 * Two variants of the scanning loop are visible: one lower-cases with
 * towlower() into wint_t values, the other with tolower() on unsigned
 * char values.  Both walk the strings in step until str1 ends, keeping the
 * folded values in l1 and l2.  The result is then chosen by a case
 * analysis on which string, if any, has ended.
 * NOTE(review): the preprocessor conditional selecting a variant, the
 * first branch of each loop body and most return values are not visible
 * in this view.
 */
406 s_strcasecmp(const Char *str1, const Char *str2)
409 wint_t l1 = 0, l2 = 0;
410 for (; *str1; str1++, str2++)
413 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
416 unsigned char l1 = 0, l2 = 0;
417 for (; *str1; str1++, str2++)
420 else if ((l1 = tolower((unsigned char)*str1)) !=
421 (l2 = tolower((unsigned char)*str2)))
425 * The following case analysis is necessary so that characters which look
426 * negative collate low against normal characters but high against the
429 if (*str1 == '\0' && *str2 == '\0')
431 else if (*str1 == '\0')
433 else if (*str2 == '\0')
435 else if (l1 == l2) /* They are zero when they are equal */
436 return (*str1 - *str2);
/*
 * s_strnsave: allocate room for len + 1 Chars with xmalloc() and copy the
 * first len Chars of s into it.
 * NOTE(review): the store into the extra element (presumably the
 * terminating NUL) and the return statement are not visible in this view.
 */
442 s_strnsave(const Char *s, size_t len)
446 n = xmalloc((len + 1) * sizeof (*n));
447 memcpy(n, s, len * sizeof (*n));
/*
 * s_strsave: the visible line computes size, the number of bytes occupied
 * by the Char string s including its terminating NUL.
 * NOTE(review): the allocation, the copy, any handling of a NULL s and the
 * return statement are not visible in this view; presumably a freshly
 * allocated copy of s is returned.
 */
453 s_strsave(const Char *s)
460 size = (Strlen(s) + 1) * sizeof(*n);
/*
 * s_strspl: build a newly allocated Char string holding cp followed by dp.
 *
 * The first copy loop writes cp into res, terminator included; the second
 * starts one element back (ep--), so it overwrites that terminator, and
 * writes dp with its own terminator.
 * NOTE(review): the code that positions p and q for the size computation
 * and the return statement are not visible in this view.
 */
467 s_strspl(const Char *cp, const Char *dp)
480 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
481 for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
483 for (ep--, q = dp; (*ep++ = *q++) != '\0';)
/*
 * s_strend: return a position within the Char string cp as a non-const
 * Char * (the cast goes through intptr_t to drop the const qualifier).
 * NOTE(review): two return statements are visible but the code between
 * them is not; presumably the first is an early exit and the second
 * follows a scan to the terminating NUL -- confirm against the full source.
 */
489 s_strend(const Char *cp)
492 return ((Char *)(intptr_t) cp);
495 return ((Char *)(intptr_t) cp);
/*
 * s_strstr: search the Char string s for the Char string t.
 *
 * The outer loop tries each starting position in s until the terminating
 * NUL has been tried; the inner loop advances ss and tt while the Chars
 * they point at are equal.  On success the starting position s is
 * returned as a non-const Char *.
 * NOTE(review): the success test inside the inner loop and the not-found
 * return value are not visible in this view.
 */
499 s_strstr(const Char *s, const Char *t)
507 return ((Char *)(intptr_t) s);
508 while (*ss++ == *tt++);
509 } while (*s++ != '\0');
513 #else /* !SHORT_STRINGS */
/*
 * caching_strip: copy the string s, terminator included, into a
 * function-static buffer that is enlarged with xrealloc() whenever it is
 * smaller than strlen(s) + 1 bytes.
 * NOTE(review): the update of buf_size, whatever processing is applied to
 * the copy, any handling of a NULL s and the return statement are not
 * visible in this view.  Since buf is static, the result is presumably
 * overwritten by the next call.
 */
515 caching_strip(const char *s)
517 static char *buf = NULL;
518 static size_t buf_size = 0;
523 size = strlen(s) + 1;
524 if (buf_size < size) {
525 buf = xrealloc(buf, size);
528 memcpy(buf, s, size);
/*
 * short2qstr: convert the Char string src to multibyte form in a
 * function-static buffer, in the same manner as short2str() but with two
 * separate places where the buffer can grow.
 *
 * The buffer (sdst) is created with room for MALLOC_INCR chars plus
 * MALLOC_SURPLUS and is enlarged by MALLOC_INCR chars at a time with
 * xrealloc().  Each Char is written with one_wctomb().  At the second
 * growth site the distance of dst beyond edst is saved in i before the
 * reallocation and re-applied afterwards.
 * NOTE(review): the conditions that trigger growth, the extra output that
 * distinguishes this function from short2str() (presumably quoting) and
 * the return statement are not visible in this view.
 */
535 short2qstr(const Char *src)
537 static char *sdst = NULL;
538 static size_t dstsize = 0;
545 dstsize = MALLOC_INCR;
546 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
549 edst = &dst[dstsize];
554 dstsize += MALLOC_INCR;
555 sdst = xrealloc(sdst,
556 (dstsize + MALLOC_SURPLUS) * sizeof(char));
557 edst = &sdst[dstsize];
558 dst = &edst[-MALLOC_INCR];
561 dst += one_wctomb(dst, *src);
564 ptrdiff_t i = dst - edst;
565 dstsize += MALLOC_INCR;
566 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
567 edst = &sdst[dstsize];
568 dst = &edst[-MALLOC_INCR + i]; /* old buffer end plus the saved offset i */
/*
 * Return one zero-filled struct blk_buf obtained from xcalloc().
 * NOTE(review): the header of the enclosing function is not visible in
 * this view.
 */
578 return xcalloc(1, sizeof(struct blk_buf));
/*
 * bb_store: store the pointer str at index bb->len of bb->vec, enlarging
 * the vector first when len has reached size.
 *
 * On growth the size becomes 16 in one case (presumably when the vector is
 * still empty) and bb->vec is reallocated to bb->size pointers.
 * NOTE(review): the other growth case and any update of bb->len are not
 * visible in this view.
 */
582 bb_store(struct blk_buf *bb, Char *str)
584 if (bb->len == bb->size) { /* Keep space for terminating NULL */
586 bb->size = 16; /* Arbitrary */
589 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
591 bb->vec[bb->len] = str;
/*
 * bb_append: takes a blk_buf and a Char string.
 * NOTE(review): the body is not visible in this view; presumably it stores
 * str in bb and advances bb->len -- confirm against the full source.
 */
595 bb_append(struct blk_buf *bb, Char *str)
/*
 * bb_cleanup: takes a struct blk_buf through a void pointer and visits
 * each of its bb->len stored entries.
 * NOTE(review): the loop body and the rest of the function are not visible
 * in this view; presumably the entries and the vector are released.
 */
602 bb_cleanup(void *xbb)
607 bb = (struct blk_buf *)xbb;
609 for (i = 0; i < bb->len; i++)
/*
 * bb_finish: return bb->vec resized with xrealloc() to bb->len + 1
 * pointers.
 * NOTE(review): any statement preceding the return (presumably appending
 * the terminating NULL) is not visible in this view.
 */
625 bb_finish(struct blk_buf *bb)
628 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
/*
 * DO_STRBUF(STRBUF, CHAR, STRLEN): generate a family of growable string
 * buffer functions for struct STRBUF, whose elements have type CHAR and
 * whose string-length function is STRLEN.  The generated names are:
 *
 *   STRBUF##_alloc      return a zero-filled struct STRBUF from xcalloc()
 *   STRBUF##_store1     when size == len, grow buf->s (64 elements the
 *                       first time) with xrealloc(); then store c at
 *                       buf->s[buf->len]
 *   STRBUF##_terminate  store '\0' through _store1 without advancing len
 *   STRBUF##_append1    store c through _store1
 *   STRBUF##_appendn    make sure there is room for len more elements,
 *                       then memcpy() them to the end of the buffer
 *   STRBUF##_append     _appendn with a length of STRLEN(s)
 *   STRBUF##_finish     append a 0 and return buf->s resized to buf->len
 *                       elements
 *   STRBUF##_cleanup    takes a struct STRBUF through a void pointer
 *   STRBUF##_free       calls _cleanup on its argument
 *   STRBUF##_init       a const struct STRBUF object
 *
 * NOTE(review): several lines of the macro are not visible in this view,
 * including the growth steps beyond the initial 64 elements, the updates
 * of buf->len and the bodies of _cleanup and _free past the lines shown.
 */
631 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
634 STRBUF##_alloc(void) \
636 return xcalloc(1, sizeof(struct STRBUF)); \
640 STRBUF##_store1(struct STRBUF *buf, CHAR c) \
642 if (buf->size == buf->len) { \
643 if (buf->size == 0) \
644 buf->size = 64; /* Arbitrary */ \
647 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
650 buf->s[buf->len] = c; \
653 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
655 STRBUF##_terminate(struct STRBUF *buf) \
657 STRBUF##_store1(buf, '\0'); \
661 STRBUF##_append1(struct STRBUF *buf, CHAR c) \
663 STRBUF##_store1(buf, c); \
668 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
670 if (buf->size < buf->len + len) { \
671 if (buf->size == 0) \
672 buf->size = 64; /* Arbitrary */ \
673 while (buf->size < buf->len + len) \
675 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
677 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
682 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
684 STRBUF##_appendn(buf, s, STRLEN(s)); \
688 STRBUF##_finish(struct STRBUF *buf) \
690 STRBUF##_append1(buf, 0); \
691 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
695 STRBUF##_cleanup(void *xbuf) \
697 struct STRBUF *buf; \
704 STRBUF##_free(void *xbuf) \
706 STRBUF##_cleanup(xbuf); \
710 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
/*
 * Instantiate the string-buffer functions twice: strbuf_* for struct
 * strbuf with char elements and strlen(), and Strbuf_* for struct Strbuf
 * with Char elements and Strlen().
 */
712 DO_STRBUF(strbuf, char, strlen);
713 DO_STRBUF(Strbuf, Char, Strlen);