contrib/tcsh/tc.str.c

   1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */
   2 /*
   3  * tc.str.c: Short string package
   4  *           This has been a lesson of how to write buggy code!
   5  */
   6 /*-
   7  * Copyright (c) 1980, 1991 The Regents of the University of California.
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 #include "sh.h"
  35
  36 #include <assert.h>
  37 #include <limits.h>
  38
  39 RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $")
  40
/*
 * MALLOC_INCR is the step, in chars, by which the static output buffers of
 * short2str() and short2qstr() are sized and grown.  MALLOC_SURPLUS is extra
 * room allocated past that size.
 */
#define MALLOC_INCR     128
#ifdef WIDE_STRINGS
#define MALLOC_SURPLUS  MB_LEN_MAX /* Space for one multibyte character */
#else
#define MALLOC_SURPLUS  0
#endif
  47
  48 #ifdef WIDE_STRINGS
  49 size_t
  50 one_mbtowc(Char *pwc, const char *s, size_t n)
  51 {
  52     int len;
  53
  54     len = rt_mbtowc(pwc, s, n);
  55     if (len == -1) {
  56         reset_mbtowc();
  57         *pwc = (unsigned char)*s | INVALID_BYTE;
  58     }
  59     if (len <= 0)
  60         len = 1;
  61     return len;
  62 }
  63
/*
 * Store the multibyte form of wchar at s and return the number of bytes
 * produced; the result is never less than 1.
 *
 * A wchar carrying the INVALID_BYTE marker is emitted as a single raw byte.
 * Otherwise wchar is masked and handed to wctomb() (or, with UTF16_STRINGS
 * and a value of 0x10000 or more, to wcstombs() as a surrogate pair).  If
 * the conversion returns -1 the low byte of wchar is stored instead.
 *
 * Note that the opening "if (...) {" is selected by the preprocessor; both
 * arms share the "} else {" that follows the #endif.
 */
size_t
one_wctomb(char *s, Char wchar)
{
    int len;

#if INVALID_BYTE != 0
    if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
        /* invalid char
         * example:
         * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
        *s = (char)wchar;
        len = 1;
#else
    if (wchar & (CHAR & INVALID_BYTE)) {
        s[0] = wchar & (CHAR & 0xFF);
        len = 1;
#endif
    } else {
        /* Keep only the character bits before converting. */
#if INVALID_BYTE != 0
        wchar &= MAX_UTF32;
#else
        wchar &= CHAR;
#endif
#ifdef UTF16_STRINGS
        if (wchar >= 0x10000) {
            /* UTF-16 systems can't handle these values directly in calls to
               wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
               convert the "string" to the correct multibyte representation,
               if any. */
            wchar_t ws[3];
            wchar -= 0x10000;
            ws[0] = 0xd800 | (wchar >> 10);
            ws[1] = 0xdc00 | (wchar & 0x3ff);
            ws[2] = 0;
            /* The return value of wcstombs excludes the trailing 0, so len is
               the correct number of multibytes for the Unicode char. */
            len = wcstombs (s, ws, MB_CUR_MAX + 1);
        } else
#endif
        len = wctomb(s, (wchar_t) wchar);
        /* Conversion failed: fall back to storing a single byte. */
        if (len == -1)
            s[0] = wchar;
        /* Callers advance by the result, so never report less than 1. */
        if (len <= 0)
            len = 1;
    }
    return len;
}
 111
/*
 * Convert the multibyte character at s (at most n bytes) to a Char in *pwc,
 * accepting the result only if it converts back to the same bytes.
 *
 * Returns:
 *   > 0  number of bytes consumed; *pwc holds the character
 *   0    the conversion yielded 0; *pwc is set to '\0'
 *   -1   conversion failed, was incomplete (-2 is folded into -1), or
 *        wctomb() of the result did not reproduce the input bytes
 *
 * With UTF16_STRINGS and HAVE_MBRTOWC, a leading surrogate (0xd800..0xdbff)
 * triggers a second mbrtowc() call and the two halves are combined into one
 * value; the convert-back check is skipped on that path.
 */
int
rt_mbtowc(Char *pwc, const char *s, size_t n)
{
    int ret;
    char back[MB_LEN_MAX];
    wchar_t tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
# if defined(AUTOSET_KANJI)
    static mbstate_t mb_zero, mb;
    /*
     * Work around the Shift-JIS encoding that translates unshifted 7 bit
     * ASCII: pass '\\' and '~' through unchanged.
     */
    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
        !memcmp(&mb, &mb_zero, sizeof(mb)))
    {
        *pwc = *s;
        return 1;
    }
# else
    mbstate_t mb;
# endif

    /* Every call starts from a zeroed conversion state. */
    memset (&mb, 0, sizeof mb);
    ret = mbrtowc(&tmp, s, n, &mb);
#else
    ret = mbtowc(&tmp, s, n);
#endif
    if (ret > 0) {
        *pwc = tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
        if (tmp >= 0xd800 && tmp <= 0xdbff) {
            /* UTF-16 surrogate pair.  Fetch second half and compute
               UTF-32 value.  Dispense with the inverse test in this case. */
            size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
            if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
                ret = -1;
            else {
                *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
                ret += n2;
            }
        } else
#endif
        /* Inverse test: the character must convert back to the same bytes. */
        if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
            ret = -1;

    } else if (ret == -2)
        ret = -1;
    else if (ret == 0)
        *pwc = '\0';

    return ret;
}
 164 #endif
 165
 166 #ifdef SHORT_STRINGS
 167 Char  **
 168 blk2short(char **src)
 169 {
 170     size_t     n;
 171     Char **sdst, **dst;
 172
 173     /*
 174      * Count
 175      */
 176     for (n = 0; src[n] != NULL; n++)
 177         continue;
 178     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
 179
 180     for (; *src != NULL; src++)
 181         *dst++ = SAVE(*src);
 182     *dst = NULL;
 183     return (sdst);
 184 }
 185
 186 char  **
 187 short2blk(Char **src)
 188 {
 189     size_t     n;
 190     char **sdst, **dst;
 191
 192     /*
 193      * Count
 194      */
 195     for (n = 0; src[n] != NULL; n++)
 196         continue;
 197     sdst = dst = xmalloc((n + 1) * sizeof(char *));
 198
 199     for (; *src != NULL; src++)
 200         *dst++ = strsave(short2str(*src));
 201     *dst = NULL;
 202     return (sdst);
 203 }
 204
 205 Char   *
 206 str2short(const char *src)
 207 {
 208     static struct Strbuf buf; /* = Strbuf_INIT; */
 209
 210     if (src == NULL)
 211         return (NULL);
 212
 213     buf.len = 0;
 214     while (*src) {
 215         Char wc;
 216
 217         src += one_mbtowc(&wc, src, MB_LEN_MAX);
 218         Strbuf_append1(&buf, wc);
 219     }
 220     Strbuf_terminate(&buf);
 221     return buf.s;
 222 }
 223
 224 char   *
 225 short2str(const Char *src)
 226 {
 227     static char *sdst = NULL;
 228     static size_t dstsize = 0;
 229     char *dst, *edst;
 230
 231     if (src == NULL)
 232         return (NULL);
 233
 234     if (sdst == NULL) {
 235         dstsize = MALLOC_INCR;
 236         sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
 237     }
 238     dst = sdst;
 239     edst = &dst[dstsize];
 240     while (*src) {
 241         dst += one_wctomb(dst, *src);
 242         src++;
 243         if (dst >= edst) {
 244             char *wdst = dst;
 245             char *wedst = edst;
 246
 247             dstsize += MALLOC_INCR;
 248             sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
 249             edst = &sdst[dstsize];
 250             dst = &edst[-MALLOC_INCR];
 251             while (wdst > wedst) {
 252                 dst++;
 253                 wdst--;
 254             }
 255         }
 256     }
 257     *dst = 0;
 258     return (sdst);
 259 }
 260
 261 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
 262 Char   *
 263 s_strcpy(Char *dst, const Char *src)
 264 {
 265     Char *sdst;
 266
 267     sdst = dst;
 268     while ((*dst++ = *src++) != '\0')
 269         continue;
 270     return (sdst);
 271 }
 272
 273 Char   *
 274 s_strncpy(Char *dst, const Char *src, size_t n)
 275 {
 276     Char *sdst;
 277
 278     if (n == 0)
 279         return(dst);
 280
 281     sdst = dst;
 282     do
 283         if ((*dst++ = *src++) == '\0') {
 284             while (--n != 0)
 285                 *dst++ = '\0';
 286             return(sdst);
 287         }
 288     while (--n != 0);
 289     return (sdst);
 290 }
 291
 292 Char   *
 293 s_strcat(Char *dst, const Char *src)
 294 {
 295     Strcpy(Strend(dst), src);
 296     return dst;
 297 }
 298
 299 #ifdef NOTUSED
 300 Char   *
 301 s_strncat(Char *dst, const Char *src, size_t n)
 302 {
 303     Char *sdst;
 304
 305     if (n == 0)
 306         return (dst);
 307
 308     sdst = dst;
 309
 310     while (*dst)
 311         dst++;
 312
 313     do
 314         if ((*dst++ = *src++) == '\0')
 315             return(sdst);
 316     while (--n != 0)
 317         continue;
 318
 319     *dst = '\0';
 320     return (sdst);
 321 }
 322
 323 #endif
 324
 325 Char   *
 326 s_strchr(const Char *str, int ch)
 327 {
 328     do
 329         if (*str == ch)
 330             return ((Char *)(intptr_t)str);
 331     while (*str++);
 332     return (NULL);
 333 }
 334
 335 Char   *
 336 s_strrchr(const Char *str, int ch)
 337 {
 338     const Char *rstr;
 339
 340     rstr = NULL;
 341     do
 342         if (*str == ch)
 343             rstr = str;
 344     while (*str++);
 345     return ((Char *)(intptr_t)rstr);
 346 }
 347
 348 size_t
 349 s_strlen(const Char *str)
 350 {
 351     size_t n;
 352
 353     for (n = 0; *str++; n++)
 354         continue;
 355     return (n);
 356 }
 357
 358 int
 359 s_strcmp(const Char *str1, const Char *str2)
 360 {
 361     for (; *str1 && *str1 == *str2; str1++, str2++)
 362         continue;
 363     /*
 364      * The following case analysis is necessary so that characters which look
 365      * negative collate low against normal characters but high against the
 366      * end-of-string NUL.
 367      */
 368     if (*str1 == '\0' && *str2 == '\0')
 369         return (0);
 370     else if (*str1 == '\0')
 371         return (-1);
 372     else if (*str2 == '\0')
 373         return (1);
 374     else
 375         return (*str1 - *str2);
 376 }
 377
 378 int
 379 s_strncmp(const Char *str1, const Char *str2, size_t n)
 380 {
 381     if (n == 0)
 382         return (0);
 383     do {
 384         if (*str1 != *str2) {
 385             /*
 386              * The following case analysis is necessary so that characters
 387              * which look negative collate low against normal characters
 388              * but high against the end-of-string NUL.
 389              */
 390             if (*str1 == '\0')
 391                 return (-1);
 392             else if (*str2 == '\0')
 393                 return (1);
 394             else
 395                 return (*str1 - *str2);
 396         }
 397         if (*str1 == '\0')
 398             return(0);
 399         str1++, str2++;
 400     } while (--n != 0);
 401     return(0);
 402 }
 403 #endif /* not WIDE_STRINGS */
 404
 405 int
 406 s_strcasecmp(const Char *str1, const Char *str2)
 407 {
 408 #ifdef WIDE_STRINGS
 409     wint_t l1 = 0, l2 = 0;
 410     for (; *str1; str1++, str2++)
 411         if (*str1 == *str2)
 412             l1 = l2 = 0;
 413         else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
 414             break;
 415 #else
 416     unsigned char l1 = 0, l2 = 0;
 417     for (; *str1; str1++, str2++)
 418         if (*str1 == *str2)
 419                 l1 = l2 = 0;
 420         else if ((l1 = tolower((unsigned char)*str1)) !=
 421             (l2 = tolower((unsigned char)*str2)))
 422             break;
 423 #endif
 424     /*
 425      * The following case analysis is necessary so that characters which look
 426      * negative collate low against normal characters but high against the
 427      * end-of-string NUL.
 428      */
 429     if (*str1 == '\0' && *str2 == '\0')
 430         return (0);
 431     else if (*str1 == '\0')
 432         return (-1);
 433     else if (*str2 == '\0')
 434         return (1);
 435     else if (l1 == l2)  /* They are zero when they are equal */
 436         return (*str1 - *str2);
 437     else
 438         return (l1 - l2);
 439 }
 440
 441 Char   *
 442 s_strnsave(const Char *s, size_t len)
 443 {
 444     Char *n;
 445
 446     n = xmalloc((len + 1) * sizeof (*n));
 447     memcpy(n, s, len * sizeof (*n));
 448     n[len] = '\0';
 449     return n;
 450 }
 451
 452 Char   *
 453 s_strsave(const Char *s)
 454 {
 455     Char   *n;
 456     size_t size;
 457
 458     if (s == NULL)
 459         s = STRNULL;
 460     size = (Strlen(s) + 1) * sizeof(*n);
 461     n = xmalloc(size);
 462     memcpy(n, s, size);
 463     return (n);
 464 }
 465
 466 Char   *
 467 s_strspl(const Char *cp, const Char *dp)
 468 {
 469     Char *res, *ep;
 470     const Char *p, *q;
 471
 472     if (!cp)
 473         cp = STRNULL;
 474     if (!dp)
 475         dp = STRNULL;
 476     for (p = cp; *p++;)
 477         continue;
 478     for (q = dp; *q++;)
 479         continue;
 480     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
 481     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
 482         continue;
 483     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
 484         continue;
 485     return (res);
 486 }
 487
 488 Char   *
 489 s_strend(const Char *cp)
 490 {
 491     if (!cp)
 492         return ((Char *)(intptr_t) cp);
 493     while (*cp)
 494         cp++;
 495     return ((Char *)(intptr_t) cp);
 496 }
 497
 498 Char   *
 499 s_strstr(const Char *s, const Char *t)
 500 {
 501     do {
 502         const Char *ss = s;
 503         const Char *tt = t;
 504
 505         do
 506             if (*tt == '\0')
 507                 return ((Char *)(intptr_t) s);
 508         while (*ss++ == *tt++);
 509     } while (*s++ != '\0');
 510     return (NULL);
 511 }
 512
 513 #else /* !SHORT_STRINGS */
 514 char *
 515 caching_strip(const char *s)
 516 {
 517     static char *buf = NULL;
 518     static size_t buf_size = 0;
 519     size_t size;
 520
 521     if (s == NULL)
 522       return NULL;
 523     size = strlen(s) + 1;
 524     if (buf_size < size) {
 525         buf = xrealloc(buf, size);
 526         buf_size = size;
 527     }
 528     memcpy(buf, s, size);
 529     strip(buf);
 530     return buf;
 531 }
 532 #endif
 533
 534 char   *
 535 short2qstr(const Char *src)
 536 {
 537     static char *sdst = NULL;
 538     static size_t dstsize = 0;
 539     char *dst, *edst;
 540
 541     if (src == NULL)
 542         return (NULL);
 543
 544     if (sdst == NULL) {
 545         dstsize = MALLOC_INCR;
 546         sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
 547     }
 548     dst = sdst;
 549     edst = &dst[dstsize];
 550     while (*src) {
 551         if (*src & QUOTE) {
 552             *dst++ = '\\';
 553             if (dst == edst) {
 554                 dstsize += MALLOC_INCR;
 555                 sdst = xrealloc(sdst,
 556                                 (dstsize + MALLOC_SURPLUS) * sizeof(char));
 557                 edst = &sdst[dstsize];
 558                 dst = &edst[-MALLOC_INCR];
 559             }
 560         }
 561         dst += one_wctomb(dst, *src);
 562         src++;
 563         if (dst >= edst) {
 564             ptrdiff_t i = dst - edst;
 565             dstsize += MALLOC_INCR;
 566             sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
 567             edst = &sdst[dstsize];
 568             dst = &edst[-MALLOC_INCR + i];
 569         }
 570     }
 571     *dst = 0;
 572     return (sdst);
 573 }
 574
 575 struct blk_buf *
 576 bb_alloc(void)
 577 {
 578     return xcalloc(1, sizeof(struct blk_buf));
 579 }
 580
 581 static void
 582 bb_store(struct blk_buf *bb, Char *str)
 583 {
 584     if (bb->len == bb->size) { /* Keep space for terminating NULL */
 585         if (bb->size == 0)
 586             bb->size = 16; /* Arbitrary */
 587         else
 588             bb->size *= 2;
 589         bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
 590     }
 591     bb->vec[bb->len] = str;
 592 }
 593
 594 void
 595 bb_append(struct blk_buf *bb, Char *str)
 596 {
 597     bb_store(bb, str);
 598     bb->len++;
 599 }
 600
 601 void
 602 bb_cleanup(void *xbb)
 603 {
 604     struct blk_buf *bb;
 605     size_t i;
 606
 607     bb = (struct blk_buf *)xbb;
 608     if (bb->vec) {
 609         for (i = 0; i < bb->len; i++)
 610             xfree(bb->vec[i]);
 611         xfree(bb->vec);
 612     }
 613     bb->vec = NULL;
 614     bb->len = 0;
 615 }
 616
 617 void
 618 bb_free(void *bb)
 619 {
 620     bb_cleanup(bb);
 621     xfree(bb);
 622 }
 623
 624 Char **
 625 bb_finish(struct blk_buf *bb)
 626 {
 627     bb_store(bb, NULL);
 628     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
 629 }
 630
/*
 * DO_STRBUF(STRBUF, CHAR, STRLEN) expands to the implementation of a
 * growable string buffer "struct STRBUF" whose elements have type CHAR and
 * whose NUL-terminated inputs are measured with STRLEN:
 *
 *   STRBUF_alloc()      xcalloc a zero-filled struct STRBUF
 *   STRBUF_store1()     (static) store c at s[len], growing first if full;
 *                       does not advance len
 *   STRBUF_terminate()  store '\0' at s[len] without advancing len
 *   STRBUF_append1()    append one element
 *   STRBUF_appendn()    append len elements copied from s
 *   STRBUF_append()     append the STRLEN(s) elements of s
 *   STRBUF_finish()     append a 0, xrealloc the storage to len elements
 *                       and return it
 *   STRBUF_cleanup()    xfree the storage; the s, len and size fields are
 *                       left unchanged
 *   STRBUF_free()       STRBUF_cleanup(), then xfree the struct itself
 *
 * Storage starts at 64 elements and is doubled whenever more room is
 * needed.  The expansion ends with the declaration of the const object
 * STRBUF_init and deliberately has no trailing semicolon; the semicolon
 * after each DO_STRBUF(...) use below supplies it.
 */
#define DO_STRBUF(STRBUF, CHAR, STRLEN)                         \
                                                                \
struct STRBUF *                                                 \
STRBUF##_alloc(void)                                            \
{                                                               \
    return xcalloc(1, sizeof(struct STRBUF));                   \
}                                                               \
                                                                \
static void                                                     \
STRBUF##_store1(struct STRBUF *buf, CHAR c)                     \
{                                                               \
    if (buf->size == buf->len) {                                \
        if (buf->size == 0)                                     \
            buf->size = 64; /* Arbitrary */                     \
        else                                                    \
            buf->size *= 2;                                     \
        buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
    }                                                           \
    assert(buf->s);                                             \
    buf->s[buf->len] = c;                                       \
}                                                               \
                                                                \
/* Like strbuf_append1(buf, '\0'), but don't advance len */     \
void                                                            \
STRBUF##_terminate(struct STRBUF *buf)                          \
{                                                               \
    STRBUF##_store1(buf, '\0');                                 \
}                                                               \
                                                                \
void                                                            \
STRBUF##_append1(struct STRBUF *buf, CHAR c)                    \
{                                                               \
    STRBUF##_store1(buf, c);                                    \
    buf->len++;                                                 \
}                                                               \
                                                                \
void                                                            \
STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
{                                                               \
    if (buf->size < buf->len + len) {                           \
        if (buf->size == 0)                                     \
            buf->size = 64; /* Arbitrary */                     \
        while (buf->size < buf->len + len)                      \
            buf->size *= 2;                                     \
        buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
    }                                                           \
    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));        \
    buf->len += len;                                            \
}                                                               \
                                                                \
void                                                            \
STRBUF##_append(struct STRBUF *buf, const CHAR *s)              \
{                                                               \
    STRBUF##_appendn(buf, s, STRLEN(s));                        \
}                                                               \
                                                                \
CHAR *                                                          \
STRBUF##_finish(struct STRBUF *buf)                             \
{                                                               \
    STRBUF##_append1(buf, 0);                                   \
    return xrealloc(buf->s, buf->len * sizeof(*buf->s));        \
}                                                               \
                                                                \
void                                                            \
STRBUF##_cleanup(void *xbuf)                                    \
{                                                               \
    struct STRBUF *buf;                                         \
                                                                \
    buf = xbuf;                                                 \
    xfree(buf->s);                                              \
}                                                               \
                                                                \
void                                                            \
STRBUF##_free(void *xbuf)                                       \
{                                                               \
    STRBUF##_cleanup(xbuf);                                     \
    xfree(xbuf);                                                \
}                                                               \
                                                                \
const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */

/* Instantiate the buffer once for char strings and once for Char strings. */
DO_STRBUF(strbuf, char, strlen);
DO_STRBUF(Strbuf, Char, Strlen);