]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/bwstring.c
Merge bmake-20230414
[FreeBSD/FreeBSD.git] / usr.bin / sort / bwstring.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
/* When set, bwscoll() orders single-byte strings with memcmp(), not strcoll(). */
bool byte_sort = false;

/*
 * Abbreviated month name tables, filled in lazily by initialise_months().
 * Only the table matching the current mb_cur_max is populated.
 */
static char **cmonths = NULL;
static wchar_t **wmonths = NULL;
50
51 /* initialise months */
52
53 void
54 initialise_months(void)
55 {
56         const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57             ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58             ABMON_11, ABMON_12 };
59         char *tmp;
60         size_t len;
61
62         if (mb_cur_max == 1) {
63                 if (cmonths == NULL) {
64                         char *m;
65
66                         cmonths = sort_malloc(sizeof(char*) * 12);
67                         for (int i = 0; i < 12; i++) {
68                                 cmonths[i] = NULL;
69                                 tmp = nl_langinfo(item[i]);
70                                 if (debug_sort)
71                                         printf("month[%d]=%s\n", i, tmp);
72                                 if (*tmp == '\0')
73                                         continue;
74                                 m = sort_strdup(tmp);
75                                 len = strlen(tmp);
76                                 for (unsigned int j = 0; j < len; j++)
77                                         m[j] = toupper(m[j]);
78                                 cmonths[i] = m;
79                         }
80                 }
81
82         } else {
83                 if (wmonths == NULL) {
84                         wchar_t *m;
85
86                         wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87                         for (int i = 0; i < 12; i++) {
88                                 wmonths[i] = NULL;
89                                 tmp = nl_langinfo(item[i]);
90                                 if (debug_sort)
91                                         printf("month[%d]=%s\n", i, tmp);
92                                 if (*tmp == '\0')
93                                         continue;
94                                 len = strlen(tmp);
95                                 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96                                 if (mbstowcs(m, tmp, len) ==
97                                     ((size_t) - 1)) {
98                                         sort_free(m);
99                                         continue;
100                                 }
101                                 m[len] = L'\0';
102                                 for (unsigned int j = 0; j < len; j++)
103                                         m[j] = towupper(m[j]);
104                                 wmonths[i] = m;
105                         }
106                 }
107         }
108 }
109
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the result of wcscmp()
 * is used instead, and if errno is still set after that the strings are
 * compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
        int order;

        errno = 0;
        order = wcscoll(s1, s2);
        if (errno != EILSEQ)
                return (order);

        errno = 0;
        order = wcscmp(s1, s2);
        if (errno == 0)
                return (order);

        /* Last resort: walk both strings up to the first difference. */
        while (*s1 != L'\0' && *s1 == *s2) {
                ++s1;
                ++s2;
        }
        if (*s1 == L'\0')
                return ((*s2 == L'\0') ? 0 : -1);
        if (*s2 == L'\0')
                return (+1);
        return ((int)(*s1 - *s2));
}
139
140 /* counterparts of wcs functions */
141
142 void
143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146         if (mb_cur_max == 1)
147                 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
148         else
149                 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
150 }
151
152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155         return (bws->wdata.str);
156 }
157
158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161         return ((mb_cur_max == 1) ? bws->cdata.len :
162             SIZEOF_WCHAR_STRING(bws->wdata.len));
163 }
164
165 size_t
166 bws_memsize(const struct bwstring *bws)
167 {
168
169         return ((mb_cur_max == 1) ?
170             (bws->cdata.len + 2 + sizeof(struct bwstring)) :
171             (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
172 }
173
174 void
175 bws_setlen(struct bwstring *bws, size_t newlen)
176 {
177
178         if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
179             newlen <= bws->cdata.len) {
180                 bws->cdata.len = newlen;
181                 bws->cdata.str[newlen] = '\0';
182         } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
183                 bws->wdata.len = newlen;
184                 bws->wdata.str[newlen] = L'\0';
185         }
186 }
187
188 /*
189  * Allocate a new binary string of specified size
190  */
191 struct bwstring *
192 bwsalloc(size_t sz)
193 {
194         struct bwstring *ret;
195
196         if (mb_cur_max == 1) {
197                 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
198                 ret->cdata.len = sz;
199                 ret->cdata.str[sz] = '\0';
200         } else {
201                 ret = sort_malloc(
202                     sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
203                 ret->wdata.len = sz;
204                 ret->wdata.str[sz] = L'\0';
205         }
206
207         return (ret);
208 }
209
210 /*
211  * Create a copy of binary string.
212  * New string size equals the length of the old string.
213  */
214 struct bwstring *
215 bwsdup(const struct bwstring *s)
216 {
217
218         if (s == NULL)
219                 return (NULL);
220         else {
221                 struct bwstring *ret = bwsalloc(BWSLEN(s));
222
223                 if (mb_cur_max == 1)
224                         memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
225                 else
226                         memcpy(ret->wdata.str, s->wdata.str,
227                             SIZEOF_WCHAR_STRING(s->wdata.len));
228
229                 return (ret);
230         }
231 }
232
233 /*
234  * Create a new binary string from a wide character buffer.
235  */
236 struct bwstring *
237 bwssbdup(const wchar_t *str, size_t len)
238 {
239
240         if (str == NULL)
241                 return ((len == 0) ? bwsalloc(0) : NULL);
242         else {
243                 struct bwstring *ret;
244
245                 ret = bwsalloc(len);
246
247                 if (mb_cur_max == 1)
248                         for (size_t i = 0; i < len; ++i)
249                                 ret->cdata.str[i] = (char)str[i];
250                 else
251                         memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
252
253                 return (ret);
254         }
255 }
256
257 /*
258  * Create a new binary string from a raw binary buffer.
259  */
260 struct bwstring *
261 bwscsbdup(const unsigned char *str, size_t len)
262 {
263         struct bwstring *ret;
264
265         ret = bwsalloc(len);
266
267         if (str) {
268                 if (mb_cur_max == 1)
269                         memcpy(ret->cdata.str, str, len);
270                 else {
271                         mbstate_t mbs;
272                         const char *s;
273                         size_t charlen, chars, cptr;
274
275                         chars = 0;
276                         cptr = 0;
277                         s = (const char *) str;
278
279                         memset(&mbs, 0, sizeof(mbs));
280
281                         while (cptr < len) {
282                                 size_t n = mb_cur_max;
283
284                                 if (n > len - cptr)
285                                         n = len - cptr;
286                                 charlen = mbrlen(s + cptr, n, &mbs);
287                                 switch (charlen) {
288                                 case 0:
289                                         /* FALLTHROUGH */
290                                 case (size_t) -1:
291                                         /* FALLTHROUGH */
292                                 case (size_t) -2:
293                                         ret->wdata.str[chars++] =
294                                             (unsigned char) s[cptr];
295                                         ++cptr;
296                                         break;
297                                 default:
298                                         n = mbrtowc(ret->wdata.str + (chars++),
299                                             s + cptr, charlen, &mbs);
300                                         if ((n == (size_t)-1) || (n == (size_t)-2))
301                                                 /* NOTREACHED */
302                                                 err(2, "mbrtowc error");
303                                         cptr += charlen;
304                                 }
305                         }
306
307                         ret->wdata.len = chars;
308                         ret->wdata.str[ret->wdata.len] = L'\0';
309                 }
310         }
311         return (ret);
312 }
313
/*
 * Release a string through sort_free().  A NULL pointer is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

        if (s == NULL)
                return;
        sort_free(s);
}
324
325 /*
326  * Copy content of src binary string to dst,
327  * with specified number of symbols to be copied.
328  * An offset value can be specified, from the start of src string.
329  * If the capacity of the dst string is not sufficient,
330  * then the data is truncated.
331  */
332 struct bwstring *
333 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
334     size_t size)
335 {
336
337         if (offset >= BWSLEN(src)) {
338                 bws_setlen(dst, 0);
339         } else {
340                 size_t nums = BWSLEN(src) - offset;
341
342                 if (nums > BWSLEN(dst))
343                         nums = BWSLEN(dst);
344                 if (nums > size)
345                         nums = size;
346                 if (mb_cur_max == 1) {
347                         memcpy(dst->cdata.str, src->cdata.str + offset, nums);
348                         dst->cdata.len = nums;
349                         dst->cdata.str[nums] = '\0';
350                 } else {
351                         memcpy(dst->wdata.str, src->wdata.str + offset,
352                             SIZEOF_WCHAR_STRING(nums));
353                         dst->wdata.len = nums;
354                         dst->wdata.str[nums] = L'\0';
355                 }
356         }
357         return (dst);
358 }
359
360 /*
361  * Write binary string to the file.
362  * The output is ended either with '\n' (nl == true)
363  * or '\0' (nl == false).
364  */
365 size_t
366 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
367 {
368
369         if (mb_cur_max == 1) {
370                 size_t len = bws->cdata.len;
371
372                 if (!zero_ended) {
373                         bws->cdata.str[len] = '\n';
374
375                         if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
376                                 err(2, NULL);
377
378                         bws->cdata.str[len] = '\0';
379                 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
380                         err(2, NULL);
381
382                 return (len + 1);
383
384         } else {
385                 wchar_t eols;
386                 size_t printed = 0;
387
388                 eols = zero_ended ? btowc('\0') : btowc('\n');
389
390                 while (printed < BWSLEN(bws)) {
391                         const wchar_t *s = bws->wdata.str + printed;
392
393                         if (*s == L'\0') {
394                                 int nums;
395
396                                 nums = fwprintf(f, L"%lc", *s);
397
398                                 if (nums != 1)
399                                         err(2, NULL);
400                                 ++printed;
401                         } else {
402                                 int nums;
403
404                                 nums = fwprintf(f, L"%ls", s);
405
406                                 if (nums < 1)
407                                         err(2, NULL);
408                                 printed += nums;
409                         }
410                 }
411                 fwprintf(f, L"%lc", eols);
412                 return (printed + 1);
413         }
414 }
415
416 int
417 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
418     size_t offset, size_t len)
419 {
420         size_t cmp_len, len1, len2;
421         int res;
422
423         len1 = BWSLEN(bws1);
424         len2 = BWSLEN(bws2);
425
426         if (len1 <= offset) {
427                 return ((len2 <= offset) ? 0 : -1);
428         } else {
429                 if (len2 <= offset)
430                         return (+1);
431                 else {
432                         len1 -= offset;
433                         len2 -= offset;
434
435                         cmp_len = len1;
436
437                         if (len2 < cmp_len)
438                                 cmp_len = len2;
439
440                         if (len < cmp_len)
441                                 cmp_len = len;
442
443                         if (mb_cur_max == 1) {
444                                 const char *s1, *s2;
445
446                                 s1 = bws1->cdata.str + offset;
447                                 s2 = bws2->cdata.str + offset;
448
449                                 res = memcmp(s1, s2, cmp_len);
450
451                         } else {
452                                 const wchar_t *s1, *s2;
453
454                                 s1 = bws1->wdata.str + offset;
455                                 s2 = bws2->wdata.str + offset;
456
457                                 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
458                         }
459                 }
460         }
461
462         if (res == 0) {
463                 if (len1 < cmp_len && len1 < len2)
464                         res = -1;
465                 else if (len2 < cmp_len && len2 < len1)
466                         res = +1;
467         }
468
469         return (res);
470 }
471
/*
 * Compare two strings from offset to their ends.
 *
 * The common span is compared with bwsncmp(); when it is equal the
 * shorter remainder sorts first.  A string that ends at or before offset
 * is treated as having an empty remainder, so the size_t subtraction can
 * never wrap around.
 */
int
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
        size_t len1, len2, cmp_len;
        int res;

        len1 = BWSLEN(bws1);
        len2 = BWSLEN(bws2);

        len1 = (len1 > offset) ? len1 - offset : 0;
        len2 = (len2 > offset) ? len2 - offset : 0;

        cmp_len = len1;

        if (len2 < cmp_len)
                cmp_len = len2;

        res = bwsncmp(bws1, bws2, offset, cmp_len);

        if (res == 0) {
                if (len1 < len2)
                        res = -1;
                else if (len2 < len1)
                        res = +1;
        }

        return (res);
}
500
501 int
502 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
503 {
504         wchar_t c1, c2;
505         size_t i;
506
507         for (i = 0; i < len; ++i) {
508                 c1 = bws_get_iter_value(iter1);
509                 c2 = bws_get_iter_value(iter2);
510                 if (c1 != c2)
511                         return (c1 - c2);
512                 iter1 = bws_iterator_inc(iter1, 1);
513                 iter2 = bws_iterator_inc(iter2, 1);
514         }
515
516         return (0);
517 }
518
519 int
520 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
521 {
522         size_t len1, len2;
523
524         len1 = BWSLEN(bws1);
525         len2 = BWSLEN(bws2);
526
527         if (len1 <= offset)
528                 return ((len2 <= offset) ? 0 : -1);
529         else {
530                 if (len2 <= offset)
531                         return (+1);
532                 else {
533                         len1 -= offset;
534                         len2 -= offset;
535
536                         if (mb_cur_max == 1) {
537                                 const char *s1, *s2;
538
539                                 s1 = bws1->cdata.str + offset;
540                                 s2 = bws2->cdata.str + offset;
541
542                                 if (byte_sort) {
543                                         int res;
544
545                                         if (len1 > len2) {
546                                                 res = memcmp(s1, s2, len2);
547                                                 if (!res)
548                                                         res = +1;
549                                         } else if (len1 < len2) {
550                                                 res = memcmp(s1, s2, len1);
551                                                 if (!res)
552                                                         res = -1;
553                                         } else
554                                                 res = memcmp(s1, s2, len1);
555
556                                         return (res);
557
558                                 } else {
559                                         int res;
560                                         size_t i, maxlen;
561
562                                         i = 0;
563                                         maxlen = len1;
564
565                                         if (maxlen > len2)
566                                                 maxlen = len2;
567
568                                         while (i < maxlen) {
569                                                 /* goto next non-zero part: */
570                                                 while ((i < maxlen) &&
571                                                     !s1[i] && !s2[i])
572                                                         ++i;
573
574                                                 if (i >= maxlen)
575                                                         break;
576
577                                                 if (s1[i] == 0) {
578                                                         if (s2[i] == 0)
579                                                                 /* NOTREACHED */
580                                                                 err(2, "bwscoll error 01");
581                                                         else
582                                                                 return (-1);
583                                                 } else if (s2[i] == 0)
584                                                         return (+1);
585
586                                                 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
587                                                 if (res)
588                                                         return (res);
589
590                                                 while ((i < maxlen) &&
591                                                     s1[i] && s2[i])
592                                                         ++i;
593
594                                                 if (i >= maxlen)
595                                                         break;
596
597                                                 if (s1[i] == 0) {
598                                                         if (s2[i] == 0) {
599                                                                 ++i;
600                                                                 continue;
601                                                         } else
602                                                                 return (-1);
603                                                 } else if (s2[i] == 0)
604                                                         return (+1);
605                                                 else
606                                                         /* NOTREACHED */
607                                                         err(2, "bwscoll error 02");
608                                         }
609
610                                         if (len1 < len2)
611                                                 return (-1);
612                                         else if (len1 > len2)
613                                                 return (+1);
614
615                                         return (0);
616                                 }
617                         } else {
618                                 const wchar_t *s1, *s2;
619                                 size_t i, maxlen;
620                                 int res;
621
622                                 s1 = bws1->wdata.str + offset;
623                                 s2 = bws2->wdata.str + offset;
624
625                                 i = 0;
626                                 maxlen = len1;
627
628                                 if (maxlen > len2)
629                                         maxlen = len2;
630
631                                 while (i < maxlen) {
632
633                                         /* goto next non-zero part: */
634                                         while ((i < maxlen) &&
635                                             !s1[i] && !s2[i])
636                                                 ++i;
637
638                                         if (i >= maxlen)
639                                                 break;
640
641                                         if (s1[i] == 0) {
642                                                 if (s2[i] == 0)
643                                                         /* NOTREACHED */
644                                                         err(2, "bwscoll error 1");
645                                                 else
646                                                         return (-1);
647                                         } else if (s2[i] == 0)
648                                                 return (+1);
649
650                                         res = wide_str_coll(s1 + i, s2 + i);
651                                         if (res)
652                                                 return (res);
653
654                                         while ((i < maxlen) && s1[i] && s2[i])
655                                                 ++i;
656
657                                         if (i >= maxlen)
658                                                 break;
659
660                                         if (s1[i] == 0) {
661                                                 if (s2[i] == 0) {
662                                                         ++i;
663                                                         continue;
664                                                 } else
665                                                         return (-1);
666                                         } else if (s2[i] == 0)
667                                                 return (+1);
668                                         else
669                                                 /* NOTREACHED */
670                                                 err(2, "bwscoll error 2");
671                                 }
672
673                                 if (len1 < len2)
674                                         return (-1);
675                                 else if (len1 > len2)
676                                         return (+1);
677
678                                 return (0);
679                         }
680                 }
681         }
682 }
683
684 /*
685  * Correction of the system API
686  */
687 double
688 bwstod(struct bwstring *s0, bool *empty)
689 {
690         double ret;
691
692         if (mb_cur_max == 1) {
693                 char *end, *s;
694                 char *ep;
695
696                 s = s0->cdata.str;
697                 end = s + s0->cdata.len;
698                 ep = NULL;
699
700                 while (isblank(*s) && s < end)
701                         ++s;
702
703                 if (!isprint(*s)) {
704                         *empty = true;
705                         return (0);
706                 }
707
708                 ret = strtod((char*)s, &ep);
709                 if (ep == s) {
710                         *empty = true;
711                         return (0);
712                 }
713         } else {
714                 wchar_t *end, *ep, *s;
715
716                 s = s0->wdata.str;
717                 end = s + s0->wdata.len;
718                 ep = NULL;
719
720                 while (iswblank(*s) && s < end)
721                         ++s;
722
723                 if (!iswprint(*s)) {
724                         *empty = true;
725                         return (0);
726                 }
727
728                 ret = wcstod(s, &ep);
729                 if (ep == s) {
730                         *empty = true;
731                         return (0);
732                 }
733         }
734
735         *empty = false;
736         return (ret);
737 }
738
739 /*
740  * A helper function for monthcoll.  If a line matches
741  * a month name, it returns (number of the month - 1),
742  * while if there is no match, it just return -1.
743  */
744
745 int
746 bws_month_score(const struct bwstring *s0)
747 {
748
749         if (mb_cur_max == 1) {
750                 const char *end, *s;
751
752                 s = s0->cdata.str;
753                 end = s + s0->cdata.len;
754
755                 while (isblank(*s) && s < end)
756                         ++s;
757
758                 for (int i = 11; i >= 0; --i) {
759                         if (cmonths[i] &&
760                             (s == strstr(s, cmonths[i])))
761                                 return (i);
762                 }
763
764         } else {
765                 const wchar_t *end, *s;
766
767                 s = s0->wdata.str;
768                 end = s + s0->wdata.len;
769
770                 while (iswblank(*s) && s < end)
771                         ++s;
772
773                 for (int i = 11; i >= 0; --i) {
774                         if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
775                                 return (i);
776                 }
777         }
778
779         return (-1);
780 }
781
782 /*
783  * Rips out leading blanks (-b).
784  */
785 struct bwstring *
786 ignore_leading_blanks(struct bwstring *str)
787 {
788
789         if (mb_cur_max == 1) {
790                 char *dst, *end, *src;
791
792                 src = str->cdata.str;
793                 dst = src;
794                 end = src + str->cdata.len;
795
796                 while (src < end && isblank(*src))
797                         ++src;
798
799                 if (src != dst) {
800                         size_t newlen;
801
802                         newlen = BWSLEN(str) - (src - dst);
803
804                         while (src < end) {
805                                 *dst = *src;
806                                 ++dst;
807                                 ++src;
808                         }
809                         bws_setlen(str, newlen);
810                 }
811         } else {
812                 wchar_t *dst, *end, *src;
813
814                 src = str->wdata.str;
815                 dst = src;
816                 end = src + str->wdata.len;
817
818                 while (src < end && iswblank(*src))
819                         ++src;
820
821                 if (src != dst) {
822
823                         size_t newlen = BWSLEN(str) - (src - dst);
824
825                         while (src < end) {
826                                 *dst = *src;
827                                 ++dst;
828                                 ++src;
829                         }
830                         bws_setlen(str, newlen);
831
832                 }
833         }
834         return (str);
835 }
836
837 /*
838  * Rips out nonprinting characters (-i).
839  */
840 struct bwstring *
841 ignore_nonprinting(struct bwstring *str)
842 {
843         size_t newlen = BWSLEN(str);
844
845         if (mb_cur_max == 1) {
846                 char *dst, *end, *src;
847                 char c;
848
849                 src = str->cdata.str;
850                 dst = src;
851                 end = src + str->cdata.len;
852
853                 while (src < end) {
854                         c = *src;
855                         if (isprint(c)) {
856                                 *dst = c;
857                                 ++dst;
858                                 ++src;
859                         } else {
860                                 ++src;
861                                 --newlen;
862                         }
863                 }
864         } else {
865                 wchar_t *dst, *end, *src;
866                 wchar_t c;
867
868                 src = str->wdata.str;
869                 dst = src;
870                 end = src + str->wdata.len;
871
872                 while (src < end) {
873                         c = *src;
874                         if (iswprint(c)) {
875                                 *dst = c;
876                                 ++dst;
877                                 ++src;
878                         } else {
879                                 ++src;
880                                 --newlen;
881                         }
882                 }
883         }
884         bws_setlen(str, newlen);
885
886         return (str);
887 }
888
889 /*
890  * Rips out any characters that are not alphanumeric characters
891  * nor blanks (-d).
892  */
893 struct bwstring *
894 dictionary_order(struct bwstring *str)
895 {
896         size_t newlen = BWSLEN(str);
897
898         if (mb_cur_max == 1) {
899                 char *dst, *end, *src;
900                 char c;
901
902                 src = str->cdata.str;
903                 dst = src;
904                 end = src + str->cdata.len;
905
906                 while (src < end) {
907                         c = *src;
908                         if (isalnum(c) || isblank(c)) {
909                                 *dst = c;
910                                 ++dst;
911                                 ++src;
912                         } else {
913                                 ++src;
914                                 --newlen;
915                         }
916                 }
917         } else {
918                 wchar_t *dst, *end, *src;
919                 wchar_t c;
920
921                 src = str->wdata.str;
922                 dst = src;
923                 end = src + str->wdata.len;
924
925                 while (src < end) {
926                         c = *src;
927                         if (iswalnum(c) || iswblank(c)) {
928                                 *dst = c;
929                                 ++dst;
930                                 ++src;
931                         } else {
932                                 ++src;
933                                 --newlen;
934                         }
935                 }
936         }
937         bws_setlen(str, newlen);
938
939         return (str);
940 }
941
942 /*
943  * Converts string to lower case(-f).
944  */
945 struct bwstring *
946 ignore_case(struct bwstring *str)
947 {
948
949         if (mb_cur_max == 1) {
950                 char *end, *s;
951
952                 s = str->cdata.str;
953                 end = s + str->cdata.len;
954
955                 while (s < end) {
956                         *s = toupper(*s);
957                         ++s;
958                 }
959         } else {
960                 wchar_t *end, *s;
961
962                 s = str->wdata.str;
963                 end = s + str->wdata.len;
964
965                 while (s < end) {
966                         *s = towupper(*s);
967                         ++s;
968                 }
969         }
970         return (str);
971 }
972
973 void
974 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
975 {
976
977         if (mb_cur_max == 1)
978                 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
979         else
980                 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);
981 }