]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/bwstring.c
Import tzdata 2019c.
[FreeBSD/FreeBSD.git] / usr.bin / sort / bwstring.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
/*
 * When set, bwscoll() compares single-byte strings with memcmp()
 * instead of strcoll().
 */
bool byte_sort;

/*
 * Upper-cased abbreviated month names filled in by initialise_months():
 * wmonths is used when MB_CUR_MAX > 1, cmonths when MB_CUR_MAX == 1.
 * An entry stays NULL when no name could be obtained for that month.
 */
static wchar_t **wmonths;
static unsigned char **cmonths;
50
51 /* initialise months */
52
53 void
54 initialise_months(void)
55 {
56         const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57             ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58             ABMON_11, ABMON_12 };
59         unsigned char *tmp;
60         size_t len;
61
62         if (MB_CUR_MAX == 1) {
63                 if (cmonths == NULL) {
64                         unsigned char *m;
65
66                         cmonths = sort_malloc(sizeof(unsigned char*) * 12);
67                         for (int i = 0; i < 12; i++) {
68                                 cmonths[i] = NULL;
69                                 tmp = (unsigned char *) nl_langinfo(item[i]);
70                                 if (debug_sort)
71                                         printf("month[%d]=%s\n", i, tmp);
72                                 if (*tmp == '\0')
73                                         continue;
74                                 m = sort_strdup(tmp);
75                                 len = strlen(tmp);
76                                 for (unsigned int j = 0; j < len; j++)
77                                         m[j] = toupper(m[j]);
78                                 cmonths[i] = m;
79                         }
80                 }
81
82         } else {
83                 if (wmonths == NULL) {
84                         wchar_t *m;
85
86                         wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87                         for (int i = 0; i < 12; i++) {
88                                 wmonths[i] = NULL;
89                                 tmp = (unsigned char *) nl_langinfo(item[i]);
90                                 if (debug_sort)
91                                         printf("month[%d]=%s\n", i, tmp);
92                                 if (*tmp == '\0')
93                                         continue;
94                                 len = strlen(tmp);
95                                 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96                                 if (mbstowcs(m, (char*)tmp, len) ==
97                                     ((size_t) - 1)) {
98                                         sort_free(m);
99                                         continue;
100                                 }
101                                 m[len] = L'\0';
102                                 for (unsigned int j = 0; j < len; j++)
103                                         m[j] = towupper(m[j]);
104                                 wmonths[i] = m;
105                         }
106                 }
107         }
108 }
109
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the comparison falls
 * back to wcscmp(), and if errno is set after that call too, to a plain
 * element-by-element comparison.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
        int order;

        errno = 0;
        order = wcscoll(s1, s2);
        if (errno != EILSEQ)
                return (order);

        errno = 0;
        order = wcscmp(s1, s2);
        if (errno == 0)
                return (order);

        /* Last resort: walk both strings until they differ or one ends. */
        for (size_t pos = 0; ; ++pos) {
                const wchar_t a = s1[pos];
                const wchar_t b = s2[pos];

                if (a == L'\0')
                        return ((b == L'\0') ? 0 : -1);
                if (b == L'\0')
                        return (+1);
                if (a != b)
                        return ((int)(a - b));
        }
}
139
140 /* counterparts of wcs functions */
141
142 void
143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146         if (MB_CUR_MAX == 1)
147                 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
148         else
149                 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
150 }
151
152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155         return (&(bws->data));
156 }
157
158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161         return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
162 }
163
164 size_t
165 bws_memsize(const struct bwstring *bws)
166 {
167
168         return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
169             (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
170 }
171
172 void
173 bws_setlen(struct bwstring *bws, size_t newlen)
174 {
175
176         if (bws && newlen != bws->len && newlen <= bws->len) {
177                 bws->len = newlen;
178                 if (MB_CUR_MAX == 1)
179                         bws->data.cstr[newlen] = '\0';
180                 else
181                         bws->data.wstr[newlen] = L'\0';
182         }
183 }
184
185 /*
186  * Allocate a new binary string of specified size
187  */
188 struct bwstring *
189 bwsalloc(size_t sz)
190 {
191         struct bwstring *ret;
192
193         if (MB_CUR_MAX == 1)
194                 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
195         else
196                 ret = sort_malloc(sizeof(struct bwstring) +
197                     SIZEOF_WCHAR_STRING(sz + 1));
198         ret->len = sz;
199
200         if (MB_CUR_MAX == 1)
201                 ret->data.cstr[ret->len] = '\0';
202         else
203                 ret->data.wstr[ret->len] = L'\0';
204
205         return (ret);
206 }
207
208 /*
209  * Create a copy of binary string.
210  * New string size equals the length of the old string.
211  */
212 struct bwstring *
213 bwsdup(const struct bwstring *s)
214 {
215
216         if (s == NULL)
217                 return (NULL);
218         else {
219                 struct bwstring *ret = bwsalloc(s->len);
220
221                 if (MB_CUR_MAX == 1)
222                         memcpy(ret->data.cstr, s->data.cstr, (s->len));
223                 else
224                         memcpy(ret->data.wstr, s->data.wstr,
225                             SIZEOF_WCHAR_STRING(s->len));
226
227                 return (ret);
228         }
229 }
230
231 /*
232  * Create a new binary string from a wide character buffer.
233  */
234 struct bwstring *
235 bwssbdup(const wchar_t *str, size_t len)
236 {
237
238         if (str == NULL)
239                 return ((len == 0) ? bwsalloc(0) : NULL);
240         else {
241                 struct bwstring *ret;
242
243                 ret = bwsalloc(len);
244
245                 if (MB_CUR_MAX == 1)
246                         for (size_t i = 0; i < len; ++i)
247                                 ret->data.cstr[i] = (unsigned char) str[i];
248                 else
249                         memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
250
251                 return (ret);
252         }
253 }
254
/*
 * Create a new binary string from a raw binary buffer.
 *
 * str: len bytes of input; when NULL nothing is copied and the freshly
 *      allocated string of length len is returned as is.
 * len: number of bytes at str.
 *
 * In single-byte mode the bytes are copied verbatim.  In multibyte mode
 * the buffer is decoded character by character; a byte that mbrlen()
 * reports as NUL, invalid or incomplete is stored as a single wide
 * character with the byte's value.  The resulting length is the number of
 * wide characters stored, which never exceeds len.
 */
struct bwstring *
bwscsbdup(const unsigned char *str, size_t len)
{
        struct bwstring *ret;

        /* len wide characters is enough: each one consumes >= 1 byte. */
        ret = bwsalloc(len);

        if (str) {
                if (MB_CUR_MAX == 1)
                        memcpy(ret->data.cstr, str, len);
                else {
                        mbstate_t mbs;
                        const char *s;
                        size_t charlen, chars, cptr;

                        /* chars: wide chars written; cptr: bytes consumed */
                        charlen = chars = 0;
                        cptr = 0;
                        s = (const char *) str;

                        memset(&mbs, 0, sizeof(mbs));

                        while (cptr < len) {
                                size_t n = MB_CUR_MAX;

                                /* never look past the end of the input */
                                if (n > len - cptr)
                                        n = len - cptr;
                                charlen = mbrlen(s + cptr, n, &mbs);
                                switch (charlen) {
                                case 0:
                                        /* FALLTHROUGH */
                                case (size_t) -1:
                                        /* FALLTHROUGH */
                                case (size_t) -2:
                                        /* keep the raw byte and move on */
                                        ret->data.wstr[chars++] =
                                            (unsigned char) s[cptr];
                                        ++cptr;
                                        break;
                                default:
                                        /* a complete character of charlen bytes */
                                        n = mbrtowc(ret->data.wstr + (chars++),
                                            s + cptr, charlen, &mbs);
                                        if ((n == (size_t)-1) || (n == (size_t)-2))
                                                /* NOTREACHED */
                                                err(2, "mbrtowc error");
                                        cptr += charlen;
                                }
                        }

                        /* shrink to the number of characters decoded */
                        ret->len = chars;
                        ret->data.wstr[ret->len] = L'\0';
                }
        }
        return (ret);
}
311
/*
 * Release a string through sort_free().  NULL is accepted and ignored.
 */
void
bwsfree(const struct bwstring *s)
{

        if (s == NULL)
                return;
        sort_free(s);
}
322
323 /*
324  * Copy content of src binary string to dst.
325  * If the capacity of the dst string is not sufficient,
326  * then the data is truncated.
327  */
328 size_t
329 bwscpy(struct bwstring *dst, const struct bwstring *src)
330 {
331         size_t nums = src->len;
332
333         if (nums > dst->len)
334                 nums = dst->len;
335         dst->len = nums;
336
337         if (MB_CUR_MAX == 1) {
338                 memcpy(dst->data.cstr, src->data.cstr, nums);
339                 dst->data.cstr[dst->len] = '\0';
340         } else {
341                 memcpy(dst->data.wstr, src->data.wstr,
342                     SIZEOF_WCHAR_STRING(nums + 1));
343                 dst->data.wstr[dst->len] = L'\0';
344         }
345
346         return (nums);
347 }
348
349 /*
350  * Copy content of src binary string to dst,
351  * with specified number of symbols to be copied.
352  * If the capacity of the dst string is not sufficient,
353  * then the data is truncated.
354  */
355 struct bwstring *
356 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
357 {
358         size_t nums = src->len;
359
360         if (nums > dst->len)
361                 nums = dst->len;
362         if (nums > size)
363                 nums = size;
364         dst->len = nums;
365
366         if (MB_CUR_MAX == 1) {
367                 memcpy(dst->data.cstr, src->data.cstr, nums);
368                 dst->data.cstr[dst->len] = '\0';
369         } else {
370                 memcpy(dst->data.wstr, src->data.wstr,
371                     SIZEOF_WCHAR_STRING(nums + 1));
372                 dst->data.wstr[dst->len] = L'\0';
373         }
374
375         return (dst);
376 }
377
378 /*
379  * Copy content of src binary string to dst,
380  * with specified number of symbols to be copied.
381  * An offset value can be specified, from the start of src string.
382  * If the capacity of the dst string is not sufficient,
383  * then the data is truncated.
384  */
385 struct bwstring *
386 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
387     size_t size)
388 {
389
390         if (offset >= src->len) {
391                 dst->data.wstr[0] = 0;
392                 dst->len = 0;
393         } else {
394                 size_t nums = src->len - offset;
395
396                 if (nums > dst->len)
397                         nums = dst->len;
398                 if (nums > size)
399                         nums = size;
400                 dst->len = nums;
401                 if (MB_CUR_MAX == 1) {
402                         memcpy(dst->data.cstr, src->data.cstr + offset,
403                             (nums));
404                         dst->data.cstr[dst->len] = '\0';
405                 } else {
406                         memcpy(dst->data.wstr, src->data.wstr + offset,
407                             SIZEOF_WCHAR_STRING(nums));
408                         dst->data.wstr[dst->len] = L'\0';
409                 }
410         }
411         return (dst);
412 }
413
414 /*
415  * Write binary string to the file.
416  * The output is ended either with '\n' (nl == true)
417  * or '\0' (nl == false).
418  */
419 size_t
420 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
421 {
422
423         if (MB_CUR_MAX == 1) {
424                 size_t len = bws->len;
425
426                 if (!zero_ended) {
427                         bws->data.cstr[len] = '\n';
428
429                         if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
430                                 err(2, NULL);
431
432                         bws->data.cstr[len] = '\0';
433                 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
434                         err(2, NULL);
435
436                 return (len + 1);
437
438         } else {
439                 wchar_t eols;
440                 size_t printed = 0;
441
442                 eols = zero_ended ? btowc('\0') : btowc('\n');
443
444                 while (printed < BWSLEN(bws)) {
445                         const wchar_t *s = bws->data.wstr + printed;
446
447                         if (*s == L'\0') {
448                                 int nums;
449
450                                 nums = fwprintf(f, L"%lc", *s);
451
452                                 if (nums != 1)
453                                         err(2, NULL);
454                                 ++printed;
455                         } else {
456                                 int nums;
457
458                                 nums = fwprintf(f, L"%ls", s);
459
460                                 if (nums < 1)
461                                         err(2, NULL);
462                                 printed += nums;
463                         }
464                 }
465                 fwprintf(f, L"%lc", eols);
466                 return (printed + 1);
467         }
468 }
469
/*
 * Allocate and read a binary string from file.
 * The strings are nl-ended or zero-ended, depending on the sort setting.
 *
 * f:          stream to read from.
 * len:        out parameter; receives the number of characters in the
 *             record, terminator excluded.
 * zero_ended: records end with '\0' when true, with '\n' otherwise.
 * rb:         supplies the growable wide-character buffer used by the
 *             zero-ended path (not touched for newline-ended input).
 *
 * Returns a newly allocated string, or NULL when no further record could
 * be read.  In the newline-ended paths a NULL from fgetwln()/fgetln()
 * without end-of-file being set is fatal (err(2, NULL)).
 */
struct bwstring *
bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
{
        wint_t eols;

        eols = zero_ended ? btowc('\0') : btowc('\n');

        if (!zero_ended && (MB_CUR_MAX > 1)) {
                /* newline-ended, multibyte locale: read a wide line */
                wchar_t *ret;

                ret = fgetwln(f, len);

                if (ret == NULL) {
                        if (!feof(f))
                                err(2, NULL);
                        return (NULL);
                }
                /* drop the trailing newline, if the line has one */
                if (*len > 0) {
                        if (ret[*len - 1] == (wchar_t)eols)
                                --(*len);
                }
                return (bwssbdup(ret, *len));

        } else if (!zero_ended && (MB_CUR_MAX == 1)) {
                /* newline-ended, single-byte locale: read a byte line */
                char *ret;

                ret = fgetln(f, len);

                if (ret == NULL) {
                        if (!feof(f))
                                err(2, NULL);
                        return (NULL);
                }
                /* drop the trailing newline, if the line has one */
                if (*len > 0) {
                        if (ret[*len - 1] == '\n')
                                --(*len);
                }
                return (bwscsbdup((unsigned char*)ret, *len));

        } else {
                /*
                 * zero-ended input: accumulate characters one at a time in
                 * rb->fgetwln_z_buffer until the terminator or end of file
                 */
                *len = 0;

                if (feof(f))
                        return (NULL);

                /* make sure there is room for at least the terminator */
                if (2 >= rb->fgetwln_z_buffer_size) {
                        rb->fgetwln_z_buffer_size += 256;
                        rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
                            sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
                }
                rb->fgetwln_z_buffer[*len] = 0;

                if (MB_CUR_MAX == 1)
                        while (!feof(f)) {
                                int c;

                                c = fgetc(f);

                                if (c == EOF) {
                                        /* nothing read at all: no record */
                                        if (*len == 0)
                                                return (NULL);
                                        goto line_read_done;
                                }
                                if (c == eols)
                                        goto line_read_done;

                                /* grow in steps of 256 elements */
                                if (*len + 1 >= rb->fgetwln_z_buffer_size) {
                                        rb->fgetwln_z_buffer_size += 256;
                                        rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
                                            SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
                                }

                                /* bytes are stored widened, one per element */
                                rb->fgetwln_z_buffer[*len] = c;
                                rb->fgetwln_z_buffer[++(*len)] = 0;
                        }
                else
                        while (!feof(f)) {
                                wint_t c = 0;

                                c = fgetwc(f);

                                if (c == WEOF) {
                                        /* nothing read at all: no record */
                                        if (*len == 0)
                                                return (NULL);
                                        goto line_read_done;
                                }
                                if (c == eols)
                                        goto line_read_done;

                                /* grow in steps of 256 elements */
                                if (*len + 1 >= rb->fgetwln_z_buffer_size) {
                                        rb->fgetwln_z_buffer_size += 256;
                                        rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
                                            SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
                                }

                                rb->fgetwln_z_buffer[*len] = c;
                                rb->fgetwln_z_buffer[++(*len)] = 0;
                        }

line_read_done:
                /* we do not count the last 0 */
                return (bwssbdup(rb->fgetwln_z_buffer, *len));
        }
}
578
579 int
580 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
581     size_t offset, size_t len)
582 {
583         size_t cmp_len, len1, len2;
584         int res = 0;
585
586         cmp_len = 0;
587         len1 = bws1->len;
588         len2 = bws2->len;
589
590         if (len1 <= offset) {
591                 return ((len2 <= offset) ? 0 : -1);
592         } else {
593                 if (len2 <= offset)
594                         return (+1);
595                 else {
596                         len1 -= offset;
597                         len2 -= offset;
598
599                         cmp_len = len1;
600
601                         if (len2 < cmp_len)
602                                 cmp_len = len2;
603
604                         if (len < cmp_len)
605                                 cmp_len = len;
606
607                         if (MB_CUR_MAX == 1) {
608                                 const unsigned char *s1, *s2;
609
610                                 s1 = bws1->data.cstr + offset;
611                                 s2 = bws2->data.cstr + offset;
612
613                                 res = memcmp(s1, s2, cmp_len);
614
615                         } else {
616                                 const wchar_t *s1, *s2;
617
618                                 s1 = bws1->data.wstr + offset;
619                                 s2 = bws2->data.wstr + offset;
620
621                                 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
622                         }
623                 }
624         }
625
626         if (res == 0) {
627                 if (len1 < cmp_len && len1 < len2)
628                         res = -1;
629                 else if (len2 < cmp_len && len2 < len1)
630                         res = +1;
631         }
632
633         return (res);
634 }
635
636 int
637 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
638 {
639         size_t len1, len2, cmp_len;
640         int res;
641
642         len1 = bws1->len;
643         len2 = bws2->len;
644
645         len1 -= offset;
646         len2 -= offset;
647
648         cmp_len = len1;
649
650         if (len2 < cmp_len)
651                 cmp_len = len2;
652
653         res = bwsncmp(bws1, bws2, offset, cmp_len);
654
655         if (res == 0) {
656                 if( len1 < len2)
657                         res = -1;
658                 else if (len2 < len1)
659                         res = +1;
660         }
661
662         return (res);
663 }
664
665 int
666 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
667 {
668         wchar_t c1, c2;
669         size_t i = 0;
670
671         for (i = 0; i < len; ++i) {
672                 c1 = bws_get_iter_value(iter1);
673                 c2 = bws_get_iter_value(iter2);
674                 if (c1 != c2)
675                         return (c1 - c2);
676                 iter1 = bws_iterator_inc(iter1, 1);
677                 iter2 = bws_iterator_inc(iter2, 1);
678         }
679
680         return (0);
681 }
682
/*
 * Collate two strings starting at offset.
 *
 * A string with no characters at offset sorts before one that has some;
 * two such strings compare equal.  In single-byte mode with byte_sort set
 * the remainders are compared with memcmp() and ties are broken by length.
 * Otherwise the remainders are collated with strcoll() (single-byte) or
 * wide_str_coll() (multibyte).  Those functions stop at a NUL, so the
 * data is walked as a sequence of NUL-separated segments: runs of NULs
 * common to both strings are skipped, a NUL facing a non-NUL sorts first,
 * and each pair of segments is collated in turn.  If everything within
 * the common length compares equal, the shorter string sorts first.
 *
 * Returns a negative value, zero or a positive value.
 */
int
bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
        size_t len1, len2;

        len1 = bws1->len;
        len2 = bws2->len;

        if (len1 <= offset)
                return ((len2 <= offset) ? 0 : -1);
        else {
                if (len2 <= offset)
                        return (+1);
                else {
                        /* from here on len1/len2 are the remaining lengths */
                        len1 -= offset;
                        len2 -= offset;

                        if (MB_CUR_MAX == 1) {
                                const unsigned char *s1, *s2;

                                s1 = bws1->data.cstr + offset;
                                s2 = bws2->data.cstr + offset;

                                if (byte_sort) {
                                        int res = 0;

                                        /*
                                         * plain byte comparison of the common
                                         * prefix; the longer string wins a tie
                                         */
                                        if (len1 > len2) {
                                                res = memcmp(s1, s2, len2);
                                                if (!res)
                                                        res = +1;
                                        } else if (len1 < len2) {
                                                res = memcmp(s1, s2, len1);
                                                if (!res)
                                                        res = -1;
                                        } else
                                                res = memcmp(s1, s2, len1);

                                        return (res);

                                } else {
                                        int res = 0;
                                        size_t i, maxlen;

                                        i = 0;
                                        maxlen = len1;

                                        if (maxlen > len2)
                                                maxlen = len2;

                                        while (i < maxlen) {
                                                /* goto next non-zero part: */
                                                while ((i < maxlen) &&
                                                    !s1[i] && !s2[i])
                                                        ++i;

                                                if (i >= maxlen)
                                                        break;

                                                /* a NUL facing a non-NUL sorts first */
                                                if (s1[i] == 0) {
                                                        if (s2[i] == 0)
                                                                /* NOTREACHED */
                                                                err(2, "bwscoll error 01");
                                                        else
                                                                return (-1);
                                                } else if (s2[i] == 0)
                                                        return (+1);

                                                /* collate the segments starting at i */
                                                res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
                                                if (res)
                                                        return (res);

                                                /* equal so far: skip to the end of the shorter segment */
                                                while ((i < maxlen) &&
                                                    s1[i] && s2[i])
                                                        ++i;

                                                if (i >= maxlen)
                                                        break;

                                                if (s1[i] == 0) {
                                                        if (s2[i] == 0) {
                                                                /* both segments end here */
                                                                ++i;
                                                                continue;
                                                        } else
                                                                return (-1);
                                                } else if (s2[i] == 0)
                                                        return (+1);
                                                else
                                                        /* NOTREACHED */
                                                        err(2, "bwscoll error 02");
                                        }

                                        /* common part equal: shorter string first */
                                        if (len1 < len2)
                                                return (-1);
                                        else if (len1 > len2)
                                                return (+1);

                                        return (0);
                                }
                        } else {
                                const wchar_t *s1, *s2;
                                size_t i, maxlen;
                                int res = 0;

                                s1 = bws1->data.wstr + offset;
                                s2 = bws2->data.wstr + offset;

                                i = 0;
                                maxlen = len1;

                                if (maxlen > len2)
                                        maxlen = len2;

                                /* same segment walk as above, on wide characters */
                                while (i < maxlen) {

                                        /* goto next non-zero part: */
                                        while ((i < maxlen) &&
                                            !s1[i] && !s2[i])
                                                ++i;

                                        if (i >= maxlen)
                                                break;

                                        if (s1[i] == 0) {
                                                if (s2[i] == 0)
                                                        /* NOTREACHED */
                                                        err(2, "bwscoll error 1");
                                                else
                                                        return (-1);
                                        } else if (s2[i] == 0)
                                                return (+1);

                                        res = wide_str_coll(s1 + i, s2 + i);
                                        if (res)
                                                return (res);

                                        while ((i < maxlen) && s1[i] && s2[i])
                                                ++i;

                                        if (i >= maxlen)
                                                break;

                                        if (s1[i] == 0) {
                                                if (s2[i] == 0) {
                                                        ++i;
                                                        continue;
                                                } else
                                                        return (-1);
                                        } else if (s2[i] == 0)
                                                return (+1);
                                        else
                                                /* NOTREACHED */
                                                err(2, "bwscoll error 2");
                                }

                                /* common part equal: shorter string first */
                                if (len1 < len2)
                                        return (-1);
                                else if (len1 > len2)
                                        return (+1);

                                return (0);
                        }
                }
        }
}
847
848 /*
849  * Correction of the system API
850  */
851 double
852 bwstod(struct bwstring *s0, bool *empty)
853 {
854         double ret = 0;
855
856         if (MB_CUR_MAX == 1) {
857                 unsigned char *end, *s;
858                 char *ep;
859
860                 s = s0->data.cstr;
861                 end = s + s0->len;
862                 ep = NULL;
863
864                 while (isblank(*s) && s < end)
865                         ++s;
866
867                 if (!isprint(*s)) {
868                         *empty = true;
869                         return (0);
870                 }
871
872                 ret = strtod((char*)s, &ep);
873                 if ((unsigned char*) ep == s) {
874                         *empty = true;
875                         return (0);
876                 }
877         } else {
878                 wchar_t *end, *ep, *s;
879
880                 s = s0->data.wstr;
881                 end = s + s0->len;
882                 ep = NULL;
883
884                 while (iswblank(*s) && s < end)
885                         ++s;
886
887                 if (!iswprint(*s)) {
888                         *empty = true;
889                         return (0);
890                 }
891
892                 ret = wcstod(s, &ep);
893                 if (ep == s) {
894                         *empty = true;
895                         return (0);
896                 }
897         }
898
899         *empty = false;
900         return (ret);
901 }
902
903 /*
904  * A helper function for monthcoll.  If a line matches
905  * a month name, it returns (number of the month - 1),
906  * while if there is no match, it just return -1.
907  */
908
909 int
910 bws_month_score(const struct bwstring *s0)
911 {
912
913         if (MB_CUR_MAX == 1) {
914                 const unsigned char *end, *s;
915
916                 s = s0->data.cstr;
917                 end = s + s0->len;
918
919                 while (isblank(*s) && s < end)
920                         ++s;
921
922                 for (int i = 11; i >= 0; --i) {
923                         if (cmonths[i] &&
924                             (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
925                                 return (i);
926                 }
927
928         } else {
929                 const wchar_t *end, *s;
930
931                 s = s0->data.wstr;
932                 end = s + s0->len;
933
934                 while (iswblank(*s) && s < end)
935                         ++s;
936
937                 for (int i = 11; i >= 0; --i) {
938                         if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
939                                 return (i);
940                 }
941         }
942
943         return (-1);
944 }
945
946 /*
947  * Rips out leading blanks (-b).
948  */
949 struct bwstring *
950 ignore_leading_blanks(struct bwstring *str)
951 {
952
953         if (MB_CUR_MAX == 1) {
954                 unsigned char *dst, *end, *src;
955
956                 src = str->data.cstr;
957                 dst = src;
958                 end = src + str->len;
959
960                 while (src < end && isblank(*src))
961                         ++src;
962
963                 if (src != dst) {
964                         size_t newlen;
965
966                         newlen = BWSLEN(str) - (src - dst);
967
968                         while (src < end) {
969                                 *dst = *src;
970                                 ++dst;
971                                 ++src;
972                         }
973                         bws_setlen(str, newlen);
974                 }
975         } else {
976                 wchar_t *dst, *end, *src;
977
978                 src = str->data.wstr;
979                 dst = src;
980                 end = src + str->len;
981
982                 while (src < end && iswblank(*src))
983                         ++src;
984
985                 if (src != dst) {
986
987                         size_t newlen = BWSLEN(str) - (src - dst);
988
989                         while (src < end) {
990                                 *dst = *src;
991                                 ++dst;
992                                 ++src;
993                         }
994                         bws_setlen(str, newlen);
995
996                 }
997         }
998         return (str);
999 }
1000
1001 /*
1002  * Rips out nonprinting characters (-i).
1003  */
1004 struct bwstring *
1005 ignore_nonprinting(struct bwstring *str)
1006 {
1007         size_t newlen = str->len;
1008
1009         if (MB_CUR_MAX == 1) {
1010                 unsigned char *dst, *end, *src;
1011                 unsigned char c;
1012
1013                 src = str->data.cstr;
1014                 dst = src;
1015                 end = src + str->len;
1016
1017                 while (src < end) {
1018                         c = *src;
1019                         if (isprint(c)) {
1020                                 *dst = c;
1021                                 ++dst;
1022                                 ++src;
1023                         } else {
1024                                 ++src;
1025                                 --newlen;
1026                         }
1027                 }
1028         } else {
1029                 wchar_t *dst, *end, *src;
1030                 wchar_t c;
1031
1032                 src = str->data.wstr;
1033                 dst = src;
1034                 end = src + str->len;
1035
1036                 while (src < end) {
1037                         c = *src;
1038                         if (iswprint(c)) {
1039                                 *dst = c;
1040                                 ++dst;
1041                                 ++src;
1042                         } else {
1043                                 ++src;
1044                                 --newlen;
1045                         }
1046                 }
1047         }
1048         bws_setlen(str, newlen);
1049
1050         return (str);
1051 }
1052
1053 /*
1054  * Rips out any characters that are not alphanumeric characters
1055  * nor blanks (-d).
1056  */
1057 struct bwstring *
1058 dictionary_order(struct bwstring *str)
1059 {
1060         size_t newlen = str->len;
1061
1062         if (MB_CUR_MAX == 1) {
1063                 unsigned char *dst, *end, *src;
1064                 unsigned char c;
1065
1066                 src = str->data.cstr;
1067                 dst = src;
1068                 end = src + str->len;
1069
1070                 while (src < end) {
1071                         c = *src;
1072                         if (isalnum(c) || isblank(c)) {
1073                                 *dst = c;
1074                                 ++dst;
1075                                 ++src;
1076                         } else {
1077                                 ++src;
1078                                 --newlen;
1079                         }
1080                 }
1081         } else {
1082                 wchar_t *dst, *end, *src;
1083                 wchar_t c;
1084
1085                 src = str->data.wstr;
1086                 dst = src;
1087                 end = src + str->len;
1088
1089                 while (src < end) {
1090                         c = *src;
1091                         if (iswalnum(c) || iswblank(c)) {
1092                                 *dst = c;
1093                                 ++dst;
1094                                 ++src;
1095                         } else {
1096                                 ++src;
1097                                 --newlen;
1098                         }
1099                 }
1100         }
1101         bws_setlen(str, newlen);
1102
1103         return (str);
1104 }
1105
1106 /*
1107  * Converts string to lower case(-f).
1108  */
1109 struct bwstring *
1110 ignore_case(struct bwstring *str)
1111 {
1112
1113         if (MB_CUR_MAX == 1) {
1114                 unsigned char *end, *s;
1115
1116                 s = str->data.cstr;
1117                 end = s + str->len;
1118
1119                 while (s < end) {
1120                         *s = toupper(*s);
1121                         ++s;
1122                 }
1123         } else {
1124                 wchar_t *end, *s;
1125
1126                 s = str->data.wstr;
1127                 end = s + str->len;
1128
1129                 while (s < end) {
1130                         *s = towupper(*s);
1131                         ++s;
1132                 }
1133         }
1134         return (str);
1135 }
1136
1137 void
1138 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1139 {
1140
1141         if (MB_CUR_MAX == 1)
1142                 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1143         else
1144                 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1145 }