]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/bwstring.c
ping(8): Fix a mandoc related issue
[FreeBSD/FreeBSD.git] / usr.bin / sort / bwstring.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
46 bool byte_sort;
47
48 static wchar_t **wmonths;
49 static char **cmonths;
50
51 /* initialise months */
52
53 void
54 initialise_months(void)
55 {
56         const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57             ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58             ABMON_11, ABMON_12 };
59         char *tmp;
60         size_t len;
61
62         if (MB_CUR_MAX == 1) {
63                 if (cmonths == NULL) {
64                         char *m;
65
66                         cmonths = sort_malloc(sizeof(char*) * 12);
67                         for (int i = 0; i < 12; i++) {
68                                 cmonths[i] = NULL;
69                                 tmp = nl_langinfo(item[i]);
70                                 if (debug_sort)
71                                         printf("month[%d]=%s\n", i, tmp);
72                                 if (*tmp == '\0')
73                                         continue;
74                                 m = sort_strdup(tmp);
75                                 len = strlen(tmp);
76                                 for (unsigned int j = 0; j < len; j++)
77                                         m[j] = toupper(m[j]);
78                                 cmonths[i] = m;
79                         }
80                 }
81
82         } else {
83                 if (wmonths == NULL) {
84                         wchar_t *m;
85
86                         wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87                         for (int i = 0; i < 12; i++) {
88                                 wmonths[i] = NULL;
89                                 tmp = nl_langinfo(item[i]);
90                                 if (debug_sort)
91                                         printf("month[%d]=%s\n", i, tmp);
92                                 if (*tmp == '\0')
93                                         continue;
94                                 len = strlen(tmp);
95                                 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96                                 if (mbstowcs(m, tmp, len) ==
97                                     ((size_t) - 1)) {
98                                         sort_free(m);
99                                         continue;
100                                 }
101                                 m[len] = L'\0';
102                                 for (unsigned int j = 0; j < len; j++)
103                                         m[j] = towupper(m[j]);
104                                 wmonths[i] = m;
105                         }
106                 }
107         }
108 }
109
/*
 * Compare two NUL-terminated wide-character strings with wcscoll().
 * If wcscoll() reports EILSEQ, the result of wcscmp() is returned instead.
 * errno is cleared before the comparison, and again before the fallback
 * when it is taken.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int ret;

	errno = 0;
	ret = wcscoll(s1, s2);
	if (errno == EILSEQ) {
		/*
		 * wcscmp() defines no error conditions, so its result is
		 * final; no further fallback is needed.
		 */
		errno = 0;
		ret = wcscmp(s1, s2);
	}
	return (ret);
}
139
140 /* counterparts of wcs functions */
141
142 void
143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146         if (MB_CUR_MAX == 1)
147                 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
148         else
149                 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
150 }
151
152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155         return (&(bws->data));
156 }
157
158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161         return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
162 }
163
164 size_t
165 bws_memsize(const struct bwstring *bws)
166 {
167
168         return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
169             (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
170 }
171
172 void
173 bws_setlen(struct bwstring *bws, size_t newlen)
174 {
175
176         if (bws && newlen != bws->len && newlen <= bws->len) {
177                 bws->len = newlen;
178                 if (MB_CUR_MAX == 1)
179                         bws->data.cstr[newlen] = '\0';
180                 else
181                         bws->data.wstr[newlen] = L'\0';
182         }
183 }
184
185 /*
186  * Allocate a new binary string of specified size
187  */
188 struct bwstring *
189 bwsalloc(size_t sz)
190 {
191         struct bwstring *ret;
192
193         if (MB_CUR_MAX == 1)
194                 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
195         else
196                 ret = sort_malloc(sizeof(struct bwstring) +
197                     SIZEOF_WCHAR_STRING(sz + 1));
198         ret->len = sz;
199
200         if (MB_CUR_MAX == 1)
201                 ret->data.cstr[ret->len] = '\0';
202         else
203                 ret->data.wstr[ret->len] = L'\0';
204
205         return (ret);
206 }
207
208 /*
209  * Create a copy of binary string.
210  * New string size equals the length of the old string.
211  */
212 struct bwstring *
213 bwsdup(const struct bwstring *s)
214 {
215
216         if (s == NULL)
217                 return (NULL);
218         else {
219                 struct bwstring *ret = bwsalloc(s->len);
220
221                 if (MB_CUR_MAX == 1)
222                         memcpy(ret->data.cstr, s->data.cstr, (s->len));
223                 else
224                         memcpy(ret->data.wstr, s->data.wstr,
225                             SIZEOF_WCHAR_STRING(s->len));
226
227                 return (ret);
228         }
229 }
230
231 /*
232  * Create a new binary string from a wide character buffer.
233  */
234 struct bwstring *
235 bwssbdup(const wchar_t *str, size_t len)
236 {
237
238         if (str == NULL)
239                 return ((len == 0) ? bwsalloc(0) : NULL);
240         else {
241                 struct bwstring *ret;
242
243                 ret = bwsalloc(len);
244
245                 if (MB_CUR_MAX == 1)
246                         for (size_t i = 0; i < len; ++i)
247                                 ret->data.cstr[i] = (unsigned char) str[i];
248                 else
249                         memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
250
251                 return (ret);
252         }
253 }
254
255 /*
256  * Create a new binary string from a raw binary buffer.
257  */
258 struct bwstring *
259 bwscsbdup(const unsigned char *str, size_t len)
260 {
261         struct bwstring *ret;
262
263         ret = bwsalloc(len);
264
265         if (str) {
266                 if (MB_CUR_MAX == 1)
267                         memcpy(ret->data.cstr, str, len);
268                 else {
269                         mbstate_t mbs;
270                         const char *s;
271                         size_t charlen, chars, cptr;
272
273                         chars = 0;
274                         cptr = 0;
275                         s = (const char *) str;
276
277                         memset(&mbs, 0, sizeof(mbs));
278
279                         while (cptr < len) {
280                                 size_t n = MB_CUR_MAX;
281
282                                 if (n > len - cptr)
283                                         n = len - cptr;
284                                 charlen = mbrlen(s + cptr, n, &mbs);
285                                 switch (charlen) {
286                                 case 0:
287                                         /* FALLTHROUGH */
288                                 case (size_t) -1:
289                                         /* FALLTHROUGH */
290                                 case (size_t) -2:
291                                         ret->data.wstr[chars++] =
292                                             (unsigned char) s[cptr];
293                                         ++cptr;
294                                         break;
295                                 default:
296                                         n = mbrtowc(ret->data.wstr + (chars++),
297                                             s + cptr, charlen, &mbs);
298                                         if ((n == (size_t)-1) || (n == (size_t)-2))
299                                                 /* NOTREACHED */
300                                                 err(2, "mbrtowc error");
301                                         cptr += charlen;
302                                 }
303                         }
304
305                         ret->len = chars;
306                         ret->data.wstr[ret->len] = L'\0';
307                 }
308         }
309         return (ret);
310 }
311
/*
 * Release a string through sort_free(); a NULL pointer is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;
	sort_free(s);
}
322
323 /*
324  * Copy content of src binary string to dst.
325  * If the capacity of the dst string is not sufficient,
326  * then the data is truncated.
327  */
328 size_t
329 bwscpy(struct bwstring *dst, const struct bwstring *src)
330 {
331         size_t nums = src->len;
332
333         if (nums > dst->len)
334                 nums = dst->len;
335         dst->len = nums;
336
337         if (MB_CUR_MAX == 1) {
338                 memcpy(dst->data.cstr, src->data.cstr, nums);
339                 dst->data.cstr[dst->len] = '\0';
340         } else {
341                 memcpy(dst->data.wstr, src->data.wstr,
342                     SIZEOF_WCHAR_STRING(nums + 1));
343                 dst->data.wstr[dst->len] = L'\0';
344         }
345
346         return (nums);
347 }
348
349 /*
350  * Copy content of src binary string to dst,
351  * with specified number of symbols to be copied.
352  * If the capacity of the dst string is not sufficient,
353  * then the data is truncated.
354  */
355 struct bwstring *
356 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
357 {
358         size_t nums = src->len;
359
360         if (nums > dst->len)
361                 nums = dst->len;
362         if (nums > size)
363                 nums = size;
364         dst->len = nums;
365
366         if (MB_CUR_MAX == 1) {
367                 memcpy(dst->data.cstr, src->data.cstr, nums);
368                 dst->data.cstr[dst->len] = '\0';
369         } else {
370                 memcpy(dst->data.wstr, src->data.wstr,
371                     SIZEOF_WCHAR_STRING(nums + 1));
372                 dst->data.wstr[dst->len] = L'\0';
373         }
374
375         return (dst);
376 }
377
378 /*
379  * Copy content of src binary string to dst,
380  * with specified number of symbols to be copied.
381  * An offset value can be specified, from the start of src string.
382  * If the capacity of the dst string is not sufficient,
383  * then the data is truncated.
384  */
385 struct bwstring *
386 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
387     size_t size)
388 {
389
390         if (offset >= src->len) {
391                 dst->data.wstr[0] = 0;
392                 dst->len = 0;
393         } else {
394                 size_t nums = src->len - offset;
395
396                 if (nums > dst->len)
397                         nums = dst->len;
398                 if (nums > size)
399                         nums = size;
400                 dst->len = nums;
401                 if (MB_CUR_MAX == 1) {
402                         memcpy(dst->data.cstr, src->data.cstr + offset,
403                             (nums));
404                         dst->data.cstr[dst->len] = '\0';
405                 } else {
406                         memcpy(dst->data.wstr, src->data.wstr + offset,
407                             SIZEOF_WCHAR_STRING(nums));
408                         dst->data.wstr[dst->len] = L'\0';
409                 }
410         }
411         return (dst);
412 }
413
414 /*
415  * Write binary string to the file.
416  * The output is ended either with '\n' (nl == true)
417  * or '\0' (nl == false).
418  */
419 size_t
420 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
421 {
422
423         if (MB_CUR_MAX == 1) {
424                 size_t len = bws->len;
425
426                 if (!zero_ended) {
427                         bws->data.cstr[len] = '\n';
428
429                         if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
430                                 err(2, NULL);
431
432                         bws->data.cstr[len] = '\0';
433                 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
434                         err(2, NULL);
435
436                 return (len + 1);
437
438         } else {
439                 wchar_t eols;
440                 size_t printed = 0;
441
442                 eols = zero_ended ? btowc('\0') : btowc('\n');
443
444                 while (printed < BWSLEN(bws)) {
445                         const wchar_t *s = bws->data.wstr + printed;
446
447                         if (*s == L'\0') {
448                                 int nums;
449
450                                 nums = fwprintf(f, L"%lc", *s);
451
452                                 if (nums != 1)
453                                         err(2, NULL);
454                                 ++printed;
455                         } else {
456                                 int nums;
457
458                                 nums = fwprintf(f, L"%ls", s);
459
460                                 if (nums < 1)
461                                         err(2, NULL);
462                                 printed += nums;
463                         }
464                 }
465                 fwprintf(f, L"%lc", eols);
466                 return (printed + 1);
467         }
468 }
469
470 /*
471  * Allocate and read a binary string from file.
472  * The strings are nl-ended or zero-ended, depending on the sort setting.
473  */
474 struct bwstring *
475 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
476 {
477         wint_t eols;
478
479         eols = zero_ended ? btowc('\0') : btowc('\n');
480
481         if (!zero_ended && (MB_CUR_MAX > 1)) {
482                 wchar_t *ret;
483
484                 ret = fgetwln(f, len);
485
486                 if (ret == NULL) {
487                         if (!feof(f))
488                                 err(2, NULL);
489                         return (NULL);
490                 }
491                 if (*len > 0) {
492                         if (ret[*len - 1] == (wchar_t)eols)
493                                 --(*len);
494                 }
495                 return (bwssbdup(ret, *len));
496
497         } else if (!zero_ended && (MB_CUR_MAX == 1)) {
498                 char *ret;
499
500                 ret = fgetln(f, len);
501
502                 if (ret == NULL) {
503                         if (!feof(f))
504                                 err(2, NULL);
505                         return (NULL);
506                 }
507                 if (*len > 0) {
508                         if (ret[*len - 1] == '\n')
509                                 --(*len);
510                 }
511                 return (bwscsbdup((unsigned char*)ret, *len));
512
513         } else {
514                 *len = 0;
515
516                 if (feof(f))
517                         return (NULL);
518
519                 if (2 >= rb->fgetwln_z_buffer_size) {
520                         rb->fgetwln_z_buffer_size += 256;
521                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
522                             sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
523                 }
524                 rb->fgetwln_z_buffer[*len] = 0;
525
526                 if (MB_CUR_MAX == 1)
527                         while (!feof(f)) {
528                                 int c;
529
530                                 c = fgetc(f);
531
532                                 if (c == EOF) {
533                                         if (*len == 0)
534                                                 return (NULL);
535                                         goto line_read_done;
536                                 }
537                                 if (c == eols)
538                                         goto line_read_done;
539
540                                 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
541                                         rb->fgetwln_z_buffer_size += 256;
542                                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
543                                             SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
544                                 }
545
546                                 rb->fgetwln_z_buffer[*len] = c;
547                                 rb->fgetwln_z_buffer[++(*len)] = 0;
548                         }
549                 else
550                         while (!feof(f)) {
551                                 wint_t c = 0;
552
553                                 c = fgetwc(f);
554
555                                 if (c == WEOF) {
556                                         if (*len == 0)
557                                                 return (NULL);
558                                         goto line_read_done;
559                                 }
560                                 if (c == eols)
561                                         goto line_read_done;
562
563                                 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
564                                         rb->fgetwln_z_buffer_size += 256;
565                                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
566                                             SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
567                                 }
568
569                                 rb->fgetwln_z_buffer[*len] = c;
570                                 rb->fgetwln_z_buffer[++(*len)] = 0;
571                         }
572
573 line_read_done:
574                 /* we do not count the last 0 */
575                 return (bwssbdup(rb->fgetwln_z_buffer, *len));
576         }
577 }
578
579 int
580 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
581     size_t offset, size_t len)
582 {
583         size_t cmp_len, len1, len2;
584         int res = 0;
585
586         len1 = bws1->len;
587         len2 = bws2->len;
588
589         if (len1 <= offset) {
590                 return ((len2 <= offset) ? 0 : -1);
591         } else {
592                 if (len2 <= offset)
593                         return (+1);
594                 else {
595                         len1 -= offset;
596                         len2 -= offset;
597
598                         cmp_len = len1;
599
600                         if (len2 < cmp_len)
601                                 cmp_len = len2;
602
603                         if (len < cmp_len)
604                                 cmp_len = len;
605
606                         if (MB_CUR_MAX == 1) {
607                                 const unsigned char *s1, *s2;
608
609                                 s1 = bws1->data.cstr + offset;
610                                 s2 = bws2->data.cstr + offset;
611
612                                 res = memcmp(s1, s2, cmp_len);
613
614                         } else {
615                                 const wchar_t *s1, *s2;
616
617                                 s1 = bws1->data.wstr + offset;
618                                 s2 = bws2->data.wstr + offset;
619
620                                 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
621                         }
622                 }
623         }
624
625         if (res == 0) {
626                 if (len1 < cmp_len && len1 < len2)
627                         res = -1;
628                 else if (len2 < cmp_len && len2 < len1)
629                         res = +1;
630         }
631
632         return (res);
633 }
634
635 int
636 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
637 {
638         size_t len1, len2, cmp_len;
639         int res;
640
641         len1 = bws1->len;
642         len2 = bws2->len;
643
644         len1 -= offset;
645         len2 -= offset;
646
647         cmp_len = len1;
648
649         if (len2 < cmp_len)
650                 cmp_len = len2;
651
652         res = bwsncmp(bws1, bws2, offset, cmp_len);
653
654         if (res == 0) {
655                 if( len1 < len2)
656                         res = -1;
657                 else if (len2 < len1)
658                         res = +1;
659         }
660
661         return (res);
662 }
663
664 int
665 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
666 {
667         wchar_t c1, c2;
668         size_t i = 0;
669
670         for (i = 0; i < len; ++i) {
671                 c1 = bws_get_iter_value(iter1);
672                 c2 = bws_get_iter_value(iter2);
673                 if (c1 != c2)
674                         return (c1 - c2);
675                 iter1 = bws_iterator_inc(iter1, 1);
676                 iter2 = bws_iterator_inc(iter2, 1);
677         }
678
679         return (0);
680 }
681
682 int
683 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
684 {
685         size_t len1, len2;
686
687         len1 = bws1->len;
688         len2 = bws2->len;
689
690         if (len1 <= offset)
691                 return ((len2 <= offset) ? 0 : -1);
692         else {
693                 if (len2 <= offset)
694                         return (+1);
695                 else {
696                         len1 -= offset;
697                         len2 -= offset;
698
699                         if (MB_CUR_MAX == 1) {
700                                 const unsigned char *s1, *s2;
701
702                                 s1 = bws1->data.cstr + offset;
703                                 s2 = bws2->data.cstr + offset;
704
705                                 if (byte_sort) {
706                                         int res = 0;
707
708                                         if (len1 > len2) {
709                                                 res = memcmp(s1, s2, len2);
710                                                 if (!res)
711                                                         res = +1;
712                                         } else if (len1 < len2) {
713                                                 res = memcmp(s1, s2, len1);
714                                                 if (!res)
715                                                         res = -1;
716                                         } else
717                                                 res = memcmp(s1, s2, len1);
718
719                                         return (res);
720
721                                 } else {
722                                         int res = 0;
723                                         size_t i, maxlen;
724
725                                         i = 0;
726                                         maxlen = len1;
727
728                                         if (maxlen > len2)
729                                                 maxlen = len2;
730
731                                         while (i < maxlen) {
732                                                 /* goto next non-zero part: */
733                                                 while ((i < maxlen) &&
734                                                     !s1[i] && !s2[i])
735                                                         ++i;
736
737                                                 if (i >= maxlen)
738                                                         break;
739
740                                                 if (s1[i] == 0) {
741                                                         if (s2[i] == 0)
742                                                                 /* NOTREACHED */
743                                                                 err(2, "bwscoll error 01");
744                                                         else
745                                                                 return (-1);
746                                                 } else if (s2[i] == 0)
747                                                         return (+1);
748
749                                                 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
750                                                 if (res)
751                                                         return (res);
752
753                                                 while ((i < maxlen) &&
754                                                     s1[i] && s2[i])
755                                                         ++i;
756
757                                                 if (i >= maxlen)
758                                                         break;
759
760                                                 if (s1[i] == 0) {
761                                                         if (s2[i] == 0) {
762                                                                 ++i;
763                                                                 continue;
764                                                         } else
765                                                                 return (-1);
766                                                 } else if (s2[i] == 0)
767                                                         return (+1);
768                                                 else
769                                                         /* NOTREACHED */
770                                                         err(2, "bwscoll error 02");
771                                         }
772
773                                         if (len1 < len2)
774                                                 return (-1);
775                                         else if (len1 > len2)
776                                                 return (+1);
777
778                                         return (0);
779                                 }
780                         } else {
781                                 const wchar_t *s1, *s2;
782                                 size_t i, maxlen;
783                                 int res = 0;
784
785                                 s1 = bws1->data.wstr + offset;
786                                 s2 = bws2->data.wstr + offset;
787
788                                 i = 0;
789                                 maxlen = len1;
790
791                                 if (maxlen > len2)
792                                         maxlen = len2;
793
794                                 while (i < maxlen) {
795
796                                         /* goto next non-zero part: */
797                                         while ((i < maxlen) &&
798                                             !s1[i] && !s2[i])
799                                                 ++i;
800
801                                         if (i >= maxlen)
802                                                 break;
803
804                                         if (s1[i] == 0) {
805                                                 if (s2[i] == 0)
806                                                         /* NOTREACHED */
807                                                         err(2, "bwscoll error 1");
808                                                 else
809                                                         return (-1);
810                                         } else if (s2[i] == 0)
811                                                 return (+1);
812
813                                         res = wide_str_coll(s1 + i, s2 + i);
814                                         if (res)
815                                                 return (res);
816
817                                         while ((i < maxlen) && s1[i] && s2[i])
818                                                 ++i;
819
820                                         if (i >= maxlen)
821                                                 break;
822
823                                         if (s1[i] == 0) {
824                                                 if (s2[i] == 0) {
825                                                         ++i;
826                                                         continue;
827                                                 } else
828                                                         return (-1);
829                                         } else if (s2[i] == 0)
830                                                 return (+1);
831                                         else
832                                                 /* NOTREACHED */
833                                                 err(2, "bwscoll error 2");
834                                 }
835
836                                 if (len1 < len2)
837                                         return (-1);
838                                 else if (len1 > len2)
839                                         return (+1);
840
841                                 return (0);
842                         }
843                 }
844         }
845 }
846
847 /*
848  * Correction of the system API
849  */
850 double
851 bwstod(struct bwstring *s0, bool *empty)
852 {
853         double ret = 0;
854
855         if (MB_CUR_MAX == 1) {
856                 unsigned char *end, *s;
857                 char *ep;
858
859                 s = s0->data.cstr;
860                 end = s + s0->len;
861                 ep = NULL;
862
863                 while (isblank(*s) && s < end)
864                         ++s;
865
866                 if (!isprint(*s)) {
867                         *empty = true;
868                         return (0);
869                 }
870
871                 ret = strtod((char*)s, &ep);
872                 if ((unsigned char*) ep == s) {
873                         *empty = true;
874                         return (0);
875                 }
876         } else {
877                 wchar_t *end, *ep, *s;
878
879                 s = s0->data.wstr;
880                 end = s + s0->len;
881                 ep = NULL;
882
883                 while (iswblank(*s) && s < end)
884                         ++s;
885
886                 if (!iswprint(*s)) {
887                         *empty = true;
888                         return (0);
889                 }
890
891                 ret = wcstod(s, &ep);
892                 if (ep == s) {
893                         *empty = true;
894                         return (0);
895                 }
896         }
897
898         *empty = false;
899         return (ret);
900 }
901
902 /*
903  * A helper function for monthcoll.  If a line matches
904  * a month name, it returns (number of the month - 1),
905  * while if there is no match, it just return -1.
906  */
907
908 int
909 bws_month_score(const struct bwstring *s0)
910 {
911
912         if (MB_CUR_MAX == 1) {
913                 const unsigned char *end, *s;
914
915                 s = s0->data.cstr;
916                 end = s + s0->len;
917
918                 while (isblank(*s) && s < end)
919                         ++s;
920
921                 for (int i = 11; i >= 0; --i) {
922                         if (cmonths[i] &&
923                             (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
924                                 return (i);
925                 }
926
927         } else {
928                 const wchar_t *end, *s;
929
930                 s = s0->data.wstr;
931                 end = s + s0->len;
932
933                 while (iswblank(*s) && s < end)
934                         ++s;
935
936                 for (int i = 11; i >= 0; --i) {
937                         if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
938                                 return (i);
939                 }
940         }
941
942         return (-1);
943 }
944
945 /*
946  * Rips out leading blanks (-b).
947  */
948 struct bwstring *
949 ignore_leading_blanks(struct bwstring *str)
950 {
951
952         if (MB_CUR_MAX == 1) {
953                 unsigned char *dst, *end, *src;
954
955                 src = str->data.cstr;
956                 dst = src;
957                 end = src + str->len;
958
959                 while (src < end && isblank(*src))
960                         ++src;
961
962                 if (src != dst) {
963                         size_t newlen;
964
965                         newlen = BWSLEN(str) - (src - dst);
966
967                         while (src < end) {
968                                 *dst = *src;
969                                 ++dst;
970                                 ++src;
971                         }
972                         bws_setlen(str, newlen);
973                 }
974         } else {
975                 wchar_t *dst, *end, *src;
976
977                 src = str->data.wstr;
978                 dst = src;
979                 end = src + str->len;
980
981                 while (src < end && iswblank(*src))
982                         ++src;
983
984                 if (src != dst) {
985
986                         size_t newlen = BWSLEN(str) - (src - dst);
987
988                         while (src < end) {
989                                 *dst = *src;
990                                 ++dst;
991                                 ++src;
992                         }
993                         bws_setlen(str, newlen);
994
995                 }
996         }
997         return (str);
998 }
999
1000 /*
1001  * Rips out nonprinting characters (-i).
1002  */
1003 struct bwstring *
1004 ignore_nonprinting(struct bwstring *str)
1005 {
1006         size_t newlen = str->len;
1007
1008         if (MB_CUR_MAX == 1) {
1009                 unsigned char *dst, *end, *src;
1010                 unsigned char c;
1011
1012                 src = str->data.cstr;
1013                 dst = src;
1014                 end = src + str->len;
1015
1016                 while (src < end) {
1017                         c = *src;
1018                         if (isprint(c)) {
1019                                 *dst = c;
1020                                 ++dst;
1021                                 ++src;
1022                         } else {
1023                                 ++src;
1024                                 --newlen;
1025                         }
1026                 }
1027         } else {
1028                 wchar_t *dst, *end, *src;
1029                 wchar_t c;
1030
1031                 src = str->data.wstr;
1032                 dst = src;
1033                 end = src + str->len;
1034
1035                 while (src < end) {
1036                         c = *src;
1037                         if (iswprint(c)) {
1038                                 *dst = c;
1039                                 ++dst;
1040                                 ++src;
1041                         } else {
1042                                 ++src;
1043                                 --newlen;
1044                         }
1045                 }
1046         }
1047         bws_setlen(str, newlen);
1048
1049         return (str);
1050 }
1051
1052 /*
1053  * Rips out any characters that are not alphanumeric characters
1054  * nor blanks (-d).
1055  */
1056 struct bwstring *
1057 dictionary_order(struct bwstring *str)
1058 {
1059         size_t newlen = str->len;
1060
1061         if (MB_CUR_MAX == 1) {
1062                 unsigned char *dst, *end, *src;
1063                 unsigned char c;
1064
1065                 src = str->data.cstr;
1066                 dst = src;
1067                 end = src + str->len;
1068
1069                 while (src < end) {
1070                         c = *src;
1071                         if (isalnum(c) || isblank(c)) {
1072                                 *dst = c;
1073                                 ++dst;
1074                                 ++src;
1075                         } else {
1076                                 ++src;
1077                                 --newlen;
1078                         }
1079                 }
1080         } else {
1081                 wchar_t *dst, *end, *src;
1082                 wchar_t c;
1083
1084                 src = str->data.wstr;
1085                 dst = src;
1086                 end = src + str->len;
1087
1088                 while (src < end) {
1089                         c = *src;
1090                         if (iswalnum(c) || iswblank(c)) {
1091                                 *dst = c;
1092                                 ++dst;
1093                                 ++src;
1094                         } else {
1095                                 ++src;
1096                                 --newlen;
1097                         }
1098                 }
1099         }
1100         bws_setlen(str, newlen);
1101
1102         return (str);
1103 }
1104
1105 /*
1106  * Converts string to lower case(-f).
1107  */
1108 struct bwstring *
1109 ignore_case(struct bwstring *str)
1110 {
1111
1112         if (MB_CUR_MAX == 1) {
1113                 unsigned char *end, *s;
1114
1115                 s = str->data.cstr;
1116                 end = s + str->len;
1117
1118                 while (s < end) {
1119                         *s = toupper(*s);
1120                         ++s;
1121                 }
1122         } else {
1123                 wchar_t *end, *s;
1124
1125                 s = str->data.wstr;
1126                 end = s + str->len;
1127
1128                 while (s < end) {
1129                         *s = towupper(*s);
1130                         ++s;
1131                 }
1132         }
1133         return (str);
1134 }
1135
1136 void
1137 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1138 {
1139
1140         if (MB_CUR_MAX == 1)
1141                 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1142         else
1143                 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1144 }