]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/bwstring.c
MFV: less v608
[FreeBSD/FreeBSD.git] / usr.bin / sort / bwstring.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
/*
 * When true, bwscoll() orders single-byte strings with memcmp()
 * instead of strcoll().
 */
46 bool byte_sort;
47
/*
 * Upper-cased abbreviated month names, 12 entries each, filled in on the
 * first call to initialise_months(): cmonths when mb_cur_max == 1,
 * wmonths otherwise.  An entry is NULL when the locale reports an empty
 * name (or, for wmonths, when the name cannot be converted).
 */
48 static wchar_t **wmonths;
49 static char **cmonths;
50
51 /* initialise months */
52
53 void
54 initialise_months(void)
55 {
56         const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57             ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58             ABMON_11, ABMON_12 };
59         char *tmp;
60         size_t len;
61
62         if (mb_cur_max == 1) {
63                 if (cmonths == NULL) {
64                         char *m;
65
66                         cmonths = sort_malloc(sizeof(char*) * 12);
67                         for (int i = 0; i < 12; i++) {
68                                 cmonths[i] = NULL;
69                                 tmp = nl_langinfo(item[i]);
70                                 if (debug_sort)
71                                         printf("month[%d]=%s\n", i, tmp);
72                                 if (*tmp == '\0')
73                                         continue;
74                                 m = sort_strdup(tmp);
75                                 len = strlen(tmp);
76                                 for (unsigned int j = 0; j < len; j++)
77                                         m[j] = toupper(m[j]);
78                                 cmonths[i] = m;
79                         }
80                 }
81
82         } else {
83                 if (wmonths == NULL) {
84                         wchar_t *m;
85
86                         wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87                         for (int i = 0; i < 12; i++) {
88                                 wmonths[i] = NULL;
89                                 tmp = nl_langinfo(item[i]);
90                                 if (debug_sort)
91                                         printf("month[%d]=%s\n", i, tmp);
92                                 if (*tmp == '\0')
93                                         continue;
94                                 len = strlen(tmp);
95                                 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96                                 if (mbstowcs(m, tmp, len) ==
97                                     ((size_t) - 1)) {
98                                         sort_free(m);
99                                         continue;
100                                 }
101                                 m[len] = L'\0';
102                                 for (unsigned int j = 0; j < len; j++)
103                                         m[j] = towupper(m[j]);
104                                 wmonths[i] = m;
105                         }
106                 }
107         }
108 }
109
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the comparison is
 * repeated with wcscmp(), and should that leave errno set as well, the
 * strings are compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int cmp;

	errno = 0;
	cmp = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (cmp);

	errno = 0;
	cmp = wcscmp(s1, s2);
	if (errno == 0)
		return (cmp);

	/* Last resort: plain element-wise comparison. */
	for (const wchar_t *p = s1, *q = s2; ; ++p, ++q) {
		if (*p == L'\0')
			return ((*q == L'\0') ? 0 : -1);
		if (*q == L'\0')
			return (+1);
		if (*p != *q)
			return ((int)(*p - *q));
	}
}
139
140 /* counterparts of wcs functions */
141
142 void
143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146         if (mb_cur_max == 1)
147                 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
148         else
149                 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
150 }
151
152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155         return (bws->wdata.str);
156 }
157
158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161         return ((mb_cur_max == 1) ? bws->cdata.len :
162             SIZEOF_WCHAR_STRING(bws->wdata.len));
163 }
164
165 size_t
166 bws_memsize(const struct bwstring *bws)
167 {
168
169         return ((mb_cur_max == 1) ?
170             (bws->cdata.len + 2 + sizeof(struct bwstring)) :
171             (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
172 }
173
174 void
175 bws_setlen(struct bwstring *bws, size_t newlen)
176 {
177
178         if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
179             newlen <= bws->cdata.len) {
180                 bws->cdata.len = newlen;
181                 bws->cdata.str[newlen] = '\0';
182         } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
183                 bws->wdata.len = newlen;
184                 bws->wdata.str[newlen] = L'\0';
185         }
186 }
187
188 /*
189  * Allocate a new binary string of specified size
190  */
191 struct bwstring *
192 bwsalloc(size_t sz)
193 {
194         struct bwstring *ret;
195
196         if (mb_cur_max == 1) {
197                 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
198                 ret->cdata.len = sz;
199                 ret->cdata.str[sz] = '\0';
200         } else {
201                 ret = sort_malloc(
202                     sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
203                 ret->wdata.len = sz;
204                 ret->wdata.str[sz] = L'\0';
205         }
206
207         return (ret);
208 }
209
210 /*
211  * Create a copy of binary string.
212  * New string size equals the length of the old string.
213  */
214 struct bwstring *
215 bwsdup(const struct bwstring *s)
216 {
217
218         if (s == NULL)
219                 return (NULL);
220         else {
221                 struct bwstring *ret = bwsalloc(BWSLEN(s));
222
223                 if (mb_cur_max == 1)
224                         memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
225                 else
226                         memcpy(ret->wdata.str, s->wdata.str,
227                             SIZEOF_WCHAR_STRING(s->wdata.len));
228
229                 return (ret);
230         }
231 }
232
233 /*
234  * Create a new binary string from a wide character buffer.
235  */
236 struct bwstring *
237 bwssbdup(const wchar_t *str, size_t len)
238 {
239
240         if (str == NULL)
241                 return ((len == 0) ? bwsalloc(0) : NULL);
242         else {
243                 struct bwstring *ret;
244
245                 ret = bwsalloc(len);
246
247                 if (mb_cur_max == 1)
248                         for (size_t i = 0; i < len; ++i)
249                                 ret->cdata.str[i] = (char)str[i];
250                 else
251                         memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
252
253                 return (ret);
254         }
255 }
256
257 /*
258  * Create a new binary string from a raw binary buffer.
259  */
260 struct bwstring *
261 bwscsbdup(const unsigned char *str, size_t len)
262 {
263         struct bwstring *ret;
264
265         ret = bwsalloc(len);
266
267         if (str) {
268                 if (mb_cur_max == 1)
269                         memcpy(ret->cdata.str, str, len);
270                 else {
271                         mbstate_t mbs;
272                         const char *s;
273                         size_t charlen, chars, cptr;
274
275                         chars = 0;
276                         cptr = 0;
277                         s = (const char *) str;
278
279                         memset(&mbs, 0, sizeof(mbs));
280
281                         while (cptr < len) {
282                                 size_t n = mb_cur_max;
283
284                                 if (n > len - cptr)
285                                         n = len - cptr;
286                                 charlen = mbrlen(s + cptr, n, &mbs);
287                                 switch (charlen) {
288                                 case 0:
289                                         /* FALLTHROUGH */
290                                 case (size_t) -1:
291                                         /* FALLTHROUGH */
292                                 case (size_t) -2:
293                                         ret->wdata.str[chars++] =
294                                             (unsigned char) s[cptr];
295                                         ++cptr;
296                                         break;
297                                 default:
298                                         n = mbrtowc(ret->wdata.str + (chars++),
299                                             s + cptr, charlen, &mbs);
300                                         if ((n == (size_t)-1) || (n == (size_t)-2))
301                                                 /* NOTREACHED */
302                                                 err(2, "mbrtowc error");
303                                         cptr += charlen;
304                                 }
305                         }
306
307                         ret->wdata.len = chars;
308                         ret->wdata.str[ret->wdata.len] = L'\0';
309                 }
310         }
311         return (ret);
312 }
313
/*
 * Release a binary string.  A NULL pointer is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;
	sort_free(s);
}
324
325 /*
326  * Copy content of src binary string to dst.
327  * If the capacity of the dst string is not sufficient,
328  * then the data is truncated.
329  */
330 size_t
331 bwscpy(struct bwstring *dst, const struct bwstring *src)
332 {
333         size_t nums = BWSLEN(src);
334
335         if (nums > BWSLEN(dst))
336                 nums = BWSLEN(dst);
337
338         if (mb_cur_max == 1) {
339                 memcpy(dst->cdata.str, src->cdata.str, nums);
340                 dst->cdata.len = nums;
341                 dst->cdata.str[dst->cdata.len] = '\0';
342         } else {
343                 memcpy(dst->wdata.str, src->wdata.str,
344                     SIZEOF_WCHAR_STRING(nums));
345                 dst->wdata.len = nums;
346                 dst->wdata.str[nums] = L'\0';
347         }
348
349         return (nums);
350 }
351
352 /*
353  * Copy content of src binary string to dst,
354  * with specified number of symbols to be copied.
355  * If the capacity of the dst string is not sufficient,
356  * then the data is truncated.
357  */
358 struct bwstring *
359 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
360 {
361         size_t nums = BWSLEN(src);
362
363         if (nums > BWSLEN(dst))
364                 nums = BWSLEN(dst);
365         if (nums > size)
366                 nums = size;
367
368         if (mb_cur_max == 1) {
369                 memcpy(dst->cdata.str, src->cdata.str, nums);
370                 dst->cdata.len = nums;
371                 dst->cdata.str[nums] = '\0';
372         } else {
373                 memcpy(dst->wdata.str, src->wdata.str,
374                     SIZEOF_WCHAR_STRING(nums));
375                 dst->wdata.len = nums;
376                 dst->wdata.str[nums] = L'\0';
377         }
378
379         return (dst);
380 }
381
382 /*
383  * Copy content of src binary string to dst,
384  * with specified number of symbols to be copied.
385  * An offset value can be specified, from the start of src string.
386  * If the capacity of the dst string is not sufficient,
387  * then the data is truncated.
388  */
389 struct bwstring *
390 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
391     size_t size)
392 {
393
394         if (offset >= BWSLEN(src)) {
395                 bws_setlen(dst, 0);
396         } else {
397                 size_t nums = BWSLEN(src) - offset;
398
399                 if (nums > BWSLEN(dst))
400                         nums = BWSLEN(dst);
401                 if (nums > size)
402                         nums = size;
403                 if (mb_cur_max == 1) {
404                         memcpy(dst->cdata.str, src->cdata.str + offset, nums);
405                         dst->cdata.len = nums;
406                         dst->cdata.str[nums] = '\0';
407                 } else {
408                         memcpy(dst->wdata.str, src->wdata.str + offset,
409                             SIZEOF_WCHAR_STRING(nums));
410                         dst->wdata.len = nums;
411                         dst->wdata.str[nums] = L'\0';
412                 }
413         }
414         return (dst);
415 }
416
417 /*
418  * Write binary string to the file.
419  * The output is ended either with '\n' (nl == true)
420  * or '\0' (nl == false).
421  */
422 size_t
423 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
424 {
425
426         if (mb_cur_max == 1) {
427                 size_t len = bws->cdata.len;
428
429                 if (!zero_ended) {
430                         bws->cdata.str[len] = '\n';
431
432                         if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
433                                 err(2, NULL);
434
435                         bws->cdata.str[len] = '\0';
436                 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
437                         err(2, NULL);
438
439                 return (len + 1);
440
441         } else {
442                 wchar_t eols;
443                 size_t printed = 0;
444
445                 eols = zero_ended ? btowc('\0') : btowc('\n');
446
447                 while (printed < BWSLEN(bws)) {
448                         const wchar_t *s = bws->wdata.str + printed;
449
450                         if (*s == L'\0') {
451                                 int nums;
452
453                                 nums = fwprintf(f, L"%lc", *s);
454
455                                 if (nums != 1)
456                                         err(2, NULL);
457                                 ++printed;
458                         } else {
459                                 int nums;
460
461                                 nums = fwprintf(f, L"%ls", s);
462
463                                 if (nums < 1)
464                                         err(2, NULL);
465                                 printed += nums;
466                         }
467                 }
468                 fwprintf(f, L"%lc", eols);
469                 return (printed + 1);
470         }
471 }
472
473 /*
474  * Allocate and read a binary string from file.
475  * The strings are nl-ended or zero-ended, depending on the sort setting.
476  */
477 struct bwstring *
478 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
479 {
480         wint_t eols;
481
482         eols = zero_ended ? btowc('\0') : btowc('\n');
483
484         if (!zero_ended && (mb_cur_max > 1)) {
485                 wchar_t *ret;
486
487                 ret = fgetwln(f, len);
488
489                 if (ret == NULL) {
490                         if (!feof(f))
491                                 err(2, NULL);
492                         return (NULL);
493                 }
494                 if (*len > 0) {
495                         if (ret[*len - 1] == (wchar_t)eols)
496                                 --(*len);
497                 }
498                 return (bwssbdup(ret, *len));
499
500         } else if (!zero_ended && (mb_cur_max == 1)) {
501                 char *ret;
502
503                 ret = fgetln(f, len);
504
505                 if (ret == NULL) {
506                         if (!feof(f))
507                                 err(2, NULL);
508                         return (NULL);
509                 }
510                 if (*len > 0) {
511                         if (ret[*len - 1] == '\n')
512                                 --(*len);
513                 }
514                 return (bwscsbdup((unsigned char *)ret, *len));
515
516         } else {
517                 *len = 0;
518
519                 if (feof(f))
520                         return (NULL);
521
522                 if (2 >= rb->fgetwln_z_buffer_size) {
523                         rb->fgetwln_z_buffer_size += 256;
524                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
525                             sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
526                 }
527                 rb->fgetwln_z_buffer[*len] = 0;
528
529                 if (mb_cur_max == 1)
530                         while (!feof(f)) {
531                                 int c;
532
533                                 c = fgetc(f);
534
535                                 if (c == EOF) {
536                                         if (*len == 0)
537                                                 return (NULL);
538                                         goto line_read_done;
539                                 }
540                                 if (c == eols)
541                                         goto line_read_done;
542
543                                 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
544                                         rb->fgetwln_z_buffer_size += 256;
545                                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
546                                             SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
547                                 }
548
549                                 rb->fgetwln_z_buffer[*len] = c;
550                                 rb->fgetwln_z_buffer[++(*len)] = 0;
551                         }
552                 else
553                         while (!feof(f)) {
554                                 wint_t c;
555
556                                 c = fgetwc(f);
557
558                                 if (c == WEOF) {
559                                         if (*len == 0)
560                                                 return (NULL);
561                                         goto line_read_done;
562                                 }
563                                 if (c == eols)
564                                         goto line_read_done;
565
566                                 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
567                                         rb->fgetwln_z_buffer_size += 256;
568                                         rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
569                                             SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
570                                 }
571
572                                 rb->fgetwln_z_buffer[*len] = c;
573                                 rb->fgetwln_z_buffer[++(*len)] = 0;
574                         }
575
576 line_read_done:
577                 /* we do not count the last 0 */
578                 return (bwssbdup(rb->fgetwln_z_buffer, *len));
579         }
580 }
581
582 int
583 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
584     size_t offset, size_t len)
585 {
586         size_t cmp_len, len1, len2;
587         int res;
588
589         len1 = BWSLEN(bws1);
590         len2 = BWSLEN(bws2);
591
592         if (len1 <= offset) {
593                 return ((len2 <= offset) ? 0 : -1);
594         } else {
595                 if (len2 <= offset)
596                         return (+1);
597                 else {
598                         len1 -= offset;
599                         len2 -= offset;
600
601                         cmp_len = len1;
602
603                         if (len2 < cmp_len)
604                                 cmp_len = len2;
605
606                         if (len < cmp_len)
607                                 cmp_len = len;
608
609                         if (mb_cur_max == 1) {
610                                 const char *s1, *s2;
611
612                                 s1 = bws1->cdata.str + offset;
613                                 s2 = bws2->cdata.str + offset;
614
615                                 res = memcmp(s1, s2, cmp_len);
616
617                         } else {
618                                 const wchar_t *s1, *s2;
619
620                                 s1 = bws1->wdata.str + offset;
621                                 s2 = bws2->wdata.str + offset;
622
623                                 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
624                         }
625                 }
626         }
627
628         if (res == 0) {
629                 if (len1 < cmp_len && len1 < len2)
630                         res = -1;
631                 else if (len2 < cmp_len && len2 < len1)
632                         res = +1;
633         }
634
635         return (res);
636 }
637
/*
 * Compare two binary strings starting at element offset.
 *
 * The common part is compared with bwsncmp(); if it is equal, the string
 * with fewer elements left after offset sorts first.  A string that is
 * not longer than offset is treated as having nothing left.
 *
 * Returns a negative value, 0 or a positive value.
 */
int
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
	size_t len1, len2, cmp_len;
	int res;

	len1 = BWSLEN(bws1);
	len2 = BWSLEN(bws2);

	/*
	 * Clamp instead of subtracting blindly: the lengths are unsigned,
	 * so an offset past the end would wrap to a huge value and skew
	 * the length comparison below.
	 */
	len1 = (len1 > offset) ? len1 - offset : 0;
	len2 = (len2 > offset) ? len2 - offset : 0;

	cmp_len = len1;

	if (len2 < cmp_len)
		cmp_len = len2;

	res = bwsncmp(bws1, bws2, offset, cmp_len);

	if (res == 0) {
		if (len1 < len2)
			res = -1;
		else if (len2 < len1)
			res = +1;
	}

	return (res);
}
666
667 int
668 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
669 {
670         wchar_t c1, c2;
671         size_t i;
672
673         for (i = 0; i < len; ++i) {
674                 c1 = bws_get_iter_value(iter1);
675                 c2 = bws_get_iter_value(iter2);
676                 if (c1 != c2)
677                         return (c1 - c2);
678                 iter1 = bws_iterator_inc(iter1, 1);
679                 iter2 = bws_iterator_inc(iter2, 1);
680         }
681
682         return (0);
683 }
684
685 int
686 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
687 {
688         size_t len1, len2;
689
690         len1 = BWSLEN(bws1);
691         len2 = BWSLEN(bws2);
692
693         if (len1 <= offset)
694                 return ((len2 <= offset) ? 0 : -1);
695         else {
696                 if (len2 <= offset)
697                         return (+1);
698                 else {
699                         len1 -= offset;
700                         len2 -= offset;
701
702                         if (mb_cur_max == 1) {
703                                 const char *s1, *s2;
704
705                                 s1 = bws1->cdata.str + offset;
706                                 s2 = bws2->cdata.str + offset;
707
708                                 if (byte_sort) {
709                                         int res;
710
711                                         if (len1 > len2) {
712                                                 res = memcmp(s1, s2, len2);
713                                                 if (!res)
714                                                         res = +1;
715                                         } else if (len1 < len2) {
716                                                 res = memcmp(s1, s2, len1);
717                                                 if (!res)
718                                                         res = -1;
719                                         } else
720                                                 res = memcmp(s1, s2, len1);
721
722                                         return (res);
723
724                                 } else {
725                                         int res;
726                                         size_t i, maxlen;
727
728                                         i = 0;
729                                         maxlen = len1;
730
731                                         if (maxlen > len2)
732                                                 maxlen = len2;
733
734                                         while (i < maxlen) {
735                                                 /* goto next non-zero part: */
736                                                 while ((i < maxlen) &&
737                                                     !s1[i] && !s2[i])
738                                                         ++i;
739
740                                                 if (i >= maxlen)
741                                                         break;
742
743                                                 if (s1[i] == 0) {
744                                                         if (s2[i] == 0)
745                                                                 /* NOTREACHED */
746                                                                 err(2, "bwscoll error 01");
747                                                         else
748                                                                 return (-1);
749                                                 } else if (s2[i] == 0)
750                                                         return (+1);
751
752                                                 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
753                                                 if (res)
754                                                         return (res);
755
756                                                 while ((i < maxlen) &&
757                                                     s1[i] && s2[i])
758                                                         ++i;
759
760                                                 if (i >= maxlen)
761                                                         break;
762
763                                                 if (s1[i] == 0) {
764                                                         if (s2[i] == 0) {
765                                                                 ++i;
766                                                                 continue;
767                                                         } else
768                                                                 return (-1);
769                                                 } else if (s2[i] == 0)
770                                                         return (+1);
771                                                 else
772                                                         /* NOTREACHED */
773                                                         err(2, "bwscoll error 02");
774                                         }
775
776                                         if (len1 < len2)
777                                                 return (-1);
778                                         else if (len1 > len2)
779                                                 return (+1);
780
781                                         return (0);
782                                 }
783                         } else {
784                                 const wchar_t *s1, *s2;
785                                 size_t i, maxlen;
786                                 int res;
787
788                                 s1 = bws1->wdata.str + offset;
789                                 s2 = bws2->wdata.str + offset;
790
791                                 i = 0;
792                                 maxlen = len1;
793
794                                 if (maxlen > len2)
795                                         maxlen = len2;
796
797                                 while (i < maxlen) {
798
799                                         /* goto next non-zero part: */
800                                         while ((i < maxlen) &&
801                                             !s1[i] && !s2[i])
802                                                 ++i;
803
804                                         if (i >= maxlen)
805                                                 break;
806
807                                         if (s1[i] == 0) {
808                                                 if (s2[i] == 0)
809                                                         /* NOTREACHED */
810                                                         err(2, "bwscoll error 1");
811                                                 else
812                                                         return (-1);
813                                         } else if (s2[i] == 0)
814                                                 return (+1);
815
816                                         res = wide_str_coll(s1 + i, s2 + i);
817                                         if (res)
818                                                 return (res);
819
820                                         while ((i < maxlen) && s1[i] && s2[i])
821                                                 ++i;
822
823                                         if (i >= maxlen)
824                                                 break;
825
826                                         if (s1[i] == 0) {
827                                                 if (s2[i] == 0) {
828                                                         ++i;
829                                                         continue;
830                                                 } else
831                                                         return (-1);
832                                         } else if (s2[i] == 0)
833                                                 return (+1);
834                                         else
835                                                 /* NOTREACHED */
836                                                 err(2, "bwscoll error 2");
837                                 }
838
839                                 if (len1 < len2)
840                                         return (-1);
841                                 else if (len1 > len2)
842                                         return (+1);
843
844                                 return (0);
845                         }
846                 }
847         }
848 }
849
850 /*
851  * Correction of the system API
852  */
853 double
854 bwstod(struct bwstring *s0, bool *empty)
855 {
856         double ret;
857
858         if (mb_cur_max == 1) {
859                 char *end, *s;
860                 char *ep;
861
862                 s = s0->cdata.str;
863                 end = s + s0->cdata.len;
864                 ep = NULL;
865
866                 while (isblank(*s) && s < end)
867                         ++s;
868
869                 if (!isprint(*s)) {
870                         *empty = true;
871                         return (0);
872                 }
873
874                 ret = strtod((char*)s, &ep);
875                 if (ep == s) {
876                         *empty = true;
877                         return (0);
878                 }
879         } else {
880                 wchar_t *end, *ep, *s;
881
882                 s = s0->wdata.str;
883                 end = s + s0->wdata.len;
884                 ep = NULL;
885
886                 while (iswblank(*s) && s < end)
887                         ++s;
888
889                 if (!iswprint(*s)) {
890                         *empty = true;
891                         return (0);
892                 }
893
894                 ret = wcstod(s, &ep);
895                 if (ep == s) {
896                         *empty = true;
897                         return (0);
898                 }
899         }
900
901         *empty = false;
902         return (ret);
903 }
904
905 /*
906  * A helper function for monthcoll.  If a line matches
907  * a month name, it returns (number of the month - 1),
908  * while if there is no match, it just return -1.
909  */
910
911 int
912 bws_month_score(const struct bwstring *s0)
913 {
914
915         if (mb_cur_max == 1) {
916                 const char *end, *s;
917
918                 s = s0->cdata.str;
919                 end = s + s0->cdata.len;
920
921                 while (isblank(*s) && s < end)
922                         ++s;
923
924                 for (int i = 11; i >= 0; --i) {
925                         if (cmonths[i] &&
926                             (s == strstr(s, cmonths[i])))
927                                 return (i);
928                 }
929
930         } else {
931                 const wchar_t *end, *s;
932
933                 s = s0->wdata.str;
934                 end = s + s0->wdata.len;
935
936                 while (iswblank(*s) && s < end)
937                         ++s;
938
939                 for (int i = 11; i >= 0; --i) {
940                         if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
941                                 return (i);
942                 }
943         }
944
945         return (-1);
946 }
947
948 /*
949  * Rips out leading blanks (-b).
950  */
951 struct bwstring *
952 ignore_leading_blanks(struct bwstring *str)
953 {
954
955         if (mb_cur_max == 1) {
956                 char *dst, *end, *src;
957
958                 src = str->cdata.str;
959                 dst = src;
960                 end = src + str->cdata.len;
961
962                 while (src < end && isblank(*src))
963                         ++src;
964
965                 if (src != dst) {
966                         size_t newlen;
967
968                         newlen = BWSLEN(str) - (src - dst);
969
970                         while (src < end) {
971                                 *dst = *src;
972                                 ++dst;
973                                 ++src;
974                         }
975                         bws_setlen(str, newlen);
976                 }
977         } else {
978                 wchar_t *dst, *end, *src;
979
980                 src = str->wdata.str;
981                 dst = src;
982                 end = src + str->wdata.len;
983
984                 while (src < end && iswblank(*src))
985                         ++src;
986
987                 if (src != dst) {
988
989                         size_t newlen = BWSLEN(str) - (src - dst);
990
991                         while (src < end) {
992                                 *dst = *src;
993                                 ++dst;
994                                 ++src;
995                         }
996                         bws_setlen(str, newlen);
997
998                 }
999         }
1000         return (str);
1001 }
1002
1003 /*
1004  * Rips out nonprinting characters (-i).
1005  */
1006 struct bwstring *
1007 ignore_nonprinting(struct bwstring *str)
1008 {
1009         size_t newlen = BWSLEN(str);
1010
1011         if (mb_cur_max == 1) {
1012                 char *dst, *end, *src;
1013                 char c;
1014
1015                 src = str->cdata.str;
1016                 dst = src;
1017                 end = src + str->cdata.len;
1018
1019                 while (src < end) {
1020                         c = *src;
1021                         if (isprint(c)) {
1022                                 *dst = c;
1023                                 ++dst;
1024                                 ++src;
1025                         } else {
1026                                 ++src;
1027                                 --newlen;
1028                         }
1029                 }
1030         } else {
1031                 wchar_t *dst, *end, *src;
1032                 wchar_t c;
1033
1034                 src = str->wdata.str;
1035                 dst = src;
1036                 end = src + str->wdata.len;
1037
1038                 while (src < end) {
1039                         c = *src;
1040                         if (iswprint(c)) {
1041                                 *dst = c;
1042                                 ++dst;
1043                                 ++src;
1044                         } else {
1045                                 ++src;
1046                                 --newlen;
1047                         }
1048                 }
1049         }
1050         bws_setlen(str, newlen);
1051
1052         return (str);
1053 }
1054
1055 /*
1056  * Rips out any characters that are not alphanumeric characters
1057  * nor blanks (-d).
1058  */
1059 struct bwstring *
1060 dictionary_order(struct bwstring *str)
1061 {
1062         size_t newlen = BWSLEN(str);
1063
1064         if (mb_cur_max == 1) {
1065                 char *dst, *end, *src;
1066                 char c;
1067
1068                 src = str->cdata.str;
1069                 dst = src;
1070                 end = src + str->cdata.len;
1071
1072                 while (src < end) {
1073                         c = *src;
1074                         if (isalnum(c) || isblank(c)) {
1075                                 *dst = c;
1076                                 ++dst;
1077                                 ++src;
1078                         } else {
1079                                 ++src;
1080                                 --newlen;
1081                         }
1082                 }
1083         } else {
1084                 wchar_t *dst, *end, *src;
1085                 wchar_t c;
1086
1087                 src = str->wdata.str;
1088                 dst = src;
1089                 end = src + str->wdata.len;
1090
1091                 while (src < end) {
1092                         c = *src;
1093                         if (iswalnum(c) || iswblank(c)) {
1094                                 *dst = c;
1095                                 ++dst;
1096                                 ++src;
1097                         } else {
1098                                 ++src;
1099                                 --newlen;
1100                         }
1101                 }
1102         }
1103         bws_setlen(str, newlen);
1104
1105         return (str);
1106 }
1107
1108 /*
1109  * Converts string to lower case(-f).
1110  */
1111 struct bwstring *
1112 ignore_case(struct bwstring *str)
1113 {
1114
1115         if (mb_cur_max == 1) {
1116                 char *end, *s;
1117
1118                 s = str->cdata.str;
1119                 end = s + str->cdata.len;
1120
1121                 while (s < end) {
1122                         *s = toupper(*s);
1123                         ++s;
1124                 }
1125         } else {
1126                 wchar_t *end, *s;
1127
1128                 s = str->wdata.str;
1129                 end = s + str->wdata.len;
1130
1131                 while (s < end) {
1132                         *s = towupper(*s);
1133                         ++s;
1134                 }
1135         }
1136         return (str);
1137 }
1138
1139 void
1140 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1141 {
1142
1143         if (mb_cur_max == 1)
1144                 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
1145         else
1146                 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);
1147 }