]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/nvi/common/conv.c
Update nvi to 2.2.0
[FreeBSD/FreeBSD.git] / contrib / nvi / common / conv.c
1 /*-
2  * Copyright (c) 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1993, 1994, 1995, 1996
5  *      Keith Bostic.  All rights reserved.
6  * Copyright (c) 2011, 2012
7  *      Zhihao Yuan.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11
12 #include "config.h"
13
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17
18 #include <bitstring.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <strings.h>
27 #include <unistd.h>
28
29 #include "common.h"
30
/*
 * codeset --
 *      Return the name of the current locale's character encoding as
 *      reported by nl_langinfo(CODESET).  The lookup is done on the
 *      first call only; later calls hand back the same saved pointer.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
        static char *cached;

        /* Already looked up: reuse the saved pointer. */
        if (cached != NULL)
                return cached;

        cached = nl_langinfo(CODESET);
        return cached;
}
47
48 #ifdef USE_WIDECHAR
49 static int 
50 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
51     CHAR_T **dst)
52 {
53         int i;
54         CHAR_T **tostr = &cw->bp1.wc;
55         size_t  *blen = &cw->blen1;
56
57         BINC_RETW(NULL, *tostr, *blen, len);
58
59         *tolen = len;
60         for (i = 0; i < len; ++i)
61                 (*tostr)[i] = (u_char) str[i];
62
63         *dst = cw->bp1.wc;
64
65         return 0;
66 }
67
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT --
 *      Run iconv(id) over the input at str, of which `left' bytes
 *      remain, writing the output into the caller's local `buffer'.
 *      iconv advances str and decrements left.  On completion src
 *      points at buffer and len is the number of bytes produced.
 *      Control jumps to the caller's `err' label when iconv fails with
 *      anything other than E2BIG, or, after setting error = -left,
 *      when no output at all was produced.
 *
 * IC_RESET --
 *      Return the conversion descriptor `id' to its initial state,
 *      unless it is the (iconv_t)-1 "no descriptor" value.
 *
 * Both expand to nothing when iconv support is not compiled in.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)                                    \
        do {                                                            \
                char *ic_out = buffer;                                  \
                size_t ic_room = CONV_BUFFER_SIZE;                      \
                errno = 0;                                              \
                if (iconv(id, (iconv_src_t)&str, &left, &ic_out,        \
                    &ic_room) == -1 && errno != E2BIG)                  \
                        goto err;                                       \
                len = CONV_BUFFER_SIZE - ic_room;                       \
                if (len == 0) {                                         \
                        error = -left;                                  \
                        goto err;                                       \
                }                                                       \
                src = buffer;                                           \
        } while (0)

#define IC_RESET()                                                      \
        do {                                                            \
                if (id == (iconv_t)-1)                                  \
                        break;                                          \
                iconv(id, NULL, NULL, NULL, NULL);                      \
        } while (0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
99
100 static int 
101 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
102     size_t *tolen, CHAR_T **dst, iconv_t id)
103 {
104         size_t i = 0, j;
105         CHAR_T **tostr = &cw->bp1.wc;
106         size_t *blen = &cw->blen1;
107         mbstate_t mbs;
108         size_t n;
109         ssize_t nlen = len;
110         char *src = (char *)str;
111 #ifdef USE_ICONV
112         char buffer[CONV_BUFFER_SIZE];
113 #endif
114         size_t left = len;
115         int error = 1;
116
117         memset(&mbs, 0, sizeof(mbs));
118         BINC_RETW(NULL, *tostr, *blen, nlen);
119
120 #ifdef USE_ICONV
121         if (id != (iconv_t)-1)
122                 CONVERT(str, left, src, len);
123 #endif
124
125         for (i = 0, j = 0; j < len; ) {
126                 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
127                 /* NULL character converted */
128                 if (n == -2)
129                         error = -(len-j);
130                 if (n == -1 || n == -2)
131                         goto err;
132                 if (n == 0)
133                         n = 1;
134                 j += n;
135                 if (++i >= *blen) {
136                         nlen += 256;
137                         BINC_RETW(NULL, *tostr, *blen, nlen);
138                 }
139                 if (id != (iconv_t)-1 && j == len && left) {
140                         CONVERT(str, left, src, len);
141                         j = 0;
142                 }
143         }
144
145         error = 0;
146 err:
147         *tolen = i;
148         *dst = cw->bp1.wc;
149         IC_RESET();
150
151         return error;
152 }
153
154 static int 
155 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
156     CHAR_T **dst)
157 {
158         return default_char2int(sp, str, len, cw, tolen, dst,
159             sp->conv.id[IC_FE_CHAR2INT]);
160 }
161
162 static int 
163 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
164     CHAR_T **dst)
165 {
166         return default_char2int(sp, str, len, cw, tolen, dst,
167             sp->conv.id[IC_IE_CHAR2INT]);
168 }
169
170 static int 
171 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
172     CHAR_T **dst)
173 {
174         return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
175 }
176
177 static int 
178 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
179     char **dst)
180 {
181         int i;
182         char **tostr = &cw->bp1.c;
183         size_t  *blen = &cw->blen1;
184
185         BINC_RETC(NULL, *tostr, *blen, len);
186
187         *tolen = len;
188         for (i = 0; i < len; ++i)
189                 (*tostr)[i] = str[i];
190
191         *dst = cw->bp1.c;
192
193         return 0;
194 }
195
196 static int 
197 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
198     size_t *tolen, char **pdst, iconv_t id)
199 {
200         size_t i, j, offset = 0;
201         char **tostr = &cw->bp1.c;
202         size_t *blen = &cw->blen1;
203         mbstate_t mbs;
204         size_t n;
205         ssize_t  nlen = len + MB_CUR_MAX;
206         char *dst;
207         size_t buflen;
208 #ifdef USE_ICONV
209         char buffer[CONV_BUFFER_SIZE];
210 #endif
211         int error = 1;
212
213 /* convert first len bytes of buffer and append it to cw->bp
214  * len is adjusted => 0
215  * offset contains the offset in cw->bp and is adjusted
216  * cw->bp is grown as required
217  */
218 #ifdef USE_ICONV
219 #define CONVERT2(_buffer, lenp, cw, offset)                             \
220         do {                                                            \
221                 char *bp = _buffer;                                     \
222                 int ret;                                                \
223                 do {                                                    \
224                         size_t outleft = cw->blen1 - offset;            \
225                         char *obp = cw->bp1.c + offset;                 \
226                         if (cw->blen1 < offset + MB_CUR_MAX) {          \
227                                 nlen += 256;                            \
228                                 BINC_RETC(NULL, cw->bp1.c, cw->blen1,   \
229                                     nlen);                              \
230                         }                                               \
231                         errno = 0;                                      \
232                         ret = iconv(id, (iconv_src_t)&bp, lenp, &obp,   \
233                             &outleft);                                  \
234                         if (ret == -1 && errno != E2BIG)                \
235                                 goto err;                               \
236                         offset = cw->blen1 - outleft;                   \
237                 } while (ret != 0);                                     \
238         } while (0)
239 #else
240 #define CONVERT2(_buffer, lenp, cw, offset)
241 #endif
242
243
244         memset(&mbs, 0, sizeof(mbs));
245         BINC_RETC(NULL, *tostr, *blen, nlen);
246         dst = *tostr; buflen = *blen;
247
248 #ifdef USE_ICONV
249         if (id != (iconv_t)-1) {
250                 dst = buffer; buflen = CONV_BUFFER_SIZE;
251         }
252 #endif
253
254         for (i = 0, j = 0; i < len; ++i) {
255                 n = wcrtomb(dst+j, str[i], &mbs);
256                 if (n == -1)
257                         goto err;
258                 j += n;
259                 if (buflen < j + MB_CUR_MAX) {
260                         if (id != (iconv_t)-1) {
261                                 CONVERT2(buffer, &j, cw, offset);
262                         } else {
263                                 nlen += 256;
264                                 BINC_RETC(NULL, *tostr, *blen, nlen);
265                                 dst = *tostr; buflen = *blen;
266                         }
267                 }
268         }
269
270         n = wcrtomb(dst+j, L'\0', &mbs);
271         j += n - 1;                             /* don't count NUL at the end */
272         *tolen = j;
273
274         if (id != (iconv_t)-1) {
275                 CONVERT2(buffer, &j, cw, offset);
276                 /* back to the initial state */
277                 CONVERT2(NULL, NULL, cw, offset);
278                 *tolen = offset;
279         }
280
281         error = 0;
282 err:
283         if (error)
284                 *tolen = j;
285         *pdst = cw->bp1.c;
286         IC_RESET();
287
288         return error;
289 }
290
291 static int 
292 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
293     size_t *tolen, char **dst)
294 {
295         return default_int2char(sp, str, len, cw, tolen, dst,
296                 sp->conv.id[IC_FE_INT2CHAR]);
297 }
298
299 static int 
300 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
301     size_t *tolen, char **dst)
302 {
303         return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
304 }
305
306 #endif
307
308 /*
309  * conv_init --
310  *      Initialize the iconv environment.
311  *
312  * PUBLIC: void conv_init(SCR *, SCR *);
313  */
314 void
315 conv_init(SCR *orig, SCR *sp)
316 {
317         int i;
318
319         if (orig == NULL)
320                 setlocale(LC_ALL, "");
321         if (orig != NULL)
322                 memmove(&sp->conv, &orig->conv, sizeof(CONV));
323 #ifdef USE_WIDECHAR
324         else {
325                 char *ctype = setlocale(LC_CTYPE, NULL);
326
327                 /*
328                  * XXX
329                  * This hack fixes the libncursesw issue on FreeBSD.
330                  */
331                 if (!strcmp(ctype, "ko_KR.CP949"))
332                         setlocale(LC_CTYPE, "ko_KR.eucKR");
333                 else if (!strcmp(ctype, "zh_CN.GB2312"))
334                         setlocale(LC_CTYPE, "zh_CN.eucCN");
335                 else if (!strcmp(ctype, "zh_CN.GBK"))
336                         setlocale(LC_CTYPE, "zh_CN.GB18030");
337
338                 /*
339                  * Switch to 8bit mode if locale is C;
340                  * LC_CTYPE should be reseted to C if unmatched.
341                  */
342                 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
343                         sp->conv.sys2int = sp->conv.file2int = raw2int;
344                         sp->conv.int2sys = sp->conv.int2file = int2raw;
345                         sp->conv.input2int = raw2int;
346                 } else {
347                         sp->conv.sys2int = cs_char2int;
348                         sp->conv.int2sys = cs_int2char;
349                         sp->conv.file2int = fe_char2int;
350                         sp->conv.int2file = fe_int2char;
351                         sp->conv.input2int = ie_char2int;
352                 }
353 #ifdef USE_ICONV
354                 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
355 #endif
356         }
357 #endif
358
359         /* iconv descriptors must be distinct to screens. */
360         for (i = 0; i <= IC_IE_TO_UTF16; ++i)
361                 sp->conv.id[i] = (iconv_t)-1;
362 #ifdef USE_ICONV
363         conv_enc(sp, O_INPUTENCODING, 0);
364 #endif
365 }
366
367 /*
368  * conv_enc --
369  *      Convert file/input encoding.
370  *
371  * PUBLIC: int conv_enc(SCR *, int, char *);
372  */
373 int
374 conv_enc(SCR *sp, int option, char *enc)
375 {
376 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
377         iconv_t *c2w, *w2c;
378         iconv_t id_c2w, id_w2c;
379
380         switch (option) {
381         case O_FILEENCODING:
382                 c2w = sp->conv.id + IC_FE_CHAR2INT;
383                 w2c = sp->conv.id + IC_FE_INT2CHAR;
384                 if (!enc)
385                         enc = O_STR(sp, O_FILEENCODING);
386
387                 if (strcasecmp(codeset(), enc)) {
388                         if ((id_c2w = iconv_open(codeset(), enc)) ==
389                             (iconv_t)-1)
390                                 goto err;
391                         if ((id_w2c = iconv_open(enc, codeset())) ==
392                             (iconv_t)-1)
393                                 goto err;
394                 } else {
395                         id_c2w = (iconv_t)-1;
396                         id_w2c = (iconv_t)-1;
397                 }
398
399                 break;
400
401         case O_INPUTENCODING:
402                 c2w = sp->conv.id + IC_IE_CHAR2INT;
403                 w2c = sp->conv.id + IC_IE_TO_UTF16;
404                 if (!enc)
405                         enc = O_STR(sp, O_INPUTENCODING);
406
407                 if (strcasecmp(codeset(), enc)) {
408                         if ((id_c2w = iconv_open(codeset(), enc)) ==
409                             (iconv_t)-1)
410                                 goto err;
411                 } else
412                         id_c2w = (iconv_t)-1;
413
414                 /* UTF-16 can not be locale and can not be inputed. */
415                 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
416                         goto err;
417
418                 break;
419
420         default:
421                 abort();
422         }
423
424         if (*c2w != (iconv_t)-1)
425                 iconv_close(*c2w);
426         if (*w2c != (iconv_t)-1)
427                 iconv_close(*w2c);
428
429         *c2w = id_c2w;
430         *w2c = id_w2c;
431
432         F_CLR(sp, SC_CONV_ERROR);
433         F_SET(sp, SC_SCR_REFORMAT);
434
435         return 0;
436 err:
437 #endif
438         switch (option) {
439         case O_FILEENCODING:
440                 msgq(sp, M_ERR, "321|File encoding conversion not supported");
441                 break;
442         case O_INPUTENCODING:
443                 msgq(sp, M_ERR, "322|Input encoding conversion not supported");
444                 break;
445         }
446         return 1;
447 }
448
449 /*
450  * conv_end --
451  *      Close the iconv descriptors, release the buffer.
452  *
453  * PUBLIC: void conv_end(SCR *);
454  */
455 void
456 conv_end(SCR *sp)
457 {
458 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
459         int i;
460         for (i = 0; i <= IC_IE_TO_UTF16; ++i)
461                 if (sp->conv.id[i] != (iconv_t)-1)
462                         iconv_close(sp->conv.id[i]);
463         free(sp->cw.bp1.c);
464 #endif
465 }