]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/nvi/common/conv.c
telnet: use asprintf for r349890 change
[FreeBSD/FreeBSD.git] / contrib / nvi / common / conv.c
1 /*-
2  * Copyright (c) 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1993, 1994, 1995, 1996
5  *      Keith Bostic.  All rights reserved.
6  * Copyright (c) 2011, 2012
7  *      Zhihao Yuan.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11
12 #include "config.h"
13
14 #ifndef lint
15 static const char sccsid[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $";
16 #endif /* not lint */
17
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/time.h>
21
22 #include <bitstring.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <langinfo.h>
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <strings.h>
31 #include <unistd.h>
32
33 #include "common.h"
34
/*
 * codeset --
 *      Return the name of the locale's character encoding.
 *
 *      nl_langinfo(CODESET) is consulted only until it yields a non-NULL
 *      pointer; that pointer is remembered in a function-local static and
 *      handed back unchanged on every later call.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
        static char *cached;

        /* Fast path: the lookup has already been done. */
        if (cached != NULL)
                return cached;

        cached = nl_langinfo(CODESET);
        return cached;
}
51
52 #ifdef USE_WIDECHAR
53 static int 
54 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
55     CHAR_T **dst)
56 {
57         int i;
58         CHAR_T **tostr = &cw->bp1.wc;
59         size_t  *blen = &cw->blen1;
60
61         BINC_RETW(NULL, *tostr, *blen, len);
62
63         *tolen = len;
64         for (i = 0; i < len; ++i)
65                 (*tostr)[i] = (u_char) str[i];
66
67         *dst = cw->bp1.wc;
68
69         return 0;
70 }
71
/* Size, in bytes, of the scratch buffer that iconv output is staged in. */
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *      Run one iconv() pass over the bytes at str, writing into the
 *      expansion site's local array `buffer` (CONV_BUFFER_SIZE bytes).
 *
 *      str, left  input pointer and remaining input byte count; both are
 *                 passed to iconv() by address and so are advanced by it.
 *      len        set to the number of bytes iconv() put into buffer.
 *      src        set to point at buffer.
 *
 *      The macro also uses `id`, `error` and the label `err` from the
 *      expansion site:
 *        - an iconv() failure other than E2BIG jumps to err;
 *        - if no output at all was produced, error is set to -left and
 *          control jumps to err.
 *
 * IC_RESET() --
 *      If id is a valid descriptor, call iconv() with all-NULL buffer
 *      arguments, which returns the descriptor to its initial state.
 *
 * Without USE_ICONV both macros expand to nothing.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)                                    \
        do {                                                            \
                size_t outleft;                                         \
                char *bp = buffer;                                      \
                outleft = CONV_BUFFER_SIZE;                             \
                errno = 0;                                              \
                if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft)  \
                    == -1 && errno != E2BIG)                            \
                        goto err;                                       \
                if ((len = CONV_BUFFER_SIZE - outleft) == 0) {          \
                        error = -left;                                  \
                        goto err;                                       \
                }                                                       \
                src = buffer;                                           \
        } while (0)

#define IC_RESET()                                                      \
        do {                                                            \
                if (id != (iconv_t)-1)                                  \
                        iconv(id, NULL, NULL, NULL, NULL);              \
        } while(0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
103
/*
 * default_char2int --
 *      Convert len bytes at str into wide characters stored in the
 *      conversion window buffer cw->bp1.wc.
 *
 *      When id is a valid iconv descriptor (and USE_ICONV is defined) the
 *      input is first run through iconv() in CONV_BUFFER_SIZE chunks via
 *      CONVERT; the chunk is then decoded with mbrtowc().  With
 *      id == (iconv_t)-1 the input is decoded with mbrtowc() directly.
 *
 *      On every exit through the bottom of the function *tolen is the
 *      number of wide characters stored and *dst points at the window
 *      buffer, so a caller can use the part converted before an error.
 *
 *      Returns 0 on success, 1 on an invalid sequence or iconv failure,
 *      and a negative byte count when the input ends in an incomplete
 *      multibyte sequence or iconv produced no output.  sp is not used.
 */
static int 
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
    size_t *tolen, CHAR_T **dst, iconv_t id)
{
        size_t i = 0, j;                /* i: wide chars out, j: bytes in */
        CHAR_T **tostr = &cw->bp1.wc;
        size_t *blen = &cw->blen1;
        mbstate_t mbs;
        size_t n;
        ssize_t nlen = len;             /* requested buffer size */
        char *src = (char *)str;        /* what mbrtowc() reads from */
#ifdef USE_ICONV
        char buffer[CONV_BUFFER_SIZE];  /* iconv staging area, see CONVERT */
#endif
        size_t left = len;              /* input bytes not yet given to iconv */
        int error = 1;                  /* pessimistic until the loop finishes */

        BZERO(&mbs, 1);
        BINC_RETW(NULL, *tostr, *blen, nlen);

#ifdef USE_ICONV
        /* Prime the staging buffer; from here on len/src describe it. */
        if (id != (iconv_t)-1)
                CONVERT(str, left, src, len);
#endif

        for (i = 0, j = 0; j < len; ) {
                n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
                /*
                 * (size_t)-2: the remaining bytes are an incomplete
                 * multibyte sequence; report how many were left over.
                 */
                if (n == -2)
                        error = -(len-j);
                /* (size_t)-1 is an invalid sequence; both cases bail out. */
                if (n == -1 || n == -2)
                        goto err;
                /* 0 means a NUL wide character was decoded; skip one byte. */
                if (n == 0)
                        n = 1;
                j += n;
                /*
                 * NOTE(review): i counts CHAR_T elements; confirm that
                 * *blen is kept in the same unit by BINC_RETW.
                 */
                if (++i >= *blen) {
                        nlen += 256;
                        BINC_RETW(NULL, *tostr, *blen, nlen);
                }
                /* Staging buffer drained but input remains: refill it. */
                if (id != (iconv_t)-1 && j == len && left) {
                        CONVERT(str, left, src, len);
                        j = 0;
                }
        }

        error = 0;
err:
        *tolen = i;
        *dst = cw->bp1.wc;
        IC_RESET();

        return error;
}
157
158 static int 
159 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
160     CHAR_T **dst)
161 {
162         return default_char2int(sp, str, len, cw, tolen, dst,
163             sp->conv.id[IC_FE_CHAR2INT]);
164 }
165
166 static int 
167 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
168     CHAR_T **dst)
169 {
170         return default_char2int(sp, str, len, cw, tolen, dst,
171             sp->conv.id[IC_IE_CHAR2INT]);
172 }
173
174 static int 
175 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
176     CHAR_T **dst)
177 {
178         return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
179 }
180
181 static int 
182 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
183     char **dst)
184 {
185         int i;
186         char **tostr = &cw->bp1.c;
187         size_t  *blen = &cw->blen1;
188
189         BINC_RETC(NULL, *tostr, *blen, len);
190
191         *tolen = len;
192         for (i = 0; i < len; ++i)
193                 (*tostr)[i] = str[i];
194
195         *dst = cw->bp1.c;
196
197         return 0;
198 }
199
200 static int 
201 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
202     size_t *tolen, char **pdst, iconv_t id)
203 {
204         size_t i, j, offset = 0;
205         char **tostr = &cw->bp1.c;
206         size_t *blen = &cw->blen1;
207         mbstate_t mbs;
208         size_t n;
209         ssize_t  nlen = len + MB_CUR_MAX;
210         char *dst;
211         size_t buflen;
212 #ifdef USE_ICONV
213         char buffer[CONV_BUFFER_SIZE];
214 #endif
215         int error = 1;
216
217 /* convert first len bytes of buffer and append it to cw->bp
218  * len is adjusted => 0
219  * offset contains the offset in cw->bp and is adjusted
220  * cw->bp is grown as required
221  */
222 #ifdef USE_ICONV
223 #define CONVERT2(_buffer, lenp, cw, offset)                             \
224         do {                                                            \
225                 char *bp = _buffer;                                     \
226                 int ret;                                                \
227                 do {                                                    \
228                         size_t outleft = cw->blen1 - offset;            \
229                         char *obp = cw->bp1.c + offset;                 \
230                         if (cw->blen1 < offset + MB_CUR_MAX) {          \
231                                 nlen += 256;                            \
232                                 BINC_RETC(NULL, cw->bp1.c, cw->blen1,   \
233                                     nlen);                              \
234                         }                                               \
235                         errno = 0;                                      \
236                         ret = iconv(id, (iconv_src_t)&bp, lenp, &obp,   \
237                             &outleft);                                  \
238                         if (ret == -1 && errno != E2BIG)                \
239                                 goto err;                               \
240                         offset = cw->blen1 - outleft;                   \
241                 } while (ret != 0);                                     \
242         } while (0)
243 #else
244 #define CONVERT2(_buffer, lenp, cw, offset)
245 #endif
246
247
248         BZERO(&mbs, 1);
249         BINC_RETC(NULL, *tostr, *blen, nlen);
250         dst = *tostr; buflen = *blen;
251
252 #ifdef USE_ICONV
253         if (id != (iconv_t)-1) {
254                 dst = buffer; buflen = CONV_BUFFER_SIZE;
255         }
256 #endif
257
258         for (i = 0, j = 0; i < len; ++i) {
259                 n = wcrtomb(dst+j, str[i], &mbs);
260                 if (n == -1)
261                         goto err;
262                 j += n;
263                 if (buflen < j + MB_CUR_MAX) {
264                         if (id != (iconv_t)-1) {
265                                 CONVERT2(buffer, &j, cw, offset);
266                         } else {
267                                 nlen += 256;
268                                 BINC_RETC(NULL, *tostr, *blen, nlen);
269                                 dst = *tostr; buflen = *blen;
270                         }
271                 }
272         }
273
274         n = wcrtomb(dst+j, L'\0', &mbs);
275         j += n - 1;                             /* don't count NUL at the end */
276         *tolen = j;
277
278         if (id != (iconv_t)-1) {
279                 CONVERT2(buffer, &j, cw, offset);
280                 /* back to the initial state */
281                 CONVERT2(NULL, NULL, cw, offset);
282                 *tolen = offset;
283         }
284
285         error = 0;
286 err:
287         if (error)
288                 *tolen = j;
289         *pdst = cw->bp1.c;
290         IC_RESET();
291
292         return error;
293 }
294
295 static int 
296 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
297     size_t *tolen, char **dst)
298 {
299         return default_int2char(sp, str, len, cw, tolen, dst,
300                 sp->conv.id[IC_FE_INT2CHAR]);
301 }
302
303 static int 
304 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
305     size_t *tolen, char **dst)
306 {
307         return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
308 }
309
310 #endif
311
312 /*
313  * conv_init --
314  *      Initialize the iconv environment.
315  *
316  * PUBLIC: void conv_init(SCR *, SCR *);
317  */
318 void
319 conv_init(SCR *orig, SCR *sp)
320 {
321         int i;
322
323         if (orig == NULL)
324                 setlocale(LC_ALL, "");
325         if (orig != NULL)
326                 BCOPY(&orig->conv, &sp->conv, 1);
327 #ifdef USE_WIDECHAR
328         else {
329                 char *ctype = setlocale(LC_CTYPE, NULL);
330
331                 /*
332                  * XXX
333                  * This hack fixes the libncursesw issue on FreeBSD.
334                  */
335                 if (!strcmp(ctype, "ko_KR.CP949"))
336                         setlocale(LC_CTYPE, "ko_KR.eucKR");
337                 else if (!strcmp(ctype, "zh_CN.GB2312"))
338                         setlocale(LC_CTYPE, "zh_CN.eucCN");
339                 else if (!strcmp(ctype, "zh_CN.GBK"))
340                         setlocale(LC_CTYPE, "zh_CN.GB18030");
341
342                 /*
343                  * Switch to 8bit mode if locale is C;
344                  * LC_CTYPE should be reseted to C if unmatched.
345                  */
346                 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
347                         sp->conv.sys2int = sp->conv.file2int = raw2int;
348                         sp->conv.int2sys = sp->conv.int2file = int2raw;
349                         sp->conv.input2int = raw2int;
350                 } else {
351                         sp->conv.sys2int = cs_char2int;
352                         sp->conv.int2sys = cs_int2char;
353                         sp->conv.file2int = fe_char2int;
354                         sp->conv.int2file = fe_int2char;
355                         sp->conv.input2int = ie_char2int;
356                 }
357 #ifdef USE_ICONV
358                 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
359 #endif
360         }
361 #endif
362
363         /* iconv descriptors must be distinct to screens. */
364         for (i = 0; i <= IC_IE_TO_UTF16; ++i)
365                 sp->conv.id[i] = (iconv_t)-1;
366 #ifdef USE_ICONV
367         conv_enc(sp, O_INPUTENCODING, 0);
368 #endif
369 }
370
371 /*
372  * conv_enc --
373  *      Convert file/input encoding.
374  *
375  * PUBLIC: int conv_enc(SCR *, int, char *);
376  */
377 int
378 conv_enc(SCR *sp, int option, char *enc)
379 {
380 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
381         iconv_t *c2w, *w2c;
382         iconv_t id_c2w, id_w2c;
383
384         switch (option) {
385         case O_FILEENCODING:
386                 c2w = sp->conv.id + IC_FE_CHAR2INT;
387                 w2c = sp->conv.id + IC_FE_INT2CHAR;
388                 if (!enc)
389                         enc = O_STR(sp, O_FILEENCODING);
390
391                 if (strcasecmp(codeset(), enc)) {
392                         if ((id_c2w = iconv_open(codeset(), enc)) ==
393                             (iconv_t)-1)
394                                 goto err;
395                         if ((id_w2c = iconv_open(enc, codeset())) ==
396                             (iconv_t)-1)
397                                 goto err;
398                 } else {
399                         id_c2w = (iconv_t)-1;
400                         id_w2c = (iconv_t)-1;
401                 }
402
403                 break;
404
405         case O_INPUTENCODING:
406                 c2w = sp->conv.id + IC_IE_CHAR2INT;
407                 w2c = sp->conv.id + IC_IE_TO_UTF16;
408                 if (!enc)
409                         enc = O_STR(sp, O_INPUTENCODING);
410
411                 if (strcasecmp(codeset(), enc)) {
412                         if ((id_c2w = iconv_open(codeset(), enc)) ==
413                             (iconv_t)-1)
414                                 goto err;
415                 } else
416                         id_c2w = (iconv_t)-1;
417
418                 /* UTF-16 can not be locale and can not be inputed. */
419                 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
420                         goto err;
421
422                 break;
423
424         default:
425                 abort();
426         }
427
428         if (*c2w != (iconv_t)-1)
429                 iconv_close(*c2w);
430         if (*w2c != (iconv_t)-1)
431                 iconv_close(*w2c);
432
433         *c2w = id_c2w;
434         *w2c = id_w2c;
435
436         F_CLR(sp, SC_CONV_ERROR);
437         F_SET(sp, SC_SCR_REFORMAT);
438
439         return 0;
440 err:
441 #endif
442         switch (option) {
443         case O_FILEENCODING:
444                 msgq(sp, M_ERR, "321|File encoding conversion not supported");
445                 break;
446         case O_INPUTENCODING:
447                 msgq(sp, M_ERR, "322|Input encoding conversion not supported");
448                 break;
449         }
450         return 1;
451 }
452
453 /*
454  * conv_end --
455  *      Close the iconv descriptors, release the buffer.
456  *
457  * PUBLIC: void conv_end(SCR *);
458  */
459 void
460 conv_end(SCR *sp)
461 {
462 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
463         int i;
464         for (i = 0; i <= IC_IE_TO_UTF16; ++i)
465                 if (sp->conv.id[i] != (iconv_t)-1)
466                         iconv_close(sp->conv.id[i]);
467         if (sp->cw.bp1.c != NULL)
468                 free(sp->cw.bp1.c);
469 #endif
470 }