CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libiconv_modules/iconv_std/citrus_iconv_std.c
Upgrade Unbound to 1.6.1. More to follow.
[FreeBSD/FreeBSD.git] / lib / libiconv_modules / iconv_std / citrus_iconv_std.c
1 /* $FreeBSD$ */
2 /*      $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $        */
3
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2003 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #include <sys/cdefs.h>
33 #include <sys/endian.h>
34 #include <sys/queue.h>
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_module.h"
47 #include "citrus_region.h"
48 #include "citrus_mmap.h"
49 #include "citrus_hash.h"
50 #include "citrus_iconv.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_mapper.h"
53 #include "citrus_csmapper.h"
54 #include "citrus_memstream.h"
55 #include "citrus_iconv_std.h"
56 #include "citrus_esdb.h"
57
58 /* ---------------------------------------------------------------------- */
59
/*
 * NOTE(review): both macros are defined outside this file.  Presumably they
 * declare this module's entry points and define the
 * _citrus_iconv_std_iconv_ops table that _citrus_iconv_std_iconv_getops()
 * copies out -- confirm against the citrus iconv headers.
 */
60 _CITRUS_ICONV_DECLS(iconv_std);
61 _CITRUS_ICONV_DEF_OPS(iconv_std);
62
63
64 /* ---------------------------------------------------------------------- */
65
66 int
67 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
68 {
69
70         memcpy(ops, &_citrus_iconv_std_iconv_ops,
71             sizeof(_citrus_iconv_std_iconv_ops));
72
73         return (0);
74 }
75
76 /* ---------------------------------------------------------------------- */
77
78 /*
79  * convenience routines for stdenc.
80  */
81 static __inline void
82 save_encoding_state(struct _citrus_iconv_std_encoding *se)
83 {
84
85         if (se->se_ps)
86                 memcpy(se->se_pssaved, se->se_ps,
87                     _stdenc_get_state_size(se->se_handle));
88 }
89
90 static __inline void
91 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
92 {
93
94         if (se->se_ps)
95                 memcpy(se->se_ps, se->se_pssaved,
96                     _stdenc_get_state_size(se->se_handle));
97 }
98
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102
103         if (se->se_ps)
104                 _stdenc_init_state(se->se_handle, se->se_ps);
105 }
106
107 static __inline int
108 mbtocsx(struct _citrus_iconv_std_encoding *se,
109     _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
110     struct iconv_hooks *hooks)
111 {
112
113         return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
114                               nresult, hooks));
115 }
116
117 static __inline int
118 cstombx(struct _citrus_iconv_std_encoding *se,
119     char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
120     struct iconv_hooks *hooks)
121 {
122
123         return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
124                               nresult, hooks));
125 }
126
127 static __inline int
128 wctombx(struct _citrus_iconv_std_encoding *se,
129     char *s, size_t n, _wc_t wc, size_t *nresult,
130     struct iconv_hooks *hooks)
131 {
132
133         return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
134                              hooks));
135 }
136
137 static __inline int
138 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
139     size_t *nresult)
140 {
141
142         return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
143 }
144
145 static __inline int
146 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
147 {
148         struct _stdenc_state_desc ssd;
149         int ret;
150
151         ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
152             _STDENC_SDID_GENERIC, &ssd);
153         if (!ret)
154                 *rstate = ssd.u.generic.state;
155
156         return (ret);
157 }
158
159 /*
160  * init encoding context
161  */
162 static int
163 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
164     void *ps1, void *ps2)
165 {
166         int ret = -1;
167
168         se->se_handle = cs;
169         se->se_ps = ps1;
170         se->se_pssaved = ps2;
171
172         if (se->se_ps)
173                 ret = _stdenc_init_state(cs, se->se_ps);
174         if (!ret && se->se_pssaved)
175                 ret = _stdenc_init_state(cs, se->se_pssaved);
176
177         return (ret);
178 }
179
/*
 * Open a charset mapper from 'src' to 'dst' and accept it only if it maps
 * exactly one source index to one destination index and keeps no state.
 *
 * On success stores the mapper in *rcm and returns 0; 'rnorm' is handed to
 * _csmapper_open() as-is.  Returns the _csmapper_open() error if opening
 * fails, or EINVAL (after closing the mapper) if it does not meet the
 * constraints above.  *rcm is written only on success.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
        struct _csmapper *mapper;
        int error;

        error = _csmapper_open(&mapper, src, dst, 0, rnorm);
        if (error)
                return (error);

        if (_csmapper_get_src_max(mapper) == 1 &&
            _csmapper_get_dst_max(mapper) == 1 &&
            _csmapper_get_state_size(mapper) == 0) {
                *rcm = mapper;
                return (0);
        }

        /* Unsupported mapper shape. */
        _csmapper_close(mapper);
        return (EINVAL);
}
200
201 static void
202 close_dsts(struct _citrus_iconv_std_dst_list *dl)
203 {
204         struct _citrus_iconv_std_dst *sd;
205
206         while ((sd = TAILQ_FIRST(dl)) != NULL) {
207                 TAILQ_REMOVE(dl, sd, sd_entry);
208                 _csmapper_close(sd->sd_mapper);
209                 free(sd);
210         }
211 }
212
213 static int
214 open_dsts(struct _citrus_iconv_std_dst_list *dl,
215     const struct _esdb_charset *ec, const struct _esdb *dbdst)
216 {
217         struct _citrus_iconv_std_dst *sd, *sdtmp;
218         unsigned long norm;
219         int i, ret;
220
221         sd = malloc(sizeof(*sd));
222         if (sd == NULL)
223                 return (errno);
224
225         for (i = 0; i < dbdst->db_num_charsets; i++) {
226                 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
227                     dbdst->db_charsets[i].ec_csname, &norm);
228                 if (ret == 0) {
229                         sd->sd_csid = dbdst->db_charsets[i].ec_csid;
230                         sd->sd_norm = norm;
231                         /* insert this mapper by sorted order. */
232                         TAILQ_FOREACH(sdtmp, dl, sd_entry) {
233                                 if (sdtmp->sd_norm > norm) {
234                                         TAILQ_INSERT_BEFORE(sdtmp, sd,
235                                             sd_entry);
236                                         sd = NULL;
237                                         break;
238                                 }
239                         }
240                         if (sd)
241                                 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
242                         sd = malloc(sizeof(*sd));
243                         if (sd == NULL) {
244                                 ret = errno;
245                                 close_dsts(dl);
246                                 return (ret);
247                         }
248                 } else if (ret != ENOENT) {
249                         close_dsts(dl);
250                         free(sd);
251                         return (ret);
252                 }
253         }
254         free(sd);
255         return (0);
256 }
257
258 static void
259 close_srcs(struct _citrus_iconv_std_src_list *sl)
260 {
261         struct _citrus_iconv_std_src *ss;
262
263         while ((ss = TAILQ_FIRST(sl)) != NULL) {
264                 TAILQ_REMOVE(sl, ss, ss_entry);
265                 close_dsts(&ss->ss_dsts);
266                 free(ss);
267         }
268 }
269
270 static int
271 open_srcs(struct _citrus_iconv_std_src_list *sl,
272     const struct _esdb *dbsrc, const struct _esdb *dbdst)
273 {
274         struct _citrus_iconv_std_src *ss;
275         int count = 0, i, ret;
276
277         ss = malloc(sizeof(*ss));
278         if (ss == NULL)
279                 return (errno);
280
281         TAILQ_INIT(&ss->ss_dsts);
282
283         for (i = 0; i < dbsrc->db_num_charsets; i++) {
284                 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
285                 if (ret)
286                         goto err;
287                 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
288                         ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
289                         TAILQ_INSERT_TAIL(sl, ss, ss_entry);
290                         ss = malloc(sizeof(*ss));
291                         if (ss == NULL) {
292                                 ret = errno;
293                                 goto err;
294                         }
295                         count++;
296                         TAILQ_INIT(&ss->ss_dsts);
297                 }
298         }
299         free(ss);
300
301         return (count ? 0 : ENOENT);
302
303 err:
304         free(ss);
305         close_srcs(sl);
306         return (ret);
307 }
308
309 /* do convert a character */
310 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
311 static int
312 /*ARGSUSED*/
313 do_conv(const struct _citrus_iconv_std_shared *is,
314         _csid_t *csid, _index_t *idx)
315 {
316         struct _citrus_iconv_std_dst *sd;
317         struct _citrus_iconv_std_src *ss;
318         _index_t tmpidx;
319         int ret;
320
321         TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
322                 if (ss->ss_csid == *csid) {
323                         TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
324                                 ret = _csmapper_convert(sd->sd_mapper,
325                                     &tmpidx, *idx, NULL);
326                                 switch (ret) {
327                                 case _MAPPER_CONVERT_SUCCESS:
328                                         *csid = sd->sd_csid;
329                                         *idx = tmpidx;
330                                         return (0);
331                                 case _MAPPER_CONVERT_NONIDENTICAL:
332                                         break;
333                                 case _MAPPER_CONVERT_SRC_MORE:
334                                         /*FALLTHROUGH*/
335                                 case _MAPPER_CONVERT_DST_MORE:
336                                         /*FALLTHROUGH*/
337                                 case _MAPPER_CONVERT_ILSEQ:
338                                         return (EILSEQ);
339                                 case _MAPPER_CONVERT_FATAL:
340                                         return (EINVAL);
341                                 }
342                         }
343                         break;
344                 }
345         }
346
347         return (E_NO_CORRESPONDING_CHAR);
348 }
349 /* ---------------------------------------------------------------------- */
350
351 static int
352 /*ARGSUSED*/
353 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
354     const char * __restrict src, const char * __restrict dst)
355 {
356         struct _citrus_esdb esdbdst, esdbsrc;
357         struct _citrus_iconv_std_shared *is;
358         int ret;
359
360         is = malloc(sizeof(*is));
361         if (is == NULL) {
362                 ret = errno;
363                 goto err0;
364         }
365         ret = _citrus_esdb_open(&esdbsrc, src);
366         if (ret)
367                 goto err1;
368         ret = _citrus_esdb_open(&esdbdst, dst);
369         if (ret)
370                 goto err2;
371         ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
372             esdbsrc.db_variable, esdbsrc.db_len_variable);
373         if (ret)
374                 goto err3;
375         ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
376             esdbdst.db_variable, esdbdst.db_len_variable);
377         if (ret)
378                 goto err4;
379         is->is_use_invalid = esdbdst.db_use_invalid;
380         is->is_invalid = esdbdst.db_invalid;
381
382         TAILQ_INIT(&is->is_srcs);
383         ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
384         if (ret)
385                 goto err5;
386
387         _esdb_close(&esdbsrc);
388         _esdb_close(&esdbdst);
389         ci->ci_closure = is;
390
391         return (0);
392
393 err5:
394         _stdenc_close(is->is_dst_encoding);
395 err4:
396         _stdenc_close(is->is_src_encoding);
397 err3:
398         _esdb_close(&esdbdst);
399 err2:
400         _esdb_close(&esdbsrc);
401 err1:
402         free(is);
403 err0:
404         return (ret);
405 }
406
407 static void
408 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
409 {
410         struct _citrus_iconv_std_shared *is = ci->ci_closure;
411
412         if (is == NULL)
413                 return;
414
415         _stdenc_close(is->is_src_encoding);
416         _stdenc_close(is->is_dst_encoding);
417         close_srcs(&is->is_srcs);
418         free(is);
419 }
420
421 static int
422 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
423 {
424         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
425         struct _citrus_iconv_std_context *sc;
426         char *ptr;
427         size_t sz, szpsdst, szpssrc;
428
429         szpssrc = _stdenc_get_state_size(is->is_src_encoding);
430         szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
431
432         sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
433         sc = malloc(sz);
434         if (sc == NULL)
435                 return (errno);
436
437         ptr = (char *)&sc[1];
438         if (szpssrc > 0)
439                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
440                     ptr, ptr+szpssrc);
441         else
442                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
443                     NULL, NULL);
444         ptr += szpssrc*2;
445         if (szpsdst > 0)
446                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
447                     ptr, ptr+szpsdst);
448         else
449                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
450                     NULL, NULL);
451
452         cv->cv_closure = (void *)sc;
453
454         return (0);
455 }
456
457 static void
458 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
459 {
460
461         free(cv->cv_closure);
462 }
463
464 static int
465 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
466     char * __restrict * __restrict in, size_t * __restrict inbytes,
467     char * __restrict * __restrict out, size_t * __restrict outbytes,
468     uint32_t flags, size_t * __restrict invalids)
469 {
470         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
471         struct _citrus_iconv_std_context *sc = cv->cv_closure;
472         _csid_t csid;
473         _index_t idx;
474         char *tmpin;
475         size_t inval, szrin, szrout;
476         int ret, state = 0;
477
478         inval = 0;
479         if (in == NULL || *in == NULL) {
480                 /* special cases */
481                 if (out != NULL && *out != NULL) {
482                         /* init output state and store the shift sequence */
483                         save_encoding_state(&sc->sc_src_encoding);
484                         save_encoding_state(&sc->sc_dst_encoding);
485                         szrout = 0;
486
487                         ret = put_state_resetx(&sc->sc_dst_encoding,
488                             *out, *outbytes, &szrout);
489                         if (ret)
490                                 goto err;
491
492                         if (szrout == (size_t)-2) {
493                                 /* too small to store the character */
494                                 ret = EINVAL;
495                                 goto err;
496                         }
497                         *out += szrout;
498                         *outbytes -= szrout;
499                 } else
500                         /* otherwise, discard the shift sequence */
501                         init_encoding_state(&sc->sc_dst_encoding);
502                 init_encoding_state(&sc->sc_src_encoding);
503                 *invalids = 0;
504                 return (0);
505         }
506
507         /* normal case */
508         for (;;) {
509                 if (*inbytes == 0) {
510                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
511                         if (state == _STDENC_SDGEN_INITIAL ||
512                             state == _STDENC_SDGEN_STABLE)
513                                 break;
514                 }
515
516                 /* save the encoding states for the error recovery */
517                 save_encoding_state(&sc->sc_src_encoding);
518                 save_encoding_state(&sc->sc_dst_encoding);
519
520                 /* mb -> csid/index */
521                 tmpin = *in;
522                 szrin = szrout = 0;
523                 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
524                     *inbytes, &szrin, cv->cv_shared->ci_hooks);
525                 if (ret)
526                         goto err;
527
528                 if (szrin == (size_t)-2) {
529                         /* incompleted character */
530                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
531                         if (ret) {
532                                 ret = EINVAL;
533                                 goto err;
534                         }
535                         switch (state) {
536                         case _STDENC_SDGEN_INITIAL:
537                         case _STDENC_SDGEN_STABLE:
538                                 /* fetch shift sequences only. */
539                                 goto next;
540                         }
541                         ret = EINVAL;
542                         goto err;
543                 }
544                 /* convert the character */
545                 ret = do_conv(is, &csid, &idx);
546                 if (ret) {
547                         if (ret == E_NO_CORRESPONDING_CHAR) {
548                                 /*
549                                  * GNU iconv returns EILSEQ when no
550                                  * corresponding character in the output.
551                                  * Some software depends on this behavior
552                                  * though this is against POSIX specification.
553                                  */
554                                 if (cv->cv_shared->ci_ilseq_invalid != 0) {
555                                         ret = EILSEQ;
556                                         goto err;
557                                 }
558                                 inval++;
559                                 szrout = 0;
560                                 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
561                                     !cv->cv_shared->ci_discard_ilseq) &&
562                                     is->is_use_invalid) {
563                                         ret = wctombx(&sc->sc_dst_encoding,
564                                             *out, *outbytes, is->is_invalid,
565                                             &szrout, cv->cv_shared->ci_hooks);
566                                         if (ret)
567                                                 goto err;
568                                 }
569                                 goto next;
570                         } else
571                                 goto err;
572                 }
573                 /* csid/index -> mb */
574                 ret = cstombx(&sc->sc_dst_encoding,
575                     *out, *outbytes, csid, idx, &szrout,
576                     cv->cv_shared->ci_hooks);
577                 if (ret)
578                         goto err;
579 next:
580                 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
581                 *in = tmpin;
582                 *outbytes -= szrout;
583                 *out += szrout;
584         }
585         *invalids = inval;
586
587         return (0);
588
589 err:
590         restore_encoding_state(&sc->sc_src_encoding);
591         restore_encoding_state(&sc->sc_dst_encoding);
592         *invalids = inval;
593
594         return (ret);
595 }