2 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */
5 * SPDX-License-Identifier: BSD-2-Clause
7 * Copyright (c)2003 Citrus Project,
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 #include <sys/endian.h>
34 #include <sys/queue.h>
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_module.h"
47 #include "citrus_region.h"
48 #include "citrus_mmap.h"
49 #include "citrus_hash.h"
50 #include "citrus_iconv.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_mapper.h"
53 #include "citrus_csmapper.h"
54 #include "citrus_memstream.h"
55 #include "citrus_iconv_std.h"
56 #include "citrus_esdb.h"
58 /* ---------------------------------------------------------------------- */
/*
 * Instantiate the citrus iconv module boilerplate for "iconv_std".
 * NOTE(review): presumably these macros declare the module entry points and
 * define the _citrus_iconv_std_iconv_ops table that getops copies out --
 * confirm against citrus_iconv.h.
 */
60 _CITRUS_ICONV_DECLS(iconv_std);
61 _CITRUS_ICONV_DEF_OPS(iconv_std);
64 /* ---------------------------------------------------------------------- */
/*
 * Hand this module's operations table to the caller.
 *
 * ops: destination; receives a by-value copy of the whole
 *      _citrus_iconv_std_iconv_ops object.
 */
67 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
70 memcpy(ops, &_citrus_iconv_std_iconv_ops,
71 sizeof(_citrus_iconv_std_iconv_ops));
76 /* ---------------------------------------------------------------------- */
79 * convenience routines for stdenc.
/*
 * Snapshot the live encoding state of *se: se_ps is copied into se_pssaved.
 * The number of bytes copied is the state size reported for se_handle.
 */
82 save_encoding_state(struct _citrus_iconv_std_encoding *se)
86 memcpy(se->se_pssaved, se->se_ps,
87 _stdenc_get_state_size(se->se_handle));
/*
 * Roll the live encoding state of *se back to the last snapshot:
 * se_pssaved is copied over se_ps (the inverse of save_encoding_state()).
 * The number of bytes copied is the state size reported for se_handle.
 */
91 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
95 memcpy(se->se_ps, se->se_pssaved,
96 _stdenc_get_state_size(se->se_handle));
/*
 * Reinitialize the live state buffer (se_ps) of *se by handing it to
 * _stdenc_init_state() together with the encoding handle.
 */
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
104 _stdenc_init_state(se->se_handle, se->se_ps);
/*
 * Multibyte -> csid/index step for the encoding held in *se.
 *
 * Delegates to _stdenc_mbtocs(), taking the handle and the live state
 * (se_ps) from *se and passing csid, idx, s and n through unchanged.
 * The callee's return value is returned as-is.
 */
108 mbtocsx(struct _citrus_iconv_std_encoding *se,
109 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
110 struct iconv_hooks *hooks)
113 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
/*
 * csid/index -> multibyte step for the encoding held in *se.
 *
 * Delegates to _stdenc_cstomb(), taking the handle and the live state
 * (se_ps) from *se and passing s, n, csid and idx through unchanged.
 * The callee's return value is returned as-is.
 */
118 cstombx(struct _citrus_iconv_std_encoding *se,
119 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
120 struct iconv_hooks *hooks)
123 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
/*
 * Emit the wide character wc into the buffer s (n bytes available) using
 * the encoding held in *se.
 *
 * Delegates to _stdenc_wctomb(), taking the handle and the live state
 * (se_ps) from *se and passing s, n, wc and nresult through unchanged.
 * The callee's return value is returned as-is.
 */
128 wctombx(struct _citrus_iconv_std_encoding *se,
129 char *s, size_t n, _wc_t wc, size_t *nresult,
130 struct iconv_hooks *hooks)
133 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
/*
 * Delegates to _stdenc_put_state_reset() for the encoding held in *se,
 * passing the output buffer s, its size n, the live state (se_ps) and
 * nresult.  The callee's return value is returned as-is.
 */
138 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
142 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
/*
 * Fetch the generic state descriptor (_STDENC_SDID_GENERIC) for the live
 * state of *se and report its state value through *rstate.
 *
 * NOTE(review): callers should check the return value before trusting
 * *rstate -- confirm whether *rstate is written when the query fails.
 */
146 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
148 struct _stdenc_state_desc ssd;
151 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
152 _STDENC_SDID_GENERIC, &ssd);
154 *rstate = ssd.u.generic.state;
160 * init encoding context
/*
 * se:  encoding context to set up.
 * cs:  stdenc handle used to initialize the state buffers.
 * ps1: NOTE(review): presumably becomes the live state buffer (se_ps) --
 *      confirm.
 * ps2: stored as the saved-state buffer (se_pssaved).
 */
163 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
164 void *ps1, void *ps2)
170 se->se_pssaved = ps2;
/* initialize the live state first ... */
173 ret = _stdenc_init_state(cs, se->se_ps);
/* ... then the saved copy, only if that succeeded and a buffer exists */
174 if (!ret && se->se_pssaved)
175 ret = _stdenc_init_state(cs, se->se_pssaved);
/*
 * Open a charset mapper from charset `src' to charset `dst' through
 * _csmapper_open(); rnorm is handed to _csmapper_open() unchanged.
 */
181 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
182 unsigned long *rnorm)
184 struct _csmapper *cm;
187 ret = _csmapper_open(&cm, src, dst, 0, rnorm);
/*
 * Detect mappers whose src max or dst max is not 1, or whose state size is
 * not 0.  NOTE(review): presumably such mappers are rejected -- confirm.
 */
190 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 ||
191 _csmapper_get_state_size(cm) != 0) {
/*
 * Drain the destination list dl: repeatedly unlink the head entry and close
 * its mapper until the list is empty.
 */
202 close_dsts(struct _citrus_iconv_std_dst_list *dl)
204 struct _citrus_iconv_std_dst *sd;
206 while ((sd = TAILQ_FIRST(dl)) != NULL) {
207 TAILQ_REMOVE(dl, sd, sd_entry);
208 _csmapper_close(sd->sd_mapper);
/*
 * Populate dl with one entry for every destination charset in dbdst that a
 * mapper from the source charset ec can be opened for.
 *
 * dl:    destination list to fill; kept ordered by sd_norm.
 * ec:    source charset; its ec_csname is the mapper's source name.
 * dbdst: destination database whose db_charsets[] are tried in turn.
 */
214 open_dsts(struct _citrus_iconv_std_dst_list *dl,
215 const struct _esdb_charset *ec, const struct _esdb *dbdst)
217 struct _citrus_iconv_std_dst *sd, *sdtmp;
/* scratch entry: filled in by each attempt, linked into dl when one works */
221 sd = malloc(sizeof(*sd));
225 for (i = 0; i < dbdst->db_num_charsets; i++) {
226 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
227 dbdst->db_charsets[i].ec_csname, &norm);
229 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
231 /* insert this mapper by sorted order. */
/* i.e. in front of the first existing entry whose sd_norm exceeds norm */
232 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
233 if (sdtmp->sd_norm > norm) {
234 TAILQ_INSERT_BEFORE(sdtmp, sd,
241 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
/* the old entry now belongs to dl; get a fresh scratch entry */
242 sd = malloc(sizeof(*sd));
/* ENOENT from open_csmapper() is handled differently from other errors */
248 } else if (ret != ENOENT) {
/*
 * Drain the source list sl: repeatedly unlink the head entry and close its
 * destination list with close_dsts() until the list is empty.
 */
259 close_srcs(struct _citrus_iconv_std_src_list *sl)
261 struct _citrus_iconv_std_src *ss;
263 while ((ss = TAILQ_FIRST(sl)) != NULL) {
264 TAILQ_REMOVE(sl, ss, ss_entry);
265 close_dsts(&ss->ss_dsts);
/*
 * Populate sl with one entry for every source charset in dbsrc that has at
 * least one usable destination mapper into dbdst.
 *
 * Returns 0 when count is non-zero, ENOENT when it is zero.
 * NOTE(review): count presumably tallies the entries appended to sl --
 * confirm.
 */
271 open_srcs(struct _citrus_iconv_std_src_list *sl,
272 const struct _esdb *dbsrc, const struct _esdb *dbdst)
274 struct _citrus_iconv_std_src *ss;
275 int count = 0, i, ret;
/* scratch entry: reused across iterations until one is linked into sl */
277 ss = malloc(sizeof(*ss));
281 TAILQ_INIT(&ss->ss_dsts);
283 for (i = 0; i < dbsrc->db_num_charsets; i++) {
284 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
/* keep this source charset only if some destination mapper was opened */
287 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
288 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
289 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
/* the old entry now belongs to sl; start over with a fresh one */
290 ss = malloc(sizeof(*ss));
296 TAILQ_INIT(&ss->ss_dsts);
301 return (count ? 0 : ENOENT);
309 /* convert a single character */
/* value do_conv() returns when it falls through; aliased to ENOENT */
310 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
/*
 * Map the character (*csid, *idx) through the mapper lists in is->is_srcs.
 *
 * Only source entries whose ss_csid equals *csid are considered; for each,
 * the destination mappers are tried in list order.  Falls through to
 * E_NO_CORRESPONDING_CHAR at the end of the function.
 */
313 do_conv(const struct _citrus_iconv_std_shared *is,
314 _csid_t *csid, _index_t *idx)
316 struct _citrus_iconv_std_dst *sd;
317 struct _citrus_iconv_std_src *ss;
321 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
322 if (ss->ss_csid == *csid) {
323 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
/* the mapper's result index goes to tmpidx; *idx is only read here */
324 ret = _csmapper_convert(sd->sd_mapper,
325 &tmpidx, *idx, NULL);
/* every _MAPPER_CONVERT_* result code is dispatched individually */
327 case _MAPPER_CONVERT_SUCCESS:
331 case _MAPPER_CONVERT_NONIDENTICAL:
333 case _MAPPER_CONVERT_SRC_MORE:
335 case _MAPPER_CONVERT_DST_MORE:
337 case _MAPPER_CONVERT_ILSEQ:
339 case _MAPPER_CONVERT_FATAL:
347 return (E_NO_CORRESPONDING_CHAR);
349 /* ---------------------------------------------------------------------- */
/*
 * Build the shared data for converting from encoding `src' to encoding
 * `dst': opens both encoding databases and both stdenc handles, copies the
 * destination's invalid-character settings, and builds the source/destination
 * mapper lists.
 */
353 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
354 const char * __restrict src, const char * __restrict dst)
356 struct _citrus_esdb esdbdst, esdbsrc;
357 struct _citrus_iconv_std_shared *is;
360 is = malloc(sizeof(*is));
/* open the encoding databases for both sides */
365 ret = _citrus_esdb_open(&esdbsrc, src);
368 ret = _citrus_esdb_open(&esdbdst, dst);
/* open each encoding using the name and variable data from its database */
371 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
372 esdbsrc.db_variable, esdbsrc.db_len_variable);
375 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
376 esdbdst.db_variable, esdbdst.db_len_variable);
/* the invalid-character settings come from the destination database */
379 is->is_use_invalid = esdbdst.db_use_invalid;
380 is->is_invalid = esdbdst.db_invalid;
382 TAILQ_INIT(&is->is_srcs);
383 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
/* both databases are closed once open_srcs() has run */
387 _esdb_close(&esdbsrc);
388 _esdb_close(&esdbdst);
/*
 * NOTE(review): the calls below release resources in the reverse of the
 * order they were acquired above; this looks like the error-unwind path --
 * confirm.
 */
394 _stdenc_close(is->is_dst_encoding);
396 _stdenc_close(is->is_src_encoding);
398 _esdb_close(&esdbdst);
400 _esdb_close(&esdbsrc);
/*
 * Tear down the shared data reached through ci->ci_closure: closes both
 * stdenc handles and then the source/destination mapper lists.
 */
408 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
410 struct _citrus_iconv_std_shared *is = ci->ci_closure;
415 _stdenc_close(is->is_src_encoding);
416 _stdenc_close(is->is_dst_encoding);
417 close_srcs(&is->is_srcs);
/*
 * Set up the per-conversion context for cv and publish it through
 * cv->cv_closure.  The shared data is read from cv->cv_shared->ci_closure.
 */
422 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
424 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
425 struct _citrus_iconv_std_context *sc;
427 size_t sz, szpsdst, szpssrc;
429 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
430 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
/*
 * Size of the context struct plus twice the combined state sizes
 * (init_encoding() takes two state buffers per encoding).
 */
432 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
/* ptr starts at the first byte past the context struct */
437 ptr = (char *)&sc[1];
/*
 * NOTE(review): init_encoding() appears twice per encoding; presumably one
 * call per branch of a state-size check -- confirm.
 */
439 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
442 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
446 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
449 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
452 cv->cv_closure = (void *)sc;
/*
 * Release the per-conversion context stored in cv->cv_closure
 * (set by _citrus_iconv_std_iconv_init_context()).
 */
458 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
461 free(cv->cv_closure);
465 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
466 char * __restrict * __restrict in, size_t * __restrict inbytes,
467 char * __restrict * __restrict out, size_t * __restrict outbytes,
468 uint32_t flags, size_t * __restrict invalids)
470 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
471 struct _citrus_iconv_std_context *sc = cv->cv_closure;
475 size_t inval, szrin, szrout;
479 if (in == NULL || *in == NULL) {
481 if (out != NULL && *out != NULL) {
482 /* init output state and store the shift sequence */
483 save_encoding_state(&sc->sc_src_encoding);
484 save_encoding_state(&sc->sc_dst_encoding);
487 ret = put_state_resetx(&sc->sc_dst_encoding,
488 *out, *outbytes, &szrout);
492 if (szrout == (size_t)-2) {
493 /* too small to store the character */
500 /* otherwise, discard the shift sequence */
501 init_encoding_state(&sc->sc_dst_encoding);
502 init_encoding_state(&sc->sc_src_encoding);
510 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
511 if (state == _STDENC_SDGEN_INITIAL ||
512 state == _STDENC_SDGEN_STABLE)
516 /* save the encoding states for the error recovery */
517 save_encoding_state(&sc->sc_src_encoding);
518 save_encoding_state(&sc->sc_dst_encoding);
520 /* mb -> csid/index */
523 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
524 *inbytes, &szrin, cv->cv_shared->ci_hooks);
528 if (szrin == (size_t)-2) {
529 /* incomplete character */
530 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
536 case _STDENC_SDGEN_INITIAL:
537 case _STDENC_SDGEN_STABLE:
538 /* fetch shift sequences only. */
544 /* convert the character */
545 ret = do_conv(is, &csid, &idx);
547 if (ret == E_NO_CORRESPONDING_CHAR) {
549 * GNU iconv returns EILSEQ when no
550 * corresponding character in the output.
551 * Some software depends on this behavior
552 * though this is against POSIX specification.
554 if (cv->cv_shared->ci_ilseq_invalid != 0) {
560 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
561 !cv->cv_shared->ci_discard_ilseq) &&
562 is->is_use_invalid) {
563 ret = wctombx(&sc->sc_dst_encoding,
564 *out, *outbytes, is->is_invalid,
565 &szrout, cv->cv_shared->ci_hooks);
573 /* csid/index -> mb */
574 ret = cstombx(&sc->sc_dst_encoding,
575 *out, *outbytes, csid, idx, &szrout,
576 cv->cv_shared->ci_hooks);
580 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
590 restore_encoding_state(&sc->sc_src_encoding);
591 restore_encoding_state(&sc->sc_dst_encoding);