2 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
5 * Copyright (c)2004, 2006 Citrus Project,
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/queue.h>
33 #include <sys/types.h>
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
50 #include "citrus_hz.h"
51 #include "citrus_prop.h"
56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx
57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx
58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
/*
 * Byte that introduces a two-byte escape sequence: the decoder treats the
 * byte following it as the escape selector, and the encoder emits it
 * immediately before an escape's selector character.
 */
61 #define ESCAPE_CHAR '~'
/*
 * Charset identifiers.  The numeric values are used as indices into the
 * ranges[] table, so their order must match the order of its entries.
 * NOTE(review): the enclosing enum declaration is not visible in this
 * excerpt.
 */
64 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4
/*
 * Permitted byte values for each charset, indexed by the charset
 * identifier (CTRL, ASCII, GB2312, CS94, CS96, in that order).  Both the
 * decoder and the encoder reject a byte that falls outside
 * [range->start, range->end].
 */
73 static const range_t ranges[] = {
/*
 * RANGE(start, end) initializes one entry with the two inclusive bounds
 * followed by the number of byte values they span.
 */
74 #define RANGE(start, end) { start, end, (end - start) + 1 }
75 /* CTRL */ RANGE(0x00, 0x1F),
76 /* ASCII */ RANGE(0x20, 0x7F),
77 /* GB2312 */ RANGE(0x21, 0x7E),
78 /* CS94 */ RANGE(0x21, 0x7E),
79 /* CS96 */ RANGE(0x20, 0x7F),
/* Shorthand so escape_t can be written without the struct keyword. */
83 typedef struct escape_t escape_t;
/* Tail queue (see <sys/queue.h>) whose elements are escape_t records. */
91 typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
/*
 * Queue linkage used by the TAILQ_* macros in this file.
 * NOTE(review): this is a member of a struct whose remaining fields are
 * not visible in this excerpt.
 */
93 TAILQ_ENTRY(escape_t) entry;
/*
 * Field accessors for an escape_t pointer:
 *   GL   - graphic set used for bytes without the 0x80 bit
 *   GR   - graphic set used for bytes with the 0x80 bit
 *   SET  - the escape_list this escape was inserted into
 *   ESC  - the selector character that follows ESCAPE_CHAR
 *   INIT - the first escape in the same list as this one
 */
100 #define GL(escape) ((escape)->left)
101 #define GR(escape) ((escape)->right)
102 #define SET(escape) ((escape)->set)
103 #define ESC(escape) ((escape)->ch)
104 #define INIT(escape) (TAILQ_FIRST(SET(escape)))
/*
 * Walk the escape list `set` looking for the entry whose selector
 * character equals `ch`.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers compare the result against NULL, so presumably NULL means
 * "no such escape" - confirm.
 */
106 static __inline escape_t *
107 find_escape(escape_list *set, int ch)
111 TAILQ_FOREACH(escape, set, entry) {
112 if (ESC(escape) == ch)
/*
 * Accessors for the two escape lists kept in the encoding info:
 *   E0SET / E1SET - address of the e0 / e1 list head
 *   INIT0 / INIT1 - first escape of the respective list
 * INIT0 is the escape a conversion state is set to on initialization
 * and the one the state-reset sequence returns to.
 */
126 #define E0SET(ei) (&(ei)->e0)
127 #define E1SET(ei) (&(ei)->e1)
128 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei)))
129 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei)))
/*
 * Glue macros.
 * NOTE(review): these are presumably consumed by the
 * "citrus_stdenc_template.h" include at the end of the file - confirm.
 *
 * _CEI_TO_EI and _CEI_TO_STATE select the `ei` member and the
 * `states.s_<func>` member of their first argument.
 */
137 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
138 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
/* Build this module's function names by pasting onto the _citrus_HZ_ prefix. */
140 #define _FUNCNAME(m) _citrus_HZ_##m
141 #define _ENCODING_INFO _HZEncodingInfo
142 #define _ENCODING_STATE _HZState
/* The argument is ignored: the reported maximum is always MB_LEN_MAX. */
143 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
144 #define _ENCODING_IS_STATE_DEPENDENT 1
/* A state whose `inuse` escape is still NULL has not been initialized. */
145 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL)
/*
 * Put the conversion state `psenc` into its initial condition by selecting
 * the first escape of the E0 list of `ei` as the escape in use.
 * NOTE(review): any other fields reset here (e.g. the buffered byte count)
 * are on lines not visible in this excerpt.
 */
148 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
149 _HZState * __restrict psenc)
153 psenc->inuse = INIT0(ei);
/*
 * Serialize a conversion state: copy the whole _HZState object `psenc`
 * byte-for-byte into the caller-supplied buffer `pspriv`, which must be at
 * least sizeof(_HZState) bytes.  `ei` is unused.
 */
158 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
159 void *__restrict pspriv, const _HZState * __restrict psenc)
162 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
/*
 * Inverse of the pack operation: overwrite the _HZState object `psenc`
 * with sizeof(_HZState) bytes read from `pspriv`.  `ei` is unused.
 */
167 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
168 _HZState * __restrict psenc, const void * __restrict pspriv)
171 memcpy((void *)psenc, pspriv, sizeof(*psenc));
/*
 * Decode one wide character from the byte stream *s (at most n bytes),
 * using and updating the conversion state `psenc`.  Bytes are first
 * accumulated in psenc->ch so that a character or escape sequence split
 * across calls can be resumed.  The outcome is reported through *nresult:
 * (size_t)-2 and (size_t)-1 are stored on the incomplete and failure
 * paths respectively, otherwise 0 for the null wide character or the
 * value of `len`.
 * NOTE(review): many lines of this function (returns, breaks, the
 * assembly of the wide character) are not visible in this excerpt; the
 * comments below describe only what the visible lines do.
 */
175 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
176 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
177 _HZState * __restrict psenc, size_t * __restrict nresult)
179 escape_t *candidate, *init;
181 const range_t *range;
184 int bit, ch, head, len, tail;
187 _citrus_HZ_init_state(ei, psenc);
/* Sanity check of the state; the action taken is on a line not shown. */
192 if (psenc->chlen < 0 || psenc->inuse == NULL)
196 bit = head = tail = 0;
198 for (len = 0; len <= MB_LEN_MAX;) {
/* Every buffered byte has been examined: more input is needed. */
199 if (psenc->chlen == tail) {
202 *nresult = (size_t)-2;
/* Move the next input byte into the state's buffer. */
205 psenc->ch[psenc->chlen++] = *s0++;
208 ch = (unsigned char)psenc->ch[tail++];
/* Control byte, with or without the 0x80 bit set. */
210 if ((ch & ~0x80) <= 0x1F) {
211 if (psenc->inuse != INIT0(ei))
217 graphic = GR(psenc->inuse);
221 graphic = GL(psenc->inuse);
222 if (ch == ESCAPE_CHAR)
228 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
/* `ch` is the selector byte that follows ESCAPE_CHAR. */
229 if (tail < psenc->chlen)
231 if (ch == ESCAPE_CHAR) {
233 } else if (ch == '\n') {
234 if (psenc->inuse != INIT0(ei))
/* Drop the buffered ESCAPE_CHAR + newline pair. */
236 tail = psenc->chlen = 0;
241 if (psenc->inuse == init) {
243 } else if (INIT(psenc->inuse) == init) {
248 if (candidate == NULL) {
249 candidate = find_escape(
250 SET(psenc->inuse), ch);
251 if (candidate == NULL) {
/* Switch to the selected escape and discard the buffered sequence. */
258 psenc->inuse = candidate;
259 tail = psenc->chlen = 0;
262 } else if (ch & 0x80) {
/* All bytes of one character must come from the same graphic half. */
263 if (graphic != GR(psenc->inuse))
267 if (graphic != GL(psenc->inuse))
/* Validate the byte against the range allowed for this charset. */
270 range = &ranges[(size_t)graphic->charset];
271 if (range->start > ch || range->end < ch)
/* The expected number of bytes for this graphic set has been read. */
275 if (graphic->length == (tail - head)) {
/* For CS94/CS96 the escape selector is placed at bit 24 and up. */
276 if (graphic->charset > GB2312)
277 bit |= ESC(psenc->inuse) << 24;
282 *nresult = (size_t)-1;
285 if (tail < psenc->chlen)
291 *nresult = (wc == 0) ? 0 : len;
/*
 * Encode the wide character `wc` into the buffer `s` (capacity n),
 * emitting any escape sequences needed to move from the escape currently
 * in use to the one required by `wc`.  Output is staged in psenc->ch and
 * then copied to `s`; *nresult receives the number of bytes produced, or
 * (size_t)-1 on the failure path.
 * NOTE(review): many lines (the derivation of `bit`, buffer-size checks,
 * returns) are not visible in this excerpt; the comments below describe
 * only what the visible lines do.
 */
297 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
298 char * __restrict s, size_t n, wchar_t wc,
299 _HZState * __restrict psenc, size_t * __restrict nresult)
301 escape_t *candidate, *init;
303 const range_t *range;
/* The staging buffer must be empty and the state initialized. */
307 if (psenc->chlen != 0 || psenc->inuse == NULL)
/* Control characters use the initial E0 escape and the CTRL range. */
315 if ((uint32_t)wc <= 0x1F) {
316 candidate = INIT0(ei);
317 graphic = (bit == 0) ? candidate->left : candidate->right;
320 range = &ranges[(size_t)CTRL];
322 } else if ((uint32_t)wc <= 0x7F) {
326 candidate = graphic->escape;
327 range = &ranges[(size_t)graphic->charset];
328 len = graphic->length;
329 } else if ((uint32_t)wc <= 0x7F7F) {
/* Two-byte values are encoded with the graphic set recorded as GB2312. */
330 graphic = ei->gb2312;
333 candidate = graphic->escape;
334 range = &ranges[(size_t)graphic->charset];
335 len = graphic->length;
/* Otherwise bits 24-31 hold an escape selector: look it up in E0, then E1. */
337 ch = (wc >> 24) & 0xFF;
338 candidate = find_escape(E0SET(ei), ch);
339 if (candidate == NULL) {
340 candidate = find_escape(E1SET(ei), ch);
341 if (candidate == NULL)
345 graphic = (bit == 0) ? candidate->left : candidate->right;
348 range = &ranges[(size_t)graphic->charset];
349 len = graphic->length;
/* An escape change is required before the character bytes. */
351 if (psenc->inuse != candidate) {
353 if (SET(psenc->inuse) == SET(candidate)) {
354 if (INIT(psenc->inuse) != init ||
355 psenc->inuse == init || candidate == init)
357 } else if (candidate == (init = INIT(candidate))) {
/* Emit the escape sequence selecting `init`. */
364 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
365 psenc->ch[psenc->chlen++] = ESC(init);
/* Emit the escape sequence selecting `candidate` and make it current. */
370 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
371 psenc->ch[psenc->chlen++] = ESC(candidate);
372 psenc->inuse = candidate;
/* Extract one byte of wc, range-check it, and stage it with `bit` OR-ed in. */
377 ch = (wc >> (len * 8)) & 0xFF;
378 if (range->start > ch || range->end < ch)
380 psenc->ch[psenc->chlen++] = ch | bit;
/* Hand the staged bytes to the caller. */
382 memcpy(s, psenc->ch, psenc->chlen);
383 *nresult = psenc->chlen;
389 *nresult = (size_t)-1;
/*
 * Write to `s` the byte sequence that returns the output stream to the
 * initial escape (the first entry of the E0 list) and then reinitialize
 * `psenc`.  If that escape is already in use nothing is staged.
 * *nresult receives the number of bytes written.
 * NOTE(review): the buffer-size check against `n` and the return
 * statements are on lines not visible in this excerpt.
 */
394 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
395 char * __restrict s, size_t n, _HZState * __restrict psenc,
396 size_t * __restrict nresult)
/* The staging buffer must be empty and the state initialized. */
400 if (psenc->chlen != 0 || psenc->inuse == NULL)
402 candidate = INIT0(ei);
403 if (psenc->inuse != candidate) {
/* Stage ESCAPE_CHAR followed by the initial escape's selector. */
407 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
408 psenc->ch[psenc->chlen++] = ESC(candidate);
412 if (psenc->chlen > 0)
413 memcpy(s, psenc->ch, psenc->chlen);
414 *nresult = psenc->chlen;
415 _citrus_HZ_init_state(ei, psenc);
/*
 * Classify the conversion state `psenc` and store the result in *rstate:
 *   - no buffered bytes, initial E0 escape in use -> _STDENC_SDGEN_INITIAL
 *   - no buffered bytes, some other escape in use -> _STDENC_SDGEN_STABLE
 *   - buffered bytes beginning with ESCAPE_CHAR   -> _STDENC_SDGEN_INCOMPLETE_SHIFT
 *   - any other buffered bytes                    -> _STDENC_SDGEN_INCOMPLETE_CHAR
 */
421 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
422 _HZState * __restrict psenc, int * __restrict rstate)
/* Sanity check of the state; the action taken is on a line not shown. */
425 if (psenc->chlen < 0 || psenc->inuse == NULL)
427 *rstate = (psenc->chlen == 0)
428 ? ((psenc->inuse == INIT0(ei))
429 ? _STDENC_SDGEN_INITIAL
430 : _STDENC_SDGEN_STABLE)
431 : ((psenc->ch[0] == ESCAPE_CHAR)
432 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
433 : _STDENC_SDGEN_INCOMPLETE_CHAR);
/*
 * Split the wide character `wc` into a charset id (*csid) and an index
 * within that charset (*idx).  `ei` is unused.
 *   - wc <= 0x7F:   csid is `bit`
 *   - wc <= 0x7F7F: csid is `bit` with 0x8000 OR-ed in
 *   - otherwise:    csid is every bit of wc outside the mask 0x00FFFF7F,
 *                   idx is the bits inside that mask
 * NOTE(review): the computation of `bit`, the idx assignments of the
 * first two branches and the return statements are on lines not visible
 * in this excerpt.
 */
440 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
441 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
450 if ((uint32_t)wc <= 0x7F) {
451 *csid = (_csid_t)bit;
453 } else if ((uint32_t)wc <= 0x7F7F) {
454 *csid = (_csid_t)(bit | 0x8000);
/* Cast each value to the type of the object it is stored into. */
457 *csid = (_csid_t)(wc & ~0x00FFFF7F);
458 *idx = (_index_t)(wc & 0x00FFFF7F);
/*
 * Build the wide character *wc from a charset id and an index.  `ei` is
 * unused.
 * NOTE(review): only two statements of the body are visible in this
 * excerpt - one ORs 0x80 into *wc and one ORs `csid` into *wc; the
 * conditions selecting them, the initial assignment of *wc and the return
 * statements are not shown.
 */
466 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
467 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
474 *wc |= (wchar_t)0x80;
480 *wc |= (wchar_t)csid;
/*
 * Tear down the encoding info `ei`: repeatedly take the first escape off
 * the E0 list, then off the E1 list, until both are empty.
 * NOTE(review): the release of each removed escape (and of anything it
 * owns) is on lines not visible in this excerpt.
 */
487 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
491 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
492 TAILQ_REMOVE(E0SET(ei), escape, entry);
497 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
498 TAILQ_REMOVE(E1SET(ei), escape, entry);
/*
 * Property-parser callback for the "CH" key: handles the selector
 * character of the escape being defined.  *context is an array of
 * pointers whose element 0 is that escape; `name` is unused.
 * NOTE(review): the assignment of escape->ch from `s` and the error
 * returns are on lines not visible in this excerpt.
 */
506 _citrus_HZ_parse_char(void **context, const char *name __unused, const char *s)
511 p = (void **)*context;
512 escape = (escape_t *)p[0];
/* A non-zero ch means the selector was already set for this escape. */
513 if (escape->ch != '\0')
/* The selector may not be ESCAPE_CHAR itself, and `s` must end here. */
516 if (escape->ch == ESCAPE_CHAR || *s != '\0')
/*
 * Property-parser callback for the "GL" and "GR" keys: allocates a
 * graphic-set record, attaches it to the left (GL) or right (GR) slot of
 * the escape being defined, and fills in its charset and byte length
 * from the value string `s`.  *context is an array of pointers: element 0
 * is the escape, element 1 the encoding info.
 * NOTE(review): the allocation-failure check, the error/cleanup paths and
 * the return statements are on lines not visible in this excerpt.
 */
523 _citrus_HZ_parse_graphic(void **context, const char *name, const char *s)
530 p = (void **)*context;
531 escape = (escape_t *)p[0];
532 ei = (_HZEncodingInfo *)p[1];
533 graphic = malloc(sizeof(*graphic));
536 memset(graphic, 0, sizeof(*graphic));
/* The key name picks the slot; the slot is tested for an existing value. */
537 if (strcmp("GL", name) == 0) {
538 if (GL(escape) != NULL)
540 GL(escape) = graphic;
541 } else if (strcmp("GR", name) == 0) {
542 if (GR(escape) != NULL)
544 GR(escape) = graphic;
/* Back-pointer from the graphic set to its owning escape. */
550 graphic->escape = escape;
/* "ASCII" and "GB2312" are matched as case-insensitive prefixes of s. */
551 if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
554 graphic->charset = ASCII;
558 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
561 graphic->charset = GB2312;
/* Remember this graphic set as the encoding's GB2312 set. */
563 ei->gb2312 = graphic;
/* "94*" / "96*" prefixes select the generic CS94 / CS96 charsets. */
565 } else if (strncmp("94*", s, 3) == 0)
566 graphic->charset = CS94;
567 else if (strncmp("96*", s, 3) == 0)
568 graphic->charset = CS96;
/* A digit '1'..'3' gives the number of bytes per character. */
573 case '1': case '2': case '3':
574 graphic->length = (size_t)(*s - '0');
/*
 * Keys accepted inside one escape definition and the callback that
 * handles each: "CH" sets the selector character, "GL" and "GR" share the
 * graphic-set parser (which distinguishes them by key name).
 */
584 static const _citrus_prop_hint_t escape_hints[] = {
585 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
586 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
587 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
588 _CITRUS_PROP_HINT_END
/*
 * Property-parser callback for the top-level "0" and "1" keys: allocates
 * a zeroed escape record, appends it to the E0 list (key "0") or the E1
 * list (key "1") of the encoding info passed as *context, and then parses
 * the value string `s` as a nested definition using escape_hints.
 * Returns the result of that nested parse.
 * NOTE(review): the allocation-failure check, the handling of any other
 * key name and the setup of p[] beyond element 0 are on lines not
 * visible in this excerpt.
 */
592 _citrus_HZ_parse_escape(void **context, const char *name, const char *s)
598 ei = (_HZEncodingInfo *)*context;
599 escape = malloc(sizeof(*escape));
602 memset(escape, 0, sizeof(*escape));
603 if (strcmp("0", name) == 0) {
604 escape->set = E0SET(ei);
605 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
606 } else if (strcmp("1", name) == 0) {
607 escape->set = E1SET(ei);
608 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
/* Element 0 of the nested context is the escape being defined. */
613 p[0] = (void *)escape;
615 return (_citrus_prop_parse_variable(
616 escape_hints, (void *)&p[0], s, strlen(s)));
/*
 * Top-level keys of the encoding's variable string.  Both "0" and "1" are
 * handled by the escape parser, which uses the key name to choose between
 * the E0 and E1 escape lists.
 */
619 static const _citrus_prop_hint_t root_hints[] = {
620 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
621 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
622 _CITRUS_PROP_HINT_END
/*
 * Initialize the encoding info `ei` from the variable string `var` of
 * length `lenvar`: zero the structure, set up the two empty escape lists,
 * and parse `var` with root_hints, passing `ei` as the parser context.
 * NOTE(review): the conditions under which the uninit call below runs,
 * any post-parse validation and the return statements are on lines not
 * visible in this excerpt.
 */
626 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
627 const void * __restrict var, size_t lenvar)
631 memset(ei, 0, sizeof(*ei));
632 TAILQ_INIT(E0SET(ei));
633 TAILQ_INIT(E1SET(ei));
634 errnum = _citrus_prop_parse_variable(
635 root_hints, (void *)ei, var, lenvar);
/* Release whatever the parse has built so far. */
637 _citrus_HZ_encoding_module_uninit(ei);
641 /* ----------------------------------------------------------------------
642 * public interface for stdenc
645 _CITRUS_STDENC_DECLS(HZ);
646 _CITRUS_STDENC_DEF_OPS(HZ);
648 #include "citrus_stdenc_template.h"