2 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/endian.h>
31 #include <sys/types.h>
41 #define UC_TO_MB_FLAG 1
42 #define MB_TO_WC_FLAG 2
43 #define MB_TO_UC_FLAG 4
44 #define WC_TO_MB_FLAG 8
46 #define MAX(a,b) ((a) < (b) ? (b) : (a))
48 extern char *__progname;
50 static const char *optstr = "cdilrt";
51 static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n"
54 "DST_UNIT_BITS\t32\n\n"
56 "#\n# Generated with Citrus iconv (FreeBSD)\n#\n";
65 static void do_conv(iconv_t, bool);
66 void mb_to_uc_fb(const char*, size_t,
67 void (*write_replacement)(const unsigned int *,
68 size_t, void *), void *, void *);
69 void mb_to_wc_fb(const char*, size_t,
70 void (*write_replacement) (const wchar_t *, size_t, void *),
72 void uc_to_mb_fb(unsigned int,
73 void (*write_replacement) (const char *, size_t, void *), void *,
75 void wc_to_mb_fb(wchar_t,
76 void (*write_replacement)(const char *,
77 size_t, void *), void *, void *);
79 struct option long_options[] =
81 {"citrus", no_argument, NULL, 'c'},
82 {"diagnostic", no_argument, NULL, 'd'},
83 {"ignore", no_argument, NULL, 'i'},
84 {"long", no_argument, NULL, 'l'},
85 {"reverse", no_argument, NULL, 'r'},
86 {"translit", no_argument, NULL, 't'},
87 {NULL, no_argument, NULL, 0}
93 fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
98 format_diag(int errcode)
101 const char *u2m, *m2u, *m2w, *w2m;
118 u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
119 m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
120 m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
121 w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
123 printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
127 magnitude(const uint32_t p)
132 else if (p >> 16 == 0)
135 return (p >> 24 == 0 ? 3 : 4);
139 format(const uint32_t data)
142 /* XXX: could be simpler, something like this but with leading 0s?
144 printf("0x%.*X", magnitude(data), data);
147 switch (magnitude(data)) {
150 printf("0x%04X", data);
153 printf("0x%06X", data);
156 printf("0x%08X", data);
162 uc_to_mb_fb(unsigned int code,
163 void (*write_replacement)(const char *buf, size_t buflen,
164 void* callback_arg), void* callback_arg, void* data)
167 fb_flags |= UC_TO_MB_FLAG;
171 mb_to_wc_fb(const char* inbuf, size_t inbufsize,
172 void (*write_replacement)(const wchar_t *buf, size_t buflen,
173 void* callback_arg), void* callback_arg, void* data)
176 fb_flags |= MB_TO_WC_FLAG;
180 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
181 void (*write_replacement)(const unsigned int *buf, size_t buflen,
182 void* callback_arg), void* callback_arg, void* data)
185 fb_flags |= MB_TO_UC_FLAG;
189 wc_to_mb_fb(wchar_t wc,
190 void (*write_replacement)(const char *buf, size_t buflen,
191 void* callback_arg), void* callback_arg, void* data)
194 fb_flags |= WC_TO_MB_FLAG;
198 main (int argc, char *argv[])
200 struct iconv_fallbacks fbs;
205 while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
233 fbs.uc_to_mb_fallback = uc_to_mb_fb;
234 fbs.mb_to_wc_fallback = mb_to_wc_fb;
235 fbs.mb_to_uc_fallback = mb_to_uc_fb;
236 fbs.wc_to_mb_fallback = wc_to_mb_fb;
/*
 * Build the target codeset name from argv[1], appending "//TRANSLIT" when
 * tflag is set and "//IGNORE" when iflag is set.  The suffix must be spelled
 * exactly "//TRANSLIT"; a misspelled suffix is not a transliteration request.
 */
240 asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRANSLIT" : "",
241 iflag ? "//IGNORE" : "");
243 if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
246 if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
251 asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
252 iflag ? "//IGNORE" : "");
254 if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
256 if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
259 printf("# $FreeBSD$\n\n");
260 printf("TYPE\t\tROWCOL\n");
261 printf("NAME\t\tUCS/%s\n", argv[0]);
262 printf("%s", citrus_common);
266 if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
268 if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
271 printf("# $FreeBSD$\n\n");
272 printf("TYPE\t\tROWCOL\n");
273 printf("NAME\t\t%s/UCS\n", argv[0]);
274 printf("%s", citrus_common);
279 if (iconv_close(cd) != 0)
282 return (EXIT_SUCCESS);
286 do_conv(iconv_t cd, bool uniinput) {
287 size_t inbytesleft, outbytesleft, ret;
293 for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
/* Surrogate code points occupy 0xD800 through 0xDFFF inclusive. */
294 if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDFFF))
296 inbytesleft = uniinput ? 4 : magnitude(inbuf);
299 outbuf_ = (char *)&outbuf;
300 inbuf_ = (const char *)&inbuf;
301 iconv(cd, NULL, NULL, NULL, NULL);
304 ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
305 if (ret == (size_t)-1) {