]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/chars.c
Update to ELF Tool Chain r3475
[FreeBSD/FreeBSD.git] / contrib / mdocml / chars.c
1 /*      $Id: chars.c,v 1.68 2015/10/13 22:59:54 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "mandoc_ohash.h"
32 #include "libmandoc.h"
33
/*
 * One row of the character table: the name of a roff character
 * escape, the string used to render it in ASCII, and its Unicode
 * codepoint.
 */
struct ln {
        const char       roffcode[16];  /* escape name; key of the hash table */
        const char      *ascii;         /* ASCII rendering, may be empty */
        int              unicode;       /* codepoint; 0 where the table gives none */
};
39
40 /* Special break control characters. */
41 static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
42 static const char ascii_break[2] = { ASCII_BREAK, '\0' };
43
44 static struct ln lines[] = {
45
46         /* Spacing. */
47         { " ",                  ascii_nbrsp,    0x00a0  },
48         { "~",                  ascii_nbrsp,    0x00a0  },
49         { "0",                  " ",            0x2002  },
50         { "|",                  "",             0       },
51         { "^",                  "",             0       },
52         { "&",                  "",             0       },
53         { "%",                  "",             0       },
54         { ":",                  ascii_break,    0       },
55         /* XXX The following three do not really belong here. */
56         { "t",                  "",             0       },
57         { "c",                  "",             0       },
58         { "}",                  "",             0       },
59
60         /* Lines. */
61         { "ba",                 "|",            0x007c  },
62         { "br",                 "|",            0x2502  },
63         { "ul",                 "_",            0x005f  },
64         { "rn",                 "-",            0x203e  },
65         { "bb",                 "|",            0x00a6  },
66         { "sl",                 "/",            0x002f  },
67         { "rs",                 "\\",           0x005c  },
68
69         /* Text markers. */
70         { "ci",                 "O",            0x25cb  },
71         { "bu",                 "+\bo",         0x2022  },
72         { "dd",                 "|\b=",         0x2021  },
73         { "dg",                 "|\b-",         0x2020  },
74         { "lz",                 "<>",           0x25ca  },
75         { "sq",                 "[]",           0x25a1  },
76         { "ps",                 "<par>",        0x00b6  },
77         { "sc",                 "<sec>",        0x00a7  },
78         { "lh",                 "<=",           0x261c  },
79         { "rh",                 "=>",           0x261e  },
80         { "at",                 "@",            0x0040  },
81         { "sh",                 "#",            0x0023  },
82         { "CR",                 "_|",           0x21b5  },
83         { "OK",                 "\\/",          0x2713  },
84
85         /* Legal symbols. */
86         { "co",                 "(C)",          0x00a9  },
87         { "rg",                 "(R)",          0x00ae  },
88         { "tm",                 "tm",           0x2122  },
89
90         /* Punctuation. */
91         { "em",                 "--",           0x2014  },
92         { "en",                 "-",            0x2013  },
93         { "hy",                 "-",            0x2010  },
94         { "e",                  "\\",           0x005c  },
95         { ".",                  ".",            0x002e  },
96         { "r!",                 "!",            0x00a1  },
97         { "r?",                 "?",            0x00bf  },
98
99         /* Quotes. */
100         { "Bq",                 ",,",           0x201e  },
101         { "bq",                 ",",            0x201a  },
102         { "lq",                 "\"",           0x201c  },
103         { "rq",                 "\"",           0x201d  },
104         { "Lq",                 "``",           0x201c  },
105         { "Rq",                 "''",           0x201d  },
106         { "oq",                 "`",            0x2018  },
107         { "cq",                 "\'",           0x2019  },
108         { "aq",                 "\'",           0x0027  },
109         { "dq",                 "\"",           0x0022  },
110         { "Fo",                 "<<",           0x00ab  },
111         { "Fc",                 ">>",           0x00bb  },
112         { "fo",                 "<",            0x2039  },
113         { "fc",                 ">",            0x203a  },
114
115         /* Brackets. */
116         { "lB",                 "[",            0x005b  },
117         { "rB",                 "]",            0x005d  },
118         { "lC",                 "{",            0x007b  },
119         { "rC",                 "}",            0x007d  },
120         { "la",                 "<",            0x27e8  },
121         { "ra",                 ">",            0x27e9  },
122         { "bv",                 "|",            0x23aa  },
123         { "braceex",            "|",            0x23aa  },
124         { "bracketlefttp",      "|",            0x23a1  },
125         { "bracketleftbt",      "|",            0x23a3  },
126         { "bracketleftex",      "|",            0x23a2  },
127         { "bracketrighttp",     "|",            0x23a4  },
128         { "bracketrightbt",     "|",            0x23a6  },
129         { "bracketrightex",     "|",            0x23a5  },
130         { "lt",                 ",-",           0x23a7  },
131         { "bracelefttp",        ",-",           0x23a7  },
132         { "lk",                 "{",            0x23a8  },
133         { "braceleftmid",       "{",            0x23a8  },
134         { "lb",                 "`-",           0x23a9  },
135         { "braceleftbt",        "`-",           0x23a9  },
136         { "braceleftex",        "|",            0x23aa  },
137         { "rt",                 "-.",           0x23ab  },
138         { "bracerighttp",       "-.",           0x23ab  },
139         { "rk",                 "}",            0x23ac  },
140         { "bracerightmid",      "}",            0x23ac  },
141         { "rb",                 "-\'",          0x23ad  },
142         { "bracerightbt",       "-\'",          0x23ad  },
143         { "bracerightex",       "|",            0x23aa  },
144         { "parenlefttp",        "/",            0x239b  },
145         { "parenleftbt",        "\\",           0x239d  },
146         { "parenleftex",        "|",            0x239c  },
147         { "parenrighttp",       "\\",           0x239e  },
148         { "parenrightbt",       "/",            0x23a0  },
149         { "parenrightex",       "|",            0x239f  },
150
151         /* Arrows and lines. */
152         { "<-",                 "<-",           0x2190  },
153         { "->",                 "->",           0x2192  },
154         { "<>",                 "<->",          0x2194  },
155         { "da",                 "|\bv",         0x2193  },
156         { "ua",                 "|\b^",         0x2191  },
157         { "va",                 "^v",           0x2195  },
158         { "lA",                 "<=",           0x21d0  },
159         { "rA",                 "=>",           0x21d2  },
160         { "hA",                 "<=>",          0x21d4  },
161         { "uA",                 "=\b^",         0x21d1  },
162         { "dA",                 "=\bv",         0x21d3  },
163         { "vA",                 "^=v",          0x21d5  },
164
165         /* Logic. */
166         { "AN",                 "^",            0x2227  },
167         { "OR",                 "v",            0x2228  },
168         { "no",                 "~",            0x00ac  },
169         { "tno",                "~",            0x00ac  },
170         { "te",                 "3",            0x2203  },
171         { "fa",                 "-\bV",         0x2200  },
172         { "st",                 "-)",           0x220b  },
173         { "tf",                 ".:.",          0x2234  },
174         { "3d",                 ".:.",          0x2234  },
175         { "or",                 "|",            0x007c  },
176
177         /* Mathematicals. */
178         { "pl",                 "+",            0x002b  },
179         { "mi",                 "-",            0x2212  },
180         { "-",                  "-",            0x002d  },
181         { "-+",                 "-+",           0x2213  },
182         { "+-",                 "+-",           0x00b1  },
183         { "t+-",                "+-",           0x00b1  },
184         { "pc",                 ".",            0x00b7  },
185         { "md",                 ".",            0x22c5  },
186         { "mu",                 "x",            0x00d7  },
187         { "tmu",                "x",            0x00d7  },
188         { "c*",                 "O\bx",         0x2297  },
189         { "c+",                 "O\b+",         0x2295  },
190         { "di",                 "-:-",          0x00f7  },
191         { "tdi",                "-:-",          0x00f7  },
192         { "f/",                 "/",            0x2044  },
193         { "**",                 "*",            0x2217  },
194         { "<=",                 "<=",           0x2264  },
195         { ">=",                 ">=",           0x2265  },
196         { "<<",                 "<<",           0x226a  },
197         { ">>",                 ">>",           0x226b  },
198         { "eq",                 "=",            0x003d  },
199         { "!=",                 "!=",           0x2260  },
200         { "==",                 "==",           0x2261  },
201         { "ne",                 "!==",          0x2262  },
202         { "ap",                 "~",            0x223c  },
203         { "|=",                 "-~",           0x2243  },
204         { "=~",                 "=~",           0x2245  },
205         { "~~",                 "~~",           0x2248  },
206         { "~=",                 "~=",           0x2248  },
207         { "pt",                 "oc",           0x221d  },
208         { "es",                 "{}",           0x2205  },
209         { "mo",                 "E",            0x2208  },
210         { "nm",                 "!E",           0x2209  },
211         { "sb",                 "(=",           0x2282  },
212         { "nb",                 "(!=",          0x2284  },
213         { "sp",                 "=)",           0x2283  },
214         { "nc",                 "!=)",          0x2285  },
215         { "ib",                 "(=\b_",        0x2286  },
216         { "ip",                 "=\b_)",        0x2287  },
217         { "ca",                 "(^)",          0x2229  },
218         { "cu",                 "U",            0x222a  },
219         { "/_",                 "_\b/",         0x2220  },
220         { "pp",                 "_\b|",         0x22a5  },
221         { "is",                 "'\b,\bI",      0x222b  },
222         { "integral",           "'\b,\bI",      0x222b  },
223         { "sum",                "E",            0x2211  },
224         { "product",            "TT",           0x220f  },
225         { "coproduct",          "U",            0x2210  },
226         { "gr",                 "V",            0x2207  },
227         { "sr",                 "\\/",          0x221a  },
228         { "sqrt",               "\\/",          0x221a  },
229         { "lc",                 "|~",           0x2308  },
230         { "rc",                 "~|",           0x2309  },
231         { "lf",                 "|_",           0x230a  },
232         { "rf",                 "_|",           0x230b  },
233         { "if",                 "oo",           0x221e  },
234         { "Ah",                 "N",            0x2135  },
235         { "Im",                 "I",            0x2111  },
236         { "Re",                 "R",            0x211c  },
237         { "pd",                 "a",            0x2202  },
238         { "-h",                 "/h",           0x210f  },
239         { "12",                 "1/2",          0x00bd  },
240         { "14",                 "1/4",          0x00bc  },
241         { "34",                 "3/4",          0x00be  },
242
243         /* Ligatures. */
244         { "ff",                 "ff",           0xfb00  },
245         { "fi",                 "fi",           0xfb01  },
246         { "fl",                 "fl",           0xfb02  },
247         { "Fi",                 "ffi",          0xfb03  },
248         { "Fl",                 "ffl",          0xfb04  },
249         { "AE",                 "AE",           0x00c6  },
250         { "ae",                 "ae",           0x00e6  },
251         { "OE",                 "OE",           0x0152  },
252         { "oe",                 "oe",           0x0153  },
253         { "ss",                 "ss",           0x00df  },
254         { "IJ",                 "IJ",           0x0132  },
255         { "ij",                 "ij",           0x0133  },
256
257         /* Accents. */
258         { "a\"",                "\"",           0x02dd  },
259         { "a-",                 "-",            0x00af  },
260         { "a.",                 ".",            0x02d9  },
261         { "a^",                 "^",            0x005e  },
262         { "aa",                 "\'",           0x00b4  },
263         { "\'",                 "\'",           0x00b4  },
264         { "ga",                 "`",            0x0060  },
265         { "`",                  "`",            0x0060  },
266         { "ab",                 "'\b`",         0x02d8  },
267         { "ac",                 ",",            0x00b8  },
268         { "ad",                 "\"",           0x00a8  },
269         { "ah",                 "v",            0x02c7  },
270         { "ao",                 "o",            0x02da  },
271         { "a~",                 "~",            0x007e  },
272         { "ho",                 ",",            0x02db  },
273         { "ha",                 "^",            0x005e  },
274         { "ti",                 "~",            0x007e  },
275
276         /* Accented letters. */
277         { "'A",                 "'\bA",         0x00c1  },
278         { "'E",                 "'\bE",         0x00c9  },
279         { "'I",                 "'\bI",         0x00cd  },
280         { "'O",                 "'\bO",         0x00d3  },
281         { "'U",                 "'\bU",         0x00da  },
282         { "'a",                 "'\ba",         0x00e1  },
283         { "'e",                 "'\be",         0x00e9  },
284         { "'i",                 "'\bi",         0x00ed  },
285         { "'o",                 "'\bo",         0x00f3  },
286         { "'u",                 "'\bu",         0x00fa  },
287         { "`A",                 "`\bA",         0x00c0  },
288         { "`E",                 "`\bE",         0x00c8  },
289         { "`I",                 "`\bI",         0x00cc  },
290         { "`O",                 "`\bO",         0x00d2  },
291         { "`U",                 "`\bU",         0x00d9  },
292         { "`a",                 "`\ba",         0x00e0  },
293         { "`e",                 "`\be",         0x00e8  },
294         { "`i",                 "`\bi",         0x00ec  },
295         { "`o",                 "`\bo",         0x00f2  },
296         { "`u",                 "`\bu",         0x00f9  },
297         { "~A",                 "~\bA",         0x00c3  },
298         { "~N",                 "~\bN",         0x00d1  },
299         { "~O",                 "~\bO",         0x00d5  },
300         { "~a",                 "~\ba",         0x00e3  },
301         { "~n",                 "~\bn",         0x00f1  },
302         { "~o",                 "~\bo",         0x00f5  },
303         { ":A",                 "\"\bA",        0x00c4  },
304         { ":E",                 "\"\bE",        0x00cb  },
305         { ":I",                 "\"\bI",        0x00cf  },
306         { ":O",                 "\"\bO",        0x00d6  },
307         { ":U",                 "\"\bU",        0x00dc  },
308         { ":a",                 "\"\ba",        0x00e4  },
309         { ":e",                 "\"\be",        0x00eb  },
310         { ":i",                 "\"\bi",        0x00ef  },
311         { ":o",                 "\"\bo",        0x00f6  },
312         { ":u",                 "\"\bu",        0x00fc  },
313         { ":y",                 "\"\by",        0x00ff  },
314         { "^A",                 "^\bA",         0x00c2  },
315         { "^E",                 "^\bE",         0x00ca  },
316         { "^I",                 "^\bI",         0x00ce  },
317         { "^O",                 "^\bO",         0x00d4  },
318         { "^U",                 "^\bU",         0x00db  },
319         { "^a",                 "^\ba",         0x00e2  },
320         { "^e",                 "^\be",         0x00ea  },
321         { "^i",                 "^\bi",         0x00ee  },
322         { "^o",                 "^\bo",         0x00f4  },
323         { "^u",                 "^\bu",         0x00fb  },
324         { ",C",                 ",\bC",         0x00c7  },
325         { ",c",                 ",\bc",         0x00e7  },
326         { "/L",                 "/\bL",         0x0141  },
327         { "/l",                 "/\bl",         0x0142  },
328         { "/O",                 "/\bO",         0x00d8  },
329         { "/o",                 "/\bo",         0x00f8  },
330         { "oA",                 "o\bA",         0x00c5  },
331         { "oa",                 "o\ba",         0x00e5  },
332
333         /* Special letters. */
334         { "-D",                 "-\bD",         0x00d0  },
335         { "Sd",                 "d",            0x00f0  },
336         { "TP",                 "Th",           0x00de  },
337         { "Tp",                 "th",           0x00fe  },
338         { ".i",                 "i",            0x0131  },
339         { ".j",                 "j",            0x0237  },
340
341         /* Currency. */
342         { "Do",                 "$",            0x0024  },
343         { "ct",                 "/\bc",         0x00a2  },
344         { "Eu",                 "EUR",          0x20ac  },
345         { "eu",                 "EUR",          0x20ac  },
346         { "Ye",                 "=\bY",         0x00a5  },
347         { "Po",                 "GBP",          0x00a3  },
348         { "Cs",                 "o\bx",         0x00a4  },
349         { "Fn",                 ",\bf",         0x0192  },
350
351         /* Units. */
352         { "de",                 "<deg>",        0x00b0  },
353         { "%0",                 "%o",           0x2030  },
354         { "fm",                 "\'",           0x2032  },
355         { "sd",                 "''",           0x2033  },
356         { "mc",                 ",\bu",         0x00b5  },
357
358         /* Greek characters. */
359         { "*A",                 "A",            0x0391  },
360         { "*B",                 "B",            0x0392  },
361         { "*G",                 "G",            0x0393  },
362         { "*D",                 "_\b/_\b\\",    0x0394  },
363         { "*E",                 "E",            0x0395  },
364         { "*Z",                 "Z",            0x0396  },
365         { "*Y",                 "H",            0x0397  },
366         { "*H",                 "-\bO",         0x0398  },
367         { "*I",                 "I",            0x0399  },
368         { "*K",                 "K",            0x039a  },
369         { "*L",                 "/\\",          0x039b  },
370         { "*M",                 "M",            0x039c  },
371         { "*N",                 "N",            0x039d  },
372         { "*C",                 "_\bH",         0x039e  },
373         { "*O",                 "O",            0x039f  },
374         { "*P",                 "TT",           0x03a0  },
375         { "*R",                 "P",            0x03a1  },
376         { "*S",                 "S",            0x03a3  },
377         { "*T",                 "T",            0x03a4  },
378         { "*U",                 "Y",            0x03a5  },
379         { "*F",                 "I\bO",         0x03a6  },
380         { "*X",                 "X",            0x03a7  },
381         { "*Q",                 "I\bY",         0x03a8  },
382         { "*W",                 "_\bO",         0x03a9  },
383         { "*a",                 "a",            0x03b1  },
384         { "*b",                 "B",            0x03b2  },
385         { "*g",                 "y",            0x03b3  },
386         { "*d",                 "d",            0x03b4  },
387         { "*e",                 "e",            0x03b5  },
388         { "*z",                 ",\bC",         0x03b6  },
389         { "*y",                 "n",            0x03b7  },
390         { "*h",                 "-\b0",         0x03b8  },
391         { "*i",                 "i",            0x03b9  },
392         { "*k",                 "k",            0x03ba  },
393         { "*l",                 ">\b\\",        0x03bb  },
394         { "*m",                 ",\bu",         0x03bc  },
395         { "*n",                 "v",            0x03bd  },
396         { "*c",                 ",\bE",         0x03be  },
397         { "*o",                 "o",            0x03bf  },
398         { "*p",                 "-\bn",         0x03c0  },
399         { "*r",                 "p",            0x03c1  },
400         { "*s",                 "-\bo",         0x03c3  },
401         { "*t",                 "~\bt",         0x03c4  },
402         { "*u",                 "u",            0x03c5  },
403         { "*f",                 "|\bo",         0x03d5  },
404         { "*x",                 "x",            0x03c7  },
405         { "*q",                 "|\bu",         0x03c8  },
406         { "*w",                 "w",            0x03c9  },
407         { "+h",                 "-\b0",         0x03d1  },
408         { "+f",                 "|\bo",         0x03c6  },
409         { "+p",                 "-\bw",         0x03d6  },
410         { "+e",                 "e",            0x03f5  },
411         { "ts",                 "s",            0x03c2  },
412 };
413
414 static  struct ohash      mchars;
415
416
417 void
418 mchars_free(void)
419 {
420
421         ohash_delete(&mchars);
422 }
423
424 void
425 mchars_alloc(void)
426 {
427         size_t            i;
428         unsigned int      slot;
429
430         mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
431         for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
432                 slot = ohash_qlookup(&mchars, lines[i].roffcode);
433                 assert(ohash_find(&mchars, slot) == NULL);
434                 ohash_insert(&mchars, slot, lines + i);
435         }
436 }
437
438 int
439 mchars_spec2cp(const char *p, size_t sz)
440 {
441         const struct ln *ln;
442         const char      *end;
443
444         end = p + sz;
445         ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
446         return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1;
447 }
448
/*
 * Convert the sz bytes at p with mandoc_strntoi() in base 10.
 *
 * Returns the resulting number if it lies in the range 0 to 255
 * inclusive, or -1 otherwise.
 */
int
mchars_num2char(const char *p, size_t sz)
{
        const int        value = mandoc_strntoi(p, sz, 10);

        if (value < 0 || value > 255)
                return -1;
        return value;
}
457
/*
 * Convert the sz bytes at p with mandoc_strntoi() in base 16 and
 * return the result.
 *
 * The result is asserted to lie in the range 0 to 0x10FFFF; no
 * error value is returned from here.
 * NOTE(review): presumably callers validate the digits beforehand,
 * confirm at the call sites.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
        const int        i = mandoc_strntoi(p, sz, 16);

        assert(i >= 0 && i <= 0x10FFFF);
        return i;
}
467
468 const char *
469 mchars_spec2str(const char *p, size_t sz, size_t *rsz)
470 {
471         const struct ln *ln;
472         const char      *end;
473
474         end = p + sz;
475         ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
476         if (ln == NULL) {
477                 *rsz = 1;
478                 return sz == 1 ? p : NULL;
479         }
480
481         *rsz = strlen(ln->ascii);
482         return ln->ascii;
483 }
484
485 const char *
486 mchars_uc2str(int uc)
487 {
488         size_t    i;
489
490         for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
491                 if (uc == lines[i].unicode)
492                         return lines[i].ascii;
493         return "<?>";
494 }