1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Portability shims.  The preprocessor conditionals that choose between
   the definitions below are only partly visible in this listing, so each
   macro is described by its body alone:
   _ (MSGID) expands to a gettext call and N_ (MSGID) expands to MSGID
   unchanged; the mbrtowc substitute stores the byte *S in *PWC and yields
   1 when that byte is nonzero and 0 otherwise, ignoring N and PS; the
   first iswprint substitute defers to isprint on the value converted to
   unsigned char, the second one always yields 1; the mbsinit substitute
   always yields 1; the SIZE_MAX substitute is the largest size_t value;
   INT_BITS is the number of bits in the object representation of int.  */
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
43 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
51 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
52 other macros are defined only for documentation and to satisfy C
56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
57 # define iswprint(wc) isprint ((unsigned char) (wc))
61 #if !defined mbsinit && !HAVE_MBSINIT
62 # define mbsinit(ps) 1
69 # if !defined iswprint && !HAVE_ISWPRINT
70 # define iswprint(wc) 1
75 # define SIZE_MAX ((size_t) -1)
78 #define INT_BITS (sizeof (int) * CHAR_BIT)
80 struct quoting_options
82 /* Basic quoting style. */
83 enum quoting_style style;
85 /* Quote the characters indicated by this bit vector even if the
86 quoting style would not normally require them to be quoted. */
/* Layout: one bit per unsigned char value.  The flag for character UC
   is bit UC % INT_BITS of element UC / INT_BITS, which is why the array
   needs UCHAR_MAX / INT_BITS + 1 elements.  */
87 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
90 /* Names of quoting styles. */
91 char const *const quoting_style_args[] =
103 /* Correspondences to quoting style names. */
/* NOTE(review): the initializer of quoting_style_args and some entries
   of this table are not visible in this listing.  The two arrays are
   presumably parallel (entry K here is the style named by entry K of
   quoting_style_args) -- confirm that both lists are kept in the same
   order whenever a style is added.  */
104 enum quoting_style const quoting_style_vals[] =
106 literal_quoting_style,
108 shell_always_quoting_style,
110 escape_quoting_style,
111 locale_quoting_style,
112 clocale_quoting_style
115 /* The default quoting options. */
/* Functions in this file fall back to this object when they are handed a
   null options pointer.  It has no initializer, so as a static object it
   starts out all zero: no character is flagged in quote_these_too, and
   the style is whichever enumerator of enum quoting_style has the value
   0 (the enum is declared outside this file section).  */
116 static struct quoting_options default_quoting_options;
118 /* Allocate a new set of quoting options, with contents initially identical
119 to O if O is not null, or to the default if O is null.
120 It is the caller's responsibility to free the result. */
121 struct quoting_options *
122 clone_quoting_options (struct quoting_options *o)
/* NOTE(review): the result of xmalloc is dereferenced without a check.
   xmalloc is defined elsewhere and presumably never returns null --
   confirm.  */
125 struct quoting_options *p = xmalloc (sizeof *p);
/* Whole-structure assignment: copies the style and the complete
   quote_these_too bit vector from O, or from the default when O is
   null.  */
126 *p = *(o ? o : &default_quoting_options);
131 /* Get the value of O's quoting style. If O is null, use the default. */
133 get_quoting_style (struct quoting_options *o)
/* A null O is redirected to default_quoting_options before the read, so
   the member access never goes through a null pointer.  */
135 return (o ? o : &default_quoting_options)->style;
138 /* In O (or in the default if O is null),
139 set the value of the quoting style to S. */
141 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O this writes to the shared static default, so the change
   is seen by every later call in this file that falls back to the
   default options.  */
143 (o ? o : &default_quoting_options)->style = s;
146 /* In O (or in the default if O is null),
147 set the value of the quoting options for character C to I.
148 Return the old value. Currently, the only values defined for I are
149 0 (the default) and 1 (which means to quote the character even if
150 it would not otherwise be quoted). */
152 set_char_quoting (struct quoting_options *o, char c, int i)
/* Convert to unsigned char first so that a negative char value cannot
   turn into a negative array index or shift count.  */
154 unsigned char uc = c;
/* P addresses the word of the bit vector that holds the flag for UC;
   SHIFT is the position of that flag inside the word.  */
155 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
156 int shift = uc % INT_BITS;
/* R is the flag as it was before this call.  */
157 int r = (*p >> shift) & 1;
/* Flip the stored bit only when the low bit of I differs from R; every
   other bit of I is ignored.
   NOTE(review): *P is a signed int and SHIFT can be as large as
   INT_BITS - 1, so this statement can shift a 1 into the sign bit, which
   ISO C leaves undefined for signed operands (and the right shift above
   is implementation-defined once the word is negative).  An unsigned
   element type would avoid both.  */
158 *p ^= ((i & 1) ^ r) << shift;
162 /* MSGID approximates a quotation mark. Return its translation if it
163 has one; otherwise, return either it or "\"", depending on S. */
165 gettext_quote (char const *msgid, enum quoting_style s)
/* _ is defined near the top of this file as a gettext call.  */
167 char const *translation = _(msgid);
/* Pointer comparison, not string comparison: it asks whether the lookup
   handed back MSGID itself, which is taken to mean that the catalog has
   no translation.  Only clocale_quoting_style replaces the untranslated
   mark; the replacement itself is on a line not visible in this
   listing.  */
168 if (translation == msgid && s == clocale_quoting_style)
173 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
174 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
175 non-quoting-style part of O to control quoting.
176 Terminate the output with a null character, and return the written
177 size of the output, not counting the terminating null.
178 If BUFFERSIZE is too small to store the output string, return the
179 value that would have been returned had BUFFERSIZE been large enough.
180 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
182 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
183 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
184 style specified by O, and O may not be null. */
187 quotearg_buffer_restyled (char *buffer, size_t buffersize,
188 char const *arg, size_t argsize,
189 enum quoting_style quoting_style,
190 struct quoting_options const *o)
/* QUOTE_STRING and QUOTE_STRING_LEN describe the closing quote text; they
   are set per style in the first switch and the text is written out
   after the main loop.  BACKSLASH_ESCAPES is turned on by the styles that
   escape with a backslash.  */
194 char const *quote_string = 0;
195 size_t quote_string_len = 0;
196 bool backslash_escapes = false;
197 bool unibyte_locale = MB_CUR_MAX == 1;
202 if (len < buffersize) \
208 switch (quoting_style)
210 case c_quoting_style:
212 backslash_escapes = true;
214 quote_string_len = 1;
217 case escape_quoting_style:
218 backslash_escapes = true;
221 case locale_quoting_style:
222 case clocale_quoting_style:
224 /* Get translations for open and closing quotation marks.
226 The message catalog should translate "`" to a left
227 quotation mark suitable for the locale, and similarly for
228 "'". If the catalog has no translation,
229 locale_quoting_style quotes `like this', and
230 clocale_quoting_style quotes "like this".
232 For example, an American English Unicode locale should
233 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
234 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
235 MARK). A British English Unicode locale should instead
236 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
237 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
239 char const *left = gettext_quote (N_("`"), quoting_style);
240 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote right away.  QUOTE_STRING is only borrowed as
   the cursor for that copy and is repointed at the closing quote
   afterwards.  */
241 for (quote_string = left; *quote_string; quote_string++)
242 STORE (*quote_string);
243 backslash_escapes = true;
244 quote_string = right;
245 quote_string_len = strlen (quote_string);
249 case shell_always_quoting_style:
252 quote_string_len = 1;
/* Visit each byte of ARG: up to the terminating null when ARGSIZE is
   SIZE_MAX, otherwise exactly ARGSIZE bytes, embedded nulls included.  */
259 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
/* Does the closing quote text occur at this position of ARG?
   NOTE(review): while ARGSIZE is still SIZE_MAX the bound on the second
   line of this condition cannot fail, so memcmp may be asked to look at
   bytes beyond the terminating null of ARG when the closing quote is
   longer than one byte.  Consider resolving ARGSIZE with strlen before
   the comparison.  */
264 if (backslash_escapes
266 && i + quote_string_len <= argsize
267 && memcmp (arg + i, quote_string, quote_string_len) == 0)
274 if (backslash_escapes)
284 switch (quoting_style)
286 case shell_quoting_style:
287 goto use_shell_always_quoting_style;
289 case c_quoting_style:
/* Presumably the trigraph guard: a question mark followed by a second
   one and then by one of the characters listed in the cases below.  */
290 if (i + 2 < argsize && arg[i + 1] == '?')
294 case '(': case ')': case '-': case '/':
295 case '<': case '=': case '>':
296 /* Escape the second '?' in what would otherwise be
312 case '\a': esc = 'a'; goto c_escape;
313 case '\b': esc = 'b'; goto c_escape;
314 case '\f': esc = 'f'; goto c_escape;
315 case '\n': esc = 'n'; goto c_and_shell_escape;
316 case '\r': esc = 'r'; goto c_and_shell_escape;
317 case '\t': esc = 't'; goto c_and_shell_escape;
318 case '\v': esc = 'v'; goto c_escape;
319 case '\\': esc = c; goto c_and_shell_escape;
322 if (quoting_style == shell_quoting_style)
323 goto use_shell_always_quoting_style;
325 if (backslash_escapes)
332 case '{': case '}': /* sometimes special if isolated */
333 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
341 case '!': /* special in bash */
342 case '"': case '$': case '&':
343 case '(': case ')': case '*': case ';':
345 case '=': /* sometimes special in 0th or (with "set -k") later args */
347 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
349 /* A shell special character. In theory, '$' and '`' could
350 be the first bytes of multibyte characters, which means
351 we should check them with mbrtowc, but in practice this
352 doesn't happen so it's not worth worrying about. */
353 if (quoting_style == shell_quoting_style)
354 goto use_shell_always_quoting_style;
358 switch (quoting_style)
360 case shell_quoting_style:
361 goto use_shell_always_quoting_style;
363 case shell_always_quoting_style:
374 case '%': case '+': case ',': case '-': case '.': case '/':
375 case '0': case '1': case '2': case '3': case '4': case '5':
376 case '6': case '7': case '8': case '9': case ':':
377 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
378 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
379 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
380 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
381 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
382 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
383 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
384 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
385 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
386 /* These characters don't cause problems, no matter what the
387 quoting style is. They cannot start multibyte sequences. */
391 /* If we have a multibyte sequence, copy it until we reach
392 its end, find an error, or come back to the initial shift
393 state. For C-like styles, if the sequence has
394 unprintable characters, escape the whole sequence, since
395 we can't easily escape single characters within it. */
397 /* Length of multibyte sequence found so far. */
405 printable = isprint (c) != 0;
410 memset (&mbstate, 0, sizeof mbstate);
/* Pin down the real length once; the conversion below is given the
   number of bytes that remain in ARG.  */
414 if (argsize == SIZE_MAX)
415 argsize = strlen (arg);
420 size_t bytes = mbrtowc (&w, &arg[i + m],
421 argsize - (i + m), &mbstate);
424 else if (bytes == (size_t) -1)
429 else if (bytes == (size_t) -2)
432 while (i + m < argsize && arg[i + m])
438 /* Work around a bug with older shells that "see" a '\'
439 that is really the 2nd byte of a multibyte character.
440 In practice the problem is limited to ASCII
441 chars >= '@' that are shell special chars. */
442 if ('[' == 0x5b && quoting_style == shell_quoting_style)
445 for (j = 1; j < bytes; j++)
446 switch (arg[i + m + j])
448 case '[': case '\\': case '^':
450 goto use_shell_always_quoting_style;
459 while (! mbsinit (&mbstate));
462 if (1 < m || (backslash_escapes && ! printable))
464 /* Output a multibyte sequence, or an escaped
465 unprintable unibyte character. */
470 if (backslash_escapes && ! printable)
/* Leading digits of an octal escape for C: bits 6 and up, then bits 3 to
   5.  The remaining digit is presumably written on a line that is not
   visible in this listing.  */
473 STORE ('0' + (c >> 6));
474 STORE ('0' + ((c >> 3) & 7));
/* Honor the per-character override from O (see quote_these_too).
   NOTE(review): 1 << (c % INT_BITS) shifts a signed int; for bit
   INT_BITS - 1 that is undefined in ISO C.  An unsigned constant would
   avoid it.  */
488 if (! (backslash_escapes
489 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
499 if (i == 0 && quoting_style == shell_quoting_style)
500 goto use_shell_always_quoting_style;
503 for (; *quote_string; quote_string++)
504 STORE (*quote_string);
506 if (len < buffersize)
/* Restart path: the shell style met something that needs quoting, so ARG
   is quoted again from its beginning into the same BUFFER, this time with
   shell_always_quoting_style, and that result is returned.  */
510 use_shell_always_quoting_style:
511 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
512 shell_always_quoting_style, o);
/* Public entry point over quotearg_buffer_restyled.  The options pointer
   is normalized into P first (O itself, or the file default when O is
   null), because quotearg_buffer_restyled documents that its own options
   argument may not be null.  The trailing arguments of the call below are
   not visible in this listing.  */
515 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
516 argument ARG (of size ARGSIZE), using O to control quoting.
517 If O is null, use the default.
518 Terminate the output with a null character, and return the written
519 size of the output, not counting the terminating null.
520 If BUFFERSIZE is too small to store the output string, return the
521 value that would have been returned had BUFFERSIZE been large enough.
522 If ARGSIZE is SIZE_MAX, use the string length of the argument for
525 quotearg_buffer (char *buffer, size_t buffersize,
526 char const *arg, size_t argsize,
527 struct quoting_options const *o)
529 struct quoting_options const *p = o ? o : &default_quoting_options;
531 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
537 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
538 allocated storage containing the quoted string. */
540 quotearg_alloc (char const *arg, size_t argsize,
541 struct quoting_options const *o)
/* Pass 1 only measures: with a null buffer of size 0 nothing can be
   stored, and the return value is the length the quoted text needs.  That
   length excludes the terminating null, hence the + 1.  */
544 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
545 char *buf = xmalloc (bufsize);
/* Pass 2 produces the text into the exactly sized buffer.  */
546 quotearg_buffer (buf, bufsize, arg, argsize, o);
551 /* Use storage slot N to return a quoted version of argument ARG.
552 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
553 null-terminated string.
554 OPTIONS specifies the quoting options.
555 The returned value points to static storage that can be
556 reused by the next call to this function with the same value of N.
557 N must be nonnegative. N is deliberately declared with type "int"
558 to allow for future extensions (using negative values). */
560 quotearg_n_options (int n, char const *arg, size_t argsize,
561 struct quoting_options const *options)
565 /* Preallocate a slot 0 buffer, so that the caller can always quote
566 one small component of a "memory exhausted" message in slot 0. */
567 static char slot0[256];
568 static unsigned int nslots = 1;
/* The slot table starts out as the single static entry slotvec0, which
   wraps slot0; NSLOTS counts the entries currently in the table.  */
575 static struct slotvec slotvec0 = {sizeof slot0, slot0};
576 static struct slotvec *slotvec = &slotvec0;
/* Growth path.  N1 is the entry count needed to make slot N valid; N0 is
   declared on a line not visible here and is presumably N converted to
   unsigned.  */
583 unsigned int n1 = n0 + 1;
/* xalloc_oversized is defined elsewhere; presumably it reports that
   N1 * sizeof *slotvec would overflow -- confirm.  */
585 if (xalloc_oversized (n1, sizeof *slotvec))
/* The initial table is a static object and so cannot be handed to
   xrealloc; the first growth switches to a one-entry heap block before
   the resize below.  Copying slotvec0 into that block presumably happens
   on a line not visible here.  */
588 if (slotvec == &slotvec0)
590 slotvec = xmalloc (sizeof *slotvec);
593 slotvec = xrealloc (slotvec, n1 * sizeof *slotvec);
/* Zero the newly added entries so that each starts with size 0 and a
   null val.  */
594 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
/* First try the buffer the slot already owns.  QSIZE is the length the
   quoted text needs, whether or not it fit.  */
599 size_t size = slotvec[n].size;
600 char *val = slotvec[n].val;
601 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* Replacement path (its guarding condition is not visible here): size the
   slot for the text plus its terminating null and quote again into fresh
   storage.  */
605 slotvec[n].size = size = qsize + 1;
608 slotvec[n].val = val = xmalloc (size);
609 quotearg_buffer (val, size, arg, argsize, options);
/* Quote the null-terminated string ARG with the default quoting options
   and return the result in storage slot N.  SIZE_MAX as the size tells
   quotearg_n_options that ARG is null-terminated.  */
618 quotearg_n (int n, char const *arg)
620 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Shorthand for quotearg_n with slot 0: quote the null-terminated string
   ARG with the default quoting options.  */
624 quotearg (char const *arg)
626 return quotearg_n (0, arg);
629 /* Return quoting options for STYLE, with no extra quoting. */
630 static struct quoting_options
631 quoting_options_from_style (enum quoting_style style)
633 struct quoting_options o;
/* Clear the whole bit vector so that no character is force-quoted.  The
   store of STYLE into O is presumably on a line not visible in this
   listing.  */
635 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG using style S with no extra
   characters quoted, returning the result in storage slot N.  The option
   set is a temporary built by quoting_options_from_style.  */
640 quotearg_n_style (int n, enum quoting_style s, char const *arg)
642 struct quoting_options const o = quoting_options_from_style (s);
643 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Like quotearg_n_style, except that ARG is a byte sequence of explicit
   length ARGSIZE; ARGSIZE is passed through to quotearg_n_options
   unchanged.  */
647 quotearg_n_style_mem (int n, enum quoting_style s,
648 char const *arg, size_t argsize)
650 struct quoting_options const o = quoting_options_from_style (s);
651 return quotearg_n_options (n, arg, argsize, &o);
/* Shorthand for quotearg_n_style with slot 0: quote the null-terminated
   string ARG using style S.  */
655 quotearg_style (enum quoting_style s, char const *arg)
657 return quotearg_n_style (0, s, arg);
/* Quote the null-terminated string ARG in slot 0 using the default
   options, and additionally force the character CH to be quoted.  */
661 quotearg_char (char const *arg, char ch)
/* Work on a local copy so that the shared default options are left
   unmodified.  */
663 struct quoting_options options;
664 options = default_quoting_options;
665 set_char_quoting (&options, ch, 1);
666 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Shorthand for quotearg_char with a colon as the character that must be
   quoted.  */
670 quotearg_colon (char const *arg)
672 return quotearg_char (arg, ':');