diff options
author | peter <peter@FreeBSD.org> | 2008-07-10 02:08:00 +0000 |
---|---|---|
committer | peter <peter@FreeBSD.org> | 2008-07-10 02:08:00 +0000 |
commit | 5fd3bac82eb9e3ba73a8a18d9badc977aef7f6e5 (patch) | |
tree | c560fe69f2037481cd1e0ead005853b2b18de0bd /contrib/cpio/lib/quotearg.c | |
parent | b07b40d7feb6ae8e188fa82622c29d6a0daeb94e (diff) | |
parent | 6ae22f7302b696d7694ae041817a4744a1476523 (diff) | |
download | FreeBSD-src-5fd3bac82eb9e3ba73a8a18d9badc977aef7f6e5.zip FreeBSD-src-5fd3bac82eb9e3ba73a8a18d9badc977aef7f6e5.tar.gz |
Merge gnu cpio 2.6 -> 2.8 changes. Unfortunately, we have massive
conflicts due to radically different approaches to security and bug fixes.
In some cases I re-started from the vendor version and reimplemented our
patches. Fortunately, this is not enabled by default in -current.
Diffstat (limited to 'contrib/cpio/lib/quotearg.c')
-rw-r--r-- | contrib/cpio/lib/quotearg.c | 697 |
1 files changed, 697 insertions, 0 deletions
diff --git a/contrib/cpio/lib/quotearg.c b/contrib/cpio/lib/quotearg.c new file mode 100644 index 0000000..f7f326a --- /dev/null +++ b/contrib/cpio/lib/quotearg.c @@ -0,0 +1,697 @@ +/* quotearg.c - quote arguments for output + + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free + Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Paul Eggert <eggert@twinsun.com> */ + +#include <config.h> + +#include "quotearg.h" + +#include "xalloc.h" + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#if !HAVE_MBRTOWC +/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the + other macros are defined only for documentation and to satisfy C + syntax. */ +# undef MB_CUR_MAX +# define MB_CUR_MAX 1 +# undef mbstate_t +# define mbstate_t int +# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) +# define iswprint(wc) isprint ((unsigned char) (wc)) +# undef HAVE_MBSINIT +#endif + +#if !defined mbsinit && !HAVE_MBSINIT +# define mbsinit(ps) 1 +#endif + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#define INT_BITS (sizeof (int) * CHAR_BIT) + +struct quoting_options +{ + /* Basic quoting style. */ + enum quoting_style style; + + /* Quote the characters indicated by this bit vector even if the + quoting style would not normally require them to be quoted. */ + unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; +}; + +/* Names of quoting styles. */ +char const *const quoting_style_args[] = +{ + "literal", + "shell", + "shell-always", + "c", + "escape", + "locale", + "clocale", + 0 +}; + +/* Correspondences to quoting style names. */ +enum quoting_style const quoting_style_vals[] = +{ + literal_quoting_style, + shell_quoting_style, + shell_always_quoting_style, + c_quoting_style, + escape_quoting_style, + locale_quoting_style, + clocale_quoting_style +}; + +/* The default quoting options. */ +static struct quoting_options default_quoting_options; + +/* Allocate a new set of quoting options, with contents initially identical + to O if O is not null, or to the default if O is null. + It is the caller's responsibility to free the result. */ +struct quoting_options * +clone_quoting_options (struct quoting_options *o) +{ + int e = errno; + struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, + sizeof *o); + errno = e; + return p; +} + +/* Get the value of O's quoting style. If O is null, use the default. */ +enum quoting_style +get_quoting_style (struct quoting_options *o) +{ + return (o ? o : &default_quoting_options)->style; +} + +/* In O (or in the default if O is null), + set the value of the quoting style to S. */ +void +set_quoting_style (struct quoting_options *o, enum quoting_style s) +{ + (o ? o : &default_quoting_options)->style = s; +} + +/* In O (or in the default if O is null), + set the value of the quoting options for character C to I. + Return the old value. Currently, the only values defined for I are + 0 (the default) and 1 (which means to quote the character even if + it would not otherwise be quoted). */ +int +set_char_quoting (struct quoting_options *o, char c, int i) +{ + unsigned char uc = c; + unsigned int *p = + (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; + int shift = uc % INT_BITS; + int r = (*p >> shift) & 1; + *p ^= ((i & 1) ^ r) << shift; + return r; +} + +/* MSGID approximates a quotation mark. Return its translation if it + has one; otherwise, return either it or "\"", depending on S. */ +static char const * +gettext_quote (char const *msgid, enum quoting_style s) +{ + char const *translation = _(msgid); + if (translation == msgid && s == clocale_quoting_style) + translation = "\""; + return translation; +} + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using QUOTING_STYLE and the + non-quoting-style part of O to control quoting. + Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. + + This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, + ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting + style specified by O, and O may not be null. */ + +static size_t +quotearg_buffer_restyled (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + enum quoting_style quoting_style, + struct quoting_options const *o) +{ + size_t i; + size_t len = 0; + char const *quote_string = 0; + size_t quote_string_len = 0; + bool backslash_escapes = false; + bool unibyte_locale = MB_CUR_MAX == 1; + +#define STORE(c) \ + do \ + { \ + if (len < buffersize) \ + buffer[len] = (c); \ + len++; \ + } \ + while (0) + + switch (quoting_style) + { + case c_quoting_style: + STORE ('"'); + backslash_escapes = true; + quote_string = "\""; + quote_string_len = 1; + break; + + case escape_quoting_style: + backslash_escapes = true; + break; + + case locale_quoting_style: + case clocale_quoting_style: + { + /* TRANSLATORS: + Get translations for open and closing quotation marks. + + The message catalog should translate "`" to a left + quotation mark suitable for the locale, and similarly for + "'". If the catalog has no translation, + locale_quoting_style quotes `like this', and + clocale_quoting_style quotes "like this". + + For example, an American English Unicode locale should + translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and + should translate "'" to U+201D (RIGHT DOUBLE QUOTATION + MARK). A British English Unicode locale should instead + translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and + U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. + + If you don't know what to put here, please see + <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> + and use glyphs suitable for your language. */ + + char const *left = gettext_quote (N_("`"), quoting_style); + char const *right = gettext_quote (N_("'"), quoting_style); + for (quote_string = left; *quote_string; quote_string++) + STORE (*quote_string); + backslash_escapes = true; + quote_string = right; + quote_string_len = strlen (quote_string); + } + break; + + case shell_always_quoting_style: + STORE ('\''); + quote_string = "'"; + quote_string_len = 1; + break; + + default: + break; + } + + for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) + { + unsigned char c; + unsigned char esc; + + if (backslash_escapes + && quote_string_len + && i + quote_string_len <= argsize + && memcmp (arg + i, quote_string, quote_string_len) == 0) + STORE ('\\'); + + c = arg[i]; + switch (c) + { + case '\0': + if (backslash_escapes) + { + STORE ('\\'); + STORE ('0'); + STORE ('0'); + c = '0'; + } + break; + + case '?': + switch (quoting_style) + { + case shell_quoting_style: + goto use_shell_always_quoting_style; + + case c_quoting_style: + if (i + 2 < argsize && arg[i + 1] == '?') + switch (arg[i + 2]) + { + case '!': case '\'': + case '(': case ')': case '-': case '/': + case '<': case '=': case '>': + /* Escape the second '?' in what would otherwise be + a trigraph. */ + c = arg[i + 2]; + i += 2; + STORE ('?'); + STORE ('\\'); + STORE ('?'); + break; + + default: + break; + } + break; + + default: + break; + } + break; + + case '\a': esc = 'a'; goto c_escape; + case '\b': esc = 'b'; goto c_escape; + case '\f': esc = 'f'; goto c_escape; + case '\n': esc = 'n'; goto c_and_shell_escape; + case '\r': esc = 'r'; goto c_and_shell_escape; + case '\t': esc = 't'; goto c_and_shell_escape; + case '\v': esc = 'v'; goto c_escape; + case '\\': esc = c; goto c_and_shell_escape; + + c_and_shell_escape: + if (quoting_style == shell_quoting_style) + goto use_shell_always_quoting_style; + c_escape: + if (backslash_escapes) + { + c = esc; + goto store_escape; + } + break; + + case '{': case '}': /* sometimes special if isolated */ + if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) + break; + /* Fall through. */ + case '#': case '~': + if (i != 0) + break; + /* Fall through. */ + case ' ': + case '!': /* special in bash */ + case '"': case '$': case '&': + case '(': case ')': case '*': case ';': + case '<': + case '=': /* sometimes special in 0th or (with "set -k") later args */ + case '>': case '[': + case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ + case '`': case '|': + /* A shell special character. In theory, '$' and '`' could + be the first bytes of multibyte characters, which means + we should check them with mbrtowc, but in practice this + doesn't happen so it's not worth worrying about. */ + if (quoting_style == shell_quoting_style) + goto use_shell_always_quoting_style; + break; + + case '\'': + switch (quoting_style) + { + case shell_quoting_style: + goto use_shell_always_quoting_style; + + case shell_always_quoting_style: + STORE ('\''); + STORE ('\\'); + STORE ('\''); + break; + + default: + break; + } + break; + + case '%': case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': case ':': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + /* These characters don't cause problems, no matter what the + quoting style is. They cannot start multibyte sequences. */ + break; + + default: + /* If we have a multibyte sequence, copy it until we reach + its end, find an error, or come back to the initial shift + state. For C-like styles, if the sequence has + unprintable characters, escape the whole sequence, since + we can't easily escape single characters within it. */ + { + /* Length of multibyte sequence found so far. */ + size_t m; + + bool printable; + + if (unibyte_locale) + { + m = 1; + printable = isprint (c) != 0; + } + else + { + mbstate_t mbstate; + memset (&mbstate, 0, sizeof mbstate); + + m = 0; + printable = true; + if (argsize == SIZE_MAX) + argsize = strlen (arg); + + do + { + wchar_t w; + size_t bytes = mbrtowc (&w, &arg[i + m], + argsize - (i + m), &mbstate); + if (bytes == 0) + break; + else if (bytes == (size_t) -1) + { + printable = false; + break; + } + else if (bytes == (size_t) -2) + { + printable = false; + while (i + m < argsize && arg[i + m]) + m++; + break; + } + else + { + /* Work around a bug with older shells that "see" a '\' + that is really the 2nd byte of a multibyte character. + In practice the problem is limited to ASCII + chars >= '@' that are shell special chars. */ + if ('[' == 0x5b && quoting_style == shell_quoting_style) + { + size_t j; + for (j = 1; j < bytes; j++) + switch (arg[i + m + j]) + { + case '[': case '\\': case '^': + case '`': case '|': + goto use_shell_always_quoting_style; + + default: + break; + } + } + + if (! iswprint (w)) + printable = false; + m += bytes; + } + } + while (! mbsinit (&mbstate)); + } + + if (1 < m || (backslash_escapes && ! printable)) + { + /* Output a multibyte sequence, or an escaped + unprintable unibyte character. */ + size_t ilim = i + m; + + for (;;) + { + if (backslash_escapes && ! printable) + { + STORE ('\\'); + STORE ('0' + (c >> 6)); + STORE ('0' + ((c >> 3) & 7)); + c = '0' + (c & 7); + } + if (ilim <= i + 1) + break; + STORE (c); + c = arg[++i]; + } + + goto store_c; + } + } + } + + if (! (backslash_escapes + && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) + goto store_c; + + store_escape: + STORE ('\\'); + + store_c: + STORE (c); + } + + if (i == 0 && quoting_style == shell_quoting_style) + goto use_shell_always_quoting_style; + + if (quote_string) + for (; *quote_string; quote_string++) + STORE (*quote_string); + + if (len < buffersize) + buffer[len] = '\0'; + return len; + + use_shell_always_quoting_style: + return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + shell_always_quoting_style, o); +} + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using O to control quoting. + If O is null, use the default. + Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is SIZE_MAX, use the string length of the argument for + ARGSIZE. */ +size_t +quotearg_buffer (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + struct quoting_options const *o) +{ + struct quoting_options const *p = o ? o : &default_quoting_options; + int e = errno; + size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + p->style, p); + errno = e; + return r; +} + +/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly + allocated storage containing the quoted string. */ +char * +quotearg_alloc (char const *arg, size_t argsize, + struct quoting_options const *o) +{ + int e = errno; + size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; + char *buf = xcharalloc (bufsize); + quotearg_buffer (buf, bufsize, arg, argsize, o); + errno = e; + return buf; +} + +/* A storage slot with size and pointer to a value. */ +struct slotvec +{ + size_t size; + char *val; +}; + +/* Preallocate a slot 0 buffer, so that the caller can always quote + one small component of a "memory exhausted" message in slot 0. */ +static char slot0[256]; +static unsigned int nslots = 1; +static struct slotvec slotvec0 = {sizeof slot0, slot0}; +static struct slotvec *slotvec = &slotvec0; + +void +quotearg_free (void) +{ + struct slotvec *sv = slotvec; + unsigned int i; + for (i = 1; i < nslots; i++) + free (sv[i].val); + if (sv[0].val != slot0) + { + free (sv[0].val); + slotvec0.size = sizeof slot0; + slotvec0.val = slot0; + } + if (sv != &slotvec0) + { + free (sv); + slotvec = &slotvec0; + } + nslots = 1; +} + +/* Use storage slot N to return a quoted version of argument ARG. + ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a + null-terminated string. + OPTIONS specifies the quoting options. + The returned value points to static storage that can be + reused by the next call to this function with the same value of N. + N must be nonnegative. N is deliberately declared with type "int" + to allow for future extensions (using negative values). */ +static char * +quotearg_n_options (int n, char const *arg, size_t argsize, + struct quoting_options const *options) +{ + int e = errno; + + unsigned int n0 = n; + struct slotvec *sv = slotvec; + + if (n < 0) + abort (); + + if (nslots <= n0) + { + /* FIXME: technically, the type of n1 should be `unsigned int', + but that evokes an unsuppressible warning from gcc-4.0.1 and + older. If gcc ever provides an option to suppress that warning, + revert to the original type, so that the test in xalloc_oversized + is once again performed only at compile time. */ + size_t n1 = n0 + 1; + bool preallocated = (sv == &slotvec0); + + if (xalloc_oversized (n1, sizeof *sv)) + xalloc_die (); + + slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); + if (preallocated) + *sv = slotvec0; + memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); + nslots = n1; + } + + { + size_t size = sv[n].size; + char *val = sv[n].val; + size_t qsize = quotearg_buffer (val, size, arg, argsize, options); + + if (size <= qsize) + { + sv[n].size = size = qsize + 1; + if (val != slot0) + free (val); + sv[n].val = val = xcharalloc (size); + quotearg_buffer (val, size, arg, argsize, options); + } + + errno = e; + return val; + } +} + +char * +quotearg_n (int n, char const *arg) +{ + return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); +} + +char * +quotearg (char const *arg) +{ + return quotearg_n (0, arg); +} + +/* Return quoting options for STYLE, with no extra quoting. */ +static struct quoting_options +quoting_options_from_style (enum quoting_style style) +{ + struct quoting_options o; + o.style = style; + memset (o.quote_these_too, 0, sizeof o.quote_these_too); + return o; +} + +char * +quotearg_n_style (int n, enum quoting_style s, char const *arg) +{ + struct quoting_options const o = quoting_options_from_style (s); + return quotearg_n_options (n, arg, SIZE_MAX, &o); +} + +char * +quotearg_n_style_mem (int n, enum quoting_style s, + char const *arg, size_t argsize) +{ + struct quoting_options const o = quoting_options_from_style (s); + return quotearg_n_options (n, arg, argsize, &o); +} + +char * +quotearg_style (enum quoting_style s, char const *arg) +{ + return quotearg_n_style (0, s, arg); +} + +char * +quotearg_char (char const *arg, char ch) +{ + struct quoting_options options; + options = default_quoting_options; + set_char_quoting (&options, ch, 1); + return quotearg_n_options (0, arg, SIZE_MAX, &options); +} + +char * +quotearg_colon (char const *arg) +{ + return quotearg_char (arg, ':'); +} |