summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author jilles <jilles@FreeBSD.org> 2011-05-28 11:37:47 +0000
committer jilles <jilles@FreeBSD.org> 2011-05-28 11:37:47 +0000
commit 979af05e7725d814570e4633b10a5d49e67e987c (patch)
tree 97044025b3bbee11165340fbea5bcf368dd81b10
parent 36cd7cef5ee843f5238463d58cf2180183b674cd (diff)
download FreeBSD-src-979af05e7725d814570e4633b10a5d49e67e987c.zip
download FreeBSD-src-979af05e7725d814570e4633b10a5d49e67e987c.tar.gz
printf: Allow multibyte characters for '<char> form, avoid negative codes.
Examples:
  LC_ALL=en_US.UTF-8 printf '%d\n' $(printf \'\\303\\244)
  LC_ALL=en_US.ISO8859-1 printf '%d\n' $(printf \'\\344)
Both of these should print 228.

Like some other shells, incomplete or invalid multibyte characters yield the value of the first byte without a warning.

Note that there is no general way to go back from the character code to the character.
-rw-r--r-- tools/regression/usr.bin/printf/regress.l1.out 1
-rw-r--r-- tools/regression/usr.bin/printf/regress.l2.out 1
-rw-r--r-- tools/regression/usr.bin/printf/regress.sh 4
-rw-r--r-- usr.bin/printf/printf.1 5
-rw-r--r-- usr.bin/printf/printf.c 22
5 files changed, 25 insertions, 8 deletions
diff --git a/tools/regression/usr.bin/printf/regress.l1.out b/tools/regression/usr.bin/printf/regress.l1.out
new file mode 100644
index 0000000..9be0dc9
--- /dev/null
+++ b/tools/regression/usr.bin/printf/regress.l1.out
@@ -0,0 +1 @@
+228
diff --git a/tools/regression/usr.bin/printf/regress.l2.out b/tools/regression/usr.bin/printf/regress.l2.out
new file mode 100644
index 0000000..9be0dc9
--- /dev/null
+++ b/tools/regression/usr.bin/printf/regress.l2.out
@@ -0,0 +1 @@
+228
diff --git a/tools/regression/usr.bin/printf/regress.sh b/tools/regression/usr.bin/printf/regress.sh
index 980fc70..4ce282f 100644
--- a/tools/regression/usr.bin/printf/regress.sh
+++ b/tools/regression/usr.bin/printf/regress.sh
@@ -2,11 +2,13 @@
REGRESSION_START($1)
-echo '1..9'
+echo '1..11'
REGRESSION_TEST(`b', `printf "abc%b%b" "def\n" "\cghi"')
REGRESSION_TEST(`d', `printf "%d,%5d,%.5d,%0*d,%.*d\n" 123 123 123 5 123 5 123')
REGRESSION_TEST(`f', `printf "%f,%-8.3f,%f,%f\n" +42.25 -42.25 inf nan')
+REGRESSION_TEST(`l1', `LC_ALL=en_US.ISO8859-1 printf "%d\n" $(printf \"\\344)')
+REGRESSION_TEST(`l2', `LC_ALL=en_US.UTF-8 printf "%d\n" $(printf \"\\303\\244)')
REGRESSION_TEST(`m1', `printf "%c%%%d\0\045\n" abc \"abc')
REGRESSION_TEST(`m2', `printf "abc\n\cdef"')
REGRESSION_TEST(`m3', `printf "%%%s\n" abc def ghi jkl')
diff --git a/usr.bin/printf/printf.1 b/usr.bin/printf/printf.1
index 56c6855..2afb9d3 100644
--- a/usr.bin/printf/printf.1
+++ b/usr.bin/printf/printf.1
@@ -31,7 +31,7 @@
.\" @(#)printf.1 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
-.Dd April 25, 2011
+.Dd May 28, 2011
.Dt PRINTF 1
.Os
.Sh NAME
@@ -68,8 +68,7 @@ otherwise it is evaluated as a C constant, with the following extensions:
A leading plus or minus sign is allowed.
.It
If the leading character is a single or double quote, the value is the
-.Tn ASCII
-code of the next character.
+character code of the next character.
.El
.Pp
The format string is reused as often as necessary to satisfy the
diff --git a/usr.bin/printf/printf.c b/usr.bin/printf/printf.c
index 56c1caf..eace370 100644
--- a/usr.bin/printf/printf.c
+++ b/usr.bin/printf/printf.c
@@ -58,6 +58,7 @@ static const char rcsid[] =
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
#ifdef SHELL
#define main printfcmd
@@ -537,10 +538,23 @@ static int
asciicode(void)
{
int ch;
-
- ch = **gargv;
- if (ch == '\'' || ch == '"')
- ch = (*gargv)[1];
+ wchar_t wch;
+ mbstate_t mbs;
+
+ ch = (unsigned char)**gargv;
+ if (ch == '\'' || ch == '"') {
+ memset(&mbs, 0, sizeof(mbs));
+ switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
+ case (size_t)-2:
+ case (size_t)-1:
+ wch = (unsigned char)gargv[0][1];
+ break;
+ case 0:
+ wch = 0;
+ break;
+ }
+ ch = wch;
+ }
++gargv;
return (ch);
}
OpenPOWER on IntegriCloud