summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authordas <das@FreeBSD.org>2009-01-22 08:14:28 +0000
committerdas <das@FreeBSD.org>2009-01-22 08:14:28 +0000
commitdfcf434c32c40bc259ff09e2780abf613ec3e249 (patch)
treef31a330a971da42d6e17918ccbac552bcdf334f6 /lib
parenta921977684453c89f9016fdcbb3b9fbd6a4a670f (diff)
downloadFreeBSD-src-dfcf434c32c40bc259ff09e2780abf613ec3e249.zip
FreeBSD-src-dfcf434c32c40bc259ff09e2780abf613ec3e249.tar.gz
Add support for multibyte thousands_sep encodings, e.g., U+066C.
The integer thousands' separator code is rewritten in order to avoid having to preallocate a buffer for the largest possible digit string with the most possible instances of the longest possible multibyte thousands' separator. The new version inserts thousands' separators for integers using the same code as floating point.
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/stdio/printfcommon.h55
-rw-r--r--lib/libc/stdio/vfprintf.c165
-rw-r--r--lib/libc/stdio/vfwprintf.c178
3 files changed, 225 insertions, 173 deletions
diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h
index ab49b31..39b0003 100644
--- a/lib/libc/stdio/printfcommon.h
+++ b/lib/libc/stdio/printfcommon.h
@@ -54,10 +54,8 @@ static int exponent(CHAR *, int, CHAR);
#endif /* !NO_FLOATING_POINT */
-static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *, int, char,
- const char *);
-static CHAR *__ultoa(u_long, CHAR *, int, int, const char *, int, char,
- const char *);
+static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *);
+static CHAR *__ultoa(u_long, CHAR *, int, int, const char *);
#define NIOV 8
struct io_state {
@@ -158,12 +156,10 @@ io_flush(struct io_state *iop)
* use the given digits.
*/
static CHAR *
-__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
- int needgrp, char thousep, const char *grp)
+__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
{
CHAR *cp = endp;
long sval;
- int ndig;
/*
* Handle the three cases separately, in the hope of getting
@@ -175,7 +171,6 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
*--cp = to_char(val);
return (cp);
}
- ndig = 0;
/*
* On many machines, unsigned arithmetic is harder than
* signed arithmetic, so we do at most one unsigned mod and
@@ -184,29 +179,11 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
*/
if (val > LONG_MAX) {
*--cp = to_char(val % 10);
- ndig++;
sval = val / 10;
} else
sval = val;
do {
*--cp = to_char(sval % 10);
- ndig++;
- /*
- * If (*grp == CHAR_MAX) then no more grouping
- * should be performed.
- */
- if (needgrp && ndig == *grp && *grp != CHAR_MAX
- && sval > 9) {
- *--cp = thousep;
- ndig = 0;
- /*
- * If (*(grp+1) == '\0') then we have to
- * use *grp character (last grouping rule)
- * for all next cases
- */
- if (*(grp+1) != '\0')
- grp++;
- }
sval /= 10;
} while (sval != 0);
break;
@@ -235,50 +212,28 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs,
/* Identical to __ultoa, but for intmax_t. */
static CHAR *
-__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs,
- int needgrp, char thousep, const char *grp)
+__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
{
CHAR *cp = endp;
intmax_t sval;
- int ndig;
/* quick test for small values; __ultoa is typically much faster */
/* (perhaps instead we should run until small, then call __ultoa?) */
if (val <= ULONG_MAX)
- return (__ultoa((u_long)val, endp, base, octzero, xdigs,
- needgrp, thousep, grp));
+ return (__ultoa((u_long)val, endp, base, octzero, xdigs));
switch (base) {
case 10:
if (val < 10) {
*--cp = to_char(val % 10);
return (cp);
}
- ndig = 0;
if (val > INTMAX_MAX) {
*--cp = to_char(val % 10);
- ndig++;
sval = val / 10;
} else
sval = val;
do {
*--cp = to_char(sval % 10);
- ndig++;
- /*
- * If (*grp == CHAR_MAX) then no more grouping
- * should be performed.
- */
- if (needgrp && *grp != CHAR_MAX && ndig == *grp
- && sval > 9) {
- *--cp = thousep;
- ndig = 0;
- /*
- * If (*(grp+1) == '\0') then we have to
- * use *grp character (last grouping rule)
- * for all next cases
- */
- if (*(grp+1) != '\0')
- grp++;
- }
sval /= 10;
} while (sval != 0);
break;
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index 19076ab..e3f95eb 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -72,6 +72,75 @@ static char *__wcsconv(wchar_t *, int);
#define CHAR char
#include "printfcommon.h"
+struct grouping_state {
+ char *thousands_sep; /* locale-specific thousands separator */
+ int thousep_len; /* length of thousands_sep */
+ const char *grouping; /* locale-specific numeric grouping rules */
+ int lead; /* sig figs before decimal or group sep */
+ int nseps; /* number of group separators with ' */
+ int nrepeats; /* number of repeats of the last group */
+};
+
+/*
+ * Initialize the thousands' grouping state in preparation to print a
+ * number with ndigits digits. This routine returns the total number
+ * of bytes that will be needed.
+ */
+static int
+grouping_init(struct grouping_state *gs, int ndigits)
+{
+ struct lconv *locale;
+
+ locale = localeconv();
+ gs->grouping = locale->grouping;
+ gs->thousands_sep = locale->thousands_sep;
+ gs->thousep_len = strlen(gs->thousands_sep);
+
+ gs->nseps = gs->nrepeats = 0;
+ gs->lead = ndigits;
+ while (*gs->grouping != CHAR_MAX) {
+ if (gs->lead <= *gs->grouping)
+ break;
+ gs->lead -= *gs->grouping;
+ if (*(gs->grouping+1)) {
+ gs->nseps++;
+ gs->grouping++;
+ } else
+ gs->nrepeats++;
+ }
+ return ((gs->nseps + gs->nrepeats) * gs->thousep_len);
+}
+
+/*
+ * Print a number with thousands' separators.
+ */
+static int
+grouping_print(struct grouping_state *gs, struct io_state *iop,
+ const CHAR *cp, const CHAR *ep)
+{
+ const CHAR *cp0 = cp;
+
+ if (io_printandpad(iop, cp, ep, gs->lead, zeroes))
+ return (-1);
+ cp += gs->lead;
+ while (gs->nseps > 0 || gs->nrepeats > 0) {
+ if (gs->nrepeats > 0)
+ gs->nrepeats--;
+ else {
+ gs->grouping--;
+ gs->nseps--;
+ }
+ if (io_print(iop, gs->thousands_sep, gs->thousep_len))
+ return (-1);
+ if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes))
+ return (-1);
+ cp += *gs->grouping;
+ }
+ if (cp > ep)
+ cp = ep;
+ return (cp - cp0);
+}
+
/*
* Flush out all the vectors defined by the given uio,
* then reset it so that it can be reused.
@@ -210,12 +279,14 @@ vfprintf(FILE * __restrict fp, const char * __restrict fmt0, va_list ap)
/*
* The size of the buffer we use as scratch space for integer
- * conversions, among other things. Technically, we would need the
- * most space for base 10 conversions with thousands' grouping
- * characters between each pair of digits. 100 bytes is a
- * conservative overestimate even for a 128-bit uintmax_t.
+ * conversions, among other things. We need enough space to
+ * write a uintmax_t in octal (plus one byte).
*/
-#define BUF 100
+#if UINTMAX_MAX <= UINT64_MAX
+#define BUF 32
+#else
+#error "BUF must be large enough to format a uintmax_t"
+#endif
/*
* Non-MT-safe version
@@ -232,8 +303,7 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format; <0 for N/A */
char sign; /* sign prefix (' ', '+', '-', or \0) */
- char thousands_sep; /* locale specific thousands separator */
- const char *grouping; /* locale specific numeric grouping rules */
+ struct grouping_state gs; /* thousands' grouping info */
#ifndef NO_FLOATING_POINT
/*
@@ -261,12 +331,9 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
char expchar; /* exponent character: [eEpP\0] */
char *dtoaend; /* pointer to end of converted digits */
int expsize; /* character count for expstr */
- int lead; /* sig figs before decimal or group sep */
int ndig; /* actual number of digits returned by dtoa */
char expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */
char *dtoaresult; /* buffer allocated by dtoa */
- int nseps; /* number of group separators with ' */
- int nrepeats; /* number of repeats of the last group */
#endif
u_long ulval; /* integer arguments %[diouxX] */
uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */
@@ -378,8 +445,6 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
- thousands_sep = '\0';
- grouping = NULL;
convbuf = NULL;
fmt = (char *)fmt0;
argtable = NULL;
@@ -416,6 +481,7 @@ __vfprintf(FILE *fp, const char *fmt0, va_list ap)
dprec = 0;
width = 0;
prec = -1;
+ gs.grouping = NULL;
sign = '\0';
ox[1] = '\0';
@@ -453,8 +519,6 @@ reswitch: switch (ch) {
goto rflag;
case '\'':
flags |= GROUPING;
- thousands_sep = *(localeconv()->thousands_sep);
- grouping = localeconv()->grouping;
goto rflag;
case '.':
if ((ch = *fmt++) == '*') {
@@ -685,23 +749,8 @@ fp_common:
/* space for decimal pt and following digits */
if (prec || flags & ALT)
size += prec + decpt_len;
- if (grouping && expt > 0) {
- /* space for thousands' grouping */
- nseps = nrepeats = 0;
- lead = expt;
- while (*grouping != CHAR_MAX) {
- if (lead <= *grouping)
- break;
- lead -= *grouping;
- if (*(grouping+1)) {
- nseps++;
- grouping++;
- } else
- nrepeats++;
- }
- size += nseps + nrepeats;
- } else
- lead = expt;
+ if ((flags & GROUPING) && expt > 0)
+ size += grouping_init(&gs, expt);
}
break;
#endif /* !NO_FLOATING_POINT */
@@ -842,20 +891,18 @@ number: if ((dprec = prec) >= 0)
if (ujval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ujtoa(ujval, cp, base,
- flags & ALT, xdigs,
- flags & GROUPING, thousands_sep,
- grouping);
+ flags & ALT, xdigs);
} else {
if (ulval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ultoa(ulval, cp, base,
- flags & ALT, xdigs,
- flags & GROUPING, thousands_sep,
- grouping);
+ flags & ALT, xdigs);
}
size = buf + BUF - cp;
if (size > BUF) /* should never happen */
abort();
+ if ((flags & GROUPING) && size != 0)
+ size += grouping_init(&gs, size);
break;
default: /* "%?" prints ?, unless ? is NUL */
if (ch == '\0')
@@ -911,13 +958,19 @@ number: if ((dprec = prec) >= 0)
if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD)
PAD(width - realsz, zeroes);
- /* leading zeroes from decimal precision */
- PAD(dprec - size, zeroes);
-
/* the string or number proper */
#ifndef NO_FLOATING_POINT
if ((flags & FPT) == 0) {
- PRINT(cp, size);
+#endif
+ /* leading zeroes from decimal precision */
+ PAD(dprec - size, zeroes);
+ if (gs.grouping) {
+ if (grouping_print(&gs, &io, cp, buf+BUF) < 0)
+ goto error;
+ } else {
+ PRINT(cp, size);
+ }
+#ifndef NO_FLOATING_POINT
} else { /* glue together f_p fragments */
if (!expchar) { /* %[fF] or sufficiently short %[gG] */
if (expt <= 0) {
@@ -928,24 +981,16 @@ number: if ((dprec = prec) >= 0)
/* already handled initial 0's */
prec += expt;
} else {
- PRINTANDPAD(cp, dtoaend, lead, zeroes);
- cp += lead;
- if (grouping) {
- while (nseps>0 || nrepeats>0) {
- if (nrepeats > 0)
- nrepeats--;
- else {
- grouping--;
- nseps--;
- }
- PRINT(&thousands_sep,
- 1);
- PRINTANDPAD(cp,dtoaend,
- *grouping, zeroes);
- cp += *grouping;
- }
- if (cp > dtoaend)
- cp = dtoaend;
+ if (gs.grouping) {
+ n = grouping_print(&gs, &io,
+ cp, dtoaend);
+ if (n < 0)
+ goto error;
+ cp += n;
+ } else {
+ PRINTANDPAD(cp, dtoaend,
+ expt, zeroes);
+ cp += expt;
}
if (prec || flags & ALT)
PRINT(decimal_point,decpt_len);
@@ -962,8 +1007,6 @@ number: if ((dprec = prec) >= 0)
PRINT(expstr, expsize);
}
}
-#else
- PRINT(cp, size);
#endif
/* left-adjusting padding (always blank) */
if (flags & LADJUST)
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index 28a87f1..054299a 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -74,6 +74,14 @@ static wchar_t *__mbsconv(char *, int);
#define CHAR wchar_t
#include "printfcommon.h"
+struct grouping_state {
+ wchar_t thousands_sep; /* locale-specific thousands separator */
+ const char *grouping; /* locale-specific numeric grouping rules */
+ int lead; /* sig figs before decimal or group sep */
+ int nseps; /* number of group separators with ' */
+ int nrepeats; /* number of repeats of the last group */
+};
+
static const mbstate_t initial_mbs;
static inline wchar_t
@@ -90,6 +98,79 @@ get_decpt(void)
return (decpt);
}
+static inline wchar_t
+get_thousep(void)
+{
+ mbstate_t mbs;
+ wchar_t thousep;
+ int nconv;
+
+ mbs = initial_mbs;
+ nconv = mbrtowc(&thousep, localeconv()->thousands_sep,
+ MB_CUR_MAX, &mbs);
+ if (nconv == (size_t)-1 || nconv == (size_t)-2)
+ thousep = '\0'; /* failsafe */
+ return (thousep);
+}
+
+/*
+ * Initialize the thousands' grouping state in preparation to print a
+ * number with ndigits digits. This routine returns the total number
+ * of wide characters that will be printed.
+ */
+static int
+grouping_init(struct grouping_state *gs, int ndigits)
+{
+
+ gs->grouping = localeconv()->grouping;
+ gs->thousands_sep = get_thousep();
+
+ gs->nseps = gs->nrepeats = 0;
+ gs->lead = ndigits;
+ while (*gs->grouping != CHAR_MAX) {
+ if (gs->lead <= *gs->grouping)
+ break;
+ gs->lead -= *gs->grouping;
+ if (*(gs->grouping+1)) {
+ gs->nseps++;
+ gs->grouping++;
+ } else
+ gs->nrepeats++;
+ }
+ return (gs->nseps + gs->nrepeats);
+}
+
+/*
+ * Print a number with thousands' separators.
+ */
+static int
+grouping_print(struct grouping_state *gs, struct io_state *iop,
+ const CHAR *cp, const CHAR *ep)
+{
+ const CHAR *cp0 = cp;
+
+ if (io_printandpad(iop, cp, ep, gs->lead, zeroes))
+ return (-1);
+ cp += gs->lead;
+ while (gs->nseps > 0 || gs->nrepeats > 0) {
+ if (gs->nrepeats > 0)
+ gs->nrepeats--;
+ else {
+ gs->grouping--;
+ gs->nseps--;
+ }
+ if (io_print(iop, &gs->thousands_sep, 1))
+ return (-1);
+ if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes))
+ return (-1);
+ cp += *gs->grouping;
+ }
+ if (cp > ep)
+ cp = ep;
+ return (cp - cp0);
+}
+
+
/*
* Flush out all the vectors defined by the given uio,
* then reset it so that it can be reused.
@@ -280,12 +361,14 @@ vfwprintf(FILE * __restrict fp, const wchar_t * __restrict fmt0, va_list ap)
/*
* The size of the buffer we use as scratch space for integer
- * conversions, among other things. Technically, we would need the
- * most space for base 10 conversions with thousands' grouping
- * characters between each pair of digits. 100 bytes is a
- * conservative overestimate even for a 128-bit uintmax_t.
+ * conversions, among other things. We need enough space to
+ * write a uintmax_t in octal (plus one byte).
*/
-#define BUF 100
+#if UINTMAX_MAX <= UINT64_MAX
+#define BUF 32
+#else
+#error "BUF must be large enough to format a uintmax_t"
+#endif
/*
* Non-MT-safe version
@@ -302,8 +385,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
int width; /* width from format (%8d), or 0 */
int prec; /* precision from format; <0 for N/A */
wchar_t sign; /* sign prefix (' ', '+', '-', or \0) */
- wchar_t thousands_sep; /* locale specific thousands separator */
- const char *grouping; /* locale specific numeric grouping rules */
+ struct grouping_state gs; /* thousands' grouping info */
#ifndef NO_FLOATING_POINT
/*
* We can decompose the printed representation of floating
@@ -329,12 +411,9 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
char expchar; /* exponent character: [eEpP\0] */
char *dtoaend; /* pointer to end of converted digits */
int expsize; /* character count for expstr */
- int lead; /* sig figs before decimal or group sep */
int ndig; /* actual number of digits returned by dtoa */
wchar_t expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */
char *dtoaresult; /* buffer allocated by dtoa */
- int nseps; /* number of group separators with ' */
- int nrepeats; /* number of repeats of the last group */
#endif
u_long ulval; /* integer arguments %[diouxX] */
uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */
@@ -442,8 +521,6 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
if (prepwrite(fp) != 0)
return (EOF);
- thousands_sep = '\0';
- grouping = NULL;
convbuf = NULL;
fmt = (wchar_t *)fmt0;
argtable = NULL;
@@ -477,6 +554,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt0, va_list ap)
dprec = 0;
width = 0;
prec = -1;
+ gs.grouping = NULL;
sign = '\0';
ox[1] = '\0';
@@ -514,8 +592,6 @@ reswitch: switch (ch) {
goto rflag;
case '\'':
flags |= GROUPING;
- thousands_sep = *(localeconv()->thousands_sep);
- grouping = localeconv()->grouping;
goto rflag;
case '.':
if ((ch = *fmt++) == '*') {
@@ -739,23 +815,8 @@ fp_common:
/* space for decimal pt and following digits */
if (prec || flags & ALT)
size += prec + 1;
- if (grouping && expt > 0) {
- /* space for thousands' grouping */
- nseps = nrepeats = 0;
- lead = expt;
- while (*grouping != CHAR_MAX) {
- if (lead <= *grouping)
- break;
- lead -= *grouping;
- if (*(grouping+1)) {
- nseps++;
- grouping++;
- } else
- nrepeats++;
- }
- size += nseps + nrepeats;
- } else
- lead = expt;
+ if ((flags & GROUPING) && expt > 0)
+ size += grouping_init(&gs, expt);
}
break;
#endif /* !NO_FLOATING_POINT */
@@ -899,20 +960,18 @@ number: if ((dprec = prec) >= 0)
if (ujval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ujtoa(ujval, cp, base,
- flags & ALT, xdigs,
- flags & GROUPING, thousands_sep,
- grouping);
+ flags & ALT, xdigs);
} else {
if (ulval != 0 || prec != 0 ||
(flags & ALT && base == 8))
cp = __ultoa(ulval, cp, base,
- flags & ALT, xdigs,
- flags & GROUPING, thousands_sep,
- grouping);
+ flags & ALT, xdigs);
}
size = buf + BUF - cp;
if (size > BUF) /* should never happen */
abort();
+ if ((flags & GROUPING) && size != 0)
+ size += grouping_init(&gs, size);
break;
default: /* "%?" prints ?, unless ? is NUL */
if (ch == '\0')
@@ -968,13 +1027,19 @@ number: if ((dprec = prec) >= 0)
if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD)
PAD(width - realsz, zeroes);
- /* leading zeroes from decimal precision */
- PAD(dprec - size, zeroes);
-
/* the string or number proper */
#ifndef NO_FLOATING_POINT
if ((flags & FPT) == 0) {
- PRINT(cp, size);
+#endif
+ /* leading zeroes from decimal precision */
+ PAD(dprec - size, zeroes);
+ if (gs.grouping) {
+ if (grouping_print(&gs, &io, cp, buf+BUF) < 0)
+ goto error;
+ } else {
+ PRINT(cp, size);
+ }
+#ifndef NO_FLOATING_POINT
} else { /* glue together f_p fragments */
if (!expchar) { /* %[fF] or sufficiently short %[gG] */
if (expt <= 0) {
@@ -985,25 +1050,16 @@ number: if ((dprec = prec) >= 0)
/* already handled initial 0's */
prec += expt;
} else {
- PRINTANDPAD(cp, convbuf + ndig, lead, zeroes);
- cp += lead;
- if (grouping) {
- while (nseps>0 || nrepeats>0) {
- if (nrepeats > 0)
- nrepeats--;
- else {
- grouping--;
- nseps--;
- }
- PRINT(&thousands_sep,
- 1);
- PRINTANDPAD(cp,
- convbuf + ndig,
- *grouping, zeroes);
- cp += *grouping;
- }
- if (cp > convbuf + ndig)
- cp = convbuf + ndig;
+ if (gs.grouping) {
+ n = grouping_print(&gs, &io,
+ cp, convbuf + ndig);
+ if (n < 0)
+ goto error;
+ cp += n;
+ } else {
+ PRINTANDPAD(cp, convbuf + ndig,
+ expt, zeroes);
+ cp += expt;
}
if (prec || flags & ALT)
PRINT(&decimal_point, 1);
@@ -1021,8 +1077,6 @@ number: if ((dprec = prec) >= 0)
PRINT(expstr, expsize);
}
}
-#else
- PRINT(cp, size);
#endif
/* left-adjusting padding (always blank) */
if (flags & LADJUST)
OpenPOWER on IntegriCloud