From 395c7a4e3ae24f959f8751122defb9c00a1abb45 Mon Sep 17 00:00:00 2001
From: das <das@FreeBSD.org>
Date: Sat, 28 Jun 2003 09:03:05 +0000
Subject: Revamp scanf's floating-point-parsing algorithm to support [+|-]Inf,
 [+|-]NaN, nan(...), and hexidecimal FP constants. While here, add %a and %A,
 which are aliases for %e, and add support for long doubles.

Reviewed by:	standards@
---
 lib/libc/stdio/vfscanf.c | 266 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 172 insertions(+), 94 deletions(-)

diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index b447923..b6f7571 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -81,16 +81,11 @@ __FBSDID("$FreeBSD$");
 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
 
 /*
- * The following are used in numeric conversions only:
- * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
+ * The following are used in integral conversions only:
+ * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
  */
 #define	SIGNOK		0x40	/* +/- is (still) legal */
 #define	NDIGITS		0x80	/* no digits detected */
-
-#define	DPTOK		0x100	/* (float) decimal point is still legal */
-#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */
-
 #define	PFXOK		0x100	/* 0x prefix is (still) legal */
 #define	NZDIGITS	0x200	/* no zero digits detected */
 
@@ -104,6 +99,9 @@ __FBSDID("$FreeBSD$");
 #define	CT_FLOAT	4	/* %[efgEFG] conversion */
 
 static const u_char *__sccl(char *, const u_char *);
+static int parsefloat(FILE *, char *, char *);
+
+int __scanfdebug = 0;
 
 __weak_reference(__vfscanf, vfscanf);
 
@@ -148,9 +146,6 @@ __svfscanf(FILE *fp, const char *fmt0, va_list ap)
 	/* `basefix' is used to avoid `if' tests in the integer scanner */
 	static short basefix[17] =
 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-#ifdef FLOATING_POINT
-	char decimal_point = localeconv()->decimal_point[0];
-#endif
 
 	ORIENT(fp, -1);
 
@@ -258,8 +253,8 @@ literal:
 			break;
 
 #ifdef FLOATING_POINT
-		case 'E': case 'F': case 'G':
-		case 'e': case 'f': case 'g':
+		case 'A': case 'E': case 'F': case 'G':
+		case 'a': case 'e': case 'f': case 'g':
 			c = CT_FLOAT;
 			break;
 #endif
@@ -769,96 +764,26 @@ literal:
 #ifdef FLOATING_POINT
 		case CT_FLOAT:
 			/* scan a floating point number as if by strtod */
-#ifdef hardway
 			if (width == 0 || width > sizeof(buf) - 1)
 				width = sizeof(buf) - 1;
-#else
-			/* size_t is unsigned, hence this optimisation */
-			if (--width > sizeof(buf) - 2)
-				width = sizeof(buf) - 2;
-			width++;
-#endif
-			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
-			for (p = buf; width; width--) {
-				c = *fp->_p;
-				/*
-				 * This code mimicks the integer conversion
-				 * code, but is much simpler.
-				 */
-				switch (c) {
-
-				case '0': case '1': case '2': case '3':
-				case '4': case '5': case '6': case '7':
-				case '8': case '9':
-					flags &= ~(SIGNOK | NDIGITS);
-					goto fok;
-
-				case '+': case '-':
-					if (flags & SIGNOK) {
-						flags &= ~SIGNOK;
-						goto fok;
-					}
-					break;
-				case 'e': case 'E':
-					/* no exponent without some digits */
-					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
-						flags =
-						    (flags & ~(EXPOK|DPTOK)) |
-						    SIGNOK | NDIGITS;
-						goto fok;
-					}
-					break;
-				default:
-					if ((char)c == decimal_point &&
-					    (flags & DPTOK)) {
-						flags &= ~(SIGNOK | DPTOK);
-						goto fok;
-					}
-					break;
-				}
-				break;
-		fok:
-				*p++ = c;
-				if (--fp->_r > 0)
-					fp->_p++;
-				else if (__srefill(fp))
-					break;	/* EOF */
-			}
-			/*
-			 * If no digits, might be missing exponent digits
-			 * (just give back the exponent) or might be missing
-			 * regular digits, but had sign and/or decimal point.
-			 */
-			if (flags & NDIGITS) {
-				if (flags & EXPOK) {
-					/* no digits at all */
-					while (p > buf)
-						__ungetc(*(u_char *)--p, fp);
-					goto match_failure;
-				}
-				/* just a bad exponent (e and maybe sign) */
-				c = *(u_char *)--p;
-				if (c != 'e' && c != 'E') {
-					(void) __ungetc(c, fp);/* sign */
-					c = *(u_char *)--p;
-				}
-				(void) __ungetc(c, fp);
-			}
+			if ((width = parsefloat(fp, buf, buf + width)) == 0)
+				goto match_failure;
 			if ((flags & SUPPRESS) == 0) {
-				double res;
-
-				*p = 0;
-				/* XXX this loses precision for long doubles. */
-				res = strtod(buf, (char **) NULL);
-				if (flags & LONGDBL)
+				if (flags & LONGDBL) {
+					long double res = strtold(buf, &p);
 					*va_arg(ap, long double *) = res;
-				else if (flags & LONG)
+				} else if (flags & LONG) {
+					double res = strtod(buf, &p);
 					*va_arg(ap, double *) = res;
-				else
+				} else {
+					float res = strtof(buf, &p);
 					*va_arg(ap, float *) = res;
+				}
+				if (__scanfdebug && p - buf != width)
+					abort();
 				nassigned++;
 			}
-			nread += p - buf;
+			nread += width;
 			nconversions++;
 			break;
 #endif /* FLOATING_POINT */
@@ -982,3 +907,156 @@ doswitch:
 	}
 	/* NOTREACHED */
 }
+
+#ifdef FLOATING_POINT
+static int
+parsefloat(FILE *fp, char *buf, char *end)
+{
+	char *commit, *p;
+	int infnanpos = 0;
+	enum {
+		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
+		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
+	} state = S_START;
+	unsigned char c;
+	char decpt = *localeconv()->decimal_point;
+	_Bool gotmantdig = 0, ishex = 0;
+
+	/*
+	 * We set commit = p whenever the string we have read so far
+	 * constitutes a valid representation of a floating point
+	 * number by itself.  At some point, the parse will complete
+	 * or fail, and we will ungetc() back to the last commit point.
+	 * To ensure that the file offset gets updated properly, it is
+	 * always necessary to read at least one character that doesn't
+	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
+	 */
+	commit = buf - 1;
+	for (p = buf; p < end; ) {
+		c = *fp->_p;
+reswitch:
+		switch (state) {
+		case S_START:
+			state = S_GOTSIGN;
+			if (c == '-' || c == '+')
+				break;
+			else
+				goto reswitch;
+		case S_GOTSIGN:
+			switch (c) {
+			case '0':
+				state = S_MAYBEHEX;
+				commit = p;
+				break;
+			case 'I':
+			case 'i':
+				state = S_INF;
+				break;
+			case 'N':
+			case 'n':
+				state = S_NAN;
+				break;
+			default:
+				state = S_DIGITS;
+				goto reswitch;
+			}
+			break;
+		case S_INF:
+			if (infnanpos > 6 ||
+			    (c != "nfinity"[infnanpos] &&
+			     c != "NFINITY"[infnanpos]))
+				goto parsedone;
+			if (infnanpos == 1 || infnanpos == 6)
+				commit = p;	/* inf or infinity */
+			infnanpos++;
+			break;
+		case S_NAN:
+			switch (infnanpos) {
+			case -1:	/* XXX kludge to deal with nan(...) */
+				goto parsedone;
+			case 0:
+				if (c != 'A' && c != 'a')
+					goto parsedone;
+				break;
+			case 1:
+				if (c != 'N' && c != 'n')
+					goto parsedone;
+				else
+					commit = p;
+				break;
+			case 2:
+				if (c != '(')
+					goto parsedone;
+				break;
+			default:
+				if (c == ')') {
+					commit = p;
+					infnanpos = -2;
+				} else if (!isalnum(c) && c != '_')
+					goto parsedone;
+				break;
+			}
+			infnanpos++;
+			break;
+		case S_MAYBEHEX:
+			state = S_DIGITS;
+			if (c == 'X' || c == 'x') {
+				ishex = 1;
+				break;
+			} else {	/* we saw a '0', but no 'x' */
+				gotmantdig = 1;
+				goto reswitch;
+			}
+		case S_DIGITS:
+			if (ishex && isxdigit(c) || isdigit(c))
+				gotmantdig = 1;
+			else {
+				state = S_FRAC;
+				if (c != decpt)
+					goto reswitch;
+			}
+			if (gotmantdig)
+				commit = p;
+			break;
+		case S_FRAC:
+			if ((c == 'E' || c == 'e') && !ishex ||
+			    (c == 'P' || c == 'p') && ishex) {
+				if (!gotmantdig)
+					goto parsedone;
+				else
+					state = S_EXP;
+			} else if (ishex && isxdigit(c) || isdigit(c)) {
+				commit = p;
+				gotmantdig = 1;
+			} else
+				goto parsedone;
+			break;
+		case S_EXP:
+			state = S_EXPDIGITS;
+			if (c == '-' || c == '+')
+				break;
+			else
+				goto reswitch;
+		case S_EXPDIGITS:
+			if (isdigit(c))
+				commit = p;
+			else
+				goto parsedone;
+			break;
+		default:
+			abort();
+		}
+		*p++ = c;
+		if (--fp->_r > 0)
+			fp->_p++;
+		else if (__srefill(fp))
+			break;	/* EOF */
+	}
+
+parsedone:
+	while (commit < --p)
+		__ungetc(*(u_char *)p, fp);
+	*++commit = '\0';
+	return (commit - buf);
+}
+#endif
-- 
cgit v1.1