diff options
author | Elliott Hughes <enh@google.com> | 2016-08-26 16:17:17 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2016-09-07 15:01:54 -0700 |
commit | 7f0849fd113691e62af0400989936d3eff151e37 (patch) | |
tree | 166fa5afeed341e2685091c89d9d8418acb66e5d /libc/stdio/parsefloat.c | |
parent | a2b947e0a5ff4106e36f8110232131dab1356c25 (diff) |
Fix sscanf/wcstod parsing of NaNs.
The parsefloat routines -- which let us pass NaNs and infinities on to
strto(f|d|ld) -- come from NetBSD.
Also fix LP64's strtold to return a NaN, and fix all the architectures
to return quiet NaNs.
Also fix wcstof/wcstod/wcstold to use parsefloat so they support hex
floats.
Lots of new tests.
Bug: http://b/31101647
Change-Id: Id7d46ac2d8acb8770b5e8c445e87cfabfde6f111
Diffstat (limited to 'libc/stdio/parsefloat.c')
-rw-r--r-- | libc/stdio/parsefloat.c | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/libc/stdio/parsefloat.c b/libc/stdio/parsefloat.c new file mode 100644 index 000000000..e911da412 --- /dev/null +++ b/libc/stdio/parsefloat.c @@ -0,0 +1,336 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ctype.h> +#include <stdlib.h> + +#include "local.h" +#include "floatio.h" + +#define BUF 513 /* Maximum length of numeric string. */ + +size_t parsefloat(FILE *fp, char *buf, char *end) { + char *commit, *p; + int infnanpos = 0; + enum { + S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX, + S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS + } state = S_START; + unsigned char c; + int gotmantdig = 0, ishex = 0; + + /* + * We set commit = p whenever the string we have read so far + * constitutes a valid representation of a floating point + * number by itself. At some point, the parse will complete + * or fail, and we will ungetc() back to the last commit point. + * To ensure that the file offset gets updated properly, it is + * always necessary to read at least one character that doesn't + * match; thus, we can't short-circuit "infinity" or "nan(...)". + */ + commit = buf - 1; + for (p = buf; p < end; ) { + c = *fp->_p; +reswitch: + switch (state) { + case S_START: + state = S_GOTSIGN; + if (c == '-' || c == '+') + break; + else + goto reswitch; + case S_GOTSIGN: + switch (c) { + case '0': + state = S_MAYBEHEX; + commit = p; + break; + case 'I': + case 'i': + state = S_INF; + break; + case 'N': + case 'n': + state = S_NAN; + break; + default: + state = S_DIGITS; + goto reswitch; + } + break; + case S_INF: + if (infnanpos > 6 || + (c != "nfinity"[infnanpos] && + c != "NFINITY"[infnanpos])) + goto parsedone; + if (infnanpos == 1 || infnanpos == 6) + commit = p; /* inf or infinity */ + infnanpos++; + break; + case S_NAN: + switch (infnanpos) { + case -1: /* XXX kludge to deal with nan(...) */ + goto parsedone; + case 0: + if (c != 'A' && c != 'a') + goto parsedone; + break; + case 1: + if (c != 'N' && c != 'n') + goto parsedone; + else + commit = p; + break; + case 2: + if (c != '(') + goto parsedone; + break; + default: + if (c == ')') { + commit = p; + infnanpos = -2; + } else if (!isalnum(c) && c != '_') + goto parsedone; + break; + } + infnanpos++; + break; + case S_MAYBEHEX: + state = S_DIGITS; + if (c == 'X' || c == 'x') { + ishex = 1; + break; + } else { /* we saw a '0', but no 'x' */ + gotmantdig = 1; + goto reswitch; + } + case S_DIGITS: + if ((ishex && isxdigit(c)) || isdigit(c)) + gotmantdig = 1; + else { + state = S_FRAC; + if (c != '.') + goto reswitch; + } + if (gotmantdig) + commit = p; + break; + case S_FRAC: + if (((c == 'E' || c == 'e') && !ishex) || + ((c == 'P' || c == 'p') && ishex)) { + if (!gotmantdig) + goto parsedone; + else + state = S_EXP; + } else if ((ishex && isxdigit(c)) || isdigit(c)) { + commit = p; + gotmantdig = 1; + } else + goto parsedone; + break; + case S_EXP: + state = S_EXPDIGITS; + if (c == '-' || c == '+') + break; + else + goto reswitch; + case S_EXPDIGITS: + if (isdigit(c)) + commit = p; + else + goto parsedone; + break; + default: + abort(); + } + *p++ = c; + if (--fp->_r > 0) + fp->_p++; + else if (__srefill(fp)) + break; /* EOF */ + } + +parsedone: + while (commit < --p) + (void)ungetc(*(unsigned char *)p, fp); + *++commit = '\0'; + return commit - buf; +} + +size_t wparsefloat(FILE *fp, wchar_t *buf, wchar_t *end) { + wchar_t *commit, *p; + int infnanpos = 0; + enum { + S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX, + S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS + } state = S_START; + wint_t c; + int gotmantdig = 0, ishex = 0; + + /* + * We set commit = p whenever the string we have read so far + * constitutes a valid representation of a floating point + * number by itself. At some point, the parse will complete + * or fail, and we will ungetc() back to the last commit point. + * To ensure that the file offset gets updated properly, it is + * always necessary to read at least one character that doesn't + * match; thus, we can't short-circuit "infinity" or "nan(...)". + */ + commit = buf - 1; + c = WEOF; + for (p = buf; p < end; ) { + if ((c = __fgetwc_unlock(fp)) == WEOF) + break; +reswitch: + switch (state) { + case S_START: + state = S_GOTSIGN; + if (c == '-' || c == '+') + break; + else + goto reswitch; + case S_GOTSIGN: + switch (c) { + case '0': + state = S_MAYBEHEX; + commit = p; + break; + case 'I': + case 'i': + state = S_INF; + break; + case 'N': + case 'n': + state = S_NAN; + break; + default: + state = S_DIGITS; + goto reswitch; + } + break; + case S_INF: + if (infnanpos > 6 || + (c != (wint_t)"nfinity"[infnanpos] && + c != (wint_t)"NFINITY"[infnanpos])) + goto parsedone; + if (infnanpos == 1 || infnanpos == 6) + commit = p; /* inf or infinity */ + infnanpos++; + break; + case S_NAN: + switch (infnanpos) { + case -1: /* XXX kludge to deal with nan(...) */ + goto parsedone; + case 0: + if (c != 'A' && c != 'a') + goto parsedone; + break; + case 1: + if (c != 'N' && c != 'n') + goto parsedone; + else + commit = p; + break; + case 2: + if (c != '(') + goto parsedone; + break; + default: + if (c == ')') { + commit = p; + infnanpos = -2; + } else if (!iswalnum(c) && c != '_') + goto parsedone; + break; + } + infnanpos++; + break; + case S_MAYBEHEX: + state = S_DIGITS; + if (c == 'X' || c == 'x') { + ishex = 1; + break; + } else { /* we saw a '0', but no 'x' */ + gotmantdig = 1; + goto reswitch; + } + case S_DIGITS: + if ((ishex && iswxdigit(c)) || iswdigit(c)) + gotmantdig = 1; + else { + state = S_FRAC; + if (c != L'.') + goto reswitch; + } + if (gotmantdig) + commit = p; + break; + case S_FRAC: + if (((c == 'E' || c == 'e') && !ishex) || + ((c == 'P' || c == 'p') && ishex)) { + if (!gotmantdig) + goto parsedone; + else + state = S_EXP; + } else if ((ishex && iswxdigit(c)) || iswdigit(c)) { + commit = p; + gotmantdig = 1; + } else + goto parsedone; + break; + case S_EXP: + state = S_EXPDIGITS; + if (c == '-' || c == '+') + break; + else + goto reswitch; + case S_EXPDIGITS: + if (iswdigit(c)) + commit = p; + else + goto parsedone; + break; + default: + abort(); + } + *p++ = c; + c = WEOF; + } + +parsedone: + if (c != WEOF) + ungetwc(c, fp); + while (commit < --p) + ungetwc(*p, fp); + *++commit = '\0'; + return (int)(commit - buf); +} |