shithub: trueawk

--- a/awk.1

+++ b/awk.1

@@ -577,3 +577,56 @@

 the syntax is worse.

.PP

 Only eight-bit characters sets are handled correctly.

+.SH UNUSUAL FLOATING-POINT VALUES

+.I Awk

+was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)

+and Infinity values, which are supported by all modern floating-point

+hardware.

+.PP

+Because

+.I awk

+uses

+.IR strtod (3)

+and

+.IR atof (3)

+to convert string values to double-precision floating-point values,

+modern C libraries also convert strings starting with

+.B inf

+and

+.B nan

+into infinity and NaN values respectively.  This led to strange results,

+with something like this:

+.PP

+.EX

+.nf

+echo nancy | awk '{ print $1 + 0 }'

+.fi

+.EE

+.PP

+printing

+.B nan

+instead of zero.

+.PP

+.I Awk

+now follows GNU AWK, and prefilters string values before attempting

+to convert them to numbers, as follows:

+.TP

+.I "Hexadecimal values"

+Hexadecimal values (allowed since C99) convert to zero, as they did

+prior to C99.

+.TP

+.I "NaN values"

+The two strings

+.B +nan

+and

+.B \-nan

+(case independent) convert to NaN. No others do.

+(NaNs can have signs.)

+.TP

+.I "Infinity values"

+The two strings

+.B +inf

+and

+.B \-inf

+(case independent) convert to positive and negative infinity, respectively.

+No others do.

--- a/lex.c

+++ b/lex.c

@@ -191,7 +191,12 @@

 			return word(buf);

 		if (isdigit(c)) {

 			char *cp = tostring(buf);

-			yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);

+			double result;

+			if (is_number(cp, & result))

+				yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);

+			else

+				yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);

 			free(cp);

 			/* should this also have STR set? */

 			RET(NUMBER);

--- a/lib.c

+++ b/lib.c

@@ -30,6 +30,7 @@

 #include <stdlib.h>

 #include <stdarg.h>

 #include <limits.h>

+#include <math.h>

 #include "awk.h"

 char	EMPTY[] = { '\0' };

@@ -181,12 +182,14 @@

 			innew = false;

 		if (c != 0 || buf[0] != '\0') {	/* normal record */

 			if (isrecord) {

+				double result;

 				if (freeable(fldtab[0]))

 					xfree(fldtab[0]->sval);

 				fldtab[0]->sval = buf;	/* buf == record */

 				fldtab[0]->tval = REC | STR | DONTFREE;

-				if (is_number(fldtab[0]->sval)) {

-					fldtab[0]->fval = atof(fldtab[0]->sval);

+				if (is_number(fldtab[0]->sval, & result)) {

+					fldtab[0]->fval = result;

 					fldtab[0]->tval |= NUM;

@@ -293,6 +296,7 @@

 	char *p;

 	Cell *q;

+	double result;

 	for (p=s; *p != '='; p++)

@@ -300,8 +304,8 @@

 	p = qstring(p, '\0');

 	q = setsymtab(s, p, 0.0, STR, symtab);

 	setsval(q, p);

-	if (is_number(q->sval)) {

-		q->fval = atof(q->sval);

+	if (is_number(q->sval, & result)) {

+		q->fval = result;

 		q->tval |= NUM;

 	DPRINTF("command line set %s to |%s|\n", s, p);

@@ -402,9 +406,11 @@

 	lastfld = i;

 	donefld = true;

 	for (j = 1; j <= lastfld; j++) {

+		double result;

 		p = fldtab[j];

-		if(is_number(p->sval)) {

-			p->fval = atof(p->sval);

+		if(is_number(p->sval, & result)) {

+			p->fval = result;

 			p->tval |= NUM;

@@ -756,24 +762,67 @@

 /* strtod is supposed to be a proper test of what's a valid number */

 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */

 /* wrong: violates 4.10.1.4 of ansi C standard */

 /* well, not quite. As of C99, hex floating point is allowed. so this is

- * a bit of a mess.

+ * a bit of a mess. We work around the mess by checking for a hexadecimal

+ * value and disallowing it. Similarly, we now follow gawk and allow only

+ * +nan, -nan, +inf, and -inf for NaN and infinity values.

*/

-#include <math.h>

-int is_number(const char *s)

+/*

+ * This routine now has a more complicated interface, the main point

+ * being to avoid the double conversion of a string to double, and

+ * also to report, if requested, whether the numeric value is only a

+ * leading prefix of the string or is the entire string. The latter

+ * information is used in getfval().

+ */

+bool is_valid_number(const char *s, bool trailing_stuff_ok,

+			bool *no_trailing, double *result)

 	double r;

 	char *ep;

+	bool retval = false;

+	if (no_trailing)

+		*no_trailing = false;

+	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')

+		s++;

+	if (s[0] == '0' && tolower(s[1]) == 'x')	// no hex floating point, sorry

+		return false;

+	// allow only +nan, -nan, +inf, and -inf; reject any other leading letter

+	if (s[0] == '+' || s[0] == '-') {

+		if (strcasecmp(s+1, "nan") == 0 || strcasecmp(s+1, "inf") == 0)

+			return true;

+		else if (! isdigit(s[1]) && s[1] != '.')

+			return false;

+	}

+	else if (! isdigit(s[0]) && s[0] != '.')

+		return false;

 	errno = 0;

 	r = strtod(s, &ep);

 	if (ep == s || r == HUGE_VAL || errno == ERANGE)

-		return 0;

-	/* allow \r as well. windows files aren't going to go away. */

+		return false;

+	if (result != NULL)

+		*result = r;

+	/*

+	 * check for trailing stuff

+	 * allow \r as well. windows files aren't going to go away.

+	 */

 	while (*ep == ' ' || *ep == '\t' || *ep == '\n' || *ep == '\r')

 		ep++;

-	if (*ep == '\0')

-		return 1;

-	else

-		return 0;

+	if (no_trailing)

+		*no_trailing = (*ep == '\0');

+	// return true if we reached the end of the string, or if trailing stuff is allowed

+	retval = (*ep == '\0') || trailing_stuff_ok;

+	return retval;

--- a/proto.h

+++ b/proto.h

@@ -146,7 +146,9 @@

 extern	void	bclass(int);

 extern	double	errcheck(double, const char *);

 extern	int	isclvar(const char *);

-extern	int	is_number(const char *);

+extern	bool	is_valid_number(const char *s, bool trailing_stuff_ok,

+				bool *no_trailing, double *result);

+#define is_number(s, val)	is_valid_number(s, false, NULL, val)

 extern	int	adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);

 extern	void	run(Node *);

--- a/run.c

+++ b/run.c

@@ -407,6 +407,7 @@

 	int bufsize = recsize;

 	int mode;

 	bool newflag;

+	double result;

 	if ((buf = (char *) malloc(bufsize)) == NULL)

 		FATAL("out of memory in getline");

@@ -429,15 +430,15 @@

 		} else if (a[0] != NULL) {	/* getline var <file */

 			x = execute(a[0]);

 			setsval(x, buf);

-			if (is_number(x->sval)) {

-				x->fval = atof(x->sval);

+			if (is_number(x->sval, & result)) {

+				x->fval = result;

 				x->tval |= NUM;

 			tempfree(x);

 		} else {			/* getline <file */

 			setsval(fldtab[0], buf);

-			if (is_number(fldtab[0]->sval)) {

-				fldtab[0]->fval = atof(fldtab[0]->sval);

+			if (is_number(fldtab[0]->sval, & result)) {

+				fldtab[0]->fval = result;

 				fldtab[0]->tval |= NUM;

@@ -448,8 +449,8 @@

 			n = getrec(&buf, &bufsize, false);

 			x = execute(a[0]);

 			setsval(x, buf);

-			if (is_number(x->sval)) {

-				x->fval = atof(x->sval);

+			if (is_number(x->sval, & result)) {

+				x->fval = result;

 				x->tval |= NUM;

 			tempfree(x);

@@ -726,7 +727,7 @@

 	if ((Awkfloat)INT_MAX < val)

 		FATAL("trying to access out of range field %s", x->nval);

 	m = (int) val;

-	if (m == 0 && !is_number(s = getsval(x)))	/* suspicion! */

+	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */

 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);

 		/* BUG: can x->nval ever be null??? */

 	tempfree(x);

@@ -1259,6 +1260,7 @@

 	int sep;

 	char temp, num[50];

 	int n, tempstat, arg3type;

+	double result;

 	y = execute(a[0]);	/* source string */

 	origs = s = strdup(getsval(y));

@@ -1303,8 +1305,8 @@

 				snprintf(num, sizeof(num), "%d", n);

 				temp = *patbeg;

 				setptr(patbeg, '\0');

-				if (is_number(s))

-					setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);

+				if (is_number(s, & result))

+					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);

 				else

 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);

 				setptr(patbeg, temp);

@@ -1322,8 +1324,8 @@

 		n++;

 		snprintf(num, sizeof(num), "%d", n);

-		if (is_number(s))

-			setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);

+		if (is_number(s, & result))

+			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);

 		else

 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);

   spdone:

@@ -1343,8 +1345,8 @@

 			temp = *s;

 			setptr(s, '\0');

 			snprintf(num, sizeof(num), "%d", n);

-			if (is_number(t))

-				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);

+			if (is_number(t, & result))

+				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);

 			else

 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);

 			setptr(s, temp);

@@ -1372,8 +1374,8 @@

 			temp = *s;

 			setptr(s, '\0');

 			snprintf(num, sizeof(num), "%d", n);

-			if (is_number(t))

-				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);

+			if (is_number(t, & result))

+				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);

 			else

 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);

 			setptr(s, temp);

--- a/tran.c

+++ b/tran.c

@@ -129,9 +129,11 @@

 	free(cp->sval);

 	cp->sval = (char *) ARGVtab;

 	for (i = 0; i < ac; i++) {

+		double result;

 		sprintf(temp, "%d", i);

-		if (is_number(*av))

-			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);

+		if (is_number(*av, & result))

+			setsymtab(temp, *av, result, STR|NUM, ARGVtab);

 		else

 			setsymtab(temp, *av, 0.0, STR, ARGVtab);

 		av++;

@@ -148,13 +150,15 @@

 	free(cp->sval);

 	cp->sval = (char *) ENVtab;

 	for ( ; *envp; envp++) {

+		double result;

 		if ((p = strchr(*envp, '=')) == NULL)

 			continue;

 		if( p == *envp ) /* no left hand side name in env string */

 			continue;

 		*p++ = 0;	/* split into two strings at = */

-		if (is_number(p))

-			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);

+		if (is_number(p, & result))

+			setsymtab(*envp, p, result, STR|NUM, ENVtab);

 		else

 			setsymtab(*envp, p, 0.0, STR, ENVtab);

 		p[-1] = '=';	/* restore in case env is passed down to a shell */

@@ -399,9 +403,15 @@

 	else if (isrec(vp) && !donerec)

 		recbld();

 	if (!isnum(vp)) {	/* not a number */

-		vp->fval = atof(vp->sval);	/* best guess */

-		if (is_number(vp->sval) && !(vp->tval&CON))

-			vp->tval |= NUM;	/* make NUM only sparingly */

+		double fval;

+		bool no_trailing;

+		if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {

+			vp->fval = fval;

+			if (no_trailing && !(vp->tval&CON))

+				vp->tval |= NUM;	/* make NUM only sparingly */

+		} else

+			vp->fval = 0.0;

 	DPRINTF("getfval %p: %s = %g, t=%o\n",

 		(void*)vp, NN(vp->nval), vp->fval, vp->tval);

--

⑨