shithub: trueawk

Download patch

ref: 79f008e85365b696374d70c9933498990aa248bc
parent: 4f4701e0903d14dcbae8761ad4d04ecb35a5a614
parent: c154c3cb82d1799151917bb97f917fabc90a8d07
author: onetrueawk <bwkster@gmail.com>
date: Mon Jan 21 09:20:28 EST 2019

Merge branch 'master' into nf-self-assign

--- a/awk.h
+++ b/awk.h
@@ -97,9 +97,14 @@
 
 extern Cell	*nrloc;		/* NR */
 extern Cell	*fnrloc;	/* FNR */
+extern Cell	*fsloc;		/* FS */
 extern Cell	*nfloc;		/* NF */
+extern Cell	*ofsloc;	/* OFS */
+extern Cell	*orsloc;	/* ORS */
+extern Cell	*rsloc;		/* RS */
 extern Cell	*rstartloc;	/* RSTART */
 extern Cell	*rlengthloc;	/* RLENGTH */
+extern Cell	*subseploc;	/* SUBSEP */
 
 /* Cell.tval values: */
 #define	NUM	01	/* number value is valid */
--- a/b.c
+++ b/b.c
@@ -823,7 +823,15 @@
 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
 				    prestr[2 + cc->cc_namelen] == ']') {
 					prestr += cc->cc_namelen + 3;
-					for (i = 0; i < NCHARS; i++) {
+					/*
+					 * BUG: We begin at 1, instead of 0, since we
+					 * would otherwise prematurely terminate the
+					 * string for classes like [[:cntrl:]]. This
+					 * means that we can't match the NUL character,
+					 * not without first adapting the entire
+					 * program to track each string's length.
+					 */
+					for (i = 1; i < NCHARS; i++) {
 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
 						    FATAL("out of space for reg expr %.10s...", lastre);
 						if (cc->cc_func(i)) {
--- a/bugs-fixed/README
+++ b/bugs-fixed/README
@@ -24,14 +24,31 @@
 7. unary-plus: Unary plus on a string constant returned the string.
 Instead, it should convert the value to numeric and give that value.
 
-8. missing-precision: When using the format string "%*s", the precision
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
 argument was used without checking if it was present first.
 
-9. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
 to with sprintf(), which meant that some conversions could write past the
 end.
 
-X. nf-self-assign: "NF = NF" wouldn't force the record to be rebuilt.
+12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
+FS, RS, OFS, or ORS were set to a numeric value, then their string values
+wouldn't always be generated before being needed.
 
-X. negative-nf: Setting NF to a negative value caused a segmentation fault.
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
+calling execute(), in case SUBSEP itself has been changed.
+
+14. split-fs-from-array: If the third argument to split() comes from the
+array passed as the second argument, then split() would previously read
+from the freed memory and possibly produce incorrect results (depending
+on the system's malloc()/free() behaviour).
+
 
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -1,0 +1,4 @@
+BEGIN {
+    print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+    print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -1,0 +1,2 @@
+22345
+1 2 3 4 5
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -1,0 +1,2 @@
+12345
+1 2 3 4 5
--- /dev/null
+++ b/bugs-fixed/fs-overflow.awk
@@ -1,0 +1,13 @@
+function foo() {
+    a = "";
+    for (i = 0; i < 10000; i++) {
+        a = a "c";
+    }
+    return a;
+}
+
+BEGIN {
+    FS = foo();
+    $0="foo";
+    print $1;
+}
--- /dev/null
+++ b/bugs-fixed/numeric-fs.awk
@@ -1,0 +1,5 @@
+BEGIN {
+	FS = 0; split("20202", a); print a[1];
+	FS = 1; $0="31313"; print $1;
+	FS = 2; "echo 42424" | getline; print $1;
+}
--- /dev/null
+++ b/bugs-fixed/numeric-fs.ok
@@ -1,0 +1,3 @@
+2
+3
+4
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.awk
@@ -1,0 +1,8 @@
+BEGIN {
+	$0 = "a b c";
+	OFS = 1;
+	ORS = 2;
+	NF = 2;
+	print;
+	print "d", "e";
+}
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.bad
@@ -1,0 +1,2 @@
+a b
+d e
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.ok
@@ -1,0 +1,1 @@
+a1b2d1e2
\ No newline at end of file
--- /dev/null
+++ b/bugs-fixed/numeric-rs.awk
@@ -1,0 +1,6 @@
+BEGIN {
+	RS = 1;
+	while ("echo a1b1c1d" | getline > 0) {
+		print $1;
+	}
+}
--- /dev/null
+++ b/bugs-fixed/numeric-rs.bad
@@ -1,0 +1,1 @@
+a1b1c1d
--- /dev/null
+++ b/bugs-fixed/numeric-rs.ok
@@ -1,0 +1,4 @@
+a
+b
+c
+d
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.awk
@@ -1,0 +1,5 @@
+BEGIN {
+    SUBSEP = 123.456;
+    a["hello", "world"] = "foo";
+    print a["hello" SUBSEP "world"];
+}
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.bad
@@ -1,0 +1,1 @@
+
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.ok
@@ -1,0 +1,1 @@
+foo
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.awk
@@ -1,0 +1,5 @@
+BEGIN {
+        a[1] = "elephantie"
+        a[2] = "e"
+        print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
+}
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.ok
@@ -1,0 +1,1 @@
+4 l phanti 2
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.awk
@@ -1,0 +1,24 @@
+function foo(c, n) {
+    s = "";
+    for (i = 0; i < n; i++) {
+        s = s c;
+    }
+    return s;
+}
+
+BEGIN {
+    str1 = foo("a", 4500);
+    str2 = foo("b", 9000);
+
+    a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
+
+    for (k in a) {
+        print length(k);
+    }
+
+    print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+    print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+    delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
+    print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+    print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+}
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.ok
@@ -1,0 +1,5 @@
+27001
+1
+1
+0
+0
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@
 {
 	if ( (record = (char *) malloc(n)) == NULL
 	  || (fields = (char *) malloc(n+1)) == NULL
-	  || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+	  || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
 	  || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
 		FATAL("out of space for $0 and fields");
 	*fldtab[0] = dollar0;
@@ -189,12 +189,13 @@
 	int sep, c;
 	char *rr, *buf = *pbuf;
 	int bufsize = *pbufsize;
+	char *rs = getsval(rsloc);
 
-	if (strlen(*FS) >= sizeof(inputFS))
+	if (strlen(getsval(fsloc)) >= sizeof (inputFS))
 		FATAL("field separator %.10s... is too long", *FS);
 	/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
 	strcpy(inputFS, *FS);	/* for subsequent field splitting */
-	if ((sep = **RS) == 0) {
+	if ((sep = *rs) == 0) {
 		sep = '\n';
 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
 			;
@@ -208,7 +209,7 @@
 					FATAL("input record `%.30s...' too long", buf);
 			*rr++ = c;
 		}
-		if (**RS == sep || c == EOF)
+		if (*rs == sep || c == EOF)
 			break;
 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
 			break;
@@ -283,6 +284,8 @@
 	}
 	fr = fields;
 	i = 0;	/* number of fields accumulated here */
+	if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+		FATAL("field separator %.10s... is too long", *FS);
 	strcpy(inputFS, *FS);
 	if (strlen(inputFS) > 1) {	/* it's a regular expression */
 		i = refldbld(r, inputFS);
@@ -481,6 +484,7 @@
 {
 	int i;
 	char *r, *p;
+	char *sep = getsval(ofsloc);
 
 	if (donerec == 1)
 		return;
@@ -492,9 +496,9 @@
 		while ((*r = *p++) != 0)
 			r++;
 		if (i < *NF) {
-			if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
+			if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
 				FATAL("created $0 `%.30s...' too long", record);
-			for (p = *OFS; (*r = *p++) != 0; )
+			for (p = sep; (*r = *p++) != 0; )
 				r++;
 		}
 	}
--- a/main.c
+++ b/main.c
@@ -88,7 +88,7 @@
 			exit(0);
 			break;
 		}
-		if (strncmp(argv[1], "--", 2) == 0) {	/* explicit end of args */
+		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
 			argc--;
 			argv++;
 			break;
--- a/run.c
+++ b/run.c
@@ -462,7 +462,7 @@
 	Node *np;
 	char *buf;
 	int bufsz = recsize;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	if ((buf = (char *) malloc(bufsz)) == NULL)
 		FATAL("out of memory in array");
@@ -472,6 +472,7 @@
 	for (np = a[1]; np; np = np->nnext) {
 		y = execute(np);	/* subscript */
 		s = getsval(y);
+		nsub = strlen(getsval(subseploc));
 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
 			FATAL("out of memory for %s[%s...]", x->nval, buf);
 		strcat(buf, s);
@@ -500,7 +501,7 @@
 	Cell *x, *y;
 	Node *np;
 	char *s;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	x = execute(a[0]);	/* Cell* for symbol table */
 	if (!isarr(x))
@@ -519,9 +520,10 @@
 		for (np = a[1]; np; np = np->nnext) {
 			y = execute(np);	/* subscript */
 			s = getsval(y);
+			nsub = strlen(getsval(subseploc));
 			if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
 				FATAL("out of memory deleting %s[%s...]", x->nval, buf);
-			strcat(buf, s);	
+			strcat(buf, s);
 			if (np->nnext)
 				strcat(buf, *SUBSEP);
 			tempfree(y);
@@ -540,7 +542,7 @@
 	char *buf;
 	char *s;
 	int bufsz = recsize;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	ap = execute(a[1]);	/* array name */
 	if (!isarr(ap)) {
@@ -558,6 +560,7 @@
 	for (p = a[0]; p; p = p->nnext) {
 		x = execute(p);	/* expr */
 		s = getsval(x);
+		nsub = strlen(getsval(subseploc));
 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
 			FATAL("out of memory deleting %s[%s...]", x->nval, buf);
 		strcat(buf, s);
@@ -1178,25 +1181,26 @@
 {
 	Cell *x, *y, *z;
 	int n1, n2;
-	char *s;
+	char *s = NULL;
+	int ssz = 0;
 
 	x = execute(a[0]);
+	n1 = strlen(getsval(x));
+	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+	(void) strncpy(s, x->sval, ssz);
+
 	y = execute(a[1]);
-	getsval(x);
-	getsval(y);
-	n1 = strlen(x->sval);
-	n2 = strlen(y->sval);
-	s = (char *) malloc(n1 + n2 + 1);
-	if (s == NULL)
-		FATAL("out of space concatenating %.15s... and %.15s...",
-			x->sval, y->sval);
-	strcpy(s, x->sval);
-	strcpy(s+n1, y->sval);
+	n2 = strlen(getsval(y));
+	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+	(void) strncpy(s + n1, y->sval, ssz - n1);
+
 	tempfree(x);
 	tempfree(y);
+
 	z = gettemp();
 	z->sval = s;
 	z->tval = STR;
+
 	return(z);
 }
 
@@ -1243,8 +1247,9 @@
 {
 	Cell *x = 0, *y, *ap;
 	char *s, *origs;
+	char *fs, *origfs = NULL;
 	int sep;
-	char *t, temp, num[50], *fs = 0;
+	char *t, temp, num[50];
 	int n, tempstat, arg3type;
 
 	y = execute(a[0]);	/* source string */
@@ -1251,10 +1256,11 @@
 	origs = s = strdup(getsval(y));
 	arg3type = ptoi(a[3]);
 	if (a[2] == 0)		/* fs string */
-		fs = *FS;
+		fs = getsval(fsloc);
 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
 		x = execute(a[2]);
-		fs = getsval(x);
+		origfs = fs = strdup(getsval(x));
+		tempfree(x);
 	} else if (arg3type == REGEXPR)
 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
 	else
@@ -1369,9 +1375,7 @@
 	tempfree(ap);
 	tempfree(y);
 	free(origs);
-	if (a[2] != 0 && arg3type == STRING) {
-		tempfree(x);
-	}
+	free(origfs);
 	x = gettemp();
 	x->tval = NUM;
 	x->fval = n;
@@ -1633,9 +1637,9 @@
 		fputs(getpssval(y), fp);
 		tempfree(y);
 		if (x->nnext == NULL)
-			fputs(*ORS, fp);
+			fputs(getsval(orsloc), fp);
 		else
-			fputs(*OFS, fp);
+			fputs(getsval(ofsloc), fp);
 	}
 	if (a[1] != 0)
 		fflush(fp);
--- a/tran.c
+++ b/tran.c
@@ -55,10 +55,14 @@
 Cell	*nrloc;		/* NR */
 Cell	*nfloc;		/* NF */
 Cell	*fnrloc;	/* FNR */
+Cell	*ofsloc;	/* OFS */
+Cell	*orsloc;	/* ORS */
+Cell	*rsloc;		/* RS */
 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
 Cell	*rstartloc;	/* RSTART */
 Cell	*rlengthloc;	/* RLENGTH */
+Cell	*subseploc;	/* SUBSEP */
 Cell	*symtabloc;	/* SYMTAB */
 
 Cell	*nullloc;	/* a guaranteed empty cell */
@@ -88,9 +92,12 @@
 
 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
 	FS = &fsloc->sval;
-	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
-	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
-	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
+	rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+	RS = &rsloc->sval;
+	ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+	OFS = &ofsloc->sval;
+	orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+	ORS = &orsloc->sval;
 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@@ -100,7 +107,8 @@
 	NR = &nrloc->fval;
 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
 	FNR = &fnrloc->fval;
-	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
+	subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+	SUBSEP = &subseploc->sval;
 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
 	RSTART = &rstartloc->fval;
 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@@ -310,6 +318,9 @@
 	} else if (isrec(vp)) {
 		donefld = 0;	/* mark $1... invalid */
 		donerec = 1;
+	} else if (vp == ofsloc) {
+		if (donerec == 0)
+			recbld();
 	}
 	if (freeable(vp))
 		xfree(vp->sval); /* free any previous string */
@@ -351,7 +362,7 @@
 	} else if (isrec(vp)) {
 		donefld = 0;	/* mark $1... invalid */
 		donerec = 1;
-	} else if (&vp->sval == OFS) {
+	} else if (vp == ofsloc) {
 		if (donerec == 0)
 			recbld();
 	}
--