ref: 79f008e85365b696374d70c9933498990aa248bc
parent: 4f4701e0903d14dcbae8761ad4d04ecb35a5a614
parent: c154c3cb82d1799151917bb97f917fabc90a8d07
author: onetrueawk <bwkster@gmail.com>
date: Mon Jan 21 09:20:28 EST 2019
Merge branch 'master' into nf-self-assign
--- a/awk.h
+++ b/awk.h
@@ -97,9 +97,14 @@
extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */
+extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */
+extern Cell *ofsloc; /* OFS */
+extern Cell *orsloc; /* ORS */
+extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
+extern Cell *subseploc; /* SUBSEP */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */
--- a/b.c
+++ b/b.c
@@ -823,7 +823,15 @@
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {prestr += cc->cc_namelen + 3;
- for (i = 0; i < NCHARS; i++) {+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i < NCHARS; i++) {if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) {--- a/bugs-fixed/README
+++ b/bugs-fixed/README
@@ -24,14 +24,31 @@
7. unary-plus: Unary plus on a string constant returned the string.
Instead, it should convert the value to numeric and give that value.
-8. missing-precision: When using the format string "%*s", the precision
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
argument was used without checking if it was present first.
-9. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
to with sprintf(), which meant that some conversions could write past the
end.
-X. nf-self-assign: "NF = NF" wouldn't force the record to be rebuilt.
+12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
+FS, RS, OFS, or ORS were set to a numeric value, then their string values
+wouldn't always be generated before being needed.
-X. negative-nf: Setting NF to a negative value caused a segmentation fault.
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
+calling execute(), in case SUBSEP itself has been changed.
+
+14. split-fs-from-array: If the third argument to split() comes from the
+array passed as the second argument, then split() would previously read
+from the freed memory and possibly produce incorrect results (depending
+on the system's malloc()/free() behaviour).
+
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -1,0 +1,4 @@
+BEGIN {+ print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+ print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -1,0 +1,2 @@
+22345
+1 2 3 4 5
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -1,0 +1,2 @@
+12345
+1 2 3 4 5
--- /dev/null
+++ b/bugs-fixed/fs-overflow.awk
@@ -1,0 +1,13 @@
+function foo() {+ a = "";
+ for (i = 0; i < 10000; i++) {+ a = a "c";
+ }
+ return a;
+}
+
+BEGIN {+ FS = foo();
+ $0="foo";
+ print $1;
+}
--- /dev/null
+++ b/bugs-fixed/numeric-fs.awk
@@ -1,0 +1,5 @@
+BEGIN {+ FS = 0; split("20202", a); print a[1];+ FS = 1; $0="31313"; print $1;
+ FS = 2; "echo 42424" | getline; print $1;
+}
--- /dev/null
+++ b/bugs-fixed/numeric-fs.ok
@@ -1,0 +1,3 @@
+2
+3
+4
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.awk
@@ -1,0 +1,8 @@
+BEGIN {+ $0 = "a b c";
+ OFS = 1;
+ ORS = 2;
+ NF = 2;
+ print;
+ print "d", "e";
+}
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.bad
@@ -1,0 +1,2 @@
+a b
+d e
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.ok
@@ -1,0 +1,1 @@
+a1b2d1e2
\ No newline at end of file
--- /dev/null
+++ b/bugs-fixed/numeric-rs.awk
@@ -1,0 +1,6 @@
+BEGIN {+ RS = 1;
+ while ("echo a1b1c1d" | getline > 0) {+ print $1;
+ }
+}
--- /dev/null
+++ b/bugs-fixed/numeric-rs.bad
@@ -1,0 +1,1 @@
+a1b1c1d
--- /dev/null
+++ b/bugs-fixed/numeric-rs.ok
@@ -1,0 +1,4 @@
+a
+b
+c
+d
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.awk
@@ -1,0 +1,5 @@
+BEGIN {+ SUBSEP = 123.456;
+ a["hello", "world"] = "foo";
+ print a["hello" SUBSEP "world"];
+}
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.bad
@@ -1,0 +1,1 @@
+
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.ok
@@ -1,0 +1,1 @@
+foo
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.awk
@@ -1,0 +1,5 @@
+BEGIN {+ a[1] = "elephantie"
+ a[2] = "e"
+ print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
+}
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.ok
@@ -1,0 +1,1 @@
+4 l phanti 2
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.awk
@@ -1,0 +1,24 @@
+function foo(c, n) {+ s = "";
+ for (i = 0; i < n; i++) {+ s = s c;
+ }
+ return s;
+}
+
+BEGIN {+ str1 = foo("a", 4500);+ str2 = foo("b", 9000);+
+ a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
+
+ for (k in a) {+ print length(k);
+ }
+
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+ delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+}
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.ok
@@ -1,0 +1,5 @@
+27001
+1
+1
+0
+0
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@
{if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
- || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+ || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
FATAL("out of space for $0 and fields");*fldtab[0] = dollar0;
@@ -189,12 +189,13 @@
int sep, c;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
+ char *rs = getsval(rsloc);
- if (strlen(*FS) >= sizeof(inputFS))
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */
- if ((sep = **RS) == 0) {+ if ((sep = *rs) == 0) {sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
@@ -208,7 +209,7 @@
FATAL("input record `%.30s...' too long", buf);*rr++ = c;
}
- if (**RS == sep || c == EOF)
+ if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
@@ -283,6 +284,8 @@
}
fr = fields;
i = 0; /* number of fields accumulated here */
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+ FATAL("field separator %.10s... is too long", *FS);strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */i = refldbld(r, inputFS);
@@ -481,6 +484,7 @@
{int i;
char *r, *p;
+ char *sep = getsval(ofsloc);
if (donerec == 1)
return;
@@ -492,9 +496,9 @@
while ((*r = *p++) != 0)
r++;
if (i < *NF) {- if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
+ if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record);- for (p = *OFS; (*r = *p++) != 0; )
+ for (p = sep; (*r = *p++) != 0; )
r++;
}
}
--- a/main.c
+++ b/main.c
@@ -88,7 +88,7 @@
exit(0);
break;
}
- if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */+ if (strcmp(argv[1], "--") == 0) { /* explicit end of args */argc--;
argv++;
break;
--- a/run.c
+++ b/run.c
@@ -462,7 +462,7 @@
Node *np;
char *buf;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in array");@@ -472,6 +472,7 @@
for (np = a[1]; np; np = np->nnext) {y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
FATAL("out of memory for %s[%s...]", x->nval, buf);strcat(buf, s);
@@ -500,7 +501,7 @@
Cell *x, *y;
Node *np;
char *s;
- int nsub = strlen(*SUBSEP);
+ int nsub;
x = execute(a[0]); /* Cell* for symbol table */
if (!isarr(x))
@@ -519,9 +520,10 @@
for (np = a[1]; np; np = np->nnext) {y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);- strcat(buf, s);
+ strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
tempfree(y);
@@ -540,7 +542,7 @@
char *buf;
char *s;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {@@ -558,6 +560,7 @@
for (p = a[0]; p; p = p->nnext) {x = execute(p); /* expr */
s = getsval(x);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);strcat(buf, s);
@@ -1178,25 +1181,26 @@
{Cell *x, *y, *z;
int n1, n2;
- char *s;
+ char *s = NULL;
+ int ssz = 0;
x = execute(a[0]);
+ n1 = strlen(getsval(x));
+ adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+ (void) strncpy(s, x->sval, ssz);
+
y = execute(a[1]);
- getsval(x);
- getsval(y);
- n1 = strlen(x->sval);
- n2 = strlen(y->sval);
- s = (char *) malloc(n1 + n2 + 1);
- if (s == NULL)
- FATAL("out of space concatenating %.15s... and %.15s...",- x->sval, y->sval);
- strcpy(s, x->sval);
- strcpy(s+n1, y->sval);
+ n2 = strlen(getsval(y));
+ adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+ (void) strncpy(s + n1, y->sval, ssz - n1);
+
tempfree(x);
tempfree(y);
+
z = gettemp();
z->sval = s;
z->tval = STR;
+
return(z);
}
@@ -1243,8 +1247,9 @@
{Cell *x = 0, *y, *ap;
char *s, *origs;
+ char *fs, *origfs = NULL;
int sep;
- char *t, temp, num[50], *fs = 0;
+ char *t, temp, num[50];
int n, tempstat, arg3type;
y = execute(a[0]); /* source string */
@@ -1251,10 +1256,11 @@
origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]);
if (a[2] == 0) /* fs string */
- fs = *FS;
+ fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */x = execute(a[2]);
- fs = getsval(x);
+ origfs = fs = strdup(getsval(x));
+ tempfree(x);
} else if (arg3type == REGEXPR)
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
else
@@ -1369,9 +1375,7 @@
tempfree(ap);
tempfree(y);
free(origs);
- if (a[2] != 0 && arg3type == STRING) {- tempfree(x);
- }
+ free(origfs);
x = gettemp();
x->tval = NUM;
x->fval = n;
@@ -1633,9 +1637,9 @@
fputs(getpssval(y), fp);
tempfree(y);
if (x->nnext == NULL)
- fputs(*ORS, fp);
+ fputs(getsval(orsloc), fp);
else
- fputs(*OFS, fp);
+ fputs(getsval(ofsloc), fp);
}
if (a[1] != 0)
fflush(fp);
--- a/tran.c
+++ b/tran.c
@@ -55,10 +55,14 @@
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
+Cell *ofsloc; /* OFS */
+Cell *orsloc; /* ORS */
+Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
+Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */
@@ -88,9 +92,12 @@
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);FS = &fsloc->sval;
- RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;- OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;- ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;+ rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);+ RS = &rsloc->sval;
+ ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);+ OFS = &ofsloc->sval;
+ orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);+ ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;@@ -100,7 +107,8 @@
NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);FNR = &fnrloc->fval;
- SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;+ subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);+ SUBSEP = &subseploc->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);@@ -310,6 +318,9 @@
} else if (isrec(vp)) {donefld = 0; /* mark $1... invalid */
donerec = 1;
+ } else if (vp == ofsloc) {+ if (donerec == 0)
+ recbld();
}
if (freeable(vp))
xfree(vp->sval); /* free any previous string */
@@ -351,7 +362,7 @@
} else if (isrec(vp)) {donefld = 0; /* mark $1... invalid */
donerec = 1;
- } else if (&vp->sval == OFS) {+ } else if (vp == ofsloc) {if (donerec == 0)
recbld();
}
--
⑨