ref: f3f5210cfac77479e6cf6ca2f21f8f08c012aa85
parent: 66a543eeadeed25d152a53c67ff0dd25ac017730
author: Arnold D. Robbins <arnold@skeeve.com>
date: Wed Aug 24 12:42:27 EDT 2022
Latest additions from BWK.
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@
ytab*
testdir/foo*
testdir/temp*
--- a/awk.h
+++ b/awk.h
@@ -225,7 +225,8 @@
/* structures used by regular expression matching machinery, mostly b.c: */
-#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+#define NCHARS (1256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+ /* BUG: some overflows (caught) if we use 256 */
/* watch out in match(), etc. */
#define HAT (NCHARS+2) /* matches ^ in regular expr */
#define NSTATES 32
--- a/b.c
+++ b/b.c
@@ -441,8 +441,13 @@
continue;
}
}
- if (!adjbuf((char **) &buf, &bufsz, bp-buf+8, 100, (char **) &bp, "cclenter2"))
- FATAL("out of space for character class [%.10s...] 3", p);
+ if (i >= bufsz) {
+ bufsz *= 2;
+ buf = (int *) realloc(buf, bufsz * sizeof(int));
+ if (buf == NULL)
+ FATAL("out of space for character class [%.10s...] 2", p);
+ bp = buf + i;
+ }
*bp++ = c;
i++;
}
--- a/lib.c
+++ b/lib.c
@@ -301,6 +301,9 @@
Cell *q;
double result;
+/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
+/* I don't understand why it was changed. */
+
for (p=s; *p != '='; p++)
;
e = p;
@@ -324,7 +327,7 @@
/* possibly with a final trailing \0 not associated with any field */
char *r, *fr, sep;
Cell *p;
- int i, j, n;
+ int i, j, n, quote;
if (donefld)
return;
@@ -363,6 +366,57 @@
*fr++ = 0;
}
*fr = 0;
+ } else if ((sep = *inputFS) == ',') { /* CSV: handle quotes, \x, etc. */
+ for (i = 0; *r != '\0'; ) {
+ i++;
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ fldtab[i]->sval = fr;
+ fldtab[i]->tval = FLD | STR | DONTFREE;
+
+/* printf("fldbld 1 [%s] [%d:] [%s]\n", r, i, fr); */
+
+ if (*r == '"' /* || *r == '\'' */ ) { /* "..."; do not include '...' */
+ quote = *r++;
+ for ( ; *r != '\0'; ) {
+/* printf("fldbld 2 [%s]\n", r); */
+ if (*r == quote && r[1] != '\0' && r[1] == quote) {
+ r += 2; /* doubled quote */
+ *fr++ = quote;
+ } else if (*r == '\\') { /* BUG: off end? */
+ r++; /* backslashes inside "..." ??? */
+ *fr++ = *r++;
+ } else if (*r == quote && (r[1] == '\0' || r[1] == ',')) {
+ r++;
+ if (*r == ',')
+ r++;
+ break;
+ } else {
+ *fr++ = *r++;
+ }
+ }
+ *fr++ = 0;
+ continue;
+ }
+
+ /* unquoted field */
+ for ( ; *r != '\0'; ) {
+ if (*r == ',') { /* bare comma ends field */
+ r++;
+ *fr++ = 0;
+ break;
+ } else if (*r == '\\') { /* BUG: could walk off end */
+ r++;
+ *fr++ = *r++;
+ } else {
+ *fr++ = *r++;
+ }
+ }
+ *fr++ = 0;
+ }
+ *fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != '\0'; r += n) {
char buf[MB_LEN_MAX + 1];
@@ -797,11 +851,11 @@
while (isspace(*s))
s++;
- // no hex floating point, sorry
+ /* no hex floating point, sorry */
if (s[0] == '0' && tolower(s[1]) == 'x')
return false;
- // allow +nan, -nan, +inf, -inf, any other letter, no
+ /* allow +nan, -nan, +inf, -inf, any other letter, no */
if (s[0] == '+' || s[0] == '-') {
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
@@ -835,7 +889,7 @@
if (no_trailing != NULL)
*no_trailing = (*ep == '\0');
- // return true if found the end, or trailing stuff is allowed
+ /* return true if found the end, or trailing stuff is allowed */
retval = *ep == '\0' || trailing_stuff_ok;
return retval;
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20220530";
+const char *version = "version 20220818";
#define DEBUG
#include <stdio.h>
--- a/run.c
+++ b/run.c
@@ -1454,6 +1454,7 @@
FATAL("illegal type of split");
sep = *fs;
ap = execute(a[1]); /* array name */
+/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
freesymtab(ap);
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
ap->tval &= ~STR;
--
⑨