shithub: trueawk

Download patch

ref: f3f5210cfac77479e6cf6ca2f21f8f08c012aa85
parent: 66a543eeadeed25d152a53c67ff0dd25ac017730
author: Arnold D. Robbins <arnold@skeeve.com>
date: Wed Aug 24 12:42:27 EDT 2022

Latest additions from BWK.

--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@
 ytab* 
 testdir/foo*
 testdir/temp*
+*.pdf
+*.mail
--- a/awk.h
+++ b/awk.h
@@ -225,7 +225,8 @@
 
 /* structures used by regular expression matching machinery, mostly b.c: */
 
-#define NCHARS	(256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
+#define NCHARS	(1256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
+				/* BUG: some overflows (caught) if we use 256 */
 				/* watch out in match(), etc. */
 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
 #define NSTATES	32
--- a/b.c
+++ b/b.c
@@ -441,8 +441,13 @@
 				continue;
 			}
 		}
-		if (!adjbuf((char **) &buf, &bufsz, bp-buf+8, 100, (char **) &bp, "cclenter2"))
-			FATAL("out of space for character class [%.10s...] 3", p);
+		if (i >= bufsz) {
+			bufsz *= 2;
+			buf = (int *) realloc(buf, bufsz * sizeof(int));
+			if (buf == NULL)
+				FATAL("out of space for character class [%.10s...] 2", p);
+			bp = buf + i;
+		}
 		*bp++ = c;
 		i++;
 	}
--- a/lib.c
+++ b/lib.c
@@ -301,6 +301,9 @@
 	Cell *q;
 	double result;
 
+/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
+/* I don't understand why it was changed. */
+
 	for (p=s; *p != '='; p++)
 		;
 	e = p;
@@ -324,7 +327,7 @@
 	/* possibly with a final trailing \0 not associated with any field */
 	char *r, *fr, sep;
 	Cell *p;
-	int i, j, n;
+	int i, j, n, quote;
 
 	if (donefld)
 		return;
@@ -363,6 +366,57 @@
 			*fr++ = 0;
 		}
 		*fr = 0;
+	} else if ((sep = *inputFS) == ',') {	/* CSV: handle quotes, \x, etc. */
+		for (i = 0; *r != '\0'; ) {
+			i++;
+			if (i > nfields)
+				growfldtab(i);
+			if (freeable(fldtab[i]))
+				xfree(fldtab[i]->sval);
+			fldtab[i]->sval = fr;
+			fldtab[i]->tval = FLD | STR | DONTFREE;
+
+/* printf("fldbld 1 [%s] [%d:] [%s]\n", r, i, fr); */
+
+			if (*r == '"' /* || *r == '\'' */ ) { /* "..."; do not include '...' */
+				quote = *r++;
+				for ( ; *r != '\0'; ) {
+/* printf("fldbld 2   [%s]\n", r); */
+					if (*r == quote && r[1] != '\0' && r[1] == quote) {
+						r += 2; /* doubled quote */
+						*fr++ = quote;
+					} else if (*r == '\\') { /* BUG: off end? */
+						r++; /* backslashes inside "..." ??? */
+						*fr++ = *r++;
+					} else if (*r == quote && (r[1] == '\0' || r[1] == ',')) {
+						r++;
+						if (*r == ',')
+							r++;
+						break;
+					} else {
+						*fr++ = *r++;
+					}
+				}
+				*fr++ = 0;
+				continue;
+			}
+
+			/* unquoted field */
+			for ( ; *r != '\0'; ) {
+				if (*r == ',') { /* bare comma ends field */
+					r++;
+					*fr++ = 0;
+					break;
+				} else if (*r == '\\') { /* BUG: could walk off end */
+					r++;
+					*fr++ = *r++;
+				} else {
+					*fr++ = *r++;
+				}
+			}
+			*fr++ = 0;
+		}
+		*fr = 0;
 	} else if ((sep = *inputFS) == 0) {		/* new: FS="" => 1 char/field */
 		for (i = 0; *r != '\0'; r += n) {
 			char buf[MB_LEN_MAX + 1];
@@ -797,11 +851,11 @@
 	while (isspace(*s))
 		s++;
 
-	// no hex floating point, sorry
+	/* no hex floating point, sorry */
 	if (s[0] == '0' && tolower(s[1]) == 'x')
 		return false;
 
-	// allow +nan, -nan, +inf, -inf, any other letter, no
+	/* allow +nan, -nan, +inf, -inf, any other letter, no */
 	if (s[0] == '+' || s[0] == '-') {
 		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
 		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
@@ -835,7 +889,7 @@
 	if (no_trailing != NULL)
 		*no_trailing = (*ep == '\0');
 
-        // return true if found the end, or trailing stuff is allowed
+        /* return true if found the end, or trailing stuff is allowed */
 	retval = *ep == '\0' || trailing_stuff_ok;
 
 	return retval;
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
 THIS SOFTWARE.
 ****************************************************************/
 
-const char	*version = "version 20220530";
+const char	*version = "version 20220818";
 
 #define DEBUG
 #include <stdio.h>
--- a/run.c
+++ b/run.c
@@ -1454,6 +1454,7 @@
 		FATAL("illegal type of split");
 	sep = *fs;
 	ap = execute(a[1]);	/* array name */
+/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
 	freesymtab(ap);
 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
 	ap->tval &= ~STR;
--