ref: 1a7797e938dc2b0a52b2d4116a98c019c447660b
parent: 8444a9d1ddb54fe044d26a9ce30c2619a1bf566d
author: Brian Kernighan <fakeuser@fake.com>
date: Wed May 17 13:38:53 EDT 2023
fixes to --csv with embedded newlines
--- a/lib.c
+++ b/lib.c
@@ -289,6 +289,14 @@
}
+/*******************
+ * loose ends here:
+ * \r\n should become \n
+ * what about bare \r? Excel uses that for embedded newlines
+ * unquoted fields can't contain double quotes ("), according to RFC 4180
+*/
+
+
int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
{ /* so read a complete record that might be multiple lines */
int sep, c;
@@ -303,9 +311,15 @@
recsize, &rr, "readcsvrec 1"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
+ if (c == ',')
+ continue;
if (c != '"' ) { /* unquoted field; read until , or \n */
while ((c = getc(inf)) != ',' && c != '\n' && c != EOF) {
+ if (rr-buf+1 > bufsize)
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf,
+ recsize, &rr, "readcsvrec 2"))
+ FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
if (c == ',')
@@ -313,6 +327,10 @@
} else { /* start of "..." */
while ((c = getc(inf)) != EOF) {
+ if (rr-buf+1 > bufsize)
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf,
+ recsize, &rr, "readcsvrec 3"))
+ FATAL("input record `%.30s...' too long", buf);
if (c != '"') {
*rr++ = c;
} else {
@@ -335,7 +353,7 @@
if (c == '\n' || c == EOF)
break;
}
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 3"))
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
FATAL("input record `%.30s...' too long", buf);
*rr = 0;
*pbuf = buf;
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20230314";
+const char *version = "version 20230516";
#define DEBUG
#include <stdio.h>
--- a/testdir/T.csv
+++ b/testdir/T.csv
@@ -1,6 +1,6 @@
#!/bin/sh
-echo T.csv: tests of csv field splitting
+echo T.csv: tests of csv field splitting, no embedded newlines
awk=${awk-../a.out}
@@ -69,16 +69,14 @@
"""" ["]
"""""" [""]
"""x""" ["x"]
+""",""" [","]
,,"" [][][]
a""b [a""b]
a"b [a"b]
a''b [a''b]
-"abc [abc]
-abc,"def [abc][def]
,, [][][]
a, [a][]
"", [][]
, [][]
-"abc",def [abc][def]
- []
+
!!!!
--- a/tran.c
+++ b/tran.c
@@ -348,6 +348,10 @@
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
+ if (CSV && (vp == rsloc))
+ WARNING("danger: don't set RS when --csv is in effect");
+ if (CSV && (vp == fsloc))
+ WARNING("danger: don't set FS when --csv is in effect");
if (isfld(vp)) {
donerec = false; /* mark $0 invalid */
fldno = atoi(vp->nval);
--
⑨