shithub: trueawk

Download patch

ref: ffee7780fe08fa77f662a0903477545d9e26334f
parent: 91eaf7f7015ba2223e993532f5d65dfda4d1f33f
author: zoulasc <zoulasc@users.noreply.github.com>
date: Fri Feb 28 01:23:54 EST 2020

3 more fixes (#75)

* LC_NUMERIC radix issue.

According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html
The period character is the character recognized in processing awk
programs.  Make it so that during output we also print the period
character, since this is what other awk implementations do, and it
makes sense from an interoperability point of view.

* print "T.builtin" in the error message

* Fix backslash continuation line handling.

* Keep track of RS processing so we apply the regex properly only once
per record.

--- a/lex.c
+++ b/lex.c
@@ -388,6 +388,7 @@
 		case '\\':
 			c = input();
 			switch (c) {
+			case '\n': break;
 			case '"': *bp++ = '"'; break;
 			case 'n': *bp++ = '\n'; break;
 			case 't': *bp++ = '\t'; break;
--- a/lib.c
+++ b/lib.c
@@ -35,6 +35,7 @@
 
 char	EMPTY[] = { '\0' };
 FILE	*infile	= NULL;
+bool	innew;		/* true = infile has not been read by readrec */
 char	*file	= EMPTY;
 char	*record;
 int	recsize	= RECSIZE;
@@ -106,6 +107,7 @@
 		argno++;
 	}
 	infile = stdin;		/* no filenames, so use stdin */
+	innew = true;
 }
 
 /*
@@ -175,7 +177,9 @@
 				FATAL("can't open file %s", file);
 			setfval(fnrloc, 0.0);
 		}
-		c = readrec(&buf, &bufsize, infile);
+		c = readrec(&buf, &bufsize, infile, innew);
+		if (innew)
+			innew = false;
 		if (c != 0 || buf[0] != '\0') {	/* normal record */
 			if (isrecord) {
 				if (freeable(fldtab[0]))
@@ -213,7 +217,7 @@
 	argno++;
 }
 
-int readrec(char **pbuf, int *pbufsize, FILE *inf)	/* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)	/* read one record into buf */
 {
 	int sep, c, isrec;
 	char *rr, *buf = *pbuf;
@@ -224,7 +228,14 @@
 		bool found;
 
 		fa *pfa = makedfa(rs, 1);
-		found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+		if (newflag)
+			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+		else {
+			int tempstat = pfa->initstat;
+			pfa->initstat = 2;
+			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+			pfa->initstat = tempstat;
+		}
 		if (found)
 			setptr(patbeg, '\0');
 	} else {
--- a/main.c
+++ b/main.c
@@ -214,7 +214,6 @@
 	if (!safe)
 		envinit(environ);
 	yyparse();
-	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
 	if (fs)
 		*FS = qstring(fs, '\0');
 	   dprintf( ("errorflag=%d\n", errorflag) );
--- a/proto.h
+++ b/proto.h
@@ -122,7 +122,7 @@
 extern	void	savefs(void);
 extern	int	getrec(char **, int *, bool);
 extern	void	nextfile(void);
-extern	int	readrec(char **buf, int *bufsize, FILE *inf);
+extern	int	readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
 extern	char	*getargv(int);
 extern	void	setclvar(char *);
 extern	void	fldbld(void);
@@ -191,7 +191,7 @@
 extern	Cell	*printstat(Node **, int);
 extern	Cell	*nullproc(Node **, int);
 extern	FILE	*redirect(int, Node *);
-extern	FILE	*openfile(int, const char *);
+extern	FILE	*openfile(int, const char *, bool *);
 extern	const char	*filename(FILE *);
 extern	Cell	*closefile(Node **, int);
 extern	void	closeall(void);
--- a/run.c
+++ b/run.c
@@ -405,6 +405,7 @@
 	char *buf;
 	int bufsize = recsize;
 	int mode;
+	bool newflag;
 
 	if ((buf = malloc(bufsize)) == NULL)
 		FATAL("out of memory in getline");
@@ -416,12 +417,12 @@
 		mode = ptoi(a[1]);
 		if (mode == '|')		/* input pipe */
 			mode = LE;	/* arbitrary flag */
-		fp = openfile(mode, getsval(x));
+		fp = openfile(mode, getsval(x), &newflag);
 		tempfree(x);
 		if (fp == NULL)
 			n = -1;
 		else
-			n = readrec(&buf, &bufsize, fp);
+			n = readrec(&buf, &bufsize, fp, newflag);
 		if (n <= 0) {
 			;
 		} else if (a[0] != NULL) {	/* getline var <file */
@@ -1658,7 +1659,7 @@
 		if (isrec(x) || strlen(getsval(x)) == 0) {
 			flush_all();	/* fflush() or fflush("") -> all */
 			u = 0;
-		} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
 			u = EOF;
 		else
 			u = fflush(fp);
@@ -1718,7 +1719,7 @@
 
 	x = execute(b);
 	fname = getsval(x);
-	fp = openfile(a, fname);
+	fp = openfile(a, fname, NULL);
 	if (fp == NULL)
 		FATAL("can't open file %s", fname);
 	tempfree(x);
@@ -1750,7 +1751,7 @@
 	files[2].mode = GT;
 }
 
-FILE *openfile(int a, const char *us)
+FILE *openfile(int a, const char *us, bool *pnewflag)
 {
 	const char *s = us;
 	size_t i;
@@ -1760,11 +1761,12 @@
 	if (*s == '\0')
 		FATAL("null file name in print or getline");
 	for (i = 0; i < nfiles; i++)
-		if (files[i].fname && strcmp(s, files[i].fname) == 0) {
-			if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
-				return files[i].fp;
-			if (a == FFLUSH)
-				return files[i].fp;
+		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
+		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
+		     a == FFLUSH)) {
+			if (pnewflag)
+				*pnewflag = false;
+			return files[i].fp;
 		}
 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
 		return NULL;
@@ -1801,6 +1803,8 @@
 		files[i].fname = tostring(s);
 		files[i].fp = fp;
 		files[i].mode = m;
+		if (pnewflag)
+			*pnewflag = true;
 		if (fp != stdin && fp != stdout && fp != stderr)
 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
 	}
--- a/testdir/T.builtin
+++ b/testdir/T.builtin
@@ -35,6 +35,9 @@
 	$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
 	echo 'dürst|DÜRST|Dürst' >foo2
 	diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
+	(export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
+	echo "0.01" >foo2
+	diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling' 
 fi
 
 $awk 'BEGIN {
@@ -70,3 +73,18 @@
 3' >foo1
 $awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
 diff foo1 foo2 || echo 'BAD: T.builtin length array'
+
+# Test for backslash handling
+cat << \EOF >foo0
+BEGIN {
+    print "A\
+B";
+    print "CD"
+}
+EOF
+$awk -f foo0 /dev/null >foo1
+cat << \EOF >foo2
+AB
+CD
+EOF
+diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)' 
--- a/testdir/T.misc
+++ b/testdir/T.misc
@@ -186,6 +186,14 @@
 }' >foo1
 $awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
 
+# Test for RS regex being reapplied
+echo aaa1a2a | $awk 1 RS='^a' >foo1
+cat << \EOF > foo2
+
+aa1a2a
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
 
 # The following should not produce a warning about changing a constant
 # nor about a curdled tempcell list
--