ref: ffee7780fe08fa77f662a0903477545d9e26334f
parent: 91eaf7f7015ba2223e993532f5d65dfda4d1f33f
author: zoulasc <zoulasc@users.noreply.github.com>
date: Fri Feb 28 01:23:54 EST 2020
3 more fixes (#75) * LC_NUMERIC radix issue. According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html The period character is the character recognized in processing awk programs. Make it so that during output we also print the period character, since this is what other awk implementations do, and it makes sense from an interoperability point of view. * print "T.builtin" in the error message * Fix backslash continuation line handling. * Keep track of RS processing so we apply the regex properly only once per record.
--- a/lex.c
+++ b/lex.c
@@ -388,6 +388,7 @@
case '\\':
c = input();
switch (c) {+ case '\n': break;
case '"': *bp++ = '"'; break;
case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break;
--- a/lib.c
+++ b/lib.c
@@ -35,6 +35,7 @@
char EMPTY[] = { '\0' };FILE *infile = NULL;
+bool innew; /* true = infile has not been read by readrec */
char *file = EMPTY;
char *record;
int recsize = RECSIZE;
@@ -106,6 +107,7 @@
argno++;
}
infile = stdin; /* no filenames, so use stdin */
+ innew = true;
}
/*
@@ -175,7 +177,9 @@
FATAL("can't open file %s", file);setfval(fnrloc, 0.0);
}
- c = readrec(&buf, &bufsize, infile);
+ c = readrec(&buf, &bufsize, infile, innew);
+ if (innew)
+ innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */ if (isrecord) {if (freeable(fldtab[0]))
@@ -213,7 +217,7 @@
argno++;
}
-int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
{int sep, c, isrec;
char *rr, *buf = *pbuf;
@@ -224,7 +228,14 @@
bool found;
fa *pfa = makedfa(rs, 1);
- found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ if (newflag)
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ else {+ int tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ pfa->initstat = tempstat;
+ }
if (found)
setptr(patbeg, '\0');
} else {--- a/main.c
+++ b/main.c
@@ -214,7 +214,6 @@
if (!safe)
envinit(environ);
yyparse();
- setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
if (fs)
*FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) );--- a/proto.h
+++ b/proto.h
@@ -122,7 +122,7 @@
extern void savefs(void);
extern int getrec(char **, int *, bool);
extern void nextfile(void);
-extern int readrec(char **buf, int *bufsize, FILE *inf);
+extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@@ -191,7 +191,7 @@
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
-extern FILE *openfile(int, const char *);
+extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
--- a/run.c
+++ b/run.c
@@ -405,6 +405,7 @@
char *buf;
int bufsize = recsize;
int mode;
+ bool newflag;
if ((buf = malloc(bufsize)) == NULL)
FATAL("out of memory in getline");@@ -416,12 +417,12 @@
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
- fp = openfile(mode, getsval(x));
+ fp = openfile(mode, getsval(x), &newflag);
tempfree(x);
if (fp == NULL)
n = -1;
else
- n = readrec(&buf, &bufsize, fp);
+ n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) {;
} else if (a[0] != NULL) { /* getline var <file */@@ -1658,7 +1659,7 @@
if (isrec(x) || strlen(getsval(x)) == 0) { flush_all(); /* fflush() or fflush("") -> all */u = 0;
- } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+ } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = EOF;
else
u = fflush(fp);
@@ -1718,7 +1719,7 @@
x = execute(b);
fname = getsval(x);
- fp = openfile(a, fname);
+ fp = openfile(a, fname, NULL);
if (fp == NULL)
FATAL("can't open file %s", fname);tempfree(x);
@@ -1750,7 +1751,7 @@
files[2].mode = GT;
}
-FILE *openfile(int a, const char *us)
+FILE *openfile(int a, const char *us, bool *pnewflag)
{const char *s = us;
size_t i;
@@ -1760,11 +1761,12 @@
if (*s == '\0')
FATAL("null file name in print or getline");for (i = 0; i < nfiles; i++)
- if (files[i].fname && strcmp(s, files[i].fname) == 0) {- if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
- return files[i].fp;
- if (a == FFLUSH)
- return files[i].fp;
+ if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
+ (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
+ a == FFLUSH)) {+ if (pnewflag)
+ *pnewflag = false;
+ return files[i].fp;
}
if (a == FFLUSH) /* didn't find it, so don't create it! */
return NULL;
@@ -1801,6 +1803,8 @@
files[i].fname = tostring(s);
files[i].fp = fp;
files[i].mode = m;
+ if (pnewflag)
+ *pnewflag = true;
if (fp != stdin && fp != stdout && fp != stderr)
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
}
--- a/testdir/T.builtin
+++ b/testdir/T.builtin
@@ -35,6 +35,9 @@
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1echo 'dürst|DÜRST|Dürst' >foo2
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
+ (export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1+ echo "0.01" >foo2
+ diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
fi
$awk 'BEGIN {@@ -70,3 +73,18 @@
3' >foo1
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2diff foo1 foo2 || echo 'BAD: T.builtin length array'
+
+# Test that a backslash-newline inside a string literal is treated as a line continuation
+cat << \EOF >foo0
+BEGIN {+ print "A\
+B";
+ print "CD"
+}
+EOF
+$awk -f foo0 /dev/null >foo1
+cat << \EOF >foo2
+AB
+CD
+EOF
+diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'
--- a/testdir/T.misc
+++ b/testdir/T.misc
@@ -186,6 +186,14 @@
}' >foo1
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'+# Test for RS regex being reapplied
+echo aaa1a2a | $awk 1 RS='^a' >foo1
+cat << \EOF > foo2
+
+aa1a2a
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
# The following should not produce a warning about changing a constant
# nor about a curdled tempcell list
--
⑨