shithub: kwa

Download patch

ref: 2af227c88bbcf8d6c52eb5b3b3a677d88a281774
parent: c153970394a8b04aa4eca9dee54d3a55e51606c9
author: qwx <qwx@sciops.net>
date: Mon Sep 29 06:13:49 EDT 2025

add more tests, fix typos

--- a/test/T.expr
+++ b/test/T.expr
@@ -14,7 +14,7 @@
 	sub(/try /, "")
 	prog = $0
 	printf("%3d  %s\n", nt, prog)
-	prog = sprintf("%s -F\"\\t\" ''%s''", awk, prog)
+	prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)
 	# print "prog is", prog
 	nt2 = 0
 	while (getline > 0) {
@@ -31,7 +31,7 @@
 			output = sprintf("echo ''%s'' >foo2; ", $NF)
 		gsub(/\\t/, "\t", output)
 		gsub(/\\n/, "\n", output)
-		run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",
+		run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",
 			nt, ++nt2)
 		# print  "input is", input
 		# print  "test is", test
--- /dev/null
+++ b/test/T.func
@@ -1,0 +1,195 @@
+#!/bin/rc
+echo T.func: test user-defined functions
+
+echo '10 2
+2 10
+10 10
+10 1e1
+1e1 9' | $awk '
+# tests whether function returns sensible type bits
+
+function assert(cond) { # assertion
+    if (cond) print 1; else print 0
+}
+
+function i(x) { return x }
+
+{ m=$1; n=i($2); assert(m>n) }
+' >foo1
+echo '1
+0
+0
+0
+1' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (function return type)'
+
+echo 'data: data' >foo1
+$awk '
+function test1(array) { array["test"] = "data" }
+function test2(array) { return(array["test"]) }
+BEGIN { test1(foo); print "data: " test2(foo) }
+' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (array type)'
+
+$awk '
+BEGIN	{ code() }
+END	{ codeout("x") }
+function code() { ; }
+function codeout(ex) { print ex }
+' /dev/null >foo1
+echo x >foo2
+diff foo1 foo2 || echo 'BAD: T.func (argument passing)'
+
+$awk '
+BEGIN { unireghf() }
+
+function unireghf(hfeed) {
+	hfeed[1]=0
+	rcell("foo",hfeed)
+	hfeed[1]=0
+	rcell("bar",hfeed)
+}
+
+function rcell(cellname,hfeed) {
+	print cellname
+}
+' >foo1
+echo 'foo
+bar' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (convert arg to array)'
+
+$awk '
+function f(n) {
+	if (n <= 1)
+		return 1
+	else
+		return n * f(n-1)
+}
+{ print f($1) }
+' <<! >foo2
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+!
+cat <<! >foo1
+1
+1
+2
+6
+24
+120
+720
+5040
+40320
+362880
+!
+diff foo1 foo2 || echo 'BAD: T.func (factorial)'
+
+$awk '
+function ack(m,n) {
+	k = k+1
+	if (m == 0) return n+1
+	if (n == 0) return ack(m-1, 1)
+	return ack(m-1, ack(m, n-1))
+}
+{ k = 0; print ack($1,$2), "(" k " calls)" }
+' <<! >foo2
+0 0
+1 1
+2 2
+3 3
+3 4
+3 5
+!
+cat <<! >foo1
+1 (1 calls)
+3 (4 calls)
+7 (27 calls)
+61 (2432 calls)
+125 (10307 calls)
+253 (42438 calls)
+!
+diff foo1 foo2 || echo 'BAD: T.func (ackermann)'
+
+$awk '
+END { print "end" }
+{ print fib($1) }
+function fib(n) {
+	if (n <= 1) return 1
+	else return add(fib(n-1), fib(n-2))
+}
+function add(m,n) { return m+n }
+BEGIN { print "begin" }
+' <<! >foo2
+1
+3
+5
+10
+!
+cat <<! >foo1
+begin
+1
+3
+8
+89
+end
+!
+diff foo1 foo2 || echo 'BAD: T.func (fib)'
+
+$awk '
+function foo() {
+	for (i = 1; i <= 2; i++)
+		return 3
+	print "should not see this"
+}
+BEGIN { foo(); exit }
+' >foo1
+grep 'should not' foo1 && echo 'BAD: T.func (return)'
+
+# this exercises multiple free of temp cells
+echo 'eqn
+eqn2' >foo1
+$awk 'BEGIN 	{ eprocess("eqn", "x", contig) 
+	  process("tbl" )
+	  eprocess("eqn" "2", "x", contig) 
+	}
+function eprocess(file, first, contig) {
+	print file
+}
+function process(file) {
+	close(file)
+}' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (eqn)'
+
+echo 1 >foo1
+$awk 'function f() { n = 1; exit }
+	BEGIN { n = 0; f(); n = 2 }; END { print n}' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (exit in function)'
+
+echo 1 >foo1
+$awk '
+BEGIN {	n = 10
+	for (i = 1; i <= n; i++)
+	for (j = 1; j <= n; j++)
+		x[i,j] = n * i + j
+	for (i = 1; i <= n; i++)
+	for (j = 1; j <= n; j++)
+		if ((i,j) in x)
+			k++
+	print (k == n^2)
+      }
+' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (multi-dim subscript)'
+
+echo '<> 0' >foo1
+$awk '
+function foo() { i = 0 }
+        BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (fall off end)'
--- /dev/null
+++ b/test/T.misc
@@ -1,0 +1,536 @@
+#!/bin/rc
+echo T.misc: miscellaneous buglets now watched for
+
+echo 'The big brown over the lazy doe
+The big brown over the lazy dog
+x
+The big brown over the lazy dog' >foo
+echo 'failed
+succeeded
+failed
+succeeded' >foo1
+$awk '{ if (match($0, /^The big brown over the lazy dog/) == 0) {
+		printf("failed\n")
+	} else {
+		printf("succeeded\n")
+	}
+} ' foo >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc ghosh RE bug'
+
+echo '123
+1234567890
+12345678901' >foo
+echo '12345678901' >foo1
+$awk 'length($0) > 10' foo >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc last number bug'
+
+# check some \ sequences in strings (ascii)
+echo HIJKL >foo1
+echo foo | $awk '{ print "H\x49\x4a\x4BL" }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc hex string cvt'
+
+echo 012x45 >foo1
+$awk 'BEGIN { print "0\061\62x\0645" }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc oct string cvt'
+
+# $i++ means ($i)++
+echo 3 5 | $awk '{ i = 1; print $i++ ; print $1, i }' >foo1
+echo '3
+4 1' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc bad field increment'
+
+# makes sure that fields are recomputed even if self-assignment
+# take into account that subtracting from NF now rebuilds the record
+echo 'a b c
+s p q r
+x y z' >foo
+echo 'a
+s p
+x' >foo1
+$awk '{ NF -= 2; $1 = $1; print }' <foo >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad field self-assignment'
+
+echo '1
+1' >foo1
+$awk 'BEGIN {x = 1; print x; x = x; print x}' >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad self-assignment'
+
+echo 573109312 | $awk '{print $1*4}' >foo1
+echo 2292437248 >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad overflow'
+
+# note that there are 8-bit characters in the echo
+# some shells will probably screw this up.
+echo '#
+code € 1
+code € 2' |
+$awk '/^#/' >foo1
+echo '#' >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'
+
+echo hello |
+$awk 'BEGIN	{ FILENAME = "/lib/ucd/UnicodeData.txt" }
+	{ print $0 }' >/dev/null
+if(! ~ $status '' '|')	# rc joins pipeline statuses with |, so a clean two-stage run is | rather than empty
+	echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'
+
+echo hello |
+$awk '  function foo(foo) {
+                foo = 1
+                foo()
+        }
+	{ foo(bar) }
+' >/dev/null >[2=1]
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc function foo(foo) dropped core'
+
+echo '2
+10' |
+$awk '{ x[NR] = $0 }	# test whether $0 is NUM as well as STR
+END { if (x[1] > x[2]) print "BAD: T.misc: $0 is not NUM" }'
+
+
+$awk 'BEGIN {
+	npad = substr("alexander" "           ",1,15)
+	print npad
+	}' >foo
+grep '\\' foo && echo >[1=2] 'BAD: T.misc alexander fails'
+
+# This should give an error about function arguments
+$awk '
+function foo(x) { print "x is" x }
+BEGIN { foo(foo) }
+' >[2]foo
+grep 'can''t use function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc fcn args'
+
+
+# gawk defref test; should give error about undefined function
+$awk 'BEGIN { foo() }' >[2]foo
+grep 'calling undefined function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc undefined function'
+
+
+# gawk arrayparm test; should give error about function 
+$awk '
+BEGIN {
+    foo[1]=1;
+    foo[2]=2;
+    bug1(foo);
+}
+function bug1(i) {
+    for (i in foo) {
+	bug2(i);
+	delete foo[i];
+	print i,1,bot[1];
+    }
+}
+function bug2(arg) {
+    bot[arg]=arg;
+}
+' >[2]foo
+grep 'can.t assign to foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc foo bug'
+
+
+# This should be a syntax error
+$awk '
+!x = y
+' >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error !x=y fails'
+
+# This should print bbb
+$awk '
+BEGIN { up[1] = "a"
+	for (i in up) gsub("a", "A", x)
+	print x x "bbb"
+	exit
+      }
+' >foo
+grep bbb foo >/dev/null || echo >[1=2] 'BAD: T.misc gsub failed'
+
+echo yes |
+$awk '
+BEGIN {
+	printf "push return" >"/dev/null"
+	getline ans <"/dev/null"
+} '
+if(! ~ $status '' '|')	# rc joins pipeline statuses with |, so a clean two-stage run is | rather than empty
+	echo >[1=2] 'BAD: T.misc getline ans dropped core'
+
+$awk 'BEGIN { unireghf() }
+function unireghf(hfeed) { hfeed[1] = 0 }'
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc unireghf dropped core'
+
+echo x | $awk '/[/]/' >[2]foo
+grep 'nonterminated character class' foo >/dev/null || echo >[1=2] 'BAD: T.misc nonterminated fails'
+if(! ~ $status '')	# NOTE(review): this sees the status of the grep/echo line above, not of awk - confirm intent
+	echo >[1=2] 'BAD: T.misc nonterminated dropped core'
+
+$awk '
+function f() { return 12345 }
+BEGIN { printf "<%s>\n", f() }
+' >foo
+grep '<12345>' foo >/dev/null || echo 'BAD: T.misc <12345> fails'
+
+echo 'abc
+def
+
+ghi
+jkl' >foo
+$awk '
+BEGIN {	RS = ""
+	while (getline <"foo")
+		print
+}' >foo1
+$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
+
+# Test for RS regex matching an empty record at EOF
+echo a | $awk 1 'RS=a\n' > foo1
+cat << 'EOF' > foo2
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc RS regex matching an empty record at EOF fails'
+
+# Test for RS regex being reapplied
+echo aaa1a2a | $awk 1 'RS=^a' >foo1
+cat << 'EOF' > foo2
+
+aa1a2a
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
+
+# ^-anchored RS matching should be active at the start of each input file
+tee foo1 foo2 >foo3 << 'EOF'
+aaa
+EOF
+$awk 1 'RS=^a' foo1 foo2 foo3 >foo4
+cat << 'EOF' > foo5
+
+aa
+
+
+aa
+
+
+aa
+
+EOF
+diff foo4 foo5 || echo 'BAD: T.misc ^RS matches the start of every input file fails'
+
+# The following should not produce a warning about changing a constant
+# nor about a curdled tempcell list
+$awk 'function f(x) { x = 2 }
+BEGIN { f(1) }' >foo
+grep '^' foo && echo 'BAD: test constant change fails'
+
+# The following should not produce a warning about a curdled tempcell list
+$awk 'function f(x) { x }
+BEGIN { f(1) }' >foo
+grep '^' foo && echo 'BAD: test tempcell list fails'
+
+$awk 'BEGIN { print 9, a=10, 11; print a; exit }' >foo1
+echo '9 10 11
+10' >foo2
+diff foo1 foo2 || echo 'BAD: T.misc (embedded expression)'
+
+echo 'abc defgh ijkl' | $awk '
+  { $1 = ""; line = $0; print line; print $0; $0 = line; print $0 }' >foo1
+echo ' defgh ijkl
+ defgh ijkl
+ defgh ijkl' >foo2
+diff foo1 foo2 || echo 'BAD: T.misc (assignment to $0)'
+
+$awk '
+function min(a, b)
+{
+	if (a < b)
+		return a
+	else
+		return b
+}
+BEGIN { exit }
+'
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc function min dropped core'
+
+# The following should not give a syntax error message:
+$awk '
+function expand(chart) {
+	getline chart < "CHAR.ticks"
+}
+' >foo
+grep '^' foo >/dev/null && echo 'BAD: T.misc expand error'
+
+$awk 'BEGIN { print 1e40 }' >/dev/null
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc 1E40 dropped core'
+
+# The following syntax error should not dump core:
+$awk '
+$NF==3	{first=1}
+$NF==2 && first==0 && (abs($1-o1)>120||abs($2-o2)>120)	{print $0}
+$NF==2	{o1=%1; o2=$2; first=0}
+' >[2]/dev/null
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc first/abs dropped core'
+
+# The following syntax error should not dump core:
+$awk '{ n = split($1, address, !); print address[1] }' >[2]foo
+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc split! dropped core'
+
+# The following should cause a syntax error message
+$awk 'BEGIN {"hello"}' >[2]foo
+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc hello error'
+
+# The following should give a syntax error message:
+$awk '
+function pile(c,     r) {
+	r = ++pile[c]
+}
+
+{ pile($1) }
+' >[2]foo
+grep 'context is' foo >/dev/null || echo 'BAD: T.misc pile error'
+
+# This should complain about missing atan2 argument:
+$awk 'BEGIN { atan2(1) }' >[2]foo
+grep 'requires two arg' foo >/dev/null || echo 'BAD: T.misc atan2 error'
+
+# This should not core dump:
+$awk 'BEGIN { f() }
+function f(A) { delete A[1] }
+'
+if(! ~ $status '')
+	echo >[1=2] 'BAD: T.misc delete dropped core'
+
+# nasty one:  should not be able to overwrite constants
+$awk 'BEGIN { gsub(/ana/,"anda","banana")
+		printf "the monkey ate a %s\n", "banana" }
+' >/dev/null >[2]foo
+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc gsub banana error'
+
+# nasty one:  should not be able to overwrite constants
+$awk 'BEGIN { sub(/ana/,"anda","banana")
+		printf "the monkey ate a %s\n", "banana" }
+' >/dev/null >[2]foo
+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc sub banana error'
+
+# line numbers used to double-count comments
+$awk '#
+#
+#
+/x
+' >/dev/null >[2]foo
+grep 'line [45]' foo >/dev/null || echo 'BAD: T.misc lineno'
+
+echo 'x
\y' >foo1
+$awk 'BEGIN { print "x\f\r\b\v\a\\y" }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc weird chars'
+
+echo 0 >foo1
+$awk '	BEGIN { exit }
+	{ print }
+	END { print NR }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc BEGIN exit'
+
+echo 1 >foo1
+$awk '	{ exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit'
+
+echo 1 >foo1
+$awk '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 2'
+
+echo 1 >foo1
+$awk '	function f() {
+		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
+
+echo 1 >foo1
+$awk '	function f() {
+		split("a b c", arr)
+		for (i in arr) {if (i == 3) return NR; i++ }
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc for return'	# own label: the while-loop test before this one reports "while return"
+
+echo 1 >foo1
+$awk '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 3'
+
+echo 1 >foo1
+$awk '	function f() {
+		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc do return'
+
+echo 1 >foo1
+$awk '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 4'
+
+echo 1 >foo1
+$awk '	{ n = split($0, x)
+	  for (i in x) {
+	 	if (i == 1)
+			exit } }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 5'
+
+echo XXXXXXXX >foo1
+$awk 'BEGIN { s = "ab\fc\rd\be"
+	t = s; 	gsub("[" s "]", "X", t); print t }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'
+
+$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2
+grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'
+
+echo '
+
+
+a
+aa
+
+b
+
+
+c
+
+' >foo
+echo 3 >foo1
+$awk 'BEGIN { RS = "" }; END { print NR }' foo >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc RS botch'
+
+$awk 'BEGIN \
+	{
+		print "hello, world"
+	}
+}}}' >foo1 >[2]foo2
+grep 'source line 5' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc continuation line number'
+
+$awk 'BEGIN {
+	if () {
+		print "foo"
+	}
+}' >foo1 >[2]foo2
+grep 'syntax error at source line 2' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc syntax error line number'
+
+echo 111 222 333 >foo
+$awk '{ f[1]=1; f[2]=2; print $f[1], $f[1]++, $f[2], f[1], f[2] }' foo >foo2
+echo 111 111 222 2 2 >foo1
+cmp -s foo1 foo2 || echo 'BAD: T.misc $f[1]++'
+
+
+# These should be syntax errors
+$awk . >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error . fails'
+
+$awk .. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .. fails'
+
+$awk .E. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .E. fails'
+
+$awk .++. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .++. fails'
+
+
+
+# These should be syntax errors
+$awk '$' >[2]foo
+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $ fails'
+
+$awk '{print $' >[2]foo
+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $2 fails'
+
+$awk '"' >[2]foo
+grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc bare quote fails'
+
+
+# %c of 0 is explicit null byte
+
+echo '3' >foo1
+$awk 'BEGIN {printf("%c%c\n", 0, 0) }' | wc | $awk '{print $3}' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc null byte'
+
+# non-terminated RE
+
+# FIXME: check disabled together with its command; left live it only inspected stale contents of foo
+#$awk /xyz >foo >[2=1]
+#grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc non-terminated RE'
+
+# next several were infinite loops, found by brian tsang.
+# this is his example:
+
+# FIXME
+#$awk 'BEGIN {
+#    switch (substr("x",1,1)) {
+#    case /ask.com/:
+#	break
+#    case "google":
+#	break
+#    }
+#}' >foo >[2=1]
+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 1'
+
+#$awk 'BEGIN { s { c /./ } }' >foo >[2=1]
+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 2'
+
+#$awk 'BEGIN { s { c /../ } }' >foo >[2=1]
+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 3'
+
+$awk 'BEGIN {printf "%2$s %1$s\n", "a", "b"}' >foo >[2=1]
+grep '''$'' not permitted in awk formats' foo >/dev/null || echo >[1=2] 'BAD: T.misc ''$'' not permitted in formats'
+
+echo 'a
+b c
+de fg hi' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1
+awk '{ print NF, $0 }' foo0| tail -1 >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+echo 'fg hi' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1
+awk '{ print NF, $0 }' foo0| tail -1 >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+echo '' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1
+awk '{ print NF, $0 }' foo0| tail -1 >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+# Check for nonzero exit status on I/O error.
+#echo 'E 2' >foo1
+#(trap '' PIPE; "$awk" 'BEGIN { print "hi"; }' >[2]/dev/null; echo "E $?" >foo2) | :
+#cmp -s foo1 foo2 || echo 'BAD: T.misc exit status on I/O error'
+
+# Check for clobbering of the lexer's regular expression buffer.
+# If the output is "a1" instead of "1b", /b/ clobbered /a/.
+echo 1b >foo1
+echo ab | $awk '{ sub(/a/, "b" ~ /b/); print }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc lexer regex buffer clobbered'
+
+# Check handling of octal \OOO and hex \xHH esc. seqs. in strings.
+echo 'hello888
+hello
+hello
+helloxGOO
+hello
+0A' > foo1
+$awk 'BEGIN { print "hello\888" }'   > foo2
+$awk 'BEGIN { print "hello\x000A" }' >> foo2
+$awk 'BEGIN { printf "hello\x0A" }'  >> foo2
+$awk 'BEGIN { print "hello\xGOO" }'  >> foo2
+$awk 'BEGIN { print "hello\x0A0A" }' >> foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc escape sequences in strings mishandled'
--- /dev/null
+++ b/test/T.overflow
@@ -1,0 +1,86 @@
+#!/bin/rc
+echo T.overflow: test some overflow conditions
+
+$awk 'BEGIN {
+ 	for (i = 0; i < 1000; i++) printf("abcdefghijklmnopqsrtuvwxyz")
+ 	printf("\n")
+ 	exit
+}' >foo1
+$awk '{print}' foo1 >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow record 1'
+
+echo 'abcdefghijklmnopqsrtuvwxyz' >foo1
+echo hello | $awk '
+ { for (i = 1; i < 500; i++) s = s "abcdefghijklmnopqsrtuvwxyz "
+   $0 = s
+   print $1
+ }'  >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow abcdef'
+
+# default input record 3072, fields 200:
+$awk '
+BEGIN {
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < 500; i++)
+			printf(" 123456789")
+		printf("\n");
+	}
+} ' >foo1
+$awk '{$1 = " 123456789"; print}' foo1 >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf set $1'
+
+$awk '
+BEGIN {
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < 500; i++)
+			printf(" 123456789")
+		printf("\n");
+	}
+} ' >foo
+$awk  '{print NF}' foo >foo1
+echo '500
+500' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf NF'
+
+rm -f core
+# this should not drop core
+# (the failure message is single-quoted: rc has no double-quote quoting)
+$awk 'BEGIN {
+	for (i = 1; i < 1000; i++) s = s "a-z"
+	if ("x" ~ "[" s "]")
+		print "ugh"
+}' >foo >[2]foo
+test -r core && echo >[1=2] 'BAD: T.overflow too long char class dropped core'
+
+echo 4000004 >foo1
+$awk '
+BEGIN {
+	x1 = sprintf("%1000000s\n", "hello")
+	x2 = sprintf("%-1000000s\n", "world")
+	x3 = sprintf("%1000000.1000000s\n", "goodbye")
+	x4 = sprintf("%-1000000.1000000s\n", "goodbye")
+	print length(x1 x2 x3 x4)
+}' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow huge sprintfs'
+
+echo 0 >foo1
+$awk '
+BEGIN {
+	for (i = 0; i < 100000; i++)
+		x[i] = i
+	for (i in x)
+		delete x[i]
+	n = 0
+	for (i in x)
+		n++
+	print n
+}' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
+
+echo x >foo1
+$awk '{print $40000000000000}' <foo1 >foo2 >[2]foo
+grep 'out of range field' foo >/dev/null || echo >[1=2] 'BAD: T.overflow $400000'	# no backslash: rc single quotes keep it literally
+
+rm foo*
+$awk 'BEGIN { for (i=1; i <= 1000; i++) print i >("foo" i) }'
+ls foo* | grep '1000' >/dev/null || echo >[1=2] 'BAD: T.overflow openfiles'
+rm foo*
--- a/test/T.sub
+++ b/test/T.sub
@@ -28,7 +28,7 @@
 	gsubout = $5
 }
 NF < 5 {	# weird input line
-	printf("weird test spec `%s` ignored\n", $0) | "cat 1>&2"
+	printf("weird test spec `%s` ignored\n", $0) | "cat >[1=2]"
 	next
 }
 {		# "" => explicitly empty
--- /dev/null
+++ b/test/T.utfre
@@ -1,0 +1,234 @@
+#!/bin/rc
+echo T.utfre: tests of regular expression code for Unicode/utf-8
+# adapted from T.re
+
+awk '
+BEGIN {
+	FS = "\t"
+	awk = ENVIRON["awk"]
+}
+NF == 0 {
+	next
+}
+$1 != "" {	# new test
+	re = $1
+}
+$2 != "" {	# either ~ or !~
+	op = $2
+	if (op == "~")
+		neg = "!"
+	else if (op == "!~")
+		neg = ""
+}
+$3 != "" {	# new test string
+	str = $3
+}
+$3 == "\"\"" {	# explicit empty line
+	$3 = ""
+}
+NF > 2 {	# generate a test
+	input = $3
+	test = sprintf("echo ''%s'' | %s ''%s/%s/ {print \"%d fails %s %s %s\"}''",
+		input, awk, neg, re, NR, re, op, input)
+	# printf(" %3d   %s %s %s:\n", NR, re, op, input)
+	# print "test is |" test "|"
+	system(test)
+	nt++
+}
+END { print "	" nt, "tests" }
+' <<'!!!!'
+	~	🖕
+		🖕🖕
+		🖕🖕🖕
+		""
+🖕	~	🖕
+		b🖕
+		b🖕b
+	!~	""
+		时
+		xxxxx
+.	~	时
+		x时x
+		🙂
+	!~	""			
+.の	~	xの
+		xxの
+		xのx
+	!~	の
+		のx
+		""
+$	~	x
+		🙂
+		""
+.$	~	모
+		xx모
+		x모x
+	!~	""
+д$	~	д
+		bд
+		bbbд
+	!~	дb
+		x
+		""
+^	~	и
+		""
+		^
+^λ$	~	λ
+	!~	xλ
+		λx
+		xλx
+		""
+^λ.$	~	λx
+		λλ
+	!~	xλ
+		λλλ
+		λxy
+		""
+^$	~	""
+	!~	に
+		^
+^.해	~	め해
+		め해해
+	!~	해
+		""
+^.*해	~	해
+		め해
+		めめめめめめ해
+	!~	""
+^.+해	~	め해
+		めめめめめめ해
+	!~	""
+		해
+		해め
+해*	~	""
+		해
+		해해해해
+		め해
+		めめめめ
+해해*	~	해
+		해해해
+		め해
+	!~	めめめめ
+		""
+\$	~	🖕$
+		$
+		$🖕
+		🖕$🖕
+	!~	""
+		🖕
+\.	~	.
+	!~	🖕
+		""
+xθ+y	~	xθy
+		xθθy
+		xθθθθθθy
+	!~	θy
+		xy
+		xθ
+xθ?y	~	xy
+		xθy
+	!~	xθθy
+θ?b?の?	~	""
+		x
+^a?b?め	~	め
+		aめ
+		bめ
+		abめ
+		めa
+	!~	""
+		ab
+		aba			
+[Α-Ω]	~	Α
+		aΔb
+		xyΩ
+	!~	abc
+		β
+		""
+[^Α-Ω]	~	δ
+		aΔb
+		xyΩ
+	!~	Α
+		Δ
+		""
+[Α-ΔΦ-Ω]	~	Α
+		Β
+		Δ
+		Φ
+		Ω
+	!~	Π
+		Σ
+Π[[:lower:]]+	~	Πa
+		Πab
+	!~	Π
+		ΠX
+		Π:
+		Π[
+		Π]
+の[0-9]+に	~	の0に
+		の23に
+		の12345に
+	!~	0に
+		のに
+の[0-9]?に	~	のに
+		の1に
+	!~	の23に
+の[[]に	~	の[に
+	!~	のに
+		の[[]に
+		の]に
+の[[-]に	~	の[に
+		の-に
+	!~	のに
+		の[[]に
+		の]に
+の[[-a]に	~	の[に
+		のaに
+		の]に
+	!~	のに
+		の[[]に
+		の-に
+の[]-a]に	~	の]に
+		のaに
+	!~	のに
+		の[に
+		の-に
+の[]]に	~	の]に
+	!~	のに
+		の[]]に
+		の[に
+の[^[]に	~	のaに
+	!~	の[に
+の[-]に	~	の-に
+	!~	のに
+		の+に
+の[^-]に	~	の+に
+	!~	の-に
+		のに
+の[][]に	~	の[に
+		の]に
+	!~	のに
+		の][に
+		の[]に
+の[z-a]に	~	のに
+	!~	の
+		に
+		のaに
+		のzに
+		の-に
+に|だ	~	だ
+		に
+		だに
+	!~	a
+		""
+^στο|τους$	~	στο
+		στοd
+		aτους
+		τους
+	!~	xστο
+		τουςa
+^(στο|τους)$	~	στο
+		τους
+	!~	στοτους
+		στοx
+		cτους
+!!!!
--