shithub: kwa

--- a/test/T.expr

+++ b/test/T.expr

@@ -14,7 +14,7 @@

 	sub(/try /, "")

 	prog = $0

 	printf("%3d  %s\n", nt, prog)

-	prog = sprintf("%s -F\"\\t\" ''%s''", awk, prog)

+	prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)

 	# print "prog is", prog

 	nt2 = 0

 	while (getline > 0) {

@@ -31,7 +31,7 @@

 			output = sprintf("echo ''%s'' >foo2; ", $NF)

 		gsub(/\\t/, "\t", output)

 		gsub(/\\n/, "\n", output)

-		run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",

+		run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",

 			nt, ++nt2)

 		# print  "input is", input

 		# print  "test is", test

--- /dev/null

+++ b/test/T.func

@@ -1,0 +1,195 @@

+#!/bin/rc

+echo T.func: test user-defined functions

+echo '10 2

+2 10

+10 10

+10 1e1

+1e1 9' | $awk '

+# tests whether function returns sensible type bits

+function assert(cond) { # assertion

+    if (cond) print 1; else print 0

+}

+function i(x) { return x }

+{ m=$1; n=i($2); assert(m>n) }

+' >foo1

+echo '1

+0

+0

+0

+1' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (function return type)'

+echo 'data: data' >foo1

+$awk '

+function test1(array) { array["test"] = "data" }

+function test2(array) { return(array["test"]) }

+BEGIN { test1(foo); print "data: " test2(foo) }

+' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (array type)'

+$awk '

+BEGIN	{ code() }

+END	{ codeout("x") }

+function code() { ; }

+function codeout(ex) { print ex }

+' /dev/null >foo1

+echo x >foo2

+diff foo1 foo2 || echo 'BAD: T.func (argument passing)'

+$awk '

+BEGIN { unireghf() }

+function unireghf(hfeed) {

+	hfeed[1]=0

+	rcell("foo",hfeed)

+	hfeed[1]=0

+	rcell("bar",hfeed)

+}

+function rcell(cellname,hfeed) {

+	print cellname

+}

+' >foo1

+echo 'foo

+bar' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (convert arg to array)'

+$awk '

+function f(n) {

+	if (n <= 1)

+		return 1

+	else

+		return n * f(n-1)

+}

+{ print f($1) }

+' <<! >foo2

+0

+1

+2

+3

+4

+5

+6

+7

+8

+9

+!

+cat <<! >foo1

+1

+1

+2

+6

+24

+120

+720

+5040

+40320

+362880

+!

+diff foo1 foo2 || echo 'BAD: T.func (factorial)'

+$awk '

+function ack(m,n) {

+	k = k+1

+	if (m == 0) return n+1

+	if (n == 0) return ack(m-1, 1)

+	return ack(m-1, ack(m, n-1))

+}

+{ k = 0; print ack($1,$2), "(" k " calls)" }

+' <<! >foo2

+0 0

+1 1

+2 2

+3 3

+3 4

+3 5

+!

+cat <<! >foo1

+1 (1 calls)

+3 (4 calls)

+7 (27 calls)

+61 (2432 calls)

+125 (10307 calls)

+253 (42438 calls)

+!

+diff foo1 foo2 || echo 'BAD: T.func (ackermann)'

+$awk '

+END { print "end" }

+{ print fib($1) }

+function fib(n) {

+	if (n <= 1) return 1

+	else return add(fib(n-1), fib(n-2))

+}

+function add(m,n) { return m+n }

+BEGIN { print "begin" }

+' <<! >foo2

+1

+3

+5

+10

+!

+cat <<! >foo1

+begin

+1

+3

+8

+89

+end

+!

+diff foo1 foo2 || echo 'BAD: T.func (fib)'

+$awk '

+function foo() {

+	for (i = 1; i <= 2; i++)

+		return 3

+	print "should not see this"

+}

+BEGIN { foo(); exit }

+' >foo1

+grep 'should not' foo1 && echo 'BAD: T.func (return)'

+# this exercises multiple free of temp cells

+echo 'eqn

+eqn2' >foo1

+$awk 'BEGIN 	{ eprocess("eqn", "x", contig)

+	  process("tbl" )

+	  eprocess("eqn" "2", "x", contig)

+	}

+function eprocess(file, first, contig) {

+	print file

+}

+function process(file) {

+	close(file)

+}' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (eqn)'

+echo 1 >foo1

+$awk 'function f() { n = 1; exit }

+	BEGIN { n = 0; f(); n = 2 }; END { print n}' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (exit in function)'

+echo 1 >foo1

+$awk '

+BEGIN {	n = 10

+	for (i = 1; i <= n; i++)

+	for (j = 1; j <= n; j++)

+		x[i,j] = n * i + j

+	for (i = 1; i <= n; i++)

+	for (j = 1; j <= n; j++)

+		if ((i,j) in x)

+			k++

+	print (k == n^2)

+      }

+' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (multi-dim subscript)'

+echo '<> 0' >foo1

+$awk '

+function foo() { i = 0 }

+        BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2

+diff foo1 foo2 || echo 'BAD: T.func (fall off end)'

--- /dev/null

+++ b/test/T.misc

@@ -1,0 +1,536 @@

+#!/bin/rc

+echo T.misc: miscellaneous buglets now watched for

+echo 'The big brown over the lazy doe

+The big brown over the lazy dog

+x

+The big brown over the lazy dog' >foo

+echo 'failed

+succeeded

+failed

+succeeded' >foo1

+$awk '{ if (match($0, /^The big brown over the lazy dog/) == 0) {

+		printf("failed\n")

+	} else {

+		printf("succeeded\n")

+	}

+} ' foo >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc ghosh RE bug'

+echo '123

+1234567890

+12345678901' >foo

+echo '12345678901' >foo1

+$awk 'length($0) > 10' foo >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc last number bug'

+# check some \ sequences in strings (ascii)

+echo HIJKL >foo1

+echo foo | $awk '{ print "H\x49\x4a\x4BL" }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc hex string cvt'

+echo 012x45 >foo1

+$awk 'BEGIN { print "0\061\62x\0645" }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc oct string cvt'

+# $i++ means ($i)++

+echo 3 5 | $awk '{ i = 1; print $i++ ; print $1, i }' >foo1

+echo '3

+4 1' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc bad field increment'

+# makes sure that fields are recomputed even if self-assignment

+# take into account that subtracting from NF now rebuilds the record

+echo 'a b c

+s p q r

+x y z' >foo

+echo 'a

+s p

+x' >foo1

+$awk '{ NF -= 2; $1 = $1; print }' <foo >foo2

+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad field self-assignment'

+echo '1

+1' >foo1

+$awk 'BEGIN {x = 1; print x; x = x; print x}' >foo2

+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad self-assignment'

+echo 573109312 | $awk '{print $1*4}' >foo1

+echo 2292437248 >foo2

+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad overflow'

+# note that there are 8-bit characters in the echo

+# some shells will probably screw this up.

+echo '#

+code  1

+code  2' |

+$awk '/^#/' >foo1

+echo '#' >foo2

+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'

+echo hello |

+$awk 'BEGIN	{ FILENAME = "/lib/ucd/UnicodeData.txt" }

+	{ print $0 }' >/dev/null

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'

+echo hello |

+$awk '  function foo(foo) {

+                foo = 1

+                foo()

+        }

+	{ foo(bar) }

+' >/dev/null >[2=1]

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc function foo(foo) dropped core'

+echo '2

+10' |

+$awk '{ x[NR] = $0 }	# test whether $0 is NUM as well as STR

+END { if (x[1] > x[2]) print "BAD: T.misc: $0 is not NUM" }'

+$awk 'BEGIN {

+	npad = substr("alexander" "           ",1,15)

+	print npad

+	}' >foo

+grep '\\' foo && echo >[1=2] 'BAD: T.misc alexander fails'

+# This should give an error about function arguments

+$awk '

+function foo(x) { print "x is" x }

+BEGIN { foo(foo) }

+' >[2]foo

+grep 'can''t use function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc fcn args'

+# gawk defref test; should give error about undefined function

+$awk 'BEGIN { foo() }' >[2]foo

+grep 'calling undefined function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc undefined function'

+# gawk arrayparm test; should give error about function

+$awk '

+BEGIN {

+    foo[1]=1;

+    foo[2]=2;

+    bug1(foo);

+}

+function bug1(i) {

+    for (i in foo) {

+	bug2(i);

+	delete foo[i];

+	print i,1,bot[1];

+    }

+}

+function bug2(arg) {

+    bot[arg]=arg;

+}

+' >[2]foo

+grep 'can.t assign to foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc foo bug'

+# This should be a syntax error

+$awk '

+!x = y

+' >[2]foo

+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error !x=y fails'

+# This should print bbb

+$awk '

+BEGIN { up[1] = "a"

+	for (i in up) gsub("a", "A", x)

+	print x x "bbb"

+	exit

+      }

+' >foo

+grep bbb foo >/dev/null || echo >[1=2] 'BAD: T.misc gsub failed'

+echo yes |

+$awk '

+BEGIN {

+	printf "push return" >"/dev/null"

+	getline ans <"/dev/null"

+} '

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc getline ans dropped core'

+$awk 'BEGIN { unireghf() }

+function unireghf(hfeed) { hfeed[1] = 0 }'

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc unireghf dropped core'

+echo x | $awk '/[/]/' >[2]foo	# awk is expected to reject this with a nonterminated character class diagnostic

+grep 'nonterminated character class' foo >/dev/null || echo >[1=2] 'BAD: T.misc nonterminated fails'

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc nonterminated dropped core'

+$awk '

+function f() { return 12345 }

+BEGIN { printf "<%s>\n", f() }

+' >foo

+grep '<12345>' foo >/dev/null || echo 'BAD: T.misc <12345> fails'

+echo 'abc

+def

+ghi

+jkl' >foo

+$awk '

+BEGIN {	RS = ""

+	while (getline <"foo")

+		print

+}' >foo1

+$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'

+# Test for RS regex matching an empty record at EOF

+echo a | $awk 1 'RS=a\n' > foo1

+cat << 'EOF' > foo2

+EOF

+diff foo1 foo2 || echo 'BAD: T.misc RS regex matching an empty record at EOF fails'

+# Test for RS regex being reapplied

+echo aaa1a2a | $awk 1 'RS=^a' >foo1

+cat << 'EOF' > foo2

+aa1a2a

+EOF

+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'

+# ^-anchored RS matching should be active at the start of each input file

+tee foo1 foo2 >foo3 << 'EOF'

+aaa

+EOF

+$awk 1 'RS=^a' foo1 foo2 foo3 >foo4

+cat << 'EOF' > foo5

+aa

+aa

+aa

+EOF

+diff foo4 foo5 || echo 'BAD: T.misc ^RS matches the start of every input file fails'

+# The following should not produce a warning about changing a constant

+# nor about a curdled tempcell list

+$awk 'function f(x) { x = 2 }

+BEGIN { f(1) }' >foo

+grep '^' foo && echo 'BAD: test constant change fails'

+# The following should not produce a warning about a curdled tempcell list

+$awk 'function f(x) { x }

+BEGIN { f(1) }' >foo

+grep '^' foo && echo 'BAD: test tempcell list fails'

+$awk 'BEGIN { print 9, a=10, 11; print a; exit }' >foo1

+echo '9 10 11

+10' >foo2

+diff foo1 foo2 || echo 'BAD: T.misc (embedded expression)'

+echo 'abc defgh ijkl' | $awk '

+  { $1 = ""; line = $0; print line; print $0; $0 = line; print $0 }' >foo1

+echo ' defgh ijkl

+ defgh ijkl

+ defgh ijkl' >foo2

+diff foo1 foo2 || echo 'BAD: T.misc (assignment to $0)'

+$awk '

+function min(a, b)

+{

+	if (a < b)

+		return a

+	else

+		return b

+}

+BEGIN { exit }

+'

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc function min dropped core'

+# The following should not give a syntax error message:

+$awk '

+function expand(chart) {

+	getline chart < "CHAR.ticks"

+}

+' >foo

+grep '^' foo >/dev/null && echo 'BAD: T.misc expand error'

+$awk 'BEGIN { print 1e40 }' >/dev/null

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc 1E40 dropped core'

+# The following syntax error should not dump core:

+$awk '

+$NF==3	{first=1}

+$NF==2 && first==0 && (abs($1-o1)>120||abs($2-o2)>120)	{print $0}

+$NF==2	{o1=%1; o2=$2; first=0}

+' >[2]/dev/null

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc first/abs dropped core'

+# The following syntax error should not dump core:

+$awk '{ n = split($1, address, !); print address[1] }' >[2]foo

+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc split! dropped core'

+# The following should cause a syntax error message

+$awk 'BEGIN {"hello"}' >[2]foo

+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc hello error'

+# The following should give a syntax error message:

+$awk '

+function pile(c,     r) {

+	r = ++pile[c]

+}

+{ pile($1) }

+' >[2]foo

+grep 'context is' foo >/dev/null || echo 'BAD: T.misc pile error'

+# This should complain about missing atan2 argument:

+$awk 'BEGIN { atan2(1) }' >[2]foo

+grep 'requires two arg' foo >/dev/null || echo 'BAD: T.misc atan2 error'

+# This should not core dump:

+$awk 'BEGIN { f() }

+function f(A) { delete A[1] }

+'

+if(! ~ $status '')

+	echo >[1=2] 'BAD: T.misc delete dropped core'

+# nasty one:  should not be able to overwrite constants

+$awk 'BEGIN { gsub(/ana/,"anda","banana")

+		printf "the monkey ate a %s\n", "banana" }

+' >/dev/null >[2]foo

+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc gsub banana error'

+# nasty one:  should not be able to overwrite constants

+$awk 'BEGIN { sub(/ana/,"anda","banana")

+		printf "the monkey ate a %s\n", "banana" }

+' >/dev/null >[2]foo

+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc sub banana error'

+# line numbers used to double-count comments

+$awk '#

+#

+#

+/x

+' >/dev/null >[2]foo

+grep 'line [45]' foo >/dev/null || echo 'BAD: T.misc lineno'

+echo 'x
\y' >foo1

+$awk 'BEGIN { print "x\f\r\b\v\a\\y" }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc weird chars'

+echo 0 >foo1

+$awk '	BEGIN { exit }

+	{ print }

+	END { print NR }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc BEGIN exit'

+echo 1 >foo1

+$awk '	{ exit }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit'

+echo 1 >foo1

+$awk '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 2'

+echo 1 >foo1

+$awk '	function f() {

+		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }

+	}

+	{ if (f() == 1) exit }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc while return'

+echo 1 >foo1

+$awk '	function f() {

+		split("a b c", arr)

+		for (i in arr) {if (i == 3) return NR; i++ }

+	}

+	{ if (f() == 1) exit }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc for-in return'	# label kept distinct from the while-loop variant of this test

+echo 1 >foo1

+$awk '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 3'

+echo 1 >foo1

+$awk '	function f() {

+		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)

+	}

+	{ if (f() == 1) exit }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc do return'

+echo 1 >foo1

+$awk '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 4'

+echo 1 >foo1

+$awk '	{ n = split($0, x)

+	  for (i in x) {

+	 	if (i == 1)

+			exit } }

+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 5'

+echo XXXXXXXX >foo1

+$awk 'BEGIN { s = "ab\fc\rd\be"

+	t = s; 	gsub("[" s "]", "X", t); print t }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'

+$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2

+grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'

+echo '

+a

+aa

+b

+c

+' >foo

+echo 3 >foo1

+$awk 'BEGIN { RS = "" }; END { print NR }' foo >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc RS botch'

+$awk 'BEGIN \

+	{

+		print "hello, world"

+	}

+}}}' >foo1 >[2]foo2

+grep 'source line 5' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc continuation line number'

+$awk 'BEGIN {

+	if () {

+		print "foo"

+	}

+}' >foo1 >[2]foo2

+grep 'syntax error at source line 2' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc syntax error line number'

+echo 111 222 333 >foo

+$awk '{ f[1]=1; f[2]=2; print $f[1], $f[1]++, $f[2], f[1], f[2] }' foo >foo2

+echo 111 111 222 2 2 >foo1

+cmp -s foo1 foo2 || echo 'BAD: T.misc $f[1]++'

+# These should be syntax errors

+$awk . >[2]foo

+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error . fails'

+$awk .. >[2]foo

+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .. fails'

+$awk .E. >[2]foo

+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .E. fails'

+$awk .++. >[2]foo

+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .++. fails'

+# These should be syntax errors

+$awk '$' >[2]foo

+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $ fails'

+$awk '{print $' >[2]foo

+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $2 fails'

+$awk '"' >[2]foo

+grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc bare quote fails'

+# %c of 0 is explicit null byte

+echo '3' >foo1

+$awk 'BEGIN {printf("%c%c\n", 0, 0) }' | wc | $awk '{print $3}' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc null byte'

+# non-terminated RE

+# FIXME: test disabled; its check is disabled with it so it cannot pass against a stale foo

+#$awk /xyz >foo >[2=1]

+#grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc non-terminated RE'

+# next several were infinite loops, found by brian tsang.

+# this is his example:

+# FIXME

+#$awk 'BEGIN {

+#    switch (substr("x",1,1)) {

+#    case /ask.com/:

+#	break

+#    case "google":

+#	break

+#    }

+#}' >foo >[2=1]

+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 1'

+#$awk 'BEGIN { s { c /./ } }' >foo >[2=1]

+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 2'

+#$awk 'BEGIN { s { c /../ } }' >foo >[2=1]

+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 3'

+$awk 'BEGIN {printf "%2$s %1$s\n", "a", "b"}' >foo >[2=1]

+grep '''$'' not permitted in awk formats' foo >/dev/null || echo >[1=2] 'BAD: T.misc ''$'' not permitted in formats'

+echo 'a

+b c

+de fg hi' >foo0

+$awk 'END { print NF, $0 }' foo0 >foo1

+awk '{ print NF, $0 }' foo0| tail -1 >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'

+echo 'fg hi' >foo0

+$awk 'END { print NF, $0 }' foo0 >foo1

+awk '{ print NF, $0 }' foo0| tail -1 >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'

+echo '' >foo0

+$awk 'END { print NF, $0 }' foo0 >foo1

+awk '{ print NF, $0 }' foo0| tail -1 >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'

+# Check for nonzero exit status on I/O error.

+#echo 'E 2' >foo1

+#(trap '' PIPE; "$awk" 'BEGIN { print "hi"; }' >[2]/dev/null; echo "E $?" >foo2) | :

+#cmp -s foo1 foo2 || echo 'BAD: T.misc exit status on I/O error'

+# Check for clobbering of the lexer's regular expression buffer.

+# If the output is "a1" instead of "1b", /b/ clobbered /a/.

+echo 1b >foo1

+echo ab | $awk '{ sub(/a/, "b" ~ /b/); print }' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc lexer regex buffer clobbered'

+# Check handling of octal \OOO and hex \xHH esc. seqs. in strings.

+echo 'hello888

+hello

+hello

+helloxGOO

+hello

+0A' > foo1

+$awk 'BEGIN { print "hello\888" }'   > foo2

+$awk 'BEGIN { print "hello\x000A" }' >> foo2

+$awk 'BEGIN { printf "hello\x0A" }'  >> foo2

+$awk 'BEGIN { print "hello\xGOO" }'  >> foo2

+$awk 'BEGIN { print "hello\x0A0A" }' >> foo2

+cmp -s foo1 foo2 || echo 'BAD: T.misc escape sequences in strings mishandled'

--- /dev/null

+++ b/test/T.overflow

@@ -1,0 +1,86 @@

+#!/bin/rc

+echo T.overflow: test some overflow conditions

+$awk 'BEGIN {

+ 	for (i = 0; i < 1000; i++) printf("abcdefghijklmnopqsrtuvwxyz")

+ 	printf("\n")

+ 	exit

+}' >foo1

+$awk '{print}' foo1 >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow record 1'

+echo 'abcdefghijklmnopqsrtuvwxyz' >foo1

+echo hello | $awk '

+ { for (i = 1; i < 500; i++) s = s "abcdefghijklmnopqsrtuvwxyz "

+   $0 = s

+   print $1

+ }'  >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow abcdef'

+# default input record 3072, fields 200:

+$awk '

+BEGIN {

+	for (j = 0; j < 2; j++) {

+		for (i = 0; i < 500; i++)

+			printf(" 123456789")

+		printf("\n");

+	}

+} ' >foo1

+$awk '{$1 = " 123456789"; print}' foo1 >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf set $1'

+$awk '

+BEGIN {

+	for (j = 0; j < 2; j++) {

+		for (i = 0; i < 500; i++)

+			printf(" 123456789")

+		printf("\n");

+	}

+} ' >foo

+$awk  '{print NF}' foo >foo1

+echo '500

+500' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf NF'

+rm -f core

+# this should not drop core

+$awk 'BEGIN {

+	for (i = 1; i < 1000; i++) s = s "a-z"

+	if ("x" ~ "[" s "]")

+		print "ugh"

+}' >foo >[2]foo

+test -r core && echo >[1=2] 'BAD: T.overflow too long char class dropped core'	# rc quotes with single quotes only

+echo 4000004 >foo1

+$awk '

+BEGIN {

+	x1 = sprintf("%1000000s\n", "hello")

+	x2 = sprintf("%-1000000s\n", "world")

+	x3 = sprintf("%1000000.1000000s\n", "goodbye")

+	x4 = sprintf("%-1000000.1000000s\n", "goodbye")

+	print length(x1 x2 x3 x4)

+}' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow huge sprintfs'

+echo 0 >foo1

+$awk '

+BEGIN {

+	for (i = 0; i < 100000; i++)

+		x[i] = i

+	for (i in x)

+		delete x[i]

+	n = 0

+	for (i in x)

+		n++

+	print n

+}' >foo2

+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'

+echo x >foo1

+$awk '{print $40000000000000}' <foo1 >foo2 >[2]foo	# awk is expected to report an out of range field

+grep 'out of range field' foo >/dev/null || echo >[1=2] 'BAD: T.overflow $400000'

+rm foo*

+$awk 'BEGIN { for (i=1; i <= 1000; i++) print i >("foo" i) }'

+ls foo* | grep '1000' >/dev/null || echo >[1=2] 'BAD: T.overflow openfiles'

+rm foo*

--- a/test/T.sub

+++ b/test/T.sub

@@ -28,7 +28,7 @@

 	gsubout = $5

 NF < 5 {	# weird input line

-	printf("weird test spec `%s` ignored\n", $0) | "cat 1>&2"

+	printf("weird test spec `%s` ignored\n", $0) | "cat >[1=2]"

 	next

 {		# "" => explicitly empty

--- /dev/null

+++ b/test/T.utfre

@@ -1,0 +1,234 @@

+#!/bin/rc

+echo T.utfre: tests of regular expression code for Unicode/utf-8

+# adapted from T.re

+awk '	# driver: turns each spec line of the here document into a one-shot awk test

+BEGIN {

+	FS = "\t"	# spec fields are tab-separated: regex, operator, subject string

+	awk = ENVIRON["awk"]	# awk under test, named by the awk environment variable

+}

+NF == 0 {	# skip lines with no fields

+	next

+}

+$1 != "" {	# new test: a non-empty first field replaces the current regex

+	re = $1

+}

+$2 != "" {	# either ~ or !~

+	op = $2

+	if (op == "~")	# neg is the inverse of op, so the generated rule fires only when the expectation fails

+		neg = "!"

+	else if (op == "!~")

+		neg = ""

+}

+$3 != "" {	# new test string

+	str = $3	# recorded here but not read again in this program

+}

+$3 == "\"\"" {	# explicit empty line: a literal "" field stands for the empty string

+	$3 = ""

+}

+NF > 2 {	# generate a test

+	input = $3

+	test = sprintf("echo ''%s'' | %s ''%s/%s/ {print \"%d fails %s %s %s\"}''",

+		input, awk, neg, re, NR, re, op, input)

+	# printf(" %3d   %s %s %s:\n", NR, re, op, input)

+	# print "test is |" test "|"

+	system(test)	# run the generated pipeline; it prints only on a failure

+	nt++

+}

+END { print "	" nt, "tests" }	# total number of generated tests

+' <<'!!!!'

+	~	🖕

+		🖕🖕

+		🖕🖕🖕

+		""

+🖕	~	🖕

+		b🖕

+		b🖕b

+	!~	""

+		时

+		xxxxx

+.	~	时

+		x时x

+		🙂

+	!~	""

+.の	~	xの

+		xxの

+		xのx

+	!~	の

+		のx

+		""

+$	~	x

+		🙂

+		""

+.$	~	모

+		xx모

+		x모x

+	!~	""

+д$	~	д

+		bд

+		bbbд

+	!~	дb

+		x

+		""

+^	~	и

+		""

+		^

+^λ$	~	λ

+	!~	xλ

+		λx

+		xλx

+		""

+^λ.$	~	λx

+		λλ

+	!~	xλ

+		λλλ

+		λxy

+		""

+^$	~	""

+	!~	に

+		^

+^.해	~	め해

+		め해해

+	!~	해

+		""

+^.*해	~	해

+		め해

+		めめめめめめ해

+	!~	""

+^.+해	~	め해

+		めめめめめめ해

+	!~	""

+		해

+		해め

+해*	~	""

+		해

+		해해해해

+		め해

+		めめめめ

+해해*	~	해

+		해해해

+		め해

+	!~	めめめめ

+		""

+\$	~	🖕$

+		$

+		$🖕

+		🖕$🖕

+	!~	""

+		🖕

+\.	~	.

+	!~	🖕

+		""

+xθ+y	~	xθy

+		xθθy

+		xθθθθθθy

+	!~	θy

+		xy

+		xθ

+xθ?y	~	xy

+		xθy

+	!~	xθθy

+θ?b?の?	~	""

+		x

+^a?b?め	~	め

+		aめ

+		bめ

+		abめ

+		めa

+	!~	""

+		ab

+		aba

+[Α-Ω]	~	Α

+		aΔb

+		xyΩ

+	!~	abc

+		β

+		""

+[^Α-Ω]	~	δ

+		aΔb

+		xyΩ

+	!~	Α

+		Δ

+		""

+[Α-ΔΦ-Ω]	~	Α

+		Β

+		Δ

+		Φ

+		Ω

+	!~	Π

+		Σ

+Π[[:lower:]]+	~	Πa

+		Πab

+	!~	Π

+		ΠX

+		Π:

+		Π[

+		Π]

+の[0-9]+に	~	の0に

+		の23に

+		の12345に

+	!~	0に

+		のに

+の[0-9]?に	~	のに

+		の1に

+	!~	の23に

+の[[]に	~	の[に

+	!~	のに

+		の[[]に

+		の]に

+の[[-]に	~	の[に

+		の-に

+	!~	のに

+		の[[]に

+		の]に

+の[[-a]に	~	の[に

+		のaに

+		の]に

+	!~	のに

+		の[[]に

+		の-に

+の[]-a]に	~	の]に

+		のaに

+	!~	のに

+		の[に

+		の-に

+の[]]に	~	の]に

+	!~	のに

+		の[]]に

+		の[に

+の[^[]に	~	のaに

+	!~	の[に

+の[-]に	~	の-に

+	!~	のに

+		の+に

+の[^-]に	~	の+に

+	!~	の-に

+		のに

+の[][]に	~	の[に

+		の]に

+	!~	のに

+		の][に

+		の[]に

+の[z-a]に	~	のに

+	!~	の

+		に

+		のaに

+		のzに

+		の-に

+に|だ	~	だ

+		に

+		だに

+	!~	a

+		""

+^στο|τους$	~	στο

+		στοd

+		aτους

+		τους

+	!~	xστο

+		τουςa

+^(στο|τους)$	~	στο

+		τους

+	!~	στοτους

+		στοx

+		cτους

+!!!!

--

⑨