ref: 2af227c88bbcf8d6c52eb5b3b3a677d88a281774
parent: c153970394a8b04aa4eca9dee54d3a55e51606c9
author: qwx <qwx@sciops.net>
date: Mon Sep 29 06:13:49 EDT 2025
add more tests, fix typos
--- a/test/T.expr
+++ b/test/T.expr
@@ -14,7 +14,7 @@
sub(/try /, "")
prog = $0
printf("%3d %s\n", nt, prog)- prog = sprintf("%s -F\"\\t\" ''%s''", awk, prog)+ prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)# print "prog is", prog
nt2 = 0
while (getline > 0) {@@ -31,7 +31,7 @@
output = sprintf("echo ''%s'' >foo2; ", $NF)gsub(/\\t/, "\t", output)
gsub(/\\n/, "\n", output)
- run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",+ run = sprintf("cmp foo1 foo2 || echo test %d.%d failed",nt, ++nt2)
# print "input is", input
# print "test is", test
--- /dev/null
+++ b/test/T.func
@@ -1,0 +1,195 @@
+#!/bin/rc
+echo T.func: test user-defined functions
+
+echo '10 2
+2 10
+10 10
+10 1e1
+1e1 9' | $awk '
+# tests whether function returns sensible type bits
+
+function assert(cond) { # assertion+ if (cond) print 1; else print 0
+}
+
+function i(x) { return x }+
+{ m=$1; n=i($2); assert(m>n) }+' >foo1
+echo '1
+0
+0
+0
+1' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (function return type)'
+
+echo 'data: data' >foo1
+$awk '
+function test1(array) { array["test"] = "data" }+function test2(array) { return(array["test"]) }+BEGIN { test1(foo); print "data: " test2(foo) }+' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (array type)'
+
+$awk '
+BEGIN { code() }+END { codeout("x") }+function code() { ; }+function codeout(ex) { print ex }+' /dev/null >foo1
+echo x >foo2
+diff foo1 foo2 || echo 'BAD: T.func (argument passing)'
+
+$awk '
+BEGIN { unireghf() }+
+function unireghf(hfeed) {+ hfeed[1]=0
+ rcell("foo",hfeed)+ hfeed[1]=0
+ rcell("bar",hfeed)+}
+
+function rcell(cellname,hfeed) {+ print cellname
+}
+' >foo1
+echo 'foo
+bar' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (convert arg to array)'
+
+$awk '
+function f(n) {+ if (n <= 1)
+ return 1
+ else
+ return n * f(n-1)
+}
+{ print f($1) }+' <<! >foo2
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+!
+cat <<! >foo1
+1
+1
+2
+6
+24
+120
+720
+5040
+40320
+362880
+!
+diff foo1 foo2 || echo 'BAD: T.func (factorial)'
+
+$awk '
+function ack(m,n) {+ k = k+1
+ if (m == 0) return n+1
+ if (n == 0) return ack(m-1, 1)
+ return ack(m-1, ack(m, n-1))
+}
+{ k = 0; print ack($1,$2), "(" k " calls)" }+' <<! >foo2
+0 0
+1 1
+2 2
+3 3
+3 4
+3 5
+!
+cat <<! >foo1
+1 (1 calls)
+3 (4 calls)
+7 (27 calls)
+61 (2432 calls)
+125 (10307 calls)
+253 (42438 calls)
+!
+diff foo1 foo2 || echo 'BAD: T.func (ackermann)'
+
+$awk '
+END { print "end" }+{ print fib($1) }+function fib(n) {+ if (n <= 1) return 1
+ else return add(fib(n-1), fib(n-2))
+}
+function add(m,n) { return m+n }+BEGIN { print "begin" }+' <<! >foo2
+1
+3
+5
+10
+!
+cat <<! >foo1
+begin
+1
+3
+8
+89
+end
+!
+diff foo1 foo2 || echo 'BAD: T.func (fib)'
+
+$awk '
+function foo() {+ for (i = 1; i <= 2; i++)
+ return 3
+ print "should not see this"
+}
+BEGIN { foo(); exit }+' >foo1
+grep 'should not' foo1 && echo 'BAD: T.func (return)'
+
+# this exercises multiple free of temp cells
+echo 'eqn
+eqn2' >foo1
+$awk 'BEGIN { eprocess("eqn", "x", contig) + process("tbl" )+ eprocess("eqn" "2", "x", contig) + }
+function eprocess(file, first, contig) {+ print file
+}
+function process(file) {+ close(file)
+}' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (eqn)'
+
+echo 1 >foo1
+$awk 'function f() { n = 1; exit }+ BEGIN { n = 0; f(); n = 2 }; END { print n}' >foo2+diff foo1 foo2 || echo 'BAD: T.func (exit in function)'
+
+echo 1 >foo1
+$awk '
+BEGIN { n = 10+ for (i = 1; i <= n; i++)
+ for (j = 1; j <= n; j++)
+ x[i,j] = n * i + j
+ for (i = 1; i <= n; i++)
+ for (j = 1; j <= n; j++)
+ if ((i,j) in x)
+ k++
+ print (k == n^2)
+ }
+' >foo2
+diff foo1 foo2 || echo 'BAD: T.func (multi-dim subscript)'
+
+echo '<> 0' >foo1
+$awk '
+function foo() { i = 0 }+ BEGIN { x = foo(); printf "<%s> %d\n", x, x }' >foo2+diff foo1 foo2 || echo 'BAD: T.func (fall off end)'
--- /dev/null
+++ b/test/T.misc
@@ -1,0 +1,536 @@
+#!/bin/rc
+echo T.misc: miscellaneous buglets now watched for
+
+echo 'The big brown over the lazy doe
+The big brown over the lazy dog
+x
+The big brown over the lazy dog' >foo
+echo 'failed
+succeeded
+failed
+succeeded' >foo1
+$awk '{ if (match($0, /^The big brown over the lazy dog/) == 0) {+ printf("failed\n")+ } else {+ printf("succeeded\n")+ }
+} ' foo >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc ghosh RE bug'
+
+echo '123
+1234567890
+12345678901' >foo
+echo '12345678901' >foo1
+$awk 'length($0) > 10' foo >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc last number bug'
+
+# check some \ sequences in strings (ascii)
+echo HIJKL >foo1
+echo foo | $awk '{ print "H\x49\x4a\x4BL" }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc hex string cvt'
+
+echo 012x45 >foo1
+$awk 'BEGIN { print "0\061\62x\0645" }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc oct string cvt'
+
+# $i++ means ($i)++
+echo 3 5 | $awk '{ i = 1; print $i++ ; print $1, i }' >foo1+echo '3
+4 1' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc bad field increment'
+
+# make sure that fields are recomputed even on self-assignment;
+# note that subtracting from NF now rebuilds the record
+echo 'a b c
+s p q r
+x y z' >foo
+echo 'a
+s p
+x' >foo1
+$awk '{ NF -= 2; $1 = $1; print }' <foo >foo2+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad field self-assignment'
+
+echo '1
+1' >foo1
+$awk 'BEGIN {x = 1; print x; x = x; print x}' >foo2+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad self-assignment'
+
+echo 573109312 | $awk '{print $1*4}' >foo1+echo 2292437248 >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad overflow'
+
+# note that there are 8-bit characters in the echo
+# some shells will probably screw this up.
+echo '#
+code 1
+code 2' |
+$awk '/^#/' >foo1
+echo '#' >foo2
+diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'
+
+echo hello |
+$awk 'BEGIN { FILENAME = "/lib/ucd/UnicodeData.txt" }
+	{ print $0 }' >/dev/null
+if(! ~ $status '' '|')	# a clean two-stage pipeline leaves $status as '|', not ''
+ echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'
+
+echo hello |
+$awk ' function foo(foo) {+ foo = 1
+ foo()
+ }
+ { foo(bar) }+' >/dev/null >[2=1]
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc function foo(foo) dropped core'
+
+echo '2
+10' |
+$awk '{ x[NR] = $0 } # test whether $0 is NUM as well as STR+END { if (x[1] > x[2]) print "BAD: T.misc: $0 is not NUM" }'+
+
+$awk 'BEGIN {+ npad = substr("alexander" " ",1,15)+ print npad
+ }' >foo
+grep '\\' foo && echo >[1=2] 'BAD: T.misc alexander fails'
+
+# This should give an error about function arguments
+$awk '
+function foo(x) { print "x is" x }+BEGIN { foo(foo) }+' >[2]foo
+grep 'can''t use function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc fcn args'
+
+
+# gawk defref test; should give error about undefined function
+$awk 'BEGIN { foo() }' >[2]foo+grep 'calling undefined function foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc undefined function'
+
+
+# gawk arrayparm test; should give error about function
+$awk '
+BEGIN {+ foo[1]=1;
+ foo[2]=2;
+ bug1(foo);
+}
+function bug1(i) {+ for (i in foo) {+ bug2(i);
+ delete foo[i];
+ print i,1,bot[1];
+ }
+}
+function bug2(arg) {+ bot[arg]=arg;
+}
+' >[2]foo
+grep 'can.t assign to foo' foo >/dev/null || echo >[1=2] 'BAD: T.misc foo bug'
+
+
+# This should be a syntax error
+$awk '
+!x = y
+' >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error !x=y fails'
+
+# This should print bbb
+$awk '
+BEGIN { up[1] = "a"+ for (i in up) gsub("a", "A", x)+ print x x "bbb"
+ exit
+ }
+' >foo
+grep bbb foo >/dev/null || echo >[1=2] 'BAD: T.misc gsub failed'
+
+echo yes |
+$awk '
+BEGIN {+ printf "push return" >"/dev/null"
+ getline ans <"/dev/null"
+} '
+if(! ~ $status '' '|')	# a clean two-stage pipeline leaves $status as '|', not ''
+ echo >[1=2] 'BAD: T.misc getline ans dropped core'
+
+$awk 'BEGIN { unireghf() }+function unireghf(hfeed) { hfeed[1] = 0 }'+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc unireghf dropped core'
+
+echo x | $awk '/[/]/' >[2]foo
+grep 'nonterminated character class' foo >/dev/null || echo >[1=2] 'BAD: T.misc nonterminated fails'
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc nonterminated dropped core'
+
+$awk '
+function f() { return 12345 }+BEGIN { printf "<%s>\n", f() }+' >foo
+grep '<12345>' foo >/dev/null || echo 'BAD: T.misc <12345> fails'
+
+echo 'abc
+def
+
+ghi
+jkl' >foo
+$awk '
+BEGIN { RS = ""
+	while (getline <"foo")
+		print
+}' >foo1
+$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
+
+# Test for RS regex matching an empty record at EOF
+echo a | $awk 1 'RS=a\n' > foo1
+cat << 'EOF' > foo2
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc RS regex matching an empty record at EOF fails'
+
+# Test for RS regex being reapplied
+echo aaa1a2a | $awk 1 'RS=^a' >foo1
+cat << 'EOF' > foo2
+
+aa1a2a
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
+
+# ^-anchored RS matching should be active at the start of each input file
+tee foo1 foo2 >foo3 << 'EOF'
+aaa
+EOF
+$awk 1 'RS=^a' foo1 foo2 foo3 >foo4
+cat << 'EOF' > foo5
+
+aa
+
+
+aa
+
+
+aa
+
+EOF
+diff foo4 foo5 || echo 'BAD: T.misc ^RS matches the start of every input file fails'
+
+# The following should not produce a warning about changing a constant
+# nor about a curdled tempcell list
+$awk 'function f(x) { x = 2 }+BEGIN { f(1) }' >foo+grep '^' foo && echo 'BAD: test constant change fails'
+
+# The following should not produce a warning about a curdled tempcell list
+$awk 'function f(x) { x }+BEGIN { f(1) }' >foo+grep '^' foo && echo 'BAD: test tempcell list fails'
+
+$awk 'BEGIN { print 9, a=10, 11; print a; exit }' >foo1+echo '9 10 11
+10' >foo2
+diff foo1 foo2 || echo 'BAD: T.misc (embedded expression)'
+
+echo 'abc defgh ijkl' | $awk '
+ { $1 = ""; line = $0; print line; print $0; $0 = line; print $0 }' >foo1+echo ' defgh ijkl
+ defgh ijkl
+ defgh ijkl' >foo2
+diff foo1 foo2 || echo 'BAD: T.misc (assignment to $0)'
+
+$awk '
+function min(a, b)
+{+ if (a < b)
+ return a
+ else
+ return b
+}
+BEGIN { exit }+'
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc function min dropped core'
+
+# The following should not give a syntax error message:
+$awk '
+function expand(chart) {+ getline chart < "CHAR.ticks"
+}
+' >foo
+grep '^' foo >/dev/null && echo 'BAD: T.misc expand error'
+
+$awk 'BEGIN { print 1e40 }' >/dev/null+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc 1E40 dropped core'
+
+# The following syntax error should not dump core:
+$awk '
+$NF==3 {first=1}+$NF==2 && first==0 && (abs($1-o1)>120||abs($2-o2)>120) {print $0}+$NF==2 {o1=%1; o2=$2; first=0}+' >[2]/dev/null
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc first/abs dropped core'
+
+# The following syntax error should not dump core:
+$awk '{ n = split($1, address, !); print address[1] }' >[2]foo+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc split! dropped core'
+
+# The following should cause a syntax error message
+$awk 'BEGIN {"hello"}' >[2]foo+grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc hello error'
+
+# The following should give a syntax error message:
+$awk '
+function pile(c, r) {+ r = ++pile[c]
+}
+
+{ pile($1) }+' >[2]foo
+grep 'context is' foo >/dev/null || echo 'BAD: T.misc pile error'
+
+# This should complain about missing atan2 argument:
+$awk 'BEGIN { atan2(1) }' >[2]foo+grep 'requires two arg' foo >/dev/null || echo 'BAD: T.misc atan2 error'
+
+# This should not core dump:
+$awk 'BEGIN { f() }+function f(A) { delete A[1] }+'
+if(! ~ $status '')
+ echo >[1=2] 'BAD: T.misc delete dropped core'
+
+# nasty one: should not be able to overwrite constants
+$awk 'BEGIN { gsub(/ana/,"anda","banana")+ printf "the monkey ate a %s\n", "banana" }
+' >/dev/null >[2]foo
+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc gsub banana error'
+
+# nasty one: should not be able to overwrite constants
+$awk 'BEGIN { sub(/ana/,"anda","banana")+ printf "the monkey ate a %s\n", "banana" }
+' >/dev/null >[2]foo
+grep 'syntax error' foo >/dev/null || echo 'BAD: T.misc sub banana error'
+
+# line numbers used to double-count comments
+$awk '#
+#
+#
+/x
+' >/dev/null >[2]foo
+grep 'line [45]' foo >/dev/null || echo 'BAD: T.misc lineno'
+
+echo 'x \y' >foo1
+$awk 'BEGIN { print "x\f\r\b\v\a\\y" }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc weird chars'
+
+echo 0 >foo1
+$awk ' BEGIN { exit }+ { print }+ END { print NR }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc BEGIN exit'
+
+echo 1 >foo1
+$awk '	{ exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immediate exit'
+
+echo 1 >foo1
+$awk '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immediate exit 2'
+
+echo 1 >foo1
+$awk '	function f() {
+		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
+
+echo 1 >foo1
+$awk '	function f() {
+		split("a b c", arr)
+		for (i in arr) {if (i == 3) return NR; i++ }
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc for return'
+
+echo 1 >foo1
+$awk '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immediate exit 3'
+
+echo 1 >foo1
+$awk '	function f() {
+		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)
+	}
+	{ if (f() == 1) exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc do return'
+
+echo 1 >foo1
+$awk '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immediate exit 4'
+
+echo 1 >foo1
+$awk '	{ n = split($0, x)
+	  for (i in x) {
+		if (i == 1)
+			exit } }
+	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.misc immediate exit 5'
+
+echo XXXXXXXX >foo1
+$awk 'BEGIN { s = "ab\fc\rd\be"+ t = s; gsub("[" s "]", "X", t); print t }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'
+
+$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2+grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'
+
+echo '
+
+
+a
+aa
+
+b
+
+
+c
+
+' >foo
+echo 3 >foo1
+$awk 'BEGIN { RS = "" }; END { print NR }' foo >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc RS botch'
+
+$awk 'BEGIN \
+ {+ print "hello, world"
+ }
+}}}' >foo1 >[2]foo2
+grep 'source line 5' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc continuation line number'
+
+$awk 'BEGIN {+ if () {+ print "foo"
+ }
+}' >foo1 >[2]foo2
+grep 'syntax error at source line 2' foo2 >/dev/null >[2=1] || echo 'BAD: T.misc syntax error line number'
+
+echo 111 222 333 >foo
+$awk '{ f[1]=1; f[2]=2; print $f[1], $f[1]++, $f[2], f[1], f[2] }' foo >foo2+echo 111 111 222 2 2 >foo1
+cmp -s foo1 foo2 || echo 'BAD: T.misc $f[1]++'
+
+
+# These should be syntax errors
+$awk . >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error . fails'
+
+$awk .. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .. fails'
+
+$awk .E. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .E. fails'
+
+$awk .++. >[2]foo
+grep 'syntax error' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error .++. fails'
+
+
+
+# These should be syntax errors
+$awk '$' >[2]foo
+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $ fails'
+
+$awk '{print $' >[2]foo+grep 'unexpected' foo >/dev/null || echo >[1=2] 'BAD: T.misc syntax error $2 fails'
+
+$awk '"' >[2]foo
+grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc bare quote fails'
+
+
+# %c of 0 is explicit null byte
+
+echo '3' >foo1
+$awk 'BEGIN {printf("%c%c\n", 0, 0) }' | wc | $awk '{print $3}' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc null byte'
+
+# non-terminated RE
+
+# FIXME: test disabled; the grep is disabled with it, else it matches stale foo from the bare quote test
+#$awk /xyz >foo >[2=1]
+#grep 'non-terminated' foo >/dev/null || echo >[1=2] 'BAD: T.misc non-terminated RE'
+
+# next several were infinite loops, found by brian tsang.
+# this is his example:
+
+# FIXME
+#$awk 'BEGIN {+# switch (substr("x",1,1)) {+# case /ask.com/:
+# break
+# case "google":
+# break
+# }
+#}' >foo >[2=1]
+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 1'
+
+#$awk 'BEGIN { s { c /./ } }' >foo >[2=1]+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 2'
+
+#$awk 'BEGIN { s { c /../ } }' >foo >[2=1]+#grep 'illegal statement' foo >/dev/null || echo >[1=2] 'BAD: T.misc looping syntax error 3'
+
+$awk 'BEGIN {printf "%2$s %1$s\n", "a", "b"}' >foo >[2=1]+grep '''$'' not permitted in awk formats' foo >/dev/null || echo >[1=2] 'BAD: T.misc ''$'' not permitted in formats'
+
+echo 'a
+b c
+de fg hi' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1+awk '{ print NF, $0 }' foo0| tail -1 >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+echo 'fg hi' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1+awk '{ print NF, $0 }' foo0| tail -1 >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+echo '' >foo0
+$awk 'END { print NF, $0 }' foo0 >foo1+awk '{ print NF, $0 }' foo0| tail -1 >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0'
+
+# Check for nonzero exit status on I/O error.
+#echo 'E 2' >foo1
+#(trap '' PIPE; "$awk" 'BEGIN { print "hi"; }' >[2]/dev/null; echo "E $?" >foo2) | :+#cmp -s foo1 foo2 || echo 'BAD: T.misc exit status on I/O error'
+
+# Check for clobbering of the lexer's regular expression buffer.
+# If the output is "a1" instead of "1b", /b/ clobbered /a/.
+echo 1b >foo1
+echo ab | $awk '{ sub(/a/, "b" ~ /b/); print }' >foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc lexer regex buffer clobbered'
+
+# Check handling of octal \OOO and hex \xHH esc. seqs. in strings.
+echo 'hello888
+hello
+hello
+helloxGOO
+hello
+0A' > foo1
+$awk 'BEGIN { print "hello\888" }' > foo2+$awk 'BEGIN { print "hello\x000A" }' >> foo2+$awk 'BEGIN { printf "hello\x0A" }' >> foo2+$awk 'BEGIN { print "hello\xGOO" }' >> foo2+$awk 'BEGIN { print "hello\x0A0A" }' >> foo2+cmp -s foo1 foo2 || echo 'BAD: T.misc escape sequences in strings mishandled'
--- /dev/null
+++ b/test/T.overflow
@@ -1,0 +1,86 @@
+#!/bin/rc
+echo T.overflow: test some overflow conditions
+
+$awk 'BEGIN {+ for (i = 0; i < 1000; i++) printf("abcdefghijklmnopqsrtuvwxyz")+ printf("\n")+ exit
+}' >foo1
+$awk '{print}' foo1 >foo2+cmp -s foo1 foo2 || echo 'BAD: T.overflow record 1'
+
+echo 'abcdefghijklmnopqsrtuvwxyz' >foo1
+echo hello | $awk '
+ { for (i = 1; i < 500; i++) s = s "abcdefghijklmnopqsrtuvwxyz "+ $0 = s
+ print $1
+ }' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow abcdef'
+
+# default input record 3072, fields 200:
+$awk '
+BEGIN {+ for (j = 0; j < 2; j++) {+ for (i = 0; i < 500; i++)
+ printf(" 123456789")+ printf("\n");+ }
+} ' >foo1
+$awk '{$1 = " 123456789"; print}' foo1 >foo2+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf set $1'
+
+$awk '
+BEGIN {+ for (j = 0; j < 2; j++) {+ for (i = 0; i < 500; i++)
+ printf(" 123456789")+ printf("\n");+ }
+} ' >foo
+$awk '{print NF}' foo >foo1+echo '500
+500' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow -mr -mf NF'
+
+rm -f core
+# this should not drop core
+$awk 'BEGIN {+ for (i = 1; i < 1000; i++) s = s "a-z"
+ if ("x" ~ "[" s "]")+ print "ugh"
+}' >foo >[2]foo
+test -r core && echo >[1=2] 'BAD: T.overflow too long char class dropped core'
+
+echo 4000004 >foo1
+$awk '
+BEGIN {+ x1 = sprintf("%1000000s\n", "hello")+ x2 = sprintf("%-1000000s\n", "world")+ x3 = sprintf("%1000000.1000000s\n", "goodbye")+ x4 = sprintf("%-1000000.1000000s\n", "goodbye")+ print length(x1 x2 x3 x4)
+}' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow huge sprintfs'
+
+echo 0 >foo1
+$awk '
+BEGIN {+ for (i = 0; i < 100000; i++)
+ x[i] = i
+ for (i in x)
+ delete x[i]
+ n = 0
+ for (i in x)
+ n++
+ print n
+}' >foo2
+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
+
+echo x >foo1
+$awk '{print $40000000000000}' <foo1 >foo2 >[2]foo
+grep 'out of range field' foo >/dev/null || echo >[1=2] 'BAD: T.overflow $400000'
+
+rm foo*
+$awk 'BEGIN { for (i=1; i <= 1000; i++) print i >("foo" i) }'+ls foo* | grep '1000' >/dev/null || echo >[1=2] 'BAD: T.overflow openfiles'
+rm foo*
--- a/test/T.sub
+++ b/test/T.sub
@@ -28,7 +28,7 @@
gsubout = $5
}
NF < 5 { # weird input line- printf("weird test spec `%s` ignored\n", $0) | "cat 1>&2"+ printf("weird test spec `%s` ignored\n", $0) | "cat >[1=2]"next
}
{ # "" => explicitly empty--- /dev/null
+++ b/test/T.utfre
@@ -1,0 +1,234 @@
+#!/bin/rc
+echo T.utfre: tests of regular expression code for Unicode/utf-8
+# adapted from T.re
+
+awk '
+BEGIN {+ FS = "\t"
+ awk = ENVIRON["awk"]
+}
+NF == 0 {+ next
+}
+$1 != "" { # new test+ re = $1
+}
+$2 != "" { # either ~ or !~+ op = $2
+ if (op == "~")
+ neg = "!"
+ else if (op == "!~")
+ neg = ""
+}
+$3 != "" { # new test string+ str = $3
+}
+$3 == "\"\"" { # explicit empty line+ $3 = ""
+}
+NF > 2 { # generate a test+ input = $3
+ test = sprintf("echo ''%s'' | %s ''%s/%s/ {print \"%d fails %s %s %s\"}''",+ input, awk, neg, re, NR, re, op, input)
+ # printf(" %3d %s %s %s:\n", NR, re, op, input)+ # print "test is |" test "|"
+ system(test)
+ nt++
+}
+END { print " " nt, "tests" }+' <<'!!!!'
+ ~ 🖕
+ 🖕🖕
+ 🖕🖕🖕
+ ""
+🖕 ~ 🖕
+ b🖕
+ b🖕b
+ !~ ""
+ 时
+ xxxxx
+. ~ 时
+ x时x
+ 🙂
+ !~ ""
+.の ~ xの
+ xxの
+ xのx
+ !~ の
+ のx
+ ""
+$ ~ x
+ 🙂
+ ""
+.$ ~ 모
+ xx모
+ x모x
+ !~ ""
+д$ ~ д
+ bд
+ bbbд
+ !~ дb
+ x
+ ""
+^ ~ и
+ ""
+ ^
+^λ$ ~ λ
+ !~ xλ
+ λx
+ xλx
+ ""
+^λ.$ ~ λx
+ λλ
+ !~ xλ
+ λλλ
+ λxy
+ ""
+^$ ~ ""
+ !~ に
+ ^
+^.해 ~ め해
+ め해해
+ !~ 해
+ ""
+^.*해 ~ 해
+ め해
+ めめめめめめ해
+ !~ ""
+^.+해 ~ め해
+ めめめめめめ해
+ !~ ""
+ 해
+ 해め
+해* ~ ""
+ 해
+ 해해해해
+ め해
+ めめめめ
+해해* ~ 해
+ 해해해
+ め해
+ !~ めめめめ
+ ""
+\$ ~ 🖕$
+ $
+ $🖕
+ 🖕$🖕
+ !~ ""
+ 🖕
+\. ~ .
+ !~ 🖕
+ ""
+xθ+y ~ xθy
+ xθθy
+ xθθθθθθy
+ !~ θy
+ xy
+ xθ
+xθ?y ~ xy
+ xθy
+ !~ xθθy
+θ?b?の? ~ ""
+ x
+^a?b?め ~ め
+ aめ
+ bめ
+ abめ
+ めa
+ !~ ""
+ ab
+ aba
+[Α-Ω] ~ Α
+ aΔb
+ xyΩ
+ !~ abc
+ β
+ ""
+[^Α-Ω] ~ δ
+ aΔb
+ xyΩ
+ !~ Α
+ Δ
+ ""
+[Α-ΔΦ-Ω] ~ Α
+ Β
+ Δ
+ Φ
+ Ω
+ !~ Π
+ Σ
+Π[[:lower:]]+ ~ Πa
+ Πab
+ !~ Π
+ ΠX
+ Π:
+ Π[
+ Π]
+の[0-9]+に ~ の0に
+ の23に
+ の12345に
+ !~ 0に
+ のに
+の[0-9]?に ~ のに
+ の1に
+ !~ の23に
+の[[]に ~ の[に
+ !~ のに
+ の[[]に
+ の]に
+の[[-]に ~ の[に
+ の-に
+ !~ のに
+ の[[]に
+ の]に
+の[[-a]に ~ の[に
+ のaに
+ の]に
+ !~ のに
+ の[[]に
+ の-に
+の[]-a]に ~ の]に
+ のaに
+ !~ のに
+ の[に
+ の-に
+の[]]に ~ の]に
+ !~ のに
+ の[]]に
+ の[に
+の[^[]に ~ のaに
+ !~ の[に
+の[-]に ~ の-に
+ !~ のに
+ の+に
+の[^-]に ~ の+に
+ !~ の-に
+ のに
+の[][]に ~ の[に
+ の]に
+ !~ のに
+ の][に
+ の[]に
+の[z-a]に ~ のに
+ !~ の
+ に
+ のaに
+ のzに
+ の-に
+に|だ ~ だ
+ に
+ だに
+ !~ a
+ ""
+^στο|τους$ ~ στο
+ στοd
+ aτους
+ τους
+ !~ xστο
+ τουςa
+^(στο|τους)$ ~ στο
+ τους
+ !~ στοτους
+ στοx
+ cτους
+!!!!
--
⑨