shithub: riscv

Download patch

ref: 5c89ac1ce32bb5e1050ff6397bdfadad58ad3ffc
parent: ced2a9bc7cc247be72bd6915c9a01a7d2d2710a5
author: Jacob Moody <moody@posixcafe.org>
date: Mon Mar 3 01:00:09 EST 2025

libc: runecomp: unicode 16.0 and rewrite

Unicode 16.0 adds some more complexity to
normalization, take the chance to rethink
the interface a bit more.
Change mkfile's to build the tables instead
of vendoring them.

--- a/lib/ucd/mkfile
+++ b/lib/ucd/mkfile
@@ -1,6 +1,6 @@
 </$objtype/mkfile
 
-VERSION='15.0.0'
+VERSION='16.0.0'
 URL='https://www.unicode.org/Public/'$VERSION'/ucd/'
 
 TXT=\
--- a/sys/include/libc.h
+++ b/sys/include/libc.h
@@ -77,12 +77,25 @@
 extern	long	runestrlen(Rune*);
 extern	Rune*	runestrstr(Rune*, Rune*);
 
-extern	int	runecomp(Rune*, Rune*, int);
-extern	int	runedecomp(Rune*, Rune*, int);
-extern	int	utfcomp(char*, char*, int);
-extern	int	utfdecomp(char*, char*, int);
-extern	char*	fullutfnorm(char*,int);
-extern	Rune*	fullrunenorm(Rune*,int);
+enum { Maxnormctx = 1+30 };
+typedef struct Norm Norm;
+struct Norm {
+	long (*getrune)(void*);
+	void *ctx;
+
+	struct {
+		Rune *e;
+		Rune a[Maxnormctx];
+	} ibuf, obuf;
+
+	int compose;
+};
+extern	void	norminit(Norm*, int, void*, long (*getrune)(void*));
+extern	long	normpull(Norm*, Rune*, long, int);
+extern	long	runecomp(Rune*, long, Rune*, long);
+extern	long	runedecomp(Rune*, long, Rune*, long);
+extern	long	utfcomp(char*, long, char*, long);
+extern	long	utfdecomp(char*, long, char*, long);
 
 extern	Rune*	runewbreak(Rune*);
 extern	char*	utfwbreak(char*);
--- a/sys/man/2/runecomp
+++ b/sys/man/2/runecomp
@@ -7,102 +7,145 @@
 .br
 .B #include <libc.h>
 .PP
-.B
-int	runecomp(Rune *dst, Rune *src, int max)
-.PP
-.B
-int	runedecomp(Rune *dst, Rune *src, int max)
-.PP
-.B
-Rune*	fullrunenorm(Rune *s, int n)
-.PP
-.B
-Rune*	runegbreak(Rune *s)
-.PP
-.B
-Rune*	runewbreak(Rune *s)
-.PP
-.B
-int	utfcomp(char *dst, char *src, int max)
-.PP
-.B
-int	utfdecomp(char *dst, char *src, int max)
-.PP
-.B
-char*	fullutfnorm(char *s, int n)
-.PP
-.B
-char*	utfgbreak(char *s)
-.PP
-.B
-char*	utfwbreak(char *s)
+.ft L
+.nf
+.EX
+typedef struct Norm Norm;
+struct Norm {
+	...    /* internals */
+};
+
+void	norminit(Norm *n, int comp, void *ctx, long (*getrune)(void *ctx));
+long	normpull(Norm *n, Rune *dst, long max, int flush);
+
+long	runecomp(Rune *dst, long ndst, Rune *src, long nsrc);
+long	runedecomp(Rune *dst, long ndst, Rune *src, long nsrc);
+
+long	utfcomp(char *dst, long ndst, char *src, long nsrc);
+long	utfdecomp(char *dst, long ndst, char *src, long nsrc);
+
+Rune*	runegbreak(Rune *s);
+Rune*	runewbreak(Rune *s);
+
+char*	utfgbreak(char *s);
+char*	utfwbreak(char *s);
+.EE
 .SH DESCRIPTION
-These routines help in handling
-graphemes that may span multiple runes.
-These routines are for use in font rendering
-and advanced text search; most programs do not
-need to perform normalization.
+These routines handle Unicode®
+abstract characters that span more
+than one codepoint.
+Normalization can be used to turn all codepoints
+into a consistent representation. This
+may be useful if a specific protocol requires normalization, or if
+the program is interested in semantically comparing
+irregular input.
 .PP
+The
+.I Norm
+structure is the core structure for all normalization routines.
+.I Norminit
+initializes the structure.
+If the
+.B comp
+argument is non-zero, the output will be normalized to
+NFC (precomposed runes), otherwise it will be normalized
+to NFD (decomposed runes).
+The
+.B getrune
+argument provides the input for normalization, with each call
+returning the next rune of input, and -1 on EOF.
+The
+.B ctx
+argument is stored and passed on to the
+.B getrune
+function in every call.
+.I Normpull
+provides the normalized output, writing at most
+.B max
+elements into
+.BR dst .
+To implement normalization the
+.I Norm
+structure must buffer input until it knows that the context
+for a given base rune is complete.
+In order to accommodate callers which only have chunks
+of data to normalize at a time, the
+.I Norm
+structure maintains runes within its buffer even when
+.B getrune
+returns an EOF.
+The
+.B flush
+argument to
+.I normpull
+changes this behavior, and will instead flush out all
+runes within the structure's buffer when it receives an
+EOF from
+.BR getrune .
+The return value of
+.I normpull
+is the number of runes written to the output.
+.I Normpull
+does not null-terminate the output string,
+however, null bytes are passed through untouched.
+As such, if the input is null terminated, so is the
+output.
+.PP
 .IR Runecomp ,
 .IR runedecomp ,
 .IR utfcomp ,
 and
-.I utfdecomp
-perform Unicode® normalization on
-.IR src ,
-storing the result in
-.IR dst .
-No more than
-.I max
-elements will be written, and the resulting string
-will always be null terminated. The return value
-is always the total number of elements required to
-store the transformation. If this value is larger
-than the supplied
-.I max
-the caller can assume the result has been truncated.
-.I Runecomp
+.IR utfdecomp ,
+are abstractions on top of the
+.I Norm
+structure. They are designed to normalize
+fixed-sized input in one go.
+In all functions
+.B src
 and
-.I utfcomp
-perform NFC normalization while
-.I runedecomp
+.B dst
+specify the source and destination strings
+respectively.
+The
+.B nsrc
 and
-.I utfdecomp
-perform NFD normalization.
+.B ndst
+arguments specify the number of elements
+to process.
+Functions will never read more
+than the specified input, and will never write
+more than the specified output. If there is
+not enough room in the output buffer, the
+result is truncated.
+The return value is likewise the number of elements
+written to the output string.
+Like
+.IR normpull ,
+these functions do not explicitly null terminate
+the output, and pass null bytes through untouched.
 .PP
-.IR Fullrunenorm ,
-and
-.I fullutfnorm
-determine if enough elements are present in
-.I s
-to perform normalization. If enough are present,
-a pointer is returned to the first element that begins
-the next context. Otherwise
-.I s
-is returned. No more then
-.I n
-elements will be read. In order to find the boundary, the
-first element of the next context must be peeked.
+The standard for normalization does not specify
+a maximum number of decomposed attaching runes
+that may follow a base rune.
+In order to implement normalization, within a bounded
+amount of memory, these functions implement a subset
+of normalization called Stream-Safe Text.
+This subset specifies that one base rune may have no
+more than 30 attaching runes.
+In order to break up input that contains runs of more than 30 attaching runes,
+these functions will insert the Combining Grapheme Joiner (U+034F) to provide
+a new base for the remaining combining runes.
 .PP
 .I Runegbreak
-and
-.I utfgbreak
-search
+(\fIrunewbreak\fR)
+return the next grapheme (word) break
+opportunity in
+.BR s ,
+or
 .B s
-for the next grapheme break opportunity.
-If none is found before the end of the string,
-.I s
-is returned.
-.PP
-.I Runewbreak
-and
-.I utfwbreak
-search
-.B s
-for the next word break opportunity.
-If none is found before the end of the string,
-.I s
-is returned.
+if none is found.
+Utfgbreak and utfwbreak are UTF
+variants of these routines.
 .SH SOURCE
 .B /sys/src/libc/port/mkrunetype.c
 .br
@@ -118,4 +161,5 @@
 .IR utf (6),
 .IR tcs (1)
 .SH HISTORY
-This implementation was written for 9front (March, 2023).
+This implementation was first written for 9front (March, 2023).
+The implementation was rewritten (in part) for Unicode 16.0 (March, 2025).
--- a/sys/src/cmd/tcs/utf.c
+++ b/sys/src/cmd/tcs/utf.c
@@ -68,44 +68,56 @@
 		write(1, obuf, p-obuf);
 }
 
+static struct {
+	int off;
+	int n;
+	Rune *input;
+} nctx;
+
+static long
+normgetrune(void*)
+{
+
+	if(nctx.off >= nctx.n)
+		return -1;
+
+	return nctx.input[nctx.off++];
+}
+
 void
-utfnorm_out(Rune *base, int n, int (*fn)(Rune*,Rune*,int))
+utfnorm_out(Rune *base, int n, int comp)
 {
-	static Rune rbuf[32];
-	static int nremain = 0;
-	Rune src[N + 1 + nelem(rbuf)];
-	Rune dst[N + 1 + nelem(rbuf)];
-	Rune *p, *p2, *e;
-	int i;
+	static Norm norm;
+	static int init;
+	Rune buf[N];
+	long w;
+	int flush;
 
-	e = base+n;
-	for(i = 0; i < nremain; i++,n++)
-		src[i] = rbuf[i];
-	nremain = 0;
-	for(p2 = p = base; n > 0;){
-		p2 = fullrunenorm(p, n);
-		if(p == p2)
-			break;
-		n -= p2-p;
-		for(;p < p2; p++)
-			src[i++] = *p;
+	if(init == 0){
+		init++;
+		norminit(&norm, comp, nil, normgetrune);
 	}
-	src[i] = 0;
-	utf_out(dst, fn(dst, src, sizeof dst), nil);
-	for(; p2 < e; p2++)
-		rbuf[nremain++] = *p2;
+	nctx.off = 0;
+	nctx.n = n;
+	nctx.input = base;
+
+	flush = n == 0;
+	do {
+		w = normpull(&norm, buf, nelem(buf), flush);
+		utf_out(buf, w, nil);
+	} while(w != 0);
 }
 
 void
 utfnfc_out(Rune *base, int n, long *)
 {
-	utfnorm_out(base, n, runecomp);
+	utfnorm_out(base, n, 1);
 }
 
 void
 utfnfd_out(Rune *base, int n, long *)
 {
-	utfnorm_out(base, n, runedecomp);
+	utfnorm_out(base, n, 0);
 }
 
 void
--- a/sys/src/libc/mkfile
+++ b/sys/src/libc/mkfile
@@ -10,6 +10,7 @@
 		cd $i
 		mk $MKFLAGS install
 	}
+	@{ cd port && mk extra }
 
 clean:V:
 	for(i in $DIRS $OLDCPUS test)@{
--- a/sys/src/libc/port/mkfile
+++ b/sys/src/libc/port/mkfile
@@ -63,9 +63,6 @@
 	rand.c\
 	readn.c\
 	rune.c\
-	runebreak.c\
-	runeistype.c\
-	runenorm.c\
 	runestrcat.c\
 	runestrchr.c\
 	runestrcmp.c\
@@ -78,7 +75,6 @@
 	runestrrchr.c\
 	runestrlen.c\
 	runestrstr.c\
-	runetotype.c\
 	sin.c\
 	sinh.c\
 	sqrt.c\
@@ -136,16 +132,26 @@
 runebreak.$O:	runebreakdata runebreak.c
 
 UCD=\
-	/lib/ucd/WordBreakProperty.txt\
-	/lib/ucd/GraphemeBreakProperty.txt\
-	/lib/ucd/emoji-data.txt\
 	/lib/ucd/CompositionExclusions.txt\
+	/lib/ucd/DerivedNormalizationProps.txt\
+	/lib/ucd/GraphemeBreakProperty.txt\
 	/lib/ucd/UnicodeData.txt\
+	/lib/ucd/WordBreakProperty.txt\
+	/lib/ucd/emoji-data.txt\
 
-/lib/ucd/%:
-	cd /lib/ucd && mk $stem
+EXTRA=\
+	runebreak.$O\
+	runeistype.$O\
+	runenorm.$O\
+	runetotype.$O\
 
-runenormdata runetotypedata runeistypedata runebreakdata:
+GEN=\
+	runenormdata\
+	runetotypedata\
+	runeistypedata\
+	runebreakdata\
+
+$GEN:	$UCD
 	@{
 		eval `{grep '^[A-Z]' /$cputype/mkfile}
 		$CC $CFLAGS -o mkrunetype.$O mkrunetype.c
@@ -153,4 +159,9 @@
 		$O.mkrunetype
 	}
 
-regen:V:	runenormdata runetotypedata runeistypedata runebreakdata mkrunetype.c $UCD
+$EXTRA:		$GEN
+
+extra:V:	$EXTRA
+	ar vu $LIB $prereq
+
+regen:V:	$GEN
--- a/sys/src/libc/port/mkrunetype.c
+++ b/sys/src/libc/port/mkrunetype.c
@@ -172,8 +172,8 @@
 	return size;
 }
 
-static void
-mklkupmatrix(char *label, int *map, Param *p)
+static int
+mklkupmatrix(int, char *label, int *map, Param *p)
 {
 	int bestsize, size, bestx, besty;
 	int x, y;
@@ -193,6 +193,7 @@
 	assert(bestsize != -1);
 	fprint(2, "label: %s best: %d %d (%d)\n", label, bestx, besty, bestsize);
 	param(p, bestx, besty);
+	return bestsize;
 }
 
 static int myismerged[NRUNES];
@@ -205,7 +206,9 @@
 static int mydecomp[NRUNES];
 static int mydespecial[256*3];
 static int nspecial;
+static int maxdchain;
 static int myccc[NRUNES];
+static int myqc[NRUNES];
 
 typedef struct KV KV;
 struct KV{
@@ -281,6 +284,19 @@
 	fprint(fd, "static uint *_recompcoll = _recompdata+%d*2;\n", nelem(vals));
 }
 
+enum {
+	OTHER, 
+	Hebrew_Letter, Newline, Extend, Format,
+	Katakana, ALetter, MidLetter, MidNum,
+	MidNumLet, Numeric, ExtendNumLet, WSegSpace,
+	PREPEND = 0x10, CONTROL = 0x20, EXTEND = 0x30, REGION = 0x40,
+	L = 0x50, V = 0x60, T = 0x70, LV = 0x80, LVT = 0x90, SPACEMK = 0xA0,
+	EMOJIEX = 0xB0,
+
+	NFC_QC_No = 1, NFC_QC_Maybe = 2, NFD_QC_No = 4, NFD_QC_Maybe = 8,
+	
+};
+
 static void
 mktables(void)
 {
@@ -324,11 +340,22 @@
 	param(&p, 10, 7);
 	size = mklkup(normfd, "decomp", mydecomp, &p);
 	fprint(2, "%s: %d\n", "decomp", size);
+	fprint(normfd, "static enum { Maxdecomp = %d };\n\n", maxdchain);
 
 	param(&p, 9, 7);
 	size = mklkup(normfd, "ccc", myccc, &p);
 	fprint(2, "%s: %d\n", "ccc", size);
 
+	param(&p, 10, 6);
+	size = mklkup(normfd, "qc", myqc, &p);
+	fprint(2, "%s: %d\n", "qc", size);
+	fprint(normfd, "static\nenum {\n");
+	fprint(normfd, "\t%s = %d,\n", "Qnfcno", NFC_QC_No);
+	fprint(normfd, "\t%s = %d,\n", "Qnfcmay", NFC_QC_Maybe);
+	fprint(normfd, "\t%s = %d,\n", "Qnfdno", NFD_QC_No);
+	fprint(normfd, "\t%s = %d,\n", "Qnfdmay", NFD_QC_Maybe);
+	fprint(normfd, "};\n");
+
 	mkexceptarr(normfd, "_decompexceptions", mydespecial, nspecial, 0);
 	mkexceptarr(normfd, "_recompexceptions", recompext, nrecompext, 1);
 	mkrecomp(normfd);
@@ -389,21 +416,35 @@
 	return code;
 }
 
-enum {
-	OTHER, 
-	Hebrew_Letter, Newline, Extend, Format,
-	Katakana, ALetter, MidLetter, MidNum,
-	MidNumLet, Numeric, ExtendNumLet, WSegSpace,
-	PREPEND = 0x10, CONTROL = 0x20, EXTEND = 0x30, REGION = 0x40,
-	L = 0x50, V = 0x60, T = 0x70, LV = 0x80, LVT = 0x90, SPACEMK = 0xA0,
-	EMOJIEX = 0xB0,
-};
+static char*
+getextraline(Biobuf *b, int *s, int *e)
+{
+	char *dot, *p;
 
+again:
+	p = Brdline(b, '\n');
+	if(p == nil)
+		return nil;
+	p[Blinelen(b)-1] = 0;
+	if(p[0] == 0 || p[0] == '#')
+		goto again;
+	if((dot = strstr(p, "..")) != nil){
+		*dot = 0;
+		dot += 2;
+		*s = estrtoul(p, 16);
+		*e = estrtoul(dot, 16);
+	} else {
+		*s = *e = estrtoul(p, 16);
+		dot = p;
+	}
+	return dot;
+}
+
 static void
 markbreak(void)
 {
 	Biobuf *b;
-	char *p, *dot;
+	char *dot;
 	int i, s, e;
 	uchar v;
 
@@ -411,19 +452,7 @@
 	if(b == nil)
 		sysfatal("could not load word breaks: %r");
 
-	while((p = Brdline(b, '\n')) != nil){
-		p[Blinelen(b)-1] = 0;
-		if(p[0] == 0 || p[0] == '#')
-			continue;
-		if((dot = strstr(p, "..")) != nil){
-			*dot = 0;
-			dot += 2;
-			s = estrtoul(p, 16);
-			e = estrtoul(dot, 16);
-		} else {
-			s = e = estrtoul(p, 16);
-			dot = p;
-		}
+	while((dot = getextraline(b, &s, &e)) != nil){
 		v = 0;
 		if(strstr(dot, "ExtendNumLet") != nil)
 			v = ExtendNumLet;
@@ -455,19 +484,7 @@
 	if(b == nil)
 		sysfatal("could not load Grapheme breaks: %r");
 
-	while((p = Brdline(b, '\n')) != nil){
-		p[Blinelen(b)-1] = 0;
-		if(p[0] == 0 || p[0] == '#')
-			continue;
-		if((dot = strstr(p, "..")) != nil){
-			*dot = 0;
-			dot += 2;
-			s = estrtoul(p, 16);
-			e = estrtoul(dot, 16);
-		} else {
-			s = e = estrtoul(p, 16);
-			dot = p;
-		}
+	while((dot = getextraline(b, &s, &e)) != nil){
 		v = 0;
 		if(strstr(dot, "; Prepend #") != nil)
 			v = PREPEND;
@@ -498,19 +515,7 @@
 	if(b == nil)
 		sysfatal("could not load emoji-data: %r");
 
-	while((p = Brdline(b, '\n')) != nil){
-		p[Blinelen(b)-1] = 0;
-		if(p[0] == 0 || p[0] == '#')
-			continue;
-		if((dot = strstr(p, "..")) != nil){
-			*dot = 0;
-			dot += 2;
-			s = estrtoul(p, 16);
-			e = estrtoul(dot, 16);
-		} else {
-			s = e = estrtoul(p, 16);
-			dot = p;
-		}
+	while((dot = getextraline(b, &s, &e)) != nil){
 		v = 0;
 		if(strstr(dot, "; Extended_Pictographic") != nil)
 			v = EMOJIEX;
@@ -518,6 +523,26 @@
 			mybreak[i] |= v;
 	}
 	Bterm(b);
+
+	b = Bopen("/lib/ucd/DerivedNormalizationProps.txt", OREAD);
+	if(b == nil)
+		sysfatal("could not load emoji-data: %r");
+
+	while((dot = getextraline(b, &s, &e)) != nil){
+		v = 0;
+		if(strstr(dot, "; NFC_QC; N") != nil)
+			v = NFC_QC_No;
+		else if(strstr(dot, "; NFC_QC; M") != nil)
+			v = NFC_QC_Maybe;
+		else if(strstr(dot, "; NFD_QC; N") != nil)
+			v = NFD_QC_No;
+		else if(strstr(dot, "; NFD_QC; M") != nil)
+			v = NFD_QC_Maybe;
+
+		for(i = s; i <= e; i++)
+			myqc[i] |= v;
+	}
+	Bterm(b);
 }
 
 static void
@@ -555,6 +580,21 @@
 	Bterm(b);
 }
 
+static void
+findlongchain(void)
+{
+	int i, n, x, r1;
+
+	for(i = 0; i < NRUNES; i++)
+	for(x = i, n = 0; r1 = mydecomp[x]>>16; x = r1){
+		if(++n > maxdchain)
+			maxdchain = n;
+		if(r1 >= DSTART && r1 <0xF8FF)
+			r1 -= DSTART;
+	}
+	maxdchain *= 2;
+}
+
 void
 main(int, char)
 {
@@ -683,6 +723,7 @@
 	}
 
 	Bterm(in);
+	findlongchain();
 	markexclusions();
 
 	/*
--- a/sys/src/libc/port/runenorm.c
+++ b/sys/src/libc/port/runenorm.c
@@ -22,61 +22,81 @@
 	TLast = TBase + TCount - 1,
 };
 
-static void
-_runedecomp(Rune dst[2], Rune c)
+/*
+ * Most runes decompose in to one/two
+ * other runes with codepoints < 0xFFFF,
+ * however there are some exceptions.
+ * To keep the table size down we instead
+ * store an index in to an exception range
+ * within the private use section and use
+ * an exception table.
+ */
+enum {
+	Estart = 0xEEEE,
+	Estop = 0xF8FF,
+};
+
+static Rune
+_runedecomp(Rune c, Rune *r2)
 {
 	uint x;
 
+	if(c < Runeself){
+		*r2 = 0;
+		return 0;
+	}
+
+	//korean
 	if(c >= SBase && c <= SLast){
 		c -= SBase;
 		x = c % TCount;
 		if(x){
-			dst[0] = SBase + ((c / TCount) * TCount);
-			dst[1] = TBase + x;
-			return;
+			*r2 = TBase + x;
+			return SBase + (c - x);
 		}
-		dst[0] = LBase + (c / NCount);
-		dst[1] = VBase + ((c % NCount) / TCount);
-		return;
+		*r2 = VBase + ((c % NCount) / TCount);
+		return LBase + (c / NCount);
 	}
+
 	x = decomplkup(c);
 	if((x & 0xFFFF) != 0){
-		dst[0] = x>>16;
-		dst[1] = x & 0xFFFF;
-		return;
+		*r2 = x & 0xFFFF;
+		return x>>16;
 	}
 	x >>= 16;
-	if(x >= 0xEEEE && x <0xF8FF){
-		memmove(dst, _decompexceptions[x - 0xEEEE], sizeof(Rune)*2);
-		return;
+	if(x >= Estart && x < Estop){
+		Rune *r;
+		r = _decompexceptions[x - Estart];
+		*r2 = r[1];
+		return r[0];
 	}
-	dst[0] = x;
-	dst[1] = 0;
+	*r2 = 0;
+	return x;
 }
 
 static Rune
-_runerecomp(Rune r[2])
+_runerecomp(Rune r0, Rune r1)
 {
 	uint x, y, *p, next;
 
-	if(r[0] >= LBase && r[0] <= LLast){
-		if(r[1] < VBase || r[1] > VLast)
+	if(r0 >= LBase && r0 <= LLast){
+		if(r1 < VBase || r1 > VLast)
 			return 0;
-		x = (r[0] - LBase) * NCount + (r[1] - VBase) * TCount;
+		x = (r0 - LBase) * NCount + (r1 - VBase) * TCount;
 		return SBase + x;
 	}
-	if(r[0] >= SBase && r[0] <= SLast && (r[0] - SBase) % TCount == 0){
-		if(r[1] > TBase && r[1] <= TLast)
-			return r[0] + (r[1] - TBase);
+	if(r0 >= SBase && r0 <= SLast && (r0 - SBase) % TCount == 0){
+		if(r1 > TBase && r1 <= TLast)
+			return r0 + (r1 - TBase);
 		return 0;
 	}
-	if(r[0] > 0xFFFF || r[1] > 0xFFFF){
+	if(r0 > 0xFFFF || r1 > 0xFFFF){
 		for(x = 0; x < nelem(_recompexceptions); x++)
-			if(r[0] == _recompexceptions[x][1] && r[1] == _recompexceptions[x][2])
+			if(r0 == _recompexceptions[x][1] && r1 == _recompexceptions[x][2])
 				return  _recompexceptions[x][0];
 		return 0;
 	}
-	y = x = r[0]<<16 | r[1];
+	y = x = r0<<16 | r1;
 	x ^= x >> 16;
 	x *= 0x21f0aaad;
 	x ^= x >> 15;
@@ -96,239 +116,329 @@
 runecccsort(Rune *a, int len)
 {
 	Rune r;
-	int i;
-	int fail;
+	int i, j;
 
-	do {
-		fail = 0;
-		for(i = 0; i < len - 1; i++){
-			if(ccclkup(a[i]) > ccclkup(a[i+1]) > 0){
-				r = a[i];
-				a[i] = a[i+1];
-				a[i + 1] = r;
-				fail = 1;
-			}
-		}
-	} while(fail);
+	for(i = 1; i < len; i++){
+		r = a[i];
+		for(j = i; j > 0 && ccclkup(a[j-1]) > ccclkup(r); j--)
+			a[j] = a[j-1];
+		a[j] = r;
+	}
 }
 
-char*
-fullutfnorm(char *s, int n)
+static int
+boundary(Rune r)
 {
-	Rune r, peek;
-	char *p, *p2;
+	return !(qclkup(r) & (Qnfcno|Qnfcmay));
+}
 
-	p = s;
-	if(fullrune(p, n) == 0)
-		return s;
+/*
+ * Stk stores the entire context for a chunk of
+ * an input string that is being normalized.
+ * In accordance to the standard, Unicode text
+ * has no upper bound for the amount of conjoining
+ * (also called non-starter) elements associated with
+ * a base rune. Thus to implement normalization within
+ * reasonable memory constraints we implement the
+ * "Stream-Safe Text Format" as defined in UAX #15 § 13.
+ */
+typedef struct {
+	Rune a[Maxnormctx];
+	Rune *e;
+} Stk;
 
-	p += chartorune(&r, p);
-	n -= (p - s);
+static int
+push(Stk *s, Rune c)
+{
+	int n, l;
+	Rune r2, b[Maxdecomp];
+	Rune *p = b + nelem(b) - 1;
 
-	if((r >= LBase && r <= LLast) || (r >= SBase && r <= SLast)){
-		do {
-			if(fullrune(p, n) == 0)
-				return s;
-			p2 = p + chartorune(&peek, p);
-			n -= (p2 - p);
-			p = p2;
-		} while(n > 0 && (peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast));
-		if(n <= 0)
-			return s;
-		return p;
+	for(*p = c; c = _runedecomp(c, &r2); *p = c){
+		assert(p > b);
+		if(r2 != 0)
+			*p-- = r2;
 	}
 
-	do {
-		if(fullrune(p, n) == 0)
-			return s;
-		p2 = p + chartorune(&peek, p);
-		n -= (p2 - p);
-		p = p2;
-		if(ccclkup(peek) == 0)
-			return p;
-	} while(n > 0);
-
-	return s;
+	n = b + nelem(b) - p;
+	l = nelem(s->a) - (s->e - s->a);
+	if(n > l){
+		werrstr("runenorm: buffer overflow");
+		return -1;
+	}
+	l -= n;
+	for(; n > 0; n--)
+		*s->e++ = *p++;
+	return l;
 }
 
-Rune*
-fullrunenorm(Rune *r, int n)
+/*
+ * Worst case recomposition, this happens when we have to compose
+ * two runes who both have a CCC of zero.
+ */
+static void
+worstrecomp(Stk *s)
 {
-	Rune *e, *p;
+	int done;
+	Rune c, *p, *rp;
 
-	p = r;
-	e = p + n;
+	for(done = 0; done == 0;){
+		done = 1;
+		for(p = s->a; p+1 < s->e; p++){
+			c = _runerecomp(p[0], p[1]);
+			if(c == 0)
+				continue;
+			done = 0;
+			*p = c;
+			for(rp = p+1; rp < s->e-1; rp++)
+				rp[0] = rp[1];
+			s->e--;
+			p--;
+		}
+	}
+}
 
-	if((*p >= LBase && *p <= LLast) || (*p >= SBase && *p <= SLast)){
-		p++;
-		while(p < e && (*p >= VBase && *p <= VLast) || (*p > TBase && *p <= TLast))
-			p++;
+static void
+cccrecomp(Stk *s)
+{
+	Rune c, *p, *rp;
 
-		if(p >= e)
-			return r;
-		return p;
+	for(p = s->a + 1; p < s->e; p++){
+		c  = _runerecomp(s->a[0], *p);
+		if(c != 0){
+			s->a[0] = c;
+			for(rp = p; rp < s->e-1; rp++){
+				rp[0] = rp[1];
+			}
+			s->e--;
+			p--;
+		} else while(p + 1 < s->e && ccclkup(p[0]) == ccclkup(p[1]))
+			p++;
 	}
+}
 
-	for(; p < e && p + 1 < e; p++)
-		if(ccclkup(p[1]) == 0)
-			return p + 1;
+void
+norminit(Norm *n, int compose, void *ctx, long (*getrune)(void*))
+{
+	memset(n, 0, sizeof *n);
+	n->ctx = ctx;
+	n->getrune = getrune;
+	n->compose = compose;
+	n->obuf.e = n->obuf.a;
+	n->ibuf.e = n->ibuf.a;
+}
 
+int NORMDEBUG;
+
+static long
+peekrune(Norm *n)
+{
+	long r;
+
+	if(n->ibuf.e > n->ibuf.a)
+		return n->ibuf.e[-1];
+
+	r = n->getrune(n->ctx);
+	if(r >= 0)
+		*n->ibuf.e++ = r;
 	return r;
 }
 
-static int
-runenorm(Rune *dst, Rune *src, char *sdst, char *ssrc, int max, int compose)
+static long
+getrune(Norm *n)
 {
-	Rune c, r[2], _stack[32];
-	Rune *p, *stack, *sp, *tp;
-	char *strp, *strstop;
-	Rune *rp, *rrp;
-	Rune *stop;
-	Rune peek;
-	int w, w2, size;
-	int mode;
+	if(n->ibuf.e > n->ibuf.a)
+		return *--n->ibuf.e;
+	return n->getrune(n->ctx);
+}
 
-	if(src){
-		mode = 1;
-		p = src;
-		stop = dst + (max - 1);
-		strp = "";
-		strstop = nil;
-	} else {
-		mode = 0;
-		p = L"";
-		stop = nil;
-		strp = ssrc;
-		strstop = sdst + (max - 1);
-	}
+long
+normpull(Norm *n, Rune *rdst, long max, int flush)
+{
+	Rune *rp, *re;
+	Stk stk;
+	Rune *dot;
+	int r;
+	long c;
 
-	stack = _stack + nelem(_stack)/2;
-	size = 0;
-	w = w2 = 0;
-	while(*strp || *p){
-		if(mode)
-			c = *p;
-		else
-			w = chartorune(&c, strp);
-
-		sp = stack - 1;
-		tp = stack;
-		_runedecomp(r, c);
-		while(r[0] != 0){
-			c = r[0];
-			if(r[1] != 0){
-				*sp-- = r[1];
-				if(sp == _stack)
-					break;
-			}
-			_runedecomp(r, c);
+	rp = rdst;
+	re = rdst + max;
+	dot = nil;
+	c = 0;
+	while(rp < re){
+		if(n->obuf.e != n->obuf.a){
+			memcpy(stk.a, n->obuf.a, (n->obuf.e - n->obuf.a)*sizeof(Rune));
+			stk.e = stk.a + (n->obuf.e - n->obuf.a);
+			n->obuf.e = n->obuf.a;
+			c = stk.a[0];
+			goto Flush;
 		}
 
-		*sp = c;
-		if(mode)
-			peek = p[1];
-		else
-			w2 = chartorune(&peek, strp+w);
-
-		if((*sp >= LBase && *sp <= LLast) || (*sp >= SBase && *sp <= SLast)){
-			while(peek != 0 && (peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast)){
-				*tp++ = peek;
-				if(mode){
-					p++;
-					peek = p[1];
-				} else {
-					strp += w;
-					w = w2;
-					w2 = chartorune(&peek, strp+w);
+		stk.e = stk.a;
+		c = getrune(n);
+		if(c < 0)
+			break;
+		push(&stk, c);
+		c = peekrune(n);
+		if(stk.e == stk.a+1 && stk.a[0] < Runeself && c < Runeself && c >= 0)
+			goto Flush;
+		while(c >= 0 && ccclkup(c) != 0){
+			r = push(&stk, getrune(n));
+			c = peekrune(n);
+			if(r > 2)
+				continue;
+			if(ccclkup(stk.a[0]) != 0){
+				assert(r > 0);
+				r--;
+			} else
+				assert(r >= 0);
+			if(r == 0 || (c == 0x0344 && r < 2)){
+				/* in reverse */
+				if(r > 0){
+					getrune(n);
+					*n->ibuf.e++ = 0x301;
+					*n->ibuf.e++ = 0x308;
 				}
-				if(tp == _stack + nelem(_stack))
-					break;
+				*n->ibuf.e++ = 0x034F;
+				break;
 			}
 		}
-		while(peek != 0 && ccclkup(peek) != 0){
-			_runedecomp(r, peek);
-			if(r[1] != 0){
-				if(tp+1 >= _stack + nelem(_stack))
-					break;
-				*tp++ = r[0];
-				*tp++ = r[1];
-			} else if(r[0] != 0)
-				*tp++ = r[0];
-			else
-				*tp++ = peek;
+		if(stk.e - stk.a > 1)
+			runecccsort(stk.a, stk.e - stk.a);
 
-			if(mode){
-				p++;
-				peek = p[1];
-			} else {
-				strp += w;
-				w = w2;
-				w2 = chartorune(&peek, strp+w);
-			}
-			if(tp == _stack + nelem(_stack))
-				break;
-		}
-		runecccsort(sp, tp - sp);
+		if(!n->compose)
+			goto Flush;
 
-		if(compose && ccclkup(*sp) == 0){
-			for(rp = sp + 1; rp < tp; rp++){
-				r[0] = *sp;
-				r[1] = *rp;
-				c = _runerecomp(r);
-				if(c != 0){
-					*sp = c;
-					for(rrp = rp; rrp > sp; rrp--)
-						*rrp = rrp[-1];
-					sp++;
-				} else while(rp + 1 < tp && ccclkup(*rp) == ccclkup(*(rp+1)))
-					rp++;
+		if(ccclkup(stk.e[-1]) == 0){
+			Rune tmp;
+			while(c >= 0 && (!boundary(c) || !boundary(_runedecomp(c, &tmp)))){
+				if(push(&stk, getrune(n)) == -1){
+					*n->ibuf.e++ = c;
+					for(r = 0; r < Maxdecomp; r++)
+						*n->ibuf.e++ = *--stk.e;
+					break;
+				}
+				c = peekrune(n);
 			}
-		}
+			worstrecomp(&stk);
+		} else if(ccclkup(stk.a[0]) == 0)
+			cccrecomp(&stk);
 
-		for(; sp < tp; sp++){
-			if(mode){
-				if(dst < stop)
-					*dst++ = *sp;
-				size++;
-			} else {
-				w2 = runelen(*sp);
-				if(sdst+w2 < strstop)
-					sdst += runetochar(sdst, sp);
-				size += w2;
+Flush:
+		if(flush || c >= 0)
+			for(dot = stk.a; dot < stk.e; dot++){
+				if(rp == re)
+					goto Out;
+				*rp++ = *dot;
 			}
-		}
-		if(mode)
-			p++;
-		else
-			strp += w;
+		dot = nil;
+		if(c < 0)
+			break;
 	}
-	if(mode)
-		*dst = 0;
-	else
-		*sdst = 0;
-	return size;
+Out:
+	if(c < 0 && !flush){
+		while(stk.e > stk.a)
+			*n->ibuf.e++ = *--stk.e;
+	}
+	if(dot != nil){
+		memcpy(n->obuf.a, dot, (stk.e - dot) * sizeof(Rune));
+		n->obuf.e = n->obuf.a + (stk.e - dot);
+	}
+
+	return rp - rdst;
 }
 
-int
-runecomp(Rune *dst, Rune *src, int max)
+typedef struct {
+	Rune *s, *p;
+	int n;
+} Rctx;
+
+static long
+runegetrune(void *ctx)
 {
-	return runenorm(dst, src, nil, nil, max, 1);
+	Rctx *c;
+
+	c = ctx;
+	if(c->p >= c->s + c->n)
+		return -1;
+	return *c->p++;
 }
 
-int
-runedecomp(Rune *dst, Rune *src, int max)
+static long
+runedostr(Rune *dst, long ndst, Rune *src, long nsrc, int comp)
 {
-	return runenorm(dst, src, nil, nil, max, 0);
+	Rctx c;
+	Norm n;
+
+	c.s = c.p = src;
+	c.n = nsrc;
+	norminit(&n, comp, &c, runegetrune);
+	return normpull(&n, dst, ndst, 1);
 }
 
-int
-utfcomp(char *dst, char *src, int max)
+long
+runecomp(Rune *dst, long ndst, Rune *src, long nsrc)
 {
-	return runenorm(nil, nil, dst, src, max, 1);
+	return runedostr(dst, ndst, src, nsrc, 1);
 }
 
-int
-utfdecomp(char *dst, char *src, int max)
+long
+runedecomp(Rune *dst, long ndst, Rune *src, long nsrc)
 {
-	return runenorm(nil, nil, dst, src, max, 0);	
+	return runedostr(dst, ndst, src, nsrc, 0);
+}
+
+typedef struct {
+	char *s, *p;
+	int n;
+} Uctx;
+
+static long
+utfgetrune(void *ctx)
+{
+	Uctx *c;
+	Rune r;
+
+	c = ctx;
+	if(c->p >= c->s + c->n)
+		return -1;
+	c->p += chartorune(&r, c->p);
+	return r;
+}
+
+static long
+utfdostr(char *dst, long ndst, char *src, long nsrc, int comp)
+{
+	Uctx c;
+	Norm n;
+	Rune buf[Maxnormctx];
+	long i, w;
+	char *e, *p;
+
+	c.s = c.p = src;
+	c.n = nsrc;
+	norminit(&n, comp, &c, utfgetrune);
+	for(p = dst, e = dst + ndst; p < e;){
+		w = normpull(&n, buf, nelem(buf), 1);
+		if(w == 0)
+			break;
+		for(i = 0; i < w; i++){
+			if(p + runelen(buf[i]) >= e)
+				break;
+			p += runetochar(p, buf+i);
+		}
+	}
+	return p - dst;
+}
+
+long
+utfcomp(char *dst, long ndst, char *src, long nsrc)
+{
+	return utfdostr(dst, ndst, src, nsrc, 1);
+}
+
+long
+utfdecomp(char *dst, long ndst, char *src, long nsrc)
+{
+	return utfdostr(dst, ndst, src, nsrc, 0);
 }
--- a/sys/src/libc/test/runenorm.c
+++ b/sys/src/libc/test/runenorm.c
@@ -2,6 +2,9 @@
 #include <libc.h>
 #include <bio.h>
 
+//Annoying to get a gauge of how broken if we bail on the first failure
+#define print sysfatal
+
 static int
 estrtoul(char *s)
 {
@@ -14,31 +17,245 @@
 	return code;
 }
 
+#pragma	   varargck    type  "V"   Rune*
+static int
+vrunefmt(Fmt *f)
+{
+	Rune *s;
+	int n;
+
+	s = va_arg(f->args, Rune*);
+	n = fmtprint(f, "%S(", s);
+	for(; *s != 0; s++)
+		n += fmtprint(f, "%X ", *s);
+	n += fmtprint(f, ")");
+	return n;
+}
+
+typedef struct {
+	Rune src[64];
+	Rune nfc[70];
+	Rune nfd[70];
+} Line;
+
+typedef struct {
+	Rune *s, *p;
+	int n;
+} Ctx;
+
+static long
+getrune(void *ctx)
+{
+	Ctx *c;
+
+	c = ctx;
+	if(c->p >= c->s + c->n)
+		return -1;
+	return *c->p++;
+}
+
+static void
+testline(Line *l)
+{
+	Norm rd, rc;
+	static Ctx ctx;
+	Rune out1[70];
+	Rune out2[70];
+	int i, n;
+
+	norminit(&rd, 0, &ctx, getrune);
+	norminit(&rc, 1, &ctx, getrune);
+	ctx.s = ctx.p = l->src;
+	ctx.n = runestrlen(l->src) + 1;
+
+	n = normpull(&rd, out1, nelem(out1), 1);
+	if(out1[n-1] != '\0')
+		sysfatal("norm ate null");
+	if(runestrcmp(l->nfd, out1) != 0)
+		print("(1) %V %V %V\n", l->src, l->nfd, out1);
+
+	ctx.p = ctx.s;
+	n = normpull(&rc, out2, nelem(out2), 1);
+	if(out2[n-1] != '\0')
+		sysfatal("norm ate null");
+	if(runestrcmp(l->nfc, out2) != 0)
+		print("(2) %V %V %V\n", l->src, l->nfc, out2);
+
+	ctx.p = ctx.s;
+	i = 0;
+	do {
+		n = normpull(&rd, out1 + i, 1, 1);
+		i += n;
+	} while(n != 0);
+	if(runestrcmp(l->nfd, out1) != 0)
+		print("rune-by-rune nfd fail %V %V\n", l->nfd, out1);
+
+	ctx.p = ctx.s;
+	i = 0;
+	do {
+		n = normpull(&rc, out2 + i, 1, 1);
+		i += n;
+	} while(n != 0);
+	if(runestrcmp(l->nfc, out2) != 0)
+		print("rune-by-rune nfc fail %V %V\n", l->nfc, out2);
+}
+
+static void
+testutfline(Line *l)
+{
+	char out1[128], out2[128];
+	char buf1[128], buf2[128], buf3[128];
+
+	snprint(buf1, sizeof buf1, "%S", l->src);
+	snprint(buf2, sizeof buf2, "%S", l->nfd);
+	snprint(buf3, sizeof buf3, "%S", l->nfc);
+
+	utfcomp(out1, sizeof out1, buf1, strlen(buf1)+1);
+	utfdecomp(out2, sizeof out2, buf1, strlen(buf1)+1);
+
+	if(strcmp(out1, buf3) != 0)
+		print("utfline fail nfc: %s %s %s\n", buf1, buf3, out1);
+
+	if(strcmp(out2, buf2) != 0)
+		print("utfline fail nfd: %s %s %s\n", buf1, buf2, out2);
+}
+
+static void
+testedge(void)
+{
+	Line l;
+	int i;
+
+	/*
+	 * Test that we correctly break up long attacher
+	 * runs with U+034F
+	 */
+	l.src[0] = L'U';
+	for(i = 1; i < nelem(l.src)-1; i++)
+		l.src[i] = 0x308;
+	l.src[nelem(l.src)-1] = 0;
+
+	memcpy(l.nfd, l.src, sizeof(l.src));
+	l.nfd[31] = 0x34F;
+	l.nfd[62] = 0x34F;
+	l.nfd[63] = 0x308;
+	l.nfd[64] = 0x308;
+	l.nfd[65] = 0;
+
+	memcpy(l.nfc, l.src, sizeof l.src);
+	l.nfc[0] = 0xDC;
+	l.nfc[30] = 0x34F;
+	l.nfc[61] = 0x34F;
+	l.nfc[62] = 0x308;
+	l.nfc[63] = 0x308;
+	l.nfc[64] = 0;
+
+	testline(&l);
+
+	for(i = 0; i < nelem(l.src)-1; i++)
+		l.src[i] = 0x308;
+	l.src[nelem(l.src)-1] = 0;
+	memcpy(l.nfd, l.src, sizeof l.src);
+	l.nfd[30] = 0x034F;
+	l.nfd[61] = 0x034F;
+	l.nfd[62] = 0x308;
+	l.nfd[63] = 0x308;
+	l.nfd[64] = 0x308;
+	l.nfd[65] = 0;
+	memcpy(l.nfc, l.nfd, sizeof l.nfd);
+
+	testline(&l);
+
+	l.src[0] = L'U';
+	for(i = 1; i < 30; i++)
+		l.src[i] = 0x300;
+	l.src[i++] = 0x0344;
+	l.src[i] = 0;
+	memcpy(l.nfc, l.src, sizeof l.src);
+	memcpy(l.nfd, l.src, sizeof l.src);
+	l.nfd[30] = 0x034F;
+	l.nfd[31] = 0x308;
+	l.nfd[32] = 0x301;
+	l.nfd[33] = 0;
+	l.nfc[0] = 0xD9;
+	l.nfc[29] = 0x034F;
+	l.nfc[30] = 0x308;
+	l.nfc[31] = 0x301;
+	l.nfc[32] = 0;
+
+	testline(&l);
+
+	for(i = 0; i < 59; i++)
+		l.src[i] = 0x300;
+	l.src[i++] = 0x0344;
+	l.src[i] = 0;
+	memcpy(l.nfd, l.src, sizeof l.src);
+	l.nfd[30] = 0x34F;
+	l.nfd[59] = 0x300;
+	l.nfd[60] = 0x34F;
+	l.nfd[61] = 0x308;
+	l.nfd[62] = 0x301;
+	l.nfd[63] = 0x0;
+	memcpy(l.nfc, l.nfd, sizeof l.nfd);
+
+	testline(&l);
+
+	l.src[0] = 0x16D63;
+	for(i = 1; i < 33; i++)
+		l.src[i] = 0x16D67;
+	l.src[i] = 0;
+	memcpy(l.nfd, l.src, sizeof l.src);
+
+	l.nfc[0] = 0x16D6A;
+	for(i = 1; i < 1+15; i++)
+		l.nfc[i] = 0x16D68;
+	l.nfc[i] = 0;
+
+	testline(&l);
+}
+
+static void
+testeof(void)
+{
+	Norm n;
+	Ctx ctx;
+	Rune buf[16], out[16], *p;
+
+	buf[0] = L'u';
+	ctx.s = ctx.p = buf;
+	ctx.n = 1;
+
+	norminit(&n, 1, &ctx, getrune);
+	for(p = out; ctx.p < ctx.s + ctx.n;){
+		p += normpull(&n, p, sizeof out - (p - out), 0);
+	}
+	if(p != out)
+		print("norm flushed when we told it not to");
+	buf[0] = L'̈';
+	buf[1] = L'a';
+	ctx.s = ctx.p = buf;
+	ctx.n = 2;
+	normpull(&n, p, sizeof out - (p - out), 1);
+	if(out[0] != L'ü' || out[1] != 'a')
+		print("eof test fail: %X\n", out[0]);
+}
+
 void
 main(int, char)
 {
-	Rune buffer1[64];
-	Rune buffer2[64];
-	char utfbuff1[128];
-	char utfbuff2[128];
-	char srctmp[128], tmp1[128], tmp2[128];
 	char *fields[10];
 	char *runes[32];
 	char *p;
-	int n, n2;
+	int n;
 	int i;
-	uint fail;
 	Biobuf *b;
 
+	fmtinstall('V', vrunefmt);
 	b = Bopen("/lib/ucd/NormalizationTest.txt", OREAD);
 	if(b == nil)
 		sysfatal("could not load composition exclusions: %r");
 
-	struct {
-		Rune src[32];
-		Rune nfc[32];
-		Rune nfd[32];
-	} test;
+	Line test;
 	while((p = Brdline(b, '\n')) != nil){
 		p[Blinelen(b)-1] = 0;
 		if(p[0] == 0 || p[0] == '#' || p[0] == '@')
@@ -59,34 +276,10 @@
 			test.nfd[i] = estrtoul(runes[i]);
 		test.nfd[i] = 0;
 
-		n = runecomp(buffer1, test.src, nelem(buffer1));
-		n2 = runedecomp(buffer2, test.src, nelem(buffer2));
-		fail = 0;
-
-		if(runestrcmp(buffer1, test.nfc) != 0)
-			fail |= 1<<0;
-		if(runestrcmp(buffer2, test.nfd) != 0)
-			fail |= 1<<1;
-		if(fail)
-			print("%d %d %S %S %S %S %S\n", fail, i, test.src, test.nfd, test.nfc, buffer2, buffer1);
-		assert(n == runestrlen(test.nfc));
-		assert(n2 == runestrlen(test.nfd));
-
-		snprint(srctmp, sizeof tmp1, "%S", test.src);
-		snprint(tmp1, sizeof tmp1, "%S", test.nfc);
-		snprint(tmp2, sizeof tmp2, "%S", test.nfd);
-
-		n = utfcomp(utfbuff1, srctmp, nelem(utfbuff1));
-		n2 = utfdecomp(utfbuff2, srctmp, nelem(utfbuff2));
-
-		if(strcmp(utfbuff1, tmp1) != 0)
-			fail |= 1<<2;
-		if(strcmp(utfbuff2, tmp2) != 0)
-			fail |= 1<<3;
-		if(fail)
-			print("%d %d %s %s %s %s %s\n", fail, i, srctmp, tmp2, tmp1, utfbuff2, utfbuff1);
-		assert(n == strlen(tmp1));
-		assert(n2 == strlen(tmp2));
+		testline(&test);
+		testutfline(&test);
 	}
+	testedge();
+	testeof();
 	exits(nil);
 }
--