shithub: front

Download patch

ref: 00f50e30f9b10dfd7920d2744e56c35e6b67a5f4
parent: 5a87aff711a9f6196f983b4c25e2dd98b562289f
author: rodri <rgl@antares-labs.eu>
date: Sun Aug 17 20:14:30 EDT 2025

libmemdraw: arbitrary color chans and other improvements for memaffinewarp()

I imported some of the helper routines from draw.c and drawtest.c,
tied them together to make a (get|put)pixel() pair and made an initial
attempt at optimizing them.  Also separated the (bilinear) filtering
code and put it into its own function for modularity (now we can
easily plug any filter we want in the pipeline), along with an example
nearest neighbor one which is disabled for now.

The fixed-point Foint has disappeared in favor of using Point, and
fix_xform() was simplified to avoid unnecessary operations, exploiting
the affine matrix properties and the fact that the transformed point
always has a w coordinate of 1.

Finally, for the inner loop the calls to fix_xform() were replaced by
an incremental method.

--- a/sys/man/2/memdraw
+++ b/sys/man/2/memdraw
@@ -399,9 +399,6 @@
 and the
 .B repl
 bit, but applied to the affine mappings.
-Both
-.BR Memimage s
-must have the same channel description and use 8-bit channels.
 .PP
 .I Mkwarp
 converts a 3×3 row-major matrix of
--- a/sys/src/libmemdraw/warp.c
+++ b/sys/src/libmemdraw/warp.c
@@ -3,6 +3,9 @@
 #include <draw.h>
 #include <memdraw.h>
 
+/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
+#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
+
 /* 25.7 fixed-point number operations */
 
 #define FMASK		((1<<7) - 1)
@@ -12,76 +15,278 @@
 #define fixfrac(n)	((n)&FMASK)
 #define lerp(a,b,t)	((a) + ((((b) - (a))*(t))>>7))
 
-typedef struct Foint Foint;
 typedef struct Sampler Sampler;
+typedef struct Blitter Blitter;
 
-struct Foint
+struct Sampler
 {
-	long x, y, w;
+	Memimage *i;
+	uchar *a;
+	Rectangle r;
+	int bpl;
+	int cmask;
+	long Δx, Δy;
 };
 
-struct Sampler
+struct Blitter
 {
 	Memimage *i;
 	uchar *a;
-	Rectangle r;
-	int bpp;
 	int bpl;
 	int cmask;
 };
 
-static Foint
-fix_xform(Foint p, Warp m)
+static Point
+fix_xform(Point p, Warp m)
 {
-	return (Foint){
-		fixmul(p.x, m[0][0]) + fixmul(p.y, m[0][1]) + fixmul(p.w, m[0][2]),
-		fixmul(p.x, m[1][0]) + fixmul(p.y, m[1][1]) + fixmul(p.w, m[1][2]),
-		fixmul(p.x, m[2][0]) + fixmul(p.y, m[2][1]) + fixmul(p.w, m[2][2])
+	return (Point){
+		fixmul(p.x, m[0][0]) + fixmul(p.y, m[0][1]) + m[0][2],
+		fixmul(p.x, m[1][0]) + fixmul(p.y, m[1][1]) + m[1][2]
 	};
 }
 
 static ulong
-sample(Sampler *s, Point p)
+getpixel(Sampler *s, Point pt)
 {
-	ulong c;
+	uchar *p, r, g, b, a;
+	ulong val, chan, ctype, ov, v;
+	int nb, off, bpp, npack;
 
+	val = 0;
+	a = 0xFF;
+	r = g = b = 0xAA;	/* garbage */
+	p = s->a + pt.y*s->bpl + (pt.x*s->i->depth >> 3);
+
+	/* pixelbits() */
+	switch(bpp=s->i->depth){
+	case 1:
+	case 2:
+	case 4:
+		npack = 8/bpp;
+		off = pt.x%npack;
+		val = p[0] >> bpp*(npack-1-off);
+		val &= s->cmask;
+		break;
+	case 8:
+		val = p[0];
+		break;
+	case 16:
+		val = p[0]|(p[1]<<8);
+		break;
+	case 24:
+		val = p[0]|(p[1]<<8)|(p[2]<<16);
+		break;
+	case 32:
+		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
+		break;
+	}
+
+	/* fast path for true color images */
+	switch(s->i->chan){
+	case RGBA32:
+		return val;
+	case XRGB32:
+	case RGB24:
+		return val<<8|0xFF;
+	case ABGR32:
+		return p[3]|(p[2]<<8)|(p[1]<<16)|(p[0]<<24);
+	case XBGR32:
+	case BGR24:
+		return p[2]<<8|(p[1]<<16)|(p[0]<<24)|0xFF;
+	}
+
+	while(bpp<32){
+		val |= val<<bpp;
+		bpp *= 2;
+	}
+
+	/* imgtorgba() */
+	for(chan=s->i->chan; chan; chan>>=8){
+		ctype = TYPE(chan);
+		nb = s->i->nbits[ctype];
+		ov = v = val & s->i->mask[ctype];
+		val >>= nb;
+
+		while(nb < 8){
+			v |= v<<nb;
+			nb *= 2;
+		}
+		v >>= (nb-8);
+
+		switch(ctype){
+		case CRed:
+			r = v;
+			break;
+		case CGreen:
+			g = v;
+			break;
+		case CBlue:
+			b = v;
+			break;
+		case CAlpha:
+			a = v;
+			break;
+		case CGrey:
+			r = g = b = v;
+			break;
+		case CMap:
+			p = s->i->cmap->cmap2rgb+3*ov;
+			r = p[0];
+			g = p[1];
+			b = p[2];
+			break;
+		}
+	}
+	return (r<<24)|(g<<16)|(b<<8)|a;
+}
+
+static void
+putpixel(Blitter *blt, Point dp, ulong rgba)
+{
+	uchar *p, r, g, b, a, m;
+	ulong chan, ctype, ov, v;
+	int nb, off, bpp, npack, sh, mask;
+
+	bpp = blt->i->depth;
+	p = blt->a + dp.y*blt->bpl + (dp.x*bpp >> 3);
+
+	/* fast path for true color images */
+	switch(blt->i->chan){
+	case RGBA32:
+		p[0] = rgba;
+		p[1] = rgba>>8;
+		p[2] = rgba>>16;
+		p[3] = rgba>>24;
+		return;
+	case XRGB32:
+	case RGB24:
+		p[0] = rgba>>8;
+		p[1] = rgba>>16;
+		p[2] = rgba>>24;
+		return;
+	case ABGR32:
+		p[0] = rgba>>24;
+		p[1] = rgba>>16;
+		p[2] = rgba>>8;
+		p[3] = rgba;
+		return;
+	case XBGR32:
+	case BGR24:
+		p[0] = rgba>>24;
+		p[1] = rgba>>16;
+		p[2] = rgba>>8;
+		return;
+	}
+
+	/* rgbatoimg() */
+	v = 0;
+	r = rgba>>24;
+	g = rgba>>16;
+	b = rgba>>8;
+	a = rgba;
+	for(chan=blt->i->chan; chan; chan>>=8){
+		ctype = TYPE(chan);
+		nb = blt->i->nbits[ctype];
+		switch(ctype){
+		case CRed:
+			v |= (r>>(8-nb)) << blt->i->shift[CRed];
+			break;
+		case CGreen:
+			v |= (g>>(8-nb)) << blt->i->shift[CGreen];
+			break;
+		case CBlue:
+			v |= (b>>(8-nb)) << blt->i->shift[CBlue];
+			break;
+		case CAlpha:
+			v |= (a>>(8-nb)) << blt->i->shift[CAlpha];
+			break;
+		case CMap:
+			m = blt->i->cmap->rgb2cmap[(r>>4)*256+(g>>4)*16+(b>>4)];
+			v |= (m>>(8-nb)) << blt->i->shift[CMap];
+			break;
+		case CGrey:
+			m = RGB2K(r,g,b);
+			v |= (m>>(8-nb)) << blt->i->shift[CGrey];
+			break;
+		}
+	}
+
+	/* blit op */
+	ov = p[0]|p[1]<<8|p[2]<<16|p[3]<<24;
+
+	mask = blt->cmask;
+	if(blt->i->depth < 8){
+		npack = 8/bpp;
+		off = dp.x%npack;
+		sh = bpp*(npack-1-off);
+		mask <<= sh;
+		v <<= sh;
+	}
+	v = (ov &~ mask) | (v & mask);
+	p[0] = v;
+	p[1] = v>>8;
+	p[2] = v>>16;
+	p[3] = v>>24;
+}
+
+static ulong
+sample1(Sampler *s, Point p)
+{
 	if(p.x >= s->r.min.x && p.x < s->r.max.x
-	&& p.y >= s->r.min.y && p.y < s->r.max.y){
-inside:
-		c = *(ulong*)(s->a + p.y*s->bpl + p.x*s->bpp);
-		c &= s->cmask;
-		return c;
-	}else if(s->i->flags & Frepl){
+	&& p.y >= s->r.min.y && p.y < s->r.max.y)
+		return getpixel(s, p);
+	else if(s->i->flags & Frepl){
 		p = drawrepl(s->r, p);
-		goto inside;
+		return getpixel(s, p);
 	}
 	/* edge handler: constant */
 	return 0;
 }
 
+static ulong
+bilinear(Sampler *s, Point p)
+{
+	ulong c00, c01, c10, c11, o;
+	uchar c0, c1, c;
+	int i;
+
+	c00 = sample1(s, p);
+	p.x++;
+	c01 = sample1(s, p);
+	p.x--; p.y++;
+	c10 = sample1(s, p);
+	p.x++;
+	c11 = sample1(s, p);
+
+	/* avoid processing alpha if possible */
+	i = s->i->flags & Falpha? 0: 8;
+	o = s->i->flags & Falpha? 0: 0xFF;
+	for(; i < 4*8; i += 8){
+		c0 = c00>>i; c0 = lerp(c0, c01>>i & 0xFF, s->Δx);
+		c1 = c10>>i; c1 = lerp(c1, c11>>i & 0xFF, s->Δx);
+		c  = lerp(c0, c1, s->Δy);
+		o |= (ulong)c << i;
+	}
+	return o;
+}
+
+static ulong
+nearest(Sampler *s, Point p)
+{
+	return sample1(s, p);
+}
+
+static ulong (*sample)(Sampler*, Point) = bilinear;
+
 int
 memaffinewarp(Memimage *d, Rectangle r, Memimage *s, Point sp0, Warp m)
 {
 	Sampler samp;
-	Point sp, dp;
-	Foint p2;
+	Blitter blit;
+	Point sp, dp, p2, p2₀;
 	Rectangle dr;
-	long Δx, Δy;
-	ulong c00, c01, c10, c11;
-	uchar c0, c1, c, *a0, *a;
-	int bpp, bpl;
+	ulong c;
 
-	for(c00 = s->chan; c00; c00 >>= 8)
-		if(NBITS(c00) != 8){
-			werrstr("unsupported image format");
-			return -1;
-		}
-
-	if(d->chan != s->chan){
-		werrstr("image formats differ");
-		return -1;
-	}
-
 	dr = d->clipr;
 	rectclip(&dr, d->r);
 	if(rectclip(&r, dr) == 0)
@@ -93,56 +298,37 @@
 
 	samp.i = s;
 	samp.a = s->data->bdata + s->zero;
-	samp.bpp = s->depth >> 3;
 	samp.bpl = sizeof(ulong)*s->width;
 	samp.cmask = (1ULL << s->depth) - 1;
-	a0 = d->data->bdata + d->zero;
-	bpp = d->depth >> 3;
-	bpl = sizeof(ulong)*d->width;
 
-	for(dp.y = r.min.y; dp.y < r.max.y; dp.y++)
+	blit.i = d;
+	blit.a = d->data->bdata + d->zero;
+	blit.bpl = sizeof(ulong)*d->width;
+	blit.cmask = (1ULL << d->depth) - 1;
+
+	/*
+	 * incremental affine warping technique from:
+	 * 	“Fast Affine Transform for Real-Time Machine Vision Applications”,
+	 * 	Lee, S., Lee, GG., Jang, E.S., Kim, WY,
+	 * 	Intelligent Computing.  ICIC 2006. LNCS, vol 4113.
+	 */
+	p2 = p2₀ = fix_xform((Point){int2fix(r.min.x - dr.min.x), int2fix(r.min.y - dr.min.y)}, m);
+	for(dp.y = r.min.y; dp.y < r.max.y; dp.y++){
 	for(dp.x = r.min.x; dp.x < r.max.x; dp.x++){
-		a = a0 + dp.y*bpl + dp.x*bpp;
-		p2 = fix_xform((Foint){int2fix(dp.x - dr.min.x), int2fix(dp.y - dr.min.y), 1<<7}, m);
+		samp.Δx = fixfrac(p2.x);
+		samp.Δy = fixfrac(p2.y);
 
-		Δx = fixfrac(p2.x);
-		Δy = fixfrac(p2.y);
+		sp.x = sp0.x + fix2int(p2.x);
+		sp.y = sp0.y + fix2int(p2.y);
 
-		sp.x = fix2int(p2.x);
-		sp.y = fix2int(p2.y);
-		sp.x += sp0.x;
-		sp.y += sp0.y;
-		c00 = sample(&samp, sp);
-		sp.x++;
-		c01 = sample(&samp, sp);
-		sp.x--; sp.y++;
-		c10 = sample(&samp, sp);
-		sp.x++;
-		c11 = sample(&samp, sp);
+		c = sample(&samp, sp);
+		putpixel(&blit, dp, c);
 
-		switch(s->nchan){
-		case 4:
-			c0 = c00 >> 8*3 & 0xFF; c0 = lerp(c0, c01 >> 8*3 & 0xFF, Δx);
-			c1 = c10 >> 8*3 & 0xFF; c1 = lerp(c1, c11 >> 8*3 & 0xFF, Δx);
-			c  = lerp(c0, c1, Δy);
-			a[3] = c;
-		case 3:
-			c0 = c00 >> 8*2 & 0xFF; c0 = lerp(c0, c01 >> 8*2 & 0xFF, Δx);
-			c1 = c10 >> 8*2 & 0xFF; c1 = lerp(c1, c11 >> 8*2 & 0xFF, Δx);
-			c  = lerp(c0, c1, Δy);
-			a[2] = c;
-		case 2:
-			c0 = c00 >> 8*1 & 0xFF; c0 = lerp(c0, c01 >> 8*1 & 0xFF, Δx);
-			c1 = c10 >> 8*1 & 0xFF; c1 = lerp(c1, c11 >> 8*1 & 0xFF, Δx);
-			c  = lerp(c0, c1, Δy);
-			a[1] = c;
-		case 1:
-			c0 = c00 >> 8*0 & 0xFF; c0 = lerp(c0, c01 >> 8*0 & 0xFF, Δx);
-			c1 = c10 >> 8*0 & 0xFF; c1 = lerp(c1, c11 >> 8*0 & 0xFF, Δx);
-			c  = lerp(c0, c1, Δy);
-			a[0] = c;
-		}
+		p2.x += m[0][0];
+		p2.y += m[1][0];
 	}
-
+		p2.x = p2₀.x += m[0][1];
+		p2.y = p2₀.y += m[1][1];
+	}
 	return 0;
 }
--- a/sys/src/libmemdraw/warptest.c
+++ b/sys/src/libmemdraw/warptest.c
@@ -69,6 +69,20 @@
 }
 
 void
+initworkrects(Rectangle *wr, int nwr, Rectangle *fbr)
+{
+	int i, Δy;
+
+	wr[0] = *fbr;
+	Δy = Dy(wr[0])/nwr;
+	wr[0].max.y = wr[0].min.y + Δy;
+	for(i = 1; i < nwr; i++)
+		wr[i] = rectaddpt(wr[i-1], Pt(0,Δy));
+	if(wr[nwr-1].max.y < fbr->max.y)
+		wr[nwr-1].max.y = fbr->max.y;
+}
+
+void
 usage(void)
 {
 	fprint(2, "usage: %s [-s sx sy] [-t tx ty] [-r θ]\n", argv0);
@@ -80,12 +94,16 @@
 {
 	Memimage *dst, *src;
 	Warp w;
-	Rectangle dr;
+	Rectangle dr, *wr;
 	double sx, sy, tx, ty, θ, c, s;
+	int dorepl, parallel, nproc, i;
+	char *nprocs;
 
 	sx = sy = 1;
 	tx = ty = 0;
 	θ = 0;
+	dorepl = 0;
+	parallel = 0;
 	ARGBEGIN{
 	case 's':
 		sx = strtod(EARGF(usage()), nil);
@@ -98,6 +116,12 @@
 	case 'r':
 		θ = strtod(EARGF(usage()), nil)*DEG;
 		break;
+	case 'R':
+		dorepl++;
+		break;
+	case 'p':
+		parallel++;
+		break;
 	default:
 		usage();
 	}ARGEND;
@@ -108,6 +132,8 @@
 		sysfatal("memimageinit: %r");
 
 	src = readmemimage(0);
+	if(dorepl)
+		src->flags |= Frepl;
 	c = cos(θ);
 	s = sin(θ);
 	Matrix S = {
@@ -135,21 +161,49 @@
 	mkwarp(w, T);
 
 	profbegin();
-	dr = rectaddpt(Rect(0,0,Dx(dst->r)/2,Dy(dst->r)/2), dst->r.min);
-	dst->clipr = dr;
-	if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
-		sysfatal("memaffinewarp: %r");
-	dr = rectaddpt(Rect(Dx(dst->r)/2+1,0,Dx(dst->r),Dy(dst->r)/2), dst->r.min);
-	dst->clipr = dst->r;
-	if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
-		sysfatal("memaffinewarp: %r");
-	dr = rectaddpt(Rect(0,Dy(dst->r)/2+1,Dx(dst->r)/2,Dy(dst->r)), dst->r.min);
-	if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
-		sysfatal("memaffinewarp: %r");
-	dr = rectaddpt(Rect(Dx(dst->r)/2+1,Dy(dst->r)/2+1,Dx(dst->r),Dy(dst->r)), dst->r.min);
-	dst->clipr = dr;
-	if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
-		sysfatal("memaffinewarp: %r");
+	if(parallel){
+		nprocs = getenv("NPROC");
+		if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
+			nproc = 1;
+		free(nprocs);
+
+		wr = malloc(nproc*sizeof(Rectangle));
+		initworkrects(wr, nproc, &dr);
+
+		for(i = 0; i < nproc; i++){
+			switch(rfork(RFPROC|RFMEM)){
+			case -1:
+				sysfatal("rfork: %r");
+			case 0:
+				if(memaffinewarp(dst, wr[i], src, src->r.min, w) < 0)
+					fprint(2, "[%d] memaffinewarp: %r\n", getpid());
+				exits(nil);
+			}
+		}
+		while(waitpid() != -1)
+			;
+
+		free(wr);
+	}else{
+		if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+			sysfatal("memaffinewarp: %r");
+
+//		dr = rectaddpt(Rect(0,0,Dx(dst->r)/2,Dy(dst->r)/2), dst->r.min);
+//		dst->clipr = dr;
+//		if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+//			sysfatal("memaffinewarp: %r");
+//		dr = rectaddpt(Rect(Dx(dst->r)/2+1,0,Dx(dst->r),Dy(dst->r)/2), dst->r.min);
+//		dst->clipr = dst->r;
+//		if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+//			sysfatal("memaffinewarp: %r");
+//		dr = rectaddpt(Rect(0,Dy(dst->r)/2+1,Dx(dst->r)/2,Dy(dst->r)), dst->r.min);
+//		if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+//			sysfatal("memaffinewarp: %r");
+//		dr = rectaddpt(Rect(Dx(dst->r)/2+1,Dy(dst->r)/2+1,Dx(dst->r),Dy(dst->r)), dst->r.min);
+//		dst->clipr = dr;
+//		if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+//			sysfatal("memaffinewarp: %r");
+	}
 	profend();
 	writememimage(1, dst);
 
--