shithub: riscv

Download patch

ref: ea93dce7dc0ab842cbef53e04ca8b819e8a5dd66
parent: 52710ef5429087506ad2bdd47b96eb2534d21f7a
author: rodri <rgl@antares-labs.eu>
date: Sun Apr 6 20:37:28 EDT 2025

libgeometry: unroll s?mulm3? for an up to 2-4x speedup

only tested on 386 and amd64.

--- a/sys/src/libgeometry/matrix.c
+++ b/sys/src/libgeometry/matrix.c
@@ -34,26 +34,28 @@
 void
 mulm(Matrix a, Matrix b)
 {
-	int i, j, k;
-	Matrix tmp;
+	double t0, t1, t2;
 
-	for(i = 0; i < 3; i++)
-		for(j = 0; j < 3; j++){
-			tmp[i][j] = 0;
-			for(k = 0; k < 3; k++)
-				tmp[i][j] += a[i][k]*b[k][j];
-		}
-	memmove(a, tmp, 3*3*sizeof(double));
+	t0 = a[0][0]; t1 = a[0][1]; t2 = a[0][2];
+	a[0][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+	a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+	a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+	t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2];
+	a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+	a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+	a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+	t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2];
+	a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+	a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+	a[2][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
 }
 
 void
 smulm(Matrix m, double s)
 {
-	int i, j;
-
-	for(i = 0; i < 3; i++)
-		for(j = 0; j < 3; j++)
-			m[i][j] *= s;
+	m[0][0] *= s; m[0][1] *= s; m[0][2] *= s;
+	m[1][0] *= s; m[1][1] *= s; m[1][2] *= s;
+	m[2][0] *= s; m[2][1] *= s; m[2][2] *= s;
 }
 
 void
@@ -197,26 +199,37 @@
 void
 mulm3(Matrix3 a, Matrix3 b)
 {
-	int i, j, k;
-	Matrix3 tmp;
+	double t0, t1, t2, t3;
 
-	for(i = 0; i < 4; i++)
-		for(j = 0; j < 4; j++){
-			tmp[i][j] = 0;
-			for(k = 0; k < 4; k++)
-				tmp[i][j] += a[i][k]*b[k][j];
-		}
-	memmove(a, tmp, 4*4*sizeof(double));
+	t0 = a[0][0]; t1 = a[0][1]; t2 = a[0][2]; t3 = a[0][3];
+	a[0][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+	a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+	a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+	a[0][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+	t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2]; t3 = a[1][3];
+	a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+	a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+	a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+	a[1][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+	t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2]; t3 = a[2][3];
+	a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+	a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+	a[2][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+	a[2][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+	t0 = a[3][0]; t1 = a[3][1]; t2 = a[3][2]; t3 = a[3][3];
+	a[3][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+	a[3][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+	a[3][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+	a[3][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
 }
 
 void
 smulm3(Matrix3 m, double s)
 {
-	int i, j;
-
-	for(i = 0; i < 4; i++)
-		for(j = 0; j < 4; j++)
-			m[i][j] *= s;
+	m[0][0] *= s; m[0][1] *= s; m[0][2] *= s; m[0][3] *= s;
+	m[1][0] *= s; m[1][1] *= s; m[1][2] *= s; m[1][3] *= s;
+	m[2][0] *= s; m[2][1] *= s; m[2][2] *= s; m[2][3] *= s;
+	m[3][0] *= s; m[3][1] *= s; m[3][2] *= s; m[3][3] *= s;
 }
 
 void
--