ref: ea93dce7dc0ab842cbef53e04ca8b819e8a5dd66
parent: 52710ef5429087506ad2bdd47b96eb2534d21f7a
author: rodri <rgl@antares-labs.eu>
date: Sun Apr 6 20:37:28 EDT 2025
libgeometry: unroll s?mulm3? for an up to 2-4x speedup only tested on 386 and amd64.
--- a/sys/src/libgeometry/matrix.c
+++ b/sys/src/libgeometry/matrix.c
@@ -34,26 +34,28 @@
void
mulm(Matrix a, Matrix b)
{
- int i, j, k;
- Matrix tmp;
+ double t0, t1, t2;
- for(i = 0; i < 3; i++)
- for(j = 0; j < 3; j++){
- tmp[i][j] = 0;
- for(k = 0; k < 3; k++)
- tmp[i][j] += a[i][k]*b[k][j];
- }
- memmove(a, tmp, 3*3*sizeof(double));
+ t0 = a[0][0]; t1 = a[0][1]; t2 = a[0][2];
+ a[0][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+ a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+ a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+ t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2];
+ a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+ a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+ a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+ t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2];
+ a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
+ a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
+ a[2][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
}
void
smulm(Matrix m, double s)
{
- int i, j;
-
- for(i = 0; i < 3; i++)
- for(j = 0; j < 3; j++)
- m[i][j] *= s;
+ m[0][0] *= s; m[0][1] *= s; m[0][2] *= s;
+ m[1][0] *= s; m[1][1] *= s; m[1][2] *= s;
+ m[2][0] *= s; m[2][1] *= s; m[2][2] *= s;
}
void
@@ -197,26 +199,37 @@
void
mulm3(Matrix3 a, Matrix3 b)
{
- int i, j, k;
- Matrix3 tmp;
+ double t0, t1, t2, t3;
- for(i = 0; i < 4; i++)
- for(j = 0; j < 4; j++){
- tmp[i][j] = 0;
- for(k = 0; k < 4; k++)
- tmp[i][j] += a[i][k]*b[k][j];
- }
- memmove(a, tmp, 4*4*sizeof(double));
+ t0 = a[0][0]; t1 = a[0][1]; t2 = a[0][2]; t3 = a[0][3];
+ a[0][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+ a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+ a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+ a[0][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+ t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2]; t3 = a[1][3];
+ a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+ a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+ a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+ a[1][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+ t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2]; t3 = a[2][3];
+ a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+ a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+ a[2][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+ a[2][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+ t0 = a[3][0]; t1 = a[3][1]; t2 = a[3][2]; t3 = a[3][3];
+ a[3][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
+ a[3][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
+ a[3][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
+ a[3][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
}
void
smulm3(Matrix3 m, double s)
{
- int i, j;
-
- for(i = 0; i < 4; i++)
- for(j = 0; j < 4; j++)
- m[i][j] *= s;
+ m[0][0] *= s; m[0][1] *= s; m[0][2] *= s; m[0][3] *= s;
+ m[1][0] *= s; m[1][1] *= s; m[1][2] *= s; m[1][3] *= s;
+ m[2][0] *= s; m[2][1] *= s; m[2][2] *= s; m[2][3] *= s;
+ m[3][0] *= s; m[3][1] *= s; m[3][2] *= s; m[3][3] *= s;
}
void
--
⑨