ref: ace08f5c1ae63e6b10191cb3240fa7323b9ce0c5
parent: 08340c398d4549aa374ca9e378eb72919aeb837c
author: allkern <lisandroaalarcon@gmail.com>
date: Tue Oct 24 10:12:32 EDT 2023
Implement MVMVA
--- a/psx/cpu.c
+++ b/psx/cpu.c
@@ -2065,88 +2065,14 @@
return MIN(0x1ffff, res);
}
-void gte_interpolate_color(psx_cpu_t* cpu, int mac1, int mac2, int mac3) {- // PSX SPX is very convoluted about this and it lacks some info
- // [MAC1, MAC2, MAC3] = MAC + (FC - MAC) * IR0;< --- for NCDx only
- // Note: Above "[IR1,IR2,IR3]=(FC-MAC)" is saturated to - 8000h..+7FFFh(ie. as if lm = 0)
- // Details on "MAC+(FC-MAC)*IR0":
- // [IR1, IR2, IR3] = (([RFC, GFC, BFC] SHL 12) - [MAC1, MAC2, MAC3]) SAR(sf * 12)
- // [MAC1, MAC2, MAC3] = (([IR1, IR2, IR3] * IR0) + [MAC1, MAC2, MAC3])
- // [MAC1, MAC2, MAC3] = [MAC1, MAC2, MAC3] SAR(sf * 12);< --- for NCDx / NCCx
- // [IR1, IR2, IR3] = [MAC1, MAC2, MAC3]
-
- // R_MAC1 = (int)(gte_clamp_mac(cpu, 1, ((long)R_RFC << 12) - mac1) >> cpu->gte_sf);
- // R_MAC2 = (int)(gte_clamp_mac(cpu, 2, ((long)R_GFC << 12) - mac2) >> cpu->gte_sf);
- // R_MAC3 = (int)(gte_clamp_mac(cpu, 3, ((long)R_BFC << 12) - mac3) >> cpu->gte_sf);
-
- // R_IR1 = gte_clamp_ir(cpu, 1, R_MAC1, 0);
- // R_IR2 = gte_clamp_ir(cpu, 2, R_MAC2, 0);
- // R_IR3 = gte_clamp_ir(cpu, 3, R_MAC3, 0);
-
- // R_MAC1 = (int)(gte_clamp_mac(cpu, 1, ((long)R_IR1 * R_IR0) + mac1) >> cpu->gte_sf);
- // R_MAC2 = (int)(gte_clamp_mac(cpu, 2, ((long)R_IR2 * R_IR0) + mac2) >> cpu->gte_sf);
- // R_MAC3 = (int)(gte_clamp_mac(cpu, 3, ((long)R_IR3 * R_IR0) + mac3) >> cpu->gte_sf);
-
- // R_IR1 = gte_clamp_ir(cpu, 1, R_MAC1, cpu->gte_lm);
- // R_IR2 = gte_clamp_ir(cpu, 2, R_MAC2, cpu->gte_lm);
- // R_IR3 = gte_clamp_ir(cpu, 3, R_MAC3, cpu->gte_lm);
-}
-
-void gte_ncds(psx_cpu_t* cpu, int r) {- //Normal color depth cue (single vector) //329048 WIP FLAGS
- //In: V0 = Normal vector(for triple variants repeated with V1 and V2),
- //BK = Background color, RGBC = Primary color / code, LLM = Light matrix, LCM = Color matrix, IR0 = Interpolation value.
-
- // uint16_t vrx = (&cpu->cop2_dr.v0)[(3 * r) + 0];
- // uint16_t vry = (&cpu->cop2_dr.v0)[(3 * r) + 1];
- // uint16_t vrz = (&cpu->cop2_dr.v0)[(3 * r) + 2];
-
- // // [IR1, IR2, IR3] = [MAC1, MAC2, MAC3] = (LLM * V0) SAR(sf * 12)
- // R_MAC1 = (int)(gte_clamp_mac(cpu, 1, (long)R_L11 * vrx + R_L12 * vry + R_L13 * vrz) >> cpu->gte_sf);
- // R_MAC2 = (int)(gte_clamp_mac(cpu, 2, (long)R_L21 * vrx + R_L22 * vry + R_L23 * vrz) >> cpu->gte_sf);
- // R_MAC3 = (int)(gte_clamp_mac(cpu, 3, (long)R_L31 * vrx + R_L32 * vry + R_L33 * vrz) >> cpu->gte_sf);
-
- // R_IR1 = gte_clamp_ir(cpu, 1, R_MAC1, cpu->gte_lm);
- // R_IR2 = gte_clamp_ir(cpu, 2, R_MAC2, cpu->gte_lm);
- // R_IR3 = gte_clamp_ir(cpu, 3, R_MAC3, cpu->gte_lm);
-
- // [IR1, IR2, IR3] = [MAC1, MAC2, MAC3] = (BK * 1000h + LCM * IR) SAR(sf * 12)
- // WARNING each multiplication can trigger mac flags so the check is needed on each op! Somehow this only affects the color matrix and not the light one
- // R_MAC1 = (int)(gte_clamp_mac(cpu, 1, gte_clamp_mac(cpu, 1, gte_clamp_mac(cpu, 1, (long)R_RBK * 0x1000 + R_LM1R * R_IR1) + (long)R_LM1G * R_IR2) + (long)R_LM1B * R_IR3) >> cpu->gte_sf);
- // R_MAC2 = (int)(gte_clamp_mac(cpu, 2, gte_clamp_mac(cpu, 2, gte_clamp_mac(cpu, 2, (long)R_GBK * 0x1000 + R_LM2R * R_IR1) + (long)R_LM2G * R_IR2) + (long)R_LM2B * R_IR3) >> cpu->gte_sf);
- // R_MAC3 = (int)(gte_clamp_mac(cpu, 3, gte_clamp_mac(cpu, 3, gte_clamp_mac(cpu, 3, (long)R_BBK * 0x1000 + R_LM3R * R_IR1) + (long)R_LM3G * R_IR2) + (long)R_LM3B * R_IR3) >> cpu->gte_sf);
-
- // R_IR1 = gte_clamp_ir(cpu, 1, R_MAC1, cpu->gte_lm);
- // R_IR2 = gte_clamp_ir(cpu, 2, R_MAC2, cpu->gte_lm);
- // R_IR3 = gte_clamp_ir(cpu, 3, R_MAC3, cpu->gte_lm);
-
- // [MAC1, MAC2, MAC3] = [R * IR1, G * IR2, B * IR3] SHL 4;< --- for NCDx / NCCx
- // R_MAC1 = (int)gte_clamp_mac(cpu, 1, ((long)R_RGBCR * R_IR1) << 4);
- // R_MAC2 = (int)gte_clamp_mac(cpu, 2, ((long)R_RGBCG * R_IR2) << 4);
- // R_MAC3 = (int)gte_clamp_mac(cpu, 3, ((long)R_RGBCB * R_IR3) << 4);
-
- // gte_interpolate_color(cpu, R_MAC1, R_MAC2, R_MAC3);
-
- // // Color FIFO = [MAC1 / 16, MAC2 / 16, MAC3 / 16, CODE]
- // R_RGB0 = R_RGB1;
- // R_RGB1 = R_RGB2;
-
- // uint32_t rgb2;
-
- // rgb2 = gte_clamp_rgb(cpu, 1, R_MAC1 >> 4);
- // rgb2 = gte_clamp_rgb(cpu, 2, R_MAC2 >> 4) << 8;
- // rgb2 = gte_clamp_rgb(cpu, 3, R_MAC3 >> 4) << 16;
- // rgb2 = R_RGBCC << 24;
-}
-
void psx_cpu_i_gte(psx_cpu_t* cpu) {DO_PENDING_LOAD;
cpu->gte_sf = ((cpu->opcode & 0x80000) != 0) * 12;
cpu->gte_lm = (cpu->opcode & 0x400) != 0;
- cpu->gte_mmat = (cpu->opcode >> 13) & 3;
- cpu->gte_mvec = (cpu->opcode >> 15) & 3;
- cpu->gte_tvec = (cpu->opcode >> 17) & 3;
+ cpu->gte_cv = (cpu->opcode >> 13) & 3; // opcode bits 13-14; psx_gte_i_mvmva uses this to pick the added vector (tr, bk, fc or zero)
+ cpu->gte_v = (cpu->opcode >> 15) & 3; // opcode bits 15-16; psx_gte_i_mvmva uses this to pick the multiplied vector (v[0..2] or IR1..IR3)
+ cpu->gte_mx = (cpu->opcode >> 17) & 3; // opcode bits 17-18; psx_gte_i_mvmva uses this to pick the matrix (rt, l, lr or one built from other registers)
g_psx_gte_table[cpu->opcode & 0x3f](cpu);
}
@@ -2221,6 +2147,16 @@
#define R_GC2 cpu->cop2_dr.rgb[2].c[1]
#define R_BC2 cpu->cop2_dr.rgb[2].c[2]
#define R_CD2 cpu->cop2_dr.rgb[2].c[3]
+// Accessors for the nine entries of cop2_cr.l. R_L11..R_L32 are packed two
+// per m[] word, in that order; the ninth entry (R_L33) lives in m33.
+#define R_L11 cpu->cop2_cr.l.m[0].c[0]
+#define R_L12 cpu->cop2_cr.l.m[0].c[1]
+#define R_L13 cpu->cop2_cr.l.m[1].c[0]
+#define R_L21 cpu->cop2_cr.l.m[1].c[1]
+#define R_L22 cpu->cop2_cr.l.m[2].c[0]
+#define R_L23 cpu->cop2_cr.l.m[2].c[1]
+#define R_L31 cpu->cop2_cr.l.m[3].c[0]
+#define R_L32 cpu->cop2_cr.l.m[3].c[1]
+#define R_L33 cpu->cop2_cr.l.m33
#define GTE_RTP_DQ(i) { \R_FLAG = 0; \
@@ -2348,12 +2284,142 @@
R_BC2 = gte_clamp_rgb(cpu, 3, R_MAC3 >> 4);
}
+// Shorthand used only inside psx_gte_i_mvmva: these expand to fields of that
+// function's local operands `v` (vector), `mx` (matrix) and `cv` (added
+// vector), and are #undef'd again right after the function.
+#define R_VX v.p[0]
+#define R_VY v.p[1]
+#define R_VZ v.z
+// Matrix entries are packed two per m[] word, with the ninth in m33.
+#define R_MX11 mx.m[0].c[0]
+#define R_MX12 mx.m[0].c[1]
+#define R_MX13 mx.m[1].c[0]
+#define R_MX21 mx.m[1].c[1]
+#define R_MX22 mx.m[2].c[0]
+#define R_MX23 mx.m[2].c[1]
+#define R_MX31 mx.m[3].c[0]
+#define R_MX32 mx.m[3].c[1]
+#define R_MX33 mx.m33
+#define R_CV1 cv.x
+#define R_CV2 cv.y
+#define R_CV3 cv.z
+
+// MVMVA: multiplies a selected vector by a selected 3x3 matrix, adds a
+// selected vector scaled by 0x1000, and stores the result in MAC1..MAC3 and
+// IR1..IR3.
+//
+// Operands are chosen by the fields decoded in psx_cpu_i_gte:
+//   cpu->gte_mx: 0 = rt, 1 = l, 2 = lr, 3 = matrix assembled from RC, IR0,
+//                RT13 and RT22
+//   cpu->gte_v : 0..2 = v[0..2], 3 = [IR1, IR2, IR3]
+//   cpu->gte_cv: 0 = tr, 1 = bk, 2 = fc (special-cased below), 3 = zero
+//
+// FLAG is cleared on entry. The final IR clamp honours cpu->gte_lm.
void psx_gte_i_mvmva(psx_cpu_t* cpu) {
-    log_fatal("mvmva: Unimplemented GTE instruction");
+    R_FLAG = 0;
+
+    gte_matrix_t mx = { 0 };
+    gte_vertex_t v = { 0 };
+    gte_vec3_t cv = { 0 };
+
+    // Matrix operand.
+    switch (cpu->gte_mx) {
+        case 0: mx = cpu->cop2_cr.rt; break;
+        case 1: mx = cpu->cop2_cr.l; break;
+        case 2: mx = cpu->cop2_cr.lr; break;
+        case 3: {
+            // Selector 3 has no matrix register of its own; the entries are
+            // assembled from RC, IR0, RT13 and RT22.
+            // Negate after shifting: `-R_RC << 4` parses as `(-R_RC) << 4`,
+            // and left-shifting a negative value is undefined behaviour in C.
+            R_MX11 = -(R_RC << 4);
+            R_MX12 = R_RC << 4;
+            R_MX13 = R_IR0;
+            R_MX21 = R_RT13;
+            R_MX22 = R_RT13;
+            R_MX23 = R_RT13;
+            R_MX31 = R_RT22;
+            R_MX32 = R_RT22;
+            R_MX33 = R_RT22;
+        } break;
+    }
+
+    // Vector operand.
+    switch (cpu->gte_v) {
+        case 0: case 1: case 2:
+            v = cpu->cop2_dr.v[cpu->gte_v];
+            break;
+
+        case 3: {
+            v.p[0] = R_IR1;
+            v.p[1] = R_IR2;
+            v.z = R_IR3;
+        } break;
+    }
+
+    // Added vector operand.
+    switch (cpu->gte_cv) {
+        case 0: cv = cpu->cop2_cr.tr; break;
+        case 1: cv = cpu->cop2_cr.bk; break;
+        case 2: cv = cpu->cop2_cr.fc; break;
+        case 3: {
+            cv.x = 0;
+            cv.y = 0;
+            cv.z = 0;
+        } break;
+    }
+
+    // The added vector is scaled with `* 0x1000` rather than `<< 12`: its
+    // components may be negative, and left-shifting a negative value is
+    // undefined behaviour in C.
+
+    // Bugged case (CV=FC): MAC only receives the Y and Z products. The
+    // CV + X-product term is still run through gte_clamp_mac/gte_clamp_ir,
+    // but its value is discarded (the calls are kept for their side effects).
+    if (cpu->gte_cv == 2) {
+        R_MAC1 = gte_clamp_mac(cpu, 1, (int64_t)(I64(R_MX12) * I64(R_VY)) + (I64(R_MX13) * I64(R_VZ)));
+        R_MAC2 = gte_clamp_mac(cpu, 2, (int64_t)(I64(R_MX22) * I64(R_VY)) + (I64(R_MX23) * I64(R_VZ)));
+        R_MAC3 = gte_clamp_mac(cpu, 3, (int64_t)(I64(R_MX32) * I64(R_VY)) + (I64(R_MX33) * I64(R_VZ)));
+
+        int64_t mac1 = gte_clamp_mac(cpu, 1, (((int64_t)R_CV1) * 0x1000) + (I64(R_MX11) * I64(R_VX)));
+        int64_t mac2 = gte_clamp_mac(cpu, 2, (((int64_t)R_CV2) * 0x1000) + (I64(R_MX21) * I64(R_VX)));
+        int64_t mac3 = gte_clamp_mac(cpu, 3, (((int64_t)R_CV3) * 0x1000) + (I64(R_MX31) * I64(R_VX)));
+
+        gte_clamp_ir(cpu, 1, mac1, 0);
+        gte_clamp_ir(cpu, 2, mac2, 0);
+        gte_clamp_ir(cpu, 3, mac3, 0);
+    } else {
+        // Regular case: MAC = CV * 0x1000 + MX * V, one row per component.
+        R_MAC1 = gte_clamp_mac(cpu, 1, (((int64_t)R_CV1) * 0x1000) + (I64(R_MX11) * I64(R_VX)) + (I64(R_MX12) * I64(R_VY)) + (I64(R_MX13) * I64(R_VZ)));
+        R_MAC2 = gte_clamp_mac(cpu, 2, (((int64_t)R_CV2) * 0x1000) + (I64(R_MX21) * I64(R_VX)) + (I64(R_MX22) * I64(R_VY)) + (I64(R_MX23) * I64(R_VZ)));
+        R_MAC3 = gte_clamp_mac(cpu, 3, (((int64_t)R_CV3) * 0x1000) + (I64(R_MX31) * I64(R_VX)) + (I64(R_MX32) * I64(R_VY)) + (I64(R_MX33) * I64(R_VZ)));
+    }
+
+    // IR1..IR3 are the clamped copies of MAC1..MAC3.
+    R_IR1 = gte_clamp_ir(cpu, 1, R_MAC1, cpu->gte_lm);
+    R_IR2 = gte_clamp_ir(cpu, 2, R_MAC2, cpu->gte_lm);
+    R_IR3 = gte_clamp_ir(cpu, 3, R_MAC3, cpu->gte_lm);
}
+// Drop the MVMVA-local operand shorthands so they cannot leak into the
+// instruction handlers that follow.
+#undef R_VX
+#undef R_VY
+#undef R_VZ
+#undef R_MX11
+#undef R_MX12
+#undef R_MX13
+#undef R_MX21
+#undef R_MX22
+#undef R_MX23
+#undef R_MX31
+#undef R_MX32
+#undef R_MX33
+#undef R_CV1
+#undef R_CV2
+#undef R_CV3
+
void psx_gte_i_ncds(psx_cpu_t* cpu) {- gte_ncds(cpu, 0);
+ R_FLAG = 0; // stub for now: only FLAG is cleared; the steps sketched below are all still commented out
+
+ // int64_t vx = (int64_t)((int16_t)cpu->cop2_dr.v[0].p[0]);
+ // int64_t vy = (int64_t)((int16_t)cpu->cop2_dr.v[0].p[1]);
+ // int64_t vz = (int64_t)cpu->cop2_dr.v[0].z;
+
+ // R_MAC1 = gte_clamp_mac(cpu, 1, (int64_t)(R_L11 * vx) + (R_L12 * vy) + (R_L13 * vz));
+ // R_MAC2 = gte_clamp_mac(cpu, 2, (int64_t)(R_L21 * vx) + (R_L22 * vy) + (R_L23 * vz));
+ // R_MAC3 = gte_clamp_mac(cpu, 3, (int64_t)(R_L31 * vx) + (R_L32 * vy) + (R_L33 * vz));
+ // R_IR1 = Lm_B1(R_MAC1, lm);
+ // R_IR2 = Lm_B2(R_MAC2, lm);
+ // R_IR3 = Lm_B3(R_MAC3, lm);
+ // R_MAC1 = A1(int44((int64_t)R_RBK << 12) + (R_LR1 * R_IR1) + (R_LR2 * R_IR2) + (R_LR3 * R_IR3));
+ // R_MAC2 = A2(int44((int64_t)R_GBK << 12) + (R_LG1 * R_IR1) + (R_LG2 * R_IR2) + (R_LG3 * R_IR3));
+ // R_MAC3 = A3(int44((int64_t)R_BBK << 12) + (R_LB1 * R_IR1) + (R_LB2 * R_IR2) + (R_LB3 * R_IR3));
+ // R_IR1 = Lm_B1(R_MAC1, lm);
+ // R_IR2 = Lm_B2(R_MAC2, lm);
+ // R_IR3 = Lm_B3(R_MAC3, lm);
+ // R_MAC1 = A1(((R_RC << 4) * R_IR1) + (R_IR0 * Lm_B1(A1(((int64_t)R_RFC << 12) - ((R_RC << 4) * R_IR1)), 0)));
+ // R_MAC2 = A2(((R_GC << 4) * R_IR2) + (R_IR0 * Lm_B2(A2(((int64_t)R_GFC << 12) - ((R_GC << 4) * R_IR2)), 0)));
+ // R_MAC3 = A3(((R_BC << 4) * R_IR3) + (R_IR0 * Lm_B3(A3(((int64_t)R_BFC << 12) - ((R_BC << 4) * R_IR3)), 0)));
+ // R_IR1 = Lm_B1(R_MAC1, lm);
+ // R_IR2 = Lm_B2(R_MAC2, lm);
+ // R_IR3 = Lm_B3(R_MAC3, lm);
+ // R_RGB0 = R_RGB1;
+ // R_RGB1 = R_RGB2;
+ // R_CD2 = R_CODE;
+ // R_RC2 = Lm_C1(R_MAC1 >> 4);
+ // R_GC2 = Lm_C2(R_MAC2 >> 4);
+ // R_BC2 = Lm_C3(R_MAC3 >> 4);
}
void psx_gte_i_cdp(psx_cpu_t* cpu) {--- a/psx/cpu.h
+++ b/psx/cpu.h
@@ -67,7 +67,7 @@
typedef struct __attribute__((__packed__)) { union {uint32_t xy;
- uint16_t p[2];
+ int16_t p[2];
};
int16_t z;
@@ -76,7 +76,7 @@
typedef struct __attribute__((__packed__)) { union {uint32_t xy;
- uint16_t p[2];
+ int16_t p[2]; // the two 16-bit halves of xy, now signed instead of unsigned
};
} gte_vec2_t;
@@ -94,7 +94,7 @@
typedef struct __attribute__((__packed__)) { union {uint32_t u32;
- uint16_t c[2];
+ int16_t c[2];
} m[4];
int16_t m33;
@@ -143,9 +143,9 @@
int gte_lm;
int gte_sf;
- int gte_mmat;
- int gte_mvec;
- int gte_tvec;
+ int gte_mx;
+ int gte_v;
+ int gte_cv;
int64_t s_mac0;
int64_t s_mac3;
--
⑨