shithub: psxe

Download patch

ref: 15d0a415dbe35dc7f83cf3b7371868038c400064
parent: 1b4d507ed55ffb8d297129468c2e3aa3c2653dec
author: allkern <lisandroaalarcon@gmail.com>
date: Thu Oct 19 19:50:30 EDT 2023

Improve RTPS yet again

Please make the tests stop failing

--- a/psx/cpu.c
+++ b/psx/cpu.c
@@ -1686,9 +1686,9 @@
 }
 
 void gte_handle_irgb_read(psx_cpu_t* cpu) {
-    int r = (cpu->cop2_dr.ir[1] / 0x80) & 0x1f;
-    int g = (cpu->cop2_dr.ir[2] / 0x80) & 0x1f;
-    int b = (cpu->cop2_dr.ir[3] / 0x80) & 0x1f;
+    int r = CLAMP(cpu->cop2_dr.ir[1] >> 7, 0x00, 0x1f);
+    int g = CLAMP(cpu->cop2_dr.ir[2] >> 7, 0x00, 0x1f);
+    int b = CLAMP(cpu->cop2_dr.ir[3] >> 7, 0x00, 0x1f);
 
     cpu->cop2_dr.irgb = r | (g << 5) | (b << 10);
 }
@@ -1925,10 +1925,10 @@
 
 #define R_FLAG cpu->cop2_cr.flag
 
-uint32_t gte_clamp_mac0(psx_cpu_t* cpu, int64_t value) {
-    if (value < -0x80000000) {
+uint64_t gte_clamp_mac0(psx_cpu_t* cpu, int64_t value) {
+    if (value < -((int64_t)0x80000000)) {
         R_FLAG |= 0x8000;
-    } else if (value > 0x7fffffff) {
+    } else if (value > ((int64_t)0x7fffffff)) {
         R_FLAG |= 0x10000;
     }
 
@@ -1942,7 +1942,7 @@
         R_FLAG |= (uint32_t)(0x40000000 >> (i - 1));
     }
 
-    return (value << 20) >> 20;
+    return ((value << 20) >> 20) >> cpu->gte_sf;
 }
 
 int16_t gte_clamp_ir0(psx_cpu_t* cpu, int64_t value) {
@@ -2201,18 +2201,27 @@
 void gte_rtp(psx_cpu_t* cpu, int i, int dq) {
     R_FLAG = 0;
 
-    int64_t vx = (int16_t)cpu->cop2_dr.v[i].p[0];
-    int64_t vy = (int16_t)cpu->cop2_dr.v[i].p[1];
-    int64_t vz = (int32_t)cpu->cop2_dr.v[i].z;
+    int64_t vx = (int64_t)(int16_t)cpu->cop2_dr.v[i].p[0];
+    int64_t vy = (int64_t)(int16_t)cpu->cop2_dr.v[i].p[1];
+    int64_t vz = (((int64_t)((int32_t)cpu->cop2_dr.v[i].z)) << 32) >> 32;
 
-    int64_t mac1 = (I64((int32_t)R_TRX) << 12) + I64((int16_t)R_RT11) * vx + I64((int16_t)R_RT12) * vy + I64((int16_t)R_RT13) * vz;
-    int64_t mac2 = (I64((int32_t)R_TRY) << 12) + I64((int16_t)R_RT21) * vx + I64((int16_t)R_RT22) * vy + I64((int16_t)R_RT23) * vz;
-    int64_t mac3 = (I64((int32_t)R_TRZ) << 12) + I64((int16_t)R_RT31) * vx + I64((int16_t)R_RT32) * vy + I64((int16_t)R_RT33) * vz;
+    int64_t mac1 = (I64((int32_t)R_TRX) << 12) + (I64((int16_t)R_RT11) * vx) + (I64((int16_t)R_RT12) * vy) + (I64((int16_t)R_RT13) * vz);
+    int64_t mac2 = (I64((int32_t)R_TRY) << 12) + (I64((int16_t)R_RT21) * vx) + (I64((int16_t)R_RT22) * vy) + (I64((int16_t)R_RT23) * vz);
+    int64_t mac3 = (I64((int32_t)R_TRZ) << 12) + (I64((int16_t)R_RT31) * vx) + (I64((int16_t)R_RT32) * vy) + (I64((int16_t)R_RT33) * vz);
 
-    R_MAC1 = gte_clamp_mac(cpu, 1, mac1) >> cpu->gte_sf;
-    R_MAC2 = gte_clamp_mac(cpu, 2, mac2) >> cpu->gte_sf;
-    R_MAC3 = gte_clamp_mac(cpu, 3, mac3) >> cpu->gte_sf;
+    R_MAC1 = gte_clamp_mac(cpu, 1, mac1);
+    R_MAC2 = gte_clamp_mac(cpu, 2, mac2);
+    R_MAC3 = gte_clamp_mac(cpu, 3, mac3);
 
+    // log_set_quiet(0);
+    // log_fatal("mac1=%016x (%08x), vx=%016x, vy=%016x, vz=%016x (%08x), trx=%016x, res=%016x",
+    //     mac1, R_MAC1,
+    //     vx, vy, vz, cpu->cop2_dr.v[i].z,
+    //     I64((int32_t)R_TRX) << 12,
+    //     (I64((int32_t)R_TRX) << 12) + I64((int16_t)R_RT11) * vx + I64((int16_t)R_RT12) * vy + I64((int16_t)R_RT13) * vz
+    // );
+    // log_set_quiet(1);
+
     R_IR1 = gte_clamp_ir(cpu, 1, I64((int32_t)R_MAC1), cpu->gte_lm);
     R_IR2 = gte_clamp_ir(cpu, 2, I64((int32_t)R_MAC2), cpu->gte_lm);
     R_IR3 = gte_clamp_ir_z(cpu, I64((int32_t)R_MAC3), mac3 >> 12, cpu->gte_lm);
@@ -2224,8 +2233,8 @@
 
     int32_t h_div_sz = gte_divide(cpu, R_H, R_SZ3);
 
-    int32_t x = gte_clamp_mac0(cpu, I64((int32_t)R_OFX) + (I64((int16_t)R_IR1) * h_div_sz)) >> 16;
-    int32_t y = gte_clamp_mac0(cpu, I64((int32_t)R_OFY) + (I64((int16_t)R_IR2) * h_div_sz)) >> 16;
+    int64_t x = gte_clamp_mac0(cpu, I64((int32_t)R_OFX) + (I64((int16_t)R_IR1) * h_div_sz)) >> 16;
+    int64_t y = gte_clamp_mac0(cpu, I64((int32_t)R_OFY) + (I64((int16_t)R_IR2) * h_div_sz)) >> 16;
 
     R_SXY0 = R_SXY1;
     R_SXY1 = R_SXY2;
@@ -2234,6 +2243,7 @@
 
     if (dq) {
         int64_t mac0 = I64(R_DQB) + (h_div_sz * I64(R_DQA));
+
         R_MAC0 = gte_clamp_mac0(cpu, mac0);
         R_IR0 = gte_clamp_ir0(cpu, mac0 >> 12);
     }
--