ref: 0f5a17ffae3057690e59b9f1c2192eae60e251e7
parent: bcbbfec753649cf799d88a496a49bff29157ef1c
author: allkern <lisandroaalarcon@gmail.com>
date: Wed Oct 18 19:50:28 EDT 2023
Improve RTP
--- a/psx/cpu.c
+++ b/psx/cpu.c
@@ -1919,7 +1919,7 @@
#define R_FLAG cpu->cop2_cr.flag
-long gte_clamp_mac0(psx_cpu_t* cpu, int64_t value) {+int32_t gte_clamp_mac0(psx_cpu_t* cpu, int64_t value) { if (value < -0x80000000) {R_FLAG |= 0x8000;
} else if (value > 0x7fffffff) {@@ -1929,7 +1929,7 @@
return value & 0xffffffff;
}
-long gte_clamp_mac(psx_cpu_t* cpu, int i, int64_t value) {+int32_t gte_clamp_mac(psx_cpu_t* cpu, int i, int64_t value) { if (value < -0x80000000000) {R_FLAG |= (uint32_t)(0x8000000 >> (i - 1));
} else if (value > 0x7ffffffffff) {@@ -2010,9 +2010,34 @@
return 0x7fff;
}
- return (short)value;
+ return (int16_t)value;
}
+int32_t gte_clamp_ir_z(psx_cpu_t* cpu, int64_t value, int64_t old, int lm) {+ if (old < -0x8000)
+ R_FLAG |= 0x400000;
+
+ if (old > 0x7fff)
+ R_FLAG |= 0x400000;
+
+ if (lm && (value < 0)) {+ return 0;
+ } else if ((value < -0x8000) && !lm) {+ return -0x8000;
+ } else if (value > 0x7fff) {+ return 0x7fff;
+ }
+
+ return (int16_t)value;
+}
+
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * __builtin_clz has an undefined result for 0, so that input is handled
 * explicitly and reported as 32.
 */
int clz(uint32_t value) {
    return (value != 0) ? __builtin_clz(value) : 32;
}
+
uint32_t gte_divide(psx_cpu_t* cpu, uint16_t n, uint16_t d) {// Overflow
if (n >= d * 2) {@@ -2021,7 +2046,7 @@
return 0x1ffff;
}
- int shift = __builtin_clz(d) - 16;
+ int shift = clz(d) - 16;
int r1 = (d << shift) & 0x7fff;
int r2 = g_psx_gte_unr_table[((r1 + 0x40) >> 7)] + 0x101;
@@ -2028,9 +2053,8 @@
int r3 = ((0x80 - (r2 * (r1 + 0x8000))) >> 8) & 0x1ffff;
uint32_t reciprocal = ((r2 * r3) + 0x80) >> 8;
+ uint32_t res = ((((uint64_t)reciprocal * (n << shift)) + 0x8000) >> 16);
- const uint32_t res = ((((uint64_t)reciprocal * (n << shift)) + 0x8000) >> 16);
-
return MIN(0x1ffff, res);
}
@@ -2169,45 +2193,33 @@
#define R_H cpu->cop2_cr.h
/*
 * Rotate, translate and perspective-transform one input vector.
 *
 * cpu - CPU state holding the COP2 (GTE) data and control registers
 * i   - index of the input vector in cpu->cop2_dr.v[]
 * dq  - when non-zero, MAC0 and IR0 are also computed from DQA/DQB
 *
 * Writes MAC1..MAC3, IR1..IR3, pushes the SZ and SXY FIFOs, and (when dq is
 * set) writes MAC0 and IR0. FLAG is updated by the clamp helpers.
 */
void gte_rtp(psx_cpu_t* cpu, int i, int dq) {
    /*
     * NOTE(review): FLAG is cleared on every call. If a caller invokes this
     * several times for one command, flags raised by earlier vertices are
     * lost -- confirm against the callers.
     */
    R_FLAG = 0;

    int64_t vx = (int16_t)cpu->cop2_dr.v[i].p[0];
    int64_t vy = (int16_t)cpu->cop2_dr.v[i].p[1];
    int64_t vz = (int32_t)cpu->cop2_dr.v[i].z;

    /* Full-width sums: translation (scaled by 4096) plus the matrix row. */
    int64_t mac1 = (I64((int32_t)R_TRX) << 12) + I64((int16_t)R_RT11) * vx + I64((int16_t)R_RT12) * vy + I64((int16_t)R_RT13) * vz;
    int64_t mac2 = (I64((int32_t)R_TRY) << 12) + I64((int16_t)R_RT21) * vx + I64((int16_t)R_RT22) * vy + I64((int16_t)R_RT23) * vz;
    int64_t mac3 = (I64((int32_t)R_TRZ) << 12) + I64((int16_t)R_RT31) * vx + I64((int16_t)R_RT32) * vy + I64((int16_t)R_RT33) * vz;

    /*
     * gte_clamp_mac is declared to return int32_t, so shifting its result
     * would discard the bits above bit 31 of the sum before the shift is
     * applied. It is called here for its effect on FLAG only; the register
     * values are taken from the 64-bit sums, shifted first and truncated
     * afterwards.
     */
    (void)gte_clamp_mac(cpu, 1, mac1);
    (void)gte_clamp_mac(cpu, 2, mac2);
    (void)gte_clamp_mac(cpu, 3, mac3);

    R_MAC1 = (int32_t)(mac1 >> cpu->gte_sf);
    R_MAC2 = (int32_t)(mac2 >> cpu->gte_sf);
    R_MAC3 = (int32_t)(mac3 >> cpu->gte_sf);

    R_IR1 = gte_clamp_ir(cpu, 1, I64((int32_t)R_MAC1), cpu->gte_lm);
    R_IR2 = gte_clamp_ir(cpu, 2, I64((int32_t)R_MAC2), cpu->gte_lm);

    /* IR3 is clamped from MAC3, but its flag check uses mac3 >> 12. */
    R_IR3 = gte_clamp_ir_z(cpu, I64((int32_t)R_MAC3), mac3 >> 12, cpu->gte_lm);

    /* Push the Z FIFO; the new entry always uses mac3 >> 12. */
    R_SZ0 = R_SZ1;
    R_SZ1 = R_SZ2;
    R_SZ2 = R_SZ3;
    R_SZ3 = gte_clamp_sz3(cpu, I64(mac3) >> 12);

    int32_t h_div_sz = gte_divide(cpu, R_H, R_SZ3);

    /*
     * Screen coordinates. As with MAC1..3, the clamp helper is used for its
     * FLAG effect and the shift is applied to the untruncated 64-bit value,
     * so an overflowing sum still reaches gte_clamp_sxy as an out-of-range
     * coordinate instead of wrapping to a small one.
     */
    int64_t sx = I64((int32_t)R_OFX) + (I64((int16_t)R_IR1) * h_div_sz);
    int64_t sy = I64((int32_t)R_OFY) + (I64((int16_t)R_IR2) * h_div_sz);

    (void)gte_clamp_mac0(cpu, sx);
    (void)gte_clamp_mac0(cpu, sy);

    int32_t x = (int32_t)(sx >> 16);
    int32_t y = (int32_t)(sy >> 16);

    /* Push the screen XY FIFO. */
    R_SXY0 = R_SXY1;
    R_SXY1 = R_SXY2;
    R_SX2 = gte_clamp_sxy(cpu, 1, x);
    R_SY2 = gte_clamp_sxy(cpu, 2, y);

    if (dq) {
        /*
         * DQA/DQB are read with the same explicit signed casts as every
         * other register in this function, and IR0 is derived from the
         * 64-bit value rather than from the truncated MAC0 register.
         */
        int64_t depth = h_div_sz * I64((int16_t)R_DQA) + I64((int32_t)R_DQB);

        R_MAC0 = gte_clamp_mac0(cpu, depth);
        R_IR0 = gte_clamp_ir0(cpu, (int32_t)(depth >> 12));
    }
}
void psx_gte_i_rtps(psx_cpu_t* cpu) {@@ -2228,11 +2237,11 @@
}
/*
 * NCLIP: computes
 *   SX0 * (SY1 - SY2) + SX1 * (SY2 - SY0) + SX2 * (SY0 - SY1)
 * from the screen XY FIFO and stores the gte_clamp_mac0 result in MAC0.
 *
 * The coordinates are read as signed 16-bit values and all arithmetic is
 * done in int64_t. Without the casts the operands are promoted from whatever
 * type the FIFO fields are declared with, so an unsigned declaration would
 * turn negative coordinates into large positive ones, and each product would
 * be evaluated in plain int before being widened.
 */
void psx_gte_i_nclip(psx_cpu_t* cpu) {
    const int64_t sx0 = (int16_t)R_SX0;
    const int64_t sy0 = (int16_t)R_SY0;
    const int64_t sx1 = (int16_t)R_SX1;
    const int64_t sy1 = (int16_t)R_SY1;
    const int64_t sx2 = (int16_t)R_SX2;
    const int64_t sy2 = (int16_t)R_SY2;

    int64_t value = sx0 * (sy1 - sy2);
    value += sx1 * (sy2 - sy0);
    value += sx2 * (sy0 - sy1);

    R_MAC0 = gte_clamp_mac0(cpu, value);
}
void psx_gte_i_op(psx_cpu_t* cpu) {--- a/psx/cpu.h
+++ b/psx/cpu.h
@@ -75,8 +75,8 @@
/*
 * Two-component GTE vector.
 *
 * The same 32 bits can be accessed either as one word (xy) or as two
 * unsigned 16-bit halves (p[0], p[1]). Because the halves are unsigned,
 * code that needs signed components must cast them explicitly.
 * Which half of xy each p[] element maps to depends on host byte order.
 */
typedef struct __attribute__((__packed__)) {
    union {
        uint32_t xy;   /* both components as a single word */
        uint16_t p[2]; /* the components individually      */
    };
} gte_vec2_t;
--
⑨