shithub: psxe

Download patch

ref: bcbbfec753649cf799d88a496a49bff29157ef1c
parent: 67df6a218da964b5477cbf3f8038da5a5c2623d5
author: allkern <lisandroaalarcon@gmail.com>
date: Tue Oct 17 19:07:37 EDT 2023

Cleanup

--- a/frontend/main.c
+++ b/frontend/main.c
@@ -16,8 +16,8 @@
     for (int i = 0; i < (size >> 2); i++) {
         uint32_t sample = psx_spu_get_sample(spu);
 
-        int16_t left = (int16_t)(sample & 0xffff);
-        int16_t right = (int16_t)(sample >> 16);
+        int16_t left = (int16_t)(sample & 0xffff) * 2;
+        int16_t right = (int16_t)(sample >> 16) * 2;
 
         *(int16_t*)(&buf[(i << 2) + 0]) += left;
         *(int16_t*)(&buf[(i << 2) + 2]) += right;
--- a/psx/cpu.c
+++ b/psx/cpu.c
@@ -1675,25 +1675,8 @@
     cpu->cop0_r[COP0_SR] |= mode >> 2;
 }
 
-/*
-  cop2r0-1   3xS16 VXY0,VZ0              Vector 0 (X,Y,Z)
-  cop2r2-3   3xS16 VXY1,VZ1              Vector 1 (X,Y,Z)
-  cop2r4-5   3xS16 VXY2,VZ2              Vector 2 (X,Y,Z)
-  cop2r6     4xU8  RGBC                  Color/code value
-  cop2r7     1xU16 OTZ                   Average Z value (for Ordering Table)
-  cop2r8     1xS16 IR0                   16bit Accumulator (Interpolate)
-  cop2r9-11  3xS16 IR1,IR2,IR3           16bit Accumulator (Vector)
-  cop2r12-15 6xS16 SXY0,SXY1,SXY2,SXYP   Screen XY-coordinate FIFO  (3 stages)
-  cop2r16-19 4xU16 SZ0,SZ1,SZ2,SZ3       Screen Z-coordinate FIFO   (4 stages)
-  cop2r20-22 12xU8 RGB0,RGB1,RGB2        Color CRGB-code/color FIFO (3 stages)
-  cop2r23    4xU8  (RES1)                Prohibited
-  cop2r24    1xS32 MAC0                  32bit Maths Accumulators (Value)
-  cop2r25-27 3xS32 MAC1,MAC2,MAC3        32bit Maths Accumulators (Vector)
-  cop2r28-29 1xU15 IRGB,ORGB             Convert RGB Color (48bit vs 15bit)
-  cop2r30-31 2xS32 LZCS,LZCR             Count Leading-Zeroes/Ones (sign bits)
-*/
-
 // COP2
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
 
 void gte_handle_irgb_write(psx_cpu_t* cpu) {
     cpu->cop2_dr.ir[1] = ((cpu->cop2_dr.irgb >> 0) & 0x1f) * 0x80;
@@ -2030,6 +2013,27 @@
     return (short)value;
 }
 
+// Approximate n / d from a g_psx_gte_unr_table reciprocal estimate; never returns more than 0x1ffff.
+// NOTE(review): result looks like a quotient scaled by 0x10000; cpu is presumably used via R_FLAG.
+uint32_t gte_divide(psx_cpu_t* cpu, uint16_t n, uint16_t d) {
+    // Overflow (n >= 2*d, which also covers d == 0): flag it and return the saturated value
+    if (n >= d * 2) {
+        // Unsigned literals: 1 << 31 on a signed int shifts into the sign bit (undefined in C)
+        R_FLAG |= (1u << 31) | (1u << 17);
+        return 0x1ffff;
+    }
+
+    // d != 0 here, so clz is defined; assuming 32-bit int, shift moves d's top set bit to bit 15
+    int shift = __builtin_clz(d) - 16;
+
+    int r1 = (d << shift) & 0x7fff;
+    int r2 = g_psx_gte_unr_table[((r1 + 0x40) >> 7)] + 0x101;
+    int r3 = ((0x80 - (r2 * (r1 + 0x8000))) >> 8) & 0x1ffff;
+    uint32_t reciprocal = ((r2 * r3) + 0x80) >> 8;
+    const uint32_t res = ((((uint64_t)reciprocal * (n << shift)) + 0x8000) >> 16);
+    return MIN(0x1ffff, res);
+}
+
 void gte_interpolate_color(psx_cpu_t* cpu, int mac1, int mac2, int mac3) {
     // PSX SPX is very convoluted about this and it lacks some info
     // [MAC1, MAC2, MAC3] = MAC + (FC - MAC) * IR0;< --- for NCDx only
@@ -2120,8 +2124,6 @@
     log_fatal("invalid: Unimplemented GTE instruction %02x, %02x", cpu->opcode & 0x3f, cpu->opcode >> 25);
 }
 
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-
 #define I64(v) ((int64_t)v)
 #define R_TRX cpu->cop2_cr.tr.x
 #define R_TRY cpu->cop2_cr.tr.y
@@ -2164,22 +2166,9 @@
 #define R_ZSF3 cpu->cop2_cr.zsf3
 #define R_ZSF4 cpu->cop2_cr.zsf4
 #define R_OTZ cpu->cop2_dr.otz
+#define R_H cpu->cop2_cr.h
 
 void gte_rtp(psx_cpu_t* cpu, int i, int dq) {
-    int64_t tx = I64(R_TRX) << 12;
-    int64_t ty = I64(R_TRY) << 12;
-    int64_t tz = I64(R_TRZ) << 12;
-
-    int64_t r11 = R_RT11;
-    int64_t r12 = R_RT12;
-    int64_t r13 = R_RT13;
-    int64_t r21 = R_RT21;
-    int64_t r22 = R_RT22;
-    int64_t r23 = R_RT23;
-    int64_t r31 = R_RT31;
-    int64_t r32 = R_RT32;
-    int64_t r33 = R_RT33;
-
     int64_t vx = cpu->cop2_dr.v[i].p[0];
     int64_t vy = cpu->cop2_dr.v[i].p[1];
     int64_t vz = cpu->cop2_dr.v[i].z;
@@ -2186,59 +2175,37 @@
 
     int64_t temp[4];
 
-    temp[0] = gte_clamp_mac(cpu, 1, tx + r11 * vx);
-    temp[1] = gte_clamp_mac(cpu, 2, ty + r21 * vx);
-    temp[2] = gte_clamp_mac(cpu, 3, tz + r31 * vx);
+    temp[0] = gte_clamp_mac(cpu, 1, (I64(R_TRX) << 12) + I64(R_RT11) * vx);
+    temp[1] = gte_clamp_mac(cpu, 2, (I64(R_TRY) << 12) + I64(R_RT21) * vx);
+    temp[2] = gte_clamp_mac(cpu, 3, (I64(R_TRZ) << 12) + I64(R_RT31) * vx);
 
-    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + r12 * vy);
-    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + r22 * vy);
-    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + r32 * vy);
+    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + I64(R_RT12) * vy);
+    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + I64(R_RT22) * vy);
+    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + I64(R_RT32) * vy);
 
-    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + r13 * vz);
-    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + r23 * vz);
-    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + r33 * vz);
+    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + I64(R_RT13) * vz);
+    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + I64(R_RT23) * vz);
+    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + I64(R_RT33) * vz);
     
     R_MAC1 = (int32_t)(temp[0] >> cpu->gte_sf);
     R_MAC2 = (int32_t)(temp[1] >> cpu->gte_sf);
     R_MAC3 = (int32_t)(temp[2] >> cpu->gte_sf);
 
-    tz = temp[2];
+    int64_t tz = temp[2];
 
     int64_t zs = tz >> 12;
 
-    int64_t mac1 = R_MAC1;
-    int64_t mac2 = R_MAC2;
-    int64_t mac3 = R_MAC3;
+    R_IR1 = gte_clamp_ir(cpu, 1, I64(R_MAC1), cpu->gte_lm);
+    R_IR2 = gte_clamp_ir(cpu, 2, I64(R_MAC2), cpu->gte_lm);
+    R_IR3 = gte_clamp_ir(cpu, 3, I64(R_MAC3), cpu->gte_lm);
 
-    cpu->cop2_dr.ir[1] = gte_clamp_ir(cpu, 1, mac1, cpu->gte_lm);
-    cpu->cop2_dr.ir[2] = gte_clamp_ir(cpu, 2, mac2, cpu->gte_lm);
-    cpu->cop2_dr.ir[3] = gte_clamp_ir(cpu, 3, mac3, cpu->gte_lm);
+    R_SZ0 = R_SZ1;
+    R_SZ1 = R_SZ2;
+    R_SZ2 = R_SZ3;
+    R_SZ3 = gte_clamp_sz3(cpu, I64(R_MAC3) >> 12);
 
-    cpu->cop2_dr.sz[0] = cpu->cop2_dr.sz[1];
-    cpu->cop2_dr.sz[1] = cpu->cop2_dr.sz[2];
-    cpu->cop2_dr.sz[2] = cpu->cop2_dr.sz[3];
-    cpu->cop2_dr.sz[3] = gte_clamp_sz3(cpu, mac3 >> 12);
+    uint32_t h_div_sz = gte_divide(cpu, R_H, R_SZ3);
 
-    uint32_t h_div_sz;
-
-    if (cpu->cop2_dr.sz[3] > (cpu->cop2_cr.h / 2)) {
-        int z = __builtin_clz(cpu->cop2_dr.sz[3]) - 16;
-    
-        h_div_sz = cpu->cop2_cr.h << z;
-    
-        uint32_t d = (cpu->cop2_dr.sz[3] << z);
-        uint16_t u = (g_psx_gte_unr_table[(int)(d - 0x7fc0) >> 7] + 0x101);
-    
-        d = ((0x2000080 - (d * u)) >> 8);
-        d = ((0x0000080 + (d * u)) >> 8);
-    
-        h_div_sz = (int)MIN(0x1ffff, ((h_div_sz * d) + 0x8000) >> 16);
-    } else {
-        cpu->cop2_cr.flag |= 0x20000;
-
-        h_div_sz = 0x1ffff;
-    }
-
     int x = (int)(gte_clamp_mac0(cpu, h_div_sz * R_IR1 + R_OFX) >> 16);
     int y = (int)(gte_clamp_mac0(cpu, h_div_sz * R_IR2 + R_OFY) >> 16);
 
@@ -2247,13 +2214,13 @@
     R_SX2 = gte_clamp_sxy(cpu, 1, x);
     R_SY2 = gte_clamp_sxy(cpu, 2, y);
 
-    if (dq) {
+    //if (dq) {
         long mac0 = gte_clamp_mac0(cpu, h_div_sz * R_DQA + R_DQB);
     
         R_MAC0 = (int)mac0;
     
         R_IR0 = gte_clamp_ir0(cpu, mac0 >> 12);
-    }
+    //}
 }
 
 void psx_gte_i_rtps(psx_cpu_t* cpu) {
--- a/psx/dev/dma.c
+++ b/psx/dev/dma.c
@@ -346,22 +346,22 @@
     if (!CHCR_BUSY(spu))
         return;
 
-    log_set_quiet(0);
-    log_fatal("SPU DMA transfer: madr=%08x, dir=%s, sync=%s (%u), step=%s, size=%x, blocks=%u",
-        dma->spu.madr,
-        CHCR_TDIR(spu) ? "to device" : "to RAM",
-        g_psx_dma_sync_type_name_table[CHCR_SYNC(spu)], CHCR_SYNC(spu),
-        CHCR_STEP(spu) ? "decrementing" : "incrementing",
-        BCR_SIZE(spu), BCR_BCNT(spu)
-    );
+    // log_set_quiet(0);
+    // log_fatal("SPU DMA transfer: madr=%08x, dir=%s, sync=%s (%u), step=%s, size=%x, blocks=%u",
+    //     dma->spu.madr,
+    //     CHCR_TDIR(spu) ? "to device" : "to RAM",
+    //     g_psx_dma_sync_type_name_table[CHCR_SYNC(spu)], CHCR_SYNC(spu),
+    //     CHCR_STEP(spu) ? "decrementing" : "incrementing",
+    //     BCR_SIZE(spu), BCR_BCNT(spu)
+    // );
 
-    log_fatal("DICR: force=%u, en=%02x, irqen=%u, flags=%02x",
-        (dma->dicr >> 15) & 1,
-        (dma->dicr >> 16) & 0x7f,
-        (dma->dicr >> 23) & 1,
-        (dma->dicr >> 24) & 0x7f
-    );
-    log_set_quiet(1);
+    // log_fatal("DICR: force=%u, en=%02x, irqen=%u, flags=%02x",
+    //     (dma->dicr >> 15) & 1,
+    //     (dma->dicr >> 16) & 0x7f,
+    //     (dma->dicr >> 23) & 1,
+    //     (dma->dicr >> 24) & 0x7f
+    // );
+    // log_set_quiet(1);
 
     uint32_t size = BCR_SIZE(spu);
     uint32_t blocks = BCR_BCNT(spu);
--- a/psx/dev/gpu.c
+++ b/psx/dev/gpu.c
@@ -1585,11 +1585,11 @@
             /* To-do: Implement mask bit thing */
         } break;
         default: {
-            log_set_quiet(0);
-            log_fatal("Unhandled GP0(%02Xh)", gpu->buf[0] >> 24);
-            log_set_quiet(1);
+            // log_set_quiet(0);
+            // log_fatal("Unhandled GP0(%02Xh)", gpu->buf[0] >> 24);
+            // log_set_quiet(1);
 
-            exit(1);
+            // exit(1);
         } break;
     }
 }
--