shithub: psxe

--- a/frontend/main.c

+++ b/frontend/main.c

@@ -16,8 +16,8 @@

     for (int i = 0; i < (size >> 2); i++) {

         uint32_t sample = psx_spu_get_sample(spu);

-        int16_t left = (int16_t)(sample & 0xffff);

-        int16_t right = (int16_t)(sample >> 16);

+        int16_t left = (int16_t)(sample & 0xffff) * 2;

+        int16_t right = (int16_t)(sample >> 16) * 2;

         *(int16_t*)(&buf[(i << 2) + 0]) += left;

         *(int16_t*)(&buf[(i << 2) + 2]) += right;

--- a/psx/cpu.c

+++ b/psx/cpu.c

@@ -1675,25 +1675,8 @@

     cpu->cop0_r[COP0_SR] |= mode >> 2;

-/*

-  cop2r0-1   3xS16 VXY0,VZ0              Vector 0 (X,Y,Z)

-  cop2r2-3   3xS16 VXY1,VZ1              Vector 1 (X,Y,Z)

-  cop2r4-5   3xS16 VXY2,VZ2              Vector 2 (X,Y,Z)

-  cop2r6     4xU8  RGBC                  Color/code value

-  cop2r7     1xU16 OTZ                   Average Z value (for Ordering Table)

-  cop2r8     1xS16 IR0                   16bit Accumulator (Interpolate)

-  cop2r9-11  3xS16 IR1,IR2,IR3           16bit Accumulator (Vector)

-  cop2r12-15 6xS16 SXY0,SXY1,SXY2,SXYP   Screen XY-coordinate FIFO  (3 stages)

-  cop2r16-19 4xU16 SZ0,SZ1,SZ2,SZ3       Screen Z-coordinate FIFO   (4 stages)

-  cop2r20-22 12xU8 RGB0,RGB1,RGB2        Color CRGB-code/color FIFO (3 stages)

-  cop2r23    4xU8  (RES1)                Prohibited

-  cop2r24    1xS32 MAC0                  32bit Maths Accumulators (Value)

-  cop2r25-27 3xS32 MAC1,MAC2,MAC3        32bit Maths Accumulators (Vector)

-  cop2r28-29 1xU15 IRGB,ORGB             Convert RGB Color (48bit vs 15bit)

-  cop2r30-31 2xS32 LZCS,LZCR             Count Leading-Zeroes/Ones (sign bits)

-*/

 // COP2

+#define MIN(a, b) (((a) < (b)) ? (a) : (b))

 void gte_handle_irgb_write(psx_cpu_t* cpu) {

     cpu->cop2_dr.ir[1] = ((cpu->cop2_dr.irgb >> 0) & 0x1f) * 0x80;

@@ -2030,6 +2013,27 @@

     return (short)value;

+uint32_t gte_divide(psx_cpu_t* cpu, uint16_t n, uint16_t d) { // Computes the n / d quotient from a table-seeded reciprocal (gte_rtp stores it as h_div_sz); result is capped at 0x1ffff

+    // Overflow: taken when n is at least twice d; flags the condition and returns the saturated value

+    if (n >= d * 2) { // also true whenever d == 0 (n is unsigned), so __builtin_clz(d) below never receives 0

+        R_FLAG |= (1 << 31) | (1 << 17); // sets flag bits 31 and 17; NOTE(review): 1 << 31 shifts into the sign bit of int, consider 1u << 31

+        return 0x1ffff; // saturated result

+    }

+    int shift = __builtin_clz(d) - 16; // leading zero count of d within 16 bits (assumes 32-bit unsigned int - TODO confirm)

+    int r1 = (d << shift) & 0x7fff; // d shifted so its top set bit lands on bit 15, then that bit is masked off

+    int r2 = g_psx_gte_unr_table[((r1 + 0x40) >> 7)] + 0x101; // table index is r1 rounded to the nearest 0x80 step, i.e. 0..0x100

+    int r3 = ((0x80 - (r2 * (r1 + 0x8000))) >> 8) & 0x1ffff; // presumably a Newton-Raphson refinement step (table name suggests UNR) - TODO confirm; NOTE(review): the >> operand looks negative for non-negative table entries, so this relies on arithmetic right shift

+    uint32_t reciprocal = ((r2 * r3) + 0x80) >> 8; // rounded with +0x80 before dropping the low 8 bits

+    const uint32_t res = ((((uint64_t)reciprocal * (n << shift)) + 0x8000) >> 16); // 64-bit multiply by n shifted by the same amount as d, rounded with +0x8000, then >> 16

+    return MIN(0x1ffff, res); // clamp to 0x1ffff

+}

 void gte_interpolate_color(psx_cpu_t* cpu, int mac1, int mac2, int mac3) {

     // PSX SPX is very convoluted about this and it lacks some info

     // [MAC1, MAC2, MAC3] = MAC + (FC - MAC) * IR0;< --- for NCDx only

@@ -2120,8 +2124,6 @@

     log_fatal("invalid: Unimplemented GTE instruction %02x, %02x", cpu->opcode & 0x3f, cpu->opcode >> 25);

-#define MIN(a, b) (((a) < (b)) ? (a) : (b))

 #define I64(v) ((int64_t)v)

 #define R_TRX cpu->cop2_cr.tr.x

 #define R_TRY cpu->cop2_cr.tr.y

@@ -2164,22 +2166,9 @@

 #define R_ZSF3 cpu->cop2_cr.zsf3

 #define R_ZSF4 cpu->cop2_cr.zsf4

 #define R_OTZ cpu->cop2_dr.otz

+#define R_H cpu->cop2_cr.h

 void gte_rtp(psx_cpu_t* cpu, int i, int dq) {

-    int64_t tx = I64(R_TRX) << 12;

-    int64_t ty = I64(R_TRY) << 12;

-    int64_t tz = I64(R_TRZ) << 12;

-    int64_t r11 = R_RT11;

-    int64_t r12 = R_RT12;

-    int64_t r13 = R_RT13;

-    int64_t r21 = R_RT21;

-    int64_t r22 = R_RT22;

-    int64_t r23 = R_RT23;

-    int64_t r31 = R_RT31;

-    int64_t r32 = R_RT32;

-    int64_t r33 = R_RT33;

     int64_t vx = cpu->cop2_dr.v[i].p[0];

     int64_t vy = cpu->cop2_dr.v[i].p[1];

     int64_t vz = cpu->cop2_dr.v[i].z;

@@ -2186,59 +2175,37 @@

     int64_t temp[4];

-    temp[0] = gte_clamp_mac(cpu, 1, tx + r11 * vx);

-    temp[1] = gte_clamp_mac(cpu, 2, ty + r21 * vx);

-    temp[2] = gte_clamp_mac(cpu, 3, tz + r31 * vx);

+    temp[0] = gte_clamp_mac(cpu, 1, (I64(R_TRX) << 12) + I64(R_RT11) * vx);

+    temp[1] = gte_clamp_mac(cpu, 2, (I64(R_TRY) << 12) + I64(R_RT21) * vx);

+    temp[2] = gte_clamp_mac(cpu, 3, (I64(R_TRZ) << 12) + I64(R_RT31) * vx);

-    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + r12 * vy);

-    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + r22 * vy);

-    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + r32 * vy);

+    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + I64(R_RT12) * vy);

+    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + I64(R_RT22) * vy);

+    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + I64(R_RT32) * vy);

-    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + r13 * vz);

-    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + r23 * vz);

-    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + r33 * vz);

+    temp[0] = gte_clamp_mac(cpu, 1, temp[0] + I64(R_RT13) * vz);

+    temp[1] = gte_clamp_mac(cpu, 2, temp[1] + I64(R_RT23) * vz);

+    temp[2] = gte_clamp_mac(cpu, 3, temp[2] + I64(R_RT33) * vz);

     R_MAC1 = (int32_t)(temp[0] >> cpu->gte_sf);

     R_MAC2 = (int32_t)(temp[1] >> cpu->gte_sf);

     R_MAC3 = (int32_t)(temp[2] >> cpu->gte_sf);

-    tz = temp[2];

+    int64_t tz = temp[2];

     int64_t zs = tz >> 12;

-    int64_t mac1 = R_MAC1;

-    int64_t mac2 = R_MAC2;

-    int64_t mac3 = R_MAC3;

+    R_IR1 = gte_clamp_ir(cpu, 1, I64(R_MAC1), cpu->gte_lm);

+    R_IR2 = gte_clamp_ir(cpu, 2, I64(R_MAC2), cpu->gte_lm);

+    R_IR3 = gte_clamp_ir(cpu, 3, I64(R_MAC3), cpu->gte_lm);

-    cpu->cop2_dr.ir[1] = gte_clamp_ir(cpu, 1, mac1, cpu->gte_lm);

-    cpu->cop2_dr.ir[2] = gte_clamp_ir(cpu, 2, mac2, cpu->gte_lm);

-    cpu->cop2_dr.ir[3] = gte_clamp_ir(cpu, 3, mac3, cpu->gte_lm);

+    R_SZ0 = R_SZ1;

+    R_SZ1 = R_SZ2;

+    R_SZ2 = R_SZ3;

+    R_SZ3 = gte_clamp_sz3(cpu, I64(R_MAC3) >> 12);

-    cpu->cop2_dr.sz[0] = cpu->cop2_dr.sz[1];

-    cpu->cop2_dr.sz[1] = cpu->cop2_dr.sz[2];

-    cpu->cop2_dr.sz[2] = cpu->cop2_dr.sz[3];

-    cpu->cop2_dr.sz[3] = gte_clamp_sz3(cpu, mac3 >> 12);

+    uint32_t h_div_sz = gte_divide(cpu, R_H, R_SZ3);

-    uint32_t h_div_sz;

-    if (cpu->cop2_dr.sz[3] > (cpu->cop2_cr.h / 2)) {

-        int z = __builtin_clz(cpu->cop2_dr.sz[3]) - 16;

-        h_div_sz = cpu->cop2_cr.h << z;

-        uint32_t d = (cpu->cop2_dr.sz[3] << z);

-        uint16_t u = (g_psx_gte_unr_table[(int)(d - 0x7fc0) >> 7] + 0x101);

-        d = ((0x2000080 - (d * u)) >> 8);

-        d = ((0x0000080 + (d * u)) >> 8);

-        h_div_sz = (int)MIN(0x1ffff, ((h_div_sz * d) + 0x8000) >> 16);

-    } else {

-        cpu->cop2_cr.flag |= 0x20000;

-        h_div_sz = 0x1ffff;

-    }

     int x = (int)(gte_clamp_mac0(cpu, h_div_sz * R_IR1 + R_OFX) >> 16);

     int y = (int)(gte_clamp_mac0(cpu, h_div_sz * R_IR2 + R_OFY) >> 16);

@@ -2247,13 +2214,13 @@

     R_SX2 = gte_clamp_sxy(cpu, 1, x);

     R_SY2 = gte_clamp_sxy(cpu, 2, y);

-    if (dq) {

+    //if (dq) {

         long mac0 = gte_clamp_mac0(cpu, h_div_sz * R_DQA + R_DQB);

         R_MAC0 = (int)mac0;

         R_IR0 = gte_clamp_ir0(cpu, mac0 >> 12);

-    }

+    //}

 void psx_gte_i_rtps(psx_cpu_t* cpu) {

--- a/psx/dev/dma.c

+++ b/psx/dev/dma.c

@@ -346,22 +346,22 @@

     if (!CHCR_BUSY(spu))

         return;

-    log_set_quiet(0);

-    log_fatal("SPU DMA transfer: madr=%08x, dir=%s, sync=%s (%u), step=%s, size=%x, blocks=%u",

-        dma->spu.madr,

-        CHCR_TDIR(spu) ? "to device" : "to RAM",

-        g_psx_dma_sync_type_name_table[CHCR_SYNC(spu)], CHCR_SYNC(spu),

-        CHCR_STEP(spu) ? "decrementing" : "incrementing",

-        BCR_SIZE(spu), BCR_BCNT(spu)

-    );

+    // log_set_quiet(0);

+    // log_fatal("SPU DMA transfer: madr=%08x, dir=%s, sync=%s (%u), step=%s, size=%x, blocks=%u",

+    //     dma->spu.madr,

+    //     CHCR_TDIR(spu) ? "to device" : "to RAM",

+    //     g_psx_dma_sync_type_name_table[CHCR_SYNC(spu)], CHCR_SYNC(spu),

+    //     CHCR_STEP(spu) ? "decrementing" : "incrementing",

+    //     BCR_SIZE(spu), BCR_BCNT(spu)

+    // );

-    log_fatal("DICR: force=%u, en=%02x, irqen=%u, flags=%02x",

-        (dma->dicr >> 15) & 1,

-        (dma->dicr >> 16) & 0x7f,

-        (dma->dicr >> 23) & 1,

-        (dma->dicr >> 24) & 0x7f

-    );

-    log_set_quiet(1);

+    // log_fatal("DICR: force=%u, en=%02x, irqen=%u, flags=%02x",

+    //     (dma->dicr >> 15) & 1,

+    //     (dma->dicr >> 16) & 0x7f,

+    //     (dma->dicr >> 23) & 1,

+    //     (dma->dicr >> 24) & 0x7f

+    // );

+    // log_set_quiet(1);

     uint32_t size = BCR_SIZE(spu);

     uint32_t blocks = BCR_BCNT(spu);

--- a/psx/dev/gpu.c

+++ b/psx/dev/gpu.c

@@ -1585,11 +1585,11 @@

             /* To-do: Implement mask bit thing */

         } break;

         default: {

-            log_set_quiet(0);

-            log_fatal("Unhandled GP0(%02Xh)", gpu->buf[0] >> 24);

-            log_set_quiet(1);

+            // log_set_quiet(0);

+            // log_fatal("Unhandled GP0(%02Xh)", gpu->buf[0] >> 24);

+            // log_set_quiet(1);

-            exit(1);

+            // exit(1);

         } break;

--

⑨