ref: c8c81df608dc488bbea1312f5e4a386aa73450a7
parent: 4a28392169ffedfff514d54d4039e048dd0562d8
author: glenda <glenda@fileserver>
date: Sat Dec 27 22:42:11 EST 2025
missing files added
--- /dev/null
+++ b/mptable.c
@@ -1,0 +1,304 @@
+/*
+ * mptable.c - MP Specification table generation for vmx SMP
+ *
+ * MP Floating Pointer: 16 bytes, signature "_MP_"
+ * MP Config Header: 44 bytes, signature "PCMP"
+ * Followed by variable entries:
+ * - Processor: 20 bytes, type 0
+ * - Bus: 8 bytes, type 1
+ * - I/O APIC: 8 bytes, type 2
+ * - I/O Interrupt: 8 bytes, type 3
+ * - Local Interrupt: 8 bytes, type 4
+ *
+ * Bus Layout (matching typical PC):
+ * Bus 0: PCI (primary PCI bus)
+ * Bus 1: ISA (behind PCI-ISA bridge)
+ */
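+
+/*
+ * Size sketch (illustrative, derived from the entries built below):
+ * the config table is 44 bytes plus 20 bytes per processor and 8 bytes
+ * for each other entry.  With 2 buses, 1 I/O APIC, 15 ISA and 4 PCI
+ * interrupt entries and 2 local interrupt entries that is
+ * 44 + 20*ncpu + 8*24 bytes, e.g. 316 bytes for a 4-CPU guest,
+ * well inside the page cleared at 0xF0000.
+ */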
+
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+/* MP table entry types */
+enum {
+ MP_PROCESSOR = 0,
+ MP_BUS = 1,
+ MP_IOAPIC = 2,
+ MP_IOINTR = 3,
+ MP_LINTR = 4,
+};
+
+/* Bus IDs - must be consistent throughout */
+enum {
+ BUS_PCI = 0, /* Primary PCI bus */
+ BUS_ISA = 1, /* ISA bus (behind PCI-ISA bridge) */
+};
+
+static uchar
+mpchecksum(uchar *p, int len)
+{
+ uchar sum = 0;
+ while(len-- > 0)
+ sum += *p++;
+ return sum;
+}
+
+/*
+ * Generate MP tables in guest memory at 0xF0000
+ */
+void
+mpmktable(void)
+{
+ uchar *base, *fp, *cfg, *p, *entrystart;
+ int i, ncpu, entries;
+ u16int tbllen;
+
+ base = gptr(0xF0000, 0x1000);
+ if(base == nil){
+ vmerror("mpmktable: cannot map 0xF0000");
+ return;
+ }
+
+ /* Clear the area first */
+ memset(base, 0, 0x1000);
+
+ ncpu = nvcpu;
+ fp = base;
+ cfg = base + 0x40; /* Config table at physical 0xF0040 */
+
+ /*
+ * MP Floating Pointer Structure (16 bytes)
+ */
+ fp[0] = '_';
+ fp[1] = 'M';
+ fp[2] = 'P';
+ fp[3] = '_';
+ /* Physical address 0x000F0040 in little-endian */
+ fp[4] = 0x40;
+ fp[5] = 0x00;
+ fp[6] = 0x0F;
+ fp[7] = 0x00;
+ fp[8] = 1; /* Length: 1 * 16 = 16 bytes */
+ fp[9] = 4; /* MP spec version 1.4 */
+ fp[10] = 0; /* Checksum - fill later */
+ fp[11] = 0; /* Feature 1: 0 means config table is present */
+ fp[12] = 0; /* Feature 2: bit 7 = IMCR present */
+ fp[13] = 0;
+ fp[14] = 0;
+ fp[15] = 0;
+ fp[10] = -mpchecksum(fp, 16);
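+ /* with the negated sum stored, the 16 bytes now sum to 0 mod 256 */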
+
+ /*
+ * MP Configuration Table Header (44 bytes)
+ */
+ p = cfg;
+
+ /* Signature */
+ p[0] = 'P';
+ p[1] = 'C';
+ p[2] = 'M';
+ p[3] = 'P';
+ /* Skip length (offset 4-5), fill later */
+ /* Spec revision */
+ p[6] = 4;
+ /* Skip checksum (offset 7), fill later */
+ /* OEM ID (8 bytes) */
+ memcpy(p + 8, "9FRONTVX", 8);
+ /* Product ID (12 bytes) */
+ memcpy(p + 16, "VMXSMP      ", 12);
+ /* OEM Table Pointer (4 bytes) = 0 */
+ p[28] = 0; p[29] = 0; p[30] = 0; p[31] = 0;
+ /* OEM Table Size (2 bytes) = 0 */
+ p[32] = 0; p[33] = 0;
+ /* Entry Count - fill later at offset 34 */
+ /* Local APIC Address (4 bytes) = 0xFEE00000 little-endian */
+ p[36] = 0x00;
+ p[37] = 0x00;
+ p[38] = 0xE0;
+ p[39] = 0xFE;
+ /* Extended Table Length (2 bytes) = 0 */
+ p[40] = 0; p[41] = 0;
+ /* Extended Table Checksum = 0 */
+ p[42] = 0;
+ /* Reserved */
+ p[43] = 0;
+
+ /* Entries start at offset 44 */
+ entrystart = cfg + 44;
+ p = entrystart;
+ entries = 0;
+
+ /*
+ * Processor entries (20 bytes each)
+ */
+ for(i = 0; i < ncpu; i++){
+ p[0] = MP_PROCESSOR;
+ p[1] = i; /* APIC ID */
+ p[2] = 0x14; /* APIC version */
+ p[3] = 0x01 | (i == 0 ? 0x02 : 0x00); /* enabled + BSP for CPU 0 */
+ /* CPU signature: family 6, model 9, stepping 1 */
+ p[4] = 0x91; /* stepping=1, model low=9 */
+ p[5] = 0x06; /* family low=6, model high=0 */
+ p[6] = 0x00; /* family high=0, type=0 */
+ p[7] = 0x00;
+ /* Feature flags - basic x86 features */
+ p[8] = 0x78; /* FPU, VME, DE, PSE */
+ p[9] = 0x1A; /* TSC, MSR, PAE, MCE */
+ p[10] = 0x20; /* APIC */
+ p[11] = 0x00;
+ p[12] = 0x00;
+ p[13] = 0x00;
+ p[14] = 0x00;
+ p[15] = 0x00;
+ /* Reserved */
+ p[16] = 0; p[17] = 0; p[18] = 0; p[19] = 0;
+ p += 20;
+ entries++;
+ }
+
+ /*
+ * Bus entries (8 bytes each)
+ *
+ * IMPORTANT: The kernel looks up buses by TYPE and NUMBER.
+ * PCI devices request "BusPCI, number 0" so we MUST have PCI as bus 0.
+ * ISA devices use mpisabus which gets set from parsing these entries.
+ */
+
+ /* PCI bus - ID 0 (primary PCI bus) */
+ p[0] = MP_BUS;
+ p[1] = BUS_PCI; /* Bus ID 0 */
+ memcpy(p + 2, "PCI ", 6);
+ p += 8;
+ entries++;
+
+ /* ISA bus - ID 1 (behind PCI-ISA bridge) */
+ p[0] = MP_BUS;
+ p[1] = BUS_ISA; /* Bus ID 1 */
+ memcpy(p + 2, "ISA ", 6);
+ p += 8;
+ entries++;
+
+ /*
+ * I/O APIC entry (8 bytes)
+ */
+ p[0] = MP_IOAPIC;
+ p[1] = ncpu; /* I/O APIC ID = ncpu (after CPU IDs) */
+ p[2] = 0x11; /* Version */
+ p[3] = 0x01; /* Enabled */
+ /* Address 0xFEC00000 little-endian */
+ p[4] = 0x00;
+ p[5] = 0x00;
+ p[6] = 0xC0;
+ p[7] = 0xFE;
+ p += 8;
+ entries++;
+
+ /*
+ * I/O Interrupt entries (8 bytes each)
+ *
+ * Map ISA IRQs to I/O APIC pins
+ * IRQ 0 (timer) -> pin 2 (standard override)
+ * IRQ 2 is not used (was cascade)
+ * All other IRQs -> identity mapping
+ */
+
+ /* IRQ 0 -> IOAPIC pin 2 (timer override) */
+ p[0] = MP_IOINTR;
+ p[1] = 0; /* INT type */
+ p[2] = 0; /* Polarity: conforms to bus spec */
+ p[3] = 0; /* Trigger: conforms (edge for ISA) */
+ p[4] = BUS_ISA; /* Source: ISA bus */
+ p[5] = 0; /* Source IRQ 0 */
+ p[6] = ncpu; /* Dest: I/O APIC ID */
+ p[7] = 2; /* INTIN# 2 */
+ p += 8;
+ entries++;
+
+ /* IRQs 1, 3-15 -> identity mapping (skip IRQ 2, it's cascade) */
+ for(i = 1; i < 16; i++){
+ if(i == 2) continue; /* Skip cascade */
+ p[0] = MP_IOINTR;
+ p[1] = 0;
+ p[2] = 0;
+ p[3] = 0;
+ p[4] = BUS_ISA;
+ p[5] = i; /* Source IRQ */
+ p[6] = ncpu; /* Dest: I/O APIC ID */
+ p[7] = i; /* INTIN# = IRQ# */
+ p += 8;
+ entries++;
+}
+
+ /*
+ * PCI interrupt routing entries
+ * Map PCI devices to I/O APIC pins 16-19
+ * Source bus is PCI (BUS_PCI = 0)
+ *
+ * The IRQ field for PCI encodes (device << 2) | (pin - 1)
+ * where pin is 1=INTA, 2=INTB, 3=INTC, 4=INTD
+ *
+ * pcibusmap assigns: irqno = 16 + (devno % 4)
+ * So device 1 -> pin 17, device 2 -> pin 18, etc.
+ *
+ * We create entries for devices 1-4 (device 0 is host bridge)
+ */
+ */
+ for(i = 1; i <= 4; i++){
+ p[0] = MP_IOINTR;
+ p[1] = 0; /* INT type */
+ p[2] = 0x0F; /* Flags: active-low (bits 1:0=11) + level (bits 3:2=11) = 0x0F */
+ p[3] = 0x00; /* Flags high byte = 0 */
+ p[4] = BUS_PCI; /* Source: PCI bus (ID 0) */
+ p[5] = (i << 2) | 0; /* device i, INTA */
+ p[6] = ncpu; /* Dest: I/O APIC ID */
+ p[7] = 16 + (i % 4); /* INTIN# */
+ p += 8;
+ entries++;
+ }
+
+ /*
+ * Local Interrupt entries (8 bytes each)
+ */
+
+ /* LINT0: ExtINT (for 8259 compatibility) */
+ p[0] = MP_LINTR;
+ p[1] = 3; /* ExtINT */
+ p[2] = 0; /* Polarity: conforms */
+ p[3] = 0; /* Trigger: conforms */
+ p[4] = BUS_ISA; /* Source bus: ISA */
+ p[5] = 0; /* Source IRQ */
+ p[6] = 0xFF; /* Dest: all local APICs */
+ p[7] = 0; /* LINT0 */
+ p += 8;
+ entries++;
+
+ /* LINT1: NMI */
+ p[0] = MP_LINTR;
+ p[1] = 1; /* NMI */
+ p[2] = 0; /* Polarity: conforms */
+ p[3] = 0; /* Trigger: conforms */
+ p[4] = BUS_ISA; /* Source bus: ISA */
+ p[5] = 0; /* Source IRQ */
+ p[6] = 0xFF; /* All local APICs */
+ p[7] = 1; /* LINT1 */
+ p += 8;
+ entries++;
+
+ /*
+ * Fill in table length and entry count
+ */
+ tbllen = p - cfg;
+ cfg[4] = tbllen & 0xFF;
+ cfg[5] = (tbllen >> 8) & 0xFF;
+ cfg[34] = entries & 0xFF;
+ cfg[35] = (entries >> 8) & 0xFF;
+
+ /* Compute checksum */
+ cfg[7] = -mpchecksum(cfg, tbllen);
+
+ /* Debug: print what we created */
+ vmerror("mpmktable: created MP table at 0xF0000");+ vmerror(" %d CPUs, %d entries, %d bytes", ncpu, entries, tbllen);+ vmerror(" PCI bus ID=%d, ISA bus ID=%d", BUS_PCI, BUS_ISA);+}
--- /dev/null
+++ b/nanosec.c
@@ -1,0 +1,39 @@
+#include <u.h>
+#include <libc.h>
+#include <tos.h>
+
+#define Nsec 1000000000ULL
+
+/*
+ * nsec() is wallclock time and can be adjusted by timesync,
+ * so we use cycles() instead, falling back to nsec() when
+ * the cycle counter frequency is not available
+ */
+uvlong
+nanosec(void)
+{
+
+ static uvlong fasthz, xstart;
+ uvlong x;
+
+ if(fasthz == ~0ULL)
+ return nsec() - xstart;
+
+ if(fasthz == 0){
+ if(_tos->cyclefreq){
+ fasthz = _tos->cyclefreq;
+ cycles(&xstart);
+ } else {
+ fasthz = ~0ULL;
+ xstart = nsec();
+ }
+ return 0;
+ }
+ cycles(&x);
+ x -= xstart;
+
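+ /*
+ * avoid computing x*Nsec directly, which would overflow 64 bits within
+ * seconds at GHz clock rates; the quotient/remainder split keeps both
+ * products in range
+ */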
+ uvlong q = x / fasthz;
+ uvlong r = x % fasthz;
+
+ return q*Nsec + r*Nsec/fasthz;
+}
--- /dev/null
+++ b/pci.c
@@ -1,0 +1,728 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+PCIDev *pcidevs;
+PCIBar membars, iobars;
+
+PciShared *pcishared;
+
+static int
+findsharedidx(u32int bdf)
+{
+ int i;
+ for(i = 0; i < pcishared->ndev; i++)
+ if(pcishared->dev[i].bdf == bdf)
+ return i;
+ return -1;
+}
+
+static void
+pciregister(PCIDev *d)
+{
+ int i;
+
+ if(pcishared == nil)
+ return;
+
+ lock(&pcishared->lock); /* FIX: Add lock */
+ if(pcishared->ndev >= MAXPCIDEV){
+ unlock(&pcishared->lock);
+ return;
+ }
+
+ i = pcishared->ndev++;
+ d->sharedidx = i;
+ pcishared->dev[i].bdf = d->bdf;
+ pcishared->dev[i].ctrl = d->ctrl;
+ pcishared->dev[i].irqno = d->irqno;
+ pcishared->dev[i].irqactive = d->irqactive;
+ for(int j = 0; j < 6; j++)
+ pcishared->dev[i].bar_addr[j] = d->bar[j].addr;
+ unlock(&pcishared->lock); /* FIX: Release lock */
+}
+
+
+PCIDev *
+mkpcidev(u32int bdf, u32int viddid, u32int clrev, int needirq)
+{
+ PCIDev *d;
+ int n;
+
+ d = emalloc(sizeof(PCIDev));
+ d->bdf = bdf;
+ d->viddid = viddid;
+ d->clrev = clrev;
+ d->next = pcidevs;
+ d->irqno = needirq ? 0 : 0xff;
+ for(n = 0; n < nelem(d->bar); n++){
+ d->bar[n].d = d;
+ d->bar[n].busnext = &d->bar[n];
+ d->bar[n].busprev = &d->bar[n];
+ }
+ d->capalloc = 64;
+ pcidevs = d;
+
+ pciregister(d);
+
+ return d;
+
+}
+
+u32int
+allocbdf(void)
+{
+ static int dev = 1;
+ u32int bdf = BDF(0, dev, 0);
+ fprint(2, "allocbdf: dev=%d BDF=%#x\n", dev, bdf);
+ dev++;
+ return bdf;
+}
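+
+/*
+ * round l up to the next power of two (powers of two map to themselves),
+ * e.g. roundpow2(5) == 8, roundpow2(4096) == 4096
+ */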
+u32int
+roundpow2(u32int l)
+{
+ l = -l;
+ l &= (int)l >> 16;
+ l &= (int)l >> 8;
+ l &= (int)l >> 4;
+ l &= (int)l >> 2;
+ l &= (int)l >> 1;
+ return -l;
+}
+
+PCIBar *
+mkpcibar(PCIDev *d, u8int t, u32int a, u32int l, void *fn, void *aux)
+{
+ PCIBar *b;
+ int idx, barno;
+
+ assert((t & 1) == 0 || (t & 2) == 0);
+ assert((t & 1) != 0 || (t & 6) == 0);
+ if((t & 1) != 0 && l < 4) l = 4;
+ if((t & 1) == 0 && l < 4096) l = 4096;
+ if((l & l-1) != 0)
+ l = roundpow2(l);
+ for(b = d->bar; b < d->bar + nelem(d->bar); b++)
+ if(b->length == 0)
+ break;
+ if(b == d->bar + nelem(d->bar))
+ sysfatal("pci bdf %6ux: too many bars", d->bdf);+ b->addr = a;
+ b->type = t;
+ b->length = l;
+ b->busnext = b;
+ b->busprev = b;
+ b->d = d;
+ if((b->type & 1) != 0)
+ b->io = fn;
+ b->aux = aux;
+
+ /* Sync BAR address to shared memory */
+ if(pcishared != nil){
+ lock(&pcishared->lock);
+ idx = d->sharedidx;
+ if(idx >= 0){
+ barno = b - d->bar;
+ pcishared->dev[idx].bar_addr[barno] = b->addr;
+ }
+ unlock(&pcishared->lock);
+ }
+
+ return b;
+}
+static void
+updatebar(PCIBar *b)
+{
+ int devno, barno;
+
+ b->busnext->busprev = b->busprev;
+ b->busprev->busnext = b->busnext;
+ b->busnext = b;
+ b->busprev = b;
+ if(b->length == 0) return;
+ if((b->type & 1) == 0){
+ if((b->d->ctrl & 2) == 0) return;
+ b->busnext = &membars;
+ b->busprev = membars.busprev;
+ b->busnext->busprev = b;
+ b->busprev->busnext = b;
+ }else{
+ if((b->d->ctrl & 1) == 0 || b->addr == 0 || b->io == nil) return;
+ b->busnext = &iobars;
+ b->busprev = iobars.busprev;
+ b->busnext->busprev = b;
+ b->busprev->busnext = b;
+ }
+
+ if(pcishared != nil){
+ for(devno = 0; devno < pcishared->ndev; devno++){
+ if(pcishared->dev[devno].bdf == b->d->bdf){
+ barno = b - b->d->bar;
+ pcishared->dev[devno].bar_addr[barno] = b->addr;
+ break;
+ }
+ }
+ }
+
+}
+
+static void
+pciirqupdate(void)
+{
+ PCIDev *d;
+
+ for(d = pcidevs; d != nil; d = d->next)
+ if(d->irqno != 0xff)
+ pciirq(d, d->irqactive);
+}
+
+PCICap *
+mkpcicap(PCIDev *d, u8int length, u32int (*readf)(PCICap *, u8int), void (*writef)(PCICap *, u8int, u32int, u32int))
+{
+ PCICap *c, **p;
+
+ assert(readf != nil);
+ if(d->capalloc + length > 256)
+ sysfatal("mkpcicap (dev %#ux): out of configuration space", d->bdf);+ c = emalloc(sizeof(PCICap));
+ c->dev = d;
+ c->read = readf;
+ c->write = writef;
+ c->length = length;
+
+ c->addr = d->capalloc;
+ d->capalloc += length;
+ for(p = &d->cap; *p != nil; p = &(*p)->next)
+ ;
+ *p = c;
+ return c;
+}
+
+static PCIDev *
+findpcidev(u32int bdf)
+{
+ PCIDev *d;
+ if (debug)
+ fprint(2, "findpcidev: looking for bdf=%#x\n", bdf);
+ for(d = pcidevs; d != nil; d = d->next) {
+ if (debug)
+ fprint(2, " checking d->bdf=%#x\n", d->bdf);
+ if(d->bdf == bdf)
+ return d;
+ }
+ if (debug)
+ fprint(2, " not found!\n");
+ return nil;
+}
+
+static PCICap *
+findpcicap(PCIDev *d, u8int addr)
+{+ PCICap *c;
+
+ for(c = d->cap; c != nil; c = c->next)
+ if((uint)(addr - c->addr) < c->length)
+ return c;
+ return nil;
+}
+
+static u32int
+pciread (PCIDev *d, int addr)
+{
+ u32int val;
+ PCICap *c;
+ int n, idx;
+
+ /* Lock and sync from shared memory */
+ if(pcishared != nil){
+ lock(&pcishared->lock);
+ idx = d->sharedidx;
+ if(idx >= 0){
+ d->ctrl = pcishared->dev[idx].ctrl;
+ d->irqno = pcishared->dev[idx].irqno;
+ for(n = 0; n < 6; n++)
+ d->bar[n].addr = pcishared->dev[idx].bar_addr[n];
+ }
+ unlock(&pcishared->lock);
+ }
+ if (debug)
+ fprint(2, "pciread: d=%p bdf=%#x addr=%#x\n", d, d->bdf, addr);
+
+ switch(addr){
+ case 0x00:
+ if (debug)
+ fprint(2, "pciread: returning viddid=%#x\n", d->viddid);
+ return d->viddid;
+ case 0x04: return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;
+ case 0x08: return d->clrev;
+ case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
+ case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+ n = addr - 0x10 >> 2;
+ return d->bar[n].addr | d->bar[n].type;
+ case 0x28: return 0; /* Cardbus */
+ case 0x2c: return d->subid; /* Subsystem ID */
+ case 0x30: return 0; /* Expansion ROM */
+ case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
+ case 0x38: return 0; /* Reserved */
+ case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
+ }
+ c = findpcicap(d, addr);
+ if(c != nil){
+ val = c->read(c, addr - c->addr);
+ if(addr == c->addr){
+ val &= ~0xff00;
+ if(c->next != nil)
+ val |= c->next->addr << 8;
+ }
+ return val;
+ }
+ vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);+ return 0;
+}
+
+static u32int
+pciread_unlocked (PCIDev *d, int addr)
+{
+ u32int val;
+ PCICap *c;
+ int n, idx;
+
+ /* Sync from shared memory - pcishared->lock is held by the caller */
+ if(pcishared != nil){
+ //lock(&pcishared->lock);
+ idx = d->sharedidx;
+ if(idx >= 0){
+ d->ctrl = pcishared->dev[idx].ctrl;
+ d->irqno = pcishared->dev[idx].irqno;
+ for(n = 0; n < 6; n++)
+ d->bar[n].addr = pcishared->dev[idx].bar_addr[n];
+ }
+ //unlock(&pcishared->lock);
+ }
+ if (debug)
+ fprint(2, "pciread: d=%p bdf=%#x addr=%#x\n", d, d->bdf, addr);
+
+ switch(addr){
+ case 0x00:
+ if (debug)
+ fprint(2, "pciread: returning viddid=%#x\n", d->viddid);
+ return d->viddid;
+ case 0x04: return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;
+ case 0x08: return d->clrev;
+ case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
+ case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+ n = addr - 0x10 >> 2;
+ return d->bar[n].addr | d->bar[n].type;
+ case 0x28: return 0; /* Cardbus */
+ case 0x2c: return d->subid; /* Subsystem ID */
+ case 0x30: return 0; /* Expansion ROM */
+ case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
+ case 0x38: return 0; /* Reserved */
+ case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
+ }
+ c = findpcicap(d, addr);
+ if(c != nil){
+ val = c->read(c, addr - c->addr);
+ if(addr == c->addr){
+ val &= ~0xff00;
+ if(c->next != nil)
+ val |= c->next->addr << 8;
+ }
+ return val;
+ }
+ vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);+ return 0;
+}
+
+
+static void
+pciwrite(PCIDev *d, int addr, u32int val, u32int mask)
+{
+ int n, idx;
+ PCICap *c;
+ int do_irq_update = 0;
+
+ /* Lock shared state */
+ if(pcishared != nil)
+ lock(&pcishared->lock);
+
+ switch(addr){
+ case 0x04:
+ d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
+ for(n = 0; n < nelem(d->bar); n++)
+ updatebar(&d->bar[n]);
+ break; /* changed from return */
+ case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+ n = addr - 0x10 >> 2;
+ val &= (d->bar[n].type & 1) != 0 ? ~15 : ~3;
+ d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
+ updatebar(&d->bar[n]);
+ break; /* changed from return */
+ case 0x30:
+ break; /* changed from return */
+ case 0x3c:
+ {
+ extern IOApic *ioapic;
+ if(ioapic == nil)
+ d->irqno = (d->irqno & ~mask | val & mask) & 0xff;
+ do_irq_update = 1;
+ break; /* changed from return */
+ }
+ default:
+ c = findpcicap(d, addr);
+ if(c != nil && c->write != nil){
+ c->write(c, addr - c->addr, val, mask);
+ break;
+ }
+ vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);+ break;
+ }
+
+ /* Sync to shared memory */
+ if(pcishared != nil){
+ idx = d->sharedidx;
+ if(idx >= 0){
+ pcishared->dev[idx].ctrl = d->ctrl;
+ pcishared->dev[idx].irqno = d->irqno;
+ for(n = 0; n < 6; n++)
+ pcishared->dev[idx].bar_addr[n] = d->bar[n].addr;
+ }
+ unlock(&pcishared->lock);
+ }
+
+ /* NEW: Call AFTER releasing lock */
+ if(do_irq_update)
+ pciirqupdate();
+}
+
+/*
+ * pciwrite_unlocked - Write to PCI config space without taking pcishared->lock
+ * Called from pciio() which already holds the lock.
+ * Returns 1 if pciirqupdate() should be called after releasing the lock.
+ *
+ * CRITICAL: Do NOT call pciirqupdate() here - it would take ioapic->lock
+ * while pcishared->lock is held, causing potential deadlock.
+ */
+static int
+pciwrite_unlocked(PCIDev *d, int addr, u32int val, u32int mask)
+{
+ int n, idx;
+ PCICap *c;
+ int do_irq_update = 0;
+
+ switch(addr){
+ case 0x04:
+ d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
+ for(n = 0; n < nelem(d->bar); n++)
+ updatebar(&d->bar[n]);
+ break;
+ case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+ n = addr - 0x10 >> 2;
+ val &= (d->bar[n].type & 1) != 0 ? ~15 : ~3;
+ d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
+ updatebar(&d->bar[n]);
+ break;
+ case 0x30:
+ break;
+ case 0x3c:
+ {
+ extern IOApic *ioapic;
+ if(ioapic == nil)
+ d->irqno = (d->irqno & ~mask | val & mask) & 0xff;
+ do_irq_update = 1; /* Caller must call pciirqupdate() after releasing lock */
+ break;
+ }
+ default:
+ c = findpcicap(d, addr);
+ if(c != nil && c->write != nil){
+ c->write(c, addr - c->addr, val, mask);
+ break;
+ }
+ vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);+ break;
+ }
+
+ /* Sync to shared memory - lock held by caller */
+ if(pcishared != nil){
+ idx = d->sharedidx;
+ if(idx >= 0){
+ pcishared->dev[idx].ctrl = d->ctrl;
+ pcishared->dev[idx].irqno = d->irqno;
+ for(n = 0; n < 6; n++)
+ pcishared->dev[idx].bar_addr[n] = d->bar[n].addr;
+ }
+ }
+
+ return do_irq_update;
+}
+
+
+u32int
+pciio(int isin, u16int port, u32int val, int sz, void *)
+{
+ u32int mask, cfgaddr, ret;
+ PCIDev *d;
+ int do_irq_update = 0;
+
+ if(pcishared == nil)
+ return -1;
+
+ lock(&pcishared->lock);
+
+ switch(isin << 16 | port){
+ case 0x0cf8:
+ pcishared->cfgaddr = val;
+ ret = 0;
+ break;
+ case 0x10cf8:
+ ret = pcishared->cfgaddr & ~0x7f000003;
+ break;
+ case 0xcfc: case 0xcfd: case 0xcfe: case 0xcff:
+ cfgaddr = pcishared->cfgaddr;
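+ /*
+ * sub-dword accesses are widened to the aligned config dword; e.g. a
+ * 2-byte write to port 0xcfe shifts val left by 16 and uses mask 0xffff0000
+ */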
+ val <<= 8 * (port & 3);
+ mask = -1UL >> 32 - 8 * sz << 8 * (port & 3);
+ if((cfgaddr & 1<<31) != 0 && (d = findpcidev(cfgaddr & 0xffff00), d != nil))
+ do_irq_update = pciwrite_unlocked(d, cfgaddr & 0xfc, val, mask);
+ ret = 0;
+ break;
+ case 0x10cfc: case 0x10cfd: case 0x10cfe: case 0x10cff:
+ cfgaddr = pcishared->cfgaddr;
+ if((cfgaddr & 1<<31) == 0 || (d = findpcidev(cfgaddr & 0xffff00), d == nil))
+ ret = -1;
+ else
+ ret = pciread_unlocked(d, cfgaddr & 0xfc) >> 8 * (port & 3);
+ break;
+ default:
+ unlock(&pcishared->lock);
+ return iowhine(isin, port, val, sz, "pci");
+ }
+
+ unlock(&pcishared->lock);
+
+ /* CRITICAL: Call pciirqupdate() AFTER releasing pcishared->lock
+ * to prevent deadlock with ioapic->lock */
+ if(do_irq_update)
+ pciirqupdate();
+
+ return ret;
+}
+
+extern void ioapic_irqline_smp(int, int);
+
+/*
+ * pciirq - Set PCI device IRQ line
+ * WARNING: Calls ioapic_set_irq() which takes ioapic->lock.
+ * Caller must NOT hold pcishared->lock to avoid deadlock.
+ */
+void
+pciirq(PCIDev *d, int status)
+{
+ int devno, pin;
+ extern IOApic *ioapic;
+
+ d->irqactive = status != 0;
+ devno = (d->bdf >> 11) & 0x1f;
+ pin = 16 + (devno % 4);
+ if (debug)
+ fprint(2, "pciirq: bdf=%#x devno=%d irqno=%d status=%d pin=%d ioapic=%p\n",
+ d->bdf, devno, d->irqno, status, pin, ioapic);
+
+ if(ioapic != nil){
+ if (debug)
+ fprint(2, "pciirq: using IOAPIC path, calling ioapic_set_irq(%d, %d)\n",
+ pin, d->irqactive);
+ ioapic_irqline_smp(pin, d->irqactive);
+ } else if(d->irqno < 16){
+ if (debug)
+ fprint(2, "pciirq: using legacy PIC path, irqline(%d, %d)\n",
+ d->irqno, d->irqactive ? 0 : 1);
+ ioapic_irqline_smp(d->irqno, d->irqactive ? 0 : 1);
+ } else {
+ if (debug)
+ fprint(2, "pciirq: NO PATH TAKEN - ioapic=%p irqno=%d\n",
+ ioapic, d->irqno);
+ }
+}
+
+void
+pciinit(void)
+{
+ iobars.busnext = &iobars;
+ iobars.busprev = &iobars;
+ membars.busprev = &membars;
+ membars.busnext = &membars;
+ mkpcidev(BDF(0,0,0), 0x01008086, 0x06000000, 0);
+}
+
+void
+pcibusmap(void)
+{
+ u16int iop;
+ u16int irqs, uirqs;
+ PCIDev *d;
+ PCIBar *b;
+ int irq, devno;
+ int i;
+ extern IOApic *ioapic;
+
+ iop = 0x1000;
+ irqs = 1<<5|1<<7|1<<9|1<<10|1<<11;
+ uirqs = 0;
+ irq = 0;
+
+ for(d = pcidevs; d != nil; d = d->next){
+ d->ctrl |= 3;
+ for(b = d->bar; b < d->bar + nelem(d->bar); b++){
+ if(b->length == 0 || b->addr != 0)
+ continue;
+ if((b->type & 1) == 0){
+ vmerror("pci device %.6ux: memory bars unsupported", d->bdf);
+ continue;
+ }
+ if(iop + b->length >= 0x10000){
+ vmerror("pci device %.6ux: not enough I/O address space for BAR%d (len=%d)", d->bdf, (int)(b - d->bar), b->length);
+ continue;
+ }
+ b->addr = iop;
+ iop += b->length;
+ updatebar(b);
+ }
+ if(d->irqno == 0){
+ if(ioapic != nil){
+ /* IOAPIC mode: assign pins 16+ based on device number */
+ devno = (d->bdf >> 11) & 0x1f;
+ d->irqno = 16 + (devno % 4);
+ } else {
+ /* Legacy PIC mode: assign from available IRQs */
+ do
+ irq = irq + 1 & 15;
+ while((irqs & 1<<irq) == 0);
+ d->irqno = irq;
+ uirqs |= 1<<irq;
+ }
+ }
+ }
+
+ /* Legacy PIC setup - only needed when no IOAPIC */
+ if(ioapic == nil){
+ elcr(uirqs);
+ for(i = 0; i < 16; i++)
+ if((uirqs & 1<<i) != 0)
+ ioapic_irqline_smp(i, 1);
+ }
+
+ /* NEW: Sync assigned IRQs to shared memory */
+ if(pcishared != nil){
+ lock(&pcishared->lock);
+ for(d = pcidevs; d != nil; d = d->next){
+ int idx = d->sharedidx;
+ if(idx >= 0){
+ pcishared->dev[idx].irqno = d->irqno;
+ pcishared->dev[idx].ctrl = d->ctrl;
+ for(i = 0; i < 6; i++)
+ pcishared->dev[idx].bar_addr[i] = d->bar[i].addr;
+ }
+ }
+ unlock(&pcishared->lock);
+ }
+
+}
+
+void
+pcidump(void)
+{
+ PCIDev *d;
+ PCIBar *b;
+ int i, j;
+ extern IOApic *ioapic;
+ extern u32int lapic_svr[];
+
+ fprint(2, "=== PCI Dump ===\n");
+ fprint(2, "ioapic=%p, lapic_svr[0]=%#ux (APIC %s)\n",
+ ioapic, lapic_svr[0], (lapic_svr[0] & 0x100) ? "enabled" : "disabled");
+
+ for(d = pcidevs, i = 0; d != nil; d = d->next, i++){
+ fprint(2, "[%d] bdf=%#ux viddid=%#ux class=%#ux irqno=%d irqactive=%d ctrl=%#ux\n",
+ i, d->bdf, d->viddid, d->clrev >> 8, d->irqno, d->irqactive, d->ctrl);
+ for(j = 0; j < 6; j++){
+ b = &d->bar[j];
+ if(b->length > 0)
+ fprint(2, " BAR%d: type=%#ux addr=%#ux len=%#ux io=%p\n",
+ j, b->type, b->addr, b->length, b->io);
+ }
+ }
+ fprint(2, "================\n");
+}
+
+void
+pcisharedinit(void)
+{
+ int fd;
+ char buf[128];
+
+ remove("#g/vmx.pci/ctl");+ remove("#g/vmx.pci");+
+ snprint(buf, sizeof(buf), "#g/vmx.pci");
+ fd = create(buf, OREAD, DMDIR | 0777);
+ if(fd >= 0) close(fd);
+
+ snprint(buf, sizeof(buf), "#g/vmx.pci/ctl");
+ fd = open(buf, OWRITE|OTRUNC);
+ if(fd < 0) {
+ fprint(2, "pcisharedinit: cannot open ctl: %r\n");
+ return;
+ }
+ snprint(buf, sizeof(buf), "va 0x300002000 0x1000 sticky");
+ write(fd, buf, strlen(buf));
+ close(fd);
+
+ pcishared = segattach(0, "vmx.pci", nil, 0x1000);
+ if(pcishared == (void*)-1)
+ sysfatal("segattach vmx.pci: %r");+
+ memset(pcishared, 0, sizeof(PciShared));
+}
+
+/*
+ * Sync BAR state from shared memory and update local iobars list.
+ * Must be called before scanning iobars to ensure we see changes made by other CPUs.
+ */
+void
+pcisyncbars(void)
+{
+ PCIDev *d;
+ int i, idx;
+ u16int oldctrl;
+ u32int oldaddr;
+
+ if(pcishared == nil)
+ return;
+
+ lock(&pcishared->lock);
+ for(d = pcidevs; d != nil; d = d->next){
+ idx = d->sharedidx;
+ if(idx < 0)
+ continue;
+
+ oldctrl = d->ctrl;
+ d->ctrl = pcishared->dev[idx].ctrl;
+ d->irqno = pcishared->dev[idx].irqno;
+
+ for(i = 0; i < 6; i++){
+ oldaddr = d->bar[i].addr;
+ d->bar[i].addr = pcishared->dev[idx].bar_addr[i];
+
+ /* If ctrl or addr changed, update the iobars list */
+ if(d->ctrl != oldctrl || d->bar[i].addr != oldaddr)
+ updatebar(&d->bar[i]);
+ }
+ }
+ unlock(&pcishared->lock);
+}
--- /dev/null
+++ b/x86.h
@@ -1,0 +1,42 @@
+#define GDTTYPE(x) ((uvlong)(x)<<40)
+enum {
+ GDTR = GDTTYPE(0x10), /* read-only */
+ GDTRW = GDTTYPE(0x12), /* read-write */
+ GDTX = GDTTYPE(0x18), /* execute-only */
+ GDTRX = GDTTYPE(0x1A), /* read-execute */
+
+ GDTTSS = GDTTYPE(0x09),
+
+ GDTA = 1ULL<<40, /* accessed */
+ GDTE = 1ULL<<42, /* expand down (data only) */
+ GDTC = GDTE, /* conforming (code only) */
+ GDTP = 1ULL<<47, /* present */
+ GDT64 = 1ULL<<53, /* 64-bit code segment */
+ GDT32 = 1ULL<<54, /* 32-bit segment */
+ GDTG = 1ULL<<55, /* granularity */
+};
+#define GDTLIM(l) ((l) & 0xffff | (uvlong)((l) & 0xf0000)<<32)
+#define GDTBASE(l) (((uvlong)(l) & 0xffffff)<<16 | (uvlong)((l) & 0xff000000)<<32)
+#define GDTDPL(l) ((uvlong)(l)<<45)
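+
+/*
+ * example (illustrative only): a flat 32-bit ring-0 code descriptor,
+ * base 0, 4K-granular limit 0xfffff:
+ *
+ * GDTRX | GDTP | GDT32 | GDTG | GDTLIM(0xfffff) | GDTBASE(0) | GDTDPL(0)
+ *
+ * which evaluates to 0x00cf9a000000ffffULL.
+ */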
+
+enum {
+ Cr0Pg = 1<<31,
+
+ Cr4Pse = 1<<4,
+ Cr4Pae = 1<<5,
+ Cr4Osxsave = 1<<18,
+
+ EferLme = 1<<8,
+};
+
+extern char *x86reg[16];
+extern char *x86segreg[8];
+
+enum {
+ CF = 1<<0,
+ PF = 1<<2,
+ AF = 1<<4,
+ ZF = 1<<6,
+ SF = 1<<7,
+ OF = 1<<11,
+};
--