shithub: vmxsmp

Download patch

ref: c8c81df608dc488bbea1312f5e4a386aa73450a7
parent: 4a28392169ffedfff514d54d4039e048dd0562d8
author: glenda <glenda@fileserver>
date: Sat Dec 27 22:42:11 EST 2025

missing files added

--- /dev/null
+++ b/mptable.c
@@ -1,0 +1,304 @@
+/*
+ * mptable.c - MP Specification table generation for vmx SMP
+ *
+ * MP Floating Pointer: 16 bytes, signature "_MP_"
+ * MP Config Header: 44 bytes, signature "PCMP"
+ * Followed by variable entries:
+ *   - Processor: 20 bytes, type 0
+ *   - Bus: 8 bytes, type 1
+ *   - I/O APIC: 8 bytes, type 2
+ *   - I/O Interrupt: 8 bytes, type 3
+ *   - Local Interrupt: 8 bytes, type 4
+ *
+ * Bus Layout (matching typical PC):
+ *   Bus 0: PCI (primary PCI bus)
+ *   Bus 1: ISA (behind PCI-ISA bridge)
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * MP configuration table entry types.
+ * mpmktable() stores one of these in byte 0 of every entry it emits.
+ */
+enum {
+	MP_PROCESSOR	= 0,
+	MP_BUS		= 1,
+	MP_IOAPIC	= 2,
+	MP_IOINTR	= 3,
+	MP_LINTR	= 4,
+};
+
+/*
+ * Bus IDs - must be consistent throughout: the same value is written
+ * into the bus entries and into the source-bus byte of the interrupt
+ * entries built by mpmktable().
+ */
+enum {
+	BUS_PCI		= 0,	/* Primary PCI bus */
+	BUS_ISA		= 1,	/* ISA bus (behind PCI-ISA bridge) */
+};
+
+/*
+ * Return the byte-wise sum (modulo 256) of the len bytes starting at p.
+ * Callers store the negated result in the table so that the whole
+ * structure sums to zero.
+ */
+static uchar
+mpchecksum(uchar *p, int len)
+{
+	uchar total;
+	int i;
+
+	total = 0;
+	for(i = 0; i < len; i++)
+		total += p[i];
+	return total;
+}
+
+/*
+ * Generate the MP tables in guest memory at 0xF0000.
+ *
+ * Layout inside the 4K page mapped at guest-physical 0xF0000:
+ *   0xF0000  MP Floating Pointer structure (16 bytes)
+ *   0xF0040  MP Configuration Table header (44 bytes) followed by
+ *            nvcpu processor entries (20 bytes each) and 24 fixed
+ *            8-byte entries: 2 bus, 1 I/O APIC, 15 ISA interrupt,
+ *            4 PCI interrupt and 2 local interrupt entries.
+ *
+ * Takes no arguments and returns nothing.  If the table would not fit
+ * in the page or the page cannot be mapped, the problem is reported
+ * with vmerror() and guest memory is left untouched.
+ */
+void
+mpmktable(void)
+{
+	uchar *base, *fp, *cfg, *p;
+	int i, ncpu, maxcpu, entries;
+	u16int tbllen;
+
+	ncpu = nvcpu;
+
+	/*
+	 * Only 0x1000 bytes are mapped below.  The config table starts at
+	 * offset 0x40, has a 44-byte header and 24 fixed 8-byte entries;
+	 * whatever remains is available for 20-byte processor entries.
+	 * Refuse to build a table that would run past the mapping.
+	 */
+	maxcpu = (0x1000 - 0x40 - 44 - 24*8) / 20;
+	if(ncpu > maxcpu){
+		vmerror("mpmktable: %d CPUs do not fit in MP table (max %d)", ncpu, maxcpu);
+		return;
+	}
+
+	base = gptr(0xF0000, 0x1000);
+	if(base == nil){
+		vmerror("mpmktable: cannot map 0xF0000");
+		return;
+	}
+
+	/* Clear the area first; fields described as "= 0" below rely on this too */
+	memset(base, 0, 0x1000);
+
+	fp = base;
+	cfg = base + 0x40;  /* Config table at physical 0xF0040 */
+
+	/*
+	 * MP Floating Pointer Structure (16 bytes)
+	 */
+	fp[0] = '_';
+	fp[1] = 'M';
+	fp[2] = 'P';
+	fp[3] = '_';
+	/* Physical address 0x000F0040 in little-endian */
+	fp[4] = 0x40;
+	fp[5] = 0x00;
+	fp[6] = 0x0F;
+	fp[7] = 0x00;
+	fp[8] = 1;		/* Length: 1 * 16 = 16 bytes */
+	fp[9] = 4;		/* MP spec version 1.4 */
+	fp[10] = 0;		/* Checksum - filled in below */
+	fp[11] = 0;		/* Feature 1: 0 means config table is present */
+	fp[12] = 0;		/* Feature 2: bit 7 = IMCR present */
+	fp[13] = 0;
+	fp[14] = 0;
+	fp[15] = 0;
+	/* negated sum makes all 16 bytes add up to zero */
+	fp[10] = -mpchecksum(fp, 16);
+
+	/*
+	 * MP Configuration Table Header (44 bytes)
+	 */
+	p = cfg;
+
+	/* Signature */
+	p[0] = 'P';
+	p[1] = 'C';
+	p[2] = 'M';
+	p[3] = 'P';
+	/* Skip length (offset 4-5), fill later */
+	/* Spec revision */
+	p[6] = 4;
+	/* Skip checksum (offset 7), fill later */
+	/* OEM ID (8 bytes) */
+	memcpy(p + 8, "9FRONTVX", 8);
+	/* Product ID (12 bytes) */
+	memcpy(p + 16, "VMXSMP      ", 12);
+	/* OEM Table Pointer (4 bytes) = 0 */
+	p[28] = 0; p[29] = 0; p[30] = 0; p[31] = 0;
+	/* OEM Table Size (2 bytes) = 0 */
+	p[32] = 0; p[33] = 0;
+	/* Entry Count - fill later at offset 34 */
+	/* Local APIC Address (4 bytes) = 0xFEE00000 little-endian */
+	p[36] = 0x00;
+	p[37] = 0x00;
+	p[38] = 0xE0;
+	p[39] = 0xFE;
+	/* Extended Table Length (2 bytes) = 0 */
+	p[40] = 0; p[41] = 0;
+	/* Extended Table Checksum = 0 */
+	p[42] = 0;
+	/* Reserved */
+	p[43] = 0;
+
+	/* Entries start at offset 44 */
+	p = cfg + 44;
+	entries = 0;
+
+	/*
+	 * Processor entries (20 bytes each)
+	 */
+	for(i = 0; i < ncpu; i++){
+		p[0] = MP_PROCESSOR;
+		p[1] = i;			/* APIC ID */
+		p[2] = 0x14;			/* APIC version */
+		p[3] = 0x01 | (i == 0 ? 0x02 : 0x00);  /* enabled + BSP for CPU 0 */
+		/* CPU signature: family 6, model 9, stepping 1 */
+		p[4] = 0x91;  /* stepping=1, model low=9 */
+		p[5] = 0x06;  /* family low=6, model high=0 */
+		p[6] = 0x00;  /* family high=0, type=0 */
+		p[7] = 0x00;
+		/*
+		 * Feature flags (bytes 8-11, little-endian); values kept as written.
+		 * NOTE(review): if these follow the CPUID.1:EDX bit layout, the
+		 * bytes below set bits 3-6, 9, 11, 12 and 21 (PSE, TSC, MSR, PAE,
+		 * APIC, SEP, MTRR, DS) and leave FPU (bit 0) clear - confirm
+		 * that this is the intended set.
+		 */
+		p[8] = 0x78;
+		p[9] = 0x1A;
+		p[10] = 0x20;
+		p[11] = 0x00;
+		/* Reserved (bytes 12-19) */
+		p[12] = 0x00;
+		p[13] = 0x00;
+		p[14] = 0x00;
+		p[15] = 0x00;
+		p[16] = 0; p[17] = 0; p[18] = 0; p[19] = 0;
+		p += 20;
+		entries++;
+	}
+
+	/*
+	 * Bus entries (8 bytes each)
+	 *
+	 * IMPORTANT: The kernel looks up buses by TYPE and NUMBER.
+	 * PCI devices request "BusPCI, number 0" so we MUST have PCI as bus 0.
+	 * ISA devices use mpisabus which gets set from parsing these entries.
+	 */
+
+	/* PCI bus - ID 0 (primary PCI bus) */
+	p[0] = MP_BUS;
+	p[1] = BUS_PCI;			/* Bus ID 0 */
+	memcpy(p + 2, "PCI   ", 6);
+	p += 8;
+	entries++;
+
+	/* ISA bus - ID 1 (behind PCI-ISA bridge) */
+	p[0] = MP_BUS;
+	p[1] = BUS_ISA;			/* Bus ID 1 */
+	memcpy(p + 2, "ISA   ", 6);
+	p += 8;
+	entries++;
+
+	/*
+	 * I/O APIC entry (8 bytes)
+	 */
+	p[0] = MP_IOAPIC;
+	p[1] = ncpu;		/* I/O APIC ID = ncpu (after CPU IDs) */
+	p[2] = 0x11;		/* Version */
+	p[3] = 0x01;		/* Enabled */
+	/* Address 0xFEC00000 little-endian */
+	p[4] = 0x00;
+	p[5] = 0x00;
+	p[6] = 0xC0;
+	p[7] = 0xFE;
+	p += 8;
+	entries++;
+
+	/*
+	 * I/O Interrupt entries (8 bytes each)
+	 *
+	 * Bytes 2-3 form one little-endian flags word: polarity in
+	 * bits 1:0, trigger mode in bits 3:2; 0 means "conforms to bus".
+	 *
+	 * Map ISA IRQs to I/O APIC pins
+	 * IRQ 0 (timer) -> pin 2 (standard override)
+	 * IRQ 2 is not used (was cascade)
+	 * All other IRQs -> identity mapping
+	 */
+
+	/* IRQ 0 -> IOAPIC pin 2 (timer override) */
+	p[0] = MP_IOINTR;
+	p[1] = 0;           /* INT type */
+	p[2] = 0;           /* Flags low byte: polarity and trigger conform (edge for ISA) */
+	p[3] = 0;           /* Flags high byte = 0 */
+	p[4] = BUS_ISA;     /* Source: ISA bus */
+	p[5] = 0;           /* Source IRQ 0 */
+	p[6] = ncpu;        /* Dest: I/O APIC ID */
+	p[7] = 2;           /* INTIN# 2 */
+	p += 8;
+	entries++;
+
+	/* IRQs 1, 3-15 -> identity mapping (skip IRQ 2, it's cascade) */
+	for(i = 1; i < 16; i++){
+		if(i == 2)
+			continue;	/* Skip cascade */
+		p[0] = MP_IOINTR;
+		p[1] = 0;
+		p[2] = 0;
+		p[3] = 0;
+		p[4] = BUS_ISA;
+		p[5] = i;       /* Source IRQ */
+		p[6] = ncpu;    /* Dest: I/O APIC ID */
+		p[7] = i;       /* INTIN# = IRQ# */
+		p += 8;
+		entries++;
+	}
+
+	/*
+	 * PCI interrupt routing entries
+	 * Map PCI devices to I/O APIC pins 16-19
+	 * Source bus is PCI (BUS_PCI = 0)
+	 *
+	 * The IRQ field for PCI encodes (device << 2) | (pin - 1)
+	 * where pin is 1=INTA, 2=INTB, 3=INTC, 4=INTD
+	 *
+	 * pcibusmap assigns: irqno = 16 + (devno % 4)
+	 * So device 1 -> pin 17, device 2 -> pin 18, device 3 -> pin 19
+	 * and device 4 wraps around to pin 16.
+	 *
+	 * We create entries for devices 1-4 (device 0 is host bridge)
+	 */
+	for(i = 1; i <= 4; i++){
+		p[0] = MP_IOINTR;
+		p[1] = 0;           /* INT type */
+		p[2] = 0x0F;        /* Flags: active-low (bits 1:0=11) + level (bits 3:2=11) = 0x0F */
+		p[3] = 0x00;        /* Flags high byte = 0 */
+		p[4] = BUS_PCI;     /* Source: PCI bus (ID 0) */
+		p[5] = (i << 2) | 0;    /* device i, INTA */
+		p[6] = ncpu;        /* Dest: I/O APIC ID */
+		p[7] = 16 + (i % 4);    /* INTIN# */
+		p += 8;
+		entries++;
+	}
+
+	/*
+	 * Local Interrupt entries (8 bytes each)
+	 */
+
+	/* LINT0: ExtINT (for 8259 compatibility) */
+	p[0] = MP_LINTR;
+	p[1] = 3;		/* ExtINT */
+	p[2] = 0;		/* Flags low byte: polarity and trigger conform */
+	p[3] = 0;		/* Flags high byte = 0 */
+	p[4] = BUS_ISA;		/* Source bus: ISA */
+	p[5] = 0;		/* Source IRQ */
+	p[6] = 0xFF;		/* Dest: all local APICs */
+	p[7] = 0;		/* LINT0 */
+	p += 8;
+	entries++;
+
+	/* LINT1: NMI */
+	p[0] = MP_LINTR;
+	p[1] = 1;		/* NMI */
+	p[2] = 0;		/* Flags low byte: polarity and trigger conform */
+	p[3] = 0;		/* Flags high byte = 0 */
+	p[4] = BUS_ISA;		/* Source bus: ISA */
+	p[5] = 0;		/* Source IRQ */
+	p[6] = 0xFF;		/* All local APICs */
+	p[7] = 1;		/* LINT1 */
+	p += 8;
+	entries++;
+
+	/*
+	 * Fill in table length and entry count
+	 */
+	tbllen = p - cfg;
+	cfg[4] = tbllen & 0xFF;
+	cfg[5] = (tbllen >> 8) & 0xFF;
+	cfg[34] = entries & 0xFF;
+	cfg[35] = (entries >> 8) & 0xFF;
+
+	/* Compute checksum (cfg[7] is still 0 here, so it does not affect the sum) */
+	cfg[7] = -mpchecksum(cfg, tbllen);
+
+	/* Debug: print what we created */
+	vmerror("mpmktable: created MP table at 0xF0000");
+	vmerror("  %d CPUs, %d entries, %d bytes", ncpu, entries, tbllen);
+	vmerror("  PCI bus ID=%d, ISA bus ID=%d", BUS_PCI, BUS_ISA);
+}
--- /dev/null
+++ b/nanosec.c
@@ -1,0 +1,39 @@
+#include <u.h>
+#include <libc.h>
+#include <tos.h>
+
+#define Nsec 1000000000ULL
+
+/*
+ * nsec() is wallclock and can be adjusted by timesync
+ * so need to use cycles() instead, but fall back to
+ * nsec() in case we can't
+ */
+uvlong
+nanosec(void)
+{
+	static uvlong fasthz, xstart;
+	uvlong now, secs, rem;
+
+	/*
+	 * First call: choose the time source and remember the origin.
+	 * fasthz == ~0ULL marks the nsec() fallback.
+	 */
+	if(fasthz == 0){
+		if(_tos->cyclefreq == 0){
+			fasthz = ~0ULL;
+			xstart = nsec();
+		}else{
+			fasthz = _tos->cyclefreq;
+			cycles(&xstart);
+		}
+		return 0;
+	}
+
+	/* fallback: wall clock relative to the first call */
+	if(fasthz == ~0ULL)
+		return nsec() - xstart;
+
+	cycles(&now);
+	now -= xstart;
+
+	/*
+	 * Convert whole seconds and the sub-second remainder separately
+	 * instead of multiplying the full cycle count by Nsec.
+	 */
+	secs = now / fasthz;
+	rem = now % fasthz;
+	return secs*Nsec + rem*Nsec/fasthz;
+}
--- /dev/null
+++ b/pci.c
@@ -1,0 +1,728 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+PCIDev *pcidevs;
+PCIBar membars, iobars;
+
+PciShared *pcishared;
+
+/*
+ * Return the index of the pcishared->dev[] slot whose bdf matches,
+ * or -1 if no registered slot does.  Dereferences pcishared without
+ * checking it for nil and takes no lock.
+ */
+static int
+findsharedidx(u32int bdf)
+{
+	int slot;
+
+	slot = 0;
+	while(slot < pcishared->ndev){
+		if(pcishared->dev[slot].bdf == bdf)
+			return slot;
+		slot++;
+	}
+	return -1;
+}
+
+/*
+ * Claim a slot for d in the shared PCI table and copy its current
+ * ctrl, irqno, irqactive and BAR addresses into it.
+ *
+ * d->sharedidx is set to the claimed slot, or to -1 when there is no
+ * shared table or the table is full.  The other functions in this file
+ * test "sharedidx >= 0" before touching pcishared->dev[], so the -1
+ * must be stored explicitly; otherwise an unregistered device keeps
+ * whatever value the allocation left there and can alias another
+ * device's slot.
+ * NOTE(review): assumes PCIDev.sharedidx is a signed type - confirm in dat.h.
+ */
+static void
+pciregister(PCIDev *d)
+{
+	int i, j;
+
+	d->sharedidx = -1;
+	if(pcishared == nil)
+		return;
+
+	lock(&pcishared->lock);
+	if(pcishared->ndev >= MAXPCIDEV){
+		unlock(&pcishared->lock);
+		/* report after dropping the lock instead of failing silently */
+		vmerror("pci device %.6ux: shared PCI table full", d->bdf);
+		return;
+	}
+
+	i = pcishared->ndev++;
+	d->sharedidx = i;
+	pcishared->dev[i].bdf = d->bdf;
+	pcishared->dev[i].ctrl = d->ctrl;
+	pcishared->dev[i].irqno = d->irqno;
+	pcishared->dev[i].irqactive = d->irqactive;
+	for(j = 0; j < 6; j++)
+		pcishared->dev[i].bar_addr[j] = d->bar[j].addr;
+	unlock(&pcishared->lock);
+}
+
+
+/*
+ * Allocate a PCI device with the given bus/device/function, vendor and
+ * device ID and class/revision, push it onto the head of pcidevs and
+ * register it in the shared table.
+ *
+ * needirq != 0 leaves irqno at 0, which pcibusmap() later replaces with
+ * a real assignment; otherwise irqno is 0xff, which pciirqupdate() skips.
+ * Returns the new device.
+ */
+PCIDev *
+mkpcidev(u32int bdf, u32int viddid, u32int clrev, int needirq)
+{
+	PCIDev *dev;
+	PCIBar *bar;
+
+	dev = emalloc(sizeof(PCIDev));
+	dev->bdf = bdf;
+	dev->viddid = viddid;
+	dev->clrev = clrev;
+	if(needirq)
+		dev->irqno = 0;
+	else
+		dev->irqno = 0xff;
+
+	/* every BAR starts out as a one-element ring, i.e. on no bus list */
+	for(bar = dev->bar; bar < dev->bar + nelem(dev->bar); bar++){
+		bar->d = dev;
+		bar->busnext = bar;
+		bar->busprev = bar;
+	}
+
+	/* mkpcicap() hands out configuration space starting at this offset */
+	dev->capalloc = 64;
+
+	dev->next = pcidevs;
+	pcidevs = dev;
+
+	pciregister(dev);
+
+	return dev;
+}
+
+/*
+ * Hand out the next unused device number on bus 0, function 0,
+ * starting at device 1.  Each call logs the allocation on fd 2 and
+ * returns the BDF() encoding.
+ */
+u32int
+allocbdf(void)
+{
+	static int nextdev = 1;
+	u32int bdf;
+
+	bdf = BDF(0, nextdev, 0);
+	fprint(2, "allocbdf: dev=%d BDF=%#x\n", nextdev, bdf);
+	nextdev++;
+	return bdf;
+}
+/*
+ * Round l up to a power of two (e.g. 5 -> 8, 8 -> 8, 0 -> 0).
+ *
+ * Works on the negated value: each step ANDs it with an arithmetic
+ * (sign-extending) right shift of itself, which clears the low bits
+ * until only the leading run of ones remains; negating again gives the
+ * power of two.  Inputs above 0x80000000 have no 32-bit result and
+ * come out as 0.
+ */
+u32int
+roundpow2(u32int l)
+{
+	l = -l;
+	l &= (int)l >> 16;
+	l &= (int)l >> 8;
+	l &= (int)l >> 4;
+	l &= (int)l >> 2;
+	l &= (int)l >> 1;
+	return -l;
+}
+
+/*
+ * Fill in the first unused BAR (length == 0) of device d.
+ *
+ *   t    BAR type bits; bit 0 set means I/O BAR, clear means memory BAR
+ *   a    initial address
+ *   l    requested length; raised to at least 4 (I/O) or 4096 (memory)
+ *        and rounded up to a power of two
+ *   fn   handler, stored in b->io for I/O BARs only
+ *   aux  opaque pointer stored in b->aux
+ *
+ * Calls sysfatal() if all BARs of d are in use.  When the shared PCI
+ * table exists and d has a slot in it, the BAR address is mirrored
+ * there under pcishared->lock.  Returns the BAR.
+ */
+PCIBar *
+mkpcibar(PCIDev *d, u8int t, u32int a, u32int l, void *fn, void *aux)
+{
+	PCIBar *b;
+	int idx, barno;
+
+	/* I/O BARs must not have bit 1 set; memory BARs must not have bits 1-2 set */
+	assert((t & 1) == 0 || (t & 2) == 0);
+	assert((t & 1) != 0 || (t & 6) == 0);
+	if((t & 1) != 0 && l < 4) l = 4;
+	if((t & 1) == 0 && l < 4096) l = 4096;
+	/* not a power of two: round up */
+	if((l & l-1) != 0)
+		l = roundpow2(l);
+	/* find the first free BAR */
+	for(b = d->bar; b < d->bar + nelem(d->bar); b++)
+		if(b->length == 0)
+			break;
+	if(b == d->bar + nelem(d->bar))
+		sysfatal("pci bdf %6ux: too many bars", d->bdf);
+	b->addr = a;
+	b->type = t;
+	b->length = l;
+	/* one-element ring: not yet on membars/iobars; updatebar() links it later */
+	b->busnext = b;
+	b->busprev = b;
+	b->d = d;
+	if((b->type & 1) != 0)
+		b->io = fn;
+	b->aux = aux;
+
+	/* Sync BAR address to shared memory */
+	if(pcishared != nil){
+		lock(&pcishared->lock);
+		idx = d->sharedidx; 
+		if(idx >= 0){
+			barno = b - d->bar;
+			pcishared->dev[idx].bar_addr[barno] = b->addr;
+		}
+		unlock(&pcishared->lock);
+	}
+
+	return b;
+}
+/*
+ * Re-evaluate whether BAR b belongs on the membars or iobars list and
+ * mirror its address into the shared PCI table.
+ *
+ * The BAR is first unlinked from whatever list it is on.  It is then
+ * appended to
+ *   - membars, if it is a memory BAR and bit 1 of the device's ctrl is set;
+ *   - iobars, if it is an I/O BAR, bit 0 of ctrl is set, the address is
+ *     non-zero and a handler is installed.
+ * Unused BARs (length 0) and BARs failing those tests stay unlinked and
+ * the function returns before touching shared memory.
+ *
+ * This function takes no lock itself and locates the shared slot by
+ * searching for the device's bdf rather than using d->sharedidx.
+ */
+static void
+updatebar(PCIBar *b)
+{
+	int devno, barno;
+
+	/* unlink from the current ring and make b a ring of its own */
+	b->busnext->busprev = b->busprev;
+	b->busprev->busnext = b->busnext;
+	b->busnext = b;
+	b->busprev = b;
+	if(b->length == 0) return;
+	if((b->type & 1) == 0){
+		/* memory BAR: needs ctrl bit 1 */
+		if((b->d->ctrl & 2) == 0) return;
+		/* insert before the list head, i.e. at the tail */
+		b->busnext = &membars;
+		b->busprev = membars.busprev;
+		b->busnext->busprev = b;
+		b->busprev->busnext = b;
+	}else{
+		/* I/O BAR: needs ctrl bit 0, an address and a handler */
+		if((b->d->ctrl & 1) == 0 || b->addr == 0 || b->io == nil) return;
+		b->busnext = &iobars;
+		b->busprev = iobars.busprev;
+		b->busnext->busprev = b;
+		b->busprev->busnext = b;
+	}
+
+    /* publish the address in the shared slot with the same bdf, if any */
+    if(pcishared != nil){
+        for(devno = 0; devno < pcishared->ndev; devno++){
+            if(pcishared->dev[devno].bdf == b->d->bdf){
+                barno = b - b->d->bar;
+                pcishared->dev[devno].bar_addr[barno] = b->addr;
+                break;
+            }
+        }
+    }
+
+}
+
+/*
+ * Re-drive the interrupt line of every device that has an IRQ
+ * (irqno != 0xff) with its currently recorded irqactive state.
+ */
+static void
+pciirqupdate(void)
+{
+	PCIDev *dev;
+
+	dev = pcidevs;
+	while(dev != nil){
+		if(dev->irqno != 0xff)
+			pciirq(dev, dev->irqactive);
+		dev = dev->next;
+	}
+}
+
+/*
+ * Allocate a capability of the given length in d's configuration
+ * space and append it to the end of d's capability list.
+ *
+ * readf is mandatory; writef may be nil.  The capability is placed at
+ * the current d->capalloc offset, which is then advanced by length.
+ * Calls sysfatal() if the capability would extend past offset 256.
+ * Returns the new capability.
+ */
+PCICap *
+mkpcicap(PCIDev *d, u8int length, u32int (*readf)(PCICap *, u8int), void (*writef)(PCICap *, u8int, u32int, u32int))
+{
+	PCICap *cap, **link;
+
+	assert(readf != nil);
+	if(d->capalloc + length > 256)
+		sysfatal("mkpcicap (dev %#ux): out of configuration space", d->bdf);
+
+	cap = emalloc(sizeof(PCICap));
+	cap->dev = d;
+	cap->read = readf;
+	cap->write = writef;
+	cap->length = length;
+	cap->addr = d->capalloc;
+	d->capalloc += length;
+
+	/* walk to the terminating nil link and hook the new entry in there */
+	link = &d->cap;
+	while(*link != nil)
+		link = &(*link)->next;
+	*link = cap;
+
+	return cap;
+}
+
+/*
+ * Look up the device with the given bdf on the pcidevs list.
+ * Returns the device, or nil if there is none.  When debug is set the
+ * search is traced on fd 2.
+ */
+static PCIDev *
+findpcidev(u32int bdf)
+{
+	PCIDev *dev;
+
+	if(debug)
+		fprint(2, "findpcidev: looking for bdf=%#x\n", bdf);
+
+	dev = pcidevs;
+	while(dev != nil){
+		if(debug)
+			fprint(2, "  checking d->bdf=%#x\n", dev->bdf);
+		if(dev->bdf == bdf)
+			break;
+		dev = dev->next;
+	}
+
+	if(dev == nil && debug)
+		fprint(2, "  not found!\n");
+	return dev;
+}
+
+/*
+ * Return the capability of d whose range [addr, addr+length) contains
+ * the configuration-space offset addr, or nil if none does.
+ */
+static PCICap *
+findpcicap(PCIDev *d, u8int addr)
+{
+	PCICap *cap;
+	uint off;
+
+	cap = d->cap;
+	while(cap != nil){
+		/* unsigned difference: offsets below cap->addr wrap to large values */
+		off = (uint)(addr - cap->addr);
+		if(off < cap->length)
+			return cap;
+		cap = cap->next;
+	}
+	return nil;
+}
+
+/*
+ * pciread - read the 32-bit configuration register at offset addr of d.
+ *
+ * Takes pcishared->lock itself while refreshing d's ctrl, irqno and BAR
+ * addresses from the shared table; the lock is released before the
+ * register is decoded.  Offsets 0x00-0x3c are served from the device
+ * structure, anything else is forwarded to the capability covering that
+ * offset.  Unhandled offsets are logged with vmdebug() and read as 0.
+ *
+ * NOTE(review): nothing in this file calls pciread(); pciio() uses
+ * pciread_unlocked() instead.
+ */
+static u32int
+pciread (PCIDev *d, int addr)
+{
+    u32int val;
+    PCICap *c;
+    int n, idx;
+
+    /* Lock and sync from shared memory */
+    if(pcishared != nil){
+        lock(&pcishared->lock);
+        idx = d->sharedidx; 
+        if(idx >= 0){
+            d->ctrl = pcishared->dev[idx].ctrl;
+            d->irqno = pcishared->dev[idx].irqno;
+            for(n = 0; n < 6; n++)
+                d->bar[n].addr = pcishared->dev[idx].bar_addr[n];
+        }
+        unlock(&pcishared->lock);
+    }
+	if (debug)
+	fprint(2, "pciread: d=%p bdf=%#x addr=%#x\n", d, d->bdf, addr);
+
+	switch(addr){
+	case 0x00: 
+		if (debug)
+		fprint(2, "pciread: returning viddid=%#x\n", d->viddid);
+		return d->viddid;
+	/* fixed bits 0xa00000, bit 20 when a capability list exists, plus ctrl */
+	case 0x04: return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;
+	case 0x08: return d->clrev;
+	case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
+	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+		/* BAR n: address combined with the type bits */
+		n = addr - 0x10 >> 2;
+		return d->bar[n].addr | d->bar[n].type;
+	case 0x28: return 0; /* Cardbus */
+	case 0x2c: return d->subid; /* Subsystem ID */
+	case 0x30: return 0; /* Expansion ROM */
+	case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
+	case 0x38: return 0; /* Reserved */
+	case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
+	}
+	c = findpcicap(d, addr);
+	if(c != nil){
+		val = c->read(c, addr - c->addr);
+		/* first dword of a capability: bits 8-15 are replaced by the next capability's offset (0 if last) */
+		if(addr == c->addr){
+			val &= ~0xff00;
+			if(c->next != nil)
+				val |= c->next->addr << 8;
+		}
+		return val;
+	}
+	vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);
+	return 0;
+}
+
+/*
+ * pciread_unlocked - same register decoding as pciread(), but without
+ * taking pcishared->lock.  pciio() calls this while already holding
+ * the lock.
+ *
+ * Refreshes d's ctrl, irqno and BAR addresses from the shared table,
+ * then returns the 32-bit configuration register at offset addr.
+ * Unhandled offsets are logged with vmdebug() and read as 0.
+ */
+static u32int
+pciread_unlocked (PCIDev *d, int addr)
+{
+    u32int val;
+    PCICap *c;
+    int n, idx;
+
+    /* Sync from shared memory; no locking here */
+    if(pcishared != nil){
+        idx = d->sharedidx;
+        if(idx >= 0){
+            d->ctrl = pcishared->dev[idx].ctrl;
+            d->irqno = pcishared->dev[idx].irqno;
+            for(n = 0; n < 6; n++)
+                d->bar[n].addr = pcishared->dev[idx].bar_addr[n];
+        }
+    }
+	if (debug)
+	fprint(2, "pciread: d=%p bdf=%#x addr=%#x\n", d, d->bdf, addr);
+
+	switch(addr){
+	case 0x00: 
+		if (debug)
+		fprint(2, "pciread: returning viddid=%#x\n", d->viddid);
+		return d->viddid;
+	/* fixed bits 0xa00000, bit 20 when a capability list exists, plus ctrl */
+	case 0x04: return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;
+	case 0x08: return d->clrev;
+	case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
+	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+		/* BAR n: address combined with the type bits */
+		n = addr - 0x10 >> 2;
+		return d->bar[n].addr | d->bar[n].type;
+	case 0x28: return 0; /* Cardbus */
+	case 0x2c: return d->subid; /* Subsystem ID */
+	case 0x30: return 0; /* Expansion ROM */
+	case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
+	case 0x38: return 0; /* Reserved */
+	case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
+	}
+	c = findpcicap(d, addr);
+	if(c != nil){
+		val = c->read(c, addr - c->addr);
+		/* first dword of a capability: bits 8-15 are replaced by the next capability's offset (0 if last) */
+		if(addr == c->addr){
+			val &= ~0xff00;
+			if(c->next != nil)
+				val |= c->next->addr << 8;
+		}
+		return val;
+	}
+	vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);
+	return 0;
+}
+
+
+/*
+ * pciwrite - write val, restricted to the bits in mask, to the 32-bit
+ * configuration register at offset addr of d.
+ *
+ * Takes pcishared->lock for the duration of the update, copies the
+ * resulting ctrl, irqno and BAR addresses to the shared table, and
+ * calls pciirqupdate() only after the lock has been released.
+ * Writes to unhandled offsets are logged with vmdebug() and ignored.
+ *
+ * NOTE(review): nothing in this file calls pciwrite(); pciio() uses
+ * pciwrite_unlocked() instead.
+ */
+static void
+pciwrite(PCIDev *d, int addr, u32int val, u32int mask)
+{
+    int n, idx;
+    PCICap *c;
+	int do_irq_update = 0;
+
+    /* Lock shared state */
+    if(pcishared != nil)
+        lock(&pcishared->lock);
+
+    switch(addr){
+    case 0x04:
+        /* only the bits in 0x21f are kept; every BAR is re-evaluated against the new ctrl */
+        d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
+        for(n = 0; n < nelem(d->bar); n++)
+            updatebar(&d->bar[n]);
+        break;
+    case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+        n = addr - 0x10 >> 2;
+        /* drop the low bits of the written value, then align the address to the BAR length */
+        val &= (d->bar[n].type & 1) != 0 ? ~15 : ~3;
+        d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
+        updatebar(&d->bar[n]);
+        break;
+    case 0x30:
+        /* writes to this offset are silently ignored */
+        break;
+    case 0x3c:
+        {
+        extern IOApic *ioapic;
+        /* irqno is only writable when no I/O APIC is present */
+        if(ioapic == nil)
+            d->irqno = (d->irqno & ~mask | val & mask) & 0xff;
+        do_irq_update = 1;
+        break;
+        }
+    default:
+        c = findpcicap(d, addr);
+        if(c != nil && c->write != nil){
+            c->write(c, addr - c->addr, val, mask);
+            break;
+        }
+        vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);
+        break;
+    }
+
+    /* Sync to shared memory */
+    if(pcishared != nil){
+        idx = d->sharedidx;
+        if(idx >= 0){
+            pcishared->dev[idx].ctrl = d->ctrl;
+            pcishared->dev[idx].irqno = d->irqno;
+            for(n = 0; n < 6; n++)
+                pcishared->dev[idx].bar_addr[n] = d->bar[n].addr;
+        }
+        unlock(&pcishared->lock);
+    }
+
+    /* Deferred until after the lock is released */
+    if(do_irq_update)
+        pciirqupdate();
+}
+
+/*
+ * pciwrite_unlocked - Write to PCI config space without taking pcishared->lock
+ * Called from pciio() which already holds the lock.
+ *
+ * val is applied to the register at offset addr of d, restricted to the
+ * bits set in mask; the resulting ctrl, irqno and BAR addresses are then
+ * copied to the shared table.
+ * Returns 1 if pciirqupdate() should be called after releasing the lock,
+ * 0 otherwise.
+ *
+ * CRITICAL: Do NOT call pciirqupdate() here - it would take ioapic->lock
+ * while pcishared->lock is held, causing potential deadlock.
+ */
+static int
+pciwrite_unlocked(PCIDev *d, int addr, u32int val, u32int mask)
+{
+    int n, idx;
+    PCICap *c;
+    int do_irq_update = 0;
+
+    switch(addr){
+    case 0x04:
+        /* only the bits in 0x21f are kept; every BAR is re-evaluated against the new ctrl */
+        d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
+        for(n = 0; n < nelem(d->bar); n++)
+            updatebar(&d->bar[n]);
+        break;
+    case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+        n = addr - 0x10 >> 2;
+        /* drop the low bits of the written value, then align the address to the BAR length */
+        val &= (d->bar[n].type & 1) != 0 ? ~15 : ~3;
+        d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
+        updatebar(&d->bar[n]);
+        break;
+    case 0x30:
+        /* writes to this offset are silently ignored */
+        break;
+    case 0x3c:
+        {
+        extern IOApic *ioapic;
+        /* irqno is only writable when no I/O APIC is present */
+        if(ioapic == nil)
+            d->irqno = (d->irqno & ~mask | val & mask) & 0xff;
+        do_irq_update = 1;  /* Caller must call pciirqupdate() after releasing lock */
+        break;
+        }
+    default:
+        c = findpcicap(d, addr);
+        if(c != nil && c->write != nil){
+            c->write(c, addr - c->addr, val, mask);
+            break;
+        }
+        vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);
+        break;
+    }
+
+    /* Sync to shared memory - lock held by caller */
+    if(pcishared != nil){
+        idx = d->sharedidx;
+        if(idx >= 0){
+            pcishared->dev[idx].ctrl = d->ctrl;
+            pcishared->dev[idx].irqno = d->irqno;
+            for(n = 0; n < 6; n++)
+                pcishared->dev[idx].bar_addr[n] = d->bar[n].addr;
+        }
+    }
+
+    return do_irq_update;
+}
+
+
+/*
+ * pciio - I/O port handler for the PCI configuration mechanism.
+ *
+ *   isin  non-zero for a port read, zero for a port write
+ *   port  0xcf8 (config address) or 0xcfc-0xcff (config data)
+ *   val   value being written (ignored for reads)
+ *   sz    access size in bytes
+ *
+ * The config address register lives in pcishared->cfgaddr.  Data
+ * accesses are forwarded to the device selected by bits 8-23 of that
+ * register, provided bit 31 is set.  Returns the value read; 0 for
+ * writes; -1 for data reads that select no device or when there is no
+ * shared PCI table at all.  Other ports are passed to iowhine().
+ *
+ * pcishared->lock is held while the register is accessed and released
+ * before pciirqupdate() is called.
+ */
+u32int
+pciio(int isin, u16int port, u32int val, int sz, void *)
+{
+    u32int mask, cfgaddr, ret;
+    PCIDev *d;
+    int do_irq_update = 0;
+
+    if(pcishared == nil)
+        return -1;
+
+    lock(&pcishared->lock);
+
+    /* reads are distinguished from writes by bit 16 of the switch value */
+    switch(isin << 16 | port){
+    case 0x0cf8:
+        pcishared->cfgaddr = val;
+        ret = 0;
+        break;
+    case 0x10cf8:
+        ret = pcishared->cfgaddr & ~0x7f000003;
+        break;
+    case 0xcfc: case 0xcfd: case 0xcfe: case 0xcff:
+        cfgaddr = pcishared->cfgaddr;
+        /* move value and byte mask into the lane addressed by the low port bits */
+        val <<= 8 * (port & 3);
+        mask = -1UL >> 32 - 8 * sz << 8 * (port & 3);
+        if((cfgaddr & 1<<31) != 0 && (d = findpcidev(cfgaddr & 0xffff00), d != nil))
+            do_irq_update = pciwrite_unlocked(d, cfgaddr & 0xfc, val, mask);
+        ret = 0;
+        break;
+    case 0x10cfc: case 0x10cfd: case 0x10cfe: case 0x10cff:
+        cfgaddr = pcishared->cfgaddr;
+        if((cfgaddr & 1<<31) == 0 || (d = findpcidev(cfgaddr & 0xffff00), d == nil))
+            ret = -1;
+        else
+            ret = pciread_unlocked(d, cfgaddr & 0xfc) >> 8 * (port & 3);
+        break;
+    default:
+        unlock(&pcishared->lock);
+        return iowhine(isin, port, val, sz, "pci");
+    }
+
+    unlock(&pcishared->lock);
+
+    /* CRITICAL: Call pciirqupdate() AFTER releasing pcishared->lock
+     * to prevent deadlock with ioapic->lock */
+    if(do_irq_update)
+        pciirqupdate();
+
+    return ret;
+}
+
+extern void ioapic_irqline_smp(int, int);
+
+/*
+ * pciirq - Set PCI device IRQ line
+ *
+ * Records status (normalised to 0/1) in d->irqactive and forwards it
+ * through ioapic_irqline_smp():
+ *   - with an I/O APIC: on pin 16 + (device number % 4), level = irqactive;
+ *   - without one and irqno < 16: on line irqno, with the level inverted
+ *     (0 when active, 1 when inactive);
+ *   - otherwise nothing is signalled.
+ *
+ * WARNING: per the original note this path ends up taking ioapic->lock,
+ * so the caller must NOT hold pcishared->lock to avoid deadlock.
+ * NOTE(review): the debug messages below still name ioapic_set_irq() and
+ * irqline(), but the code calls ioapic_irqline_smp() in both branches.
+ */
+void
+pciirq(PCIDev *d, int status)
+{
+    int devno, pin;
+    extern IOApic *ioapic;
+    
+    d->irqactive = status != 0;
+    /* device number is bits 11-15 of the bdf */
+    devno = (d->bdf >> 11) & 0x1f;
+    pin = 16 + (devno % 4);
+    if (debug)
+    fprint(2, "pciirq: bdf=%#x devno=%d irqno=%d status=%d pin=%d ioapic=%p\n",
+           d->bdf, devno, d->irqno, status, pin, ioapic);
+    
+    if(ioapic != nil){
+		if (debug)
+        fprint(2, "pciirq: using IOAPIC path, calling ioapic_set_irq(%d, %d)\n", 
+               pin, d->irqactive);
+		ioapic_irqline_smp(pin, d->irqactive); 
+    } else if(d->irqno < 16){
+		if (debug)
+        fprint(2, "pciirq: using legacy PIC path, irqline(%d, %d)\n",
+               d->irqno, d->irqactive ? 0 : 1);
+        ioapic_irqline_smp(d->irqno, d->irqactive ? 0 : 1);
+    } else {
+		if (debug)
+        fprint(2, "pciirq: NO PATH TAKEN - ioapic=%p irqno=%d\n", 
+               ioapic, d->irqno);
+    }
+}
+
+/*
+ * Initialise the PCI bus state: make the memory and I/O BAR lists
+ * empty rings and create the device at bus 0, device 0, function 0
+ * (no IRQ).
+ */
+void
+pciinit(void)
+{
+	/* an empty list is a head that points at itself in both directions */
+	membars.busnext = &membars;
+	membars.busprev = &membars;
+	iobars.busnext = &iobars;
+	iobars.busprev = &iobars;
+
+	mkpcidev(BDF(0,0,0), 0x01008086, 0x06000000, 0);
+}
+
+/*
+ * pcibusmap - assign I/O addresses and IRQs to all devices on pcidevs.
+ *
+ * For every device, ctrl bits 0 and 1 are set, and each used BAR that
+ * has no address yet gets the next free I/O port range starting at
+ * 0x1000.  Memory BARs without an address, and I/O BARs that would
+ * not fit below 0x10000, are reported with vmerror() and skipped.
+ *
+ * Devices whose irqno is still 0 get an IRQ: with an I/O APIC it is
+ * pin 16 + (device number % 4); without one the IRQs 5, 7, 9, 10, 11
+ * are handed out round-robin and passed to elcr() and
+ * ioapic_irqline_smp() afterwards.
+ *
+ * Finally the resulting irqno, ctrl and BAR addresses of every
+ * registered device are copied to the shared PCI table under its lock.
+ */
+void
+pcibusmap(void)
+{
+	u16int iop;
+	u16int irqs, uirqs;
+	PCIDev *d;
+	PCIBar *b;
+	int irq, devno;
+	int i;
+	extern IOApic *ioapic;
+	
+	iop = 0x1000;					/* next free I/O port */
+	irqs = 1<<5|1<<7|1<<9|1<<10|1<<11;		/* IRQs available in legacy mode */
+	uirqs = 0;					/* IRQs actually handed out */
+	irq = 0;
+	
+	for(d = pcidevs; d != nil; d = d->next){
+		d->ctrl |= 3;
+		for(b = d->bar; b < d->bar + nelem(d->bar); b++){
+			/* skip unused BARs and BARs that already have an address */
+			if(b->length == 0 || b->addr != 0)
+				continue;
+			if((b->type & 1) == 0){
+				vmerror("pci device %.6ux: memory bars unsupported", d->bdf);
+				continue;
+			}
+			if(iop + b->length >= 0x10000){
+				vmerror("pci device %.6ux: not enough I/O address space for BAR%d (len=%d)", d->bdf, (int)(b - d->bar), b->length);
+				continue;
+			}
+			b->addr = iop;
+			iop += b->length;
+			updatebar(b);
+		}
+		if(d->irqno == 0){
+			if(ioapic != nil){
+				/* IOAPIC mode: assign pins 16+ based on device number */
+				devno = (d->bdf >> 11) & 0x1f;
+				d->irqno = 16 + (devno % 4);
+			} else {
+				/* Legacy PIC mode: assign from available IRQs */
+				/* advance (mod 16) to the next IRQ present in irqs */
+				do
+					irq = irq + 1 & 15;
+				while((irqs & 1<<irq) == 0);
+				d->irqno = irq;
+				uirqs |= 1<<irq;
+			}
+		}
+	}
+	
+	/* Legacy PIC setup - only needed when no IOAPIC */
+	if(ioapic == nil){
+		elcr(uirqs);
+		for(i = 0; i < 16; i++)
+			if((uirqs & 1<<i) != 0)
+				ioapic_irqline_smp(i, 1);
+	}
+
+    /* Sync assigned IRQs, ctrl and BAR addresses to shared memory */
+    if(pcishared != nil){
+        lock(&pcishared->lock);
+        for(d = pcidevs; d != nil; d = d->next){
+            int idx = d->sharedidx;
+            if(idx >= 0){
+                pcishared->dev[idx].irqno = d->irqno;
+                pcishared->dev[idx].ctrl = d->ctrl;
+                for(i = 0; i < 6; i++)
+                    pcishared->dev[idx].bar_addr[i] = d->bar[i].addr;
+            }
+        }
+        unlock(&pcishared->lock);
+    }
+
+}
+
+/*
+ * Print a human-readable dump of the I/O APIC pointer, the state of
+ * lapic_svr[0] and every PCI device with its in-use BARs to fd 2.
+ */
+void
+pcidump(void)
+{
+	PCIDev *dev;
+	PCIBar *bar;
+	int ndev, barno;
+	char *state;
+	extern IOApic *ioapic;
+	extern u32int lapic_svr[];
+
+	/* bit 8 of lapic_svr[0] decides the enabled/disabled label */
+	if(lapic_svr[0] & 0x100)
+		state = "enabled";
+	else
+		state = "disabled";
+
+	fprint(2, "=== PCI Dump ===\n");
+	fprint(2, "ioapic=%p, lapic_svr[0]=%#ux (APIC %s)\n", 
+		ioapic, lapic_svr[0], state);
+
+	ndev = 0;
+	for(dev = pcidevs; dev != nil; dev = dev->next){
+		fprint(2, "[%d] bdf=%#ux viddid=%#ux class=%#ux irqno=%d irqactive=%d ctrl=%#ux\n",
+			ndev, dev->bdf, dev->viddid, dev->clrev >> 8, dev->irqno, dev->irqactive, dev->ctrl);
+		/* only BARs with a non-zero length are in use */
+		for(barno = 0; barno < 6; barno++){
+			bar = &dev->bar[barno];
+			if(bar->length > 0)
+				fprint(2, "    BAR%d: type=%#ux addr=%#ux len=%#ux io=%p\n",
+					barno, bar->type, bar->addr, bar->length, bar->io);
+		}
+		ndev++;
+	}
+	fprint(2, "================\n");
+}
+
+PciShared *pcishared;
+
+/*
+ * pcisharedinit - create the "vmx.pci" segment under #g, attach it and
+ * zero the PciShared structure at its start.
+ *
+ * Any previous vmx.pci entry is removed first.  The results of remove(),
+ * create() and write() are not checked.  If the ctl file cannot be
+ * opened the function prints a message and returns with pcishared left
+ * unchanged; a failing segattach() is fatal.
+ */
+void
+pcisharedinit(void)
+{
+    int fd;
+    char buf[128];
+    
+	/* drop any leftover segment from an earlier run */
+	remove("#g/vmx.pci/ctl");
+	remove("#g/vmx.pci");
+
+    snprint(buf, sizeof(buf), "#g/vmx.pci");
+    fd = create(buf, OREAD, DMDIR | 0777);
+    if(fd >= 0) close(fd);
+    
+    snprint(buf, sizeof(buf), "#g/vmx.pci/ctl");
+    fd = open(buf, OWRITE|OTRUNC);
+    if(fd < 0) {
+        fprint(2, "pcisharedinit: cannot open ctl: %r\n");
+        return;
+    }
+    /* ask for a 0x1000-byte segment at virtual address 0x300002000 */
+    snprint(buf, sizeof(buf), "va 0x300002000 0x1000 sticky");
+    write(fd, buf, strlen(buf));
+    close(fd);
+    
+    pcishared = segattach(0, "vmx.pci", nil, 0x1000);
+    if(pcishared == (void*)-1)
+        sysfatal("segattach vmx.pci: %r");
+    
+    /* NOTE(review): assumes sizeof(PciShared) <= 0x1000, the size attached above - confirm */
+    memset(pcishared, 0, sizeof(PciShared));
+}
+
+/*
+ * Sync BAR state from shared memory and update local iobars list.
+ * Must be called before scanning iobars to ensure we see changes made by other CPUs.
+ *
+ * For every device with a shared slot, ctrl, irqno and all six BAR
+ * addresses are copied from the shared table under pcishared->lock.
+ * A BAR is passed to updatebar() when its address changed or when the
+ * device's ctrl changed.  Does nothing when there is no shared table.
+ */
+void
+pcisyncbars(void)
+{
+    PCIDev *d;
+    int i, idx;
+    u16int oldctrl;
+    u32int oldaddr;
+    
+    if(pcishared == nil)
+        return;
+    
+    lock(&pcishared->lock);
+    for(d = pcidevs; d != nil; d = d->next){
+        idx = d->sharedidx;
+        /* device has no slot in the shared table */
+        if(idx < 0)
+            continue;
+        
+        oldctrl = d->ctrl;
+        d->ctrl = pcishared->dev[idx].ctrl;
+        d->irqno = pcishared->dev[idx].irqno;
+        
+        for(i = 0; i < 6; i++){
+            oldaddr = d->bar[i].addr;
+            d->bar[i].addr = pcishared->dev[idx].bar_addr[i];
+            
+            /* If ctrl or addr changed, update the iobars list */
+            if(d->ctrl != oldctrl || d->bar[i].addr != oldaddr)
+                updatebar(&d->bar[i]);
+        }
+    }
+    unlock(&pcishared->lock);
+}
--- /dev/null
+++ b/x86.h
@@ -1,0 +1,42 @@
+/*
+ * Descriptor bits, expressed as 64-bit masks.
+ * GDTTYPE(x) places the type value x at bit 40 of the descriptor.
+ */
+#define GDTTYPE(x) ((uvlong)(x)<<40)
+enum {
+	GDTR	= GDTTYPE(0x10), /* read-only */
+	GDTRW	= GDTTYPE(0x12), /* read-write */
+	GDTX	= GDTTYPE(0x18), /* execute-only */
+	GDTRX	= GDTTYPE(0x1A), /* read-execute */
+	
+	GDTTSS	= GDTTYPE(0x09),
+	
+	GDTA	= 1ULL<<40,	/* accessed */
+	GDTE	= 1ULL<<42,	/* expand down (data only) */
+	GDTC	= GDTE,		/* conforming (code only) */
+	GDTP	= 1ULL<<47,	/* present */
+	GDT64	= 1ULL<<53,	/* 64-bit code segment */
+	GDT32	= 1ULL<<54,	/* 32-bit segment */
+	GDTG	= 1ULL<<55,	/* granularity */
+};
+/*
+ * Field packers for a 64-bit descriptor:
+ *   GDTLIM(l)   bits 0-15 of l stay at bits 0-15, bits 16-19 go to bits 48-51
+ *   GDTBASE(l)  bits 0-23 of l go to bits 16-39, bits 24-31 go to bits 56-63
+ *   GDTDPL(l)   l shifted to bit 45
+ */
+#define GDTLIM(l) ((l) & 0xffff | (uvlong)((l) & 0xf0000)<<32)
+#define GDTBASE(l) (((uvlong)(l) & 0xffffff)<<16 | (uvlong)((l) & 0xff000000)<<32)
+#define GDTDPL(l) ((uvlong)(l)<<45)
+
+/*
+ * Single-bit masks; going by the names, these are bits of the CR0 and
+ * CR4 control registers and of the EFER register.
+ */
+enum {
+	Cr0Pg	= 1<<31,
+	
+	Cr4Pse		= 1<<4,
+	Cr4Pae		= 1<<5,
+	Cr4Osxsave	= 1<<18,
+	
+	EferLme	= 1<<8,
+};
+
+extern char *x86reg[16];
+extern char *x86segreg[8];
+
+/*
+ * Single-bit flag masks (bits 0, 2, 4, 6, 7 and 11); presumably the
+ * x86 status flags of the same names - confirm against the users.
+ */
+enum {
+	CF	= 1<<0,
+	PF	= 1<<2,
+	AF	= 1<<4,
+	ZF	= 1<<6,
+	SF	= 1<<7,
+	OF	= 1<<11,
+};
--