/*
 * shithub: vmxsmp
 * ref: c8c81df608dc488bbea1312f5e4a386aa73450a7
 * dir: /pci.c/
 */
#include <u.h>
#include <libc.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"

/*
 * Head of the singly linked list of emulated PCI devices, chained
 * through PCIDev.next; mkpcidev() pushes new devices at the front.
 */
PCIDev *pcidevs;
/*
 * Sentinel heads of the circular busnext/busprev rings.  pciinit()
 * makes them self-linked and updatebar() links a BAR onto membars or
 * iobars while the owning device has the matching decode bit set.
 */
PCIBar membars, iobars;

/* Shared PCI state; stays nil until pcisharedinit() attaches the segment. */
PciShared *pcishared;

/*
 * Return the index of the first pcishared->dev[] entry (among the ndev
 * registered ones) whose bdf equals the argument, or -1 if there is none.
 * Dereferences pcishared unconditionally and takes no lock itself.
 */
static int
findsharedidx(u32int bdf)
{
	int slot;

	slot = 0;
	while(slot < pcishared->ndev){
		if(pcishared->dev[slot].bdf == bdf)
			return slot;
		slot++;
	}
	return -1;
}

/*
 * Claim the next free slot of the shared device table for d and copy
 * d's current bdf, ctrl, irqno, irqactive and BAR addresses into it.
 *
 * On success d->sharedidx is the slot index.  When there is no shared
 * segment, or the table already holds MAXPCIDEV entries, d->sharedidx is
 * set to -1 so that the `sharedidx >= 0` guards used throughout this
 * file skip the device instead of mistaking a never-assigned index for
 * slot 0.  (Assumes sharedidx is a signed field, as those guards imply.)
 */
static void
pciregister(PCIDev *d)
{
	int i, j;

	d->sharedidx = -1;
	if(pcishared == nil)
		return;

	lock(&pcishared->lock);
	if(pcishared->ndev >= MAXPCIDEV){
		unlock(&pcishared->lock);
		return;
	}

	i = pcishared->ndev++;
	d->sharedidx = i;
	pcishared->dev[i].bdf = d->bdf;
	pcishared->dev[i].ctrl = d->ctrl;
	pcishared->dev[i].irqno = d->irqno;
	pcishared->dev[i].irqactive = d->irqactive;
	for(j = 0; j < 6; j++)
		pcishared->dev[i].bar_addr[j] = d->bar[j].addr;
	unlock(&pcishared->lock);
}


/*
 * Allocate a PCI device, link it at the head of pcidevs and register it
 * in the shared table.
 *
 * bdf     - bus/device/function address of the device
 * viddid  - value returned for config register 0x00
 * clrev   - value returned for config register 0x08
 * needirq - non-zero: irqno starts at 0, which pcibusmap() later replaces
 *           with a real line; zero: irqno is 0xff, which pciirqupdate()
 *           skips
 *
 * Every BAR starts self-linked on its own bus ring, and capability space
 * is handed out from offset 64 upward (see mkpcicap).  Fields not set
 * here are left as emalloc() returned them.
 */
PCIDev *
mkpcidev(u32int bdf, u32int viddid, u32int clrev, int needirq)
{
	PCIDev *dev;
	PCIBar *bar;

	dev = emalloc(sizeof(PCIDev));
	dev->bdf = bdf;
	dev->viddid = viddid;
	dev->clrev = clrev;
	dev->capalloc = 64;
	if(needirq)
		dev->irqno = 0;
	else
		dev->irqno = 0xff;
	for(bar = dev->bar; bar < dev->bar + nelem(dev->bar); bar++){
		bar->d = dev;
		bar->busnext = bar;
		bar->busprev = bar;
	}

	dev->next = pcidevs;
	pcidevs = dev;

	pciregister(dev);
	return dev;
}

/*
 * Hand out the next unused device number on bus 0, function 0, and
 * return it encoded with BDF().  Numbering starts at 1; device 0 is
 * created explicitly by pciinit().
 *
 * The device number is a 5-bit field (pciirq() extracts it with
 * (bdf >> 11) & 0x1f), so a 32nd allocation could not name a fresh
 * device any more; fail loudly instead of returning a bogus BDF.
 */
u32int
allocbdf(void)
{
	static int dev = 1;
	u32int bdf;

	if(dev > 31)
		sysfatal("allocbdf: out of PCI device numbers");
	bdf = BDF(0, dev, 0);
	fprint(2, "allocbdf: dev=%d BDF=%#x\n", dev, bdf);
	dev++;
	return bdf;
}
/*
 * Round l up to the next power of two.  Powers of two are returned
 * unchanged, 0 yields 0, and anything above 0x80000000 wraps to 0.
 *
 * Works by copying the highest set bit of l-1 into every lower bit
 * position and adding one.
 */
u32int
roundpow2(u32int l)
{
	u32int m;

	m = l - 1;
	m |= m >> 1;
	m |= m >> 2;
	m |= m >> 4;
	m |= m >> 8;
	m |= m >> 16;
	return m + 1;
}

/*
 * Fill in the first unused BAR (length == 0) of device d and return it;
 * sysfatal()s when all BARs are taken.
 *
 * t   - BAR type bits; bit 0 set means I/O.  The asserts reject I/O
 *       types with bit 1 set and memory types with bit 1 or 2 set.
 * a   - initial address
 * l   - requested length; raised to at least 4 (I/O) or 4096 (memory)
 *       and rounded up to a power of two
 * fn  - stored as the BAR's io handler, for I/O BARs only
 * aux - stored as the BAR's aux pointer
 *
 * The new BAR is left self-linked (not on membars/iobars).  When the
 * device has a slot in the shared table, the address is mirrored there
 * under pcishared->lock.
 */
PCIBar *
mkpcibar(PCIDev *d, u8int t, u32int a, u32int l, void *fn, void *aux)
{
	PCIBar *bar, *end;
	u32int minlen;
	int isio, slot, which;

	assert((t & 1) == 0 || (t & 2) == 0);
	assert((t & 1) != 0 || (t & 6) == 0);

	isio = (t & 1) != 0;
	minlen = isio ? 4 : 4096;
	if(l < minlen)
		l = minlen;
	if((l & l-1) != 0)
		l = roundpow2(l);

	end = d->bar + nelem(d->bar);
	bar = d->bar;
	while(bar < end && bar->length != 0)
		bar++;
	if(bar == end)
		sysfatal("pci bdf %6ux: too many bars", d->bdf);

	bar->d = d;
	bar->type = t;
	bar->addr = a;
	bar->length = l;
	bar->busnext = bar;
	bar->busprev = bar;
	bar->aux = aux;
	if((bar->type & 1) != 0)
		bar->io = fn;

	/* mirror the BAR address into the shared table */
	if(pcishared != nil){
		lock(&pcishared->lock);
		slot = d->sharedidx;
		if(slot >= 0){
			which = bar - d->bar;
			pcishared->dev[slot].bar_addr[which] = bar->addr;
		}
		unlock(&pcishared->lock);
	}

	return bar;
}
/*
 * Re-evaluate whether BAR b is decoded and put it on the right ring.
 *
 * The BAR is first unlinked and left self-linked.  It is then appended
 * to the tail of
 *   - membars, if it is a memory BAR and ctrl bit 1 is set, or
 *   - iobars, if it is an I/O BAR, ctrl bit 0 is set, its address is
 *     non-zero and it has an io handler.
 * Unused BARs (length 0) and BARs failing those tests stay unlinked, and
 * in that case the shared table is not touched.
 *
 * For a BAR that was linked, its address is written to the shared entry
 * with the same bdf.  No lock is taken here.
 */
static void
updatebar(PCIBar *b)
{
	PCIBar *head;
	int slot;

	/* detach from whatever ring b is currently on */
	b->busnext->busprev = b->busprev;
	b->busprev->busnext = b->busnext;
	b->busnext = b;
	b->busprev = b;

	if(b->length == 0)
		return;
	if((b->type & 1) != 0){
		if((b->d->ctrl & 1) == 0 || b->addr == 0 || b->io == nil)
			return;
		head = &iobars;
	}else{
		if((b->d->ctrl & 2) == 0)
			return;
		head = &membars;
	}

	/* append at the tail, i.e. just before the sentinel */
	b->busnext = head;
	b->busprev = head->busprev;
	head->busprev = b;
	b->busprev->busnext = b;

	if(pcishared == nil)
		return;
	for(slot = 0; slot < pcishared->ndev; slot++)
		if(pcishared->dev[slot].bdf == b->d->bdf){
			pcishared->dev[slot].bar_addr[b - b->d->bar] = b->addr;
			break;
		}
}

/*
 * Re-drive the interrupt line of every device that has one (irqno other
 * than 0xff) with its currently recorded irqactive state.
 */
static void
pciirqupdate(void)
{
	PCIDev *dev;

	dev = pcidevs;
	while(dev != nil){
		if(dev->irqno != 0xff)
			pciirq(dev, dev->irqactive);
		dev = dev->next;
	}
}

/*
 * Allocate a capability of the given length in d's configuration space
 * and append it to the end of d's capability list.
 *
 * readf is mandatory (asserted); writef may be nil, in which case
 * writes to the capability are ignored by the config-write path.
 * The capability occupies [d->capalloc, d->capalloc + length);
 * sysfatal()s if that would run past offset 256.
 */
PCICap *
mkpcicap(PCIDev *d, u8int length, u32int (*readf)(PCICap *, u8int), void (*writef)(PCICap *, u8int, u32int, u32int))
{
	PCICap *cap, *tail;

	assert(readf != nil);
	if(d->capalloc + length > 256)
		sysfatal("mkpcicap (dev %#ux): out of configuration space", d->bdf);

	cap = emalloc(sizeof(PCICap));
	cap->dev = d;
	cap->read = readf;
	cap->write = writef;
	cap->length = length;
	cap->addr = d->capalloc;
	d->capalloc += length;

	if(d->cap == nil)
		d->cap = cap;
	else{
		for(tail = d->cap; tail->next != nil; tail = tail->next)
			;
		tail->next = cap;
	}
	return cap;
}

/*
 * Look up the device with the given bdf in pcidevs; nil if absent.
 * Traces the search on fd 2 when debug is set.
 */
static PCIDev *
findpcidev(u32int bdf)
{
	PCIDev *dev;

	if(debug)
		fprint(2, "findpcidev: looking for bdf=%#x\n", bdf);
	dev = pcidevs;
	while(dev != nil){
		if(debug)
			fprint(2, "  checking d->bdf=%#x\n", dev->bdf);
		if(dev->bdf == bdf)
			return dev;
		dev = dev->next;
	}
	if(debug)
		fprint(2, "  not found!\n");
	return nil;
}

/*
 * Return the first capability of d whose range [addr, addr+length)
 * contains the config-space offset addr, or nil if none does.
 * The unsigned subtraction makes offsets below c->addr fail the test.
 */
static PCICap *
findpcicap(PCIDev *d, u8int addr)
{
	PCICap *cap;

	cap = d->cap;
	while(cap != nil && (uint)(addr - cap->addr) >= cap->length)
		cap = cap->next;
	return cap;
}

/*
 * Refresh d's cached ctrl, irqno and BAR addresses from its slot in the
 * shared table.  Does nothing when there is no shared segment or d has
 * no slot.  Takes no lock; the caller provides whatever locking it needs.
 */
static void
pcipullshared(PCIDev *d)
{
	int n, idx;

	if(pcishared == nil)
		return;
	idx = d->sharedidx;
	if(idx < 0)
		return;
	d->ctrl = pcishared->dev[idx].ctrl;
	d->irqno = pcishared->dev[idx].irqno;
	for(n = 0; n < 6; n++)
		d->bar[n].addr = pcishared->dev[idx].bar_addr[n];
}

/*
 * Decode a 32-bit read of config-space offset addr from d's local state.
 * Offsets 0x00-0x3c are the fixed header; anything else is handed to the
 * capability covering that offset.  Unclaimed offsets read as 0.
 */
static u32int
pcicfgread(PCIDev *d, int addr)
{
	u32int val;
	PCICap *c;
	int n;

	if(debug)
		fprint(2, "pciread: d=%p bdf=%#x addr=%#x\n", d, d->bdf, addr);

	switch(addr){
	case 0x00:
		if(debug)
			fprint(2, "pciread: returning viddid=%#x\n", d->viddid);
		return d->viddid;
	case 0x04:
		/* ctrl in the low bits; bit 20 is set when d has a capability list */
		return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;
	case 0x08: return d->clrev;
	case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
		n = addr - 0x10 >> 2;
		return d->bar[n].addr | d->bar[n].type;
	case 0x28: return 0; /* Cardbus */
	case 0x2c: return d->subid; /* Subsystem ID */
	case 0x30: return 0; /* Expansion ROM */
	case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
	case 0x38: return 0; /* Reserved */
	case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
	}
	c = findpcicap(d, addr);
	if(c != nil){
		val = c->read(c, addr - c->addr);
		if(addr == c->addr){
			/* byte 1 of a capability's first word is the next pointer; fill it in here */
			val &= ~0xff00;
			if(c->next != nil)
				val |= c->next->addr << 8;
		}
		return val;
	}
	vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);
	return 0;
}

/*
 * Read config-space offset addr of d.  Takes pcishared->lock only while
 * pulling the shared state; the register decode (including capability
 * read callbacks) runs without the lock.
 */
static u32int
pciread (PCIDev *d, int addr)
{
	if(pcishared != nil){
		lock(&pcishared->lock);
		pcipullshared(d);
		unlock(&pcishared->lock);
	}
	return pcicfgread(d, addr);
}

/*
 * Same as pciread() but never touches pcishared->lock; used by pciio(),
 * which calls it with the lock already held.
 */
static u32int
pciread_unlocked (PCIDev *d, int addr)
{
	pcipullshared(d);
	return pcicfgread(d, addr);
}


/*
 * pciwrite_unlocked - write to PCI config space without taking
 * pcishared->lock.  Called from pciio(), which already holds the lock.
 *
 * d    - target device
 * addr - dword-aligned config-space offset
 * val  - value to write, already shifted into position
 * mask - bits of the dword that are actually being written
 *
 * Returns 1 if the caller should call pciirqupdate() after releasing
 * the lock, 0 otherwise.
 *
 * CRITICAL: do NOT call pciirqupdate() here - per the locking notes in
 * this file it would take ioapic->lock while pcishared->lock is held,
 * causing potential deadlock.
 */
static int
pciwrite_unlocked(PCIDev *d, int addr, u32int val, u32int mask)
{
	extern IOApic *ioapic;
	int n, idx;
	PCICap *c;
	int do_irq_update = 0;

	switch(addr){
	case 0x04:
		/* only the ctrl bits in 0x21f are writable; decode bits may have changed */
		d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
		for(n = 0; n < nelem(d->bar); n++)
			updatebar(&d->bar[n]);
		break;
	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
		n = addr - 0x10 >> 2;
		/*
		 * The low bits of a BAR are type bits, not address: bits 1:0
		 * for an I/O BAR, bits 3:0 for a memory BAR.  Strip them, then
		 * force natural alignment to the BAR's length.
		 */
		val &= (d->bar[n].type & 1) != 0 ? ~3 : ~15;
		d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
		updatebar(&d->bar[n]);
		break;
	case 0x30:
		/* expansion ROM: writes are dropped */
		break;
	case 0x3c:
		/* the interrupt line is only guest-writable when there is no IOAPIC */
		if(ioapic == nil)
			d->irqno = (d->irqno & ~mask | val & mask) & 0xff;
		do_irq_update = 1;	/* caller must call pciirqupdate() after releasing lock */
		break;
	default:
		c = findpcicap(d, addr);
		if(c != nil && c->write != nil){
			c->write(c, addr - c->addr, val, mask);
			break;
		}
		vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);
		break;
	}

	/* push the result to shared memory - lock held by caller */
	if(pcishared != nil){
		idx = d->sharedidx;
		if(idx >= 0){
			pcishared->dev[idx].ctrl = d->ctrl;
			pcishared->dev[idx].irqno = d->irqno;
			for(n = 0; n < 6; n++)
				pcishared->dev[idx].bar_addr[n] = d->bar[n].addr;
		}
	}

	return do_irq_update;
}

/*
 * pciwrite - locking wrapper around pciwrite_unlocked().
 * Holds pcishared->lock (when a shared segment exists) for the write and
 * the sync to shared memory, and runs pciirqupdate() only after the lock
 * has been dropped.
 */
static void
pciwrite(PCIDev *d, int addr, u32int val, u32int mask)
{
	int do_irq_update;

	if(pcishared != nil)
		lock(&pcishared->lock);
	do_irq_update = pciwrite_unlocked(d, addr, val, mask);
	if(pcishared != nil)
		unlock(&pcishared->lock);

	if(do_irq_update)
		pciirqupdate();
}


/*
 * I/O port handler for the PCI configuration mechanism: port 0xcf8 is
 * the address register (kept in pcishared->cfgaddr), ports 0xcfc-0xcff
 * are the data window.
 *
 * isin - non-zero for a read (in), zero for a write (out)
 * port - I/O port accessed
 * val  - value written (writes only)
 * sz   - access size in bytes
 *
 * Returns the value read; 0 for writes; -1 when there is no shared
 * segment, or when a data read addresses a disabled or unknown device.
 * Other ports are passed to iowhine().
 *
 * The whole access runs under pcishared->lock.  pciirqupdate() is
 * deferred until the lock has been released; the notes in this file say
 * calling it earlier risks a deadlock against ioapic->lock.
 */
u32int
pciio(int isin, u16int port, u32int val, int sz, void *)
{
	PCIDev *dev;
	u32int addr, bits, result;
	int shift, irqpending;

	if(pcishared == nil)
		return -1;

	irqpending = 0;
	result = 0;
	shift = 8 * (port & 3);	/* byte lane within the dword */

	lock(&pcishared->lock);
	switch(isin << 16 | port){
	case 0x0cf8:
		pcishared->cfgaddr = val;
		break;
	case 0x10cf8:
		result = pcishared->cfgaddr & ~0x7f000003;
		break;
	case 0xcfc: case 0xcfd: case 0xcfe: case 0xcff:
		addr = pcishared->cfgaddr;
		val <<= shift;
		bits = (-1UL >> (32 - 8 * sz)) << shift;
		/* bit 31 of the address register enables config cycles */
		if((addr & 1<<31) == 0)
			break;
		dev = findpcidev(addr & 0xffff00);
		if(dev == nil)
			break;
		irqpending = pciwrite_unlocked(dev, addr & 0xfc, val, bits);
		break;
	case 0x10cfc: case 0x10cfd: case 0x10cfe: case 0x10cff:
		addr = pcishared->cfgaddr;
		result = -1;
		if((addr & 1<<31) != 0 && (dev = findpcidev(addr & 0xffff00)) != nil)
			result = pciread_unlocked(dev, addr & 0xfc) >> shift;
		break;
	default:
		unlock(&pcishared->lock);
		return iowhine(isin, port, val, sz, "pci");
	}
	unlock(&pcishared->lock);

	if(irqpending)
		pciirqupdate();

	return result;
}

extern void ioapic_irqline_smp(int, int);

/*
 * pciirq - record and drive the interrupt line of device d.
 *
 * status is normalised to 0/1 and stored in d->irqactive.  With an
 * IOAPIC present the line driven is pin 16 + (device number % 4) and is
 * set to irqactive.  Without one, a device whose irqno is below 16 has
 * that line driven with the inverted state (0 when active, 1 when not).
 * Otherwise nothing is driven.  Both paths go through
 * ioapic_irqline_smp().
 *
 * WARNING: the locking notes in this file say the interrupt path takes
 * ioapic->lock, so the caller must NOT hold pcishared->lock here.
 */
void
pciirq(PCIDev *d, int status)
{
	extern IOApic *ioapic;
	int slot, line;

	d->irqactive = status != 0;
	slot = (d->bdf >> 11) & 0x1f;
	line = 16 + (slot % 4);
	if(debug)
		fprint(2, "pciirq: bdf=%#x devno=%d irqno=%d status=%d pin=%d ioapic=%p\n",
			d->bdf, slot, d->irqno, status, line, ioapic);

	if(ioapic != nil){
		if(debug)
			fprint(2, "pciirq: using IOAPIC path, calling ioapic_set_irq(%d, %d)\n",
				line, d->irqactive);
		ioapic_irqline_smp(line, d->irqactive);
		return;
	}
	if(d->irqno < 16){
		if(debug)
			fprint(2, "pciirq: using legacy PIC path, irqline(%d, %d)\n",
				d->irqno, d->irqactive ? 0 : 1);
		ioapic_irqline_smp(d->irqno, d->irqactive ? 0 : 1);
		return;
	}
	if(debug)
		fprint(2, "pciirq: NO PATH TAKEN - ioapic=%p irqno=%d\n",
			ioapic, d->irqno);
}

/*
 * Make the membars and iobars rings empty (each sentinel linked to
 * itself) and create the device at bus 0, device 0, function 0 with
 * vendor/device word 0x01008086 and class/revision word 0x06000000;
 * it gets no interrupt line.
 */
void
pciinit(void)
{
	membars.busnext = membars.busprev = &membars;
	iobars.busnext = iobars.busprev = &iobars;
	mkpcidev(BDF(0,0,0), 0x01008086, 0x06000000, 0);
}

void
pcibusmap(void)
{
	u16int iop;
	u16int irqs, uirqs;
	PCIDev *d;
	PCIBar *b;
	int irq, devno;
	int i;
	extern IOApic *ioapic;
	
	iop = 0x1000;
	irqs = 1<<5|1<<7|1<<9|1<<10|1<<11;
	uirqs = 0;
	irq = 0;
	
	for(d = pcidevs; d != nil; d = d->next){
		d->ctrl |= 3;
		for(b = d->bar; b < d->bar + nelem(d->bar); b++){
			if(b->length == 0 || b->addr != 0)
				continue;
			if((b->type & 1) == 0){
				vmerror("pci device %.6ux: memory bars unsupported", d->bdf);
				continue;
			}
			if(iop + b->length >= 0x10000){
				vmerror("pci device %.6ux: not enough I/O address space for BAR%d (len=%d)", d->bdf, (int)(b - d->bar), b->length);
				continue;
			}
			b->addr = iop;
			iop += b->length;
			updatebar(b);
		}
		if(d->irqno == 0){
			if(ioapic != nil){
				/* IOAPIC mode: assign pins 16+ based on device number */
				devno = (d->bdf >> 11) & 0x1f;
				d->irqno = 16 + (devno % 4);
			} else {
				/* Legacy PIC mode: assign from available IRQs */
				do
					irq = irq + 1 & 15;
				while((irqs & 1<<irq) == 0);
				d->irqno = irq;
				uirqs |= 1<<irq;
			}
		}
	}
	
	/* Legacy PIC setup - only needed when no IOAPIC */
	if(ioapic == nil){
		elcr(uirqs);
		for(i = 0; i < 16; i++)
			if((uirqs & 1<<i) != 0)
				ioapic_irqline_smp(i, 1);
	}

    /* NEW: Sync assigned IRQs to shared memory */
    if(pcishared != nil){
        lock(&pcishared->lock);
        for(d = pcidevs; d != nil; d = d->next){
            int idx = d->sharedidx;
            if(idx >= 0){
                pcishared->dev[idx].irqno = d->irqno;
                pcishared->dev[idx].ctrl = d->ctrl;
                for(i = 0; i < 6; i++)
                    pcishared->dev[idx].bar_addr[i] = d->bar[i].addr;
            }
        }
        unlock(&pcishared->lock);
    }

}

/*
 * Print a human-readable dump of the PCI state to fd 2: the ioapic
 * pointer, lapic_svr[0] with its bit 0x100 shown as enabled/disabled,
 * then one line per device followed by one line per BAR that has a
 * length.
 */
void
pcidump(void)
{
	extern IOApic *ioapic;
	extern u32int lapic_svr[];
	PCIDev *dev;
	PCIBar *bar;
	int ndev, nbar;

	fprint(2, "=== PCI Dump ===\n");
	fprint(2, "ioapic=%p, lapic_svr[0]=%#ux (APIC %s)\n",
		ioapic, lapic_svr[0], (lapic_svr[0] & 0x100) ? "enabled" : "disabled");

	ndev = 0;
	for(dev = pcidevs; dev != nil; dev = dev->next){
		fprint(2, "[%d] bdf=%#ux viddid=%#ux class=%#ux irqno=%d irqactive=%d ctrl=%#ux\n",
			ndev, dev->bdf, dev->viddid, dev->clrev >> 8, dev->irqno, dev->irqactive, dev->ctrl);
		for(nbar = 0; nbar < 6; nbar++){
			bar = &dev->bar[nbar];
			if(!(bar->length > 0))
				continue;
			fprint(2, "    BAR%d: type=%#ux addr=%#ux len=%#ux io=%p\n",
				nbar, bar->type, bar->addr, bar->length, bar->io);
		}
		ndev++;
	}
	fprint(2, "================\n");
}

/* Repeated tentative definition; same object as the pcishared declared near the top of this file. */
PciShared *pcishared;

/*
 * Create the "vmx.pci" segment through the #g device, attach it and
 * zero the PciShared structure at its start.
 *
 * Any previous #g/vmx.pci is removed first.  If the segment's ctl file
 * cannot be opened, a message is printed and pcishared stays nil, which
 * the rest of this file treats as "no shared state".  A failed write to
 * ctl is reported but not fatal, since segattach() below is the
 * authoritative check; a failed segattach() is fatal.
 *
 * pcishared is only published once the segment is attached and cleared,
 * so it never holds segattach's (void*)-1 error value.
 */
void
pcisharedinit(void)
{
	int fd;
	long n;
	char *cmd;
	void *seg;

	/* the segment is 0x1000 bytes; refuse to memset past its end */
	if(sizeof(PciShared) > 0x1000)
		sysfatal("pcisharedinit: PciShared (%d bytes) does not fit the segment", (int)sizeof(PciShared));

	remove("#g/vmx.pci/ctl");
	remove("#g/vmx.pci");

	fd = create("#g/vmx.pci", OREAD, DMDIR | 0777);
	if(fd >= 0)
		close(fd);

	fd = open("#g/vmx.pci/ctl", OWRITE|OTRUNC);
	if(fd < 0){
		fprint(2, "pcisharedinit: cannot open ctl: %r\n");
		return;
	}
	cmd = "va 0x300002000 0x1000 sticky";
	n = strlen(cmd);
	if(write(fd, cmd, n) != n)
		fprint(2, "pcisharedinit: cannot configure segment: %r\n");
	close(fd);

	seg = segattach(0, "vmx.pci", nil, 0x1000);
	if(seg == (void*)-1)
		sysfatal("segattach vmx.pci: %r");

	memset(seg, 0, sizeof(PciShared));
	pcishared = seg;
}

/*
 * Sync BAR state from shared memory and update local iobars list.
 * Must be called before scanning iobars to ensure we see changes made by other CPUs.
 */
void
pcisyncbars(void)
{
    PCIDev *d;
    int i, idx;
    u16int oldctrl;
    u32int oldaddr;
    
    if(pcishared == nil)
        return;
    
    lock(&pcishared->lock);
    for(d = pcidevs; d != nil; d = d->next){
        idx = d->sharedidx;
        if(idx < 0)
            continue;
        
        oldctrl = d->ctrl;
        d->ctrl = pcishared->dev[idx].ctrl;
        d->irqno = pcishared->dev[idx].irqno;
        
        for(i = 0; i < 6; i++){
            oldaddr = d->bar[i].addr;
            d->bar[i].addr = pcishared->dev[idx].bar_addr[i];
            
            /* If ctrl or addr changed, update the iobars list */
            if(d->ctrl != oldctrl || d->bar[i].addr != oldaddr)
                updatebar(&d->bar[i]);
        }
    }
    unlock(&pcishared->lock);
}