ref: e0e889fb69a6d4d3f2332244ef4f79f900a66d84
dir: /virtio.c/
#include <u.h>
#include <libc.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"
#include <ip.h> /* parseether() */
#include <libsec.h> /* genrandom() */
/*
 * Pipe pairs private to this process; virtio_start_workers() fills them
 * in with pipe().  Every descriptor starts out as -1.
 */
static int local_devpipes[8][2] = {{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
/* Short type names for the structures defined below. */
typedef struct VIODev VIODev;
typedef struct VIOQueue VIOQueue;
typedef struct VIOBuf VIOBuf;
typedef struct VIONetDev VIONetDev;
typedef struct VIOBlkDev VIOBlkDev;
/* Descriptor flags, avail-ring flags, device status bits and limits. */
enum {
/* descriptor flag: viogetbuf() follows the descriptor's next index */
BUFCHAIN = 1,
/* descriptor flag: buffer is filled by vioqwrite(); buffers without it are consumed by vioqread() */
BUFWR = 2,
/* avail-ring flag: when set, vioputbuf() does not raise an interrupt */
USEDNOIRQ = 1,
DRIVEROK = 4, /* devstat */
/* mkviodev_alloc() refuses to create more devices than this */
MAXVIODEV = 8,
};
/*
 * One descriptor of a chain taken from a queue by viogetbuf().
 * The chain is linked through next; the first element is what callers
 * pass to vioqread(), vioqwrite(), vioqrem() and vioputbuf().
 */
struct VIOBuf {
/* descriptor flags (BUFCHAIN, BUFWR) */
u32int flags;
/* queue the descriptor was taken from */
VIOQueue *qu;
/* host pointer for addr/len as returned by gptr() */
void *p;
/* address and length copied from the descriptor */
u64int addr;
u32int len;
/* index of the descriptor in the queue's descriptor table */
u32int idx;
/* next: following element of the chain; head: first element of the chain */
VIOBuf *next, *head;
/* bytes consumed so far by vioqread() / produced so far by vioqwrite(); kept on the first element */
u32int rptr, wptr;
};
/* State of one virtqueue of a device. */
struct VIOQueue {
/* held by viogetbuf(), vioputbuf(), vioqaddrset() and the reset path while touching the fields below */
Lock lk;
/* owning device */
VIODev *d;
/* host pointers to the descriptor table, avail ring and used ring; nil until vioqaddrset() succeeds */
u8int (*desc)[16], *avail, *used;
/* number of descriptors, fixed by mkvioqueue() */
u16int size;
/* ring address stored by vioqaddrset() (already shifted left by 12) */
u32int addr;
/* next avail-ring slot to consume / number of used-ring entries published */
u16int availidx, usedidx;
/* called when the guest writes the queue number to port 0x10 and when the status becomes non-zero */
void (*notify)(VIOQueue*);
/* chains handed out by viogetbuf() and not yet returned through vioputbuf() */
int livebuf;
/* position of this queue in d->qu, set by mkviodev_alloc() */
int qidx;
};
/* Network-specific state of a VIODev. */
struct VIONetDev {
/* descriptors used by vionetrproc() and vionetwproc(); mkvionet() sets both to the same fd */
int readfd, writefd;
/* MAC address reported to the guest by vionetio() */
u8int mac[6];
/*
 * Receive-filter flags consulted by viomacok(); the first six are
 * toggled by vionetcmd() commands 0x0000-0x0005.  VNETHEADER makes
 * vionetwproc() prepend a 10-byte length/timestamp header and
 * survives vionetreset().
 */
enum {
VNETPROMISC = 1,
VNETALLMULTI = 2,
VNETALLUNI = 4,
VNETNOMULTI = 8,
VNETNOUNI = 16,
VNETNOBCAST = 32,
VNETHEADER = 1<<31,
} flags;
/* 64-bit filters indexed by bloomhash(): unicast and multicast tables loaded by vionettables() */
u64int macbloom, multibloom;
};
/* Block-specific state of a VIODev. */
struct VIOBlkDev {
/* backing file opened by mkvioblk() */
int fd;
/* capacity in 512-byte sectors (file size >> 9) */
uvlong size;
};
/* One emulated virtio PCI device with its queues and device-specific state. */
struct VIODev {
/* PCI device created by mkviodev_register() */
PCIDev *pci;
int isrstat ; /* pending interrupt status bits; updated by vioirq(), read and cleared through port 0x13, guarded by isrlk */
/* feature bits offered by the device (read at port 0) and the value the guest wrote to port 4 */
u32int devfeat, guestfeat;
/* queue selected by the guest through port 0xe */
u16int qsel;
/* device status written by the guest through port 0x12 */
u8int devstat ;
/* array of allocqu queues; the first nqu have been set up by mkvioqueue() */
VIOQueue *qu;
int nqu, allocqu;
/* optional handler for ports 20 and above; receives the port number minus 20 */
u32int (*io)(int, u16int, u32int, int, VIODev *);
/* optional hook called when the guest writes status 0 */
void (*reset)(VIODev *);
/* index of this device's pipe pair in vioshared->devpipes */
int devidx;
/* protects isrstat */
Lock isrlk;
/* device-specific part; which member is valid depends on the constructor (mkvionet or mkvioblk) */
union {
VIONetDev net;
VIOBlkDev blk;
};
};
/* Shared segment holding the allocation arena and device tables; created by viosharedinit(). */
VIOShared *vioshared;
/* Defined elsewhere; viosharedinit() points them at memory obtained from vioalloc(). */
extern I8042 *i8042;
extern KbdShared *kbdshared;
extern UartShared *uartshared[2];
/*
 * NOTE(review): devpipes[] is not referenced by the code visible in this
 * file (vioshared->devpipes is used instead) and ndevpipes is only reset
 * by viosharedinit() - confirm whether both are still needed.
 */
static int devpipes[MAXVIODEV][2];
static int ndevpipes;
/* Forward declarations */
static u32int vioblkio(int, u16int, u32int, int, VIODev *);
static u32int vionetio(int, u16int, u32int, int, VIODev *);
/*
 * Hand out sz bytes, rounded up to a multiple of 8, from the
 * vioshared->data arena.  Nothing is ever given back; exhausting the
 * arena is fatal.
 */
void *
vioalloc(ulong sz)
{
	void *chunk;

	sz += 7;
	sz &= ~7UL;
	if(sizeof(vioshared->data) < vioshared->alloc + sz)
		sysfatal("vioalloc: out of space");
	chunk = &vioshared->data[vioshared->alloc];
	vioshared->alloc += sz;
	return chunk;
}
/*
 * Create the "vio" segment, clear it, and allocate from it (in this
 * order) the keyboard buffer, the two UART buffers, the i8042
 * controller state and the two UART register blocks.
 */
void
viosharedinit(void)
{
	extern UART *uart;
	int n;

	vioshared = mkseg("vio", 0x300004000, 0x10000);
	memset(vioshared, 0, sizeof(*vioshared));
	ndevpipes = 0;

	/* keyboard buffer */
	kbdshared = vioalloc(sizeof(*kbdshared));
	memset(kbdshared, 0, sizeof(*kbdshared));

	/* one buffer per UART */
	for(n = 0; n < 2; n++){
		uartshared[n] = vioalloc(sizeof(UartShared));
		memset(uartshared[n], 0, sizeof(UartShared));
	}

	/* i8042 controller with its initial register values */
	i8042 = vioalloc(sizeof(*i8042));
	memset(i8042, 0, sizeof(*i8042));
	i8042->cfg = 0x47;
	i8042->stat = 0x10;
	i8042->oport = 0x03;
	i8042->cmd = -1;

	/* both UART register blocks live in one allocation */
	uart = vioalloc(2 * sizeof(UART));
	memset(uart, 0, 2 * sizeof(UART));
	for(n = 0; n < 2; n++)
		uart[n].lsr = 0x60;
}
/* Does nothing: the body is empty. */
void
vionotifyinit(void)
{
}
/*
 * Update the device's interrupt status and drive its PCI interrupt.
 * A non-zero val ORs those bits into isrstat and asserts the interrupt;
 * val == 0 clears isrstat and deasserts it.  isrstat is changed under
 * isrlk because vioio() reads and clears it under the same lock.
 */
static void
vioirq(VIODev *d, int val)
{
	lock(&d->isrlk);
	if(val != 0)
		d->isrstat |= val;
	else
		d->isrstat = 0;
	unlock(&d->isrlk);
	pciirq(d->pci, val != 0);
}
/*
 * Return a pointer to descriptor i of queue q, or nil (after logging)
 * when i is not below the queue size.
 */
static void *
checkdesc(VIOQueue *q, int i)
{
	if(i < q->size)
		return q->desc[i];
	vmerror("virtio device %#x: invalid next pointer %d in queue (size %d), ignoring descriptor", q->d->pci->bdf, i, q->size);
	return nil;
}
/*
 * Free list of VIOBuf structures, linked through their next field;
 * viobuf_alloc() and viobuf_free() access it under bufpool_lock.
 */
static Lock bufpool_lock;
static VIOBuf *bufpool;
/*
 * Return a VIOBuf: the head of the free list, cleared, when the list
 * is not empty, otherwise a fresh one from emalloc().
 */
static VIOBuf *
viobuf_alloc(void)
{
	VIOBuf *vb;

	lock(&bufpool_lock);
	vb = bufpool;
	if(vb != nil)
		bufpool = vb->next;
	unlock(&bufpool_lock);

	if(vb == nil)
		return emalloc(sizeof(VIOBuf));
	memset(vb, 0, sizeof(*vb));
	return vb;
}
/*
 * Put b back at the front of the free list instead of releasing its
 * memory; viobuf_alloc() will hand it out again.
 */
static void
viobuf_free(VIOBuf *b)
{
lock(&bufpool_lock);
/* b->next is reused as the free-list link */
b->next = bufpool;
bufpool = b;
unlock(&bufpool_lock);
}
/*
 * Take the next available descriptor chain from q.
 *
 * Returns the first VIOBuf of the chain, or nil when the driver has not
 * set DRIVEROK, the rings are not configured, nothing is available, or
 * the first descriptor of the entry is unusable (the entry is consumed
 * in that last case).  The wait argument is ignored; viogetbuf_smp()
 * does the waiting.  Each returned chain counts towards q->livebuf until
 * it is handed back with vioputbuf().
 */
static VIOBuf *
viogetbuf(VIOQueue *q, int wait)
{
	u16int gidx;
	VIOBuf *b, *rb, **bp;
	void *dp;
	int nchain;

	USED(wait);
	lock(&q->lk);
	if((q->d->devstat & DRIVEROK) == 0 || q->desc == nil){
		unlock(&q->lk);
		return nil;
	}
	gidx = GET16(q->avail, 2);
	if(gidx == q->availidx){
		unlock(&q->lk);
		return nil;
	}
	rb = nil;
	bp = &rb;
	nchain = 0;
	/*
	 * The head index comes from guest memory: checkdesc() returns nil
	 * for an out-of-range value and that must not be dereferenced.
	 */
	dp = checkdesc(q, GET16(q->avail, 4 + 2 * (q->availidx % q->size)));
	while(dp != nil){
		/* a chain cannot legitimately hold more descriptors than the queue has; stop on cycles */
		if(nchain++ >= q->size){
			vmerror("virtio device %#x: descriptor chain longer than queue size %d, truncating", q->d->pci->bdf, q->size);
			break;
		}
		b = viobuf_alloc();
		b->qu = q;
		b->idx = (u8int(*)[16])dp - q->desc;
		b->addr = GET64(dp, 0);
		b->len = GET32(dp, 8);
		b->flags = GET16(dp, 12);
		b->p = gptr(b->addr, b->len);
		if(b->p == nil){
			vmerror("virtio device %#x: invalid buffer pointer %#p in queue", q->d->pci->bdf, (void*)b->addr);
			viobuf_free(b);
			break;
		}
		*bp = b;
		/* rb already points at the first element here, also for the first one */
		b->head = rb;
		bp = &b->next;
		if((b->flags & BUFCHAIN) == 0)
			break;
		dp = checkdesc(q, GET16(dp, 14));
	}
	/* the avail entry is consumed even when no usable buffer came out of it */
	q->availidx++;
	if(rb == nil){
		unlock(&q->lk);
		return nil;
	}
	q->livebuf++;
	unlock(&q->lk);
	return rb;
}
/*
 * Return a chain obtained from viogetbuf().
 *
 * When the driver is still running (DRIVEROK) and the rings are valid,
 * the chain is published in the used ring with b->wptr as its length
 * and an interrupt is raised unless the avail ring has USEDNOIRQ set.
 * The VIOBuf structures always go back to the pool.  A nil b is ignored.
 */
void
vioputbuf(VIOBuf *b)
{
	VIOBuf *bn;
	VIOQueue *q;
	u8int *p;
	int need_irq;

	if(b == nil)
		return;
	q = b->qu;
	need_irq = 0;
	lock(&q->lk);
	/*
	 * Always drop livebuf, even when the device is being reset:
	 * viodevstatset() waits for it to reach zero.
	 */
	q->livebuf--;
	if((q->d->devstat & DRIVEROK) != 0){
		if(q->used == nil || q->avail == nil)
			vmerror("virtio device %#x: address was set to an invalid value while holding buffer", q->d->pci->bdf);
		else{
			p = q->used + 4 + 8 * (q->usedidx % q->size);
			PUT32(p, 4, b->wptr);
			PUT32(p, 0, b->idx);
			/* the entry must be visible before the index that announces it */
			coherence();
			PUT16(q->used, 2, ++q->usedidx);
			coherence();
			/* the avail ring is only read here, where it is known to be mapped */
			dprint("VIOPUTBUF: usedidx=%d id=%d len=%d avail_flags=%#x\n",
				q->usedidx, b->idx, b->wptr, GET16(q->avail, 0));
			need_irq = (GET16(q->avail, 0) & USEDNOIRQ) == 0;
		}
	}
	unlock(&q->lk);
	if(need_irq)
		vioirq(q->d, 1);
	while(b != nil){
		bn = b->next;
		viobuf_free(b);
		b = bn;
	}
}
/*
 * Copy up to n bytes from the chain's readable buffers (those without
 * BUFWR) into v, continuing at b->rptr, and advance b->rptr by the
 * amount copied.  Returns the number of bytes copied, which is less
 * than n when the readable part of the chain runs out.
 */
ulong
vioqread(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *cur;
	u32int off;
	int done;
	ulong chunk;

	off = b->rptr;
	cur = b;
	done = 0;
	while(done < n){
		/* move to the readable buffer that contains offset off */
		while(cur != nil && ((cur->flags & BUFWR) != 0 || off >= cur->len)){
			if((cur->flags & BUFWR) == 0)
				off -= cur->len;
			cur = cur->next;
		}
		if(cur == nil)
			break;
		chunk = cur->len - off;
		if(chunk > n - done)
			chunk = n - done;
		memmove(v, (u8int*)cur->p + off, chunk);
		v = (u8int*)v + chunk;
		off += chunk;
		done += chunk;
		b->rptr += chunk;
	}
	return done;
}
/*
 * Copy up to n bytes from v into the chain's writable buffers (those
 * with BUFWR), continuing at b->wptr, and advance b->wptr by the amount
 * copied.  Returns the number of bytes copied, which is less than n
 * when the writable part of the chain runs out.
 */
ulong
vioqwrite(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *cur;
	u32int off;
	int done;
	ulong chunk;

	off = b->wptr;
	cur = b;
	done = 0;
	while(done < n){
		/* move to the writable buffer that contains offset off */
		while(cur != nil && ((cur->flags & BUFWR) == 0 || off >= cur->len)){
			if((cur->flags & BUFWR) != 0)
				off -= cur->len;
			cur = cur->next;
		}
		if(cur == nil)
			break;
		chunk = cur->len - off;
		if(chunk > n - done)
			chunk = n - done;
		memmove((u8int*)cur->p + off, v, chunk);
		v = (u8int*)v + chunk;
		off += chunk;
		done += chunk;
		b->wptr += chunk;
	}
	return done;
}
/*
 * Return how many bytes are left in the chain beyond the current
 * offset: writable bytes past b->wptr when wr is 1, readable bytes
 * past b->rptr when wr is 0.
 */
ulong
vioqrem(VIOBuf *b, int wr)
{
	VIOBuf *cur;
	u32int off;
	ulong left;

	off = wr ? b->wptr : b->rptr;
	/* find the buffer of the requested kind that contains the offset */
	cur = b;
	while(cur != nil){
		if(((cur->flags & BUFWR) != 0) == wr){
			if(off < cur->len)
				break;
			off -= cur->len;
		}
		cur = cur->next;
	}
	if(cur == nil)
		return 0;
	/* rest of that buffer plus every later buffer of the same kind */
	left = cur->len - off;
	while((cur = cur->next) != nil){
		if(((cur->flags & BUFWR) != 0) == wr)
			left += cur->len;
	}
	return left;
}
/* Per-process pipe pairs created by virtio_start_workers() for block devices; no reads or writes on them are active in the code visible here. */
static int local_ackpipes[8][2]; /* worker completion */
/*
 * Queue notify handler: write one byte to the write end of the owning
 * device's pipe in vioshared->devpipes, which viogetbuf_smp() reads
 * from while waiting.  Nothing happens when the device index is out of
 * range or the descriptor is not positive.
 */
static void
viowakeup(VIOQueue *q)
{
	char token;
	int slot;

	slot = q->d->devidx;
	if(slot < 0 || slot >= MAXVIODEV)
		return;
	if(vioshared->devpipes[slot][1] <= 0)
		return;
	token = 1;
	write(vioshared->devpipes[slot][1], &token, 1);
}
/*
 * Set the ring location of q from the page number written by the guest.
 * The descriptor table and avail ring occupy the first part, rounded up
 * to 4096 bytes, and the used ring the second, also rounded up.  When
 * the range cannot be mapped the ring pointers become nil.  The queue's
 * worker is woken up afterwards.
 */
static void
vioqaddrset(VIOQueue *q, u64int addr)
{
	void *ring;
	int first, total;

	addr <<= 12;
	first = (18 * q->size + 4 + 4095) & ~4095;
	total = first + ((8 * q->size + 6 + 4095) & ~4095);
	ring = gptr(addr, total);
	if(ring == nil)
		vmerror("virtio device %#x: attempt to set queue to invalid address %#p", q->d->pci->bdf, (void *) addr);

	lock(&q->lk);
	q->addr = addr;
	if(ring != nil){
		q->desc = ring;
		q->avail = (u8int*)ring + 16 * q->size;
		q->used = (u8int*)ring + first;
	}else{
		q->desc = nil;
		q->avail = nil;
		q->used = nil;
	}
	coherence();
	unlock(&q->lk);

	viowakeup(q);
}
/*
 * Forget the ring location and restart both ring positions of q.
 * Takes no lock itself; viodevstatset() calls it with q->lk held.
 */
static void
vioqreset(VIOQueue *q)
{
	/* ring positions */
	q->usedidx = 0;
	q->availidx = 0;
	/* ring location */
	q->addr = 0;
	q->used = nil;
	q->avail = nil;
	q->desc = nil;
}
/*
 * Handle a guest write to the device status register.
 *
 * A non-zero value is stored and every queue's notify handler is run.
 * Zero resets the device: the reset hook runs, guest features and the
 * interrupt are cleared, and each queue is reset once its outstanding
 * buffers have been returned.  A queue that still has buffers out
 * after more than 5000 one-millisecond sleeps is reset anyway.
 */
static void
viodevstatset(VIODev *v, u32int val)
{
	VIOQueue *q;
	int i, waited, timedout;

	v->devstat = val;
	coherence();
	if(val != 0){
		for(i = 0; i < v->nqu; i++)
			v->qu[i].notify(&v->qu[i]);
		return;
	}

	if(v->reset != nil)
		v->reset(v);
	v->guestfeat = 0;
	vioirq(v, 0);
	for(i = 0; i < v->nqu; i++){
		q = &v->qu[i];
		waited = 0;
		lock(&q->lk);
		while(q->livebuf > 0){
			/* let the workers return their buffers */
			unlock(&q->lk);
			sleep(1);
			timedout = ++waited > 5000;
			if(timedout)
				vmerror("virtio device %#x: timeout waiting for "
					"queue %d buffers (livebuf=%d)",
					v->pci->bdf, i, q->livebuf);
			lock(&q->lk);
			if(timedout)
				break;
		}
		/* q->lk is held here on both exits of the loop */
		vioqreset(q);
		unlock(&q->lk);
	}
}
/*
 * I/O port handler registered for every virtio device's BAR.
 * The switch key combines direction and port: guest writes use the bare
 * port number, guest reads have 0x10000 added.  Ports 20 and above that
 * are not handled here go to the device-specific handler with 20
 * subtracted; anything else is reported through iowhine().
 */
u32int
vioio(int isin, u16int port, u32int val, int sz, void *vp)
{
	static char whinebuf[32];
	VIODev *dev;
	int isr;

	dev = vp;
	coherence();
	switch(isin << 16 | port){
	/* writes */
	case 0x4:	/* guest features */
		dev->guestfeat = val;
		return 0;
	case 0x8:	/* ring page number of the selected queue */
		if(dev->qsel < dev->nqu)
			vioqaddrset(&dev->qu[dev->qsel], val);
		return 0;
	case 0xe:	/* queue select */
		dev->qsel = val;
		return 0;
	case 0x10:	/* queue notify */
		if(val < dev->nqu)
			dev->qu[val].notify(&dev->qu[val]);
		return 0;
	case 0x12:	/* device status */
		viodevstatset(dev, val);
		return 0;

	/* reads */
	case 0x10000:
		return dev->devfeat;
	case 0x10004:
		return dev->guestfeat;
	case 0x10008:
		if(dev->qsel >= dev->nqu)
			return 0;
		return dev->qu[dev->qsel].addr >> 12;
	case 0x1000c:
		if(dev->qsel >= dev->nqu)
			return 0;
		return dev->qu[dev->qsel].size;
	case 0x1000e:
		return dev->qsel;
	case 0x10010:
		return 0;
	case 0x10012:
		return dev->devstat;
	case 0x10013:
		/* interrupt status: fetch and clear under the lock, then drop the interrupt */
		lock(&dev->isrlk);
		isr = dev->isrstat;
		dev->isrstat = 0;
		unlock(&dev->isrlk);
		pciirq(dev->pci, 0);
		dprint("VIRTIO: ISR read, was %#x\n", isr);
		return isr;
	}
	if(port >= 20 && dev->io != nil)
		return dev->io(isin, port - 20, val, sz, dev);
	snprint(whinebuf, sizeof(whinebuf), "virtio device %6x", dev->pci->bdf);
	return iowhine(isin, port, val, sz, whinebuf);
}
/*
* mkviodev_alloc - allocate VIODev structure only
*/
static VIODev *
mkviodev_alloc(int nqu)
{
VIODev *d;
int i, idx;
if(vioshared->ndevpipes >= MAXVIODEV)
sysfatal("mkviodev: too many virtio devices");
d = vioalloc(sizeof(VIODev));
memset(d, 0, sizeof(VIODev));
// Lock is already zeroed, which is fine for Plan 9 locks
d->allocqu = nqu;
d->qu = vioalloc(sizeof(VIOQueue) * nqu);
memset(d->qu, 0, sizeof(VIOQueue) * nqu);
for(i = 0; i < nqu; i++){
d->qu[i].d = d;
d->qu[i].qidx = i;
}
idx = vioshared->ndevpipes;
d->devidx = idx;
if(pipe(vioshared->devpipes[idx]) < 0)
sysfatal("mkviodev pipe: %r");
vioshared->ndevpipes++;
return d;
}
/*
 * mkviodev_register - create the PCI device for d (vendor 0x1af4 with
 * the given device id, class and subsystem id) and attach a 256-byte
 * I/O BAR served by vioio().  Called once all other fields of d have
 * been filled in.
 */
static void
mkviodev_register(VIODev *d, u16int devid, u32int subclass, u32int pciclass)
{
	PCIDev *pci;

	coherence();
	pci = mkpcidev(allocbdf(), (devid << 16) | 0x1af4, subclass << 8, 1);
	d->pci = pci;
	pci->subid = (pciclass << 16) | 0x1af4;
	coherence();
	mkpcibar(pci, BARIO, 0, 256, vioio, d);
}
/*
 * Like viogetbuf(), but when wait is set keep retrying until a chain is
 * available, blocking between attempts on a one-byte read from the
 * device's pipe (written by viowakeup()).  With wait clear the result
 * of a single attempt is returned, which may be nil.
 */
static VIOBuf *
viogetbuf_smp(VIOQueue *q, int wait)
{
	VIOBuf *chain;
	char token;
	int slot;

	slot = q->d->devidx;
	while((chain = viogetbuf(q, 0)) == nil && wait){
		if(slot >= 0 && slot < MAXVIODEV && vioshared->devpipes[slot][0] > 0)
			read(vioshared->devpipes[slot][0], &token, 1);
	}
	return chain;
}
/*
 * Set up the next unused queue of d with sz descriptors and the given
 * notify handler, and return it.  Asking for more queues than were
 * allocated for the device is fatal.
 */
VIOQueue *
mkvioqueue(VIODev *d, int sz, void (*notify)(VIOQueue *))
{
	VIOQueue *q;

	if(d->allocqu <= d->nqu)
		sysfatal("mkvioqueue: too many queues");
	q = d->qu + d->nqu;
	d->nqu++;
	q->d = d;
	q->notify = notify;
	q->size = sz;
	vioqreset(q);
	return q;
}
/*
 * Fold a 6-byte MAC address into a value from 0 to 63, used as a bit
 * index into the 64-bit receive filters.  Both 3-byte halves are mixed
 * the same way and XORed together.
 */
int
bloomhash(u8int *mac)
{
	u8int *m;
	int h, half;

	h = 0;
	for(half = 0; half < 6; half += 3){
		m = mac + half;
		h ^= m[0];
		h ^= (m[0] >> 6) ^ (m[1] << 2);
		h ^= (m[1] >> 4) ^ (m[2] << 4);
		h ^= m[2] >> 2;
	}
	return h & 63;
}
/*
 * Decide whether a frame with destination address mac is delivered to
 * the guest; returns non-zero to accept.  Promiscuous mode accepts
 * everything.  Otherwise unicast, broadcast and multicast destinations
 * are checked against their flags, the device's own address (unicast
 * only) and the matching 64-bit filter.
 */
int
viomacok(VIODev *d, u8int *mac)
{
	static u8int bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
	VIONetDev *nd;
	u64int filter;

	nd = &d->net;
	if((nd->flags & VNETPROMISC) != 0)
		return 1;

	if((mac[0] & 1) == 0){
		/* unicast */
		if((nd->flags & (VNETNOUNI|VNETALLUNI)) != 0)
			return (nd->flags & VNETNOUNI) == 0;
		if(memcmp(mac, nd->mac, 6) == 0)
			return 1;
		filter = nd->macbloom;
	}else{
		if(memcmp(mac, bcast, 6) == 0)
			return (nd->flags & VNETNOBCAST) == 0;
		/* multicast */
		if((nd->flags & (VNETNOMULTI|VNETALLMULTI)) != 0)
			return (nd->flags & VNETNOMULTI) == 0;
		filter = nd->multibloom;
	}
	if(filter == 0)
		return 0;
	return filter >> bloomhash(mac) & 1;
}
/*
 * Receive worker of a network device (vp is its VIODev).  Reads frames
 * from net.readfd, drops those shorter than 14 bytes or rejected by
 * viomacok(), and copies the rest, preceded by a 10-byte zero header,
 * into a chain from queue 0.  Exits the thread on end of file or a
 * read error.
 */
void
vionetrproc(void *vp)
{
	VIODev *dev;
	VIOQueue *rxq;
	VIOBuf *chain;
	uchar vhdr[10];
	uchar frame[1600];
	int n;

	threadsetname("vionetrproc");
	dev = vp;
	rxq = &dev->qu[0];
	memset(vhdr, 0, sizeof(vhdr));
	for(;;){
		n = read(dev->net.readfd, frame, sizeof(frame));
		if(n <= 0){
			if(n == 0){
				vmerror("read(vionetrproc): eof");
				threadexits("read: eof");
			}
			vmerror("read(vionetrproc): %r");
			threadexits("read: %r");
		}
		if(n < 14){
			vmerror("vionetrproc: short packet received (len=%d)", n);
			continue;
		}
		if(!viomacok(dev, frame))
			continue;
		chain = viogetbuf_smp(rxq, 1);
		if(chain == nil){
			vmerror("viogetbuf: %r");
			continue;
		}
		vioqwrite(chain, vhdr, sizeof(vhdr));
		vioqwrite(chain, frame, n);
		vioputbuf(chain);
	}
}
/*
 * Transmit worker of a network device (vp is its VIODev).  Takes chains
 * from queue 1, skips the 10-byte header the guest put in front, pads
 * frames shorter than 60 bytes with zeros and writes them to
 * net.writefd.  With VNETHEADER set, the frame is preceded by a 10-byte
 * header holding its length (2 bytes) and nanosec() (8 bytes), most
 * significant byte first.  Frames that are empty, shorter than 14 bytes
 * or fill the whole buffer are dropped.
 */
void
vionetwproc(void *vp)
{
	enum { HDRLEN = 10, MINFRAME = 60 };
	VIODev *dev;
	VIOQueue *txq;
	VIOBuf *chain;
	uchar vhdr[10];
	uchar out[1610];
	uchar *frame;
	int wrote, len, i;
	uvlong ns;

	threadsetname("vionetwproc");
	dev = vp;
	txq = &dev->qu[1];
	/* the frame is stored after room for the optional output header */
	frame = out + HDRLEN;
	for(;;){
		chain = viogetbuf_smp(txq, 1);
		if(chain == nil){
			vmerror("viogetbuf: %r");
			threadexits("viogetbuf: %r");
		}
		vioqread(chain, vhdr, sizeof(vhdr));
		len = vioqread(chain, frame, sizeof(out) - HDRLEN);
		if(len == sizeof(out) - HDRLEN){
			vmerror("virtio net: ignoring excessively long packet");
			vioputbuf(chain);
			continue;
		}
		if(len < 14){
			if(len != 0)
				vmerror("virtio net: ignoring short packet (length=%d)", len);
			vioputbuf(chain);
			continue;
		}
		if(len < MINFRAME){
			memset(frame + len, 0, MINFRAME - len);
			len = MINFRAME;
		}
		if((dev->net.flags & VNETHEADER) == 0)
			wrote = write(dev->net.writefd, frame, len);
		else{
			out[0] = len >> 8;
			out[1] = len;
			ns = nanosec();
			for(i = 0; i < 8; i++)
				out[2 + i] = ns >> (56 - 8 * i);
			wrote = write(dev->net.writefd, out, len + HDRLEN);
		}
		vioputbuf(chain);
		if(wrote < 0)
			vmerror("write(vionetwproc): %r");
		else if(wrote < len)
			vmerror("write(vionetwproc): incomplete write (%d < %d)", wrote, len);
	}
}
/*
 * Device-specific port handler of the network device.  Reads of
 * offsets 0-3 return the first four MAC bytes and reads of offsets 4-7
 * the last two MAC bytes with bit 16 set, each shifted right so that
 * the addressed byte comes first.  Everything else goes to iowhine().
 */
static u32int
vionetio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	/* clearing the two low bits maps each group of four offsets to one case */
	switch((isin << 16 | port) & ~3){
	case 0x10000:
		return GET32(v->net.mac, 0) >> (port & 3) * 8;
	case 0x10004:
		return (GET16(v->net.mac, 4) | 1 << 16) >> (port & 3) * 8;
	}
	return iowhine(isin, port, val, sz, "virtio net");
}
/*
 * Load both receive filters from the request in b: two tables, each a
 * 4-byte entry count followed by that many 6-byte addresses.  The first
 * table becomes net.macbloom and the second net.multibloom.  Returns 0
 * on success and 1, leaving the filters unchanged, when the request is
 * too short.
 */
int
vionettables(VIODev *d, VIOBuf *b)
{
	u8int count[4], ent[6];
	u64int filt[2];
	int t, left;

	filt[0] = filt[1] = 0;
	for(t = 0; t < 2; t++){
		if(vioqread(b, count, 4) < 4)
			return 1;
		for(left = GET32(count, 0); left != 0; left--){
			if(vioqread(b, ent, 6) < 6)
				return 1;
			filt[t] |= 1ULL<<bloomhash(ent);
		}
	}
	d->net.macbloom = filt[0];
	d->net.multibloom = filt[1];
	return 0;
}
/*
 * Notify handler of the network control queue: process every pending
 * request.  A request is a 2-byte command followed by its argument; a
 * one-byte result (0 = ok, 1 = error) is written back.
 *   0x0000-0x0005  one byte, 1 or 0: set or clear the matching flag
 *   0x0100         load the filter tables (vionettables)
 *   0x0101         6 bytes: new MAC address
 */
void
vionetcmd(VIOQueue *q)
{
	static int ctlflag[] = {
		VNETPROMISC, VNETALLMULTI, VNETALLUNI,
		VNETNOMULTI, VNETNOUNI, VNETNOBCAST,
	};
	VIODev *d;
	VIOBuf *b;
	u8int cmd[2], arg[6];
	u8int ack;
	int op;

	d = q->d;
	for(;;){
		b = viogetbuf_smp(q, 0);
		if(b == nil)
			break;
		if(vioqread(b, cmd, 2) < 2)
			ack = 1;
		else{
			ack = 0;
			op = cmd[0] << 8 | cmd[1];
			if(op <= 0x0005){
				if(vioqread(b, arg, 1) < 1)
					ack = 1;
				else if(arg[0] == 1)
					d->net.flags |= ctlflag[op];
				else if(arg[0] == 0)
					d->net.flags &= ~ctlflag[op];
				else
					ack = 1;
			}else if(op == 0x0100)
				ack = vionettables(d, b);
			else if(op == 0x0101){
				if(vioqread(b, arg, 6) < 6)
					ack = 1;
				else
					memmove(d->net.mac, arg, 6);
			}else
				ack = 1;
		}
		vioqwrite(b, &ack, 1);
		vioputbuf(b);
	}
}
/*
 * Reset hook of the network device: empty both receive filters and
 * clear every flag except VNETHEADER.
 */
void
vionetreset(VIODev *d)
{
	d->net.multibloom = 0;
	d->net.macbloom = 0;
	d->net.flags &= VNETHEADER;
}
int
mkvionet(char *net)
{
int fd, cfd;
VIODev *d;
char *ea;
int flags;
enum { VNETFILE = 1 };
ea = nil;
flags = 0;
for(;;){
if(strncmp(net, "hdr!", 4) == 0){
net += 4;
flags |= VNETHEADER;
}else if(strncmp(net, "file!", 5) == 0){
net += 5;
flags |= VNETFILE;
}else if(strncmp(net, "ea:", 3) == 0){
net = strchr(ea = net+3, '!');
if(net++ == nil){
werrstr("missing: !");
return -1;
}
}else
break;
}
if((flags & VNETFILE) != 0){
flags &= ~VNETFILE;
fd = open(net, ORDWR);
if(fd < 0) return -1;
}else{
fd = dial(netmkaddr("-1", net, nil), nil, nil, &cfd);
if(fd < 0) return -1;
if(cfd >= 0) {
write(cfd, "promiscuous", 11);
write(cfd, "bridge", 6);
}
}
d = mkviodev_alloc(3);
d->io = vionetio;
d->reset = vionetreset;
d->devfeat = 1<<5|1<<16|1<<17|1<<18|1<<20;
d->net.readfd = fd;
d->net.writefd = fd;
d->net.flags = flags;
if(ea == nil){
genrandom(d->net.mac, 6);
d->net.mac[0] = d->net.mac[0] & ~1 | 2;
}else{
if(parseether(d->net.mac, ea) != 0){
fprint(2, "unparsable mac addr: %s\n", ea);
return -1;
}
if((d->net.mac[0] & 1) != 0){
werrstr("invalid mac addr %s: must be unicast", ea);
return -1;
}
if((d->net.mac[0] & 2) == 0){
fprint(2, "invalid mac addr %s: must not be local", ea);
return -1;
}
}
mkvioqueue(d, 1024, viowakeup);
mkvioqueue(d, 1024, viowakeup);
mkvioqueue(d, 1024, vionetcmd);
mkviodev_register(d, 0x1000, 0x020000, 1);
vioshared->netdevs[vioshared->nnetdevs++] = d;
// proccreate(vionetrproc, d, 8192);
// proccreate(vionetwproc, d, 8192);
return 0;
}
/*
 * Device-specific port handler of the block device.  Reads return the
 * addressed byte (and the bytes above it) of these 32-bit fields:
 *   0-3    capacity in sectors, low half
 *   4-7    capacity in sectors, high half
 *   8-11   size_max (4 MB)
 *   12-15  seg_max (128)
 *   16-19  geometry (always 0)
 *   20-23  blk_size (512)
 * Writes and all other offsets return 0.
 */
static u32int
vioblkio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	uvlong cap;
	u32int sizemax, segmax, blksz;
	int shift;

	coherence();
	cap = v->blk.size;
	sizemax = 0x400000;
	segmax = 128;
	blksz = 512;
	shift = (port & 3) * 8;
	/* clearing the two low bits maps each 4-byte field to one case */
	switch((isin << 16 | port) & ~3){
	case 0x10000:
		return (u32int)cap >> shift;
	case 0x10004:
		return (u32int)(cap >> 32) >> shift;
	case 0x10008:
		return sizemax >> shift;
	case 0x1000c:
		return segmax >> shift;
	case 0x10010:
		return 0;
	case 0x10014:
		return blksz >> shift;
	}
	return 0;
}
/*
 * Worker of a block device (vp is its VIODev).  Serves requests from
 * queue 0 forever.  A request starts with a 16-byte header (type,
 * reserved, sector) and ends with a one-byte status written by the
 * device: 0 = ok, 1 = I/O error, 2 = unsupported.
 *   type 0  read: fill the writable data area from the backing file
 *   type 1  write: store the readable data area in the backing file
 *   type 4  flush: acknowledged without further action
 *   type 8  get id: return a 20-byte serial string
 *
 * The range check is written so that a huge guest-supplied sector
 * number cannot wrap the multiplication, and the status byte always
 * goes into the last writable byte of the request, also when a request
 * fails before its data area has been filled.
 */
void
vioblkproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *b, *tb;
	u8int cmd[16];
	u8int ack;
	char buf[65536];
	char serial[20];
	uvlong addr;
	u32int type, reserved;
	int rc, m, i;
	ulong n, hdr_read, total_rd, total_wr;
	vlong offset;
	static Lock reqlock;
	static uvlong writecount = 0;
	static uvlong reqcount = 0;
	static uvlong readcount = 0;
	uvlong myreq, mywrite;

	threadsetname("vioblkproc");
	v = vp;
	q = &v->qu[0];
	dprint( "VIOBLK: started fd=%d size=%llud sectors (%llud bytes)\n",
		v->blk.fd, v->blk.size, v->blk.size * 512ULL);
	if(v->blk.size == 0)
		fprint(2, "VIOBLK: WARNING: disk size is 0!\n");
	for(;;){
		b = viogetbuf_smp(q, 1);
		if(b == nil){
			vmerror("vioblkproc: viogetbuf: %r");
			threadexits("vioblkproc: viogetbuf: %r");
		}
		/* the counters are static, hence shared by all block workers */
		lock(&reqlock);
		myreq = ++reqcount;
		unlock(&reqlock);
		ack = 0;
		if(debug || myreq <= 5){
			total_rd = 0;
			total_wr = 0;
			dprint("VIOBLK[%llud]: descriptor chain:\n", myreq);
			for(tb = b, i = 0; tb != nil; tb = tb->next, i++){
				dprint(" desc[%d]: addr=%#llux len=%ud flags=%#ux",
					i, tb->addr, tb->len, tb->flags);
				if(tb->flags & BUFCHAIN) dprint(" NEXT");
				if(tb->flags & BUFWR) {
					dprint(" WRITE");
					total_wr += tb->len;
				} else {
					total_rd += tb->len;
				}
				dprint("\n");
			}
			dprint(" totals: readable=%lud writable=%lud\n", total_rd, total_wr);
		}
		hdr_read = vioqread(b, cmd, sizeof(cmd));
		if(hdr_read < sizeof(cmd)){
			dprint("VIOBLK[%llud]: ERROR: header read failed: got %lud, need 16\n", myreq, hdr_read);
			ack = 2;
			goto complete;
		}
		type = GET32(cmd, 0);
		reserved = GET32(cmd, 4);
		addr = GET64(cmd, 8);
		if(debug || myreq <= 5)
			dprint("VIOBLK[%llud]: type=%ud reserved=%#ux sector=%llud rptr_after_hdr=%ud\n", myreq, type, reserved, addr, b->rptr);
		switch(type){
		case 0: /* READ */
			lock(&reqlock);
			readcount++;
			unlock(&reqlock);
			n = vioqrem(b, 1);
			dprint("VIOBLK[%llud]: READ vioqrem(wr=1)=%lud\n", myreq, n);
			if(n == 0){
				dprint("VIOBLK[%llud]: ERROR: READ no writable space\n", myreq);
				ack = 1;
				break;
			}
			/* the last writable byte is reserved for the status */
			n -= 1;
			dprint("VIOBLK[%llud]: READ %lud bytes from sector %llud\n", myreq, n, addr);
			/* compare in sectors first so that addr * 512 cannot wrap */
			if(addr > v->blk.size || n > (v->blk.size - addr) * 512){
				dprint("VIOBLK[%llud]: ERROR: READ bounds failed\n", myreq);
				ack = 1;
				break;
			}
			offset = addr << 9;
			for(; n > 0; n -= rc){
				rc = sizeof(buf);
				if(n < rc) rc = n;
				rc = pread(v->blk.fd, buf, rc, offset);
				if(rc < 0){
					dprint("VIOBLK[%llud]: ERROR: pread failed: %r\n", myreq);
					ack = 1;
					break;
				}
				if(rc == 0){
					dprint("VIOBLK[%llud]: ERROR: pread EOF\n", myreq);
					ack = 1;
					break;
				}
				vioqwrite(b, buf, rc);
				offset += rc;
			}
			break;
		case 1: /* WRITE */
			lock(&reqlock);
			mywrite = ++writecount;
			unlock(&reqlock);
			n = vioqrem(b, 0);
			if(debug || myreq <= 5)
				dprint("VIOBLK[%llud]: WRITE #%llud vioqrem(wr=0)=%lud sector=%llud\n", myreq, mywrite, n, addr);
			if(n == 0){
				dprint("VIOBLK[%llud]: WARNING: WRITE with no data\n", myreq);
				break;
			}
			/* compare in sectors first so that addr * 512 cannot wrap */
			if(addr > v->blk.size || n > (v->blk.size - addr) * 512){
				dprint("VIOBLK[%llud]: ERROR: WRITE bounds failed\n", myreq);
				ack = 1;
				break;
			}
			offset = addr << 9;
			for(; n > 0; n -= m){
				m = vioqread(b, buf, sizeof(buf));
				if(m <= 0)
					break;
				if(n < m) m = n;
				rc = pwrite(v->blk.fd, buf, m, offset);
				if(rc < 0){
					dprint("VIOBLK[%llud]: ERROR: pwrite failed: %r\n", myreq);
					ack = 1;
					break;
				}
				if(rc < m){
					dprint("VIOBLK[%llud]: ERROR: short write %d < %d\n", myreq, rc, m);
					ack = 1;
					break;
				}
				offset += m;
			}
			break;
		case 4: /* FLUSH */
			dprint("VIOBLK[%llud]: FLUSH\n", myreq);
			ack = 0;
			break;
		case 8: /* GET_ID */
			memset(serial, 0, 20);
			snprint(serial, 20, "vmx-vioblk");
			vioqwrite(b, serial, 20);
			ack = 0;
			break;
		default:
			ack = 2;
		}
	complete:
		/*
		 * Skip whatever part of the writable area was left unused so
		 * that the status ends up in its last byte.  Successful
		 * requests have exactly one byte left and are unaffected.
		 */
		n = vioqrem(b, 1);
		if(n > 1)
			b->wptr += n - 1;
		vioqwrite(b, &ack, 1);
		if(debug || myreq <= 5)
			dprint("VIOBLK[%llud]: completing, ack=%d\n", myreq, ack);
		if(myreq % 1000 == 0)
			dprint("REQ %llud\n", myreq);
		vioputbuf(b);
		if(myreq % 1000 == 0)
			dprint("VIOBLK: %llud requests (%llud reads, %llud writes)\n", myreq, readcount, writecount);
	}
}
/*
 * Start the worker procs for every registered device: one vioblkproc
 * per block device, and one vionetrproc plus one vionetwproc per
 * network device.  The process-local pipes are created on the way;
 * a failing pipe() is fatal.
 */
void
virtio_start_workers(void)
{
	int i, slot;

	for(i = 0; i < vioshared->nblkdevs; i++){
		if(pipe(local_devpipes[i]) < 0 || pipe(local_ackpipes[i]) < 0)
			sysfatal("virtio pipe: %r");
		proccreate(vioblkproc, vioshared->blkdevs[i], 131072);
	}
	/* network devices use the slots after the block devices */
	slot = vioshared->nblkdevs;
	for(i = 0; i < vioshared->nnetdevs; i++, slot++){
		if(pipe(local_devpipes[slot]) < 0)
			sysfatal("virtio pipe: %r");
		proccreate(vionetrproc, vioshared->netdevs[i], 8192);
		proccreate(vionetwproc, vioshared->netdevs[i], 8192);
	}
}
/*
 * Create a virtio block device backed by file fn, opened read-write.
 * The capacity is the file size, found by seeking to its end, divided
 * by 512.  Returns 0 on success and -1 when the file cannot be opened
 * or sized.  The worker is not started here; virtio_start_workers()
 * starts one for every entry of vioshared->blkdevs.
 */
int
mkvioblk(char *fn)
{
	int fd;
	VIODev *d;
	uvlong size;
	vlong filesize;

	fd = open(fn, ORDWR);
	if(fd < 0){
		fprint(2, "VIOBLK: ERROR: cannot open '%s': %r\n", fn);
		return -1;
	}
	filesize = seek(fd, 0, 2);
	if(filesize < 0){
		fprint(2, "VIOBLK: ERROR: seek to end failed: %r\n");
		close(fd);
		return -1;
	}
	size = filesize >> 9;
	dprint("VIOBLK: opened '%s' fd=%d filesize=%lld size=%llud sectors\n",
		fn, fd, filesize, size);
	if(size == 0){
		fprint(2, "VIOBLK: WARNING: disk size is 0 sectors!\n");
		fprint(2, "VIOBLK: All I/O will fail bounds check. Is the file empty?\n");
	}
	/* put the offset back at the start; a failure is reported but not fatal */
	if(seek(fd, 0, 0) < 0){
		fprint(2, "VIOBLK: ERROR: seek to 0 failed: %r\n");
	}
	d = mkviodev_alloc(1);
	d->io = vioblkio;
	d->devfeat = (1<<1) | (1<<2) | (1<<6); /* SIZE_MAX, SEG_MAX, BLK_SIZE */
	d->blk.fd = fd;
	d->blk.size = size;
	mkvioqueue(d, 1024, viowakeup);
	mkviodev_register(d, 0x1001, 0x018000, 2);
	vioshared->blkdevs[vioshared->nblkdevs++] = d;
	return 0;
}