shithub: front

Download patch

ref: 3459c8cb8e7414366c96a2e11420b95877ab4b37
parent: 8b7a03c5e98c2763273bb994f15a21ca93c89479
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Fri Jan 10 09:32:50 EST 2025

allocb: implement buffer pools for ethernet drivers

Some ethernet drivers implemented their own buffer pools
using Block.free() callbacks.

Usually, these implementations have the defect that the
driver only allocates a fixed number of buffers total.

This upper bound is impossible to predict (depends
on protocol queue limits).

Also, allocation is not space efficient when large
alignments are needed.

This change removes the Block.free() callback and
replaces it with a common buffer pool implementation:

A Bpool struct, containing the block size and
alignment needed for the blocks. Block.pool
is non-nil when the block is from a pool.

growbp(Bpool*,int) supplies new blocks to a pool.

Allocation is done such that the data area and
Block headers are in separate allocations.

All the blocks share the same allocation for
data avoiding waste as not every block needs
to add alignment slack individually.

Block *iallocbp(Bpool*) allocates a block,
growing the pool if necessary.

freeb(): returns the block back to the pool.

We also tweak the input queue size for devether,
making it twice as large as the transmit queue.

--- a/sys/src/9/bcm64/ethergenet.c
+++ b/sys/src/9/bcm64/ethergenet.c
@@ -242,6 +242,8 @@
 	Lock;
 	u32int	*regs;
 
+	Bpool	pool[1];
+
 	Desc	rd[256];
 	Desc	td[256];
 
@@ -315,9 +317,10 @@
 }
 
 static void
-replenish(Desc *d)
+replenish(Ctlr *c, Desc *d)
 {
-	d->b = allocb(Rbsz);
+	while((d->b = iallocbp(c->pool)) == nil)
+		resrcwait("out of genet rx buffers");
 	dmaflush(1, d->b->rp, Rbsz);
 	setdma(d, d->b->rp);
 }
@@ -355,7 +358,7 @@
 		b = d->b;
 		dmaflush(0, b->rp, Rbsz);
 		s = REG(d->d[0]);
-		replenish(d);
+		replenish(ctlr, d);
 		coherence();
 		ctlr->rx->rp = (ctlr->rx->rp + 1) & 0xFFFF;
 		REG(ctlr->rx->regs[RxRP]) = ctlr->rx->rp;
@@ -503,8 +506,15 @@
 {
 	int i;
 
+	if(ctlr->pool->size == 0){
+		ctlr->pool->size = Rbsz;
+		growbp(ctlr->pool, nelem(ctlr->rd)*4);
+	}
+
 	if(scratch == nil){
-		scratch = allocb(Rbsz);
+		scratch = iallocbp(ctlr->pool);
+		if(scratch == nil)
+			error("out of rx buffers");
 		memset(scratch->rp, 0xFF, Rbsz);
 		dmaflush(1, scratch->rp, Rbsz);
 	}
@@ -511,7 +521,7 @@
 
 	for(i = 0; i < nelem(ctlr->rd); i++){
 		ctlr->rd[i].d = &ctlr->regs[RdmaOffset + i*3];
-		replenish(&ctlr->rd[i]);
+		replenish(ctlr, &ctlr->rd[i]);
 	}
 
 	for(i = 0; i < nelem(ctlr->td); i++){
--- a/sys/src/9/imx8/etherimx.c
+++ b/sys/src/9/imx8/etherimx.c
@@ -216,6 +216,8 @@
 	u32int	*regs;
 	u32int	intmask;
 
+	Bpool	pool;
+
 	struct {
 		Block	*b[256];
 		Descr	*d;
@@ -441,7 +443,9 @@
 			etheriq(edev, b);
 
 			/* replenish */
-			b = allocb(R_BUF_SIZE);
+			while((b = iallocbp(&ctlr->pool)) == nil)
+				resrcwait("out of imx rx buffers");
+
 			ctlr->rx->b[i] = b;
 			dmaflush(1, b->rp, R_BUF_SIZE);
 			d->addr = PADDR(b->rp); 
@@ -569,11 +573,23 @@
 
 	if(ctlr->rx->d == nil)
 		ctlr->rx->d = ucalloc(sizeof(Descr) * nelem(ctlr->rx->b));
+
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = R_BUF_SIZE;
+		ctlr->pool.align = BLOCKALIGN;
+		growbp(&ctlr->pool, 4*nelem(ctlr->rx->b));
+	}
+
 	for(i=0; i<nelem(ctlr->rx->b); i++){
-		Block *b = allocb(R_BUF_SIZE);
-		ctlr->rx->b[i] = b;
-		d = &ctlr->rx->d[i];
+		Block *b = ctlr->rx->b[i];
+		if(b == nil){
+			b = iallocbp(&ctlr->pool);
+			if(b == nil)
+				error("out of rx buffers");
+			ctlr->rx->b[i] = b;
+		}
 		dmaflush(1, b->rp, R_BUF_SIZE);
+		d = &ctlr->rx->d[i];
 		d->addr = PADDR(b->rp);
 		d->status = RD_E;
 	}
--- a/sys/src/9/kw/ether1116.c
+++ b/sys/src/9/kw/ether1116.c
@@ -49,11 +49,6 @@
 typedef struct Rx Rx;
 typedef struct Tx Tx;
 
-static struct {
-	Lock;
-	Block	*head;
-} freeblocks;
-
 /* hardware receive buffer descriptor */
 struct Rx {
 	ulong	cs;
@@ -129,6 +124,8 @@
 	Lock	initlock;
 	int	init;
 
+	Bpool	pool;
+
 	Rx	*rx;		/* receive descriptors */
 	Block	*rxb[Nrx];	/* blocks belonging to the descriptors */
 	int	rxhead;		/* descr ethernet will write to next */
@@ -515,36 +512,6 @@
 static void getmibstats(Ctlr *);
 
 static void
-rxfreeb(Block *b)
-{
-	b->wp = b->rp =
-		(uchar*)((uintptr)(b->lim - Rxblklen) & ~(Bufalign - 1));
-	assert(((uintptr)b->rp & (Bufalign - 1)) == 0);
-	b->free = rxfreeb;
-
-	ilock(&freeblocks);
-	b->next = freeblocks.head;
-	freeblocks.head = b;
-	iunlock(&freeblocks);
-}
-
-static Block *
-rxallocb(void)
-{
-	Block *b;
-
-	ilock(&freeblocks);
-	b = freeblocks.head;
-	if(b != nil) {
-		freeblocks.head = b->next;
-		b->next = nil;
-		b->free = rxfreeb;
-	}
-	iunlock(&freeblocks);
-	return b;
-}
-
-static void
 rxkick(Ctlr *ctlr)
 {
 	Gbereg *reg = ctlr->reg;
@@ -575,12 +542,9 @@
 	Block *b;
 
 	while(ctlr->rxb[ctlr->rxtail] == nil) {
-		b = rxallocb();
-		if(b == nil) {
-			iprint("#l%d: rxreplenish out of buffers\n",
-				ctlr->ether->ctlrno);
+		b = iallocbp(&ctlr->pool);
+		if(b == nil)
 			break;
-		}
 
 		ctlr->rxb[ctlr->rxtail] = b;
 
@@ -1437,26 +1401,9 @@
 ctlralloc(Ctlr *ctlr)
 {
 	int i;
-	Block *b;
 	Rx *r;
 	Tx *t;
 
-	ilock(&freeblocks);
-	for(i = 0; i < Nrxblks; i++) {
-		b = iallocb(Rxblklen+Bufalign-1);
-		if(b == nil) {
-			iprint("ether1116: no memory for rx buffers\n");
-			break;
-		}
-		b->wp = b->rp = (uchar*)
-			((uintptr)(b->lim - Rxblklen) & ~(Bufalign - 1));
-		assert(((uintptr)b->rp & (Bufalign - 1)) == 0);
-		b->free = rxfreeb;
-		b->next = freeblocks.head;
-		freeblocks.head = b;
-	}
-	iunlock(&freeblocks);
-
 	/*
 	 * allocate uncached rx ring descriptors because rings are shared
 	 * with the ethernet controller and more than one fits in a cache line.
@@ -1473,6 +1420,12 @@
 		ctlr->rxb[i] = nil;
 	}
 	ctlr->rxtail = ctlr->rxhead = 0;
+
+	/* allocate private buffer pool */
+	ctlr->pool.size = Rxblklen;
+	ctlr->pool.align = Bufalign;
+	growbp(&ctlr->pool, Nrx*4);
+
 	rxreplenish(ctlr);
 
 	/* allocate uncached tx ring descriptors */
--- a/sys/src/9/pc/ether8169.c
+++ b/sys/src/9/pc/ether8169.c
@@ -306,6 +306,8 @@
 	int	rdt;			/* tail - consumer index (host) */
 	int	nrq;
 
+	Bpool	pool;
+
 	int	tcr;			/* transmit configuration register */
 	int	rcr;			/* receive configuration register */
 	int	imr;
@@ -646,11 +648,9 @@
 
 	x = ctlr->rdt;
 	while(NEXT(x, ctlr->nrd) != ctlr->rdh){
-		bp = iallocb(Mps);
-		if(bp == nil){
-			iprint("rtl8169: no available buffers\n");
+		bp = iallocbp(&ctlr->pool);
+		if(bp == nil)
 			break;
-		}
 		ctlr->rb[x] = bp;
 		ctlr->nrq++;
 		pa = PCIWADDR(bp->rp);
@@ -698,7 +698,10 @@
 			ctlr->rb[i] = nil;
 			freeb(bp);
 		}
-
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = Mps;
+		growbp(&ctlr->pool, ctlr->nrd*4);
+	}
 	rtl8169replenish(ctlr);
 
 	cplusc = csr16r(ctlr, Cplusc);
--- a/sys/src/9/pc/ether82543gc.c
+++ /dev/null
@@ -1,1377 +1,0 @@
-/*
- * Intel RS-82543GC Gigabit Ethernet Controller
- * as found on the Intel PRO/1000[FT] Server Adapter.
- * The older non-[FT] cards use the 82542 (LSI L2A1157) chip; no attempt
- * is made to handle the older chip although it should be possible.
- * The datasheet is not very clear about running on a big-endian system
- * and this driver assumes little-endian throughout.
- * To do:
- *	GMII/MII
- *	receive tuning
- *	transmit tuning
- */
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "io.h"
-#include "../port/pci.h"
-#include "../port/error.h"
-#include "../port/netif.h"
-#include "../port/etherif.h"
-
-enum {
-	Ctrl		= 0x00000000,	/* Device Control */
-	Status		= 0x00000008,	/* Device Status */
-	Eecd		= 0x00000010,	/* EEPROM/Flash Control/Data */
-	Ctrlext		= 0x00000018,	/* Extended Device Control */
-	Mdic		= 0x00000020,	/* MDI Control */
-	Fcal		= 0x00000028,	/* Flow Control Address Low */
-	Fcah		= 0x0000002C,	/* Flow Control Address High */
-	Fct		= 0x00000030,	/* Flow Control Type */
-	Icr		= 0x000000C0,	/* Interrupt Cause Read */
-	Ics		= 0x000000C8,	/* Interrupt Cause Set */
-	Ims		= 0x000000D0,	/* Interrupt Mask Set/Read */
-	Imc		= 0x000000D8,	/* Interrupt mask Clear */
-	Rctl		= 0x00000100,	/* Receive Control */
-	Fcttv		= 0x00000170,	/* Flow Control Transmit Timer Value */
-	Txcw		= 0x00000178,	/* Transmit configuration word reg. */
-	Rxcw		= 0x00000180,	/* Receive configuration word reg. */
-	Tctl		= 0x00000400,	/* Transmit Control */
-	Tipg		= 0x00000410,	/* Transmit IPG */
-	Tbt		= 0x00000448,	/* Transmit Burst Timer */
-	Ait		= 0x00000458,	/* Adaptive IFS Throttle */
-	Fcrtl		= 0x00002160,	/* Flow Control RX Threshold Low */
-	Fcrth		= 0x00002168,	/* Flow Control Rx Threshold High */
-	Rdfh		= 0x00002410,	/* Receive data fifo head */
-	Rdft		= 0x00002418,	/* Receive data fifo tail */
-	Rdfhs		= 0x00002420,	/* Receive data fifo head saved */
-	Rdfts		= 0x00002428,	/* Receive data fifo tail saved */
-	Rdfpc		= 0x00002430,	/* Receive data fifo packet count */
-	Rdbal		= 0x00002800,	/* Rdesc Base Address Low */
-	Rdbah		= 0x00002804,	/* Rdesc Base Address High */
-	Rdlen		= 0x00002808,	/* Receive Descriptor Length */
-	Rdh		= 0x00002810,	/* Receive Descriptor Head */
-	Rdt		= 0x00002818,	/* Receive Descriptor Tail */
-	Rdtr		= 0x00002820,	/* Receive Descriptor Timer Ring */
-	Rxdctl		= 0x00002828,	/* Receive Descriptor Control */
-	Txdmac		= 0x00003000,	/* Transfer DMA Control */
-	Ett		= 0x00003008,	/* Early Transmit Control */
-	Tdfh		= 0x00003410,	/* Transmit data fifo head */
-	Tdft		= 0x00003418,	/* Transmit data fifo tail */
-	Tdfhs		= 0x00003420,	/* Transmit data Fifo Head saved */
-	Tdfts		= 0x00003428,	/* Transmit data fifo tail saved */
-	Tdfpc		= 0x00003430,	/* Trasnmit data Fifo packet count */
-	Tdbal		= 0x00003800,	/* Tdesc Base Address Low */
-	Tdbah		= 0x00003804,	/* Tdesc Base Address High */
-	Tdlen		= 0x00003808,	/* Transmit Descriptor Length */
-	Tdh		= 0x00003810,	/* Transmit Descriptor Head */
-	Tdt		= 0x00003818,	/* Transmit Descriptor Tail */
-	Tidv		= 0x00003820,	/* Transmit Interrupt Delay Value */
-	Txdctl		= 0x00003828,	/* Transmit Descriptor Control */
-
-	Statistics	= 0x00004000,	/* Start of Statistics Area */
-	Gorcl		= 0x88/4,	/* Good Octets Received Count */
-	Gotcl		= 0x90/4,	/* Good Octets Transmitted Count */
-	Torl		= 0xC0/4,	/* Total Octets Received */
-	Totl		= 0xC8/4,	/* Total Octets Transmitted */
-	Nstatistics	= 64,
-
-	Rxcsum		= 0x00005000,	/* Receive Checksum Control */
-	Mta		= 0x00005200,	/* Multicast Table Array */
-	Ral		= 0x00005400,	/* Receive Address Low */
-	Rah		= 0x00005404,	/* Receive Address High */
-};
-
-enum {					/* Ctrl */
-	Bem		= 0x00000002,	/* Big Endian Mode */
-	Prior		= 0x00000004,	/* Priority on the PCI bus */
-	Lrst		= 0x00000008,	/* Link Reset */
-	Asde		= 0x00000020,	/* Auto-Speed Detection Enable */
-	Slu		= 0x00000040,	/* Set Link Up */
-	Ilos		= 0x00000080,	/* Invert Loss of Signal (LOS) */
-	Frcspd		= 0x00000800,	/* Force Speed */
-	Frcdplx		= 0x00001000,	/* Force Duplex */
-	Swdpinslo	= 0x003C0000,	/* Software Defined Pins - lo nibble */
-	Swdpin0		= 0x00040000,
-	Swdpin1		= 0x00080000,
-	Swdpin2		= 0x00100000,
-	Swdpin3		= 0x00200000,
-	Swdpiolo	= 0x03C00000,	/* Software Defined I/O Pins */
-	Swdpio0		= 0x00400000,
-	Swdpio1		= 0x00800000,
-	Swdpio2		= 0x01000000,
-	Swdpio3		= 0x02000000,
-	Devrst		= 0x04000000,	/* Device Reset */
-	Rfce		= 0x08000000,	/* Receive Flow Control Enable */
-	Tfce		= 0x10000000,	/* Transmit Flow Control Enable */
-	Vme		= 0x40000000,	/* VLAN Mode Enable */
-};
-
-enum {					/* Status */
-	Lu		= 0x00000002,	/* Link Up */
-	Tckok		= 0x00000004,	/* Transmit clock is running */
-	Rbcok		= 0x00000008,	/* Receive clock is running */
-	Txoff		= 0x00000010,	/* Transmission Paused */
-	Tbimode		= 0x00000020,	/* TBI Mode Indication */
-	SpeedMASK	= 0x000000C0,
-	Speed10		= 0x00000000,	/* 10Mb/s */
-	Speed100	= 0x00000040,	/* 100Mb/s */
-	Speed1000	= 0x00000080,	/* 1000Mb/s */
-	Mtxckok		= 0x00000400,	/* MTX clock is running */
-	Pci66		= 0x00000800,	/* PCI Bus speed indication */
-	Bus64		= 0x00001000,	/* PCI Bus width indication */
-};
-
-enum {					/* Ctrl and Status */
-	Fd		= 0x00000001,	/* Full-Duplex */
-	AsdvMASK	= 0x00000300,
-	Asdv10		= 0x00000000,	/* 10Mb/s */
-	Asdv100		= 0x00000100,	/* 100Mb/s */
-	Asdv1000	= 0x00000200,	/* 1000Mb/s */
-};
-
-enum {					/* Eecd */
-	Sk		= 0x00000001,	/* Clock input to the EEPROM */
-	Cs		= 0x00000002,	/* Chip Select */
-	Di		= 0x00000004,	/* Data Input to the EEPROM */
-	Do		= 0x00000008,	/* Data Output from the EEPROM */
-};
-
-enum {					/* Ctrlext */
-	Gpien		= 0x0000000F,	/* General Purpose Interrupt Enables */
-	Swdpinshi	= 0x000000F0,	/* Software Defined Pins - hi nibble */
-	Swdpiohi	= 0x00000F00,	/* Software Defined Pins - I or O */
-	Asdchk		= 0x00001000,	/* ASD Check */
-	Eerst		= 0x00002000,	/* EEPROM Reset */
-	Ips		= 0x00004000,	/* Invert Power State */
-	Spdbyps		= 0x00008000,	/* Speed Select Bypass */
-};
-
-enum {					/* EEPROM content offsets */
-	Ea		= 0x00,		/* Ethernet Address */
-	Cf		= 0x03,		/* Compatibility Field */
-	Pba		= 0x08,		/* Printed Board Assembly number */
-	Icw1		= 0x0A,		/* Initialization Control Word 1 */
-	Sid		= 0x0B,		/* Subsystem ID */
-	Svid		= 0x0C,		/* Subsystem Vendor ID */
-	Did		= 0x0D,		/* Device ID */
-	Vid		= 0x0E,		/* Vendor ID */
-	Icw2		= 0x0F,		/* Initialization Control Word 2 */
-};
-
-enum {					/* Mdic */
-	MDIdMASK	= 0x0000FFFF,	/* Data */
-	MDIdSHIFT	= 0,
-	MDIrMASK	= 0x001F0000,	/* PHY Register Address */
-	MDIrSHIFT	= 16,
-	MDIpMASK	= 0x03E00000,	/* PHY Address */
-	MDIpSHIFT	= 21,
-	MDIwop		= 0x04000000,	/* Write Operation */
-	MDIrop		= 0x08000000,	/* Read Operation */
-	MDIready	= 0x10000000,	/* End of Transaction */
-	MDIie		= 0x20000000,	/* Interrupt Enable */
-	MDIe		= 0x40000000,	/* Error */
-};
-
-enum {					/* Icr, Ics, Ims, Imc */
-	Txdw		= 0x00000001,	/* Transmit Descriptor Written Back */
-	Txqe		= 0x00000002,	/* Transmit Queue Empty */
-	Lsc		= 0x00000004,	/* Link Status Change */
-	Rxseq		= 0x00000008,	/* Receive Sequence Error */
-	Rxdmt0		= 0x00000010,	/* Rdesc Minimum Threshold Reached */
-	Rxo		= 0x00000040,	/* Receiver Overrun */
-	Rxt0		= 0x00000080,	/* Receiver Timer Interrupt */
-	Mdac		= 0x00000200,	/* MDIO Access Completed */
-	Rxcfg		= 0x00000400,	/* Receiving /C/ ordered sets */
-	Gpi0		= 0x00000800,	/* General Purpose Interrupts */
-	Gpi1		= 0x00001000,
-	Gpi2		= 0x00002000,
-	Gpi3		= 0x00004000,
-};
-
-enum {					/* Txcw */
-	Ane		= 0x80000000,	/* Autonegotiate enable */
-	Np		= 0x00008000,	/* Next Page */
-	As		= 0x00000100,	/* Asymmetric Flow control desired */
-	Ps		= 0x00000080,	/* Pause supported */
-	Hd		= 0x00000040,	/* Half duplex supported */
-	TxcwFd		= 0x00000020,	/* Full Duplex supported */
-};
-
-enum {					/* Rxcw */
-	Rxword		= 0x0000FFFF,	/* Data from auto-negotiation process */
-	Rxnocarrier	= 0x04000000,	/* Carrier Sense indication */
-	Rxinvalid	= 0x08000000,	/* Invalid Symbol during configuration */
-	Rxchange	= 0x10000000,	/* Change to the Rxword indication */
-	Rxconfig	= 0x20000000,	/* /C/ order set reception indication */
-	Rxsync		= 0x40000000,	/* Lost bit synchronization indication */
-	Anc		= 0x80000000,	/* Auto Negotiation Complete */
-};
-
-enum {					/* Rctl */
-	Rrst		= 0x00000001,	/* Receiver Software Reset */
-	Ren		= 0x00000002,	/* Receiver Enable */
-	Sbp		= 0x00000004,	/* Store Bad Packets */
-	Upe		= 0x00000008,	/* Unicast Promiscuous Enable */
-	Mpe		= 0x00000010,	/* Multicast Promiscuous Enable */
-	Lpe		= 0x00000020,	/* Long Packet Reception Enable */
-	LbmMASK		= 0x000000C0,	/* Loopback Mode */
-	LbmOFF		= 0x00000000,	/* No Loopback */
-	LbmTBI		= 0x00000040,	/* TBI Loopback */
-	LbmMII		= 0x00000080,	/* GMII/MII Loopback */
-	LbmXCVR		= 0x000000C0,	/* Transceiver Loopback */
-	RdtmsMASK	= 0x00000300,	/* Rdesc Minimum Threshold Size */
-	RdtmsHALF	= 0x00000000,	/* Threshold is 1/2 Rdlen */
-	RdtmsQUARTER	= 0x00000100,	/* Threshold is 1/4 Rdlen */
-	RdtmsEIGHTH	= 0x00000200,	/* Threshold is 1/8 Rdlen */
-	MoMASK		= 0x00003000,	/* Multicast Offset */
-	Bam		= 0x00008000,	/* Broadcast Accept Mode */
-	BsizeMASK	= 0x00030000,	/* Receive Buffer Size */
-	Bsize2048	= 0x00000000,	/* Bsex = 0 */
-	Bsize1024	= 0x00010000,	/* Bsex = 0 */
-	Bsize512	= 0x00020000,	/* Bsex = 0 */
-	Bsize256	= 0x00030000,	/* Bsex = 0 */
-	Bsize16384	= 0x00010000,	/* Bsex = 1 */
-	Vfe		= 0x00040000,	/* VLAN Filter Enable */
-	Cfien		= 0x00080000,	/* Canonical Form Indicator Enable */
-	Cfi		= 0x00100000,	/* Canonical Form Indicator value */
-	Dpf		= 0x00400000,	/* Discard Pause Frames */
-	Pmcf		= 0x00800000,	/* Pass MAC Control Frames */
-	Bsex		= 0x02000000,	/* Buffer Size Extension */
-	Secrc		= 0x04000000,	/* Strip CRC from incoming packet */
-};
-
-enum {					/* Tctl */
-	Trst		= 0x00000001,	/* Transmitter Software Reset */
-	Ten		= 0x00000002,	/* Transmit Enable */
-	Psp		= 0x00000008,	/* Pad Short Packets */
-	CtMASK		= 0x00000FF0,	/* Collision Threshold */
-	CtSHIFT		= 4,
-	ColdMASK	= 0x003FF000,	/* Collision Distance */
-	ColdSHIFT	= 12,
-	Swxoff		= 0x00400000,	/* Sofware XOFF Transmission */
-	Pbe		= 0x00800000,	/* Packet Burst Enable */
-	Rtlc		= 0x01000000,	/* Re-transmit on Late Collision */
-	Nrtu		= 0x02000000,	/* No Re-transmit on Underrrun */
-};
-
-enum {					/* [RT]xdctl */
-	PthreshMASK	= 0x0000003F,	/* Prefetch Threshold */
-	PthreshSHIFT	= 0,
-	HthreshMASK	= 0x00003F00,	/* Host Threshold */
-	HthreshSHIFT	= 8,
-	WthreshMASK	= 0x003F0000,	/* Writeback Threshold */
-	WthreshSHIFT	= 16,
-	Gran		= 0x00000000,	/* Granularity */
-	RxGran		= 0x01000000,	/* Granularity */
-};
-
-enum {					/* Rxcsum */
-	PcssMASK	= 0x000000FF,	/* Packet Checksum Start */
-	PcssSHIFT	= 0,
-	Ipofl		= 0x00000100,	/* IP Checksum Off-load Enable */
-	Tuofl		= 0x00000200,	/* TCP/UDP Checksum Off-load Enable */
-};
-
-enum {					/* Receive Delay Timer Ring */
-	Fpd		= 0x80000000,	/* Flush partial Descriptor Block */
-};
-
-typedef struct Rdesc {			/* Receive Descriptor */
-	uint	addr[2];
-	ushort	length;
-	ushort	checksum;
-	uchar	status;
-	uchar	errors;
-	ushort	special;
-} Rdesc;
-
-enum {					/* Rdesc status */
-	Rdd		= 0x01,		/* Descriptor Done */
-	Reop		= 0x02,		/* End of Packet */
-	Ixsm		= 0x04,		/* Ignore Checksum Indication */
-	Vp		= 0x08,		/* Packet is 802.1Q (matched VET) */
-	Tcpcs		= 0x20,		/* TCP Checksum Calculated on Packet */
-	Ipcs		= 0x40,		/* IP Checksum Calculated on Packet */
-	Pif		= 0x80,		/* Passed in-exact filter */
-};
-
-enum {					/* Rdesc errors */
-	Ce		= 0x01,		/* CRC Error or Alignment Error */
-	Se		= 0x02,		/* Symbol Error */
-	Seq		= 0x04,		/* Sequence Error */
-	Cxe		= 0x10,		/* Carrier Extension Error */
-	Tcpe		= 0x20,		/* TCP/UDP Checksum Error */
-	Ipe		= 0x40,		/* IP Checksum Error */
-	Rxe		= 0x80,		/* RX Data Error */
-};
-
-typedef struct Tdesc {			/* Legacy+Normal Transmit Descriptor */
-	uint	addr[2];
-	uint	control;		/* varies with descriptor type */
-	uint	status;			/* varies with descriptor type */
-} Tdesc;
-
-enum {					/* Tdesc control */
-	CsoMASK		= 0x00000F00,	/* Checksum Offset */
-	CsoSHIFT	= 16,
-	Teop		= 0x01000000,	/* End of Packet */
-	Ifcs		= 0x02000000,	/* Insert FCS */
-	Ic		= 0x04000000,	/* Insert Checksum (Dext == 0) */
-	Tse		= 0x04000000,	/* TCP Segmentaion Enable (Dext == 1) */
-	Rs		= 0x08000000,	/* Report Status */
-	Rps		= 0x10000000,	/* Report Status Sent */
-	Dext		= 0x20000000,	/* Extension (!legacy) */
-	Vle		= 0x40000000,	/* VLAN Packet Enable */
-	Ide		= 0x80000000,	/* Interrupt Delay Enable */
-};
-
-enum {					/* Tdesc status */
-	Tdd		= 0x00000001,	/* Descriptor Done */
-	Ec		= 0x00000002,	/* Excess Collisions */
-	Lc		= 0x00000004,	/* Late Collision */
-	Tu		= 0x00000008,	/* Transmit Underrun */
-	CssMASK		= 0x0000FF00,	/* Checksum Start Field */
-	CssSHIFT	= 8,
-};
-
-enum {
-	Nrdesc		= 256,		/* multiple of 8 */
-	Ntdesc		= 256,		/* multiple of 8 */
-	Nblocks		= 4098,		/* total number of blocks to use */
-
-	SBLOCKSIZE	= 2048,
-	JBLOCKSIZE	= 16384,
-
-	NORMAL		= 1,
-	JUMBO		= 2,
-};
-
-typedef struct Ctlr Ctlr;
-typedef struct Ctlr {
-	uvlong	port;
-	Pcidev*	pcidev;
-	Ctlr*	next;
-	int	active;
-	int	started;
-	int	id;
-	ushort	eeprom[0x40];
-
-	int*	nic;
-	int	im;			/* interrupt mask */
-
-	Lock	slock;
-	uint	statistics[Nstatistics];
-
-	Lock	rdlock;
-	Rdesc*	rdba;			/* receive descriptor base address */
-	Block*	rb[Nrdesc];		/* receive buffers */
-	int	rdh;			/* receive descriptor head */
-	int	rdt;			/* receive descriptor tail */
-	Block**	freehead;		/* points to long or short head */
-
-	Lock	tdlock;
-	Tdesc*	tdba;			/* transmit descriptor base address */
-	Block*	tb[Ntdesc];		/* transmit buffers */
-	int	tdh;			/* transmit descriptor head */
-	int	tdt;			/* transmit descriptor tail */
-	int	txstalled;		/* count of times unable to send */
-
-	int	txcw;
-	int	fcrtl;
-	int	fcrth;
-
-	ulong	multimask[128];		/* bit mask for multicast addresses */
-} Ctlr;
-
-static Ctlr* gc82543ctlrhead;
-static Ctlr* gc82543ctlrtail;
-
-static Lock freelistlock;
-static Block* freeShortHead;
-static Block* freeJumboHead;
-
-#define csr32r(c, r)	(*((c)->nic+((r)/4)))
-#define csr32w(c, r, v)	(*((c)->nic+((r)/4)) = (v))
-
-static void gc82543watchdog(void* arg);
-
-static void
-gc82543attach(Ether* edev)
-{
-	int ctl;
-	Ctlr *ctlr;
-	char name[KNAMELEN];
-
-	/*
-	 * To do here:
-	 *	one-time stuff;
-	 *		adjust queue length depending on speed;
-	 *		flow control.
-	 *	more needed here...
-	 */
-	ctlr = edev->ctlr;
-	lock(&ctlr->slock);
-	if(ctlr->started){
-		unlock(&ctlr->slock);
-		return;
-	}
-
-	ctl = csr32r(ctlr, Rctl)|Ren;
-	csr32w(ctlr, Rctl, ctl);
-	ctl = csr32r(ctlr, Tctl)|Ten;
-	csr32w(ctlr, Tctl, ctl);
-
-	csr32w(ctlr, Ims, ctlr->im);
-
-	ctlr->started = 1;
-	unlock(&ctlr->slock);
-
-	snprint(name, KNAMELEN, "#l%d82543", edev->ctlrno);
-	kproc(name, gc82543watchdog, edev);
-}
-
-static char* statistics[Nstatistics] = {
-	"CRC Error",
-	"Alignment Error",
-	"Symbol Error",
-	"RX Error",
-	"Missed Packets",
-	"Single Collision",
-	"Excessive Collisions",
-	"Multiple Collision",
-	"Late Collisions",
-	nil,
-	"Collision",
-	"Transmit Underrun",
-	"Defer",
-	"Transmit - No CRS",
-	"Sequence Error",
-	"Carrier Extension Error",
-	"Receive Error Length",
-	nil,
-	"XON Received",
-	"XON Transmitted",
-	"XOFF Received",
-	"XOFF Transmitted",
-	"FC Received Unsupported",
-	"Packets Received (64 Bytes)",
-	"Packets Received (65-127 Bytes)",
-	"Packets Received (128-255 Bytes)",
-	"Packets Received (256-511 Bytes)",
-	"Packets Received (512-1023 Bytes)",
-	"Packets Received (1024-1522 Bytes)",
-	"Good Packets Received",
-	"Broadcast Packets Received",
-	"Multicast Packets Received",
-	"Good Packets Transmitted",
-	nil,
-	"Good Octets Received",
-	nil,
-	"Good Octets Transmitted",
-	nil,
-	nil,
-	nil,
-	"Receive No Buffers",
-	"Receive Undersize",
-	"Receive Fragment",
-	"Receive Oversize",
-	"Receive Jabber",
-	nil,
-	nil,
-	nil,
-	"Total Octets Received",
-	nil,
-	"Total Octets Transmitted",
-	nil,
-	"Total Packets Received",
-	"Total Packets Transmitted",
-	"Packets Transmitted (64 Bytes)",
-	"Packets Transmitted (65-127 Bytes)",
-	"Packets Transmitted (128-255 Bytes)",
-	"Packets Transmitted (256-511 Bytes)",
-	"Packets Transmitted (512-1023 Bytes)",
-	"Packets Transmitted (1024-1522 Bytes)",
-	"Multicast Packets Transmitted",
-	"Broadcast Packets Transmitted",
-	"TCP Segmentation Context Transmitted",
-	"TCP Segmentation Context Fail",
-};
-
-static char*
-gc82543ifstat(void *arg, char *p, char *e)
-{
-	Ether *edev;
-	Ctlr *ctlr;
-	char *s;
-	int i, r;
-	uvlong tuvl, ruvl;
-
-	if(p >= e)
-		return p;
-
-	edev = arg;
-	ctlr = edev->ctlr;
-	lock(&ctlr->slock);
-	for(i = 0; i < Nstatistics; i++){
-		r = csr32r(ctlr, Statistics+i*4);
-		if((s = statistics[i]) == nil)
-			continue;
-		switch(i){
-		case Gorcl:
-		case Gotcl:
-		case Torl:
-		case Totl:
-			ruvl = r;
-			ruvl += ((uvlong)csr32r(ctlr, Statistics+(i+1)*4))<<32;
-			tuvl = ruvl;
-			tuvl += ctlr->statistics[i];
-			tuvl += ((uvlong)ctlr->statistics[i+1])<<32;
-			if(tuvl == 0)
-				continue;
-			ctlr->statistics[i] = tuvl;
-			ctlr->statistics[i+1] = tuvl>>32;
-			p = seprint(p, e, "%s: %llud %llud\n",
-				s, tuvl, ruvl);
-			i++;
-			break;
-
-		default:
-			ctlr->statistics[i] += r;
-			if(ctlr->statistics[i] == 0)
-				continue;
-			p = seprint(p, e, "%s: %ud %ud\n",
-				s, ctlr->statistics[i], r);
-			break;
-		}
-	}
-
-	p = seprint(p, e, "eeprom:");
-	for(i = 0; i < 0x40; i++){
-		if(i && ((i & 0x07) == 0))
-			p = seprint(p, e, "\n       ");
-		p = seprint(p, e, " %4.4uX", ctlr->eeprom[i]);
-	}
-
-	p = seprint(p, e, "\ntxstalled %d\n", ctlr->txstalled);
-	unlock(&ctlr->slock);
-
-	return p;
-}
-
-static void
-gc82543promiscuous(void* arg, int on)
-{
-	int rctl;
-	Ctlr *ctlr;
-	Ether *edev;
-
-	edev = arg;
-	ctlr = edev->ctlr;
-
-	rctl = csr32r(ctlr, Rctl);
-	rctl &= ~MoMASK;		/* make sure we're using bits 47:36 */
-	if(on)
-		rctl |= Upe|Mpe;
-	else
-		rctl &= ~(Upe|Mpe);
-	csr32w(ctlr, Rctl, rctl);
-}
-
-static void
-gc82543multicast(void* arg, uchar* addr, int on)
-{
-	int bit, x;
-	Ctlr *ctlr;
-	Ether *edev;
-
-	edev = arg;
-	ctlr = edev->ctlr;
-	x = addr[5]>>1;
-	bit = ((addr[5] & 1)<<4)|(addr[4]>>4);
-	if(on)
-		ctlr->multimask[x] |= 1<<bit;
-	else
-		ctlr->multimask[x] &= ~(1<<bit);
-	
-	csr32w(ctlr, Mta+x*4, ctlr->multimask[x]);
-}
-
-static long
-gc82543ctl(Ether* edev, void* buf, long n)
-{
-	Cmdbuf *cb;
-	Ctlr *ctlr;
-	int ctrl, i, r;
-
-	ctlr = edev->ctlr;
-	if(ctlr == nil)
-		error(Enonexist);
-
-	lock(&ctlr->slock);
-	r = 0;
-	cb = parsecmd(buf, n);
-	if(cb->nf < 2)
-		r = -1;
-	else if(cistrcmp(cb->f[0], "auto") == 0){
-		ctrl = csr32r(ctlr, Ctrl);
-		if(cistrcmp(cb->f[1], "off") == 0){
-			csr32w(ctlr, Txcw, ctlr->txcw & ~Ane);
-			ctrl |= (Slu|Fd);
-			if(ctlr->txcw & As)
-				ctrl |= Rfce;
-			if(ctlr->txcw & Ps)
-				ctrl |= Tfce;
-			csr32w(ctlr, Ctrl, ctrl);
-		}
-		else if(cistrcmp(cb->f[1], "on") == 0){
-			csr32w(ctlr, Txcw, ctlr->txcw);
-			ctrl &= ~(Slu|Fd);
-			csr32w(ctlr, Ctrl, ctrl);
-		}
-		else
-			r = -1;
-	}
-	else if(cistrcmp(cb->f[0], "clear") == 0){
-		if(cistrcmp(cb->f[1], "stats") == 0){
-			for(i = 0; i < Nstatistics; i++)
-				ctlr->statistics[i] = 0;
-		}
-		else
-			r = -1;
-	}
-	else
-		r = -1;
-	unlock(&ctlr->slock);
-
-	free(cb);
-	return (r == 0) ? n : r;
-}
-
-static void
-gc82543txinit(Ctlr* ctlr)
-{
-	int i;
-	int tdsize;
-	Block *bp, **bpp;
-
-	tdsize = ROUND(Ntdesc*sizeof(Tdesc), 4096);
-
-	if(ctlr->tdba == nil)
-		ctlr->tdba = xspanalloc(tdsize, 32, 0);
-
-	for(i = 0; i < Ntdesc; i++){
-		bpp = &ctlr->tb[i];
-		bp = *bpp;
-		if(bp != nil){
-			*bpp = nil;
-			freeb(bp);
-		}
-		memset(&ctlr->tdba[i], 0, sizeof(Tdesc));
-	}
-
-	csr32w(ctlr, Tdbal, PCIWADDR(ctlr->tdba));
-	csr32w(ctlr, Tdbah, 0);
-	csr32w(ctlr, Tdlen, Ntdesc*sizeof(Tdesc));
-
-	/*
-	 * set the ring head and tail pointers.
-	 */
-	ctlr->tdh = 0;
-	csr32w(ctlr, Tdh, ctlr->tdh);
-	ctlr->tdt = 0;
-	csr32w(ctlr, Tdt, ctlr->tdt);
-
-	csr32w(ctlr, Tipg, (6<<20)|(8<<10)|6);
-	csr32w(ctlr, Tidv, 128);
-	csr32w(ctlr, Ait, 0);
-	csr32w(ctlr, Txdmac, 0);
-	csr32w(ctlr, Txdctl, Gran|(4<<WthreshSHIFT)|(1<<HthreshSHIFT)|16);
-	csr32w(ctlr, Tctl, (0x0F<<CtSHIFT)|Psp|(6<<ColdSHIFT));
-
-	ctlr->im |= Txdw;
-}
-
-static void
-gc82543transmit(Ether* edev)
-{
-	Block *bp, **bpp;
-	Ctlr *ctlr;
-	Tdesc *tdesc;
-	int tdh, tdt, s;
-
-	ctlr = edev->ctlr;
-
-	ilock(&ctlr->tdlock);
-	tdh = ctlr->tdh;
-	for(;;){
-		/*
-		 * Free any completed packets
-		 */
-		tdesc = &ctlr->tdba[tdh];
-		if(!(tdesc->status & Tdd))
-			break;
-		memset(tdesc, 0, sizeof(Tdesc));
-		bpp = &ctlr->tb[tdh];
-		bp = *bpp;
-		if(bp != nil){
-			*bpp = nil;
-			freeb(bp);
-		}
-		tdh = NEXT(tdh, Ntdesc);
-	}
-	ctlr->tdh = tdh;
-	s = csr32r(ctlr, Status);
-
-	/*
-	 * Try to fill the ring back up
-	 * but only if link is up and transmission isn't paused.
-	 */
-	if((s & (Txoff|Lu)) == Lu){
-		tdt = ctlr->tdt;
-		while(NEXT(tdt, Ntdesc) != tdh){
-			if((bp = qget(edev->oq)) == nil)
-				break;
-
-			tdesc = &ctlr->tdba[tdt];
-			tdesc->addr[0] = PCIWADDR(bp->rp);
-			tdesc->control = Ide|Rs|Ifcs|Teop|BLEN(bp);
-			ctlr->tb[tdt] = bp;
-			tdt = NEXT(tdt, Ntdesc);
-		}
-
-		if(tdt != ctlr->tdt){
-			ctlr->tdt = tdt;
-			csr32w(ctlr, Tdt, tdt);
-		}
-	}
-	else
-		ctlr->txstalled++;
-
-	iunlock(&ctlr->tdlock);
-}
-
-static Block *
-gc82543allocb(Ctlr* ctlr)
-{
-	Block *bp;
-
-	ilock(&freelistlock);
-	if((bp = *(ctlr->freehead)) != nil){
-		*(ctlr->freehead) = bp->next;
-		bp->next = nil;
-	}
-	iunlock(&freelistlock);
-	return bp;
-}
-
-static void
-gc82543replenish(Ctlr* ctlr)
-{
-	int rdt;
-	Block *bp;
-	Rdesc *rdesc;
-
-	ilock(&ctlr->rdlock);
-	rdt = ctlr->rdt;
-	while(NEXT(rdt, Nrdesc) != ctlr->rdh){
-		rdesc = &ctlr->rdba[rdt];
-		if(ctlr->rb[rdt] == nil){
-			bp = gc82543allocb(ctlr);
-			if(bp == nil){
-				iprint("no available buffers\n");
-				break;
-			}
-			ctlr->rb[rdt] = bp;
-			rdesc->addr[0] = PCIWADDR(bp->rp);
-			rdesc->addr[1] = 0;
-		}
-		coherence();
-		rdesc->status = 0;
-		rdt = NEXT(rdt, Nrdesc);
-	}
-	ctlr->rdt = rdt;
-	csr32w(ctlr, Rdt, rdt);
-	iunlock(&ctlr->rdlock);
-}
-
-static void
-gc82543rxinit(Ctlr* ctlr)
-{
-	int rdsize, i;
-
-	csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF);
-
-	/*
-	 * Allocate the descriptor ring and load its
-	 * address and length into the NIC.
-	 */
-	rdsize = ROUND(Nrdesc*sizeof(Rdesc), 4096);
-	if(ctlr->rdba == nil)
-		ctlr->rdba = xspanalloc(rdsize, 32, 0);
-	memset(ctlr->rdba, 0, rdsize);
-
-	ctlr->rdh = 0;
-	ctlr->rdt = 0;
-
-	csr32w(ctlr, Rdtr, Fpd|64);
-	csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba));
-	csr32w(ctlr, Rdbah, 0);
-	csr32w(ctlr, Rdlen, Nrdesc*sizeof(Rdesc));
-	csr32w(ctlr, Rdh, 0);
-	csr32w(ctlr, Rdt, 0);
-	for(i = 0; i < Nrdesc; i++){
-		if(ctlr->rb[i] != nil){
-			freeb(ctlr->rb[i]);
-			ctlr->rb[i] = nil;
-		}
-	}
-	gc82543replenish(ctlr);
-
-	csr32w(ctlr, Rxdctl, RxGran|(8<<WthreshSHIFT)|(4<<HthreshSHIFT)|1);
-	ctlr->im |= Rxt0|Rxo|Rxdmt0|Rxseq;
-}
-
-static void
-gc82543recv(Ether* edev, int icr)
-{
-	Block *bp;
-	Ctlr *ctlr;
-	Rdesc *rdesc;
-	int rdh;
-
-	ctlr = edev->ctlr;
-
-	rdh = ctlr->rdh;
-	for(;;){
-		rdesc = &ctlr->rdba[rdh];
-
-		if(!(rdesc->status & Rdd))
-			break;
-
-		if((rdesc->status & Reop) && rdesc->errors == 0){
-			bp = ctlr->rb[rdh];
-			ctlr->rb[rdh] = nil;
-			bp->wp += rdesc->length;
-			bp->next = nil;
-			etheriq(edev, bp);
-		}
-
-		if(ctlr->rb[rdh] != nil){
-			/* either non eop packet, or error */
-			freeb(ctlr->rb[rdh]);
-			ctlr->rb[rdh] = nil;
-		}
-		memset(rdesc, 0, sizeof(Rdesc));
-		coherence();
-		rdh = NEXT(rdh, Nrdesc);
-	}
-	ctlr->rdh = rdh;
-
-	if(icr & Rxdmt0)
-		gc82543replenish(ctlr);
-}
-
-static void
-freegc82543short(Block *bp)
-{
-	ilock(&freelistlock);
-	/* reset read/write pointer to proper positions */
-	bp->rp = bp->lim - ROUND(SBLOCKSIZE, BLOCKALIGN);
-	bp->wp = bp->rp;
-	bp->next = freeShortHead;
-	freeShortHead = bp;
-	iunlock(&freelistlock);
-}
-
-static void
-freegc82532jumbo(Block *bp)
-{
-	ilock(&freelistlock);
-	/* reset read/write pointer to proper positions */
-	bp->rp = bp->lim - ROUND(JBLOCKSIZE, BLOCKALIGN);
-	bp->wp = bp->rp;
-	bp->next = freeJumboHead;
-	freeJumboHead = bp;
-	iunlock(&freelistlock);
-}
-
-static void
-linkintr(Ctlr* ctlr)
-{
-	int ctrl;
-
-	ctrl = csr32r(ctlr, Ctrl);
-
-	if((ctrl & Swdpin1) ||
-	  ((csr32r(ctlr, Rxcw) & Rxconfig) && !(csr32r(ctlr, Txcw) & Ane))){
- 		csr32w(ctlr, Txcw, ctlr->txcw);
-		ctrl &= ~(Slu|Fd|Frcdplx);
-		csr32w(ctlr, Ctrl, ctrl);
-	}
-}
-
-static void
-gc82543interrupt(Ureg*, void* arg)
-{
-	Ctlr *ctlr;
-	Ether *edev;
-	int icr;
-
-	edev = arg;
-	ctlr = edev->ctlr;
-
-	while((icr = csr32r(ctlr, Icr) & ctlr->im) != 0){
-		/*
-		 * Link status changed.
-		 */
-		if(icr & (Lsc|Rxseq))
-			linkintr(ctlr);
-
-		/*
-		 * Process recv buffers.
-		 */
-		gc82543recv(edev, icr);
-
-		/*
-		 * Refill transmit ring and free packets.
-		 */
-		gc82543transmit(edev);
-	}
-}
-
-static int
-gc82543init(Ether* edev)
-{
-	int csr, i;
-	Block *bp;
-	Ctlr *ctlr;
-
-	ctlr = edev->ctlr;
-
-	/*
-	 * Allocate private buffer pool to use for receiving packets.
-	 */
-	ilock(&freelistlock);
-	if (ctlr->freehead == nil){
-		for(i = 0; i < Nblocks; i++){
-			bp = iallocb(SBLOCKSIZE);
-			if(bp != nil){
-				bp->next = freeShortHead;
-				bp->free = freegc82543short;
-				freeShortHead = bp;
-			}
-			else{
-				print("82543gc: no memory\n");
-				break;
-			}
-		}
-		ctlr->freehead = &freeShortHead;
-	}
-	iunlock(&freelistlock);
-
-	/*
-	 * Set up the receive addresses.
-	 * There are 16 addresses. The first should be the MAC address.
-	 * The others are cleared and not marked valid (MS bit of Rah).
-	 */
-	csr = (edev->ea[3]<<24)|(edev->ea[2]<<16)|(edev->ea[1]<<8)|edev->ea[0];
-	csr32w(ctlr, Ral, csr);
-	csr = 0x80000000|(edev->ea[5]<<8)|edev->ea[4];
-	csr32w(ctlr, Rah, csr);
-	for(i = 1; i < 16; i++){
-		csr32w(ctlr, Ral+i*8, 0);
-		csr32w(ctlr, Rah+i*8, 0);
-	}
-
-	/*
-	 * Clear the Multicast Table Array.
-	 * It's a 4096 bit vector accessed as 128 32-bit registers.
-	 */
-	for(i = 0; i < 128; i++)
-		csr32w(ctlr, Mta+i*4, 0);
-
-	gc82543txinit(ctlr);
-	gc82543rxinit(ctlr);
-
-	return 0;
-}
-
-static int
-at93c46io(Ctlr* ctlr, char* op, int data)
-{
-	char *lp, *p;
-	int i, loop, eecd, r;
-
-	eecd = csr32r(ctlr, Eecd);
-
-	r = 0;
-	loop = -1;
-	lp = nil;
-	for(p = op; *p != '\0'; p++){
-		switch(*p){
-		default:
-			return -1;
-		case ' ':
-			continue;
-		case ':':			/* start of loop */
-			if(lp != nil){
-				if(p != (lp+1) || loop != 7)
-					return -1;
-				lp = p;
-				loop = 15;
-				continue;
-			}
-			lp = p;
-			loop = 7;
-			continue;
-		case ';':			/* end of loop */
-			if(lp == nil)
-				return -1;
-			loop--;
-			if(loop >= 0)
-				p = lp;
-			else
-				lp = nil;
-			continue;
-		case 'C':			/* assert clock */
-			eecd |= Sk;
-			break;
-		case 'c':			/* deassert clock */
-			eecd &= ~Sk;
-			break;
-		case 'D':			/* next bit in 'data' byte */
-			if(loop < 0)
-				return -1;
-			if(data & (1<<loop))
-				eecd |= Di;
-			else
-				eecd &= ~Di;
-			break;
-		case 'O':			/* collect data output */
-			i = (csr32r(ctlr, Eecd) & Do) != 0;
-			if(loop >= 0)
-				r |= (i<<loop);
-			else
-				r = i;
-			continue;
-		case 'I':			/* assert data input */
-			eecd |= Di;
-			break;
-		case 'i':			/* deassert data input */
-			eecd &= ~Di;
-			break;
-		case 'S':			/* enable chip select */
-			eecd |= Cs;
-			break;
-		case 's':			/* disable chip select */
-			eecd &= ~Cs;
-			break;
-		}
-		csr32w(ctlr, Eecd, eecd);
-		microdelay(1);
-	}
-	if(loop >= 0)
-		return -1;
-	return r;
-}
-
-static int
-at93c46r(Ctlr* ctlr)
-{
-	ushort sum;
-	int addr, data;
-
-	sum = 0;
-	for(addr = 0; addr < 0x40; addr++){
-		/*
-		 * Read a word at address 'addr' from the Atmel AT93C46
-		 * 3-Wire Serial EEPROM or compatible. The EEPROM access is
-		 * controlled by 4 bits in Eecd. See the AT93C46 datasheet
-		 * for protocol details.
-		 */
-		if(at93c46io(ctlr, "S ICc :DCc;", (0x02<<6)|addr) != 0)
-			break;
-		data = at93c46io(ctlr, "::COc;", 0);
-		at93c46io(ctlr, "sic", 0);
-		ctlr->eeprom[addr] = data;
-		sum += data;
-	}
-
-	return sum;
-}
-
-static void
-gc82543detach(Ctlr* ctlr)
-{
-	/*
-	 * Perform a device reset to get the chip back to the
-	 * power-on state, followed by an EEPROM reset to read
-	 * the defaults for some internal registers.
-	 */
-	csr32w(ctlr, Imc, ~0);
-	csr32w(ctlr, Rctl, 0);
-	csr32w(ctlr, Tctl, 0);
-
-	delay(10);
-
-	csr32w(ctlr, Ctrl, Devrst);
-	while(csr32r(ctlr, Ctrl) & Devrst)
-		;
-
-	csr32w(ctlr, Ctrlext, Eerst);
-	while(csr32r(ctlr, Ctrlext) & Eerst)
-		;
-
-	csr32w(ctlr, Imc, ~0);
-	while(csr32r(ctlr, Icr))
-		;
-}
-
-static void
-gc82543checklink(Ctlr* ctlr)
-{
-	int ctrl, status, rxcw;
-
-	ctrl = csr32r(ctlr, Ctrl);
-	status = csr32r(ctlr, Status);
-	rxcw = csr32r(ctlr, Rxcw);
-
-	if(!(status & Lu)){
-		if(!(ctrl & (Swdpin1|Slu)) && !(rxcw & Rxconfig)){
-			csr32w(ctlr, Txcw, ctlr->txcw & ~Ane);
-			ctrl |= (Slu|Fd);
-			if(ctlr->txcw & As)
-				ctrl |= Rfce;
-			if(ctlr->txcw & Ps)
-				ctrl |= Tfce;
-			csr32w(ctlr, Ctrl, ctrl);
-		}
-	}
-	else if((ctrl & Slu) && (rxcw & Rxconfig)){
-		csr32w(ctlr, Txcw, ctlr->txcw);
-		ctrl &= ~(Slu|Fd);
-		csr32w(ctlr, Ctrl, ctrl);
-	}
-}
-
-static void
-gc82543shutdown(Ether* ether)
-{
-	gc82543detach(ether->ctlr);
-}
-
-static int
-gc82543reset(Ctlr* ctlr)
-{
-	int ctl;
-	int te;
-
-	/*
-	 * Read the EEPROM, validate the checksum
-	 * then get the device back to a power-on state.
-	 */
-	if(at93c46r(ctlr) != 0xBABA)
-		return -1;
-
-	gc82543detach(ctlr);
-
-	te = ctlr->eeprom[Icw2];
-	if((te & 0x3000) == 0){
-		ctlr->fcrtl = 0x00002000;
-		ctlr->fcrth = 0x00004000;
-		ctlr->txcw = Ane|TxcwFd;
-	}
-	else if((te & 0x3000) == 0x2000){
-		ctlr->fcrtl = 0;
-		ctlr->fcrth = 0;
-		ctlr->txcw = Ane|TxcwFd|As;
-	}
-	else{
-		ctlr->fcrtl = 0x00002000;
-		ctlr->fcrth = 0x00004000;
-		ctlr->txcw = Ane|TxcwFd|As|Ps;
-	}
-
-	csr32w(ctlr, Txcw, ctlr->txcw);
-
-	csr32w(ctlr, Ctrlext, (te & 0x00f0)<<4);
-
-	csr32w(ctlr, Tctl, csr32r(ctlr, Tctl)|(64<<ColdSHIFT));
-
-	te = ctlr->eeprom[Icw1];
-	ctl = ((te & 0x01E0)<<17)|(te & 0x0010)<<3;
-	csr32w(ctlr, Ctrl, ctl);
-
-	delay(10);
-
-	/*
-	 * Flow control - values from the datasheet.
-	 */
-	csr32w(ctlr, Fcal, 0x00C28001);
-	csr32w(ctlr, Fcah, 0x00000100);
-	csr32w(ctlr, Fct, 0x00008808);
-	csr32w(ctlr, Fcttv, 0x00000100);
-
-	csr32w(ctlr, Fcrtl, ctlr->fcrtl);
-	csr32w(ctlr, Fcrth, ctlr->fcrth);
-
-	ctlr->im = Lsc;
-	gc82543checklink(ctlr);
-
-	return 0;
-}
-
-static void
-gc82543watchdog(void* arg)
-{
-	Ether *edev;
-	Ctlr *ctlr;
-
-	edev = arg;
-	while(waserror())
-		;
-	for(;;){
-		tsleep(&up->sleep, return0, 0, 1000);
-		ctlr = edev->ctlr;
-		if(ctlr == nil)
-			break;
-
-		gc82543checklink(ctlr);
-		gc82543replenish(ctlr);
-	}
-	print("%s: exiting\n", up->text);
-	pexit("disabled", 1);
-}
-
-static void
-gc82543pci(void)
-{
-	int cls;
-	void *mem;
-	Pcidev *p;
-	Ctlr *ctlr;
-	uvlong io;
-
-	p = nil;
-	while(p = pcimatch(p, 0, 0)){
-		if(p->ccrb != 0x02 || p->ccru != 0)
-			continue;
-		if(p->mem[0].bar & 1)
-			continue;
-
-		switch((p->did<<16)|p->vid){
-		case (0x1000<<16)|0x8086:	/* LSI L2A1157 (82542) */
-		case (0x1004<<16)|0x8086:	/* Intel PRO/1000 T */
-		case (0x1008<<16)|0x8086:	/* Intel PRO/1000 XT */
-		default:
-			continue;
-		case (0x1001<<16)|0x8086:	/* Intel PRO/1000 F */
-			break;
-		}
-
-		ctlr = malloc(sizeof(Ctlr));
-		if(ctlr == nil){
-			print("82543gc: can't allocate memory\n");
-			continue;
-		}
-		mem = vmap(p->mem[0].bar & ~0x0F, p->mem[0].size);
-		if(mem == 0){
-			print("82543gc: can't map %8.8luX\n", p->mem[0].bar);
-			free(ctlr);
-			continue;
-		}
-		switch(p->cls){
-		case 0x08:
-		case 0x10:
-			break;
-		default:
-			print("82543gc: p->cls %#ux, setting to 0x10\n", p->cls);
-			p->cls = 0x10;
-			pcicfgw8(p, PciCLS, p->cls);
-		}
-		ctlr->port = p->mem[0].bar & ~0x0F;
-		ctlr->pcidev = p;
-		ctlr->id = (p->did<<16)|p->vid;
-		ctlr->nic = mem;
-
-		if(gc82543ctlrhead != nil)
-			gc82543ctlrtail->next = ctlr;
-		else
-			gc82543ctlrhead = ctlr;
-		gc82543ctlrtail = ctlr;
-	}
-}
-
-static int
-gc82543pnp(Ether* edev)
-{
-	int i;
-	Ctlr *ctlr;
-	uchar ea[Eaddrlen];
-
-	if(gc82543ctlrhead == nil)
-		gc82543pci();
-
-	/*
-	 * Any adapter matches if no edev->port is supplied,
-	 * otherwise the ports must match.
-	 */
-	for(ctlr = gc82543ctlrhead; ctlr != nil; ctlr = ctlr->next){
-		if(ctlr->active)
-			continue;
-		if(edev->port == 0 || edev->port == ctlr->port){
-			ctlr->active = 1;
-			break;
-		}
-	}
-	if(ctlr == nil)
-		return -1;
-	
-	pcienable(ctlr->pcidev);
-	gc82543reset(ctlr);
-
-	edev->ctlr = ctlr;
-	edev->port = ctlr->port;
-	edev->irq = ctlr->pcidev->intl;
-	edev->tbdf = ctlr->pcidev->tbdf;
-	edev->mbps = 1000;
-
-	/*
-	 * Check if the adapter's station address is to be overridden.
-	 * If not, read it from the EEPROM and set in ether->ea prior to
-	 * loading the station address in the hardware.
-	 */
-	memset(ea, 0, Eaddrlen);
-	if(memcmp(ea, edev->ea, Eaddrlen) == 0){
-		for(i = Ea; i < Eaddrlen/2; i++){
-			edev->ea[2*i] = ctlr->eeprom[i];
-			edev->ea[2*i+1] = ctlr->eeprom[i]>>8;
-		}
-	}
-	gc82543init(edev);
-	pcisetbme(ctlr->pcidev);
-
-	/*
-	 * Linkage to the generic ethernet driver.
-	 */
-	edev->attach = gc82543attach;
-	edev->transmit = gc82543transmit;
-	edev->interrupt = gc82543interrupt;
-	edev->shutdown = gc82543shutdown;
-	edev->ctl = gc82543ctl;
-	edev->arg = edev;
-	edev->ifstat = gc82543ifstat;
-	edev->promiscuous = gc82543promiscuous;
-	edev->multicast = gc82543multicast;
-
-	return 0;
-}
-
-void
-ether82543gclink(void)
-{
-	addethercard("82543GC", gc82543pnp);
-}
--- a/sys/src/9/pc/ether82563.c
+++ b/sys/src/9/pc/ether82563.c
@@ -518,6 +518,8 @@
 	int	ntd;
 	int	rbsz;
 
+	Bpool	pool;
+
 	u32int	*nic;
 	Lock	imlock;
 	int	im;			/* interrupt mask */
@@ -907,6 +909,7 @@
 i82563replenish(Ctlr *ctlr)
 {
 	uint rdt, i;
+	uvlong pa;
 	Block *bp;
 	Rd *rd;
 
@@ -913,18 +916,18 @@
 	i = 0;
 	for(rdt = ctlr->rdt; NEXT(rdt, ctlr->nrd) != ctlr->rdh; rdt = NEXT(rdt, ctlr->nrd)){
 		rd = &ctlr->rdba[rdt];
-		if(ctlr->rb[rdt] != nil){
-			iprint("#l%d: %s: tx overrun\n", ctlr->edev->ctlrno, cname(ctlr));
+		if(ctlr->rb[rdt] != nil)
 			break;
-		}
-		i++;
-		bp = allocb(ctlr->rbsz + Rbalign);
-		bp->rp = bp->wp = (uchar*)ROUND((uintptr)bp->base, Rbalign);
+		bp = iallocbp(&ctlr->pool);
+		if(bp == nil)
+			break;
 		ctlr->rb[rdt] = bp;
-		rd->addr[0] = PCIWADDR(bp->rp);
-		rd->addr[1] = 0;
+		pa = PCIWADDR(bp->rp);
+		rd->addr[0] = pa;
+		rd->addr[1] = pa>>32;
 		rd->status = 0;
 		ctlr->rdfree++;
+		i++;
 	}
 	if(i != 0){
 		coherence();
@@ -977,6 +980,13 @@
 			ctlr->rb[i] = nil;
 			freeb(bp);
 		}
+
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = ctlr->rbsz;
+		ctlr->pool.align = Rbalign;
+		growbp(&ctlr->pool, Nrb);
+	}
+
 	if(cttab[ctlr->type].flag & F75)
 		csr32w(ctlr, Rxdctl, 1<<WthreshSHIFT | 8<<PthreshSHIFT | 1<<HthreshSHIFT | Enable);
 	else
@@ -1007,6 +1017,7 @@
 	ctlr = edev->ctlr;
 
 	i82563rxinit(ctlr);
+
 	csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
 	if(cttab[ctlr->type].flag & F75){
 		csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable);
--- a/sys/src/9/pc/etherdp83820.c
+++ b/sys/src/9/pc/etherdp83820.c
@@ -362,6 +362,8 @@
 
 	Mii*	mii;
 
+	Bpool	pool;
+
 	Lock	rdlock;			/* receive */
 	Desc*	rd;
 	int	nrd;
@@ -395,9 +397,6 @@
 static Ctlr* dp83820ctlrhead;
 static Ctlr* dp83820ctlrtail;
 
-static Lock dp83820rblock;		/* free receive Blocks */
-static Block* dp83820rbpool;
-
 static char* dp83820mibs[Nmibd] = {
 	"RXErroredPkts",
 	"RXFCSErrors",
@@ -497,30 +496,20 @@
 }
 
 static Block *
-dp83820rballoc(Desc* desc)
+dp83820rballoc(Ctlr *ctlr, Desc* desc)
 {
 	Block *bp;
 
-	if(desc->bp == nil){
-		ilock(&dp83820rblock);
-		if((bp = dp83820rbpool) == nil){
-			iunlock(&dp83820rblock);
-			desc->bp = nil;
+	bp = desc->bp;
+	if(bp == nil){
+		bp = iallocbp(&ctlr->pool);	/* keep result in bp: used for bufptr and desc->bp below */
+		if(bp == nil){
 			desc->cmdsts = Own;
 			return nil;
 		}
-		dp83820rbpool = bp->next;
-		bp->next = nil;
-		iunlock(&dp83820rblock);
-	
 		desc->bufptr = PCIWADDR(bp->rp);
 		desc->bp = bp;
 	}
-	else{
-		bp = desc->bp;
-		bp->rp = bp->lim - Rbsz;
-		bp->wp = bp->rp;
-	}
 
 	coherence();
 	desc->cmdsts = Intr|Rbsz;
@@ -529,18 +518,6 @@
 }
 
 static void
-dp83820rbfree(Block *bp)
-{
-	bp->rp = bp->lim - Rbsz;
-	bp->wp = bp->rp;
-
-	ilock(&dp83820rblock);
-	bp->next = dp83820rbpool;
-	dp83820rbpool = bp;
-	iunlock(&dp83820rblock);
-}
-
-static void
 dp83820halt(Ctlr* ctlr)
 {
 	int i, timeo;
@@ -635,6 +612,11 @@
 
 	dp83820halt(ctlr);
 
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = Rbsz;
+		growbp(&ctlr->pool, ctlr->nrb);
+	}
+
 	/*
 	 * Receiver
 	 */
@@ -646,7 +628,7 @@
 	for(i = 0; i < ctlr->nrd; i++){
 		desc = &ctlr->rd[i];
 		desc->link = PCIWADDR(&ctlr->rd[NEXT(i, ctlr->nrd)]);
-		if(dp83820rballoc(desc) == nil)
+		if(dp83820rballoc(ctlr, desc) == nil)
 			continue;
 	}
 	csr32w(ctlr, Rxdphi, 0);
@@ -698,7 +680,6 @@
 static void
 dp83820attach(Ether* edev)
 {
-	Block *bp;
 	Ctlr *ctlr;
 
 	ctlr = edev->ctlr;
@@ -739,14 +720,6 @@
 	ctlr->alloc = mallocz((ctlr->nrd+ctlr->ntd)*sizeof(Desc) + 7, 0);
 	if(ctlr->alloc == nil)
 		error(Enomem);
-
-	for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){
-		if((bp = allocb(Rbsz)) == nil)
-			break;
-		bp->free = dp83820rbfree;
-		dp83820rbfree(bp);
-	}
-
 	dp83820init(edev);
 
 	qunlock(&ctlr->alock);
@@ -848,7 +821,7 @@
 						iprint(" %2.2uX", bp->rp[i]);
 					iprint("\n");
 				}
-				dp83820rballoc(desc);
+				dp83820rballoc(ctlr, desc);
 
 				x = NEXT(x, ctlr->nrd);
 				desc = &ctlr->rd[x];
--- a/sys/src/9/pc/etherigbe.c
+++ b/sys/src/9/pc/etherigbe.c
@@ -492,6 +492,8 @@
 	uchar	ra[Eaddrlen];		/* receive address */
 	ulong	mta[128];		/* multicast table array */
 
+	Bpool	pool;
+
 	Rendez	rrendez;
 	int	rim;
 	int	rdfree;
@@ -858,6 +860,7 @@
 {
 	int i, r;
 	Block *bp;
+	uvlong pa;
 
 	csr32w(ctlr, Tctl, (0x0F<<CtSHIFT)|Psp|(66<<ColdSHIFT));
 	switch(ctlr->id){
@@ -887,8 +890,9 @@
 	csr32w(ctlr, Ait, 0);
 	csr32w(ctlr, Txdmac, 0);
 
-	csr32w(ctlr, Tdbal, PCIWADDR(ctlr->tdba));
-	csr32w(ctlr, Tdbah, 0);
+	pa = PCIWADDR(ctlr->tdba);
+	csr32w(ctlr, Tdbal, pa);
+	csr32w(ctlr, Tdbah, pa >> 32);
 	csr32w(ctlr, Tdlen, ctlr->ntd*sizeof(Td));
 	ctlr->tdh = PREV(0, ctlr->ntd);
 	csr32w(ctlr, Tdh, 0);
@@ -942,6 +946,7 @@
 	Block *bp;
 	Ctlr *ctlr;
 	int tdh, tdt;
+	uvlong pa;
 
 	ctlr = edev->ctlr;
 
@@ -969,7 +974,9 @@
 		if((bp = qget(edev->oq)) == nil)
 			break;
 		td = &ctlr->tdba[tdt];
-		td->addr[0] = PCIWADDR(bp->rp);
+		pa = PCIWADDR(bp->rp);
+		td->addr[0] = pa;
+		td->addr[1] = pa >> 32;
 		td->control = ((BLEN(bp) & LenMASK)<<LenSHIFT);
 		td->control |= Dext|Ifcs|Teop|DtypeDD;
 		ctlr->tb[tdt] = bp;
@@ -995,17 +1002,19 @@
 	Rd *rd;
 	int rdt;
 	Block *bp;
+	uvlong pa;
 
 	rdt = ctlr->rdt;
 	while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){
 		rd = &ctlr->rdba[rdt];
 		if(ctlr->rb[rdt] == nil){
-			bp = allocb(Rbsz);
-			bp->rp = bp->lim - Rbsz;
-			bp->wp = bp->rp;
+			bp = iallocbp(&ctlr->pool);
+			if(bp == nil)
+				break;
 			ctlr->rb[rdt] = bp;
-			rd->addr[0] = PCIWADDR(bp->rp);
-			rd->addr[1] = 0;
+			pa = PCIWADDR(bp->rp);
+			rd->addr[0] = pa;
+			rd->addr[1] = pa >> 32;
 		}
 		coherence();
 		rd->status = 0;
@@ -1021,12 +1030,14 @@
 {
 	int i;
 	Block *bp;
+	uvlong pa;
 
 	/* temporarily keep Mpe on */
 	csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF|Mpe);
 
-	csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba));
-	csr32w(ctlr, Rdbah, 0);
+	pa = PCIWADDR(ctlr->rdba);
+	csr32w(ctlr, Rdbal, pa);
+	csr32w(ctlr, Rdbah, pa >> 32);
 	csr32w(ctlr, Rdlen, ctlr->nrd*sizeof(Rd));
 	ctlr->rdh = 0;
 	csr32w(ctlr, Rdh, 0);
@@ -1040,6 +1051,10 @@
 			ctlr->rb[i] = nil;
 			freeb(bp);
 		}
+	}
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = Rbsz;
+		growbp(&ctlr->pool, Nrb);
 	}
 	igbereplenish(ctlr);
 
--- a/sys/src/9/pc/etherm10g.c
+++ b/sys/src/9/pc/etherm10g.c
@@ -26,8 +26,8 @@
 #define	pcicapdbg(...)
 #define malign(n)	mallocalign((n), 4*KiB, 0, 0)
 
-#include "etherm10g2k.i"
-#include "etherm10g4k.i"
+#include "../pc/etherm10g2k.i"
+#include "../pc/etherm10g4k.i"
 
 static int 	debug		= 0;
 static char	Etimeout[]	= "timeout";
@@ -137,18 +137,8 @@
 } Tx;
 
 typedef struct {
-	Lock;
-	Block	*head;
-	uint	size;		/* buffer size of each block */
-	uint	n;		/* n free buffers */
-	uint	cnt;
-} Bpool;
+	Bpool	pool;
 
-static Bpool	smpool 	= { .size = 128, };
-static Bpool	bgpool	= { .size = Maxmtu, };
-
-typedef struct {
-	Bpool	*pool;		/* free buffers */
 	ulong	*lanai;		/* rx ring; we have no permanent host shadow */
 	Block	**host;		/* called "info" in myricom driver */
 //	uchar	*wcfifo;	/* cmd submission fifo */
@@ -254,51 +244,6 @@
 };
 
 static int
-pcicap(Pcidev *p, int cap)
-{
-	int i, c, off;
-
-	pcicapdbg("pcicap: %x:%d\n", p->vid, p->did);
-	off = 0x34;			/* 0x14 for cardbus */
-	for(i = 48; i--; ){
-		pcicapdbg("\t" "loop %x\n", off);
-		off = pcicfgr8(p, off);
-		pcicapdbg("\t" "pcicfgr8 %x\n", off);
-		if(off < 0x40)
-			break;
-		off &= ~3;
-		c = pcicfgr8(p, off);
-		pcicapdbg("\t" "pcicfgr8 %x\n", c);
-		if(c == 0xff)
-			break;
-		if(c == cap)
-			return off;
-		off++;
-	}
-	return 0;
-}
-
-/*
- * this function doesn't work because pcicgr32 doesn't have access
- * to the pcie extended configuration space.
- */
-static int
-pciecap(Pcidev *p, int cap)
-{
-	uint off, i;
-
-	off = 0x100;
-	while(((i = pcicfgr32(p, off)) & 0xffff) != cap){
-		off = i >> 20;
-		print("m10g: pciecap offset = %ud",  off);
-		if(off < 0x100 || off >= 4*KiB - 1)
-			return 0;
-	}
-	print("m10g: pciecap found = %ud",  off);
-	return off;
-}
-
-static int
 setpcie(Pcidev *p)
 {
 	int off;
@@ -328,7 +273,7 @@
 	lanes = (cap>>4) & 0x3f;
 
 	/* check AERC register.  we need it on.  */
-	off = pciecap(p, PcieAERC);
+	off = pcicap(p, PcieAERC);
 	print("; offset %d returned\n", off);
 	cap = 0;
 	if(off != 0){
@@ -486,7 +431,6 @@
 	iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n",
 		cmd->i[0], cmd->i[1], type);
 	error(Etimeout);
-	return ~0;			/* silence! */
 }
 
 ulong
@@ -525,7 +469,6 @@
 	iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
 		cmd->i[0], cmd->i[1], type);
 	error(Etimeout);
-	return ~0;			/* silence! */
 }
 
 /* remove this garbage after testing */
@@ -564,7 +507,6 @@
 		tsleep(&up->sleep, return0, 0, 5);
 	}
 	error(Etimeout);
-	return ~0;			/* silence! */
 }
 
 ulong
@@ -594,8 +536,6 @@
 		tsleep(&up->sleep, return0, 0, 1);
 	}
 	error(Etimeout);
-	iprint("m10g: rdmacmd timeout\n");
-	return ~0;			/* silence! */
 }
 
 static int
@@ -741,7 +681,6 @@
 	if(waserror()){
 		print("m10g: reset error\n");
 		nexterror();
-		return -1;
 	}
 
 	chkfw(c);
@@ -811,7 +750,7 @@
 		print("m10g: can't map %llux\n", raddr);
 		return -1;
 	}
-	dprint("%llux <- vmap(mem[0].size = %d)\n", raddr, p->mem[0].size);
+	dprint("%llux <- vmap(mem[0].size = %llud)\n", raddr, p->mem[0].size);
 	c->port = raddr;
 	c->ram = mem;
 	c->cmd = malign(sizeof *c->cmd);
@@ -836,81 +775,35 @@
 static Rx*
 whichrx(Ctlr *c, int sz)
 {
-	if(sz <= smpool.size)
+	if(sz <= c->sm.pool.size)
 		return &c->sm;
 	return &c->bg;
 }
 
-static Block*
-balloc(Rx* rx)
-{
-	Block *bp;
-
-	ilock(rx->pool);
-	if((bp = rx->pool->head) != nil){
-		rx->pool->head = bp->next;
-		bp->next = nil;
-		rx->pool->n--;
-	}
-	iunlock(rx->pool);
-	return bp;
-}
-
 static void
-rbfree(Block *b, Bpool *p)
-{
-	b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
- 	b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
-
-	ilock(p);
-	b->next = p->head;
-	p->head = b;
-	p->n++;
-	p->cnt++;
-	iunlock(p);
-}
-
-static void
-smbfree(Block *b)
-{
-	rbfree(b, &smpool);
-}
-
-static void
-bgbfree(Block *b)
-{
-	rbfree(b, &bgpool);
-}
-
-static void
 replenish(Rx *rx)
 {
 	ulong buf[16], i, idx, e;
-	Bpool *p;
+	uvlong pa;
 	Block *b;
 
-	p = rx->pool;
-	if(p->n < 8)
-		return;
-	memset(buf, 0, sizeof buf);
 	e = (rx->i - rx->cnt) & ~7;
 	e += rx->n;
-	while(p->n >= 8 && e){
+	while(e){
 		idx = rx->cnt & rx->m;
 		for(i = 0; i < 8; i++){
-			b = balloc(rx);
-			buf[i*2]   = pbit32((uvlong)PCIWADDR(b->wp) >> 32);
-			buf[i*2+1] = pbit32(PCIWADDR(b->wp));
+			while((b = iallocbp(&rx->pool)) == nil)
+				resrcwait("out of m10g rx buffers");
+			pa = PCIWADDR(b->wp);
+			buf[i*2+0] = pbit32(pa >> 32);
+			buf[i*2+1] = pbit32(pa);
 			rx->host[idx+i] = b;
-			assert(b);
 		}
-		memmove(rx->lanai + 2*idx, buf, sizeof buf);
+		memmove(rx->lanai + 2*idx, buf, sizeof(buf));
 		coherence();
 		rx->cnt += 8;
 		e -= 8;
 	}
-	if(e && p->n > 7+1)
-		print("m10g: should panic? pool->n = %d", p->n);
 }
 
 /*
@@ -947,8 +840,7 @@
 static void
 open0(Ether *e, Ctlr *c)
 {
-	Block *b;
-	int i, sz, entries;
+	int entries;
 
 	entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai;
 	c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0));
@@ -958,35 +850,24 @@
 	c->tx.m = entries-1;
 
 	entries = cmd(c, CGrxrgsz, 0)/8;
-	c->sm.pool = &smpool;
-	cmd(c, CSsmallsz, c->sm.pool->size);
+	c->sm.pool.size = 128;
+	c->sm.pool.align = BY2PG;
+	cmd(c, CSsmallsz, c->sm.pool.size);
 	c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0));
 	c->sm.n = entries;
 	c->sm.m = entries-1;
 	c->sm.host = emalign(entries * sizeof *c->sm.host);
 
-	c->bg.pool = &bgpool;
-	c->bg.pool->size = nextpow(2 + e->maxmtu);  /* 2-byte alignment pad */
-	cmd(c, CSbigsz, c->bg.pool->size);
+	c->bg.pool.size = nextpow(2 + e->maxmtu);	/* 2-byte alignment pad */
+	c->bg.pool.align = BY2PG;
+	cmd(c, CSbigsz, c->bg.pool.size);
 	c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0));
 	c->bg.n = entries;
 	c->bg.m = entries-1;
 	c->bg.host = emalign(entries * sizeof *c->bg.host);
 
-	sz = c->sm.pool->size + BY2PG;
-	for(i = 0; i < c->sm.n; i++){
-		if((b = allocb(sz)) == 0)
-			break;
-		b->free = smbfree;
-		freeb(b);
-	}
-	sz = c->bg.pool->size + BY2PG;
-	for(i = 0; i < c->bg.n; i++){
-		if((b = allocb(sz)) == 0)
-			break;
-		b->free = bgbfree;
-		freeb(b);
-	}
+	growbp(&c->sm.pool, c->sm.n);
+	growbp(&c->bg.pool, c->bg.n);
 
 	cmd(c, CSstatsdma, c->statsprt);
 	c->linkstat = ~0;
@@ -1332,17 +1213,6 @@
 	return -1;
 }
 
-static int
-lstcount(Block *b)
-{
-	int i;
-
-	i = 0;
-	for(; b; b = b->next)
-		i++;
-	return i;
-}
-
 static char*
 m10gifstat(void *arg, char *p, char *e)
 {
@@ -1364,9 +1234,7 @@
 		"tx pkt = %lud\n"  "tx bytes = %lld\n"
 		"tx cnt = %ud\n"  "tx n = %ud\n"	"tx i = %ud\n"
 		"sm cnt = %ud\n"  "sm i = %ud\n"	"sm n = %ud\n"
-		"sm lst = %ud\n"
 		"bg cnt = %ud\n"  "bg i = %ud\n"	"bg n = %ud\n"
-		"bg lst = %ud\n"
 		"segsz = %lud\n"   "coal = %lud\n",
 		gbit32(s.txcnt),  gbit32(s.linkstat),	gbit32(s.dlink),
 		gbit32(s.derror), gbit32(s.drunt),	gbit32(s.doverrun),
@@ -1374,8 +1242,8 @@
 		s.txstopped,  s.down, s.updated, s.valid,
 		c->tx.npkt, c->tx.nbytes,
 		c->tx.cnt, c->tx.n, c->tx.i,
-		c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head),
-		c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head),
+		c->sm.cnt, c->sm.i, c->sm.n,
+		c->bg.cnt, c->bg.i, c->bg.n,
 		c->tx.segsz, gbit32((uchar*)c->coal));
 }
 
@@ -1595,9 +1463,7 @@
 
 	e->attach = m10gattach;
 	e->transmit = m10gtransmit;
-	e->interrupt = m10ginterrupt;
 	e->ctl = m10gctl;
-//	e->power = m10gpower;
 	e->shutdown = m10gshutdown;
 
 	e->arg = e;
@@ -1604,6 +1470,8 @@
 	e->ifstat = m10gifstat;
 	e->promiscuous = m10gpromiscuous;
 	e->multicast = m10gmulticast;
+
+	intrenable(e->irq, m10ginterrupt, e, e->tbdf, e->name);
 
 	return 0;
 }
--- a/sys/src/9/pc/ethervirtio.c
+++ b/sys/src/9/pc/ethervirtio.c
@@ -156,6 +156,8 @@
 	ulong	feat;
 	int	nqueue;
 
+	Bpool	pool;
+
 	/* virtioether has 3 queues: rx, tx and ctl */
 	Vqueue	queue[3];
 };
@@ -275,6 +277,9 @@
 	header = smalloc(VheaderSize);
 	blocks = smalloc(sizeof(Block*) * (q->qsize/2));
 
+	ctlr->pool.size = ETHERMAXTU;
+	growbp(&ctlr->pool, q->qsize*2);
+
 	for(i = 0; i < q->qsize/2; i++){
 		j = i << 1;
 		q->desc[j].addr = PADDR(header);
@@ -300,7 +305,7 @@
 			i = q->avail->idx & (q->qmask >> 1);
 			if(blocks[i] != nil)
 				break;
-			if((b = iallocb(ETHERMAXTU)) == nil)
+			if((b = iallocbp(&ctlr->pool)) == nil)
 				break;
 			blocks[i] = b;
 			j = (i << 1) | 1;
--- a/sys/src/9/pc/ethervt6105m.c
+++ b/sys/src/9/pc/ethervt6105m.c
@@ -308,6 +308,8 @@
 	int	nrd;
 	int	ntd;
 
+	Bpool	pool;
+
 	Ds*	rd;
 	Ds*	rdh;
 
@@ -359,10 +361,6 @@
 #define csr16w(c, r, w)	(outs((c)->port+(r), (ushort)(w)))
 #define csr32w(c, r, w)	(outl((c)->port+(r), (ulong)(w)))
 
-static Lock vt6105Mrblock;			/* receive Block freelist */
-static Block* vt6105Mrbpool;
-static uint vt6105Mrbpoolsz;
-
 typedef struct Regs Regs;
 typedef struct Regs {
 	char*	name;
@@ -472,7 +470,6 @@
 	p = seprint(p, e, "tuok: %ud\n", ctlr->tuok);
 	p = seprint(p, e, "ipok: %ud\n", ctlr->ipok);
 
-	p = seprint(p, e, "rbpoolsz: %ud\n", vt6105Mrbpoolsz);
 	p = seprint(p, e, "totalt: %uld\n", ctlr->totalt);
 
 	for(i = 0; regs[i].name != nil; i++){
@@ -575,38 +572,6 @@
 }
 
 static void
-vt6105Mrbfree(Block* bp)
-{
-	bp->rp = bp->lim - (Rdbsz+3);
-	bp->wp = bp->rp;
- 	bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
-
-	ilock(&vt6105Mrblock);
-	bp->next = vt6105Mrbpool;
-	vt6105Mrbpool = bp;
-	iunlock(&vt6105Mrblock);
-}
-
-static Block*
-vt6105Mrballoc(void)
-{
-	Block *bp;
-
-	ilock(&vt6105Mrblock);
-	if((bp = vt6105Mrbpool) != nil){
-		vt6105Mrbpool = bp->next;
-		bp->next = nil;
-	}
-	iunlock(&vt6105Mrblock);
-
-	if(bp == nil && (bp = iallocb(Rdbsz+3)) != nil){
-		bp->free = vt6105Mrbfree;
-		vt6105Mrbpoolsz++;
-	}
-	return bp;
-}
-
-static void
 vt6105Mattach(Ether* edev)
 {
 	Ctlr *ctlr;
@@ -655,6 +620,12 @@
 		nexterror();
 	}
 
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = Rdbsz;
+		ctlr->pool.align = 4;
+		growbp(&ctlr->pool, ctlr->nrd);
+	}
+
 	prev = (Ds*)(alloc + (ctlr->nrd-1)*dsz);
 	for(i = 0; i < ctlr->nrd; i++){
 		ds = (Ds*)alloc;
@@ -663,10 +634,8 @@
 		ds->control = Ipkt|Tcpkt|Udpkt|Rdbsz;
 		ds->branch = PCIWADDR(alloc);
 
-		ds->bp = vt6105Mrballoc();
-		if(ds->bp == nil)
-			error("vt6105M: can't allocate receive ring\n");
-		ds->bp->rp = (uchar*)ROUNDUP((ulong)ds->bp->rp, 4);
+		if((ds->bp = iallocbp(&ctlr->pool)) == nil)
+			error(Enomem);
 		ds->addr = PCIWADDR(ds->bp->rp);
 
 		ds->next = (Ds*)alloc;
@@ -853,7 +822,7 @@
 					ctlr->rxstats[i]++;
 			}
 		}
-		else if(bp = vt6105Mrballoc()){
+		else if(bp = iallocbp(&ctlr->pool)){
 			if(ds->control & Tuok){
 				ds->bp->flag |= Btcpck|Budpck;
 				ctlr->tuok++;
@@ -865,7 +834,6 @@
 			len = ((ds->status & LengthMASK)>>LengthSHIFT)-4;
 			ds->bp->wp = ds->bp->rp+len;
 			etheriq(edev, ds->bp);
-			bp->rp = (uchar*)ROUNDUP((ulong)bp->rp, 4);
 			ds->addr = PCIWADDR(bp->rp);
 			ds->bp = bp;
 		}
--- a/sys/src/9/pc/etheryuk.c
+++ b/sys/src/9/pc/etheryuk.c
@@ -696,6 +696,7 @@
 	Block	*tbring[Tringcnt];
 	Sring	rx;
 	Block	*rbring[Rringcnt];
+	Bpool	pool;
 	Kproc	txmit;
 	Kproc	rxmit;
 	Kproc	iproc;
@@ -1197,6 +1198,10 @@
 		qrwrite(c, Qr + Qcsr, Qsumen);
 	}
 	macwrite32(c, Gfrxctl, Gftroff);
+
+	c->pool.size = c->rbsz;
+	c->pool.align = Rbalign;
+	growbp(&c->pool, Nrb);
 }
 
 /* debug; remove */
@@ -1235,13 +1240,13 @@
 	if(lim > 128)
 		lim = 128;		/* hw limit? */
 	for(n = 0; n < lim; n++){
-		b = iallocb(c->rbsz + Rbalign);
-		if(b == nil || getnslot(r, &wp, tab, 1 + is64()) == -1){
+		b = iallocbp(&c->pool);
+		if(b == nil)
+			break;
+		if(getnslot(r, &wp, tab, 1 + is64()) == -1){
 			freeb(b);
 			break;
 		}
-		b->rp = b->wp = (uchar*)ROUND((uintptr)b->base, Rbalign);
-
 		t = tab[is64()];
 		if(rxscrew(e, r, t, wp) == -1){
 			freeb(b);
--- a/sys/src/9/pc/pc
+++ b/sys/src/9/pc/pc
@@ -55,8 +55,6 @@
 	ether8003	ether8390
 	ether8139	pci
 	ether8169	pci ethermii
-# should be obsoleted by igbe
-#	ether82543gc	pci
 	ether82557	pci
 	ether82563	pci
 	ether82598	pci
@@ -68,6 +66,7 @@
 	etherelnk3	pci
 	etherga620	pci
 	etherigbe	pci ethermii
+#	etherm10g	pci
 	ethervgbe	pci ethermii
 	ethervt6102	pci ethermii
 	ethervt6105m	pci ethermii
--- a/sys/src/9/pc64/mkfile
+++ b/sys/src/9/pc64/mkfile
@@ -121,6 +121,10 @@
 ../pc/sd53c8xx.i:		../pc/sd53c8xx.n
 	cd ../pc && mk sd53c8xx.i
 
+etherm10g.$O:			../pc/etherm10g2k.i ../pc/etherm10g4k.i
+../pc/etherm10g%.i:		../pc/etherm10g%.fw
+	cd ../pc && mk etherm10g$stem.i
+
 $SDEV pmmc.$O:			../port/sd.h
 sdiahci.$O:			ahci.h
 devaoe.$O sdaoe.$O:		../port/aoe.h
--- a/sys/src/9/pc64/pc64
+++ b/sys/src/9/pc64/pc64
@@ -66,6 +66,7 @@
 #	etherelnk3	pci
 #	etherga620	pci
 	etherigbe	pci ethermii
+#	etherm10g	pci
 #	ethervgbe	pci ethermii
 #	ethervt6102	pci ethermii
 #	ethervt6105m	pci ethermii
--- a/sys/src/9/port/allocb.c
+++ b/sys/src/9/port/allocb.c
@@ -13,31 +13,24 @@
 };
 
 static Block*
-_allocb(int size)
+_allocb(ulong size, ulong align)
 {
 	Block *b;
-	uintptr addr;
 
-	size += Tlrspc;
-	size = ROUND(size, BLOCKALIGN);
-	if((b = mallocz(sizeof(Block)+BLOCKALIGN+Hdrspc+size, 0)) == nil)
+	size = ROUND(size+Tlrspc, align);
+	if((b = mallocz(sizeof(Block)+Hdrspc+size+align-1, 0)) == nil)
 		return nil;
 
 	b->next = nil;
 	b->list = nil;
-	b->free = nil;
+	b->pool = nil;
 	b->flag = 0;
 
 	/* align start of data portion by rounding up */
-	addr = (uintptr)b;
-	addr = ROUND(addr + sizeof(Block), BLOCKALIGN);
-	b->base = (uchar*)addr;
+	b->base = (uchar*)ROUND((uintptr)&b[1], (uintptr)align);
 
 	/* align end of data portion by rounding down */
-	b->lim = (uchar*)b + msize(b);
-	addr = (uintptr)b->lim;
-	addr &= ~(BLOCKALIGN-1);
-	b->lim = (uchar*)addr;
+	b->lim = (uchar*)(((uintptr)b + msize(b)) & ~((uintptr)align-1));
 
 	/* leave room at beginning for added headers */
 	b->wp = b->rp = b->lim - size;
@@ -55,7 +48,7 @@
 	 */
 	if(up == nil)
 		panic("allocb without up: %#p", getcallerpc(&size));
-	while((b = _allocb(size)) == nil){
+	while((b = _allocb(size, BLOCKALIGN)) == nil){
 		if(up->nlocks || m->ilockdepth || !islo()){
 			xsummary();
 			mallocsummary();
@@ -76,7 +69,7 @@
 {
 	Block *b;
 
-	if((b = _allocb(size)) == nil){
+	if((b = _allocb(size, BLOCKALIGN)) == nil){
 		static ulong nerr;
 		if((nerr++%10000)==0){
 			if(nerr > 10000000){
@@ -97,20 +90,20 @@
 void
 freeb(Block *b)
 {
+	Bpool *p;
 	void *dead = (void*)Bdead;
 
 	if(b == nil)
 		return;
 
-	/*
-	 * drivers which perform non cache coherent DMA manage their own buffer
-	 * pool of uncached buffers and provide their own free routine.
-	 */
-	if(b->free != nil) {
+	if((p = b->pool) != nil) {
 		b->next = nil;
-		b->list = nil;
-
-		b->free(b);
+		b->rp = b->wp = b->lim - ROUND(p->size+Tlrspc, p->align);
+		b->flag = BINTR;
+		ilock(p);
+		b->list = p->head;
+		p->head = b;
+		iunlock(p);
 		return;
 	}
 
@@ -121,11 +114,83 @@
 	b->wp = dead;
 	b->lim = dead;
 	b->base = dead;
+	b->pool = dead;
 
 	free(b);
 }
 
+static ulong
+_alignment(ulong align)
+{
+	if(align <= BLOCKALIGN)
+		return BLOCKALIGN;
+
+	/* make it a power of two */
+	align--;
+	align |= align>>1;
+	align |= align>>2;
+	align |= align>>4;
+	align |= align>>8;
+	align |= align>>16;
+	align++;
+
+	return align;
+}
+
+Block*
+iallocbp(Bpool *p)
+{
+	Block *b;
+
+	ilock(p);
+	if((b = p->head) != nil){
+		p->head = b->list;
+		b->list = nil;
+		iunlock(p);
+	} else {
+		iunlock(p);
+		p->align = _alignment(p->align);
+		b = _allocb(p->size, p->align);
+		if(b == nil)
+			return nil;
+		setmalloctag(b, getcallerpc(&p));
+		b->pool = p;
+		b->flag = BINTR;
+	}
+
+	return b;
+}
+
 void
+growbp(Bpool *p, int n)
+{
+	ulong size;
+	Block *b;
+	uchar *a;
+
+	if(n < 1)
+		return;
+	if((b = malloc(sizeof(Block)*n)) == nil)
+		return;
+	p->align = _alignment(p->align);
+	size = ROUND(p->size+Hdrspc+Tlrspc, p->align);
+	if((a = mallocalign(size*n, p->align, 0, 0)) == nil){
+		free(b);
+		return;
+	}
+	setmalloctag(b, getcallerpc(&p));
+	while(n > 0){
+		b->base = a;
+		a += size;
+		b->lim = a;
+		b->pool = p;
+		freeb(b);
+		b++;
+		n--;
+	}
+}
+
+void
 checkb(Block *b, char *msg)
 {
 	void *dead = (void*)Bdead;
@@ -132,14 +197,14 @@
 
 	if(b == dead)
 		panic("checkb b %s %#p", msg, b);
-	if(b->base == dead || b->lim == dead || b->next == dead
-	  || b->rp == dead || b->wp == dead){
-		print("checkb: base %#p lim %#p next %#p\n",
-			b->base, b->lim, b->next);
+	if(b->base == dead || b->lim == dead
+	|| b->next == dead || b->list == dead || b->pool == dead
+	|| b->rp == dead || b->wp == dead){
+		print("checkb: base %#p lim %#p next %#p list %#p pool %#p\n",
+			b->base, b->lim, b->next, b->list, b->pool);
 		print("checkb: rp %#p wp %#p\n", b->rp, b->wp);
 		panic("checkb dead: %s", msg);
 	}
-
 	if(b->base > b->lim)
 		panic("checkb 0 %s %#p %#p", msg, b->base, b->lim);
 	if(b->rp < b->base)
--- a/sys/src/9/port/devether.c
+++ b/sys/src/9/port/devether.c
@@ -212,16 +212,22 @@
 			etherrtrace(f, pkt, len);
 			continue;
 		}
-		if(dispose && x == nil)
+		if(dispose && x == nil){
 			x = f;
-		else if((xbp = iallocb(len)) != nil){
+			continue;
+		}
+		if(bp->pool != nil && len <= bp->pool->size)
+			xbp = iallocbp(bp->pool);
+		else
+			xbp = iallocb(len);
+		if(xbp != nil){
 			memmove(xbp->wp, pkt, len);
 			xbp->wp += len;
-			xbp->flag = bp->flag;
-			if(qpass(f->in, xbp) < 0)
-				ether->soverflows++;
-		} else
-			ether->soverflows++;
+			xbp->flag |= bp->flag & ~(BINTR|BFREE);
+			if(qpass(f->in, xbp) >= 0)
+				continue;
+		}
+		ether->soverflows++;
 	}
 	if(x != nil){
 		if(qpass(x->in, bp) < 0)
@@ -376,28 +382,20 @@
 static int
 etherqueuesize(Ether *ether)
 {
-	int lg, mb;
-	ulong bsz;
+	int b, q;	/* b: bytes transferred in 2ms at ether->mbps; q: the queue limit returned */
 
-	/* compute log10(mbps) into lg */
-	for(lg = 0, mb = ether->mbps; mb >= 10; lg++)
-		mb /= 10;
-	if (lg > 0)
-		lg--;
-	if (lg > 14)			/* 2^(14+17) = 2³¹ */
-		lg = 14;
-	/* allocate larger output queues for higher-speed interfaces */
-	bsz = 1UL << (lg + 17);		/* 2¹⁷ = 128K, bsz = 2ⁿ × 128K */
-	while (bsz > mainmem->maxsize / 8 && bsz > 128*1024)
-		bsz /= 2;
-if(0) print("#l%d: %d Mbps -> queue size %lud\n", ether->ctlrno, ether->mbps, bsz);
-	return (int)bsz;
+	b = ether->mbps * 2*125;	/* bytes in 2ms: one Mbps is 125 bytes per millisecond */
+	for(q = 128*1024; q < b; q <<= 1)	/* start at 128K and double until q >= b */
+		;
+	if(mainmem->maxsize / 8 < q)	/* cap at one eighth of mainmem->maxsize */
+		q = mainmem->maxsize / 8;	/* NOTE(review): this cap can yield less than 128K when mainmem is small */
+	return q;
 }
 
 static Ether*
 etherprobe(int cardno, int ctlrno, char *conf)
 {
-	int i;
+	int i, q;
 	Ether *ether;
 
 	ether = malloc(sizeof(Ether));
@@ -447,14 +445,15 @@
 	print("#l%d: %s: %dMbps port 0x%lluX irq %d ea %E\n",
 		ctlrno, ether->type, ether->mbps, (uvlong)ether->port, ether->irq, ether->ea);
 
-	netifinit(ether, ether->name, Ntypes, etherqueuesize(ether));
+	q = etherqueuesize(ether);
 	if(ether->oq == nil){
-		ether->oq = qopen(ether->limit, Qmsg, 0, 0);
+		ether->oq = qopen(q, Qmsg, 0, 0);
 		if(ether->oq == nil)
 			panic("etherreset %s: can't allocate output queue", ether->name);
 	} else {
-		qsetlimit(ether->oq, ether->limit);
+		qsetlimit(ether->oq, q);
 	}
+	netifinit(ether, ether->name, Ntypes, q*2);
 	ether->alen = Eaddrlen;
 	memmove(ether->addr, ether->ea, Eaddrlen);
 	memset(ether->bcast, 0xFF, Eaddrlen);
@@ -465,6 +464,8 @@
 void
 ethersetspeed(Ether *ether, int mbps)
 {
+	int q;	/* queue limit derived from the new speed */
+
 	if(ether->mbps == mbps)
 		return;
 	ether->mbps = mbps;
@@ -471,9 +472,9 @@
 
 	if(mbps <= 0 || ether->f == nil || ether->oq == nil)
 		return;
-
-	netifsetlimit(ether, etherqueuesize(ether));
-	qsetlimit(ether->oq, ether->limit);
+	q = etherqueuesize(ether);	/* recomputed from the ether->mbps value stored above */
+	qsetlimit(ether->oq, q);	/* the output queue is limited to q */
+	netifsetlimit(ether, q*2);	/* the netif limit is twice the output queue limit */
 }
 
 void
--- a/sys/src/9/port/ethervirtio10.c
+++ b/sys/src/9/port/ethervirtio10.c
@@ -197,6 +197,8 @@
 	ulong	feat[2];
 	int	nqueue;
 
+	Bpool	pool;
+
 	/* virtioether has 3 queues: rx, tx and ctl */
 	Vqueue	queue[3];
 };
@@ -316,6 +318,9 @@
 	header = smalloc(VheaderSize);
 	blocks = smalloc(sizeof(Block*) * (q->qsize/2));
 
+	ctlr->pool.size = ETHERMAXTU;
+	growbp(&ctlr->pool, q->qsize*2);
+
 	for(i = 0; i < q->qsize/2; i++){
 		j = i << 1;
 		q->desc[j].addr = PADDR(header);
@@ -341,7 +346,7 @@
 			i = q->avail->idx & (q->qmask >> 1);
 			if(blocks[i] != nil)
 				break;
-			if((b = iallocb(ETHERMAXTU)) == nil)
+			if((b = iallocbp(&ctlr->pool)) == nil)
 				break;
 			blocks[i] = b;
 			j = (i << 1) | 1;
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -1,5 +1,6 @@
 typedef struct Alarms	Alarms;
 typedef struct Block	Block;
+typedef struct Bpool	Bpool;
 typedef struct Chan	Chan;
 typedef struct Cmdbuf	Cmdbuf;
 typedef struct Cmdtab	Cmdtab;
@@ -151,7 +152,7 @@
 	uchar*	wp;			/* first empty byte */
 	uchar*	lim;			/* 1 past the end of the buffer */
 	uchar*	base;			/* start of the buffer */
-	void	(*free)(Block*);
+	Bpool*	pool;
 	ushort	flag;
 	ushort	checksum;		/* IP checksum of complete packet (minus media header) */
 };
@@ -158,6 +159,15 @@
 
 #define BLEN(s)	((s)->wp - (s)->rp)
 #define BALLOC(s) ((s)->lim - (s)->base)
+
+struct Bpool	/* pool of equally sized Blocks; a Block taken from a pool has Block.pool pointing at it */
+{
+	ulong	size;			/* block size; growbp() adds Hdrspc+Tlrspc on top of this per block */
+	ulong	align;			/* block alignment; growbp() overwrites it with _alignment(align) */
+
+	Lock;	/* NOTE(review): presumably guards head — confirm in the allocation and free paths */
+	Block	*head;			/* freelist head */
+};
 
 struct Chan
 {
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -130,11 +130,13 @@
 uintptr		getrealloctag(void*);
 _Noreturn void	gotolabel(Label*);
 char*		getconfenv(void);
+void		growbp(Bpool*, int);
 long		hostdomainwrite(char*, int);
 long		hostownerwrite(char*, int);
 void 		(*hwrandbuf)(void*, ulong);
 void		hzsched(void);
 Block*		iallocb(int);
+Block*		iallocbp(Bpool*);
 uintptr		ibrk(uintptr, int);
 void		ilock(Lock*);
 _Noreturn void	interrupted(void);
--- a/sys/src/9/port/qio.c
+++ b/sys/src/9/port/qio.c
@@ -300,7 +300,12 @@
 	assert(count >= 0);
 
 	QDEBUG checkb(bp, "copyblock 0");
-	nbp = allocb(count);
+	if(bp->pool == nil
+	|| count > bp->pool->size
+	|| (nbp = iallocbp(bp->pool)) == nil)
+		nbp = allocb(count);
+	nbp->flag |= bp->flag & ~(BINTR|BFREE);
+
 	for(; count > 0 && bp != nil; bp = bp->next){
 		l = BLEN(bp);
 		if(l > count)
--- a/sys/src/9/teg2/ether8169.c
+++ b/sys/src/9/teg2/ether8169.c
@@ -316,8 +316,6 @@
 	int	ntdfree;
 	int	ntq;
 
-	int	nrb;
-
 //	Lock	rlock;			/* receive */
 	Rendez	rrendez;
 	D*	rd;			/* descriptor ring */
@@ -328,6 +326,8 @@
 	int	rdt;			/* tail - consumer index (host) */
 	int	nrdfree;
 
+	Bpool	pool;
+
 	Lock	reglock;
 	int	tcr;			/* transmit configuration register */
 	int	rcr;			/* receive configuration register */
@@ -353,9 +353,6 @@
 static Ctlr* rtl8169ctlrhead;
 static Ctlr* rtl8169ctlrtail;
 
-static Lock rblock;			/* free receive Blocks */
-static Block* rbpool;
-
 #define csr8r(c, r)	(*((uchar *) ((c)->nic)+(r)))
 #define csr16r(c, r)	(*((u16int *)((c)->nic)+((r)/2)))
 #define csr32p(c, r)	((u32int *)  ((c)->nic)+((r)/4))
@@ -453,33 +450,7 @@
 	return 0;
 }
 
-static Block*
-rballoc(void)
-{
-	Block *bp;
-
-	ilock(&rblock);
-	if((bp = rbpool) != nil){
-		rbpool = bp->next;
-		bp->next = nil;
-	}
-	iunlock(&rblock);
-	return bp;
-}
-
 static void
-rbfree(Block *bp)
-{
-	bp->wp = bp->rp = bp->lim - Mps;
- 	bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
-
-	ilock(&rblock);
-	bp->next = rbpool;
-	rbpool = bp;
-	iunlock(&rblock);
-}
-
-static void
 rtl8169promiscuous(void* arg, int on)
 {
 	Ether *edev;
@@ -700,6 +671,7 @@
 	Block *bp;
 	Ctlr *ctlr;
 	D *d;
+	uvlong pa;
 
 	ctlr = edev->ctlr;
 	if (ctlr->nrd == 0) {
@@ -718,15 +690,14 @@
 			break;
 		}
 		if(ctlr->rb[rdt] == nil){
-			bp = rballoc();
-			if(bp == nil){
-				iprint("rtl8169: no available buffers\n");
+			bp = iallocbp(&ctlr->pool);
+			if(bp == nil)
 				break;
-			}
 			ctlr->rb[rdt] = bp;
-			d->addrhi = 0;
+			pa = PCIWADDR(bp->rp);
+			d->addrhi = pa >> 32;
 			coherence();
-			d->addrlo = PCIWADDR(bp->rp);
+			d->addrlo = pa;
 			coherence();
 		} else
 			iprint("8169: replenish: rx overrun\n");
@@ -1199,9 +1170,8 @@
 static void
 rtl8169attach(Ether* edev)
 {
-	int timeo, s, i;
+	int timeo, s;
 	char name[KNAMELEN];
-	Block *bp;
 	Ctlr *ctlr;
 
 	ctlr = edev->ctlr;
@@ -1233,13 +1203,9 @@
 	   ctlr->rb == nil || ctlr->dtcc == nil)
 		error(Enomem);
 
-	/* allocate private receive-buffer pool */
-	ctlr->nrb = Nrb;
-	for(i = 0; i < Nrb; i++){
-		if((bp = allocb(Mps)) == nil)
-			error(Enomem);
-		bp->free = rbfree;
-		freeb(bp);
+	if(ctlr->pool.size == 0){
+		ctlr->pool.size = Mps;
+		growbp(&ctlr->pool, Nrb);
 	}
 
 	rtl8169init(edev);
--- a/sys/src/9/xen/etherxen.c
+++ b/sys/src/9/xen/etherxen.c
@@ -236,18 +236,13 @@
 		vifrecv(ctlr, rx);
 		return 1;
 	}
-
 	ctlr->receives++;
-	memmove(bp->base, rx->page + rr->offset, len);
-	vifrecv(ctlr, rx);
-
-	bp->rp = bp->base;
-	bp->wp = bp->rp + len;
-	bp->free = 0;
-	bp->next = 0;
-	bp->list = 0;
 	if (rr->flags & NETRXF_data_validated)
 		bp->flag |= Btcpck|Budpck;
+	bp->rp = bp->base;
+	bp->wp = bp->rp + len;
+	memmove(bp->rp, rx->page + rr->offset, len);
+	vifrecv(ctlr, rx);
 	etheriq(ether, bp);
 	return 0;
 }
--