ref: 7e96542427bd8d6741eaa4efef3439a86e33ddf3
parent: 3258ec2db178f2df85ab3fe360b89bac608e3c04
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Jun 28 12:27:13 EDT 2025
kernel: big virtual memory system overhaul The primary motivation here was to make the OOM killer more robust and improve consistency in maintaining the invariants. Make killbig() use canqlock(), just in case. Get rid of mcountseg() and maintain a Segment.used and Segment.swapped count, making it now possible for killbig() to get memory usage data without needing to acquire the segment lock. Ensure that when calling putseg(), the original reference from Proc.seg[] does not point to it. Ensure that anyone modifying Proc.seg[] must acquire Proc.seglock. Affected: segdetach(), pexit(), txt2data(), segio. This will help with debugging. Make data2txt() take a ref for Image.s when possible, handle release of original data segment in caller instead. Fix an A->B->A race with mfreeseg() in fault.c:^pio() function and make sure the txtflush bits are set *BEFORE* we add the page to the cache. Don't call procctl() in fault when up->nerrlab > 0, we have to error() out instead, and run the error handlers! Improve the per-architecture fault handlers, by having fault() handle the Proc.insyscall bit restoration and provide a helper faultnote() that lets us avoid allocating an ERRMAX string on the stack, saving stack space. Try to free the pages of pte's in batches, avoiding palloc lock contention. For this, provide a deadpage() function that just decrements the refcount and returns a non-nil Page when the page can be freed. Then the caller can chain the pages and build a freelist to be submitted to freepages() in a batch.
--- a/sys/src/9/arm64/trap.c
+++ b/sys/src/9/arm64/trap.c
@@ -328,21 +328,6 @@
}
}
-static void
-faultnote(Ureg *ureg, char *access, uintptr addr)
-{
- extern void checkpages(void);
- char buf[ERRMAX];
-
- if(!userureg(ureg)){
- dumpregs(ureg);
- panic("fault: %s addr=%#p", access, addr);
- }
- checkpages();
- snprint(buf, sizeof(buf), "sys: trap: fault %s addr=%#p", access, addr);
- postnote(up, 1, buf, NDebug);
-}
-
void
faultarm64(Ureg *ureg)
{
@@ -350,9 +335,7 @@
uintptr addr;
user = userureg(ureg);
- if(user)
- up->insyscall = 1;
- else {
+ if(!user){
extern void _peekinst(void);
if(ureg->pc == (uintptr)_peekinst){
@@ -395,12 +378,14 @@
case 61: // first level domain fault
case 62: // second level domain fault
default:
- faultnote(ureg, read? "read": "write", addr);
+ if(!user){
+ dumpregs(ureg);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", addr);
+ }
+ faultnote("fault", read? "read": "write", addr);
}
- if(user)
- up->insyscall = 0;
- else
+ if(!user)
poperror();
}
--- a/sys/src/9/bcm/trap.c
+++ b/sys/src/9/bcm/trap.c
@@ -85,27 +85,13 @@
static void
faultarm(Ureg *ureg, uintptr va, int user, int read)
{
- int n, insyscall;
- char buf[ERRMAX];
-
- if(up == nil) {
- dumpregs(ureg);
- panic("fault: nil up in faultarm, accessing %#p", va);
- }
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(va, ureg->pc, read);
- if(n < 0){
+ if(fault(va, ureg->pc, read) < 0){
if(!user){
dumpregs(ureg);
- panic("fault: kernel accessing %#p", va);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", va);
}
- /* don't dump registers; programs suicide all the time */
- snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
- read? "read": "write", va);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", va);
}
- up->insyscall = insyscall;
}
/*
--- a/sys/src/9/cycv/trap.c
+++ b/sys/src/9/cycv/trap.c
@@ -70,25 +70,17 @@
static void
faultarm(Ureg *ureg, ulong fsr, uintptr addr)
{
- int user, insyscall, read;
- static char buf[ERRMAX];
- char *err;
+ int read;
+ char *type;
- read = (fsr & (1<<11)) == 0;
- user = userureg(ureg);
- if(!user){
- if(addr >= USTKTOP || up == nil)
- _dumpstack(ureg);
- if(addr >= USTKTOP)
+ if(!userureg(ureg)){
+ if(addr >= USTKTOP){
+ dumpregs(ureg);
panic("kernel fault: bad address pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
- if(up == nil)
- panic("kernel fault: no user process pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
+ }
}
- if(up == nil)
- panic("user fault: up=nil pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
- insyscall = up->insyscall;
- up->insyscall = 1;
+ read = (fsr & (1<<11)) == 0;
switch(fsr & 0x1F){
case 0x05: /* translation fault L1 */
case 0x07: /* translation fault L2 */
@@ -102,18 +94,15 @@
break;
/* wet floor */
default:
- err = faulterr[fsr & 0x1F];
- if(err == nil)
- err = "fault";
- if(!user){
+ type = faulterr[fsr & 0x1F];
+ if(type == nil)
+ type = "fault";
+ if(!userureg(ureg)){
dumpregs(ureg);
- _dumpstack(ureg);
- panic("kernel %s: pc=%#.8lux addr=%#.8lux fsr=%#.8lux", err, ureg->pc, addr, fsr);
+ panic("kernel %s: pc=%#.8lux addr=%#.8lux fsr=%#.8lux", type, ureg->pc, addr, fsr);
}
- sprint(buf, "sys: trap: %s %s addr=%#.8lux", err, read ? "read" : "write", addr);
- postnote(up, 1, buf, NDebug);
+ faultnote(type, read? "read": "write", addr);
}
- up->insyscall = insyscall;
}
static void
--- a/sys/src/9/kw/trap.c
+++ b/sys/src/9/kw/trap.c
@@ -322,27 +322,13 @@
static void
faultarm(Ureg *ureg, uintptr va, int user, int read)
{
- int n, insyscall;
- char buf[ERRMAX];
-
- if(up == nil) {
- dumpregs(ureg);
- panic("fault: nil up in faultarm, accessing %#p", va);
- }
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(va, ureg->pc, read);
- if(n < 0){
+ if(fault(va, ureg->pc, read) < 0){
if(!user){
dumpregs(ureg);
- panic("fault: kernel accessing %#p", va);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", va);
}
- /* don't dump registers; programs suicide all the time */
- snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
- read? "read": "write", va);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", va);
}
- up->insyscall = insyscall;
}
/*
--- a/sys/src/9/mt7688/faultmips.c
+++ b/sys/src/9/mt7688/faultmips.c
@@ -186,27 +186,17 @@
faultmips(Ureg *ur, int user, int code)
{
int read;
- ulong addr;
- char *p, buf[ERRMAX];
- addr = ur->badvaddr;
- addr &= ~(BY2PG-1);
-
read = !(code==CTLBM || code==CTLBS);
// iprint("fault: %s code %d va %#p pc %#p r31 %#lux tlbvirt %#lux\n", up->text, code, ur->badvaddr, ur->pc, ur->r31, tlbvirt());
// delay(20);
- if (Debug && ckfaultstuck(ur, read, code) || fault(addr, ur->pc, read) == 0)
+ if (Debug && ckfaultstuck(ur, read, code) || fault(ur->badvaddr, ur->pc, read) == 0)
return;
if(user) {
- p = "store";
- if(read)
- p = "load";
- snprint(buf, sizeof buf, "sys: trap: fault %s addr=%#lux r31=%#lux",
- p, ur->badvaddr, ur->r31);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", ur->badvaddr);
return;
}
--- a/sys/src/9/mt7688/trap.c
+++ b/sys/src/9/mt7688/trap.c
@@ -146,7 +146,7 @@
void
trap(Ureg *ur)
{
- int ecode, user, cop, x, fpchk;
+ int ecode, user, cop, fpchk;
ulong fpfcr31;
char buf[2*ERRMAX], buf1[ERRMAX], *fpexcep;
static int dumps;
@@ -172,11 +172,8 @@
kfault(ur);
break;
}
- x = up->insyscall;
- up->insyscall = 1;
spllo();
faultmips(ur, user, ecode);
- up->insyscall = x;
break;
case CVCEI:
--- a/sys/src/9/mtx/trap.c
+++ b/sys/src/9/mtx/trap.c
@@ -308,22 +308,13 @@
void
faultpower(Ureg *ureg, ulong addr, int read)
{
- int user, insyscall, n;
- char buf[ERRMAX];
-
- user = (ureg->srr1 & MSR_PR) != 0;
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(addr, ureg->pc, read);
- if(n < 0){
- if(!user){
+ if(fault(addr, ureg->pc, read) < 0){
+ if(!userureg(ureg)){
dumpregs(ureg);
- panic("fault: 0x%lux", addr);
+ panic("kernel fault: 0x%lux", addr);
}
- sprint(buf, "sys: trap: fault %s addr=0x%lux", read? "read" : "write", addr);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", addr);
}
- up->insyscall = insyscall;
}
void
--- a/sys/src/9/omap/trap.c
+++ b/sys/src/9/omap/trap.c
@@ -324,27 +324,13 @@
static void
faultarm(Ureg *ureg, uintptr va, int user, int read)
{
- int n, insyscall;
- char buf[ERRMAX];
-
- if(up == nil) {
- dumpregs(ureg);
- panic("fault: nil up in faultarm, accessing %#p", va);
- }
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(va, ureg->pc, read);
- if(n < 0){
+ if(fault(va, ureg->pc, read) < 0){
if(!user){
dumpregs(ureg);
- panic("fault: kernel accessing %#p", va);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", va);
}
- /* don't dump registers; programs suicide all the time */
- snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
- read? "read": "write", va);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", va);
}
- up->insyscall = insyscall;
}
/*
--- a/sys/src/9/pc/trap.c
+++ b/sys/src/9/pc/trap.c
@@ -399,20 +399,16 @@
print("unexpected trap %lud; ignoring\n", ureg->trap);
}
-extern void checkpages(void);
-extern void checkfault(ulong, ulong);
static void
fault386(Ureg* ureg, void*)
{
ulong addr;
- int read, user, n, insyscall;
- char buf[ERRMAX];
+ int read;
addr = getcr2();
read = !(ureg->ecode & 2);
- user = userureg(ureg);
- if(!user){
+ if(!userureg(ureg)){
if(vmapsync(addr))
return;
{
@@ -424,27 +420,15 @@
}
if(addr >= USTKTOP)
panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
- if(up == nil)
- panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
}
- if(up == nil)
- panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(addr, ureg->pc, read);
- if(n < 0){
- if(!user){
+ if(fault(addr, ureg->pc, read) < 0){
+ if(!userureg(ureg)){
dumpregs(ureg);
- panic("fault: 0x%lux", addr);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", addr);
}
- checkpages();
- checkfault(addr, ureg->pc);
- sprint(buf, "sys: trap: fault %s addr=0x%lux",
- read ? "read" : "write", addr);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", addr);
}
- up->insyscall = insyscall;
}
/*
--- a/sys/src/9/pc64/trap.c
+++ b/sys/src/9/pc64/trap.c
@@ -366,21 +366,6 @@
}
static void
-faultnote(Ureg *ureg, char *access, uintptr addr)
-{
- extern void checkpages(void);
- char buf[ERRMAX];
-
- if(!userureg(ureg)){
- dumpregs(ureg);
- panic("fault: %s addr=%#p", access, addr);
- }
- checkpages();
- snprint(buf, sizeof(buf), "sys: trap: fault %s addr=%#p", access, addr);
- postnote(up, 1, buf, NDebug);
-}
-
-static void
faultamd64(Ureg* ureg, void*)
{
uintptr addr;
@@ -389,9 +374,7 @@
addr = getcr2();
read = !(ureg->error & 2);
user = userureg(ureg);
- if(user)
- up->insyscall = 1;
- else {
+ if(!user){
extern void _peekinst(void);
if((void(*)(void))ureg->pc == _peekinst){
@@ -415,12 +398,15 @@
}
}
- if(fault(addr, ureg->pc, read))
- faultnote(ureg, read? "read": "write", addr);
+ if(fault(addr, ureg->pc, read) < 0){
+ if(!user){
+ dumpregs(ureg);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", addr);
+ }
+ faultnote("fault", read? "read": "write", addr);
+ }
- if(user)
- up->insyscall = 0;
- else
+ if(!user)
poperror();
}
--- a/sys/src/9/port/cache.c
+++ b/sys/src/9/port/cache.c
@@ -51,7 +51,12 @@
Mntcache *hash[NHASH];
};
-Image fscache;
+Image fscache = {
+ {
+ .ref = 1,
+ },
+ .notext = 1,
+};
static Cache cache;
@@ -77,8 +82,6 @@
cache.tail = m;
cache.tail->next = nil;
cache.head->prev = nil;
-
- fscache.notext = 1;
}
static uintptr
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -1644,9 +1644,6 @@
Segment *s;
int i;
- s = seg(p, offset, 0);
- if(s == nil)
- error(Ebadarg);
eqlock(&p->seglock);
if(waserror()) {
qunlock(&p->seglock);
@@ -1654,7 +1651,13 @@
}
if(p->state <= New || p->pid != PID(c->qid))
error(Eprocdied);
-
+ s = seg(p, offset, 1);
+ if(s == nil)
+ error(Ebadarg);
+ if(waserror()){
+ qunlock(s);
+ nexterror();
+ }
for(i = 0; i < NSEG; i++) {
if(p->seg[i] == s)
break;
@@ -1661,27 +1664,21 @@
}
if(i == NSEG)
error(Egreg); /* segment gone */
-
- eqlock(s);
- if(waserror()){
+ if(!read && (s->type&SG_TYPE) == SG_TEXT) {
+ p->seg[i] = txt2data(s);
qunlock(s);
- nexterror();
+ putseg(s);
+ s = p->seg[i];
+ } else {
+ qunlock(s);
}
- if(!read && (s->type&SG_TYPE) == SG_TEXT) {
- s = txt2data(s);
- p->seg[i] = s;
- }
- offset -= s->base;
incref(s); /* for us while we copy */
- qunlock(s);
poperror();
-
sio = c->aux;
if(sio == nil){
sio = smalloc(sizeof(Segio));
c->aux = sio;
}
-
qunlock(&p->seglock);
poperror();
@@ -1689,6 +1686,7 @@
putseg(s);
nexterror();
}
+ offset -= s->base;
n = segio(sio, s, a, n, offset, read);
putseg(s);
poperror();
--- a/sys/src/9/port/devswap.c
+++ b/sys/src/9/port/devswap.c
@@ -11,10 +11,13 @@
static int canflush(Proc*, Segment*);
static void executeio(void);
static void pageout(Proc*, Segment*);
-static void pagepte(int, Page**);
+static void pagepte(Segment*, Page**);
static void pager(void*);
Image swapimage = {
+ {
+ .ref = 1
+ },
.notext = 1,
};
@@ -193,11 +196,7 @@
for(i = 0; i < NSEG; i++) {
if((s = p->seg[i]) != nil) {
switch(s->type&SG_TYPE) {
- default:
- break;
case SG_TEXT:
- pageout(p, s);
- break;
case SG_DATA:
case SG_BSS:
case SG_STACK:
@@ -219,7 +218,7 @@
static void
pageout(Proc *p, Segment *s)
{
- int type, i, size;
+ int i;
short age;
Pte *l;
Page **pg, *entry;
@@ -235,9 +234,7 @@
}
/* Pass through the pte tables looking for memory pages to swap out */
- type = s->type&SG_TYPE;
- size = s->mapsize;
- for(i = 0; i < size; i++) {
+ for(i = 0; i < s->mapsize; i++) {
l = s->map[i];
if(l == nil)
continue;
@@ -253,11 +250,11 @@
age = (short)(ageclock - entry->refage);
if(age < 16)
continue;
- pagepte(type, pg);
+ pagepte(s, pg);
}
}
- poperror();
qunlock(s);
+ poperror();
putseg(s);
}
@@ -286,16 +283,17 @@
}
static void
-pagepte(int type, Page **pg)
+pagepte(Segment *s, Page **pg)
{
uintptr daddr;
Page *outp;
outp = *pg;
- switch(type) {
+ switch(s->type & SG_TYPE) {
case SG_TEXT: /* Revert to demand load */
- putpage(outp);
*pg = nil;
+ s->used--;
+ putpage(outp);
break;
case SG_DATA:
@@ -327,6 +325,7 @@
outp->daddr = daddr;
cachepage(outp, &swapimage);
*pg = (Page*)(daddr|PG_ONSWAP);
+ s->swapped++;
/* Add page to IO transaction list */
iolist[ioptr++] = outp;
--- a/sys/src/9/port/fault.c
+++ b/sys/src/9/port/fault.c
@@ -23,6 +23,16 @@
pexit(s, 1);
}
+void
+faultnote(char *type, char *access, uintptr addr)
+{
+ char buf[ERRMAX];
+
+ checkpages();
+ snprint(buf, sizeof(buf), "sys: trap: %s %s addr=%#p", type, access, addr);
+ postnote(up, 1, buf, NDebug);
+}
+
static void
pio(Segment *s, uintptr addr, uintptr soff, Page **p)
{
@@ -33,18 +43,20 @@
char *kaddr;
uintptr daddr;
Page *loadrec;
+ Image *image;
retry:
loadrec = *p;
if(loadrec == nil) { /* from a text/data image */
daddr = s->fstart+soff;
- new = lookpage(s->image, daddr);
+ image = s->image;
+ new = lookpage(image, daddr);
if(new != nil) {
*p = new;
+ s->used++;
return;
}
- c = s->image->c;
ask = BY2PG;
if(soff >= s->flen)
ask = 0;
@@ -53,14 +65,15 @@
}
else { /* from a swap image */
daddr = swapaddr(loadrec);
- new = lookpage(&swapimage, daddr);
+ image = &swapimage;
+ new = lookpage(image, daddr);
if(new != nil) {
- putswap(loadrec);
*p = new;
+ s->swapped--;
+ putswap(loadrec);
return;
}
- c = swapimage.c;
ask = BY2PG;
}
qunlock(s);
@@ -67,7 +80,7 @@
new = newpage(0, 0, addr);
k = kmap(new);
- kaddr = (char*)VA(k);
+ c = image->c;
while(waserror()) {
if(strcmp(up->errstr, Eintr) == 0)
continue;
@@ -75,6 +88,7 @@
putpage(new);
faulterror(Eioload, c);
}
+ kaddr = (char*)VA(k);
n = devtab[c->type]->read(c, kaddr, ask, daddr);
if(n != ask)
error(Eshort);
@@ -84,58 +98,39 @@
kunmap(k);
qlock(s);
- if(loadrec == nil) { /* This is demand load */
- /*
- * race, another proc may have gotten here first while
- * s was unlocked
- */
- if(*p == nil) {
- /*
- * check page cache again after i/o to reduce double caching
- */
- *p = lookpage(s->image, daddr);
- if(*p == nil) {
- incref(new);
- new->daddr = daddr;
- cachepage(new, s->image);
- *p = new;
- }
- }
+ /*
+ * race, another proc may have gotten here first
+ * (and the pager may have run on that page) while
+ * s was unlocked
+ */
+ if(*p != loadrec) {
+ putpage(new);
+
+ /* another process did it for me */
+ if(!pagedout(*p))
+ return;
+
+ /* another process or the pager got in */
+ goto retry;
}
- else { /* This is paged out */
- /*
- * race, another proc may have gotten here first
- * (and the pager may have run on that page) while
- * s was unlocked
- */
- if(*p != loadrec) {
- if(!pagedout(*p)) {
- /* another process did it for me */
- goto done;
- } else if(*p != nil) {
- /* another process and the pager got in */
- putpage(new);
- goto retry;
- } else {
- /* another process segfreed the page */
- incref(new);
- k = kmap(new);
- memset((void*)VA(k), 0, ask);
- kunmap(k);
- *p = new;
- goto done;
- }
- }
- incref(new);
+ /*
+ * check the cache again to avoid double caching.
+ */
+ if((*p = lookpage(image, daddr)) != nil)
+ putpage(new);
+ else {
new->daddr = daddr;
- cachepage(new, &swapimage);
+ settxtflush(new, s->flushme);
+ cachepage(new, image);
*p = new;
+ }
+ if(loadrec == nil)
+ s->used++;
+ else {
+ s->swapped--;
putswap(loadrec);
}
-done:
- putpage(new);
- settxtflush(*p, s->flushme);
}
static int
@@ -177,6 +172,7 @@
if(s == nil)
return -1;
*pg = new;
+ s->used++;
}
/* wet floor */
case SG_DATA: /* Demand load/pagein/copy on write */
@@ -200,9 +196,10 @@
new = newpage(0, &s, addr);
if(s == nil)
return -1;
+ copypage(old, new);
settxtflush(new, s->flushme);
*pg = new;
- copypage(old, *pg);
+ /* s->used count unchanged */
putpage(old);
}
/* wet floor */
@@ -272,10 +269,11 @@
{
Segment *s;
char *sps;
- int pnd, attr;
+ int pnd, ins, attr;
if(up == nil)
- panic("fault: nil up");
+ panic("fault: no user process pc=%#p addr=%#p", pc, addr);
+
if(up->nlocks){
Lock *l = up->lastlock;
print("fault: nlocks %d, proc %lud %s, addr %#p, lock %#p, lpc %#p\n",
@@ -283,10 +281,13 @@
}
pnd = up->notepending;
+ ins = up->insyscall;
+ up->insyscall = 1;
sps = up->psstate;
up->psstate = "Fault";
m->pfault++;
+
for(;;) {
spllo();
@@ -293,6 +294,7 @@
s = seg(up, addr, 1); /* leaves s locked if seg != nil */
if(s == nil) {
up->psstate = sps;
+ up->insyscall = ins;
return -1;
}
@@ -305,6 +307,7 @@
(attr & SG_RONLY) != 0) {
qunlock(s);
up->psstate = sps;
+ up->insyscall = ins;
if(up->kp && up->nerrlab) /* for segio */
error(Eio);
return -1;
@@ -322,11 +325,21 @@
switch(up->procctl){
case Proc_exitme:
case Proc_exitbig:
+ if(up->nerrlab){
+ up->psstate = sps;
+ up->insyscall = ins;
+ up->notepending |= pnd;
+ error(up->procctl==Proc_exitbig?
+ "Killed: Insufficient physical memory":
+ "Killed");
+ }
procctl();
+ break;
}
}
up->psstate = sps;
+ up->insyscall = ins;
up->notepending |= pnd;
return 0;
--- a/sys/src/9/port/page.c
+++ b/sys/src/9/port/page.c
@@ -119,8 +119,6 @@
unlock(i);
return 0;
}
- incref(i);
-
np = 0;
fh = ft = nil;
for(h = i->pghash; h < &i->pghash[PGHSIZE]; h++){
@@ -147,15 +145,15 @@
ft = p;
np++;
+ if(--i->pgref == 0){
+ putimage(i);
+ goto Done;
+ }
decref(i);
- if(--i->pgref == 0)
- break;
}
- putimage(i);
-
- if(np > 0)
- freepages(fh, ft, np);
-
+ unlock(i);
+Done:
+ freepages(fh, ft, np);
return np;
}
@@ -234,18 +232,27 @@
return p;
}
-void
-putpage(Page *p)
+/*
+ * deadpage() decrements the page refcount
+ * and returns the page when it becomes freeable.
+ */
+Page*
+deadpage(Page *p)
{
- if(onswap(p)) {
- putswap(p);
- return;
- }
- if(p->image != nil) {
+ if(p->image != nil){
decref(p);
- return;
+ return nil;
}
- if(decref(p) == 0)
+ if(decref(p) != 0)
+ return nil;
+ return p;
+}
+
+void
+putpage(Page *p)
+{
+ p = deadpage(p);
+ if(p != nil)
freepages(p, p, 1);
}
@@ -355,19 +362,19 @@
ptecpy(Pte *old)
{
Pte *new;
- Page **src, **dst;
+ Page **src, **dst, *entry;
new = ptealloc();
dst = &new->pages[old->first-old->pages];
new->first = dst;
for(src = old->first; src <= old->last; src++, dst++)
- if(*src != nil) {
- if(onswap(*src))
- dupswap(*src);
+ if((entry = *src) != nil) {
+ if(onswap(entry))
+ dupswap(entry);
else
- incref(*src);
+ incref(entry);
new->last = dst;
- *dst = *src;
+ *dst = entry;
}
return new;
@@ -387,15 +394,33 @@
void
freepte(Segment*, Pte *p)
{
- Page **pg, **pe;
+ Page **pg, **pe, *entry;
+ Page *fh, *ft;
+ ulong np;
+ np = 0;
+ fh = ft = nil;
pg = p->first;
pe = p->last;
while(pg <= pe){
- if(*pg != nil)
- putpage(*pg);
+ if((entry = *pg) != nil){
+ if(onswap(entry))
+ putswap(entry);
+ else {
+ entry = deadpage(entry);
+ if(entry != nil){
+ if(fh != nil)
+ ft->next = entry;
+ else
+ fh = entry;
+ ft = entry;
+ np++;
+ }
+ }
+ }
pg++;
}
+ freepages(fh, ft, np);
free(p);
}
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -429,20 +429,25 @@
Ref;
QLock;
int type; /* segment type */
+ ulong size; /* size in pages */
+
uintptr base; /* virtual base */
uintptr top; /* virtual top */
- ulong size; /* size in pages */
uintptr fstart; /* start address in file for demand load */
uintptr flen; /* length of segment in file */
+
int flushme; /* maintain icache for this segment */
Image *image; /* text in file attached to this segment */
Physseg *pseg;
- ulong* profile; /* Tick profile area */
+ ulong *profile; /* Tick profile area */
Pte **map;
int mapsize;
Pte *ssegmap[SSEGMAPSIZE];
+
+ ulong used; /* pages used (swapped or not) */
+ ulong swapped; /* pages swapped */
+
Sema sema;
- ulong mark; /* portcountrefs */
};
struct Segio
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -27,6 +27,7 @@
void chandevshutdown(void);
void chanfree(Chan*);
void checkalarms(void);
+void checkpages(void);
void checkb(Block*, char*);
void cinit(void);
Chan* cclone(Chan*);
@@ -55,6 +56,7 @@
void cupdate(Chan*, uchar*, int, vlong);
void cwrite(Chan*, uchar*, int, vlong);
uintptr dbgpc(Proc*);
+Page* deadpage(Page*);
long decref(Ref*);
int decrypt(void*, void*, int);
void delay(int);
@@ -111,6 +113,7 @@
uvlong fastticks2ns(uvlong);
uvlong fastticks2us(uvlong);
int fault(uintptr, uintptr, int);
+void faultnote(char*, char*, uintptr);
void fdclose(int, int);
Chan* fdtochan(int, int, int, int);
int findmount(Chan**, Mhead**, int, int, Qid);
@@ -191,7 +194,6 @@
void memmapadd(uvlong, uvlong, ulong);
uvlong memmapalloc(uvlong, uvlong, uvlong, ulong);
void memmapfree(uvlong, uvlong, ulong);
-ulong mcountseg(Segment*);
void mfreeseg(Segment*, uintptr, ulong);
void microdelay(int);
uvlong mk64fract(uvlong, uvlong);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -1250,7 +1250,6 @@
pexit(char *exitstr, int freemem)
{
Proc *p;
- Segment **s;
ulong utime, stime;
Waitq *wq;
Fgrp *fgrp;
@@ -1259,6 +1258,8 @@
Pgrp *pgrp;
Chan *dot;
void (*pt)(Proc*, int, vlong);
+ Segment *s;
+ int i;
up->alarm = 0;
timerdel(up);
@@ -1382,10 +1383,11 @@
qunlock(&up->debug);
qlock(&up->seglock);
- for(s = up->seg; s < &up->seg[NSEG]; s++) {
- if(*s != nil) {
- putseg(*s);
- *s = nil;
+ for(i = 0; i < NSEG; i++){
+ s = up->seg[i];
+ if(s != nil){
+ up->seg[i] = nil;
+ putseg(s);
}
}
qunlock(&up->seglock);
@@ -1724,22 +1726,11 @@
ulong pages;
int i;
- eqlock(&p->seglock);
- if(waserror()){
- qunlock(&p->seglock);
- nexterror();
- }
pages = 0;
for(i=0; i<NSEG; i++){
- if((s = p->seg[i]) != nil){
- eqlock(s);
- pages += mcountseg(s);
- qunlock(s);
- }
+ if((s = p->seg[i]) != nil)
+ pages += s->used;
}
- qunlock(&p->seglock);
- poperror();
-
return pages;
}
@@ -1766,26 +1757,26 @@
}
if(kp == nil)
return;
- qlock(&kp->debug);
- if(kp->pid == 0 || kp->procctl == Proc_exitbig){
+ if(!canqlock(&kp->debug))
+ return;
+ if(!canqlock(&kp->seglock)){
qunlock(&kp->debug);
return;
}
- qlock(&kp->seglock);
s = kp->seg[BSEG];
+ if(kp->procctl != Proc_exitbig)
+ killproc(kp, Proc_exitbig);
+ qunlock(&kp->debug);
if(s != nil && s->ref > 1){
for(i = 0; (p = proctab(i)) != nil; i++) {
- if(p == kp || p->state <= New || p->kp)
+ if(p == kp || !matchseg(p, s) || !canqlock(&p->debug))
continue;
- qlock(&p->debug);
- if(p->pid != 0 && matchseg(p, s))
+ if(p->procctl != Proc_exitbig)
killproc(p, Proc_exitbig);
qunlock(&p->debug);
}
}
qunlock(&kp->seglock);
- killproc(kp, Proc_exitbig);
- qunlock(&kp->debug);
}
/*
--- a/sys/src/9/port/segment.c
+++ b/sys/src/9/port/segment.c
@@ -58,9 +58,10 @@
error(Enomem);
s->ref = 1;
s->type = type;
+ s->size = size;
s->base = base;
s->top = base+(size*BY2PG);
- s->size = size;
+ s->used = s->swapped = 0;
s->sema.prev = &s->sema;
s->sema.next = &s->sema;
@@ -113,10 +114,10 @@
if(s->mapsize > 0){
emap = &s->map[s->mapsize];
- for(pte = s->map; pte < emap; pte++)
+ for(pte = s->map; pte < emap; pte++){
if(*pte != nil)
freepte(s, *pte);
-
+ }
if(s->map != s->ssegmap)
free(s->map);
}
@@ -148,13 +149,11 @@
Segment*
dupseg(Segment **seg, int segno, int share)
{
- int i, size;
+ int i;
Pte *pte;
Segment *n, *s;
- SET(n);
s = seg[segno];
-
qlock(s);
if(waserror()){
qunlock(s);
@@ -166,6 +165,7 @@
case SG_PHYSICAL:
case SG_FIXED:
case SG_STICKY:
+ default:
goto sameseg;
case SG_STACK:
@@ -185,22 +185,20 @@
poperror();
return n;
}
-
if(share)
goto sameseg;
n = newseg(s->type, s->base, s->size);
-
- incref(s->image);
n->image = s->image;
n->fstart = s->fstart;
n->flen = s->flen;
+ incref(s->image);
break;
}
- size = s->mapsize;
- for(i = 0; i < size; i++)
+ for(i = 0; i < s->mapsize; i++)
if((pte = s->map[i]) != nil)
n->map[i] = ptecpy(pte);
-
+ n->used = s->used;
+ n->swapped = s->swapped;
n->flushme = s->flushme;
if(s->ref > 1)
procflushseg(s);
@@ -230,9 +228,10 @@
pte = &s->map[soff/PTEMAPMEM];
if((etp = *pte) == nil)
*pte = etp = ptealloc();
-
pg = &etp->pages[(soff&(PTEMAPMEM-1))/BY2PG];
+ assert(*pg == nil);
*pg = p;
+ s->used++;
if(pg < etp->first)
etp->first = pg;
if(pg > etp->last)
@@ -284,7 +283,6 @@
i->c = c;
incref(c);
}
-
return i;
}
@@ -461,29 +459,6 @@
}
/*
- * called with s locked
- */
-ulong
-mcountseg(Segment *s)
-{
- Pte **pte, **emap;
- Page **pg, **pe;
- ulong pages;
-
- pages = 0;
- emap = &s->map[s->mapsize];
- for(pte = s->map; pte < emap; pte++){
- if(*pte == nil)
- continue;
- pe = (*pte)->last;
- for(pg = (*pte)->first; pg <= pe; pg++)
- if(!pagedout(*pg))
- pages++;
- }
- return pages;
-}
-
-/*
* Must be called with s locked.
* This relies on s->ref > 1 indicating that
* the segment is shared with other processes
@@ -497,7 +472,7 @@
{
uintptr off;
Pte **pte, **emap;
- Page **pg, **pe;
+ Page **pg, **pe, *entry;
if(pages == 0)
return;
@@ -529,9 +504,14 @@
}
pg = &(*pte)->pages[off];
for(pe = &(*pte)->pages[PTEPERTAB]; pg < pe; pg++) {
- if(*pg != nil){
- putpage(*pg);
+ if((entry = *pg) != nil){
*pg = nil;
+ if(onswap(entry)){
+ putswap(entry);
+ s->swapped--;
+ } else
+ putpage(entry);
+ s->used--;
}
if(--pages == 0)
return;
@@ -605,6 +585,12 @@
if(va != 0 && va >= USTKTOP)
error(Ebadarg);
+ qlock(&up->seglock);
+ if(waserror()){
+ qunlock(&up->seglock);
+ nexterror();
+ }
+
for(sno = 0; sno < NSEG; sno++)
if(up->seg[sno] == nil && sno != ESEG)
break;
@@ -619,12 +605,14 @@
if(_globalsegattach != nil){
s = (*_globalsegattach)(name);
if(s != nil){
- if(isoverlap(s->base, s->top - s->base) != nil){
+ va = s->base;
+ len = s->top - va;
+ if(isoverlap(va, len) != nil){
putseg(s);
error(Esoverlap);
}
up->seg[sno] = s;
- return s->base;
+ goto done;
}
}
@@ -675,6 +663,9 @@
s = newseg(attr, va, len/BY2PG);
s->pseg = ps;
up->seg[sno] = s;
+done:
+ qunlock(&up->seglock);
+ poperror();
return va;
}
@@ -747,7 +738,6 @@
s = up->seg[TSEG];
if(s == nil || s->profile == nil)
return;
-
s->profile[0] += TK2MS(1);
if(pc >= s->base && pc < s->top) {
pc -= s->base;
@@ -762,13 +752,10 @@
ps = newseg(SG_DATA, s->base, s->size);
ps->image = s->image;
- incref(ps->image);
ps->fstart = s->fstart;
ps->flen = s->flen;
ps->flushme = 1;
- qunlock(s);
- putseg(s);
- qlock(ps);
+ incref(s->image);
return ps;
}
@@ -776,13 +763,30 @@
data2txt(Segment *s)
{
Segment *ps;
+ Image *i;
+ i = s->image;
+ lock(i);
+ if((ps = i->s) != nil && ps->flen == s->flen){
+ assert(ps->image == i);
+ incref(ps);
+ unlock(i);
+ return ps;
+ }
+ if(waserror()){
+ unlock(i);
+ nexterror();
+ }
ps = newseg(SG_TEXT | SG_RONLY, s->base, s->size);
- ps->image = s->image;
- incref(ps->image);
+ ps->image = i;
ps->fstart = s->fstart;
ps->flen = s->flen;
ps->flushme = 1;
+ if(i->s == nil)
+ i->s = ps;
+ incref(i);
+ unlock(i);
+ poperror();
return ps;
}
@@ -832,15 +836,16 @@
int done;
int sno;
+ qlock(&up->seglock);
for(sno = 0; sno < NSEG; sno++)
if(up->seg[sno] == nil && sno != ESEG)
break;
if(sno == NSEG)
panic("segmentkproc");
-
sio->p = up;
incref(sio->s);
up->seg[sno] = sio->s;
+ qunlock(&up->seglock);
while(waserror())
;
@@ -850,9 +855,13 @@
sio->err = up->errstr;
else {
if(sio->s != nil && up->seg[sno] != sio->s){
- putseg(up->seg[sno]);
+ Segment *tmp;
+ qlock(&up->seglock);
incref(sio->s);
+ tmp = up->seg[sno];
up->seg[sno] = sio->s;
+ putseg(tmp);
+ qunlock(&up->seglock);
flushmmu();
}
switch(sio->cmd){
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -567,16 +567,15 @@
}
/* Text. Shared. */
+ assert(ts->ref > 0);
up->seg[TSEG] = ts;
/* Data. Shared. */
s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
-
- /* Attached by hand */
- incref(img);
s->image = img;
s->fstart = text;
s->flen = data;
+ incref(img);
up->seg[DSEG] = s;
/* BSS. Zero fill on demand */
--- a/sys/src/9/ppc/trap.c
+++ b/sys/src/9/ppc/trap.c
@@ -289,22 +289,13 @@
void
faultpower(Ureg *ureg, ulong addr, int read)
{
- int user, insyscall, n;
- char buf[ERRMAX];
-
- user = (ureg->srr1 & MSR_PR) != 0;
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(addr, ureg->pc, read);
- if(n < 0){
- if(!user){
+ if(fault(addr, ureg->pc, read) < 0){
+ if(!userureg(ureg)){
dumpregs(ureg);
- panic("fault: 0x%lux", addr);
+ panic("kernel fault: 0x%lux", addr);
}
- sprint(buf, "sys: trap: fault %s addr=0x%lux", read? "read" : "write", addr);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", addr);
}
- up->insyscall = insyscall;
}
void
--- a/sys/src/9/sgi/faultmips.c
+++ b/sys/src/9/sgi/faultmips.c
@@ -185,27 +185,17 @@
faultmips(Ureg *ur, int user, int code)
{
int read;
- ulong addr;
- char *p, buf[ERRMAX];
- addr = ur->badvaddr;
- addr &= ~(BY2PG-1);
-
read = !(code==CTLBM || code==CTLBS);
/* print("fault: %s code %d va %#p pc %#p r31 %#lux tlbvirt %#lux\n",
up->text, code, ur->badvaddr, ur->pc, ur->r31, tlbvirt());/**/
- if (Debug && ckfaultstuck(ur, read, code) || fault(addr, ur->pc, read) == 0)
+ if (Debug && ckfaultstuck(ur, read, code) || fault(ur->badvaddr, ur->pc, read) == 0)
return;
if(user) {
- p = "store";
- if(read)
- p = "load";
- snprint(buf, sizeof buf, "sys: trap: fault %s addr=%#lux r31=%#lux",
- p, ur->badvaddr, ur->r31);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", ur->badvaddr);
return;
}
--- a/sys/src/9/sgi/trap.c
+++ b/sys/src/9/sgi/trap.c
@@ -154,7 +154,7 @@
void
trap(Ureg *ur)
{
- int ecode, user, cop, x, fpchk;
+ int ecode, user, cop, fpchk;
ulong fpfcr31;
char buf[2*ERRMAX], buf1[ERRMAX], *fpexcep;
@@ -187,11 +187,8 @@
kfault(ur);
break;
}
- x = up->insyscall;
- up->insyscall = 1;
spllo();
faultmips(ur, user, ecode);
- up->insyscall = x;
break;
case CVCEI:
--- a/sys/src/9/teg2/trap.c
+++ b/sys/src/9/teg2/trap.c
@@ -597,32 +597,13 @@
static void
faultarm(Ureg *ureg, uintptr va, int user, int read)
{
- int n, insyscall;
-
- if(up == nil) {
- dumpstackwithureg(ureg);
- panic("faultarm: cpu%d: nil up, %sing %#p at %#p",
- m->machno, (read? "read": "writ"), va, ureg->pc);
- }
- insyscall = up->insyscall;
- up->insyscall = 1;
-
- n = fault(va, ureg->pc, read); /* goes spllo */
- splhi();
- if(n < 0){
- char buf[ERRMAX];
-
+ if(fault(va, ureg->pc, read) < 0){
if(!user){
- dumpstackwithureg(ureg);
- panic("fault: cpu%d: kernel %sing %#p at %#p",
- m->machno, read? "read": "writ", va, ureg->pc);
+ dumpregs(ureg);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", va);
}
- /* don't dump registers; programs suicide all the time */
- snprint(buf, sizeof buf, "sys: trap: fault %s va=%#p",
- read? "read": "write", va);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", va);
}
- up->insyscall = insyscall;
}
/*
--- a/sys/src/9/xen/trap.c
+++ b/sys/src/9/xen/trap.c
@@ -324,8 +324,7 @@
fault386(Ureg* ureg, void*)
{
ulong addr;
- int read, user, n, insyscall;
- char buf[ERRMAX];
+ int read;
addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
if (faultpanic) {
@@ -334,27 +333,16 @@
dumpstack();
panic("fault386");
}
-
- user = (ureg->cs & 0xFFFF) == UESEL;
- if(!user && mmukmapsync(addr))
+ if(!userureg(ureg) && mmukmapsync(addr))
return;
read = !(ureg->ecode & 2);
- if(up == nil)
- panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
- insyscall = up->insyscall;
- up->insyscall = 1;
- n = fault(addr, ureg->pc, read);
- if(n < 0){
- if(!user){
+ if(fault(addr, ureg->pc, read) < 0){
+ if(!userureg(ureg)){
dumpregs(ureg);
- panic("fault: 0x%lux\n", addr);
+ panic("kernel fault: %s addr=%#p", read? "read": "write", addr);
}
- sprint(buf, "sys: trap: fault %s addr=0x%lux",
- read? "read" : "write", addr);
- dprint("Posting %s to %lud\n", buf, up->pid);
- postnote(up, 1, buf, NDebug);
+ faultnote("fault", read? "read": "write", addr);
}
- up->insyscall = insyscall;
FAULTLOG(dprint("fault386: all done\n");)
}
--- a/sys/src/9/zynq/trap.c
+++ b/sys/src/9/zynq/trap.c
@@ -70,25 +70,17 @@
static void
faultarm(Ureg *ureg, ulong fsr, uintptr addr)
{
- int user, insyscall, read;
- static char buf[ERRMAX];
- char *err;
+ char *type;
+ int read;
- read = (fsr & (1<<11)) == 0;
- user = userureg(ureg);
- if(!user){
- if(addr >= USTKTOP || up == nil)
- _dumpstack(ureg);
- if(addr >= USTKTOP)
+ if(!userureg(ureg)){
+ if(addr >= USTKTOP){
+ dumpregs(ureg);
panic("kernel fault: bad address pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
- if(up == nil)
- panic("kernel fault: no user process pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
+ }
}
- if(up == nil)
- panic("user fault: up=nil pc=%#.8lux addr=%#.8lux fsr=%#.8lux", ureg->pc, addr, fsr);
- insyscall = up->insyscall;
- up->insyscall = 1;
+ read = (fsr & (1<<11)) == 0;
switch(fsr & 0x1F){
case 0x05: /* translation fault L1 */
case 0x07: /* translation fault L2 */
@@ -102,18 +94,15 @@
break;
/* wet floor */
default:
- err = faulterr[fsr & 0x1F];
- if(err == nil)
- err = "fault";
- if(!user){
+ type = faulterr[fsr & 0x1F];
+ if(type == nil)
+ type = "fault";
+ if(!userureg(ureg)){
dumpregs(ureg);
- _dumpstack(ureg);
- panic("kernel %s: pc=%#.8lux addr=%#.8lux fsr=%#.8lux", err, ureg->pc, addr, fsr);
+ panic("kernel %s: pc=%#.8lux addr=%#.8lux fsr=%#.8lux", type, ureg->pc, addr, fsr);
}
- sprint(buf, "sys: trap: %s %s addr=%#.8lux", err, read ? "read" : "write", addr);
- postnote(up, 1, buf, NDebug);
+ faultnote(type, read? "read": "write", addr);
}
- up->insyscall = insyscall;
}
static FPsave*
--
⑨