ref: 0a6f5be3dc30edbfa182adde0efc86a8838d2d7d
parent: 2aad54c167140d05d56735a9e998d546e7a7b602
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Jun 23 13:50:05 EDT 2025
gefs: fix deadlocks on very slow disks
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -887,7 +887,7 @@
}
void
-epochclean(int sync)
+epochclean(void)
{
ulong c, e, ge;
Limbo *p, *n;
@@ -902,7 +902,7 @@
for(i = 0; i < fs->nworker; i++){
e = agetl(&fs->lepoch[i]);
if((e & Eactive) && e != (ge | Eactive)){
- if(!sync && c < fs->cmax/4)
+ if(c < fs->cmax/4)
return;
epochwait();
}
--- a/sys/src/cmd/gefs/dat.h
+++ b/sys/src/cmd/gefs/dat.h
@@ -533,7 +533,7 @@
QLock synclk;
Rendez syncrz;
- RWLock mountlk;
+ QLock mountlk;
Mount *mounts;
Mount *snapmnt;
Lock connlk;
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -59,7 +59,7 @@
void epochstart(int);
void epochend(int);
void epochwait(void);
-void epochclean(int);
+void epochclean(void);
void limbo(int op, Limbo*);
void freeblk(Tree*, Blk*);
void freebp(Tree*, Bptr);
@@ -202,6 +202,7 @@
Chan* mkchan(int);
void* chrecv(Chan*);
void chsend(Chan*, void*);
+int chsendnb(Chan*, void*, int);
void runfs(int, void*);
void runmutate(int, void*);
void runread(int, void*);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -115,15 +115,8 @@
}
tracem("packb");
- rlock(&fs->mountlk);
- if(waserror()){
- runlock(&fs->mountlk);
- nexterror();
- }
- for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
+ for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next)
updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
- runlock(&fs->mountlk);
- poperror();
/*
* Now that we've updated the snaps, we can sync the
* dlist; the snap tree will not change from here.
@@ -218,12 +211,7 @@
t = nil;
r = nil;
*tp = nil;
- rlock(&fs->mountlk);
- if(waserror()){
- runlock(&fs->mountlk);
- nexterror();
- }
- for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+ for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(strcmp(a->old, mnt->name) == 0){
updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
t = agetp(&mnt->root);
@@ -234,8 +222,6 @@
if(t == nil && (t = opensnap(a->old, nil)) == nil){
if(a->fd != -1)
fprint(a->fd, "snap: open '%s': does not exist\n", a->old);
- runlock(&fs->mountlk);
- poperror();
return;
}
if(a->delete){
@@ -242,8 +228,6 @@
if(mnt != nil) {
if(a->fd != -1)
fprint(a->fd, "snap: snap is mounted: '%s'\n", a->old);
- runlock(&fs->mountlk);
- poperror();
return;
}
if(t->nlbl == 1 && t->nref <= 1 && t->succ == -1){
@@ -256,15 +240,11 @@
if(a->fd != -1)
fprint(a->fd, "snap: already exists '%s'\n", a->new);
closesnap(s);
- runlock(&fs->mountlk);
- poperror();
return;
}
tagsnap(t, a->new, a->flag);
}
closesnap(t);
- runlock(&fs->mountlk);
- poperror();
*tp = r;
if(a->fd != -1){
if(a->delete)
@@ -335,7 +315,8 @@
v = agetl(&c->count);
if(v == 0 || !acasl(&c->count, v, v-1))
- semacquire(&c->count, 1);
+ while(semacquire(&c->count, 1) == -1)
+ continue;
lock(&c->rl);
a = *c->rp;
if(++c->rp >= &c->args[c->size])
@@ -345,14 +326,19 @@
return a;
}
-void
-chsend(Chan *c, void *m)
+int
+chsendnb(Chan *c, void *m, int block)
{
long v;
+ int r;
v = agetl(&c->avail);
- if(v == 0 || !acasl(&c->avail, v, v-1))
- semacquire(&c->avail, 1);
+ if(v == 0 || !acasl(&c->avail, v, v-1)){
+ while((r = semacquire(&c->avail, block)) == -1)
+ continue;
+ if(r == 0)
+ return 0;
+ }
lock(&c->wl);
*c->wp = m;
if(++c->wp >= &c->args[c->size])
@@ -359,8 +345,15 @@
c->wp = c->args;
unlock(&c->wl);
semrelease(&c->count, 1);
+ return 1;
}
+void
+chsend(Chan *c, void *m)
+{
+ chsendnb(c, m, 1);
+}
+
static void
fshangup(Conn *c, char *fmt, ...)
{
@@ -430,16 +423,8 @@
{
if(!(mnt->flag & Lmut))
error(Erdonly);
- if(mnt->root->nlbl != 1 || mnt->root->nref != 0){
- rlock(&fs->mountlk);
- if(waserror()){
- runlock(&fs->mountlk);
- nexterror();
- }
+ if(mnt->root->nlbl != 1 || mnt->root->nref != 0)
updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
- poperror();
- runlock(&fs->mountlk);
- }
btupsert(mnt->root, m, nm);
}
@@ -702,15 +687,15 @@
return fs->snapmnt;
}
- wlock(&fs->mountlk);
+ qlock(&fs->mountlk);
for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
if(strcmp(name, mnt->name) == 0){
ainc(&mnt->ref);
- goto Out;
+ qunlock(&fs->mountlk);
+ return mnt;
}
}
if(waserror()){
- wunlock(&fs->mountlk);
free(mnt);
nexterror();
}
@@ -723,11 +708,10 @@
mnt->root = t;
mnt->next = fs->mounts;
loadautos(mnt);
- fs->mounts = mnt;
- poperror();
-Out:
- wunlock(&fs->mountlk);
+ asetp(&fs->mounts, mnt);
+ qunlock(&fs->mountlk);
+ poperror();
return mnt;
}
@@ -738,7 +722,7 @@
if(mnt == nil)
return;
- wlock(&fs->mountlk);
+ qlock(&fs->mountlk);
if(adec(&mnt->ref) == 0){
for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
if(me == mnt)
@@ -748,7 +732,7 @@
*p = me->next;
limbo(DFmnt, me);
}
- wunlock(&fs->mountlk);
+ qunlock(&fs->mountlk);
}
static void
@@ -2633,7 +2617,7 @@
assert(estacksz() == 0);
epochend(id);
qunlock(&fs->mutlk);
- epochclean(0);
+ epochclean();
if(a != nil)
chsend(fs->admchan, a);
@@ -2682,7 +2666,7 @@
freetree(bp, pred); /* leak b on error() */
qlock(&fs->mutlk);
qunlock(&fs->mutlk);
- epochclean(0);
+ epochclean();
}
}
if(rb.gen > pred)
@@ -2718,7 +2702,7 @@
freebp(nil, bp);
qlock(&fs->mutlk);
qunlock(&fs->mutlk);
- epochclean(0);
+ epochclean();
}
btexit(&s);
freetree(t->bp, t->pred);
@@ -2789,8 +2773,10 @@
if(!agetl(&fs->rdonly)){
ainc(&fs->rdonly);
/* cycle through all epochs to clear them. */
- for(i = 0; i < 3; i++)
- epochclean(1);
+ for(i = 0; i < 4; i++){
+ epochwait();
+ epochclean();
+ }
sync();
}
postnote(PNGROUP, getpid(), "halted");
@@ -2829,7 +2815,7 @@
poperror();
}
qunlock(a);
- epochclean(0);
+ epochclean();
}
sync(); /* oldhd blocks leaked on error() */
@@ -2850,7 +2836,7 @@
epochend(id);
qunlock(&fs->mutlk);
poperror();
- epochclean(0);
+ epochclean();
}
}
@@ -2965,7 +2951,7 @@
epochend(id);
qunlock(&fs->mutlk);
poperror();
- epochclean(0);
+ epochclean();
nm = 0;
}
}
@@ -2997,7 +2983,15 @@
a->delete = 1;
else
strecpy(a->new, a->new+sizeof(a->new), new);
- chsend(fs->admchan, a);
+ /*
+ * We're within an epoch, which means we need to guarantee
+ * forward progress; snapshots are non-critical enough that
+ * skipping one is the best option.
+ */
+ if(!chsendnb(fs->admchan, a, 0)){
+ fprint(2, "skipping snapshot %s => %s (file system too busy)\n", a->old, a->delete ? "(delete)" : a->new);
+ free(a);
+ }
}
static void
@@ -3028,7 +3022,7 @@
}
void
-runtasks(int, void *)
+runtasks(int tid, void *)
{
vlong now;
Mount *mnt;
@@ -3052,14 +3046,16 @@
tmnow(&tm, nil);
now = tmnorm(&tm);
- rlock(&fs->mountlk);
- for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+
+ epochstart(tid);
+ for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(!(mnt->flag & Lmut))
continue;
for(i = 0; i < nelem(mnt->cron); i++)
cronsync(mnt->name, &mnt->cron[i], &tm, now);
}
- runlock(&fs->mountlk);
+ epochend(tid);
+ epochclean();
poperror();
}
}
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -362,8 +362,7 @@
btupsert(&fs->snap, m, nm);
if(deltree){
reclaimblocks(t->gen, succ, t->pred);
- assert(!canwlock(&fs->mountlk));
- for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+ for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(mnt->root->gen == t->succ)
mnt->root->pred = t->pred;
if(mnt->root->gen == t->pred)
--
⑨