shithub: front

Download patch

ref: 0a6f5be3dc30edbfa182adde0efc86a8838d2d7d
parent: 2aad54c167140d05d56735a9e998d546e7a7b602
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Jun 23 13:50:05 EDT 2025

gefs: fix deadlocks on very slow disks

--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -887,7 +887,7 @@
 }
 
 void
-epochclean(int sync)
+epochclean(void)
 {
 	ulong c, e, ge;
 	Limbo *p, *n;
@@ -902,7 +902,7 @@
 	for(i = 0; i < fs->nworker; i++){
 		e = agetl(&fs->lepoch[i]);
 		if((e & Eactive) && e != (ge | Eactive)){
-			if(!sync && c < fs->cmax/4)
+			if(c < fs->cmax/4)
 				return;
 			epochwait();
 		}
--- a/sys/src/cmd/gefs/dat.h
+++ b/sys/src/cmd/gefs/dat.h
@@ -533,7 +533,7 @@
 	QLock	synclk;
 	Rendez	syncrz;
 
-	RWLock	mountlk;
+	QLock	mountlk;
 	Mount	*mounts;
 	Mount	*snapmnt;
 	Lock	connlk;
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -59,7 +59,7 @@
 void	epochstart(int);
 void	epochend(int);
 void	epochwait(void);
-void	epochclean(int);
+void	epochclean(void);
 void	limbo(int op, Limbo*);
 void	freeblk(Tree*, Blk*);
 void	freebp(Tree*, Bptr);
@@ -202,6 +202,7 @@
 Chan*	mkchan(int);
 void*	chrecv(Chan*);
 void	chsend(Chan*, void*);
+int	chsendnb(Chan*, void*, int);
 void	runfs(int, void*);
 void	runmutate(int, void*);
 void	runread(int, void*);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -115,15 +115,8 @@
 	}
 	tracem("packb");
 
-	rlock(&fs->mountlk);
-	if(waserror()){
-		runlock(&fs->mountlk);
-		nexterror();
-	}
-	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
+	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next)
 		updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
-	runlock(&fs->mountlk);
-	poperror();
 	/*
 	 * Now that we've updated the snaps, we can sync the
 	 * dlist; the snap tree will not change from here.
@@ -218,12 +211,7 @@
 	t = nil;
 	r = nil;
 	*tp = nil;
-	rlock(&fs->mountlk);
-	if(waserror()){
-		runlock(&fs->mountlk);
-		nexterror();
-	}
-	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
 		if(strcmp(a->old, mnt->name) == 0){
 			updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
 			t = agetp(&mnt->root);
@@ -234,8 +222,6 @@
 	if(t == nil && (t = opensnap(a->old, nil)) == nil){
 		if(a->fd != -1)
 			fprint(a->fd, "snap: open '%s': does not exist\n", a->old);
-		runlock(&fs->mountlk);
-		poperror();
 		return;
 	}
 	if(a->delete){
@@ -242,8 +228,6 @@
 		if(mnt != nil) {
 			if(a->fd != -1)
 				fprint(a->fd, "snap: snap is mounted: '%s'\n", a->old);
-			runlock(&fs->mountlk);
-			poperror();
 			return;
 		}
 		if(t->nlbl == 1 && t->nref <= 1 && t->succ == -1){
@@ -256,15 +240,11 @@
 			if(a->fd != -1)
 				fprint(a->fd, "snap: already exists '%s'\n", a->new);
 			closesnap(s);
-			runlock(&fs->mountlk);
-			poperror();
 			return;
 		}
 		tagsnap(t, a->new, a->flag);
 	}
 	closesnap(t);
-	runlock(&fs->mountlk);
-	poperror();
 	*tp = r;
 	if(a->fd != -1){
 		if(a->delete)
@@ -335,7 +315,8 @@
 
 	v = agetl(&c->count);
 	if(v == 0 || !acasl(&c->count, v, v-1))
-		semacquire(&c->count, 1);
+		while(semacquire(&c->count, 1) == -1)
+			continue;
 	lock(&c->rl);
 	a = *c->rp;
 	if(++c->rp >= &c->args[c->size])
@@ -345,14 +326,19 @@
 	return a;
 }
 
-void
-chsend(Chan *c, void *m)
+int
+chsendnb(Chan *c, void *m, int block)
 {
 	long v;
+	int r;
 
 	v = agetl(&c->avail);
-	if(v == 0 || !acasl(&c->avail, v, v-1))
-		semacquire(&c->avail, 1);
+	if(v == 0 || !acasl(&c->avail, v, v-1)){
+		while((r = semacquire(&c->avail, block)) == -1)
+			continue;
+		if(r == 0)
+			return 0;
+	}
 	lock(&c->wl);
 	*c->wp = m;
 	if(++c->wp >= &c->args[c->size])
@@ -359,8 +345,15 @@
 		c->wp = c->args;
 	unlock(&c->wl);
 	semrelease(&c->count, 1);
+	return 1;
 }
 
+void
+chsend(Chan *c, void *m)
+{
+	chsendnb(c, m, 1);
+}
+
 static void
 fshangup(Conn *c, char *fmt, ...)
 {
@@ -430,16 +423,8 @@
 {
 	if(!(mnt->flag & Lmut))
 		error(Erdonly);
-	if(mnt->root->nlbl != 1 || mnt->root->nref != 0){
-		rlock(&fs->mountlk);
-		if(waserror()){
-			runlock(&fs->mountlk);
-			nexterror();
-		}
+	if(mnt->root->nlbl != 1 || mnt->root->nref != 0)
 		updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
-		poperror();
-		runlock(&fs->mountlk);
-	}
 	btupsert(mnt->root, m, nm);
 }
 
@@ -702,15 +687,15 @@
 		return fs->snapmnt;
 	}
 
-	wlock(&fs->mountlk);
+	qlock(&fs->mountlk);
 	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
 		if(strcmp(name, mnt->name) == 0){
 			ainc(&mnt->ref);
-			goto Out;
+			qunlock(&fs->mountlk);
+			return mnt;
 		}
 	}
 	if(waserror()){
-		wunlock(&fs->mountlk);
 		free(mnt);
 		nexterror();
 	}
@@ -723,11 +708,10 @@
 	mnt->root = t;
 	mnt->next = fs->mounts;
 	loadautos(mnt);
-	fs->mounts = mnt;
-	poperror();
 
-Out:
-	wunlock(&fs->mountlk);
+	asetp(&fs->mounts, mnt);
+	qunlock(&fs->mountlk);
+	poperror();
 	return mnt;
 }
 
@@ -738,7 +722,7 @@
 
 	if(mnt == nil)
 		return;
-	wlock(&fs->mountlk);
+	qlock(&fs->mountlk);
 	if(adec(&mnt->ref) == 0){
 		for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
 			if(me == mnt)
@@ -748,7 +732,7 @@
 		*p = me->next;
 		limbo(DFmnt, me);
 	}
-	wunlock(&fs->mountlk);
+	qunlock(&fs->mountlk);
 }
 
 static void
@@ -2633,7 +2617,7 @@
 		assert(estacksz() == 0);
 		epochend(id);
 		qunlock(&fs->mutlk);
-		epochclean(0);
+		epochclean();
 
 		if(a != nil)
 			chsend(fs->admchan, a);
@@ -2682,7 +2666,7 @@
 			freetree(bp, pred);	/* leak b on error() */
 			qlock(&fs->mutlk);
 			qunlock(&fs->mutlk);
-			epochclean(0);
+			epochclean();
 		}
 	}
 	if(rb.gen > pred)
@@ -2718,7 +2702,7 @@
 			freebp(nil, bp);
 		qlock(&fs->mutlk);
 		qunlock(&fs->mutlk);
-		epochclean(0);
+		epochclean();
 	}
 	btexit(&s);
 	freetree(t->bp, t->pred);
@@ -2789,8 +2773,10 @@
 			if(!agetl(&fs->rdonly)){
 				ainc(&fs->rdonly);
 				/* cycle through all epochs to clear them.  */
-				for(i = 0; i < 3; i++)
-					epochclean(1);
+				for(i = 0; i < 4; i++){
+					epochwait();
+					epochclean();
+				}
 				sync();
 			}
 			postnote(PNGROUP, getpid(), "halted");
@@ -2829,7 +2815,7 @@
 					poperror();
 				}
 				qunlock(a);
-				epochclean(0);
+				epochclean();
 			}
 
 			sync();	/* oldhd blocks leaked on error() */
@@ -2850,7 +2836,7 @@
 					epochend(id);
 					qunlock(&fs->mutlk);
 					poperror();
-					epochclean(0);
+					epochclean();
 				}
 			}
 
@@ -2965,7 +2951,7 @@
 					epochend(id);
 					qunlock(&fs->mutlk);
 					poperror();
-					epochclean(0);
+					epochclean();
 					nm = 0;
 				}
 			}
@@ -2997,7 +2983,15 @@
 		a->delete = 1;
 	else
 		strecpy(a->new, a->new+sizeof(a->new), new);
-	chsend(fs->admchan, a);
+	/*
+	 * We're within an epoch, which means we need to guarantee
+	 * forward progress; snapshots are non-critical enough that
+	 * skipping one is the best option.
+	 */
+	if(!chsendnb(fs->admchan, a, 0)){
+		fprint(2, "skipping snapshot %s => %s (file system too busy)\n", a->old, (a->new != nil) ? a->new : "(delete)");
+		free(a);
+	}
 }
 
 static void
@@ -3028,7 +3022,7 @@
 }
 
 void
-runtasks(int, void *)
+runtasks(int tid, void *)
 {
 	vlong now;
 	Mount *mnt;
@@ -3052,14 +3046,16 @@
 
 		tmnow(&tm, nil);
 		now = tmnorm(&tm);
-		rlock(&fs->mountlk);
-		for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+
+		epochstart(tid);
+		for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
 			if(!(mnt->flag & Lmut))
 				continue;
 			for(i = 0; i < nelem(mnt->cron); i++)
 				cronsync(mnt->name, &mnt->cron[i], &tm, now);
 		}
-		runlock(&fs->mountlk);
+		epochend(tid);
+		epochclean();
 		poperror();
 	}
 }
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -362,8 +362,7 @@
 	btupsert(&fs->snap, m, nm);
 	if(deltree){
 		reclaimblocks(t->gen, succ, t->pred);
-		assert(!canwlock(&fs->mountlk));
-		for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+		for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
 			if(mnt->root->gen == t->succ)
 				mnt->root->pred = t->pred;
 			if(mnt->root->gen == t->pred)
--