shithub: neoventi

Download patch

ref: 0d00f3f90ad95b4f03617ece28db893ef0b99204
parent: 337f74d8143862b34559cb82f459e12827f30ff9
author: Noam Preil <noam@pixelhero.dev>
date: Tue Sep 24 05:24:03 EDT 2024

cache: keep entries locked during reads

--- a/cache.c
+++ b/cache.c
@@ -183,6 +183,8 @@
 // this grabs a free buffer, inserts it into cache, and yields
 // its index so that the caller may fill it up.
 // Returns whether the data was already in cache.
+// ALWAYS returns the entry locked. The caller MUST unlock via
+// cacheunlock when done with the buffer!
 int
 cachelookup(char **buf, u16int arenaindex, u32int blockindex)
 {
@@ -211,7 +213,6 @@
 		case 1:
 			// already exists
 			get(bucket, i, buf);
-			unlock(bucket);
 			return 1;
 		}
 	}
--- a/disk.c
+++ b/disk.c
@@ -73,11 +73,11 @@
 			werrstr("Failed to read bucket");
 			return 0;
 		}
-		cacheunlock(key, bucket & 0xffff);
 	}
 	if(s_sect->bucketmagic && U32GET(buf + 2) != s_sect->bucketmagic)
 		sysfatal("index is corrupt: invalid bucket magic: sect %ux, buck %ux", s_sect->bucketmagic, U32GET(buf + 2));
 	if(!bucketlookup(buf, score, &entry)){
+		cacheunlock(key, bucket & 0xffff);
 		werrstr("entry not found in bucket");
 		return 0;
 	}
@@ -84,6 +84,7 @@
 	addr->offset = U64GET((buf + 6 + (entry * IEntrySize) + 26));
 	addr->size = U16GET((buf + 6 + (entry * IEntrySize) + 34));
 	addr->blocks = buf[6 + (entry*IEntrySize) + 37];
+	cacheunlock(key, bucket & 0xffff);
 	aindex = aindexfromaddr(addr->offset);
 	addr->s_arena = index.amap[aindex].arena;
 	addr->offset -= index.amap[aindex].start;
@@ -122,10 +123,8 @@
 		sysfatal("invalid blocksize %d\n", arena->blocksize);
 	if(!cachelookup(&buf, arena->index, blockindex)){
 		if(pread(arena->fd, buf, arena->blocksize, arena->base+(blockindex*arena->blocksize)) != arena->blocksize){
-			cacheunlock(arena->index, blockindex);
 			return nil;
 		}
-		cacheunlock(arena->index, blockindex);
 	}
 	return buf;
 }
@@ -135,6 +134,7 @@
 {
 	u64int end = arena->size - arenadirsize(arena);
 	u16int off, n, m, size;
+	u32int blockindex;
 	char *buf;
 	size = reqsize;
 	if(addr + reqsize > end)
@@ -143,7 +143,8 @@
 	addr -= off;
 	n = 0;
 	while(n < size){
-		buf = vtreadarenablock(arena, addr/arena->blocksize);
+		blockindex = addr/arena->blocksize;
+		buf = vtreadarenablock(arena, blockindex);
 		if(buf == nil)
 			// TODO: I/O error should not crash the disk layer.
 			// Might be good to be able to recover cached data in this case?
@@ -152,6 +153,7 @@
 		if(m > size - n)
 			m = size - n;
 		memcpy(&dbuf[n], &buf[off], m);
+		cacheunlock(arena->index, blockindex);
 		n += m;
 		off = 0;
 		addr += arena->blocksize;
--- a/notebook
+++ b/notebook
@@ -2121,3 +2121,44 @@
 ...performance can wait. This is good, and a 2x improvement's nothing to sneeze at >_<
 
 Cache eviction should be next. Get the read side _fully done_, and I should be able to get fossil running off of it while I implement write support, probably?
+
+128 1024 1024 * *
+8 1024 * /
+8 / 
+4 *
+p
+8192
+8192
+
+2 13 ^ p
+8192
+8192
+
+Okay, so 128M of cache requires 13 bits of addressing.
+
+An RWLock would require much more memory; the easiest solution for protecting buffers during reads is probably to just take a normal Lock. This is not ideal, but it's probably okay; during a read, we only need the lock long enough to copy the data. Copying 8KiB should be fast enough that the lock _does not matter_. Let's test that.
+
+% cat /tmp/bench.c
+
+#include <u.h>
+#include <libc.h>
+
+void
+main(void)
+{
+	uchar src[8192], dest[8192];
+	vlong start = nsec();
+	for(int i = 0; i < 8192; i++)
+		src[i] = rand() & 0xFF;
+	vlong fillend = nsec();
+	memcpy(dest, src, 8192);
+	vlong end = nsec();
+	print("fill time: %f, copy time:  %f\n", ((double)fillend-(double)start) / 1000000000, ((double)end-(double)fillend) / 1000000000);
+}
+
+...
+copy time:  0.000004
+
+4 microseconds. Holding a lock for 4 microseconds on read should be _completely fine_. That's on the Reform!! I'm not worried that there will be heavy contention here. I can revisit this if proven wrong.
+
+Performance of the simple `walk` test is unchanged, at least. This seems more than fine. committing :)
--