ref: 22806f6ca81790a672b6b5c23916c15c211cc944
parent: 77e30263409bc04ec12db3a0bd162f8f821f016d
author: Noam Preil <noam@pixelhero.dev>
date: Tue Aug 19 04:26:38 EDT 2025
more functional
--- a/cache.c
+++ b/cache.c
@@ -81,6 +81,7 @@
int nbucket, i;
bucket_t *bucket;
u32int index;
+ fprint(2, "CL EVICT\n");
while(1){nbucket = rand() % BUCKETS;
bucket = &buckets[nbucket];
@@ -201,11 +202,13 @@
// miss, but there's an empty slot in this bucket.
cacheindex = grabfree();
*buf = data+(usize)cacheindex*8192;
+ fprint(2, "CL %d %d => key %d: %08x\n", arenaindex, blockindex, key, *buf);
put(bucket, i, key, cacheindex);
return 0;
case 1:
// already exists
get(bucket, i, buf);
+ fprint(2, "CL %d %d => key %d: HIT %08x\n", arenaindex, blockindex, key, *buf);
return 1;
}
}
--- a/disk.c
+++ b/disk.c
@@ -68,10 +68,10 @@
int
vtwriteindexclump(uchar *score, u64int offset, u16int len)
{- u64int bucket;
+ u32int bucket;
u32int t32;
u8int blocks;
- char *buf;
+ uchar *buf;
VtISect *s_sect;
u16int key;
u32int magic;
@@ -79,15 +79,16 @@
u16int n, off;
s_sect = &index.sects[isectforbucket(bucket)];
bucket -= s_sect->start;
- key = s_sect->cacheindex + (bucket >> 16);
- if(!cachelookup((char**)&buf, key, bucket & 0xffff)){+ cachelookup((char**)&buf, s_sect->cacheindex, bucket);
+ fprint(2, "reading from disk...\n");
if(pread(s_sect->fd, (char*)buf, s_sect->blocksize, s_sect->blockbase + (bucket << s_sect->blocklog)) != s_sect->blocksize){- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(key, bucket);
werrstr("Failed to read bucket");return 0;
}
- }
+
n = U16GET(buf);
+ fprint(2, "appending to bucket %d, key %08x, n %d\n", bucket, key, n);
if(s_sect->bucketmagic){magic = U32GET(buf+2);
if(magic == 0){@@ -97,18 +98,21 @@
}
}
off = 6 + n*IEntrySize;
+ if(off + 38 > s_sect->blocksize)
+ sysfatal("index bucket full? off %d, blocksize %d", off, s_sect->blocksize);memcpy(buf+off, score, 20);
U64PUT(buf+off+26, offset, t32);
U16PUT(buf+off+34, len);
blocks = (len + 38 + (1<<ABlockLog) - 1) >> ABlockLog;
buf[off+37] = blocks;
- U16PUT(buf, n+1);
+ n += 1;
+ U16PUT(buf, n);
if(pwrite(s_sect->fd, (char*)buf, s_sect->blocksize, s_sect->blockbase + (bucket << s_sect->blocklog)) != s_sect->blocksize){- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
werrstr("Failed to writeback bucket: %r");return 0;
}
- cacheunlock(key, bucket&0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
return 1;
}
@@ -118,22 +122,22 @@
u8int *buf;
u16int entry;
u64int aindex;
- u64int bucket = U32GET(score) / index.div;
+ u32int bucket = U32GET(score) / index.div;
VtISect *s_sect = &index.sects[isectforbucket(bucket)];
bucket -= s_sect->start;
- u16int key = s_sect->cacheindex + (bucket >> 16);
- if(!cachelookup((char**)&buf, key, bucket & 0xffff)){+ cachelookup((char**)&buf, s_sect->cacheindex, bucket);
+ fprint(2, "reading from disk for readlookup...\n");
if(pread(s_sect->fd, (char*)buf, s_sect->blocksize, s_sect->blockbase + (bucket << s_sect->blocklog)) != s_sect->blocksize){- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
werrstr("Failed to read bucket");return 0;
}
- }
+
if(s_sect->bucketmagic && U32GET(buf + 2) != s_sect->bucketmagic){if(U32GET(buf+2) != 0)
sysfatal("index is corrupt: invalid bucket magic: sect %ux, buck %ux", s_sect->bucketmagic, U32GET(buf + 2)); else{- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
// invoking code should not care about the
// distinction between "bucket is empty" and
// "bucket has data but not what we want."
@@ -142,7 +146,7 @@
}
}
if(!bucketlookup(buf, score, &entry)){- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
werrstr("entry not found in bucket");return 0;
}
@@ -149,10 +153,11 @@
addr->offset = U64GET((buf + 6 + (entry * IEntrySize) + 26));
addr->size = U16GET((buf + 6 + (entry * IEntrySize) + 34));
addr->blocks = buf[6 + (entry*IEntrySize) + 37];
- cacheunlock(key, bucket & 0xffff);
+ cacheunlock(s_sect->cacheindex, bucket);
aindex = aindexfromaddr(addr->offset);
addr->s_arena = &arenas[aindex];
addr->offset -= (addr->s_arena->base - addr->s_arena->blocksize);
+ fprint(2, "entry found in bucket: arena %d, offset %d\n", aindex, addr->offset);
return 1;
}
@@ -186,11 +191,11 @@
char *buf;
if(arena->blocksize != 8192)
sysfatal("invalid blocksize %d\n", arena->blocksize);- if(!cachelookup(&buf, arena->index, blockindex)){+ cachelookup(&buf, arena->index, blockindex);
if(pread(arena->fd, buf, arena->blocksize, arena->base+(blockindex*arena->blocksize)) != arena->blocksize){return nil;
}
- }
+
return buf;
}
@@ -251,7 +256,7 @@
if(index.arena->index+1 == numarenas)
sysfatal("TODO last arena full!");index.arena = &arenas[index.arena->index+1];
- if(index.arena->block != 0 || index.arena->offset != 0 || index.arena->blockremain != index.arena->blocksize)
+ if(index.arena->block != 0 || index.arena->offset != 0 || index.arena->blockremain != index.arena->blocksize || index.arena->buf != nil)
sysfatal("TODO handle writing to venti which previously experienced nonlinear writes from other software?");}
@@ -258,6 +263,8 @@
static void
blockflush(void)
{+ if(index.arena->buf == nil)
+ return;
if(pwrite(index.arena->fd, index.arena->buf, index.arena->blocksize, index.arena->base+(index.arena->block*index.arena->blocksize)) != index.arena->blocksize){ sysfatal("flush failed: %r");}
@@ -271,9 +278,7 @@
static void
blockadvance(void)
{- if(index.arena->buf != nil){- blockflush();
- }
+ blockflush();
index.arena->block += 1;
index.arena->arenastats.used += index.arena->blockremain;
index.arena->offset = 0;
@@ -293,7 +298,8 @@
static void
arenarequireminimum(u16int space)
{- // TODO
+ if(index.arena->arenastats.used + space > index.arena->size)
+ advancearena();
}
// Grabs the current block for writing into the cache, reading any existing contents
@@ -355,13 +361,17 @@
nn = len - n;
if(nn > index.arena->blockremain)
nn = index.arena->blockremain;
+ fprint(2, "WPTR block %d offset %d, n %d nn %d, block must be at least %d\n", index.arena->block, index.arena->offset, n, nn, n+nn);
// Use however much space is available in the block, then move to the next
- memcpy(wptr, buf+n, nn);
+ memcpy(wptr, buf + n, nn);
n += nn;
- if(len != n)
+ blockupdate(nn);
+ if(len != n){blockadvance();
+ getblock();
+ wptr = index.arena->buf;
+ }
}
- blockupdate(len);
index.arena->arenastats.uncsize += len;
index.arena->arenastats.clumps += 1;
blockflush();
@@ -382,6 +392,7 @@
wunlock(&index);
return 0;
}
+
if(!vtwriteindexclump(score, offset, len)){ werrstr("failed to write clump to index: %r");return 0;
@@ -476,6 +487,9 @@
arena->size = entry.stop - entry.start - 2*blocksize;
memcpy(arena->name, entry.name, NameSize);
loadarena(arena);
+ if(fd > 0xFF || fd < 0){+ sysfatal("assupmtion violated: file descriptor not in range");+ }
}
static void
@@ -591,7 +605,7 @@
sysfatal("invalid or corrupt index section: section overflows available space");if(sect->stop < sect->start)
sysfatal("invalid or corrupt index section: impossible range");-
+ fprint(2, "blockbase %08d\n", sect->blockbase);
}
static void
--- a/mkfile
+++ b/mkfile
@@ -31,3 +31,5 @@
paper.pdf:D: paper.ms
cat $prereq | pic | tbl | eqn | troff -ms | lp -dstdout | ps2pdf > $target
+
+reformat:V:
--- a/notebook
+++ b/notebook
@@ -2943,4 +2943,130 @@
Okay, can write data but readback fails with hash collision??
-...oh. I'm actually using index address. lol. Let's just drop that check...
\ No newline at end of file
+...oh. I'm actually using index address. lol. Let's just drop that check...
+
+Everything's moooostly good, but... not quite. There's reliability problems.
+
+Firstly, we're not writing back the arena, let's just fix that real quick.
+
+We currently write the clump to disk, then to the index. We should probably write-back the arena _before_ the index, for now; long term, index writes need to be asynchronous. TODO.
+
+Should failure be treated as failure to commit? Probably, but possibly not; as long as the clump is committed, a future scan can detect the data even if the arena stats have not been updated - but it's probably best to treat the arena writeback as the proper commit.
+
+Venti logic: grab disk block from cache, pack arena, write updated arena score, hand the block back to the cache, it'll get flushed.
+
+Okay, that was worth looking at, were not currently computing or verifying arena scores.. TODO.
+
+For now, can reliably trigger problems just by vac-ing the organizer/ directory, "index bucket full? off 42814, blocksize 8192".
+
+Let's grab the cache trace to /tmp/cache...
+
+% <./reformat.rc
+64+0 records in
+64+0 records out
+8+0 records in
+8+0 records out
+fmtisect /tmp/fooindex: 927 buckets of 215 entries, 524,288 bytes for index map
+fmtarenas /tmp/foo: 1 arenas, 66,314,240 bytes storage, 524,288 bytes for index map
+fmtindex: 1 arenas, 927 index buckets, 66,297,856 bytes storage
+% ./7.neoventi -c /tmp/foo >[2]/tmp/cache
+
+...organizer got uploaded without issue, on its own. Okay, then.
+
+...but, interestingly, a second upload finds a hash collision on a specific file. Is it corrupted? Let's verify we can actually read it back after a single write...
+
+...oh, no, first write failed. I'm silly. ./7.neoventi: index bucket full? off 42814, blocksize 8192
+
+term% grep 672 /tmp/cache
+looking up index bucket 672
+CL 1 672 => key 66208: 003618e8
+CL 1 672 => key 66208: HIT 003618e8
+appending to bucket 672, key 00000001, n 0
+looking up index bucket 672
+CL 1 672 => key 66208: HIT 003618e8
+CL 1 672 => key 66208: HIT 003618e8
+appending to bucket 672, key 00000001, n 26996
+
+So, index bucket 672, _in cache_, is corrupt?
+
+Let's bypass the cache for the index, and see if that helps.
+
+...it did. The index bucket is definitely getting corrupted within the cache, then.
+
+term% time unvac -v vac:79de69e8e949e7b073ec2d59ba6b277866db50f4
+organizer/
+0.00u 0.00s 0.02r unvac -v vac:79de69e8e949e7b073ec2d59ba6b277866db50f4
+term% ls organizer/
+term%
+
+okay, except that the resulting vac is corrupt?
+
+% vacfs ...
+% ls /n/vac/organizer
+ls: organizer: read asked for f7ae6196c8e7775380ef49d654e372c7427a31bb got 443fc9c0755b3494edf
+
+Even fully bypassing the cache there's problems. Is buffer being overrun on write somwhere?
+
+Okay, what's the plan then? Look over the bytes on disk? Compare each routine to oldventi?
+
+Able to reproduce with pretty small workloads. Can just toss in a bunch of debug info to the index and arena layers... sure, but what needs logging?
+
+Every write and every read. Subsystem, fd, offset, size, buffer address in memory. Writes to the buffers, and their destinations, because the cache is clearly broken too.
+
+...also not a huge fan of the cache API. If there's one thing venti's gets right, it's having the cache itself handle the read and writes. The problem is, to be able to do that, I either need a layer on top of the cache, or I need to change the keys to include the file descriptors instead of the arbitrary index. I was using fds before, is there a way I can finagle that to work again? Probably.
+
+Arenas and index sections each have u16 cache indices. Can _probably_ fit fds in there and just use them directly.
+
+Ahhhh wait, but each arena is also supposed to have its own index, no?
+
+Right, so, fd gets one bytes, arena within fd gets three. What about index? Well, that's just.... OHHHHHHHHHHH I'M A RETARD
+
+I've got multiple index buckets mapped to the same key don't I.
+
+Aha!
+
+key = s_sect->cacheindex + (bucket >> 16)
+
+cacheindex = numarenas;
+
+Let's actually look at the API
+
+cachelookup(&buf, u16 arena, u32 block)
+
+So, the combination of arena and block must be unique.
+
+...which isnt' held AT ALL for the index what the hell was I thinking
+
+For arenas, I'm using the arena index as that field. Okay, but is this actually the _problem_?
+
+
+ fprint(2, "looking up index bucket %d, %d, %d\n", bucket, bucket >> 16, numarenas + (bucket >> 16));
+D looking up index bucket 725, 0, 1
+
+...yep. It's literally using arena 1 as the key.
+
+So, definitely worth redesigning the cache _interface_ to be bulletproof.
+
+Currently, we've got three APIs: cacheinit(void), cachelookup(char**buf, u16 arenaindex, u32 blockindex), and cacheunlock(u16 arenaindex, u32 blockindex).
+
+cacheinit is fine as is. Lookups shouldn't be using the current index form though. It's clearly not good enough.
+
+Worse, index keys aren't even capable of being big enough right now. Bucket index is u32; section is u16. That obviously won't fit in... wait, blockindex is u32 lmao. We can just... fit the entire bucket as block index. Would that resolve it?
+
+...but now reformatting isn't working reliably. What?
+
+Ah, no, it is, but vac is failing really fast. Need to be validating hashes on the server side, too.
+
+
+...I _did_ change the write loop didn't I.
+
+...blockupdate(len) is definitely wrong, we're advancing the block already.
+
+hahahahahahaha okay yeah main write loop was somewhat buggy for sure. Still probably is.
+
+
+annnnnd with a few tweaks, the entire organizer/ folder is vacing and vacfs and unvacing without issue ;D
+
+...I'm still cheating the cache though, but I'm not sure how necessary it is. Probably necessary.
+
+...if I vac the same data multiple times, I'm getting completely different scores each time. This is... not how vac is suposed to work. Each one seems valid, but... that's really, really not how this is intended to work.
--
⑨