ref: ae879b8cf5a87199802a33d6b15c76afafa8002b
parent: 19dc6aedddde8b5306f1fb0dc4d46ba57f318cce
author: Runxi Yu <me@runxiyu.org>
date: Sat Feb 21 00:35:12 EST 2026
objectstore/packed: Add initial pack reading support
--- /dev/null
+++ b/objectstore/packed/TODO
@@ -1,0 +1,3 @@
+* Per delta-plan memo map
+* Internal handle/request context (might expose it externally later and add to global interface)
+* Audit mutex usage and locking discipline (cacheMu read/lock paths)
--- /dev/null
+++ b/objectstore/packed/delta_apply.go
@@ -1,0 +1,166 @@
+package packed
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// deltaResolveContent resolves one object's content bytes from its pack location.
+func (store *Store) deltaResolveContent(start location) (objecttype.Type, []byte, error) {+ plan, err := store.deltaPlanFor(start)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+
+ baseType, out, err := store.deltaResolveBase(plan)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+ for i := len(plan.frames) - 1; i >= 0; i-- {+ frame := plan.frames[i]
+ pack, err := store.openPack(frame.packName)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+ delta, err := inflateAt(pack, frame.dataOffset, -1)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+ out, err = applyDelta(out, delta)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+ }
+ if int64(len(out)) != plan.declaredSize {+ return objecttype.TypeInvalid, nil, fmt.Errorf(
+ "objectstore/packed: resolved content size mismatch: got %d want %d",
+ len(out),
+ plan.declaredSize,
+ )
+ }
+ return baseType, out, nil
+}
+
// applyDelta applies one Git delta instruction stream to base and returns the
// reconstructed target content.
//
// The stream opens with two varints (declared source and target sizes),
// followed by instructions: a set high bit means "copy a span of base", a
// clear high bit means "insert the next op literal bytes from the delta".
func applyDelta(base, delta []byte) ([]byte, error) {
	pos := 0

	// readVarint decodes one little-endian base-128 varint in place.
	readVarint := func() (int, error) {
		value := 0
		shift := uint(0)
		for {
			if pos >= len(delta) {
				return 0, fmt.Errorf("objectstore/packed: malformed delta varint")
			}
			b := delta[pos]
			pos++
			value |= int(b&0x7f) << shift
			if b&0x80 == 0 {
				return value, nil
			}
			shift += 7
			if shift > 63 {
				return 0, fmt.Errorf("objectstore/packed: delta varint overflow")
			}
		}
	}

	// readField reassembles one little-endian copy-instruction field whose
	// byte presence is encoded bit-by-bit in the opcode, starting at
	// firstBit for width bytes.
	readField := func(op byte, firstBit byte, width int, kind string) (int, error) {
		value := 0
		for i := 0; i < width; i++ {
			if op&(firstBit<<uint(i)) == 0 {
				continue
			}
			if pos >= len(delta) {
				return 0, fmt.Errorf("objectstore/packed: malformed delta copy %s", kind)
			}
			value |= int(delta[pos]) << (8 * uint(i))
			pos++
		}
		return value, nil
	}

	srcSize, err := readVarint()
	if err != nil {
		return nil, err
	}
	dstSize, err := readVarint()
	if err != nil {
		return nil, err
	}
	// Guard make([]byte, dstSize) against panics on crafted headers whose
	// varint bits spill into the sign bit.
	if srcSize < 0 || dstSize < 0 {
		return nil, fmt.Errorf("objectstore/packed: negative delta size")
	}
	if srcSize != len(base) {
		return nil, fmt.Errorf("objectstore/packed: delta source size mismatch: got %d want %d", srcSize, len(base))
	}

	out := make([]byte, dstSize)
	outPos := 0
	for pos < len(delta) {
		op := delta[pos]
		pos++

		if op&0x80 != 0 {
			// Copy instruction: take n bytes from base at off. The
			// offset uses up to four bytes (bits 0x01..0x08), the
			// size up to three (bits 0x10..0x40).
			off, err := readField(op, 0x01, 4, "offset")
			if err != nil {
				return nil, err
			}
			n, err := readField(op, 0x10, 3, "size")
			if err != nil {
				return nil, err
			}
			if n == 0 {
				// A zero size field encodes the default 64 KiB.
				n = 0x10000
			}
			if off < 0 || n < 0 || off+n > len(base) || outPos+n > len(out) {
				return nil, fmt.Errorf("objectstore/packed: delta copy out of bounds")
			}
			copy(out[outPos:outPos+n], base[off:off+n])
			outPos += n
			continue
		}

		if op == 0 {
			return nil, fmt.Errorf("objectstore/packed: invalid delta opcode 0")
		}
		// Insert instruction: op itself is the literal byte count.
		n := int(op)
		if pos+n > len(delta) || outPos+n > len(out) {
			return nil, fmt.Errorf("objectstore/packed: delta insert out of bounds")
		}
		copy(out[outPos:outPos+n], delta[pos:pos+n])
		outPos += n
		pos += n
	}
	if outPos != len(out) {
		return nil, fmt.Errorf("objectstore/packed: delta output size mismatch: got %d want %d", outPos, len(out))
	}
	return out, nil
}
+
// readDeltaVarint decodes one little-endian base-128 varint from buf,
// advancing *pos past the consumed bytes.
func readDeltaVarint(buf []byte, pos *int) (int, error) {
	result := 0
	for shift := uint(0); ; shift += 7 {
		if *pos >= len(buf) {
			return 0, fmt.Errorf("objectstore/packed: malformed delta varint")
		}
		cur := buf[*pos]
		*pos++
		result |= int(cur&0x7f) << shift
		if cur&0x80 == 0 {
			return result, nil
		}
		if shift+7 > 63 {
			return 0, fmt.Errorf("objectstore/packed: delta varint overflow")
		}
	}
}
--- /dev/null
+++ b/objectstore/packed/delta_base.go
@@ -1,0 +1,39 @@
+package packed
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
// deltaResolveBase materializes the base object body for one delta plan.
//
// Results are cached keyed by (pack name, offset) so delta chains sharing a
// base inflate it only once. The cache's get/add both copy the body, so the
// returned slice is owned by the caller.
func (store *Store) deltaResolveBase(plan deltaPlan) (objecttype.Type, []byte, error) {
	cacheKey := deltaBaseKey{
		packName: plan.baseLoc.packName,
		offset:   plan.baseLoc.offset,
	}

	// Fast path: serve from the cache under the read lock.
	store.cacheMu.RLock()
	if ty, content, ok := store.deltaCache.get(cacheKey); ok {
		store.cacheMu.RUnlock()
		return ty, content, nil
	}
	store.cacheMu.RUnlock()

	// Slow path: inflate the base from its pack. Two goroutines can race
	// between the RUnlock above and the Lock below and both inflate;
	// assumes a duplicate add is tolerated by the LRU — TODO confirm.
	pack, meta, err := store.entryMetaAt(plan.baseLoc)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}
	// The plan's base must be a canonical object, never another delta.
	if !isBaseObjectType(meta.ty) {
		return objecttype.TypeInvalid, nil, fmt.Errorf("objectstore/packed: delta plan base is not a base object")
	}
	base, err := inflateAt(pack, meta.dataOffset, meta.size)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}

	store.cacheMu.Lock()
	store.deltaCache.add(cacheKey, meta.ty, base)
	store.cacheMu.Unlock()
	return meta.ty, base, nil
}
--- /dev/null
+++ b/objectstore/packed/delta_cache.go
@@ -1,0 +1,58 @@
+package packed
+
+import (
+ "codeberg.org/lindenii/furgit/internal/cache/lru"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// deltaBaseKey identifies one base object by pack location.
+type deltaBaseKey struct {+ packName string
+ offset uint64
+}
+
+// deltaBaseValue stores one cached base object body.
+type deltaBaseValue struct {+ ty objecttype.Type
+ content []byte
+}
+
+// deltaCache wraps a weighted LRU for resolved delta bases.
+type deltaCache struct {+ lru *lru.Cache[deltaBaseKey, deltaBaseValue]
+}
+
+// newDeltaCache creates a delta base cache with a byte budget.
+func newDeltaCache(maxBytes int64) *deltaCache {+ return &deltaCache{+ lru: lru.New(
+ maxBytes,
+ func(_ deltaBaseKey, value deltaBaseValue) int64 {+ return int64(len(value.content))
+ },
+ nil,
+ ),
+ }
+}
+
+// get returns a cloned cached base object value.
+func (cache *deltaCache) get(key deltaBaseKey) (objecttype.Type, []byte, bool) {+ value, ok := cache.lru.Get(key)
+ if !ok {+ return objecttype.TypeInvalid, nil, false
+ }
+ return value.ty, append([]byte(nil), value.content...), true
+}
+
+// add stores a cloned base object value.
+func (cache *deltaCache) add(key deltaBaseKey, ty objecttype.Type, content []byte) {+ cache.lru.Add(key, deltaBaseValue{+ ty: ty,
+ content: append([]byte(nil), content...),
+ })
+}
+
+// clear removes all cached entries.
+func (cache *deltaCache) clear() {+ cache.lru.Clear()
+}
--- /dev/null
+++ b/objectstore/packed/delta_plan.go
@@ -1,0 +1,81 @@
+package packed
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
// deltaFrame describes one delta payload to apply during reconstruction.
type deltaFrame struct {
	// packName identifies where the delta payload lives.
	packName string
	// dataOffset points to the start of the delta zlib payload in pack.
	dataOffset int
}

// deltaPlan describes how to reconstruct one requested object.
//
// Reconstruction inflates the object at baseLoc and then applies frames in
// reverse slice order (innermost delta first).
type deltaPlan struct {
	// declaredSize is the target object's declared content size.
	declaredSize int64
	// baseLoc points to the innermost base object.
	baseLoc location
	// baseType is the canonical object type resolved from baseLoc.
	baseType objecttype.Type
	// frames contains deltas ordered from target down toward base.
	frames []deltaFrame
}
+
// deltaPlanFor walks one object's chain and builds a delta reconstruction plan.
//
// It follows ref-delta and ofs-delta links until a canonical (non-delta)
// entry is reached, recording each delta payload along the way. A visited
// set guards against reference cycles in corrupt packs.
func (store *Store) deltaPlanFor(start location) (deltaPlan, error) {
	visited := make(map[location]struct{})
	current := start

	var plan deltaPlan
	plan.declaredSize = -1 // sentinel: target size not captured yet

	for {
		if _, ok := visited[current]; ok {
			return deltaPlan{}, fmt.Errorf("objectstore/packed: delta cycle while resolving object")
		}
		visited[current] = struct{}{}

		_, meta, err := store.entryMetaAt(current)
		if err != nil {
			return deltaPlan{}, err
		}
		// Only the first (outermost) entry's declared size describes
		// the requested target object.
		if plan.declaredSize < 0 {
			plan.declaredSize = meta.size
		}

		// A canonical object terminates the chain.
		if isBaseObjectType(meta.ty) {
			plan.baseLoc = current
			plan.baseType = meta.ty
			return plan, nil
		}

		switch meta.ty {
		case objecttype.TypeRefDelta:
			plan.frames = append(plan.frames, deltaFrame{
				packName:   current.packName,
				dataOffset: meta.dataOffset,
			})
			// Ref-delta bases are named by object id and may live
			// in a different pack; resolve via the store index.
			next, err := store.lookup(meta.baseRefID)
			if err != nil {
				return deltaPlan{}, err
			}
			current = next
		case objecttype.TypeOfsDelta:
			plan.frames = append(plan.frames, deltaFrame{
				packName:   current.packName,
				dataOffset: meta.dataOffset,
			})
			// Ofs-delta bases live earlier in the same pack.
			current = location{
				packName: current.packName,
				offset:   meta.baseOfs,
			}
		default:
			return deltaPlan{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty)
		}
	}
}
--- /dev/null
+++ b/objectstore/packed/entry_inflate.go
@@ -1,0 +1,41 @@
+package packed
+
+import (
+ "bytes"
+ "compress/zlib"
+ "fmt"
+ "io"
+)
+
+// zlibReaderAt opens a zlib reader starting at data offset within pack.
+func zlibReaderAt(pack *packFile, offset int) (io.ReadCloser, error) {+ if offset < 0 || offset > len(pack.data) {+ return nil, fmt.Errorf("objectstore/packed: pack %q zlib offset out of bounds", pack.name)+ }
+ return zlib.NewReader(bytes.NewReader(pack.data[offset:]))
+}
+
+// inflateAt inflates one entry payload from data offset.
+//
+// When expectedSize is non-negative, the inflated length must match.
+func inflateAt(pack *packFile, offset int, expectedSize int64) ([]byte, error) {+ reader, err := zlibReaderAt(pack, offset)
+ if err != nil {+ return nil, err
+ }
+ defer func() { _ = reader.Close() }()+
+ body, err := io.ReadAll(reader)
+ if err != nil {+ return nil, err
+ }
+ if expectedSize >= 0 && int64(len(body)) != expectedSize {+ return nil, fmt.Errorf(
+ "objectstore/packed: pack %q inflated size mismatch: got %d want %d",
+ pack.name,
+ len(body),
+ expectedSize,
+ )
+ }
+ return body, nil
+}
--- /dev/null
+++ b/objectstore/packed/entry_parse.go
@@ -1,0 +1,117 @@
+package packed
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
// entryMeta describes one parsed pack entry header.
type entryMeta struct {
	// ty is the pack entry type tag; this may be a delta type, not only
	// one of the four canonical object types.
	ty objecttype.Type
	// size is the declared resulting content size.
	size int64
	// dataOffset points to the zlib payload start within the pack data.
	dataOffset int
	// baseRefID is set only for ref-delta entries.
	baseRefID objectid.ObjectID
	// baseOfs is set only for ofs-delta entries; it is the absolute pack
	// offset of the base (already converted from the on-disk distance).
	baseOfs uint64
}
+
// parseEntryMeta parses one pack entry header at offset.
//
// The first byte carries the type in bits 4-6 and the low four size bits;
// while the continuation bit (0x80) is set, further bytes extend the size
// seven bits at a time. Delta entries then carry their base reference
// (object id for ref-delta, backward distance for ofs-delta) before the
// zlib payload begins.
func parseEntryMeta(pack *packFile, algo objectid.Algorithm, offset uint64) (entryMeta, error) {
	var zero entryMeta
	if offset >= uint64(len(pack.data)) {
		return zero, fmt.Errorf("objectstore/packed: pack %q offset %d out of bounds", pack.name, offset)
	}

	pos := int(offset)
	first := pack.data[pos]
	pos++

	meta := entryMeta{
		ty:   objecttype.Type((first >> 4) & 0x07),
		size: int64(first & 0x0f),
	}

	// Size continuation bytes start contributing at bit 4.
	// NOTE(review): shift is unbounded here; on extremely long crafted
	// headers, bits shifted past 63 are silently dropped (Go defines
	// over-shifts as 0) and only a set sign bit is caught below — consider
	// an explicit shift cap.
	shift := uint(4)
	b := first
	for b&0x80 != 0 {
		if pos >= len(pack.data) {
			return zero, fmt.Errorf("objectstore/packed: pack %q truncated entry header", pack.name)
		}
		b = pack.data[pos]
		pos++
		meta.size |= int64(b&0x7f) << shift
		shift += 7
	}
	if meta.size < 0 {
		return zero, fmt.Errorf("objectstore/packed: pack %q entry has negative size", pack.name)
	}

	switch meta.ty {
	case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag:
		// Base object entries have no extra header fields.
	case objecttype.TypeRefDelta:
		// Ref-delta: a raw base object id follows the size varint.
		hashSize := algo.Size()
		if pos+hashSize > len(pack.data) {
			return zero, fmt.Errorf("objectstore/packed: pack %q truncated ref-delta base id", pack.name)
		}
		baseID, err := objectid.FromBytes(algo, pack.data[pos:pos+hashSize])
		if err != nil {
			return zero, err
		}
		meta.baseRefID = baseID
		pos += hashSize
	case objecttype.TypeOfsDelta:
		// Ofs-delta: a backward distance to the base entry follows.
		dist, consumed, err := parseOfsDeltaDistance(pack.data[pos:])
		if err != nil {
			return zero, err
		}
		pos += consumed
		// The base must land strictly before this entry (offset <= dist
		// would place it at or before byte 0).
		if offset <= dist {
			return zero, fmt.Errorf("objectstore/packed: pack %q has invalid ofs-delta base", pack.name)
		}
		meta.baseOfs = offset - dist
	default:
		return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported object type %d", pack.name, meta.ty)
	}

	meta.dataOffset = pos
	if meta.dataOffset > len(pack.data) {
		return zero, fmt.Errorf("objectstore/packed: pack %q entry data offset out of bounds", pack.name)
	}
	return meta, nil
}
+
// parseOfsDeltaDistance decodes one ofs-delta backward distance, returning
// the distance and the number of bytes consumed.
//
// Each continuation step biases the accumulated value by one, per Git's
// ofs-delta encoding, so distinct byte sequences encode distinct distances.
func parseOfsDeltaDistance(buf []byte) (uint64, int, error) {
	if len(buf) == 0 {
		return 0, 0, fmt.Errorf("objectstore/packed: malformed ofs-delta distance")
	}
	cur := buf[0]
	dist := uint64(cur & 0x7f)
	used := 1
	for cur&0x80 != 0 {
		if used >= len(buf) {
			return 0, 0, fmt.Errorf("objectstore/packed: malformed ofs-delta distance")
		}
		cur = buf[used]
		used++
		dist = ((dist + 1) << 7) + uint64(cur&0x7f)
	}
	return dist, used, nil
}
+
+// isBaseObjectType reports whether ty is one of the four canonical object types.
+func isBaseObjectType(ty objecttype.Type) bool {+ switch ty {+ case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag:
+ return true
+ default:
+ return false
+ }
+}
--- /dev/null
+++ b/objectstore/packed/helpers_test.go
@@ -1,0 +1,96 @@
+package packed_test
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ "codeberg.org/lindenii/furgit/objectheader"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objectstore/packed"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
// openPackedStore opens a packed.Store over repoPath's objects/pack
// directory, registering root cleanup with the test.
func openPackedStore(t *testing.T, repoPath string, algo objectid.Algorithm) *packed.Store {
	t.Helper()
	packPath := filepath.Join(repoPath, "objects", "pack")
	root, err := os.OpenRoot(packPath)
	if err != nil {
		t.Fatalf("OpenRoot(%q): %v", packPath, err)
	}
	// The store borrows root; keep it open for the test's lifetime.
	t.Cleanup(func() { _ = root.Close() })

	store, err := packed.New(root, algo)
	if err != nil {
		t.Fatalf("packed.New: %v", err)
	}
	return store
}
+
+func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte {+ t.Helper()
+ data, err := io.ReadAll(reader)
+ if err != nil {+ _ = reader.Close()
+ t.Fatalf("ReadAll: %v", err)+ }
+ if err := reader.Close(); err != nil {+ t.Fatalf("Close: %v", err)+ }
+ return data
+}
+
// expectedRawObject derives the expected type, body, and full serialized form
// ("type size\0body") of one object, using the git CLI as the oracle.
func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) {
	t.Helper()

	// Ask git for the object's type name and body.
	typeName := testRepo.Run(t, "cat-file", "-t", id.String())
	ty, ok := objecttype.ParseName(typeName)
	if !ok {
		t.Fatalf("ParseName(%q) failed", typeName)
	}
	body := testRepo.CatFile(t, typeName, id)
	header, ok := objectheader.Encode(ty, int64(len(body)))
	if !ok {
		t.Fatalf("objectheader.Encode failed")
	}

	// Concatenate header and body into the loose-object serialized form.
	raw := make([]byte, len(header)+len(body))
	copy(raw, header)
	copy(raw[len(header):], body)
	return ty, body, raw
}
+
+func createPackedFixtureRepo(t *testing.T, algo objectid.Algorithm) (*testgit.TestRepo, []objectid.ObjectID) {+ t.Helper()
+
+ testRepo := testgit.NewBareRepo(t, algo)
+ blobID, treeID, commitID := testRepo.MakeCommit(t, "packed store base commit")
+ testRepo.Run(t, "update-ref", "refs/heads/main", commitID.String())
+ tagID := testRepo.TagAnnotated(t, "v1.0.0", commitID, "packed-store-tag")
+
+ parent := commitID
+ for i := range 24 {+ content := "common-prefix\n" + strings.Repeat("line-"+strconv.Itoa(i%3)+"\n", 256) + fmt.Sprintf("tail-%d\n", i)+ nextBlob, nextTree := testRepo.MakeSingleFileTree(t, fmt.Sprintf("file-%02d.txt", i), []byte(content))+ nextCommit := testRepo.CommitTree(t, nextTree, fmt.Sprintf("commit-%02d", i), parent)+ testRepo.Run(t, "update-ref", "refs/heads/main", nextCommit.String())
+ parent = nextCommit
+
+ _ = nextBlob
+ _ = nextTree
+ }
+
+ testRepo.Repack(t, "-a", "-d", "-f", "--window=64", "--depth=64")
+ return testRepo, []objectid.ObjectID{+ blobID,
+ treeID,
+ commitID,
+ tagID,
+ parent,
+ }
+}
--- /dev/null
+++ b/objectstore/packed/idx_load.go
@@ -1,0 +1,145 @@
+package packed
+
import (
	"errors"
	"fmt"
	"os"
	"slices"
	"strings"
	"syscall"

	"codeberg.org/lindenii/furgit/objectid"
)
+
// location identifies one object entry in a specific pack file.
type location struct {
	// packName is the .pack basename containing the entry.
	packName string
	// offset is the entry's byte offset within that pack.
	offset uint64
}

// idxFile stores one mapped and validated idx v2 file.
type idxFile struct {
	// idxName is the basename of this .idx file.
	idxName string
	// packName is the matching .pack basename.
	packName string
	// algo is the hash algorithm encoded by the index.
	algo objectid.Algorithm

	// file is the opened index file descriptor; held open while data is
	// mapped.
	file *os.File
	// data is the mapped index bytes.
	data []byte

	// fanout stores the cumulative fanout table values.
	fanout [256]uint32
	// numObjects equals fanout[255], the total object count.
	numObjects int

	// namesOffset starts the sorted object-id table.
	namesOffset int
	// offset32Offset starts the 32-bit offset table.
	offset32Offset int
	// offset64Offset starts the 64-bit offset table.
	offset64Offset int
	// offset64Count is the number of 64-bit offset entries.
	offset64Count int
}
+
+// loadIndexes loads and validates all .idx files under objects/pack.
+func (store *Store) loadIndexes() ([]*idxFile, error) {+ dir, err := store.root.Open(".")+ if err != nil {+ if os.IsNotExist(err) {+ return nil, nil
+ }
+ return nil, err
+ }
+ defer func() { _ = dir.Close() }()+ entries, err := dir.ReadDir(-1)
+ if err != nil {+ return nil, err
+ }
+
+ idxNames := make([]string, 0, len(entries))
+ for _, entry := range entries {+ if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") {+ continue
+ }
+ idxNames = append(idxNames, entry.Name())
+ }
+ slices.Sort(idxNames)
+
+ out := make([]*idxFile, 0, len(idxNames))
+ for _, idxName := range idxNames {+ packName := strings.TrimSuffix(idxName, ".idx") + ".pack"
+ if _, err := store.root.Stat(packName); err != nil {+ if os.IsNotExist(err) {+ return nil, fmt.Errorf("objectstore/packed: missing pack file for index %q", idxName)+ }
+ return nil, err
+ }
+ index, err := openIdxFile(store.root, idxName, packName, store.algo)
+ if err != nil {+ for _, loaded := range out {+ _ = loaded.close()
+ }
+ return nil, err
+ }
+ out = append(out, index)
+ }
+ return out, nil
+}
+
// openIdxFile opens, memory-maps, and validates one idx v2 file.
//
// On failure the file descriptor and any mapping are released before
// returning; on success the returned idxFile owns both until close.
func openIdxFile(root *os.Root, idxName, packName string, algo objectid.Algorithm) (*idxFile, error) {
	file, err := root.Open(idxName)
	if err != nil {
		return nil, err
	}
	info, err := file.Stat()
	if err != nil {
		_ = file.Close()
		return nil, err
	}
	size := info.Size()
	// Reject sizes that do not fit in int, since mmap and slicing use int.
	if size < 0 || size > int64(int(^uint(0)>>1)) {
		_ = file.Close()
		return nil, fmt.Errorf("objectstore/packed: idx %q has unsupported size", idxName)
	}
	data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
	if err != nil {
		_ = file.Close()
		return nil, err
	}

	index := &idxFile{
		idxName:  idxName,
		packName: packName,
		algo:     algo,
		file:     file,
		data:     data,
	}
	if err := index.parse(); err != nil {
		// close unmaps data and closes file in one step.
		_ = index.close()
		return nil, err
	}
	return index, nil
}
+
+// close unmaps and closes one idx handle.
+func (index *idxFile) close() error {+ var closeErr error
+ if index.data != nil {+ if err := syscall.Munmap(index.data); err != nil && closeErr == nil {+ closeErr = err
+ }
+ index.data = nil
+ }
+ if index.file != nil {+ if err := index.file.Close(); err != nil && closeErr == nil {+ closeErr = err
+ }
+ index.file = nil
+ }
+ return closeErr
+}
--- /dev/null
+++ b/objectstore/packed/idx_parse.go
@@ -1,0 +1,142 @@
+package packed
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objectid"
+)
+
// On-disk constants for the pack index (idx) v2 format.
const (
	// idxMagicV2 is the big-endian magic ("\377tOc") opening an idx v2 file.
	idxMagicV2 = 0xff744f63
	// idxVersionV2 is the only idx version this package reads.
	idxVersionV2 = 2
)
+
// parse validates mapped idx v2 structure and stores table boundaries.
//
// Layout after the 8-byte header: 256-entry fanout table, sorted object-id
// table, CRC32 table, 32-bit offset table, optional 64-bit offset table,
// then two trailing hashSize-byte checksums.
func (index *idxFile) parse() error {
	hashSize := index.algo.Size()
	if hashSize <= 0 {
		return fmt.Errorf("objectstore/packed: idx %q has invalid hash algorithm", index.idxName)
	}
	// Smallest possible file: header + fanout + the two trailing checksums.
	minLen := 8 + 256*4 + 2*hashSize
	if len(index.data) < minLen {
		return fmt.Errorf("objectstore/packed: idx %q too short", index.idxName)
	}
	if binary.BigEndian.Uint32(index.data[:4]) != idxMagicV2 {
		return fmt.Errorf("objectstore/packed: idx %q invalid magic", index.idxName)
	}
	if binary.BigEndian.Uint32(index.data[4:8]) != idxVersionV2 {
		return fmt.Errorf("objectstore/packed: idx %q unsupported version", index.idxName)
	}

	// Fanout entries are cumulative counts and must be non-decreasing.
	prev := uint32(0)
	for i := range 256 {
		base := 8 + i*4
		cur := binary.BigEndian.Uint32(index.data[base : base+4])
		if cur < prev {
			return fmt.Errorf("objectstore/packed: idx %q has non-monotonic fanout table", index.idxName)
		}
		index.fanout[i] = cur
		prev = cur
	}
	index.numObjects = int(index.fanout[255])
	// int(uint32) can wrap negative on 32-bit platforms.
	if index.numObjects < 0 {
		return fmt.Errorf("objectstore/packed: idx %q has invalid object count", index.idxName)
	}

	namesBytes := index.numObjects * hashSize
	crcBytes := index.numObjects * 4
	offset32Bytes := index.numObjects * 4
	minSize := 8 + 256*4 + namesBytes + crcBytes + offset32Bytes + 2*hashSize
	// minSize < 0 catches arithmetic overflow in the products above.
	if minSize < 0 || len(index.data) < minSize {
		return fmt.Errorf("objectstore/packed: idx %q has truncated tables", index.idxName)
	}

	index.namesOffset = 8 + 256*4
	index.offset32Offset = index.namesOffset + namesBytes + crcBytes
	index.offset64Offset = index.offset32Offset + offset32Bytes

	// Whatever remains before the trailing checksums is the 64-bit table.
	offset64Bytes := len(index.data) - index.offset64Offset - 2*hashSize
	if offset64Bytes < 0 || offset64Bytes%8 != 0 {
		return fmt.Errorf("objectstore/packed: idx %q has malformed 64-bit offset table", index.idxName)
	}
	index.offset64Count = offset64Bytes / 8
	// Cap the 64-bit table at numObjects-1 entries (floored at zero).
	maxOffset64Count := index.numObjects - 1
	if maxOffset64Count < 0 {
		maxOffset64Count = 0
	}
	if index.offset64Count > maxOffset64Count {
		return fmt.Errorf("objectstore/packed: idx %q has oversized 64-bit offset table", index.idxName)
	}
	return nil
}
+
// lookup resolves one object ID to its pack offset within this index.
//
// The fanout table narrows the search to ids sharing the first byte, then a
// binary search over the sorted name table finds the exact entry. A missing
// object returns (0, false, nil), not an error.
func (index *idxFile) lookup(id objectid.ObjectID) (uint64, bool, error) {
	if id.Algorithm() != index.algo {
		return 0, false, fmt.Errorf("objectstore/packed: object id algorithm mismatch")
	}
	idBytes := (&id).RawBytes()
	hashSize := len(idBytes)
	if hashSize != index.algo.Size() {
		return 0, false, fmt.Errorf("objectstore/packed: unexpected object id length")
	}

	// fanout[b] counts objects whose first byte is <= b, yielding the
	// half-open search window [fanout[b-1], fanout[b]).
	first := int(idBytes[0])
	lo := 0
	if first > 0 {
		lo = int(index.fanout[first-1])
	}
	hi := int(index.fanout[first])
	if lo < 0 || hi < 0 || lo > hi || hi > index.numObjects {
		return 0, false, fmt.Errorf("objectstore/packed: idx %q has invalid fanout bounds", index.idxName)
	}

	// Standard binary search over the sorted name table.
	for lo < hi {
		mid := lo + (hi-lo)/2
		nameOffset := index.namesOffset + mid*hashSize
		if nameOffset < 0 || nameOffset+hashSize > len(index.data) {
			return 0, false, fmt.Errorf("objectstore/packed: idx %q truncated name table", index.idxName)
		}
		cmp := bytes.Compare(index.data[nameOffset:nameOffset+hashSize], idBytes)
		if cmp == 0 {
			offset, err := index.offsetAt(mid)
			if err != nil {
				return 0, false, err
			}
			return offset, true, nil
		}
		if cmp < 0 {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	return 0, false, nil
}
+
// offsetAt resolves the pack offset for one object index entry.
//
// A 32-bit word with the high bit clear is the offset itself; with the high
// bit set, the low 31 bits index into the 64-bit offset table.
func (index *idxFile) offsetAt(objectIndex int) (uint64, error) {
	if objectIndex < 0 || objectIndex >= index.numObjects {
		return 0, fmt.Errorf("objectstore/packed: idx %q offset index out of bounds", index.idxName)
	}
	wordOffset := index.offset32Offset + objectIndex*4
	if wordOffset < 0 || wordOffset+4 > len(index.data) {
		return 0, fmt.Errorf("objectstore/packed: idx %q truncated 32-bit offset table", index.idxName)
	}
	word := binary.BigEndian.Uint32(index.data[wordOffset : wordOffset+4])
	if word&0x80000000 == 0 {
		return uint64(word), nil
	}

	// High bit set: indirect through the 64-bit offset table.
	pos := int(word & 0x7fffffff)
	if pos < 0 || pos >= index.offset64Count {
		return 0, fmt.Errorf("objectstore/packed: idx %q invalid 64-bit offset position", index.idxName)
	}
	offOffset := index.offset64Offset + pos*8
	// The read must also stay clear of the two trailing checksums.
	if offOffset < 0 || offOffset+8 > len(index.data)-2*index.algo.Size() {
		return 0, fmt.Errorf("objectstore/packed: idx %q truncated 64-bit offset table", index.idxName)
	}
	return binary.BigEndian.Uint64(index.data[offOffset : offOffset+8]), nil
}
--- /dev/null
+++ b/objectstore/packed/pack.go
@@ -1,0 +1,62 @@
+package packed
+
+import (
+ "encoding/binary"
+ "fmt"
+ "os"
+ "syscall"
+)
+
+const packSignature = 0x5041434b
+
+// packFile stores one mapped and validated .pack file.
+type packFile struct {+ // name is the .pack basename.
+ name string
+ // file is the opened pack file descriptor.
+ file *os.File
+ // data is the mapped pack bytes.
+ data []byte
+}
+
+// openPackFile maps and validates one pack file.
+func openPackFile(name string, file *os.File, size int64) (*packFile, error) {+ if size < 12 {+ return nil, fmt.Errorf("objectstore/packed: pack %q too short", name)+ }
+ if size > int64(int(^uint(0)>>1)) {+ return nil, fmt.Errorf("objectstore/packed: pack %q has unsupported size", name)+ }
+ data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
+ if err != nil {+ return nil, err
+ }
+ if binary.BigEndian.Uint32(data[:4]) != packSignature {+ _ = syscall.Munmap(data)
+ return nil, fmt.Errorf("objectstore/packed: pack %q invalid signature", name)+ }
+ version := binary.BigEndian.Uint32(data[4:8])
+ if version != 2 && version != 3 {+ _ = syscall.Munmap(data)
+ return nil, fmt.Errorf("objectstore/packed: pack %q unsupported version %d", name, version)+ }
+ return &packFile{name: name, file: file, data: data}, nil+}
+
+// close unmaps and closes one pack handle.
+func (pack *packFile) close() error {+ var closeErr error
+ if pack.data != nil {+ if err := syscall.Munmap(pack.data); err != nil && closeErr == nil {+ closeErr = err
+ }
+ pack.data = nil
+ }
+ if pack.file != nil {+ if err := pack.file.Close(); err != nil && closeErr == nil {+ closeErr = err
+ }
+ pack.file = nil
+ }
+ return closeErr
+}
--- /dev/null
+++ b/objectstore/packed/read_bytes.go
@@ -1,0 +1,34 @@
+package packed
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/objectheader"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// ReadBytesContent reads an object's type and content bytes.
+func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) {+ loc, err := store.lookup(id)
+ if err != nil {+ return objecttype.TypeInvalid, nil, err
+ }
+ return store.deltaResolveContent(loc)
+}
+
+// ReadBytesFull reads a full serialized object as "type size\0content".
+func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) {+ ty, content, err := store.ReadBytesContent(id)
+ if err != nil {+ return nil, err
+ }
+ header, ok := objectheader.Encode(ty, int64(len(content)))
+ if !ok {+ return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", ty)+ }
+ out := make([]byte, len(header)+len(content))
+ copy(out, header)
+ copy(out[len(header):], content)
+ return out, nil
+}
--- /dev/null
+++ b/objectstore/packed/read_header.go
@@ -1,0 +1,19 @@
+package packed
+
+import (
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// ReadHeader reads an object's type and declared content size.
+func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) {+ loc, err := store.lookup(id)
+ if err != nil {+ return objecttype.TypeInvalid, 0, err
+ }
+ plan, err := store.deltaPlanFor(loc)
+ if err != nil {+ return objecttype.TypeInvalid, 0, err
+ }
+ return plan.baseType, plan.declaredSize, nil
+}
--- /dev/null
+++ b/objectstore/packed/read_reader.go
@@ -1,0 +1,93 @@
+package packed
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+
+ "codeberg.org/lindenii/furgit/objectheader"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// readCloser proxies reads and closes one underlying closer.
+type readCloser struct {+ reader io.Reader
+ closer io.Closer
+}
+
+// Read proxies reads to the underlying reader.
+func (reader *readCloser) Read(dst []byte) (int, error) {+ return reader.reader.Read(dst)
+}
+
+// Close closes the underlying closer.
+func (reader *readCloser) Close() error {+ return reader.closer.Close()
+}
+
// ReadReaderContent reads an object's type, declared content size, and content stream.
//
// Base (non-delta) objects stream directly from the inflating zlib payload;
// deltified objects are fully materialized first and wrapped in a reader.
//
// The caller must close the returned reader.
func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) {
	loc, err := store.lookup(id)
	if err != nil {
		return objecttype.TypeInvalid, 0, nil, err
	}

	pack, meta, err := store.entryMetaAt(loc)
	if err != nil {
		return objecttype.TypeInvalid, 0, nil, err
	}
	if isBaseObjectType(meta.ty) {
		zr, err := zlibReaderAt(pack, meta.dataOffset)
		if err != nil {
			return objecttype.TypeInvalid, 0, nil, err
		}
		// NOTE(review): the stream is capped at meta.size, but a
		// too-short payload is not detected here, unlike the
		// size-checked materialized path below.
		return meta.ty, meta.size, &readCloser{
			reader: io.LimitReader(zr, meta.size),
			closer: zr,
		}, nil
	}

	ty, content, err := store.deltaResolveContent(loc)
	if err != nil {
		return objecttype.TypeInvalid, 0, nil, err
	}
	return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil
}
+
// ReadReaderFull reads a full serialized object stream as "type size\0content".
//
// Base objects stream the encoded header followed by the inflating zlib
// payload; deltified objects fall back to full materialization via
// ReadBytesFull.
//
// The caller must close the returned reader.
func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) {
	loc, err := store.lookup(id)
	if err != nil {
		return nil, err
	}

	pack, meta, err := store.entryMetaAt(loc)
	if err != nil {
		return nil, err
	}
	if isBaseObjectType(meta.ty) {
		header, ok := objectheader.Encode(meta.ty, meta.size)
		if !ok {
			return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", meta.ty)
		}
		zr, err := zlibReaderAt(pack, meta.dataOffset)
		if err != nil {
			return nil, err
		}
		// Closing the wrapper closes the zlib stream under MultiReader.
		return &readCloser{
			reader: io.MultiReader(bytes.NewReader(header), io.LimitReader(zr, meta.size)),
			closer: zr,
		}, nil
	}

	// NOTE(review): ReadBytesFull repeats the id lookup done above —
	// correct, but minor duplicate work on the delta path.
	raw, err := store.ReadBytesFull(id)
	if err != nil {
		return nil, err
	}
	return io.NopCloser(bytes.NewReader(raw)), nil
}
--- /dev/null
+++ b/objectstore/packed/read_test.go
@@ -1,0 +1,149 @@
+package packed_test
+
+import (
+ "bytes"
+ "errors"
+ "os"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objectstore"
+ "codeberg.org/lindenii/furgit/objectstore/packed"
+)
+
+func TestPackedStoreReadAgainstGit(t *testing.T) {+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) {+ testRepo, ids := createPackedFixtureRepo(t, algo)
+ store := openPackedStore(t, testRepo.Dir(), algo)
+
+ for _, id := range ids {+ id := id
+ t.Run(id.String(), func(t *testing.T) {+ wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, id)
+
+ gotHeaderType, gotHeaderSize, err := store.ReadHeader(id)
+ if err != nil {+ t.Fatalf("ReadHeader: %v", err)+ }
+ if gotHeaderType != wantType {+ t.Fatalf("ReadHeader type = %v, want %v", gotHeaderType, wantType)+ }
+ if gotHeaderSize != int64(len(wantBody)) {+ t.Fatalf("ReadHeader size = %d, want %d", gotHeaderSize, len(wantBody))+ }
+
+ gotRaw, err := store.ReadBytesFull(id)
+ if err != nil {+ t.Fatalf("ReadBytesFull: %v", err)+ }
+ if !bytes.Equal(gotRaw, wantRaw) {+ t.Fatalf("ReadBytesFull mismatch")+ }
+
+ gotType, gotBody, err := store.ReadBytesContent(id)
+ if err != nil {+ t.Fatalf("ReadBytesContent: %v", err)+ }
+ if gotType != wantType {+ t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType)+ }
+ if !bytes.Equal(gotBody, wantBody) {+ t.Fatalf("ReadBytesContent mismatch")+ }
+
+ fullReader, err := store.ReadReaderFull(id)
+ if err != nil {+ t.Fatalf("ReadReaderFull: %v", err)+ }
+ if got := mustReadAllAndClose(t, fullReader); !bytes.Equal(got, wantRaw) {+ t.Fatalf("ReadReaderFull mismatch")+ }
+
+ contentType, contentSize, contentReader, err := store.ReadReaderContent(id)
+ if err != nil {+ t.Fatalf("ReadReaderContent: %v", err)+ }
+ if contentType != wantType {+ t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType)+ }
+ if contentSize != int64(len(wantBody)) {+ t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody))+ }
+ if got := mustReadAllAndClose(t, contentReader); !bytes.Equal(got, wantBody) {+ t.Fatalf("ReadReaderContent mismatch")+ }
+ })
+ }
+ })
+}
+
+func TestPackedStoreErrors(t *testing.T) {+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) {+ testRepo, _ := createPackedFixtureRepo(t, algo)
+ store := openPackedStore(t, testRepo.Dir(), algo)
+
+ notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen()))+ if err != nil {+ t.Fatalf("ParseHex(notFound): %v", err)+ }
+
+ if _, err := store.ReadBytesFull(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) {+ t.Fatalf("ReadBytesFull not-found error = %v", err)+ }
+ if _, _, err := store.ReadBytesContent(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) {+ t.Fatalf("ReadBytesContent not-found error = %v", err)+ }
+ if _, err := store.ReadReaderFull(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) {+ t.Fatalf("ReadReaderFull not-found error = %v", err)+ }
+ if _, _, _, err := store.ReadReaderContent(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) {+ t.Fatalf("ReadReaderContent not-found error = %v", err)+ }
+ if _, _, err := store.ReadHeader(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) {+ t.Fatalf("ReadHeader not-found error = %v", err)+ }
+
+ var otherAlgo objectid.Algorithm
+ for _, candidate := range objectid.SupportedAlgorithms() {+ if candidate != algo {+ otherAlgo = candidate
+ break
+ }
+ }
+ if otherAlgo != objectid.AlgorithmUnknown {+ mismatchID, err := objectid.ParseHex(otherAlgo, strings.Repeat("0", otherAlgo.HexLen()))+ if err != nil {+ t.Fatalf("ParseHex(mismatch): %v", err)+ }
+ if _, err := store.ReadBytesFull(mismatchID); err == nil || !strings.Contains(err.Error(), "algorithm mismatch") {+ t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err)+ }
+ }
+ })
+}
+
+func TestPackedStoreNewValidation(t *testing.T) {+ testRepo, _ := createPackedFixtureRepo(t, objectid.AlgorithmSHA1)
+ store := openPackedStore(t, testRepo.Dir(), objectid.AlgorithmSHA1)
+ if err := store.Close(); err != nil {+ t.Fatalf("Close: %v", err)+ }
+ if err := store.Close(); err != nil {+ t.Fatalf("Close second: %v", err)+ }
+}
+
+func TestPackedStoreInvalidAlgorithm(t *testing.T) {+ testRepo := testgit.NewBareRepo(t, objectid.AlgorithmSHA1)
+ root, err := os.OpenRoot(testRepo.Dir())
+ if err != nil {+ t.Fatalf("OpenRoot(%q): %v", testRepo.Dir(), err)+ }
+ t.Cleanup(func() { _ = root.Close() })+
+ if _, err := packed.New(root, objectid.AlgorithmUnknown); !errors.Is(err, objectid.ErrInvalidAlgorithm) {+ t.Fatalf("packed.New invalid algorithm error = %v", err)+ }
+}
--- /dev/null
+++ b/objectstore/packed/store.go
@@ -1,0 +1,182 @@
+// Package packed provides read access to packed Git objects from objects/pack.
+package packed
+
+import (
+ "errors"
+ "os"
+ "sync"
+
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objectstore"
+)
+
// Store reads Git objects from pack/index files under an objects/pack root.
//
// Store does not own root. Callers are responsible for closing root.
//
// NOTE(review): the per-TODO mutex audit is pending — lock ordering between
// stateMu and cacheMu should be confirmed before adding new lock sites.
type Store struct {
	// root is the objects/pack capability used for all file access.
	root *os.Root
	// algo is the expected object ID algorithm for lookups.
	algo objectid.Algorithm

	// loadOnce guards one-time index loading (see ensureIndexes).
	loadOnce sync.Once
	// loadErr stores index loading failures; read back on every lookup.
	loadErr error
	// indexesLoaded reports whether indexes/loadErr have been initialized.
	indexesLoaded bool
	// indexes stores parsed .idx handles; published under stateMu and
	// nilled out by Close.
	indexes []*idxFile

	// stateMu guards index publication, the packs cache, and close state.
	stateMu sync.RWMutex
	// cacheMu guards delta cache operations.
	cacheMu sync.RWMutex
	// packs caches opened .pack handles by basename.
	packs map[string]*packFile
	// deltaCache caches resolved base objects by pack location.
	deltaCache *deltaCache
	// closed reports whether Close has been called.
	closed bool
}
+
// defaultDeltaCacheMaxBytes bounds the delta base cache at 32 MiB.
const defaultDeltaCacheMaxBytes = 32 << 20

// Compile-time check that *Store satisfies the objectstore.Store interface.
var _ objectstore.Store = (*Store)(nil)
+
+// New creates a packed-object store rooted at an objects/pack directory.
+func New(root *os.Root, algo objectid.Algorithm) (*Store, error) {+ if algo.Size() == 0 {+ return nil, objectid.ErrInvalidAlgorithm
+ }
+ return &Store{+ root: root,
+ algo: algo,
+ packs: make(map[string]*packFile),
+ deltaCache: newDeltaCache(defaultDeltaCacheMaxBytes),
+ }, nil
+}
+
+// Close releases mapped pack/index resources associated with the store.
+func (store *Store) Close() error {+ store.stateMu.Lock()
+ if store.closed {+ store.stateMu.Unlock()
+ return nil
+ }
+ store.closed = true
+ packs := store.packs
+ store.packs = make(map[string]*packFile)
+ indexes := store.indexes
+ store.indexes = nil
+ store.stateMu.Unlock()
+
+ var closeErr error
+ for _, pack := range packs {+ if err := pack.close(); err != nil && closeErr == nil {+ closeErr = err
+ }
+ }
+ for _, index := range indexes {+ if index == nil {+ continue
+ }
+ if err := index.close(); err != nil && closeErr == nil {+ closeErr = err
+ }
+ }
+ store.cacheMu.Lock()
+ if store.deltaCache != nil {+ store.deltaCache.clear()
+ }
+ store.cacheMu.Unlock()
+ return closeErr
+}
+
+// ensureIndexes loads and validates all pack indexes once.
+func (store *Store) ensureIndexes() error {+ store.loadOnce.Do(func() {+ indexes, err := store.loadIndexes()
+ store.stateMu.Lock()
+ store.indexes = indexes
+ store.loadErr = err
+ store.indexesLoaded = true
+ store.stateMu.Unlock()
+ })
+
+ store.stateMu.RLock()
+ defer store.stateMu.RUnlock()
+ if store.indexesLoaded {+ return store.loadErr
+ }
+ return errors.New("objectstore/packed: indexes were not initialized")+}
+
+// lookup resolves one object ID to its pack location.
+func (store *Store) lookup(id objectid.ObjectID) (location, error) {+ var zero location
+ if id.Algorithm() != store.algo {+ return zero, errors.New("objectstore/packed: object id algorithm mismatch")+ }
+ if err := store.ensureIndexes(); err != nil {+ return zero, err
+ }
+ for _, index := range store.indexes {+ offset, ok, err := index.lookup(id)
+ if err != nil {+ return zero, err
+ }
+ if ok {+ return location{packName: index.packName, offset: offset}, nil+ }
+ }
+ return zero, objectstore.ErrObjectNotFound
+}
+
+// openPack returns one opened and validated pack handle.
+func (store *Store) openPack(name string) (*packFile, error) {+ store.stateMu.RLock()
+ if pack, ok := store.packs[name]; ok {+ store.stateMu.RUnlock()
+ return pack, nil
+ }
+ store.stateMu.RUnlock()
+
+ file, err := store.root.Open(name)
+ if err != nil {+ return nil, err
+ }
+ info, err := file.Stat()
+ if err != nil {+ _ = file.Close()
+ return nil, err
+ }
+ pack, err := openPackFile(name, file, info.Size())
+ if err != nil {+ _ = file.Close()
+ return nil, err
+ }
+
+ store.stateMu.Lock()
+ if existing, ok := store.packs[name]; ok {+ store.stateMu.Unlock()
+ _ = pack.close()
+ return existing, nil
+ }
+ store.packs[name] = pack
+ store.stateMu.Unlock()
+ return pack, nil
+}
+
+// entryMetaAt parses one pack entry header at location.
+func (store *Store) entryMetaAt(loc location) (*packFile, entryMeta, error) {+ pack, err := store.openPack(loc.packName)
+ if err != nil {+ return nil, entryMeta{}, err+ }
+ meta, err := parseEntryMeta(pack, store.algo, loc.offset)
+ if err != nil {+ return nil, entryMeta{}, err+ }
+ return pack, meta, nil
+}
--
⑨