ref: 8e320c9ca634e6b2431f9442b7d5191864735ae4
parent: 1fa0d2bcfa7aebdcec8644f53acc58465c109b72
author: Runxi Yu <me@runxiyu.org>
date: Fri Jan 30 12:06:51 EST 2026
packed, delta: Implement thin packs
--- a/delta_write_select.go
+++ b/delta_write_select.go
@@ -8,6 +8,7 @@
body []byte
offset uint64
deltaDepth int
+ inPack bool
}
type deltaContext struct {--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -283,12 +283,40 @@
// packWrite writes a pack stream for the provided object ids.
func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) {+ if opts.EnableThinPack {+ return Hash{}, errThinPackUnimplemented+ }
+ return repo.packWriteObjects(w, objects, opts, nil)
+}
+
+// packWriteReachable writes a pack stream for objects reachable from the
+// provided reachability query.
+func (repo *Repository) packWriteReachable(w io.Writer, query ReachabilityQuery, opts packWriteOptions) (Hash, error) { if repo == nil { return Hash{}, ErrInvalidObject}
- if opts.EnableThinPack {- return Hash{}, errThinPackUnimplemented+ query.Mode = ReachabilityAllObjects
+ walk, err := repo.ReachableObjects(query)
+ if err != nil {+ return Hash{}, err}
+ var objects []Hash
+ for obj := range walk.Seq() {+ objects = append(objects, obj.ID)
+ }
+ if err := walk.Err(); err != nil {+ return Hash{}, err+ }
+ return repo.packWriteObjects(w, objects, opts, walk)
+}
+
+func (repo *Repository) packWriteObjects(w io.Writer, objects []Hash, opts packWriteOptions, have *ReachabilityWalk) (Hash, error) {+ if repo == nil {+ return Hash{}, ErrInvalidObject+ }
+ if opts.EnableThinPack && have == nil {+ return Hash{}, ErrInvalidObject+ }
if len(objects) > int(^uint32(0)) { return Hash{}, ErrInvalidObject}
@@ -312,6 +340,12 @@
deltaSeed = binary.LittleEndian.Uint64(seedBytes[:])
}
+ if opts.EnableDeltas && opts.EnableThinPack {+ if err := repo.seedDeltaCandidatesFromHaves(&dctx, have.query.Haves); err != nil {+ return Hash{}, err+ }
+ }
+
for _, id := range objects {ty, body, err := repo.ReadObjectTypeRaw(id)
if err != nil {@@ -318,9 +352,10 @@
return Hash{}, err}
obj := &objectToPack{- id: id,
- ty: ty,
- body: body,
+ id: id,
+ ty: ty,
+ body: body,
+ inPack: true,
}
startOffset := pw.bytesWritten
wroteDelta := false
@@ -328,11 +363,27 @@
if opts.EnableDeltas && ty == ObjectTypeBlob {base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
if base != nil && delta != nil {- if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {- return Hash{}, err+ switch {+ case base.inPack:
+ if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {+ return Hash{}, err+ }
+ wroteDelta = true
+ obj.deltaDepth = base.deltaDepth + 1
+ case opts.EnableThinPack:
+ inHave, err := have.HaveContains(base.id)
+ if err != nil {+ return Hash{}, err+ }
+ if inHave {+ if err := pw.WriteRefDelta(base.id, len(base.body), len(body), delta); err != nil {+ return Hash{}, err+ }
+ wroteDelta = true
+ obj.deltaDepth = base.deltaDepth + 1
+ }
+ default:
}
- wroteDelta = true
- obj.deltaDepth = base.deltaDepth + 1
}
}
if !wroteDelta {@@ -349,6 +400,39 @@
}
return pw.Close()
+}
+
+// seedDeltaCandidatesFromHaves offers every blob reachable from haves to ctx
+// as a potential delta base.
+//
+// Each candidate is created with inPack set to false, recording that the
+// object is not written into the pack being produced. A nil repo yields
+// ErrInvalidObject. A nil ctx, a non-positive ctx.window, or an empty haves
+// list makes the call a no-op. Any error from the reachability walk or from
+// reading a blob is returned unchanged.
+func (repo *Repository) seedDeltaCandidatesFromHaves(ctx *deltaContext, haves []Hash) error {
+	switch {
+	case repo == nil:
+		return ErrInvalidObject
+	case ctx == nil || ctx.window <= 0 || len(haves) == 0:
+		// Nothing to seed, or nowhere to put the candidates.
+		return nil
+	}
+
+	// The haves act as the starting points of the walk so that everything
+	// reachable from them is visited.
+	haveQuery := ReachabilityQuery{
+		Wants: haves,
+		Mode:  ReachabilityAllObjects,
+	}
+	reach, err := repo.ReachableObjects(haveQuery)
+	if err != nil {
+		return err
+	}
+
+	for entry := range reach.Seq() {
+		// Only blobs are offered as delta bases.
+		if entry.Type == ObjectTypeBlob {
+			kind, raw, readErr := repo.ReadObjectTypeRaw(entry.ID)
+			if readErr != nil {
+				return readErr
+			}
+			ctx.addCandidate(&objectToPack{
+				id:     entry.ID,
+				ty:     kind,
+				body:   raw,
+				inPack: false,
+			})
+		}
+	}
+	return reach.Err()
}
type packWriteOptions struct {--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -358,6 +358,90 @@
_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}
+// TestPackWriteThinPackReachable writes a thin pack for a commit whose only
+// changed blob differs by one byte from a blob in the "have" commit, then
+// checks that:
+//   - the stream parses and contains at least one ref-delta whose base is not
+//     in the pack itself (checkThinPackStream), and
+//   - git can complete and index the pack with "index-pack --fix-thin", after
+//     which the wanted commit is readable and the repository passes fsck.
+func TestPackWriteThinPackReachable(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	// First commit: a 16 KiB blob that becomes the "have" side.
+	base := bytes.Repeat([]byte("A"), 16384)
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), base, 0o644); err != nil {
+		t.Fatalf("write base file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "base")
+	haveHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	// Second commit: the same blob with a single byte changed, so a delta
+	// against the first blob is much smaller than the full object.
+	mod := append([]byte(nil), base...)
+	mod[1024] = 'B'
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), mod, 0o644); err != nil {
+		t.Fatalf("write mod file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "target")
+	wantHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	// Fail here on an unparsable hash rather than continuing with a zero
+	// Hash and hitting a confusing failure further down.
+	wantID, err := repo.ParseHash(wantHash)
+	if err != nil {
+		t.Fatalf("parse want hash %q: %v", wantHash, err)
+	}
+	haveID, err := repo.ParseHash(haveHash)
+	if err != nil {
+		t.Fatalf("parse have hash %q: %v", haveHash, err)
+	}
+
+	query := ReachabilityQuery{
+		Wants:       []Hash{wantID},
+		Haves:       []Hash{haveID},
+		Mode:        ReachabilityAllObjects,
+		StopAtHaves: true,
+	}
+	var buf bytes.Buffer
+	if _, err := repo.packWriteReachable(&buf, query, packWriteOptions{
+		EnableDeltas:    true,
+		EnableThinPack:  true,
+		MinDeltaSavings: 1,
+	}); err != nil {
+		t.Fatalf("packWriteReachable failed: %v", err)
+	}
+
+	thinSeen, err := checkThinPackStream(buf.Bytes(), repo)
+	if err != nil {
+		t.Fatalf("thin pack stream invalid: %v", err)
+	}
+	if !thinSeen {
+		t.Fatalf("expected thin pack with ref-delta base outside pack")
+	}
+
+	packDir := filepath.Join(repoPath, "objects", "pack")
+	if err := os.MkdirAll(packDir, 0o755); err != nil {
+		t.Fatalf("failed to create pack dir: %v", err)
+	}
+	packPath := filepath.Join(packDir, "furgit-thin-test.pack")
+	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
+
+	// Removal errors are ignored on purpose: the files may not exist.
+	removePack := func() {
+		_ = os.Remove(packPath)
+		_ = os.Remove(idxPath)
+	}
+	removePack()
+	// Deferred so the pack is also removed when an assertion below fails.
+	// Defers run last-in first-out, so this runs before the repository is
+	// closed and before the test directories are cleaned up.
+	defer removePack()
+
+	cmd := exec.Command("git", "index-pack", "--stdin", "--fix-thin", "-o", idxPath, packPath)
+	cmd.Dir = repoPath
+	// Point the global and system config at /dev/null so the invocation does
+	// not read the user's or the machine's git configuration.
+	cmd.Env = append(os.Environ(),
+		"GIT_CONFIG_GLOBAL=/dev/null",
+		"GIT_CONFIG_SYSTEM=/dev/null",
+	)
+	cmd.Stdin = bytes.NewReader(buf.Bytes())
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("git index-pack --fix-thin failed: %v\n%s", err, output)
+	}
+
+	_ = gitCmd(t, repoPath, "cat-file", "-p", wantHash)
+	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
+}
+
func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {data, err := os.ReadFile(path)
if err != nil {@@ -502,6 +586,134 @@
}
}
return nil
+}
+
+// checkThinPackStream parses an in-memory pack stream and reconstructs every
+// object in it, resolving deltas as it goes.
+//
+// It validates the 12-byte header (magic, version 2, object count) and then
+// reads exactly the advertised number of objects. Ofs-delta bases must have
+// appeared earlier in the stream at the referenced offset. Ref-delta bases
+// are looked up among the objects already decoded from the stream and, when
+// absent there, read from repo.
+//
+// The returned bool reports whether at least one ref-delta had to take its
+// base from repo, which is what makes the pack thin. It carries the value
+// accumulated so far even when an error is returned. Bytes following the
+// last object are not examined.
+func checkThinPackStream(data []byte, repo *Repository) (bool, error) {
+	if repo == nil {
+		return false, ErrInvalidObject
+	}
+	if len(data) < 12 {
+		return false, ErrInvalidObject
+	}
+	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
+		return false, ErrInvalidObject
+	}
+	count := int(binary.BigEndian.Uint32(data[8:12]))
+	pos := 12
+	hashSize := repo.hashAlgo.Size()
+	type objEntry struct {
+		offset uint64
+		ty     ObjectType
+		body   []byte
+	}
+	// Decoded objects, indexed by stream offset (for ofs-deltas) and by
+	// object hash (for ref-deltas).
+	byOffset := make(map[uint64]objEntry, count)
+	byHash := make(map[string]objEntry, count)
+	thinSeen := false
+
+	for i := 0; i < count; i++ {
+		objOffset := uint64(pos)
+		ty, size, consumed, err := packHeaderParse(data[pos:])
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d header at %d: %w", i, pos, err)
+		}
+		pos += consumed
+
+		// Resolve the delta base, if this entry is a delta.
+		baseTy := ObjectTypeInvalid
+		var baseBody []byte
+		switch ty {
+		case ObjectTypeOfsDelta:
+			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: %w", i, pos, err)
+			}
+			pos += distConsumed
+			// The base must lie strictly before this object in the stream.
+			if dist == 0 || dist > objOffset {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
+			}
+			baseOffset := objOffset - dist
+			base, ok := byOffset[baseOffset]
+			if !ok {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
+			}
+			baseTy = base.ty
+			baseBody = base.body
+		case ObjectTypeRefDelta:
+			if pos+hashSize > len(data) {
+				return thinSeen, ErrInvalidObject
+			}
+			var baseHash Hash
+			copy(baseHash.data[:], data[pos:pos+hashSize])
+			baseHash.algo = repo.hashAlgo
+			if baseEntry, ok := byHash[baseHash.String()]; ok {
+				baseTy = baseEntry.ty
+				baseBody = baseEntry.body
+			} else {
+				// Base is not among the objects decoded so far: this is the
+				// thin-pack case, so fetch it from the repository.
+				thinSeen = true
+				extTy, extBody, err := repo.ReadObjectTypeRaw(baseHash)
+				if err != nil {
+					return thinSeen, err
+				}
+				baseTy = extTy
+				baseBody = extBody
+			}
+			pos += hashSize
+		default:
+		}
+
+		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d zlib at %d: %w", i, pos, err)
+		}
+		// Copy out of the buffer before releasing it. For an empty payload
+		// this yields a nil slice.
+		payload := append([]byte(nil), payloadBuf.Bytes()...)
+		payloadBuf.Release()
+		pos += zconsumed
+
+		switch ty {
+		case ObjectTypeOfsDelta, ObjectTypeRefDelta:
+			// A nil base body (which includes a zero-length base decoded
+			// above) is rejected here.
+			if baseBody == nil {
+				return thinSeen, fmt.Errorf("obj %d missing base body", i)
+			}
+			// The delta payload starts with two varints: the expected base
+			// size and the expected result size.
+			deltaPos := 0
+			baseSize, err := packVarintRead(payload, &deltaPos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta base size: %w", i, err)
+			}
+			resultSize, err := packVarintRead(payload, &deltaPos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta result size: %w", i, err)
+			}
+			if baseSize != len(baseBody) {
+				return thinSeen, fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
+			}
+			// NOTE(review): baseBody may be shared with an entry in byOffset
+			// or byHash; this assumes packDeltaApply does not recycle or
+			// mutate the buffers it is given - confirm against bufpool.
+			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta apply: %w", i, err)
+			}
+			body := append([]byte(nil), out.Bytes()...)
+			out.Release()
+			if resultSize != len(body) {
+				return thinSeen, fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
+			}
+			// A resolved delta has the type of its base.
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
+		default:
+			if size >= 0 && len(payload) != size {
+				return thinSeen, fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
+			}
+			// payload is already a private copy, so it is stored directly.
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: payload}
+		}
+
+		// Index the decoded object by hash so later ref-deltas can find it.
+		// Entries with a nil body are not indexed.
+		entry := byOffset[objOffset]
+		if entry.body != nil && entry.ty != ObjectTypeInvalid {
+			hdr, err := headerForType(entry.ty, entry.body)
+			if err != nil {
+				return thinSeen, err
+			}
+			raw := append(hdr, entry.body...)
+			hash := repo.hashAlgo.Sum(raw)
+			byHash[hash.String()] = entry
+		}
+	}
+	return thinSeen, nil
}
func removeLooseObject(repoPath, oid string) error {--
⑨