shithub: furgit

Download patch

ref: 8e320c9ca634e6b2431f9442b7d5191864735ae4
parent: 1fa0d2bcfa7aebdcec8644f53acc58465c109b72
author: Runxi Yu <me@runxiyu.org>
date: Fri Jan 30 12:06:51 EST 2026

packed, delta: Implement thin packs

--- a/delta_write_select.go
+++ b/delta_write_select.go
@@ -8,6 +8,7 @@
 	body       []byte
 	offset     uint64
 	deltaDepth int
+	inPack     bool
 }
 
 type deltaContext struct {
--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -283,12 +283,40 @@
 
 // packWrite writes a pack stream for the provided object ids.
 func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) {
+	if opts.EnableThinPack {
+		return Hash{}, errThinPackUnimplemented // this entry point has no have-set to delta against
+	}
+	return repo.packWriteObjects(w, objects, opts, nil) // nil: no reachability walk accompanies a plain id list
+}
+
+// packWriteReachable collects the ids yielded by repo.ReachableObjects for
+// query (Mode forced to ReachabilityAllObjects) and writes them as a pack to w.
+func (repo *Repository) packWriteReachable(w io.Writer, query ReachabilityQuery, opts packWriteOptions) (Hash, error) {
 	if repo == nil {
 		return Hash{}, ErrInvalidObject
 	}
-	if opts.EnableThinPack {
-		return Hash{}, errThinPackUnimplemented
+	query.Mode = ReachabilityAllObjects // overrides whatever Mode the caller supplied
+	walk, err := repo.ReachableObjects(query)
+	if err != nil {
+		return Hash{}, err
 	}
+	var objects []Hash
+	for obj := range walk.Seq() { // drain the walk into a flat id list
+		objects = append(objects, obj.ID)
+	}
+	if err := walk.Err(); err != nil { // consulted only once the sequence is exhausted
+		return Hash{}, err
+	}
+	return repo.packWriteObjects(w, objects, opts, walk) // the same walk is passed on as the have-set
+}
+
+func (repo *Repository) packWriteObjects(w io.Writer, objects []Hash, opts packWriteOptions, have *ReachabilityWalk) (Hash, error) {
+	if repo == nil {
+		return Hash{}, ErrInvalidObject
+	}
+	if opts.EnableThinPack && have == nil {
+		return Hash{}, ErrInvalidObject
+	}
 	if len(objects) > int(^uint32(0)) {
 		return Hash{}, ErrInvalidObject
 	}
@@ -312,6 +340,12 @@
 		deltaSeed = binary.LittleEndian.Uint64(seedBytes[:])
 	}
 
+	if opts.EnableDeltas && opts.EnableThinPack {
+		if err := repo.seedDeltaCandidatesFromHaves(&dctx, have.query.Haves); err != nil {
+			return Hash{}, err
+		}
+	}
+
 	for _, id := range objects {
 		ty, body, err := repo.ReadObjectTypeRaw(id)
 		if err != nil {
@@ -318,9 +352,10 @@
 			return Hash{}, err
 		}
 		obj := &objectToPack{
-			id:   id,
-			ty:   ty,
-			body: body,
+			id:     id,
+			ty:     ty,
+			body:   body,
+			inPack: true,
 		}
 		startOffset := pw.bytesWritten
 		wroteDelta := false
@@ -328,11 +363,27 @@
 		if opts.EnableDeltas && ty == ObjectTypeBlob {
 			base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
 			if base != nil && delta != nil {
-				if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {
-					return Hash{}, err
+				switch {
+				case base.inPack:
+					if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {
+						return Hash{}, err
+					}
+					wroteDelta = true
+					obj.deltaDepth = base.deltaDepth + 1
+				case opts.EnableThinPack:
+					inHave, err := have.HaveContains(base.id)
+					if err != nil {
+						return Hash{}, err
+					}
+					if inHave {
+						if err := pw.WriteRefDelta(base.id, len(base.body), len(body), delta); err != nil {
+							return Hash{}, err
+						}
+						wroteDelta = true
+						obj.deltaDepth = base.deltaDepth + 1
+					}
+				default:
 				}
-				wroteDelta = true
-				obj.deltaDepth = base.deltaDepth + 1
 			}
 		}
 		if !wroteDelta {
@@ -349,6 +400,39 @@
 	}
 
 	return pw.Close()
+}
+
+// seedDeltaCandidatesFromHaves walks every object reachable from haves and
+// hands each blob to ctx.addCandidate as a candidate marked inPack=false.
+// It returns nil without walking when ctx is nil, ctx.window is not
+// positive, or haves is empty; a nil repo yields ErrInvalidObject.
+func (repo *Repository) seedDeltaCandidatesFromHaves(ctx *deltaContext, haves []Hash) error {
+	switch {
+	case repo == nil:
+		return ErrInvalidObject
+	case ctx == nil || ctx.window <= 0 || len(haves) == 0:
+		return nil
+	}
+	haveWalk, err := repo.ReachableObjects(ReachabilityQuery{Wants: haves, Mode: ReachabilityAllObjects})
+	if err != nil {
+		return err
+	}
+	for reached := range haveWalk.Seq() {
+		if reached.Type != ObjectTypeBlob {
+			continue
+		}
+		blobType, blobBody, err := repo.ReadObjectTypeRaw(reached.ID)
+		if err != nil {
+			return err
+		}
+		ctx.addCandidate(&objectToPack{
+			id:     reached.ID,
+			ty:     blobType,
+			body:   blobBody,
+			inPack: false,
+		})
+	}
+	return haveWalk.Err()
 }
 
 type packWriteOptions struct {
--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -358,6 +358,90 @@
 	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
 }
 
+// TestPackWriteThinPackReachable writes a thin pack for a want commit against
+// a have commit, then validates it in-process and via git index-pack --fix-thin.
+func TestPackWriteThinPackReachable(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	base := bytes.Repeat([]byte("A"), 16384)
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), base, 0o644); err != nil {
+		t.Fatalf("write base file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "base")
+	haveHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	mod := append([]byte(nil), base...)
+	mod[1024] = 'B' // the second revision differs from base in a single byte
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), mod, 0o644); err != nil {
+		t.Fatalf("write mod file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "target")
+	wantHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	wantID, wantErr := repo.ParseHash(wantHash)
+	haveID, haveErr := repo.ParseHash(haveHash)
+	if wantErr != nil || haveErr != nil {
+		t.Fatalf("ParseHash failed: want=%v have=%v", wantErr, haveErr)
+	}
+
+	query := ReachabilityQuery{
+		Wants:       []Hash{wantID},
+		Haves:       []Hash{haveID},
+		Mode:        ReachabilityAllObjects,
+		StopAtHaves: true,
+	}
+	var buf bytes.Buffer
+	if _, err := repo.packWriteReachable(&buf, query, packWriteOptions{
+		EnableDeltas:    true,
+		EnableThinPack:  true,
+		MinDeltaSavings: 1,
+	}); err != nil {
+		t.Fatalf("packWriteReachable failed: %v", err)
+	}
+
+	thinSeen, err := checkThinPackStream(buf.Bytes(), repo)
+	if err != nil {
+		t.Fatalf("thin pack stream invalid: %v", err)
+	}
+	if !thinSeen {
+		t.Fatalf("expected thin pack with ref-delta base outside pack")
+	}
+
+	packDir := filepath.Join(repoPath, "objects", "pack")
+	if err := os.MkdirAll(packDir, 0o755); err != nil {
+		t.Fatalf("failed to create pack dir: %v", err)
+	}
+	packPath := filepath.Join(packDir, "furgit-thin-test.pack")
+	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
+	_ = os.Remove(packPath) // best effort: clear leftovers from an earlier run
+	_ = os.Remove(idxPath)
+
+	cmd := exec.Command("git", "index-pack", "--stdin", "--fix-thin", "-o", idxPath, packPath)
+	cmd.Dir = repoPath
+	cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null", "GIT_CONFIG_SYSTEM=/dev/null")
+	cmd.Stdin = bytes.NewReader(buf.Bytes())
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("git index-pack --fix-thin failed: %v\n%s", err, output)
+	}
+
+	_ = gitCmd(t, repoPath, "cat-file", "-p", wantHash)
+	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
+	_ = os.Remove(packPath) // best effort: do not leave the completed pack behind
+	_ = os.Remove(idxPath)
+}
+
 func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
 	data, err := os.ReadFile(path)
 	if err != nil {
@@ -502,6 +586,134 @@
 		}
 	}
 	return nil
+}
+
+// checkThinPackStream parses the pack stream in data, resolving ref-delta
+// bases missing from the stream via repo, and reports whether any such
+// out-of-pack base was used. The bool is also returned alongside errors.
+func checkThinPackStream(data []byte, repo *Repository) (bool, error) {
+	if repo == nil {
+		return false, ErrInvalidObject
+	}
+	if len(data) < 12 {
+		return false, ErrInvalidObject
+	}
+	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
+		return false, ErrInvalidObject
+	}
+	count := int(binary.BigEndian.Uint32(data[8:12]))
+	pos := 12
+	hashSize := repo.hashAlgo.Size()
+	type objEntry struct {
+		offset uint64
+		ty     ObjectType
+		body   []byte
+	}
+	byOffset := make(map[uint64]objEntry, count)
+	byHash := make(map[string]objEntry, count)
+	thinSeen := false
+
+	for i := 0; i < count; i++ {
+		objOffset := uint64(pos)
+		ty, size, consumed, err := packHeaderParse(data[pos:])
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d header at %d: %w", i, pos, err)
+		}
+		pos += consumed
+		baseTy := ObjectTypeInvalid
+		var baseBody []byte
+		switch ty {
+		case ObjectTypeOfsDelta:
+			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: %w", i, pos, err)
+			}
+			pos += distConsumed
+			if dist == 0 || dist > objOffset {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
+			}
+			baseOffset := objOffset - dist
+			base, ok := byOffset[baseOffset]
+			if !ok {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
+			}
+			baseTy = base.ty
+			baseBody = base.body
+		case ObjectTypeRefDelta:
+			if pos+hashSize > len(data) {
+				return thinSeen, ErrInvalidObject
+			}
+			baseHash := Hash{algo: repo.hashAlgo}
+			copy(baseHash.data[:], data[pos:pos+hashSize])
+			baseEntry, ok := byHash[baseHash.String()]
+			if ok {
+				baseTy = baseEntry.ty
+				baseBody = baseEntry.body
+			} else {
+				thinSeen = true // base was not emitted earlier in this stream
+				baseTy, baseBody, err = repo.ReadObjectTypeRaw(baseHash)
+				if err != nil {
+					return thinSeen, err
+				}
+			}
+			pos += hashSize
+		default:
+		}
+
+		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d zlib at %d: %w", i, pos, err)
+		}
+		payload := append([]byte(nil), payloadBuf.Bytes()...)
+		payloadBuf.Release()
+		pos += zconsumed
+		switch ty {
+		case ObjectTypeOfsDelta, ObjectTypeRefDelta:
+			if baseBody == nil {
+				return thinSeen, fmt.Errorf("obj %d missing base body", i)
+			}
+			pos := 0 // cursor into the delta payload; shadows the stream offset
+			baseSize, err := packVarintRead(payload, &pos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta base size: %w", i, err)
+			}
+			resultSize, err := packVarintRead(payload, &pos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta result size: %w", i, err)
+			}
+			if baseSize != len(baseBody) {
+				return thinSeen, fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
+			}
+			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta apply: %w", i, err)
+			}
+			body := append([]byte(nil), out.Bytes()...)
+			out.Release()
+			if resultSize != len(body) {
+				return thinSeen, fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
+			}
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body} // recorded under the base's type
+		default:
+			if size >= 0 && len(payload) != size {
+				return thinSeen, fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
+			}
+			body := append([]byte(nil), payload...)
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
+		}
+
+		entry := byOffset[objOffset]
+		if entry.body != nil && entry.ty != ObjectTypeInvalid {
+			hdr, err := headerForType(entry.ty, entry.body)
+			if err != nil {
+				return thinSeen, err
+			}
+			raw := append(hdr, entry.body...)
+			hash := repo.hashAlgo.Sum(raw)
+			byHash[hash.String()] = entry // lets later ref-deltas find this object by id
+		}
+	}
+	return thinSeen, nil
 }
 
 func removeLooseObject(repoPath, oid string) error {
--