shithub: furgit

Download patch

ref: 1fa0d2bcfa7aebdcec8644f53acc58465c109b72
parent: 68ab022cf978fc1159cdcfb04c62cd57521be847
author: Runxi Yu <me@runxiyu.org>
date: Fri Jan 30 11:44:28 EST 2026

reachability: Add basic reachability API

Bitmaps not supported yet

--- /dev/null
+++ b/reachability.go
@@ -1,0 +1,385 @@
+package furgit
+
+import (
+	"fmt"
+	"iter"
+)
+
+// ReachabilityMode controls which object types are walked.
+type ReachabilityMode uint8
+
+const (
+	// ReachabilityCommitsOnly walks only commit objects.
+	ReachabilityCommitsOnly ReachabilityMode = iota
+	// ReachabilityAllObjects walks commits, trees, blobs, and tags reachable
+	// from the commits in Wants.
+	ReachabilityAllObjects
+)
+
+// ReachabilityQuery describes a want/have reachability walk.
+//
+// ReachableObjects returns objects reachable from Wants. If Mode is
+// ReachabilityCommitsOnly, non-commit Wants are ignored except for tags,
+// which are peeled to their target.
+type ReachabilityQuery struct {
+	Wants []Hash
+	Haves []Hash
+	Mode  ReachabilityMode
+
+	// StopAtHaves prunes traversal when an object is reachable from Haves.
+	StopAtHaves bool
+}
+
+// ReachableObject reports a reachable object and whether it is also reachable
+// from the Have set.
+type ReachableObject struct {
+	ID     Hash
+	Type   ObjectType
+	InHave bool
+}
+
+// ReachabilityWalk is a single-use reachability iterator.
+// After iterating, Err reports any error encountered during the walk.
+type ReachabilityWalk struct {
+	repo  *Repository
+	query ReachabilityQuery
+	err   error
+
+	haveInit bool
+	haveErr  error
+	haveSet  map[Hash]struct{}
+}
+
+// ReachableObjects returns a single-use iterator over objects reachable from
+// query.Wants.
+//
+// It yields ReachableObject values; InHave is true when the object is also
+// reachable from query.Haves.
+func (repo *Repository) ReachableObjects(query ReachabilityQuery) (*ReachabilityWalk, error) {
+	if repo == nil {
+		return nil, ErrInvalidObject
+	}
+	switch query.Mode {
+	case ReachabilityCommitsOnly, ReachabilityAllObjects:
+	default:
+		return nil, ErrInvalidObject
+	}
+	for _, id := range query.Wants {
+		if id.algo != repo.hashAlgo {
+			return nil, fmt.Errorf("furgit: reachability: want hash algorithm mismatch")
+		}
+	}
+	for _, id := range query.Haves {
+		if id.algo != repo.hashAlgo {
+			return nil, fmt.Errorf("furgit: reachability: have hash algorithm mismatch")
+		}
+	}
+	return &ReachabilityWalk{
+		repo:  repo,
+		query: query,
+	}, nil
+}
+
+// Seq returns the iterator.
+func (w *ReachabilityWalk) Seq() iter.Seq[ReachableObject] {
+	return func(yield func(ReachableObject) bool) {
+		if w == nil || w.repo == nil {
+			w.err = ErrInvalidObject
+			return
+		}
+		haveSet, err := w.ensureHaveSet()
+		if err != nil {
+			w.err = err
+			return
+		}
+
+		wantWalk := reachabilityWalker{
+			repo:        w.repo,
+			mode:        w.query.Mode,
+			seenCommits: make(map[Hash]struct{}),
+			seenObjects: make(map[Hash]struct{}),
+			haveSet:     haveSet,
+			stopAtHaves: w.query.StopAtHaves,
+		}
+		if err := wantWalk.walkRoots(w.query.Wants, func(obj ReachableObject) bool {
+			return yield(obj)
+		}); err != nil {
+			w.err = err
+			return
+		}
+	}
+}
+
+// Err reports the first error encountered by the iterator.
+func (w *ReachabilityWalk) Err() error {
+	if w == nil {
+		return ErrInvalidObject
+	}
+	return w.err
+}
+
+// HaveContains reports whether id is reachable from Haves.
+func (w *ReachabilityWalk) HaveContains(id Hash) (bool, error) {
+	if w == nil || w.repo == nil {
+		return false, ErrInvalidObject
+	}
+	haveSet, err := w.ensureHaveSet()
+	if err != nil {
+		return false, err
+	}
+	_, ok := haveSet[id]
+	return ok, nil
+}
+
+func (w *ReachabilityWalk) ensureHaveSet() (map[Hash]struct{}, error) {
+	if w.haveInit {
+		return w.haveSet, w.haveErr
+	}
+	w.haveInit = true
+	w.haveSet = make(map[Hash]struct{})
+	if len(w.query.Haves) == 0 {
+		return w.haveSet, nil
+	}
+	haveWalk := reachabilityWalker{
+		repo:           w.repo,
+		mode:           w.query.Mode,
+		seenCommits:    make(map[Hash]struct{}),
+		seenObjects:    make(map[Hash]struct{}),
+		recordHaveOnly: true,
+		haveSet:        w.haveSet,
+	}
+	if err := haveWalk.walkRoots(w.query.Haves, nil); err != nil {
+		w.haveErr = err
+		return nil, err
+	}
+	return w.haveSet, nil
+}
+
+type reachabilityWalker struct {
+	repo *Repository
+	mode ReachabilityMode
+
+	seenCommits map[Hash]struct{}
+	seenObjects map[Hash]struct{}
+
+	haveSet        map[Hash]struct{}
+	recordHaveOnly bool
+	stopAtHaves    bool
+
+	cg     *commitGraph
+	cgInit bool
+}
+
+func (rw *reachabilityWalker) initCommitGraph() {
+	if rw.cgInit {
+		return
+	}
+	rw.cgInit = true
+	cg, err := rw.repo.CommitGraph()
+	if err == nil {
+		rw.cg = cg
+	}
+}
+
+func (rw *reachabilityWalker) walkRoots(roots []Hash, emit func(ReachableObject) bool) error {
+	for _, id := range roots {
+		if err := rw.walkObject(id, emit); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (rw *reachabilityWalker) walkObject(id Hash, emit func(ReachableObject) bool) error {
+	if rw.stopAtHaves {
+		if _, ok := rw.haveSet[id]; ok {
+			return nil
+		}
+	}
+	if rw.recordHaveOnly {
+		if _, ok := rw.haveSet[id]; ok {
+			return nil
+		}
+	} else {
+		if _, ok := rw.seenObjects[id]; ok {
+			return nil
+		}
+	}
+
+	rw.initCommitGraph()
+	if rw.cg != nil {
+		if pos, ok := rw.cg.CommitPosition(id); ok {
+			return rw.walkCommitByPos(pos, id, emit)
+		}
+	}
+
+	ty, body, err := rw.repo.ReadObjectTypeRaw(id)
+	if err != nil {
+		return err
+	}
+
+	switch ty {
+	case ObjectTypeCommit:
+		return rw.walkCommitBody(id, body, emit)
+	case ObjectTypeTree:
+		if rw.mode != ReachabilityAllObjects {
+			return nil
+		}
+		return rw.walkTreeBody(id, body, emit)
+	case ObjectTypeBlob:
+		if rw.mode != ReachabilityAllObjects {
+			return nil
+		}
+		return rw.emitObject(id, ObjectTypeBlob, emit)
+	case ObjectTypeTag:
+		return rw.walkTagBody(id, body, emit)
+	default:
+		return ErrInvalidObject
+	}
+}
+
+func (rw *reachabilityWalker) walkCommitByPos(pos uint32, id Hash, emit func(ReachableObject) bool) error {
+	if _, ok := rw.seenCommits[id]; ok {
+		return nil
+	}
+	rw.seenCommits[id] = struct{}{}
+
+	cc, err := rw.cg.CommitAt(pos)
+	if err != nil {
+		return err
+	}
+
+	if err := rw.emitObject(id, ObjectTypeCommit, emit); err != nil {
+		return err
+	}
+
+	if rw.mode == ReachabilityAllObjects {
+		if err := rw.walkTreeByID(cc.Tree, emit); err != nil {
+			return err
+		}
+	}
+
+	for _, parentPos := range cc.Parents {
+		parentID, err := rw.cg.OIDAt(parentPos)
+		if err != nil {
+			return err
+		}
+		if err := rw.walkObject(parentID, emit); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (rw *reachabilityWalker) walkCommitBody(id Hash, body []byte, emit func(ReachableObject) bool) error {
+	if _, ok := rw.seenCommits[id]; ok {
+		return nil
+	}
+	rw.seenCommits[id] = struct{}{}
+
+	commit, err := parseCommit(id, body, rw.repo)
+	if err != nil {
+		return err
+	}
+	if err := rw.emitObject(id, ObjectTypeCommit, emit); err != nil {
+		return err
+	}
+	if rw.mode == ReachabilityAllObjects {
+		if err := rw.walkTreeByID(commit.Tree, emit); err != nil {
+			return err
+		}
+	}
+	for _, parent := range commit.Parents {
+		if err := rw.walkObject(parent, emit); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (rw *reachabilityWalker) walkTagBody(id Hash, body []byte, emit func(ReachableObject) bool) error {
+	tag, err := parseTag(id, body, rw.repo)
+	if err != nil {
+		return err
+	}
+	if rw.mode == ReachabilityAllObjects {
+		if err := rw.emitObject(id, ObjectTypeTag, emit); err != nil {
+			return err
+		}
+	}
+	if tag.TargetType == ObjectTypeCommit {
+		return rw.walkObject(tag.Target, emit)
+	}
+	if rw.mode == ReachabilityAllObjects {
+		return rw.walkObject(tag.Target, emit)
+	}
+	return nil
+}
+
+func (rw *reachabilityWalker) walkTreeByID(id Hash, emit func(ReachableObject) bool) error {
+	if rw.mode != ReachabilityAllObjects {
+		return nil
+	}
+	if _, ok := rw.seenObjects[id]; ok && !rw.recordHaveOnly {
+		return nil
+	}
+	ty, body, err := rw.repo.ReadObjectTypeRaw(id)
+	if err != nil {
+		return err
+	}
+	if ty != ObjectTypeTree {
+		return ErrInvalidObject
+	}
+	return rw.walkTreeBody(id, body, emit)
+}
+
+func (rw *reachabilityWalker) walkTreeBody(id Hash, body []byte, emit func(ReachableObject) bool) error {
+	if rw.mode != ReachabilityAllObjects {
+		return nil
+	}
+	tree, err := parseTree(id, body, rw.repo)
+	if err != nil {
+		return err
+	}
+	if err := rw.emitObject(id, ObjectTypeTree, emit); err != nil {
+		return err
+	}
+	for _, entry := range tree.Entries {
+		switch entry.Mode {
+		case FileModeDir:
+			if err := rw.walkTreeByID(entry.ID, emit); err != nil {
+				return err
+			}
+		case FileModeGitlink:
+			// IIRC Gitlinks are references to external repositories
+			// and do not imply reachability of the target commit...
+			continue
+		default:
+			if err := rw.emitObject(entry.ID, ObjectTypeBlob, emit); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (rw *reachabilityWalker) emitObject(id Hash, ty ObjectType, emit func(ReachableObject) bool) error {
+	if rw.recordHaveOnly {
+		rw.haveSet[id] = struct{}{}
+		return nil
+	}
+	if _, ok := rw.seenObjects[id]; ok {
+		return nil
+	}
+	rw.seenObjects[id] = struct{}{}
+	inHave := false
+	if _, ok := rw.haveSet[id]; ok {
+		inHave = true
+	}
+	if emit != nil {
+		if !emit(ReachableObject{ID: id, Type: ty, InHave: inHave}) {
+			return nil
+		}
+	}
+	return nil
+}
--- /dev/null
+++ b/reachability_test.go
@@ -1,0 +1,196 @@
+package furgit
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestReachabilityCommitsWantHave(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	var commits []string
+	for i := 0; i < 3; i++ {
+		path := filepath.Join(workDir, "file.txt")
+		if err := os.WriteFile(path, []byte{byte('a' + i), '\n'}, 0o644); err != nil {
+			t.Fatalf("write file: %v", err)
+		}
+		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit")
+		commits = append(commits, gitCmd(t, repoPath, "rev-parse", "HEAD"))
+	}
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	wantID, _ := repo.ParseHash(commits[2])
+	haveID, _ := repo.ParseHash(commits[1])
+	walk, err := repo.ReachableObjects(ReachabilityQuery{
+		Wants: []Hash{wantID},
+		Haves: []Hash{haveID},
+		Mode:  ReachabilityCommitsOnly,
+	})
+	if err != nil {
+		t.Fatalf("ReachableObjects failed: %v", err)
+	}
+
+	seen := make(map[Hash]ReachableObject)
+	for obj := range walk.Seq() {
+		seen[obj.ID] = obj
+		if obj.Type != ObjectTypeCommit {
+			t.Fatalf("unexpected object type: %v", obj.Type)
+		}
+	}
+	if err := walk.Err(); err != nil {
+		t.Fatalf("Reachability walk error: %v", err)
+	}
+
+	headID := wantID
+	parentID, _ := repo.ParseHash(commits[1])
+	rootID, _ := repo.ParseHash(commits[0])
+	if _, ok := seen[headID]; !ok {
+		t.Fatalf("missing head commit")
+	}
+	if _, ok := seen[parentID]; !ok {
+		t.Fatalf("missing parent commit")
+	}
+	if _, ok := seen[rootID]; !ok {
+		t.Fatalf("missing root commit")
+	}
+	if seen[headID].InHave {
+		t.Fatalf("head commit incorrectly marked InHave")
+	}
+	if !seen[parentID].InHave || !seen[rootID].InHave {
+		t.Fatalf("expected parent and root commits to be InHave")
+	}
+
+	inHave, err := walk.HaveContains(parentID)
+	if err != nil {
+		t.Fatalf("HaveContains failed: %v", err)
+	}
+	if !inHave {
+		t.Fatalf("expected parent to be reachable from have")
+	}
+}
+
+func TestReachabilityAllObjects(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	if err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("one\n"), 0o644); err != nil {
+		t.Fatalf("write file1: %v", err)
+	}
+	if err := os.Mkdir(filepath.Join(workDir, "dir"), 0o755); err != nil {
+		t.Fatalf("mkdir dir: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(workDir, "dir", "file2.txt"), []byte("two\n"), 0o644); err != nil {
+		t.Fatalf("write file2: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	head := gitCmd(t, repoPath, "rev-parse", "HEAD")
+	wantID, _ := repo.ParseHash(head)
+	walk, err := repo.ReachableObjects(ReachabilityQuery{
+		Wants: []Hash{wantID},
+		Mode:  ReachabilityAllObjects,
+	})
+	if err != nil {
+		t.Fatalf("ReachableObjects failed: %v", err)
+	}
+
+	seen := make(map[Hash]ObjectType)
+	for obj := range walk.Seq() {
+		seen[obj.ID] = obj.Type
+	}
+	if err := walk.Err(); err != nil {
+		t.Fatalf("Reachability walk error: %v", err)
+	}
+
+	treeStr := gitCmd(t, repoPath, "show", "-s", "--format=%T", head)
+	treeID, _ := repo.ParseHash(treeStr)
+	lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeStr)
+	fields := strings.Fields(lsTree)
+	if len(fields) < 3 {
+		t.Fatalf("unexpected ls-tree output: %q", lsTree)
+	}
+	blobID, _ := repo.ParseHash(fields[2])
+
+	if seen[wantID] != ObjectTypeCommit {
+		t.Fatalf("missing commit in reachability walk")
+	}
+	if seen[treeID] != ObjectTypeTree {
+		t.Fatalf("missing tree in reachability walk")
+	}
+	if seen[blobID] != ObjectTypeBlob {
+		t.Fatalf("missing blob in reachability walk")
+	}
+}
+
+func TestReachabilityStopAtHaves(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	var commits []string
+	for i := 0; i < 3; i++ {
+		path := filepath.Join(workDir, "file.txt")
+		if err := os.WriteFile(path, []byte{byte('a' + i), '\n'}, 0o644); err != nil {
+			t.Fatalf("write file: %v", err)
+		}
+		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit")
+		commits = append(commits, gitCmd(t, repoPath, "rev-parse", "HEAD"))
+	}
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	wantID, _ := repo.ParseHash(commits[2])
+	haveID, _ := repo.ParseHash(commits[1])
+	walk, err := repo.ReachableObjects(ReachabilityQuery{
+		Wants:       []Hash{wantID},
+		Haves:       []Hash{haveID},
+		Mode:        ReachabilityCommitsOnly,
+		StopAtHaves: true,
+	})
+	if err != nil {
+		t.Fatalf("ReachableObjects failed: %v", err)
+	}
+
+	var got []Hash
+	for obj := range walk.Seq() {
+		got = append(got, obj.ID)
+		if obj.InHave {
+			t.Fatalf("unexpected InHave object in send set")
+		}
+	}
+	if err := walk.Err(); err != nil {
+		t.Fatalf("Reachability walk error: %v", err)
+	}
+	if len(got) != 1 || got[0] != wantID {
+		t.Fatalf("StopAtHaves mismatch: got %d objects", len(got))
+	}
+}
--