ref: 2536a35b68aa37d3a6d57305ae12b50b99c51f42
parent: 1adc6198a4c45e882965cce2fd8771e9b4ee113c
author: Runxi Yu <me@runxiyu.org>
date: Tue Mar 3 15:27:57 EST 2026
reachability: Add basic reachability API
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -14,6 +14,7 @@
- gosmopolitan # completely normal to have CJK and such in tests
- gochecknoglobals # unlikely to be introduce accidentally and are usually intentional
- nonamedreturns # named returns are often good for clarity
+ - errname # ErrXXX is better than XXXError
- exhaustruct # tmp: should fix... but too annoying at the moment
- wsl_v5 # tmp
- wsl # tmp
--- /dev/null
+++ b/reachability/reachability.go
@@ -1,0 +1,382 @@
+package reachability
+
+import (
+ "errors"
+ "fmt"
+ "iter"
+
+ "codeberg.org/lindenii/furgit/object"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objectstore"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// Domain specifies which graph edges are traversed.
+type Domain uint8
+
+const (
+ // DomainCommits traverses commit-parent edges and annotated-tag target edges.
+ DomainCommits Domain = iota
+ // DomainObjects traverses full commit/tree/blob objects.
+ DomainObjects
+)
+
+// Reachability provides graph traversal over objects in one object store.
+//
+// It is not safe for concurrent use.
+type Reachability struct {+ Store objectstore.Store
+}
+
+// New builds a Reachability over one object store.
+func New(store objectstore.Store) *Reachability {+ return &Reachability{Store: store}+}
+
+// IsAncestor reports whether ancestor is reachable from descendant via commit
+// parent edges.
+//
+// Both inputs are peeled through annotated tags before commit traversal.
+func (r *Reachability) IsAncestor(ancestor, descendant objectid.ObjectID) (bool, error) {+ ancestorCommit, err := r.peelRootToDomain(ancestor, DomainCommits)
+ if err != nil {+ return false, err
+ }
+ descendantCommit, err := r.peelRootToDomain(descendant, DomainCommits)
+ if err != nil {+ return false, err
+ }
+ if ancestorCommit == descendantCommit {+ return true, nil
+ }
+
+ walk := r.Walk(DomainCommits, nil, map[objectid.ObjectID]struct{}{descendantCommit: {}})+ for id := range walk.Seq() {+ if id == ancestorCommit {+ return true, nil
+ }
+ }
+ if err := walk.Err(); err != nil {+ return false, err
+ }
+ return false, nil
+}
+
+// CheckConnected verifies that all objects reachable from wants (under the
+// selected domain) can be fully traversed without missing-object/type/parse
+// errors, excluding subgraphs rooted at haves.
+func (r *Reachability) CheckConnected(domain Domain, haves, wants map[objectid.ObjectID]struct{}) error {+ walk := r.Walk(domain, haves, wants)
+ for range walk.Seq() {+ }
+ return walk.Err()
+}
+
+// Walk creates one single-use traversal over the selected domain.
+func (r *Reachability) Walk(domain Domain, haves, wants map[objectid.ObjectID]struct{}) *Walk {+ walk := &Walk{+ reachability: r,
+ domain: domain,
+ haves: haves,
+ wants: wants,
+ }
+ if err := validateDomain(domain); err != nil {+ walk.err = err
+ }
+ return walk
+}
+
+// ErrObjectMissing indicates that a referenced object is absent from the store.
+type ErrObjectMissing struct {+ OID objectid.ObjectID
+}
+
+func (e *ErrObjectMissing) Error() string {+ return fmt.Sprintf("reachability: missing object %s", e.OID)+}
+
+// ErrObjectType indicates that a referenced object has a different type than
+// what traversal expected on that edge.
+type ErrObjectType struct {+ OID objectid.ObjectID
+ Got objecttype.Type
+ Want objecttype.Type
+}
+
+func (e *ErrObjectType) Error() string {+ gotName, gotOK := objecttype.Name(e.Got)
+ if !gotOK {+ gotName = fmt.Sprintf("type(%d)", e.Got)+ }
+ wantName, wantOK := objecttype.Name(e.Want)
+ if !wantOK {+ wantName = fmt.Sprintf("type(%d)", e.Want)+ }
+ return fmt.Sprintf("reachability: object %s has type %s, want %s", e.OID, gotName, wantName)+}
+
+// Walk is one single-use iterator-style traversal.
+type Walk struct {+ reachability *Reachability
+ domain Domain
+ haves map[objectid.ObjectID]struct{}+ wants map[objectid.ObjectID]struct{}+
+ seqUsed bool
+ err error
+}
+
+// Seq returns the traversal sequence. It is single-use.
+func (walk *Walk) Seq() iter.Seq[objectid.ObjectID] {+ if walk.seqUsed {+ return func(yield func(objectid.ObjectID) bool) {+ _ = yield
+ if walk.err == nil {+ walk.err = errors.New("reachability: walk sequence already consumed")+ }
+ }
+ }
+ walk.seqUsed = true
+ return func(yield func(objectid.ObjectID) bool) {+ if walk.err != nil {+ return
+ }
+ stack := walk.initialStack()
+ var err error
+ visited := make(map[objectid.ObjectID]struct{}, len(stack))+ for len(stack) > 0 {+ item := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+
+ if containsOID(walk.haves, item.id) {+ continue
+ }
+ if _, ok := visited[item.id]; ok {+ continue
+ }
+ visited[item.id] = struct{}{}+
+ var next []walkItem
+ next, err = walk.expand(item)
+ if err != nil {+ walk.err = err
+ return
+ }
+ if !yield(item.id) {+ return
+ }
+ stack = append(stack, next...)
+ }
+ }
+}
+
+// Err returns the terminal error, if any, once Seq has been consumed.
+func (walk *Walk) Err() error {+ return walk.err
+}
+
+type walkItem struct {+ id objectid.ObjectID
+ want objecttype.Type
+}
+
+func (walk *Walk) initialStack() []walkItem {+ if len(walk.wants) == 0 {+ return nil
+ }
+ stack := make([]walkItem, 0, len(walk.wants))
+ for want := range walk.wants {+ stack = append(stack, walkItem{id: want, want: objecttype.TypeInvalid})+ }
+ return stack
+}
+
+func (walk *Walk) expand(item walkItem) ([]walkItem, error) {+ if walk.domain == DomainCommits {+ return walk.expandCommits(item)
+ }
+ return walk.expandObjects(item)
+}
+
+func (walk *Walk) expandCommits(item walkItem) ([]walkItem, error) {+ ty, err := walk.readHeaderType(item.id)
+ if err != nil {+ return nil, err
+ }
+ switch ty {+ case objecttype.TypeCommit:
+ content, err := walk.readBytesContent(item.id)
+ if err != nil {+ return nil, err
+ }
+ commit, err := object.ParseCommit(content, item.id.Algorithm())
+ if err != nil {+ return nil, err
+ }
+ next := make([]walkItem, 0, len(commit.Parents))
+ for _, parent := range commit.Parents {+ next = append(next, walkItem{id: parent, want: objecttype.TypeInvalid})+ }
+ return next, nil
+ case objecttype.TypeTag:
+ content, err := walk.readBytesContent(item.id)
+ if err != nil {+ return nil, err
+ }
+ tag, err := object.ParseTag(content, item.id.Algorithm())
+ if err != nil {+ return nil, err
+ }
+ return []walkItem{{id: tag.Target, want: objecttype.TypeInvalid}}, nil+ case objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeInvalid,
+ objecttype.TypeFuture, objecttype.TypeOfsDelta, objecttype.TypeRefDelta:
+ return nil, &ErrObjectType{OID: item.id, Got: ty, Want: objecttype.TypeCommit}+ }
+ return nil, fmt.Errorf("reachability: unreachable object type %d", ty)+}
+
+func (walk *Walk) expandObjects(item walkItem) ([]walkItem, error) {+ ty, err := walk.readHeaderType(item.id)
+ if err != nil {+ return nil, err
+ }
+ if item.want != objecttype.TypeInvalid && ty != item.want {+ return nil, &ErrObjectType{OID: item.id, Got: ty, Want: item.want}+ }
+
+ switch ty {+ case objecttype.TypeBlob:
+ return nil, nil
+ case objecttype.TypeCommit:
+ content, err := walk.readBytesContent(item.id)
+ if err != nil {+ return nil, err
+ }
+ commit, err := object.ParseCommit(content, item.id.Algorithm())
+ if err != nil {+ return nil, err
+ }
+ next := make([]walkItem, 0, len(commit.Parents)+1)
+ next = append(next, walkItem{id: commit.Tree, want: objecttype.TypeTree})+ for _, parent := range commit.Parents {+ next = append(next, walkItem{id: parent, want: objecttype.TypeCommit})+ }
+ return next, nil
+ case objecttype.TypeTree:
+ content, err := walk.readBytesContent(item.id)
+ if err != nil {+ return nil, err
+ }
+ tree, err := object.ParseTree(content, item.id.Algorithm())
+ if err != nil {+ return nil, err
+ }
+ next := make([]walkItem, 0, len(tree.Entries))
+ for _, entry := range tree.Entries {+ switch entry.Mode {+ case object.FileModeGitlink:
+ continue
+ case object.FileModeDir:
+ next = append(next, walkItem{id: entry.ID, want: objecttype.TypeTree})+ case object.FileModeRegular, object.FileModeExecutable, object.FileModeSymlink:
+ next = append(next, walkItem{id: entry.ID, want: objecttype.TypeBlob})+ }
+ }
+ return next, nil
+ case objecttype.TypeTag:
+ content, err := walk.readBytesContent(item.id)
+ if err != nil {+ return nil, err
+ }
+ tag, err := object.ParseTag(content, item.id.Algorithm())
+ if err != nil {+ return nil, err
+ }
+ return []walkItem{{id: tag.Target, want: tag.TargetType}}, nil+ case objecttype.TypeInvalid, objecttype.TypeFuture, objecttype.TypeOfsDelta, objecttype.TypeRefDelta:
+ return nil, &ErrObjectType{OID: item.id, Got: ty, Want: item.want}+ }
+ return nil, fmt.Errorf("reachability: unreachable object type %d", ty)+}
+
+func (r *Reachability) peelRootToDomain(id objectid.ObjectID, domain Domain) (objectid.ObjectID, error) {+ if err := validateDomain(domain); err != nil {+ return objectid.ObjectID{}, err+ }
+ for {+ ty, err := r.readHeaderType(id)
+ if err != nil {+ return objectid.ObjectID{}, err+ }
+ if ty != objecttype.TypeTag {+ if domain == DomainCommits && ty != objecttype.TypeCommit {+ return objectid.ObjectID{}, &ErrObjectType{OID: id, Got: ty, Want: objecttype.TypeCommit}+ }
+ return id, nil
+ }
+
+ content, err := r.readBytesContent(id)
+ if err != nil {+ return objectid.ObjectID{}, err+ }
+ tag, err := object.ParseTag(content, id.Algorithm())
+ if err != nil {+ return objectid.ObjectID{}, err+ }
+ id = tag.Target
+ }
+}
+
+func validateDomain(domain Domain) error {+ switch domain {+ case DomainCommits, DomainObjects:
+ return nil
+ default:
+ return fmt.Errorf("reachability: invalid domain %d", domain)+ }
+}
+
+func containsOID(set map[objectid.ObjectID]struct{}, id objectid.ObjectID) bool {+ if len(set) == 0 {+ return false
+ }
+ _, ok := set[id]
+ return ok
+}
+
+// The following helpers exist because we don't have unified error handling across the entire project.
+// This will be fixed later.
+
+func (walk *Walk) readHeaderType(id objectid.ObjectID) (objecttype.Type, error) {+ return walk.reachability.readHeaderType(id)
+}
+
+func (r *Reachability) readHeaderType(id objectid.ObjectID) (objecttype.Type, error) {+ ty, _, err := r.Store.ReadHeader(id)
+ if err != nil {+ if errors.Is(err, objectstore.ErrObjectNotFound) {+ return objecttype.TypeInvalid, &ErrObjectMissing{OID: id}+ }
+ return objecttype.TypeInvalid, err
+ }
+ return ty, nil
+}
+
+func (walk *Walk) readBytesContent(id objectid.ObjectID) ([]byte, error) {+ content, err := walk.reachability.readBytesContent(id)
+ if err != nil {+ return nil, err
+ }
+ return content, nil
+}
+
+func (r *Reachability) readBytesContent(id objectid.ObjectID) ([]byte, error) {+ _, content, err := r.Store.ReadBytesContent(id)
+ if err != nil {+ if errors.Is(err, objectstore.ErrObjectNotFound) {+ return nil, &ErrObjectMissing{OID: id}+ }
+ return nil, err
+ }
+ return content, nil
+}
--- /dev/null
+++ b/reachability/reachability_integration_test.go
@@ -1,0 +1,388 @@
+package reachability_test
+
+import (
+ "errors"
+ "fmt"
+ "maps"
+ "os"
+ "path/filepath"
+ "slices"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/reachability"
+ "codeberg.org/lindenii/furgit/repository"
+)
+
+func TestWalkCommitsMatchesGitRevList(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ testRepo := testgit.NewRepo(t, testgit.RepoOptions{+ ObjectFormat: algo,
+ Bare: true,
+ RefFormat: "files",
+ })
+
+ _, tree1 := testRepo.MakeSingleFileTree(t, "base.txt", []byte("base\n"))+ base := testRepo.CommitTree(t, tree1, "base")
+
+ _, tree2 := testRepo.MakeSingleFileTree(t, "left.txt", []byte("left\n"))+ left := testRepo.CommitTree(t, tree2, "left", base)
+
+ _, tree3 := testRepo.MakeSingleFileTree(t, "right.txt", []byte("right\n"))+ right := testRepo.CommitTree(t, tree3, "right", base)
+
+ _, tree4 := testRepo.MakeSingleFileTree(t, "merge.txt", []byte("merge\n"))+ merge := testRepo.CommitTree(t, tree4, "merge", left, right)
+
+ tag1 := testRepo.TagAnnotated(t, "v1", merge, "v1")
+ tag2 := testRepo.TagAnnotated(t, "v2", tag1, "v2")
+
+ r := openReachabilityFromTestRepo(t, testRepo)
+ walk := r.Walk(
+ reachability.DomainCommits,
+ nil,
+ map[objectid.ObjectID]struct{}{merge: {}},+ )
+ got := oidSetFromSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+
+ want := gitRevListSet(t, testRepo, false, []objectid.ObjectID{merge}, nil)+ if !maps.Equal(got, want) {+ t.Fatalf("commit walk mismatch:\n got=%v\nwant=%v", sortedOIDStrings(got), sortedOIDStrings(want))+ }
+
+ peelWalk := r.Walk(
+ reachability.DomainCommits,
+ nil,
+ map[objectid.ObjectID]struct{}{tag2: {}},+ )
+ peelGot := oidSetFromSeq(peelWalk.Seq())
+ if err := peelWalk.Err(); err != nil {+ t.Fatalf("peelWalk.Err(): %v", err)+ }
+ wantWithTags := maps.Clone(want)
+ wantWithTags[tag1] = struct{}{}+ wantWithTags[tag2] = struct{}{}+ if !maps.Equal(peelGot, wantWithTags) {+ t.Fatalf("tag-root commit walk mismatch:\n got=%v\nwant=%v", sortedOIDStrings(peelGot), sortedOIDStrings(wantWithTags))+ }
+ })
+}
+
+func TestWalkObjectsMatchesGitRevListObjects(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ testRepo := testgit.NewRepo(t, testgit.RepoOptions{+ ObjectFormat: algo,
+ Bare: true,
+ RefFormat: "files",
+ })
+
+ aBlob := testRepo.HashObject(t, "blob", []byte("a\n"))+ bBlob := testRepo.HashObject(t, "blob", []byte("b\n"))+ nestedTree := testRepo.Mktree(t, fmt.Sprintf("100644 blob %s\tb.txt\n", bBlob))+ rootTree := testRepo.Mktree(t,
+ fmt.Sprintf("100644 blob %s\ta.txt\n040000 tree %s\tdir\n", aBlob, nestedTree),+ )
+ base := testRepo.CommitTree(t, rootTree, "base")
+
+ cBlob := testRepo.HashObject(t, "blob", []byte("c\n"))+ tree2 := testRepo.Mktree(t, fmt.Sprintf("100644 blob %s\tc.txt\n", cBlob))+ head := testRepo.CommitTree(t, tree2, "head", base)
+ tag := testRepo.TagAnnotated(t, "objtag", head, "objtag")
+
+ r := openReachabilityFromTestRepo(t, testRepo)
+ walk := r.Walk(
+ reachability.DomainObjects,
+ nil,
+ map[objectid.ObjectID]struct{}{head: {}},+ )
+ got := oidSetFromSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+
+ want := gitRevListSet(t, testRepo, true, []objectid.ObjectID{head}, nil)+ if !maps.Equal(got, want) {+ t.Fatalf("object walk mismatch:\n got=%v\nwant=%v", sortedOIDStrings(got), sortedOIDStrings(want))+ }
+
+ peelWalk := r.Walk(
+ reachability.DomainObjects,
+ nil,
+ map[objectid.ObjectID]struct{}{tag: {}},+ )
+ peelGot := oidSetFromSeq(peelWalk.Seq())
+ if err := peelWalk.Err(); err != nil {+ t.Fatalf("peelWalk.Err(): %v", err)+ }
+ wantFromTag := gitRevListSet(t, testRepo, true, []objectid.ObjectID{tag}, nil)+ if !maps.Equal(peelGot, wantFromTag) {+ t.Fatalf("tag-root object walk mismatch:\n got=%v\nwant=%v", sortedOIDStrings(peelGot), sortedOIDStrings(wantFromTag))+ }
+
+ walkWithHave := r.Walk(
+ reachability.DomainObjects,
+ map[objectid.ObjectID]struct{}{base: {}},+ map[objectid.ObjectID]struct{}{head: {}},+ )
+ withHave := oidSetFromSeq(walkWithHave.Seq())
+ if err := walkWithHave.Err(); err != nil {+ t.Fatalf("walkWithHave.Err(): %v", err)+ }
+ if _, ok := withHave[base]; ok {+ t.Fatalf("walk output unexpectedly contains have commit %s", base)+ }
+ })
+}
+
+func TestIsAncestorMatchesGitMergeBase(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ testRepo := testgit.NewRepo(t, testgit.RepoOptions{+ ObjectFormat: algo,
+ Bare: true,
+ RefFormat: "files",
+ })
+
+ _, tree1 := testRepo.MakeSingleFileTree(t, "one.txt", []byte("one\n"))+ c1 := testRepo.CommitTree(t, tree1, "c1")
+
+ _, tree2 := testRepo.MakeSingleFileTree(t, "two.txt", []byte("two\n"))+ c2 := testRepo.CommitTree(t, tree2, "c2", c1)
+
+ _, tree3 := testRepo.MakeSingleFileTree(t, "three.txt", []byte("three\n"))+ c3 := testRepo.CommitTree(t, tree3, "c3", c2)
+
+ tag := testRepo.TagAnnotated(t, "tip", c2, "tip")
+
+ r := openReachabilityFromTestRepo(t, testRepo)
+
+ got, err := r.IsAncestor(c1, tag)
+ if err != nil {+ t.Fatalf("IsAncestor(c1, tag): %v", err)+ }
+ if want := gitMergeBaseIsAncestor(t, testRepo, c1, c2); got != want {+ t.Fatalf("IsAncestor(c1, tag)=%v, want %v", got, want)+ }
+
+ got, err = r.IsAncestor(c3, c2)
+ if err != nil {+ t.Fatalf("IsAncestor(c3, c2): %v", err)+ }
+ if want := gitMergeBaseIsAncestor(t, testRepo, c3, c2); got != want {+ t.Fatalf("IsAncestor(c3, c2)=%v, want %v", got, want)+ }
+ })
+}
+
+func TestCheckConnectedMissingObject(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ testRepo := testgit.NewRepo(t, testgit.RepoOptions{+ ObjectFormat: algo,
+ Bare: true,
+ RefFormat: "files",
+ })
+
+ _, treeID, commitID := testRepo.MakeCommit(t, "missing")
+ if err := os.Remove(looseObjectPath(testRepo.Dir(), treeID)); err != nil {+ t.Fatalf("remove tree object: %v", err)+ }
+
+ r := openReachabilityFromTestRepo(t, testRepo)
+ err := r.CheckConnected(
+ reachability.DomainObjects,
+ nil,
+ map[objectid.ObjectID]struct{}{commitID: {}},+ )
+ if err == nil {+ t.Fatal("expected error")+ }
+ var missing *reachability.ErrObjectMissing
+ if !errors.As(err, &missing) {+ t.Fatalf("expected ErrObjectMissing, got %T (%v)", err, err)+ }
+ if missing.OID != treeID {+ t.Fatalf("missing oid = %s, want %s", missing.OID, treeID)+ }
+ })
+}
+
+func TestWalkOnPackedOnlyRepo(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ testRepo := testgit.NewRepo(t, testgit.RepoOptions{+ ObjectFormat: algo,
+ Bare: true,
+ RefFormat: "files",
+ })
+
+ _, tree1 := testRepo.MakeSingleFileTree(t, "one.txt", []byte("one\n"))+ c1 := testRepo.CommitTree(t, tree1, "one")
+ _, tree2 := testRepo.MakeSingleFileTree(t, "two.txt", []byte("two\n"))+ c2 := testRepo.CommitTree(t, tree2, "two", c1)
+ testRepo.UpdateRef(t, "refs/heads/main", c2)
+ testRepo.SymbolicRef(t, "HEAD", "refs/heads/main")
+
+ testRepo.Repack(t, "-ad")
+ testRepo.Run(t, "prune-packed")
+
+ assertPackedOnly(t, testRepo.Dir())
+
+ r := openReachabilityFromTestRepo(t, testRepo)
+ walk := r.Walk(
+ reachability.DomainCommits,
+ nil,
+ map[objectid.ObjectID]struct{}{c2: {}},+ )
+ got := oidSetFromSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+ if _, ok := got[c2]; !ok {+ t.Fatalf("walk output missing HEAD commit %s", c2)+ }
+ if _, ok := got[c1]; !ok {+ t.Fatalf("walk output missing parent commit %s", c1)+ }
+ })
+}
+
+func openReachabilityFromTestRepo(t *testing.T, testRepo *testgit.TestRepo) *reachability.Reachability {+ t.Helper()
+ root, err := os.OpenRoot(testRepo.Dir())
+ if err != nil {+ t.Fatalf("os.OpenRoot: %v", err)+ }
+ t.Cleanup(func() { _ = root.Close() })+
+ repo, err := repository.Open(root)
+ if err != nil {+ t.Fatalf("repository.Open: %v", err)+ }
+ t.Cleanup(func() { _ = repo.Close() })+
+ return reachability.New(repo.Objects())
+}
+
+func oidSetFromSeq(seq func(func(objectid.ObjectID) bool)) map[objectid.ObjectID]struct{} {+ out := make(map[objectid.ObjectID]struct{})+ seq(func(id objectid.ObjectID) bool {+ out[id] = struct{}{}+ return true
+ })
+ return out
+}
+
+func gitRevListSet(
+ t *testing.T,
+ testRepo *testgit.TestRepo,
+ includeObjects bool,
+ wants []objectid.ObjectID,
+ haves []objectid.ObjectID,
+) map[objectid.ObjectID]struct{} {+ t.Helper()
+
+ args := []string{"rev-list"}+ if includeObjects {+ args = append(args, "--objects")
+ }
+ for _, want := range wants {+ args = append(args, want.String())
+ }
+ if len(haves) > 0 {+ args = append(args, "--not")
+ for _, have := range haves {+ args = append(args, have.String())
+ }
+ }
+
+ out := testRepo.Run(t, args...)
+ set := make(map[objectid.ObjectID]struct{})+ for line := range strings.SplitSeq(strings.TrimSpace(out), "\n") {+ line = strings.TrimSpace(line)
+ if line == "" {+ continue
+ }
+ tok := line
+ if i := strings.IndexByte(tok, ' '); i >= 0 {+ tok = tok[:i]
+ }
+ id, err := objectid.ParseHex(testRepo.Algorithm(), tok)
+ if err != nil {+ t.Fatalf("parse rev-list oid %q: %v", tok, err)+ }
+ set[id] = struct{}{}+ }
+ return set
+}
+
+func gitMergeBaseIsAncestor(t *testing.T, testRepo *testgit.TestRepo, a, b objectid.ObjectID) bool {+ t.Helper()
+ // testgit.Run fatals on non-zero status, so we compare merge-base output.
+ mb := testRepo.Run(t, "merge-base", a.String(), b.String())
+ return mb == a.String()
+}
+
+func sortedOIDStrings(set map[objectid.ObjectID]struct{}) []string {+ out := make([]string, 0, len(set))
+ for id := range set {+ out = append(out, id.String())
+ }
+ slices.Sort(out)
+ return out
+}
+
+func looseObjectPath(repoDir string, id objectid.ObjectID) string {+ hex := id.String()
+ return filepath.Join(repoDir, "objects", hex[:2], hex[2:])
+}
+
+func assertPackedOnly(t *testing.T, repoDir string) {+ t.Helper()
+ objectsDir := filepath.Join(repoDir, "objects")
+
+ entries, err := os.ReadDir(objectsDir)
+ if err != nil {+ t.Fatalf("ReadDir(objects): %v", err)+ }
+ for _, entry := range entries {+ name := entry.Name()
+ if name == "pack" || name == "info" {+ continue
+ }
+ if len(name) == 2 && isHexDirName(name) {+ subEntries, err := os.ReadDir(filepath.Join(objectsDir, name))
+ if err != nil {+ t.Fatalf("ReadDir(objects/%s): %v", name, err)+ }
+ if len(subEntries) != 0 {+ t.Fatalf("found loose objects in %s", filepath.Join(objectsDir, name))+ }
+ }
+ }
+}
+
+func isHexDirName(name string) bool {+ if len(name) != 2 {+ return false
+ }
+ for i := range 2 {+ c := name[i]
+ if (c < '0' || c > '9') && (c < 'a' || c > 'f') {+ return false
+ }
+ }
+ return true
+}
--- /dev/null
+++ b/reachability/reachability_unit_test.go
@@ -1,0 +1,422 @@
+package reachability_test
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "maps"
+ "slices"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ "codeberg.org/lindenii/furgit/object"
+ "codeberg.org/lindenii/furgit/objectheader"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objectstore"
+ "codeberg.org/lindenii/furgit/objecttype"
+ "codeberg.org/lindenii/furgit/reachability"
+)
+
+type storeObject struct {+ ty objecttype.Type
+ content []byte
+}
+
+type memStore struct {+ algo objectid.Algorithm
+ objects map[objectid.ObjectID]storeObject
+ readBytesByObjectID map[objectid.ObjectID]int
+}
+
+func newMemStore(algo objectid.Algorithm) *memStore {+ return &memStore{+ algo: algo,
+ objects: make(map[objectid.ObjectID]storeObject),
+ readBytesByObjectID: make(map[objectid.ObjectID]int),
+ }
+}
+
+func (store *memStore) ReadBytesFull(id objectid.ObjectID) ([]byte, error) {+ obj, ok := store.objects[id]
+ if !ok {+ return nil, objectstore.ErrObjectNotFound
+ }
+ header, ok := objectheader.Encode(obj.ty, int64(len(obj.content)))
+ if !ok {+ panic("failed to encode object header")+ }
+ raw := make([]byte, len(header)+len(obj.content))
+ copy(raw, header)
+ copy(raw[len(header):], obj.content)
+ return raw, nil
+}
+
+func (store *memStore) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) {+ obj, ok := store.objects[id]
+ if !ok {+ return objecttype.TypeInvalid, nil, objectstore.ErrObjectNotFound
+ }
+ store.readBytesByObjectID[id]++
+ return obj.ty, append([]byte(nil), obj.content...), nil
+}
+
+func (store *memStore) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) {+ raw, err := store.ReadBytesFull(id)
+ if err != nil {+ return nil, err
+ }
+ return io.NopCloser(bytes.NewReader(raw)), nil
+}
+
+func (store *memStore) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) {+ ty, content, err := store.ReadBytesContent(id)
+ if err != nil {+ return objecttype.TypeInvalid, 0, nil, err
+ }
+ return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil
+}
+
+func (store *memStore) ReadSize(id objectid.ObjectID) (int64, error) {+ _, size, err := store.ReadHeader(id)
+ if err != nil {+ return 0, err
+ }
+ return size, nil
+}
+
+func (store *memStore) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) {+ obj, ok := store.objects[id]
+ if !ok {+ return objecttype.TypeInvalid, 0, objectstore.ErrObjectNotFound
+ }
+ return obj.ty, int64(len(obj.content)), nil
+}
+
+func (store *memStore) Close() error {+ return nil
+}
+
+func commitBody(tree objectid.ObjectID, parents ...objectid.ObjectID) []byte {+ buf := fmt.Appendf(nil, "tree %s\n", tree.String())
+ for _, parent := range parents {+ buf = append(buf, fmt.Appendf(nil, "parent %s\n", parent.String())...)
+ }
+ buf = append(buf, []byte("\nmsg\n")...)+ return buf
+}
+
+func tagBody(target objectid.ObjectID, targetType objecttype.Type) []byte {+ targetName, ok := objecttype.Name(targetType)
+ if !ok {+ panic("invalid tag target type")+ }
+ return fmt.Appendf(nil, "object %s\ntype %s\ntag t\n\nmsg\n", target.String(), targetName)
+}
+
+func collectSeq(seq func(func(objectid.ObjectID) bool)) []objectid.ObjectID {+ var out []objectid.ObjectID
+ seq(func(id objectid.ObjectID) bool {+ out = append(out, id)
+ return true
+ })
+ return out
+}
+
+func toSet(ids []objectid.ObjectID) map[objectid.ObjectID]struct{} {+ set := make(map[objectid.ObjectID]struct{}, len(ids))+ for _, id := range ids {+ set[id] = struct{}{}+ }
+ return set
+}
+
+func TestWalkDomainCommitsIncludesTagNodes(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ commit1 := store.addObject(objecttype.TypeCommit, commitBody(tree))
+ commit2 := store.addObject(objecttype.TypeCommit, commitBody(tree, commit1))
+ tag1 := store.addObject(objecttype.TypeTag, tagBody(commit2, objecttype.TypeCommit))
+ tag2 := store.addObject(objecttype.TypeTag, tagBody(tag1, objecttype.TypeTag))
+
+ r := reachability.New(store)
+ walk := r.Walk(reachability.DomainCommits, nil, map[objectid.ObjectID]struct{}{tag2: {}})+ got := collectSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+
+ gotSet := toSet(got)
+ wantSet := map[objectid.ObjectID]struct{}{tag2: {}, tag1: {}, commit2: {}, commit1: {}}+ if !maps.Equal(gotSet, wantSet) {+ t.Fatalf("walk output mismatch: got %v, want %v", slices.Collect(maps.Keys(gotSet)), slices.Collect(maps.Keys(wantSet)))+ }
+ })
+}
+
+func TestWalkExcludesHavesCompletely(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ commit := store.addObject(objecttype.TypeCommit, commitBody(tree))
+
+ r := reachability.New(store)
+ walk := r.Walk(reachability.DomainCommits, map[objectid.ObjectID]struct{}{commit: {}}, map[objectid.ObjectID]struct{}{commit: {}})+ got := collectSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+ if len(got) != 0 {+ t.Fatalf("expected empty output, got %v", got)+ }
+ })
+}
+
+func TestWalkDomainCommitsRejectsNonCommitRootAfterPeel(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ tag := store.addObject(objecttype.TypeTag, tagBody(tree, objecttype.TypeTree))
+
+ r := reachability.New(store)
+ walk := r.Walk(reachability.DomainCommits, nil, map[objectid.ObjectID]struct{}{tag: {}})+ _ = collectSeq(walk.Seq())
+ err := walk.Err()
+ if err == nil {+ t.Fatal("expected error")+ }
+ var typeErr *reachability.ErrObjectType
+ if !errors.As(err, &typeErr) {+ t.Fatalf("expected ErrObjectType, got %T (%v)", err, err)+ }
+ if typeErr.Got != objecttype.TypeTree || typeErr.Want != objecttype.TypeCommit {+ t.Fatalf("unexpected type error: %+v", typeErr)+ }
+ })
+}
+
+func TestWalkDomainCommitsHaveTagStopsTraversal(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ commit1 := store.addObject(objecttype.TypeCommit, commitBody(tree))
+ commit2 := store.addObject(objecttype.TypeCommit, commitBody(tree, commit1))
+ tag1 := store.addObject(objecttype.TypeTag, tagBody(commit2, objecttype.TypeCommit))
+ tag2 := store.addObject(objecttype.TypeTag, tagBody(tag1, objecttype.TypeTag))
+
+ r := reachability.New(store)
+ walk := r.Walk(
+ reachability.DomainCommits,
+ map[objectid.ObjectID]struct{}{tag1: {}},+ map[objectid.ObjectID]struct{}{tag2: {}},+ )
+ got := collectSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+
+ gotSet := toSet(got)
+ wantSet := map[objectid.ObjectID]struct{}{tag2: {}}+ if !maps.Equal(gotSet, wantSet) {+ t.Fatalf("walk output mismatch: got %v, want %v", slices.Collect(maps.Keys(gotSet)), slices.Collect(maps.Keys(wantSet)))+ }
+ })
+}
+
+func TestWalkDomainObjectsRecursesTreesAndSkipsBlobContentReads(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+
+ blob1 := store.addObject(objecttype.TypeBlob, []byte("b1\n"))+ blob2 := store.addObject(objecttype.TypeBlob, []byte("b2\n"))+ gitlinkTarget := store.algo.Sum([]byte("external-submodule"))+
+ subtree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("nested"),+ ID: blob2,
+ }}}))
+ rootTree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{+ {Mode: object.FileModeRegular, Name: []byte("a"), ID: blob1},+ {Mode: object.FileModeDir, Name: []byte("dir"), ID: subtree},+ {Mode: object.FileModeGitlink, Name: []byte("submodule"), ID: gitlinkTarget},+ }}))
+ commit := store.addObject(objecttype.TypeCommit, commitBody(rootTree))
+
+ r := reachability.New(store)
+ walk := r.Walk(reachability.DomainObjects, nil, map[objectid.ObjectID]struct{}{commit: {}})+ got := collectSeq(walk.Seq())
+ if err := walk.Err(); err != nil {+ t.Fatalf("walk.Err(): %v", err)+ }
+
+ gotSet := toSet(got)
+ wantSet := map[objectid.ObjectID]struct{}{commit: {}, rootTree: {}, subtree: {}, blob1: {}, blob2: {}}+ if !maps.Equal(gotSet, wantSet) {+ t.Fatalf("walk output mismatch: got %v, want %v", slices.Collect(maps.Keys(gotSet)), slices.Collect(maps.Keys(wantSet)))+ }
+ if store.readBytesByObjectID[blob1] != 0 || store.readBytesByObjectID[blob2] != 0 {+ t.Fatalf("blob contents should not be read; counts: blob1=%d blob2=%d", store.readBytesByObjectID[blob1], store.readBytesByObjectID[blob2])+ }
+ })
+}
+
+func TestCheckConnectedReturnsConcreteMissingObject(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ missingParent := store.algo.Sum([]byte("missing-parent"))+ commit := store.addObject(objecttype.TypeCommit, commitBody(tree, missingParent))
+
+ r := reachability.New(store)
+ err := r.CheckConnected(reachability.DomainCommits, nil, map[objectid.ObjectID]struct{}{commit: {}})+ if err == nil {+ t.Fatal("expected error")+ }
+ var missing *reachability.ErrObjectMissing
+ if !errors.As(err, &missing) {+ t.Fatalf("expected ErrObjectMissing, got %T (%v)", err, err)+ }
+ if missing.OID != missingParent {+ t.Fatalf("unexpected missing oid: got %s want %s", missing.OID, missingParent)+ }
+ })
+}
+
+func TestWalkInvalidDomainReturnsPlainError(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ r := reachability.New(newMemStore(algo))
+ walk := r.Walk(reachability.Domain(99), nil, nil)
+ _ = collectSeq(walk.Seq())
+ if err := walk.Err(); err == nil {+ t.Fatal("expected error")+ }
+ })
+}
+
+func TestIsAncestor(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ c1 := store.addObject(objecttype.TypeCommit, commitBody(tree))
+ c2 := store.addObject(objecttype.TypeCommit, commitBody(tree, c1))
+ otherBlob := store.addObject(objecttype.TypeBlob, []byte("other-blob\n"))+ otherTree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("g"),+ ID: otherBlob,
+ }}}))
+ c3 := store.addObject(objecttype.TypeCommit, commitBody(otherTree))
+ tag := store.addObject(objecttype.TypeTag, tagBody(c2, objecttype.TypeCommit))
+
+ r := reachability.New(store)
+ ok, err := r.IsAncestor(c1, tag)
+ if err != nil {+ t.Fatalf("IsAncestor(c1, tag): %v", err)+ }
+ if !ok {+ t.Fatal("expected c1 to be ancestor of tag->c2")+ }
+
+ ok, err = r.IsAncestor(c3, c2)
+ if err != nil {+ t.Fatalf("IsAncestor(c3, c2): %v", err)+ }
+ if ok {+ t.Fatal("did not expect c3 to be ancestor of c2")+ }
+ })
+}
+
+func TestIsAncestorRejectsNonCommitAfterPeel(t *testing.T) {+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper+ store := newMemStore(algo)
+ blob := store.addObject(objecttype.TypeBlob, []byte("blob\n"))+ tree := store.addObject(objecttype.TypeTree, mustSerializeTree(t, &object.Tree{Entries: []object.TreeEntry{{+ Mode: object.FileModeRegular,
+ Name: []byte("f"),+ ID: blob,
+ }}}))
+ commit := store.addObject(objecttype.TypeCommit, commitBody(tree))
+ tagToTree := store.addObject(objecttype.TypeTag, tagBody(tree, objecttype.TypeTree))
+
+ r := reachability.New(store)
+ _, err := r.IsAncestor(commit, tagToTree)
+ if err == nil {+ t.Fatal("expected error")+ }
+ var typeErr *reachability.ErrObjectType
+ if !errors.As(err, &typeErr) {+ t.Fatalf("expected ErrObjectType, got %T (%v)", err, err)+ }
+ })
+}
+
+func mustSerializeTree(tb testing.TB, tree *object.Tree) []byte {+ tb.Helper()
+ body, err := tree.SerializeWithoutHeader()
+ if err != nil {+ tb.Fatalf("SerializeWithoutHeader: %v", err)+ }
+ return body
+}
+
+func (store *memStore) addObject(ty objecttype.Type, body []byte) objectid.ObjectID {+ header, ok := objectheader.Encode(ty, int64(len(body)))
+ if !ok {+ panic("failed to encode object header")+ }
+ raw := append(append([]byte(nil), header...), body...)
+ id := store.algo.Sum(raw)
+ store.objects[id] = storeObject{ty: ty, content: append([]byte(nil), body...)}+ return id
+}
--
⑨