shithub: furgit

Download patch

ref: 70916ec7713442a1f9b80f394b980ac2ab5a92df
parent: b01bc1a344c47ded15342f7872832daa1bf5cfce
author: Runxi Yu <me@runxiyu.org>
date: Sat Feb 21 08:29:38 EST 2026

diff/trees: Add tree-diffing routines

--- /dev/null
+++ b/diff/trees/diff.go
@@ -1,0 +1,155 @@
+// Package trees provides recursive diffs between Git tree objects.
+package trees
+
+import (
+	"codeberg.org/lindenii/furgit/object"
+	"codeberg.org/lindenii/furgit/objectid"
+)
+
+// Diff compares two trees and returns recursive differences.
+//
+// readTree is used to lazily load child trees by object ID when recursion
+// reaches directory entries.
+func Diff(a, b *object.Tree, readTree func(objectid.ObjectID) (*object.Tree, error)) ([]Entry, error) {
+	var out []Entry
+	if err := diffRecursive(a, b, nil, readTree, &out); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func diffRecursive(a, b *object.Tree, prefix []byte, readTree func(objectid.ObjectID) (*object.Tree, error), out *[]Entry) error {
+	if a == nil && b == nil {
+		return nil
+	}
+
+	if a == nil {
+		for i := range b.Entries {
+			entry := &b.Entries[i]
+			full := joinPath(prefix, entry.Name)
+			*out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: entry})
+			if entry.Mode == object.FileModeDir {
+				sub, err := readTree(entry.ID)
+				if err != nil {
+					return err
+				}
+				if err := diffRecursive(nil, sub, full, readTree, out); err != nil {
+					return err
+				}
+			}
+		}
+		return nil
+	}
+
+	if b == nil {
+		for i := range a.Entries {
+			entry := &a.Entries[i]
+			full := joinPath(prefix, entry.Name)
+			*out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: entry, New: nil})
+			if entry.Mode == object.FileModeDir {
+				sub, err := readTree(entry.ID)
+				if err != nil {
+					return err
+				}
+				if err := diffRecursive(sub, nil, full, readTree, out); err != nil {
+					return err
+				}
+			}
+		}
+		return nil
+	}
+
+	i := 0
+	j := 0
+	for i < len(a.Entries) && j < len(b.Entries) {
+		left := &a.Entries[i]
+		right := &b.Entries[j]
+		cmp := object.TreeEntryNameCompare(
+			left.Name,
+			left.Mode,
+			right.Name,
+			right.Mode == object.FileModeDir,
+		)
+		switch {
+		case cmp < 0:
+			full := joinPath(prefix, left.Name)
+			*out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: left, New: nil})
+			if left.Mode == object.FileModeDir {
+				sub, err := readTree(left.ID)
+				if err != nil {
+					return err
+				}
+				if err := diffRecursive(sub, nil, full, readTree, out); err != nil {
+					return err
+				}
+			}
+			i++
+		case cmp > 0:
+			full := joinPath(prefix, right.Name)
+			*out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: right})
+			if right.Mode == object.FileModeDir {
+				sub, err := readTree(right.ID)
+				if err != nil {
+					return err
+				}
+				if err := diffRecursive(nil, sub, full, readTree, out); err != nil {
+					return err
+				}
+			}
+			j++
+		default:
+			full := joinPath(prefix, left.Name)
+			modified := left.Mode != right.Mode || left.ID != right.ID
+			if modified {
+				*out = append(*out, Entry{Path: full, Kind: EntryKindModified, Old: left, New: right})
+			}
+			if left.Mode == object.FileModeDir && right.Mode == object.FileModeDir && left.ID != right.ID {
+				leftSub, err := readTree(left.ID)
+				if err != nil {
+					return err
+				}
+				rightSub, err := readTree(right.ID)
+				if err != nil {
+					return err
+				}
+				if err := diffRecursive(leftSub, rightSub, full, readTree, out); err != nil {
+					return err
+				}
+			}
+			i++
+			j++
+		}
+	}
+
+	for ; i < len(a.Entries); i++ {
+		left := &a.Entries[i]
+		full := joinPath(prefix, left.Name)
+		*out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: left, New: nil})
+		if left.Mode == object.FileModeDir {
+			sub, err := readTree(left.ID)
+			if err != nil {
+				return err
+			}
+			if err := diffRecursive(sub, nil, full, readTree, out); err != nil {
+				return err
+			}
+		}
+	}
+
+	for ; j < len(b.Entries); j++ {
+		right := &b.Entries[j]
+		full := joinPath(prefix, right.Name)
+		*out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: right})
+		if right.Mode == object.FileModeDir {
+			sub, err := readTree(right.ID)
+			if err != nil {
+				return err
+			}
+			if err := diffRecursive(nil, sub, full, readTree, out); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
--- /dev/null
+++ b/diff/trees/diff_test.go
@@ -1,0 +1,244 @@
+package trees_test
+
+import (
+	"errors"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"codeberg.org/lindenii/furgit/diff/trees"
+	"codeberg.org/lindenii/furgit/internal/testgit"
+	"codeberg.org/lindenii/furgit/object"
+	"codeberg.org/lindenii/furgit/objectid"
+	"codeberg.org/lindenii/furgit/objectstore/loose"
+	"codeberg.org/lindenii/furgit/objecttype"
+)
+
+func TestDiffComplexNestedChanges(t *testing.T) {
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) {
+		repo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: false})
+
+		writeTestFile(t, filepath.Join(repo.Dir(), "README.md"), "initial readme\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "unchanged.txt"), "leave me as-is\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "file_a.txt"), "alpha v1\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "file_b.txt"), "beta v1\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "file_c.txt"), "gamma v1\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "old.txt"), "old branch\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "treeB", "legacy.txt"), "legacy root\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "treeB", "sub", "retired.txt"), "retired\n")
+
+		repo.Run(t, "add", ".")
+		baseTreeID := parseID(t, algo, repo.Run(t, "write-tree"))
+
+		writeTestFile(t, filepath.Join(repo.Dir(), "README.md"), "updated readme\n")
+		repo.Run(t, "rm", "-f", "dir/file_a.txt")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "file_b.txt"), "beta v2\n")
+		repo.Run(t, "rm", "-f", "dir/nested/deeper/old.txt")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "new.txt"), "new branch entry\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "info.md"), "branch info\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "subbranch", "leaf.txt"), "leaf data\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "subbranch", "deep", "final.txt"), "final artifact\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "dir", "newchild.txt"), "brand new sibling\n")
+		repo.Run(t, "rm", "-r", "-f", "treeB")
+		writeTestFile(t, filepath.Join(repo.Dir(), "features", "alpha", "README.md"), "alpha docs\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "features", "alpha", "beta", "gamma.txt"), "gamma payload\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "modules", "v2", "core", "main.go"), "package core\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "root_addition.txt"), "root level file\n")
+
+		repo.Run(t, "add", ".")
+		updatedTreeID := parseID(t, algo, repo.Run(t, "write-tree"))
+
+		store := openLooseStore(t, filepath.Join(repo.Dir(), ".git", "objects"), algo)
+		readTree := makeReadTree(t, store, algo)
+		baseTree := mustReadTree(t, readTree, baseTreeID)
+		updatedTree := mustReadTree(t, readTree, updatedTreeID)
+
+		diffs, err := trees.Diff(baseTree, updatedTree, readTree)
+		if err != nil {
+			t.Fatalf("trees.Diff: %v", err)
+		}
+
+		expected := map[string]diffExpectation{
+			"README.md":                                   {kind: trees.EntryKindModified},
+			"dir":                                         {kind: trees.EntryKindModified},
+			"dir/file_a.txt":                              {kind: trees.EntryKindDeleted, newNil: true},
+			"dir/newchild.txt":                            {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested":                                  {kind: trees.EntryKindModified},
+			"dir/nested/file_b.txt":                       {kind: trees.EntryKindModified},
+			"dir/nested/deeper":                           {kind: trees.EntryKindModified},
+			"dir/nested/deeper/old.txt":                   {kind: trees.EntryKindDeleted, newNil: true},
+			"dir/nested/deeper/new.txt":                   {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch":                    {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch/info.md":            {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch/subbranch":          {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch/subbranch/leaf.txt": {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch/subbranch/deep":     {kind: trees.EntryKindAdded, oldNil: true},
+			"dir/nested/deeper/branch/subbranch/deep/final.txt": {
+				kind:   trees.EntryKindAdded,
+				oldNil: true,
+			},
+			"features":                      {kind: trees.EntryKindAdded, oldNil: true},
+			"features/alpha":                {kind: trees.EntryKindAdded, oldNil: true},
+			"features/alpha/README.md":      {kind: trees.EntryKindAdded, oldNil: true},
+			"features/alpha/beta":           {kind: trees.EntryKindAdded, oldNil: true},
+			"features/alpha/beta/gamma.txt": {kind: trees.EntryKindAdded, oldNil: true},
+			"modules":                       {kind: trees.EntryKindAdded, oldNil: true},
+			"modules/v2":                    {kind: trees.EntryKindAdded, oldNil: true},
+			"modules/v2/core":               {kind: trees.EntryKindAdded, oldNil: true},
+			"modules/v2/core/main.go":       {kind: trees.EntryKindAdded, oldNil: true},
+			"root_addition.txt":             {kind: trees.EntryKindAdded, oldNil: true},
+			"treeB":                         {kind: trees.EntryKindDeleted, newNil: true},
+			"treeB/legacy.txt":              {kind: trees.EntryKindDeleted, newNil: true},
+			"treeB/sub":                     {kind: trees.EntryKindDeleted, newNil: true},
+			"treeB/sub/retired.txt":         {kind: trees.EntryKindDeleted, newNil: true},
+		}
+
+		checkDiffs(t, diffs, expected)
+	})
+}
+
+func TestDiffDirectoryAddDeleteDeep(t *testing.T) {
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) {
+		repo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: false})
+
+		writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "old.txt"), "stale directory\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "sub1", "legacy.txt"), "legacy path\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "sub1", "nested", "end.txt"), "legacy end\n")
+
+		repo.Run(t, "add", ".")
+		originalTreeID := parseID(t, algo, repo.Run(t, "write-tree"))
+
+		repo.Run(t, "rm", "-r", "-f", "old_dir")
+		writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "beta", "new.txt"), "brand new directory\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "docs", "note.md"), "docs note\n")
+		writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "beta", "gamma", "delta.txt"), "delta payload\n")
+
+		repo.Run(t, "add", ".")
+		nextTreeID := parseID(t, algo, repo.Run(t, "write-tree"))
+
+		store := openLooseStore(t, filepath.Join(repo.Dir(), ".git", "objects"), algo)
+		readTree := makeReadTree(t, store, algo)
+		originalTree := mustReadTree(t, readTree, originalTreeID)
+		nextTree := mustReadTree(t, readTree, nextTreeID)
+
+		diffs, err := trees.Diff(originalTree, nextTree, readTree)
+		if err != nil {
+			t.Fatalf("trees.Diff: %v", err)
+		}
+
+		expected := map[string]diffExpectation{
+			"fresh":                            {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha":                      {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/beta":                 {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/beta/new.txt":         {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/beta/gamma":           {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/beta/gamma/delta.txt": {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/docs":                 {kind: trees.EntryKindAdded, oldNil: true},
+			"fresh/alpha/docs/note.md":         {kind: trees.EntryKindAdded, oldNil: true},
+			"old_dir":                          {kind: trees.EntryKindDeleted, newNil: true},
+			"old_dir/old.txt":                  {kind: trees.EntryKindDeleted, newNil: true},
+			"old_dir/sub1":                     {kind: trees.EntryKindDeleted, newNil: true},
+			"old_dir/sub1/legacy.txt":          {kind: trees.EntryKindDeleted, newNil: true},
+			"old_dir/sub1/nested":              {kind: trees.EntryKindDeleted, newNil: true},
+			"old_dir/sub1/nested/end.txt":      {kind: trees.EntryKindDeleted, newNil: true},
+		}
+
+		checkDiffs(t, diffs, expected)
+	})
+}
+
+type diffExpectation struct {
+	kind   trees.EntryKind
+	oldNil bool
+	newNil bool
+}
+
+func writeTestFile(t *testing.T, path, data string) {
+	t.Helper()
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		t.Fatalf("create directory for %s: %v", path, err)
+	}
+	if err := os.WriteFile(path, []byte(data), 0o644); err != nil {
+		t.Fatalf("write %s: %v", path, err)
+	}
+}
+
+func openLooseStore(t *testing.T, objectsPath string, algo objectid.Algorithm) *loose.Store {
+	t.Helper()
+	root, err := os.OpenRoot(objectsPath)
+	if err != nil {
+		t.Fatalf("OpenRoot(%q): %v", objectsPath, err)
+	}
+	t.Cleanup(func() { _ = root.Close() })
+	store, err := loose.New(root, algo)
+	if err != nil {
+		t.Fatalf("loose.New: %v", err)
+	}
+	t.Cleanup(func() { _ = store.Close() })
+	return store
+}
+
+func makeReadTree(t *testing.T, store *loose.Store, algo objectid.Algorithm) func(objectid.ObjectID) (*object.Tree, error) {
+	t.Helper()
+	return func(id objectid.ObjectID) (*object.Tree, error) {
+		ty, content, err := store.ReadBytesContent(id)
+		if err != nil {
+			return nil, err
+		}
+		if ty != objecttype.TypeTree {
+			return nil, errors.New("diff/trees test: object is not a tree")
+		}
+		return object.ParseTree(content, algo)
+	}
+}
+
+func mustReadTree(t *testing.T, readTree func(objectid.ObjectID) (*object.Tree, error), id objectid.ObjectID) *object.Tree {
+	t.Helper()
+	tree, err := readTree(id)
+	if err != nil {
+		t.Fatalf("read tree %s: %v", id, err)
+	}
+	return tree
+}
+
+func parseID(t *testing.T, algo objectid.Algorithm, hex string) objectid.ObjectID {
+	t.Helper()
+	id, err := objectid.ParseHex(algo, hex)
+	if err != nil {
+		t.Fatalf("parse object id %q: %v", hex, err)
+	}
+	return id
+}
+
+func checkDiffs(t *testing.T, diffs []trees.Entry, expected map[string]diffExpectation) {
+	t.Helper()
+	got := make(map[string]trees.Entry, len(diffs))
+	for _, diff := range diffs {
+		path := string(diff.Path)
+		if _, exists := got[path]; exists {
+			t.Fatalf("duplicate diff path %q", path)
+		}
+		got[path] = diff
+	}
+	if len(got) != len(expected) {
+		t.Fatalf("diff count = %d, want %d", len(got), len(expected))
+	}
+	for path, want := range expected {
+		diff, ok := got[path]
+		if !ok {
+			t.Fatalf("missing diff for %q", path)
+		}
+		if diff.Kind != want.kind {
+			t.Errorf("%s kind = %v, want %v", path, diff.Kind, want.kind)
+		}
+		if (diff.Old == nil) != want.oldNil {
+			t.Errorf("%s old nil = %v, want %v", path, diff.Old == nil, want.oldNil)
+		}
+		if (diff.New == nil) != want.newNil {
+			t.Errorf("%s new nil = %v, want %v", path, diff.New == nil, want.newNil)
+		}
+		if diff.Kind == trees.EntryKindModified && diff.Old != nil && diff.New != nil && diff.Old.ID == diff.New.ID {
+			t.Errorf("%s modified entry should change IDs", path)
+		}
+	}
+}
--- /dev/null
+++ b/diff/trees/entry.go
@@ -1,0 +1,15 @@
+package trees
+
+import "codeberg.org/lindenii/furgit/object"
+
+// Entry is one recursive tree difference at a path.
+type Entry struct {
+	// Path is the slash-separated path relative to the diff root.
+	Path []byte
+	// Kind is the difference kind for this path.
+	Kind EntryKind
+	// Old is the old tree entry (nil when Kind is EntryKindAdded).
+	Old *object.TreeEntry
+	// New is the new tree entry (nil when Kind is EntryKindDeleted).
+	New *object.TreeEntry
+}
--- /dev/null
+++ b/diff/trees/kind.go
@@ -1,0 +1,15 @@
+package trees
+
+// EntryKind identifies a tree-diff entry kind.
+type EntryKind int
+
+const (
+	// EntryKindInvalid indicates an invalid diff entry kind.
+	EntryKindInvalid EntryKind = iota
+	// EntryKindDeleted indicates a deleted path.
+	EntryKindDeleted
+	// EntryKindAdded indicates an added path.
+	EntryKindAdded
+	// EntryKindModified indicates a modified path.
+	EntryKindModified
+)
--- /dev/null
+++ b/diff/trees/path.go
@@ -1,0 +1,14 @@
+package trees
+
+func joinPath(prefix, name []byte) []byte {
+	if len(prefix) == 0 {
+		out := make([]byte, len(name))
+		copy(out, name)
+		return out
+	}
+	out := make([]byte, len(prefix)+1+len(name))
+	copy(out, prefix)
+	out[len(prefix)] = '/'
+	copy(out[len(prefix)+1:], name)
+	return out
+}
--