shithub: furgit

Download patch

ref: 1ea061c92ca6ad435c00bea458b8f24a5e1a822a
parent: dd59d664675ad7dfef033311d279c6f988b367d4
author: Runxi Yu <me@runxiyu.org>
date: Fri Jan 30 10:46:49 EST 2026

commitgraph: Add basic commit-graph implementation

--- /dev/null
+++ b/commitgraph_read.go
@@ -1,0 +1,600 @@
+package furgit
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"syscall"
+
+	"codeberg.org/lindenii/furgit/internal/bloom"
+)
+
+// Magic signature ("CGPH") and the only supported format version of the
+// commit-graph file.
+const (
+	commitGraphSignature = 0x43475048 // "CGPH"
+	commitGraphVersion   = 1
+)
+
+// Chunk identifiers from the commit-graph on-disk format: four ASCII
+// bytes read as a big-endian uint32.
+const (
+	commitGraphChunkOIDF = 0x4f494446 // "OIDF"
+	commitGraphChunkOIDL = 0x4f49444c // "OIDL"
+	commitGraphChunkCDAT = 0x43444154 // "CDAT"
+	commitGraphChunkGDA2 = 0x47444132 // "GDA2"
+	commitGraphChunkGDO2 = 0x47444f32 // "GDO2"
+	commitGraphChunkEDGE = 0x45444745 // "EDGE"
+	commitGraphChunkBIDX = 0x42494458 // "BIDX"
+	commitGraphChunkBDAT = 0x42444154 // "BDAT"
+	commitGraphChunkBASE = 0x42415345 // "BASE"
+)
+
+// Fixed layout sizes: the 8-byte header, the 12-byte table-of-contents
+// entries (4-byte chunk id + 8-byte offset), and the 256-entry fanout.
+const (
+	commitGraphHeaderSize     = 8
+	commitGraphChunkEntrySize = 12
+	commitGraphFanoutSize     = 256 * 4
+)
+
+// commitGraph is a read-only, mmap-backed view of one commit-graph file,
+// optionally one layer of a split commit-graph chain.
+type commitGraph struct {
+	repo *Repository
+
+	// Backing file path and mmap'd contents; data is nil after Close.
+	filename string
+	data     []byte
+	dataLen  int
+
+	// Header fields: chunk count, number of base layers, and the hash
+	// algorithm byte the file declared.
+	numChunks uint8
+	numBase   uint8
+	hashAlgo  hashAlgorithm
+
+	// numCommits counts commits in this layer only; numCommitsInBase is
+	// the total held by all layers below it in the chain.
+	numCommits       uint32
+	numCommitsInBase uint32
+
+	// base is the next-older layer in a split chain (nil for a single
+	// file or for the bottom layer).
+	base *commitGraph
+
+	// Raw chunk payloads, sliced out of data during parsing.
+	chunkOIDFanout []byte
+	chunkOIDLookup []byte
+	chunkCommit    []byte
+
+	// Generation-data chunks (GDA2/GDO2); readGeneration is true only
+	// when GDA2 was present (and, after chain validation, consistent
+	// across every layer).
+	chunkGeneration   []byte
+	chunkGenerationOv []byte
+	readGeneration    bool
+
+	// EDGE chunk used by commits with more than two parents.
+	chunkExtraEdges []byte
+
+	// Changed-path Bloom filter chunks; either both set or both nil.
+	chunkBloomIndex []byte
+	chunkBloomData  []byte
+
+	// BASE chunk listing the hashes of the layers below this one.
+	chunkBaseGraphs []byte
+
+	bloomSettings *bloom.Settings
+
+	// closeOnce guards the munmap in Close.
+	closeOnce sync.Once
+}
+
+// Close unmaps the commit-graph data and closes any base layers. It is
+// nil-safe and idempotent; the first error encountered is returned.
+func (cg *commitGraph) Close() error {
+	if cg == nil {
+		return nil
+	}
+	var closeErr error
+	cg.closeOnce.Do(func() {
+		if len(cg.data) > 0 {
+			// Fix: the original condition tested closeErr (always nil at
+			// this point) instead of the munmap error itself.
+			if err := syscall.Munmap(cg.data); err != nil {
+				closeErr = err
+			}
+			cg.data = nil
+		}
+	})
+	if cg.base != nil {
+		// Fix: propagate a base-layer close failure instead of
+		// silently discarding it.
+		if err := cg.base.Close(); err != nil && closeErr == nil {
+			closeErr = err
+		}
+	}
+	return closeErr
+}
+
+// CommitGraph lazily loads the repository's commit-graph and caches both
+// the result and any load error for the lifetime of the Repository.
+func (repo *Repository) CommitGraph() (*commitGraph, error) {
+	if repo == nil {
+		return nil, ErrInvalidObject
+	}
+	repo.commitGraphOnce.Do(func() {
+		repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph()
+	})
+	return repo.commitGraph, repo.commitGraphErr
+}
+
+// loadCommitGraph prefers the single objects/info/commit-graph file and
+// falls back to a split commit-graph chain only when that file is absent.
+func (repo *Repository) loadCommitGraph() (*commitGraph, error) {
+	cg, err := repo.loadCommitGraphSingle()
+	if err == nil {
+		return cg, nil
+	}
+	if !errors.Is(err, ErrNotFound) {
+		return nil, err
+	}
+	return repo.loadCommitGraphChain()
+}
+
+// loadCommitGraphSingle reads the non-split commit-graph file stored at
+// objects/info/commit-graph.
+func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) {
+	return repo.loadCommitGraphFile(
+		repo.repoPath(filepath.Join("objects", "info", "commit-graph")))
+}
+
+// loadCommitGraphChain reads objects/info/commit-graphs/commit-graph-chain,
+// loads each listed graph-<hash>.graph layer oldest-first, checks that each
+// layer's BASE chunk names all layers below it in order, and links the
+// layers into a list whose head is the newest layer.
+func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) {
+	chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain"))
+	f, err := os.Open(chainPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, ErrNotFound
+		}
+		return nil, err
+	}
+	defer func() { _ = f.Close() }()
+
+	// One graph hash per non-empty line, oldest layer first.
+	var hashes []string
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" {
+			continue
+		}
+		hashes = append(hashes, line)
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+	if len(hashes) == 0 {
+		return nil, ErrNotFound
+	}
+
+	var chain *commitGraph
+	for i, h := range hashes {
+		graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h)))
+		g, err := repo.loadCommitGraphFile(graphPath)
+		if err != nil {
+			return nil, err
+		}
+		if i > 0 {
+			// Layer i must carry a BASE chunk naming layers 0..i-1.
+			if g.chunkBaseGraphs == nil {
+				return nil, ErrInvalidObject
+			}
+			if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() {
+				return nil, ErrInvalidObject
+			}
+			for j := 0; j < i; j++ {
+				want := hashes[j]
+				start := j * repo.hashAlgo.Size()
+				end := start + repo.hashAlgo.Size()
+				got := hexEncode(g.chunkBaseGraphs[start:end])
+				if got != want {
+					return nil, ErrInvalidObject
+				}
+			}
+		}
+		if chain != nil {
+			// Fix: guard against uint32 overflow of the cumulative commit
+			// count. The previous check "a+b > ^uint32(0)" could never
+			// fire because uint32 addition wraps instead of exceeding the
+			// maximum; test via subtraction instead.
+			if chain.numCommits > ^uint32(0)-chain.numCommitsInBase {
+				return nil, ErrInvalidObject
+			}
+			g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase
+		}
+		g.base = chain
+		chain = g
+	}
+	validateCommitGraphChain(chain)
+	return chain, nil
+}
+
+// validateCommitGraphChain normalizes optional data across a chain:
+// corrected-generation data is used only when every layer provides it,
+// and Bloom chunks are dropped from layers whose settings disagree with
+// the first layer (walking head-to-base) that declared settings.
+func validateCommitGraphChain(head *commitGraph) {
+	if head == nil {
+		return
+	}
+	// Generation data is all-or-nothing across the chain.
+	readGeneration := true
+	for g := head; g != nil; g = g.base {
+		readGeneration = readGeneration && g.readGeneration
+	}
+	if !readGeneration {
+		for g := head; g != nil; g = g.base {
+			g.readGeneration = false
+		}
+	}
+
+	// The first settings seen win; mismatching layers lose their filters.
+	var settings *bloom.Settings
+	for g := head; g != nil; g = g.base {
+		if g.bloomSettings == nil {
+			continue
+		}
+		if settings == nil {
+			settings = g.bloomSettings
+			continue
+		}
+		if settings.HashVersion != g.bloomSettings.HashVersion ||
+			settings.NumHashes != g.bloomSettings.NumHashes ||
+			settings.BitsPerEntry != g.bloomSettings.BitsPerEntry {
+			g.chunkBloomIndex = nil
+			g.chunkBloomData = nil
+			g.bloomSettings = nil
+		}
+	}
+}
+
+// loadCommitGraphFile opens, mmaps, and parses a single commit-graph
+// file. The mapping is private and read-only; on any failure it is
+// unmapped before returning. On success the returned graph owns the
+// mapping and must be released with Close.
+func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, ErrNotFound
+		}
+		return nil, err
+	}
+	stat, err := f.Stat()
+	if err != nil {
+		_ = f.Close()
+		return nil, err
+	}
+	// Reject anything too small to hold a header plus a full fanout.
+	if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) {
+		_ = f.Close()
+		return nil, ErrInvalidObject
+	}
+	region, err := syscall.Mmap(
+		int(f.Fd()),
+		0,
+		int(stat.Size()),
+		syscall.PROT_READ,
+		syscall.MAP_PRIVATE,
+	)
+	if err != nil {
+		_ = f.Close()
+		return nil, err
+	}
+	// The fd can be closed once the mapping exists; the mapping stays
+	// valid independently of the file descriptor.
+	err = f.Close()
+	if err != nil {
+		_ = syscall.Munmap(region)
+		return nil, err
+	}
+	cg, err := parseCommitGraph(repo, path, region)
+	if err != nil {
+		_ = syscall.Munmap(region)
+		return nil, err
+	}
+	cg.data = region
+	return cg, nil
+}
+
+// parseCommitGraph validates the header, table of contents, mandatory
+// chunks (OIDF/OIDL/CDAT), and optional chunks of a commit-graph file,
+// then verifies the trailing checksum. The returned graph holds slices
+// into data; the caller retains ownership of the mapping.
+func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) {
+	if len(data) < commitGraphHeaderSize {
+		return nil, ErrInvalidObject
+	}
+	sig := binary.BigEndian.Uint32(data[0:4])
+	if sig != commitGraphSignature {
+		return nil, ErrInvalidObject
+	}
+	version := data[4]
+	if version != commitGraphVersion {
+		return nil, ErrInvalidObject
+	}
+	// The file's hash-version byte must match the repository's algorithm.
+	hashVersion := data[5]
+	if hashVersion != hashAlgoVersion(repo.hashAlgo) {
+		return nil, ErrInvalidObject
+	}
+	numChunks := data[6]
+	numBase := data[7]
+
+	// The TOC holds numChunks entries plus a terminating entry whose
+	// offset marks the end of the last chunk.
+	tocStart := commitGraphHeaderSize
+	tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize
+	if len(data) < tocStart+tocLen+repo.hashAlgo.Size() {
+		return nil, ErrInvalidObject
+	}
+
+	trailerStart := len(data) - repo.hashAlgo.Size()
+
+	type chunkEntry struct {
+		id     uint32
+		offset uint64
+	}
+	entries := make([]chunkEntry, 0, numChunks+1)
+	for i := 0; i < int(numChunks)+1; i++ {
+		pos := tocStart + i*commitGraphChunkEntrySize
+		id := binary.BigEndian.Uint32(data[pos : pos+4])
+		offset := binary.BigEndian.Uint64(data[pos+4 : pos+12])
+		entries = append(entries, chunkEntry{id: id, offset: offset})
+	}
+
+	// Slice out each chunk, bounds-checking every [start,end) against the
+	// region between the TOC and the trailer.
+	chunks := map[uint32][]byte{}
+	for i := 0; i < len(entries)-1; i++ {
+		id := entries[i].id
+		if id == 0 {
+			break
+		}
+		start := int(entries[i].offset)
+		end := int(entries[i+1].offset)
+		if start < tocStart+tocLen || end < start || end > trailerStart {
+			return nil, ErrInvalidObject
+		}
+		chunks[id] = data[start:end]
+	}
+
+	cg := &commitGraph{
+		repo:      repo,
+		filename:  filename,
+		dataLen:   len(data),
+		hashAlgo:  repo.hashAlgo,
+		numChunks: numChunks,
+		numBase:   numBase,
+	}
+
+	// OIDF is mandatory; its last entry is the total commit count, and
+	// the cumulative counts must be monotonically non-decreasing.
+	oidf := chunks[commitGraphChunkOIDF]
+	if len(oidf) != commitGraphFanoutSize {
+		return nil, ErrInvalidObject
+	}
+	cg.chunkOIDFanout = oidf
+	cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:])
+	for i := 0; i < 255; i++ {
+		if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) >
+			binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) {
+			return nil, ErrInvalidObject
+		}
+	}
+
+	// OIDL: one full-width OID per commit, sorted (sortedness is relied
+	// on by the binary search, not re-verified here).
+	oidl := chunks[commitGraphChunkOIDL]
+	if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() {
+		return nil, ErrInvalidObject
+	}
+	cg.chunkOIDLookup = oidl
+
+	// CDAT: one fixed-size record per commit.
+	cdat := chunks[commitGraphChunkCDAT]
+	if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) {
+		return nil, ErrInvalidObject
+	}
+	cg.chunkCommit = cdat
+
+	// Optional corrected-generation data: GDA2 holds one uint32 per
+	// commit; GDO2 holds 8-byte overflow entries.
+	if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 {
+		if len(gda2) != int(cg.numCommits)*4 {
+			return nil, ErrInvalidObject
+		}
+		cg.chunkGeneration = gda2
+		cg.readGeneration = true
+	}
+	if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 {
+		cg.chunkGenerationOv = gdo2
+	}
+
+	// Optional EDGE chunk: a list of uint32 parent positions.
+	if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 {
+		if len(edge)%4 != 0 {
+			return nil, ErrInvalidObject
+		}
+		cg.chunkExtraEdges = edge
+	}
+
+	// BASE presence must agree with the header's base-graph count.
+	if base := chunks[commitGraphChunkBASE]; len(base) != 0 {
+		if numBase == 0 {
+			return nil, ErrInvalidObject
+		}
+		if len(base) != int(numBase)*repo.hashAlgo.Size() {
+			return nil, ErrInvalidObject
+		}
+		cg.chunkBaseGraphs = base
+	} else if numBase != 0 {
+		return nil, ErrInvalidObject
+	}
+
+	// Optional Bloom chunks: BIDX holds one cumulative end offset per
+	// commit; BDAT holds a settings header followed by filter bytes.
+	if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 {
+		if len(bidx) != int(cg.numCommits)*4 {
+			return nil, ErrInvalidObject
+		}
+		cg.chunkBloomIndex = bidx
+	}
+	if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 {
+		if len(bdat) < bloom.DataHeaderSize {
+			return nil, ErrInvalidObject
+		}
+		cg.chunkBloomData = bdat
+		settings, err := bloom.ParseSettings(bdat)
+		if err != nil {
+			return nil, err
+		}
+		cg.bloomSettings = settings
+	}
+
+	// BIDX and BDAT are only usable together; drop an unpaired chunk.
+	if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) {
+		cg.chunkBloomIndex = nil
+		cg.chunkBloomData = nil
+		cg.bloomSettings = nil
+	}
+	// BIDX offsets must be non-decreasing and fall within BDAT's payload.
+	if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil {
+		for i := 0; i < int(cg.numCommits); i++ {
+			cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4])
+			if i > 0 {
+				prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4])
+				if cur < prev {
+					return nil, ErrInvalidObject
+				}
+			}
+			if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize {
+				return nil, ErrInvalidObject
+			}
+		}
+	}
+
+	if err := verifyCommitGraphTrailer(repo, data); err != nil {
+		return nil, err
+	}
+
+	return cg, nil
+}
+
+func commitGraphCommitDataSize(algo hashAlgorithm) int {
+	return algo.Size() + 16
+}
+
+// hashAlgoVersion maps a hash algorithm to the commit-graph header's
+// hash-version byte: 1 for SHA-1, 2 for SHA-256, 0 for unknown.
+func hashAlgoVersion(algo hashAlgorithm) byte {
+	switch algo {
+	case hashAlgoSHA1:
+		return 1
+	case hashAlgoSHA256:
+		return 2
+	default:
+		return 0
+	}
+}
+
+// verifyCommitGraphTrailer recomputes the checksum over everything
+// preceding the trailing hash and compares it with the stored trailer.
+func verifyCommitGraphTrailer(repo *Repository, data []byte) error {
+	if repo == nil {
+		return ErrInvalidObject
+	}
+	size := repo.hashAlgo.Size()
+	if len(data) < size {
+		return ErrInvalidObject
+	}
+	body, want := data[:len(data)-size], data[len(data)-size:]
+	hasher, err := repo.hashAlgo.New()
+	if err != nil {
+		return err
+	}
+	_, _ = hasher.Write(body)
+	if !bytes.Equal(hasher.Sum(nil), want) {
+		return ErrInvalidObject
+	}
+	return nil
+}
+
+// hexEncode returns the lowercase hexadecimal representation of b.
+func hexEncode(b []byte) string {
+	const digits = "0123456789abcdef"
+	var sb strings.Builder
+	sb.Grow(len(b) * 2)
+	for _, v := range b {
+		sb.WriteByte(digits[v>>4])
+		sb.WriteByte(digits[v&0x0f])
+	}
+	return sb.String()
+}
+
+// lookupOID returns the graph position of id, searching this layer and
+// then any base layers; ok is false when the commit is absent.
+func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) {
+	if cg != nil {
+		return commitGraphBsearchOID(cg, id)
+	}
+	return 0, false
+}
+
+// commitGraphBsearchOID binary-searches the OIDL chunk for id within the
+// fanout bucket selected by the OID's first byte, recursing into base
+// layers on a miss. The returned position is chain-global (this layer's
+// lexical index plus the commits held in base layers).
+func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) {
+	if cg == nil || len(cg.chunkOIDLookup) == 0 {
+		return 0, false
+	}
+	hashSize := cg.hashAlgo.Size()
+	// Fanout entry i holds the cumulative count of OIDs whose first byte
+	// is <= i, giving [start,end) bounds for the bucket.
+	first := int(id.data[0])
+	fanout := cg.chunkOIDFanout
+	var start uint32
+	if first > 0 {
+		start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4])
+	}
+	end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4])
+	lo := int(start)
+	hi := int(end) - 1
+	for lo <= hi {
+		mid := lo + (hi-lo)/2
+		pos := mid * hashSize
+		cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize])
+		if cmp == 0 {
+			return uint32(mid) + cg.numCommitsInBase, true
+		}
+		if cmp < 0 {
+			lo = mid + 1
+		} else {
+			hi = mid - 1
+		}
+	}
+	// Not in this layer; try the older layers of a split chain.
+	if cg.base != nil {
+		return commitGraphBsearchOID(cg.base, id)
+	}
+	return 0, false
+}
+
+// loadOID resolves a chain-global position back to its OID by first
+// walking down to the layer that owns the position, then indexing that
+// layer's OIDL chunk with the layer-local (lexical) index.
+func (cg *commitGraph) loadOID(pos uint32) (Hash, error) {
+	g := cg
+	for g != nil && pos < g.numCommitsInBase {
+		g = g.base
+	}
+	if g == nil {
+		return Hash{}, ErrInvalidObject
+	}
+	if pos >= g.numCommits+g.numCommitsInBase {
+		return Hash{}, ErrInvalidObject
+	}
+	lex := pos - g.numCommitsInBase
+	hashSize := g.hashAlgo.Size()
+	start := int(lex) * hashSize
+	end := start + hashSize
+	var out Hash
+	copy(out.data[:], g.chunkOIDLookup[start:end])
+	out.algo = g.hashAlgo
+	return out, nil
+}
+
+// commitDataAt decodes the CDAT record for a chain-global position:
+// tree OID, the two inline parent edges (octopus merges spill into the
+// EDGE chunk), the commit time, and the generation number — corrected
+// from GDA2/GDO2 when generation data is available.
+func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) {
+	g := cg
+	for g != nil && pos < g.numCommitsInBase {
+		g = g.base
+	}
+	if g == nil {
+		return nil, ErrInvalidObject
+	}
+	if pos >= g.numCommits+g.numCommitsInBase {
+		return nil, ErrInvalidObject
+	}
+	lex := pos - g.numCommitsInBase
+	stride := commitGraphCommitDataSize(g.hashAlgo)
+	start := int(lex) * stride
+	end := start + stride
+	if end > len(g.chunkCommit) {
+		return nil, ErrInvalidObject
+	}
+	buf := g.chunkCommit[start:end]
+
+	// Record layout: tree OID, then two 4-byte parent words, then two
+	// 4-byte generation/time words.
+	var tree Hash
+	hashSize := g.hashAlgo.Size()
+	copy(tree.data[:], buf[:hashSize])
+	tree.algo = g.hashAlgo
+
+	p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4])
+	p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8])
+
+	genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12])
+	timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16])
+
+	// The first gen/time word packs the topological generation in its
+	// upper 30 bits and the commit time's high 2 bits in its lower 2.
+	timeHigh := genTime & 0x3
+	commitTime := (uint64(timeHigh) << 32) | uint64(timeLow)
+	generation := uint64(genTime >> 2)
+
+	// With GDA2 present, the generation is the corrected commit date:
+	// commit time plus a per-commit offset; an offset with the high bit
+	// set is an index into the GDO2 64-bit overflow table instead.
+	if g.readGeneration {
+		genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:])
+		if genOffset&correctedGenerationOverflow != 0 {
+			off := genOffset ^ correctedGenerationOverflow
+			idx := int(off) * 8
+			if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) {
+				return nil, ErrInvalidObject
+			}
+			offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8])
+			generation = commitTime + offset
+		} else {
+			generation = commitTime + uint64(genOffset)
+		}
+	}
+
+	var parents []uint32
+	var err error
+	parents, err = commitGraphAppendParent(parents, g, p1)
+	if err != nil {
+		return nil, err
+	}
+	parents, err = commitGraphAppendParent(parents, g, p2)
+	if err != nil {
+		return nil, err
+	}
+
+	return &commitGraphCommit{
+		Position:    pos,
+		Tree:        tree,
+		Parents:     parents,
+		CommitTime:  commitTime,
+		Generation:  generation,
+		Graph:       g,
+		graphLexPos: lex,
+	}, nil
+}
--- /dev/null
+++ b/commitgraph_read_access.go
@@ -1,0 +1,124 @@
+package furgit
+
+import (
+	"encoding/binary"
+
+	"codeberg.org/lindenii/furgit/internal/bloom"
+)
+
+const (
+	// Sentinel meaning "no parent in this slot" of a CDAT record.
+	commitGraphParentNone = 0x70000000
+	// High bit marks the second parent word as an index into EDGE.
+	commitGraphParentExtraMask = 0x80000000
+	// Mask extracting the position/index from a parent word; in EDGE
+	// entries the high bit instead marks the final parent.
+	commitGraphParentLastMask = 0x7fffffff
+
+	// High bit of a GDA2 entry: the value is a GDO2 overflow index.
+	correctedGenerationOverflow = 0x80000000
+)
+
+// commitGraphCommit is one decoded commit-graph entry.
+type commitGraphCommit struct {
+	// Position is the chain-global graph position of the commit.
+	Position uint32
+	// Tree is the commit's root tree OID.
+	Tree Hash
+	// Parents lists parent graph positions in order.
+	Parents []uint32
+	// CommitTime is seconds since the epoch (34-bit on disk).
+	CommitTime uint64
+	// Generation is the generation number (corrected commit date when
+	// generation data v2 is available).
+	Generation uint64
+	// Graph is the chain layer the entry was decoded from.
+	Graph *commitGraph
+	// graphLexPos is the layer-local lexical index within Graph.
+	graphLexPos uint32
+}
+
+// commitGraphAppendParent appends the parent(s) encoded by one CDAT
+// parent word: nothing for the "no parent" sentinel, the position itself
+// for a plain edge, or — when the extra-edges bit is set — the run of
+// EDGE entries starting at the encoded index, ending at the entry whose
+// high bit is set.
+func commitGraphAppendParent(parents []uint32, g *commitGraph, edge uint32) ([]uint32, error) {
+	if edge == commitGraphParentNone {
+		return parents, nil
+	}
+	if (edge & commitGraphParentExtraMask) == 0 {
+		return parents, nil
+	}
+	if g.chunkExtraEdges == nil {
+		return nil, ErrInvalidObject
+	}
+	idx := edge & commitGraphParentLastMask
+	for {
+		off := int(idx) * 4
+		// Bounds check also guarantees the loop terminates.
+		if off+4 > len(g.chunkExtraEdges) {
+			return nil, ErrInvalidObject
+		}
+		val := binary.BigEndian.Uint32(g.chunkExtraEdges[off : off+4])
+		parents = append(parents, val&commitGraphParentLastMask)
+		idx++
+		if (val & commitGraphParentExtraMask) != 0 {
+			break
+		}
+	}
+	return parents, nil
+}
+
+// CommitPosition returns the graph position of id, or false if the
+// commit is not in the graph (or any base layer).
+func (cg *commitGraph) CommitPosition(id Hash) (uint32, bool) {
+	return cg.lookupOID(id)
+}
+
+// CommitAt decodes the commit-graph entry at a chain-global position.
+func (cg *commitGraph) CommitAt(pos uint32) (*commitGraphCommit, error) {
+	return cg.commitDataAt(pos)
+}
+
+// OIDAt returns the OID stored at a chain-global graph position.
+func (cg *commitGraph) OIDAt(pos uint32) (Hash, error) {
+	return cg.loadOID(pos)
+}
+
+// HasBloom reports whether the commit's layer carries changed-path Bloom
+// filter chunks (both BIDX and BDAT).
+func (c *commitGraphCommit) HasBloom() bool {
+	return c != nil && c.Graph != nil && c.Graph.chunkBloomIndex != nil && c.Graph.chunkBloomData != nil
+}
+
+// BloomSettings returns the graph's changed-path Bloom filter settings,
+// or nil when none are present (or the receiver is nil).
+func (cg *commitGraph) BloomSettings() *bloom.Settings {
+	if cg != nil {
+		return cg.bloomSettings
+	}
+	return nil
+}
+
+// BloomFilter returns the changed-path Bloom filter for a chain-global
+// position. BIDX entries are cumulative end offsets into BDAT's payload
+// (after its settings header); the filter spans [prev entry, entry).
+// Returns false when the owning layer has no filters or the offsets are
+// out of range.
+func (cg *commitGraph) BloomFilter(pos uint32) (*bloom.Filter, bool) {
+	g := cg
+	for g != nil && pos < g.numCommitsInBase {
+		g = g.base
+	}
+	if g == nil || g.chunkBloomIndex == nil || g.chunkBloomData == nil || g.bloomSettings == nil {
+		return nil, false
+	}
+	if pos >= g.numCommits+g.numCommitsInBase {
+		return nil, false
+	}
+	lex := pos - g.numCommitsInBase
+	if int(lex)*4+4 > len(g.chunkBloomIndex) {
+		return nil, false
+	}
+	end := binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex)*4:])
+	var start uint32
+	if lex > 0 {
+		start = binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex-1)*4:])
+	}
+	if end < start {
+		return nil, false
+	}
+	if int(end) > len(g.chunkBloomData)-bloom.DataHeaderSize ||
+		int(start) > len(g.chunkBloomData)-bloom.DataHeaderSize {
+		return nil, false
+	}
+	data := g.chunkBloomData[bloom.DataHeaderSize+start : bloom.DataHeaderSize+end]
+	return &bloom.Filter{Data: data, Version: g.bloomSettings.HashVersion}, true
+}
+
+// BloomFilter returns the commit's changed-path Bloom filter together
+// with the settings of the layer it came from; ok is false when the
+// commit has no usable filter.
+func (c *commitGraphCommit) BloomFilter() (*bloom.Filter, *bloom.Settings, bool) {
+	if c == nil || c.Graph == nil {
+		return nil, nil, false
+	}
+	filter, ok := c.Graph.BloomFilter(c.Position)
+	if !ok {
+		return nil, nil, false
+	}
+	return filter, c.Graph.bloomSettings, true
+}
+
+// ChangedPathMightContain reports whether the commit's Bloom filter may
+// include path. False negatives never occur for commits with a filter;
+// false positives can. Returns false when no filter is available, so a
+// false result without a filter is NOT a definitive "unchanged".
+func (c *commitGraphCommit) ChangedPathMightContain(path []byte) bool {
+	filter, settings, ok := c.BloomFilter()
+	if !ok {
+		return false
+	}
+	return filter.MightContain(path, settings)
+}
--- /dev/null
+++ b/commitgraph_read_test.go
@@ -1,0 +1,558 @@
+package furgit
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"testing"
+
+	"codeberg.org/lindenii/furgit/internal/bloom"
+)
+
+// TestCommitGraphReadBasic writes commits with git, generates a
+// commit-graph with changed-path filters, and cross-checks trees,
+// parents, commit times, and Bloom membership against git's own output.
+func TestCommitGraphReadBasic(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	files := []string{"a.txt", "b.txt", "c.txt"}
+	for i, name := range files {
+		path := filepath.Join(workDir, name)
+		content := fmt.Sprintf("content-%d\n", i)
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatalf("write %s: %v", name, err)
+		}
+		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))
+	}
+
+	gitCmd(t, repoPath, "repack", "-a", "-d")
+	gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	cg, err := repo.CommitGraph()
+	if err != nil {
+		t.Fatalf("CommitGraph failed: %v", err)
+	}
+
+	revList := gitCmd(t, repoPath, "rev-list", "--all")
+	commits := strings.Fields(revList)
+	if len(commits) == 0 {
+		t.Fatal("no commits found")
+	}
+
+	falsePositives := 0
+	for _, oidStr := range commits {
+		id, err := repo.ParseHash(oidStr)
+		if err != nil {
+			t.Fatalf("ParseHash failed: %v", err)
+		}
+		pos, ok := cg.CommitPosition(id)
+		if !ok {
+			t.Fatalf("commit %s not found in commit-graph", oidStr)
+		}
+		cc, err := cg.CommitAt(pos)
+		if err != nil {
+			t.Fatalf("CommitAt failed: %v", err)
+		}
+
+		treeStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%T", oidStr))
+		tree, err := repo.ParseHash(treeStr)
+		if err != nil {
+			t.Fatalf("ParseHash tree failed: %v", err)
+		}
+		if cc.Tree != tree {
+			t.Fatalf("tree mismatch for %s: got %s want %s", oidStr, cc.Tree.String(), tree.String())
+		}
+
+		parentStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%P", oidStr))
+		parents := strings.Fields(parentStr)
+		if len(parents) != len(cc.Parents) {
+			t.Fatalf("parent count mismatch for %s: got %d want %d", oidStr, len(cc.Parents), len(parents))
+		}
+		for i, p := range parents {
+			pid, err := repo.ParseHash(p)
+			if err != nil {
+				t.Fatalf("ParseHash parent failed: %v", err)
+			}
+			got, err := cg.OIDAt(cc.Parents[i])
+			if err != nil {
+				t.Fatalf("OIDAt parent failed: %v", err)
+			}
+			if got != pid {
+				t.Fatalf("parent mismatch for %s: got %s want %s", oidStr, got.String(), pid.String())
+			}
+		}
+
+		ctStr := gitCmd(t, repoPath, "show", "-s", "--format=%ct", oidStr)
+		ct, err := strconv.ParseInt(ctStr, 10, 64)
+		if err != nil {
+			t.Fatalf("parse commit time: %v", err)
+		}
+		if cc.CommitTime != uint64(ct) {
+			t.Fatalf("commit time mismatch for %s: got %d want %d", oidStr, cc.CommitTime, ct)
+		}
+
+		changed := gitCmd(t, repoPath, "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", oidStr)
+		for _, path := range strings.Fields(changed) {
+			if path == "" {
+				continue
+			}
+			if !cc.ChangedPathMightContain([]byte(path)) {
+				t.Fatalf("bloom filter missing changed path %q in %s", path, oidStr)
+			}
+		}
+		// Bloom filters permit false positives; count but tolerate one.
+		if cc.ChangedPathMightContain([]byte("this-path-should-not-exist-abcdefg")) {
+			falsePositives++
+		}
+	}
+	if falsePositives > 1 {
+		t.Fatalf("unexpected bloom false positive rate: %d", falsePositives)
+	}
+}
+
+// TestCommitGraphReadExtraEdges builds an octopus merge (4 parents) so
+// the commit-graph must use the EDGE chunk, then checks the parent count
+// decoded from the graph matches git's.
+func TestCommitGraphReadExtraEdges(t *testing.T) {
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	gitCmd(t, workDir, "init")
+
+	if err := os.WriteFile(filepath.Join(workDir, "base.txt"), []byte("base\n"), 0o644); err != nil {
+		t.Fatalf("write base: %v", err)
+	}
+	gitCmd(t, workDir, "add", ".")
+	gitCmd(t, workDir, "commit", "-m", "base")
+
+	gitCmd(t, workDir, "checkout", "-b", "branch1")
+	if err := os.WriteFile(filepath.Join(workDir, "b1.txt"), []byte("b1\n"), 0o644); err != nil {
+		t.Fatalf("write b1: %v", err)
+	}
+	gitCmd(t, workDir, "add", ".")
+	gitCmd(t, workDir, "commit", "-m", "b1")
+
+	gitCmd(t, workDir, "checkout", "master")
+	gitCmd(t, workDir, "checkout", "-b", "branch2")
+	if err := os.WriteFile(filepath.Join(workDir, "b2.txt"), []byte("b2\n"), 0o644); err != nil {
+		t.Fatalf("write b2: %v", err)
+	}
+	gitCmd(t, workDir, "add", ".")
+	gitCmd(t, workDir, "commit", "-m", "b2")
+
+	gitCmd(t, workDir, "checkout", "master")
+	gitCmd(t, workDir, "checkout", "-b", "branch3")
+	if err := os.WriteFile(filepath.Join(workDir, "b3.txt"), []byte("b3\n"), 0o644); err != nil {
+		t.Fatalf("write b3: %v", err)
+	}
+	gitCmd(t, workDir, "add", ".")
+	gitCmd(t, workDir, "commit", "-m", "b3")
+
+	gitCmd(t, workDir, "checkout", "master")
+	gitCmd(t, workDir, "merge", "--no-ff", "-m", "octopus", "branch1", "branch2", "branch3")
+
+	gitCmd(t, workDir, "repack", "-a", "-d")
+	gitCmd(t, workDir, "commit-graph", "write", "--reachable")
+
+	repo, err := OpenRepository(filepath.Join(workDir, ".git"))
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	cg, err := repo.CommitGraph()
+	if err != nil {
+		t.Fatalf("CommitGraph failed: %v", err)
+	}
+
+	mergeHash := gitCmd(t, workDir, "rev-parse", "HEAD")
+	id, _ := repo.ParseHash(mergeHash)
+	pos, ok := cg.CommitPosition(id)
+	if !ok {
+		t.Fatalf("merge commit not found")
+	}
+	cc, err := cg.CommitAt(pos)
+	if err != nil {
+		t.Fatalf("CommitAt failed: %v", err)
+	}
+	parentStr := gitCmd(t, workDir, "show", "-s", "--format=%P", mergeHash)
+	parents := strings.Fields(parentStr)
+	if len(cc.Parents) != len(parents) {
+		t.Fatalf("octopus parent mismatch: got %d want %d", len(cc.Parents), len(parents))
+	}
+}
+
+// TestCommitGraphReadSplitChain writes a split commit-graph in two
+// layers (--split=no-merge) and verifies the chain is loaded with a base
+// layer and that a commit in the base layer is still found.
+func TestCommitGraphReadSplitChain(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	for i := 0; i < 5; i++ {
+		path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i))
+		if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil {
+			t.Fatalf("write %s: %v", path, err)
+		}
+		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))
+	}
+
+	gitCmd(t, repoPath, "repack", "-a", "-d")
+	gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge")
+
+	// more commits needed to get a split chain with base layer
+	for i := 5; i < 8; i++ {
+		path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i))
+		if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil {
+			t.Fatalf("write %s: %v", path, err)
+		}
+		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))
+	}
+	gitCmd(t, repoPath, "repack", "-a", "-d")
+	gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge", "--append")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	cg, err := repo.CommitGraph()
+	if err != nil {
+		t.Fatalf("CommitGraph failed: %v", err)
+	}
+	chainPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain")
+	if _, err := os.Stat(chainPath); err != nil {
+		t.Fatalf("commit-graph chain not created by git: %v", err)
+	}
+	if cg.base == nil {
+		t.Fatalf("expected split commit-graph chain")
+	}
+	if cg.numCommitsInBase == 0 {
+		t.Fatalf("expected non-zero base commit count")
+	}
+	// The root commit lives in the oldest (base) layer.
+	revList := gitCmd(t, repoPath, "rev-list", "--max-parents=0", "HEAD")
+	id, _ := repo.ParseHash(revList)
+	if _, ok := cg.CommitPosition(id); !ok {
+		t.Fatalf("base commit not found in commit-graph chain")
+	}
+}
+
+// TestCommitGraphTrailerChecksum flips a byte in the trailing checksum
+// of a synthetic graph and expects parsing to fail.
+func TestCommitGraphTrailerChecksum(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	data := buildCommitGraphBytes(t, repo, 1, nil, nil, nil)
+	data[len(data)-1] ^= 0xff
+	if _, err := parseCommitGraph(repo, "corrupt.graph", data); err == nil {
+		t.Fatalf("expected checksum error")
+	}
+}
+
+// TestCommitGraphFanoutMonotonicity corrupts the OIDF chunk so its
+// cumulative counts decrease and expects parsing to reject the file.
+func TestCommitGraphFanoutMonotonicity(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	oids := [][]byte{
+		bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),
+		bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),
+	}
+	data := buildCommitGraphBytes(t, repo, 2, oids, nil, nil)
+	start, end, ok := chunkRange(t, repo, data, commitGraphChunkOIDF)
+	if !ok {
+		t.Fatalf("OIDF chunk not found")
+	}
+	fanout := make([]byte, end-start)
+	copy(fanout, data[start:end])
+	// Entry 0x00 = 2 followed by entry 0x01 = 1 is non-monotonic.
+	binary.BigEndian.PutUint32(fanout[0:4], 2)
+	binary.BigEndian.PutUint32(fanout[4:8], 1)
+	data = replaceChunk(t, repo, data, commitGraphChunkOIDF, fanout)
+	if _, err := parseCommitGraph(repo, "bad-fanout.graph", data); err == nil {
+		t.Fatalf("expected fanout monotonicity error")
+	}
+}
+
+// TestCommitGraphBIDXMonotonicity builds a graph whose Bloom index
+// offsets decrease (8 then 4) and expects parsing to reject it.
+func TestCommitGraphBIDXMonotonicity(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	oids := [][]byte{
+		bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),
+		bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),
+	}
+	bidx := []uint32{8, 4}
+	data := buildCommitGraphBytes(t, repo, 2, oids, bidx, []byte{0, 0, 0, 0, 0, 0, 0, 0})
+	if _, err := parseCommitGraph(repo, "bad-bidx.graph", data); err == nil {
+		t.Fatalf("expected bidx monotonicity error")
+	}
+}
+
+// TestCommitGraphExtraEdgesBounds points a commit's second parent word
+// at an out-of-range EDGE index and expects CommitAt to fail with a
+// bounds error rather than read past the chunk.
+func TestCommitGraphExtraEdgesBounds(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	oids := [][]byte{
+		bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),
+		bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),
+		bytes.Repeat([]byte{0x30}, repo.hashAlgo.Size()),
+	}
+	extraEdges := []uint32{0x80000000 | 100}
+	data := buildCommitGraphBytes(t, repo, 3, oids, nil, nil)
+
+	// inject EDGE chunk with an out-of-range index
+	data = injectChunk(t, repo, data, commitGraphChunkEDGE, u32SliceToBytes(extraEdges))
+
+	// force the first commit to use the extra edges list
+	start, end, ok := chunkRange(t, repo, data, commitGraphChunkCDAT)
+	if !ok {
+		t.Fatalf("CDAT chunk not found")
+	}
+	cdat := make([]byte, end-start)
+	copy(cdat, data[start:end])
+	hashSize := repo.hashAlgo.Size()
+	binary.BigEndian.PutUint32(cdat[hashSize+4:hashSize+8], 0x80000000|100)
+	data = replaceChunk(t, repo, data, commitGraphChunkCDAT, cdat)
+
+	cg, err := parseCommitGraph(repo, "edge.graph", data)
+	if err != nil {
+		t.Fatalf("parseCommitGraph failed: %v", err)
+	}
+	pos, ok := cg.CommitPosition(hashFromBytes(repo, oids[0]))
+	if !ok {
+		t.Fatalf("commit not found")
+	}
+	if _, err := cg.CommitAt(pos); err == nil {
+		t.Fatalf("expected extra-edges bounds error")
+	}
+}
+
+// buildCommitGraphBytes constructs a minimal valid commit-graph with n
+// parentless commits. If oids is empty, synthetic repeated-byte OIDs are
+// generated; Bloom chunks are emitted only when both bidx and bdat are
+// non-nil.
+func buildCommitGraphBytes(t *testing.T, repo *Repository, n int, oids [][]byte, bidx []uint32, bdat []byte) []byte {
+	t.Helper()
+	if n <= 0 {
+		t.Fatalf("invalid num commits")
+	}
+	hashSize := repo.hashAlgo.Size()
+	if len(oids) == 0 {
+		for i := 0; i < n; i++ {
+			b := bytes.Repeat([]byte{byte(0x10 + i)}, hashSize)
+			oids = append(oids, b)
+		}
+	}
+	// OIDL must be sorted for the binary search to work.
+	sort.Slice(oids, func(i, j int) bool {
+		return bytes.Compare(oids[i], oids[j]) < 0
+	})
+
+	var chunks []chunkSpec
+	// Build cumulative fanout counts: each OID bumps every bucket from
+	// its first byte through 0xff.
+	oidf := make([]byte, commitGraphFanoutSize)
+	for _, oid := range oids {
+		idx := int(oid[0])
+		for i := idx; i < 256; i++ {
+			v := binary.BigEndian.Uint32(oidf[i*4 : i*4+4])
+			binary.BigEndian.PutUint32(oidf[i*4:i*4+4], v+1)
+		}
+	}
+	chunks = append(chunks, chunkSpec{commitGraphChunkOIDF, oidf})
+
+	oidl := make([]byte, 0, n*hashSize)
+	for _, oid := range oids {
+		oidl = append(oidl, oid...)
+	}
+	chunks = append(chunks, chunkSpec{commitGraphChunkOIDL, oidl})
+
+	// CDAT records: dummy tree, no parents, commit time 1 (low 2 bits of
+	// the gen/time word are the time's high bits; 4 = generation 1).
+	cdat := make([]byte, n*commitGraphCommitDataSize(repo.hashAlgo))
+	for i := 0; i < n; i++ {
+		base := i * commitGraphCommitDataSize(repo.hashAlgo)
+		copy(cdat[base:base+hashSize], bytes.Repeat([]byte{0xaa}, hashSize))
+		binary.BigEndian.PutUint32(cdat[base+hashSize:], commitGraphParentNone)
+		binary.BigEndian.PutUint32(cdat[base+hashSize+4:], commitGraphParentNone)
+		binary.BigEndian.PutUint32(cdat[base+hashSize+8:], 4)
+		binary.BigEndian.PutUint32(cdat[base+hashSize+12:], 0)
+	}
+	chunks = append(chunks, chunkSpec{commitGraphChunkCDAT, cdat})
+
+	if bidx != nil && bdat != nil {
+		bidxBytes := u32SliceToBytes(bidx)
+		bdatHeader := make([]byte, bloom.DataHeaderSize)
+		binary.BigEndian.PutUint32(bdatHeader[0:4], 2)
+		binary.BigEndian.PutUint32(bdatHeader[4:8], 7)
+		binary.BigEndian.PutUint32(bdatHeader[8:12], 10)
+		bdatFull := append(bdatHeader, bdat...)
+		chunks = append(chunks, chunkSpec{commitGraphChunkBIDX, bidxBytes})
+		chunks = append(chunks, chunkSpec{commitGraphChunkBDAT, bdatFull})
+	}
+
+	return assembleCommitGraphBytes(t, repo, chunks)
+}
+
+// chunkSpec pairs a chunk identifier with its payload for assembly.
+type chunkSpec struct {
+	id   uint32
+	data []byte
+}
+
+// assembleCommitGraphBytes serializes header + TOC (with terminating
+// entry) + chunk payloads + trailing checksum into a complete file image.
+func assembleCommitGraphBytes(t *testing.T, repo *Repository, chunks []chunkSpec) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	buf.Grow(1024)
+	var header [commitGraphHeaderSize]byte
+	binary.BigEndian.PutUint32(header[0:4], commitGraphSignature)
+	header[4] = commitGraphVersion
+	header[5] = hashAlgoVersion(repo.hashAlgo)
+	header[6] = byte(len(chunks))
+	header[7] = 0
+	buf.Write(header[:])
+
+	// Reserve TOC space, then fill the entries in place.
+	tocStart := buf.Len()
+	tocLen := (len(chunks) + 1) * commitGraphChunkEntrySize
+	buf.Grow(tocLen)
+	buf.Write(make([]byte, tocLen))
+
+	offset := tocStart + tocLen
+	for i, ch := range chunks {
+		pos := tocStart + i*commitGraphChunkEntrySize
+		binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], ch.id)
+		binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset))
+		offset += len(ch.data)
+	}
+	// Terminating entry: id 0, offset = end of the last chunk.
+	pos := tocStart + len(chunks)*commitGraphChunkEntrySize
+	binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], 0)
+	binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset))
+
+	for _, ch := range chunks {
+		buf.Write(ch.data)
+	}
+
+	// Append the checksum over everything written so far.
+	h, err := repo.hashAlgo.New()
+	if err != nil {
+		t.Fatalf("hash.New failed: %v", err)
+	}
+	_, _ = h.Write(buf.Bytes())
+	buf.Write(h.Sum(nil))
+	return buf.Bytes()
+}
+
+// chunkRange returns the [start,end) byte range of chunk id inside a
+// serialized commit-graph, reading the next TOC entry's offset (or the
+// trailer start for the last chunk) as the end.
+func chunkRange(t *testing.T, repo *Repository, data []byte, id uint32) (int, int, bool) {
+	t.Helper()
+	hashSize := repo.hashAlgo.Size()
+	numChunks := int(data[6])
+	tocStart := commitGraphHeaderSize
+	trailerStart := len(data) - hashSize
+
+	for i := 0; i < numChunks; i++ {
+		pos := tocStart + i*commitGraphChunkEntrySize
+		chID := binary.BigEndian.Uint32(data[pos : pos+4])
+		if chID == 0 {
+			break
+		}
+		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
+		end := trailerStart
+		if i+1 < numChunks {
+			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
+		}
+		if chID == id {
+			return start, end, true
+		}
+	}
+	return 0, 0, false
+}
+
+// replaceChunk rebuilds a serialized commit-graph with the payload of
+// chunk id swapped out, re-deriving the TOC and trailer via
+// assembleCommitGraphBytes.
+func replaceChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte {
+	t.Helper()
+	hashSize := repo.hashAlgo.Size()
+	numChunks := int(data[6])
+	tocStart := commitGraphHeaderSize
+	trailerStart := len(data) - hashSize
+
+	chunks := make([]chunkSpec, 0, numChunks)
+	for i := 0; i < numChunks; i++ {
+		pos := tocStart + i*commitGraphChunkEntrySize
+		chID := binary.BigEndian.Uint32(data[pos : pos+4])
+		if chID == 0 {
+			break
+		}
+		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
+		end := trailerStart
+		if i+1 < numChunks {
+			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
+		}
+		if chID == id {
+			chunks = append(chunks, chunkSpec{chID, payload})
+		} else {
+			chunks = append(chunks, chunkSpec{chID, data[start:end]})
+		}
+	}
+	if len(chunks) == 0 {
+		t.Fatalf("no chunks found")
+	}
+	return assembleCommitGraphBytes(t, repo, chunks)
+}
+
+// injectChunk rebuilds a serialized commit-graph with an additional
+// chunk appended after the existing ones.
+func injectChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte {
+	t.Helper()
+	hashSize := repo.hashAlgo.Size()
+	numChunks := int(data[6])
+	tocStart := commitGraphHeaderSize
+	trailerStart := len(data) - hashSize
+
+	chunks := make([]chunkSpec, 0, numChunks+1)
+	for i := 0; i < numChunks; i++ {
+		pos := tocStart + i*commitGraphChunkEntrySize
+		chID := binary.BigEndian.Uint32(data[pos : pos+4])
+		if chID == 0 {
+			break
+		}
+		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
+		end := trailerStart
+		if i+1 < numChunks {
+			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
+		}
+		chunks = append(chunks, chunkSpec{chID, data[start:end]})
+	}
+	chunks = append(chunks, chunkSpec{id, payload})
+	return assembleCommitGraphBytes(t, repo, chunks)
+}
+
+// u32SliceToBytes serializes vals as consecutive big-endian 32-bit words.
+func u32SliceToBytes(vals []uint32) []byte {
+	buf := make([]byte, 4*len(vals))
+	for i, v := range vals {
+		binary.BigEndian.PutUint32(buf[4*i:], v)
+	}
+	return buf
+}
+
+func hashFromBytes(repo *Repository, b []byte) Hash {
+	var h Hash
+	copy(h.data[:], b)
+	h.algo = repo.hashAlgo
+	return h
+}
--- a/repo.go
+++ b/repo.go
@@ -27,7 +27,12 @@
 
 	packFiles   map[string]*packFile
 	packFilesMu sync.RWMutex
-	closeOnce   sync.Once
+
+	commitGraphOnce sync.Once
+	commitGraph     *commitGraph
+	commitGraphErr  error
+
+	closeOnce sync.Once
 }
 
 // OpenRepository opens the repository at the provided path.
@@ -100,6 +105,12 @@
 				if err != nil && closeErr == nil {
 					closeErr = err
 				}
+			}
+		}
+		if repo.commitGraph != nil {
+			err := repo.commitGraph.Close()
+			if err != nil && closeErr == nil {
+				closeErr = err
 			}
 		}
 	})
--