ref: 1ea061c92ca6ad435c00bea458b8f24a5e1a822a
parent: dd59d664675ad7dfef033311d279c6f988b367d4
author: Runxi Yu <me@runxiyu.org>
date: Fri Jan 30 10:46:49 EST 2026
commitgraph: Add basic commit-graph implementation
--- /dev/null
+++ b/commitgraph_read.go
@@ -1,0 +1,600 @@
+package furgit
+
import (
	"bufio"
	"bytes"
	"encoding/binary"
	"encoding/hex"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"syscall"

	"codeberg.org/lindenii/furgit/internal/bloom"
)
+
// File signature ("CGPH") and the only supported format version of the
// commit-graph file.
const (
	commitGraphSignature = 0x43475048 // "CGPH"
	commitGraphVersion   = 1
)

// Chunk identifiers appearing in a commit-graph table of contents.
const (
	commitGraphChunkOIDF = 0x4f494446 // "OIDF" — OID fanout
	commitGraphChunkOIDL = 0x4f49444c // "OIDL" — sorted OID lookup
	commitGraphChunkCDAT = 0x43444154 // "CDAT" — commit data records
	commitGraphChunkGDA2 = 0x47444132 // "GDA2" — generation data (v2)
	commitGraphChunkGDO2 = 0x47444f32 // "GDO2" — generation data overflow
	commitGraphChunkEDGE = 0x45444745 // "EDGE" — extra (octopus) parent edges
	commitGraphChunkBIDX = 0x42494458 // "BIDX" — bloom filter index
	commitGraphChunkBDAT = 0x42444154 // "BDAT" — bloom filter data
	commitGraphChunkBASE = 0x42415345 // "BASE" — base graph checksums
)

// Fixed layout sizes of the commit-graph file format.
const (
	commitGraphHeaderSize     = 8
	commitGraphChunkEntrySize = 12 // 4-byte chunk id + 8-byte offset
	commitGraphFanoutSize     = 256 * 4
)
+
// commitGraph is one mmap'd commit-graph file. Split commit-graphs form a
// chain of layers; each layer reaches the older ones through base.
type commitGraph struct {
	repo *Repository

	filename string
	data     []byte // whole mmap'd file; nil once Close has run
	dataLen  int

	numChunks uint8
	numBase   uint8
	hashAlgo  hashAlgorithm

	numCommits       uint32 // commits stored in this layer
	numCommitsInBase uint32 // commits stored in all layers below this one

	base *commitGraph // next-older layer of a split chain, or nil

	chunkOIDFanout []byte // OIDF: 256 cumulative counts keyed by first OID byte
	chunkOIDLookup []byte // OIDL: OIDs in sorted order
	chunkCommit    []byte // CDAT: fixed-size per-commit records

	chunkGeneration   []byte // GDA2: per-commit generation offsets
	chunkGenerationOv []byte // GDO2: 64-bit overflow entries for GDA2
	readGeneration    bool   // whether GDA2 data should be consulted

	chunkExtraEdges []byte // EDGE: overflow parent lists for octopus merges

	chunkBloomIndex []byte // BIDX: cumulative end offsets of bloom filters
	chunkBloomData  []byte // BDAT: bloom filter header + bit data

	chunkBaseGraphs []byte // BASE: checksums of the layers below, in order

	bloomSettings *bloom.Settings

	closeOnce sync.Once // guards the one-time munmap in Close
}
+
+func (cg *commitGraph) Close() error {+ if cg == nil {+ return nil
+ }
+ var closeErr error
+ cg.closeOnce.Do(func() {+ if len(cg.data) > 0 {+ if err := syscall.Munmap(cg.data); closeErr == nil {+ closeErr = err
+ }
+ cg.data = nil
+ }
+ })
+ if cg.base != nil {+ _ = cg.base.Close()
+ }
+ return closeErr
+}
+
+func (repo *Repository) CommitGraph() (*commitGraph, error) {+ if repo == nil {+ return nil, ErrInvalidObject
+ }
+ repo.commitGraphOnce.Do(func() {+ repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph()
+ })
+ return repo.commitGraph, repo.commitGraphErr
+}
+
+func (repo *Repository) loadCommitGraph() (*commitGraph, error) {+ cg, err := repo.loadCommitGraphSingle()
+ if err == nil {+ return cg, nil
+ }
+ if !errors.Is(err, ErrNotFound) {+ return nil, err
+ }
+ return repo.loadCommitGraphChain()
+}
+
+func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) {+ path := repo.repoPath(filepath.Join("objects", "info", "commit-graph"))+ return repo.loadCommitGraphFile(path)
+}
+
+func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) {+ chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain"))+ f, err := os.Open(chainPath)
+ if err != nil {+ if os.IsNotExist(err) {+ return nil, ErrNotFound
+ }
+ return nil, err
+ }
+ defer func() { _ = f.Close() }()+
+ var hashes []string
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {+ line := strings.TrimSpace(scanner.Text())
+ if line == "" {+ continue
+ }
+ hashes = append(hashes, line)
+ }
+ if err := scanner.Err(); err != nil {+ return nil, err
+ }
+ if len(hashes) == 0 {+ return nil, ErrNotFound
+ }
+
+ var chain *commitGraph
+ for i, h := range hashes {+ graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h)))+ g, err := repo.loadCommitGraphFile(graphPath)
+ if err != nil {+ return nil, err
+ }
+ if i > 0 {+ if g.chunkBaseGraphs == nil {+ return nil, ErrInvalidObject
+ }
+ if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() {+ return nil, ErrInvalidObject
+ }
+ for j := 0; j < i; j++ {+ want := hashes[j]
+ start := j * repo.hashAlgo.Size()
+ end := start + repo.hashAlgo.Size()
+ got := hexEncode(g.chunkBaseGraphs[start:end])
+ if got != want {+ return nil, ErrInvalidObject
+ }
+ }
+ }
+ if chain != nil {+ if chain.numCommits+chain.numCommitsInBase > ^uint32(0) {+ return nil, ErrInvalidObject
+ }
+ g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase
+ }
+ g.base = chain
+ chain = g
+ }
+ validateCommitGraphChain(chain)
+ return chain, nil
+}
+
+func validateCommitGraphChain(head *commitGraph) {+ if head == nil {+ return
+ }
+ readGeneration := true
+ for g := head; g != nil; g = g.base {+ readGeneration = readGeneration && g.readGeneration
+ }
+ if !readGeneration {+ for g := head; g != nil; g = g.base {+ g.readGeneration = false
+ }
+ }
+
+ var settings *bloom.Settings
+ for g := head; g != nil; g = g.base {+ if g.bloomSettings == nil {+ continue
+ }
+ if settings == nil {+ settings = g.bloomSettings
+ continue
+ }
+ if settings.HashVersion != g.bloomSettings.HashVersion ||
+ settings.NumHashes != g.bloomSettings.NumHashes ||
+ settings.BitsPerEntry != g.bloomSettings.BitsPerEntry {+ g.chunkBloomIndex = nil
+ g.chunkBloomData = nil
+ g.bloomSettings = nil
+ }
+ }
+}
+
// loadCommitGraphFile opens, mmaps, and parses one commit-graph file.
// Returns ErrNotFound when the file does not exist. On success the
// returned graph owns the mapping; callers release it via Close.
func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) {
	f, err := os.Open(path)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, ErrNotFound
		}
		return nil, err
	}
	stat, err := f.Stat()
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	// Smallest valid file: header plus the mandatory OIDF fanout.
	if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) {
		_ = f.Close()
		return nil, ErrInvalidObject
	}
	region, err := syscall.Mmap(
		int(f.Fd()),
		0,
		int(stat.Size()),
		syscall.PROT_READ,
		syscall.MAP_PRIVATE,
	)
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	// The mapping remains valid after the descriptor is closed, so the
	// file can be closed before parsing.
	err = f.Close()
	if err != nil {
		_ = syscall.Munmap(region)
		return nil, err
	}
	cg, err := parseCommitGraph(repo, path, region)
	if err != nil {
		_ = syscall.Munmap(region)
		return nil, err
	}
	// Only attach the region once parsing succeeded; from here on the
	// graph's Close is responsible for the unmap.
	cg.data = region
	return cg, nil
}
+
// parseCommitGraph validates the header, chunk table of contents, chunk
// sizes, and trailing checksum of a serialized commit-graph, returning a
// commitGraph whose chunk slices alias data. The caller retains ownership
// of data (the parser does not set cg.data).
func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) {
	if len(data) < commitGraphHeaderSize {
		return nil, ErrInvalidObject
	}
	sig := binary.BigEndian.Uint32(data[0:4])
	if sig != commitGraphSignature {
		return nil, ErrInvalidObject
	}
	version := data[4]
	if version != commitGraphVersion {
		return nil, ErrInvalidObject
	}
	// The file's hash version must match the repository's algorithm.
	hashVersion := data[5]
	if hashVersion != hashAlgoVersion(repo.hashAlgo) {
		return nil, ErrInvalidObject
	}
	numChunks := data[6]
	numBase := data[7]

	// The TOC has one entry per chunk plus a terminating entry; the file
	// ends with a trailing checksum of hashAlgo.Size() bytes.
	tocStart := commitGraphHeaderSize
	tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize
	if len(data) < tocStart+tocLen+repo.hashAlgo.Size() {
		return nil, ErrInvalidObject
	}

	trailerStart := len(data) - repo.hashAlgo.Size()

	type chunkEntry struct {
		id     uint32
		offset uint64
	}
	entries := make([]chunkEntry, 0, numChunks+1)
	for i := 0; i < int(numChunks)+1; i++ {
		pos := tocStart + i*commitGraphChunkEntrySize
		id := binary.BigEndian.Uint32(data[pos : pos+4])
		offset := binary.BigEndian.Uint64(data[pos+4 : pos+12])
		entries = append(entries, chunkEntry{id: id, offset: offset})
	}

	// Each chunk spans from its offset to the next entry's offset; the
	// terminator's offset marks the end of the last chunk. Every chunk
	// must lie between the TOC and the trailer.
	chunks := map[uint32][]byte{}
	for i := 0; i < len(entries)-1; i++ {
		id := entries[i].id
		if id == 0 {
			break
		}
		start := int(entries[i].offset)
		end := int(entries[i+1].offset)
		if start < tocStart+tocLen || end < start || end > trailerStart {
			return nil, ErrInvalidObject
		}
		chunks[id] = data[start:end]
	}

	cg := &commitGraph{
		repo:      repo,
		filename:  filename,
		dataLen:   len(data),
		hashAlgo:  repo.hashAlgo,
		numChunks: numChunks,
		numBase:   numBase,
	}

	// OIDF is mandatory: 256 cumulative counts; the last one is the
	// total commit count, and the sequence must be non-decreasing.
	oidf := chunks[commitGraphChunkOIDF]
	if len(oidf) != commitGraphFanoutSize {
		return nil, ErrInvalidObject
	}
	cg.chunkOIDFanout = oidf
	cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:])
	for i := 0; i < 255; i++ {
		if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) >
			binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) {
			return nil, ErrInvalidObject
		}
	}

	// OIDL is mandatory: one OID per commit.
	oidl := chunks[commitGraphChunkOIDL]
	if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() {
		return nil, ErrInvalidObject
	}
	cg.chunkOIDLookup = oidl

	// CDAT is mandatory: one fixed-size record per commit.
	cdat := chunks[commitGraphChunkCDAT]
	if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) {
		return nil, ErrInvalidObject
	}
	cg.chunkCommit = cdat

	// Optional generation data: 4 bytes per commit, with a separate
	// overflow chunk for values that do not fit.
	if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 {
		if len(gda2) != int(cg.numCommits)*4 {
			return nil, ErrInvalidObject
		}
		cg.chunkGeneration = gda2
		cg.readGeneration = true
	}
	if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 {
		cg.chunkGenerationOv = gdo2
	}

	// Optional extra-edge chunk: a list of 4-byte entries.
	if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 {
		if len(edge)%4 != 0 {
			return nil, ErrInvalidObject
		}
		cg.chunkExtraEdges = edge
	}

	// BASE, when present, must name exactly numBase lower layers; a
	// nonzero numBase without a BASE chunk is inconsistent.
	if base := chunks[commitGraphChunkBASE]; len(base) != 0 {
		if numBase == 0 {
			return nil, ErrInvalidObject
		}
		if len(base) != int(numBase)*repo.hashAlgo.Size() {
			return nil, ErrInvalidObject
		}
		cg.chunkBaseGraphs = base
	} else if numBase != 0 {
		return nil, ErrInvalidObject
	}

	// Optional bloom chunks: BIDX holds one cumulative end offset per
	// commit; BDAT starts with a settings header.
	if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 {
		if len(bidx) != int(cg.numCommits)*4 {
			return nil, ErrInvalidObject
		}
		cg.chunkBloomIndex = bidx
	}
	if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 {
		if len(bdat) < bloom.DataHeaderSize {
			return nil, ErrInvalidObject
		}
		cg.chunkBloomData = bdat
		settings, err := bloom.ParseSettings(bdat)
		if err != nil {
			return nil, err
		}
		cg.bloomSettings = settings
	}

	// Bloom filters are only usable when both chunks are present; drop
	// them (rather than erroring) when only one exists.
	if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) {
		cg.chunkBloomIndex = nil
		cg.chunkBloomData = nil
		cg.bloomSettings = nil
	}
	// BIDX offsets must be non-decreasing and within the BDAT payload.
	if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil {
		for i := 0; i < int(cg.numCommits); i++ {
			cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4])
			if i > 0 {
				prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4])
				if cur < prev {
					return nil, ErrInvalidObject
				}
			}
			if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize {
				return nil, ErrInvalidObject
			}
		}
	}

	if err := verifyCommitGraphTrailer(repo, data); err != nil {
		return nil, err
	}

	return cg, nil
}
+
+func commitGraphCommitDataSize(algo hashAlgorithm) int {+ return algo.Size() + 16
+}
+
+func hashAlgoVersion(algo hashAlgorithm) byte {+ switch algo {+ case hashAlgoSHA1:
+ return 1
+ case hashAlgoSHA256:
+ return 2
+ default:
+ return 0
+ }
+}
+
+func verifyCommitGraphTrailer(repo *Repository, data []byte) error {+ if repo == nil {+ return ErrInvalidObject
+ }
+ hashSize := repo.hashAlgo.Size()
+ if len(data) < hashSize {+ return ErrInvalidObject
+ }
+ want := data[len(data)-hashSize:]
+ h, err := repo.hashAlgo.New()
+ if err != nil {+ return err
+ }
+ _, _ = h.Write(data[:len(data)-hashSize])
+ got := h.Sum(nil)
+ if !bytes.Equal(got, want) {+ return ErrInvalidObject
+ }
+ return nil
+}
+
// hexEncode returns the lowercase hexadecimal representation of b.
// The hand-rolled nibble loop is replaced with the equivalent (and
// well-tested) standard-library encoder.
func hexEncode(b []byte) string {
	return hex.EncodeToString(b)
}
+
+func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) {+ if cg == nil {+ return 0, false
+ }
+ return commitGraphBsearchOID(cg, id)
+}
+
// commitGraphBsearchOID binary-searches cg's OIDL chunk for id, using the
// OIDF fanout to narrow the range by first byte, and recurses into base
// layers when this layer misses. The returned position is graph-wide
// (layer-local index plus the commits stored in base layers).
func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) {
	if cg == nil || len(cg.chunkOIDLookup) == 0 {
		return 0, false
	}
	hashSize := cg.hashAlgo.Size()
	// fanout[b] counts OIDs whose first byte is <= b, so candidates for
	// id live in [fanout[first-1], fanout[first]).
	first := int(id.data[0])
	fanout := cg.chunkOIDFanout
	var start uint32
	if first > 0 {
		start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4])
	}
	end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4])
	lo := int(start)
	hi := int(end) - 1
	for lo <= hi {
		mid := lo + (hi-lo)/2 // overflow-safe midpoint
		pos := mid * hashSize
		cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize])
		if cmp == 0 {
			return uint32(mid) + cg.numCommitsInBase, true
		}
		if cmp < 0 {
			lo = mid + 1
		} else {
			hi = mid - 1
		}
	}
	if cg.base != nil {
		return commitGraphBsearchOID(cg.base, id)
	}
	return 0, false
}
+
// loadOID returns the OID stored at graph-wide position pos, descending
// into base layers as needed.
func (cg *commitGraph) loadOID(pos uint32) (Hash, error) {
	// Walk down the chain until we find the layer that owns pos.
	g := cg
	for g != nil && pos < g.numCommitsInBase {
		g = g.base
	}
	if g == nil {
		return Hash{}, ErrInvalidObject
	}
	if pos >= g.numCommits+g.numCommitsInBase {
		return Hash{}, ErrInvalidObject
	}
	lex := pos - g.numCommitsInBase // index within this layer's OIDL
	hashSize := g.hashAlgo.Size()
	start := int(lex) * hashSize
	end := start + hashSize
	var out Hash
	copy(out.data[:], g.chunkOIDLookup[start:end])
	out.algo = g.hashAlgo
	return out, nil
}
+
// commitDataAt decodes the CDAT record at graph-wide position pos: the
// root tree OID, parent positions (following EDGE for octopus merges),
// the commit time, and the generation number. When GDA2 data is usable
// the generation is the corrected commit date; otherwise it is the value
// packed into CDAT.
func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) {
	// Find the layer that owns pos.
	g := cg
	for g != nil && pos < g.numCommitsInBase {
		g = g.base
	}
	if g == nil {
		return nil, ErrInvalidObject
	}
	if pos >= g.numCommits+g.numCommitsInBase {
		return nil, ErrInvalidObject
	}
	lex := pos - g.numCommitsInBase
	stride := commitGraphCommitDataSize(g.hashAlgo)
	start := int(lex) * stride
	end := start + stride
	if end > len(g.chunkCommit) {
		return nil, ErrInvalidObject
	}
	buf := g.chunkCommit[start:end]

	var tree Hash
	hashSize := g.hashAlgo.Size()
	copy(tree.data[:], buf[:hashSize])
	tree.algo = g.hashAlgo

	// Two raw parent slots; decoding (none / direct / extra-edge list)
	// happens in commitGraphAppendParent below.
	p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4])
	p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8])

	// Packed word: upper 30 bits hold the generation value, the low
	// 2 bits are the high bits (33-34) of the 34-bit commit time.
	genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12])
	timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16])

	timeHigh := genTime & 0x3
	commitTime := (uint64(timeHigh) << 32) | uint64(timeLow)
	generation := uint64(genTime >> 2)

	if g.readGeneration {
		// GDA2 stores an offset relative to the commit time. Entries
		// with the overflow bit set instead index the 64-bit GDO2
		// overflow table.
		genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:])
		if genOffset&correctedGenerationOverflow != 0 {
			off := genOffset ^ correctedGenerationOverflow
			idx := int(off) * 8
			if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) {
				return nil, ErrInvalidObject
			}
			offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8])
			generation = commitTime + offset
		} else {
			generation = commitTime + uint64(genOffset)
		}
	}

	var parents []uint32
	var err error
	parents, err = commitGraphAppendParent(parents, g, p1)
	if err != nil {
		return nil, err
	}
	parents, err = commitGraphAppendParent(parents, g, p2)
	if err != nil {
		return nil, err
	}

	return &commitGraphCommit{
		Position:    pos,
		Tree:        tree,
		Parents:     parents,
		CommitTime:  commitTime,
		Generation:  generation,
		Graph:       g,
		graphLexPos: lex,
	}, nil
}
--- /dev/null
+++ b/commitgraph_read_access.go
@@ -1,0 +1,124 @@
+package furgit
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/internal/bloom"
+)
+
const (
	// commitGraphParentNone marks an unused parent slot in a CDAT record.
	commitGraphParentNone = 0x70000000
	// commitGraphParentExtraMask flags a second-parent slot whose lower
	// bits index into the EDGE chunk instead of naming a parent directly;
	// within EDGE it also terminates the list.
	commitGraphParentExtraMask = 0x80000000
	// commitGraphParentLastMask extracts the position bits from a parent
	// slot or EDGE entry.
	commitGraphParentLastMask = 0x7fffffff

	// correctedGenerationOverflow flags GDA2 offsets that overflowed and
	// live in the GDO2 chunk instead.
	correctedGenerationOverflow = 0x80000000
)
+
// commitGraphCommit is one decoded commit record from a commit-graph.
type commitGraphCommit struct {
	Position    uint32       // graph-wide position of this commit
	Tree        Hash         // root tree OID
	Parents     []uint32     // graph-wide positions of the parents
	CommitTime  uint64       // committer timestamp
	Generation  uint64       // generation number (see commitDataAt)
	Graph       *commitGraph // layer the record was decoded from
	graphLexPos uint32       // lexicographic index within Graph
}
+
// commitGraphAppendParent decodes one raw CDAT parent slot into parents.
// A slot is either empty (commitGraphParentNone), a direct graph position,
// or — when the extra mask is set — an index into the EDGE chunk, from
// which positions are appended until an entry with the terminator bit set.
// Returns ErrInvalidObject when an EDGE reference is missing or runs past
// the chunk.
func commitGraphAppendParent(parents []uint32, g *commitGraph, edge uint32) ([]uint32, error) {
	if edge == commitGraphParentNone {
		return parents, nil
	}
	if (edge & commitGraphParentExtraMask) == 0 {
		return append(parents, edge), nil
	}
	if g.chunkExtraEdges == nil {
		return nil, ErrInvalidObject
	}
	idx := edge & commitGraphParentLastMask
	for {
		off := int(idx) * 4
		// Bounds check each entry; a malformed list that never sets
		// the terminator bit fails here rather than looping forever.
		if off+4 > len(g.chunkExtraEdges) {
			return nil, ErrInvalidObject
		}
		val := binary.BigEndian.Uint32(g.chunkExtraEdges[off : off+4])
		parents = append(parents, val&commitGraphParentLastMask)
		idx++
		if (val & commitGraphParentExtraMask) != 0 {
			break
		}
	}
	return parents, nil
}
+
// CommitPosition returns the graph-wide position of the commit with OID
// id, or false when it is not present in the graph or any base layer.
func (cg *commitGraph) CommitPosition(id Hash) (uint32, bool) {
	return cg.lookupOID(id)
}

// CommitAt decodes the commit record stored at a graph-wide position.
func (cg *commitGraph) CommitAt(pos uint32) (*commitGraphCommit, error) {
	return cg.commitDataAt(pos)
}

// OIDAt returns the OID stored at a graph-wide position.
func (cg *commitGraph) OIDAt(pos uint32) (Hash, error) {
	return cg.loadOID(pos)
}
+
+func (c *commitGraphCommit) HasBloom() bool {+ return c != nil && c.Graph != nil && c.Graph.chunkBloomIndex != nil && c.Graph.chunkBloomData != nil
+}
+
+func (cg *commitGraph) BloomSettings() *bloom.Settings {+ if cg == nil {+ return nil
+ }
+ return cg.bloomSettings
+}
+
// BloomFilter returns the changed-path bloom filter for the commit at
// graph-wide position pos, descending into base layers as needed. ok is
// false when the owning layer has no (usable) bloom data or pos is out of
// range.
func (cg *commitGraph) BloomFilter(pos uint32) (*bloom.Filter, bool) {
	g := cg
	for g != nil && pos < g.numCommitsInBase {
		g = g.base
	}
	if g == nil || g.chunkBloomIndex == nil || g.chunkBloomData == nil || g.bloomSettings == nil {
		return nil, false
	}
	if pos >= g.numCommits+g.numCommitsInBase {
		return nil, false
	}
	lex := pos - g.numCommitsInBase
	if int(lex)*4+4 > len(g.chunkBloomIndex) {
		return nil, false
	}
	// BIDX stores cumulative end offsets: filter i spans
	// [BIDX[i-1], BIDX[i]) within BDAT's payload (after its header).
	end := binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex)*4:])
	var start uint32
	if lex > 0 {
		start = binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex-1)*4:])
	}
	if end < start {
		return nil, false
	}
	if int(end) > len(g.chunkBloomData)-bloom.DataHeaderSize ||
		int(start) > len(g.chunkBloomData)-bloom.DataHeaderSize {
		return nil, false
	}
	data := g.chunkBloomData[bloom.DataHeaderSize+start : bloom.DataHeaderSize+end]
	return &bloom.Filter{Data: data, Version: g.bloomSettings.HashVersion}, true
}
+
+func (c *commitGraphCommit) BloomFilter() (*bloom.Filter, *bloom.Settings, bool) {+ if c == nil || c.Graph == nil {+ return nil, nil, false
+ }
+ filter, ok := c.Graph.BloomFilter(c.Position)
+ if !ok {+ return nil, nil, false
+ }
+ return filter, c.Graph.bloomSettings, true
+}
+
+func (c *commitGraphCommit) ChangedPathMightContain(path []byte) bool {+ filter, settings, ok := c.BloomFilter()
+ if !ok {+ return false
+ }
+ return filter.MightContain(path, settings)
+}
--- /dev/null
+++ b/commitgraph_read_test.go
@@ -1,0 +1,558 @@
+package furgit
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/bloom"
+)
+
+func TestCommitGraphReadBasic(t *testing.T) {+ repoPath, cleanup := setupTestRepo(t)
+ defer cleanup()
+
+ workDir, cleanupWork := setupWorkDir(t)
+ defer cleanupWork()
+
+ files := []string{"a.txt", "b.txt", "c.txt"}+ for i, name := range files {+ path := filepath.Join(workDir, name)
+ content := fmt.Sprintf("content-%d\n", i)+ if err := os.WriteFile(path, []byte(content), 0o644); err != nil {+ t.Fatalf("write %s: %v", name, err)+ }
+ gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+ gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))+ }
+
+ gitCmd(t, repoPath, "repack", "-a", "-d")
+ gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths")
+
+ repo, err := OpenRepository(repoPath)
+ if err != nil {+ t.Fatalf("OpenRepository failed: %v", err)+ }
+ defer func() { _ = repo.Close() }()+
+ cg, err := repo.CommitGraph()
+ if err != nil {+ t.Fatalf("CommitGraph failed: %v", err)+ }
+
+ revList := gitCmd(t, repoPath, "rev-list", "--all")
+ commits := strings.Fields(revList)
+ if len(commits) == 0 {+ t.Fatal("no commits found")+ }
+
+ falsePositives := 0
+ for _, oidStr := range commits {+ id, err := repo.ParseHash(oidStr)
+ if err != nil {+ t.Fatalf("ParseHash failed: %v", err)+ }
+ pos, ok := cg.CommitPosition(id)
+ if !ok {+ t.Fatalf("commit %s not found in commit-graph", oidStr)+ }
+ cc, err := cg.CommitAt(pos)
+ if err != nil {+ t.Fatalf("CommitAt failed: %v", err)+ }
+
+ treeStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%T", oidStr))
+ tree, err := repo.ParseHash(treeStr)
+ if err != nil {+ t.Fatalf("ParseHash tree failed: %v", err)+ }
+ if cc.Tree != tree {+ t.Fatalf("tree mismatch for %s: got %s want %s", oidStr, cc.Tree.String(), tree.String())+ }
+
+ parentStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%P", oidStr))
+ parents := strings.Fields(parentStr)
+ if len(parents) != len(cc.Parents) {+ t.Fatalf("parent count mismatch for %s: got %d want %d", oidStr, len(cc.Parents), len(parents))+ }
+ for i, p := range parents {+ pid, err := repo.ParseHash(p)
+ if err != nil {+ t.Fatalf("ParseHash parent failed: %v", err)+ }
+ got, err := cg.OIDAt(cc.Parents[i])
+ if err != nil {+ t.Fatalf("OIDAt parent failed: %v", err)+ }
+ if got != pid {+ t.Fatalf("parent mismatch for %s: got %s want %s", oidStr, got.String(), pid.String())+ }
+ }
+
+ ctStr := gitCmd(t, repoPath, "show", "-s", "--format=%ct", oidStr)
+ ct, err := strconv.ParseInt(ctStr, 10, 64)
+ if err != nil {+ t.Fatalf("parse commit time: %v", err)+ }
+ if cc.CommitTime != uint64(ct) {+ t.Fatalf("commit time mismatch for %s: got %d want %d", oidStr, cc.CommitTime, ct)+ }
+
+ changed := gitCmd(t, repoPath, "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", oidStr)
+ for _, path := range strings.Fields(changed) {+ if path == "" {+ continue
+ }
+ if !cc.ChangedPathMightContain([]byte(path)) {+ t.Fatalf("bloom filter missing changed path %q in %s", path, oidStr)+ }
+ }
+ if cc.ChangedPathMightContain([]byte("this-path-should-not-exist-abcdefg")) {+ falsePositives++
+ }
+ }
+ if falsePositives > 1 {+ t.Fatalf("unexpected bloom false positive rate: %d", falsePositives)+ }
+}
+
// TestCommitGraphReadExtraEdges creates an octopus merge (four parents),
// which forces git to emit an EDGE chunk, and verifies that the decoded
// parent count matches git's.
func TestCommitGraphReadExtraEdges(t *testing.T) {
	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()

	gitCmd(t, workDir, "init")

	if err := os.WriteFile(filepath.Join(workDir, "base.txt"), []byte("base\n"), 0o644); err != nil {
		t.Fatalf("write base: %v", err)
	}
	gitCmd(t, workDir, "add", ".")
	gitCmd(t, workDir, "commit", "-m", "base")

	// Three divergent branches off master, merged at once below.
	gitCmd(t, workDir, "checkout", "-b", "branch1")
	if err := os.WriteFile(filepath.Join(workDir, "b1.txt"), []byte("b1\n"), 0o644); err != nil {
		t.Fatalf("write b1: %v", err)
	}
	gitCmd(t, workDir, "add", ".")
	gitCmd(t, workDir, "commit", "-m", "b1")

	gitCmd(t, workDir, "checkout", "master")
	gitCmd(t, workDir, "checkout", "-b", "branch2")
	if err := os.WriteFile(filepath.Join(workDir, "b2.txt"), []byte("b2\n"), 0o644); err != nil {
		t.Fatalf("write b2: %v", err)
	}
	gitCmd(t, workDir, "add", ".")
	gitCmd(t, workDir, "commit", "-m", "b2")

	gitCmd(t, workDir, "checkout", "master")
	gitCmd(t, workDir, "checkout", "-b", "branch3")
	if err := os.WriteFile(filepath.Join(workDir, "b3.txt"), []byte("b3\n"), 0o644); err != nil {
		t.Fatalf("write b3: %v", err)
	}
	gitCmd(t, workDir, "add", ".")
	gitCmd(t, workDir, "commit", "-m", "b3")

	gitCmd(t, workDir, "checkout", "master")
	gitCmd(t, workDir, "merge", "--no-ff", "-m", "octopus", "branch1", "branch2", "branch3")

	gitCmd(t, workDir, "repack", "-a", "-d")
	gitCmd(t, workDir, "commit-graph", "write", "--reachable")

	repo, err := OpenRepository(filepath.Join(workDir, ".git"))
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	cg, err := repo.CommitGraph()
	if err != nil {
		t.Fatalf("CommitGraph failed: %v", err)
	}

	mergeHash := gitCmd(t, workDir, "rev-parse", "HEAD")
	id, _ := repo.ParseHash(mergeHash)
	pos, ok := cg.CommitPosition(id)
	if !ok {
		t.Fatalf("merge commit not found")
	}
	cc, err := cg.CommitAt(pos)
	if err != nil {
		t.Fatalf("CommitAt failed: %v", err)
	}
	parentStr := gitCmd(t, workDir, "show", "-s", "--format=%P", mergeHash)
	parents := strings.Fields(parentStr)
	if len(cc.Parents) != len(parents) {
		t.Fatalf("octopus parent mismatch: got %d want %d", len(cc.Parents), len(parents))
	}
}
+
// TestCommitGraphReadSplitChain writes commits in two batches with
// --split=no-merge so git produces a commit-graph chain, then verifies the
// chain is loaded with a base layer and that a commit stored in the base
// layer is still findable through the chain head.
func TestCommitGraphReadSplitChain(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()

	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()

	for i := 0; i < 5; i++ {
		path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i))
		if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))
	}

	gitCmd(t, repoPath, "repack", "-a", "-d")
	gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge")

	// more commits needed to get a split chain with base layer
	for i := 5; i < 8; i++ {
		path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i))
		if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
		gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
		gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i))
	}
	gitCmd(t, repoPath, "repack", "-a", "-d")
	gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge", "--append")

	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	cg, err := repo.CommitGraph()
	if err != nil {
		t.Fatalf("CommitGraph failed: %v", err)
	}
	chainPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain")
	if _, err := os.Stat(chainPath); err != nil {
		t.Fatalf("commit-graph chain not created by git: %v", err)
	}
	if cg.base == nil {
		t.Fatalf("expected split commit-graph chain")
	}
	if cg.numCommitsInBase == 0 {
		t.Fatalf("expected non-zero base commit count")
	}
	// The root commit lives in the base layer; lookup must descend.
	revList := gitCmd(t, repoPath, "rev-list", "--max-parents=0", "HEAD")
	id, _ := repo.ParseHash(revList)
	if _, ok := cg.CommitPosition(id); !ok {
		t.Fatalf("base commit not found in commit-graph chain")
	}
}
+
+func TestCommitGraphTrailerChecksum(t *testing.T) {+ repoPath, cleanup := setupTestRepo(t)
+ defer cleanup()
+ repo, err := OpenRepository(repoPath)
+ if err != nil {+ t.Fatalf("OpenRepository failed: %v", err)+ }
+ defer func() { _ = repo.Close() }()+
+ data := buildCommitGraphBytes(t, repo, 1, nil, nil, nil)
+ data[len(data)-1] ^= 0xff
+ if _, err := parseCommitGraph(repo, "corrupt.graph", data); err == nil {+ t.Fatalf("expected checksum error")+ }
+}
+
// TestCommitGraphFanoutMonotonicity rewrites the OIDF chunk of a synthetic
// graph so its cumulative counts decrease and expects the parser to reject
// the file.
func TestCommitGraphFanoutMonotonicity(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()
	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	oids := [][]byte{
		bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),
		bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),
	}
	data := buildCommitGraphBytes(t, repo, 2, oids, nil, nil)
	start, end, ok := chunkRange(t, repo, data, commitGraphChunkOIDF)
	if !ok {
		t.Fatalf("OIDF chunk not found")
	}
	fanout := make([]byte, end-start)
	copy(fanout, data[start:end])
	// 2 followed by 1: cumulative counts must never decrease.
	binary.BigEndian.PutUint32(fanout[0:4], 2)
	binary.BigEndian.PutUint32(fanout[4:8], 1)
	data = replaceChunk(t, repo, data, commitGraphChunkOIDF, fanout)
	if _, err := parseCommitGraph(repo, "bad-fanout.graph", data); err == nil {
		t.Fatalf("expected fanout monotonicity error")
	}
}
+
+func TestCommitGraphBIDXMonotonicity(t *testing.T) {+ repoPath, cleanup := setupTestRepo(t)
+ defer cleanup()
+ repo, err := OpenRepository(repoPath)
+ if err != nil {+ t.Fatalf("OpenRepository failed: %v", err)+ }
+ defer func() { _ = repo.Close() }()+
+ oids := [][]byte{+ bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),+ bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),+ }
+ bidx := []uint32{8, 4}+ data := buildCommitGraphBytes(t, repo, 2, oids, bidx, []byte{0, 0, 0, 0, 0, 0, 0, 0})+ if _, err := parseCommitGraph(repo, "bad-bidx.graph", data); err == nil {+ t.Fatalf("expected bidx monotonicity error")+ }
+}
+
// TestCommitGraphExtraEdgesBounds builds a graph whose first commit's
// second-parent slot points into an EDGE chunk at an out-of-range index
// and expects CommitAt to fail rather than read out of bounds.
func TestCommitGraphExtraEdgesBounds(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()
	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	oids := [][]byte{
		bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()),
		bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()),
		bytes.Repeat([]byte{0x30}, repo.hashAlgo.Size()),
	}
	extraEdges := []uint32{0x80000000 | 100}
	data := buildCommitGraphBytes(t, repo, 3, oids, nil, nil)

	// inject EDGE chunk with an out-of-range index
	data = injectChunk(t, repo, data, commitGraphChunkEDGE, u32SliceToBytes(extraEdges))

	// force the first commit to use the extra edges list
	start, end, ok := chunkRange(t, repo, data, commitGraphChunkCDAT)
	if !ok {
		t.Fatalf("CDAT chunk not found")
	}
	cdat := make([]byte, end-start)
	copy(cdat, data[start:end])
	hashSize := repo.hashAlgo.Size()
	// Second parent slot of commit 0: extra-mask bit + EDGE index 100.
	binary.BigEndian.PutUint32(cdat[hashSize+4:hashSize+8], 0x80000000|100)
	data = replaceChunk(t, repo, data, commitGraphChunkCDAT, cdat)

	cg, err := parseCommitGraph(repo, "edge.graph", data)
	if err != nil {
		t.Fatalf("parseCommitGraph failed: %v", err)
	}
	pos, ok := cg.CommitPosition(hashFromBytes(repo, oids[0]))
	if !ok {
		t.Fatalf("commit not found")
	}
	if _, err := cg.CommitAt(pos); err == nil {
		t.Fatalf("expected extra-edges bounds error")
	}
}
+
// buildCommitGraphBytes assembles a minimal valid commit-graph with n
// root commits. oids (one per commit) may be nil, in which case synthetic
// OIDs are generated; they are sorted in place. When both bidx and bdat
// are non-nil, BIDX/BDAT bloom chunks are added (bdat is the raw filter
// data, a settings header is prepended).
func buildCommitGraphBytes(t *testing.T, repo *Repository, n int, oids [][]byte, bidx []uint32, bdat []byte) []byte {
	t.Helper()
	if n <= 0 {
		t.Fatalf("invalid num commits")
	}
	hashSize := repo.hashAlgo.Size()
	if len(oids) == 0 {
		for i := 0; i < n; i++ {
			b := bytes.Repeat([]byte{byte(0x10 + i)}, hashSize)
			oids = append(oids, b)
		}
	}
	sort.Slice(oids, func(i, j int) bool {
		return bytes.Compare(oids[i], oids[j]) < 0
	})

	var chunks []chunkSpec
	// OIDF: each OID bumps the cumulative count of its first byte and
	// every larger bucket.
	oidf := make([]byte, commitGraphFanoutSize)
	for _, oid := range oids {
		idx := int(oid[0])
		for i := idx; i < 256; i++ {
			v := binary.BigEndian.Uint32(oidf[i*4 : i*4+4])
			binary.BigEndian.PutUint32(oidf[i*4:i*4+4], v+1)
		}
	}
	chunks = append(chunks, chunkSpec{commitGraphChunkOIDF, oidf})

	oidl := make([]byte, 0, n*hashSize)
	for _, oid := range oids {
		oidl = append(oidl, oid...)
	}
	chunks = append(chunks, chunkSpec{commitGraphChunkOIDL, oidl})

	// CDAT: every commit is a parentless root with a dummy tree and a
	// fixed generation/time word pair.
	cdat := make([]byte, n*commitGraphCommitDataSize(repo.hashAlgo))
	for i := 0; i < n; i++ {
		base := i * commitGraphCommitDataSize(repo.hashAlgo)
		copy(cdat[base:base+hashSize], bytes.Repeat([]byte{0xaa}, hashSize))
		binary.BigEndian.PutUint32(cdat[base+hashSize:], commitGraphParentNone)
		binary.BigEndian.PutUint32(cdat[base+hashSize+4:], commitGraphParentNone)
		binary.BigEndian.PutUint32(cdat[base+hashSize+8:], 4)
		binary.BigEndian.PutUint32(cdat[base+hashSize+12:], 0)
	}
	chunks = append(chunks, chunkSpec{commitGraphChunkCDAT, cdat})

	if bidx != nil && bdat != nil {
		bidxBytes := u32SliceToBytes(bidx)
		// BDAT settings header: hash version 2, 7 hashes, 10 bits/entry.
		bdatHeader := make([]byte, bloom.DataHeaderSize)
		binary.BigEndian.PutUint32(bdatHeader[0:4], 2)
		binary.BigEndian.PutUint32(bdatHeader[4:8], 7)
		binary.BigEndian.PutUint32(bdatHeader[8:12], 10)
		bdatFull := append(bdatHeader, bdat...)
		chunks = append(chunks, chunkSpec{commitGraphChunkBIDX, bidxBytes})
		chunks = append(chunks, chunkSpec{commitGraphChunkBDAT, bdatFull})
	}

	return assembleCommitGraphBytes(t, repo, chunks)
}
+
// chunkSpec pairs a chunk identifier with its payload for test assembly.
type chunkSpec struct {
	id   uint32
	data []byte
}
+
// assembleCommitGraphBytes serializes a header, chunk table of contents,
// the chunk payloads, and a trailing checksum into a complete
// commit-graph file image.
func assembleCommitGraphBytes(t *testing.T, repo *Repository, chunks []chunkSpec) []byte {
	t.Helper()
	var buf bytes.Buffer
	buf.Grow(1024)
	var header [commitGraphHeaderSize]byte
	binary.BigEndian.PutUint32(header[0:4], commitGraphSignature)
	header[4] = commitGraphVersion
	header[5] = hashAlgoVersion(repo.hashAlgo)
	header[6] = byte(len(chunks))
	header[7] = 0 // no base graphs
	buf.Write(header[:])

	// Reserve the TOC (one entry per chunk plus a terminator), then fill
	// it in place below.
	tocStart := buf.Len()
	tocLen := (len(chunks) + 1) * commitGraphChunkEntrySize
	buf.Grow(tocLen)
	buf.Write(make([]byte, tocLen))

	offset := tocStart + tocLen
	for i, ch := range chunks {
		pos := tocStart + i*commitGraphChunkEntrySize
		binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], ch.id)
		binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset))
		offset += len(ch.data)
	}
	// Terminating entry: id 0, offset = end of the final chunk.
	pos := tocStart + len(chunks)*commitGraphChunkEntrySize
	binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], 0)
	binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset))

	for _, ch := range chunks {
		buf.Write(ch.data)
	}

	// Trailer: checksum of everything written so far.
	h, err := repo.hashAlgo.New()
	if err != nil {
		t.Fatalf("hash.New failed: %v", err)
	}
	_, _ = h.Write(buf.Bytes())
	buf.Write(h.Sum(nil))
	return buf.Bytes()
}
+
// chunkRange locates chunk id in a serialized graph and returns its
// [start, end) byte range; the last chunk ends at the trailer (which, for
// files built by assembleCommitGraphBytes, matches the TOC terminator).
func chunkRange(t *testing.T, repo *Repository, data []byte, id uint32) (int, int, bool) {
	t.Helper()
	hashSize := repo.hashAlgo.Size()
	numChunks := int(data[6])
	tocStart := commitGraphHeaderSize
	trailerStart := len(data) - hashSize

	for i := 0; i < numChunks; i++ {
		pos := tocStart + i*commitGraphChunkEntrySize
		chID := binary.BigEndian.Uint32(data[pos : pos+4])
		if chID == 0 {
			break
		}
		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
		end := trailerStart
		if i+1 < numChunks {
			// A chunk ends where the next TOC entry begins.
			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
		}
		if chID == id {
			return start, end, true
		}
	}
	return 0, 0, false
}
+
// replaceChunk rebuilds a serialized graph with chunk id's payload swapped
// for payload, keeping every other chunk and recomputing the checksum.
func replaceChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte {
	t.Helper()
	hashSize := repo.hashAlgo.Size()
	numChunks := int(data[6])
	tocStart := commitGraphHeaderSize
	trailerStart := len(data) - hashSize

	chunks := make([]chunkSpec, 0, numChunks)
	for i := 0; i < numChunks; i++ {
		pos := tocStart + i*commitGraphChunkEntrySize
		chID := binary.BigEndian.Uint32(data[pos : pos+4])
		if chID == 0 {
			break
		}
		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
		end := trailerStart
		if i+1 < numChunks {
			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
		}
		if chID == id {
			chunks = append(chunks, chunkSpec{chID, payload})
		} else {
			chunks = append(chunks, chunkSpec{chID, data[start:end]})
		}
	}
	if len(chunks) == 0 {
		t.Fatalf("no chunks found")
	}
	return assembleCommitGraphBytes(t, repo, chunks)
}
+
// injectChunk rebuilds a serialized graph with an additional chunk (id,
// payload) appended after the existing chunks, recomputing the checksum.
func injectChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte {
	t.Helper()
	hashSize := repo.hashAlgo.Size()
	numChunks := int(data[6])
	tocStart := commitGraphHeaderSize
	trailerStart := len(data) - hashSize

	chunks := make([]chunkSpec, 0, numChunks+1)
	for i := 0; i < numChunks; i++ {
		pos := tocStart + i*commitGraphChunkEntrySize
		chID := binary.BigEndian.Uint32(data[pos : pos+4])
		if chID == 0 {
			break
		}
		start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12]))
		end := trailerStart
		if i+1 < numChunks {
			end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12]))
		}
		chunks = append(chunks, chunkSpec{chID, data[start:end]})
	}
	chunks = append(chunks, chunkSpec{id, payload})
	return assembleCommitGraphBytes(t, repo, chunks)
}
+
// u32SliceToBytes serializes vals as consecutive big-endian 32-bit words.
func u32SliceToBytes(vals []uint32) []byte {
	buf := make([]byte, len(vals)*4)
	off := 0
	for _, v := range vals {
		binary.BigEndian.PutUint32(buf[off:off+4], v)
		off += 4
	}
	return buf
}
+
+func hashFromBytes(repo *Repository, b []byte) Hash {+ var h Hash
+ copy(h.data[:], b)
+ h.algo = repo.hashAlgo
+ return h
+}
--- a/repo.go
+++ b/repo.go
@@ -27,7 +27,12 @@
packFiles map[string]*packFile
packFilesMu sync.RWMutex
- closeOnce sync.Once
+
+ commitGraphOnce sync.Once
+ commitGraph *commitGraph
+ commitGraphErr error
+
+ closeOnce sync.Once
}
// OpenRepository opens the repository at the provided path.
@@ -100,6 +105,12 @@
if err != nil && closeErr == nil {closeErr = err
}
+ }
+ }
+ if repo.commitGraph != nil {+ err := repo.commitGraph.Close()
+ if err != nil && closeErr == nil {+ closeErr = err
}
}
})
--
⑨