ref: 16aa3c8d6ad11d8df278bd604aa6a30887445f84
dir: /packed_write_test.go/
package furgit
import (
"bytes"
"crypto/rand"
"encoding/binary"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"codeberg.org/lindenii/furgit/internal/bufpool"
"codeberg.org/lindenii/furgit/internal/zlibx"
)
// TestPackHeaderEncodeParseRoundtrip checks that packHeaderEncode and
// packHeaderParse are exact inverses for every object type, using sizes
// picked around varint group boundaries (15/16, 127/128, ...).
func TestPackHeaderEncodeParseRoundtrip(t *testing.T) {
	cases := []struct {
		ty    ObjectType
		sizes []int
	}{
		{ObjectTypeCommit, []int{0, 1, 15, 16, 127, 128, 1024, 1 << 20}},
		{ObjectTypeTree, []int{0, 3, 31, 32, 255, 256, 4096}},
		{ObjectTypeBlob, []int{0, 7, 63, 64, 511, 512, 99999}},
		{ObjectTypeTag, []int{0, 2, 14, 15, 16, 127, 128}},
	}
	for _, tc := range cases {
		for _, wantSize := range tc.sizes {
			hdr, err := packHeaderEncode(tc.ty, wantSize)
			if err != nil {
				t.Fatalf("packHeaderEncode(%v,%d) error: %v", tc.ty, wantSize, err)
			}
			parsedTy, parsedSize, n, err := packHeaderParse(hdr)
			switch {
			case err != nil:
				t.Fatalf("packHeaderParse error: %v", err)
			case parsedTy != tc.ty || parsedSize != wantSize:
				t.Fatalf("roundtrip mismatch: got (%v,%d), want (%v,%d)", parsedTy, parsedSize, tc.ty, wantSize)
			case n != len(hdr):
				t.Fatalf("consumed=%d, encoded=%d", n, len(hdr))
			}
		}
	}
}
// TestPackVarintEncodeRoundtrip checks that packVarintEncode and
// packVarintRead are exact inverses and that the reader consumes the
// whole encoding, for values around 7-bit group boundaries.
func TestPackVarintEncodeRoundtrip(t *testing.T) {
	for _, want := range []int{0, 1, 2, 7, 8, 127, 128, 129, 255, 1024, 1 << 20} {
		enc, err := packVarintEncode(want)
		if err != nil {
			t.Fatalf("packVarintEncode(%d) error: %v", want, err)
		}
		var pos int
		got, err := packVarintRead(enc, &pos)
		switch {
		case err != nil:
			t.Fatalf("packVarintRead error: %v", err)
		case got != want:
			t.Fatalf("roundtrip mismatch: got %d, want %d", got, want)
		case pos != len(enc):
			t.Fatalf("pos=%d, encoded=%d", pos, len(enc))
		}
	}
}
// TestPackOfsEncodeRoundtrip checks that packOfsEncode and
// packDeltaReadOfsDistance are exact inverses for offset-delta
// distances, including values around 0x7f/0x80 encoding boundaries.
func TestPackOfsEncodeRoundtrip(t *testing.T) {
	for _, want := range []uint64{1, 2, 7, 8, 9, 0x7f, 0x80, 0x81, 0x1000, 0x12345} {
		enc, err := packOfsEncode(want)
		if err != nil {
			t.Fatalf("packOfsEncode(%d) error: %v", want, err)
		}
		got, n, err := packDeltaReadOfsDistance(enc)
		switch {
		case err != nil:
			t.Fatalf("packDeltaReadOfsDistance error: %v", err)
		case got != want:
			t.Fatalf("roundtrip mismatch: got %d, want %d", got, want)
		case n != len(enc):
			t.Fatalf("consumed=%d, encoded=%d", n, len(enc))
		}
	}
}
// TestPackWriteNoDeltas writes a pack containing a commit, its tree, and
// 1000 random blobs with default options (no deltas), validates the raw
// stream with checkPackStream and with git index-pack/verify-pack, then
// deletes the loose objects and confirms git can still serve every
// object from the pack alone.
func TestPackWriteNoDeltas(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()
	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()
	const (
		fileCount = 1000
		fileSize  = 1024
	)
	// Random contents make the blobs mutually dissimilar, so there is no
	// delta opportunity even in principle.
	buf := make([]byte, fileSize)
	for i := 0; i < fileCount; i++ {
		if _, err := rand.Read(buf); err != nil {
			t.Fatalf("rand.Read failed: %v", err)
		}
		name := filepath.Join(workDir, fmt.Sprintf("file%04d.bin", i))
		if err := os.WriteFile(name, buf, 0o644); err != nil {
			t.Fatalf("failed to write %s: %v", name, err)
		}
	}
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "commit", "-m", "Test commit")
	commitHash := gitCmd(t, repoPath, nil, "rev-parse", "HEAD")
	// Pull the root tree hash out of the commit's first "tree " header line.
	commitBody := gitCatFile(t, repoPath, "commit", commitHash)
	lines := bytes.Split(commitBody, []byte{'\n'})
	if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {
		t.Fatalf("commit missing tree header")
	}
	treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))
	// Collect every blob hash; ls-tree fields are "<mode> <type> <hash>\t<path>",
	// so the hash is the third whitespace-separated field.
	lsTree := gitCmd(t, repoPath, nil, "ls-tree", "-r", treeHash)
	var blobHashes []string
	for _, line := range strings.Split(lsTree, "\n") {
		if line == "" {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 3 {
			t.Fatalf("unexpected ls-tree line: %q", line)
		}
		blobHashes = append(blobHashes, fields[2])
	}
	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()
	// Object list for packWrite: commit, tree, then all blobs. ParseHash
	// errors are ignored because these hashes come straight from git.
	var objects []Hash
	commitID, _ := repo.ParseHash(commitHash)
	objects = append(objects, commitID)
	treeID, _ := repo.ParseHash(treeHash)
	objects = append(objects, treeID)
	for _, bh := range blobHashes {
		id, _ := repo.ParseHash(bh)
		objects = append(objects, id)
	}
	expectedOids := append([]string{commitHash, treeHash}, blobHashes...)
	packDir := filepath.Join(repoPath, "objects", "pack")
	if err := os.MkdirAll(packDir, 0o755); err != nil {
		t.Fatalf("failed to create pack dir: %v", err)
	}
	pf, err := os.CreateTemp(packDir, "furgit-test-*.pack")
	if err != nil {
		t.Fatalf("failed to create pack file: %v", err)
	}
	packPath := pf.Name()
	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
	// Zero-valued packWriteOptions: delta compression is left disabled.
	if _, err := repo.packWrite(pf, objects, packWriteOptions{}, nil); err != nil {
		_ = pf.Close()
		t.Fatalf("packWrite failed: %v", err)
	}
	if err := pf.Close(); err != nil {
		t.Fatalf("failed to close pack file: %v", err)
	}
	defer func() {
		_ = os.Remove(packPath)
		_ = os.Remove(idxPath)
	}()
	// Validate the raw stream ourselves first, then let git build an index
	// over the same file (gitCmd presumably fails the test on a non-zero
	// exit, so the discarded outputs below are still meaningful checks).
	if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
		t.Fatalf("pack stream invalid: %v", err)
	}
	_ = gitCmd(t, repoPath, nil, "index-pack", "-o", idxPath, packPath)
	verifyOut := gitCmd(t, repoPath, nil, "verify-pack", "-v", idxPath)
	// verify-pack -v prints one object per line (hash first), plus
	// "non delta"/"chain length" summary lines which are skipped.
	seen := make(map[string]struct{})
	for _, line := range strings.Split(verifyOut, "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) == 0 {
			continue
		}
		seen[parts[0]] = struct{}{}
	}
	for _, oid := range expectedOids {
		if _, ok := seen[oid]; !ok {
			t.Fatalf("verify-pack missing object %s", oid)
		}
	}
	// Remove the loose copies so the pack is the only source of every
	// object, then verify git can still read them and the repo is sound.
	for _, oid := range expectedOids {
		if err := removeLooseObject(repoPath, oid); err != nil {
			t.Fatalf("remove loose object %s: %v", oid, err)
		}
	}
	for _, oid := range expectedOids {
		_ = gitCmd(t, repoPath, nil, "cat-file", "-p", oid)
	}
	_ = gitCmd(t, repoPath, nil, "fsck", "--full", "--strict")
}
// TestPackWriteDeltas is the delta-enabled counterpart of
// TestPackWriteNoDeltas: it commits 200 nearly identical files (each one
// byte away from a shared base, an ideal delta workload), writes a pack
// with EnableDeltas and an aggressive MinDeltaSavings of 1, validates the
// stream, and confirms git can serve every object once the loose copies
// are gone.
func TestPackWriteDeltas(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()
	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()
	const (
		fileCount = 200
		fileSize  = 2048
	)
	// Each file is the same repeated-text base with a single XOR'd byte,
	// so consecutive blobs delta against each other extremely well.
	base := bytes.Repeat([]byte("delta-base-"), fileSize/10)
	for i := 0; i < fileCount; i++ {
		buf := make([]byte, len(base))
		copy(buf, base)
		buf[i%len(buf)] ^= byte(i)
		name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i))
		if err := os.WriteFile(name, buf, 0o644); err != nil {
			t.Fatalf("failed to write %s: %v", name, err)
		}
	}
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "commit", "-m", "Delta commit")
	commitHash := gitCmd(t, repoPath, nil, "rev-parse", "HEAD")
	// Pull the root tree hash out of the commit's first "tree " header line.
	commitBody := gitCatFile(t, repoPath, "commit", commitHash)
	lines := bytes.Split(commitBody, []byte{'\n'})
	if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {
		t.Fatalf("commit missing tree header")
	}
	treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))
	// Collect every blob hash; the hash is the third field of each
	// ls-tree line ("<mode> <type> <hash>\t<path>").
	lsTree := gitCmd(t, repoPath, nil, "ls-tree", "-r", treeHash)
	var blobHashes []string
	for _, line := range strings.Split(lsTree, "\n") {
		if line == "" {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 3 {
			t.Fatalf("unexpected ls-tree line: %q", line)
		}
		blobHashes = append(blobHashes, fields[2])
	}
	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()
	// Object list for packWrite: commit, tree, then all blobs. ParseHash
	// errors are ignored because these hashes come straight from git.
	var objects []Hash
	commitID, _ := repo.ParseHash(commitHash)
	objects = append(objects, commitID)
	treeID, _ := repo.ParseHash(treeHash)
	objects = append(objects, treeID)
	for _, bh := range blobHashes {
		id, _ := repo.ParseHash(bh)
		objects = append(objects, id)
	}
	expectedOids := append([]string{commitHash, treeHash}, blobHashes...)
	packDir := filepath.Join(repoPath, "objects", "pack")
	if err := os.MkdirAll(packDir, 0o755); err != nil {
		t.Fatalf("failed to create pack dir: %v", err)
	}
	pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack")
	if err != nil {
		t.Fatalf("failed to create pack file: %v", err)
	}
	packPath := pf.Name()
	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
	// MinDeltaSavings of 1 accepts any delta that saves at least one
	// byte, maximizing the number of delta objects the test exercises.
	if _, err := repo.packWrite(pf, objects, packWriteOptions{
		EnableDeltas:    true,
		MinDeltaSavings: 1,
	}, nil); err != nil {
		_ = pf.Close()
		t.Fatalf("packWrite failed: %v", err)
	}
	if err := pf.Close(); err != nil {
		t.Fatalf("failed to close pack file: %v", err)
	}
	defer func() {
		_ = os.Remove(packPath)
		_ = os.Remove(idxPath)
	}()
	// Validate the raw stream (including delta resolution) ourselves,
	// then let git index and verify the same file.
	if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
		t.Fatalf("pack stream invalid: %v", err)
	}
	_ = gitCmd(t, repoPath, nil, "index-pack", "-o", idxPath, packPath)
	verifyOut := gitCmd(t, repoPath, nil, "verify-pack", "-v", idxPath)
	// verify-pack -v prints one object per line (hash first), plus
	// "non delta"/"chain length" summary lines which are skipped.
	seen := make(map[string]struct{})
	for _, line := range strings.Split(verifyOut, "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) == 0 {
			continue
		}
		seen[parts[0]] = struct{}{}
	}
	for _, oid := range expectedOids {
		if _, ok := seen[oid]; !ok {
			t.Fatalf("verify-pack missing object %s", oid)
		}
	}
	// Remove the loose copies so the pack is the only source of every
	// object, then verify git can still read them and the repo is sound.
	for _, oid := range expectedOids {
		if err := removeLooseObject(repoPath, oid); err != nil {
			t.Fatalf("remove loose object %s: %v", oid, err)
		}
	}
	for _, oid := range expectedOids {
		_ = gitCmd(t, repoPath, nil, "cat-file", "-p", oid)
	}
	_ = gitCmd(t, repoPath, nil, "fsck", "--full", "--strict")
}
// TestPackWriteThinPackReachable builds two commits whose blobs differ by
// one byte, then asks packWriteReachable for the objects reachable from
// the second commit but not the first (Wants/Haves with StopAtHaves).
// With EnableThinPack set, the new blob should be encoded as a ref-delta
// against a base that is NOT in the pack (it belongs to the "have" side),
// which checkThinPackStream detects. The thin pack is then completed by
// git index-pack --fix-thin and the repository re-verified.
func TestPackWriteThinPackReachable(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()
	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()
	// First commit: a 16 KiB blob of 'A's. This becomes the "have" side.
	base := bytes.Repeat([]byte("A"), 16384)
	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), base, 0o644); err != nil {
		t.Fatalf("write base file: %v", err)
	}
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "commit", "-m", "base")
	haveHash := gitCmd(t, repoPath, nil, "rev-parse", "HEAD")
	// Second commit: same blob with one byte changed — an ideal delta
	// candidate whose base lives only on the "have" side.
	mod := append([]byte(nil), base...)
	mod[1024] = 'B'
	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), mod, 0o644); err != nil {
		t.Fatalf("write mod file: %v", err)
	}
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, nil, "--work-tree="+workDir, "commit", "-m", "target")
	wantHash := gitCmd(t, repoPath, nil, "rev-parse", "HEAD")
	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()
	wantID, _ := repo.ParseHash(wantHash)
	haveID, _ := repo.ParseHash(haveHash)
	// Fetch-style query: everything reachable from wantID, stopping at
	// (and excluding) what the haveID side already has.
	query := ReachabilityQuery{
		Wants:       []Hash{wantID},
		Haves:       []Hash{haveID},
		Mode:        ReachabilityAllObjects,
		StopAtHaves: true,
	}
	var buf bytes.Buffer
	if _, err := repo.packWriteReachable(&buf, query, packWriteOptions{
		EnableDeltas:    true,
		EnableThinPack:  true,
		MinDeltaSavings: 1,
	}); err != nil {
		t.Fatalf("packWriteReachable failed: %v", err)
	}
	// thinSeen is true iff at least one ref-delta base was resolved from
	// the repository rather than from earlier in the pack.
	thinSeen, err := checkThinPackStream(buf.Bytes(), repo)
	if err != nil {
		t.Fatalf("thin pack stream invalid: %v", err)
	}
	if !thinSeen {
		t.Fatalf("expected thin pack with ref-delta base outside pack")
	}
	packDir := filepath.Join(repoPath, "objects", "pack")
	if err := os.MkdirAll(packDir, 0o755); err != nil {
		t.Fatalf("failed to create pack dir: %v", err)
	}
	packPath := filepath.Join(packDir, "furgit-thin-test.pack")
	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
	_ = os.Remove(packPath)
	_ = os.Remove(idxPath)
	// --fix-thin makes git append the missing bases, turning the thin
	// pack into a self-contained one; then confirm the target commit is
	// readable and the repository passes a strict fsck.
	_ = gitCmd(t, repoPath, buf.Bytes(), "index-pack", "--stdin", "--fix-thin", "-o", idxPath, packPath)
	_ = gitCmd(t, repoPath, nil, "cat-file", "-p", wantHash)
	_ = gitCmd(t, repoPath, nil, "fsck", "--full", "--strict")
	_ = os.Remove(packPath)
	_ = os.Remove(idxPath)
}
// checkPackStream validates a self-contained pack file on disk: it checks
// the 12-byte header (magic + version), then walks exactly objectCount
// entries, resolving ofs-delta and ref-delta objects against bases seen
// earlier in the same stream and verifying the delta base/result sizes.
// Every reconstructed object is re-hashed (header + body) so later
// ref-deltas can find it by hash. The pack trailer checksum is not
// checked here. Returns nil when the stream is well-formed.
func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	// Minimum header: 4-byte magic, 4-byte version, 4-byte object count.
	if len(data) < 12 {
		return ErrInvalidObject
	}
	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
		return ErrInvalidObject
	}
	pos := 12
	hashSize := algo.Size()
	// objEntry is a fully resolved (non-delta) object: its pack offset,
	// final type, and reconstructed body.
	type objEntry struct {
		offset uint64
		ty     ObjectType
		body   []byte
	}
	// Bases for ofs-deltas are looked up by offset; for ref-deltas by hash.
	byOffset := make(map[uint64]objEntry, objectCount)
	byHash := make(map[string]objEntry, objectCount)
	for i := 0; i < objectCount; i++ {
		objOffset := uint64(pos)
		ty, size, consumed, err := packHeaderParse(data[pos:])
		if err != nil {
			return fmt.Errorf("obj %d header at %d: %v", i, pos, err)
		}
		pos += consumed
		baseOffset := uint64(0)
		baseTy := ObjectTypeInvalid
		var baseBody []byte
		var baseHash Hash
		// For delta objects, locate the base before the zlib payload.
		switch ty {
		case ObjectTypeOfsDelta:
			// Negative-offset delta: distance back to the base entry.
			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
			if err != nil {
				return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)
			}
			pos += distConsumed
			// The base must start strictly before this object.
			if dist == 0 || dist > objOffset {
				return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
			}
			baseOffset = objOffset - dist
			base, ok := byOffset[baseOffset]
			if !ok {
				return fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
			}
			baseTy = base.ty
			baseBody = base.body
		case ObjectTypeRefDelta:
			// Hash-addressed delta: in a self-contained pack the base
			// must already have appeared (and been hashed) earlier.
			if pos+hashSize > len(data) {
				return ErrInvalidObject
			}
			copy(baseHash.data[:], data[pos:pos+hashSize])
			baseHash.algo = algo
			baseEntry, ok := byHash[baseHash.String()]
			if !ok {
				return fmt.Errorf("obj %d ref base not found", i)
			}
			baseTy = baseEntry.ty
			baseBody = baseEntry.body
			pos += hashSize
		default:
			// Non-delta object: no extra header bytes.
		}
		// The payload (object body or delta instructions) is zlib-
		// compressed; size from the object header is the inflated size.
		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
		if err != nil {
			return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)
		}
		// Copy out before releasing the pooled buffer.
		payload := append([]byte(nil), payloadBuf.Bytes()...)
		payloadBuf.Release()
		pos += zconsumed
		switch ty {
		case ObjectTypeOfsDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing base body", i)
			}
			// The delta payload starts with two varints: base size and
			// result size. Note: this pos shadows the stream cursor and
			// indexes into payload only.
			pos := 0
			baseSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d delta base size: %v", i, err)
			}
			resultSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d delta result size: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			// A resolved delta inherits its base's object type.
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		case ObjectTypeRefDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing ref base body", i)
			}
			// Same delta-header parse as the ofs-delta branch above.
			pos := 0
			baseSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d ref delta base size: %v", i, err)
			}
			resultSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d ref delta result size: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d ref delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		default:
			// Plain object: the payload is the body itself; its length
			// must match the header size when one was given.
			if size >= 0 && len(payload) != size {
				return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
			}
			body := append([]byte(nil), payload...)
			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
		}
		// Hash the reconstructed object ("<type> <len>\x00" header + body)
		// so later ref-deltas in this pack can reference it.
		entry := byOffset[objOffset]
		if entry.body != nil && entry.ty != ObjectTypeInvalid {
			hdr, err := headerForType(entry.ty, entry.body)
			if err != nil {
				return err
			}
			raw := append(hdr, entry.body...)
			hash := algo.Sum(raw)
			byHash[hash.String()] = entry
		}
	}
	return nil
}
// checkThinPackStream validates an in-memory pack that may be "thin":
// ref-delta bases missing from the stream are fetched from the repository
// instead of failing. It mirrors checkPackStream otherwise (header check,
// per-object parse, delta resolution and size verification, re-hashing of
// reconstructed objects). The object count is read from the pack header.
// Returns true iff at least one ref-delta base had to be resolved from
// the repository — i.e. the pack is genuinely thin.
func checkThinPackStream(data []byte, repo *Repository) (bool, error) {
	if repo == nil {
		return false, ErrInvalidObject
	}
	// Minimum header: 4-byte magic, 4-byte version, 4-byte object count.
	if len(data) < 12 {
		return false, ErrInvalidObject
	}
	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
		return false, ErrInvalidObject
	}
	count := int(binary.BigEndian.Uint32(data[8:12]))
	pos := 12
	hashSize := repo.hashAlgo.Size()
	// objEntry is a fully resolved (non-delta) object: its pack offset,
	// final type, and reconstructed body.
	type objEntry struct {
		offset uint64
		ty     ObjectType
		body   []byte
	}
	// Bases for ofs-deltas are looked up by offset; for ref-deltas by hash.
	byOffset := make(map[uint64]objEntry, count)
	byHash := make(map[string]objEntry, count)
	thinSeen := false
	for i := 0; i < count; i++ {
		objOffset := uint64(pos)
		ty, size, consumed, err := packHeaderParse(data[pos:])
		if err != nil {
			return thinSeen, fmt.Errorf("obj %d header at %d: %v", i, pos, err)
		}
		pos += consumed
		baseTy := ObjectTypeInvalid
		var baseBody []byte
		// For delta objects, locate the base before the zlib payload.
		switch ty {
		case ObjectTypeOfsDelta:
			// Negative-offset delta: distance back to the base entry,
			// which must live in this same stream.
			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
			if err != nil {
				return thinSeen, fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)
			}
			pos += distConsumed
			if dist == 0 || dist > objOffset {
				return thinSeen, fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
			}
			baseOffset := objOffset - dist
			base, ok := byOffset[baseOffset]
			if !ok {
				return thinSeen, fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
			}
			baseTy = base.ty
			baseBody = base.body
		case ObjectTypeRefDelta:
			if pos+hashSize > len(data) {
				return thinSeen, ErrInvalidObject
			}
			var baseHash Hash
			copy(baseHash.data[:], data[pos:pos+hashSize])
			baseHash.algo = repo.hashAlgo
			baseEntry, ok := byHash[baseHash.String()]
			if ok {
				baseTy = baseEntry.ty
				baseBody = baseEntry.body
			} else {
				// Base is not in the pack: this is the thin-pack case.
				// Resolve it from the repository's object store instead.
				thinSeen = true
				ty, body, err := repo.ReadObjectTypeRaw(baseHash)
				if err != nil {
					return thinSeen, err
				}
				baseTy = ty
				baseBody = body
			}
			pos += hashSize
		default:
			// Non-delta object: no extra header bytes.
		}
		// The payload (object body or delta instructions) is zlib-
		// compressed; size from the object header is the inflated size.
		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
		if err != nil {
			return thinSeen, fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)
		}
		// Copy out before releasing the pooled buffer.
		payload := append([]byte(nil), payloadBuf.Bytes()...)
		payloadBuf.Release()
		pos += zconsumed
		switch ty {
		case ObjectTypeOfsDelta, ObjectTypeRefDelta:
			if baseBody == nil {
				return thinSeen, fmt.Errorf("obj %d missing base body", i)
			}
			// The delta payload starts with two varints: base size and
			// result size. Note: this pos shadows the stream cursor and
			// indexes into payload only.
			pos := 0
			baseSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return thinSeen, fmt.Errorf("obj %d delta base size: %v", i, err)
			}
			resultSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return thinSeen, fmt.Errorf("obj %d delta result size: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return thinSeen, fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return thinSeen, fmt.Errorf("obj %d delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return thinSeen, fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			// A resolved delta inherits its base's object type.
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		default:
			// Plain object: the payload is the body itself; its length
			// must match the header size when one was given.
			if size >= 0 && len(payload) != size {
				return thinSeen, fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
			}
			body := append([]byte(nil), payload...)
			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
		}
		// Hash the reconstructed object ("<type> <len>\x00" header + body)
		// so later ref-deltas in this pack can reference it.
		entry := byOffset[objOffset]
		if entry.body != nil && entry.ty != ObjectTypeInvalid {
			hdr, err := headerForType(entry.ty, entry.body)
			if err != nil {
				return thinSeen, err
			}
			raw := append(hdr, entry.body...)
			hash := repo.hashAlgo.Sum(raw)
			byHash[hash.String()] = entry
		}
	}
	return thinSeen, nil
}
// removeLooseObject deletes the loose object file for oid from the
// repository's object store (objects/<first-two>/<rest>). An oid shorter
// than the two-character fan-out prefix is rejected; a file that is
// already gone is not an error.
func removeLooseObject(repoPath, oid string) error {
	if len(oid) < 2 {
		return ErrInvalidObject
	}
	loosePath := filepath.Join(repoPath, "objects", oid[:2], oid[2:])
	if err := os.Remove(loosePath); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}