// shithub: furgit
// ref: dd59d664675ad7dfef033311d279c6f988b367d4
// dir: /packed_write_test.go/
// (web-viewer header converted to comments so the file compiles)

package furgit

import (
	"bytes"
	"crypto/rand"
	"encoding/binary"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"

	"codeberg.org/lindenii/furgit/internal/bufpool"
	"codeberg.org/lindenii/furgit/internal/zlibx"
)

// TestPackHeaderEncodeParseRoundtrip verifies that pack object headers
// survive an encode/parse cycle for every object type across a spread of
// sizes, and that the parser consumes exactly the bytes that were encoded.
func TestPackHeaderEncodeParseRoundtrip(t *testing.T) {
	cases := []struct {
		ty    ObjectType
		sizes []int
	}{
		{ObjectTypeCommit, []int{0, 1, 15, 16, 127, 128, 1024, 1 << 20}},
		{ObjectTypeTree, []int{0, 3, 31, 32, 255, 256, 4096}},
		{ObjectTypeBlob, []int{0, 7, 63, 64, 511, 512, 99999}},
		{ObjectTypeTag, []int{0, 2, 14, 15, 16, 127, 128}},
	}

	for _, tc := range cases {
		for _, want := range tc.sizes {
			enc, err := packHeaderEncode(tc.ty, want)
			if err != nil {
				t.Fatalf("packHeaderEncode(%v,%d) error: %v", tc.ty, want, err)
			}
			ty, size, n, err := packHeaderParse(enc)
			switch {
			case err != nil:
				t.Fatalf("packHeaderParse error: %v", err)
			case ty != tc.ty || size != want:
				t.Fatalf("roundtrip mismatch: got (%v,%d), want (%v,%d)", ty, size, tc.ty, want)
			case n != len(enc):
				t.Fatalf("consumed=%d, encoded=%d", n, len(enc))
			}
		}
	}
}

// TestPackVarintEncodeRoundtrip checks that pack varints decode back to the
// value they were encoded from, and that the read cursor lands exactly at
// the end of the encoded bytes.
func TestPackVarintEncodeRoundtrip(t *testing.T) {
	for _, want := range []int{0, 1, 2, 7, 8, 127, 128, 129, 255, 1024, 1 << 20} {
		enc, err := packVarintEncode(want)
		if err != nil {
			t.Fatalf("packVarintEncode(%d) error: %v", want, err)
		}
		cursor := 0
		got, err := packVarintRead(enc, &cursor)
		if err != nil {
			t.Fatalf("packVarintRead error: %v", err)
		}
		if got != want {
			t.Fatalf("roundtrip mismatch: got %d, want %d", got, want)
		}
		if cursor != len(enc) {
			t.Fatalf("pos=%d, encoded=%d", cursor, len(enc))
		}
	}
}

// TestPackOfsEncodeRoundtrip checks that ofs-delta base distances decode
// back to the encoded value and that decoding consumes every encoded byte.
func TestPackOfsEncodeRoundtrip(t *testing.T) {
	for _, want := range []uint64{1, 2, 7, 8, 9, 0x7f, 0x80, 0x81, 0x1000, 0x12345} {
		enc, err := packOfsEncode(want)
		if err != nil {
			t.Fatalf("packOfsEncode(%d) error: %v", want, err)
		}
		got, n, err := packDeltaReadOfsDistance(enc)
		switch {
		case err != nil:
			t.Fatalf("packDeltaReadOfsDistance error: %v", err)
		case got != want:
			t.Fatalf("roundtrip mismatch: got %d, want %d", got, want)
		case n != len(enc):
			t.Fatalf("consumed=%d, encoded=%d", n, len(enc))
		}
	}
}

// TestPackWriteNoDeltas writes a pack containing a commit, its tree, and
// 1000 random blobs without delta compression, validates the pack stream
// both manually (checkPackStream) and with git index-pack/verify-pack, then
// removes the loose objects and checks git can still serve every object
// from the pack alone.
func TestPackWriteNoDeltas(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()

	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()

	const (
		fileCount = 1000
		fileSize  = 1024
	)
	// Random payloads make blobs mutually incompressible, so this exercises
	// the plain (non-delta) write path.
	buf := make([]byte, fileSize)
	for i := 0; i < fileCount; i++ {
		if _, err := rand.Read(buf); err != nil {
			t.Fatalf("rand.Read failed: %v", err)
		}
		name := filepath.Join(workDir, fmt.Sprintf("file%04d.bin", i))
		if err := os.WriteFile(name, buf, 0o644); err != nil {
			t.Fatalf("failed to write %s: %v", name, err)
		}
	}

	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit")
	commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD")

	// Resolve the tree hash from the raw commit body's first header line.
	commitBody := gitCatFile(t, repoPath, "commit", commitHash)
	lines := bytes.Split(commitBody, []byte{'\n'})
	if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {
		t.Fatalf("commit missing tree header")
	}
	treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))

	// Collect every blob hash reachable from the tree.
	lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash)
	var blobHashes []string
	for _, line := range strings.Split(lsTree, "\n") {
		if line == "" {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 3 {
			t.Fatalf("unexpected ls-tree line: %q", line)
		}
		blobHashes = append(blobHashes, fields[2])
	}

	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	// Build the object list: commit, tree, then blobs. ParseHash failures
	// are fatal — silently dropping one would shrink the pack and mask bugs.
	var objects []Hash
	commitID, err := repo.ParseHash(commitHash)
	if err != nil {
		t.Fatalf("ParseHash(%q) failed: %v", commitHash, err)
	}
	objects = append(objects, commitID)
	treeID, err := repo.ParseHash(treeHash)
	if err != nil {
		t.Fatalf("ParseHash(%q) failed: %v", treeHash, err)
	}
	objects = append(objects, treeID)
	for _, bh := range blobHashes {
		id, err := repo.ParseHash(bh)
		if err != nil {
			t.Fatalf("ParseHash(%q) failed: %v", bh, err)
		}
		objects = append(objects, id)
	}
	expectedOids := append([]string{commitHash, treeHash}, blobHashes...)

	packDir := filepath.Join(repoPath, "objects", "pack")
	if err := os.MkdirAll(packDir, 0o755); err != nil {
		t.Fatalf("failed to create pack dir: %v", err)
	}
	pf, err := os.CreateTemp(packDir, "furgit-test-*.pack")
	if err != nil {
		t.Fatalf("failed to create pack file: %v", err)
	}
	packPath := pf.Name()
	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
	// Register cleanup as soon as the file exists, so a failing packWrite
	// does not leave stray pack/idx files behind.
	defer func() {
		_ = os.Remove(packPath)
		_ = os.Remove(idxPath)
	}()
	if _, err := repo.packWrite(pf, objects, packWriteOptions{}); err != nil {
		_ = pf.Close()
		t.Fatalf("packWrite failed: %v", err)
	}
	if err := pf.Close(); err != nil {
		t.Fatalf("failed to close pack file: %v", err)
	}

	// Walk the pack stream ourselves before handing it to git.
	if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
		t.Fatalf("pack stream invalid: %v", err)
	}

	// git index-pack validates the stream and produces the .idx that
	// verify-pack and cat-file need below.
	cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)
	cmd.Dir = repoPath
	cmd.Env = append(os.Environ(),
		"GIT_CONFIG_GLOBAL=/dev/null",
		"GIT_CONFIG_SYSTEM=/dev/null",
	)
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("git index-pack failed: %v\n%s", err, output)
	}

	// verify-pack -v prints one object per line plus summary lines; skip
	// the summaries and record the oid column.
	verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
	seen := make(map[string]struct{})
	for _, line := range strings.Split(verifyOut, "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) == 0 {
			continue
		}
		seen[parts[0]] = struct{}{}
	}
	for _, oid := range expectedOids {
		if _, ok := seen[oid]; !ok {
			t.Fatalf("verify-pack missing object %s", oid)
		}
	}

	// Delete the loose copies so the pack is the only source, then confirm
	// git can still materialize every object (gitCmd fails the test on error).
	for _, oid := range expectedOids {
		if err := removeLooseObject(repoPath, oid); err != nil {
			t.Fatalf("remove loose object %s: %v", oid, err)
		}
	}
	for _, oid := range expectedOids {
		_ = gitCmd(t, repoPath, "cat-file", "-p", oid)
	}

	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}

// TestPackWriteDeltas writes a pack of 200 near-identical blobs (plus their
// commit and tree) with delta compression enabled, validates the pack stream
// both manually (checkPackStream) and with git index-pack/verify-pack, then
// removes the loose objects and checks git can still serve every object
// from the pack alone.
func TestPackWriteDeltas(t *testing.T) {
	repoPath, cleanup := setupTestRepo(t)
	defer cleanup()

	workDir, cleanupWork := setupWorkDir(t)
	defer cleanupWork()

	const (
		fileCount = 200
		fileSize  = 2048
	)
	// Each file is the shared base with a single byte flipped, giving the
	// delta writer near-ideal candidates.
	base := bytes.Repeat([]byte("delta-base-"), fileSize/10)
	for i := 0; i < fileCount; i++ {
		buf := make([]byte, len(base))
		copy(buf, base)
		buf[i%len(buf)] ^= byte(i)
		name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i))
		if err := os.WriteFile(name, buf, 0o644); err != nil {
			t.Fatalf("failed to write %s: %v", name, err)
		}
	}

	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Delta commit")
	commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD")

	// Resolve the tree hash from the raw commit body's first header line.
	commitBody := gitCatFile(t, repoPath, "commit", commitHash)
	lines := bytes.Split(commitBody, []byte{'\n'})
	if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {
		t.Fatalf("commit missing tree header")
	}
	treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))

	// Collect every blob hash reachable from the tree.
	lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash)
	var blobHashes []string
	for _, line := range strings.Split(lsTree, "\n") {
		if line == "" {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 3 {
			t.Fatalf("unexpected ls-tree line: %q", line)
		}
		blobHashes = append(blobHashes, fields[2])
	}

	repo, err := OpenRepository(repoPath)
	if err != nil {
		t.Fatalf("OpenRepository failed: %v", err)
	}
	defer func() { _ = repo.Close() }()

	// Build the object list: commit, tree, then blobs. ParseHash failures
	// are fatal — silently dropping one would shrink the pack and mask bugs.
	var objects []Hash
	commitID, err := repo.ParseHash(commitHash)
	if err != nil {
		t.Fatalf("ParseHash(%q) failed: %v", commitHash, err)
	}
	objects = append(objects, commitID)
	treeID, err := repo.ParseHash(treeHash)
	if err != nil {
		t.Fatalf("ParseHash(%q) failed: %v", treeHash, err)
	}
	objects = append(objects, treeID)
	for _, bh := range blobHashes {
		id, err := repo.ParseHash(bh)
		if err != nil {
			t.Fatalf("ParseHash(%q) failed: %v", bh, err)
		}
		objects = append(objects, id)
	}
	expectedOids := append([]string{commitHash, treeHash}, blobHashes...)

	packDir := filepath.Join(repoPath, "objects", "pack")
	if err := os.MkdirAll(packDir, 0o755); err != nil {
		t.Fatalf("failed to create pack dir: %v", err)
	}
	pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack")
	if err != nil {
		t.Fatalf("failed to create pack file: %v", err)
	}
	packPath := pf.Name()
	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
	// Register cleanup as soon as the file exists, so a failing packWrite
	// does not leave stray pack/idx files behind.
	defer func() {
		_ = os.Remove(packPath)
		_ = os.Remove(idxPath)
	}()
	// MinDeltaSavings of 1 forces deltification whenever it saves anything.
	if _, err := repo.packWrite(pf, objects, packWriteOptions{
		EnableDeltas:    true,
		MinDeltaSavings: 1,
	}); err != nil {
		_ = pf.Close()
		t.Fatalf("packWrite failed: %v", err)
	}
	if err := pf.Close(); err != nil {
		t.Fatalf("failed to close pack file: %v", err)
	}

	// Walk the pack stream ourselves before handing it to git.
	if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
		t.Fatalf("pack stream invalid: %v", err)
	}

	// git index-pack validates the stream and produces the .idx that
	// verify-pack and cat-file need below.
	cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)
	cmd.Dir = repoPath
	cmd.Env = append(os.Environ(),
		"GIT_CONFIG_GLOBAL=/dev/null",
		"GIT_CONFIG_SYSTEM=/dev/null",
	)
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("git index-pack failed: %v\n%s", err, output)
	}

	// verify-pack -v prints one object per line plus summary lines; skip
	// the summaries and record the oid column.
	verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
	seen := make(map[string]struct{})
	for _, line := range strings.Split(verifyOut, "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) == 0 {
			continue
		}
		seen[parts[0]] = struct{}{}
	}
	for _, oid := range expectedOids {
		if _, ok := seen[oid]; !ok {
			t.Fatalf("verify-pack missing object %s", oid)
		}
	}

	// Delete the loose copies so the pack is the only source, then confirm
	// git can still materialize every object (gitCmd fails the test on error).
	for _, oid := range expectedOids {
		if err := removeLooseObject(repoPath, oid); err != nil {
			t.Fatalf("remove loose object %s: %v", oid, err)
		}
	}
	for _, oid := range expectedOids {
		_ = gitCmd(t, repoPath, "cat-file", "-p", oid)
	}

	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}

// checkPackStream reads the pack file at path and walks its object stream
// entry by entry, verifying that it contains exactly objectCount parseable
// objects: the 12-byte header carries packMagic/packVersion2, each object
// header parses, each payload inflates to its declared size, and every
// ofs/ref delta resolves against an earlier object in the same stream.
// Resolved bodies are indexed by offset and by content hash so later deltas
// can find their bases. It does not verify the pack's trailing checksum.
func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	// Minimum: 4-byte magic + 4-byte version + 4-byte object count.
	if len(data) < 12 {
		return ErrInvalidObject
	}
	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
		return ErrInvalidObject
	}
	pos := 12
	hashSize := algo.Size()
	// objEntry records a fully-resolved object so later deltas can use it
	// as a base, either by stream offset (ofs-delta) or by hash (ref-delta).
	type objEntry struct {
		offset uint64
		ty     ObjectType
		body   []byte
	}
	byOffset := make(map[uint64]objEntry, objectCount)
	byHash := make(map[string]objEntry, objectCount)
	for i := 0; i < objectCount; i++ {
		objOffset := uint64(pos)
		ty, size, consumed, err := packHeaderParse(data[pos:])
		if err != nil {
			return fmt.Errorf("obj %d header at %d: %v", i, pos, err)
		}
		pos += consumed
		// Base lookup state; only populated for delta objects.
		baseOffset := uint64(0)
		baseTy := ObjectTypeInvalid
		var baseBody []byte
		var baseHash Hash
		switch ty {
		case ObjectTypeOfsDelta:
			// Offset deltas encode a backwards distance to their base,
			// which must already have appeared in the stream.
			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
			if err != nil {
				return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)
			}
			pos += distConsumed
			if dist == 0 || dist > objOffset {
				return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
			}
			baseOffset = objOffset - dist
			base, ok := byOffset[baseOffset]
			if !ok {
				return fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
			}
			baseTy = base.ty
			baseBody = base.body
		case ObjectTypeRefDelta:
			// Ref deltas name their base by full object hash.
			if pos+hashSize > len(data) {
				return ErrInvalidObject
			}
			copy(baseHash.data[:], data[pos:pos+hashSize])
			baseHash.algo = algo
			baseEntry, ok := byHash[baseHash.String()]
			if !ok {
				return fmt.Errorf("obj %d ref base not found", i)
			}
			baseTy = baseEntry.ty
			baseBody = baseEntry.body
			pos += hashSize
		default:
		}

		// Inflate the (possibly delta) payload; size is the inflated length
		// declared in the object header.
		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
		if err != nil {
			return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)
		}
		// Copy out before releasing the pooled buffer.
		payload := append([]byte(nil), payloadBuf.Bytes()...)
		payloadBuf.Release()
		pos += zconsumed
		switch ty {
		case ObjectTypeOfsDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing base body", i)
			}
			// A delta payload starts with two varints: base size and
			// result size. (pos here shadows the stream cursor.)
			pos := 0
			baseSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d delta base size: %v", i, err)
			}
			resultSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d delta result size: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			// A resolved delta takes its base's object type.
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		case ObjectTypeRefDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing ref base body", i)
			}
			// Same varint preamble as ofs-delta: base size, then result size.
			pos := 0
			baseSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d ref delta base size: %v", i, err)
			}
			resultSize, err := packVarintRead(payload, &pos)
			if err != nil {
				return fmt.Errorf("obj %d ref delta result size: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d ref delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		default:
			// Non-delta object: the inflated payload is the body itself and
			// must match the declared size exactly.
			if size >= 0 && len(payload) != size {
				return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
			}
			body := append([]byte(nil), payload...)
			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
		}

		// Hash the resolved object ("<type> <len>\x00" header + body) so
		// later ref-deltas can locate it by oid.
		entry := byOffset[objOffset]
		if entry.body != nil && entry.ty != ObjectTypeInvalid {
			hdr, err := headerForType(entry.ty, entry.body)
			if err != nil {
				return err
			}
			raw := append(hdr, entry.body...)
			hash := algo.Sum(raw)
			byHash[hash.String()] = entry
		}
	}
	return nil
}

// removeLooseObject deletes the loose-object file for oid under the
// repository's objects directory (objects/<first two>/<rest>). A file that
// is already gone is not an error; an oid shorter than two characters
// yields ErrInvalidObject.
func removeLooseObject(repoPath, oid string) error {
	if len(oid) < 2 {
		return ErrInvalidObject
	}
	loosePath := filepath.Join(repoPath, "objects", oid[:2], oid[2:])
	err := os.Remove(loosePath)
	switch {
	case err == nil, os.IsNotExist(err):
		return nil
	default:
		return err
	}
}