ref: 17c9aee0e781026353ead4ac749a3ae89c83d007
parent: 75abe70f32430f0b1cfa4eb9159158564000c636
author: Runxi Yu <me@runxiyu.org>
date: Thu Jan 29 12:41:23 EST 2026
packed: Write packs with deltas
--- /dev/null
+++ b/delta_read_apply.go
@@ -1,0 +1,159 @@
+package furgit
+
+import (
+ "io"
+
+ "codeberg.org/lindenii/furgit/internal/bufpool"
+)
+
// packDeltaReadOfsDistance decodes the big-endian, MSB-continued offset
// encoding used by OBJ_OFS_DELTA entries: each continuation step computes
// dist = ((dist + 1) << 7) | low7bits, per the git pack format.
//
// It returns the decoded backwards distance, the number of bytes consumed,
// and io.ErrUnexpectedEOF when the input is empty, truncated mid-encoding,
// or long enough that the value could not fit in a uint64.
func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {
	if len(data) == 0 {
		return 0, 0, io.ErrUnexpectedEOF
	}
	b := data[0]
	dist := uint64(b & 0x7f)
	consumed := 1
	for (b & 0x80) != 0 {
		if consumed >= len(data) {
			return 0, 0, io.ErrUnexpectedEOF
		}
		// Each byte contributes 7 bits plus the +1 bias; beyond 10
		// bytes the accumulator would silently overflow uint64, so
		// reject overlong (possibly adversarial) encodings instead.
		if consumed >= 10 {
			return 0, 0, io.ErrUnexpectedEOF
		}
		b = data[consumed]
		consumed++
		dist = ((dist + 1) << 7) | uint64(b&0x7f)
	}
	return dist, consumed, nil
}
+
// packDeltaApply reconstructs an object by applying a git delta payload to
// its base. The delta starts with two varints (expected base size, result
// size) followed by a stream of copy/insert opcodes. On success the caller
// owns the returned buffer; on any error the output buffer is released and
// a zero Buffer is returned.
func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {
	pos := 0
	baseBytes := base.Bytes()
	deltaBytes := delta.Bytes()
	srcSize, err := packVarintRead(deltaBytes, &pos)
	if err != nil {
		return bufpool.Buffer{}, err
	}
	dstSize, err := packVarintRead(deltaBytes, &pos)
	if err != nil {
		return bufpool.Buffer{}, err
	}
	// The delta is only valid against a base of exactly the recorded size.
	if srcSize != len(baseBytes) {
		return bufpool.Buffer{}, ErrInvalidObject
	}
	out := bufpool.Borrow(dstSize)
	out.Resize(dstSize)
	outBytes := out.Bytes()
	outPos := 0

	for pos < len(deltaBytes) {
		op := deltaBytes[pos]
		pos++
		switch {
		case op&0x80 != 0:
			// Copy opcode: bits 0-3 select which offset bytes follow,
			// bits 4-6 select which length bytes follow (little-endian).
			off := 0
			n := 0
			if op&0x01 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				off |= int(deltaBytes[pos])
				pos++
			}
			if op&0x02 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				off |= int(deltaBytes[pos]) << 8
				pos++
			}
			if op&0x04 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				off |= int(deltaBytes[pos]) << 16
				pos++
			}
			if op&0x08 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				off |= int(deltaBytes[pos]) << 24
				pos++
			}
			if op&0x10 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				n |= int(deltaBytes[pos])
				pos++
			}
			if op&0x20 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				n |= int(deltaBytes[pos]) << 8
				pos++
			}
			if op&0x40 != 0 {
				if pos >= len(deltaBytes) {
					out.Release()
					return bufpool.Buffer{}, ErrInvalidObject
				}
				n |= int(deltaBytes[pos]) << 16
				pos++
			}
			// A copy length of zero means 0x10000 per the pack format.
			if n == 0 {
				n = 0x10000
			}
			if off+n > len(baseBytes) || outPos+n > len(outBytes) {
				out.Release()
				return bufpool.Buffer{}, ErrInvalidObject
			}
			copy(outBytes[outPos:], baseBytes[off:off+n])
			outPos += n
		case op != 0:
			// Insert opcode: op itself (1..127) is the literal byte count.
			n := int(op)
			if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {
				out.Release()
				return bufpool.Buffer{}, ErrInvalidObject
			}
			copy(outBytes[outPos:], deltaBytes[pos:pos+n])
			pos += n
			outPos += n
		default:
			// Opcode 0x00 is reserved/invalid.
			out.Release()
			return bufpool.Buffer{}, ErrInvalidObject
		}
	}

	// The instruction stream must produce exactly the advertised size.
	if outPos != len(outBytes) {
		out.Release()
		return bufpool.Buffer{}, ErrInvalidObject
	}
	return out, nil
}
+
+func packVarintRead(buf []byte, pos *int) (int, error) {+ res := 0
+ shift := 0
+ for {+ if *pos >= len(buf) {+ return 0, ErrInvalidObject
+ }
+ b := buf[*pos]
+ *pos++
+ res |= int(b&0x7f) << shift
+ if (b & 0x80) == 0 {+ break
+ }
+ shift += 7
+ }
+ return res, nil
+}
--- /dev/null
+++ b/delta_test.go
@@ -1,0 +1,55 @@
+package furgit
+
+import (
+ "bytes"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/bufpool"
+)
+
// TestDeltaEncodeApplyRoundtrip hand-builds a copy/insert/copy instruction
// script, encodes it with deltaEncode, applies it with packDeltaApply, and
// verifies the reconstructed bytes equal the intended target.
func TestDeltaEncodeApplyRoundtrip(t *testing.T) {
	base := []byte("Hello, world!")
	target := []byte("Hello, brave new world!")

	// copy "Hello, " (7 bytes), insert "brave new ", copy "world!" (6 bytes).
	instr := []deltaInstruction{
		{copy: true, offset: 0, length: 7},
		{copy: false, data: []byte("brave new "), length: len("brave new ")},
		{copy: true, offset: 7, length: 6},
	}
	delta, err := deltaEncode(len(base), len(target), instr)
	if err != nil {
		t.Fatalf("deltaEncode error: %v", err)
	}

	out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
	if err != nil {
		t.Fatalf("packDeltaApply error: %v", err)
	}
	defer out.Release()
	if !bytes.Equal(out.Bytes(), target) {
		t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)
	}
}
+
// TestDeltifyRoundtrip runs the full pipeline — deltify to produce
// instructions, deltaEncode to serialize them, packDeltaApply to replay
// them against the base — and checks the target is reproduced exactly.
func TestDeltifyRoundtrip(t *testing.T) {
	base := []byte("The quick brown fox jumps over the lazy dog.")
	target := []byte("The quick red fox jumps over the very lazy dog.")

	instr, err := deltify(base, target, 0)
	if err != nil {
		t.Fatalf("deltify error: %v", err)
	}
	delta, err := deltaEncode(len(base), len(target), instr)
	if err != nil {
		t.Fatalf("deltaEncode error: %v", err)
	}

	out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
	if err != nil {
		t.Fatalf("packDeltaApply error: %v", err)
	}
	defer out.Release()
	if !bytes.Equal(out.Bytes(), target) {
		t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)
	}
}
--- /dev/null
+++ b/delta_write_deltify.go
@@ -1,0 +1,306 @@
+package furgit
+
+import (
+ "bytes"
+
+ "codeberg.org/lindenii/furgit/internal/murmurhash2"
+)
+
const (
	// deltaMinChunk is the smallest chunk the content-defined chunker
	// emits; shorter tails become literal insert instructions.
	deltaMinChunk = 32
	// deltaMaxChunk caps a chunk's growth when no split point is found.
	deltaMaxChunk = 8192
	// deltaSplitMask selects the low bits of the rolling gear hash; a
	// boundary is declared when they are all zero (~256-byte average).
	deltaSplitMask = (1 << 8) - 1
	// deltaStretchMax bounds copy stretching: copy lengths are encoded
	// in at most 24 bits by deltaEncode.
	deltaStretchMax = (1 << 24) - 1
)
+
+var deltaGearTab = [...]uint32{+ 0x67ed26b7, 0x32da500c, 0x53d0fee0, 0xce387dc7, 0xcd406d90, 0x2e83a4d4,
+ 0x9fc9a38d, 0xb67259dc, 0xca6b1722, 0x6d2ea08c, 0x235cea2e, 0x3149bb5f,
+ 0x1beda787, 0x2a6b77d5, 0x2f22d9ac, 0x91fc0544, 0xe413acfa, 0x5a30ff7a,
+ 0xad6fdde0, 0x444fd0f5, 0x7ad87864, 0x58c5ff05, 0x8d2ec336, 0x2371f853,
+ 0x550f8572, 0x6aa448dd, 0x7c9ddbcf, 0x95221e14, 0x2a82ec33, 0xcbec5a78,
+ 0xc6795a0d, 0x243995b7, 0x1c909a2f, 0x4fded51c, 0x635d334b, 0x0e2b9999,
+ 0x2702968d, 0x856de1d5, 0x3325d60e, 0xeb6a7502, 0xec2a9844, 0x0905835a,
+ 0xa1820375, 0xa4be5cab, 0x96a6c058, 0x2c2ccd70, 0xba40fce3, 0xd794c46b,
+ 0x8fbae83e, 0xc3aa7899, 0x3d3ff8ed, 0xa0d42b5b, 0x571c0c97, 0xd2811516,
+ 0xf7e7b96c, 0x4fd2fcbd, 0xe2fdec94, 0x282cc436, 0x78e8e95c, 0x80a3b613,
+ 0xcfbee20c, 0xd4a32d1c, 0x2a12ff13, 0x6af82936, 0xe5630258, 0x8efa6a98,
+ 0x294fb2d1, 0xdeb57086, 0x5f0fddb3, 0xeceda7ce, 0x4c87305f, 0x3a6d3307,
+ 0xe22d2942, 0x9d060217, 0x1e42ed02, 0xb6f63b52, 0x4367f39f, 0x055cf262,
+ 0x03a461b2, 0x5ef9e382, 0x386bc03a, 0x2a1e79c7, 0xf1a0058b, 0xd4d2dea9,
+ 0x56baf37d, 0x5daff6cc, 0xf03a951d, 0xaef7de45, 0xa8f4581e, 0x3960b555,
+ 0xffbfff6d, 0xbe702a23, 0x8f5b6d6f, 0x061739fb, 0x98696f47, 0x3fd596d4,
+ 0x151eac6b, 0xa9fcc4f5, 0x69181a12, 0x3ac5a107, 0xb5198fe7, 0x96bcb1da,
+ 0x1b5ddf8e, 0xc757d650, 0x65865c3a, 0x8fc0a41a, 0x87435536, 0x99eda6f2,
+ 0x41874794, 0x29cff4e8, 0xb70efd9a, 0x3103f6e7, 0x84d2453b, 0x15a450bd,
+ 0x74f49af1, 0x60f664b1, 0xa1c86935, 0xfdafbce1, 0xe36353e3, 0x5d9ba739,
+ 0xbc0559ba, 0x708b0054, 0xd41d808c, 0xb2f31723, 0x9027c41f, 0xf136d165,
+ 0xb5374b12, 0x9420a6ac, 0x273958b6, 0xe6c2fad0, 0xebdc1f21, 0xfb33af8b,
+ 0xc71c25cd, 0xe9a2d8e5, 0xbeb38a50, 0xbceb7cc2, 0x4e4e73f0, 0xcd6c251d,
+ 0xde4c032c, 0x4b04ac30, 0x725b8b21, 0x4eb8c33b, 0x20d07b75, 0x0567aa63,
+ 0xb56b2bb7, 0xc1f5fd3a, 0xcafd35ca, 0x470dd4da, 0xfe4f94cd, 0xfb8de424,
+ 0xe8dbcf40, 0xfe50a37a, 0x62db5b5d, 0xf32f4ab6, 0x2c4a8a51, 0x18473dc0,
+ 0xfe0cbb6e, 0xfe399efd, 0xdf34ecc9, 0x6ccd5055, 0x46097073, 0x139135c2,
+ 0x721c76f6, 0x1c6a94b4, 0x6eee014d, 0x8a508e02, 0x3da538f5, 0x280d394f,
+ 0x5248a0c4, 0x3ce94c6c, 0x9a71ad3a, 0x8493dd05, 0xe43f0ab6, 0x18e4ed42,
+ 0x6c5c0e09, 0x42b06ec9, 0x8d330343, 0xa45b6f59, 0x2a573c0c, 0xd7fd3de6,
+ 0xeedeab68, 0x5c84dafc, 0xbbd1b1a8, 0xa3ce1ad1, 0x85b70bed, 0xb6add07f,
+ 0xa531309c, 0x8f8ab852, 0x564de332, 0xeac9ed0c, 0x73da402c, 0x3ec52761,
+ 0x43af2f4d, 0xd6ff45c8, 0x4c367462, 0xd553bd6a, 0x44724855, 0x3b2aa728,
+ 0x56e5eb65, 0xeaf16173, 0x33fa42ff, 0xd714bb5d, 0xfbd0a3b9, 0xaf517134,
+ 0x9416c8cd, 0x534cf94f, 0x548947c2, 0x34193569, 0x32f4389a, 0xfe7028bc,
+ 0xed73b1ed, 0x9db95770, 0x468e3922, 0x0440c3cd, 0x60059a62, 0x33504562,
+ 0x2b229fbd, 0x5174dca5, 0xf7028752, 0xd63c6aa8, 0x31276f38, 0x0646721c,
+ 0xb0191da8, 0xe00e6de0, 0x9eac1a6e, 0x9f7628a5, 0xed6c06ea, 0x0bb8af15,
+ 0xf119fb12, 0x38693c1c, 0x732bc0fe, 0x84953275, 0xb82ec888, 0x33a4f1b3,
+ 0x3099835e, 0x028a8782, 0x5fdd51d7, 0xc6c717b3, 0xb06caf71, 0x17c8c111,
+ 0x61bad754, 0x9fd03061, 0xe09df1af, 0x3bc9eb73, 0x85878413, 0x9889aaf2,
+ 0x3f5a9e46, 0x42c9f01f, 0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2,
+ 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
+}
+
// deltaBlock records one indexed chunk of the base: its length,
// base-relative offset, and content hash.
type deltaBlock struct {
	length int
	offset int
	hash   uint32
}

// deltaTable is an open-addressing (linear-probe) hash table mapping chunk
// hashes to deltaBlock entries, used to find base chunks matching target
// chunks during deltification.
type deltaTable struct {
	blocks  []deltaBlock // backing store of indexed chunks
	nblocks int          // used entries in blocks
	nalloc  int          // capacity of blocks

	offs []uint32 // slot table: index into blocks plus one; 0 = empty slot
	len  int      // occupied slots
	size int      // slot table capacity
}

// deltaInstruction is one step of a delta script: either copy length bytes
// from the base at offset (copy == true), or insert the literal bytes held
// in data.
type deltaInstruction struct {
	copy   bool
	offset int
	length int
	data   []byte
}
+
+func deltify(base, target []byte, seed uint32) ([]deltaInstruction, error) {+ if base == nil {+ base = []byte{}+ }
+ if target == nil {+ target = []byte{}+ }
+ dt, err := deltaTableInitMem(base, 0, len(base), seed)
+ if err != nil {+ return nil, err
+ }
+ return deltifyMemMem(target, 0, len(target), seed, dt, base, 0, len(base))
+}
+
// deltifyMemMem walks the target (from fileoffset to filesize) chunk by
// chunk, emitting a copy instruction when a chunk is found in the base's
// index dt (stretching the match forward where possible) and a literal
// insert otherwise. baseOffset0/baseSize describe the indexed window of
// base. Instruction offsets for inserts are relative to the starting
// fileoffset.
func deltifyMemMem(target []byte, fileoffset, filesize int, seed uint32, dt *deltaTable, base []byte, baseOffset0, baseSize int) ([]deltaInstruction, error) {
	const allocChunkSize = 64
	instr := make([]deltaInstruction, 0, allocChunkSize)
	offset0 := fileoffset

	for fileoffset < filesize {
		blocklen := deltaNextBlockLen(target, fileoffset, filesize)
		if blocklen == 0 {
			// Tail shorter than the minimum chunk: emit it as a
			// single literal insert and stop.
			if fileoffset < filesize {
				data := target[fileoffset:filesize]
				instr = append(instr, deltaInstruction{
					copy:   false,
					offset: fileoffset - offset0,
					length: len(data),
					data:   data,
				})
			}
			break
		}

		block := dt.lookupBlock(target[fileoffset:fileoffset+blocklen], blocklen, seed, base, baseOffset0)
		if block != nil {
			// Matched a base chunk; extend the match past the chunk
			// boundary as far as the bytes keep agreeing.
			if err := deltaStretchMemMem(base, baseOffset0, baseSize, block, target, fileoffset+blocklen, filesize, &blocklen); err != nil {
				return nil, err
			}
			instr = append(instr, deltaInstruction{
				copy:   true,
				offset: block.offset,
				length: blocklen,
			})
		} else {
			data := target[fileoffset : fileoffset+blocklen]
			instr = append(instr, deltaInstruction{
				copy:   false,
				offset: fileoffset - offset0,
				length: len(data),
				data:   data,
			})
		}
		fileoffset += blocklen
	}

	return instr, nil
}
+
// deltaTableInitMem chunks data (from fileoffset to filesize) with the
// content-defined chunker and indexes every chunk into a fresh deltaTable
// keyed by its hash. Chunk offsets are stored relative to fileoffset.
func deltaTableInitMem(data []byte, fileoffset, filesize int, seed uint32) (*deltaTable, error) {
	dt := &deltaTable{
		nalloc: 128,
		size:   128,
	}
	dt.blocks = make([]deltaBlock, dt.nalloc)
	dt.offs = make([]uint32, dt.size)

	offset0 := fileoffset
	for fileoffset < filesize {
		blocklen := deltaNextBlockLen(data, fileoffset, filesize)
		if blocklen == 0 {
			// Remainder is below the minimum chunk size; it is not
			// indexed (it can only be emitted as a literal anyway).
			break
		}
		h := deltaHashBlock(data[fileoffset:fileoffset+blocklen], seed)
		if err := dt.addBlock(data, offset0, blocklen, fileoffset-offset0, h); err != nil {
			return nil, err
		}
		fileoffset += blocklen
	}
	return dt, nil
}
+
// deltaHashBlock hashes one chunk of content for deltaTable indexing and
// lookup (non-cryptographic; collisions are resolved by byte comparison).
func deltaHashBlock(p []byte, seed uint32) uint32 {
	return murmurhash2.Sum32(p, seed)
}
+
+func deltaNextBlockLen(data []byte, fileoffset, filesize int) int {+ if fileoffset >= filesize || filesize-fileoffset < deltaMinChunk {+ return 0
+ }
+ p := fileoffset + deltaMinChunk
+ end := fileoffset + deltaMaxChunk
+ if end > filesize {+ end = filesize
+ }
+ var gh uint32
+ for p < end {+ gh = (gh << 1) + deltaGearTab[data[p]]
+ p++
+ if (gh & deltaSplitMask) == 0 {+ break
+ }
+ }
+ return p - fileoffset
+}
+
// addBlock indexes one chunk (at offset, with length and hash h, relative
// to offset0 within data). Duplicate chunks with identical content are
// stored only once. resize is called first so insertion never probes a
// full table.
func (dt *deltaTable) addBlock(data []byte, offset0, length, offset int, h uint32) error {
	if length == 0 {
		return nil
	}
	if err := dt.resize(); err != nil {
		return err
	}
	idx, found := dt.lookupSlot(data, offset0, offset, length, h)
	if found {
		// An identical chunk is already indexed; keep the first.
		return nil
	}
	// Slot values are block-index + 1 so that 0 can mean "empty".
	dt.offs[idx] = uint32(dt.nblocks + 1)
	dt.blocks[dt.nblocks] = deltaBlock{
		length: length,
		offset: offset,
		hash:   h,
	}
	dt.nblocks++
	dt.len++
	return nil
}
+
// lookupBlock finds an indexed base chunk whose content equals chunk.
// Candidates are located by hash and length, then confirmed by byte
// comparison against base (offsets shifted by baseOffset0). Linear probing
// stops at the first empty slot; returns nil when no match exists.
func (dt *deltaTable) lookupBlock(chunk []byte, length int, seed uint32, base []byte, baseOffset0 int) *deltaBlock {
	if dt == nil || dt.size == 0 {
		return nil
	}
	h := deltaHashBlock(chunk, seed)
	for i := int(h % uint32(dt.size)); dt.offs[i] != 0; i = (i + 1) % dt.size {
		block := &dt.blocks[dt.offs[i]-1]
		if block.hash != h || block.length != length {
			continue
		}
		baseOff := baseOffset0 + block.offset
		if baseOff < 0 || baseOff+length > len(base) {
			continue
		}
		if bytes.Equal(base[baseOff:baseOff+length], chunk) {
			return block
		}
	}
	return nil
}
+
// lookupSlot probes for the chunk at offset (relative to offset0 in data)
// with hash h. It returns (slot, true) when an identical chunk is already
// indexed, or (firstEmptySlot, false) where the chunk should be inserted.
// The caller must keep the table below full load so an empty slot exists.
func (dt *deltaTable) lookupSlot(data []byte, offset0, offset, length int, h uint32) (int, bool) {
	if dt == nil || dt.size == 0 {
		return 0, false
	}
	for i := int(h % uint32(dt.size)); ; i = (i + 1) % dt.size {
		if dt.offs[i] == 0 {
			return i, false
		}
		block := &dt.blocks[dt.offs[i]-1]
		if block.hash != h || block.length != length {
			continue
		}
		left := offset0 + block.offset
		right := offset0 + offset
		if left < 0 || right < 0 || left+length > len(data) || right+length > len(data) {
			continue
		}
		// Hash and length match; confirm content equality to rule out
		// collisions.
		if bytes.Equal(data[left:left+length], data[right:right+length]) {
			return i, true
		}
	}
}
+
// resize makes room for at least one more block: it grows the blocks
// backing array when full, and doubles and rehashes the slot table when
// its load factor exceeds 70%, keeping linear probing terminating.
func (dt *deltaTable) resize() error {
	if dt.nblocks == dt.nalloc {
		newAlloc := dt.nalloc + 256
		newBlocks := make([]deltaBlock, newAlloc)
		copy(newBlocks, dt.blocks[:dt.nblocks])
		dt.blocks = newBlocks
		dt.nalloc = newAlloc
	}

	if dt.offs == nil || (dt.len*100)/dt.size > 70 {
		oldSize := dt.size
		dt.size *= 2
		// Doubling overflowed int; restore and refuse rather than
		// building a zero-sized table.
		if dt.size == 0 {
			dt.size = oldSize
			return ErrInvalidObject
		}
		dt.offs = make([]uint32, dt.size)
		// Rehash every existing block into the enlarged slot table.
		for i := 0; i < dt.nblocks; i++ {
			block := &dt.blocks[i]
			idx := int(block.hash % uint32(dt.size))
			for dt.offs[idx] != 0 {
				idx = (idx + 1) % dt.size
			}
			dt.offs[idx] = uint32(i + 1)
		}
	}

	return nil
}
+
// deltaStretchMemMem extends a matched chunk forward: starting just past
// the matched block in both base and data, it grows *blocklen while bytes
// continue to agree, bounded by the end of either buffer and by
// deltaStretchMax (the encodable copy-length limit).
func deltaStretchMemMem(base []byte, baseOffset0, baseSize int, block *deltaBlock, data []byte, fileoffset, filesize int, blocklen *int) error {
	baseOffset := baseOffset0 + block.offset + *blocklen
	if baseOffset > baseSize {
		return ErrInvalidObject
	}
	if fileoffset > filesize {
		return ErrInvalidObject
	}
	maxlen := baseSize - baseOffset
	if avail := filesize - fileoffset; avail < maxlen {
		maxlen = avail
	}
	for i := 0; i < maxlen && *blocklen < deltaStretchMax-1; i++ {
		if data[fileoffset+i] != base[baseOffset+i] {
			break
		}
		*blocklen++
	}
	return nil
}
--- /dev/null
+++ b/delta_write_encode.go
@@ -1,0 +1,108 @@
+package furgit
+
// deltaCopyDefaultLen is the implicit copy length (0x10000) signalled by a
// copy opcode that carries no length bytes, per the git delta format.
const deltaCopyDefaultLen = 0x10000
+
// deltaEncode serializes an instruction script into the git delta wire
// format: two varints (base size, result size) followed by copy opcodes
// (0x80 | presence bits, then little-endian offset/length bytes) and
// insert opcodes (literal count 1..127, then the bytes). Copy offsets must
// fit in 32 bits and lengths in 24 bits; a length equal to
// deltaCopyDefaultLen is encoded with no length bytes at all.
func deltaEncode(baseSize, resultSize int, instr []deltaInstruction) ([]byte, error) {
	if baseSize < 0 || resultSize < 0 {
		return nil, ErrInvalidObject
	}
	out := make([]byte, 0, 64)
	baseHdr, err := packVarintEncode(baseSize)
	if err != nil {
		return nil, err
	}
	out = append(out, baseHdr...)
	resultHdr, err := packVarintEncode(resultSize)
	if err != nil {
		return nil, err
	}
	out = append(out, resultHdr...)

	for _, ins := range instr {
		if ins.copy {
			if ins.offset < 0 || ins.length <= 0 {
				return nil, ErrInvalidObject
			}
			if ins.offset > 0xffffffff {
				return nil, ErrInvalidObject
			}
			if ins.length > 0xffffff {
				return nil, ErrInvalidObject
			}
			op := byte(0x80)
			var tmp [7]byte // at most 4 offset bytes + 3 length bytes
			pos := 0

			// Emit offset bytes low-to-high, setting the matching
			// presence bit for each; at least one byte is always
			// written (a zero offset byte is valid).
			off := ins.offset
			for i := 0; i < 4; i++ {
				op |= 1 << i
				tmp[pos] = byte(off & 0xff)
				pos++
				off >>= 8
				if off == 0 {
					break
				}
			}
			if off != 0 {
				return nil, ErrInvalidObject
			}

			// Length bytes in bits 4-6; omitted entirely when the
			// length is the implicit default (0x10000).
			n := ins.length
			if n != deltaCopyDefaultLen {
				for i := 0; i < 3 && n > 0; i++ {
					op |= 1 << (i + 4)
					tmp[pos] = byte(n & 0xff)
					pos++
					n >>= 8
				}
				if n > 0 {
					return nil, ErrInvalidObject
				}
			}

			out = append(out, op)
			out = append(out, tmp[:pos]...)
			continue
		}

		// Insert instruction: ins.length (when set) must not exceed the
		// available data, and empty inserts are invalid.
		data := ins.data
		if ins.length > 0 {
			if len(data) < ins.length {
				return nil, ErrInvalidObject
			}
			data = data[:ins.length]
		}
		if len(data) == 0 {
			return nil, ErrInvalidObject
		}
		// Literal opcodes carry at most 127 bytes; split longer runs.
		for len(data) > 0 {
			n := len(data)
			if n > 127 {
				n = 127
			}
			out = append(out, byte(n))
			out = append(out, data[:n]...)
			data = data[n:]
		}
	}

	return out, nil
}
+
+func deltaTry(base, target []byte, seed uint32, minSavings int) ([]byte, bool) {+ if minSavings < 0 {+ minSavings = 0
+ }
+ instr, err := deltify(base, target, seed)
+ if err != nil {+ return nil, false
+ }
+ delta, err := deltaEncode(len(base), len(target), instr)
+ if err != nil {+ return nil, false
+ }
+ if len(delta)+minSavings >= len(target) {+ return nil, false
+ }
+ return delta, true
+}
--- /dev/null
+++ b/delta_write_select.go
@@ -1,0 +1,56 @@
+package furgit
+
+const defaultDeltaWindow = 64
+
const defaultDeltaWindow = 64

// objectToPack carries one object through pack writing: its id, type, raw
// body, the pack offset it was written at, and its delta-chain depth
// (0 for a full object).
type objectToPack struct {
	id         Hash
	ty         ObjectType
	body       []byte
	offset     uint64
	deltaDepth int
}

// deltaContext holds the sliding window of recently written objects that
// are eligible as delta bases for subsequent objects.
type deltaContext struct {
	window     int
	candidates []*objectToPack
}
+
+func (ctx *deltaContext) addCandidate(obj *objectToPack) {+ if ctx.window <= 0 {+ return
+ }
+ ctx.candidates = append(ctx.candidates, obj)
+ if len(ctx.candidates) > ctx.window {+ over := len(ctx.candidates) - ctx.window
+ ctx.candidates = ctx.candidates[over:]
+ }
+}
+
// pickDeltaBase scans the candidate window (newest first) for the blob base
// yielding the smallest acceptable delta for obj. Candidates whose chain
// depth has reached maxDepth are skipped so delta chains stay bounded.
// Returns (nil, nil) when no candidate produces a worthwhile delta.
func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint32, minSavings, maxDepth int) (*objectToPack, []byte) {
	if ctx == nil || len(ctx.candidates) == 0 {
		return nil, nil
	}
	if maxDepth <= 0 {
		maxDepth = 1
	}
	var bestBase *objectToPack
	var bestDelta []byte
	for i := len(ctx.candidates) - 1; i >= 0; i-- {
		base := ctx.candidates[i]
		// Only blobs are deltified against each other here.
		if base.ty != ObjectTypeBlob {
			continue
		}
		if base.deltaDepth >= maxDepth {
			continue
		}
		delta, ok := deltaTry(base.body, obj.body, seed, minSavings)
		if !ok {
			continue
		}
		if bestDelta == nil || len(delta) < len(bestDelta) {
			bestDelta = delta
			bestBase = base
		}
	}
	return bestBase, bestDelta
}
--- /dev/null
+++ b/internal/murmurhash2/murmurhash2.go
@@ -1,0 +1,42 @@
+// Package murmurhash2 provides a non-cryptographic hash.
+package murmurhash2
+
+import "encoding/binary"
+
// Sum32 computes the MurmurHash2 value for key with the provided seed.
func Sum32(key []byte, seed uint32) uint32 {
	const (
		m uint32 = 0x5bd1e995
		r uint32 = 24
	)

	h := seed ^ uint32(len(key))

	// Mix the body four bytes at a time.
	rest := key
	for len(rest) >= 4 {
		k := binary.LittleEndian.Uint32(rest)
		k *= m
		k ^= k >> r
		k *= m
		h = h*m ^ k
		rest = rest[4:]
	}

	// Fold in the 0-3 trailing bytes.
	if len(rest) > 2 {
		h ^= uint32(rest[2]) << 16
	}
	if len(rest) > 1 {
		h ^= uint32(rest[1]) << 8
	}
	if len(rest) > 0 {
		h ^= uint32(rest[0])
		h *= m
	}

	// Final avalanche.
	h ^= h >> 13
	h *= m
	h ^= h >> 15
	return h
}
--- a/packed_read_pack.go
+++ b/packed_read_pack.go
@@ -108,24 +108,6 @@
return body, nil
}
-func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {- if len(data) == 0 {- return 0, 0, io.ErrUnexpectedEOF
- }
- b := data[0]
- dist := uint64(b & 0x7f)
- consumed := 1
- for (b & 0x80) != 0 {- if consumed >= len(data) {- return 0, 0, io.ErrUnexpectedEOF
- }
- b = data[consumed]
- consumed++
- dist = ((dist + 1) << 7) + uint64(b&0x7f)
- }
- return dist, consumed, nil
-}
-
type packKey struct {path string
ofs uint64
@@ -342,138 +324,6 @@
}
frames = nil
return resultTy, body, nil
-}
-
-func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {- pos := 0
- baseBytes := base.Bytes()
- deltaBytes := delta.Bytes()
- srcSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {- return bufpool.Buffer{}, err- }
- dstSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {- return bufpool.Buffer{}, err- }
- if srcSize != len(baseBytes) {- return bufpool.Buffer{}, ErrInvalidObject- }
- out := bufpool.Borrow(dstSize)
- out.Resize(dstSize)
- outBytes := out.Bytes()
- outPos := 0
-
- for pos < len(deltaBytes) {- op := deltaBytes[pos]
- pos++
- switch {- case op&0x80 != 0:
- off := 0
- n := 0
- if op&0x01 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos])
- pos++
- }
- if op&0x02 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x04 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 16
- pos++
- }
- if op&0x08 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 24
- pos++
- }
- if op&0x10 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos])
- pos++
- }
- if op&0x20 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x40 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos]) << 16
- pos++
- }
- if n == 0 {- n = 0x10000
- }
- if off+n > len(baseBytes) || outPos+n > len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- copy(outBytes[outPos:], baseBytes[off:off+n])
- outPos += n
- case op != 0:
- n := int(op)
- if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- copy(outBytes[outPos:], deltaBytes[pos:pos+n])
- pos += n
- outPos += n
- default:
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- }
-
- if outPos != len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- return out, nil
-}
-
-func packVarintRead(buf []byte, pos *int) (int, error) {- res := 0
- shift := 0
- for {- if *pos >= len(buf) {- return 0, ErrInvalidObject
- }
- b := buf[*pos]
- *pos++
- res |= int(b&0x7f) << shift
- if (b & 0x80) == 0 {- break
- }
- shift += 7
- }
- return res, nil
}
type packFile struct {--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -115,19 +115,84 @@
}
// WriteOfsDelta writes one OBJ_OFS_DELTA entry: the size header for the
// delta payload, the backwards distance to the base's pack offset, then
// the zlib-compressed delta bytes. baseOffset must be the offset of an
// entry already written to this pack.
//
// resultSize is validated for sanity but not written: the ofs-delta entry
// header carries only the delta payload size — NOTE(review): confirm this
// matches the caller's expectation.
func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error {
	if pw == nil || !pw.wroteHeader {
		return ErrInvalidObject
	}
	if baseSize < 0 || resultSize < 0 {
		return ErrInvalidObject
	}
	if delta == nil {
		delta = []byte{}
	}
	deltaSize := len(delta)
	if deltaSize <= 0 {
		return ErrInvalidObject
	}
	// The distance is measured from this entry's own offset, captured
	// before anything is written; the base must lie strictly before it.
	currentOffset := pw.bytesWritten
	if baseOffset >= currentOffset {
		return ErrInvalidObject
	}
	dist := currentOffset - baseOffset

	hdr, err := packHeaderEncode(ObjectTypeOfsDelta, deltaSize)
	if err != nil {
		return err
	}
	if err := pw.writePacked(hdr); err != nil {
		return err
	}
	ofs, err := packOfsEncode(dist)
	if err != nil {
		return err
	}
	if err := pw.writePacked(ofs); err != nil {
		return err
	}

	// Compress the delta payload; packHashWriter keeps the running pack
	// checksum up to date as bytes go out.
	zw := zlib.NewWriter(&packHashWriter{pw: pw})
	if _, err := zw.Write(delta); err != nil {
		_ = zw.Close()
		return err
	}
	return zw.Close()
}
// WriteRefDelta writes one OBJ_REF_DELTA entry: the size header for the
// delta payload, the base object's raw hash bytes, then the
// zlib-compressed delta bytes.
//
// baseSize and resultSize are validated for sanity but not written; the
// ref-delta entry header carries only the delta payload size.
func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error {
	if pw == nil || !pw.wroteHeader {
		return ErrInvalidObject
	}
	if baseSize < 0 || resultSize < 0 {
		return ErrInvalidObject
	}
	if delta == nil {
		delta = []byte{}
	}
	deltaSize := len(delta)
	if deltaSize <= 0 {
		return ErrInvalidObject
	}
	// An empty hash means the base identity is unusable.
	baseBytes := base.Bytes()
	if len(baseBytes) == 0 {
		return ErrInvalidObject
	}

	hdr, err := packHeaderEncode(ObjectTypeRefDelta, deltaSize)
	if err != nil {
		return err
	}
	if err := pw.writePacked(hdr); err != nil {
		return err
	}
	if err := pw.writePacked(baseBytes); err != nil {
		return err
	}

	// Compress the delta payload through the checksum-tracking writer.
	zw := zlib.NewWriter(&packHashWriter{pw: pw})
	if _, err := zw.Write(delta); err != nil {
		_ = zw.Close()
		return err
	}
	return zw.Close()
}
func (pw *packWriter) Close() (Hash, error) {@@ -237,7 +302,7 @@
if repo == nil { return Hash{}, ErrInvalidObject}
- if opts.EnableDeltas || opts.EnableThinPack {+ if opts.EnableThinPack { return Hash{}, errPackDeltaUnimplemented}
if len(objects) > int(^uint32(0)) {@@ -252,13 +317,45 @@
return Hash{}, err}
+ var dctx deltaContext
+ if opts.EnableDeltas {+ dctx.window = defaultDeltaWindow
+ }
+ deltaSeed := uint32(0)
+
for _, id := range objects {ty, body, err := repo.ReadObjectTypeRaw(id)
if err != nil { return Hash{}, err}
- if err := pw.WriteObject(ty, body); err != nil {- return Hash{}, err+ obj := &objectToPack{+ id: id,
+ ty: ty,
+ body: body,
+ }
+ startOffset := pw.bytesWritten
+ wroteDelta := false
+
+ if opts.EnableDeltas && ty == ObjectTypeBlob {+ base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
+ if base != nil && delta != nil {+ if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {+ return Hash{}, err+ }
+ wroteDelta = true
+ obj.deltaDepth = base.deltaDepth + 1
+ }
+ }
+ if !wroteDelta {+ if err := pw.WriteObject(ty, body); err != nil {+ return Hash{}, err+ }
+ obj.deltaDepth = 0
+ }
+ obj.offset = startOffset
+
+ if opts.EnableDeltas && ty == ObjectTypeBlob {+ dctx.addCandidate(obj)
}
}
--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -3,12 +3,16 @@
import (
"bytes"
"crypto/rand"
- "errors"
"fmt"
"os"
+ "os/exec"
"path/filepath"
"strings"
"testing"
+
+ "codeberg.org/lindenii/furgit/internal/adler32"
+ "codeberg.org/lindenii/furgit/internal/bufpool"
+ "codeberg.org/lindenii/furgit/internal/flatex"
)
func TestPackHeaderEncodeParseRoundtrip(t *testing.T) {@@ -169,8 +173,21 @@
_ = os.Remove(idxPath)
}()
- _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath)
+ if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {+ t.Fatalf("pack stream invalid: %v", err)+ }
+ cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)+ cmd.Dir = repoPath
+ cmd.Env = append(os.Environ(),
+ "GIT_CONFIG_GLOBAL=/dev/null",
+ "GIT_CONFIG_SYSTEM=/dev/null",
+ )
+ output, err := cmd.CombinedOutput()
+ if err != nil {+ t.Fatalf("git index-pack failed: %v\n%s", err, output)+ }
+
verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
seen := make(map[string]struct{}) for _, line := range strings.Split(verifyOut, "\n") {@@ -204,10 +221,52 @@
_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}
-func TestPackWriteDeltasUnimplemented(t *testing.T) {+func TestPackWriteDeltas(t *testing.T) {repoPath, cleanup := setupTestRepo(t)
defer cleanup()
+ workDir, cleanupWork := setupWorkDir(t)
+ defer cleanupWork()
+
+ const (
+ fileCount = 200
+ fileSize = 2048
+ )
+ base := bytes.Repeat([]byte("delta-base-"), fileSize/10)+ for i := 0; i < fileCount; i++ {+ buf := make([]byte, len(base))
+ copy(buf, base)
+ buf[i%len(buf)] ^= byte(i)
+ name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i))+ if err := os.WriteFile(name, buf, 0o644); err != nil {+ t.Fatalf("failed to write %s: %v", name, err)+ }
+ }
+
+ gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+ gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Delta commit")
+ commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+ commitBody := gitCatFile(t, repoPath, "commit", commitHash)
+ lines := bytes.Split(commitBody, []byte{'\n'})+ if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {+ t.Fatalf("commit missing tree header")+ }
+ treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))+
+ lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash)
+ var blobHashes []string
+ for _, line := range strings.Split(lsTree, "\n") {+ if line == "" {+ continue
+ }
+ fields := strings.Fields(line)
+ if len(fields) < 3 {+ t.Fatalf("unexpected ls-tree line: %q", line)+ }
+ blobHashes = append(blobHashes, fields[2])
+ }
+
repo, err := OpenRepository(repoPath)
if err != nil { t.Fatalf("OpenRepository failed: %v", err)@@ -214,11 +273,266 @@
}
defer func() { _ = repo.Close() }()- buf := new(bytes.Buffer)
- _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true})- if !errors.Is(err, errPackDeltaUnimplemented) {- t.Fatalf("expected errPackDeltaUnimplemented, got %v", err)+ var objects []Hash
+ commitID, _ := repo.ParseHash(commitHash)
+ objects = append(objects, commitID)
+ treeID, _ := repo.ParseHash(treeHash)
+ objects = append(objects, treeID)
+ for _, bh := range blobHashes {+ id, _ := repo.ParseHash(bh)
+ objects = append(objects, id)
}
+ expectedOids := append([]string{commitHash, treeHash}, blobHashes...)+
+ packDir := filepath.Join(repoPath, "objects", "pack")
+ if err := os.MkdirAll(packDir, 0o755); err != nil {+ t.Fatalf("failed to create pack dir: %v", err)+ }
+ pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack")
+ if err != nil {+ t.Fatalf("failed to create pack file: %v", err)+ }
+ packPath := pf.Name()
+ idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
+ if _, err := repo.packWrite(pf, objects, packWriteOptions{+ EnableDeltas: true,
+ MinDeltaSavings: 1,
+ }); err != nil {+ _ = pf.Close()
+ t.Fatalf("packWrite failed: %v", err)+ }
+ if err := pf.Close(); err != nil {+ t.Fatalf("failed to close pack file: %v", err)+ }
+
+ defer func() {+ _ = os.Remove(packPath)
+ _ = os.Remove(idxPath)
+ }()
+
+ if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {+ t.Fatalf("pack stream invalid: %v", err)+ }
+
+ cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)+ cmd.Dir = repoPath
+ cmd.Env = append(os.Environ(),
+ "GIT_CONFIG_GLOBAL=/dev/null",
+ "GIT_CONFIG_SYSTEM=/dev/null",
+ )
+ output, err := cmd.CombinedOutput()
+ if err != nil {+ t.Fatalf("git index-pack failed: %v\n%s", err, output)+ }
+
+ verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
+ seen := make(map[string]struct{})+ for _, line := range strings.Split(verifyOut, "\n") {+ if strings.TrimSpace(line) == "" {+ continue
+ }
+ if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {+ continue
+ }
+ parts := strings.Fields(line)
+ if len(parts) == 0 {+ continue
+ }
+ seen[parts[0]] = struct{}{}+ }
+ for _, oid := range expectedOids {+ if _, ok := seen[oid]; !ok {+ t.Fatalf("verify-pack missing object %s", oid)+ }
+ }
+
+ for _, oid := range expectedOids {+ if err := removeLooseObject(repoPath, oid); err != nil {+ t.Fatalf("remove loose object %s: %v", oid, err)+ }
+ }
+ for _, oid := range expectedOids {+ _ = gitCmd(t, repoPath, "cat-file", "-p", oid)
+ }
+
+ _ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
+}
+
// checkPackStream independently walks a pack file written by packWrite:
// it verifies the magic/version header, parses each entry header, resolves
// ofs/ref delta bases against objects seen earlier in the stream, applies
// deltas, and checks every zlib payload and recorded size. It maintains
// both an offset index (for ofs-delta bases) and a hash index (for
// ref-delta bases, keyed by the object's computed id).
func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if len(data) < 12 {
		return ErrInvalidObject
	}
	if readBE32(data[0:4]) != packMagic || readBE32(data[4:8]) != packVersion2 {
		return ErrInvalidObject
	}
	pos := 12
	hashSize := algo.Size()
	type objEntry struct {
		offset uint64
		ty     ObjectType
		body   []byte
	}
	byOffset := make(map[uint64]objEntry, objectCount)
	byHash := make(map[string]objEntry, objectCount)
	for i := 0; i < objectCount; i++ {
		objOffset := uint64(pos)
		ty, size, consumed, err := packHeaderParse(data[pos:])
		if err != nil {
			return fmt.Errorf("obj %d header at %d: %v", i, pos, err)
		}
		pos += consumed
		baseOffset := uint64(0)
		baseTy := ObjectTypeInvalid
		var baseBody []byte
		var baseHash Hash
		switch ty {
		case ObjectTypeOfsDelta:
			// Resolve the base by backwards distance; it must point at
			// an entry already decoded in this walk.
			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
			if err != nil {
				return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)
			}
			pos += distConsumed
			if dist == 0 || dist > objOffset {
				return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
			}
			baseOffset = objOffset - dist
			base, ok := byOffset[baseOffset]
			if !ok {
				return fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
			}
			baseTy = base.ty
			baseBody = base.body
		case ObjectTypeRefDelta:
			// Resolve the base by its full object id.
			if pos+hashSize > len(data) {
				return ErrInvalidObject
			}
			copy(baseHash.data[:], data[pos:pos+hashSize])
			baseHash.algo = algo
			baseEntry, ok := byHash[baseHash.String()]
			if !ok {
				return fmt.Errorf("obj %d ref base not found", i)
			}
			baseTy = baseEntry.ty
			baseBody = baseEntry.body
			pos += hashSize
		default:
		}

		payload, zconsumed, err := consumeZlibStream(data[pos:], size)
		if err != nil {
			return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)
		}
		pos += zconsumed
		switch ty {
		case ObjectTypeOfsDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing base body", i)
			}
			baseSize, resultSize, err := readDeltaSizes(payload)
			if err != nil {
				return fmt.Errorf("obj %d delta sizes: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			// A delta entry's real type is the type of its base.
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		case ObjectTypeRefDelta:
			if baseBody == nil {
				return fmt.Errorf("obj %d missing ref base body", i)
			}
			baseSize, resultSize, err := readDeltaSizes(payload)
			if err != nil {
				return fmt.Errorf("obj %d ref delta sizes: %v", i, err)
			}
			if baseSize != len(baseBody) {
				return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
			}
			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
			if err != nil {
				return fmt.Errorf("obj %d ref delta apply: %v", i, err)
			}
			body := append([]byte(nil), out.Bytes()...)
			out.Release()
			if resultSize != len(body) {
				return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize)
			}
			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
		default:
			// Full (non-delta) object: the header size is the exact
			// inflated length.
			if size >= 0 && len(payload) != size {
				return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
			}
			body := append([]byte(nil), payload...)
			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
		}

		// Compute the object's id so later ref-delta entries can find it.
		entry := byOffset[objOffset]
		if entry.body != nil && entry.ty != ObjectTypeInvalid {
			hdr, err := headerForType(entry.ty, entry.body)
			if err != nil {
				return err
			}
			raw := append(hdr, entry.body...)
			hash := algo.Sum(raw)
			byHash[hash.String()] = entry
		}
	}
	return nil
}
+
// consumeZlibStream validates and inflates one zlib stream (RFC 1950) at
// the head of src: it checks the CMF/FLG header (method 8, window <= 32K,
// header checksum, no preset dictionary), decompresses the deflate body
// with sizeHint as the expected output size, and verifies the trailing
// Adler-32. It returns the inflated payload and the total number of bytes
// the stream occupied in src.
func consumeZlibStream(src []byte, sizeHint int) ([]byte, int, error) {
	// Minimum stream: 2 header bytes + 4 checksum bytes (plus deflate data).
	if len(src) < 6 {
		return nil, 0, fmt.Errorf("zlib too short")
	}
	cmf := src[0]
	flg := src[1]
	if (cmf&0x0f != 8) || (cmf>>4 > 7) || (uint16(cmf)<<8|uint16(flg))%31 != 0 {
		return nil, 0, fmt.Errorf("zlib header invalid")
	}
	if flg&0x20 != 0 {
		return nil, 0, fmt.Errorf("zlib dict not supported")
	}
	deflateData := src[2:]
	out, consumed, err := flatex.DecompressSized(deflateData, sizeHint)
	if err != nil {
		return nil, 0, err
	}
	// Copy out of the pooled buffer before releasing it.
	payload := append([]byte(nil), out.Bytes()...)
	out.Release()
	total := 2 + consumed + 4
	if total > len(src) {
		return nil, 0, fmt.Errorf("zlib truncated")
	}
	expected := readBE32(src[2+consumed : 2+consumed+4])
	if expected != adler32.Checksum(payload) {
		return nil, 0, fmt.Errorf("zlib checksum mismatch")
	}
	return payload, total, nil
}
+
+func readDeltaSizes(delta []byte) (int, int, error) {+ pos := 0
+ baseSize, err := packVarintRead(delta, &pos)
+ if err != nil {+ return 0, 0, err
+ }
+ resultSize, err := packVarintRead(delta, &pos)
+ if err != nil {+ return 0, 0, err
+ }
+ return baseSize, resultSize, nil
}
func removeLooseObject(repoPath, oid string) error {--
⑨