ref: 9730560a82426408243cb15349f6955a4ba34f60
parent: b2ca764959a7f213b0d1d8b534c3f4d66e34285e
author: Runxi Yu <me@runxiyu.org>
date: Fri Feb 20 07:51:16 EST 2026
Revert "packed: Write packs with deltas" This reverts commit 17c9aee0e781026353ead4ac749a3ae89c83d007.
--- a/delta_read_apply.go
+++ /dev/null
@@ -1,159 +1,0 @@
-package furgit
-
-import (
- "io"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
-)
-
-func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {- if len(data) == 0 {- return 0, 0, io.ErrUnexpectedEOF
- }
- b := data[0]
- dist := uint64(b & 0x7f)
- consumed := 1
- for (b & 0x80) != 0 {- if consumed >= len(data) {- return 0, 0, io.ErrUnexpectedEOF
- }
- b = data[consumed]
- consumed++
- dist += 1
- dist <<= 7
- dist |= uint64(b & 0x7f)
- }
- return dist, consumed, nil
-}
-
-func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {- pos := 0
- baseBytes := base.Bytes()
- deltaBytes := delta.Bytes()
- srcSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {- return bufpool.Buffer{}, err- }
- dstSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {- return bufpool.Buffer{}, err- }
- if srcSize != len(baseBytes) {- return bufpool.Buffer{}, ErrInvalidObject- }
- out := bufpool.Borrow(dstSize)
- out.Resize(dstSize)
- outBytes := out.Bytes()
- outPos := 0
-
- for pos < len(deltaBytes) {- op := deltaBytes[pos]
- pos++
- switch {- case op&0x80 != 0:
- off := 0
- n := 0
- if op&0x01 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos])
- pos++
- }
- if op&0x02 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x04 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 16
- pos++
- }
- if op&0x08 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- off |= int(deltaBytes[pos]) << 24
- pos++
- }
- if op&0x10 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos])
- pos++
- }
- if op&0x20 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x40 != 0 {- if pos >= len(deltaBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- n |= int(deltaBytes[pos]) << 16
- pos++
- }
- if n == 0 {- n = 0x10000
- }
- if off+n > len(baseBytes) || outPos+n > len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- copy(outBytes[outPos:], baseBytes[off:off+n])
- outPos += n
- case op != 0:
- n := int(op)
- if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- copy(outBytes[outPos:], deltaBytes[pos:pos+n])
- pos += n
- outPos += n
- default:
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- }
-
- if outPos != len(outBytes) {- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject- }
- return out, nil
-}
-
-func packVarintRead(buf []byte, pos *int) (int, error) {- res := 0
- shift := 0
- for {- if *pos >= len(buf) {- return 0, ErrInvalidObject
- }
- b := buf[*pos]
- *pos++
- res |= int(b&0x7f) << shift
- if (b & 0x80) == 0 {- break
- }
- shift += 7
- }
- return res, nil
-}
--- a/delta_test.go
+++ /dev/null
@@ -1,55 +1,0 @@
-package furgit
-
-import (
- "bytes"
- "testing"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
-)
-
-func TestDeltaEncodeApplyRoundtrip(t *testing.T) {- base := []byte("Hello, world!")- target := []byte("Hello, brave new world!")-
- instr := []deltaInstruction{- {copy: true, offset: 0, length: 7},- {copy: false, data: []byte("brave new "), length: len("brave new ")},- {copy: true, offset: 7, length: 6},- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {- t.Fatalf("deltaEncode error: %v", err)- }
-
- out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
- if err != nil {- t.Fatalf("packDeltaApply error: %v", err)- }
- defer out.Release()
- if !bytes.Equal(out.Bytes(), target) {- t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)- }
-}
-
-func TestDeltifyRoundtrip(t *testing.T) {- base := []byte("The quick brown fox jumps over the lazy dog.")- target := []byte("The quick red fox jumps over the very lazy dog.")-
- instr, err := deltify(base, target, 0)
- if err != nil {- t.Fatalf("deltify error: %v", err)- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {- t.Fatalf("deltaEncode error: %v", err)- }
-
- out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
- if err != nil {- t.Fatalf("packDeltaApply error: %v", err)- }
- defer out.Release()
- if !bytes.Equal(out.Bytes(), target) {- t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)- }
-}
--- a/delta_write_deltify.go
+++ /dev/null
@@ -1,306 +1,0 @@
-package furgit
-
-import (
- "bytes"
-
- "codeberg.org/lindenii/furgit/internal/murmurhash2"
-)
-
-const (
- deltaMinChunk = 32
- deltaMaxChunk = 8192
- deltaSplitMask = (1 << 8) - 1
- deltaStretchMax = (1 << 24) - 1
-)
-
-var deltaGearTab = [...]uint32{- 0x67ed26b7, 0x32da500c, 0x53d0fee0, 0xce387dc7, 0xcd406d90, 0x2e83a4d4,
- 0x9fc9a38d, 0xb67259dc, 0xca6b1722, 0x6d2ea08c, 0x235cea2e, 0x3149bb5f,
- 0x1beda787, 0x2a6b77d5, 0x2f22d9ac, 0x91fc0544, 0xe413acfa, 0x5a30ff7a,
- 0xad6fdde0, 0x444fd0f5, 0x7ad87864, 0x58c5ff05, 0x8d2ec336, 0x2371f853,
- 0x550f8572, 0x6aa448dd, 0x7c9ddbcf, 0x95221e14, 0x2a82ec33, 0xcbec5a78,
- 0xc6795a0d, 0x243995b7, 0x1c909a2f, 0x4fded51c, 0x635d334b, 0x0e2b9999,
- 0x2702968d, 0x856de1d5, 0x3325d60e, 0xeb6a7502, 0xec2a9844, 0x0905835a,
- 0xa1820375, 0xa4be5cab, 0x96a6c058, 0x2c2ccd70, 0xba40fce3, 0xd794c46b,
- 0x8fbae83e, 0xc3aa7899, 0x3d3ff8ed, 0xa0d42b5b, 0x571c0c97, 0xd2811516,
- 0xf7e7b96c, 0x4fd2fcbd, 0xe2fdec94, 0x282cc436, 0x78e8e95c, 0x80a3b613,
- 0xcfbee20c, 0xd4a32d1c, 0x2a12ff13, 0x6af82936, 0xe5630258, 0x8efa6a98,
- 0x294fb2d1, 0xdeb57086, 0x5f0fddb3, 0xeceda7ce, 0x4c87305f, 0x3a6d3307,
- 0xe22d2942, 0x9d060217, 0x1e42ed02, 0xb6f63b52, 0x4367f39f, 0x055cf262,
- 0x03a461b2, 0x5ef9e382, 0x386bc03a, 0x2a1e79c7, 0xf1a0058b, 0xd4d2dea9,
- 0x56baf37d, 0x5daff6cc, 0xf03a951d, 0xaef7de45, 0xa8f4581e, 0x3960b555,
- 0xffbfff6d, 0xbe702a23, 0x8f5b6d6f, 0x061739fb, 0x98696f47, 0x3fd596d4,
- 0x151eac6b, 0xa9fcc4f5, 0x69181a12, 0x3ac5a107, 0xb5198fe7, 0x96bcb1da,
- 0x1b5ddf8e, 0xc757d650, 0x65865c3a, 0x8fc0a41a, 0x87435536, 0x99eda6f2,
- 0x41874794, 0x29cff4e8, 0xb70efd9a, 0x3103f6e7, 0x84d2453b, 0x15a450bd,
- 0x74f49af1, 0x60f664b1, 0xa1c86935, 0xfdafbce1, 0xe36353e3, 0x5d9ba739,
- 0xbc0559ba, 0x708b0054, 0xd41d808c, 0xb2f31723, 0x9027c41f, 0xf136d165,
- 0xb5374b12, 0x9420a6ac, 0x273958b6, 0xe6c2fad0, 0xebdc1f21, 0xfb33af8b,
- 0xc71c25cd, 0xe9a2d8e5, 0xbeb38a50, 0xbceb7cc2, 0x4e4e73f0, 0xcd6c251d,
- 0xde4c032c, 0x4b04ac30, 0x725b8b21, 0x4eb8c33b, 0x20d07b75, 0x0567aa63,
- 0xb56b2bb7, 0xc1f5fd3a, 0xcafd35ca, 0x470dd4da, 0xfe4f94cd, 0xfb8de424,
- 0xe8dbcf40, 0xfe50a37a, 0x62db5b5d, 0xf32f4ab6, 0x2c4a8a51, 0x18473dc0,
- 0xfe0cbb6e, 0xfe399efd, 0xdf34ecc9, 0x6ccd5055, 0x46097073, 0x139135c2,
- 0x721c76f6, 0x1c6a94b4, 0x6eee014d, 0x8a508e02, 0x3da538f5, 0x280d394f,
- 0x5248a0c4, 0x3ce94c6c, 0x9a71ad3a, 0x8493dd05, 0xe43f0ab6, 0x18e4ed42,
- 0x6c5c0e09, 0x42b06ec9, 0x8d330343, 0xa45b6f59, 0x2a573c0c, 0xd7fd3de6,
- 0xeedeab68, 0x5c84dafc, 0xbbd1b1a8, 0xa3ce1ad1, 0x85b70bed, 0xb6add07f,
- 0xa531309c, 0x8f8ab852, 0x564de332, 0xeac9ed0c, 0x73da402c, 0x3ec52761,
- 0x43af2f4d, 0xd6ff45c8, 0x4c367462, 0xd553bd6a, 0x44724855, 0x3b2aa728,
- 0x56e5eb65, 0xeaf16173, 0x33fa42ff, 0xd714bb5d, 0xfbd0a3b9, 0xaf517134,
- 0x9416c8cd, 0x534cf94f, 0x548947c2, 0x34193569, 0x32f4389a, 0xfe7028bc,
- 0xed73b1ed, 0x9db95770, 0x468e3922, 0x0440c3cd, 0x60059a62, 0x33504562,
- 0x2b229fbd, 0x5174dca5, 0xf7028752, 0xd63c6aa8, 0x31276f38, 0x0646721c,
- 0xb0191da8, 0xe00e6de0, 0x9eac1a6e, 0x9f7628a5, 0xed6c06ea, 0x0bb8af15,
- 0xf119fb12, 0x38693c1c, 0x732bc0fe, 0x84953275, 0xb82ec888, 0x33a4f1b3,
- 0x3099835e, 0x028a8782, 0x5fdd51d7, 0xc6c717b3, 0xb06caf71, 0x17c8c111,
- 0x61bad754, 0x9fd03061, 0xe09df1af, 0x3bc9eb73, 0x85878413, 0x9889aaf2,
- 0x3f5a9e46, 0x42c9f01f, 0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2,
- 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
-}
-
-type deltaBlock struct {- length int
- offset int
- hash uint32
-}
-
-type deltaTable struct {- blocks []deltaBlock
- nblocks int
- nalloc int
-
- offs []uint32
- len int
- size int
-}
-
-type deltaInstruction struct {- copy bool
- offset int
- length int
- data []byte
-}
-
-func deltify(base, target []byte, seed uint32) ([]deltaInstruction, error) {- if base == nil {- base = []byte{}- }
- if target == nil {- target = []byte{}- }
- dt, err := deltaTableInitMem(base, 0, len(base), seed)
- if err != nil {- return nil, err
- }
- return deltifyMemMem(target, 0, len(target), seed, dt, base, 0, len(base))
-}
-
-func deltifyMemMem(target []byte, fileoffset, filesize int, seed uint32, dt *deltaTable, base []byte, baseOffset0, baseSize int) ([]deltaInstruction, error) {- const allocChunkSize = 64
- instr := make([]deltaInstruction, 0, allocChunkSize)
- offset0 := fileoffset
-
- for fileoffset < filesize {- blocklen := deltaNextBlockLen(target, fileoffset, filesize)
- if blocklen == 0 {- if fileoffset < filesize {- data := target[fileoffset:filesize]
- instr = append(instr, deltaInstruction{- copy: false,
- offset: fileoffset - offset0,
- length: len(data),
- data: data,
- })
- }
- break
- }
-
- block := dt.lookupBlock(target[fileoffset:fileoffset+blocklen], blocklen, seed, base, baseOffset0)
- if block != nil {- if err := deltaStretchMemMem(base, baseOffset0, baseSize, block, target, fileoffset+blocklen, filesize, &blocklen); err != nil {- return nil, err
- }
- instr = append(instr, deltaInstruction{- copy: true,
- offset: block.offset,
- length: blocklen,
- })
- } else {- data := target[fileoffset : fileoffset+blocklen]
- instr = append(instr, deltaInstruction{- copy: false,
- offset: fileoffset - offset0,
- length: len(data),
- data: data,
- })
- }
- fileoffset += blocklen
- }
-
- return instr, nil
-}
-
-func deltaTableInitMem(data []byte, fileoffset, filesize int, seed uint32) (*deltaTable, error) {- dt := &deltaTable{- nalloc: 128,
- size: 128,
- }
- dt.blocks = make([]deltaBlock, dt.nalloc)
- dt.offs = make([]uint32, dt.size)
-
- offset0 := fileoffset
- for fileoffset < filesize {- blocklen := deltaNextBlockLen(data, fileoffset, filesize)
- if blocklen == 0 {- break
- }
- h := deltaHashBlock(data[fileoffset:fileoffset+blocklen], seed)
- if err := dt.addBlock(data, offset0, blocklen, fileoffset-offset0, h); err != nil {- return nil, err
- }
- fileoffset += blocklen
- }
- return dt, nil
-}
-
-func deltaHashBlock(p []byte, seed uint32) uint32 {- return murmurhash2.Sum32(p, seed)
-}
-
-func deltaNextBlockLen(data []byte, fileoffset, filesize int) int {- if fileoffset >= filesize || filesize-fileoffset < deltaMinChunk {- return 0
- }
- p := fileoffset + deltaMinChunk
- end := fileoffset + deltaMaxChunk
- if end > filesize {- end = filesize
- }
- var gh uint32
- for p < end {- gh = (gh << 1) + deltaGearTab[data[p]]
- p++
- if (gh & deltaSplitMask) == 0 {- break
- }
- }
- return p - fileoffset
-}
-
-func (dt *deltaTable) addBlock(data []byte, offset0, length, offset int, h uint32) error {- if length == 0 {- return nil
- }
- if err := dt.resize(); err != nil {- return err
- }
- idx, found := dt.lookupSlot(data, offset0, offset, length, h)
- if found {- return nil
- }
- dt.offs[idx] = uint32(dt.nblocks + 1)
- dt.blocks[dt.nblocks] = deltaBlock{- length: length,
- offset: offset,
- hash: h,
- }
- dt.nblocks++
- dt.len++
- return nil
-}
-
-func (dt *deltaTable) lookupBlock(chunk []byte, length int, seed uint32, base []byte, baseOffset0 int) *deltaBlock {- if dt == nil || dt.size == 0 {- return nil
- }
- h := deltaHashBlock(chunk, seed)
- for i := int(h % uint32(dt.size)); dt.offs[i] != 0; i = (i + 1) % dt.size {- block := &dt.blocks[dt.offs[i]-1]
- if block.hash != h || block.length != length {- continue
- }
- baseOff := baseOffset0 + block.offset
- if baseOff < 0 || baseOff+length > len(base) {- continue
- }
- if bytes.Equal(base[baseOff:baseOff+length], chunk) {- return block
- }
- }
- return nil
-}
-
-func (dt *deltaTable) lookupSlot(data []byte, offset0, offset, length int, h uint32) (int, bool) {- if dt == nil || dt.size == 0 {- return 0, false
- }
- for i := int(h % uint32(dt.size)); ; i = (i + 1) % dt.size {- if dt.offs[i] == 0 {- return i, false
- }
- block := &dt.blocks[dt.offs[i]-1]
- if block.hash != h || block.length != length {- continue
- }
- left := offset0 + block.offset
- right := offset0 + offset
- if left < 0 || right < 0 || left+length > len(data) || right+length > len(data) {- continue
- }
- if bytes.Equal(data[left:left+length], data[right:right+length]) {- return i, true
- }
- }
-}
-
-func (dt *deltaTable) resize() error {- if dt.nblocks == dt.nalloc {- newAlloc := dt.nalloc + 256
- newBlocks := make([]deltaBlock, newAlloc)
- copy(newBlocks, dt.blocks[:dt.nblocks])
- dt.blocks = newBlocks
- dt.nalloc = newAlloc
- }
-
- if dt.offs == nil || (dt.len*100)/dt.size > 70 {- oldSize := dt.size
- dt.size *= 2
- if dt.size == 0 {- dt.size = oldSize
- return ErrInvalidObject
- }
- dt.offs = make([]uint32, dt.size)
- for i := 0; i < dt.nblocks; i++ {- block := &dt.blocks[i]
- idx := int(block.hash % uint32(dt.size))
- for dt.offs[idx] != 0 {- idx = (idx + 1) % dt.size
- }
- dt.offs[idx] = uint32(i + 1)
- }
- }
-
- return nil
-}
-
-func deltaStretchMemMem(base []byte, baseOffset0, baseSize int, block *deltaBlock, data []byte, fileoffset, filesize int, blocklen *int) error {- baseOffset := baseOffset0 + block.offset + *blocklen
- if baseOffset > baseSize {- return ErrInvalidObject
- }
- if fileoffset > filesize {- return ErrInvalidObject
- }
- maxlen := baseSize - baseOffset
- if avail := filesize - fileoffset; avail < maxlen {- maxlen = avail
- }
- for i := 0; i < maxlen && *blocklen < deltaStretchMax-1; i++ {- if data[fileoffset+i] != base[baseOffset+i] {- break
- }
- *blocklen++
- }
- return nil
-}
--- a/delta_write_encode.go
+++ /dev/null
@@ -1,108 +1,0 @@
-package furgit
-
-const deltaCopyDefaultLen = 0x10000
-
-func deltaEncode(baseSize, resultSize int, instr []deltaInstruction) ([]byte, error) {- if baseSize < 0 || resultSize < 0 {- return nil, ErrInvalidObject
- }
- out := make([]byte, 0, 64)
- baseHdr, err := packVarintEncode(baseSize)
- if err != nil {- return nil, err
- }
- out = append(out, baseHdr...)
- resultHdr, err := packVarintEncode(resultSize)
- if err != nil {- return nil, err
- }
- out = append(out, resultHdr...)
-
- for _, ins := range instr {- if ins.copy {- if ins.offset < 0 || ins.length <= 0 {- return nil, ErrInvalidObject
- }
- if ins.offset > 0xffffffff {- return nil, ErrInvalidObject
- }
- if ins.length > 0xffffff {- return nil, ErrInvalidObject
- }
- op := byte(0x80)
- var tmp [7]byte
- pos := 0
-
- off := ins.offset
- for i := 0; i < 4; i++ {- op |= 1 << i
- tmp[pos] = byte(off & 0xff)
- pos++
- off >>= 8
- if off == 0 {- break
- }
- }
- if off != 0 {- return nil, ErrInvalidObject
- }
-
- n := ins.length
- if n != deltaCopyDefaultLen {- for i := 0; i < 3 && n > 0; i++ {- op |= 1 << (i + 4)
- tmp[pos] = byte(n & 0xff)
- pos++
- n >>= 8
- }
- if n > 0 {- return nil, ErrInvalidObject
- }
- }
-
- out = append(out, op)
- out = append(out, tmp[:pos]...)
- continue
- }
-
- data := ins.data
- if ins.length > 0 {- if len(data) < ins.length {- return nil, ErrInvalidObject
- }
- data = data[:ins.length]
- }
- if len(data) == 0 {- return nil, ErrInvalidObject
- }
- for len(data) > 0 {- n := len(data)
- if n > 127 {- n = 127
- }
- out = append(out, byte(n))
- out = append(out, data[:n]...)
- data = data[n:]
- }
- }
-
- return out, nil
-}
-
-func deltaTry(base, target []byte, seed uint32, minSavings int) ([]byte, bool) {- if minSavings < 0 {- minSavings = 0
- }
- instr, err := deltify(base, target, seed)
- if err != nil {- return nil, false
- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {- return nil, false
- }
- if len(delta)+minSavings >= len(target) {- return nil, false
- }
- return delta, true
-}
--- a/delta_write_select.go
+++ /dev/null
@@ -1,56 +1,0 @@
-package furgit
-
-const defaultDeltaWindow = 64
-
-type objectToPack struct {- id Hash
- ty ObjectType
- body []byte
- offset uint64
- deltaDepth int
-}
-
-type deltaContext struct {- window int
- candidates []*objectToPack
-}
-
-func (ctx *deltaContext) addCandidate(obj *objectToPack) {- if ctx.window <= 0 {- return
- }
- ctx.candidates = append(ctx.candidates, obj)
- if len(ctx.candidates) > ctx.window {- over := len(ctx.candidates) - ctx.window
- ctx.candidates = ctx.candidates[over:]
- }
-}
-
-func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint32, minSavings, maxDepth int) (*objectToPack, []byte) {- if ctx == nil || len(ctx.candidates) == 0 {- return nil, nil
- }
- if maxDepth <= 0 {- maxDepth = 1
- }
- var bestBase *objectToPack
- var bestDelta []byte
- for i := len(ctx.candidates) - 1; i >= 0; i-- {- base := ctx.candidates[i]
- if base.ty != ObjectTypeBlob {- continue
- }
- if base.deltaDepth >= maxDepth {- continue
- }
- delta, ok := deltaTry(base.body, obj.body, seed, minSavings)
- if !ok {- continue
- }
- if bestDelta == nil || len(delta) < len(bestDelta) {- bestDelta = delta
- bestBase = base
- }
- }
- return bestBase, bestDelta
-}
--- a/packed_read_pack.go
+++ b/packed_read_pack.go
@@ -108,6 +108,24 @@
return body, nil
}
+func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {+ if len(data) == 0 {+ return 0, 0, io.ErrUnexpectedEOF
+ }
+ b := data[0]
+ dist := uint64(b & 0x7f)
+ consumed := 1
+ for (b & 0x80) != 0 {+ if consumed >= len(data) {+ return 0, 0, io.ErrUnexpectedEOF
+ }
+ b = data[consumed]
+ consumed++
+ dist = ((dist + 1) << 7) + uint64(b&0x7f)
+ }
+ return dist, consumed, nil
+}
+
type packKey struct {path string
ofs uint64
@@ -324,6 +342,138 @@
}
frames = nil
return resultTy, body, nil
+}
+
+func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {+ pos := 0
+ baseBytes := base.Bytes()
+ deltaBytes := delta.Bytes()
+ srcSize, err := packVarintRead(deltaBytes, &pos)
+ if err != nil {+ return bufpool.Buffer{}, err+ }
+ dstSize, err := packVarintRead(deltaBytes, &pos)
+ if err != nil {+ return bufpool.Buffer{}, err+ }
+ if srcSize != len(baseBytes) {+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ out := bufpool.Borrow(dstSize)
+ out.Resize(dstSize)
+ outBytes := out.Bytes()
+ outPos := 0
+
+ for pos < len(deltaBytes) {+ op := deltaBytes[pos]
+ pos++
+ switch {+ case op&0x80 != 0:
+ off := 0
+ n := 0
+ if op&0x01 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ off |= int(deltaBytes[pos])
+ pos++
+ }
+ if op&0x02 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ off |= int(deltaBytes[pos]) << 8
+ pos++
+ }
+ if op&0x04 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ off |= int(deltaBytes[pos]) << 16
+ pos++
+ }
+ if op&0x08 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ off |= int(deltaBytes[pos]) << 24
+ pos++
+ }
+ if op&0x10 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ n |= int(deltaBytes[pos])
+ pos++
+ }
+ if op&0x20 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ n |= int(deltaBytes[pos]) << 8
+ pos++
+ }
+ if op&0x40 != 0 {+ if pos >= len(deltaBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ n |= int(deltaBytes[pos]) << 16
+ pos++
+ }
+ if n == 0 {+ n = 0x10000
+ }
+ if off+n > len(baseBytes) || outPos+n > len(outBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ copy(outBytes[outPos:], baseBytes[off:off+n])
+ outPos += n
+ case op != 0:
+ n := int(op)
+ if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ copy(outBytes[outPos:], deltaBytes[pos:pos+n])
+ pos += n
+ outPos += n
+ default:
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ }
+
+ if outPos != len(outBytes) {+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject+ }
+ return out, nil
+}
+
+func packVarintRead(buf []byte, pos *int) (int, error) {+ res := 0
+ shift := 0
+ for {+ if *pos >= len(buf) {+ return 0, ErrInvalidObject
+ }
+ b := buf[*pos]
+ *pos++
+ res |= int(b&0x7f) << shift
+ if (b & 0x80) == 0 {+ break
+ }
+ shift += 7
+ }
+ return res, nil
}
type packFile struct {--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -104,84 +104,19 @@
}
func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error {- if pw == nil || !pw.wroteHeader {- return ErrInvalidObject
- }
- if baseSize < 0 || resultSize < 0 {- return ErrInvalidObject
- }
- if delta == nil {- delta = []byte{}- }
- deltaSize := len(delta)
- if deltaSize <= 0 {- return ErrInvalidObject
- }
- currentOffset := pw.bytesWritten
- if baseOffset >= currentOffset {- return ErrInvalidObject
- }
- dist := currentOffset - baseOffset
-
- hdr, err := packHeaderEncode(ObjectTypeOfsDelta, deltaSize)
- if err != nil {- return err
- }
- if err := pw.writePacked(hdr); err != nil {- return err
- }
- ofs, err := packOfsEncode(dist)
- if err != nil {- return err
- }
- if err := pw.writePacked(ofs); err != nil {- return err
- }
-
- zw := zlib.NewWriter(&packHashWriter{pw: pw})- if _, err := zw.Write(delta); err != nil {- _ = zw.Close()
- return err
- }
- return zw.Close()
+ _ = baseOffset
+ _ = baseSize
+ _ = resultSize
+ _ = delta
+ return errPackDeltaUnimplemented
}
func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error {- if pw == nil || !pw.wroteHeader {- return ErrInvalidObject
- }
- if baseSize < 0 || resultSize < 0 {- return ErrInvalidObject
- }
- if delta == nil {- delta = []byte{}- }
- deltaSize := len(delta)
- if deltaSize <= 0 {- return ErrInvalidObject
- }
- baseBytes := base.Bytes()
- if len(baseBytes) == 0 {- return ErrInvalidObject
- }
-
- hdr, err := packHeaderEncode(ObjectTypeRefDelta, deltaSize)
- if err != nil {- return err
- }
- if err := pw.writePacked(hdr); err != nil {- return err
- }
- if err := pw.writePacked(baseBytes); err != nil {- return err
- }
-
- zw := zlib.NewWriter(&packHashWriter{pw: pw})- if _, err := zw.Write(delta); err != nil {- _ = zw.Close()
- return err
- }
- return zw.Close()
+ _ = base
+ _ = baseSize
+ _ = resultSize
+ _ = delta
+ return errPackDeltaUnimplemented
}
func (pw *packWriter) Close() (Hash, error) {@@ -291,7 +226,7 @@
if repo == nil { return Hash{}, ErrInvalidObject}
- if opts.EnableThinPack {+ if opts.EnableDeltas || opts.EnableThinPack { return Hash{}, errPackDeltaUnimplemented}
if len(objects) > int(^uint32(0)) {@@ -306,45 +241,13 @@
return Hash{}, err}
- var dctx deltaContext
- if opts.EnableDeltas {- dctx.window = defaultDeltaWindow
- }
- deltaSeed := uint32(0)
-
for _, id := range objects {ty, body, err := repo.ReadObjectTypeRaw(id)
if err != nil { return Hash{}, err}
- obj := &objectToPack{- id: id,
- ty: ty,
- body: body,
- }
- startOffset := pw.bytesWritten
- wroteDelta := false
-
- if opts.EnableDeltas && ty == ObjectTypeBlob {- base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
- if base != nil && delta != nil {- if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {- return Hash{}, err- }
- wroteDelta = true
- obj.deltaDepth = base.deltaDepth + 1
- }
- }
- if !wroteDelta {- if err := pw.WriteObject(ty, body); err != nil {- return Hash{}, err- }
- obj.deltaDepth = 0
- }
- obj.offset = startOffset
-
- if opts.EnableDeltas && ty == ObjectTypeBlob {- dctx.addCandidate(obj)
+ if err := pw.WriteObject(ty, body); err != nil {+ return Hash{}, err}
}
--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -3,15 +3,12 @@
import (
"bytes"
"crypto/rand"
+ "errors"
"fmt"
"os"
- "os/exec"
"path/filepath"
"strings"
"testing"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
- "codeberg.org/lindenii/furgit/internal/zlibx"
)
func TestPackHeaderEncodeParseRoundtrip(t *testing.T) {@@ -172,21 +169,8 @@
_ = os.Remove(idxPath)
}()
- if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {- t.Fatalf("pack stream invalid: %v", err)- }
+ _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath)
- cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)- cmd.Dir = repoPath
- cmd.Env = append(os.Environ(),
- "GIT_CONFIG_GLOBAL=/dev/null",
- "GIT_CONFIG_SYSTEM=/dev/null",
- )
- output, err := cmd.CombinedOutput()
- if err != nil {- t.Fatalf("git index-pack failed: %v\n%s", err, output)- }
-
verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
seen := make(map[string]struct{}) for _, line := range strings.Split(verifyOut, "\n") {@@ -220,52 +204,10 @@
_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}
-func TestPackWriteDeltas(t *testing.T) {+func TestPackWriteDeltasUnimplemented(t *testing.T) {repoPath, cleanup := setupTestRepo(t)
defer cleanup()
- workDir, cleanupWork := setupWorkDir(t)
- defer cleanupWork()
-
- const (
- fileCount = 200
- fileSize = 2048
- )
- base := bytes.Repeat([]byte("delta-base-"), fileSize/10)- for i := 0; i < fileCount; i++ {- buf := make([]byte, len(base))
- copy(buf, base)
- buf[i%len(buf)] ^= byte(i)
- name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i))- if err := os.WriteFile(name, buf, 0o644); err != nil {- t.Fatalf("failed to write %s: %v", name, err)- }
- }
-
- gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
- gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Delta commit")
- commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
-
- commitBody := gitCatFile(t, repoPath, "commit", commitHash)
- lines := bytes.Split(commitBody, []byte{'\n'})- if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {- t.Fatalf("commit missing tree header")- }
- treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))-
- lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash)
- var blobHashes []string
- for _, line := range strings.Split(lsTree, "\n") {- if line == "" {- continue
- }
- fields := strings.Fields(line)
- if len(fields) < 3 {- t.Fatalf("unexpected ls-tree line: %q", line)- }
- blobHashes = append(blobHashes, fields[2])
- }
-
repo, err := OpenRepository(repoPath)
if err != nil { t.Fatalf("OpenRepository failed: %v", err)@@ -272,238 +214,11 @@
}
defer func() { _ = repo.Close() }()- var objects []Hash
- commitID, _ := repo.ParseHash(commitHash)
- objects = append(objects, commitID)
- treeID, _ := repo.ParseHash(treeHash)
- objects = append(objects, treeID)
- for _, bh := range blobHashes {- id, _ := repo.ParseHash(bh)
- objects = append(objects, id)
+ buf := new(bytes.Buffer)
+ _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true})+ if !errors.Is(err, errPackDeltaUnimplemented) {+ t.Fatalf("expected errPackDeltaUnimplemented, got %v", err)}
- expectedOids := append([]string{commitHash, treeHash}, blobHashes...)-
- packDir := filepath.Join(repoPath, "objects", "pack")
- if err := os.MkdirAll(packDir, 0o755); err != nil {- t.Fatalf("failed to create pack dir: %v", err)- }
- pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack")
- if err != nil {- t.Fatalf("failed to create pack file: %v", err)- }
- packPath := pf.Name()
- idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
- if _, err := repo.packWrite(pf, objects, packWriteOptions{- EnableDeltas: true,
- MinDeltaSavings: 1,
- }); err != nil {- _ = pf.Close()
- t.Fatalf("packWrite failed: %v", err)- }
- if err := pf.Close(); err != nil {- t.Fatalf("failed to close pack file: %v", err)- }
-
- defer func() {- _ = os.Remove(packPath)
- _ = os.Remove(idxPath)
- }()
-
- if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {- t.Fatalf("pack stream invalid: %v", err)- }
-
- cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)- cmd.Dir = repoPath
- cmd.Env = append(os.Environ(),
- "GIT_CONFIG_GLOBAL=/dev/null",
- "GIT_CONFIG_SYSTEM=/dev/null",
- )
- output, err := cmd.CombinedOutput()
- if err != nil {- t.Fatalf("git index-pack failed: %v\n%s", err, output)- }
-
- verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
- seen := make(map[string]struct{})- for _, line := range strings.Split(verifyOut, "\n") {- if strings.TrimSpace(line) == "" {- continue
- }
- if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {- continue
- }
- parts := strings.Fields(line)
- if len(parts) == 0 {- continue
- }
- seen[parts[0]] = struct{}{}- }
- for _, oid := range expectedOids {- if _, ok := seen[oid]; !ok {- t.Fatalf("verify-pack missing object %s", oid)- }
- }
-
- for _, oid := range expectedOids {- if err := removeLooseObject(repoPath, oid); err != nil {- t.Fatalf("remove loose object %s: %v", oid, err)- }
- }
- for _, oid := range expectedOids {- _ = gitCmd(t, repoPath, "cat-file", "-p", oid)
- }
-
- _ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
-}
-
-func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {- data, err := os.ReadFile(path)
- if err != nil {- return err
- }
- if len(data) < 12 {- return ErrInvalidObject
- }
- if readBE32(data[0:4]) != packMagic || readBE32(data[4:8]) != packVersion2 {- return ErrInvalidObject
- }
- pos := 12
- hashSize := algo.Size()
- type objEntry struct {- offset uint64
- ty ObjectType
- body []byte
- }
- byOffset := make(map[uint64]objEntry, objectCount)
- byHash := make(map[string]objEntry, objectCount)
- for i := 0; i < objectCount; i++ {- objOffset := uint64(pos)
- ty, size, consumed, err := packHeaderParse(data[pos:])
- if err != nil {- return fmt.Errorf("obj %d header at %d: %v", i, pos, err)- }
- pos += consumed
- baseOffset := uint64(0)
- baseTy := ObjectTypeInvalid
- var baseBody []byte
- var baseHash Hash
- switch ty {- case ObjectTypeOfsDelta:
- dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
- if err != nil {- return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)- }
- pos += distConsumed
- if dist == 0 || dist > objOffset {- return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)- }
- baseOffset = objOffset - dist
- base, ok := byOffset[baseOffset]
- if !ok {- return fmt.Errorf("obj %d ofs at %d: missing base", i, pos)- }
- baseTy = base.ty
- baseBody = base.body
- case ObjectTypeRefDelta:
- if pos+hashSize > len(data) {- return ErrInvalidObject
- }
- copy(baseHash.data[:], data[pos:pos+hashSize])
- baseHash.algo = algo
- baseEntry, ok := byHash[baseHash.String()]
- if !ok {- return fmt.Errorf("obj %d ref base not found", i)- }
- baseTy = baseEntry.ty
- baseBody = baseEntry.body
- pos += hashSize
- default:
- }
-
- payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
- if err != nil {- return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)- }
- payload := append([]byte(nil), payloadBuf.Bytes()...)
- payloadBuf.Release()
- pos += zconsumed
- switch ty {- case ObjectTypeOfsDelta:
- if baseBody == nil {- return fmt.Errorf("obj %d missing base body", i)- }
- baseSize, resultSize, err := readDeltaSizes(payload)
- if err != nil {- return fmt.Errorf("obj %d delta sizes: %v", i, err)- }
- if baseSize != len(baseBody) {- return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))- }
- out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
- if err != nil {- return fmt.Errorf("obj %d delta apply: %v", i, err)- }
- body := append([]byte(nil), out.Bytes()...)
- out.Release()
- if resultSize != len(body) {- return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)- }
- byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}- case ObjectTypeRefDelta:
- if baseBody == nil {- return fmt.Errorf("obj %d missing ref base body", i)- }
- baseSize, resultSize, err := readDeltaSizes(payload)
- if err != nil {- return fmt.Errorf("obj %d ref delta sizes: %v", i, err)- }
- if baseSize != len(baseBody) {- return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))- }
- out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
- if err != nil {- return fmt.Errorf("obj %d ref delta apply: %v", i, err)- }
- body := append([]byte(nil), out.Bytes()...)
- out.Release()
- if resultSize != len(body) {- return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize)- }
- byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}- default:
- if size >= 0 && len(payload) != size {- return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)- }
- body := append([]byte(nil), payload...)
- byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}- }
-
- entry := byOffset[objOffset]
- if entry.body != nil && entry.ty != ObjectTypeInvalid {- hdr, err := headerForType(entry.ty, entry.body)
- if err != nil {- return err
- }
- raw := append(hdr, entry.body...)
- hash := algo.Sum(raw)
- byHash[hash.String()] = entry
- }
- }
- return nil
-}
-
-func readDeltaSizes(delta []byte) (int, int, error) {- pos := 0
- baseSize, err := packVarintRead(delta, &pos)
- if err != nil {- return 0, 0, err
- }
- resultSize, err := packVarintRead(delta, &pos)
- if err != nil {- return 0, 0, err
- }
- return baseSize, resultSize, nil
}
func removeLooseObject(repoPath, oid string) error {--
⑨