shithub: furgit

--- /dev/null

+++ b/internal/flatex/decompress.go

@@ -1,0 +1,38 @@

+package flatex

+import (

+	"io"

+	"git.sr.ht/~runxiyu/furgit/internal/bufpool"

+)

+func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) { // inflates src into a buffer borrowed from bufpool; on success returns that buffer, d.pos and a nil error

+	d := sliceInflaterPool.Get().(*sliceInflater) // reuse a pooled decoder instead of building one per call

+	defer sliceInflaterPool.Put(d) // hand the decoder back on every return path

+	if err := d.reset(src); err != nil {

+		return bufpool.Buffer{}, 0, err // nothing has been borrowed yet, so there is nothing to release

+	}

+	out := bufpool.Borrow(sizeHint) // sizeHint goes straight to Borrow; presumably a capacity hint, confirm in bufpool

+	out.Resize(0) // presumably truncates to length 0 so Append starts from empty; confirm in bufpool

+	for {

+		if len(d.toRead) > 0 { // drain pending decoded bytes before looking at the error state

+			out.Append(d.toRead)

+			d.toRead = nil

+			continue

+		}

+		if d.err != nil {

+			if d.err == io.EOF { // io.EOF is the success terminator here: the caller gets a nil error

+				return out, d.pos, nil // NOTE(review): d.pos looks like the number of src bytes consumed; confirm in sliceInflater

+			}

+			out.Release() // give the borrowed buffer back before reporting the failure

+			return bufpool.Buffer{}, 0, d.err

+		}

+		d.step(d) // run the decoder's current step function

+		if d.err != nil && len(d.toRead) == 0 { // decoder stopped and queued nothing itself

+			d.toRead = d.window.readFlush() // queue whatever readFlush returns so the next iteration appends it before the error check

+		}

+	}

+}

--- a/internal/flatex/decompress_bytes.go

+++ /dev/null

@@ -1,56 +1,0 @@

-package flatex

-import (

-	"io"

-	"sync"

-	"git.sr.ht/~runxiyu/furgit/internal/bufpool"

-)

-type bufferDecompressor struct {

-	inflater sliceInflater

-}

-var bufferDecompressorPool = sync.Pool{

-	New: func() any {

-		fixedHuffmanDecoderInit()

-		d := &bufferDecompressor{

-			inflater: sliceInflater{

-				bits:     new([maxNumLit + maxNumDist]int),

-				codebits: new([numCodes]int),

-			},

-		}

-		return d

-	},

-}

-func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) {

-	d := bufferDecompressorPool.Get().(*bufferDecompressor)

-	defer bufferDecompressorPool.Put(d)

-	if err := d.inflater.reset(src); err != nil {

-		return bufpool.Buffer{}, 0, err

-	}

-	out := bufpool.Borrow(sizeHint)

-	out.Resize(0)

-	for {

-		if len(d.inflater.toRead) > 0 {

-			out.Append(d.inflater.toRead)

-			d.inflater.toRead = nil

-			continue

-		}

-		if d.inflater.err != nil {

-			if d.inflater.err == io.EOF {

-				return out, d.inflater.pos, nil

-			}

-			out.Release()

-			return bufpool.Buffer{}, 0, d.inflater.err

-		}

-		d.inflater.step(&d.inflater)

-		if d.inflater.err != nil && len(d.inflater.toRead) == 0 {

-			d.inflater.toRead = d.inflater.window.readFlush()

-		}

-	}

-}

--- /dev/null

+++ b/internal/flatex/huffman.go

@@ -1,0 +1,245 @@

+// Copyright 2009 The Go Authors. All rights reserved.

+// Use of this source code is governed by a BSD-style

+// license that can be found in the LICENSE file.

+// Package flatex implements the DEFLATE compressed data format, described in

+// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access

+// to DEFLATE-based file formats.

+package flatex

+import (

+	"math/bits"

+	"strconv"

+	"sync"

+)

+const (

+	// The special code used to mark the end of a block.

+	endBlockMarker = 256

+	maxCodeLen     = 16      // max length of Huffman code

+	maxMatchOffset = 1 << 15 // The largest match offset

+	// The next three numbers come from the RFC section 3.2.7, with the

+	// additional proviso in section 3.2.5 which implies that distance codes

+	// 30 and 31 should never occur in compressed data.

+	maxNumLit  = 286

+	maxNumDist = 30

+	numCodes   = 19 // number of codes in Huffman meta-code

+)

+// A CorruptInputError reports the presence of corrupt input at a given offset.

+type CorruptInputError int64 // the underlying value is the offset that Error prints

+func (e CorruptInputError) Error() string { // Error makes CorruptInputError satisfy the error interface

+	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) // offset is rendered in base 10

+}

+// The data structure for decoding Huffman tables is based on that of

+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).

+// For codes smaller than the table width, there are multiple entries

+// (each combination of trailing bits has the same value). For codes

+// larger than the table width, the table contains a link to an overflow

+// table. The width of each entry in the link table is the maximum code

+// size minus the chunk width.

+//

+// Note that you can do a lookup in the table even without all bits

+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes

+// have the property that shorter codes come before longer ones, the

+// bit length estimate in the result is a lower bound on the actual

+// number of bits.

+//

+// See the following:

+//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt

+// chunk & 15 is number of bits

+// chunk >> 4 is value, including table link

+const (

+	huffmanChunkBits  = 9

+	huffmanNumChunks  = 1 << huffmanChunkBits

+	huffmanCountMask  = 15

+	huffmanValueShift = 4

+)

+type huffmanDecoder struct { // lookup tables for one Huffman code, filled in by init

+	min      int                      // the minimum code length; init also treats non-zero as "already built" and resets h
+
+	chunks   [huffmanNumChunks]uint32 // primary table, one entry per huffmanChunkBits-bit index (layout described above)

+	links    [][]uint32               // overflow tables for codes longer than huffmanChunkBits, indexed by the value stored in the matching chunk

+	linkMask uint32                   // numLinks-1, where numLinks is the length of each overflow table

+}

+// init builds the Huffman decoding tables in h from an array of code lengths

+// and reports whether the lengths describe a usable code. After a true result

+// the tree is complete (neither over-subscribed nor under-subscribed), with two

+// accepted exceptions: a degenerate tree holding a single symbol of length 1,

+// and an empty tree (every length zero), for which h is left without tables.

+func (h *huffmanDecoder) init(lengths []int) bool {

+	// Sanity enables additional runtime tests during Huffman

+	// table construction. It's intended to be used during

+	// development to supplement the currently ad-hoc unit tests.

+	const sanity = false

+	if h.min != 0 { // h holds tables from an earlier call; start again from the zero value

+		*h = huffmanDecoder{}

+	}

+	// Count number of codes of each length,

+	// compute min and max length.

+	var count [maxCodeLen]int // indexed by code length; NOTE(review): lengths are not range-checked here, so a value >= maxCodeLen would panic on count[n]

+	var min, max int

+	for _, n := range lengths {

+		if n == 0 { // length 0 means the symbol is unused

+			continue

+		}

+		if min == 0 || n < min {

+			min = n

+		}

+		if n > max {

+			max = n

+		}

+		count[n]++

+	}

+	// Empty tree. The decompressor.huffSym function will fail later if the tree

+	// is used. Technically, an empty tree is only valid for the HDIST tree and

+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree

+	// is guaranteed to fail since it will attempt to use the tree to decode the

+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is

+	// guaranteed to fail later since the compressed data section must be

+	// composed of at least one symbol (the end-of-block marker).

+	if max == 0 {

+		return true

+	}

+	code := 0

+	var nextcode [maxCodeLen]int // nextcode[i] is the next code value to hand out for length i

+	for i := min; i <= max; i++ {

+		code <<= 1

+		nextcode[i] = code // first code of length i

+		code += count[i] // skip past every code of length i

+	}

+	// Check that the coding is complete (i.e., that we've

+	// assigned all 2-to-the-max possible bit sequences).

+	// Exception: To be compatible with zlib, we also need to

+	// accept degenerate single-code codings. See also

+	// TestDegenerateHuffmanCoding.

+	if code != 1<<uint(max) && (code != 1 || max != 1) {

+		return false

+	}

+	h.min = min // from here on h counts as built for the reset check at the top

+	if max > huffmanChunkBits {

+		numLinks := 1 << (uint(max) - huffmanChunkBits) // entries per overflow table

+		h.linkMask = uint32(numLinks - 1)

+		// create link tables

+		link := nextcode[huffmanChunkBits+1] >> 1 // huffmanChunkBits-bit prefix of the first (huffmanChunkBits+1)-bit code

+		h.links = make([][]uint32, huffmanNumChunks-link)

+		for j := uint(link); j < huffmanNumChunks; j++ {

+			reverse := int(bits.Reverse16(uint16(j)))

+			reverse >>= uint(16 - huffmanChunkBits) // keep only the huffmanChunkBits reversed bits

+			off := j - uint(link)

+			if sanity && h.chunks[reverse] != 0 {

+				panic("impossible: overwriting existing chunk")

+			}

+			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1)) // indirect entry: value is the links index, count is huffmanChunkBits+1

+			h.links[off] = make([]uint32, numLinks)

+		}

+	}

+	for i, n := range lengths {

+		if n == 0 {

+			continue

+		}

+		code := nextcode[n]

+		nextcode[n]++

+		chunk := uint32(i<<huffmanValueShift | n) // symbol index above huffmanValueShift, code length in the low bits

+		reverse := int(bits.Reverse16(uint16(code)))

+		reverse >>= uint(16 - n) // keep only the n reversed bits of the code

+		if n <= huffmanChunkBits {

+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {

+				// We should never need to overwrite

+				// an existing chunk. Also, 0 is

+				// never a valid chunk, because the

+				// lower 4 "count" bits should be

+				// between 1 and 15.

+				if sanity && h.chunks[off] != 0 {

+					panic("impossible: overwriting existing chunk")

+				}

+				h.chunks[off] = chunk

+			}

+		} else {

+			j := reverse & (huffmanNumChunks - 1) // low huffmanChunkBits bits select the primary-table entry

+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {

+				// Longer codes should have been

+				// associated with a link table above.

+				panic("impossible: not an indirect chunk")

+			}

+			value := h.chunks[j] >> huffmanValueShift

+			linktab := h.links[value]

+			reverse >>= huffmanChunkBits // remaining bits index into the overflow table

+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {

+				if sanity && linktab[off] != 0 {

+					panic("impossible: overwriting existing chunk")

+				}

+				linktab[off] = chunk

+			}

+		}

+	}

+	if sanity {

+		// Above we've sanity checked that we never overwrote

+		// an existing entry. Here we additionally check that

+		// we filled the tables completely.

+		for i, chunk := range h.chunks {

+			if chunk == 0 {

+				// As an exception, in the degenerate

+				// single-code case, we allow odd

+				// chunks to be missing.

+				if code == 1 && i%2 == 1 {

+					continue

+				}

+				panic("impossible: missing chunk")

+			}

+		}

+		for _, linktab := range h.links {

+			for _, chunk := range linktab {

+				if chunk == 0 {

+					panic("impossible: missing chunk")

+				}

+			}

+		}

+	}

+	return true

+}

+// RFC 1951 section 3.2.7.

+// Compression with dynamic Huffman codes

+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}

+var (

+	// Initialize the fixedHuffmanDecoder only once upon first use.

+	fixedOnce           sync.Once

+	fixedHuffmanDecoder huffmanDecoder

+)

+func fixedHuffmanDecoderInit() { // builds fixedHuffmanDecoder; fixedOnce makes repeated calls run the setup only once

+	fixedOnce.Do(func() {

+		// These come from the RFC section 3.2.6.

+		var bits [288]int // code length per symbol; this local shadows the math/bits import inside the closure

+		for i := 0; i < 144; i++ {

+			bits[i] = 8

+		}

+		for i := 144; i < 256; i++ {

+			bits[i] = 9

+		}

+		for i := 256; i < 280; i++ {

+			bits[i] = 7

+		}

+		for i := 280; i < 288; i++ {

+			bits[i] = 8

+		}

+		fixedHuffmanDecoder.init(bits[:]) // result unchecked: these lengths form a complete code (24/2^7 + 152/2^8 + 112/2^9 = 1), so init returns true

+	})

+}

--- a/internal/flatex/inflate.go

+++ /dev/null

@@ -1,245 +1,0 @@

-// Copyright 2009 The Go Authors. All rights reserved.

-// Use of this source code is governed by a BSD-style

-// license that can be found in the LICENSE file.

-// Package flatex implements the DEFLATE compressed data format, described in

-// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access

-// to DEFLATE-based file formats.

-package flatex

-import (

-	"math/bits"

-	"strconv"

-	"sync"

-)

-const (

-	// The special code used to mark the end of a block.

-	endBlockMarker = 256

-	maxCodeLen     = 16      // max length of Huffman code

-	maxMatchOffset = 1 << 15 // The largest match offset

-	// The next three numbers come from the RFC section 3.2.7, with the

-	// additional proviso in section 3.2.5 which implies that distance codes

-	// 30 and 31 should never occur in compressed data.

-	maxNumLit  = 286

-	maxNumDist = 30

-	numCodes   = 19 // number of codes in Huffman meta-code

-)

-var (

-	// Initialize the fixedHuffmanDecoder only once upon first use.

-	fixedOnce           sync.Once

-	fixedHuffmanDecoder huffmanDecoder

-)

-// A CorruptInputError reports the presence of corrupt input at a given offset.

-type CorruptInputError int64

-func (e CorruptInputError) Error() string {

-	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)

-}

-// The data structure for decoding Huffman tables is based on that of

-// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),

-// For codes smaller than the table width, there are multiple entries

-// (each combination of trailing bits has the same value). For codes

-// larger than the table width, the table contains a link to an overflow

-// table. The width of each entry in the link table is the maximum code

-// size minus the chunk width.

-//

-// Note that you can do a lookup in the table even without all bits

-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes

-// have the property that shorter codes come before longer ones, the

-// bit length estimate in the result is a lower bound on the actual

-// number of bits.

-//

-// See the following:

-//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt

-// chunk & 15 is number of bits

-// chunk >> 4 is value, including table link

-const (

-	huffmanChunkBits  = 9

-	huffmanNumChunks  = 1 << huffmanChunkBits

-	huffmanCountMask  = 15

-	huffmanValueShift = 4

-)

-type huffmanDecoder struct {

-	min      int                      // the minimum code length

-	chunks   [huffmanNumChunks]uint32 // chunks as described above

-	links    [][]uint32               // overflow links

-	linkMask uint32                   // mask the width of the link table

-}

-// Initialize Huffman decoding tables from array of code lengths.

-// Following this function, h is guaranteed to be initialized into a complete

-// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a

-// degenerate case where the tree has only a single symbol with length 1. Empty

-// trees are permitted.

-func (h *huffmanDecoder) init(lengths []int) bool {

-	// Sanity enables additional runtime tests during Huffman

-	// table construction. It's intended to be used during

-	// development to supplement the currently ad-hoc unit tests.

-	const sanity = false

-	if h.min != 0 {

-		*h = huffmanDecoder{}

-	}

-	// Count number of codes of each length,

-	// compute min and max length.

-	var count [maxCodeLen]int

-	var min, max int

-	for _, n := range lengths {

-		if n == 0 {

-			continue

-		}

-		if min == 0 || n < min {

-			min = n

-		}

-		if n > max {

-			max = n

-		}

-		count[n]++

-	}

-	// Empty tree. The decompressor.huffSym function will fail later if the tree

-	// is used. Technically, an empty tree is only valid for the HDIST tree and

-	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree

-	// is guaranteed to fail since it will attempt to use the tree to decode the

-	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is

-	// guaranteed to fail later since the compressed data section must be

-	// composed of at least one symbol (the end-of-block marker).

-	if max == 0 {

-		return true

-	}

-	code := 0

-	var nextcode [maxCodeLen]int

-	for i := min; i <= max; i++ {

-		code <<= 1

-		nextcode[i] = code

-		code += count[i]

-	}

-	// Check that the coding is complete (i.e., that we've

-	// assigned all 2-to-the-max possible bit sequences).

-	// Exception: To be compatible with zlib, we also need to

-	// accept degenerate single-code codings. See also

-	// TestDegenerateHuffmanCoding.

-	if code != 1<<uint(max) && (code != 1 || max != 1) {

-		return false

-	}

-	h.min = min

-	if max > huffmanChunkBits {

-		numLinks := 1 << (uint(max) - huffmanChunkBits)

-		h.linkMask = uint32(numLinks - 1)

-		// create link tables

-		link := nextcode[huffmanChunkBits+1] >> 1

-		h.links = make([][]uint32, huffmanNumChunks-link)

-		for j := uint(link); j < huffmanNumChunks; j++ {

-			reverse := int(bits.Reverse16(uint16(j)))

-			reverse >>= uint(16 - huffmanChunkBits)

-			off := j - uint(link)

-			if sanity && h.chunks[reverse] != 0 {

-				panic("impossible: overwriting existing chunk")

-			}

-			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))

-			h.links[off] = make([]uint32, numLinks)

-		}

-	}

-	for i, n := range lengths {

-		if n == 0 {

-			continue

-		}

-		code := nextcode[n]

-		nextcode[n]++

-		chunk := uint32(i<<huffmanValueShift | n)

-		reverse := int(bits.Reverse16(uint16(code)))

-		reverse >>= uint(16 - n)

-		if n <= huffmanChunkBits {

-			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {

-				// We should never need to overwrite

-				// an existing chunk. Also, 0 is

-				// never a valid chunk, because the

-				// lower 4 "count" bits should be

-				// between 1 and 15.

-				if sanity && h.chunks[off] != 0 {

-					panic("impossible: overwriting existing chunk")

-				}

-				h.chunks[off] = chunk

-			}

-		} else {

-			j := reverse & (huffmanNumChunks - 1)

-			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {

-				// Longer codes should have been

-				// associated with a link table above.

-				panic("impossible: not an indirect chunk")

-			}

-			value := h.chunks[j] >> huffmanValueShift

-			linktab := h.links[value]

-			reverse >>= huffmanChunkBits

-			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {

-				if sanity && linktab[off] != 0 {

-					panic("impossible: overwriting existing chunk")

-				}

-				linktab[off] = chunk

-			}

-		}

-	}

-	if sanity {

-		// Above we've sanity checked that we never overwrote

-		// an existing entry. Here we additionally check that

-		// we filled the tables completely.

-		for i, chunk := range h.chunks {

-			if chunk == 0 {

-				// As an exception, in the degenerate

-				// single-code case, we allow odd

-				// chunks to be missing.

-				if code == 1 && i%2 == 1 {

-					continue

-				}

-				panic("impossible: missing chunk")

-			}

-		}

-		for _, linktab := range h.links {

-			for _, chunk := range linktab {

-				if chunk == 0 {

-					panic("impossible: missing chunk")

-				}

-			}

-		}

-	}

-	return true

-}

-// RFC 1951 section 3.2.7.

-// Compression with dynamic Huffman codes

-var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}

-func fixedHuffmanDecoderInit() {

-	fixedOnce.Do(func() {

-		// These come from the RFC section 3.2.6.

-		var bits [288]int

-		for i := 0; i < 144; i++ {

-			bits[i] = 8

-		}

-		for i := 144; i < 256; i++ {

-			bits[i] = 9

-		}

-		for i := 256; i < 280; i++ {

-			bits[i] = 7

-		}

-		for i := 280; i < 288; i++ {

-			bits[i] = 8

-		}

-		fixedHuffmanDecoder.init(bits[:])

-	})

-}

--- a/internal/flatex/slice_inflate.go

+++ b/internal/flatex/slice_inflate.go

@@ -3,6 +3,7 @@

 import (

 	"io"

 	"math/bits"

+	"sync"

 // sliceInflater is a specialized DEFLATE decoder that reads directly from an

@@ -31,6 +32,16 @@

 	hl, hd    *huffmanDecoder

 	copyLen   int

 	copyDist  int

+}

+var sliceInflaterPool = sync.Pool{

+	New: func() any {

+		fixedHuffmanDecoderInit()

+		return &sliceInflater{

+			bits:     new([maxNumLit + maxNumDist]int),

+			codebits: new([numCodes]int),

+		}

+	},

 func (f *sliceInflater) reset(src []byte) error {

--

⑨