ref: 94a6325ee646be4a06ca0646fcd797b2a9c74581
parent: 84e32bfc60150fdd216166606966d6ebebbf3ac7
author: Runxi Yu <me@runxiyu.org>
date: Tue Nov 25 03:00:00 EST 2025
flatex: Restructure a little
--- /dev/null
+++ b/internal/flatex/decompress.go
@@ -1,0 +1,38 @@
+package flatex
+
+import (
+ "io"
+
+ "git.sr.ht/~runxiyu/furgit/internal/bufpool"
+)
+
+func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) {+ d := sliceInflaterPool.Get().(*sliceInflater)
+ defer sliceInflaterPool.Put(d)
+
+ if err := d.reset(src); err != nil {+ return bufpool.Buffer{}, 0, err+ }
+
+ out := bufpool.Borrow(sizeHint)
+ out.Resize(0)
+
+ for {+ if len(d.toRead) > 0 {+ out.Append(d.toRead)
+ d.toRead = nil
+ continue
+ }
+ if d.err != nil {+ if d.err == io.EOF {+ return out, d.pos, nil
+ }
+ out.Release()
+ return bufpool.Buffer{}, 0, d.err+ }
+ d.step(d)
+ if d.err != nil && len(d.toRead) == 0 {+ d.toRead = d.window.readFlush()
+ }
+ }
+}
--- a/internal/flatex/decompress_bytes.go
+++ /dev/null
@@ -1,56 +1,0 @@
-package flatex
-
-import (
- "io"
- "sync"
-
- "git.sr.ht/~runxiyu/furgit/internal/bufpool"
-)
-
-type bufferDecompressor struct {- inflater sliceInflater
-}
-
-var bufferDecompressorPool = sync.Pool{- New: func() any {- fixedHuffmanDecoderInit()
- d := &bufferDecompressor{- inflater: sliceInflater{- bits: new([maxNumLit + maxNumDist]int),
- codebits: new([numCodes]int),
- },
- }
- return d
- },
-}
-
-func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) {- d := bufferDecompressorPool.Get().(*bufferDecompressor)
- defer bufferDecompressorPool.Put(d)
-
- if err := d.inflater.reset(src); err != nil {- return bufpool.Buffer{}, 0, err- }
-
- out := bufpool.Borrow(sizeHint)
- out.Resize(0)
-
- for {- if len(d.inflater.toRead) > 0 {- out.Append(d.inflater.toRead)
- d.inflater.toRead = nil
- continue
- }
- if d.inflater.err != nil {- if d.inflater.err == io.EOF {- return out, d.inflater.pos, nil
- }
- out.Release()
- return bufpool.Buffer{}, 0, d.inflater.err- }
- d.inflater.step(&d.inflater)
- if d.inflater.err != nil && len(d.inflater.toRead) == 0 {- d.inflater.toRead = d.inflater.window.readFlush()
- }
- }
-}
--- /dev/null
+++ b/internal/flatex/huffman.go
@@ -1,0 +1,245 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flatex implements the DEFLATE compressed data format, described in
+// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access
+// to DEFLATE-based file formats.
+package flatex
+
+import (
+ "math/bits"
+ "strconv"
+ "sync"
+)
+
+const (
+ // The special code used to mark the end of a block.
+ endBlockMarker = 256
+ maxCodeLen = 16 // max length of Huffman code
+ maxMatchOffset = 1 << 15 // The largest match offset
+ // The next three numbers come from the RFC section 3.2.7, with the
+ // additional proviso in section 3.2.5 which implies that distance codes
+ // 30 and 31 should never occur in compressed data.
+ maxNumLit = 286
+ maxNumDist = 30
+ numCodes = 19 // number of codes in Huffman meta-code
+)
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError int64
+
+func (e CorruptInputError) Error() string {+ return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+ huffmanChunkBits = 9
+ huffmanNumChunks = 1 << huffmanChunkBits
+ huffmanCountMask = 15
+ huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {+ min int // the minimum code length
+ chunks [huffmanNumChunks]uint32 // chunks as described above
+ links [][]uint32 // overflow links
+ linkMask uint32 // mask the width of the link table
+}
+
+// init initializes the Huffman decoding tables from an array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {+ // Sanity enables additional runtime tests during Huffman
+ // table construction. It's intended to be used during
+ // development to supplement the currently ad-hoc unit tests.
+ const sanity = false
+
+ if h.min != 0 {+ *h = huffmanDecoder{}+ }
+
+ // Count number of codes of each length,
+ // compute min and max length.
+ var count [maxCodeLen]int
+ var min, max int
+ for _, n := range lengths {+ if n == 0 {+ continue
+ }
+ if min == 0 || n < min {+ min = n
+ }
+ if n > max {+ max = n
+ }
+ count[n]++
+ }
+
+ // Empty tree. The decompressor.huffSym function will fail later if the tree
+ // is used. Technically, an empty tree is only valid for the HDIST tree and
+ // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+ // is guaranteed to fail since it will attempt to use the tree to decode the
+ // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+ // guaranteed to fail later since the compressed data section must be
+ // composed of at least one symbol (the end-of-block marker).
+ if max == 0 {+ return true
+ }
+
+ code := 0
+ var nextcode [maxCodeLen]int
+ for i := min; i <= max; i++ {+ code <<= 1
+ nextcode[i] = code
+ code += count[i]
+ }
+
+ // Check that the coding is complete (i.e., that we've
+ // assigned all 2-to-the-max possible bit sequences).
+ // Exception: To be compatible with zlib, we also need to
+ // accept degenerate single-code codings. See also
+ // TestDegenerateHuffmanCoding.
+ if code != 1<<uint(max) && (code != 1 || max != 1) {+ return false
+ }
+
+ h.min = min
+ if max > huffmanChunkBits {+ numLinks := 1 << (uint(max) - huffmanChunkBits)
+ h.linkMask = uint32(numLinks - 1)
+
+ // create link tables
+ link := nextcode[huffmanChunkBits+1] >> 1
+ h.links = make([][]uint32, huffmanNumChunks-link)
+ for j := uint(link); j < huffmanNumChunks; j++ {+ reverse := int(bits.Reverse16(uint16(j)))
+ reverse >>= uint(16 - huffmanChunkBits)
+ off := j - uint(link)
+ if sanity && h.chunks[reverse] != 0 {+ panic("impossible: overwriting existing chunk")+ }
+ h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+ h.links[off] = make([]uint32, numLinks)
+ }
+ }
+
+ for i, n := range lengths {+ if n == 0 {+ continue
+ }
+ code := nextcode[n]
+ nextcode[n]++
+ chunk := uint32(i<<huffmanValueShift | n)
+ reverse := int(bits.Reverse16(uint16(code)))
+ reverse >>= uint(16 - n)
+ if n <= huffmanChunkBits {+ for off := reverse; off < len(h.chunks); off += 1 << uint(n) {+ // We should never need to overwrite
+ // an existing chunk. Also, 0 is
+ // never a valid chunk, because the
+ // lower 4 "count" bits should be
+ // between 1 and 15.
+ if sanity && h.chunks[off] != 0 {+ panic("impossible: overwriting existing chunk")+ }
+ h.chunks[off] = chunk
+ }
+ } else {+ j := reverse & (huffmanNumChunks - 1)
+ if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {+ // Longer codes should have been
+ // associated with a link table above.
+ panic("impossible: not an indirect chunk")+ }
+ value := h.chunks[j] >> huffmanValueShift
+ linktab := h.links[value]
+ reverse >>= huffmanChunkBits
+ for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {+ if sanity && linktab[off] != 0 {+ panic("impossible: overwriting existing chunk")+ }
+ linktab[off] = chunk
+ }
+ }
+ }
+
+ if sanity {+ // Above we've sanity checked that we never overwrote
+ // an existing entry. Here we additionally check that
+ // we filled the tables completely.
+ for i, chunk := range h.chunks {+ if chunk == 0 {+ // As an exception, in the degenerate
+ // single-code case, we allow odd
+ // chunks to be missing.
+ if code == 1 && i%2 == 1 {+ continue
+ }
+ panic("impossible: missing chunk")+ }
+ }
+ for _, linktab := range h.links {+ for _, chunk := range linktab {+ if chunk == 0 {+ panic("impossible: missing chunk")+ }
+ }
+ }
+ }
+
+ return true
+}
+
+// codeOrder lists the code length alphabet symbols in the order their code
+// lengths appear in a dynamic Huffman block header (RFC 1951 section 3.2.7).
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}+
+var (
+ // Initialize the fixedHuffmanDecoder only once upon first use.
+ fixedOnce sync.Once
+ fixedHuffmanDecoder huffmanDecoder
+)
+
+func fixedHuffmanDecoderInit() {+ fixedOnce.Do(func() {+ // These come from the RFC section 3.2.6.
+ var bits [288]int
+ for i := 0; i < 144; i++ {+ bits[i] = 8
+ }
+ for i := 144; i < 256; i++ {+ bits[i] = 9
+ }
+ for i := 256; i < 280; i++ {+ bits[i] = 7
+ }
+ for i := 280; i < 288; i++ {+ bits[i] = 8
+ }
+ fixedHuffmanDecoder.init(bits[:])
+ })
+}
--- a/internal/flatex/inflate.go
+++ /dev/null
@@ -1,245 +1,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package flatex implements the DEFLATE compressed data format, described in
-// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access
-// to DEFLATE-based file formats.
-package flatex
-
-import (
- "math/bits"
- "strconv"
- "sync"
-)
-
-const (
- // The special code used to mark the end of a block.
- endBlockMarker = 256
- maxCodeLen = 16 // max length of Huffman code
- maxMatchOffset = 1 << 15 // The largest match offset
- // The next three numbers come from the RFC section 3.2.7, with the
- // additional proviso in section 3.2.5 which implies that distance codes
- // 30 and 31 should never occur in compressed data.
- maxNumLit = 286
- maxNumDist = 30
- numCodes = 19 // number of codes in Huffman meta-code
-)
-
-var (
- // Initialize the fixedHuffmanDecoder only once upon first use.
- fixedOnce sync.Once
- fixedHuffmanDecoder huffmanDecoder
-)
-
-// A CorruptInputError reports the presence of corrupt input at a given offset.
-type CorruptInputError int64
-
-func (e CorruptInputError) Error() string {- return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
-}
-
-// The data structure for decoding Huffman tables is based on that of
-// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
-// For codes smaller than the table width, there are multiple entries
-// (each combination of trailing bits has the same value). For codes
-// larger than the table width, the table contains a link to an overflow
-// table. The width of each entry in the link table is the maximum code
-// size minus the chunk width.
-//
-// Note that you can do a lookup in the table even without all bits
-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
-// have the property that shorter codes come before longer ones, the
-// bit length estimate in the result is a lower bound on the actual
-// number of bits.
-//
-// See the following:
-// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
-
-// chunk & 15 is number of bits
-// chunk >> 4 is value, including table link
-
-const (
- huffmanChunkBits = 9
- huffmanNumChunks = 1 << huffmanChunkBits
- huffmanCountMask = 15
- huffmanValueShift = 4
-)
-
-type huffmanDecoder struct {- min int // the minimum code length
- chunks [huffmanNumChunks]uint32 // chunks as described above
- links [][]uint32 // overflow links
- linkMask uint32 // mask the width of the link table
-}
-
-// Initialize Huffman decoding tables from array of code lengths.
-// Following this function, h is guaranteed to be initialized into a complete
-// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
-// degenerate case where the tree has only a single symbol with length 1. Empty
-// trees are permitted.
-func (h *huffmanDecoder) init(lengths []int) bool {- // Sanity enables additional runtime tests during Huffman
- // table construction. It's intended to be used during
- // development to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if h.min != 0 {- *h = huffmanDecoder{}- }
-
- // Count number of codes of each length,
- // compute min and max length.
- var count [maxCodeLen]int
- var min, max int
- for _, n := range lengths {- if n == 0 {- continue
- }
- if min == 0 || n < min {- min = n
- }
- if n > max {- max = n
- }
- count[n]++
- }
-
- // Empty tree. The decompressor.huffSym function will fail later if the tree
- // is used. Technically, an empty tree is only valid for the HDIST tree and
- // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
- // is guaranteed to fail since it will attempt to use the tree to decode the
- // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
- // guaranteed to fail later since the compressed data section must be
- // composed of at least one symbol (the end-of-block marker).
- if max == 0 {- return true
- }
-
- code := 0
- var nextcode [maxCodeLen]int
- for i := min; i <= max; i++ {- code <<= 1
- nextcode[i] = code
- code += count[i]
- }
-
- // Check that the coding is complete (i.e., that we've
- // assigned all 2-to-the-max possible bit sequences).
- // Exception: To be compatible with zlib, we also need to
- // accept degenerate single-code codings. See also
- // TestDegenerateHuffmanCoding.
- if code != 1<<uint(max) && (code != 1 || max != 1) {- return false
- }
-
- h.min = min
- if max > huffmanChunkBits {- numLinks := 1 << (uint(max) - huffmanChunkBits)
- h.linkMask = uint32(numLinks - 1)
-
- // create link tables
- link := nextcode[huffmanChunkBits+1] >> 1
- h.links = make([][]uint32, huffmanNumChunks-link)
- for j := uint(link); j < huffmanNumChunks; j++ {- reverse := int(bits.Reverse16(uint16(j)))
- reverse >>= uint(16 - huffmanChunkBits)
- off := j - uint(link)
- if sanity && h.chunks[reverse] != 0 {- panic("impossible: overwriting existing chunk")- }
- h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
- h.links[off] = make([]uint32, numLinks)
- }
- }
-
- for i, n := range lengths {- if n == 0 {- continue
- }
- code := nextcode[n]
- nextcode[n]++
- chunk := uint32(i<<huffmanValueShift | n)
- reverse := int(bits.Reverse16(uint16(code)))
- reverse >>= uint(16 - n)
- if n <= huffmanChunkBits {- for off := reverse; off < len(h.chunks); off += 1 << uint(n) {- // We should never need to overwrite
- // an existing chunk. Also, 0 is
- // never a valid chunk, because the
- // lower 4 "count" bits should be
- // between 1 and 15.
- if sanity && h.chunks[off] != 0 {- panic("impossible: overwriting existing chunk")- }
- h.chunks[off] = chunk
- }
- } else {- j := reverse & (huffmanNumChunks - 1)
- if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {- // Longer codes should have been
- // associated with a link table above.
- panic("impossible: not an indirect chunk")- }
- value := h.chunks[j] >> huffmanValueShift
- linktab := h.links[value]
- reverse >>= huffmanChunkBits
- for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {- if sanity && linktab[off] != 0 {- panic("impossible: overwriting existing chunk")- }
- linktab[off] = chunk
- }
- }
- }
-
- if sanity {- // Above we've sanity checked that we never overwrote
- // an existing entry. Here we additionally check that
- // we filled the tables completely.
- for i, chunk := range h.chunks {- if chunk == 0 {- // As an exception, in the degenerate
- // single-code case, we allow odd
- // chunks to be missing.
- if code == 1 && i%2 == 1 {- continue
- }
- panic("impossible: missing chunk")- }
- }
- for _, linktab := range h.links {- for _, chunk := range linktab {- if chunk == 0 {- panic("impossible: missing chunk")- }
- }
- }
- }
-
- return true
-}
-
-// RFC 1951 section 3.2.7.
-// Compression with dynamic Huffman codes
-var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}-
-func fixedHuffmanDecoderInit() {- fixedOnce.Do(func() {- // These come from the RFC section 3.2.6.
- var bits [288]int
- for i := 0; i < 144; i++ {- bits[i] = 8
- }
- for i := 144; i < 256; i++ {- bits[i] = 9
- }
- for i := 256; i < 280; i++ {- bits[i] = 7
- }
- for i := 280; i < 288; i++ {- bits[i] = 8
- }
- fixedHuffmanDecoder.init(bits[:])
- })
-}
--- a/internal/flatex/slice_inflate.go
+++ b/internal/flatex/slice_inflate.go
@@ -3,6 +3,7 @@
import (
"io"
"math/bits"
+ "sync"
)
// sliceInflater is a specialized DEFLATE decoder that reads directly from an
@@ -31,6 +32,16 @@
hl, hd *huffmanDecoder
copyLen int
copyDist int
+}
+
+var sliceInflaterPool = sync.Pool{+ New: func() any {+ fixedHuffmanDecoderInit()
+ return &sliceInflater{+ bits: new([maxNumLit + maxNumDist]int),
+ codebits: new([numCodes]int),
+ }
+ },
}
func (f *sliceInflater) reset(src []byte) error {--
⑨