shithub: furgit

Download patch

ref: 94a6325ee646be4a06ca0646fcd797b2a9c74581
parent: 84e32bfc60150fdd216166606966d6ebebbf3ac7
author: Runxi Yu <me@runxiyu.org>
date: Tue Nov 25 03:00:00 EST 2025

flatex: Restructure a little

--- /dev/null
+++ b/internal/flatex/decompress.go
@@ -1,0 +1,38 @@
+package flatex
+
+import (
+	"io"
+
+	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
+)
+
+func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) {
+	d := sliceInflaterPool.Get().(*sliceInflater)
+	defer sliceInflaterPool.Put(d)
+
+	if err := d.reset(src); err != nil {
+		return bufpool.Buffer{}, 0, err
+	}
+
+	out := bufpool.Borrow(sizeHint)
+	out.Resize(0)
+
+	for {
+		if len(d.toRead) > 0 {
+			out.Append(d.toRead)
+			d.toRead = nil
+			continue
+		}
+		if d.err != nil {
+			if d.err == io.EOF {
+				return out, d.pos, nil
+			}
+			out.Release()
+			return bufpool.Buffer{}, 0, d.err
+		}
+		d.step(d)
+		if d.err != nil && len(d.toRead) == 0 {
+			d.toRead = d.window.readFlush()
+		}
+	}
+}
--- a/internal/flatex/decompress_bytes.go
+++ /dev/null
@@ -1,56 +1,0 @@
-package flatex
-
-import (
-	"io"
-	"sync"
-
-	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
-)
-
-type bufferDecompressor struct {
-	inflater sliceInflater
-}
-
-var bufferDecompressorPool = sync.Pool{
-	New: func() any {
-		fixedHuffmanDecoderInit()
-		d := &bufferDecompressor{
-			inflater: sliceInflater{
-				bits:     new([maxNumLit + maxNumDist]int),
-				codebits: new([numCodes]int),
-			},
-		}
-		return d
-	},
-}
-
-func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) {
-	d := bufferDecompressorPool.Get().(*bufferDecompressor)
-	defer bufferDecompressorPool.Put(d)
-
-	if err := d.inflater.reset(src); err != nil {
-		return bufpool.Buffer{}, 0, err
-	}
-
-	out := bufpool.Borrow(sizeHint)
-	out.Resize(0)
-
-	for {
-		if len(d.inflater.toRead) > 0 {
-			out.Append(d.inflater.toRead)
-			d.inflater.toRead = nil
-			continue
-		}
-		if d.inflater.err != nil {
-			if d.inflater.err == io.EOF {
-				return out, d.inflater.pos, nil
-			}
-			out.Release()
-			return bufpool.Buffer{}, 0, d.inflater.err
-		}
-		d.inflater.step(&d.inflater)
-		if d.inflater.err != nil && len(d.inflater.toRead) == 0 {
-			d.inflater.toRead = d.inflater.window.readFlush()
-		}
-	}
-}
--- /dev/null
+++ b/internal/flatex/huffman.go
@@ -1,0 +1,245 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flatex implements the DEFLATE compressed data format, described in
+// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access
+// to DEFLATE-based file formats.
+package flatex
+
+import (
+	"math/bits"
+	"strconv"
+	"sync"
+)
+
+const (
+	// The special code used to mark the end of a block.
+	endBlockMarker = 256
+	maxCodeLen     = 16      // max length of Huffman code
+	maxMatchOffset = 1 << 15 // The largest match offset
+	// The next three numbers come from the RFC section 3.2.7, with the
+	// additional proviso in section 3.2.5 which implies that distance codes
+	// 30 and 31 should never occur in compressed data.
+	maxNumLit  = 286
+	maxNumDist = 30
+	numCodes   = 19 // number of codes in Huffman meta-code
+)
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError int64
+
+func (e CorruptInputError) Error() string {
+	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	min      int                      // the minimum code length
+	chunks   [huffmanNumChunks]uint32 // chunks as described above
+	links    [][]uint32               // overflow links
+	linkMask uint32                   // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.min != 0 {
+		*h = huffmanDecoder{}
+	}
+
+	// Count number of codes of each length,
+	// compute min and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i] = code
+		code += count[i]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && (code != 1 || max != 1) {
+		return false
+	}
+
+	h.min = min
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		h.links = make([][]uint32, huffmanNumChunks-link)
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			h.links[off] = make([]uint32, numLinks)
+		}
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint32(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry. Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+var (
+	// Initialize the fixedHuffmanDecoder only once upon first use.
+	fixedOnce           sync.Once
+	fixedHuffmanDecoder huffmanDecoder
+)
+
+func fixedHuffmanDecoderInit() {
+	fixedOnce.Do(func() {
+		// These come from the RFC section 3.2.6.
+		var bits [288]int
+		for i := 0; i < 144; i++ {
+			bits[i] = 8
+		}
+		for i := 144; i < 256; i++ {
+			bits[i] = 9
+		}
+		for i := 256; i < 280; i++ {
+			bits[i] = 7
+		}
+		for i := 280; i < 288; i++ {
+			bits[i] = 8
+		}
+		fixedHuffmanDecoder.init(bits[:])
+	})
+}
--- a/internal/flatex/inflate.go
+++ /dev/null
@@ -1,245 +1,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package flatex implements the DEFLATE compressed data format, described in
-// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access
-// to DEFLATE-based file formats.
-package flatex
-
-import (
-	"math/bits"
-	"strconv"
-	"sync"
-)
-
-const (
-	// The special code used to mark the end of a block.
-	endBlockMarker = 256
-	maxCodeLen     = 16      // max length of Huffman code
-	maxMatchOffset = 1 << 15 // The largest match offset
-	// The next three numbers come from the RFC section 3.2.7, with the
-	// additional proviso in section 3.2.5 which implies that distance codes
-	// 30 and 31 should never occur in compressed data.
-	maxNumLit  = 286
-	maxNumDist = 30
-	numCodes   = 19 // number of codes in Huffman meta-code
-)
-
-var (
-	// Initialize the fixedHuffmanDecoder only once upon first use.
-	fixedOnce           sync.Once
-	fixedHuffmanDecoder huffmanDecoder
-)
-
-// A CorruptInputError reports the presence of corrupt input at a given offset.
-type CorruptInputError int64
-
-func (e CorruptInputError) Error() string {
-	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
-}
-
-// The data structure for decoding Huffman tables is based on that of
-// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
-// For codes smaller than the table width, there are multiple entries
-// (each combination of trailing bits has the same value). For codes
-// larger than the table width, the table contains a link to an overflow
-// table. The width of each entry in the link table is the maximum code
-// size minus the chunk width.
-//
-// Note that you can do a lookup in the table even without all bits
-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
-// have the property that shorter codes come before longer ones, the
-// bit length estimate in the result is a lower bound on the actual
-// number of bits.
-//
-// See the following:
-//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt
-
-// chunk & 15 is number of bits
-// chunk >> 4 is value, including table link
-
-const (
-	huffmanChunkBits  = 9
-	huffmanNumChunks  = 1 << huffmanChunkBits
-	huffmanCountMask  = 15
-	huffmanValueShift = 4
-)
-
-type huffmanDecoder struct {
-	min      int                      // the minimum code length
-	chunks   [huffmanNumChunks]uint32 // chunks as described above
-	links    [][]uint32               // overflow links
-	linkMask uint32                   // mask the width of the link table
-}
-
-// Initialize Huffman decoding tables from array of code lengths.
-// Following this function, h is guaranteed to be initialized into a complete
-// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
-// degenerate case where the tree has only a single symbol with length 1. Empty
-// trees are permitted.
-func (h *huffmanDecoder) init(lengths []int) bool {
-	// Sanity enables additional runtime tests during Huffman
-	// table construction. It's intended to be used during
-	// development to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if h.min != 0 {
-		*h = huffmanDecoder{}
-	}
-
-	// Count number of codes of each length,
-	// compute min and max length.
-	var count [maxCodeLen]int
-	var min, max int
-	for _, n := range lengths {
-		if n == 0 {
-			continue
-		}
-		if min == 0 || n < min {
-			min = n
-		}
-		if n > max {
-			max = n
-		}
-		count[n]++
-	}
-
-	// Empty tree. The decompressor.huffSym function will fail later if the tree
-	// is used. Technically, an empty tree is only valid for the HDIST tree and
-	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
-	// is guaranteed to fail since it will attempt to use the tree to decode the
-	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
-	// guaranteed to fail later since the compressed data section must be
-	// composed of at least one symbol (the end-of-block marker).
-	if max == 0 {
-		return true
-	}
-
-	code := 0
-	var nextcode [maxCodeLen]int
-	for i := min; i <= max; i++ {
-		code <<= 1
-		nextcode[i] = code
-		code += count[i]
-	}
-
-	// Check that the coding is complete (i.e., that we've
-	// assigned all 2-to-the-max possible bit sequences).
-	// Exception: To be compatible with zlib, we also need to
-	// accept degenerate single-code codings. See also
-	// TestDegenerateHuffmanCoding.
-	if code != 1<<uint(max) && (code != 1 || max != 1) {
-		return false
-	}
-
-	h.min = min
-	if max > huffmanChunkBits {
-		numLinks := 1 << (uint(max) - huffmanChunkBits)
-		h.linkMask = uint32(numLinks - 1)
-
-		// create link tables
-		link := nextcode[huffmanChunkBits+1] >> 1
-		h.links = make([][]uint32, huffmanNumChunks-link)
-		for j := uint(link); j < huffmanNumChunks; j++ {
-			reverse := int(bits.Reverse16(uint16(j)))
-			reverse >>= uint(16 - huffmanChunkBits)
-			off := j - uint(link)
-			if sanity && h.chunks[reverse] != 0 {
-				panic("impossible: overwriting existing chunk")
-			}
-			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
-			h.links[off] = make([]uint32, numLinks)
-		}
-	}
-
-	for i, n := range lengths {
-		if n == 0 {
-			continue
-		}
-		code := nextcode[n]
-		nextcode[n]++
-		chunk := uint32(i<<huffmanValueShift | n)
-		reverse := int(bits.Reverse16(uint16(code)))
-		reverse >>= uint(16 - n)
-		if n <= huffmanChunkBits {
-			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
-				// We should never need to overwrite
-				// an existing chunk. Also, 0 is
-				// never a valid chunk, because the
-				// lower 4 "count" bits should be
-				// between 1 and 15.
-				if sanity && h.chunks[off] != 0 {
-					panic("impossible: overwriting existing chunk")
-				}
-				h.chunks[off] = chunk
-			}
-		} else {
-			j := reverse & (huffmanNumChunks - 1)
-			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
-				// Longer codes should have been
-				// associated with a link table above.
-				panic("impossible: not an indirect chunk")
-			}
-			value := h.chunks[j] >> huffmanValueShift
-			linktab := h.links[value]
-			reverse >>= huffmanChunkBits
-			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
-				if sanity && linktab[off] != 0 {
-					panic("impossible: overwriting existing chunk")
-				}
-				linktab[off] = chunk
-			}
-		}
-	}
-
-	if sanity {
-		// Above we've sanity checked that we never overwrote
-		// an existing entry. Here we additionally check that
-		// we filled the tables completely.
-		for i, chunk := range h.chunks {
-			if chunk == 0 {
-				// As an exception, in the degenerate
-				// single-code case, we allow odd
-				// chunks to be missing.
-				if code == 1 && i%2 == 1 {
-					continue
-				}
-				panic("impossible: missing chunk")
-			}
-		}
-		for _, linktab := range h.links {
-			for _, chunk := range linktab {
-				if chunk == 0 {
-					panic("impossible: missing chunk")
-				}
-			}
-		}
-	}
-
-	return true
-}
-
-// RFC 1951 section 3.2.7.
-// Compression with dynamic Huffman codes
-var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
-
-func fixedHuffmanDecoderInit() {
-	fixedOnce.Do(func() {
-		// These come from the RFC section 3.2.6.
-		var bits [288]int
-		for i := 0; i < 144; i++ {
-			bits[i] = 8
-		}
-		for i := 144; i < 256; i++ {
-			bits[i] = 9
-		}
-		for i := 256; i < 280; i++ {
-			bits[i] = 7
-		}
-		for i := 280; i < 288; i++ {
-			bits[i] = 8
-		}
-		fixedHuffmanDecoder.init(bits[:])
-	})
-}
--- a/internal/flatex/slice_inflate.go
+++ b/internal/flatex/slice_inflate.go
@@ -3,6 +3,7 @@
 import (
 	"io"
 	"math/bits"
+	"sync"
 )
 
 // sliceInflater is a specialized DEFLATE decoder that reads directly from an
@@ -31,6 +32,16 @@
 	hl, hd    *huffmanDecoder
 	copyLen   int
 	copyDist  int
+}
+
+var sliceInflaterPool = sync.Pool{
+	New: func() any {
+		fixedHuffmanDecoderInit()
+		return &sliceInflater{
+			bits:     new([maxNumLit + maxNumDist]int),
+			codebits: new([numCodes]int),
+		}
+	},
 }
 
 func (f *sliceInflater) reset(src []byte) error {
--