shithub: furgit

Download patch

ref: bd9519b464f919f2ca174a45c3c19c9a8a1fe3d1
parent: c0d1cc442031c8200df4a3a499c07081fab40553
author: Runxi Yu <me@runxiyu.org>
date: Tue Mar 3 13:26:57 EST 2026

objectstore/packed: Check pack/idx checksums here.

We previously had helpers in format/pack/checksum that checks
.pack/.idx-related checksums with []byte-based APIs. But it only really
makes sense to use those []byte-based APIs on mmap's (otherwise it'd be
horribly inefficient).

Since the packed object-store only needs to check that the .pack and
.idx trailer match, we move the relevant part into objectstore/packed.
The rest are deleted for now; we'll definitely need a streaming version
for the pack verification (when ingesting packfiles from the network)
(though we might just make it a streaming API (writer? reader? not
decided yet) that *produces* a hash, then verify it in the caller; this
way we could reuse the function in the pack-producing routines). The
others might get the []byte-based APIs back, or perhaps they too get
streaming APIs.

Remember that "reading objects from a packed object store",
"creating/writing packfiles", and "ingesting an incoming pack (which
usually involves creating an .idx for it)", are all very different
tasks.

--- a/format/pack/checksum/checksum.go
+++ /dev/null
@@ -1,108 +1,0 @@
-// Package checksum provides Git pack/index checksum primitives.
-package checksum
-
-import (
-	"bytes"
-	"fmt"
-
-	"codeberg.org/lindenii/furgit/objectid"
-)
-
-// VerifyPackTrailer verifies one pack trailer hash against the pack payload.
-//
-// This computes the object hash over all bytes except the trailing hash, so it
-// is O(pack size).
-func VerifyPackTrailer(data []byte, algo objectid.Algorithm) error {
-	hashSize := algo.Size()
-	if hashSize <= 0 {
-		return objectid.ErrInvalidAlgorithm
-	}
-	if len(data) < hashSize {
-		return fmt.Errorf("format/pack/checksum: pack too short for trailer hash")
-	}
-
-	hash, err := algo.New()
-	if err != nil {
-		return err
-	}
-	if _, err := hash.Write(data[:len(data)-hashSize]); err != nil {
-		return err
-	}
-	computed := hash.Sum(nil)
-	trailer := data[len(data)-hashSize:]
-	if !bytes.Equal(computed, trailer) {
-		return fmt.Errorf("format/pack/checksum: pack trailer hash mismatch")
-	}
-	return nil
-}
-
-// PackTrailerHash returns the trailer hash bytes from one pack file.
-func PackTrailerHash(data []byte, algo objectid.Algorithm) ([]byte, error) {
-	hashSize := algo.Size()
-	if hashSize <= 0 {
-		return nil, objectid.ErrInvalidAlgorithm
-	}
-	if len(data) < hashSize {
-		return nil, fmt.Errorf("format/pack/checksum: pack too short for trailer hash")
-	}
-	return data[len(data)-hashSize:], nil
-}
-
-// ParseIdxTrailer parses one idx v2 trailer and returns (packHash, idxHash).
-func ParseIdxTrailer(data []byte, algo objectid.Algorithm) ([]byte, []byte, error) {
-	hashSize := algo.Size()
-	if hashSize <= 0 {
-		return nil, nil, objectid.ErrInvalidAlgorithm
-	}
-	if len(data) < hashSize*2 {
-		return nil, nil, fmt.Errorf("format/pack/checksum: idx too short for trailer hashes")
-	}
-	packHashOff := len(data) - hashSize*2
-	idxHashOff := len(data) - hashSize
-	return data[packHashOff:idxHashOff], data[idxHashOff:], nil
-}
-
-// VerifyIdxTrailer verifies one idx trailer checksum against preceding bytes.
-func VerifyIdxTrailer(data []byte, algo objectid.Algorithm) error {
-	hashSize := algo.Size()
-	if hashSize <= 0 {
-		return objectid.ErrInvalidAlgorithm
-	}
-	if len(data) < hashSize*2 {
-		return fmt.Errorf("format/pack/checksum: idx too short for trailer hashes")
-	}
-
-	_, idxHash, err := ParseIdxTrailer(data, algo)
-	if err != nil {
-		return err
-	}
-	hash, err := algo.New()
-	if err != nil {
-		return err
-	}
-	if _, err := hash.Write(data[:len(data)-hashSize]); err != nil {
-		return err
-	}
-	computed := hash.Sum(nil)
-	if !bytes.Equal(computed, idxHash) {
-		return fmt.Errorf("format/pack/checksum: idx trailer hash mismatch")
-	}
-	return nil
-}
-
-// VerifyPackMatchesIdx compares a pack trailer hash with one idx-recorded pack
-// hash.
-func VerifyPackMatchesIdx(packData, idxData []byte, algo objectid.Algorithm) error {
-	packTrailerHash, err := PackTrailerHash(packData, algo)
-	if err != nil {
-		return err
-	}
-	idxPackHash, _, err := ParseIdxTrailer(idxData, algo)
-	if err != nil {
-		return err
-	}
-	if !bytes.Equal(packTrailerHash, idxPackHash) {
-		return fmt.Errorf("format/pack/checksum: pack hash does not match idx")
-	}
-	return nil
-}
--- /dev/null
+++ b/objectstore/packed/pack_idx_checksum.go
@@ -1,0 +1,30 @@
+package packed
+
+import (
+	"bytes"
+	"fmt"
+
+	"codeberg.org/lindenii/furgit/objectid"
+)
+
+// verifyMappedPackMatchesMappedIdx compares one mapped pack trailer hash with
+// the pack hash recorded in one mapped idx trailer.
+func verifyMappedPackMatchesMappedIdx(packData, idxData []byte, algo objectid.Algorithm) error {
+	hashSize := algo.Size()
+	if hashSize <= 0 {
+		return objectid.ErrInvalidAlgorithm
+	}
+	if len(packData) < hashSize {
+		return fmt.Errorf("objectstore/packed: pack too short for trailer hash")
+	}
+	if len(idxData) < hashSize*2 {
+		return fmt.Errorf("objectstore/packed: idx too short for trailer hashes")
+	}
+
+	packTrailerHash := packData[len(packData)-hashSize:]
+	idxPackHash := idxData[len(idxData)-hashSize*2 : len(idxData)-hashSize]
+	if !bytes.Equal(packTrailerHash, idxPackHash) {
+		return fmt.Errorf("objectstore/packed: pack hash does not match idx")
+	}
+	return nil
+}
--- a/objectstore/packed/store.go
+++ b/objectstore/packed/store.go
@@ -7,7 +7,6 @@
 	"os"
 	"sync"
 
-	packchecksum "codeberg.org/lindenii/furgit/format/pack/checksum"
 	"codeberg.org/lindenii/furgit/objectid"
 	"codeberg.org/lindenii/furgit/objectstore"
 )
@@ -195,7 +194,7 @@
 	if err != nil {
 		return err
 	}
-	if err := packchecksum.VerifyPackMatchesIdx(pack.data, index.data, store.algo); err != nil {
+	if err := verifyMappedPackMatchesMappedIdx(pack.data, index.data, store.algo); err != nil {
 		return fmt.Errorf("objectstore/packed: pack %q does not match idx %q: %w", pack.name, index.idxName, err)
 	}
 	return nil
--