shithub: furgit

Download patch

ref: 8f577284f47f699855dcb3ceda21aa9d8be77c2f
parent: 82bc1cd67431d11690217813192ccc1510ce378b
author: Runxi Yu <runxiyu@umich.edu>
date: Sun Mar 22 13:27:37 EDT 2026

objectstore{,/loose}: Document contracts more clearly

--- a/objectstore/loose/helpers_test.go
+++ b/objectstore/loose/helpers_test.go
@@ -2,6 +2,7 @@
 
 import (
 	"io"
+	"os"
 	"testing"
 
 	"codeberg.org/lindenii/furgit/internal/testgit"
@@ -64,4 +65,42 @@
 	copy(raw[len(header):], body)
 
 	return ty, body, raw
+}
+
+func corruptLooseObjectTrailer(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) {
+	t.Helper()
+
+	root := testRepo.OpenObjectsRoot(t)
+
+	hex := id.String()
+	relPath := hex[:2] + "/" + hex[2:]
+
+	file, err := root.OpenFile(relPath, os.O_RDWR, 0)
+	if err != nil {
+		t.Fatalf("OpenFile(%q): %v", relPath, err)
+	}
+
+	defer func() { _ = file.Close() }()
+
+	info, err := file.Stat()
+	if err != nil {
+		t.Fatalf("Stat(%q): %v", relPath, err)
+	}
+
+	if info.Size() == 0 {
+		t.Fatalf("corrupt trailer on empty file %q", relPath)
+	}
+
+	last := make([]byte, 1)
+	_, err = file.ReadAt(last, info.Size()-1)
+	if err != nil {
+		t.Fatalf("ReadAt(%q): %v", relPath, err)
+	}
+
+	last[0] ^= 0xff
+
+	_, err = file.WriteAt(last, info.Size()-1)
+	if err != nil {
+		t.Fatalf("WriteAt(%q): %v", relPath, err)
+	}
 }
--- a/objectstore/loose/read_header.go
+++ b/objectstore/loose/read_header.go
@@ -9,6 +9,10 @@
 )
 
 // ReadHeader reads an object's type and declared content length.
+//
+// It parses only enough of the zlib-decoded object to recover the object
+// header. It does not verify that the remaining object content is readable and
+// does not verify the zlib Adler-32 trailer.
 func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) {
 	file, err := store.openObject(id)
 	if err != nil {
--- a/objectstore/loose/read_reader.go
+++ b/objectstore/loose/read_reader.go
@@ -52,7 +52,13 @@
 }
 
 // ReadReaderFull reads a full serialized object stream as "type size\0content".
+//
 // The caller must close the returned reader.
+//
+// Close releases resources only. It does not drain unread data for additional
+// validation. In particular, malformed trailing compressed data, trailing bytes
+// past the declared object size, and the zlib Adler-32 trailer may go
+// unverified unless the caller reads to io.EOF.
 func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) {
 	file, zr, err := store.openInflated(id)
 	if err != nil {
@@ -79,8 +85,15 @@
 	}, nil
 }
 
-// ReadReaderContent reads an object's type, declared content length, and content stream.
+// ReadReaderContent reads an object's type, declared content length, and
+// content stream.
+//
 // The caller must close the returned reader.
+//
+// Close releases resources only. It does not drain unread data for additional
+// validation. In particular, malformed trailing compressed data, trailing bytes
+// past the declared object size, and the zlib Adler-32 trailer may go
+// unverified unless the caller reads to io.EOF.
 func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) {
 	file, zr, err := store.openInflated(id)
 	if err != nil {
--- a/objectstore/loose/read_size.go
+++ b/objectstore/loose/read_size.go
@@ -3,6 +3,9 @@
 import "codeberg.org/lindenii/furgit/objectid"
 
 // ReadSize reads an object's declared content length.
+//
+// Like ReadHeader, it parses only enough of the zlib-decoded object to recover
+// the header and does not verify the zlib Adler-32 trailer.
 func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) {
 	_, size, err := store.ReadHeader(id)
 
--- a/objectstore/loose/read_test.go
+++ b/objectstore/loose/read_test.go
@@ -11,6 +11,7 @@
 	"codeberg.org/lindenii/furgit/objectid"
 	"codeberg.org/lindenii/furgit/objectstore"
 	"codeberg.org/lindenii/furgit/objectstore/loose"
+	"codeberg.org/lindenii/furgit/objecttype"
 )
 
 func TestLooseStoreReadAgainstGit(t *testing.T) {
@@ -173,4 +174,38 @@
 	if err == nil {
 		t.Fatalf("loose.New(root, unknown) expected error")
 	}
+}
+
+func TestLooseStoreReadHeaderDoesNotVerifyAdler32(t *testing.T) {
+	t.Parallel()
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+		testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+		store := openLooseStore(t, testRepo, algo)
+
+		content := []byte("header-only-check\n")
+		id, err := store.WriteBytesContent(objecttype.TypeBlob, content)
+		if err != nil {
+			t.Fatalf("WriteBytesContent: %v", err)
+		}
+
+		corruptLooseObjectTrailer(t, testRepo, id)
+
+		ty, size, err := store.ReadHeader(id)
+		if err != nil {
+			t.Fatalf("ReadHeader: %v", err)
+		}
+
+		if ty != objecttype.TypeBlob {
+			t.Fatalf("ReadHeader type = %v, want %v", ty, objecttype.TypeBlob)
+		}
+
+		if size != int64(len(content)) {
+			t.Fatalf("ReadHeader size = %d, want %d", size, len(content))
+		}
+
+		_, err = store.ReadBytesFull(id)
+		if err == nil {
+			t.Fatalf("ReadBytesFull on corrupted trailer succeeded")
+		}
+	})
 }
--- a/objectstore/loose/store.go
+++ b/objectstore/loose/store.go
@@ -9,6 +9,10 @@
 
 // Store reads loose Git objects from an objects directory root.
 //
+// Loose objects are zlib streams whose trailer uses Adler-32. Which reads
+// consume enough of the stream to reach and verify that trailer is documented
+// on the individual methods.
+//
 // Store owns root and closes it in Close.
 type Store struct {
 	// root is the objects directory capability used for all object file access.
@@ -32,6 +36,8 @@
 }
 
 // Close releases resources associated with the backend.
+//
+// Repeated calls to Close are undefined behavior.
 func (store *Store) Close() error {
 	return store.root.Close()
 }
--- a/objectstore/loose/write_writer.go
+++ b/objectstore/loose/write_writer.go
@@ -6,7 +6,6 @@
 	"os"
 
 	"codeberg.org/lindenii/furgit/internal/compress/zlib"
-	"codeberg.org/lindenii/furgit/objectid"
 )
 
 const tempObjectFilePrefix = "tmp_obj_"
@@ -38,8 +37,6 @@
 
 	closed    bool
 	finalized bool
-	finalID   objectid.ObjectID
-	finalErr  error
 }
 
 // newStreamWriter creates a stream writer with a temp file rooted in objects/.
--- a/objectstore/loose/write_writer_finalize.go
+++ b/objectstore/loose/write_writer_finalize.go
@@ -9,17 +9,14 @@
 )
 
 // Close flushes and closes the underlying zlib stream and temp file.
-// It is safe to call multiple times.
+//
+// Repeated calls to Close are undefined behavior.
 func (writer *streamWriter) Close() error {
-	if writer.closed {
-		return nil
-	}
-
-	writer.closed = true
-
 	errZlib := writer.zw.Close()
 	errSync := writer.file.Sync()
 	errFile := writer.file.Close()
+
+	writer.closed = true
 	writer.file = nil
 
 	return errors.Join(errZlib, errSync, errFile)
@@ -29,10 +26,6 @@
 // Publication is no-clobber: it links tmpRelPath to the object path and treats
 // existing destination objects as success.
 func (writer *streamWriter) finalize() (objectid.ObjectID, error) {
-	if writer.finalized {
-		return writer.finalID, writer.finalErr
-	}
-
 	writer.finalized = true
 
 	var zero objectid.ObjectID
@@ -40,22 +33,16 @@
 	if !writer.closed {
 		err := writer.Close()
 		if err != nil {
-			writer.finalErr = err
-
 			return zero, err
 		}
 	}
 
 	if writer.fullMode && !writer.headerDone {
-		writer.finalErr = errors.New("objectstore/loose: missing full object header")
-
-		return zero, writer.finalErr
+		return zero, errors.New("objectstore/loose: missing full object header")
 	}
 
 	if writer.expectedContentLeft != 0 {
-		writer.finalErr = errors.New("objectstore/loose: object content shorter than declared size")
-
-		return zero, writer.finalErr
+		return zero, errors.New("objectstore/loose: object content shorter than declared size")
 	}
 
 	idBytes := writer.hash.Sum(nil)
@@ -62,15 +49,11 @@
 
 	id, err := objectid.FromBytes(writer.store.algo, idBytes)
 	if err != nil {
-		writer.finalErr = err
-
 		return zero, err
 	}
 
 	relPath, err := writer.store.objectPath(id)
 	if err != nil {
-		writer.finalErr = err
-
 		return zero, err
 	}
 
@@ -78,8 +61,6 @@
 
 	err = writer.store.root.MkdirAll(dir, 0o755)
 	if err != nil {
-		writer.finalErr = err
-
 		return zero, err
 	}
 
@@ -94,7 +75,6 @@
 	err = writer.store.root.Link(writer.tmpRelPath, relPath)
 	if err != nil {
 		if errors.Is(err, fs.ErrExist) {
-			writer.finalID = id
 			cleanup = false
 			_ = writer.store.root.Remove(writer.tmpRelPath)
 
@@ -101,12 +81,9 @@
 			return id, nil
 		}
 
-		writer.finalErr = err
-
 		return zero, err
 	}
 
-	writer.finalID = id
 	cleanup = false
 
 	return id, nil
--- a/objectstore/objectstore.go
+++ b/objectstore/objectstore.go
@@ -25,18 +25,32 @@
 	// In a valid repository, hashing this payload with the same algorithm yields
 	// the requested object ID. Readers should treat this as a repository
 	// invariant and should not re-verify it on every read.
+	//
+	// Any read-time integrity verification beyond producing this payload is
+	// implementation-defined.
 	ReadBytesFull(id objectid.ObjectID) ([]byte, error)
 
 	// ReadBytesContent reads an object's type and content bytes.
+	//
+	// Any read-time integrity verification beyond producing this payload is
+	// implementation-defined.
 	ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error)
 
 	// ReadReaderFull reads a full serialized object stream as "type size\0content".
+	//
 	// Caller must close the returned reader.
+	//
+	// Any read-time integrity verification performed while producing the stream
+	// is implementation-defined.
 	ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error)
 
 	// ReadReaderContent reads an object's type, declared content length,
 	// and content stream.
+	//
 	// Caller must close the returned reader.
+	//
+	// Any read-time integrity verification performed while producing the stream
+	// is implementation-defined.
 	ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error)
 
 	// ReadSize reads an object's declared content length.
@@ -43,9 +57,15 @@
 	//
 	// This is equivalent to ReadHeader(...).size and may be cheaper than
 	// ReadHeader when callers do not need object type.
+	//
+	// Any read-time integrity verification performed to produce the size is
+	// implementation-defined.
 	ReadSize(id objectid.ObjectID) (int64, error)
 
 	// ReadHeader reads an object's type and declared content length.
+	//
+	// Any read-time integrity verification performed to produce the header is
+	// implementation-defined.
 	ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error)
 
 	// Refresh updates any backend-local discovery/cache view of on-disk objects.
@@ -54,6 +74,9 @@
 	Refresh() error
 
 	// Close releases resources associated with the backend.
+	//
+	// Repeated calls to Close are undefined behavior unless the implementation
+	// explicitly documents otherwise.
 	Close() error
 }
 
--