shithub: furgit

ref: dff530bd93b9b7200d5d492b4ccb86c17daadf21
dir: /format/pack/ingest/resolve.go/

View raw version
package ingest

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"slices"

	deltaapply "codeberg.org/lindenii/furgit/format/delta/apply"
	packfmt "codeberg.org/lindenii/furgit/format/pack"
	"codeberg.org/lindenii/furgit/internal/compress/zlib"
	"codeberg.org/lindenii/furgit/internal/intconv"
	"codeberg.org/lindenii/furgit/objectheader"
	"codeberg.org/lindenii/furgit/objectid"
	"codeberg.org/lindenii/furgit/objecttype"
)

var errExternalThinBase = errors.New("format/pack/ingest: external thin base required")

// resolveAll resolves all delta records and finalizes ObjectID/RealType for every record.
// resolveAll resolves all delta records and finalizes ObjectID/RealType for
// every record.
//
// Ref-delta records whose base object is not present in this pack (thin
// packs) are collected into state.unresolvedRefDeltas rather than failing
// the whole resolution pass; any other error aborts immediately.
func resolveAll(state *ingestState) error {
	state.unresolvedRefDeltas = state.unresolvedRefDeltas[:0]

	for idx := range state.records {
		if state.records[idx].resolved {
			continue
		}

		// Fresh cycle-detection set per top-level resolution;
		// resolveRecord removes its own entries on return.
		visiting := make(map[int]struct{})

		ty, content, err := resolveRecord(state, idx, visiting)
		if err != nil {
			if errors.Is(err, errExternalThinBase) {
				state.unresolvedRefDeltas = append(state.unresolvedRefDeltas, idx)

				continue
			}

			return err
		}

		record := &state.records[idx]
		if record.resolved {
			// resolveRecord finalizes the record (hash, objectID,
			// realType, cache entry) on every successful non-cached
			// path; skip re-hashing the same canonical content.
			continue
		}

		id, err := hashCanonicalObject(state.algo, ty, content)
		if err != nil {
			return err
		}

		record.realType = ty
		record.objectID = id
		record.resolved = true
		state.objectToRecord[id] = idx
		state.baseCache.add(idx, ty, content)
	}

	return nil
}

// resolveRecord resolves one record and returns canonical type/content.
func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (objecttype.Type, []byte, error) {
	if idx < 0 || idx >= len(state.records) {
		return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record index out of bounds")
	}

	if _, ok := visiting[idx]; ok {
		return objecttype.TypeInvalid, nil, &ErrDeltaCycle{Offset: state.records[idx].offset}
	}

	visiting[idx] = struct{}{}
	defer delete(visiting, idx)

	record := &state.records[idx]
	if ty, content, ok := state.baseCache.get(idx); ok {
		return ty, content, nil
	}

	if packfmt.IsBaseObjectType(record.packedType) {
		ty, content, err := readBaseRecordContent(state, idx)
		if err != nil {
			return objecttype.TypeInvalid, nil, err
		}

		if record.resolved {
			state.baseCache.add(idx, record.realType, content)

			return record.realType, content, nil
		}

		id, err := hashCanonicalObject(state.algo, ty, content)
		if err != nil {
			return objecttype.TypeInvalid, nil, err
		}

		record.objectID = id
		record.realType = ty
		record.resolved = true
		state.objectToRecord[id] = idx
		state.baseCache.add(idx, ty, content)

		return ty, content, nil
	}

	var (
		baseType    objecttype.Type
		baseContent []byte
		err         error
	)
	switch record.packedType {
	case objecttype.TypeOfsDelta:
		baseIdx, ok := state.offsetToRecord[record.baseOffset]
		if !ok {
			return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
				Offset: record.offset,
				Reason: "missing ofs-delta base entry",
			}
		}

		baseType, baseContent, err = resolveRecord(state, baseIdx, visiting)
		if err != nil {
			return objecttype.TypeInvalid, nil, err
		}
	case objecttype.TypeRefDelta:
		baseIdx, ok := state.objectToRecord[record.baseObject]
		if ok {
			baseType, baseContent, err = resolveRecord(state, baseIdx, visiting)
			if err != nil {
				return objecttype.TypeInvalid, nil, err
			}
		} else {
			return objecttype.TypeInvalid, nil, errExternalThinBase
		}
	case objecttype.TypeInvalid,
		objecttype.TypeCommit,
		objecttype.TypeTree,
		objecttype.TypeBlob,
		objecttype.TypeTag,
		objecttype.TypeFuture:
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: "unsupported delta type",
		}
	default:
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: "unsupported delta type",
		}
	}

	ty, content, err := applyDeltaRecord(state, idx, baseType, baseContent)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}

	id, err := hashCanonicalObject(state.algo, ty, content)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}

	record.objectID = id
	record.realType = ty
	record.resolved = true
	state.objectToRecord[id] = idx
	state.baseCache.add(idx, ty, content)

	return ty, content, nil
}

// readBaseRecordContent reads canonical base content for one non-delta record.
func readBaseRecordContent(state *ingestState, idx int) (objecttype.Type, []byte, error) {
	record := state.records[idx]
	if !packfmt.IsBaseObjectType(record.packedType) {
		return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record %d is not a base object", idx)
	}

	content, err := inflateRecordPayload(state, idx)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}

	if int64(len(content)) != record.declaredSize {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("base content size mismatch got %d want %d", len(content), record.declaredSize),
		}
	}

	return record.packedType, content, nil
}

// applyDeltaRecord applies one delta record onto base content.
func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, baseContent []byte) (objecttype.Type, []byte, error) {
	record := state.records[idx]
	if record.packedType != objecttype.TypeOfsDelta && record.packedType != objecttype.TypeRefDelta {
		return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record %d is not a delta record", idx)
	}

	deltaPayload, err := inflateRecordPayload(state, idx)
	if err != nil {
		return objecttype.TypeInvalid, nil, err
	}

	if int64(len(deltaPayload)) != record.declaredSize {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("delta payload size mismatch got %d want %d", len(deltaPayload), record.declaredSize),
		}
	}

	srcSize, dstSize, err := readDeltaHeaderSizes(deltaPayload)
	if err != nil {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("read delta header: %v", err),
		}
	}

	if srcSize != len(baseContent) {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("delta source size mismatch got %d want %d", srcSize, len(baseContent)),
		}
	}

	content, err := deltaapply.Apply(baseContent, deltaPayload)
	if err != nil {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("apply delta: %v", err),
		}
	}

	if len(content) != dstSize {
		return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{
			Offset: record.offset,
			Reason: fmt.Sprintf("delta result size mismatch got %d want %d", len(content), dstSize),
		}
	}

	return baseType, content, nil
}

// inflateRecordPayload inflates one record's zlib payload from pack file.
func inflateRecordPayload(state *ingestState, idx int) ([]byte, error) {
	record := state.records[idx]
	if record.packedLen < uint64(record.headerLen) {
		return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: "entry packed span underflow"}
	}

	compressedOffset := record.offset + uint64(record.headerLen)
	compressedLen := record.packedLen - uint64(record.headerLen)

	compressedOffsetInt64, err := intconv.Uint64ToInt64(compressedOffset)
	if err != nil {
		return nil, err
	}

	compressedLenInt64, err := intconv.Uint64ToInt64(compressedLen)
	if err != nil {
		return nil, err
	}

	section := io.NewSectionReader(state.packFile, compressedOffsetInt64, compressedLenInt64)

	reader, err := zlib.NewReader(section)
	if err != nil {
		return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("open payload zlib: %v", err)}
	}

	defer func() { _ = reader.Close() }()

	out, err := io.ReadAll(reader)
	if err != nil {
		return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("inflate payload: %v", err)}
	}

	return out, nil
}

// hashCanonicalObject hashes canonical object bytes (header+content).
func hashCanonicalObject(algo objectid.Algorithm, ty objecttype.Type, content []byte) (objectid.ObjectID, error) {
	header, ok := objectheader.Encode(ty, int64(len(content)))
	if !ok {
		return objectid.ObjectID{}, fmt.Errorf("format/pack/ingest: encode object header for type %d", ty)
	}

	hashImpl, err := algo.New()
	if err != nil {
		return objectid.ObjectID{}, err
	}

	_, _ = hashImpl.Write(header)
	_, _ = hashImpl.Write(content)

	return objectid.FromBytes(algo, hashImpl.Sum(nil))
}

// unresolvedThinBaseIDs returns sorted unique unresolved ref base IDs.
func unresolvedThinBaseIDs(state *ingestState) []objectid.ObjectID {
	seen := make(map[objectid.ObjectID]struct{})

	for _, idx := range state.unresolvedRefDeltas {
		record := state.records[idx]
		if record.packedType != objecttype.TypeRefDelta {
			continue
		}

		seen[record.baseObject] = struct{}{}
	}

	out := make([]objectid.ObjectID, 0, len(seen))
	for id := range seen {
		out = append(out, id)
	}

	slices.SortFunc(out, func(a, b objectid.ObjectID) int {
		return bytes.Compare(a.RawBytes(), b.RawBytes())
	})

	return out
}