ref: c7e88852add91975a0924088b8c09ff5b917ac9b
dir: /pack_pack.go/
package furgit
import (
"bytes"
"git.sr.ht/~runxiyu/furgit/internal/zlib"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"sync"
"syscall"
"git.sr.ht/~runxiyu/furgit/internal/bufpool"
)
// Pack file header constants: every version-2 pack begins with the
// four bytes "PACK" followed by a big-endian version number (checked
// in openPackFile).
const (
	packMagic    = 0x5041434b // "PACK" read as a big-endian uint32
	packVersion2 = 2          // the only pack version this reader accepts
)
// packlocation identifies where an object's pack entry begins: which
// pack file holds it and the byte offset of the entry header.
type packlocation struct {
	PackPath string // pack file path, relative to the repository (resolved via repo.repoPath)
	Offset   uint64 // byte offset of the object's entry header within the pack
}
// packRead returns the stored object identified by id, first locating
// it through the pack indexes and then decoding it from the owning
// pack file.
func (repo *Repository) packRead(id Hash) (StoredObject, error) {
	location, findErr := repo.packIndexFind(id)
	if findErr != nil {
		return nil, findErr
	}
	return repo.packReadAt(location, id)
}
// packIndexFind resolves id to a pack location. The multi-pack index
// is consulted first; when it is absent or does not know the object,
// each individual pack index is tried in turn. Returns ErrNotFound
// when no index contains the object; any other error is surfaced
// immediately.
func (repo *Repository) packIndexFind(id Hash) (packlocation, error) {
	var none packlocation
	midx, midxErr := repo.multiPackIndex()
	switch {
	case midxErr == nil:
		loc, lookupErr := midx.lookup(id)
		switch {
		case lookupErr == nil:
			return loc, nil
		case !errors.Is(lookupErr, ErrNotFound):
			return none, lookupErr
		}
		// Not in the multi-pack index; fall through to the per-pack
		// indexes below.
	case !errors.Is(midxErr, ErrNotFound):
		return none, midxErr
	}
	idxs, idxErr := repo.packIndexes()
	if idxErr != nil {
		return none, idxErr
	}
	for _, idx := range idxs {
		loc, lookupErr := idx.lookup(id)
		switch {
		case lookupErr == nil:
			return loc, nil
		case errors.Is(lookupErr, ErrNotFound):
			continue
		default:
			return none, lookupErr
		}
	}
	return none, ErrNotFound
}
// packReadAt decodes the object at loc and parses it into a
// StoredObject. want is the hash the caller expects for this object;
// it is forwarded to the parser. The pooled body buffer is released
// before returning regardless of parse outcome.
func (repo *Repository) packReadAt(loc packlocation, want Hash) (StoredObject, error) {
	ty, body, err := repo.packBodyResolveAtLocation(loc)
	if err != nil {
		return nil, err
	}
	raw := body.Bytes()
	// NOTE(review): content verification against `want` is currently
	// disabled — confirm whether verifyTypedObject should be enabled.
	// if !repo.verifyTypedObject(ty, raw, want) {
	// 	body.Release()
	// 	return nil, ErrInvalidObject
	// }
	parsed, parseErr := parseObjectBody(ty, want, raw, repo)
	body.Release()
	return parsed, parseErr
}
// packBodyResolveAtLocation opens (or fetches the cached) pack named
// by loc and fully resolves the object at loc.Offset, including delta
// application. The caller must Release the returned buffer.
func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) {
	pack, openErr := repo.packFile(loc.PackPath)
	if openErr != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, openErr
	}
	return repo.packBodyResolveWithin(pack, loc.Offset)
}
// packTypeSizeAtLocation reports the type and declared inflated size
// of the object at loc. seen carries the delta-cycle guard used by the
// recursive resolution (may be nil; see packTypeSizeWithin).
func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) {
	pack, openErr := repo.packFile(loc.PackPath)
	if openErr != nil {
		return ObjectTypeInvalid, 0, openErr
	}
	return repo.packTypeSizeWithin(pack, loc.Offset, seen)
}
// packTypeSizeByID reports the type and declared size of the object
// identified by id, checking the pack indexes first and falling back
// to loose storage when the object is not packed. seen is the
// delta-cycle guard threaded through recursive resolution.
func (repo *Repository) packTypeSizeByID(id Hash, seen map[packKey]struct{}) (ObjectType, int64, error) {
	loc, findErr := repo.packIndexFind(id)
	switch {
	case findErr == nil:
		return repo.packTypeSizeAtLocation(loc, seen)
	case !errors.Is(findErr, ErrNotFound):
		return ObjectTypeInvalid, 0, findErr
	}
	// Not in any pack: try the loose object store.
	return repo.looseTypeSize(id)
}
// packHeaderRead decodes a pack entry header from r. The first byte
// carries the object type in bits 4-6 and the low four bits of the
// size; while the 0x80 continuation bit is set, further bytes each
// contribute seven more size bits (little-endian groups).
//
// The original implementation silently accepted overlong varints:
// shift counts at or beyond the int width drop bits in Go, so a
// corrupt header could yield a wrong size with no error. The size is
// now accumulated in int64 with explicit overflow detection, and
// sizes that do not fit the platform's int are rejected.
func packHeaderRead(r io.Reader) (ObjectType, int, error) {
	var b [1]byte
	if _, err := io.ReadFull(r, b[:]); err != nil {
		return ObjectTypeInvalid, 0, err
	}
	ty := ObjectType((b[0] >> 4) & 0x07)
	size := int64(b[0] & 0x0f)
	shift := uint(4)
	for b[0]&0x80 != 0 {
		if _, err := io.ReadFull(r, b[:]); err != nil {
			return ObjectTypeInvalid, 0, err
		}
		group := int64(b[0] & 0x7f)
		// Round-trip check: if shifting the group loses bits or sets
		// the sign bit, the declared size cannot be represented.
		if group<<shift>>shift != group {
			return ObjectTypeInvalid, 0, fmt.Errorf("furgit: pack: object size varint overflows")
		}
		size |= group << shift
		shift += 7
	}
	// Guard 32-bit platforms where int is narrower than int64.
	if size != int64(int(size)) {
		return ObjectTypeInvalid, 0, fmt.Errorf("furgit: pack: object size %d exceeds int range", size)
	}
	return ty, int(size), nil
}
// packSectionInflate inflates one zlib-compressed section from r.
//
// When sizeHint is positive it is trusted as the exact inflated size:
// exactly that many bytes are read, and a one-byte probe then confirms
// the stream is exhausted (any extra data marks the entry corrupt).
// With no hint the stream is drained chunk-by-chunk until EOF.
//
// The returned buffer is owned by the caller, which must Release it;
// on error nothing is leaked.
func packSectionInflate(r io.Reader, sizeHint int) (bufpool.Buffer, error) {
	zr, err := zlib.NewReader(r)
	if err != nil {
		return bufpool.Buffer{}, err
	}
	defer func() { _ = zr.Close() }()
	if sizeHint > 0 {
		body := bufpool.Borrow(sizeHint)
		body.Resize(sizeHint)
		_, err := io.ReadFull(zr, body.Bytes())
		if err != nil {
			body.Release()
			return bufpool.Buffer{}, err
		}
		// Probe one byte past the expected end: a well-formed stream
		// must report io.EOF here.
		// NOTE(review): assumes zr.Read returns (0, io.EOF) rather
		// than (0, nil) at end of stream — confirm against the
		// internal zlib reader's contract.
		var extra [1]byte
		_, err = zr.Read(extra[:])
		if err != io.EOF {
			body.Release()
			if err == nil {
				// Trailing bytes beyond the declared size: corrupt.
				return bufpool.Buffer{}, ErrInvalidObject
			}
			return bufpool.Buffer{}, err
		}
		return body, nil
	}
	// No size hint: grow a pooled buffer via fixed-size reads until EOF.
	body := bufpool.Borrow(bufpool.DefaultBufferCap)
	var scratch [32 * 1024]byte
	for {
		n, err := zr.Read(scratch[:])
		if n > 0 {
			body.Append(scratch[:n])
		}
		if err == io.EOF {
			return body, nil
		}
		if err != nil {
			body.Release()
			return bufpool.Buffer{}, err
		}
	}
}
// packDeltaResolveOfs resolves an ofs-delta entry located at
// deltaOffset in pf: it reads the backward-distance varint from r,
// resolves the base object earlier in the same pack, inflates the
// delta payload that follows, and applies it to the base.
//
// A valid base lies strictly before the delta, so the distance must
// satisfy 0 < dist < deltaOffset. In particular dist == 0 is rejected:
// it would make the entry its own base and recurse forever on a
// crafted pack (the previous guard only caught dist >= deltaOffset).
func (repo *Repository) packDeltaResolveOfs(pf *packFile, deltaOffset uint64, r io.Reader) (ObjectType, bufpool.Buffer, error) {
	dist, err := packDeltaReadOfsDistance(r)
	if err != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	// dist == 0 aliases the entry itself; dist >= deltaOffset points
	// before the start of the pack. Both are corrupt.
	if dist == 0 || dist >= deltaOffset {
		return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject
	}
	baseOfs := deltaOffset - dist
	ty, base, err := repo.packBodyResolveWithin(pf, baseOfs)
	if err != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	delta, err := packSectionInflate(r, 0)
	if err != nil {
		base.Release()
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	out, err := packDeltaApply(base, delta)
	delta.Release()
	base.Release()
	if err != nil {
		out.Release()
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	// The delta inherits its base's object type.
	return ty, out, nil
}
func packDeltaReadOfsDistance(r io.Reader) (uint64, error) {
var b [1]byte
_, err := io.ReadFull(r, b[:])
if err != nil {
return 0, err
}
dist := uint64(b[0] & 0x7f)
for (b[0] & 0x80) != 0 {
_, err = io.ReadFull(r, b[:])
if err != nil {
return 0, err
}
dist = ((dist + 1) << 7) + uint64(b[0]&0x7f)
}
return dist, nil
}
// packBodyResolveByID materializes the object identified by id,
// preferring packed storage and falling back to the loose object
// store. The caller must Release the returned buffer.
func (repo *Repository) packBodyResolveByID(id Hash) (ObjectType, bufpool.Buffer, error) {
	loc, findErr := repo.packIndexFind(id)
	switch {
	case findErr == nil:
		return repo.packBodyResolveAtLocation(loc)
	case !errors.Is(findErr, ErrNotFound):
		return ObjectTypeInvalid, bufpool.Buffer{}, findErr
	}
	// Not packed: read it loose and wrap the owned bytes in a buffer.
	ty, raw, looseErr := repo.looseReadTyped(id)
	if looseErr != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, looseErr
	}
	return ty, bufpool.FromOwned(raw), nil
}
// packKey uniquely names one pack entry — (pack file, offset) — and is
// used as the map key for delta-cycle detection during recursive
// resolution.
type packKey struct {
	path string // the pack's relPath (see packFile.relPath)
	ofs  uint64 // entry offset within that pack
}
// packTypeSizeWithin reports the object type and declared inflated
// size of the entry at ofs in pf, chasing delta chains to find the
// effective type (a delta inherits its base's type; the size is the
// one declared by the entry itself).
//
// seen tracks the (pack, offset) pairs currently on the resolution
// path; revisiting one means the pack encodes a delta cycle, which is
// reported as ErrInvalidObject instead of recursing forever. A nil
// seen is allocated on first use, and each frame removes its own key
// on the way out.
func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) {
	if pf == nil {
		return ObjectTypeInvalid, 0, ErrInvalidObject
	}
	if seen == nil {
		seen = make(map[packKey]struct{})
	}
	self := packKey{path: pf.relPath, ofs: ofs}
	if _, onPath := seen[self]; onPath {
		// Already resolving this entry: delta cycle.
		return ObjectTypeInvalid, 0, ErrInvalidObject
	}
	seen[self] = struct{}{}
	defer delete(seen, self)

	cur, curErr := pf.cursor(ofs)
	if curErr != nil {
		return ObjectTypeInvalid, 0, curErr
	}
	ty, rawSize, hdrErr := packHeaderRead(cur)
	if hdrErr != nil {
		return ObjectTypeInvalid, 0, hdrErr
	}
	declared := int64(rawSize)
	switch ty {
	case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag:
		// Plain entry: the header already states the final size.
		return ty, declared, nil
	case ObjectTypeRefDelta:
		var baseID Hash
		if _, err := io.ReadFull(cur, baseID.data[:repo.hashSize]); err != nil {
			return ObjectTypeInvalid, 0, err
		}
		baseID.size = repo.hashSize
		baseTy, _, baseErr := repo.packTypeSizeByID(baseID, seen)
		if baseErr != nil {
			return ObjectTypeInvalid, 0, baseErr
		}
		return baseTy, declared, nil
	case ObjectTypeOfsDelta:
		dist, distErr := packDeltaReadOfsDistance(cur)
		if distErr != nil {
			return ObjectTypeInvalid, 0, distErr
		}
		if dist >= ofs {
			// The base would lie at or before the start of the pack.
			return ObjectTypeInvalid, 0, ErrInvalidObject
		}
		baseTy, _, baseErr := repo.packTypeSizeWithin(pf, ofs-dist, seen)
		if baseErr != nil {
			return ObjectTypeInvalid, 0, baseErr
		}
		return baseTy, declared, nil
	case ObjectTypeInvalid, ObjectTypeFuture:
		return ObjectTypeInvalid, 0, ErrInvalidObject
	default:
		return ObjectTypeInvalid, 0, ErrInvalidObject
	}
}
// packBodyResolveWithin materializes the object stored at ofs in pf,
// recursively resolving delta entries against their base objects.
// Returns the effective type (a delta inherits its base's type) and
// the fully inflated body; the caller must Release the buffer.
//
// NOTE(review): unlike packTypeSizeWithin, this resolution path has no
// `seen` set, so a crafted pack whose ref-delta entries reference each
// other in a cycle could recurse without bound — confirm and consider
// threading cycle detection through this path as well.
func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) {
	r, err := pf.cursor(ofs)
	if err != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	ty, size, err := packHeaderRead(r)
	if err != nil {
		return ObjectTypeInvalid, bufpool.Buffer{}, err
	}
	switch ty {
	case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag:
		// Plain entry: the header size is the exact inflated size.
		body, err := packSectionInflate(r, size)
		return ty, body, err
	case ObjectTypeRefDelta:
		// Delta whose base is named by hash; the base may live in any
		// pack or in the loose store (via packBodyResolveByID).
		var base Hash
		_, err := io.ReadFull(r, base.data[:repo.hashSize])
		if err != nil {
			return ObjectTypeInvalid, bufpool.Buffer{}, err
		}
		base.size = repo.hashSize
		// Inflate the delta payload before resolving the base, since
		// both live sequentially in the same cursor stream.
		delta, err := packSectionInflate(r, 0)
		if err != nil {
			return ObjectTypeInvalid, bufpool.Buffer{}, err
		}
		bt, body, err := repo.packBodyResolveByID(base)
		if err != nil {
			delta.Release()
			return ObjectTypeInvalid, bufpool.Buffer{}, err
		}
		out, err := packDeltaApply(body, delta)
		delta.Release()
		body.Release()
		if err != nil {
			out.Release()
			return ObjectTypeInvalid, bufpool.Buffer{}, err
		}
		return bt, out, nil
	case ObjectTypeOfsDelta:
		// Delta whose base lives earlier in this same pack.
		return repo.packDeltaResolveOfs(pf, ofs, r)
	case ObjectTypeInvalid, ObjectTypeFuture:
		return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject
	default:
		return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject
	}
}
// packDeltaApply reconstructs an object by applying a git delta to its
// base. The delta begins with two varints (expected base size, result
// size), followed by instructions: an opcode with the high bit set
// copies a range from the base — its low seven bits select which bytes
// of a little-endian (offset, length) pair follow — while opcodes
// 1..127 insert that many literal bytes from the delta stream. Opcode
// 0 is reserved and treated as corruption.
//
// The returned buffer is owned by the caller, which must Release it;
// on error nothing is leaked.
//
// Changes from the previous version: the seven near-identical
// flag-byte readers are folded into one helper, and the bounds checks
// are overflow-safe — `off+n > len(base)` could overflow a 32-bit int
// (off reaches 2^31-1), letting a corrupt delta slip past the check
// and panic in copy. An explicit `off < 0` guard covers the 32-bit
// case where the fourth offset byte sets the sign bit.
func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {
	pos := 0
	baseBytes := base.Bytes()
	deltaBytes := delta.Bytes()
	srcSize, err := packVarintRead(deltaBytes, &pos)
	if err != nil {
		return bufpool.Buffer{}, err
	}
	dstSize, err := packVarintRead(deltaBytes, &pos)
	if err != nil {
		return bufpool.Buffer{}, err
	}
	// The delta must have been built against a base of this exact size.
	if srcSize != len(baseBytes) {
		return bufpool.Buffer{}, ErrInvalidObject
	}
	out := bufpool.Borrow(dstSize)
	out.Resize(dstSize)
	outBytes := out.Bytes()
	outPos := 0
	// readLE assembles a little-endian integer for a copy instruction:
	// bit (firstBit+i) of op says whether byte i of the value is
	// present in the stream. Returns ok=false on truncation.
	readLE := func(op byte, firstBit, nbytes uint) (int, bool) {
		v := 0
		for i := uint(0); i < nbytes; i++ {
			if op&(1<<(firstBit+i)) == 0 {
				continue
			}
			if pos >= len(deltaBytes) {
				return 0, false
			}
			v |= int(deltaBytes[pos]) << (8 * i)
			pos++
		}
		return v, true
	}
	fail := func() (bufpool.Buffer, error) {
		out.Release()
		return bufpool.Buffer{}, ErrInvalidObject
	}
	for pos < len(deltaBytes) {
		op := deltaBytes[pos]
		pos++
		switch {
		case op&0x80 != 0:
			// Copy instruction: offset from bits 0-3, length from bits 4-6.
			off, ok := readLE(op, 0, 4)
			if !ok {
				return fail()
			}
			n, ok := readLE(op, 4, 3)
			if !ok {
				return fail()
			}
			if n == 0 {
				// A zero length encodes 64 KiB by convention.
				n = 0x10000
			}
			// Overflow-safe bounds checks: never compute off+n or
			// outPos+n before both operands are known in range.
			if off < 0 || n > len(baseBytes) || off > len(baseBytes)-n || outPos > len(outBytes)-n {
				return fail()
			}
			copy(outBytes[outPos:], baseBytes[off:off+n])
			outPos += n
		case op != 0:
			// Insert instruction: opcode is the literal byte count (1..127).
			n := int(op)
			if pos+n > len(deltaBytes) || outPos > len(outBytes)-n {
				return fail()
			}
			copy(outBytes[outPos:], deltaBytes[pos:pos+n])
			pos += n
			outPos += n
		default:
			// Opcode 0 is reserved.
			return fail()
		}
	}
	// The instructions must fill exactly the declared result size.
	if outPos != len(outBytes) {
		return fail()
	}
	return out, nil
}
// packVarintRead decodes the little-endian base-128 size varint used
// at the head of a delta, starting at *pos in buf and advancing *pos
// past it. Returns ErrInvalidObject when buf ends mid-varint.
func packVarintRead(buf []byte, pos *int) (int, error) {
	value, shift := 0, 0
	for {
		if len(buf) <= *pos {
			return 0, ErrInvalidObject
		}
		cur := buf[*pos]
		*pos++
		value |= int(cur&0x7f) << shift
		if cur&0x80 == 0 {
			return value, nil
		}
		shift += 7
	}
}
// packFile is a version-2 pack file mapped read-only into memory.
type packFile struct {
	relPath string    // path relative to the repository, also the cache key in Repository.packFiles
	size    int64     // total file size in bytes (header included)
	data    []byte    // mmap'ed file contents; nil after Close
	closeMu sync.Once // ensures the mapping is unmapped exactly once
}
// openPackFile opens the pack at absPath, validates the 12-byte header
// (magic "PACK", version 2), and memory-maps the whole file read-only.
// rel is recorded as the pack's cache key. The file descriptor is
// closed once the mapping is established; the mapping itself lives
// until packFile.Close.
func openPackFile(absPath, rel string) (*packFile, error) {
	f, err := os.Open(absPath)
	if err != nil {
		return nil, err
	}
	info, err := f.Stat()
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	total := info.Size()
	if total < 12 {
		// Too small to even hold the pack header.
		_ = f.Close()
		return nil, ErrInvalidObject
	}
	var header [12]byte
	if _, err = io.ReadFull(f, header[:]); err != nil {
		_ = f.Close()
		return nil, err
	}
	if binary.BigEndian.Uint32(header[:4]) != packMagic ||
		binary.BigEndian.Uint32(header[4:8]) != packVersion2 {
		_ = f.Close()
		return nil, ErrInvalidObject
	}
	region, err := syscall.Mmap(
		int(f.Fd()),
		0,
		int(total),
		syscall.PROT_READ,
		syscall.MAP_PRIVATE,
	)
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	if err = f.Close(); err != nil {
		_ = syscall.Munmap(region)
		return nil, err
	}
	return &packFile{
		relPath: rel,
		size:    total,
		data:    region,
	}, nil
}
// Close releases the pack's memory mapping. It is safe on a nil
// receiver and idempotent: only the first call performs the unmap and
// reports its result; later calls return nil.
//
// Fix: the unmap result was previously recorded behind a
// `closeErr == nil` guard that was always true (closeErr had just
// been declared), so the vacuous condition is replaced with a direct
// assignment.
func (pf *packFile) Close() error {
	if pf == nil {
		return nil
	}
	var closeErr error
	pf.closeMu.Do(func() {
		if len(pf.data) > 0 {
			closeErr = syscall.Munmap(pf.data)
			// Clear the slice so a dangling mapping is never reused.
			pf.data = nil
		}
	})
	return closeErr
}
// cursor returns a reader positioned at byte ofs of the mapped pack.
// Invalid packs (nil receiver, negative size) and out-of-range offsets
// produce errors; ofs == size yields an empty reader.
func (pf *packFile) cursor(ofs uint64) (io.Reader, error) {
	if pf == nil {
		return nil, ErrInvalidObject
	}
	if pf.size < 0 {
		return nil, ErrInvalidObject
	}
	if ofs > uint64(pf.size) {
		return nil, fmt.Errorf("furgit: pack: offset %d beyond %s", ofs, pf.relPath)
	}
	// Defensive: unreachable while size is a non-negative int64
	// (ofs <= size <= MaxInt64 at this point), kept as a guard for
	// the slice expression below.
	if ofs > uint64(math.MaxInt64) {
		return nil, fmt.Errorf("furgit: pack: offset %d too large", ofs)
	}
	return bytes.NewReader(pf.data[ofs:]), nil
}
// packFile returns the cached mapped pack for rel, opening and caching
// it on first use. Concurrent first opens race benignly: whichever
// goroutine loses the race closes its mapping and adopts the cached one.
func (repo *Repository) packFile(rel string) (*packFile, error) {
	repo.packFilesMu.RLock()
	cached, hit := repo.packFiles[rel]
	repo.packFilesMu.RUnlock()
	if hit {
		return cached, nil
	}
	opened, err := openPackFile(repo.repoPath(rel), rel)
	if err != nil {
		return nil, err
	}
	repo.packFilesMu.Lock()
	defer repo.packFilesMu.Unlock()
	if winner, raced := repo.packFiles[rel]; raced {
		// Another goroutine cached this pack first; discard ours.
		_ = opened.Close()
		return winner, nil
	}
	repo.packFiles[rel] = opened
	return opened, nil
}