shithub: wl9m

ref: 9c25e3a38f465444d684a58a24f7567e6b59299c
dir: /plan9/lz4.c/

View raw version
/*
 * lz4.c - Minimal LZ4 block decompressor for Plan 9
 *
 * This implements only LZ4_decompress_safe() which is all drawserv needs.
 * The Linux side uses the real liblz4 for compression.
 *
 * LZ4 block format is simple:
 * - Sequences of: [token] [literals...] [offset] [extra match len...]
 * - Token high nibble: literal length (15 = more bytes follow)
 * - Token low nibble: match length - 4 (15 = more bytes follow)
 * - Offset: 2 bytes little-endian, distance back to copy from
 *
 * Public domain / Unlicense - use freely
 */

#include <u.h>
#include <libc.h>
#include "lz4.h"

#define LZ4_MIN_MATCH 4

/*
 * Read variable-length integer
 * If initial value is 15, keep adding bytes until one is < 255
 */
static int
readvlen(uchar **pp, uchar *end, int initial)
{
	int len = initial;
	uchar *p = *pp;
	
	if(initial == 15){
		int s;
		do {
			if(p >= end)
				return -1;
			s = *p++;
			len += s;
		} while(s == 255);
	}
	*pp = p;
	return len;
}

/*
 * LZ4_decompress_safe - decompress LZ4 block with bounds checking
 *
 * src: compressed data
 * dst: output buffer (must be pre-allocated)
 * compressedSize: exact size of compressed data
 * dstCapacity: size of output buffer
 *
 * Returns: number of bytes decompressed, or negative on error
 */
int
LZ4_decompress_safe(char *src, char *dst, int compressedSize, int dstCapacity)
{
	uchar *ip = (uchar*)src;           /* input pointer */
	uchar *iend = ip + compressedSize; /* input end */
	uchar *op = (uchar*)dst;           /* output pointer */
	uchar *oend = op + dstCapacity;    /* output end */
	uchar *cpy;
	uchar *match;
	int token, litlen, matchlen;
	int offset;
	
	if(compressedSize <= 0 || dstCapacity <= 0)
		return -1;
	
	for(;;){
		/* Get token */
		if(ip >= iend)
			return -1;
		token = *ip++;
		
		/* Decode literal length */
		litlen = token >> 4;
		litlen = readvlen(&ip, iend, litlen);
		if(litlen < 0)
			return -1;
		
		/* Copy literals */
		cpy = op + litlen;
		if(cpy > oend || ip + litlen > iend){
			/* Check for valid end condition */
			if(cpy == oend && ip + litlen == iend){
				/* Last sequence - just literals, no match */
				memmove(op, ip, litlen);
				return cpy - (uchar*)dst;
			}
			return -1;  /* overflow */
		}
		memmove(op, ip, litlen);
		op = cpy;
		ip += litlen;
		
		/* Check for end of block */
		if(ip >= iend)
			break;
		
		/* Decode match offset (little-endian) */
		if(ip + 2 > iend)
			return -1;
		offset = ip[0] | (ip[1] << 8);
		ip += 2;
		
		if(offset == 0)
			return -1;  /* invalid offset */
		
		match = op - offset;
		if(match < (uchar*)dst)
			return -1;  /* offset too far back */
		
		/* Decode match length */
		matchlen = (token & 0x0F) + LZ4_MIN_MATCH;
		if((token & 0x0F) == 15){
			int extra = readvlen(&ip, iend, 15) - 15;
			if(extra < 0)
				return -1;
			matchlen += extra;
		}
		
		/* Copy match */
		cpy = op + matchlen;
		if(cpy > oend)
			return -1;  /* output overflow */
		
		/*
		 * Copy match bytes - must handle overlapping copies
		 * (when offset < matchlen, we repeat the pattern)
		 */
		if(offset >= matchlen){
			/* Non-overlapping - fast copy */
			memmove(op, match, matchlen);
			op = cpy;
		} else {
			/* Overlapping - copy byte by byte */
			while(op < cpy)
				*op++ = *match++;
		}
	}
	
	return op - (uchar*)dst;
}

/*
 * LZ4_compressBound - maximum compressed size for given input
 * Only needed if compressing on Plan 9 (unlikely)
 */
int
LZ4_compressBound(int inputSize)
{
	return inputSize + (inputSize/255) + 16;
}