shithub: sdbm

ref: 64e312636599ea2c86f21dc1176dca7da77bd74a
dir: /sdbm.c/

View raw version
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz@nexus.yorku.ca
 * status: public domain.
 *
 * core routines
 */

#include <u.h>
#include <libc.h>

#include "sdbm.h"
#include "tune.h"
#include "pair.h"

/*
 * forward
 */
static int getdbit(DBM *, long);
static int setdbit(DBM *, long);
static int getpage(DBM *, long);
static datum getnext(DBM *);
static int makroom(DBM *, long, int);

/*
 * useful macros
 */
#define bad(x)		((x).dptr == nil || (x).dsize <= 0)
#define exhash(item)	dbm_hash((item).dptr, (item).dsize)
#define ioerr(db)	((db)->flags |= DBM_IOERR)

#define OFF_PAG(off)	(long) (off) * PBLKSIZ
#define OFF_DIR(off)	(long) (off) * DBLKSIZ

static long masks[] = {
	000000000000, 000000000001, 000000000003, 000000000007,
	000000000017, 000000000037, 000000000077, 000000000177,
	000000000377, 000000000777, 000000001777, 000000003777,
	000000007777, 000000017777, 000000037777, 000000077777,
	000000177777, 000000377777, 000000777777, 000001777777,
	000003777777, 000007777777, 000017777777, 000037777777,
	000077777777, 000177777777, 000377777777, 000777777777,
	001777777777, 003777777777, 007777777777, 017777777777
};

datum nullitem = {nil, 0};

DBM *
dbm_open(char *file, int flags, int mode, int createdb)
{
	DBM *db;
	char *dirname;
	char *pagname;
	int n;

	debug(("dbm_open(%s, %d, %o)\n", file, flags, mode));
	if (file == nil || !*file)
	{
		werrstr("%s", "Invalid file");
		return (DBM *) nil;
	}

	/*
 	 * need space for two seperate filenames
 	 */
	n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;

	if ((dirname = malloc((unsigned) n)) == nil) 
	{
		fprint(2, "mem");
		werrstr("%s", "No memory");
		return (DBM *) nil;
	}
	/*
 	 * build the file names
 	 */
	dirname = strcat(strcpy(dirname, file), DIRFEXT);
	pagname = strcpy(dirname + strlen(dirname) + 1, file);
	pagname = strcat(pagname, PAGFEXT);

	db = dbm_prep(dirname, pagname, flags, mode, createdb);

	free((char *) dirname);
	return db;
}

DBM *
dbm_prep(char *dirname, char *pagname, int flags, int mode, int createdb)
{
	DBM *db;
	Dir *dstat;

	debug(("dbm_prep(%s, %s, %d, %o, %d)\n", dirname, pagname, flags, mode, createdb));
	if ((db = (DBM *) malloc(sizeof(DBM))) == nil) 
	{
		werrstr("%s", "No memory for database");
		return (DBM *) nil;
	}

	db->flags = 0;
    db->hmask = 0;
    db->blkptr = 0;
    db->keyptr = 0;

	/*
 	 * adjust user flags so that WRONLY becomes RDWR, 
 	 * as required by this package. Also set our internal
 	 * flag for RDONLY if needed.
 	 */

	if (flags & OWRITE) 
		flags = ORDWR;

	else if ((flags & OREAD) == flags) 
		db->flags = DBM_RDONLY;	

	/*
 	 * open the files in sequence, and stat the dirfile.
 	 * If we fail anywhere, undo everything, return nil.
 	 */

	if ((dstat = dirstat(pagname)) == nil && createdb)
		db->pagf = create(pagname, flags, mode);
	else {
		db->pagf = open(pagname, flags);
		free(dstat);
	}

	if(db->pagf > -1) {
		if((dstat = dirstat(dirname)) == nil && createdb == 1) 
			db->dirf = create(dirname, flags, mode);
		else {
			db->dirf = open(dirname, flags);
			free(dstat);
		}

		debug(("dirf: %d, pagf: %d\n", db->dirf, db->pagf));

		if(db->dirf > -1) {
		
			/*
 			 * need the dirfile size to establish max bit number.
 			 */
			dstat = dirfstat(db->dirf);
			debug(("prep:dirf %d length %d\n", db->dirf, dstat->length));
			if (dstat != nil) {
				/*
 				 * zero size: either a fresh database, or one with a single,
 				 * unsplit data page: dirpage is all zeros.
 				 */
				db->dirbno = (!dstat->length) ? 0 : -1;
				db->pagbno = -1;
				db->maxbno = dstat->length * BYTESIZ;

				(void) memset(db->pagbuf, 0, PBLKSIZ);
				(void) memset(db->dirbuf, 0, DBLKSIZ);
				free(dstat);
			/*
			 * success
			 */
				return db;
			}
			debug(("%s\n", "Prep: dirf close"));
			(void) close(db->dirf);
		}
		debug(("%s\n", "Prep: pagf close"));
		(void) close(db->pagf);
	}
	free((char *) db);
	return (DBM *) nil;
}

void
dbm_close(DBM *db)
{
	debug(("dbm_close\n"));
	if (db == nil)
		werrstr("%s", "Close of invalid database");
	else {
		(void) close(db->dirf);
		(void) close(db->pagf);
		free((char *) db);
	}
}

datum
dbm_fetch(DBM *db, datum key)
{
	debug(("dbm_fetch(%d, %s)\n",db->dirf, key.dptr));
	if (db == nil || bad(key)) 
	{
		werrstr("%s", "No DB or invalid key");
		return nullitem;
	}
	if (getpage(db, exhash(key)))
		return getpair(db->pagbuf, key);

	werrstr("%s", "DB io error");
	return nullitem;
}


long
dbm_forder(DBM *db, datum key)
{
	debug(("dbm_forder(%d, %s)\n", db->dirf, key.dptr));
	if (db == nil || bad(key))
	{
		werrstr("%s:%d:%s", "Invalid DB or key", db->dirf, key.dptr);
		return -1;
	}

	return getpage(db, exhash(key));
	}

	
int
dbm_delete(DBM *db, datum key)
{
	debug(("dbm_delete(%d, %s)\n", db->dirf, key.dptr));
	if (db == nil || bad(key))
	{
		werrstr("%s", "Invalid DB or key");
		return -1;
	}
	if (dbm_rdonly(db))
	{
		werrstr("%s", "Delete: read-only database");
		return -1;
	}

	if (getpage(db, exhash(key))) {
		if (!delpair(db->pagbuf, key))
			return -1;
		/*
 	 	update the page file
 		*/
		if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
		{
			werrstr("%s", "Seek IO error");	
			return -1;
		}

		return 0;
	}

	return ioerr(db), -1;
}

int
dbm_store(DBM *db, datum key, datum val, int flags)
{
	debug(("dbm_store(%d, %s, %s, %d)\n", db->dirf, key.dptr, val.dptr, flags));
	int need;
	register long hash;

	if (db == nil || bad(key)) 
	{
		werrstr("%s", "Store: bad DB or key");
		return -1;
	}
	if (dbm_rdonly(db))
	{
		werrstr("%s", "Store: read-only database");
		return -1;
	}

	need = key.dsize + val.dsize;
	/*
 	 * is the pair too big (or too small) for this database ??
 	 */
	if (need < 0 || need > PAIRMAX)
	{
		werrstr("%s", "Store: pair out of range");
		return -1;
	}

	if (getpage(db, (hash = exhash(key)))) {
		/*
 		 * if we need to replace, delete the key/data pair first. If it is not there, ignore.
 		 */
		if (flags == DBM_REPLACE)
			(void) delpair(db->pagbuf, key);
#ifdef SEEDUPS
		else if (duppair(db->pagbuf, key))
			return 1;
#endif
		/*
 		 * if we do not have enough room, we have to split.
 		 */
		if (!fitpair(db->pagbuf, need))
			if (!makroom(db, hash, need)) 
			{
				werrstr("%s", "Store: makroom io error");
				return -1;
			}
		/*
 		 * we have enough room or split is successful. insert the key,
		 * and update the page file.
		 */
		(void) putpair(db->pagbuf, key, val);

		if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 
		{
			werrstr("%s", "Store: insert failed");
			return -1;
		}
		/*
	 	 * success
	 	 */
		return 0;
	}
	werrstr("%s", "STORE io error");
	return -1;
}

/*
 * makroom - make room by splitting the overfull page
 * this routine will attempt to make room for SPLTMAX times before
 * giving up.
 */
static int
makroom(DBM *db, long hash, int need)
{
	long newp;
	char twin[PBLKSIZ];
	char *pag = db->pagbuf;
	char *new = twin;
 	int smax = SPLTMAX;

	debug(("makroom(%d, %d, %d)\n", db->dirf, hash, need));
	do {
		/*
 		 * split the current page
 		 */
		(void) splpage(pag, new, db->hmask + 1);
		/*
 		 * address of the new page
 		 */
		newp = (hash & db->hmask) | (db->hmask + 1);

		/*
		 * write delay, read avoidence/cache shuffle:
		 * select the page for incoming pair: if key is to go to the new page,
		 * write out the previous one, and copy the new one over, thus making
		 * it the current page. If not, simply write the new page, and we are
		 * still looking at the page of interest. current page is not updated
		 * here, as dbm_store will do so, after it inserts the incoming pair.
		 */
		if (hash & (db->hmask + 1)) {
			if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
				return 0;
			db->pagbno = newp;
			(void) memcpy(pag, new, PBLKSIZ);
		}
		else if (seek(db->pagf, OFF_PAG(newp), 0) < 0
			 || write(db->pagf, new, PBLKSIZ) < 0)
			return 0;

		if (!setdbit(db, db->curbit))
			return 0;
		/*
		 * see if we have enough room now
		 */
		if (fitpair(pag, need))
			return 1;
		/*
		 * try again... update curbit and hmask as getpage would have
		 * done. because of our update of the current page, we do not
		 * need to read in anything. BUT we have to write the current
		 * [deferred] page out, as the window of failure is too great.
		 */
		db->curbit = 2 * db->curbit +
			((hash & (db->hmask + 1)) ? 2 : 1);
		db->hmask |= db->hmask + 1;

		if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;

	} while (--smax);
	/*
	 * if we are here, this is real bad news. After SPLTMAX splits,
	 * we still cannot fit the key. say goodnight.
	 */
#ifdef BADMESS
	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
#endif
	return 0;

}

/*
 * the following two routines will break if
 * deletions aren't taken into account. (ndbm bug)
 */
datum
dbm_firstkey(DBM *db)
{
	debug(("dbm_firstkey(%d)\n", db->dirf));
	if (db == nil) 
	{
		werrstr("%s", "Firstkey: null DB");
		return nullitem;
	}
	/*
 	* start at page 0
 	*/
	if (seek(db->pagf, OFF_PAG(0), 0) < 0
	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) {
		werrstr("firstkey Ioerror");
		return nullitem;
	}
	db->pagbno = 0;
	db->blkptr = 0;
	db->keyptr = 0;
	return getnext(db);
}

datum
dbm_nextkey(DBM *db)
{
	debug(("dbm_nextkey(%d)\n", db->dirf));
	if (db == nil) 
	{
		werrstr("%s", "Nextkey: null DB");
		nullitem;
	}
	return getnext(db);
}


/*
 * all important binary trie traversal
 */
static int
getpage(DBM *db, long hash)
{
	int hbit;
	long dbit;
	long pagb;

	debug(("getpage(%d, %d)\n", db->dirf, hash));

	dbit = 0;
	hbit = 0;
	while (dbit < db->maxbno && getdbit(db, dbit))
		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);

	debug(("dbit: %d...", dbit));

	db->curbit = dbit;
	db->hmask = masks[hbit];

	pagb = hash & db->hmask;
	/*
	 * see if the block we need is already in memory.
	 * note: this lookaside cache has about 10% hit rate.
	 */
	if (pagb != db->pagbno) { 
	/*
	 * note: here, we assume a "hole" is read as 0s.
	 * if not, must zero pagbuf first.
	 */
		if (seek(db->pagf, OFF_PAG(pagb), 0) < 0
		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;
		if (!chkpage(db->pagbuf))
			return 0;
		db->pagbno = pagb;

		debug(("pag read: %d\n", pagb));
	}
	return 1;
}

static int
getdbit(DBM *db, long dbit)
{
	long c;
	long dirb;

	debug(("getdbit(%d, %d)\n", db->dirf, dbit));

	c = dbit / BYTESIZ;
	dirb = c / DBLKSIZ;

	if (dirb != db->dirbno) 
	{
		if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
			return 0;
		db->dirbno = dirb;

		debug(("dir read: %d\n", dirb));
	}

	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
}


static int
setdbit(DBM *db, long dbit)
{
	long c;
	long dirb;
	
	debug(("setdbit(%d, %d)\n", db->dirf, dbit));
	c = dbit / BYTESIZ;
	dirb = c / DBLKSIZ;

	if (dirb != db->dirbno) {
		if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
			return 0;
		db->dirbno = dirb;

		debug(("dir read: %d\n", dirb));
	}

	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);

	if (dbit >= db->maxbno)
		db->maxbno += DBLKSIZ * BYTESIZ;

	if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
		return 0;

	return 1;
}

/*
 * getnext - get the next key in the page, and if done with
 * the page, try the next page in sequence
 */
static datum
getnext(DBM *db)
{
	datum key;

	debug(("getnext(%d)\n", db->dirf));
	for (;;) {
		db->keyptr++;
		key = getnkey(db->pagbuf, db->keyptr);
		if (key.dptr != nil)
			return key;
		/*
 		 * we either run out, or there is nothing on this page..
 		 * try the next one... If we lost our position on the
 		 * file, we will have to seek.
 		 */
		db->keyptr = 0;
		if (db->pagbno != db->blkptr++)
			if (seek(db->pagf, OFF_PAG(db->blkptr), 0) < 0)
				break;
		db->pagbno = db->blkptr;
		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
			break;
		if (!chkpage(db->pagbuf))
			break;
	}

	werrstr("getnext I/O error");
	return nullitem;
}