ref: 64e312636599ea2c86f21dc1176dca7da77bd74a
author: glenda <glenda@dwp9>
date: Tue Jan 20 02:55:10 EST 2026
Initial commit
--- /dev/null
+++ b/CHANGES
@@ -1,0 +1,38 @@
+July 2025:
+
+o Ported to 9front
+
+o 'Upgraded' from K&R to function prototypes
+
+o Increased PBLKSIZ to 2048
+
+o Changed PAIRMAX to PBLKSIZ - 16 rather than hard-coded
+
+o DBLKSIZ set to 16K to match cwfs block size
+
+o dbu uses arg(2) instead of getopt
+
+June 1997:
+
+o fixed a long-hidden memmove bug in delpair that causes database
+ corruption in MEMMOVE versions of sdbm. [sdbm defaults to duff's
+ device to move data, so memmove version is almost never used.]
+
+Changes from the earlier BETA releases.
+
+o dbm_prep does everything now, so dbm_open is just a simple
+ wrapper that builds the default filenames. dbm_prep no longer
+ requires a (DBM *) db parameter: it allocates one itself. It
+ returns (DBM *) db or (DBM *) NULL.
+
+o makroom is now reliable. In the common-case optimization of the page
+ split, the page into which the incoming key/value pair is to be inserted
+ is write-deferred (if the split is successful), thereby saving a cosly
+ write. BUT, if the split does not make enough room (unsuccessful), the
+ deferred page is written out, as the failure-window is now dependent on
+ the number of split attempts.
+
+o if -DDUFF is defined, hash function will also use the DUFF construct.
+ This may look like a micro-performance tweak (maybe it is), but in fact,
+ the hash function is the third most-heavily used function, after read
+ and write.
--- /dev/null
+++ b/README
@@ -1,0 +1,330 @@
+
+
+
+
+
+
+ sdbm - Substitute DBM
+ or
+ Berkeley ndbm for Every UN*X Made Simple
+
+ Ozan (oz) Yigit
+
+ The Guild of PD Software Toolmakers
+ Toronto - Canada
+
+ oz@nexus.yorku.ca
+
+
+
+Implementation is the sincerest form of flattery. - L. Peter
+Deutsch
+
+A The Clone of the ndbm library
+
+ The sources accompanying this notice - sdbm - consti-
+tute the first public release (Dec. 1990) of a complete
+clone of the Berkeley UN*X ndbm library. The sdbm library is
+meant to clone the proven functionality of ndbm as closely
+as possible, including a few improvements. It is practical,
+easy to understand, and compatible. The sdbm library is not
+derived from any licensed, proprietary or copyrighted
+software.
+
+ The sdbm implementation is based on a 1978 algorithm
+[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
+In the course of searching for a substitute for ndbm, I pro-
+totyped three different external-hashing algorithms [Lar78,
+Fag79, Lit80] and ultimately chose Larson's algorithm as a
+basis of the sdbm implementation. The Bell Labs dbm (and
+therefore ndbm) is based on an algorithm invented by Ken
+Thompson, [Tho90, Tor87] and predates Larson's work.
+
+ The sdbm programming interface is totally compatible
+with ndbm and includes a slight improvement in database ini-
+tialization. It is also expected to be binary-compatible
+under most UN*X versions that support the ndbm library.
+
+ The sdbm implementation shares the shortcomings of the
+ndbm library, as a side effect of various simplifications to
+the original Larson algorithm. It does produce holes in the
+page file as it writes pages past the end of file. (Larson's
+paper include a clever solution to this problem that is a
+result of using the hash value directly as a block address.)
+On the other hand, extensive tests seem to indicate that
+sdbm creates fewer holes in general, and the resulting page-
+files are smaller. The sdbm implementation is also faster
+than ndbm in database creation. Unlike the ndbm, the sdbm
+__________________________
+UN*X is not a trademark of any (dis)organization.
+
+
+
+
+
+
+
+
+
+
+ - 2 -
+
+
+store operation will not ``wander away'' trying to split its
+data pages to insert a datum that cannot (due to elaborate
+worst-case situations) be inserted. (It will fail after a
+pre-defined number of attempts.)
+
+Important Compatibility Warning
+
+ The sdbm and ndbm libraries cannot share databases: one
+cannot read the (dir/pag) database created by the other.
+This is due to the differences between the ndbm and sdbm
+algorithms, and the hash functions used. It is easy to con-
+vert between the dbm/ndbm databases and sdbm by ignoring the
+index completely: see dbd, dbu etc.
+
+
+Notice of Intellectual Property
+
+The entire sdbm library package, as authored by me, Ozan S.
+Yigit, is hereby placed in the public domain. As such, the
+author is not responsible for the consequences of use of
+this software, no matter how awful, even if they arise from
+defects in it. There is no expressed or implied warranty for
+the sdbm library.
+
+ Since the sdbm library package is in the public domain,
+this original release or any additional public-domain
+releases of the modified original cannot possibly (by defin-
+ition) be withheld from you. Also by definition, You (singu-
+lar) have all the rights to this code (including the right
+to sell without permission, the right to hoard and the right
+to do other icky things as you see fit) but those rights are
+also granted to everyone else.
+
+ Please note that all previous distributions of this
+software contained a copyright (which is now dropped) to
+protect its origins and its current public domain status
+against any possible claims and/or challenges.
+
+Acknowledgments
+
+ Many people have been very helpful and supportive. A
+partial list would necessarily include Rayan Zacherissen
+(who contributed the man page, and also hacked a MMAP ver-
+sion of sdbm), Arnold Robbins, Chris Lewis, Bill Davidsen,
+__________________________
+Torek's discussion [Tor87] indicates that dbm/ndbm
+implementations use the hash value to traverse the
+radix trie differently than sdbm and as a result, the
+page indexes are generated in different order. For
+more information, send e-mail to the author.
+You cannot really hoard something that is available to
+the public at large, but try if it makes you feel any
+better.
+
+
+
+
+
+
+
+
+
+
+ - 3 -
+
+
+Henry Spencer, Geoff Collyer, Rich Salz (who got me started
+in the first place), Johannes Ruschein (who did the minix
+port) and David Tilbrook. I thank you all.
+
+Distribution Manifest and Notes
+
+This distribution of sdbm includes (at least) the following:
+ CHANGES change log
+ README this file.
+ biblio a small bibliography on external hashing
+ dba.c a crude (n/s)dbm page file analyzer
+ dbd.c a crude (n/s)dbm page file dumper (for conversion)
+ dbe.1 man page for dbe.c
+ dbe.c Janick's database editor
+ dbm.c a dbm library emulation wrapper for ndbm/sdbm
+ dbm.h header file for the above
+ dbu.c a crude db management utility
+ hash.c hashing function
+ makefile guess.
+ pair.c page-level routines (posted earlier)
+ pair.h header file for the above
+ readme.ms troff source for the README file
+ sdbm.3 man page
+ sdbm.c the real thing
+ sdbm.h header file for the above
+ tune.h place for tuning & portability thingies
+ util.c miscellaneous
+
+ dbu is a simple database manipulation program that
+tries to look like Bell Labs' cbt utility. It is currently
+incomplete in functionality. I use dbu to test out the rou-
+tines: it takes (from stdin) tab separated key/value pairs
+for commands like build or insert or takes keys for commands
+like delete or look.
+ dbu <build|creat|look|insert|cat|delete> dbmfile
+
+ dba is a crude analyzer of dbm/sdbm/ndbm page files. It
+scans the entire page file, reporting page level statistics,
+and totals at the end.
+
+ dbd is a crude dump program for dbm/ndbm/sdbm data-
+bases. It ignores the bitmap, and dumps the data pages in
+sequence. It can be used to create input for the dbu util-
+ity. Note that dbd will skip any NULLs in the key and data
+fields, thus is unsuitable to convert some peculiar data-
+bases that insist in including the terminating null.
+
+
+__________________________
+The dbd, dba, dbu utilities are quick hacks and are not
+fit for production use. They were developed late one
+night, just to test out sdbm, and convert some
+databases.
+
+
+
+
+
+
+
+
+
+
+ - 4 -
+
+
+ I have also included a copy of the dbe (ndbm DataBase
+Editor) by Janick Bergeron [janick@bnr.ca] for your pleas-
+ure. You may find it more useful than the little dbu util-
+ity.
+
+ dbm.[ch] is a dbm library emulation on top of ndbm (and
+hence suitable for sdbm). Written by Robert Elz.
+
+ The sdbm library has been around in beta test for quite
+a long time, and from whatever little feedback I received
+(maybe no news is good news), I believe it has been func-
+tioning without any significant problems. I would, of
+course, appreciate all fixes and/or improvements. Portabil-
+ity enhancements would especially be useful.
+
+Implementation Issues
+
+ Hash functions: The algorithm behind sdbm implementa-
+tion needs a good bit-scrambling hash function to be effec-
+tive. I ran into a set of constants for a simple hash func-
+tion that seem to help sdbm perform better than ndbm for
+various inputs:
+ /*
+ * polynomial conversion ignoring overflows
+ * 65599 nice. 65587 even better.
+ */
+ long
+ dbm_hash(char *str, int len) {+ register unsigned long n = 0;
+
+ while (len--)
+ n = n * 65599 + *str++;
+ return n;
+ }
+
+ There may be better hash functions for the purposes of
+dynamic hashing. Try your favorite, and check the pagefile.
+If it contains too many pages with too many holes, (in rela-
+tion to this one for example) or if sdbm simply stops work-
+ing (fails after SPLTMAX attempts to split) when you feed
+your NEWS history file to it, you probably do not have a
+good hashing function. If you do better (for different
+types of input), I would like to know about the function you
+use.
+
+ Block sizes: It seems (from various tests on a few
+machines) that a page file block size PBLKSIZ of 1024 is by
+far the best for performance, but this also happens to limit
+the size of a key/value pair. Depending on your needs, you
+may wish to increase the page size, and also adjust PAIRMAX
+(the maximum size of a key/value pair allowed: should always
+be at least three words smaller than PBLKSIZ.) accordingly.
+The system-wide version of the library should probably be
+configured with 1024 (distribution default), as this appears
+
+
+
+
+
+
+
+
+
+ - 5 -
+
+
+to be sufficient for most common uses of sdbm.
+
+Portability
+
+ This package has been tested in many different UN*Xes
+even including minix, and appears to be reasonably portable.
+This does not mean it will port easily to non-UN*X systems.
+
+Notes and Miscellaneous
+
+ The sdbm is not a very complicated package, at least
+not after you familiarize yourself with the literature on
+external hashing. There are other interesting algorithms in
+existence that ensure (approximately) single-read access to
+a data value associated with any key. These are directory-
+less schemes such as linear hashing [Lit80] (+ Larson varia-
+tions), spiral storage [Mar79] or directory schemes such as
+extensible hashing [Fag79] by Fagin et al. I do hope these
+sources provide a reasonable playground for experimentation
+with other algorithms. See the June 1988 issue of ACM Com-
+puting Surveys [Enb88] for an excellent overview of the
+field.
+
+References
+
+
+[Lar78]P.-A. Larson, ``Dynamic Hashing'', BIT, vol. 18,
+ pp. 184-201, 1978.
+
+[Tho90]Ken Thompson, private communication, Nov. 1990
+
+[Lit80]W. Litwin, `` Linear Hashing: A new tool for file
+ and table addressing'', Proceedings of the 6th Confer-
+ ence on Very Large Dabatases (Montreal), pp. 212-223,
+ Very Large Database Foundation, Saratoga, Calif., 1980.
+
+[Fag79]R. Fagin, J. Nievergelt, N. Pippinger, and H.
+ R. Strong, ``Extendible Hashing - A Fast Access Method
+ for Dynamic Files'', ACM Trans. Database Syst., vol. 4,
+ no.3, pp. 315-344, Sept. 1979.
+
+[Wal84]Rich Wales, ``Discussion of "dbm" data base system'',
+ USENET newsgroup unix.wizards, Jan. 1984.
+
+[Tor87]Chris Torek, ``Re: dbm.a and ndbm.a archives'',
+ USENET newsgroup comp.unix, 1987.
+
+[Mar79]G. N. Martin, ``Spiral Storage: Incrementally Aug-
+ mentable Hash Addressed Storage'', Technical Report
+ #27, University of Warwick, Coventry, U.K., 1979.
+
+[Enb88]R. J. Enbody and H. C. Du, ``Dynamic Hashing
+ Schemes'',ACM Computing Surveys, vol. 20, no. 2, pp.
+ 85-113, June 1988.
+
+
+
+
+
+
--- /dev/null
+++ b/biblio
@@ -1,0 +1,64 @@
+%A R. J. Enbody
+%A H. C. Du
+%T Dynamic Hashing Schemes
+%J ACM Computing Surveys
+%V 20
+%N 2
+%D June 1988
+%P 85-113
+%K surveys
+
+%A P.-A. Larson
+%T Dynamic Hashing
+%J BIT
+%V 18
+%P 184-201
+%D 1978
+%K dynamic
+
+%A W. Litwin
+%T Linear Hashing: A new tool for file and table addressing
+%J Proceedings of the 6th Conference on Very Large Dabatases (Montreal)
+%I Very Large Database Foundation
+%C Saratoga, Calif.
+%P 212-223
+%D 1980
+%K linear
+
+%A R. Fagin
+%A J. Nievergelt
+%A N. Pippinger
+%A H. R. Strong
+%T Extendible Hashing - A Fast Access Method for Dynamic Files
+%J ACM Trans. Database Syst.
+%V 4
+%N 3
+%D Sept. 1979
+%P 315-344
+%K extend
+
+%A G. N. Martin
+%T Spiral Storage: Incrementally Augmentable Hash Addressed Storage
+%J Technical Report #27
+%I University of Warwick
+%C Coventry, U.K.
+%D 1979
+%K spiral
+
+%A Chris Torek
+%T Re: dbm.a and ndbm.a archives
+%B USENET newsgroup comp.unix
+%D 1987
+%K torek
+
+%A Rich Wales
+%T Discusson of "dbm" data base system
+%B USENET newsgroup unix.wizards
+%D Jan. 1984
+%K rich
+
+
+
+
+
+
--- /dev/null
+++ b/dba.c
@@ -1,0 +1,94 @@
+/*
+ * dba dbm analysis/recovery
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include "sdbm.h"
+
+char *progname;
+extern void oops(char *, char *);
+extern int okpage(char *);
+
+void sdump(int);
+int pagestat(char *);
+
+void
+main(int argc, char **argv)
+{+ int n;
+ char *p;
+ char *name;
+ int pagf;
+
+ progname = argv[0];
+
+ if (argc != 2) oops("usage: %s dbname\n", progname); /* keeps compiler quiet */+
+ if (p = argv[1]) {+ name = (char *) malloc((n = strlen(p)) + 5);
+ strcpy(name, p);
+ strcpy(name + n, ".pag");
+
+ if ((pagf = open(name, OREAD)) < 0)
+ oops("cannot open %s.", name);+
+ sdump(pagf);
+ }
+ else
+ oops("usage: %s dbname\n", progname);+
+ exits(0);
+}
+
+void
+sdump(int pagf)
+{+ int b;
+ int n = 0;
+ int t = 0;
+ int o = 0;
+ int e;
+ char pag[PBLKSIZ];
+ char num[16];
+
+ while ((b = read(pagf, pag, PBLKSIZ)) > 0) {+ printf("#%d: ", n);+ if (!okpage(pag))
+ printf("bad\n");+ else {+ printf("ok. ");+ if (!(e = pagestat(pag)))
+ o++;
+ else
+ t += e;
+ }
+ n++;
+ }
+
+ if (b == 0)
+ printf("%d pages (%d holes): %d entries\n", n, o, t);+ else
+ {+ snprintf(num, 15, "%d", n);
+ oops("read failed: block %d", num);+ }
+}
+
+int
+pagestat(char *pag)
+{+ register n;
+ register free;
+ register short *ino = (short *) pag;
+
+ if (!(n = ino[0]))
+ printf("no entries.\n");+ else {+ free = ino[n] - (n + 1) * sizeof(short);
+ printf("%3d entries %2d%% used free %d.\n",+ n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free);
+ }
+ return n / 2;
+}
--- /dev/null
+++ b/dbd.c
@@ -1,0 +1,95 @@
+/*
+ * dbd - dump a dbm data file
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+
+#include "sdbm.h"
+
+char *progname;
+extern void oops(char *, char *);
+extern int okpage(char *);
+
+void sdump(int);
+void dispage(char *);
+
+#define empty(page) (((short *) page)[0] == 0)
+
+void
+main(int argc, char ** argv)
+{+ int n;
+ char *p;
+ char *name;
+ int pagf;
+
+ progname = argv[0];
+
+ if (argc != 2) oops("usage: %s dbname\n", progname); /* keeps compiler quiet */+
+ if (p = argv[1]) {+ name = (char *) malloc((n = strlen(p)) + 5);
+ strcpy(name, p);
+ strcpy(name + n, ".pag");
+
+ if ((pagf = open(name, OREAD)) < 0)
+ oops("cannot open %s.", name);+
+ sdump(pagf);
+ }
+ else
+ oops("usage: %s dbname\n", progname);+ exits(0);
+}
+
+void
+sdump(int pagf)
+{+ int r;
+ int n = 0;
+ int o = 0;
+ char pag[PBLKSIZ];
+ char num[16];
+
+ while ((r = read(pagf, pag, PBLKSIZ)) > 0) {+ if (!okpage(pag))
+ fprintf(stderr, "%d: bad page.\n", n);
+ else if (empty(pag))
+ o++;
+ else
+ dispage(pag);
+ n++;
+ }
+
+ if (r == 0)
+ fprintf(stderr, "%d pages (%d holes).\n", n, o);
+ else
+ {+ snprint(num, 15, "%d", n);
+ oops("read failed: block %d", num);+ }
+}
+
+void
+dispage(char *pag){+ int i, n;
+ int off;
+ short *ino = (short *) pag;
+
+ off = PBLKSIZ;
+ for (i = 1; i < ino[0]; i += 2) {+ for (n = ino[i]; n < off; n++)
+ if (pag[n] != 0)
+ putchar(pag[n]);
+ putchar('\t');+ off = ino[i];
+ for (n = ino[i + 1]; n < off; n++)
+ if (pag[n] != 0)
+ putchar(pag[n]);
+ putchar('\n');+ off = ino[i + 1];
+ }
+}
+
--- /dev/null
+++ b/dbe.c
@@ -1,0 +1,433 @@
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include "sdbm.h"
+#include <ctype.h>
+#include <regexp.h>
+
+/***************************************************************************\
+** **
+** Function name: getopt() **
+** Author: Henry Spencer, UofT **
+** Coding date: 84/04/28 **
+** **
+** Description: **
+** **
+** Parses argv[] for arguments. **
+** Works with Whitesmith's C compiler. **
+** **
+** Inputs - The number of arguments **
+** - The base address of the array of arguments **
+** - A string listing the valid options (':' indicates an **+** argument to the preceding option is required, a ';' **
+** indicates an argument to the preceding option is optional) **
+** **
+** Outputs - Returns the next option character, **
+** '?' for non '-' arguments **
+** or ':' when there is no more arguments. **
+** **
+** Side Effects + The argument to an option is pointed to by 'optarg' **
+** **
+*****************************************************************************
+** **
+** REVISION HISTORY: **
+** **
+** DATE NAME DESCRIPTION **
+** YY/MM/DD ------------------ ------------------------------------ **
+** 88/10/20 Janick Bergeron Returns '?' on unamed arguments **
+** returns '!' on unknown options **
+** and 'EOF' only when exhausted. **
+** 88/11/18 Janick Bergeron Return ':' when no more arguments **
+** 89/08/11 Janick Bergeron Optional optarg when ';' in optstring **
+** **
+\***************************************************************************/
+
+
+char *optarg; /* Global argument pointer. */
+char err[ERRMAX]; /* for convenience */
+
+char
+getopt(int argc, char **argv, char *optstring)
+{+ register int c;
+ register char *place;
+ extern char *index();
+ static int optind = 0;
+ static char *scan = nil;
+
+ optarg = nil;
+
+ if (scan == nil || *scan == '\0') {+
+ if (optind == 0)
+ optind++;
+ if (optind >= argc)
+ return ':';
+
+ optarg = place = argv[optind++];
+ if (place[0] != '-' || place[1] == '\0')
+ return '?';
+ if (place[1] == '-' && place[2] == '\0')
+ return '?';
+ scan = place + 1;
+ }
+
+ c = *scan++;
+ place = strchr(optstring, c);
+ if (place == nil || c == ':' || c == ';') {+
+ (void) fprint(2, "%s: unknown option %c\n", argv[0], c);
+ scan = nil;
+ return '!';
+ }
+ if (*++place == ':') {+
+ if (*scan != '\0') {+
+ optarg = scan;
+ scan = nil;
+
+ }
+ else {+
+ if (optind >= argc) {+
+ (void) fprint(2, "%s: %c requires an argument\n",
+ argv[0], c);
+ return '!';
+ }
+ optarg = argv[optind];
+ optind++;
+ }
+ }
+ else if (*place == ';') {+
+ if (*scan != '\0') {+
+ optarg = scan;
+ scan = nil;
+
+ }
+ else {+
+ if (optind >= argc || *argv[optind] == '-')
+ optarg = nil;
+ else {+ optarg = argv[optind];
+ optind++;
+ }
+ }
+ }
+ return c;
+}
+
+
+void
+print_datum(datum db)
+{+ int i;
+
+ putchar('"');+ for (i = 0; i < db.dsize; i++) {+ if (isprint(db.dptr[i]))
+ putchar(db.dptr[i]);
+ else {+ putchar('\\');+ putchar('0' + ((db.dptr[i] >> 6) & 0x07));+ putchar('0' + ((db.dptr[i] >> 3) & 0x07));+ putchar('0' + (db.dptr[i] & 0x07));+ }
+ }
+ putchar('"');+}
+
+
+datum
+read_datum(char *s)
+{+ datum db;
+ char *p;
+ int i;
+
+ db.dsize = 0;
+ db.dptr = (char *) malloc(strlen(s) * sizeof(char));
+ for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) {+ if (*s == '\\') {+ if (*++s == 'n')
+ *p = '\n';
+ else if (*s == 'r')
+ *p = '\r';
+ else if (*s == 'f')
+ *p = '\f';
+ else if (*s == 't')
+ *p = '\t';
+ else if (isdigit(*s) && isdigit(*(s + 1)) && isdigit(*(s + 2))) {+ i = (*s++ - '0') << 6;
+ i |= (*s++ - '0') << 3;
+ i |= *s - '0';
+ *p = i;
+ }
+ else if (*s == '0')
+ *p = '\0';
+ else
+ *p = *s;
+ }
+ else
+ *p = *s;
+ }
+
+ return db;
+}
+
+
+char *
+key2s(datum db)
+{+ char *buf;
+ char *p1, *p2;
+
+ buf = (char *) malloc((db.dsize + 1) * sizeof(char));
+ for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++);
+ *p1 = '\0';
+ return buf;
+}
+
+void
+main(int argc, char **argv)
+{+ typedef enum {+ YOW, FETCH, STORE, DELETE, SCAN, REGEXP
+ } commands;
+ char opt;
+ int flags;
+ int giveusage = 0;
+ int verbose = 0;
+ commands what = YOW;
+ char *comarg[3];
+ int st_flag = DBM_INSERT;
+ int argn;
+ DBM *db;
+ datum key;
+ datum content;
+ int createdb;
+ Reprog *re;
+
+ createdb = 0;
+
+ flags = ORDWR;
+ argn = 0;
+
+ while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') {+ switch (opt) {+ case 'a':
+ what = SCAN;
+ break;
+ case 'c':
+ flags |= ORDWR;
+ createdb = 1;
+ break;
+ case 'd':
+ what = DELETE;
+ break;
+ case 'f':
+ what = FETCH;
+ break;
+ case 'F':
+ what = REGEXP;
+ break;
+ case 'm':
+ flags = 0;
+ if (strcmp(optarg, "r") == 0)
+ flags = OREAD;
+ else if (strcmp(optarg, "w") == 0)
+ flags = OWRITE;
+ else if (strcmp(optarg, "rw") == 0)
+ flags = ORDWR;
+ else {+ fprintf(stderr, "Invalid mode: \"%s\"\n", optarg);
+ giveusage = 1;
+ }
+ break;
+ case 'r':
+ st_flag = DBM_REPLACE;
+ break;
+ case 's':
+ what = STORE;
+ break;
+ case 't':
+ flags |= OTRUNC;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'x':
+ flags |= OEXCL;
+ break;
+ case '!':
+ giveusage = 1;
+ break;
+ case '?':
+ if (argn < 3)
+ comarg[argn++] = optarg;
+ else {+ fprintf(stderr, "Too many arguments.\n");
+ giveusage = 1;
+ }
+ break;
+ }
+ }
+
+ if (giveusage | what == YOW | argn < 1) {+ fprintf(stderr, "Usage: %s database [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]);
+ exits("usage");+ }
+
+ if ((db = dbm_open(comarg[0], flags, 0777, createdb)) == nil) {+ snprintf(err, ERRMAX-1, "Error opening database \"%s\"\n", comarg[0]);
+ fprintf(stderr, err);
+ werrstr(err);
+ exits(err);
+ }
+
+ if (argn > 1)
+ key = read_datum(comarg[1]);
+ if (argn > 2)
+ content = read_datum(comarg[2]);
+
+ switch (what) {+
+ case SCAN:
+ key = dbm_firstkey(db);
+ if (dbm_error(db)) {+ rerrstr(err, ERRMAX-1);
+ fprintf(stderr, "%s: %s", err, "Error when fetching first key\n");
+ goto db_exit;
+ }
+ while (key.dptr != nil) {+ content = dbm_fetch(db, key);
+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching\n");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ print_datum(key);
+ printf(": ");+ print_datum(content);
+ printf("\n");+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching next key\n");
+ goto db_exit;
+ }
+ key = dbm_nextkey(db);
+ }
+ break;
+
+ case REGEXP:
+ if (argn < 2) {+ fprintf(stderr, "Missing regular expression.\n");
+ goto db_exit;
+ }
+ re = regcomp(comarg[1]);
+ if (re == 0) {+ fprintf(stderr, "Invalid regular expression\n");
+ goto db_exit;
+ }
+ key = dbm_firstkey(db);
+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching first key\n");
+ goto db_exit;
+ }
+ while (key.dptr != nil) {+ if (regexec(re, key2s(key), 0, 0) > 0) {+ content = dbm_fetch(db, key);
+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching\n");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ print_datum(key);
+ printf(": ");+ print_datum(content);
+ printf("\n");+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching next key\n");
+ goto db_exit;
+ }
+ }
+ key = dbm_nextkey(db);
+ }
+ break;
+
+ case FETCH:
+ if (argn < 2) {+ fprintf(stderr, "Missing fetch key.\n");
+ goto db_exit;
+ }
+ content = dbm_fetch(db, key);
+ if (dbm_error(db)) {+ fprintf(stderr, "Error when fetching\n");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ if (content.dptr == nil) {+ fprintf(stderr, "Cannot find\n");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ print_datum(key);
+ printf(": ");+ print_datum(content);
+ printf("\n");+ break;
+
+ case DELETE:
+ if (argn < 2) {+ fprintf(stderr, "Missing delete key.\n");
+ goto db_exit;
+ }
+ if (dbm_delete(db, key) || dbm_error(db)) {+ fprintf(stderr, "Error when deleting\n");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ if (verbose) {+ print_datum(key);
+ printf(": DELETED\n");+ }
+ break;
+
+ case STORE:
+ if (argn < 3) {+ fprintf(stderr, "Missing key and/or content.\n");
+ goto db_exit;
+ }
+ if (dbm_store(db, key, content, st_flag) || dbm_error(db)) {+ rerrstr(err, ERRMAX -1);
+ if(strlen(err) > 0) fprintf(stderr, "%s\n", err);
+ fprintf(stderr, "%s\n", "Error when storing ");
+ print_datum(key);
+ printf("\n");+ goto db_exit;
+ }
+ if (verbose) {+ print_datum(key);
+ printf(": ");+ print_datum(content);
+ printf(" STORED\n");+ }
+ break;
+ }
+
+db_exit:
+ dbm_clearerr(db);
+ dbm_close(db);
+ if (dbm_error(db)) {+ fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]);
+ exits("close error");+ }
+}
--- /dev/null
+++ b/dbm.c
@@ -1,0 +1,115 @@
+/*
+ * Copyright (c) 1985 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <u.h>
+#include <libc.h>
+
+#include "dbm.h"
+
+#define NODB ((DBM *)0)
+
+static DBM *cur_db = NODB;
+
+static char no_db[] = "dbm: no open database\n";
+
+int
+dbminit(char *file)
+{+ if (cur_db != NODB)
+ dbm_close(cur_db);
+
+ cur_db = dbm_open(file, 2, 0, 0);
+ if (cur_db == NODB) {+ cur_db = dbm_open(file, 0, 0, 0);
+ if (cur_db == NODB)
+ return (-1);
+ }
+ return (0);
+}
+
+long
+forder(datum key)
+{+ if (cur_db == NODB) {+ print(no_db);
+ return (0L);
+ }
+ return (dbm_forder(cur_db, key));
+}
+
+datum
+fetch(datum key)
+{+ datum item;
+
+ if (cur_db == NODB) {+ print(no_db);
+ item.dptr = 0;
+ return (item);
+ }
+ return (dbm_fetch(cur_db, key));
+}
+
+delete(datum key)
+{+ if (cur_db == NODB) {+ print(no_db);
+ return (-1);
+ }
+ if (dbm_rdonly(cur_db))
+ return (-1);
+ return (dbm_delete(cur_db, key));
+}
+
+store(key, dat)
+datum key, dat;
+{+ if (cur_db == NODB) {+ print(no_db);
+ return (-1);
+ }
+ if (dbm_rdonly(cur_db))
+ return (-1);
+
+ return (dbm_store(cur_db, key, dat, DBM_REPLACE));
+}
+
+datum
+firstkey()
+{+ datum item;
+
+ if (cur_db == NODB) {+ print(no_db);
+ item.dptr = 0;
+ return (item);
+ }
+ return (dbm_firstkey(cur_db));
+}
+
+datum
+nextkey(datum key)
+{+ datum item;
+
+ if (cur_db == NODB) {+ print(no_db);
+ item.dptr = 0;
+ return (item);
+ }
+ return (dbm_nextkey(cur_db));
+}
--- /dev/null
+++ b/dbm.h
@@ -1,0 +1,24 @@
+/*
+ * Copyright (c) 1983 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * @(#)dbm.h 5.2 (Berkeley) 5/24/89
+ */
+
+#include <sdbm.h>
+
+datum fetch(datem);
+datum firstkey();
+datum nextkey(datum);
--- /dev/null
+++ b/dbu.c
@@ -1,0 +1,237 @@
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include "sdbm.h"
+
+extern void oops(char *, char *);
+extern char getopt(int, char *[], char *);
+
+
+char *progname;
+
+static int rflag;
+static char *usage = "[-R] cat | look |... dbmname";
+
+#define DERROR 0
+#define DLOOK 1
+#define DINSERT 2
+#define DDELETE 3
+#define DCAT 4
+#define DBUILD 5
+#define DPRESS 6
+#define DCREAT 7
+
+#define LINEMAX 8192
+
+typedef struct {+ char *sname;
+ int scode;
+ int flags;
+} cmd;
+
+static cmd cmds[] = {+
+ "fetch", DLOOK, OREAD,
+ "get", DLOOK, OREAD,
+ "look", DLOOK, OREAD,
+ "add", DINSERT, ORDWR,
+ "insert", DINSERT, ORDWR,
+ "store", DINSERT, ORDWR,
+ "delete", DDELETE, ORDWR,
+ "remove", DDELETE, ORDWR,
+ "dump", DCAT, OREAD,
+ "list", DCAT, OREAD,
+ "cat", DCAT, OREAD,
+ "creat", DCREAT, ORDWR | OTRUNC,
+ "new", DCREAT, ORDWR | OTRUNC,
+ "build", DBUILD, ORDWR,
+ "squash", DPRESS, ORDWR,
+ "compact", DPRESS, ORDWR,
+ "compress", DPRESS, ORDWR
+};
+
+#define CTABSIZ (sizeof (cmds)/sizeof (cmd))
+
+static cmd *parse(char *);
+static void badk(char *), doit(cmd *, char *), prdatum(FILE *, datum);
+
+int
+main(int argc, char *argv[])
+{+ cmd *act;
+
+ progname = argv[0];
+
+ ARGBEGIN {+ case 'R':
+ rflag++;
+ break;
+
+ default:
+ oops("usage %s", usage);+ break;
+ } ARGEND
+
+ if (argc < 2) oops("usage %s", usage);+
+ if ((act = parse(argv[0])) == nil) badk(argv[0]);
+
+ doit(act, argv[1]);
+ exits(0);
+}
+
+static void
+doit(cmd *act, char* file)
+{+ datum key;
+ datum val;
+ register DBM *db;
+ register char *op;
+ register int n;
+ char *line;
+#ifdef TIME
+ long start;
+ extern long time(long *);
+#endif
+ int dbcreate;
+ Dir *dstat;
+
+ if ((dstat = dirstat(file)) == nil)
+ dbcreate = 1;
+ else {+ dbcreate = 0;
+ free(dstat);
+ }
+
+ if ((db = dbm_open(file, act->flags, 0644, dbcreate)) == nil)
+ oops("cannot open: %s", file);+
+ if ((line = (char *) malloc(LINEMAX)) == nil)
+ oops("%s: cannot get memory", "line alloc");+
+ switch (act->scode) {+
+ case DLOOK:
+ while (fgets(line, LINEMAX, stdin) != nil) {+ n = strlen(line) - 1;
+ line[n] = 0;
+ key.dptr = line;
+ key.dsize = n;
+ val = dbm_fetch(db, key);
+ if (val.dptr != nil) {+ prdatum(stdout, val);
+ putchar('\n');+ continue;
+ }
+ prdatum(stderr, key);
+ fprintf(stderr, ": not found.\n");
+ }
+ break;
+ case DINSERT:
+ break;
+ case DDELETE:
+ while (fgets(line, LINEMAX, stdin) != nil) {+ n = strlen(line) - 1;
+ line[n] = 0;
+ key.dptr = line;
+ key.dsize = n;
+ if (dbm_delete(db, key) == -1) {+ prdatum(stderr, key);
+ fprintf(stderr, ": not found.\n");
+ }
+ }
+ break;
+ case DCAT:
+ for (key = dbm_firstkey(db); key.dptr != 0;
+ key = dbm_nextkey(db)) {+ prdatum(stdout, key);
+ putchar('\t');+ prdatum(stdout, dbm_fetch(db, key));
+ putchar('\n');+ }
+ break;
+ case DBUILD:
+#ifdef TIME
+ start = time(0);
+#endif
+ while (fgets(line, LINEMAX, stdin) != nil) {+ n = strlen(line) - 1;
+ line[n] = 0;
+ key.dptr = line;
+ if ((op = strchr(line, '\t')) != 0) {+ key.dsize = op - line;
+ *op++ = 0;
+ val.dptr = op;
+ val.dsize = line + n - op;
+ }
+ else
+ oops("bad input; %s", line);+
+ if (dbm_store(db, key, val, DBM_REPLACE) < 0) {+ prdatum(stderr, key);
+ fprintf(stderr, ": ");
+ oops("store: %s", "failed");+ }
+ }
+#ifdef TIME
+ printf("done: %d seconds.\n", time(0) - start);+#endif
+ break;
+ case DPRESS:
+ break;
+ case DCREAT:
+ break;
+ }
+
+ dbm_close(db);
+}
+
+static void
+badk(char *word)
+{+ register int i;
+
+ if (progname)
+ fprintf(stderr, "%s: ", progname);
+ fprintf(stderr, "bad keywd %s. use one of\n", word);
+ for (i = 0; i < (int)CTABSIZ; i++)
+ fprintf(stderr, "%-8s%c", cmds[i].sname,
+ ((i + 1) % 6 == 0) ? '\n' : ' ');
+ fprintf(stderr, "\n");
+ exits("bad keyword");+ /*NOTREACHED*/
+}
+
+static cmd *
+parse(char *str)
+{+ register int i = CTABSIZ;
+ register cmd *p;
+
+ for (p = cmds; i--; p++)
+ if (strcmp(p->sname, str) == 0)
+ return p;
+ return nil;
+}
+
+static void
+prdatum(FILE *stream, datum d)
+{+ register int c;
+ register char *p = d.dptr;
+ register int n = d.dsize;
+
+ while (n--) {+ c = *p++ & 0377;
+ if (c & 0200) {+ fprintf(stream, "M-");
+ c &= 0177;
+ }
+ if (c == 0177 || c < ' ')
+ fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');
+ else
+ putc(c, stream);
+ }
+}
+
+
--- /dev/null
+++ b/hash.c
@@ -1,0 +1,45 @@
+/*
+ * sdbm - ndbm work-alike hashed database library
+ * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
+ * author: oz@nexus.yorku.ca
+ * status: public domain. keep it that way.
+ *
+ * hashing routine
+ */
+
+#include "sdbm.h"
+/*
+ * polynomial conversion ignoring overflows
+ * [this seems to work remarkably well, in fact better
+ * then the ndbm hash function. Replace at your own risk]
+ * use: 65599 nice.
+ * 65587 even better.
+ */
+long
+dbm_hash(char *str, int len)
+{+ register unsigned long n = 0;
+
+#ifdef DUFF
+
+#define HASHC n = *str++ + 65599 * n
+
+ if (len > 0) {+ register int loop = (len + 8 - 1) >> 3;
+
+ switch(len & (8 - 1)) {+ case 0: do {+ HASHC; case 7: HASHC;
+ case 6: HASHC; case 5: HASHC;
+ case 4: HASHC; case 3: HASHC;
+ case 2: HASHC; case 1: HASHC;
+ } while (--loop);
+ }
+
+ }
+#else
+ while (len--)
+ n = *str++ + 65599 * n;
+#endif
+ return n;
+}
--- /dev/null
+++ b/man/dbe.1
@@ -1,0 +1,46 @@
+.TH dbe 1 "ndbm(3) EDITOR"
+.SH NAME
+dbe \- Edit a ndbm(3) database
+.SH USAGE
+dbe <database> [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [<key> [<content>]]
+.SH DESCRIPTION
+\fIdbme\fP operates on ndbm(3) databases.
+It can be used to create them, look at them or change them.
+When specifying the value of a key or the content of its associated entry,
+\\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual.
+When displaying key/content pairs, non-printable characters are displayed
+using the \\nnn notation.
+.SH OPTIONS
+.IP -a
+List all entries in the database.
+.IP -c
+Create the database if it does not exist.
+.IP -d
+Delete the entry associated with the specified key.
+.IP -f
+Fetch and display the entry associated with the specified key.
+.IP -F
+Fetch and display all the entries whose key match the specified
+regular-expression
+.IP "-m r|w|rw"
+Open the database in read-only, write-only or read-write mode
+.IP -r
+Replace the entry associated with the specified key if it already exists.
+See option -s.
+.IP -s
+Store an entry under a specific key.
+An error occurs if the key already exists and the option -r was not specified.
+.IP -t
+Re-initialize the database before executing the command.
+.IP -v
+Verbose mode.
+Confirm stores and deletions.
+.IP -x
+If option -x is used with option -c, then if the database already exists,
+an error occurs.
+This can be used to implement a simple exclusive access locking mechanism.
+.SH SEE ALSO
+ndbm(3)
+.SH AUTHOR
+janick@bnr.ca
+
--- /dev/null
+++ b/man/sdbm.3
@@ -1,0 +1,274 @@
+.\" $Id: sdbm.3,v 1.2 90/12/13 13:00:57 oz Exp $
+.TH SDBM 3 "1 March 1990"
+.SH NAME
+sdbm, dbm_open, dbm_prep, dbm_close, dbm_fetch, dbm_store, dbm_delete, dbm_firstkey, dbm_nextkey, dbm_hash, dbm_rdonly, dbm_error, dbm_clearerr, dbm_dirfno, dbm_pagfno \- data base subroutines
+.SH SYNOPSIS
+.nf
+.ft B
+#include <sdbm.h>
+.sp
+typedef struct {+ char *dptr;
+ int dsize;
+} datum;
+.sp
+datum nullitem = { NULL, 0 };+.sp
+\s-1DBM\s0 *dbm_open(char *file, int flags, int mode, int createdb)
+.sp
+\s-1DBM\s0 *dbm_prep(char *dirname, char *pagname, int flags, int mode, int createdb)
+.sp
+void dbm_close(\s-1DBM\s0 *db)
+.sp
+datum dbm_fetch(\s-1DBM\s0 *db, key)
+.sp
+int dbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags)
+.sp
+int dbm_delete(\s-1DBM\s0 *db, datum key)
+.sp
+datum dbm_firstkey(\s-1DBM\s0 *db)
+.sp
+datum dbm_nextkey(\s-1DBM\s0 *db)
+.sp
+long dbm_hash(char *string, int len)
+.sp
+int dbm_rdonly(\s-1DBM\s0 *db)
+int dbm_error(\s-1DBM\s0 *db)
+dbm_clearerr(\s-1DBM\s0 *db)
+int dbm_dirfno(\s-1DBM\s0 *db)
+int dbm_pagfno(\s-1DBM\s0 *db)
+.ft R
+.fi
+.SH DESCRIPTION
+.IX "database library" sdbm "" "\fLsdbm\fR"
+.IX dbm_open "" "\fLdbm_open\fR \(em open \fLsdbm\fR database"
+.IX dbm_prep "" "\fLdbm_prep\fR \(em prepare \fLsdbm\fR database"
+.IX dbm_close "" "\fLdbm_close\fR \(em close \fLsdbm\fR routine"
+.IX dbm_fetch "" "\fLdbm_fetch\fR \(em fetch \fLsdbm\fR database data"
+.IX dbm_store "" "\fLdbm_store\fR \(em add data to \fLsdbm\fR database"
+.IX dbm_delete "" "\fLdbm_delete\fR \(em remove data from \fLsdbm\fR database"
+.IX dbm_firstkey "" "\fLdbm_firstkey\fR \(em access \fLsdbm\fR database"
+.IX dbm_nextkey "" "\fLdbm_nextkey\fR \(em access \fLsdbm\fR database"
+.IX dbm_hash "" "\fLdbm_hash\fR \(em string hash for \fLsdbm\fR database"
+.IX dbm_rdonly "" "\fLdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode"
+.IX dbm_error "" "\fLdbm_error\fR \(em return \fLsdbm\fR database error condition"
+.IX dbm_clearerr "" "\fLdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition"
+.IX dbm_dirfno "" "\fLdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor"
+.IX dbm_pagfno "" "\fLdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor"
+.IX "database functions \(em \fLsdbm\fR" dbm_open "" \fLdbm_open\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_prep "" \fLdbm_prep\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_close "" \fLdbm_close\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_fetch "" \fLdbm_fetch\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_store "" \fLdbm_store\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_delete "" \fLdbm_delete\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_firstkey "" \fLdbm_firstkey\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_nextkey "" \fLdbm_nextkey\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_rdonly "" \fLdbm_rdonly\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_error "" \fLdbm_error\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_clearerr "" \fLdbm_clearerr\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_dirfno "" \fLdbm_dirfno\fP
+.IX "database functions \(em \fLsdbm\fR" dbm_pagfno "" \fLdbm_pagfno\fP
+.LP
+This package allows an application to maintain a mapping of <key,value> pairs
+in disk files. This is not to be considered a real database system, but is
+still useful in many simple applications built around fast retrieval of a data
+value from a key. This implementation uses an external hashing scheme,
+called Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp.
+184-201. Retrieval of any item usually requires a single disk access.
+The application interface is compatible with the
+.IR ndbm (3)
+library.
+.LP
+An
+.B sdbm
+database is kept in two files usually given the extensions
+.B \.dir
+and
+.BR \.pag .
+The
+.B \.dir
+file contains a bitmap representing a forest of binary hash trees, the leaves
+of which indicate data pages in the
+.B \.pag
+file.
+.LP
+The application interface uses the
+.B datum
+structure to describe both
+.I keys
+and
+.IR value s.
+A
+.B datum
+specifies a byte sequence of
+.I dsize
+size pointed to by
+.IR dptr .
+If you use
+.SM ASCII
+strings as
+.IR key s
+or
+.IR value s,
+then you must decide whether or not to include the terminating
+.SM NUL
+byte which sometimes defines strings. Including it will require larger
+database files, but it will be possible to get sensible output from a
+.IR strings (1)
+command applied to the data file.
+.LP
+In order to allow a process using this package to manipulate multiple
+databases, the applications interface always requires a
+.IR handle ,
+a
+.BR "DBM *" ,
+to identify the database to be manipulated. Such a handle can be obtained
+from the only routines that do not require it, namely
+.BR dbm_open (\|)
+or
+.BR dbm_prep (\|).
+Either of these will open or create the two necessary files. The
+difference is that the latter allows explicitly naming the bitmap and data
+files whereas
+.BR dbm_open (\|)
+will take a base file name and call
+.BR dbm_prep (\|)
+with the default extensions.
+The
+.I flags
+and
+.I mode
+parameters are the same as for
+.BR open (2).
+If createdb is non-zero, the database will be created, otherwise an existing
+database will be opened, if present.
+.LP
+To free the resources occupied while a database handle is active, call
+.BR dbm_close (\|).
+.LP
+Given a handle, one can retrieve data associated with a key by using the
+.BR dbm_fetch (\|)
+routine, and associate data with a key by using the
+.BR dbm_store (\|)
+routine.
+.LP
+The values of the
+.I flags
+parameter for
+.BR dbm_store (\|)
+can be either
+.BR \s-1DBM_INSERT\s0 ,
+which will not change an existing entry with the same key, or
+.BR \s-1DBM_REPLACE\s0 ,
+which will replace an existing entry with the same key.
+Keys are unique within the database.
+.LP
+To delete a key and its associated value use the
+.BR dbm_delete (\|)
+routine.
+.LP
+To retrieve every key in the database, use a loop like:
+.sp
+.nf
+.ft B
+for (key = dbm_firstkey(db); key.dptr != NULL; key = dbm_nextkey(db))
+ ;
+.ft R
+.fi
+.LP
+The order of retrieval is unspecified.
+.LP
+If you determine that the performance of the database is inadequate or
+you notice clustering or other effects that may be due to the hashing
+algorithm used by this package, you can override it by supplying your
+own
+.BR dbm_hash (\|)
+routine. Doing so will make the database unintelligable to any other
+applications that do not use your specialized hash function.
+.sp
+.LP
+The following macros are defined in the header file:
+.IP
+.BR dbm_rdonly (\|)
+returns true if the database has been opened read\-only.
+.IP
+.BR dbm_error (\|)
+returns true if an I/O error has occurred.
+.IP
+.BR dbm_clearerr (\|)
+allows you to clear the error flag if you think you know what the error
+was and insist on ignoring it.
+.IP
+.BR dbm_dirfno (\|)
+returns the file descriptor associated with the bitmap file.
+.IP
+.BR dbm_pagfno (\|)
+returns the file descriptor associated with the data file.
+.SH SEE ALSO
+.IR open (2).
+.SH DIAGNOSTICS
+Functions that return a
+.B "DBM *"
+handle will use
+.SM NULL
+to indicate an error.
+Functions that return an
+.B int
+will use \-1 to indicate an error. The normal return value in that case is 0.
+Functions that return a
+.B datum
+will return
+.B nullitem
+to indicate an error.
+.LP
+As a special case of
+.BR dbm_store (\|),
+if it is called with the
+.B \s-1DBM_INSERT\s0
+flag and the key already exists in the database, the return value will be 1.
+.LP
+In general, if a function parameter is invalid,
+.B errstr
+will be set.
+.SH AUTHOR
+.IP "Ozan S. Yigit" (oz@nexus.yorku.ca)
+.SH BUGS
+The sum of key and value data sizes must not exceed
+.B \s-1PAIRMAX\s0
+(2032 bytes).
+.LP
+The sum of the key and value data sizes where several keys hash to the
+same value must fit within one bitmap page.
+.LP
+The
+.B \.pag
+file will contain holes, so its apparent size is larger than its contents.
+When copied through the filesystem the holes will be filled.
+.LP
+The contents of
+.B datum
+values returned are in volatile storage. If you want to retain the values
+pointed to, you must copy them immediately before another call to this package.
+.LP
+The only safe way for multiple processes to (read and) update a database at
+the same time, is to implement a private locking scheme outside this package
+and open and close the database between lock acquisitions. It is safe for
+multiple processes to concurrently access a database read-only.
+.SH APPLICATIONS PORTABILITY
+For (nearly) complete source code compatibility with the Berkeley Unix
+.IR ndbm (3)
+library, the
+.B sdbm.h
+header file should be installed as
+.BR ndbm.h .
+.LP
+The
+.B nullitem
+data item, and the
+.BR dbm_prep (\|),
+.BR dbm_hash (\|),
+.BR dbm_rdonly (\|),
+.BR dbm_dirfno (\|),
+and
+.BR dbm_pagfno (\|)
+functions are unique to this package.
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,95 @@
+</$objtype/mkfile
+
+PROGS= dba\
+ dbd\
+ dbe\
+ dbu
+
+LIB= libsdbm.a
+
+LIBOBJ= hash.$O\
+ pair.$O\
+ sdbm.$O
+
+LIBHDR= sdbm.h
+
+HFILES= $LIBHDR\
+ pair.h\
+ tune.h
+
+BIN= /$objtype/bin
+LIBDIR= /$objtype/lib
+INCLUDEDIR= /sys/include
+MANDIR= /sys/man
+
+CFLAGS= $CFLAGS -DDUFF -DTIME #-DDEBUG
+
+TESTDB= /tmp/sdbmtest
+
+all:V: $LIB $PROGS
+
+$LIBOBJ: $HFILES
+
+$LIB: $LIBOBJ
+ ar vu $LIB $LIBOBJ
+
+dba: dba.$O util.$O $LIB
+ $LD $LDFLAGS -o $target $prereq
+
+dbd: dbd.$O util.$O $LIB
+ $LD $LDFLAGS -o $target $prereq
+
+dbu: dbu.$O util.$O $LIB
+ $LD $LDFLAGS -o $target $prereq
+
+dbe: dbe.$O $LIB
+ $LD $LDFLAGS -o $target $prereq
+
+%.$O: %.c
+ $CC $CFLAGS $stem.c
+
+nuke:V: clean
+clean:V: cleantest
+ rm -f $PROGS *.$O
+ rm -f $LIBOBJ
+ rm -f $LIB
+ rm -f README
+
+readme:V: README
+README: readme.ms
+ nroff -ms readme.ms | col -b >README
+
+cleantest:V:
+ rm -f $TESTDB.pag $TESTDB.dir
+
+test:V: all cleantest
+ awk ' /^[A-Z].*/ {+ printf "%s\t", $1
+ for (i = 0; i< 40; i++)
+ printf "%s.", $1
+ printf "\n"
+ }' /lib/dict/pgwindex | dbu build $TESTDB
+ dba $TESTDB | tail -2
+
+uninstall:V:
+ rm -f $LIBDIR/$LIB
+ rm -f $INCLUDEDIR/sdbm.h
+ rm -f $MANDIR/3/sdbm
+
+install: $LIB sdbm.h
+ cp $LIB $LIBDIR
+ cp sdbm.h $INCLUDEDIR
+ echo '#pragma lib "'$LIB'"' >> $INCLUDEDIR/sdbm.h
+ cp man/sdbm.3 $MANDIR/3/sdbm
+
+dbeinstall: dbe
+ cp dbe $BIN
+ cp man/dbe.1 $MANDIR/1/dbe
+
+dbeuninstall:
+ rm -f $BIN/dbe
+ rm -f $MANDIR/1/dbe
+
+allinstall:V: install dbeinstall
+
+alluninstall:V: uninstall dbeuninstall
--- /dev/null
+++ b/pair.c
@@ -1,0 +1,262 @@
+/*
+ * sdbm - ndbm work-alike hashed database library
+ * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
+ * author: oz@nexus.yorku.ca
+ * status: public domain.
+ *
+ * page-level routines
+ */
+
+#include <u.h>
+#include <libc.h>
+
+#include "sdbm.h"
+#include "tune.h"
+#include "pair.h"
+
+#define exhash(item) dbm_hash((item).dptr, (item).dsize)
+
+
+
+/*
+ * forward
+ */
+
+int seepair(char *, int, char *, int);
+
+/*
+ * page format:
+ * +------------------------------+
+ * ino | n | keyoff | datoff | keyoff |
+ * +------------+--------+--------+
+ * | datoff | - - - ----> |
+ * +--------+---------------------+
+ * | F R E E A R E A |
+ * +--------------+---------------+
+ * | <---- - - - | data |
+ * +--------+-----+----+----------+
+ * | key | data | key |
+ * +--------+----------+----------+
+ *
+ * calculating the offsets for free area: if the number
+ * of entries (ino[0]) is zero, the offset to the END of
+ * the free area is the block size. Otherwise, it is the
+ * nth (ino[ino[0]]) entry's offset.
+ */
+
+int
+fitpair(char * pag, int need)
+{+ int n;
+ int off;
+ int free;
+ short *ino = (short *) pag;
+
+ off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
+ free = off - (n + 1) * sizeof(short);
+ need += 2 * sizeof(short);
+
+ debug(("free %d need %d\n", free, need));+
+ return need <= free;
+}
+
+void
+putpair(char * pag, datum key, datum val)
+{+ int n;
+ int off;
+ short *ino = (short *) pag;
+
+ off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
+/*
+ * enter the key first
+ */
+ off -= key.dsize;
+ (void) memcpy(pag + off, key.dptr, key.dsize);
+ ino[n + 1] = off;
+/*
+ * now the data
+ */
+ off -= val.dsize;
+ (void) memcpy(pag + off, val.dptr, val.dsize);
+ ino[n + 2] = off;
+/*
+ * adjust item count
+ */
+ ino[0] += 2;
+}
+
+datum
+getpair(char * pag, datum key)
+{+ int i;
+ int n;
+ datum val;
+ short *ino = (short *) pag;
+
+ if ((n = ino[0]) == 0)
+ return nullitem;
+
+ if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
+ return nullitem;
+
+ val.dptr = pag + ino[i + 1];
+ val.dsize = ino[i] - ino[i + 1];
+ return val;
+}
+
+int
+duppair(char * pag, datum key)
+{+ register short *ino = (short *) pag;
+ return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0;
+}
+
+
+datum
+getnkey(char *pag, int num)
+{+ datum key;
+ int off;
+ short *ino = (short *) pag;
+
+ num = num * 2 - 1;
+ if (ino[0] == 0 || num > ino[0])
+ return nullitem;
+
+ off = (num > 1) ? ino[num - 1] : PBLKSIZ;
+
+ key.dptr = pag + ino[num];
+ key.dsize = off - ino[num];
+
+ return key;
+}
+
+int
+delpair(char *pag, datum key)
+{+ int n;
+ int i;
+ short *ino = (short *) pag;
+
+ if ((n = ino[0]) == 0)
+ return 0;
+
+ if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
+ return 0;
+/*
+ * found the key. if it is the last entry
+ * [i.e. i == n - 1] we just adjust the entry count.
+ * hard case: move all data down onto the deleted pair,
+ * shift offsets onto deleted offsets, and adjust them.
+ * [note: 0 < i < n]
+ */
+ if (i < n - 1) {+ int m;
+ char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);
+ char *src = pag + ino[i + 1];
+ int zoo = dst - src;
+
+ debug(("free-up %d ", zoo));+/*
+ * shift data/keys down
+ */
+ m = ino[i + 1] - ino[n];
+ memmove(dst - m, src - m, m);
+/*
+ * adjust offset index up
+ */
+ while (i < n - 1) {+ ino[i] = ino[i + 2] + zoo;
+ i++;
+ }
+ }
+ ino[0] -= 2;
+ return 1;
+}
+
+/*
+ * search for the key in the page.
+ * return offset index in the range 0 < i < n.
+ * return 0 if not found.
+ */
+int
+seepair(char *pag, int n, char * key, int siz)
+{+ int i;
+ int off = PBLKSIZ;
+ short *ino = (short *) pag;
+
+ for (i = 1; i < n; i += 2) {+ if (siz == off - ino[i] &&
+ memcmp(key, pag + ino[i], siz) == 0)
+ return i;
+ off = ino[i + 1];
+ }
+ return 0;
+}
+
+void
+splpage(char *pag, char *new, long sbit)
+{+ datum key;
+ datum val;
+
+ register int n;
+ register int off = PBLKSIZ;
+ char cur[PBLKSIZ];
+ register short *ino = (short *) cur;
+
+ (void) memcpy(cur, pag, PBLKSIZ);
+ (void) memset(pag, 0, PBLKSIZ);
+ (void) memset(new, 0, PBLKSIZ);
+
+ n = ino[0];
+ for (ino++; n > 0; ino += 2) {+ key.dptr = cur + ino[0];
+ key.dsize = off - ino[0];
+ val.dptr = cur + ino[1];
+ val.dsize = ino[0] - ino[1];
+/*
+ * select the page pointer (by looking at sbit) and insert
+ */
+ (void) putpair((exhash(key) & sbit) ? new : pag, key, val);
+
+ off = ino[1];
+ n -= 2;
+ }
+
+ debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, + ((short *) new)[0] / 2,
+ ((short *) pag)[0] / 2));
+}
+
+/*
+ * check page sanity:
+ * number of entries should be something
+ * reasonable, and all offsets in the index should be in order.
+ * this could be made more rigorous.
+ */
+int
+chkpage(char *pag)
+{+ int n;
+ int off;
+ short *ino = (short *) pag;
+
+ if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short))
+ return 0;
+
+ if (n > 0) {+ off = PBLKSIZ;
+ for (ino++; n > 0; ino += 2) {+ if (ino[0] > off || ino[1] > off ||
+ ino[1] > ino[0])
+ return 0;
+ off = ino[1];
+ n -= 2;
+ }
+ }
+ return 1;
+}
--- /dev/null
+++ b/pair.h
@@ -1,0 +1,10 @@
+extern int fitpair(char *, int);
+extern void putpair(char *, datum, datum);
+extern datum getpair(char *, datum);
+extern int delpair(char *, datum);
+extern int chkpage(char *);
+extern datum getnkey(char *, int);
+extern void splpage(char *, char *, long);
+#ifdef SEEDUPS
+extern int duppair(char *, datum);
+#endif
--- /dev/null
+++ b/readme.ms
@@ -1,0 +1,353 @@
+.\" tbl | readme.ms | [tn]roff -ms | ...
+.\" note the "C" (courier) and "CB" fonts: you will probably have to
+.\" change these.
+.\" $Id: readme.ms,v 1.1 90/12/13 13:09:15 oz Exp Locker: oz $
+
+.de P1
+.br
+.nr dT 4
+.nf
+.ft C
+.sp .5
+.nr t \\n(dT*\\w'x'u
+.ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu
+..
+.de P2
+.br
+.ft 1
+.br
+.sp .5
+.br
+.fi
+..
+.\" CW uses the typewriter/courier font.
+.de CW
+\fC\\$1\\fP\\$2
+..
+
+.\" Footnote numbering [by Henry Spencer]
+.\" <text>\*f for a footnote number..
+.\" .FS
+.\" \*F <footnote text>
+.\" .FE
+.\"
+.ds f \\u\\s-2\\n+f\\s+2\\d
+.nr f 0 1
+.ds F \\n+F.
+.nr F 0 1
+
+.ND
+.LP
+.TL
+\fIsdbm\fP \(em Substitute DBM
+.br
+or
+.br
+Berkeley \fIndbm\fP for Every UN*X\** Made Simple
+.AU
+Ozan (oz) Yigit
+.AI
+The Guild of PD Software Toolmakers
+Toronto - Canada
+.sp
+oz@nexus.yorku.ca
+.LP
+.FS
+UN*X is not a trademark of any (dis)organization.
+.FE
+.sp 2
+\fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP
+.SH
+A The Clone of the \fIndbm\fP library
+.PP
+The sources accompanying this notice \(em \fIsdbm\fP \(em constitute
+the first public release (Dec. 1990) of a complete clone of
+the Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to
+clone the proven functionality of \fIndbm\fP as closely as possible,
+including a few improvements. It is practical, easy to understand, and
+compatible.
+The \fIsdbm\fP library is not derived from any licensed, proprietary or
+copyrighted software.
+.PP
+The \fIsdbm\fP implementation is based on a 1978 algorithm
+[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
+In the course of searching for a substitute for \fIndbm\fP, I
+prototyped three different external-hashing algorithms [Lar78, Fag79, Lit80]
+and ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP
+implementation. The Bell Labs
+\fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by
+Ken Thompson, [Tho90, Tor87] and predates Larson's work.
+.PP
+The \fIsdbm\fR programming interface is totally compatible
+with \fIndbm\fP and includes a slight improvement in database initialization.
+It is also expected to be binary-compatible under most UN*X versions that
+support the \fIndbm\fP library.
+.PP
+The \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP
+library, as a side effect of various simplifications to the original Larson
+algorithm. It does produce \fIholes\fP in the page file as it writes
+pages past the end of file. (Larson's paper include a clever solution to
+this problem that is a result of using the hash value directly as a block
+address.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP
+creates fewer holes in general, and the resulting pagefiles are
+smaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP
+in database creation.
+Unlike the \fIndbm\fP, the \fIsdbm\fP
+.CW store
+operation will not ``wander away'' trying to split its
+data pages to insert a datum that \fIcannot\fP (due to elaborate worst-case
+situations) be inserted. (It will fail after a pre-defined number of attempts.)
+.SH
+Important Compatibility Warning
+.PP
+The \fIsdbm\fP and \fIndbm\fP
+libraries \fIcannot\fP share databases: one cannot read the (dir/pag)
+database created by the other. This is due to the differences
+between the \fIndbm\fP and \fIsdbm\fP algorithms\**,
+.FS
+Torek's discussion [Tor87]
+indicates that \fIdbm/ndbm\fP implementations use the hash
+value to traverse the radix trie differently than \fIsdbm\fP
+and as a result, the page indexes are generated in \fIdifferent\fP order.
+For more information, send e-mail to the author.
+.FE
+and the hash functions
+used.
+It is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP
+by ignoring the index completely: see
+.CW dbd ,
+.CW dbu
+etc.
+.R
+.LP
+.SH
+Notice of Intellectual Property
+.LP
+\fIThe entire\fP sdbm \fIlibrary package, as authored by me,\fP Ozan S. Yigit,
+\fIis hereby placed in the public domain.\fP As such, the author is not
+responsible for the consequences of use of this software, no matter how
+awful, even if they arise from defects in it. There is no expressed or
+implied warranty for the \fIsdbm\fP library.
+.PP
+Since the \fIsdbm\fP
+library package is in the public domain, this \fIoriginal\fP
+release or any additional public-domain releases of the modified original
+cannot possibly (by definition) be withheld from you. Also by definition,
+You (singular) have all the rights to this code (including the right to
+sell without permission, the right to hoard\**
+.FS
+You cannot really hoard something that is available to the public at
+large, but try if it makes you feel any better.
+.FE
+and the right to do other icky things as
+you see fit) but those rights are also granted to everyone else.
+.PP
+Please note that all previous distributions of this software contained
+a copyright (which is now dropped) to protect its
+origins and its current public domain status against any possible claims
+and/or challenges.
+.SH
+Acknowledgments
+.PP
+Many people have been very helpful and supportive. A partial list would
+necessarily include Rayan Zacherissen (who contributed the man page,
+and also hacked a MMAP version of \fIsdbm\fP),
+Arnold Robbins, Chris Lewis,
+Bill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started
+in the first place), Johannes Ruschein
+(who did the minix port) and David Tilbrook. I thank you all.
+.SH
+Distribution Manifest and Notes
+.LP
+This distribution of \fIsdbm\fP includes (at least) the following:
+.P1
+ CHANGES change log
+ README this file.
+ biblio a small bibliography on external hashing
+ dba.c a crude (n/s)dbm page file analyzer
+ dbd.c a crude (n/s)dbm page file dumper (for conversion)
+ dbe.1 man page for dbe.c
+ dbe.c Janick's database editor
+ dbm.c a dbm library emulation wrapper for ndbm/sdbm
+ dbm.h header file for the above
+ dbu.c a crude db management utility
+ hash.c hashing function
+ makefile guess.
+ pair.c page-level routines (posted earlier)
+ pair.h header file for the above
+ readme.ms troff source for the README file
+ sdbm.3 man page
+ sdbm.c the real thing
+ sdbm.h header file for the above
+ tune.h place for tuning & portability thingies
+ util.c miscellaneous
+.P2
+.PP
+.CW dbu
+is a simple database manipulation program\** that tries to look
+.FS
+The
+.CW dbd ,
+.CW dba ,
+.CW dbu
+utilities are quick hacks and are not fit for production use. They were
+developed late one night, just to test out \fIsdbm\fP, and convert some
+databases.
+.FE
+like Bell Labs'
+.CW cbt
+utility. It is currently incomplete in functionality.
+I use
+.CW dbu
+to test out the routines: it takes (from stdin) tab separated
+key/value pairs for commands like
+.CW build
+or
+.CW insert
+or takes keys for
+commands like
+.CW delete
+or
+.CW look .
+.P1
+ dbu <build|creat|look|insert|cat|delete> dbmfile
+.P2
+.PP
+.CW dba
+is a crude analyzer of \fIdbm/sdbm/ndbm\fP
+page files. It scans the entire
+page file, reporting page level statistics, and totals at the end.
+.PP
+.CW dbd
+is a crude dump program for \fIdbm/ndbm/sdbm\fP
+databases. It ignores the
+bitmap, and dumps the data pages in sequence. It can be used to create
+input for the
+.CW dbu
+utility.
+Note that
+.CW dbd
+will skip any NULLs in the key and data
+fields, thus is unsuitable to convert some peculiar databases that
+insist in including the terminating null.
+.PP
+I have also included a copy of the
+.CW dbe
+(\fIndbm\fP DataBase Editor) by Janick Bergeron [janick@bnr.ca] for
+your pleasure. You may find it more useful than the little
+.CW dbu
+utility.
+.PP
+.CW dbm.[ch]
+is a \fIdbm\fP library emulation on top of \fIndbm\fP
+(and hence suitable for \fIsdbm\fP). Written by Robert Elz.
+.PP
+The \fIsdbm\fP
+library has been around in beta test for quite a long time, and from whatever
+little feedback I received (maybe no news is good news), I believe it has been
+functioning without any significant problems. I would, of course, appreciate
+all fixes and/or improvements. Portability enhancements would especially be
+useful.
+.SH
+Implementation Issues
+.PP
+Hash functions:
+The algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling
+hash function to be effective. I ran into a set of constants for a simple
+hash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP
+for various inputs:
+.P1
+ /*
+ * polynomial conversion ignoring overflows
+ * 65599 nice. 65587 even better.
+ */
+ long
+ dbm_hash(char *str, int len) {+ register unsigned long n = 0;
+
+ while (len--)
+ n = n * 65599 + *str++;
+ return n;
+ }
+.P2
+.PP
+There may be better hash functions for the purposes of dynamic hashing.
+Try your favorite, and check the pagefile. If it contains too many pages
+with too many holes, (in relation to this one for example) or if
+\fIsdbm\fP
+simply stops working (fails after
+.CW SPLTMAX
+attempts to split) when you feed your
+NEWS
+.CW history
+file to it, you probably do not have a good hashing function.
+If you do better (for different types of input), I would like to know
+about the function you use.
+.PP
+Block sizes: It seems (from various tests on a few machines) that a page
+file block size
+.CW PBLKSIZ
+of 1024 is by far the best for performance, but
+this also happens to limit the size of a key/value pair. Depending on your
+needs, you may wish to increase the page size, and also adjust
+.CW PAIRMAX
+(the maximum size of a key/value pair allowed: should always be at least
+three words smaller than
+.CW PBLKSIZ .)
+accordingly. The system-wide version of the library
+should probably be
+configured with 1024 (distribution default), as this appears to be sufficient
+for most common uses of \fIsdbm\fP.
+.SH
+Portability
+.PP
+This package has been tested in many different UN*Xes even including minix,
+and appears to be reasonably portable. This does not mean it will port
+easily to non-UN*X systems.
+.SH
+Notes and Miscellaneous
+.PP
+The \fIsdbm\fP is not a very complicated package, at least not after you
+familiarize yourself with the literature on external hashing. There are
+other interesting algorithms in existence that ensure (approximately)
+single-read access to a data value associated with any key. These are
+directory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson
+variations), \fIspiral storage\fP [Mar79] or directory schemes such as
+\fIextensible hashing\fP [Fag79] by Fagin et al. I do hope these sources
+provide a reasonable playground for experimentation with other algorithms.
+See the June 1988 issue of ACM Computing Surveys [Enb88] for an
+excellent overview of the field.
+.PG
+.SH
+References
+.LP
+.IP [Lar78] 4m
+P.-A. Larson,
+``Dynamic Hashing'', \fIBIT\fP, vol. 18, pp. 184-201, 1978.
+.IP [Tho90] 4m
+Ken Thompson, \fIprivate communication\fP, Nov. 1990
+.IP [Lit80] 4m
+W. Litwin,
+`` Linear Hashing: A new tool for file and table addressing'',
+\fIProceedings of the 6th Conference on Very Large Dabatases (Montreal)\fP,
+pp. 212-223, Very Large Database Foundation, Saratoga, Calif., 1980.
+.IP [Fag79] 4m
+R. Fagin, J. Nievergelt, N. Pippinger, and H. R. Strong,
+``Extendible Hashing - A Fast Access Method for Dynamic Files'',
+\fIACM Trans. Database Syst.\fP, vol. 4, no.3, pp. 315-344, Sept. 1979.
+.IP [Wal84] 4m
+Rich Wales,
+``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP,
+Jan. 1984.
+.IP [Tor87] 4m
+Chris Torek,
+``Re: dbm.a and ndbm.a archives'', \fIUSENET newsgroup comp.unix\fP,
+1987.
+.IP [Mar79] 4m
+G. N. Martin,
+``Spiral Storage: Incrementally Augmentable Hash Addressed Storage'',
+\fITechnical Report #27\fP, University of Warwick, Coventry, U.K., 1979.
+.IP [Enb88] 4m
+R. J. Enbody and H. C. Du,
+``Dynamic Hashing Schemes'',\fIACM Computing Surveys\fP,
+vol. 20, no. 2, pp. 85-113, June 1988.
--- /dev/null
+++ b/sdbm.c
@@ -1,0 +1,570 @@
+/*
+ * sdbm - ndbm work-alike hashed database library
+ * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
+ * author: oz@nexus.yorku.ca
+ * status: public domain.
+ *
+ * core routines
+ */
+
+#include <u.h>
+#include <libc.h>
+
+#include "sdbm.h"
+#include "tune.h"
+#include "pair.h"
+
+/*
+ * forward
+ */
+static int getdbit(DBM *, long);
+static int setdbit(DBM *, long);
+static int getpage(DBM *, long);
+static datum getnext(DBM *);
+static int makroom(DBM *, long, int);
+
+/*
+ * useful macros
+ */
+#define bad(x) ((x).dptr == nil || (x).dsize <= 0)
+#define exhash(item) dbm_hash((item).dptr, (item).dsize)
+#define ioerr(db) ((db)->flags |= DBM_IOERR)
+
+#define OFF_PAG(off) (long) (off) * PBLKSIZ
+#define OFF_DIR(off) (long) (off) * DBLKSIZ
+
+static long masks[] = {+ 000000000000, 000000000001, 000000000003, 000000000007,
+ 000000000017, 000000000037, 000000000077, 000000000177,
+ 000000000377, 000000000777, 000000001777, 000000003777,
+ 000000007777, 000000017777, 000000037777, 000000077777,
+ 000000177777, 000000377777, 000000777777, 000001777777,
+ 000003777777, 000007777777, 000017777777, 000037777777,
+ 000077777777, 000177777777, 000377777777, 000777777777,
+ 001777777777, 003777777777, 007777777777, 017777777777
+};
+
+datum nullitem = {nil, 0};+
+DBM *
+dbm_open(char *file, int flags, int mode, int createdb)
+{+ DBM *db;
+ char *dirname;
+ char *pagname;
+ int n;
+
+ debug(("dbm_open(%s, %d, %o)\n", file, flags, mode));+ if (file == nil || !*file)
+ {+ werrstr("%s", "Invalid file");+ return (DBM *) nil;
+ }
+
+ /*
+ * need space for two seperate filenames
+ */
+ n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
+
+ if ((dirname = malloc((unsigned) n)) == nil)
+ {+ fprint(2, "mem");
+ werrstr("%s", "No memory");+ return (DBM *) nil;
+ }
+ /*
+ * build the file names
+ */
+ dirname = strcat(strcpy(dirname, file), DIRFEXT);
+ pagname = strcpy(dirname + strlen(dirname) + 1, file);
+ pagname = strcat(pagname, PAGFEXT);
+
+ db = dbm_prep(dirname, pagname, flags, mode, createdb);
+
+ free((char *) dirname);
+ return db;
+}
+
+DBM *
+dbm_prep(char *dirname, char *pagname, int flags, int mode, int createdb)
+{+ DBM *db;
+ Dir *dstat;
+
+ debug(("dbm_prep(%s, %s, %d, %o, %d)\n", dirname, pagname, flags, mode, createdb));+ if ((db = (DBM *) malloc(sizeof(DBM))) == nil)
+ {+ werrstr("%s", "No memory for database");+ return (DBM *) nil;
+ }
+
+ db->flags = 0;
+ db->hmask = 0;
+ db->blkptr = 0;
+ db->keyptr = 0;
+
+ /*
+ * adjust user flags so that WRONLY becomes RDWR,
+ * as required by this package. Also set our internal
+ * flag for RDONLY if needed.
+ */
+
+ if (flags & OWRITE)
+ flags = ORDWR;
+
+ else if ((flags & OREAD) == flags)
+ db->flags = DBM_RDONLY;
+
+ /*
+ * open the files in sequence, and stat the dirfile.
+ * If we fail anywhere, undo everything, return nil.
+ */
+
+ if ((dstat = dirstat(pagname)) == nil && createdb)
+ db->pagf = create(pagname, flags, mode);
+ else {+ db->pagf = open(pagname, flags);
+ free(dstat);
+ }
+
+ if(db->pagf > -1) {+ if((dstat = dirstat(dirname)) == nil && createdb == 1)
+ db->dirf = create(dirname, flags, mode);
+ else {+ db->dirf = open(dirname, flags);
+ free(dstat);
+ }
+
+ debug(("dirf: %d, pagf: %d\n", db->dirf, db->pagf));+
+ if(db->dirf > -1) {+
+ /*
+ * need the dirfile size to establish max bit number.
+ */
+ dstat = dirfstat(db->dirf);
+ debug(("prep:dirf %d length %d\n", db->dirf, dstat->length));+ if (dstat != nil) {+ /*
+ * zero size: either a fresh database, or one with a single,
+ * unsplit data page: dirpage is all zeros.
+ */
+ db->dirbno = (!dstat->length) ? 0 : -1;
+ db->pagbno = -1;
+ db->maxbno = dstat->length * BYTESIZ;
+
+ (void) memset(db->pagbuf, 0, PBLKSIZ);
+ (void) memset(db->dirbuf, 0, DBLKSIZ);
+ free(dstat);
+ /*
+ * success
+ */
+ return db;
+ }
+ debug(("%s\n", "Prep: dirf close"));+ (void) close(db->dirf);
+ }
+ debug(("%s\n", "Prep: pagf close"));+ (void) close(db->pagf);
+ }
+ free((char *) db);
+ return (DBM *) nil;
+}
+
+void
+dbm_close(DBM *db)
+{+ debug(("dbm_close\n"));+ if (db == nil)
+ werrstr("%s", "Close of invalid database");+ else {+ (void) close(db->dirf);
+ (void) close(db->pagf);
+ free((char *) db);
+ }
+}
+
+datum
+dbm_fetch(DBM *db, datum key)
+{+ debug(("dbm_fetch(%d, %s)\n",db->dirf, key.dptr));+ if (db == nil || bad(key))
+ {+ werrstr("%s", "No DB or invalid key");+ return nullitem;
+ }
+ if (getpage(db, exhash(key)))
+ return getpair(db->pagbuf, key);
+
+ werrstr("%s", "DB io error");+ return nullitem;
+}
+
+
+long
+dbm_forder(DBM *db, datum key)
+{+ debug(("dbm_forder(%d, %s)\n", db->dirf, key.dptr));+ if (db == nil || bad(key))
+ {+ werrstr("%s:%d:%s", "Invalid DB or key", db->dirf, key.dptr);+ return -1;
+ }
+
+ return getpage(db, exhash(key));
+ }
+
+
+int
+dbm_delete(DBM *db, datum key)
+{+ debug(("dbm_delete(%d, %s)\n", db->dirf, key.dptr));+ if (db == nil || bad(key))
+ {+ werrstr("%s", "Invalid DB or key");+ return -1;
+ }
+ if (dbm_rdonly(db))
+ {+ werrstr("%s", "Delete: read-only database");+ return -1;
+ }
+
+ if (getpage(db, exhash(key))) {+ if (!delpair(db->pagbuf, key))
+ return -1;
+ /*
+ update the page file
+ */
+ if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
+ || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
+ {+ werrstr("%s", "Seek IO error"); + return -1;
+ }
+
+ return 0;
+ }
+
+ return ioerr(db), -1;
+}
+
+int
+dbm_store(DBM *db, datum key, datum val, int flags)
+{+ debug(("dbm_store(%d, %s, %s, %d)\n", db->dirf, key.dptr, val.dptr, flags));+ int need;
+ register long hash;
+
+ if (db == nil || bad(key))
+ {+ werrstr("%s", "Store: bad DB or key");+ return -1;
+ }
+ if (dbm_rdonly(db))
+ {+ werrstr("%s", "Store: read-only database");+ return -1;
+ }
+
+ need = key.dsize + val.dsize;
+ /*
+ * is the pair too big (or too small) for this database ??
+ */
+ if (need < 0 || need > PAIRMAX)
+ {+ werrstr("%s", "Store: pair out of range");+ return -1;
+ }
+
+ if (getpage(db, (hash = exhash(key)))) {+ /*
+ * if we need to replace, delete the key/data pair first. If it is not there, ignore.
+ */
+ if (flags == DBM_REPLACE)
+ (void) delpair(db->pagbuf, key);
+#ifdef SEEDUPS
+ else if (duppair(db->pagbuf, key))
+ return 1;
+#endif
+ /*
+ * if we do not have enough room, we have to split.
+ */
+ if (!fitpair(db->pagbuf, need))
+ if (!makroom(db, hash, need))
+ {+ werrstr("%s", "Store: makroom io error");+ return -1;
+ }
+ /*
+ * we have enough room or split is successful. insert the key,
+ * and update the page file.
+ */
+ (void) putpair(db->pagbuf, key, val);
+
+ if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
+ || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
+ {+ werrstr("%s", "Store: insert failed");+ return -1;
+ }
+ /*
+ * success
+ */
+ return 0;
+ }
+ werrstr("%s", "STORE io error");+ return -1;
+}
+
+/*
+ * makroom - make room by splitting the overfull page
+ * this routine will attempt to make room for SPLTMAX times before
+ * giving up.
+ */
+static int
+makroom(DBM *db, long hash, int need)
+{+ long newp;
+ char twin[PBLKSIZ];
+ char *pag = db->pagbuf;
+ char *new = twin;
+ int smax = SPLTMAX;
+
+ debug(("makroom(%d, %d, %d)\n", db->dirf, hash, need));+ do {+ /*
+ * split the current page
+ */
+ (void) splpage(pag, new, db->hmask + 1);
+ /*
+ * address of the new page
+ */
+ newp = (hash & db->hmask) | (db->hmask + 1);
+
+ /*
+ * write delay, read avoidence/cache shuffle:
+ * select the page for incoming pair: if key is to go to the new page,
+ * write out the previous one, and copy the new one over, thus making
+ * it the current page. If not, simply write the new page, and we are
+ * still looking at the page of interest. current page is not updated
+ * here, as dbm_store will do so, after it inserts the incoming pair.
+ */
+ if (hash & (db->hmask + 1)) {+ if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
+ || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
+ return 0;
+ db->pagbno = newp;
+ (void) memcpy(pag, new, PBLKSIZ);
+ }
+ else if (seek(db->pagf, OFF_PAG(newp), 0) < 0
+ || write(db->pagf, new, PBLKSIZ) < 0)
+ return 0;
+
+ if (!setdbit(db, db->curbit))
+ return 0;
+ /*
+ * see if we have enough room now
+ */
+ if (fitpair(pag, need))
+ return 1;
+ /*
+ * try again... update curbit and hmask as getpage would have
+ * done. because of our update of the current page, we do not
+ * need to read in anything. BUT we have to write the current
+ * [deferred] page out, as the window of failure is too great.
+ */
+ db->curbit = 2 * db->curbit +
+ ((hash & (db->hmask + 1)) ? 2 : 1);
+ db->hmask |= db->hmask + 1;
+
+ if (seek(db->pagf, OFF_PAG(db->pagbno), 0) < 0
+ || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
+ return 0;
+
+ } while (--smax);
+ /*
+ * if we are here, this is real bad news. After SPLTMAX splits,
+ * we still cannot fit the key. say goodnight.
+ */
+#ifdef BADMESS
+ (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
+#endif
+ return 0;
+
+}
+
+/*
+ * the following two routines will break if
+ * deletions aren't taken into account. (ndbm bug)
+ */
+datum
+dbm_firstkey(DBM *db)
+{+ debug(("dbm_firstkey(%d)\n", db->dirf));+ if (db == nil)
+ {+ werrstr("%s", "Firstkey: null DB");+ return nullitem;
+ }
+ /*
+ * start at page 0
+ */
+ if (seek(db->pagf, OFF_PAG(0), 0) < 0
+ || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) {+ werrstr("firstkey Ioerror");+ return nullitem;
+ }
+ db->pagbno = 0;
+ db->blkptr = 0;
+ db->keyptr = 0;
+ return getnext(db);
+}
+
+datum
+dbm_nextkey(DBM *db)
+{+ debug(("dbm_nextkey(%d)\n", db->dirf));+ if (db == nil)
+ {+ werrstr("%s", "Nextkey: null DB");+ nullitem;
+ }
+ return getnext(db);
+}
+
+
+/*
+ * all important binary trie traversal
+ */
+static int
+getpage(DBM *db, long hash)
+{+ int hbit;
+ long dbit;
+ long pagb;
+
+ debug(("getpage(%d, %d)\n", db->dirf, hash));+
+ dbit = 0;
+ hbit = 0;
+ while (dbit < db->maxbno && getdbit(db, dbit))
+ dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
+
+ debug(("dbit: %d...", dbit));+
+ db->curbit = dbit;
+ db->hmask = masks[hbit];
+
+ pagb = hash & db->hmask;
+ /*
+ * see if the block we need is already in memory.
+ * note: this lookaside cache has about 10% hit rate.
+ */
+ if (pagb != db->pagbno) { + /*
+ * note: here, we assume a "hole" is read as 0s.
+ * if not, must zero pagbuf first.
+ */
+ if (seek(db->pagf, OFF_PAG(pagb), 0) < 0
+ || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
+ return 0;
+ if (!chkpage(db->pagbuf))
+ return 0;
+ db->pagbno = pagb;
+
+ debug(("pag read: %d\n", pagb));+ }
+ return 1;
+}
+
+static int
+getdbit(DBM *db, long dbit)
+{+ long c;
+ long dirb;
+
+ debug(("getdbit(%d, %d)\n", db->dirf, dbit));+
+ c = dbit / BYTESIZ;
+ dirb = c / DBLKSIZ;
+
+ if (dirb != db->dirbno)
+ {+ if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
+ || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
+ return 0;
+ db->dirbno = dirb;
+
+ debug(("dir read: %d\n", dirb));+ }
+
+ return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
+}
+
+
+static int
+setdbit(DBM *db, long dbit)
+{+ long c;
+ long dirb;
+
+ debug(("setdbit(%d, %d)\n", db->dirf, dbit));+ c = dbit / BYTESIZ;
+ dirb = c / DBLKSIZ;
+
+ if (dirb != db->dirbno) {+ if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
+ || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
+ return 0;
+ db->dirbno = dirb;
+
+ debug(("dir read: %d\n", dirb));+ }
+
+ db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
+
+ if (dbit >= db->maxbno)
+ db->maxbno += DBLKSIZ * BYTESIZ;
+
+ if (seek(db->dirf, OFF_DIR(dirb), 0) < 0
+ || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * getnext - get the next key in the page, and if done with
+ * the page, try the next page in sequence
+ */
+static datum
+getnext(DBM *db)
+{+ datum key;
+
+ debug(("getnext(%d)\n", db->dirf));+ for (;;) {+ db->keyptr++;
+ key = getnkey(db->pagbuf, db->keyptr);
+ if (key.dptr != nil)
+ return key;
+ /*
+ * we either run out, or there is nothing on this page..
+ * try the next one... If we lost our position on the
+ * file, we will have to seek.
+ */
+ db->keyptr = 0;
+ if (db->pagbno != db->blkptr++)
+ if (seek(db->pagf, OFF_PAG(db->blkptr), 0) < 0)
+ break;
+ db->pagbno = db->blkptr;
+ if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
+ break;
+ if (!chkpage(db->pagbuf))
+ break;
+ }
+
+ werrstr("getnext I/O error");+ return nullitem;
+}
--- /dev/null
+++ b/sdbm.h
@@ -1,0 +1,75 @@
+/*
+ * sdbm - ndbm work-alike hashed database library
+ * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
+ * author: oz@nexus.yorku.ca
+ * status: public domain.
+ */
+#define DBLKSIZ 1024 * 16
+#define PBLKSIZ 2048
+#define PAIRMAX PBLKSIZ - 16 /* arbitrary on PBLKSIZ-N */
+#define SPLTMAX 10 /* maximum allowed splits */
+ /* for a single insertion */
+#define DIRFEXT ".dir"
+#define PAGFEXT ".pag"
+
+typedef struct {+ int dirf; /* directory file descriptor */
+ int pagf; /* page file descriptor */
+ int flags; /* status/error flags, see below */
+ long maxbno; /* size of dirfile in bits */
+ long curbit; /* current bit number */
+ long hmask; /* current hash mask */
+ long blkptr; /* current block for nextkey */
+ int keyptr; /* current key for nextkey */
+ long blkno; /* current page to read/write */
+ long pagbno; /* current page in pagbuf */
+ char pagbuf[PBLKSIZ]; /* page file block buffer */
+ long dirbno; /* current block in dirbuf */
+ char dirbuf[DBLKSIZ]; /* directory file block buffer */
+} DBM;
+
+#define DBM_RDONLY 0x1 /* data base open read-only */
+#define DBM_IOERR 0x2 /* data base I/O error */
+
+/*
+ * utility macros
+ */
+#define dbm_rdonly(db) ((db)->flags & DBM_RDONLY)
+#define dbm_error(db) ((db)->flags & DBM_IOERR)
+
+#define dbm_clearerr(db) ((db)->flags &= ~DBM_IOERR) /* ouch */
+
+#define dbm_dirfno(db) ((db)->dirf)
+#define dbm_pagfno(db) ((db)->pagf)
+
+typedef struct {+ char *dptr;
+ int dsize;
+} datum;
+
+extern datum nullitem;
+
+/*
+ * flags to dbm_store
+ */
+#define DBM_INSERT 0
+#define DBM_REPLACE 1
+
+/*
+ * ndbm interface
+ */
+extern DBM *dbm_open(char *, int, int, int);
+extern void dbm_close(DBM *);
+extern datum dbm_fetch(DBM *, datum);
+extern int dbm_delete(DBM *, datum);
+extern int dbm_store(DBM *, datum, datum, int);
+extern datum dbm_firstkey(DBM *);
+extern datum dbm_nextkey(DBM *);
+extern long dbm_forder(DBM *, datum);
+
+/*
+ * other
+ */
+extern DBM *dbm_prep(char *, char *, int, int, int);
+extern long dbm_hash(char *, int);
+
--- /dev/null
+++ b/tune.h
@@ -1,0 +1,21 @@
+/*
+ * sdbm - ndbm work-alike hashed database library
+ * tuning and portability constructs [not nearly enough]
+ * author: oz@nexus.yorku.ca
+ */
+#define BYTESIZ 8
+/*
+ * important tuning parms (hah)
+ */
+
+#define SEEDUPS /* always detect duplicates */
+#define BADMESS /* generate a message for worst case:
+ cannot make room after SPLTMAX splits */
+/*
+ * misc
+ */
+#ifdef DEBUG
+#define debug(x) print x
+#else
+#define debug(x)
+#endif
--- /dev/null
+++ b/util.c
@@ -1,0 +1,41 @@
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include "sdbm.h"
+
+void
+oops(char *s1, char *s2)
+{+ extern char *progname;
+ if (progname)
+ fprintf(stderr, "%s:", progname);
+ fprintf(stderr, s1, s2);
+ fprintf(stderr, "\n");
+
+ exits(s2);
+}
+
+int
+okpage(char *pag)
+{+ register unsigned n;
+ register off;
+ register short *ino = (short *) pag;
+
+ if ((n = ino[0]) > PBLKSIZ / sizeof(short))
+ return 0;
+
+ if (!n)
+ return 1;
+
+ off = PBLKSIZ;
+ for (ino++; n; ino += 2) {+ if (ino[0] > off || ino[1] > off ||
+ ino[1] > ino[0])
+ return 0;
+ off = ino[1];
+ n -= 2;
+ }
+
+ return 1;
+}
--
⑨