shithub: semblance

Download patch

ref: 5dca33a54a4fd738473b429563e840d5c24ce620
parent: 18809f2691c47d132c77a7d9402e2fe3b176dffe
author: rodri <rgl@antares-labs.eu>
date: Sun Mar 23 13:37:12 EDT 2025

get rid of yacc and begin work on a custom compiler

--- a/.workspace
+++ b/.workspace
@@ -1,2 +1,2 @@
 #!/bin/rc
-B *.[hcy] mkfile readme.md
+B *.[hc] mkfile readme.md
--- a/alloc.c
+++ b/alloc.c
@@ -6,9 +6,9 @@
 {
 	void *p;
 
-	p = malloc(n);
+	p = mallocz(n, 1);
 	if(p == nil)
-		sysfatal("malloc: %r");
+		sysfatal("mallocz: %r");
 	setmalloctag(p, getcallerpc(&n));
 	return p;
 }
--- a/builtin.c
+++ b/builtin.c
@@ -1,24 +1,32 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include <geometry.h>
 #include "dat.h"
 #include "fns.h"
-#include "y.tab.h"
 
-static char *types[] = {
-	"double",
-	"point",
-	"vector",
-	"normal",
-	"quat",
+static Keyword kwtab[] = {	/* type keywords and their token codes, searched by lookupkw */
+	"double", TDOUBLE,
+	"point2", TPT2,
+	"point", TPT3,	/* the unsuffixed point, vector and normal map to the 3D tokens */
+	"point3", TPT3,
+	"vector2", TVEC2,
+	"vector", TVEC3,
+	"vector3", TVEC3,
+	"normal", TNORMAL3,
+	"normal2", TNORMAL2,
+	"normal3", TNORMAL3,
+	"quat", TQUAT,
+	"matrix3", TMAT3,
+	"matrix4", TMAT4,
 };
 
-static char *ctypes[] = {
-	"double",
-	"Point3",
-	"Point3",
-	"Point3",
-	"Quaternion",
+static Keyword optab[] = {	/* operators longer than one character, used by opstart and findop */
+	"++", TPP,
+	"--", TMM,
+	"==", TEQ,
+	"&&", TLAND,
+	"||", TLOR,
 };
 
 static Const consts[] = {
@@ -29,62 +37,123 @@
 	"Φ",	1.61803398874989484820,
 };
 
-static Builtin builtins[] = {
-	"sin",		sin,
-	"cos",		cos,
-	"atan",		atan,
-	"atan2",	atan2,
-	"log",		log,
-	"log10",	log10,
-	"exp",		exp,
-	"sqrt",		sqrt,
-	"int",		round,
-	"abs",		fabs,
-};
+/*
+ * lookupkw returns the token code kwtab associates with the
+ * keyword s, or -1 if s is not in the table.
+ */
+int
+lookupkw(char *s)
+{
+	int i;
 
-double
-round(double n)
+	for(i = 0; i < nelem(kwtab); i++)
+		if(strcmp(s, kwtab[i].name) == 0)
+			return kwtab[i].tok;
+	return -1;
+}
+
+/*
+ * opstart reports (1 or 0) whether c is the first character
+ * of any operator listed in optab.
+ */
+int
+opstart(int c)
 {
-	return floor(n + 0.5);
+	int i;
+
+	for(i = 0; i < nelem(optab); i++)
+		if(optab[i].name[0] == c)
+			return 1;
+	return 0;
 }
 
+/*
+ * findop maps an operator spelling to its token code.  A string
+ * of length one stands for itself (its character is returned);
+ * anything else must match an optab entry, or -1 is returned.
+ */
 int
-lookuptype(char *s)
+findop(char *s)
 {
 	int i;
 
-	for(i = 0; i < nelem(types); i++)
-		if(strcmp(s, types[i]) == 0)
-			return i;
+	if(strlen(s) == 1)
+		return s[0];
+	for(i = 0; i < nelem(optab); i++)
+		if(strcmp(s, optab[i].name) == 0)
+			return optab[i].tok;
 	return -1;
 }
 
-char *
-typename(int t)
+/*
+ * initsyms declares every entry of consts as a SYMCONST
+ * symbol through declsym.
+ */
+void
+initsyms(void)
 {
-	if(t >= 0 && t < nelem(types))
-		return types[t];
-	return nil;
+	int i;
+
+	for(i = 0; i < nelem(consts); i++)
+		declsym(consts[i].name, SYMCONST, consts[i].val);
 }
 
+/*
+ * gettokenname returns the symbolic name of t's type, or nil
+ * when the type is outside TEOF..TID, as it is for tokens
+ * that are their own rune.  TEOF has an entry as well;
+ * without one printtoken would format 1<<24 with %C.
+ */
 char *
-ctypename(int t)
+gettokenname(Token *t)
 {
-	if(t >= 0 && t < nelem(ctypes))
-		return ctypes[t];
-	return nil;
+	static char *tab[] = {
+	 [TEOF-TEOF]		"TEOF",
+	 [TDOUBLE-TEOF]		"TDOUBLE",
+	 [TPT2-TEOF]		"TPT2",
+	 [TPT3-TEOF]		"TPT3",
+	 [TVEC2-TEOF]		"TVEC2",
+	 [TVEC3-TEOF]		"TVEC3",
+	 [TNORMAL2-TEOF]	"TNORMAL2",
+	 [TNORMAL3-TEOF]	"TNORMAL3",
+	 [TQUAT-TEOF]		"TQUAT",
+	 [TMAT3-TEOF]		"TMAT3",
+	 [TMAT4-TEOF]		"TMAT4",
+	 [TNUM-TEOF]		"TNUM",
+	 [TSTR-TEOF]		"TSTR",
+	 [TPP-TEOF]		"TPP",
+	 [TMM-TEOF]		"TMM",
+	 [TEQ-TEOF]		"TEQ",
+	 [TLAND-TEOF]		"TLAND",
+	 [TLOR-TEOF]		"TLOR",
+	 [TID-TEOF]		"TID",
+	};
+
+	if(t->type < TEOF || t->type >= TEOF + nelem(tab))
+		return nil;
+
+	return tab[t->type-TEOF];
 }
 
+/*
+ * printtoken prints a one-line description of t: its name,
+ * followed by the value for TNUM or the text for TSTR and TID.
+ * Tokens without a name are printed as the rune t->type.
+ */
 void
-init(void)
+printtoken(Token *t)
 {
-	Symbol *s;
-	int i;
+	char *s;
 
-	for(i = 0; i < nelem(consts); i++)
-		install(consts[i].name, CONST, consts[i].val);
-	for(i = 0; i < nelem(builtins); i++){
-		s = install(builtins[i].name, BLTIN, 0);
-		s->fn = builtins[i].fn;
+	s = gettokenname(t);
+	if(s == nil){
+		print("%C\n", t->type);
+		return;
 	}
+	print("%s", s);
+	if(t->type == TNUM)
+		print(" (%g)", t->v);
+	else if(t->type == TSTR || t->type == TID)
+		print(" (\"%s\")", t->s);
+	print("\n");
 }
--- a/dat.h
+++ b/dat.h
@@ -1,44 +1,99 @@
-enum
-{
+enum {
+	TEOF = 1<<24,	/* named tokens start here; a single-rune token uses the rune itself as its type */
 	TDOUBLE,
-	TPOINT,
-	TVECTOR,
-	TNORMAL,
+	TPT2,
+	TPT3,
+	TVEC2,
+	TVEC3,
+	TNORMAL2,
+	TNORMAL3,
 	TQUAT,
+	TMAT3,
+	TMAT4,
+	TNUM,	/* number; value in Token.v */
+	TSTR,	/* string literal; text in Token.s */
+	TPP,	/* ++ */
+	TMM,	/* -- */
+	TEQ,	/* == */
+	TLAND,	/* && */
+	TLOR,	/* || */
+	TID,	/* identifier; name in Token.s */
 };
 
-enum
+typedef struct Line Line;
+typedef struct Token Token;
+typedef struct Lexer Lexer;
+
+struct Line
 {
+	char *file;
+	ulong line;	/* incremented by the lexer as it reads newlines */
+};
+
+struct Token
+{
+	int type;	/* T* code, the rune itself for single-rune tokens, -1 for a lexer error */
+	char *s;	/* text of a TID or TSTR token, allocated by the lexer with estrdup */
+	double v;	/* value of a TNUM token */
+};
+
+struct Lexer
+{
+	Biobuf *in;	/* input the tokens are read from */
+	Line ln;	/* ln.line is updated while scanning */
+	Token tok;	/* token made current by the last lex() */
+	Token peektok;	/* lookahead cached by peek() until the next lex() */
+};
+
+enum {
 	NODENUM,
 	NODESYM,
 };
 
+enum {
+	SYMVAR,
+	SYMCONST,
+	SYMTYPE,
+};
+
+enum {
+	TYPDOUBLE,
+	TYPPOINT,
+	TYPVECTOR,
+	TYPNORMAL,
+	TYPQUAT,
+	TYPMAT3,
+	TYPMAT4,
+};
+
+typedef struct Keyword Keyword;
+typedef struct Type Type;
 typedef struct Const Const;
-typedef struct Builtin Builtin;
 typedef struct Var Var;
 typedef struct Symbol Symbol;
 typedef struct Node Node;
 
-struct Const
+struct Keyword
 {
 	char *name;
-	double val;
+	Rune tok;
 };
 
-struct Builtin
+struct Type
 {
+	int type;
+};
+
+struct Const
+{
 	char *name;
-	double (*fn)();
+	double val;
 };
 
 struct Var
 {
 	int type;
-	union {
-		double dval;
-		Point3 pval;
-		Quaternion qval;
-	};
+	double val[4];
 };
 
 struct Symbol
@@ -46,9 +101,8 @@
 	char *name;
 	int type;
 	union {
-		Var var;		/* ID */
-		double dconst;		/* CONST */
-		double (*fn)(double);	/* BLTIN */
+		Var var;		/* SYMVAR */
+		double cval;		/* SYMCONST */
 	};
 	Symbol *next;
 };
--- a/fns.h
+++ b/fns.h
@@ -1,12 +1,20 @@
-Symbol *install(char*, int, double);
-Symbol *lookup(char*);
-double round(double);
-int lookuptype(char*);
-char* typename(int);
-char* ctypename(int);
-void init(void);
+/* alloc */
 void *emalloc(ulong);
 void *erealloc(void*, ulong);
 char *estrdup(char*);
 
-int yyparse(void);
+/* sym */
+Symbol *declsym(char*, int, double);
+Symbol *getsym(char*);
+
+/* builtin */
+int lookupkw(char*);
+int opstart(int);
+int findop(char*);
+void initsyms(void);
+char *gettokenname(Token*);
+void printtoken(Token*);
+
+/* lexer */
+int lex(Lexer*);
+int peek(Lexer*);
--- /dev/null
+++ b/lexer.c
@@ -1,0 +1,170 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <geometry.h>
+#include "dat.h"
+#include "fns.h"
+
+static Token Teof = {TEOF};
+static Token Terr = {-1};
+
+/*
+ * scan reads the next token from l->in.  White space and both
+ * comment styles are skipped, and l->ln.line is advanced for
+ * the newlines consumed on the way.  The result is Teof at end
+ * of input (also inside an unterminated block comment) or Terr,
+ * with the reason left in the error string, when a lexeme does
+ * not fit in buf or a string literal is not closed on its line.
+ * The s field of TID and TSTR tokens is allocated with estrdup.
+ */
+static Token
+scan(Lexer *l)
+{
+	Token tok;
+	char buf[256], *p, *e;
+	Rune r;
+begin:
+	memset(&tok, 0, sizeof(Token));
+
+	do{
+		r = Bgetrune(l->in);
+		if(r == '\n')
+			l->ln.line++;
+	}while(isspacerune(r));
+
+	if(r == Beof)
+		return Teof;
+
+	if(r == '/'){
+		switch(Bgetrune(l->in)){
+		case '/':
+			/* line comment: put the newline back so the loop above counts it */
+			do
+				r = Bgetrune(l->in);
+			while(r != Beof && r != '\n');
+			Bungetrune(l->in);
+			goto begin;
+		case '*':
+comment:
+			do{
+				r = Bgetrune(l->in);
+				if(r == Beof)
+					return Teof;
+				else if(r == '\n')
+					l->ln.line++;
+			}while(r != '*');
+			/* after a run of '*', only '/' closes the comment */
+			while(r == '*'){
+				r = Bgetrune(l->in);
+				if(r == Beof)
+					return Teof;
+				else if(r == '\n')
+					l->ln.line++;
+				else if(r == '/')
+					goto begin;
+			}
+			goto comment;
+		}
+		/* a lone '/': put back what followed it and fall through */
+		Bungetrune(l->in);
+	}
+
+	if(isdigitrune(r)){
+		Bungetrune(l->in);
+		Bgetd(l->in, &tok.v);
+		tok.type = TNUM;
+	}else if(isalpharune(r) || r == '_'){
+		p = buf;
+		do{
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("lexeme is too long");
+				return Terr;
+			}
+			p += runetochar(p, &r);
+		}while((r = Bgetrune(l->in)) != Beof &&
+			(isalpharune(r) || isdigitrune(r) || r == '_'));
+		Bungetrune(l->in);
+		*p = 0;
+
+		if((tok.type = lookupkw(buf)) < 0){
+			tok.s = estrdup(buf);
+			tok.type = TID;
+		}
+	}else if(r == '"'){
+		p = buf;
+		while((r = Bgetrune(l->in)) != Beof && r != '"' && r != '\n'){
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("string is too long");
+				return Terr;
+			}
+			p += runetochar(p, &r);
+		}
+		if(r != '"'){
+			werrstr("unterminated string");
+			return Terr;
+		}
+		*p = 0;
+
+		tok.s = estrdup(buf);
+		tok.type = TSTR;
+	}else if(opstart(r)){
+		/*
+		 * Extend the operator one rune at a time while findop
+		 * still recognizes it.  e remembers where the latest
+		 * rune was appended so that the one that broke the
+		 * match is cut off whole: it can be several bytes long
+		 * (a non-ASCII rune, or what runetochar emits for
+		 * Beof), and stepping back a single byte would leave
+		 * stray bytes in buf and make the final findop fail.
+		 */
+		p = buf;
+		p += runetochar(p, &r);
+		do{
+			r = Bgetrune(l->in);
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("op token is too long");
+				return Terr;
+			}
+			e = p;
+			p += runetochar(p, &r);
+			*p = 0;
+		}while(findop(buf) >= 0);
+		Bungetrune(l->in);
+		*e = 0;
+
+		tok.type = findop(buf);
+	}else
+		tok.type = r;	/* any other rune is its own token */
+
+	return tok;
+}
+
+/*
+ * lex makes the next token current in l->tok, taking the one
+ * cached by peek if there is any, and returns its type.
+ * A zero type marks the cache as empty, so an error token
+ * (type -1) seen by peek is handed out here instead of being
+ * dropped in favor of a fresh scan.
+ */
+int
+lex(Lexer *l)
+{
+	if(l->peektok.type != 0){
+		l->tok = l->peektok;
+		memset(&l->peektok, 0, sizeof(Token));
+	}else
+		l->tok = scan(l);
+	return l->tok.type;
+}
+
+/*
+ * peek returns the type of the token the next lex will
+ * deliver, scanning it into l->peektok on first use.
+ */
+int
+peek(Lexer *l)
+{
+	if(l->peektok.type == 0)
+		l->peektok = scan(l);
+	return l->peektok.type;
+}
--- a/mkfile
+++ b/mkfile
@@ -3,15 +3,14 @@
 BIN=/$objtype/bin
 TARG=sslc
 OFILES=\
+	main.$O\
 	alloc.$O\
 	sym.$O\
 	builtin.$O\
-	y.tab.$O
-YFILES=\
-	semblance.y
+	lexer.$O\
+
 HFILES=\
 	dat.h\
 	fns.h\
-	y.tab.h
 
 </sys/src/cmd/mkone
--- a/semblance.y
+++ /dev/null
@@ -1,271 +1,0 @@
-%{
-#include <u.h>
-#include <libc.h>
-#include <ctype.h>
-#include <bio.h>
-#include <geometry.h>
-#include "dat.h"
-#include "fns.h"
-%}
-%union {
-	int type;
-	Node node;
-}
-%token	PRINT
-%token	<type> TYPE
-%token	<node> NUMBER ID CONST BLTIN UNDEF
-%type	<node> expr exprs
-%type	<node> asgn asgns
-%right	'='
-%%
-list:	/* ε */		{ fprint(2, "list: ε\n"); }
-	  prog		{ fprint(2, "list: prog\n"); }
-	| list prog	{ fprint(2, "list: list prog\n"); }
-	;
-
-prog:	/* ε */		{ fprint(2, "prog: ε\n"); }
-	| decls		{ fprint(2, "prog: decls\n"); }
-	| asgns		{ fprint(2, "prog: asgns\n"); }
-	| exprs		{ fprint(2, "prog: exprs\n"); }
-	| PRINT exprs
-	{
-		fprint(2, "prog: PRINT exprs\n");
-		if($2.type == NODENUM)
-			fprint(2, "%g\n", $2.num);
-		if($2.type == NODESYM){
-			fprint(2, "%s = ", $2.sym->name);
-			switch($2.sym->type){
-			case ID:
-				switch($2.sym->var.type){
-				case TDOUBLE: fprint(2, "%g\n", $2.sym->var.dval); break;
-				case TPOINT:
-				case TVECTOR:
-				case TNORMAL:
-				case TQUAT: fprint(2, "%V\n", $2.sym->var.pval); break;
-				}
-				break;
-			case CONST: fprint(2, "%g\n", $2.sym->dconst); break;
-			case BLTIN: fprint(2, "f()\n"); break;
-			}
-		}
-	}
-	;
-
-decls:	  decl		{ decltype = -1; fprint(2, "decls: decl\n"); }
-	| decls decl	{ decltype = -1; fprint(2, "decls: decls decl\n"); }
-	;
-
-decl:	  TYPE { decltype = $1; } idlist ';'	{ fprint(2, "decl: TYPE idlist\n"); }
-	;
-
-idlist:	  ID
-	{
-		fprint(2, "idlist: ID\n");
-
-		if($1.sym->type != UNDEF)
-			yyerror("variable already exists");
-
-		if(decltype < 0)
-			yyerror("no type specified");
-
-		$1.sym->type = ID;
-		$1.sym->var.type = decltype;
-		print("%s %s;\n", ctypename(decltype), $1.sym->name);
-	}
-	| idlist ',' ID
-	{
-		fprint(2, "idlist: ID , idlist\n");
-
-		if($3.sym->type != UNDEF)
-			yyerror("variable already exists");
-
-		if(decltype < 0)
-			yyerror("no type specified");
-
-		$3.sym->type = ID;
-		$3.sym->var.type = decltype;
-		print("%s %s;\n", ctypename(decltype), $3.sym->name);
-	}
-	;
-
-asgns:	  asgn ';'		{ fprint(2, "asgns: asgn\n"); }
-	| asgns asgn ';'	{ fprint(2, "asgns: asgns asgn\n"); }
-	;
-
-asgn:	 ID '=' expr
-	{
-		fprint(2, "asgn: ID = expr\n");
-
-		print("%s = ", $1.sym->name);
-		switch($1.sym->var.type){
-		case TDOUBLE:
-			if($3.type == NODENUM)
-				print("%g", $3.num);
-			else if($3.sym->type == CONST)
-				print("%g", $3.sym->dconst);
-			else if($3.sym->type == ID && $3.sym->var.type == TDOUBLE)
-				print("%s", $3.sym->name);
-			else
-				yyerror("illegal assignment");
-			break;
-		case TPOINT:
-		case TVECTOR:
-		case TNORMAL:
-		case TQUAT:
-			if($3.type == NODENUM)
-				print("Pt3(%g,%g,%g,%g)", $3.num, $3.num, $3.num, $3.num);
-			else if($3.sym->type == CONST)
-				print("Pt3(%g,%g,%g,%g)",
-					$3.sym->dconst,
-					$3.sym->dconst,
-					$3.sym->dconst,
-					$3.sym->dconst);
-			else if($3.sym->type == ID)
-				switch($3.sym->var.type){
-				case TDOUBLE:
-					print("Pt3(%g,%g,%g,%g)",
-						$3.sym->var.dval,
-						$3.sym->var.dval,
-						$3.sym->var.dval,
-						$3.sym->var.dval);
-					break;
-				case TPOINT:
-				case TVECTOR:
-				case TNORMAL:
-					print("%s", $3.sym->name);
-					break;
-				case TQUAT:
-					print("Pt3(%g,%g,%g,%g)",
-						$3.sym->var.pval.y,
-						$3.sym->var.pval.z,
-						$3.sym->var.pval.w,
-						$3.sym->var.pval.x);
-					break;
-				}
-			else
-				yyerror("illegal assignment");
-			break;
-		}
-		print(";\n");
-
-		$$ = $1;
-		break;
-	}
-	;
-
-exprs:	  expr ';'		{ fprint(2, "exprs: expr\n"); }
-	| exprs expr ';'	{ fprint(2, "exprs: exprs expr\n"); }
-	;
-
-expr:	  NUMBER	{ fprint(2, "expr: NUMBER %g\n", $1.num); }
-	| ID		{ fprint(2, "expr: ID\n"); }
-	;
-%%
-
-int decltype = -1;
-Biobuf *bin;
-int lineno;
-
-void
-yyerror(char *msg)
-{
-	fprint(2, "%s at line %d\n", msg, lineno);
-	exits("syntax error");
-}
-
-int
-yylex(void)
-{
-	Symbol *s;
-	char buf[256], *p;
-	Rune r;
-	int t;
-
-	do{
-		r = Bgetrune(bin);
-		if(r == '\n')
-			lineno++;
-	}while(isspace(r));
-
-	if(r == Beof)
-		return 0;
-
-	if(r == '.' || isdigitrune(r)){
-		Bungetrune(bin);
-		Bgetd(bin, &yylval.node.num);
-		yylval.node.type = NODENUM;
-		return NUMBER;
-	}
-
-	if(isalpharune(r)){
-		p = buf;
-		do{
-			if(p+runelen(r) - buf >= sizeof(buf))
-				return r;	/* force syntax error. */
-			p += runetochar(p, &r);
-		}while((r = Bgetrune(bin)) != Beof &&
-			(isalpharune(r) || isdigitrune(r)));
-		Bungetrune(bin);
-		*p = 0;
-
-		if(strcmp(buf, "print") == 0)
-			return PRINT;
-
-		if((t = lookuptype(buf)) >= 0){
-			yylval.type = t;
-			return TYPE;
-		}
-
-		if((s = lookup(buf)) == nil)
-			s = install(buf, UNDEF, 0);
-		yylval.node.sym = s;
-		yylval.node.type = NODESYM;
-
-		return s->type == UNDEF || s->type == CONST ? ID : s->type;
-	}
-
-	return r;
-}
-
-void
-usage(void)
-{
-	fprint(2, "usage: %s\n", argv0);
-	exits("usage");
-}
-
-void
-main(int argc, char *argv[])
-{
-	GEOMfmtinstall();
-	ARGBEGIN{
-	default: usage();
-	}ARGEND;
-	if(argc > 0)
-		usage();
-
-	bin = Bfdopen(0, OREAD);
-	if(bin == nil)
-		sysfatal("Bfdopen: %r");
-
-	lineno++;
-	init();
-
-	yyparse();
-//	int n;
-//	char *s, *name;
-//	while((n = yylex())){
-//		s =	n == NUMBER? "NUMBER":
-//			n == ID? "ID":
-//			n == NODENUM? "NODENUM":
-//			n == NODESYM? "NODESYM":
-//			n == TYPE? "TYPE":
-//			n == PRINT? "PRINT":
-//			n == UNDEF? "UNDEF": nil;
-//		name = n == ID? yylval.node.sym->name: "";
-//		print("%d: %s%C%s\n", lineno, s?s:"", s?' ':n, name);
-//	}
-
-	Bterm(bin);
-	exits(nil);
-}
--- a/sym.c
+++ b/sym.c
@@ -1,5 +1,6 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include <geometry.h>
 #include "dat.h"
 #include "fns.h"
@@ -7,15 +8,14 @@
 static Symbol *symtab;
 
 Symbol *
-install(char *s, int t, double v)
+declsym(char *s, int t, double v)
 {
 	Symbol *sym;
 
 	sym = emalloc(sizeof(Symbol));
-	memset(sym, 0, sizeof *sym);
 	sym->name = estrdup(s);
 	sym->type = t;
-	sym->dconst = v;
+	sym->cval = v;
 	sym->var.type = -1;
 	sym->next = symtab;
 	symtab = sym;
@@ -23,7 +23,7 @@
 }
 
 Symbol *
-lookup(char *s)
+getsym(char *s)
 {
 	Symbol *sym;
 
--