ref: 5dca33a54a4fd738473b429563e840d5c24ce620
parent: 18809f2691c47d132c77a7d9402e2fe3b176dffe
author: rodri <rgl@antares-labs.eu>
date: Sun Mar 23 13:37:12 EDT 2025
get rid of yacc and begin work on a custom compiler
--- a/.workspace
+++ b/.workspace
@@ -1,2 +1,2 @@
#!/bin/rc
-B *.[hcy] mkfile readme.md
+B *.[hc] mkfile readme.md
--- a/alloc.c
+++ b/alloc.c
@@ -6,9 +6,9 @@
{
void *p;
- p = malloc(n);
+ p = mallocz(n, 1);
if(p == nil)
- sysfatal("malloc: %r");
+ sysfatal("mallocz: %r");
setmalloctag(p, getcallerpc(&n));
return p;
}
--- a/builtin.c
+++ b/builtin.c
@@ -1,24 +1,32 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include <geometry.h>
#include "dat.h"
#include "fns.h"
-#include "y.tab.h"
-static char *types[] = {
- "double",
- "point",
- "vector",
- "normal",
- "quat",
+static Keyword kwtab[] = {
+ "double", TDOUBLE,
+ "point2", TPT2,
+ "point", TPT3,
+ "point3", TPT3,
+ "vector2", TVEC2,
+ "vector", TVEC3,
+ "vector3", TVEC3,
+ "normal", TNORMAL3,
+ "normal2", TNORMAL2,
+ "normal3", TNORMAL3,
+ "quat", TQUAT,
+ "matrix3", TMAT3,
+ "matrix4", TMAT4,
};
-static char *ctypes[] = {
- "double",
- "Point3",
- "Point3",
- "Point3",
- "Quaternion",
+static Keyword optab[] = {
+ "++", TPP,
+ "--", TMM,
+ "==", TEQ,
+ "&&", TLAND,
+ "||", TLOR,
};
static Const consts[] = {
@@ -29,62 +37,120 @@
"Φ", 1.61803398874989484820,
};
-static Builtin builtins[] = {
- "sin", sin,
- "cos", cos,
- "atan", atan,
- "atan2", atan2,
- "log", log,
- "log10", log10,
- "exp", exp,
- "sqrt", sqrt,
- "int", round,
- "abs", fabs,
-};
+/*
+ * lookupkw returns the token code that kwtab associates with
+ * the keyword s, or -1 if s is not a keyword.
+ */
+int
+lookupkw(char *s)
+{
+	int i;
-double
-round(double n)
+	for(i = 0; i < nelem(kwtab); i++)
+		if(strcmp(s, kwtab[i].name) == 0)
+			return kwtab[i].tok;
+	return -1;
+}
+
+/*
+ * opstart reports whether c is the first character of some
+ * operator in optab.
+ */
+int
+opstart(int c)
{
- return floor(n + 0.5);
+	int i;
+
+	for(i = 0; i < nelem(optab); i++)
+		if(optab[i].name[0] == c)
+			return 1;
+	return 0;
}
+/*
+ * findop maps operator text to a token code.  A one-character
+ * string yields that character; anything longer is looked up in
+ * optab, and -1 is returned when it is not there.
+ */
int
-lookuptype(char *s)
+findop(char *s)
{
int i;
- for(i = 0; i < nelem(types); i++)
- if(strcmp(s, types[i]) == 0)
- return i;
+	if(strlen(s) == 1)
+		return s[0];
+	for(i = 0; i < nelem(optab); i++)
+		if(strcmp(s, optab[i].name) == 0)
+			return optab[i].tok;
return -1;
}
-char *
-typename(int t)
+/* initsyms declares every entry of consts as a SYMCONST symbol. */
+void
+initsyms(void)
{
- if(t >= 0 && t < nelem(types))
- return types[t];
- return nil;
+	int i;
+
+	for(i = 0; i < nelem(consts); i++)
+		declsym(consts[i].name, SYMCONST, consts[i].val);
}
+/*
+ * gettokenname returns the symbolic name of t's type, or nil
+ * when the type lies outside the TEOF..TID range (single-rune
+ * tokens, the -1 error token).
+ */
char *
-ctypename(int t)
+gettokenname(Token *t)
{
- if(t >= 0 && t < nelem(ctypes))
- return ctypes[t];
- return nil;
+	/* indexed by type-TEOF; the named codes are consecutive from TEOF on */
+	static char *tab[] = {
+		[TEOF-TEOF] "TEOF",
+		[TDOUBLE-TEOF] "TDOUBLE",
+		[TPT2-TEOF] "TPT2",
+		[TPT3-TEOF] "TPT3",
+		[TVEC2-TEOF] "TVEC2",
+		[TVEC3-TEOF] "TVEC3",
+		[TNORMAL2-TEOF] "TNORMAL2",
+		[TNORMAL3-TEOF] "TNORMAL3",
+		[TQUAT-TEOF] "TQUAT",
+		[TMAT3-TEOF] "TMAT3",
+		[TMAT4-TEOF] "TMAT4",
+		[TNUM-TEOF] "TNUM",
+		[TSTR-TEOF] "TSTR",
+		[TPP-TEOF] "TPP",
+		[TMM-TEOF] "TMM",
+		[TEQ-TEOF] "TEQ",
+		[TLAND-TEOF] "TLAND",
+		[TLOR-TEOF] "TLOR",
+		[TID-TEOF] "TID",
+	};
+
+	if(t->type < TEOF || t->type >= TEOF + nelem(tab))
+		return nil;
+
+	return tab[t->type-TEOF];
}
+/*
+ * printtoken prints t and a newline with print: the type's
+ * name, then the value of a TNUM or the text of a TSTR or TID.
+ * A type without a name is printed as a rune instead.
+ */
void
-init(void)
+printtoken(Token *t)
{
- Symbol *s;
- int i;
+	char *s;
- for(i = 0; i < nelem(consts); i++)
- install(consts[i].name, CONST, consts[i].val);
- for(i = 0; i < nelem(builtins); i++){
- s = install(builtins[i].name, BLTIN, 0);
- s->fn = builtins[i].fn;
+	s = gettokenname(t);
+	if(s == nil){
+		print("%C\n", t->type);
+		return;
}
+	print("%s", s);
+	if(t->type == TNUM)
+		print(" (%g)", t->v);
+	else if(t->type == TSTR || t->type == TID)
+		print(" (\"%s\")", t->s);
+	print("\n");
}
--- a/dat.h
+++ b/dat.h
@@ -1,44 +1,99 @@
-enum
-{
+enum {
+ TEOF = 1<<24,	/* end of input; first named code (scan uses the rune itself as type for single-character tokens, presumably always below this) */
TDOUBLE,
- TPOINT,
- TVECTOR,
- TNORMAL,
+ TPT2,	/* TDOUBLE through TMAT4 are the type keywords listed in kwtab */
+ TPT3,
+ TVEC2,
+ TVEC3,
+ TNORMAL2,
+ TNORMAL3,
TQUAT,
+ TMAT3,
+ TMAT4,
+ TNUM,	/* number; value in Token.v */
+ TSTR,	/* string literal; text in Token.s */
+ TPP,	/* ++ */
+ TMM,	/* -- */
+ TEQ,	/* == */
+ TLAND,	/* && */
+ TLOR,	/* || */
+ TID,	/* identifier; text in Token.s */
};
-enum
+typedef struct Line Line;
+typedef struct Token Token;
+typedef struct Lexer Lexer;
+
+struct Line
+{
+ char *file;	/* presumably the name of the input file; not set by any code shown here */
+ ulong line;	/* incremented by the lexer for each newline it reads */
+};
+
+struct Token
+{
+ int type;	/* a T* code, the rune itself for a single-character token, or -1 for a lexical error */
+ char *s;	/* text of a TID or TSTR, allocated with estrdup */
+ double v;	/* value of a TNUM */
+};
+
+struct Lexer
+{
+ Biobuf *in;	/* input the tokens are read from */
+ Line ln;	/* position in the input; ln.line is kept up to date by the lexer */
+ Token tok;	/* token delivered by the last call to lex */
+ Token peektok;	/* token read ahead by peek and handed out by the next lex */
+};
+
+enum {
NODENUM,
NODESYM,
};
+enum {
+ SYMVAR,	/* Symbol.var is the union member in use */
+ SYMCONST,	/* Symbol.cval is the union member in use */
+ SYMTYPE,
+};
+
+enum {
+ TYPDOUBLE,
+ TYPPOINT,
+ TYPVECTOR,
+ TYPNORMAL,
+ TYPQUAT,
+ TYPMAT3,
+ TYPMAT4,
+};
+
+typedef struct Keyword Keyword;
+typedef struct Type Type;
typedef struct Const Const;
-typedef struct Builtin Builtin;
typedef struct Var Var;
typedef struct Symbol Symbol;
typedef struct Node Node;
-struct Const
+struct Keyword
{
char *name;
- double val;
+ Rune tok;
};
-struct Builtin
+struct Type
{
+ int type;
+};
+
+struct Const
+{
char *name;
- double (*fn)();
+ double val;
};
struct Var
{
int type;
- union {
- double dval;
- Point3 pval;
- Quaternion qval;
- };
+ double val[4];
};
struct Symbol
@@ -46,9 +101,8 @@
char *name;
int type;
union {
- Var var; /* ID */
- double dconst; /* CONST */
- double (*fn)(double); /* BLTIN */
+ Var var; /* SYMVAR */
+ double cval; /* SYMCONST */
};
Symbol *next;
};
--- a/fns.h
+++ b/fns.h
@@ -1,12 +1,20 @@
-Symbol *install(char*, int, double);
-Symbol *lookup(char*);
-double round(double);
-int lookuptype(char*);
-char* typename(int);
-char* ctypename(int);
-void init(void);
+/* alloc */
void *emalloc(ulong);
void *erealloc(void*, ulong);
char *estrdup(char*);
-int yyparse(void);
+/* sym */
+Symbol *declsym(char*, int, double);
+Symbol *getsym(char*);
+
+/* builtin */
+int lookupkw(char*);
+int opstart(int);
+int findop(char*);
+void initsyms(void);
+char *gettokenname(Token*);
+void printtoken(Token*);
+
+/* lexer */
+int lex(Lexer*);
+int peek(Lexer*);
--- /dev/null
+++ b/lexer.c
@@ -1,0 +1,195 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <geometry.h>
+#include "dat.h"
+#include "fns.h"
+
+static Token Teof = {TEOF};	/* end of input */
+static Token Terr = {-1};	/* lexical error; the reason is left in errstr */
+
+/*
+ * scan reads the next token from l->in.  White space, // comments
+ * and block comments are skipped, and every newline consumed on
+ * the way increments l->ln.line.  A lookahead rune is pushed back
+ * only if one was actually read, never after Beof.
+ *
+ * It returns Teof at end of input, Terr (after werrstr) on a
+ * lexical error, and otherwise a token whose type is
+ *	TNUM for a number, with its value in v;
+ *	TID for an identifier, with an estrdup'ed copy in s;
+ *	TSTR for a string literal, with an estrdup'ed copy of the
+ *		text between the quotes in s;
+ *	whatever lookupkw or findop reports for keywords and operators;
+ *	the rune itself for any other character.
+ * scan never frees s.
+ */
+static Token
+scan(Lexer *l)
+{
+	Token tok;
+	char buf[256], *p;
+	Rune r;
+	long c;
+	int n;
+
+begin:
+	memset(&tok, 0, sizeof(Token));
+
+	do{
+		r = Bgetrune(l->in);
+		if(r == '\n')
+			l->ln.line++;
+	}while(isspacerune(r));
+
+	if(r == Beof)
+		return Teof;
+
+	if(r == '/'){
+		c = Bgetrune(l->in);
+		switch(c){
+		case '/':
+			/* line comment; the newline is put back to be counted above */
+			do
+				r = Bgetrune(l->in);
+			while(r != Beof && r != '\n');
+			if(r != Beof)
+				Bungetrune(l->in);
+			goto begin;
+		case '*':
+comment:
+			/* skip up to the next '*' */
+			do{
+				r = Bgetrune(l->in);
+				if(r == Beof){
+					werrstr("unterminated comment");
+					return Terr;
+				}else if(r == '\n')
+					l->ln.line++;
+			}while(r != '*');
+			/* a '/' directly after a run of '*' closes the comment */
+			while(r == '*'){
+				r = Bgetrune(l->in);
+				if(r == Beof){
+					werrstr("unterminated comment");
+					return Terr;
+				}else if(r == '\n')
+					l->ln.line++;
+				else if(r == '/')
+					goto begin;
+			}
+			goto comment;
+		}
+		/* a lone '/': give the lookahead back, r is still '/' */
+		if(c != Beof)
+			Bungetrune(l->in);
+	}
+
+	/* ASCII digits only: that is all Bgetd can parse */
+	if(r >= '0' && r <= '9'){
+		Bungetrune(l->in);
+		if(Bgetd(l->in, &tok.v) < 0){
+			werrstr("bad number");
+			return Terr;
+		}
+		tok.type = TNUM;
+	}else if(isalpharune(r) || r == '_'){
+		p = buf;
+		do{
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("lexeme is too long");
+				return Terr;
+			}
+			p += runetochar(p, &r);
+		}while((r = Bgetrune(l->in)) != Beof &&
+			(isalpharune(r) || isdigitrune(r) || r == '_'));
+		if(r != Beof)
+			Bungetrune(l->in);
+		*p = 0;
+
+		if((tok.type = lookupkw(buf)) < 0){
+			tok.s = estrdup(buf);
+			tok.type = TID;
+		}
+	}else if(r == '"'){
+		p = buf;
+		while((r = Bgetrune(l->in)) != Beof && r != '"' && r != '\n'){
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("string is too long");
+				return Terr;
+			}
+			p += runetochar(p, &r);
+		}
+		if(r != '"'){
+			/* leave the newline in the input so it still gets counted */
+			if(r == '\n')
+				Bungetrune(l->in);
+			werrstr("unterminated string");
+			return Terr;
+		}
+		*p = 0;
+
+		tok.s = estrdup(buf);
+		tok.type = TSTR;
+	}else if(opstart(r)){
+		/*
+		 * Grow the operator one rune at a time for as long as
+		 * findop still knows the result.  p always marks the end
+		 * of the accepted text, so a rejected rune is dropped
+		 * whole however many bytes it encodes to.
+		 */
+		p = buf;
+		p += runetochar(p, &r);
+		for(;;){
+			r = Bgetrune(l->in);
+			if(r == Beof)
+				break;
+			if(p+runelen(r) >= buf + sizeof(buf)){
+				werrstr("op token is too long");
+				return Terr;
+			}
+			n = runetochar(p, &r);
+			p[n] = 0;
+			if(findop(buf) < 0){
+				Bungetrune(l->in);
+				break;
+			}
+			p += n;
+		}
+		*p = 0;
+
+		tok.type = findop(buf);
+	}else
+		tok.type = r;
+
+	return tok;
+}
+
+/*
+ * lex advances to the next token and returns its type.  The token
+ * is the one buffered by an earlier peek if there is one, a newly
+ * scanned one otherwise; either way it is left in l->tok.
+ */
+int
+lex(Lexer *l)
+{
+	/* type 0 marks an empty buffer; a peeked error (-1) must be kept */
+	if(l->peektok.type != 0){
+		l->tok = l->peektok;
+		memset(&l->peektok, 0, sizeof(Token));
+	}else
+		l->tok = scan(l);
+	return l->tok.type;
+}
+
+/*
+ * peek returns the type of the token the next lex will deliver,
+ * scanning it into l->peektok if nothing is buffered yet.
+ */
+int
+peek(Lexer *l)
+{
+	if(l->peektok.type == 0)
+		l->peektok = scan(l);
+	return l->peektok.type;
+}
--- a/mkfile
+++ b/mkfile
@@ -3,15 +3,14 @@
BIN=/$objtype/bin
TARG=sslc
OFILES=\
+ main.$O\
alloc.$O\
sym.$O\
builtin.$O\
- y.tab.$O
-YFILES=\
- semblance.y
+ lexer.$O\
+
HFILES=\
dat.h\
fns.h\
- y.tab.h
</sys/src/cmd/mkone
--- a/semblance.y
+++ /dev/null
@@ -1,271 +1,0 @@
-%{
-#include <u.h>
-#include <libc.h>
-#include <ctype.h>
-#include <bio.h>
-#include <geometry.h>
-#include "dat.h"
-#include "fns.h"
-%}
-%union {
- int type;
- Node node;
-}
-%token PRINT
-%token <type> TYPE
-%token <node> NUMBER ID CONST BLTIN UNDEF
-%type <node> expr exprs
-%type <node> asgn asgns
-%right '='
-%%
-list: /* ε */ { fprint(2, "list: ε\n"); }
- prog { fprint(2, "list: prog\n"); }
- | list prog { fprint(2, "list: list prog\n"); }
- ;
-
-prog: /* ε */ { fprint(2, "prog: ε\n"); }
- | decls { fprint(2, "prog: decls\n"); }
- | asgns { fprint(2, "prog: asgns\n"); }
- | exprs { fprint(2, "prog: exprs\n"); }
- | PRINT exprs
- {
- fprint(2, "prog: PRINT exprs\n");
- if($2.type == NODENUM)
- fprint(2, "%g\n", $2.num);
- if($2.type == NODESYM){
- fprint(2, "%s = ", $2.sym->name);
- switch($2.sym->type){
- case ID:
- switch($2.sym->var.type){
- case TDOUBLE: fprint(2, "%g\n", $2.sym->var.dval); break;
- case TPOINT:
- case TVECTOR:
- case TNORMAL:
- case TQUAT: fprint(2, "%V\n", $2.sym->var.pval); break;
- }
- break;
- case CONST: fprint(2, "%g\n", $2.sym->dconst); break;
- case BLTIN: fprint(2, "f()\n"); break;
- }
- }
- }
- ;
-
-decls: decl { decltype = -1; fprint(2, "decls: decl\n"); }
- | decls decl { decltype = -1; fprint(2, "decls: decls decl\n"); }
- ;
-
-decl: TYPE { decltype = $1; } idlist ';' { fprint(2, "decl: TYPE idlist\n"); }
- ;
-
-idlist: ID
- {
- fprint(2, "idlist: ID\n");
-
- if($1.sym->type != UNDEF)
- yyerror("variable already exists");
-
- if(decltype < 0)
- yyerror("no type specified");
-
- $1.sym->type = ID;
- $1.sym->var.type = decltype;
- print("%s %s;\n", ctypename(decltype), $1.sym->name);
- }
- | idlist ',' ID
- {
- fprint(2, "idlist: ID , idlist\n");
-
- if($3.sym->type != UNDEF)
- yyerror("variable already exists");
-
- if(decltype < 0)
- yyerror("no type specified");
-
- $3.sym->type = ID;
- $3.sym->var.type = decltype;
- print("%s %s;\n", ctypename(decltype), $3.sym->name);
- }
- ;
-
-asgns: asgn ';' { fprint(2, "asgns: asgn\n"); }
- | asgns asgn ';' { fprint(2, "asgns: asgns asgn\n"); }
- ;
-
-asgn: ID '=' expr
- {
- fprint(2, "asgn: ID = expr\n");
-
- print("%s = ", $1.sym->name);
- switch($1.sym->var.type){
- case TDOUBLE:
- if($3.type == NODENUM)
- print("%g", $3.num);
- else if($3.sym->type == CONST)
- print("%g", $3.sym->dconst);
- else if($3.sym->type == ID && $3.sym->var.type == TDOUBLE)
- print("%s", $3.sym->name);
- else
- yyerror("illegal assignment");
- break;
- case TPOINT:
- case TVECTOR:
- case TNORMAL:
- case TQUAT:
- if($3.type == NODENUM)
- print("Pt3(%g,%g,%g,%g)", $3.num, $3.num, $3.num, $3.num);
- else if($3.sym->type == CONST)
- print("Pt3(%g,%g,%g,%g)",
- $3.sym->dconst,
- $3.sym->dconst,
- $3.sym->dconst,
- $3.sym->dconst);
- else if($3.sym->type == ID)
- switch($3.sym->var.type){
- case TDOUBLE:
- print("Pt3(%g,%g,%g,%g)",
- $3.sym->var.dval,
- $3.sym->var.dval,
- $3.sym->var.dval,
- $3.sym->var.dval);
- break;
- case TPOINT:
- case TVECTOR:
- case TNORMAL:
- print("%s", $3.sym->name);
- break;
- case TQUAT:
- print("Pt3(%g,%g,%g,%g)",
- $3.sym->var.pval.y,
- $3.sym->var.pval.z,
- $3.sym->var.pval.w,
- $3.sym->var.pval.x);
- break;
- }
- else
- yyerror("illegal assignment");
- break;
- }
- print(";\n");
-
- $$ = $1;
- break;
- }
- ;
-
-exprs: expr ';' { fprint(2, "exprs: expr\n"); }
- | exprs expr ';' { fprint(2, "exprs: exprs expr\n"); }
- ;
-
-expr: NUMBER { fprint(2, "expr: NUMBER %g\n", $1.num); }
- | ID { fprint(2, "expr: ID\n"); }
- ;
-%%
-
-int decltype = -1;
-Biobuf *bin;
-int lineno;
-
-void
-yyerror(char *msg)
-{
- fprint(2, "%s at line %d\n", msg, lineno);
- exits("syntax error");
-}
-
-int
-yylex(void)
-{
- Symbol *s;
- char buf[256], *p;
- Rune r;
- int t;
-
- do{
- r = Bgetrune(bin);
- if(r == '\n')
- lineno++;
- }while(isspace(r));
-
- if(r == Beof)
- return 0;
-
- if(r == '.' || isdigitrune(r)){
- Bungetrune(bin);
- Bgetd(bin, &yylval.node.num);
- yylval.node.type = NODENUM;
- return NUMBER;
- }
-
- if(isalpharune(r)){
- p = buf;
- do{
- if(p+runelen(r) - buf >= sizeof(buf))
- return r; /* force syntax error. */
- p += runetochar(p, &r);
- }while((r = Bgetrune(bin)) != Beof &&
- (isalpharune(r) || isdigitrune(r)));
- Bungetrune(bin);
- *p = 0;
-
- if(strcmp(buf, "print") == 0)
- return PRINT;
-
- if((t = lookuptype(buf)) >= 0){
- yylval.type = t;
- return TYPE;
- }
-
- if((s = lookup(buf)) == nil)
- s = install(buf, UNDEF, 0);
- yylval.node.sym = s;
- yylval.node.type = NODESYM;
-
- return s->type == UNDEF || s->type == CONST ? ID : s->type;
- }
-
- return r;
-}
-
-void
-usage(void)
-{
- fprint(2, "usage: %s\n", argv0);
- exits("usage");
-}
-
-void
-main(int argc, char *argv[])
-{
- GEOMfmtinstall();
- ARGBEGIN{
- default: usage();
- }ARGEND;
- if(argc > 0)
- usage();
-
- bin = Bfdopen(0, OREAD);
- if(bin == nil)
- sysfatal("Bfdopen: %r");
-
- lineno++;
- init();
-
- yyparse();
-// int n;
-// char *s, *name;
-// while((n = yylex())){
-// s = n == NUMBER? "NUMBER":
-// n == ID? "ID":
-// n == NODENUM? "NODENUM":
-// n == NODESYM? "NODESYM":
-// n == TYPE? "TYPE":
-// n == PRINT? "PRINT":
-// n == UNDEF? "UNDEF": nil;
-// name = n == ID? yylval.node.sym->name: "";
-// print("%d: %s%C%s\n", lineno, s?s:"", s?' ':n, name);
-// }
-
- Bterm(bin);
- exits(nil);
-}
--- a/sym.c
+++ b/sym.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include <geometry.h>
#include "dat.h"
#include "fns.h"
@@ -7,15 +8,14 @@
static Symbol *symtab;
Symbol *
-install(char *s, int t, double v)
+declsym(char *s, int t, double v)
{
Symbol *sym;
sym = emalloc(sizeof(Symbol));
- memset(sym, 0, sizeof *sym);
sym->name = estrdup(s);
sym->type = t;
- sym->dconst = v;
+ sym->cval = v;
sym->var.type = -1;
sym->next = symtab;
symtab = sym;
@@ -23,7 +23,7 @@
}
Symbol *
-lookup(char *s)
+getsym(char *s)
{
Symbol *sym;
--
⑨