shithub: semblance

ref: de92a40b756cd06c31c70e77277ca270e8dae4d1
dir: /lexer.c/

View raw version
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <geometry.h>
#include "dat.h"
#include "fns.h"

/*
 * Sentinel tokens returned by scan().
 * Teof has its first member initialised to TEOF and is returned at end of input.
 * Terr has its first member initialised to -1 and is returned after scan()
 * has recorded a message with werrstr().
 * NOTE(review): the first member of Token is presumably `type` -- confirm in dat.h.
 */
static Token Teof = {TEOF};
static Token Terr = {-1};

static Token
scan(Lexer *l)
{
	Token tok;
	char buf[256], *p;
	Rune r;
	int base;
begin:
	memset(&tok, 0, sizeof(Token));

	do{
		r = Bgetrune(l->in);
		if(r == '\n')
			l->ln.line++;
	}while(isspacerune(r));

	if(r == Beof)
		return Teof;

	if(r == '/'){
		switch(Bgetrune(l->in)){
		case '/':
			do
				r = Bgetrune(l->in);
			while(r != Beof && r != '\n');
			Bungetrune(l->in);
			goto begin;
		case '*':
comment:
			do{
				r = Bgetrune(l->in);
				if(r == Beof)
					return Teof;
				else if(r == '\n')
					l->ln.line++;
			}while(r != '*');
			while(r == '*'){
				r = Bgetrune(l->in);
				if(r == Beof)
					return Teof;
				else if(r == '\n')
					l->ln.line++;
				else if(r == '/')
					goto begin;
			}
			goto comment;
		}
		Bungetrune(l->in);
	}

	if(isdigitrune(r)){
		if(r == '0'){
			r = Bgetc(l->in);
			if(r != 'b' && r != 'o' && r != 'x'){
				Bungetrune(l->in);
				goto decimal;
			}

			switch(r){
			case 'b': base =  2; break;
			case 'o': base =  8; break;
			case 'x': base = 16; break;
			default:  base = 10; break;	/* can't happen but calms the compiler */
			}

			p = buf;
			while((r = Bgetrune(l->in)) != Beof && isdigitrune(r)){
				if(p+runelen(r) >= buf + sizeof(buf)){
					werrstr("number is too long");
					return Terr;
				}
				p += runetochar(p, &r);
			}
			Bungetrune(l->in);
			*p = 0;

			tok.v = strtoll(buf, nil, base);
		}else{
decimal:
			Bungetrune(l->in);
			Bgetd(l->in, &tok.v);
		}
		tok.type = TNUM;
	}else if(isalpharune(r) || r == '_'){
		p = buf;
		do{
			if(p+runelen(r) >= buf + sizeof(buf)){
				werrstr("lexeme is too long");
				return Terr;
			}
			p += runetochar(p, &r);
		}while((r = Bgetrune(l->in)) != Beof &&
			(isalpharune(r) || isdigitrune(r) || r == '_'));
		Bungetrune(l->in);
		*p = 0;

		if((tok.type = lookupkw(buf)) < 0){
			tok.s = estrdup(buf);
			tok.type = TID;
		}
	}else if(r == '"'){
		p = buf;
		while((r = Bgetrune(l->in)) != Beof && r != '"' && r != '\n'){
			if(p+runelen(r) >= buf + sizeof(buf)){
				werrstr("string is too long");
				return Terr;
			}
			p += runetochar(p, &r);
		}
		if(r != '"'){
			werrstr("unterminated string");
			return Terr;
		}
		*p = 0;

		tok.s = estrdup(buf);
		tok.type = TSTR;
	}else if(opstart(r)){
		p = buf;
		p += runetochar(p, &r);
		do{
			r = Bgetrune(l->in);
			if(p+runelen(r) >= buf + sizeof(buf)){
				werrstr("op token is too long");
				return Terr;
			}
			p += runetochar(p, &r);
			*p = 0;
		}while(findop(buf) >= 0);
		Bungetrune(l->in);
		*--p = 0;

		tok.type = findop(buf);
	}else
		tok.type = r;

	return tok;
}

/*
 * Advance to the next token and return its type.
 * A token buffered by peek() (type > 0) is moved into l->tok and the
 * buffer is cleared; otherwise a fresh token is read with scan().
 */
int
lex(Lexer *l)
{
	if(l->peektok.type <= 0){
		/* nothing buffered */
		l->tok = scan(l);
		return l->tok.type;
	}

	l->tok = l->peektok;
	memset(&l->peektok, 0, sizeof l->peektok);
	return l->tok.type;
}

/*
 * Return the type of the upcoming token without consuming it.
 * The token is read with scan() into l->peektok unless a token with
 * type > 0 is already buffered there.
 */
int
peek(Lexer *l)
{
	Token *pt;

	pt = &l->peektok;
	if(pt->type > 0)
		return pt->type;
	*pt = scan(l);
	return pt->type;
}