ref: 689862826175d1b783f98018c1484c78396a33aa
dir: /reader.ml/
module T = Types.Types
let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][ \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None
type reader =
{ form : Types.m9type
; tokens : string list
}
type list_reader =
{ list_form : Types.m9type list
; tokens : string list
}
let tokenize str =
List.map
(function
| Str.Delim x -> String.trim x (* move trim to regex for speed? *)
| Str.Text x -> "tokenize botch")
(List.filter
(function
| Str.Delim x -> true
| Str.Text x -> false)
(Str.full_split token_re str))
;;
let unescape_string token =
if Str.string_match string_re token 0
then (
let without_quotes = String.sub token 1 (String.length token - 2) in
Utils.gsub
(Str.regexp "\\\\.")
(function
| "\\n" -> "\n"
| x -> String.sub x 1 1)
without_quotes)
else (
output_string stderr "expected '\"', got EOF\n";
flush stderr;
raise End_of_file)
;;
let read_atom token =
match token with
| "null" -> T.Nil
| "#t" | "#true" -> T.Bool true
| "#f" | "#false" -> T.Bool false
| _ ->
(match token.[0] with
| '0' .. '9' -> Types.number (float_of_string token)
| '#' ->
(match token.[1], token.[2] with
| '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
| _ -> Types.symbol token)
| '-' ->
(match String.length token with
| 1 -> Types.symbol token
| _ ->
(match token.[1] with
| '0' .. '9' -> Types.number (float_of_string token)
| _ -> Types.symbol token))
| '"' -> T.String (unescape_string token)
| _ -> Types.symbol token)
;;
let rec read_list eol list_reader =
match list_reader.tokens with
| [] -> raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
| token :: tokens ->
if Str.string_match (Str.regexp eol) token 0
then { list_form = list_reader.list_form; tokens }
else (
let reader = read_form list_reader.tokens in
read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })
and read_quote sym tokens =
let reader = read_form tokens in
{ form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }
and read_vector all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
| _ -> read_form tokens)
and read_macro tokens =
let list_reader = read_list ")" { list_form = []; tokens } in
print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
(match list_reader.list_form with
(* | sym :: T.List { T.value = [ T.Symbol { T.value = "syntax-rules" }; clause ] } -> *)
| sym :: rest ->
print_endline (" sym: " ^ Printer.print sym true);
print_endline (" rest: " ^ Printer.dump rest);
(match rest with
| [ T.List { T.value = T.Symbol { T.value = "syntax-rules" } :: literals :: clauses } ] ->
let symbol = Printer.print sym true in
print_endline (" clauses: " ^ Printer.dump clauses);
let macro_entry = Types.macro symbol literals (Types.list clauses) in
Macro.register_macro
macro_entry
symbol
literals
(List.map (fun x -> Printer.print x true) clauses)
registered_macros
| _ -> raise (Utils.Syntax_error "read_macro botch"))
| _ as x -> print_endline (" rest: " ^ Printer.dump x));
(* | T.List { T.value = [ T.Symbol { T.value = "define-syntax" }; keyword; T.List { T.value = macro } ] } ->
* (match macro with
* | _ :: literals :: groups ->
* let macro_entry = Types.macro (Printer.print keyword true) literals (Types.list groups) in
* Env.set env keyword macro_entry;
* macro_entry) *)
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
and read_form all_tokens =
(* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "'" -> read_quote "quote" tokens
| "`" -> read_quote "quasiquote" tokens
| "#" -> read_vector tokens
| "#|" ->
let list_reader = read_list "|#" { list_form = []; tokens } in
print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
{ form = T.Unspecified; tokens = list_reader.tokens }
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
| "" | "\t" | "\n" -> read_form tokens
| "define-syntax" -> read_macro tokens
| _ ->
if token.[0] = ';'
then (
let list_reader = read_list "\\n" { list_form = []; tokens } in
print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
{ form = T.Unspecified; tokens = list_reader.tokens })
else { form = read_atom token; tokens })
;;
let slurp filename =
let chan = open_in filename in
let b = Buffer.create 27 in
Buffer.add_channel b chan (in_channel_length chan);
close_in chan;
Buffer.contents b
;;
let read str = (read_form (tokenize str)).form