ref: 6cec07e20602ff1a19e3179c48ad11203c61c274
dir: /reader.ml/
module T = Types.Types
let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][ \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None
type reader =
{ form : Types.m9type
; tokens : string list
}
type list_reader =
{ list_form : Types.m9type list
; tokens : string list
}
let tokenize str =
List.map
(function
| Str.Delim x -> String.trim x (* move trim to regex for speed? *)
| Str.Text x -> "tokenize botch")
(List.filter
(function
| Str.Delim x -> true
| Str.Text x -> false)
(Str.full_split token_re str))
;;
let unescape_string token =
if Str.string_match string_re token 0
then (
let without_quotes = String.sub token 1 (String.length token - 2) in
Utils.gsub
(Str.regexp "\\\\.")
(function
| "\\n" -> "\n"
| x -> String.sub x 1 1)
without_quotes)
else (
output_string stderr "expected '\"', got EOF\n";
flush stderr;
raise End_of_file)
;;
let fix_pattern sym pattern =
let tokenized_pattern = tokenize pattern in
let new_pattern = ref [] in
let rec replace_token tokens =
match tokens with
| token :: [] -> let t = if token = "_" then (Printer.print sym true) else token in
new_pattern := !new_pattern @ [t];
!new_pattern
| token :: rest -> let t = if token = "_" then (Printer.print sym true) else token in
new_pattern := !new_pattern @ [t];
replace_token rest
| _ -> raise (Utils.Syntax_error "unable to fix pattern")
in
replace_token tokenized_pattern
;;
let read_atom token =
match token with
| "null" -> T.Nil
| "#t" | "#true" -> T.Bool true
| "#f" | "#false" -> T.Bool false
| _ ->
(match token.[0] with
| '0' .. '9' -> Types.number (float_of_string token)
| '#' ->
(match token.[1], token.[2] with
| '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
| _ -> Types.symbol token)
| '-' ->
(match String.length token with
| 1 -> Types.symbol token
| _ ->
(match token.[1] with
| '0' .. '9' -> Types.number (float_of_string token)
| _ -> Types.symbol token))
| '"' -> T.String (unescape_string token)
| _ -> Types.symbol token)
;;
let rec read_list eol list_reader =
if (List.length list_reader.tokens > 1) && (List.hd list_reader.tokens) = "("
then
(match
try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with
| _ -> T.Nil
with
| T.Macro { T.value = sym; meta } ->
print_endline("\nFOUND A MACRO! " ^ Printer.print sym true);
print_endline(" tokens: " ^ String.concat " " list_reader.tokens);
let rec collect_args tokens args =
match tokens with
| t :: [] -> args @ [t]
| t :: ts -> if t = eol then args else collect_args ts args @ [t]
| _ -> []
in
let args = collect_args (List.tl list_reader.tokens) [] in
print_endline(" ### " ^ String.concat " " args);
Macro.match_variant meta args
| _ -> ());
match list_reader.tokens with
| [] -> raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
| token :: tokens ->
if Str.string_match (Str.regexp eol) token 0
then { list_form = list_reader.list_form; tokens }
else (
let reader = read_form list_reader.tokens in
read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })
and read_quote sym tokens =
let reader = read_form tokens in
{ form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }
and read_vector all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
| _ -> read_form tokens)
and read_macro tokens =
let list_reader = read_list ")" { list_form = []; tokens } in
print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
(match list_reader.list_form with
| sym :: rest ->
print_endline (" sym: " ^ Printer.print sym true);
print_endline (" rest: " ^ Printer.dump rest);
(match rest with
| [ T.List { T.value = T.Symbol { T.value = "syntax-rules" } :: literals :: clauses } ] ->
let variants = Macro.generate_variants sym literals clauses in
print_endline (" variants: " ^ (Printer.print (Types.map variants) true));
let macro_entry = Types.macro sym literals (Types.list clauses) variants in
Env.set registered_macros sym macro_entry;
Types.M9map.iter (fun k v ->
print_endline (" >>> " ^ String.concat " " (fix_pattern k (Printer.print v true)));
print_endline (" >> " ^ Printer.print k true ^ ": " ^ Printer.print v true))
variants;
print_endline(" >>>>>> MACRO: " ^ Printer.print macro_entry true)
| _ -> raise (Utils.Syntax_error "read_macro botch"))
| _ as x -> print_endline (" rest: " ^ Printer.dump x));
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
and read_form all_tokens =
(* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "'" -> read_quote "quote" tokens
| "`" -> read_quote "quasiquote" tokens
| "#" -> read_vector tokens
| "#|" ->
let list_reader = read_list "|#" { list_form = []; tokens } in
print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
{ form = T.Unspecified; tokens = list_reader.tokens }
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
| "" | "\t" | "\n" -> read_form tokens
| "define-syntax" -> read_macro tokens
| _ ->
if token.[0] = ';'
then (
let list_reader = read_list "\\n" { list_form = []; tokens } in
print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
{ form = T.Unspecified; tokens = list_reader.tokens })
else { form = read_atom token; tokens })
;;
let slurp filename =
let chan = open_in filename in
let b = Buffer.create 27 in
Buffer.add_channel b chan (in_channel_length chan);
close_in chan;
Buffer.contents b
;;
let read str = (read_form (tokenize str)).form