ref: dc596842d658ab664a025d4e98c89b50cac465c4
dir: /reader.ml/
module T = Types.Types
let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][ \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let registered_macros = Env.make None
type reader =
{ form : Types.m9type
; tokens : string list
}
type list_reader =
{ list_form : Types.m9type list
; tokens : string list
}
let tokenize str =
List.map
(function
| Str.Delim x -> String.trim x (* move trim to regex for speed? *)
| Str.Text x -> "tokenize botch")
(List.filter
(function
| Str.Delim x -> true
| Str.Text x -> false)
(Str.full_split token_re str))
;;
let unescape_string token =
if Str.string_match string_re token 0
then (
let without_quotes = String.sub token 1 (String.length token - 2) in
Utils.gsub
(Str.regexp "\\\\.")
(function
| "\\n" -> "\n"
| x -> String.sub x 1 1)
without_quotes)
else raise (Utils.Syntax_error "unterminated string")
;;
let fix_pattern sym pattern =
let tokenized_pattern = tokenize pattern in
let new_pattern = ref [] in
let rec replace_token tokens =
match tokens with
| [ token ] ->
let t = if token = "_" then Printer.print sym true else token in
new_pattern := !new_pattern @ [ t ];
!new_pattern
| token :: rest ->
let t = if token = "_" then Printer.print sym true else token in
new_pattern := !new_pattern @ [ t ];
replace_token rest
| _ -> raise (Utils.Syntax_error "unable to fix pattern")
in
replace_token (List.hd tokenized_pattern :: "define" :: List.tl tokenized_pattern)
;;
let read_atom token =
match token with
| "null" -> T.Nil
| "#t" | "#true" -> T.Bool true
| "#f" | "#false" -> T.Bool false
| _ ->
(match token.[0] with
| '0' .. '9' -> Types.number (float_of_string token)
| '#' ->
(match token.[1], token.[2] with
| '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
| _ -> Types.symbol token)
| '-' ->
(match String.length token with
| 1 -> Types.symbol token
| _ ->
(match token.[1] with
| '0' .. '9' -> Types.number (float_of_string token)
| _ -> Types.symbol token))
| '"' -> T.String (unescape_string token)
| _ -> Types.symbol token)
;;
let rec read_list eol list_reader =
if List.length list_reader.tokens > 1 && List.hd list_reader.tokens = "("
then (
match
try Env.get registered_macros (Types.symbol (List.nth list_reader.tokens 1)) with
| _ -> T.Nil
with
| T.List { T.value = xs; T.meta } ->
print_endline "XXXX MACRO FOUND";
let rec collect_args tokens args =
match tokens with
| [ t ] -> args @ [ t ]
| t :: ts -> if t = eol then args else collect_args ts args @ [ t ]
| _ -> []
in
let args = collect_args (List.tl list_reader.tokens) [] in
print_endline ("<><><> args: " ^ String.concat " " args);
print_endline ("<><><><>: " ^ Macro.match_variant meta args)
| _ -> ());
match list_reader.tokens with
| [] ->
print_endline ("ERROR: " ^ Printer.dump list_reader.list_form);
raise (Utils.Syntax_error ("unterminated '" ^ eol ^ "'"))
| [ token ] -> { list_form = list_reader.list_form; tokens = [ ")" ] }
| token :: tokens ->
if Str.string_match (Str.regexp eol) token 0
then { list_form = list_reader.list_form; tokens }
else (
let reader = read_form list_reader.tokens in
read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })
and read_quote sym tokens =
let reader = read_form tokens in
{ form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }
and read_vector all_tokens =
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
| _ -> read_form tokens)
and read_macro tokens =
let macro = ref [] in
let list_reader = read_list ")" { list_form = []; tokens } in
print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
(match list_reader.list_form with
| sym :: rest ->
print_endline (" sym: " ^ Printer.print sym true);
print_endline (" rest: " ^ Printer.dump rest);
(match rest with
| [ T.List { T.value = T.Symbol { T.value = "syntax-rules" } :: literals :: clauses } ] ->
let variants = Macro.generate_variants sym literals clauses in
let macro_entry = Types.macro sym literals (Types.list clauses) variants in
Env.set registered_macros sym macro_entry;
Types.M9map.iter
(fun k v ->
print_endline
(" >>> " ^ Printer.print k true ^ ": " ^ String.concat " " (fix_pattern k (Printer.print v true)));
macro := !macro @ fix_pattern k (Printer.print v true);
Env.set registered_macros k (read_form (fix_pattern k (Printer.print v true))).form)
variants
| _ -> raise (Utils.Syntax_error "read_macro botch"))
| _ as x -> print_endline (" last rest: " ^ Printer.dump x));
read_form !macro
and read_form all_tokens =
(* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
match all_tokens with
| [] -> raise End_of_file
| token :: tokens ->
(match token with
| "'" -> read_quote "quote" tokens
| "`" -> read_quote "quasiquote" tokens
| "#" -> read_vector tokens
| "#|" ->
let list_reader = read_list "|#" { list_form = []; tokens } in
print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
{ form = T.Unspecified; tokens = list_reader.tokens }
| "(" ->
let list_reader = read_list ")" { list_form = []; tokens } in
{ form = Types.list list_reader.list_form; tokens = list_reader.tokens }
| "" | "\t" | "\n" -> read_form tokens
| "define-syntax" -> read_macro tokens
| _ ->
if token.[0] = ';'
then (
let list_reader = read_list "\\n" { list_form = []; tokens } in
print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
{ form = T.Unspecified; tokens = list_reader.tokens })
else { form = read_atom token; tokens })
;;
let slurp filename =
let chan = open_in filename in
let b = Buffer.create 27 in
Buffer.add_channel b chan (in_channel_length chan);
close_in chan;
Buffer.contents b
;;
let read str = (read_form (tokenize str)).form