starmelon/src/transpile.rs

505 lines
20 KiB
Rust
Raw Normal View History

use crate::elm;
use crate::reporting::{CompilerError, Problem, TypeError};
use genco::lang::rust;
use genco::tokens::quoted;
use genco::{quote, quote_in};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use tracing::info_span;
/// Builds the Elm project containing `file`, validates the type of `function`, and prints a Rust
/// translation of that function to stdout.
///
/// The visible steps are:
/// 1. run `elm::make` on `file` (forwarding `debug` and `verbosity`),
/// 2. locate the project root via `elm.json` and require `elm-stuff/0.19.1` to exist,
/// 3. read `file` and parse its module name, then build the `author/project` entrypoint global,
/// 4. load the interfaces and check the function's annotation with `validate_function_type`,
/// 5. load the objects; when the entrypoint is a `Define` of a `Function` expression, generate Rust
///    tokens for it with `codegen_function` and print them,
/// 6. walk the loaded objects and print every dependency that is missing from the object map.
///
/// # Errors
/// Returns a `Problem` when the Elm build fails, `elm.json` or the cache directory cannot be found,
/// the input file cannot be read or parsed, the interfaces or objects fail to load, the module or
/// function is absent from the interfaces, or the function's type is rejected by validation.
///
/// # Panics
/// Panics if `extract_function_types` returns `None` or if `tokens.to_file_string()` fails, because
/// both results are unwrapped.
pub fn transpile(
file: PathBuf,
debug: bool,
function: String,
//output: Option<PathBuf>,
verbosity: u64,
) -> Result<(), Problem> {
// Our first elm make call is where we build the users program. There is a pretty good chance
// this won't work.
elm::make(&file, debug, verbosity)?;
// step 2 find the elm artifacts cache directory just like with exec
let elm_project_dir =
elm::find_project_root("elm.json", "./").map_err(CompilerError::MissingElmJson)?;
let elm_cache_dir = elm_project_dir.join("elm-stuff").join("0.19.1");
if !elm_cache_dir.is_dir() {
return Err(CompilerError::MissingElmStuff(elm_cache_dir).into());
}
let data = std::fs::read(&file)
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, file.clone()))?;
let elmi::Name(target_module) = elm::parse_module_name(&data)?;
// Side note: the transpile function really should be taking a list of functions in modules and
// transpiling the entire forest of dependencies. This would allow avoiding kernel javascript
// used in the project. So I would want to reduce the tuples of file function into a set of
// files to load. Then recursively load all dependencies.
// The entrypoint is always looked up under the placeholder package name `author/project`.
let entrypoint = elmi::Global(
elmi::ModuleNameCanonical {
package: elmi::PackageName::new("author", "project"),
module: elmi::Name(target_module.clone()),
},
elmi::Name(function.clone()),
);
// step 3 find all the filepaths in the elm-stuff/0.19.1/* folder
let interfaces = elm::load_interfaces(&elm_cache_dir)?;
// Step 4, check that the desired function has a type that we can compile.
let span = info_span!("resolved target function");
let timing_guard = span.enter();
// `signature` borrows the annotation from `interfaces`; it is destructured again further down
// once the object graph has been loaded.
let signature = match interfaces.get(&entrypoint.0) {
Some(interface) => match interface.values.get(&elmi::Name::from(&function)) {
Some(annotation) => {
let elmi::CannonicalAnnotation(_free_vars, ref tipe) = annotation;
validate_function_type(tipe)?;
annotation
}
None => return Err(CompilerError::BadImport(entrypoint).into()),
},
None => return Err(CompilerError::MissingModuleTypeInformation(entrypoint.0).into()),
};
// Leave the tracing span explicitly so it only covers the lookup and validation above.
drop(timing_guard);
// all the symbols in author/project will be found in the elm_cache_dir, while the rest will be
// found in the elm_home_dir
// Step 5, recursively load all the symbols from the ~/.elm stuff artifacts.dat
println!("ok the function was acceptable, ready to build symbol table");
let objects = elm::load_objects(&elm_cache_dir)?;
// Only a top-level `Define` whose expression is a `Function` is translated; any other node kind
// prints a message, and a missing entrypoint is skipped silently.
if let Some(node) = objects.get(&entrypoint) {
match node {
elmi::Node::Define(elmi::Expr::Function(ref parameters, ref body), deps) => {
for dep in deps {
println!("I depend on {}", dep);
}
let elmi::CannonicalAnnotation(elmi::FreeVars(free_variables), tipe) = signature;
// NOTE(review): this unwrap panics when the annotation has fewer `TLambda` layers than
// the definition has parameters — confirm that cannot happen after validation.
let (parameter_types, return_type) =
extract_function_types(&tipe, parameters.len()).unwrap();
// Pair each parameter name with its type in declaration order.
let xs = parameters
.into_iter()
.zip(parameter_types.into_iter())
.collect::<Vec<_>>();
// TODO add any TLambdas in the signature to the type parameters of the functions
// as where bounds
let mut tokens = rust::Tokens::new();
codegen_function(
&mut tokens,
&entrypoint.1,
&free_variables,
&xs,
return_type,
body,
);
println!("{}", tokens.to_file_string().unwrap());
}
_ => println!("I don't know how to transpile that node"),
}
}
println!("total symbols {}", objects.len());
//let visited: HashSet<Global> = HashSet::new();
// Diagnostic pass: report every dependency edge that points at a global missing from `objects`.
for (key, node) in objects.iter() {
//println!("key {}", key);
match node {
elmi::Node::Define(expr, deps) => {
//println!("key => {:?}", expr);
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
// NOTE(review): this `break` leaves the surrounding `for` loop after the first `Define`
// node, so the remaining objects are never checked — confirm this is intentional.
break;
}
elmi::Node::DefineTailFunc(_, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Ctor(_, _) => {}
elmi::Node::Enum(_) => {}
elmi::Node::Box => {}
elmi::Node::Link(dep) => {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
elmi::Node::Cycle(_, _, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Manager(_) => (),
elmi::Node::Kernel(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortIncoming(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortOutgoing(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(&dep) {
println!("could not find dep {}", dep);
}
}
}
}
}
// Placeholder for the symbol table that the later steps will populate; currently unused.
let _symbol_table: HashMap<elmi::Global, elmi::Node> = HashMap::new();
// step 6, start generating rust code using a tree visitor on each of the entry points.
// Accumulate the contents of each rust module in map.
//
// step 6a decide on the name mangling rules for elm identifiers to rust identifiers. I can't
// use the $ in identifiers. Maybe I could use Z. Not sure why I can't use underscore.
// author_project__module_name__identifier?
//
// step 6b figure out how to handle partial apply, one approach requires me to track the arity
// of each function call and generate a custom closure for each partial apply.
// step 7 write out each of the generated rust modules.
Ok(())
}
/// Decides whether a top-level annotation describes something we are able to evaluate.
///
/// A lambda is accepted when both its result and its argument pass `validate_output_type` and
/// `validate_input_type` (the result is checked first). A bare `String` or `Bytes` value is
/// accepted as well. A filled alias is unwrapped and validated again; every other shape is
/// rejected with the matching `TypeError` variant.
fn validate_function_type(tipe: &elmi::Type) -> Result<(), TypeError> {
    match tipe {
        elmi::Type::TLambda(input, output) => {
            // The output is inspected before the input: that is where a second argument would
            // show up, and only single-argument functions are supported for now (partial
            // application has no Rust mapping yet).
            validate_output_type(&**output)?;
            validate_input_type(&**input)
        }
        elmi::Type::TVar(_) => Err(TypeError::CantEvalGeneric),
        elmi::Type::TType(module_name, name, args) => {
            // Any type that carries type arguments is treated as a custom type.
            if !args.is_empty() {
                return Err(TypeError::CantEvalCustomType);
            }
            // Without arguments only the two primitive value types are allowed.
            let is_string = module_name == "elm/core/String" && name == "String";
            let is_bytes = module_name == "elm/bytes/Bytes" && name == "Bytes";
            if is_string || is_bytes {
                Ok(())
            } else {
                Err(TypeError::CantEvalType(tipe.clone()))
            }
        }
        elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord),
        elmi::Type::TUnit => Err(TypeError::CantEvalUnit),
        elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple),
        elmi::Type::TAlias(_, _, _, alias) => match &**alias {
            elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
            // The recursion is believed to stop after one step. What the annotation looks like
            // for a doubly indirect alias has not been tested, e.g. for `view` in
            // ```elm
            // type alias Foo = Int
            // type alias Bar = String
            //
            // type alias Zap = Foo -> Bar
            //
            // view : Zap
            // ```
            elmi::AliasType::Filled(inner) => validate_function_type(inner),
        },
    }
}
/// Checks that `tipe` is acceptable as the single argument of the function being evaluated.
///
/// Accepted inputs are `String`, `Bytes` and `Json.Encode.Value` (each without type arguments),
/// any record, and a filled alias whose underlying type is itself accepted.
///
/// # Errors
/// - `TypeError::EvalRequiresSingleArgument` when the input is itself a function type.
/// - `TypeError::CantEvalHoleyAlias` when the input is an alias that is not filled in.
/// - `TypeError::InputTypeNotSupported` for every other type.
fn validate_input_type(tipe: &elmi::Type) -> Result<(), TypeError> {
    match tipe {
        elmi::Type::TLambda(_, _) => Err(TypeError::EvalRequiresSingleArgument(tipe.clone())),
        elmi::Type::TType(module_name, name, args) if args.is_empty() => {
            if module_name == "elm/core/String" && name == "String" {
                Ok(())
            } else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
                Ok(())
            } else if module_name == "elm/json/Json.Encode" && name == "Value" {
                Ok(())
            } else {
                Err(TypeError::InputTypeNotSupported(tipe.clone()))
            }
        }
        elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
            elmi::AliasType::Filled(tipe) => validate_input_type(tipe),
            elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
        },
        elmi::Type::TRecord(_, _) => Ok(()),
        // Everything else (type variables, unit, tuples, types with arguments) is an unsupported
        // *input*. This arm previously reported `OutputTypeNotSupported`, which blamed the wrong
        // side of the function signature.
        _ => Err(TypeError::InputTypeNotSupported(tipe.clone())),
    }
}
/// Checks that `tipe` is something the evaluated function is allowed to return.
///
/// `String`, `Bytes`, `Json.Encode.Value` and `VirtualDom.Node` are accepted regardless of their
/// type arguments. A filled alias is unwrapped and checked again. Anything else yields
/// `TypeError::OutputTypeNotSupported`, except a holey alias, which yields
/// `TypeError::CantEvalHoleyAlias`.
fn validate_output_type(tipe: &elmi::Type) -> Result<(), TypeError> {
    match tipe {
        elmi::Type::TType(module_name, name, _args) => {
            let supported = (module_name == "elm/core/String" && name == "String")
                || (module_name == "elm/bytes/Bytes" && name == "Bytes")
                || (module_name == "elm/json/Json.Encode" && name == "Value")
                || (module_name == "elm/virtual-dom/VirtualDom" && name == "Node");
            if supported {
                Ok(())
            } else {
                Err(TypeError::OutputTypeNotSupported(tipe.clone()))
            }
        }
        elmi::Type::TAlias(_, _, _, alias) => match &**alias {
            elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
            elmi::AliasType::Filled(inner) => validate_output_type(inner),
        },
        _ => Err(TypeError::OutputTypeNotSupported(tipe.clone())),
    }
}
/// Appends a Rust `fn` item for one Elm function to `tokens`.
///
/// - `name`: the Elm function name; its inner string is used as the Rust function name.
/// - `type_variables`: free type variables of the signature; a `<...>` generic parameter list is
///   emitted only when this set is non-empty. Iteration order of a `HashSet` is unspecified, so
///   the order of the generic parameters may differ between runs.
/// - `parameters`: parameter names paired with their Elm types; each is emitted as `name: type`
///   with the type rendered by `codegen_type`.
/// - `return_type`: rendered by `codegen_type` after `->`.
/// - `body`: rendered by `codegen_expr` inside the function's braces.
///
/// NOTE(review): the exact whitespace of the output is determined by genco's handling of the
/// layout inside `quote_in!`; avoid reformatting the macro body without checking the output.
fn codegen_function(
tokens: &mut rust::Tokens,
name: &elmi::Name,
type_variables: &HashSet<elmi::Name>,
parameters: &[(&elmi::Name, elmi::Type)],
return_type: elmi::Type,
body: &elmi::Expr,
) {
quote_in! { *tokens =>
fn #(&name.0)#(if !type_variables.is_empty() =>
<#(for elmi::Name(ref tvar) in type_variables.iter() join (, ) =>
#tvar
)>
)(#(for (elmi::Name(ref parameter), tipe) in parameters.iter() join (, ) =>
#parameter: #(ref out { codegen_type(out, tipe) })
)) -> #(ref out { codegen_type(out, &return_type) }) {
#(ref out { codegen_expr(out, body) })
}
}
}
/// Appends the Rust spelling of the Elm type `tipe` to `tokens`.
///
/// Arms, in match order:
/// - `TLambda(a, b)`: both sides are rendered recursively around a `->`, wrapped in parentheses.
/// - `TVar`: the variable name is emitted as-is.
/// - `TType` for `elm/core/String.String` without arguments: `String`.
/// - any other `TType` without arguments: the mangled name from `codegen_name_from_global`.
/// - `TType` with arguments: the mangled name followed by `<...>` holding each argument rendered
///   recursively.
/// - `TUnit` and the catch-all: see the note below.
///
/// NOTE(review): the whole `match` lives inside `quote_in!`, so the arm bodies are presumably
/// treated as quoted output rather than executed Rust. Verify what the `TUnit => ()` arm emits and
/// whether the `println!` in the catch-all arm runs or is written into the generated code next to
/// the `todo_tipe` placeholder.
///
/// Records, tuples and aliases currently fall into the catch-all arm; the commented-out code
/// sketches how they and the other primitives might be handled.
fn codegen_type(tokens: &mut rust::Tokens, tipe: &elmi::Type) {
quote_in! { *tokens =>
#(match tipe {
elmi::Type::TLambda(a, b) => {
( #(ref out => codegen_type(out, a) ) -> #(ref out => codegen_type(out, b) ) )
}
elmi::Type::TVar(elmi::Name(variable)) => {
#variable
},
elmi::Type::TType(module_name, name, args) if module_name == "elm/core/String" && name == "String" && args.is_empty() => {
String
}
elmi::Type::TType(home, name, args) if args.is_empty() => {
#(ref out => codegen_name_from_global(out, home, name))
}
elmi::Type::TType(home, name, args) => {
#(ref out => codegen_name_from_global(out, home, name))<#(for arg in args join(, ) =>
#(ref out => codegen_type(out, arg))
)>
}
// // Might be a primitive type
// #(if module_name == "elm/core/String" && name == "String" => String)
// #(if module_name == "elm/core/Basics" && name == "Int" => i64)
// #(if module_name == "elm/core/Basics" && name == "Float" => f64)
// #(if module_name == "elm/core/Basics" && name == "Bool" => bool)
// #(if module_name == "elm/core/Maybe" && name == "Maybe" => Option<i32>)
// #(if module_name == "elm/bytes/Bytes" && name == "Bytes" => Vec<u8>)
//}
//elmi::Type::TType(_, _, _) => Err(TypeError::CantEvalCustomType),
//elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord),
elmi::Type::TUnit => (),
_ => {
println!("failed to solve code {:?}", tipe);
todo_tipe
},
//elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple),
//elmi::Type::TAlias(_, _, _, ref alias) => {
// match &**alias {
// elmi::AliasType::Filled(tipe) => {
// // I think the recursion is limited to a single step. I have not tested what
// // the CannonicalAnnotation would look like for a doubly indirect alias, for
// // example for `view` below
// // ```elm
// // type alias Foo = Int
// // type alias Bar = String
// //
// // type alias Zap = Foo -> Bar
// //
// // view : Zap
// // ```
// validate_function_type(tipe)
// }
// elmi::AliasType::Holey(_) => return Err(TypeError::CantEvalHoleyAlias),
// }
//}
})
};
}
/// Appends the mangled Rust identifier for the Elm global `home.name` to `tokens`.
///
/// The identifier is the module prefix produced by `codegen_home_to_builder`, followed by `__`
/// and the inner string of `name`.
fn codegen_name_from_global(
tokens: &mut rust::Tokens,
home: &elmi::ModuleNameCanonical,
name: &elmi::Name,
) {
quote_in! { *tokens =>
#(ref out => codegen_home_to_builder(out, home) )__#(&name.0)
}
}
/// Appends the mangled module prefix for `global` to `tokens`.
///
/// The prefix is built from three parts: `_` + author, `_` + project, `__` + module name. Every
/// `-` in the author and project is replaced by `_`, and every `.` in the module name is replaced
/// by `_`, so that the result can be used inside a Rust identifier.
///
/// NOTE(review): whether genco inserts spacing between the interpolated parts depends on the
/// layout of the macro body — confirm against the generated output.
fn codegen_home_to_builder(tokens: &mut rust::Tokens, global: &elmi::ModuleNameCanonical) {
// Pull the three name components out of the canonical module name.
let elmi::ModuleNameCanonical {
package: elmi::PackageName { author, project },
module: home,
} = global;
quote_in! { *tokens =>
_#(author.replace("-", "_"))_#(project.replace("-", "_"))__#(home.0.replace(".", "_"))
}
}
/// Appends Rust tokens for the Elm expression `expr` to `tokens`.
///
/// Handled variants: boolean, character, string, integer and float literals; local and global
/// variable references; lists (emitted as a borrowed array literal); calls; unit (emits nothing);
/// and two- and three-element tuples. A `Function` expression emits a placeholder string, and
/// every variant without its own arm falls through to the final catch-all, which emits the
/// expression's `Debug` representation.
///
/// The commented-out variant names list the expression kinds that still need an implementation.
fn codegen_expr(tokens: &mut rust::Tokens, expr: &elmi::Expr) {
match expr {
elmi::Expr::Bool(true) => quote_in! { *tokens => true },
elmi::Expr::Bool(false) => quote_in! { *tokens => false },
// The character is wrapped in single quotes supplied as string interpolations.
elmi::Expr::Chr(c) => quote_in! { *tokens => #("'")#c#("'") },
elmi::Expr::Str(s) => quote_in! { *tokens => #(quoted(s)) },
elmi::Expr::Int(x) => quote_in! { *tokens => #(x.to_string()) },
elmi::Expr::Float(x) => quote_in! { *tokens => #(x.to_string()) },
elmi::Expr::VarLocal(elmi::Name(ref name)) => {
quote_in! { *tokens =>
#name
}
}
// Globals are referenced through their mangled identifier.
elmi::Expr::VarGlobal(elmi::Global(home, name)) => {
quote_in! { *tokens =>
#(ref out => codegen_name_from_global(out, home, name))
}
}
//elmi::Expr::VarEnum(Global, IndexZeroBased),
//elmi::Expr::VarBox(Global),
//elmi::Expr::VarCycle(ModuleNameCanonical, Name),
//elmi::Expr::VarDebug(Name, ModuleNameCanonical, AnnotationRegion, Option<Name>),
//elmi::Expr::VarKernel(Name, Name),
// Lists become `&[...]`; the empty list is special-cased to `&[]`.
elmi::Expr::List(xs) => {
if xs.is_empty() {
quote_in! { *tokens => &[] }
} else {
quote_in! { *tokens =>
&[
#(for x in xs join (,#<push>) => #(ref out => codegen_expr(out, x) ) )
]
}
}
}
// Not implemented yet: `parameters` and `body` are unused and a placeholder is emitted.
elmi::Expr::Function(parameters, body) => {
quote_in! { *tokens =>
"i don't know how to code gen a function expression"
//#(for elmi::Name(ref parameter) in parameters.iter() join (, ) =>
//)
}
}
// Only calls whose callee is a global get a real callee name; any other callee expression is
// replaced by the "unknown" placeholder. Arguments are rendered recursively.
elmi::Expr::Call(ref fexpr, args) => {
quote_in! { *tokens =>
#(match &**fexpr {
elmi::Expr::VarGlobal(elmi::Global(home, name)) => {
#(ref out => codegen_name_from_global(out, home, name))
}
_ => {
"unknown"
}
})(
#(for arg in args join (,#<push>) => #(ref out =>
codegen_expr(out, arg) )
)
)
}
}
//elmi::Expr::TailCall(Name, Vec<(Name, Expr)>),
//elmi::Expr::If(Vec<(Expr, Expr)>, Box<Expr>),
//elmi::Expr::Let(Def, Box<Expr>),
//elmi::Expr::Destruct(Destructor, Box<Expr>),
//elmi::Expr::Case(Name, Name, Decider<Choice>, Vec<(i64, Expr)>),
//elmi::Expr::Accessor(Name),
//elmi::Expr::Access(Box<Expr>, Name),
//elmi::Expr::Update(Box<Expr>, HashMap<Name, Expr>),
//elmi::Expr::Record(HashMap<Name, Expr>),
// Unit appends nothing to `tokens`.
elmi::Expr::Unit => (),
elmi::Expr::Tuple(a, b, None) => {
quote_in! { *tokens =>
( #(ref out => codegen_expr(out, a) ), #(ref out => codegen_expr(out, b) ) )
}
}
elmi::Expr::Tuple(a, b, Some(c)) => {
quote_in! { *tokens =>
( #(ref out => codegen_expr(out, a) ), #(ref out => codegen_expr(out, b) ), #(ref out => codegen_expr(out, c) ) )
}
}
//elmi::Expr::Shader(ShaderSource, HashSet<Name>, HashSet<Name>),
// Fallback for every unhandled variant: emit its `Debug` text so the gap is visible in the output.
_ => quote_in! { *tokens => #(format!("{:?}", expr)) },
}
}
/// Peels `nargs` argument types off a curried function type.
///
/// Returns the argument types in order together with whatever type remains after the last peeled
/// argument. Each argument, and each remaining result type, has one alias layer removed via
/// `reduce_alias_types`; the initial `tipe` is inspected as given. Returns `None` as soon as a
/// non-lambda type is met while arguments are still expected.
fn extract_function_types(
    mut tipe: &elmi::Type,
    mut nargs: usize,
) -> Option<(Vec<elmi::Type>, elmi::Type)> {
    let mut collected = Vec::with_capacity(nargs);
    while nargs > 0 {
        match tipe {
            elmi::Type::TLambda(argument, rest) => {
                collected.push(reduce_alias_types(&**argument).clone());
                tipe = reduce_alias_types(&**rest);
            }
            // Fewer lambda layers than requested arguments.
            _ => return None,
        }
        nargs -= 1;
    }
    Some((collected, tipe.clone()))
}
/// Strips a single filled alias layer from `a`.
///
/// When `a` is an alias whose definition is filled in, the aliased type is returned. A holey alias
/// and every non-alias type are returned unchanged. Only one layer is removed; the result may
/// itself still be an alias.
fn reduce_alias_types(a: &elmi::Type) -> &elmi::Type {
    if let elmi::Type::TAlias(_, _, _, alias) = a {
        if let elmi::AliasType::Filled(inner) = &**alias {
            return inner;
        }
    }
    a
}
// Figure out how to do structural types. If I could name-mangle all the functions, I could write
// them out in the same namespace as lambdas, which would avoid the structural typing problem if
// the lambda was used by one type (monomorphization). But if the lambda is used by multiple types,
// then I would need to either narrow the type into a tuple at the call site or generate a
// specialized struct and borrow all the children.
//
//
// The question is: should I use Rc with immutable data structures? Or should I try to statically
// analyse copies and use mutable state when possible?