use crate::elm; use crate::reporting::{CompilerError, Problem, TypeError}; use genco::lang::rust; use genco::tokens::quoted; use genco::{quote, quote_in}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use tracing::info_span; pub fn transpile( file: PathBuf, debug: bool, function: String, //output: Option, verbosity: u64, ) -> Result<(), Problem> { // Our first elm make call is where we build the users program. There is a pretty good chance // this won't work. elm::make(&file, debug, verbosity)?; // step 2 find the elm artifacts cache directory just like with exec let elm_project_dir = elm::find_project_root("elm.json", "./").map_err(CompilerError::MissingElmJson)?; let elm_cache_dir = elm_project_dir.join("elm-stuff").join("0.19.1"); if !elm_cache_dir.is_dir() { return Err(CompilerError::MissingElmStuff(elm_cache_dir).into()); } let data = std::fs::read(&file) .map_err(|io_err| CompilerError::ReadInputFailed(io_err, file.clone()))?; let elmi::Name(target_module) = elm::parse_module_name(&data)?; // Side note: the transpile function really should be taking a list of functions in modules and // transpiling the entire forest of dependencies. This would allow avoiding kernel javascript // used in the project. So I would want to reduce the tuples of file function into a set of // files to load. Then recursively load all dependencies. let entrypoint = elmi::Global( elmi::ModuleNameCanonical { package: elmi::PackageName::new("author", "project"), module: elmi::Name(target_module.clone()), }, elmi::Name(function.clone()), ); // step 3 find all the filepaths in the elm-stuff/0.19.1/* folder let interfaces = elm::load_interfaces(&elm_cache_dir)?; // Step 4, check for the desired functions have types that we can compile. let span = info_span!("resolved target function"); let timing_guard = span.enter(); let signature = match interfaces.get(&entrypoint.0) { Some(interface) => match interface.values.get(&elmi::Name::from(&function)) { Some(annotation) => { let elmi::CannonicalAnnotation(_free_vars, ref tipe) = annotation; validate_function_type(tipe)?; annotation } None => return Err(CompilerError::BadImport(entrypoint).into()), }, None => return Err(CompilerError::MissingModuleTypeInformation(entrypoint.0).into()), }; drop(timing_guard); // all the symbols in author/project will be found in the elm_cache_dir, while the rest will be // found in the elm_home_dir // Step 5, recursively load all the symbols from the ~/.elm stuff artifacts.dat println!("ok the function was acceptable, ready to build symbol table"); let objects = elm::load_objects(&elm_cache_dir)?; if let Some(node) = objects.get(&entrypoint) { match node { elmi::Node::Define(elmi::Expr::Function(ref parameters, ref body), deps) => { for dep in deps { println!("I depend on {}", dep); } let elmi::CannonicalAnnotation(elmi::FreeVars(free_variables), tipe) = signature; let (parameter_types, return_type) = extract_function_types(&tipe, parameters.len()).unwrap(); let xs = parameters .into_iter() .zip(parameter_types.into_iter()) .collect::>(); // TODO add any TLambdas in the signature to the type parameters of the functions // as where bounds let mut tokens = rust::Tokens::new(); codegen_function( &mut tokens, &entrypoint.1, &free_variables, &xs, return_type, body, ); println!("{}", tokens.to_file_string().unwrap()); } _ => println!("I don't know how to transpile that node"), } } println!("total symbols {}", objects.len()); //let visited: HashSet = HashSet::new(); for (key, node) in objects.iter() { //println!("key {}", key); match node { elmi::Node::Define(expr, deps) => { //println!("key => {:?}", expr); for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } break; } elmi::Node::DefineTailFunc(_, _, deps) => { for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } } elmi::Node::Ctor(_, _) => {} elmi::Node::Enum(_) => {} elmi::Node::Box => {} elmi::Node::Link(dep) => { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } elmi::Node::Cycle(_, _, _, deps) => { for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } } elmi::Node::Manager(_) => (), elmi::Node::Kernel(_, deps) => { for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } } elmi::Node::PortIncoming(_, deps) => { for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } } elmi::Node::PortOutgoing(_, deps) => { for dep in deps.iter() { if !objects.contains_key(&dep) { println!("could not find dep {}", dep); } } } } } let _symbol_table: HashMap = HashMap::new(); // step 6, start generating rust code using a tree visitor on each of the entry points. // Accumulate the contents of each rust module in map. // // step 6a decide on the name mangling rules for elm indentifiers to rust identifiers. I can't // use the $ in identifiers. Maybe I could use Z. Not sure why I can't use underscore. // author_project__module_name__identifier? // // step 6b figure out how to handle partial apply, one approach requires me to track the arity // of each function call and generate a custom closure for each partial apply. // step 7 write out each of the generated rust modulues. Ok(()) } fn validate_function_type(tipe: &elmi::Type) -> Result<(), TypeError> { match tipe { elmi::Type::TLambda(a, b) => { // We want to check the output types first because this is where we will figure out if // there is more than one argument. I only want to accept functions with a single // argument to keep the complexity down while I figure out how to map over to rust. I // am specifically worried about implementing partial application in a performant way. // Nested partial application worries me. validate_output_type(&**b)?; validate_input_type(&**a)?; Ok(()) } elmi::Type::TVar(_) => Err(TypeError::CantEvalGeneric), elmi::Type::TType(module_name, name, args) if args.is_empty() => { // If our function returns a primitive type if module_name == "elm/core/String" && name == "String" { return Ok(()); } if module_name == "elm/bytes/Bytes" && name == "Bytes" { return Ok(()); } Err(TypeError::CantEvalType(tipe.clone())) } elmi::Type::TType(_, _, _) => Err(TypeError::CantEvalCustomType), elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord), elmi::Type::TUnit => Err(TypeError::CantEvalUnit), elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple), elmi::Type::TAlias(_, _, _, ref alias) => { match &**alias { elmi::AliasType::Filled(tipe) => { // I think the recursion is limited to a single step. I have not tested what // the CannonicalAnnotation would look like for a doubly indirect alias, for // example for `view` below // ```elm // type alias Foo = Int // type alias Bar = String // // type alias Zap = Foo -> Bar // // view : Zap // ``` validate_function_type(tipe) } elmi::AliasType::Holey(_) => return Err(TypeError::CantEvalHoleyAlias), } } } } fn validate_input_type(tipe: &elmi::Type) -> Result<(), TypeError> { match tipe { elmi::Type::TLambda(_, _) => Err(TypeError::EvalRequiresSingleArgument(tipe.clone())), elmi::Type::TType(module_name, name, args) if args.is_empty() => { if module_name == "elm/core/String" && name == "String" { Ok(()) } else if module_name == "elm/bytes/Bytes" && name == "Bytes" { Ok(()) } else if module_name == "elm/json/Json.Encode" && name == "Value" { Ok(()) } else { Err(TypeError::InputTypeNotSupported(tipe.clone())) } } elmi::Type::TAlias(_, _, _, ref alias) => match &**alias { elmi::AliasType::Filled(tipe) => validate_input_type(tipe), elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias), }, elmi::Type::TRecord(_, _) => Ok(()), _ => Err(TypeError::OutputTypeNotSupported(tipe.clone())), } } fn validate_output_type(tipe: &elmi::Type) -> Result<(), TypeError> { match tipe { elmi::Type::TType(module_name, name, _args) => { if module_name == "elm/core/String" && name == "String" { Ok(()) } else if module_name == "elm/bytes/Bytes" && name == "Bytes" { Ok(()) } else if module_name == "elm/json/Json.Encode" && name == "Value" { Ok(()) } else if module_name == "elm/virtual-dom/VirtualDom" && name == "Node" { Ok(()) } else { Err(TypeError::OutputTypeNotSupported(tipe.clone())) } } elmi::Type::TAlias(_, _, _, ref alias) => match &**alias { elmi::AliasType::Filled(tipe) => validate_output_type(tipe), elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias), }, _ => Err(TypeError::OutputTypeNotSupported(tipe.clone())), } } fn codegen_function( tokens: &mut rust::Tokens, name: &elmi::Name, type_variables: &HashSet, parameters: &[(&elmi::Name, elmi::Type)], return_type: elmi::Type, body: &elmi::Expr, ) { quote_in! { *tokens => fn #(&name.0)#(if !type_variables.is_empty() => <#(for elmi::Name(ref tvar) in type_variables.iter() join (, ) => #tvar )> )(#(for (elmi::Name(ref parameter), tipe) in parameters.iter() join (, ) => #parameter: #(ref out { codegen_type(out, tipe) }) )) -> #(ref out { codegen_type(out, &return_type) }) { #(ref out { codegen_expr(out, body) }) } } } fn codegen_type(tokens: &mut rust::Tokens, tipe: &elmi::Type) { quote_in! { *tokens => #(match tipe { elmi::Type::TLambda(a, b) => { ( #(ref out => codegen_type(out, a) ) -> #(ref out => codegen_type(out, b) ) ) } elmi::Type::TVar(elmi::Name(variable)) => { #variable }, elmi::Type::TType(module_name, name, args) if module_name == "elm/core/String" && name == "String" && args.is_empty() => { String } elmi::Type::TType(home, name, args) if args.is_empty() => { #(ref out => codegen_name_from_global(out, home, name)) } elmi::Type::TType(home, name, args) => { #(ref out => codegen_name_from_global(out, home, name))<#(for arg in args join(, ) => #(ref out => codegen_type(out, arg)) )> } // // Might be a primitive type // #(if module_name == "elm/core/String" && name == "String" => String) // #(if module_name == "elm/core/Basics" && name == "Int" => i64) // #(if module_name == "elm/core/Basics" && name == "Float" => f64) // #(if module_name == "elm/core/Basics" && name == "Bool" => bool) // #(if module_name == "elm/core/Maybe" && name == "Maybe" => Option) // #(if module_name == "elm/bytes/Bytes" && name == "Bytes" => Vec) //} //elmi::Type::TType(_, _, _) => Err(TypeError::CantEvalCustomType), //elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord), elmi::Type::TUnit => (), _ => { println!("failed to solve code {:?}", tipe); todo_tipe }, //elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple), //elmi::Type::TAlias(_, _, _, ref alias) => { // match &**alias { // elmi::AliasType::Filled(tipe) => { // // I think the recursion is limited to a single step. I have not tested what // // the CannonicalAnnotation would look like for a doubly indirect alias, for // // example for `view` below // // ```elm // // type alias Foo = Int // // type alias Bar = String // // // // type alias Zap = Foo -> Bar // // // // view : Zap // // ``` // validate_function_type(tipe) // } // elmi::AliasType::Holey(_) => return Err(TypeError::CantEvalHoleyAlias), // } //} }) }; } fn codegen_name_from_global( tokens: &mut rust::Tokens, home: &elmi::ModuleNameCanonical, name: &elmi::Name, ) { quote_in! { *tokens => #(ref out => codegen_home_to_builder(out, home) )__#(&name.0) } } fn codegen_home_to_builder(tokens: &mut rust::Tokens, global: &elmi::ModuleNameCanonical) { let elmi::ModuleNameCanonical { package: elmi::PackageName { author, project }, module: home, } = global; quote_in! { *tokens => _#(author.replace("-", "_"))_#(project.replace("-", "_"))__#(home.0.replace(".", "_")) } } fn codegen_expr(tokens: &mut rust::Tokens, expr: &elmi::Expr) { match expr { elmi::Expr::Bool(true) => quote_in! { *tokens => true }, elmi::Expr::Bool(false) => quote_in! { *tokens => false }, elmi::Expr::Chr(c) => quote_in! { *tokens => #("'")#c#("'") }, elmi::Expr::Str(s) => quote_in! { *tokens => #(quoted(s)) }, elmi::Expr::Int(x) => quote_in! { *tokens => #(x.to_string()) }, elmi::Expr::Float(x) => quote_in! { *tokens => #(x.to_string()) }, elmi::Expr::VarLocal(elmi::Name(ref name)) => { quote_in! { *tokens => #name } } elmi::Expr::VarGlobal(elmi::Global(home, name)) => { quote_in! { *tokens => #(ref out => codegen_name_from_global(out, home, name)) } } //elmi::Expr::VarEnum(Global, IndexZeroBased), //elmi::Expr::VarBox(Global), //elmi::Expr::VarCycle(ModuleNameCanonical, Name), //elmi::Expr::VarDebug(Name, ModuleNameCanonical, AnnotationRegion, Option), //elmi::Expr::VarKernel(Name, Name), elmi::Expr::List(xs) => { if xs.is_empty() { quote_in! { *tokens => &[] } } else { quote_in! { *tokens => &[ #(for x in xs join (,#) => #(ref out => codegen_expr(out, x) ) ) ] } } } elmi::Expr::Function(parameters, body) => { quote_in! { *tokens => "i don't know how to code gen a function expression" //#(for elmi::Name(ref parameter) in parameters.iter() join (, ) => //) } } elmi::Expr::Call(ref fexpr, args) => { quote_in! { *tokens => #(match &**fexpr { elmi::Expr::VarGlobal(elmi::Global(home, name)) => { #(ref out => codegen_name_from_global(out, home, name)) } _ => { "unknown" } })( #(for arg in args join (,#) => #(ref out => codegen_expr(out, arg) ) ) ) } } //elmi::Expr::TailCall(Name, Vec<(Name, Expr)>), //elmi::Expr::If(Vec<(Expr, Expr)>, Box), //elmi::Expr::Let(Def, Box), //elmi::Expr::Destruct(Destructor, Box), //elmi::Expr::Case(Name, Name, Decider, Vec<(i64, Expr)>), //elmi::Expr::Accessor(Name), //elmi::Expr::Access(Box, Name), //elmi::Expr::Update(Box, HashMap), //elmi::Expr::Record(HashMap), elmi::Expr::Unit => (), elmi::Expr::Tuple(a, b, None) => { quote_in! { *tokens => ( #(ref out => codegen_expr(out, a) ), #(ref out => codegen_expr(out, b) ) ) } } elmi::Expr::Tuple(a, b, Some(c)) => { quote_in! { *tokens => ( #(ref out => codegen_expr(out, a) ), #(ref out => codegen_expr(out, b) ), #(ref out => codegen_expr(out, c) ) ) } } //elmi::Expr::Shader(ShaderSource, HashSet, HashSet), _ => quote_in! { *tokens => #(format!("{:?}", expr)) }, } } fn extract_function_types( mut tipe: &elmi::Type, mut nargs: usize, ) -> Option<(Vec, elmi::Type)> { let mut parameters = Vec::with_capacity(nargs); loop { if nargs == 0 { return Some((parameters, tipe.clone())); } match tipe { elmi::Type::TLambda(a, b) => { parameters.push(reduce_alias_types(&*a).clone()); tipe = reduce_alias_types(&*b); nargs -= 1; } _ => return None, } } } fn reduce_alias_types(a: &elmi::Type) -> &elmi::Type { match a { elmi::Type::TAlias(_, _, _, ref alias) => match &**alias { elmi::AliasType::Filled(b) => &b, elmi::AliasType::Holey(_) => a, }, _ => a, } } // Figure out how to do structural types. If I could name mangle all the functions I could write // them out in the same namespace as lambdas which would avoid the structural typing problem if the // lambda was used by one type. Monomorphism. But if the lambda is used by multiple types then I // would either need to narrow the type into a tuple at the call site, generate a specialize struct // and borrow all the children. // // // The question is should I use Rc with immutable datastructures? Or should I try to statically // analyse copies and use mutable state when possible.