starmelon/src/transpile.rs

use crate::elm;
use crate::reporting::{CompilerError, Problem, TypeError};
use elmi::DataBinary;
use std::collections::HashMap;
use std::path::PathBuf;
use tracing::info_span;

pub fn transpile(
file: PathBuf,
debug: bool,
function: String,
//output: Option<PathBuf>,
verbosity: u64,
) -> Result<(), Problem> {
// Our first `elm make` call is where we build the user's program. There is a pretty good chance
// this won't work.
elm::make(&file, debug, verbosity)?;
// step 2 find the elm artifacts cache directory just like with exec
let elm_project_dir =
elm::find_project_root("elm.json", "./").map_err(CompilerError::MissingElmJson)?;
let elm_cache_dir = elm_project_dir.join("elm-stuff").join("0.19.1");
if !elm_cache_dir.is_dir() {
return Err(CompilerError::MissingElmStuff(elm_cache_dir).into());
}
let data = std::fs::read(&file)
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, file.clone()))?;
let elmi::Name(target_module) = elm::parse_module_name(&data)?;
// Side note: the transpile function really should take a list of (file, function) pairs and
// transpile the entire forest of dependencies. That would make it possible to avoid the kernel
// JavaScript used in the project. So I would want to reduce the (file, function) pairs into a
// set of files to load, then recursively load all dependencies.
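//
// A rough sketch of what that dependency walk could look like, assuming `elmi::Global` is
// `Clone + Hash + Eq` and that some `dependencies_of` helper exists (both are assumptions,
// not current API):
// ```rust
// use std::collections::{HashMap, HashSet};
//
// fn reachable(
//     objects: &HashMap<elmi::Global, elmi::Node>,
//     entry: elmi::Global,
// ) -> HashSet<elmi::Global> {
//     let mut visited = HashSet::new();
//     let mut stack = vec![entry];
//     while let Some(symbol) = stack.pop() {
//         if visited.insert(symbol.clone()) {
//             if let Some(node) = objects.get(&symbol) {
//                 // hypothetical helper yielding the node's dependencies as &Global
//                 for dep in dependencies_of(node) {
//                     stack.push(dep.clone());
//                 }
//             }
//         }
//     }
//     visited
// }
// ```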
// step 3 find all the filepaths in the elm-stuff/0.19.1/* folder
let interfaces = elm::load_interfaces(&elm_cache_dir)?;
// Step 4, check that the desired function has a type that we can compile.
let span = info_span!("resolved target function");
let timing_guard = span.enter();
match interfaces.get(&target_module) {
Some(interface) => match interface.values.get(&elmi::Name::from(&function)) {
Some(annotation) => {
let elmi::CannonicalAnnotation(_free_vars, tipe) = annotation;
validate_function_type(tipe)?
}
None => return Err(CompilerError::BadImport(target_module, function).into()),
},
None => return Err(CompilerError::MissingModuleTypeInformation(target_module).into()),
};
drop(timing_guard);
// all the symbols in author/project will be found in the elm_cache_dir, while the rest will be
// found in the elm_home_dir
// Step 5, recursively load all the symbols from the ~/.elm stuff artifacts.dat
println!("ok the function was acceptable, ready to build symbol table");
let objects = elm::load_objects(&elm_cache_dir)?;
let entrypoint = elmi::Global(
elmi::ModuleNameCanonical {
package: elmi::PackageName::new("author", "project"),
module: elmi::Name(target_module.clone()),
},
elmi::Name(function.clone()),
);
println!("the artifacts has the symbol {}", objects.contains_key(&entrypoint));
//let visited: HashSet<Global> = HashSet::new();
for (_key, node) in objects.iter() {
//println!("key {}", key);
match node {
elmi::Node::Define(_expr, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::DefineTailFunc(_, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Ctor(_, _) => (),
elmi::Node::Enum(_) => (),
elmi::Node::Box => (),
elmi::Node::Link(dep) => {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
elmi::Node::Cycle(_, _, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Manager(_) => (),
elmi::Node::Kernel(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortIncoming(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortOutgoing(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
}
}
let _symbol_table: HashMap<elmi::Global, elmi::Node> = HashMap::new();
// step 6, start generating rust code using a tree visitor on each of the entry points.
// Accumulate the contents of each rust module in a map.
//
// step 6a decide on the name mangling rules for elm identifiers to rust identifiers. I can't
// use the $ in identifiers. Maybe I could use Z. Not sure why I can't use underscore.
// author_project__module_name__identifier?
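//
// A minimal sketch of that scheme (a hypothetical helper, nothing like this exists in the crate yet):
// ```rust
// fn mangle(package: &str, module: &str, identifier: &str) -> String {
//     // e.g. ("author/project", "Main", "view") -> "author_project__Main__view"
//     format!(
//         "{}__{}__{}",
//         package.replace('/', "_").replace('-', "_"),
//         module.replace('.', "_"),
//         identifier
//     )
// }
// ```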
//
// step 6b figure out how to handle partial apply. One approach requires me to track the arity
// of each function call and generate a custom closure for each partial apply.
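//
// A rough illustration of the closure idea for a known arity-2 function (names are made up):
// ```rust
// fn add(a: i64, b: i64) -> i64 {
//     a + b
// }
//
// // When a call site supplies fewer arguments than the arity, the generated code
// // would emit a closure capturing what it has so far.
// fn add_partial(a: i64) -> impl Fn(i64) -> i64 {
//     move |b| add(a, b)
// }
// ```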
// step 7 write out each of the generated rust modules.
Ok(())
}
fn validate_function_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TLambda(a, b) => {
// We want to check the output types first because this is where we will figure out if
// there is more than one argument. I only want to accept functions with a single
// argument to keep the complexity down while I figure out how to map over to rust. I
// am specifically worried about implementing partial application in a performant way.
// Nested partial application worries me.
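// For reference, a two-argument Elm function such as `add : Int -> Int -> Int` should show up
// here as nested lambdas, roughly TLambda(Int, TLambda(Int, Int)), so the extra argument is
// discovered on the output side (my reading of the canonical type layout, not verified).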
validate_output_type(&**b)?;
validate_input_type(&**a)?;
Ok(())
}
elmi::Type::TVar(_) => Err(TypeError::CantEvalGeneric),
elmi::Type::TType(module_name, name, args) if args.is_empty() => {
// The target is a plain value rather than a function; accept it only if it is a primitive type we support.
if module_name == "elm/core/String" && name == "String" {
return Ok(());
}
if module_name == "elm/bytes/Bytes" && name == "Bytes" {
return Ok(());
}
Err(TypeError::CantEvalType(tipe.clone()))
}
elmi::Type::TType(_, _, _) => Err(TypeError::CantEvalCustomType),
elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord),
elmi::Type::TUnit => Err(TypeError::CantEvalUnit),
elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple),
elmi::Type::TAlias(_, _, _, ref alias) => {
match &**alias {
elmi::AliasType::Filled(tipe) => {
// I think the recursion is limited to a single step. I have not tested what
// the CannonicalAnnotation would look like for a doubly indirect alias, for
// example for `view` below
// ```elm
// type alias Foo = Int
// type alias Bar = String
//
// type alias Zap = Foo -> Bar
//
// view : Zap
// ```
validate_function_type(tipe)
}
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
}
}
}
}
fn validate_input_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TLambda(_, _) => Err(TypeError::EvalRequiresSingleArgument(tipe.clone())),
elmi::Type::TType(module_name, name, args) if args.is_empty() => {
if module_name == "elm/core/String" && name == "String" {
Ok(())
} else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
Ok(())
} else if module_name == "elm/json/Json.Encode" && name == "Value" {
Ok(())
} else {
Err(TypeError::InputTypeNotSupported(tipe.clone()))
}
}
elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
elmi::AliasType::Filled(tipe) => validate_input_type(tipe),
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
},
_ => Err(TypeError::InputTypeNotSupported(tipe.clone())),
}
}
fn validate_output_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TType(module_name, name, _args) => {
if module_name == "elm/core/String" && name == "String" {
Ok(())
} else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
Ok(())
} else if module_name == "elm/json/Json.Encode" && name == "Value" {
Ok(())
} else if module_name == "elm/virtual-dom/VirtualDom" && name == "Node" {
Ok(())
} else {
Err(TypeError::OutputTypeNotSupported(tipe.clone()))
}
}
elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
elmi::AliasType::Filled(tipe) => validate_output_type(tipe),
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
},
_ => Err(TypeError::OutputTypeNotSupported(tipe.clone())),
}
}
// Figure out how to do structural types. If I could name-mangle all the functions I could write
// them out in the same namespace as lambdas, which would avoid the structural typing problem if
// the lambda was used by only one type. Monomorphization, basically. But if the lambda is used by
// multiple types then I would need to either narrow the type into a tuple at the call site, or
// generate a specialized struct and borrow all the children.
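//
// To make the first option concrete, a toy version of the "narrow to a tuple at the call site"
// route (all of these names are illustrative, none of this is generated code yet):
// ```rust
// // One tuple-typed function can serve records of different shapes, as long as each
// // call site narrows its record down to just the fields the lambda actually touches.
// fn get_x(narrowed: (f64,)) -> f64 {
//     narrowed.0
// }
//
// struct Circle { x: f64, radius: f64 }
// struct Square { x: f64, side: f64 }
//
// fn demo(c: &Circle, s: &Square) -> f64 {
//     get_x((c.x,)) + get_x((s.x,))
// }
// ```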
//
//
// The question is: should I use Rc with immutable data structures? Or should I try to statically
// analyse copies and use mutable state when possible?
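//
// The Rc route would look roughly like this (illustrative only; `duplicate` is not a real helper):
// ```rust
// use std::rc::Rc;
//
// // Elm values are immutable, so handing out another Rc handle is a cheap pointer copy
// // rather than a deep copy of the underlying data.
// fn duplicate(value: Rc<String>) -> (Rc<String>, Rc<String>) {
//     (Rc::clone(&value), value)
// }
// ```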