starmelon/src/transpile.rs

use crate::elm;
use crate::reporting::{CompilerError, Problem, TypeError};
use elmi::DataBinary;
use std::collections::HashMap;
use std::path::PathBuf;
use tracing::info_span;

pub fn transpile(
file: PathBuf,
debug: bool,
function: String,
//output: Option<PathBuf>,
verbosity: u64,
) -> Result<(), Problem> {
// Our first `elm make` call is where we build the user's program. There is a pretty good chance
// this won't work.
elm::make(&file, debug, verbosity)?;
// step 2 find the elm artifacts cache directory just like with exec
let elm_project_dir =
elm::find_project_root("elm.json", "./").map_err(CompilerError::MissingElmJson)?;
let elm_cache_dir = elm_project_dir.join("elm-stuff").join("0.19.1");
if !elm_cache_dir.is_dir() {
return Err(CompilerError::MissingElmStuff(elm_cache_dir).into());
}
let data = std::fs::read(&file)
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, file.clone()))?;
let elmi::Name(target_module) = elm::parse_module_name(&data)?;
// Side note: the transpile function really should take a list of (file, function) pairs and
// transpile the entire forest of dependencies. That would make it possible to avoid the kernel
// JavaScript used in the project. So I would want to reduce the (file, function) pairs into a
// set of files to load, then recursively load all dependencies.
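//
// A rough sketch of what that dependency walk could look like, assuming `elmi::Global` is
// `Clone + Hash + Eq` and that some `dependencies_of` helper exists (both are assumptions,
// not current API):
// ```rust
// use std::collections::{HashMap, HashSet};
//
// fn reachable(
//     objects: &HashMap<elmi::Global, elmi::Node>,
//     entry: elmi::Global,
// ) -> HashSet<elmi::Global> {
//     let mut visited = HashSet::new();
//     let mut stack = vec![entry];
//     while let Some(symbol) = stack.pop() {
//         if visited.insert(symbol.clone()) {
//             if let Some(node) = objects.get(&symbol) {
//                 // hypothetical helper yielding the node's dependencies as &Global
//                 for dep in dependencies_of(node) {
//                     stack.push(dep.clone());
//                 }
//             }
//         }
//     }
//     visited
// }
// ```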
// step 3 find all the filepaths in the elm-stuff/0.19.1/* folder
let interfaces = elm::load_interfaces(&elm_cache_dir)?;
// Step 4, check that the desired function has a type that we can compile.
let span = info_span!("resolved target function");
let timing_guard = span.enter();
match interfaces.get(&target_module) {
Some(interface) => match interface.values.get(&elmi::Name::from(&function)) {
Some(annotation) => {
let elmi::CannonicalAnnotation(_free_vars, tipe) = annotation;
validate_function_type(tipe)?
}
None => return Err(CompilerError::BadImport(target_module, function).into()),
},
None => return Err(CompilerError::MissingModuleTypeInformation(target_module).into()),
};
drop(timing_guard);
// all the symbols in author/project will be found in the elm_cache_dir, while the rest will be
// found in the elm_home_dir
// Step 5, recursively load all the symbols from the ~/.elm stuff artifacts.dat
println!("ok the function was acceptable, ready to build symbol table");
let objects = elm::load_objects(&elm_cache_dir)?;
let entrypoint = elmi::Global(
elmi::ModuleNameCanonical {
package: elmi::PackageName::new("author", "project"),
module: elmi::Name(target_module.clone()),
},
elmi::Name(function.clone()),
);
println!("the artifacts has the symbol {}", objects.contains_key(&entrypoint));
//let visited: HashSet<Global> = HashSet::new();
for (_key, node) in objects.iter() {
//println!("key {}", key);
match node {
elmi::Node::Define(_expr, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::DefineTailFunc(_, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Ctor(_, _) => (),
elmi::Node::Enum(_) => (),
elmi::Node::Box => (),
elmi::Node::Link(dep) => {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
elmi::Node::Cycle(_, _, _, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::Manager(_) => (),
elmi::Node::Kernel(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortIncoming(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
elmi::Node::PortOutgoing(_, deps) => {
for dep in deps.iter() {
if !objects.contains_key(dep) {
println!("could not find dep {}", dep);
}
}
}
}
}
let _symbol_table: HashMap<elmi::Global, elmi::Node> = HashMap::new();
// step 6, start generating rust code using a tree visitor on each of the entry points.
// Accumulate the contents of each rust module in a map.
//
// step 6a decide on the name mangling rules for elm identifiers to rust identifiers. I can't
// use the $ in identifiers. Maybe I could use Z. Not sure why I can't use underscore.
// author_project__module_name__identifier?
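//
// A minimal sketch of that scheme (a hypothetical helper, nothing like this exists in the crate yet):
// ```rust
// fn mangle(package: &str, module: &str, identifier: &str) -> String {
//     // e.g. ("author/project", "Main", "view") -> "author_project__Main__view"
//     format!(
//         "{}__{}__{}",
//         package.replace('/', "_").replace('-', "_"),
//         module.replace('.', "_"),
//         identifier
//     )
// }
// ```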
//
// step 6b figure out how to handle partial apply. One approach requires me to track the arity
// of each function call and generate a custom closure for each partial apply.
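//
// A rough illustration of the closure idea for a known arity-2 function (names are made up):
// ```rust
// fn add(a: i64, b: i64) -> i64 {
//     a + b
// }
//
// // When a call site supplies fewer arguments than the arity, the generated code
// // would emit a closure capturing what it has so far.
// fn add_partial(a: i64) -> impl Fn(i64) -> i64 {
//     move |b| add(a, b)
// }
// ```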
// step 7 write out each of the generated rust modules.
Ok(())
}
fn validate_function_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TLambda(a, b) => {
// We want to check the output types first because this is where we will figure out if
// there is more than one argument. I only want to accept functions with a single
// argument to keep the complexity down while I figure out how to map over to rust. I
// am specifically worried about implementing partial application in a performant way.
// Nested partial application worries me.
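// For reference, a two-argument Elm function such as `add : Int -> Int -> Int` should show up
// here as nested lambdas, roughly TLambda(Int, TLambda(Int, Int)), so the extra argument is
// discovered on the output side (my reading of the canonical type layout, not verified).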
validate_output_type(&**b)?;
validate_input_type(&**a)?;
Ok(())
}
elmi::Type::TVar(_) => Err(TypeError::CantEvalGeneric),
elmi::Type::TType(module_name, name, args) if args.is_empty() => {
// The target is a plain value rather than a function; accept it only if it is a primitive type we support.
if module_name == "elm/core/String" && name == "String" {
return Ok(());
}
if module_name == "elm/bytes/Bytes" && name == "Bytes" {
return Ok(());
}
Err(TypeError::CantEvalType(tipe.clone()))
}
elmi::Type::TType(_, _, _) => Err(TypeError::CantEvalCustomType),
elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord),
elmi::Type::TUnit => Err(TypeError::CantEvalUnit),
elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple),
elmi::Type::TAlias(_, _, _, ref alias) => {
match &**alias {
elmi::AliasType::Filled(tipe) => {
// I think the recursion is limited to a single step. I have not tested what
// the CannonicalAnnotation would look like for a doubly indirect alias, for
// example for `view` below
// ```elm
// type alias Foo = Int
// type alias Bar = String
//
// type alias Zap = Foo -> Bar
//
// view : Zap
// ```
validate_function_type(tipe)
}
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
}
}
}
}
fn validate_input_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TLambda(_, _) => Err(TypeError::EvalRequiresSingleArgument(tipe.clone())),
elmi::Type::TType(module_name, name, args) if args.is_empty() => {
if module_name == "elm/core/String" && name == "String" {
Ok(())
} else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
Ok(())
} else if module_name == "elm/json/Json.Encode" && name == "Value" {
Ok(())
} else {
Err(TypeError::InputTypeNotSupported(tipe.clone()))
}
}
elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
elmi::AliasType::Filled(tipe) => validate_input_type(tipe),
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
},
_ => Err(TypeError::InputTypeNotSupported(tipe.clone())),
}
}
fn validate_output_type(tipe: &elmi::Type) -> Result<(), TypeError> {
match tipe {
elmi::Type::TType(module_name, name, _args) => {
if module_name == "elm/core/String" && name == "String" {
Ok(())
} else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
Ok(())
} else if module_name == "elm/json/Json.Encode" && name == "Value" {
Ok(())
} else if module_name == "elm/virtual-dom/VirtualDom" && name == "Node" {
Ok(())
} else {
Err(TypeError::OutputTypeNotSupported(tipe.clone()))
}
}
elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
elmi::AliasType::Filled(tipe) => validate_output_type(tipe),
elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
},
_ => Err(TypeError::OutputTypeNotSupported(tipe.clone())),
}
}
// Figure out how to do structural types. If I could name-mangle all the functions I could write
// them out in the same namespace as lambdas, which would avoid the structural typing problem if
// the lambda was used by only one type. Monomorphization, basically. But if the lambda is used by
// multiple types then I would need to either narrow the type into a tuple at the call site, or
// generate a specialized struct and borrow all the children.
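//
// To make the first option concrete, a toy version of the "narrow to a tuple at the call site"
// route (all of these names are illustrative, none of this is generated code yet):
// ```rust
// // One tuple-typed function can serve records of different shapes, as long as each
// // call site narrows its record down to just the fields the lambda actually touches.
// fn get_x(narrowed: (f64,)) -> f64 {
//     narrowed.0
// }
//
// struct Circle { x: f64, radius: f64 }
// struct Square { x: f64, side: f64 }
//
// fn demo(c: &Circle, s: &Square) -> f64 {
//     get_x((c.x,)) + get_x((s.x,))
// }
// ```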
//
//
// The question is: should I use Rc with immutable data structures? Or should I try to statically
// analyse copies and use mutable state when possible?
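//
// The Rc route would look roughly like this (illustrative only; `duplicate` is not a real helper):
// ```rust
// use std::rc::Rc;
//
// // Elm values are immutable, so handing out another Rc handle is a cheap pointer copy
// // rather than a deep copy of the underlying data.
// fn duplicate(value: Rc<String>) -> (Rc<String>, Rc<String>) {
//     (Rc::clone(&value), value)
// }
// ```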