starmelon/src/exec/scripting.rs
2025-11-23 19:34:17 -08:00

638 lines
24 KiB
Rust

use crate::exec::{fixtures, runtime};
use crate::reporting::{CompilerError, InterpreterError, Problem, TypeError};
use deno_core::{Extension, OpState};
use elm_project_utils::{setup_generator_project, ElmResult};
use os_pipe::dup_stderr;
use sqlx::sqlite::SqlitePool;
use std::cell::RefCell;
use std::convert::TryFrom;
use std::fs;
use std::io::{self, Read, Write};
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::sync::Arc;
use tokio;
use tracing::info_span;
pub(crate) fn run(
debug: bool,
verbosity: u64,
sqlite_path: Option<PathBuf>,
elm_project_dir: PathBuf,
source_checksum: u64,
entrypoint: elmi::Global,
input_type: Option<InputType>,
output_type: OutputType,
input_source: Option<PathBuf>,
output: Option<PathBuf>,
) -> Result<(), Problem> {
// step 6 create our private project
let packages = ["ThinkAlexandria/elm-html-in-elm", "elm/json", "elm/bytes"];
// I added a couple of random bytes to the directory name to reduce the risk of
// collisions with other programs that also use elm-stuff for their scratch space
let our_temp_dir = elm_project_dir.join("elm-stuff").join("starmelon-5d9ecc");
let generator_dir =
setup_generator_project(verbosity, elm_project_dir.clone(), our_temp_dir, &packages)?;
// step 7 create an Elm fixture file to run our function
let span = info_span!("create Elm fixture files");
let timing_guard = span.enter();
let (gen_module_name, source) = fixtures::scripting::generate(
source_checksum,
entrypoint.0.module.clone(),
entrypoint.1.clone(),
input_type,
output_type,
);
let mut source_filename = generator_dir.join("src").join(&gen_module_name);
source_filename.set_extension("elm");
let span = info_span!("file writes");
let file_write_timing_guard = span.enter();
fs::File::create(&source_filename)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, source_filename.clone()))?
.write_all(source.as_bytes())
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, source_filename.clone()))?;
drop(file_write_timing_guard);
drop(timing_guard);
// step 8 compile the fixture
let mut intermediate_file = generator_dir.join("obj").join(&gen_module_name);
intermediate_file.set_extension("js");
let mut command = Command::new("elm");
command
.arg("make")
.arg("--output")
.arg(&intermediate_file)
.current_dir(&generator_dir)
.stdin(Stdio::null())
.stderr(Stdio::piped());
if verbosity < 1 {
command.stdout(Stdio::piped());
} else {
let pipe = dup_stderr()
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, "stdout".into()))?;
command.stdout(pipe);
}
if debug {
command.arg("--debug");
}
command.arg(&source_filename);
let span = info_span!("elm make fixture file");
let timing_guard = span.enter();
match command.output() {
Ok(output) => {
if !output.status.success() {
return Err(CompilerError::FailedBuildingFixture.into());
}
}
Err(_) => {
return Err(CompilerError::FailedBuildingFixture.into());
}
}
drop(timing_guard);
// Step 9 fixup the compiled script to run in Deno
let data = fs::read_to_string(&intermediate_file)
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, intermediate_file.clone()))?;
// TODO figure out how to replace multiple substrings in a single pass. One neat trick
// might be to allocate enough space in our starting buffer to write the new code by
// having a really large replace block. For example if we are replacing a string
// `"REPLACE_ME" + "abc" * 2000` we would have over 6k bytes to write out the new code. We
// will have to do some extra book keeping to make sure the buffer space is big enough
// for the replacement code.
let span = info_span!("munge fixture javascript");
let timing_guard = span.enter();
let final_script = (|| {
let mut final_script = data
.replace("'REPLACE_ME_WITH_JSON_STRINGIFY'", "JSON.stringify(x)")
.replace(
"$elm$json$Json$Decode$fail('REPLACE_ME_WITH_BYTES_DECODER');",
r#" _Json_decodePrim(function(value) {
return (typeof value === 'object' && value instanceof DataView)
? $elm$core$Result$Ok(value)
: _Json_expecting('a DataView', value);
});"#,
)
.replace(";}(this));", ";}(globalThis));");
if sqlite_path.is_some() {
final_script = final_script
.replace(
"var $author$project$Astrid$Query$execute = function (query) {\n\treturn $author$project$Astrid$Query$dummyExecute;\n};",
include_str!("fixtures/sql-client-integration.js"),
)
.replace(
"var $author$project$Astrid$Query$fetch = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
"var $author$project$Astrid$Query$fetch = _Query_fetchAll;",
)
.replace(
"var $author$project$Astrid$Query$fetchOne = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
"var $author$project$Astrid$Query$fetchOne = _Query_fetchOne;",
)
.replace(
"var $author$project$Astrid$Query$map3 = F4(\n\tfunction (f, a, b, c) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map3 = _Query_map3;"#,
)
.replace(
"var $author$project$Astrid$Query$map2 = F3(\n\tfunction (f, a, b) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map2 = _Query_map2;"#,
)
.replace(
"var $author$project$Astrid$Query$map = F2(\n\tfunction (f, a) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map = _Query_map1;"#,
)
.replace(
"var $author$project$Astrid$Query$andThen = F2(\n\tfunction (f, q) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$andThen = _Query_andThen;"#,
);
// final_script.replace("var $author$project$Astrid$Query$run = ", "JSON.stringify(x)");
final_script.push_str("\n\n");
//final_script.push_str(r#"
// Deno.core.print(JSON.stringify(
// Deno.core.opSync(
// 'op_starmelon_batch_queries',
// [ [true, "select json_object('id', id, 'foo', foo) from foobar", []]
// , [false, "select json_object('id', id, 'foo', foo) from foobar", []]
// ]
// )
// ))
//"#);
}
final_script.push_str("\n\n");
// I think that when I set this script to be the main module, I am skipping the
// deno/runtime/js/99_main.js script that sets up a bunch of global variables. If I
// manually add the timer related code below then setTimeout works again.
// NB. there are 706 lines of setup code that add a bunch of apis to the global window
// scope. Figure out if I need to include all of them. For example, starmelon does not need
// to perform http calls right now, but I eventually want to.
final_script.push_str("const { setTimeout } = globalThis.__bootstrap.timers;\n");
final_script.push_str(
"Deno.core.setMacrotaskCallback(globalThis.__bootstrap.timers.handleTimerMacrotask);\n",
);
final_script.push_str("globalThis.setTimeout = setTimeout;\n");
final_script.push_str(&format!("var worker = Elm.{}.init();\n", &gen_module_name));
// add a shortcut for invoking the function so I don't have to traverse so many object
// lookups using the rust v8 API.
match input_type {
None => {
final_script.push_str(
"globalThis.runOnInput = function() { worker.ports.onInput.send(null) };\n",
);
}
Some(InputType::Value) => {
final_script
.push_str("globalThis.runOnInput = function(data) { worker.ports.onInput.send(JSON.parse(data)) };\n");
}
Some(InputType::String) => {
final_script.push_str(
"globalThis.runOnInput = function(data) { worker.ports.onInput.send(data) };",
);
}
Some(InputType::Bytes) => {
final_script.push_str(
r#"
globalThis.runOnInput = function(data) {
const dv = new DataView(data.buffer)
worker.ports.onInput.send(dv)
};"#,
);
}
}
match output_type {
OutputType::Value => {
final_script.push_str(
r#"
worker.ports.onOutput.subscribe(function(output){
if (output.ctor === "Ok") {
const json = JSON.stringify(output.a);
Deno.core.opSync('op_starmelon_string_output', output);
} else {
Deno.core.opSync('op_starmelon_problem', output.a);
}
});
"#,
);
}
OutputType::Bytes => {
final_script.push_str(
r#"
// Elm will send a DataView
worker.ports.onOutput.subscribe(function(output){
if (output.ctor === "Ok") {
const ui8 = new Uint8Array(output.a.buffer);
Deno.core.opSync('op_starmelon_bytes_output', ui8);
} else {
Deno.core.opSync('op_starmelon_problem', output.a);
}
});
"#,
);
}
OutputType::String | OutputType::Html => {
final_script.push_str(
r#"
worker.ports.onOutput.subscribe(function(output){
if (output.ctor === "Ok") {
Deno.core.opSync('op_starmelon_string_output', output.a);
} else {
Deno.core.opSync('op_starmelon_problem', output.a);
}
});
"#,
);
}
}
final_script
})();
drop(timing_guard);
let mut final_file = generator_dir.join("bin").join(&gen_module_name);
final_file.set_extension("js");
let span = info_span!("file writes");
let timing_guard = span.enter();
std::fs::write(&final_file, final_script)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, final_file.clone()))?;
drop(timing_guard);
// Create a tokio runtime before registering ops so we can block on futures inside sync ops
let span = info_span!("create tokio runtime");
let timing_guard = span.enter();
let sys = tokio::runtime::Builder::new_current_thread()
// The default number of additional threads for running blocking FnOnce is 512.
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
drop(timing_guard);
// step 10 create a v8 isolate. We need to register a different callback depending on
// the output type (string, or bytes)
let mailbox: Arc<RefCell<Option<Result<Vec<u8>, String>>>> = Arc::new(RefCell::new(None));
let mailbox_clone = Arc::clone(&mailbox);
let mut extensions = vec![Extension::builder()
.ops(vec![
op_starmelon_bytes_output::decl(),
op_starmelon_string_output::decl(),
op_starmelon_problem::decl(),
])
.state(move |state| {
state.put(Arc::clone(&mailbox_clone));
Ok(())
})
.build()];
let span = info_span!("register private api");
let timing_guard = span.enter();
// Step 10.B setup the sqlite database feature
let sql_background_thread_handle = if let Some(database_url) = sqlite_path {
// I want to construct the connection in the initial thread so I can tell if the connection
// failed
let db_pool = sys
.block_on(async { SqlitePool::connect(&database_url.to_string_lossy()).await })
.unwrap();
if let Ok((extension, thread_handle)) = fixtures::sqlite::init(db_pool) {
extensions.push(extension);
Some(thread_handle)
} else {
None
}
} else {
None
};
drop(timing_guard);
let span = info_span!("create v8 isolate");
let timing_guard = span.enter();
let (worker, main_module) = runtime::setup_worker(extensions, &final_file.to_string_lossy())
.map_err(|err| InterpreterError::EventLoop(err))?;
drop(timing_guard);
// step 11 marshal the input into the v8 isolate. If we are reading from an
// input file load that, if we are reading from stdin read that.
let input = match input_type {
None => None,
Some(input_type) => {
let buffer = match input_source {
None => {
// read from stdin when input_source was not present
let mut stdin = io::stdin();
let mut buffer = Vec::new();
stdin.read_to_end(&mut buffer).unwrap();
buffer
}
Some(path) => {
let buffer = std::fs::read(&path)
.map_err(|io_err| CompilerError::ReadInputFailed(io_err, path.clone()))?;
buffer
}
};
match input_type {
InputType::String => {
let s = String::from_utf8(buffer).map_err(|_| CompilerError::BadInput)?;
Some(ValidatedInput::String(s))
}
InputType::Bytes => Some(ValidatedInput::Bytes(buffer)),
InputType::Value => {
let _: serde_json::Value =
serde_json::from_slice(&buffer).map_err(|_| CompilerError::BadInput)?;
let s = String::from_utf8(buffer).map_err(|_| CompilerError::BadInput)?;
Some(ValidatedInput::Value(s))
}
}
}
};
let foo = move |mut scope: deno_core::v8::HandleScope| -> Result<(), InterpreterError> {
let scope = &mut scope;
let ctx = scope.get_current_context();
let global = ctx.global(scope);
use deno_core::v8;
let entrypoint = {
let x =
v8::String::new(scope, "runOnInput").ok_or(InterpreterError::AllocationFailed)?;
v8::Local::new(scope, x).into()
};
let v8_value = global
.get(scope, entrypoint)
.ok_or(InterpreterError::ReferenceError)?;
// step 12 invoke the function
let function = v8::Local::<v8::Function>::try_from(v8_value)?;
let this = v8::undefined(scope).into();
let span = info_span!("dispatch v8 call");
let timing_guard = span.enter();
match input {
None => {
function.call(scope, this, &[]);
}
Some(ValidatedInput::String(s)) => {
let arg1 = {
let x = v8::String::new(scope, &s).ok_or(InterpreterError::AllocationFailed)?;
v8::Local::new(scope, x).into()
};
function.call(scope, this, &[arg1]);
}
Some(ValidatedInput::Value(v)) => {
let arg1 = {
let x = v8::String::new(scope, &v).ok_or(InterpreterError::AllocationFailed)?;
v8::Local::new(scope, x).into()
};
function.call(scope, this, &[arg1]);
}
Some(ValidatedInput::Bytes(data)) => {
let length = data.len();
let y = data.into_boxed_slice();
let k = v8::ArrayBuffer::new_backing_store_from_boxed_slice(y).make_shared();
let x = v8::ArrayBuffer::with_backing_store(scope, &k);
let arg1 = v8::Local::new(scope, x).into();
let c = v8::Uint8Array::new(scope, arg1, 0, length).unwrap();
let arg2 = v8::Local::new(scope, c).into();
function.call(scope, this, &[arg2]);
}
}
drop(timing_guard);
Ok(())
};
let span = info_span!("eval javascript");
let timing_guard = span.enter();
sys.block_on(async move { runtime::xyz(worker, main_module, foo).await })?;
drop(timing_guard);
// step 13 receive the callback
// If I understood which combination of run_event_loop was required to execute
// the javascript to completion then we should have something in our mailbox by
// now. Another way to do this would be with an Arc. This will panic if the
// mailbox is currently borrowed
match mailbox.replace(None) {
Some(Ok(buffer)) => match output {
None => {
io::stdout()
.write_all(&buffer)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, "stdout".into()))?;
}
Some(filename) => {
let mut f = fs::File::create(&filename)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, filename))?;
f.write_all(&buffer)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, "stdout".into()))?;
}
},
Some(Err(problem)) => {
println!("had a problem {}", problem);
}
None => println!("nothing in the mailbox"),
}
if let Some(thread_handle) = sql_background_thread_handle {
thread_handle.join().unwrap();
}
Ok(())
}
/// Shared slot through which the `op_starmelon_*` ops hand a result back to `run`:
/// `None` until an op fires, then `Ok(bytes)` for output or `Err(message)` for a problem.
type OutputMailbox = Arc<RefCell<Option<Result<Vec<u8>, String>>>>;
/// Op invoked from the JavaScript fixture when the Elm program reports a failure.
///
/// Logs the message to stderr and stores it in the shared mailbox as an `Err`. If the
/// mailbox is already borrowed the message is only logged. Always returns `Ok(())`.
#[deno_core::op]
fn op_starmelon_problem(
    state: &mut deno_core::OpState,
    msg: String,
) -> Result<(), deno_core::error::AnyError> {
    eprintln!("got problem from v8 runtime {:?}", &msg);
    let shared = state.borrow::<OutputMailbox>();
    // Best effort: skip the store rather than panic when the cell is busy.
    if let Ok(mut slot) = shared.try_borrow_mut() {
        *slot = Some(Err(msg));
    }
    Ok(())
}
/// Op invoked from the JavaScript fixture to deliver a binary result.
///
/// An `Ok` payload is copied into an owned `Vec<u8>` and stored in the shared mailbox; an
/// `Err` payload is stored as the problem message. If the mailbox is already borrowed the
/// message is dropped. Always returns `Ok(())`.
#[deno_core::op]
fn op_starmelon_bytes_output(
    state: &mut OpState,
    msg: ElmResult<deno_core::ZeroCopyBuf, String>,
) -> Result<(), deno_core::error::AnyError> {
    let shared = state.borrow::<OutputMailbox>();
    if let Ok(mut slot) = shared.try_borrow_mut() {
        let outcome = match msg {
            ElmResult::Ok { a: payload } => {
                // Copy out of the buffer handed over by the runtime.
                let bytes: &[u8] = &payload;
                Ok(bytes.to_owned())
            }
            ElmResult::Err { a: reason } => Err(reason),
        };
        *slot = Some(outcome);
    }
    Ok(())
}
/// Op invoked from the JavaScript fixture to deliver a textual result.
///
/// An `Ok` string is stored in the shared mailbox as its UTF-8 bytes; an `Err` payload is
/// stored as the problem message. If the mailbox is already borrowed the message is
/// dropped. Always returns `Ok(())`.
#[deno_core::op]
fn op_starmelon_string_output(
    state: &mut OpState,
    msg: ElmResult<String, String>,
) -> Result<(), deno_core::error::AnyError> {
    let shared = state.borrow::<OutputMailbox>();
    if let Ok(mut slot) = shared.try_borrow_mut() {
        let outcome = match msg {
            ElmResult::Ok { a: text } => Ok(text.into_bytes()),
            ElmResult::Err { a: reason } => Err(reason),
        };
        *slot = Some(outcome);
    }
    Ok(())
}
/// How the single argument of the evaluated Elm function is marshalled into the fixture.
#[derive(Debug, Copy, Clone)]
pub enum InputType {
    /// A `Json.Encode.Value`; the input text is passed through `JSON.parse` in the fixture.
    Value,
    /// A `String`; the input text is sent to the Elm port unchanged.
    String,
    /// A `Bytes`; the input buffer is wrapped in a `DataView` in the fixture.
    Bytes,
}
/// Input that has been read and checked against the expected `InputType`.
pub enum ValidatedInput {
    /// Input that was verified to be valid UTF-8.
    String(String),
    /// JSON text; it was parsed once for validation but is kept in its textual form.
    Value(String),
    /// Raw bytes, passed along without validation.
    Bytes(Vec<u8>),
}
/// How the result of the evaluated Elm function is handed back from the fixture.
#[derive(Debug, Copy, Clone)]
pub enum OutputType {
    /// A `VirtualDom.Node`; delivered through the same string output op as `String`.
    Html,
    /// An Elm `String`.
    String,
    /// An Elm `Bytes`; delivered through the bytes output op as a `Uint8Array`.
    Bytes,
    /// A `Json.Encode.Value`.
    Value,
}
/// Classifies the type of an entrypoint into the input and output marshalling it needs.
///
/// A lambda yields `Some(input)` plus the output type of its result. A plain value yields
/// `None` for the input and is accepted only when it is a `String`, a `Bytes`, or a
/// `VirtualDom.Node`. Filled aliases are resolved by recursing into the aliased type.
///
/// # Errors
/// Returns a `TypeError` for type variables, records, unit, tuples, holey aliases, and any
/// other type that cannot be evaluated, as well as any error from resolving the argument
/// or result of a lambda.
pub(crate) fn resolve_function_type(
    tipe: &elmi::Type,
) -> Result<(Option<InputType>, OutputType), TypeError> {
    match tipe {
        elmi::Type::TLambda(a, b) => {
            // We want to check the output types first because this is where we will figure out if
            // there is more than one argument
            let output_type = resolve_output_type(&**b)?;
            let input_type = resolve_input_type(&**a)?;
            Ok((Some(input_type), output_type))
        }
        elmi::Type::TVar(_) => Err(TypeError::CantEvalGeneric),
        elmi::Type::TType(module_name, name, args) if args.is_empty() => {
            // If our function returns a primitive type
            if module_name == "elm/core/String" && name == "String" {
                return Ok((None, OutputType::String));
            }
            if module_name == "elm/bytes/Bytes" && name == "Bytes" {
                // A bare `Bytes` value must use the bytes output path, not the string one.
                return Ok((None, OutputType::Bytes));
            }
            Err(TypeError::CantEvalType(tipe.clone()))
        }
        elmi::Type::TType(module_name, name, _args) => {
            if module_name == "elm/virtual-dom/VirtualDom" && name == "Node" {
                return Ok((None, OutputType::Html));
            }
            eprintln!("found this type {:?} {:?}", module_name, name);
            Err(TypeError::CantEvalCustomType)
        }
        elmi::Type::TRecord(_, _) => Err(TypeError::CantEvalRecord),
        elmi::Type::TUnit => Err(TypeError::CantEvalUnit),
        elmi::Type::TTuple(_, _, _) => Err(TypeError::CantEvalTuple),
        elmi::Type::TAlias(_, _, _, ref alias) => {
            match &**alias {
                elmi::AliasType::Filled(tipe) => {
                    // I think the recursion is limited to a single step. I have not tested what
                    // the canonical annotation would look like for a doubly indirect alias, for
                    // example for `view` below
                    // ```elm
                    // type alias Foo = Int
                    // type alias Bar = String
                    //
                    // type alias Zap = Foo -> Bar
                    //
                    // view : Zap
                    // ```
                    resolve_function_type(tipe)
                }
                elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
            }
        }
    }
}
/// Maps the argument type of an entrypoint to the way its input must be marshalled.
///
/// Only `String`, `Bytes`, and `Json.Encode.Value` (each without type arguments) are
/// accepted; filled aliases are resolved by recursing into the aliased type.
///
/// # Errors
/// * `EvalRequiresSingleArgument` when the argument is itself a function.
/// * `CantEvalHoleyAlias` for an alias that is not fully applied.
/// * `InputTypeNotSupported` for every other type.
fn resolve_input_type(tipe: &elmi::Type) -> Result<InputType, TypeError> {
    match tipe {
        elmi::Type::TLambda(_, _) => Err(TypeError::EvalRequiresSingleArgument(tipe.clone())),
        elmi::Type::TType(module_name, name, args) if args.is_empty() => {
            if module_name == "elm/core/String" && name == "String" {
                Ok(InputType::String)
            } else if module_name == "elm/bytes/Bytes" && name == "Bytes" {
                Ok(InputType::Bytes)
            } else if module_name == "elm/json/Json.Encode" && name == "Value" {
                Ok(InputType::Value)
            } else {
                Err(TypeError::InputTypeNotSupported(tipe.clone()))
            }
        }
        elmi::Type::TAlias(_, _, _, ref alias) => match &**alias {
            elmi::AliasType::Filled(tipe) => resolve_input_type(tipe),
            elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
        },
        // This is the input side, so report the rejected type as an unsupported input.
        _ => Err(TypeError::InputTypeNotSupported(tipe.clone())),
    }
}
/// Maps the result type of an entrypoint to the way its output must be marshalled.
///
/// `String`, `Bytes`, `Json.Encode.Value`, and `VirtualDom.Node` are accepted regardless of
/// their type arguments; filled aliases are resolved by recursing into the aliased type.
///
/// # Errors
/// * `CantEvalHoleyAlias` for an alias that is not fully applied.
/// * `OutputTypeNotSupported` for every other type.
fn resolve_output_type(tipe: &elmi::Type) -> Result<OutputType, TypeError> {
    // Each supported (module, type name) pair together with its marshalling strategy.
    const SUPPORTED: [(&str, &str, OutputType); 4] = [
        ("elm/core/String", "String", OutputType::String),
        ("elm/bytes/Bytes", "Bytes", OutputType::Bytes),
        ("elm/json/Json.Encode", "Value", OutputType::Value),
        ("elm/virtual-dom/VirtualDom", "Node", OutputType::Html),
    ];

    match tipe {
        elmi::Type::TType(module_name, name, _) => {
            for &(module, type_name, kind) in SUPPORTED.iter() {
                if module_name == module && name == type_name {
                    return Ok(kind);
                }
            }
            Err(TypeError::OutputTypeNotSupported(tipe.clone()))
        }
        elmi::Type::TAlias(_, _, _, alias) => match &**alias {
            elmi::AliasType::Holey(_) => Err(TypeError::CantEvalHoleyAlias),
            elmi::AliasType::Filled(aliased) => resolve_output_type(aliased),
        },
        _ => Err(TypeError::OutputTypeNotSupported(tipe.clone())),
    }
}