feat: speed up javascript munging

Perform a single pass so the work of doing `m` replacements in a string
of length `n` is O(m + n) instead of O(m * n)

For the yogalogy template this change reduces the script munging step
from ~2.5ms to ~0.5ms [release] (from ~17ms to ~3ms [debug]).
This commit is contained in:
YetAnotherMinion 2022-01-13 01:07:19 +00:00 committed by nobody
commit cc4c1cf9d5
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
4 changed files with 186 additions and 185 deletions

View file

@ -1,71 +1,73 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
// TODO figure out why this code takes ~70ms to munge the javascript. By comparison just writing a
// bunch of chained string.replace( , ).replace( , ).replace .... takes about 16ms.
let mut patterns = Vec::new();
let mut replace_with = Vec::new();
patterns.push("'REPLACE_ME_WITH_JSON_STRINGIFY'");
replace_with.push("JSON.stringify(x)");
patterns.push("$elm$json$Json$Decode$fail('REPLACE_ME_WITH_BYTES_DECODER');");
replace_with.push(r#"
_Json_decodePrim(function(value) {
return (typeof value === 'object' && value instanceof DataView)
? $elm$core$Result$Ok(value)
: _Json_expecting('a DataView', value);
});
"#);
patterns.push(";}(this));");
replace_with.push(";}(globalThis));");
// let mut final_script = data
if sqlite_path.is_some() {
patterns.push("var $author$project$Astrid$Query$execute = function (query) {\n\treturn $author$project$Astrid$Query$dummyExecute;\n};");
replace_with.push(include_str!("fixtures/sql-client-integration.js"));
patterns.push("var $author$project$Astrid$Query$fetch = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push("var $author$project$Astrid$Query$fetch = _Query_fetchAll;");
patterns.push("var $author$project$Astrid$Query$fetchOne = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push("var $author$project$Astrid$Query$fetchOne = _Query_fetchOne;");
patterns.push("var $author$project$Astrid$Query$map5 = F6(\n\tfunction (f, a, b, c, d, e) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$map5 = _Query_map5;"#);
patterns.push("var $author$project$Astrid$Query$map4 = F5(\n\tfunction (f, a, b, c, d) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$map4 = _Query_map4;"#);
patterns.push("var $author$project$Astrid$Query$map3 = F4(\n\tfunction (f, a, b, c) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$map3 = _Query_map3;"#);
patterns.push("var $author$project$Astrid$Query$map2 = F3(\n\tfunction (f, a, b) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$map2 = _Query_map2;"#);
patterns.push("var $author$project$Astrid$Query$map = F2(\n\tfunction (f, a) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$map = _Query_map1;"#);
patterns.push("var $author$project$Astrid$Query$andThen = F2(\n\tfunction (f, q) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
replace_with.push(r#"var $author$project$Astrid$Query$andThen = _Query_andThen;"#);
}
debug_assert!(patterns.len() == replace_with.len());
// let mut final_script = Vec::with_capacity(data.len() + 8 * 1024);
let span = info_span!("build aho-corasick patterns");
let timing_guard = span.enter();
//let ac = AhoCorasick::new(&patterns);
let ac = AhoCorasickBuilder::new()
.auto_configure(&patterns)
.build(&patterns);
drop(timing_guard);
let span = info_span!("run replacements");
let timing_guard = span.enter();
for _ in ac.find_iter(data.as_bytes()) {
}
drop(timing_guard);
let mut final_script = ac.replace_all_bytes(data.as_bytes(), &replace_with);
// bunch of chained string.replace( , ).replace( , ).replace .... takes about (16ms debug) (2.5ms
// release).
// There has to be a faster way to do all of this.
// let mut patterns = Vec::new();
// let mut replace_with = Vec::new();
//
// patterns.push("'REPLACE_ME_WITH_JSON_STRINGIFY'");
// replace_with.push("JSON.stringify(x)");
//
// patterns.push("$elm$json$Json$Decode$fail('REPLACE_ME_WITH_BYTES_DECODER');");
// replace_with.push(r#"
// _Json_decodePrim(function(value) {
// return (typeof value === 'object' && value instanceof DataView)
// ? $elm$core$Result$Ok(value)
// : _Json_expecting('a DataView', value);
// });
// "#);
//
// patterns.push(";}(this));");
// replace_with.push(";}(globalThis));");
//
//
// // let mut final_script = data
//
// if sqlite_path.is_some() {
// patterns.push("var $author$project$Astrid$Query$execute = function (query) {\n\treturn $author$project$Astrid$Query$dummyExecute;\n};");
// replace_with.push(include_str!("fixtures/sql-client-integration.js"));
//
// patterns.push("var $author$project$Astrid$Query$fetch = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push("var $author$project$Astrid$Query$fetch = _Query_fetchAll;");
//
// patterns.push("var $author$project$Astrid$Query$fetchOne = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push("var $author$project$Astrid$Query$fetchOne = _Query_fetchOne;");
//
// patterns.push("var $author$project$Astrid$Query$map5 = F6(\n\tfunction (f, a, b, c, d, e) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$map5 = _Query_map5;"#);
//
// patterns.push("var $author$project$Astrid$Query$map4 = F5(\n\tfunction (f, a, b, c, d) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$map4 = _Query_map4;"#);
//
// patterns.push("var $author$project$Astrid$Query$map3 = F4(\n\tfunction (f, a, b, c) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$map3 = _Query_map3;"#);
//
// patterns.push("var $author$project$Astrid$Query$map2 = F3(\n\tfunction (f, a, b) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$map2 = _Query_map2;"#);
//
// patterns.push("var $author$project$Astrid$Query$map = F2(\n\tfunction (f, a) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$map = _Query_map1;"#);
//
// patterns.push("var $author$project$Astrid$Query$andThen = F2(\n\tfunction (f, q) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});");
// replace_with.push(r#"var $author$project$Astrid$Query$andThen = _Query_andThen;"#);
// }
// debug_assert!(patterns.len() == replace_with.len());
//
// // let mut final_script = Vec::with_capacity(data.len() + 8 * 1024);
//
// let span = info_span!("build aho-corasick patterns");
// let timing_guard = span.enter();
// //let ac = AhoCorasick::new(&patterns);
// let ac = AhoCorasickBuilder::new()
// .auto_configure(&patterns)
// .build(&patterns);
// drop(timing_guard);
// let span = info_span!("run replacements");
// let timing_guard = span.enter();
// for _ in ac.find_iter(data.as_bytes()) {
//
// }
// drop(timing_guard);
// let mut final_script = ac.replace_all_bytes(data.as_bytes(), &replace_with);

View file

@ -42,7 +42,7 @@ errorToString error =
"Failure `" ++ message ++ "`"
NotFound sql ->
"NotFound `" ++ sql ++ "`"
"No rows returned by a query that expected to return at least one row. `" ++ sql ++ "`"
execute : Query a -> Result Error a
execute query =

View file

@ -2,9 +2,10 @@ use crate::exec::fixtures::astrid_pages::ScriptError;
use crate::exec::{fixtures, runtime};
use crate::reporting::{CompilerError, InterpreterError, Problem, TypeError};
use deno_core::futures::StreamExt;
use elm_project_utils::{setup_generator_project, ElmResult};
use elm_project_utils::{setup_generator_project, ElmPostProcessor, ElmResult};
use os_pipe::dup_stderr;
use rusty_v8 as v8;
use serde::{Deserialize, Serialize};
use sqlx::sqlite::SqlitePool;
use sqlx::Row;
use std::cell::RefCell;
@ -124,103 +125,87 @@ pub(crate) fn run(
// for the replacement code.
let span = info_span!("munge fixture javascript");
let timing_guard = span.enter();
let final_script = (|| {
let mut final_script = data
.replace("'REPLACE_ME_WITH_JSON_STRINGIFY'", "JSON.stringify(x)")
.replace(
"$elm$json$Json$Decode$fail('REPLACE_ME_WITH_BYTES_DECODER');",
r#" _Json_decodePrim(function(value) {
return (typeof value === 'object' && value instanceof DataView)
? $elm$core$Result$Ok(value)
: _Json_expecting('a DataView', value);
});"#,
let mut munger = ElmPostProcessor::new();
let mut buffer = String::with_capacity(data.len());
munger.redefine(
format!("$author$project${}$asJsonString", gen_module_name),
"function (x) {\n\treturn JSON.stringify(x);\n};",
);
if sqlite_path.is_some() {
munger
.redefine(
"$author$project$Astrid$Query$execute",
include_str!("fixtures/sql-client-integration.js"),
)
.replace(";}(this));", ";}(globalThis));");
.redefine("$author$project$Astrid$Query$fetch", " _Query_fetchAll;")
.redefine("$author$project$Astrid$Query$fetchOne", " _Query_fetchOne;")
.redefine("$author$project$Astrid$Query$map5", " _Query_map5;")
.redefine("$author$project$Astrid$Query$map4", " _Query_map4;")
.redefine("$author$project$Astrid$Query$map3", " _Query_map3;")
.redefine("$author$project$Astrid$Query$map2", " _Query_map2;")
.redefine("$author$project$Astrid$Query$map", " _Query_map1;")
.redefine("$author$project$Astrid$Query$andThen", " _Query_andThen;");
}
munger.run(&data, &mut buffer);
if sqlite_path.is_some() {
final_script = final_script
.replace(
"var $author$project$Astrid$Query$execute = function (query) {\n\treturn $author$project$Astrid$Query$dummyExecute;\n};",
include_str!("fixtures/sql-client-integration.js"),
)
.replace(
"var $author$project$Astrid$Query$fetch = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
"var $author$project$Astrid$Query$fetch = _Query_fetchAll;",
)
.replace(
"var $author$project$Astrid$Query$fetchOne = F3(\n\tfunction (sql, parameters, decoder) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
"var $author$project$Astrid$Query$fetchOne = _Query_fetchOne;",
)
.replace(
"var $author$project$Astrid$Query$map5 = F6(\n\tfunction (f, a, b, c, d, e) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map5 = _Query_map5;"#,
)
.replace(
"var $author$project$Astrid$Query$map4 = F5(\n\tfunction (f, a, b, c, d) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map4 = _Query_map4;"#,
)
.replace(
"var $author$project$Astrid$Query$map3 = F4(\n\tfunction (f, a, b, c) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map3 = _Query_map3;"#,
)
.replace(
"var $author$project$Astrid$Query$map2 = F3(\n\tfunction (f, a, b) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map2 = _Query_map2;"#,
)
.replace(
"var $author$project$Astrid$Query$map = F2(\n\tfunction (f, a) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$map = _Query_map1;"#,
)
.replace(
"var $author$project$Astrid$Query$andThen = F2(\n\tfunction (f, q) {\n\t\treturn $author$project$Astrid$Query$Dummy;\n\t});",
r#"var $author$project$Astrid$Query$andThen = _Query_andThen;"#,
);
final_script.push_str("\n\n");
// Mutate the buffer in place for the final fixup
let pattern = ";}(this));";
match buffer.get((buffer.len() - pattern.len())..) {
Some(end) if end == pattern => {
buffer.truncate(buffer.len() - pattern.len());
buffer.push_str(";}(globalThis));");
}
_ => (),
}
// I think that when I set this script to be the main module, I am skipping the
// deno/runtime/js/99_main.js script that sets up a bunch of global variables. If I
// manually add the timer related code below then setTimeout works again.
// NB. there are 706 lines of setup code that add a bunch of apis to the global window
// scope. Figure out if I need to include all of them. For example, starmelon does not need
// to perform http calls right now, but I eventually want to.
buffer.push_str(
r#"
const { setTimeout } = globalThis.__bootstrap.timers;
Deno.core.setMacrotaskCallback(globalThis.__bootstrap.timers.handleTimerMacrotask);
globalThis.setTimeout = setTimeout;
"#,
);
final_script.push_str("\n\n");
// I think that when I set this script to be the main module, I am skipping the
// deno/runtime/js/99_main.js script that sets up a bunch of global variables. If I
// manually add the timer related code below then setTimeout works again.
// NB. there are 706 lines of setup code that add a bunch of apis to the global window
// scope. Figure out if I need to include all of them. For example, starmelon does not need
// to perform http calls right now, but I eventually want to.
final_script.push_str("const { setTimeout } = globalThis.__bootstrap.timers;\n");
final_script.push_str(
"Deno.core.setMacrotaskCallback(globalThis.__bootstrap.timers.handleTimerMacrotask);\n",
);
final_script.push_str("globalThis.setTimeout = setTimeout;\n");
buffer.push_str(&format!(
"var worker = Elm.{}.init({{flags: {{ stagename: \"Atago\"}} }});\n",
&gen_module_name
));
// add a shortcut for invoking the function so I don't have to traverse so many object
// lookups using the rust v8 API.
buffer.push_str(
r#"
globalThis.runOnInput = function(route) { worker.ports.onRequest.send(route) };
final_script.push_str(&format!(
"var worker = Elm.{}.init({{flags: {{ stagename: \"Atago\"}} }});\n",
&gen_module_name
));
// add a shortcut for invoking the function so I don't have to traverse so many object
// lookups using the rust v8 API.
final_script.push_str(
"globalThis.runOnInput = function(route) { worker.ports.onRequest.send(route) };\n",
);
final_script.push_str(
r#"
worker.ports.onStringOutput.subscribe(function(result) {
Deno.core.opSync('op_starmelon_string_output', result);
worker.ports.onStringOutput.subscribe(function(result) {
Deno.core.opSync('op_starmelon_string_output', result);
});
// Elm will send a DataView
if (worker.ports.onBytesOutput) {
worker.ports.onBytesOutput.subscribe(function(result){
if (result.$ === "Ok") {
const ui8 = new Uint8Array(result.a.buffer);
output.a = ui8;
}
Deno.core.opSync('op_starmelon_bytes_output', result)
});
// Elm will send a DataView
if (worker.ports.onBytesOutput) {
worker.ports.onBytesOutput.subscribe(function(result){
if (result.$ === "Ok") {
const ui8 = new Uint8Array(result.a.buffer);
output.a = ui8;
}
Deno.core.opSync('op_starmelon_bytes_output', result)
});
}"#,
);
}"#,
);
final_script
})();
drop(timing_guard);
let mut buffer_file = generator_dir.join("bin").join(&gen_module_name);
buffer_file.set_extension("js");
let span = info_span!("file writes");
let timing_guard = span.enter();
std::fs::write(&buffer_file, buffer)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, buffer_file.clone()))?;
drop(timing_guard);
let desired_route = entrypoint.0.module.clone().to_string();
@ -257,14 +242,6 @@ pub(crate) fn run(
Ok(())
};
let mut final_file = generator_dir.join("bin").join(&gen_module_name);
final_file.set_extension("js");
let span = info_span!("file writes");
let timing_guard = span.enter();
std::fs::write(&final_file, final_script)
.map_err(|io_err| CompilerError::WriteOutputFailed(io_err, final_file.clone()))?;
drop(timing_guard);
// Create a tokio runtime before registering ops so we can block on futures inside sync ops
let span = info_span!("create tokio runtime");
let timing_guard = span.enter();
@ -281,7 +258,7 @@ pub(crate) fn run(
let span = info_span!("create v8 isolate");
let timing_guard = span.enter();
let (mut worker, main_module) = runtime::setup_worker(&final_file.to_string_lossy())
let (mut worker, main_module) = runtime::setup_worker(&buffer_file.to_string_lossy())
.map_err(|err| InterpreterError::EventLoop(err))?;
drop(timing_guard);
@ -354,7 +331,7 @@ pub(crate) fn run(
let _start = Instant::now();
let db_pool = db_pool_clone;
let mut result: Vec<Vec<String>> = vec![];
let mut failure: Option<String> = None;
let mut failure: Option<AstridQueryError> = None;
for (fetch_all, sql, _args) in queries {
let mut acc = Vec::new();
if fetch_all {
@ -371,7 +348,10 @@ pub(crate) fn run(
}
Some(Err(err)) => {
eprintln!("got fetch_all sql error {:?}", err);
failure = Some(err.to_string());
failure = Some(AstridQueryError::Execute {
sql: sql.clone(),
message: err.to_string(),
});
break;
}
}
@ -384,9 +364,15 @@ pub(crate) fn run(
.and_then(|row| row.try_get::<String, _>(0))
{
Ok(s) => result.push(vec![s]),
Err(sqlx::Error::RowNotFound) => {
failure = Some(AstridQueryError::NotFound { sql });
}
Err(err) => {
eprintln!("got fetchOne sql error {:?}", err);
failure = Some(err.to_string());
failure = Some(AstridQueryError::Execute {
sql,
message: err.to_string(),
});
}
}
}
@ -418,8 +404,7 @@ pub(crate) fn run(
deno_core::op_sync(
move |_state, queries: Vec<(bool, String, Vec<String>)>, _: ()| {
let worker_mailbox = worker_mailbox_clone.clone();
let (sender, receiver) =
oneshot::channel::<ElmResult<Vec<Vec<String>>, String>>();
let (sender, receiver) = oneshot::channel::<ElmResult<Vec<Vec<String>>, _>>();
let span = info_span!("run sql");
let timing_guard = span.enter();
@ -480,3 +465,18 @@ pub(crate) fn run(
Ok(())
}
/// Error recorded when a SQL query issued on behalf of the Elm program fails.
///
/// The serde attributes control the serialized shape: `tag = "$"` stores the variant
/// name under a `"$"` key alongside the fields, and the fields themselves are renamed
/// to `"a"` / `"b"`. `Execute` therefore serializes as
/// `{"$": "Execute", "a": <sql>, "b": <message>}` and `NotFound` as
/// `{"$": "NotFound", "a": <sql>}`.
///
/// NOTE(review): this layout presumably mirrors how compiled Elm represents custom
/// type values (`$` tag plus positional `a`, `b` fields) so the JavaScript side can
/// pass the value to Elm unchanged — confirm against the Elm `Astrid.Query.Error` type.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "$")]
enum AstridQueryError {
/// The database reported an error while running a query.
Execute {
/// SQL text of the query that failed.
#[serde(rename = "a")]
sql: String,
/// Stringified database error (built with `err.to_string()` in `run`).
#[serde(rename = "b")]
message: String,
},
/// A single-row fetch produced no row (built from `sqlx::Error::RowNotFound` in `run`).
NotFound {
/// SQL text of the query that returned no row.
#[serde(rename = "a")]
sql: String,
},
}

View file

@ -1,3 +1,7 @@
function (query) {
return $author$project$Astrid$Query$dummyExecute;
};
// CORE QUERIES
function __Debug_print(object) {
@ -141,12 +145,15 @@ function _Query_runDecoder(decoder, sql, xs)
case 1:
if (xs.length === 0) {
__Debug_assert("did not find any results");
return $elm$core$Result$Err($author$project$Astrid$Query$NotFound(sql));
}
var result = _Json_runOnString.f(decoder.a, xs[0]);
if (!$elm$core$Result$isOk(result))
{
__Debug_assert("did not find any results for fetch One");
__Debug_assert(sql);
return $elm$core$Result$Err(
A3(
$author$project$Astrid$Query$Decode,
@ -167,12 +174,7 @@ function _Query_runDecoder(decoder, sql, xs)
try
{
var value = JSON.parse(string);
__Debug_print("parsed the json");
__Debug_print(value);
__Debug_print(decoder);
var result = _Json_runHelp(decoder.a, value);
__Debug_print("result of parsing the json");
__Debug_print(result);
if (!$elm$core$Result$isOk(result))
{
return $elm$core$Result$Err(
@ -260,7 +262,6 @@ var _Query_execute = function(query)
break;
}
}
__Debug_print("-----------------------");
if (statements.length > 0) {
var queryResult = Deno.core.opSync(
@ -273,7 +274,7 @@ var _Query_execute = function(query)
return queryResult
}
var results = queryResult.a;
var len = results.length;
for (var i = 0; i < len; i++) {
var { decoder, slot } = decoders[i];
@ -288,8 +289,6 @@ var _Query_execute = function(query)
decoders.length = 0;
}
__Debug_print({"processing callbacks stack": callbacks});
reduce:
while(callbacks.length > 0) {
var last = callbacks[callbacks.length - 1];
@ -329,12 +328,12 @@ var _Query_execute = function(query)
__Debug_assert(maybeValue.$ === 'Just');
queries.push({slot: slot, query: fun(maybeValue.a)})
}
}
}
if (queries.length == 0 && callbacks.length == 0) {
__Debug_assert(values.length === 1);
__Debug_assert(values[0].$ === 'Just');
return $elm$core$Result$Ok(values.pop().a)
}
}