From f74f5f85dd942a70b86c956a0b358a28bec52c19 Mon Sep 17 00:00:00 2001 From: Doehyun Baek Date: Tue, 13 Feb 2024 17:36:46 +0900 Subject: [PATCH] wip --- crates/replay_gen/src/jsgen.rs | 326 ------------------------------- crates/replay_gen/src/lib.rs | 1 - crates/replay_gen/src/main.rs | 13 +- crates/replay_gen/src/wasmgen.rs | 275 +++++++++++++++++++++++--- src/benchmark.cts | 1 + tests/online/gotemplate/test.js | 2 +- tests/run-tests.cts | 6 +- 7 files changed, 262 insertions(+), 362 deletions(-) delete mode 100644 crates/replay_gen/src/jsgen.rs diff --git a/crates/replay_gen/src/jsgen.rs b/crates/replay_gen/src/jsgen.rs deleted file mode 100644 index 77907642..00000000 --- a/crates/replay_gen/src/jsgen.rs +++ /dev/null @@ -1,326 +0,0 @@ -use std::io::Write; -use std::{fs::File, path::Path}; - -use crate::irgen::{HostEvent, Import, INIT_INDEX}; -use crate::trace::ValType; -use crate::{ - irgen::{Context, Replay, WriteResult}, - trace::F64, - write, -}; - -pub fn generate_replay_javascript(out_path: &Path, code: &Replay) -> std::io::Result<()> { - let mut file = File::create(&out_path)?; - let stream = &mut file; - // write(stream, "import fs from 'fs'\n")?; - // write(stream, "import path from 'path'\n")?; - write(stream, "let instance\n")?; - write(stream, "let imports = {}\n")?; - - // Init modules - for module in &code.imported_modules() { - write(stream, &format!("{}\n", write_module(module)))?; - } - // Init memories - for (_i, mem) in &code.imported_mems() { - let Import { module, name } = mem.import.clone().unwrap(); - write( - stream, - &format!( - "const {name} = new WebAssembly.Memory({{ initial: {}, maximum: {} }})\n", - mem.initial, - match mem.maximum { - Some(max) => max.to_string(), - None => "undefined".to_string(), - } - ), - )?; - write(stream, &format!("{}{name}\n", write_import(&module, &name)))?; - } - // Init globals - for (_i, global) in &code.imported_globals() { - let Import { module, name } = global.import.clone().unwrap(); - if 
global.initial.0.is_nan() || !global.initial.0.is_finite() { - if name.to_lowercase() == "infinity" { - write( - stream, - &format!("{}Infinity\n", write_import(&module, &name)), - )?; - } else if name.to_lowercase() == "nan" { - write(stream, &format!("{}NaN\n", write_import(&module, &name)))?; - } else { - panic!("Could not generate javascript code for the global initialisation, the initial value is NaN. The website you where recording did some weired stuff that I was not considering during the implementation of Wasm-R3. Tried to genereate global:"); - } - } else { - write( - stream, - &format!( - "const {name} = new WebAssembly.Global({{ value: '{}', mutable: {}}}, {})\n", - global.valtype, global.mutable, global.initial - ), - )?; - write(stream, &format!("{}{name}\n", write_import(&module, &name)))?; - } - } - // Init tables - for (_i, table) in &code.imported_tables() { - let Import { module, name } = table.import.clone().unwrap(); - write( - stream, - &format!( - "const {name} = new WebAssembly.Table({{ initial: {}, maximum: {}, element: '{}'}})\n", - table.initial, - match table.maximum { - Some(max) => max.to_string(), - None => "undefined".to_string(), - }, - table.reftype - ), - )?; - write( - stream, - &format!("{}{name}\n", write_import(&module, &name),), - )?; - } - // Imported functions - for (funcidx, func) in &code.imported_funcs() { - let Import { module, name } = func.import.clone().unwrap(); - // TODO: better handling of initialization - if *funcidx == INIT_INDEX { - continue; - } - write(stream, &format!("let {} = 0\n", write_func_global(funcidx)))?; - write( - stream, - &format!("{}() => {{\n", write_import(&module, &name)), - )?; - if !func.bodys.is_empty() { - write( - stream, - &format!("switch ({}) {{\n", write_func_global(funcidx)), - )?; - for (i, body) in func.bodys.iter().enumerate() { - if let Some(body) = body { - write_body(stream, body, i)? 
- } - } - write(stream, "}\n")?; - } - write(stream, &format!("{}++\n", write_func_global(funcidx)))?; - write_results(stream, &func.results, &write_func_global(funcidx))?; - write(stream, "}\n")?; - } - write(stream, "export function replay(wasm) {")?; - write(stream, "instance = wasm.instance\n")?; - let initialization = code.funcs.get(&INIT_INDEX).unwrap().bodys.last().unwrap(); - if let Some(initialization) = initialization { - for event in initialization { - let str = hostevent_to_js(&event); - writeln!(stream, "{}", str)?; - } - } - write(stream, "}\n")?; - - write(stream, "export function instantiate(wasmBinary) {\n")?; - write( - stream, - "return WebAssembly.instantiate(wasmBinary, imports)\n", - )?; - write(stream, "}\n")?; - write(stream, "let firstArg\n")?; - write(stream, "if (typeof Deno === 'undefined'){firstArg=process.argv[2]}else{firstArg=Deno.args[0]}\n")?; - write(stream, "if (firstArg === 'run') {\n")?; - write(stream, "let nodeModules;\n")?; - write(stream, "if (typeof Deno === 'undefined') { nodeModules = Promise.all([import('path'),import('fs')])}else{nodeModules=Promise.all([import('node:path'),import('node:fs')])}\n")?; - write(stream, "nodeModules.then(([path,fs])=>{")?; - write( - stream, - "const p = path.join(path.dirname(import.meta.url).replace(/^file:/, ''), 'index.wasm')\n", - )?; - write(stream, "const wasmBinary = fs.readFileSync(p)\n")?; - write( - stream, - "instantiate(wasmBinary).then((wasm) => replay(wasm))\n", - )?; - write(stream, "})}\n")?; - Ok(()) -} - -fn write_module(module: &str) -> String { - format!("imports['{}'] = {{}}", module) -} - -fn write_func_global(funcidx: &usize) -> String { - format!("global_{}", funcidx.to_string()) -} - -fn write_import(module: &str, name: &str) -> String { - format!("imports['{}']['{}'] = ", module, name) -} - -fn write_results( - stream: &mut File, - results: &[WriteResult], - func_global: &str, -) -> std::io::Result<()> { - let mut current = 0; - for r in results { - let new_c = 
current + r.reps; - writeln!( - stream, - "if (({} >= {}) && {} < {}) {{", - func_global, - current + 1, - func_global, - new_c + 1 - )?; - let res = match r.results.get(0) { - Some(r) => r.to_string(), - None => "undefined".to_string(), - }; - writeln!(stream, "return {} }}", res)?; - current = new_c; - } - Ok(()) -} - -fn write_body(stream: &mut File, b: &Context, i: usize) -> std::io::Result<()> { - if !b.is_empty() { - writeln!(stream, "case {}:", i)?; - for event in b { - let str = hostevent_to_js(event); - writeln!(stream, "{}", str)?; - } - writeln!(stream, "break")?; - } - Ok(()) -} - -fn hostevent_to_js(event: &HostEvent) -> String { - fn write_params_string(params: &[F64]) -> String { - params - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(",") - } - let str = match event { - HostEvent::ExportCall { - idx: _, - name, - params, - } => { - format!( - "instance.exports.{}({})\n", - name, - write_params_string(&params) - ) - } - HostEvent::ExportCallTable { - idx: _, - table_name, - funcidx, - params, - } => { - format!( - "instance.exports.{}.get({})({})\n", - table_name, - funcidx, - write_params_string(&params) - ) - } - HostEvent::MutateMemory { - addr, - data, - import, - name, - } => { - let mut js_string = String::new(); - for (j, byte) in data.iter().enumerate() { - if *import { - js_string += &format!( - "new Uint8Array({}.buffer)[{}] = {}\n", - name, - addr + j as i32, - byte - ); - } else { - js_string += &format!( - "new Uint8Array(instance.exports.{}.buffer)[{}] = {}\n", - name, - addr + j as i32, - byte - ); - } - } - js_string - } - HostEvent::GrowMemory { - amount, - import, - name, - } => { - if *import { - format!("{}.grow({})\n", name, amount) - } else { - format!("instance.exports.{}.grow({})\n", name, amount) - } - } - HostEvent::MutateTable { - tableidx: _, - funcidx: _, - idx, - func_import, - func_name, - import, - name, - } => { - let mut js_string = if *import { - format!("{}.set({}, ", name, idx) - } else { - 
format!("instance.exports.{}.set({}, ", name, idx) - }; - if *func_import { - js_string.push_str(&func_name); - } else { - js_string.push_str(&format!("instance.exports.{}", func_name)); - } - js_string.push_str(")\n"); - js_string - } - HostEvent::GrowTable { - idx: _, - amount, - import, - name, - } => { - if *import { - format!("{}.grow({})\n", name, amount) - } else { - format!("instance.exports.{}.grow({})\n", name, amount) - } - } - HostEvent::MutateGlobal { - idx: _, - value, - valtype, - import, - name, - } => { - if *import { - format!("{}.value = {}\n", name, value) - } else { - format!( - "instance.exports.{}.value = {}\n", - name, - if *valtype == ValType::I64 { - format!("BigInt({})", value) - } else { - value.to_string() - } - ) - } - } - }; - str -} diff --git a/crates/replay_gen/src/lib.rs b/crates/replay_gen/src/lib.rs index 5b98c8c0..2e8a0bb2 100644 --- a/crates/replay_gen/src/lib.rs +++ b/crates/replay_gen/src/lib.rs @@ -1,7 +1,6 @@ use std::{fs::File, io::Write}; pub mod irgen; -pub mod jsgen; pub mod opt; pub mod trace; pub mod wasmgen; diff --git a/crates/replay_gen/src/main.rs b/crates/replay_gen/src/main.rs index 984a6edd..df6889f1 100644 --- a/crates/replay_gen/src/main.rs +++ b/crates/replay_gen/src/main.rs @@ -5,10 +5,9 @@ use std::path::Path; use std::{env, fs}; use replay_gen::irgen::IRGenerator; -use replay_gen::jsgen::generate_replay_javascript; use replay_gen::opt::{discard_empty_body, merge_fn_results, split_big_body}; use replay_gen::trace; -use replay_gen::wasmgen::generate_replay_wasm; +use replay_gen::wasmgen::generate_replay; use walrus::Module; fn main() -> io::Result<()> { @@ -46,16 +45,10 @@ fn main() -> io::Result<()> { // opt phase merge_fn_results(&mut generator.replay); discard_empty_body(&mut generator.replay); + split_big_body(&mut generator.replay); // codegen phase - let is_standalone = replay_path.is_none(); - let is_replay_wasm = !is_standalone && replay_path.unwrap().extension().unwrap() == "wasm"; - if 
is_replay_wasm { - split_big_body(&mut generator.replay); // works only for wasm - generate_replay_wasm(replay_path.unwrap(), &generator.replay)?; - } else { - generate_replay_javascript(replay_path.unwrap(), &generator.replay)?; - } + generate_replay(replay_path.unwrap(), &generator.replay)?; Ok(()) } diff --git a/crates/replay_gen/src/wasmgen.rs b/crates/replay_gen/src/wasmgen.rs index 0e9c4c7f..23302b10 100644 --- a/crates/replay_gen/src/wasmgen.rs +++ b/crates/replay_gen/src/wasmgen.rs @@ -1,14 +1,16 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::io::Write; use std::process::Command; -use std::vec; +use std::{fs, vec}; use std::{fs::File, path::Path}; +use walrus::{Import, Module}; + use crate::irgen::{FunctionTy, HostEvent, INIT_INDEX}; use crate::trace::{ValType, F64}; use crate::{irgen::Replay, write}; -pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Result<()> { +pub fn generate_replay(replay_path: &Path, code: &Replay) -> std::io::Result<()> { let mut module_set: HashSet<&String> = code .module .imports @@ -98,29 +100,28 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul Some(max) => max.to_string(), None => "".to_string(), }; - if module == *current_module { - write( - stream, - &format!("(memory (export \"{name}\") {initial} {maximum})\n",), - )?; - } else { - write( - stream, - &format!("(import \"{module}\" \"{name}\" (memory {initial} {maximum}))\n",), - )?; - } + + write( + stream, + &format!("(import \"{module}\" \"{name}\" (memory {initial} {maximum}))\n",), + )?; } for (_i, global) in &code.imported_globals() { let import = global.import.clone().unwrap(); if import.module != *current_module { continue; } + + let module = import.module.clone(); let name = import.name.clone(); let valtype = global.valtype.clone(); - let initial = global.initial; + let typedecl = match global.mutable { + true => format!("(mut {valtype})"), + false 
=> format!("{valtype}"), + }; write( stream, - &format!("(global (export \"{name}\") {valtype} ({valtype}.const {initial:?}))\n",), + &format!("(import \"{module}\" \"{name}\" (global ${name} {typedecl}))\n",), )?; } // tables @@ -129,6 +130,7 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul if import.module != *current_module { continue; } + let module = import.module.clone(); let name = import.name.clone(); let initial = table.initial; let maximum = match table.maximum { @@ -138,7 +140,9 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul let reftype = table.reftype.clone(); write( stream, - &format!("(table (export \"{name}\") {initial} {maximum} {reftype:?})\n",), + &format!( + "(import \"{module}\" \"{name}\" (table {initial} {maximum} {reftype:?}))\n", + ), )?; } // functions @@ -281,6 +285,146 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul let mut modle_wasm_file = File::create(&module_wasm_path).unwrap(); modle_wasm_file.write_all(&binary).unwrap(); } + + generate_replay_js(replay_path, &module_set, code)?; + generate_replay_wasm(replay_path, &module_set)?; + + Ok(()) +} + +fn generate_replay_js( + replay_path: &Path, + module_set: &HashSet<&String>, + code: &Replay, +) -> Result<(), std::io::Error> { + let replay_js_path = replay_path.parent().unwrap().join(&format!("replay.js")); + let stream = &mut File::create(replay_js_path).unwrap(); + + for (_i, memory) in &code.imported_mems() { + let import = memory.import.clone().unwrap(); + let module = import.module.clone(); + let name = import.name.clone(); + let module_name = &format!("{module}_{name}"); + let initial = memory.initial; + let maximum = match memory.maximum { + Some(max) => max.to_string(), + None => "undefined".to_string(), + }; + + write( + stream, + &format!( + "const {module_name} = new WebAssembly.Memory({{ initial: {initial}, maximum: {maximum} }})\n" + ), + )?; + } + for (_i, global) in 
&code.imported_globals() { + let import = global.import.clone().unwrap(); + let module = import.module.clone(); + let name = import.name.clone(); + let module_name = &format!("{module}_{name}"); + let valtype = global.valtype.clone(); + let mutable = global.mutable; + let initial = global.initial; + write( + stream, + &format!( + "const {module_name} = new WebAssembly.Global({{ value: '{valtype}', mutable: {mutable}}}, {initial})\n" + ), + )?; + } + for (_i, table) in &code.imported_tables() { + let import = table.import.clone().unwrap(); + let module = import.module.clone(); + let name = import.name.clone(); + let module_name = &format!("{module}_{name}"); + let initial = table.initial; + let maximum = match table.maximum { + Some(max) => max.to_string(), + None => "undefined".to_string(), + }; + let reftype = table.reftype.clone(); + write( + stream, + &format!( + "const {module_name} = new WebAssembly.Table({{ initial: {initial}, maximum: {maximum}, element: '{reftype}'}})\n",) + )?; + } + write(stream, "\n")?; + let var_name = "index".to_string(); + let iterable = std::iter::once(&var_name).chain(module_set.clone().into_iter()); + for current_module in iterable { + let wasm_path = replay_path + .parent() + .unwrap() + .join(&format!("{current_module}.wasm")); + let buffer = &fs::read(wasm_path).unwrap(); + let walrus_module = Module::from_buffer(buffer).unwrap(); + let module_set = walrus_module + .imports + .iter() + .map(|import| &import.module) + .collect::>(); + let module_escaped = current_module.replace(|c: char| !c.is_alphanumeric(), "_"); + let object = format!("{module_escaped}Import"); + let mut import_object_str = format!("const {object} = {{}}\n"); + for module in module_set { + import_object_str += &format!("{object}['{module}'] = {{}}\n"); + } + for import in walrus_module.imports.iter() { + let module = &import.module; + let module_escaped = module.replace(|c: char| !c.is_alphanumeric(), "_"); + let name = &import.name; + let module_name = 
&format!("{module_escaped}_{name}"); + let value = match import.kind { + walrus::ImportKind::Function(_) => { + format!("(...args) => {{ return {module_escaped}.exports['{name}'](...args) }}") + } + _ => { + if module == "index" { + format!("index.exports.{name}") + } else { + format!("{module_name}") + } + } + }; + import_object_str += &format!("{object}['{module}']['{name}'] = {value}\n",); + } + + write( + stream, + &format!( + "{import_object_str} +const {module_escaped} = new WebAssembly.Instance(new WebAssembly.Module(await readFile(\"{current_module}.wasm\")), {module_escaped}Import)\n\n", + ), + )?; + } + + write( + stream, + "main.exports.main(); +async function readFile(filename) { +let data; +if (typeof Deno !== 'undefined') { + data = await Deno.readFile(filename); +} else if (typeof process !== 'undefined') { + const fs = await import('fs').then(module => module.promises); + data = await fs.readFile(filename); +} else if (typeof Bun !== 'undefined') { + data = await Bun.fs.readFile(filename, 'utf8'); +} else { + throw new Error('Not suppported'); +} +return data; +} ", + )?; + Ok(()) +} + +fn generate_replay_wasm( + replay_path: &Path, + module_set: &HashSet<&String>, +) -> Result<(), std::io::Error> { let module_args = module_set .iter() .map(|module| vec![format!("{}.wasm", module), module.to_string()]) @@ -298,12 +442,101 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul .iter() .cloned() .chain(module_args.iter().map(|s| s.as_str())) - .chain(["-o", "merged.wasm"]); + .chain(["-o", "merged_1.wasm"]); let _output = Command::new("wasm-merge") .current_dir(replay_path.parent().unwrap()) .args(args) .output() .expect("Failed to execute wasm-merge"); + + let wasm_path = replay_path + .parent() + .unwrap() + .join(&format!("merged_1.wasm")); + let buffer = &fs::read(wasm_path).unwrap(); + let walrus_module = Module::from_buffer(buffer).unwrap(); + let mut module_map: HashMap> = HashMap::new(); + for import in 
walrus_module.imports.iter() { + let import_vec = module_map + .entry(import.module.clone()) + .or_insert_with(Vec::new); + if !import_vec.iter().any(|i| i.name == import.name) { + import_vec.push(import); + } + } + for (module, import_list) in &module_map { + let module_wasm_path = replay_path + .parent() + .unwrap() + .join(&format!("{module}_merge.wasm")); + let stream = &mut File::create(&module_wasm_path).unwrap(); + + let export_list = + import_list + .iter() + .map(|import| { + let name = &import.name; + match &import.kind { + walrus::ImportKind::Memory(mid) => { + let memory = walrus_module.memories.get(*mid); + let initial = memory.initial; + let maximum = match memory.maximum { + Some(max) => max.to_string(), + None => "".to_string(), + }; + format!("(memory (export \"{name}\") {initial} {maximum})\n",) + } + walrus::ImportKind::Global(gid) => { + let global = walrus_module.globals.get(*gid); + let valtype = global.ty; + // TODO: this might be wrong + let initial = 0; + format!("(global (export \"{name}\") {valtype} ({valtype}.const {initial:?}))\n",) + } + walrus::ImportKind::Table(tid) => { + let table = walrus_module.tables.get(*tid); + let initial = table.initial; + let maximum = match table.maximum { + Some(max) => max.to_string(), + None => "".to_string(), + }; + let reftype = table.element_ty; + format!("(table (export \"{name}\") {initial} {maximum} {reftype:?})\n",) + }, + walrus::ImportKind::Function(_) => { + unreachable!("it never imports function here") + } + } + }) + .collect::>() + .join("\n"); + write!(stream, "(module {export_list})")?; + } + + let module_args = module_map + .iter() + .map(|(module, _)| vec![format!("{}_merge.wasm", module), module.to_string()]) + .flatten() + .collect::>(); + let args = [ + "--rename-export-conflicts", + "--enable-reference-types", + "--enable-multimemory", + "--enable-bulk-memory", + "--debuginfo", + "merged_1.wasm", + "index", + ] + .iter() + .cloned() + .chain(module_args.iter().map(|s| s.as_str())) + 
.chain(["-o", "merged_2.wasm"]); + let _output = Command::new("wasm-merge") + .current_dir(replay_path.parent().unwrap()) + .args(args) + .output() + .expect("Failed to execute wasm-merge"); + let _output = Command::new("wasm-opt") .current_dir(replay_path.parent().unwrap()) .args([ @@ -313,13 +546,11 @@ pub fn generate_replay_wasm(replay_path: &Path, code: &Replay) -> std::io::Resul "--debuginfo", // for handling inlining of imported globals. Without this glob-merge node test will fail. "--simplify-globals", - "merged.wasm", + "merged_2.wasm", "-o", replay_path.to_str().unwrap(), ]) - .output() - .expect("Failed to execute wasm-opt"); - + .output()?; Ok(()) } diff --git a/src/benchmark.cts b/src/benchmark.cts index 63509872..a195a677 100644 --- a/src/benchmark.cts +++ b/src/benchmark.cts @@ -41,6 +41,7 @@ export default class Benchmark { execSync(`./crates/target/debug/replay_gen ${diskSave} ${path.join(binPath, 'index.wasm')} ${path.join(binPath, 'replay.js')}`); } else { execSync(`./crates/target/debug/replay_gen ${diskSave} ${path.join(binPath, 'index.wasm')} ${path.join(binPath, 'replay.wasm')}`); + execSync(`node ${path.join(binPath, "replay.js")}`, { cwd: binPath }) execSync(`wasm-tools validate -f all ${path.join(binPath, "replay.wasm")}`) } p_measureCodeGen() diff --git a/tests/online/gotemplate/test.js b/tests/online/gotemplate/test.js index d2d54c81..c32dffd9 100644 --- a/tests/online/gotemplate/test.js +++ b/tests/online/gotemplate/test.js @@ -3,7 +3,7 @@ import { expect } from 'playwright/test' export default async function test(analyser) { const url = 'https://gotemplate.io/' - const page = await analyser.start(url, { headless: false }) + const page = await analyser.start(url, { headless: true }) const templateText = page.locator('#input-tmpl') await templateText.waitFor({state: 'visible'}) diff --git a/tests/run-tests.cts b/tests/run-tests.cts index 6698e471..75aa0050 100644 --- a/tests/run-tests.cts +++ b/tests/run-tests.cts @@ -111,8 +111,8 @@ 
async function runNodeTest(name: string, options): Promise { execSync(`./crates/target/debug/replay_gen ${tracePath} ${wasmPath} ${replayJsPath}`); } else { execSync(`./crates/target/debug/replay_gen ${tracePath} ${wasmPath} ${replayWasmPath}`); - // we validate and early return as for single wasm accuracy test doesn't make sense - execSync(`wasm-validate ${replayWasmPath}`) + execSync(`node ${replayJsPath}`, { cwd: testPath }) + execSync(`wasm-tools validate ${replayWasmPath}`) return { testPath, roundTripTime: p_roundTrip().duration, success: true } } } @@ -223,6 +223,8 @@ async function runOnlineTests(names: string[], options) { 'lichess', // failing test 'livesplit', // uses simd, filter for now 'onnxjs', // // unknown func: failed to find name `$1000008`" + 'gotemplate', // timeout for locator('#output') + 'commanderkeen', // unreachable 'hnset-bench', // no benchmark generated 'fractals' // no benchmark generated ]