Skip to content

Commit

Permalink
feat: Add html parser
Browse files Browse the repository at this point in the history
  • Loading branch information
d3xter666 committed Mar 22, 2024
1 parent a157e13 commit a3cd5bf
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions src/detectors/transpilers/html/transpiler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import type { ReadStream } from "node:fs";
import { Detail, SaxEventType, SAXParser, Tag as SaxTag } from "sax-wasm";
import { finished } from "node:stream/promises";
import fs from "node:fs/promises";
import { createRequire } from "node:module";
const require = createRequire(import.meta.url);

// Module-level cache for the sax-wasm binary; unset until the first successful load.
let saxWasmBuffer: Buffer;

/**
 * Provides the contents of the sax-wasm WebAssembly binary.
 *
 * The file is located via module resolution and read from disk the first time
 * this function completes; later calls return the cached buffer.
 *
 * @returns The bytes of "sax-wasm/lib/sax-wasm.wasm"
 */
async function initSaxWasm() {
	if (saxWasmBuffer) {
		// Already loaded by an earlier call
		return saxWasmBuffer;
	}

	saxWasmBuffer = await fs.readFile(require.resolve("sax-wasm/lib/sax-wasm.wasm"));
	return saxWasmBuffer;
}

/**
 * Feeds the content of a stream into a sax-wasm parser and forwards the parser's
 * open-tag and close-tag events to the given handler.
 *
 * @param contentStream Stream providing the raw bytes of the document. It is consumed
 *   completely and gets destroyed if the parser throws while processing a chunk.
 * @param parseHandler Installed as the parser's event handler. The parser is configured
 *   for OpenTag and CloseTag events only.
 * @returns Promise resolving after the stream has finished and the parser has been ended
 * @throws Error if the WASM module could not be prepared. The returned promise also
 *   rejects if the stream fails or the parser throws while processing a chunk.
 */
export async function parseHtml(
	contentStream: ReadStream, parseHandler: (type: SaxEventType, tag: Detail) => void
): Promise<void> {
	const options = {highWaterMark: 32 * 1024}; // 32k chunks
	const saxWasmBuffer = await initSaxWasm();
	const saxParser = new SAXParser(SaxEventType.OpenTag | SaxEventType.CloseTag, options);

	saxParser.eventHandler = parseHandler;

	// Instantiate and prepare the wasm for parsing
	if (!await saxParser.prepareWasm(saxWasmBuffer)) {
		throw new Error("Unknown error during WASM Initialization");
	}

	// Attaching the listener switches the caller-provided stream into flowing mode
	contentStream.on("data", (chunk: Uint8Array) => {
		try {
			// The stream is created by the caller, so its chunk size is not guaranteed to
			// match the parser's highWaterMark. Feed the parser slices of at most that size.
			// NOTE(review): presumably sax-wasm copies every chunk into a write buffer of
			// highWaterMark bytes and fails on larger input - confirm for the version in use.
			for (let offset = 0; offset < chunk.byteLength; offset += options.highWaterMark) {
				saxParser.write(chunk.subarray(offset, offset + options.highWaterMark));
			}
		} catch (err) {
			// Destroy the content stream so that the error rejects the "finished" promise
			// below and bubbles up to our callers. Re-throwing from inside the listener
			// would surface as an uncaught exception instead, therefore non-Error values
			// are wrapped rather than thrown.
			contentStream.destroy(err instanceof Error ? err : new Error(String(err)));
		}
	});
	await finished(contentStream);
	saxParser.end();
}

/**
 * Parses the given stream via parseHtml and writes `tag.value` to the console for
 * every OpenTag and CloseTag event whose detail object is a sax-wasm Tag.
 *
 * NOTE(review): despite the name, nothing is filtered for script tags or returned
 * yet - the visible code only logs.
 *
 * @param contentStream Stream providing the document to parse
 */
export async function extractScriptTags(contentStream: ReadStream) {
	await parseHtml(contentStream, (event, tag) => {
		if (!(tag instanceof SaxTag)) {
			return;
		}

		const isTagBoundary = event === SaxEventType.OpenTag || event === SaxEventType.CloseTag;
		if (!isTagBoundary) {
			return;
		}

		console.log(tag.value);
	});
}

0 comments on commit a3cd5bf

Please sign in to comment.