generated from SAP/repository-template
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
58 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import type { ReadStream } from "node:fs"; | ||
import { Detail, SaxEventType, SAXParser, Tag as SaxTag } from "sax-wasm"; | ||
import { finished } from "node:stream/promises"; | ||
import fs from "node:fs/promises"; | ||
import { createRequire } from "node:module"; | ||
const require = createRequire(import.meta.url); | ||
|
||
// Module-level cache for the sax-wasm binary; stays unset until the first
// successful call to initSaxWasm().
let saxWasmBuffer: Buffer | undefined;

/**
 * Provides the contents of the sax-wasm WebAssembly binary.
 *
 * The file is located via `require.resolve` and read from disk on first use;
 * the resulting buffer is kept in a module-level variable and handed out
 * again on later calls.
 *
 * @returns The bytes of `sax-wasm/lib/sax-wasm.wasm`
 */
async function initSaxWasm() {
	if (saxWasmBuffer) {
		return saxWasmBuffer;
	}

	const wasmFilePath = require.resolve("sax-wasm/lib/sax-wasm.wasm");
	saxWasmBuffer = await fs.readFile(wasmFilePath);
	return saxWasmBuffer;
}
|
||
/**
 * Streams HTML content through a sax-wasm parser and forwards the parser's
 * open-tag and close-tag events to the given handler.
 *
 * @param contentStream Readable stream providing the HTML content. It is
 *   consumed by this function and destroyed if the parser throws while
 *   processing a chunk.
 * @param parseHandler Callback installed as the parser's event handler
 * @returns Promise that resolves once the stream has finished and the parser
 *   has been ended
 * @throws Error if the WASM module could not be prepared. The returned
 *   promise also rejects if the stream fails or if parsing a chunk throws.
 */
export async function parseHtml(
	contentStream: ReadStream,
	parseHandler: (type: SaxEventType, tag: Detail) => void
): Promise<void> {
	const options = { highWaterMark: 32 * 1024 }; // 32k chunks
	const saxWasmBuffer = await initSaxWasm();
	const saxParser = new SAXParser(SaxEventType.OpenTag | SaxEventType.CloseTag, options);

	saxParser.eventHandler = parseHandler;

	// Instantiate and prepare the wasm for parsing
	if (!await saxParser.prepareWasm(saxWasmBuffer)) {
		throw new Error("Unknown error during WASM Initialization");
	}

	// Feed every chunk of the provided content stream into the parser
	contentStream.on("data", (chunk: Uint8Array) => {
		try {
			saxParser.write(chunk);
		} catch (err) {
			// Destroy the content stream so that the failure rejects the
			// `finished()` promise below and bubbles up to our callers.
			// Non-Error values are wrapped instead of being rethrown, since
			// an exception escaping this listener would not reach the caller.
			const failure = err instanceof Error ?
				err :
				new Error(`Non-Error value thrown while parsing HTML: ${String(err)}`);
			contentStream.destroy(failure);
		}
	});
	await finished(contentStream);
	saxParser.end();
}
|
||
/**
 * Parses the given HTML stream and prints `tag.value` to the console for
 * every open-tag and close-tag event whose detail is a sax-wasm `Tag`.
 *
 * NOTE(review): despite its name, this function currently neither filters
 * for script tags nor returns anything - confirm the intended behavior.
 *
 * @param contentStream Readable stream providing the HTML content
 */
export async function extractScriptTags(contentStream: ReadStream) {
	const logTagValue = (event: SaxEventType, tag: Detail): void => {
		if (!(tag instanceof SaxTag)) {
			return;
		}
		const isTagEvent = event === SaxEventType.OpenTag || event === SaxEventType.CloseTag;
		if (isTagEvent) {
			console.log(tag.value);
		}
	};

	await parseHtml(contentStream, logTagValue);
}