Skip to content

Commit

Permalink
chore: added test cases for getSanitizedHTML function
Browse files Browse the repository at this point in the history
  • Loading branch information
surajair committed Nov 28, 2024
1 parent cf8782f commit 0417e3c
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 15 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"author": "Suraj Air",
"license": "BSD-3-Clause",
"homepage": "https://chaibuilder.com",
"version": "2.0.0-beta.26",
"version": "2.0.0-beta.27",
"type": "module",
"repository": {
"type": "git",
Expand Down
60 changes: 60 additions & 0 deletions src/core/import-html/html-to-json.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { getSanitizedHTML } from "./html-to-json";

/**
 * Specification for getSanitizedHTML, expressed as a table of scenarios.
 *
 * Every scenario feeds one raw HTML string to the sanitizer and requires the
 * result to be strictly equal to the listed clean string. Scenarios are
 * registered in the order they appear in the table.
 */
describe("getSanitizedHTML", () => {
  const scenarios: ReadonlyArray<{ title: string; raw: string; clean: string }> = [
    {
      // `\\"` in the literal is a real backslash followed by a quote in the input.
      title: "should remove escaped quotes and backslashes from attributes",
      raw: '<div class=\\"test\\" data-value=\\"123\\">Content</div>',
      clean: '<div class="test" data-value="123">Content</div>',
    },
    {
      // `\\n` is the two-character sequence backslash + n, not a line break.
      title: "should remove escaped newlines and whitespace characters",
      raw: "Line 1\\nLine 2\\n<div>\\n Content\\n</div>",
      clean: "Line 1Line 2<div> Content</div>",
    },
    {
      title: "should remove script tags and their content",
      raw: '<div>Before<script>alert("test");</script>After</div>',
      clean: "<div>BeforeAfter</div>",
    },
    {
      title: "should convert body tags to div tags",
      raw: '<body class="body-class">Content</body>',
      clean: '<div class="body-class">Content</div>',
    },
    {
      // Only the gaps between adjacent tags collapse; text padding is kept.
      title: "should remove excessive whitespace between tags",
      raw: "<div> <span> Content </span> </div>",
      clean: "<div><span> Content </span></div>",
    },
    {
      title: "should handle multiple attributes with escaped values",
      raw: '<div class=\\"c1\\" id=\\"id1\\" data-value=\\"test\\">Content</div>',
      clean: '<div class="c1" id="id1" data-value="test">Content</div>',
    },
    {
      title: "should handle empty input",
      raw: "",
      clean: "",
    },
    {
      // Multi-line input containing real line breaks around and between the tags.
      title: "should preserve valid HTML structure",
      raw: `
<div class="container">
<h1>Title</h1>
<p>Paragraph</p>
</div>
`,
      clean: '<div class="container"><h1>Title</h1><p>Paragraph</p></div>',
    },
    {
      title: "should handle HTML with escaped special characters",
      raw: '<div data-special=\\"<>/?\\">Content</div>',
      clean: '<div data-special="<>/?">Content</div>',
    },
  ];

  for (const { title, raw, clean } of scenarios) {
    test(title, () => {
      expect(getSanitizedHTML(raw)).toBe(clean);
    });
  }
});
32 changes: 19 additions & 13 deletions src/core/import-html/html-to-json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -411,29 +411,35 @@ const traverseNodes = (nodes: Node[], parent: any = null): ChaiBlock[] => {
* @param html
* @returns the sanitized HTML content
*/
const getSanitizedHTML = (html: string) => {
// First handle all attributes with escaped quotes
html = html.replace(/(\w+)=\\?"([^"]*?)\\?"/g, (match, attr, value) => {
// Remove escaped quotes and backslashes from attribute values
return `${attr}="${value.replace(/\\\\/g, "").replace(/\\"/g, '"')}"`;
export const getSanitizedHTML = (html: string) => {
// First, handle the JSON-like structures in attributes
html = html.replace(/(\w+)=\\?"(.*?)\\?"/g, (_match, attr, value) => {
// Remove initial escaping
let cleanValue = value.replace(/\\"/g, '"');

// Re-escape quotes that are part of JSON structure
cleanValue = cleanValue.replace(/{([^}]+)}/g, (jsonMatch) => {
return jsonMatch.replace(/"/g, '\\"');
});

// Unescape the outer quotes and return
return `${attr}="${cleanValue.replace(/\\"/g, '"')}"`;
});

// Remove all escaped newlines and remaining escape sequences
// Rest of the function remains the same
html = html
.replace(/\\n/g, "") // Remove escaped newlines
.replace(/\\\\/g, "") // Remove double backslashes
.replace(/\\([/<>])/g, "$1") // Handle escaped HTML tags
.replace(/\\./g, "") // Remove any remaining escape sequences
.replace(/[\n\r\t\f\v]/g, ""); // Remove actual whitespace characters
.replace(/\\n/g, "")
.replace(/\\\\/g, "")
.replace(/\\([/<>])/g, "$1")
.replace(/\\./g, "")
.replace(/[\n\r\t\f\v]/g, "");

// * Checking if having body tag then converting it to div and using that as root
const bodyContent = html.match(/<body[^>]*>[\s\S]*?<\/body>/);
const htmlContent =
bodyContent && bodyContent.length > 0
? bodyContent[0].replace(/<body/, "<div").replace(/<\/body>/, "</div>")
: html;

// * Replacing script and unwanted whitespaces
return htmlContent
.replace(/\s+/g, " ")
.replaceAll("> <", "><")
Expand Down
2 changes: 1 addition & 1 deletion src/web-blocks/row-col.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
registerChaiBlockSchema,
StylesProp,
} from "@chaibuilder/runtime";
import _, { get } from "lodash";
import { get } from "lodash-es";
import { Columns, Rows } from "lucide-react";

export type RowProps = {
Expand Down

0 comments on commit 0417e3c

Please sign in to comment.