From 0417e3c4d586d9cea2129eba90e5c2a7bf1466d0 Mon Sep 17 00:00:00 2001 From: Suraj Air Date: Thu, 28 Nov 2024 12:35:12 +0530 Subject: [PATCH] chore: added test cases for `getSanitizedHTML` function --- package.json | 2 +- src/core/import-html/html-to-json.test.ts | 60 +++++++++++++++++++++++ src/core/import-html/html-to-json.ts | 32 +++++++----- src/web-blocks/row-col.tsx | 2 +- 4 files changed, 81 insertions(+), 15 deletions(-) create mode 100644 src/core/import-html/html-to-json.test.ts diff --git a/package.json b/package.json index fe5fcd4..e63ddcb 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "author": "Suraj Air", "license": "BSD-3-Clause", "homepage": "https://chaibuilder.com", - "version": "2.0.0-beta.26", + "version": "2.0.0-beta.27", "type": "module", "repository": { "type": "git", diff --git a/src/core/import-html/html-to-json.test.ts b/src/core/import-html/html-to-json.test.ts new file mode 100644 index 0000000..b5b208e --- /dev/null +++ b/src/core/import-html/html-to-json.test.ts @@ -0,0 +1,60 @@ +import { getSanitizedHTML } from "./html-to-json"; + +describe("getSanitizedHTML", () => { + test("should remove escaped quotes and backslashes from attributes", () => { + const input = '
Content
'; + const expected = '
Content
'; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should remove escaped newlines and whitespace characters", () => { + const input = "Line 1\\nLine 2\\n
\\n Content\\n
"; + const expected = "Line 1Line 2
Content
"; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should remove script tags and their content", () => { + const input = '
BeforeAfter
'; + const expected = "
BeforeAfter
"; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should convert body tags to div tags", () => { + const input = 'Content'; + const expected = '
Content
'; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should remove excessive whitespace between tags", () => { + const input = "
Content
"; + const expected = "
Content
"; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should handle multiple attributes with escaped values", () => { + const input = '
Content
'; + const expected = '
Content
'; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should handle empty input", () => { + expect(getSanitizedHTML("")).toBe(""); + }); + + test("should preserve valid HTML structure", () => { + const input = ` +
+

Title

+

Paragraph

+
+ `; + const expected = '

Title

Paragraph

'; + expect(getSanitizedHTML(input)).toBe(expected); + }); + + test("should handle HTML with escaped special characters", () => { + const input = '
/?\\">Content
'; + const expected = '
Content
'; + expect(getSanitizedHTML(input)).toBe(expected); + }); +}); diff --git a/src/core/import-html/html-to-json.ts b/src/core/import-html/html-to-json.ts index 609c233..ee8b2d3 100644 --- a/src/core/import-html/html-to-json.ts +++ b/src/core/import-html/html-to-json.ts @@ -411,29 +411,35 @@ const traverseNodes = (nodes: Node[], parent: any = null): ChaiBlock[] => { * @param html * @returns sanitizing html content */ -const getSanitizedHTML = (html: string) => { - // First handle all attributes with escaped quotes - html = html.replace(/(\w+)=\\?"([^"]*?)\\?"/g, (match, attr, value) => { - // Remove escaped quotes and backslashes from attribute values - return `${attr}="${value.replace(/\\\\/g, "").replace(/\\"/g, '"')}"`; +export const getSanitizedHTML = (html: string) => { + // First, handle the JSON-like structures in attributes + html = html.replace(/(\w+)=\\?"(.*?)\\?"/g, (_match, attr, value) => { + // Remove initial escaping + let cleanValue = value.replace(/\\"/g, '"'); + + // Re-escape quotes that are part of JSON structure + cleanValue = cleanValue.replace(/{([^}]+)}/g, (jsonMatch) => { + return jsonMatch.replace(/"/g, '\\"'); + }); + + // Unescape the outer quotes and return + return `${attr}="${cleanValue.replace(/\\"/g, '"')}"`; }); - // Remove all escaped newlines and remaining escape sequences + // Rest of the function remains the same html = html - .replace(/\\n/g, "") // Remove escaped newlines - .replace(/\\\\/g, "") // Remove double backslashes - .replace(/\\([/<>])/g, "$1") // Handle escaped HTML tags - .replace(/\\./g, "") // Remove any remaining escape sequences - .replace(/[\n\r\t\f\v]/g, ""); // Remove actual whitespace characters + .replace(/\\n/g, "") + .replace(/\\\\/g, "") + .replace(/\\([/<>])/g, "$1") + .replace(/\\./g, "") + .replace(/[\n\r\t\f\v]/g, ""); - // * Checking if having body tag then converting it to div and using that as root const bodyContent = html.match(/]*>[\s\S]*?<\/body>/); const htmlContent = bodyContent && 
bodyContent.length > 0 ? bodyContent[0].replace(//, "") : html; - // * Replacing script and unwanted whitespaces return htmlContent .replace(/\s+/g, " ") .replaceAll("> <", "><") diff --git a/src/web-blocks/row-col.tsx b/src/web-blocks/row-col.tsx index aa97f71..df60613 100644 --- a/src/web-blocks/row-col.tsx +++ b/src/web-blocks/row-col.tsx @@ -5,7 +5,7 @@ import { registerChaiBlockSchema, StylesProp, } from "@chaibuilder/runtime"; -import _, { get } from "lodash"; +import { get } from "lodash-es"; import { Columns, Rows } from "lucide-react"; export type RowProps = {