diff --git a/js/md4w.d.ts b/js/md4w.d.ts index 13c001f..43908a4 100644 --- a/js/md4w.d.ts +++ b/js/md4w.d.ts @@ -76,8 +76,8 @@ export enum NodeType { OL = 3, LI = 4, HR = 5, - HTML = 6, CODE_BLOCK = 7, + HTML = 8, P = 9, TABLE = 10, THEAD = 11, diff --git a/js/md4w.js b/js/md4w.js index 16792a9..f17655e 100644 --- a/js/md4w.js +++ b/js/md4w.js @@ -51,8 +51,8 @@ export const NodeType = Object.freeze({ OL: 3, LI: 4, HR: 5, - HTML: 6, CODE_BLOCK: 7, + HTML: 8, P: 9, TABLE: 10, THEAD: 11, @@ -198,9 +198,12 @@ export function mdToReadableHtml(input, options = {}) { */ export function mdToJSON(input, options = {}) { const output = mdToString(input, options, 2); - console.log(output) - const children = JSON.parse(output); - return { children }; + try { + const children = JSON.parse(output); + return { children }; + } catch (error) { + throw new Error("Failed to parse JSON: " + error.message + "\n" + output); + } } /** diff --git a/src/md4w.zig b/src/md4w.zig index ea30cd8..a79702e 100644 --- a/src/md4w.zig +++ b/src/md4w.zig @@ -33,9 +33,6 @@ const Writer = struct { slug: []u8 = undefined, slug_len: usize = 0, current_block: c.MD_BLOCKTYPE = 0, - current_span: c.MD_SPANTYPE = 100, - has_block_children: bool = false, - has_span_children: bool = false, image_nesting_level: usize = 0, has_code_highlighter: bool = undefined, pub fn init(buffer_size: usize, has_code_highlighter: bool) Writer { @@ -49,10 +46,13 @@ const Writer = struct { allocator.free(self.buf); allocator.free(self.slug); } + fn flush(self: *Writer) void { + push(toJS(self.buf[0..self.len])); + self.len = 0; + } pub fn writeByte(self: *Writer, byte: u8) void { if (self.len >= self.buf.len) { - push(toJS(self.buf[0..self.buf.len])); - self.len = 0; + self.flush(); } self.buf[self.len] = byte; self.len += 1; @@ -60,19 +60,26 @@ const Writer = struct { pub fn write(self: *Writer, chunk: []const u8) void { if (chunk.len >= self.buf.len) { if (self.len > 0) { - push(toJS(self.buf[0..self.len])); - self.len = 0; + self.flush(); } push(toJS(chunk)); return; } if (self.len + chunk.len > self.buf.len) { - push(toJS(self.buf[0..self.len])); - self.len = 0; + self.flush(); } std.mem.copy(u8, self.buf[self.len..], chunk); self.len += chunk.len; } + fn safeWriteChar(self: *Writer, ch: u8) void { + switch (ch) { + '<' => self.write("<"), + '>' => self.write(">"), + '&' => self.write("&"), + '"' => self.write("""), + else => self.writeByte(ch), + } + } pub fn safeWrite(self: *Writer, chunk: []const u8) void { var start: usize = 0; while (true) { @@ -87,19 +94,13 @@ const Writer = struct { if (i == chunk.len) { break; } - switch (chunk[i]) { - '<' => self.write("<"), - '>' => self.write(">"), - '&' => self.write("&"), - '"' => self.write("""), - else => {}, - } + self.safeWriteChar(chunk[i]); start = i + 1; } } - pub fn safeWriteUrl(self: *Writer, input: []const u8) void { - for (input) |ch| switch (ch) { - 'A'...'Z', 'a'...'z', '0'...'9', '_', '$', '@', ':', '+', '-', '*', '/', '.', ',', ';', '~', '=', '?', '!', '#', '&', '%', '(', ')' => self.writeByte(ch), + fn safeWriteUrlChar(self: *Writer, ch: u8) void { + switch (ch) { + 'A'...'Z', 'a'...'z', '0'...'9', '_', '$', '@', ':', '+', '-', '*', '/', '.', ',', ';', '~', '=', '?', '!', '#', '&', '%', '(', ')', '[', ']' => self.writeByte(ch), else => { var buf: [2]u8 = undefined; _ = std.fmt.bufPrint(&buf, "{X:0>2}", .{ch}) catch unreachable; @@ -107,7 +108,12 @@ const Writer = struct { self.writeByte(buf[0]); self.writeByte(buf[1]); }, - }; + } + } + pub fn safeWriteUrl(self: *Writer, input: []const u8) void { + for (input) |ch| { + self.safeWriteUrlChar(ch); + } } pub fn updateSlug(self: *Writer, ch: u8) void { // skip if the last character is already a hyphen @@ -142,12 +148,22 @@ const Writer = struct { pub fn writeJSONProps(self: *Writer) void { self.write(",\"props\":{"); } - pub fn safeWriteJSONString(self: *Writer, input: []const u8) void { - for (input) |ch| { - if (ch == '"') { + pub fn writeJSONString(self: *Writer, input: []const u8, escape: u2) void { + for (input, 0..) |ch, i| { + const br = ch == '\n'; + if (br or (ch == '"' and (i == 0 or input[i - 1] != '\\'))) { self.writeByte('\\'); } - self.writeByte(ch); + if (br) { + self.writeByte('n'); + continue; + } + switch (escape) { + 0 => self.writeByte(ch), + 1 => self.safeWriteChar(ch), + 2 => self.safeWriteUrlChar(ch), + else => unreachable, + } } } }; @@ -445,11 +461,9 @@ const JOSNRenderer = struct { const w: *Writer = @ptrCast(@alignCast(userdata)); // skip the document block - if (typ == c.MD_BLOCK_DOC or typ == c.MD_BLOCK_HTML) { + if (typ == c.MD_BLOCK_DOC) { return 0; } - - if (w.current_block > 0) w.writeByte(','); w.current_block = typ; switch (typ) { @@ -460,7 +474,7 @@ const JOSNRenderer = struct { w.writeJSONProps(); w.write("\"start\":"); w.writeByte('0' + @as(u8, @intCast(ol.start))); - w.write("}"); + w.writeByte('}'); } w.writeJSONChildren(); }, @@ -471,7 +485,7 @@ const JOSNRenderer = struct { w.writeJSONProps(); w.write("\"isTask\":true,\"done\":"); w.write(if (li.task_mark == 'x' or li.task_mark == 'X') "true" else "false"); - w.write("}"); + w.writeByte('}'); } w.writeJSONChildren(); }, @@ -486,7 +500,7 @@ const JOSNRenderer = struct { if (code.lang.size > 0) { w.writeJSONProps(); w.write("\"lang\":\""); - w.safeWriteJSONString(@as([*]const u8, @ptrCast(code.lang.text))[0..code.lang.size]); + w.writeJSONString(@as([*]const u8, @ptrCast(code.lang.text))[0..code.lang.size], 1); w.write("\"}"); } w.writeJSONChildren(); @@ -520,12 +534,19 @@ const JOSNRenderer = struct { _ = detail; // skip the document block - if (typ == c.MD_BLOCK_DOC or typ == c.MD_BLOCK_HTML) { + if (typ == c.MD_BLOCK_DOC) { return 0; } + w.current_block = 0; - w.has_block_children = false; - w.write("]}"); + if (typ == c.MD_BLOCK_HR) { + w.writeByte(','); + } else { + if (w.buf[w.len - 1] == ',') { + w.len -= 1; + } + w.write("]},"); + } return 0; } @@ -543,21 +564,16 @@ const JOSNRenderer = struct { if (inside_img) return 0; - if (w.has_block_children) w.writeByte(','); - w.current_span = typ; - w.has_block_children = true; - w.has_span_children = false; - switch (typ) { c.MD_SPAN_A => { const a: *c.MD_SPAN_A_DETAIL = @ptrCast(@alignCast(detail)); w.writeJSONType(100 + typ); w.writeJSONProps(); w.write("\"href\":\""); - w.safeWriteJSONString(@as([*]const u8, @ptrCast(a.href.text))[0..a.href.size]); + w.writeJSONString(@as([*]const u8, @ptrCast(a.href.text))[0..a.href.size], 2); if (a.title.size > 0) { w.write("\",\"title\":"); - w.safeWriteJSONString(@as([*]const u8, @ptrCast(a.title.text))[0..a.title.size]); + w.writeJSONString(@as([*]const u8, @ptrCast(a.title.text))[0..a.title.size], 1); } w.write("\"}"); w.writeJSONChildren(); @@ -567,7 +583,7 @@ const JOSNRenderer = struct { w.writeJSONType(100 + typ); w.writeJSONProps(); w.write("\"src\":\""); - w.safeWriteJSONString(@as([*]const u8, @ptrCast(img.src.text))[0..img.src.size]); + w.writeJSONString(@as([*]const u8, @ptrCast(img.src.text))[0..img.src.size], 2); w.write("\",\"alt\":"); // alt text will be added in the text callback }, c.MD_SPAN_WIKILINK => { @@ -597,20 +613,22 @@ const JOSNRenderer = struct { if (w.image_nesting_level > 0) return 0; - w.current_span = 100; if (typ == c.MD_SPAN_IMG) { const img: *c.MD_SPAN_IMG_DETAIL = @ptrCast(@alignCast(detail)); if (w.buf[w.len - 1] == ':') { w.write("\"\""); // no alt text } if (img.title.size > 0) { - w.write(",\"title\":\""); - w.safeWriteJSONString(@as([*]const u8, @ptrCast(img.title.text))[0..img.title.size]); + w.write("\"title\":\""); + w.writeJSONString(@as([*]const u8, @ptrCast(img.title.text))[0..img.title.size], 1); w.write("\""); } - w.write("}}"); + w.write("}},"); } else { - w.write("]}"); + if (w.buf[w.len - 1] == ',') { + w.len -= 1; + } + w.write("]},"); } return 0; @@ -624,22 +642,44 @@ const JOSNRenderer = struct { ) callconv(.C) c_int { const w: *Writer = @ptrCast(@alignCast(userdata)); + if (typ == c.MD_TEXT_NULLCHAR) { + // ignore null character + return 0; + } + switch (typ) { - c.MD_TEXT_NULLCHAR, c.MD_TEXT_BR, c.MD_TEXT_SOFTBR => { - // skip + c.MD_TEXT_BR => { + if (w.image_nesting_level == 0) { + w.writeJSONTypeAndChildren(c.MD_BLOCK_HTML); + w.write("\"
\n\"],"); + } else { + w.write("\" \","); + } }, - else => { - if (w.current_span != 100) { - if (w.has_span_children) w.writeByte(','); - w.has_span_children = true; + c.MD_TEXT_SOFTBR => { + if (w.image_nesting_level == 0) { + w.write("\"\\n\","); } else { - if (w.has_block_children) w.writeByte(','); - w.has_block_children = true; + w.write("\" \","); } - const text_content = @as([*]const u8, @ptrCast(ptr))[0..len]; - w.writeByte('"'); - w.safeWriteJSONString(text_content); + }, + c.MD_TEXT_ENTITY, c.MD_TEXT_HTML => { + if (w.current_block == c.MD_BLOCK_HTML) { + w.writeByte('"'); + w.writeJSONString(@as([*]const u8, @ptrCast(ptr))[0..len], 0); + w.write("\","); + } else { + w.writeJSONTypeAndChildren(c.MD_BLOCK_HTML); + w.writeByte('"'); + w.writeJSONString(@as([*]const u8, @ptrCast(ptr))[0..len], 0); + w.write("\"]},"); + } + }, + else => { + const escape: u2 = if (typ == c.MD_TEXT_CODE) 0 else 1; w.writeByte('"'); + w.writeJSONString(@as([*]const u8, @ptrCast(ptr))[0..len], escape); + w.write("\","); }, } @@ -701,6 +741,9 @@ export fn render(ptr_len: u64, flags: usize, buffer_size: usize, has_code_highli ); if (output_json) { + if (writer.buf[writer.len - 1] == ',') { + writer.len -= 1; + } writer.writeByte(']'); } diff --git a/test/test.js b/test/test.js index 2397654..d373ae5 100644 --- a/test/test.js +++ b/test/test.js @@ -137,20 +137,51 @@ Deno.test("using code hightlighter", async () => { Deno.test("render to json", async () => { const md = ` -![image.png](https://example.com/image.png 'this is an image') -![](https://example.com/image.png) - - # Jobs Stay _foolish_, stay **hungry**! +![image.png](https://example.com/image.png 'this is an image') +![](https://example.com/image.png) + [Apple](https://apple.com) Apple + + +--- + +- fruit + - Apple + - Orange + - Banana + +2. Apple +3. Orange +4. Banana + +- [ ] Make apple pie + - [x] Buy apples + - [ ] Make the crust + +| Command | Description | +| :--- | ---: | +| \`git status\` | List all *new or modified* files | +| \`git diff\` | Show file differences that **haven't been** staged | `; const tree = mdToJSON(md); assertEquals(tree, { children: [ + { type: NodeType.H1, children: ["Jobs"] }, + { + type: NodeType.P, + children: [ + "Stay ", + { type: NodeType.EM, children: ["foolish"] }, + ", stay ", + { type: NodeType.STRONG, children: ["hungry"] }, + "!", + ], + }, { type: NodeType.P, children: [ @@ -162,6 +193,7 @@ Stay _foolish_, stay **hungry**! title: "this is an image", }, }, + "\n", { type: NodeType.IMG, props: { @@ -171,17 +203,6 @@ Stay _foolish_, stay **hungry**! }, ], }, - { type: NodeType.H1, children: ["Jobs"] }, - { - type: NodeType.P, - children: [ - "Stay ", - { type: NodeType.EM, children: ["foolish"] }, - ", stay ", - { type: NodeType.STRONG, children: ["hungry"] }, - "!", - ], - }, { type: NodeType.P, children: [ @@ -190,9 +211,228 @@ Stay _foolish_, stay **hungry**! props: { href: "https://apple.com" }, children: ["Apple"], }, - '', + "\n", + { type: NodeType.HTML, children: [''] }, "Apple", - "", + { type: NodeType.HTML, children: [""] }, + ], + }, + { + type: NodeType.HTML, + children: [ + "", + "\n", + ], + }, + { + type: NodeType.HR, + }, + { + type: NodeType.UL, + children: [ + { + type: NodeType.LI, + children: [ + "fruit", + { + type: NodeType.UL, + children: [ + { + type: NodeType.LI, + children: [ + "Apple", + ], + }, + { + type: NodeType.LI, + children: [ + "Orange", + ], + }, + { + type: NodeType.LI, + children: [ + "Banana", + ], + }, + ], + }, + ], + }, + ], + }, + { + type: NodeType.OL, + props: { + start: 2, + }, + children: [ + { + children: [ + "Apple", + ], + type: NodeType.LI, + }, + { + children: [ + "Orange", + ], + type: NodeType.LI, + }, + { + children: [ + "Banana", + ], + type: NodeType.LI, + }, + ], + }, + { + type: NodeType.UL, + children: [ + { + type: NodeType.LI, + props: { + done: false, + isTask: true, + }, + children: [ + "Make apple pie", + { + type: NodeType.UL, + children: [ + { + type: NodeType.LI, + props: { + done: true, + isTask: true, + }, + children: [ + "Buy apples", + ], + }, + { + type: NodeType.LI, + props: { + done: false, + isTask: true, + }, + children: [ + "Make the crust", + ], + }, + ], + }, + ], + }, + ], + }, + { + type: NodeType.TABLE, + children: [ + { + type: NodeType.THEAD, + children: [ + { + type: NodeType.TR, + children: [ + { + type: NodeType.TH, + props: { + align: "left", + }, + children: [ + "Command", + ], + }, + { + type: NodeType.TH, + props: { + align: "right", + }, + children: [ + "Description", + ], + }, + ], + }, + ], + }, + { + type: NodeType.TBODY, + children: [ + { + type: NodeType.TR, + children: [ + { + type: NodeType.TD, + props: { + align: "left", + }, + children: [ + { + type: NodeType.CODE_SPAN, + children: [ + "git status", + ], + }, + ], + }, + { + type: NodeType.TD, + props: { + align: "right", + }, + children: [ + "List all ", + { + type: NodeType.EM, + children: [ + "new or modified", + ], + }, + " files", + ], + }, + ], + }, + { + type: NodeType.TR, + children: [ + { + type: NodeType.TD, + props: { + align: "left", + }, + children: [ + { + type: NodeType.CODE_SPAN, + children: [ + "git diff", + ], + }, + ], + }, + { + type: NodeType.TD, + props: { + align: "right", + }, + children: [ + "Show file differences that ", + { + type: NodeType.STRONG, + children: [ + "haven't been", + ], + }, + " staged", + ], + }, + ], + }, + ], + }, ], }, ],