From 92c2df20a9b3765369aa0ad5f658d4aa7e8dc77f Mon Sep 17 00:00:00 2001 From: Stephan Robotta Date: Fri, 13 Dec 2024 10:07:21 +0100 Subject: [PATCH] Fix parsing issue with html comments. --- amd/build/htmlparser.min.js | 2 +- amd/build/htmlparser.min.js.map | 2 +- amd/src/htmlparser.js | 21 +++++++- tests/js/htmlparser.test.js | 88 +++++++++++++++++++++++---------- 4 files changed, 84 insertions(+), 29 deletions(-) diff --git a/amd/build/htmlparser.min.js b/amd/build/htmlparser.min.js index 0a33969..8e3ff08 100644 --- a/amd/build/htmlparser.min.js +++ b/amd/build/htmlparser.min.js @@ -7,6 +7,6 @@ define("tiny_multilang2/htmlparser",["exports","./constants"],(function(_exports * @copyright 2024 Stephan Robotta * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later */ -class HTMLParser{constructor(){this.onTagOpen=null,this.onTagClose=null,this.onText=null,this.chunk="",this.parse=function(input){let content=input;for(;content.length>0;){let match=content.match(/<[^>]*>/);if(match){let index=match.index;if(index>0&&(this.chunk=content.substring(0,index),"function"==typeof this.onText&&this.onText(this.chunk)),this.chunk=match[0],"/"===match[0].charAt(1))"function"==typeof this.onTagClose&&this.onTagClose(match[0].substring(2,match[0].length-1).trim());else if("function"==typeof this.onTagOpen){const attr1=this.mapAttrs(match[0].match(/([\w\-_]+)="([^"]*)"/g)),attr2=this.mapAttrs(match[0].match(/([\w\-_]+)='([^']*)'/g)),tag=match[0].match(/^<(\w+)/);this.onTagOpen(tag[1].toLowerCase(),{...attr1,...attr2})}content=content.substring(index+match[0].length)}else"function"==typeof this.onText&&this.onText(content),this.chunk=content,content=""}},this.getChunk=function(){return this.chunk},this.mapAttrs=function(attrs){let res={};if(attrs)for(let i=0;i-1?mlang++:"span"===tag&&attr.class&&attr.class.indexOf("multilang-end")>-1&&(mlang--,inClose=!0),newHtml+=parser.getChunk()},parser.onTagClose=function(tag){if(_constants.blockTags.indexOf(tag)>-1&&0!=mlang)if(mlang>0)newHtml+=_constants.spanMultilangEnd,mlang--;else{const t=newHtml.lastIndexOf(_constants.spanMultilangEnd);newHtml=newHtml.substring(0,t)+_constants.spanMultilangBegin.replace(new RegExp("%lang","g"),"other")+newHtml.substring(t),mlang++}else"span"===tag&&inClose&&(inClose=!1),newHtml+=parser.getChunk()},parser.onText=function(text){if(mlang>0||inClose)return void(newHtml+=text);const intermediateReplacements=[];for(;;){const m=text.match(new RegExp("{\\s*mlang(\\s+([^}]+?))?\\s*}","i"));if(!m)break;const textBefore=text.substring(0,m.index),textAfter=text.substring(m.index+m[0].length);let r=m[0];m[2]?(r=_constants.spanMultilangBegin.replace(new RegExp("%lang","g"),m[2]),mlang++):(r=_constants.spanMultilangEnd,mlang--),intermediateReplacements.push(r),text="".concat(textBefore,"___~~").concat(intermediateReplacements.length,"~~___").concat(textAfter)}for(let i=0;i0;){let match=content.match(/<[^>]*>/);if(match){let index=match.index;if(index>0&&(this.chunk=content.substring(0,index),content=content.substring(index),"function"==typeof this.onText&&this.onText(this.chunk)),this.chunk=match[0],"/"===match[0].charAt(1)){if("function"==typeof this.onTagClose){const tag=match[0].substring(2,match[0].length-1).trim().toLowerCase();this.onTagClose(tag)}}else if(0===match[0].indexOf("\x3c!--")){let end=content.indexOf("--\x3e");-1===end?end=content.length:end+=3,this.chunk=content.substring(0,end),"function"==typeof this.onComment&&this.onComment(this.chunk)}else if("function"==typeof this.onTagOpen){const attr1=this.mapAttrs(match[0].match(/([\w\-_]+)="([^"]*)"/g)),attr2=this.mapAttrs(match[0].match(/([\w\-_]+)='([^']*)'/g)),tag=match[0].match(/^<(\w+)/);this.onTagOpen(tag[1].toLowerCase(),{...attr1,...attr2})}content=content.substring(this.chunk.length)}else"function"==typeof this.onText&&this.onText(content),this.chunk=content,content=""}},this.getChunk=function(){return this.chunk},this.mapAttrs=function(attrs){let res={};if(attrs)for(let i=0;i-1?mlang++:"span"===tag&&attr.class&&attr.class.indexOf("multilang-end")>-1&&(mlang--,inClose=!0),newHtml+=parser.getChunk()},parser.onTagClose=function(tag){if(_constants.blockTags.indexOf(tag)>-1&&0!=mlang)if(mlang>0)newHtml+=_constants.spanMultilangEnd,mlang--;else{const t=newHtml.lastIndexOf(_constants.spanMultilangEnd);newHtml=newHtml.substring(0,t)+_constants.spanMultilangBegin.replace(new RegExp("%lang","g"),"other")+newHtml.substring(t),mlang++}else"span"===tag&&inClose&&(inClose=!1),newHtml+=parser.getChunk()},parser.onText=function(text){if(mlang>0||inClose)return void(newHtml+=text);const intermediateReplacements=[];for(;;){const m=text.match(new RegExp("{\\s*mlang(\\s+([^}]+?))?\\s*}","i"));if(!m)break;const textBefore=text.substring(0,m.index),textAfter=text.substring(m.index+m[0].length);let r=m[0];m[2]?(r=_constants.spanMultilangBegin.replace(new RegExp("%lang","g"),m[2]),mlang++):(r=_constants.spanMultilangEnd,mlang--),intermediateReplacements.push(r),text="".concat(textBefore,"___~~").concat(intermediateReplacements.length,"~~___").concat(textAfter)}for(let i=0;i.\n\n/**\n * Handling of the editor content to add and remove the visual styling and\n * helper nodes to modify language settings.\n *\n * @module tiny_multilang2\n * @copyright 2024 Stephan Robotta \n * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later\n */\n\nimport {spanMultilangBegin, spanMultilangEnd, blockTags} from './constants';\n\n/**\n * This class is used to parse HTML content and call a callback function\n * when a tag is opened, closed or text is found.\n */\nclass HTMLParser {\n constructor() {\n this.onTagOpen = null;\n this.onTagClose = null;\n this.onText = null;\n this.chunk = '';\n this.parse = function(input) {\n let content = input;\n while (content.length > 0) {\n let match = content.match(/<[^>]*>/);\n if (match) {\n let index = match.index;\n if (index > 0) {\n this.chunk = content.substring(0, index);\n if (typeof this.onText === 'function') {\n this.onText(this.chunk);\n }\n }\n this.chunk = match[0];\n if (match[0].charAt(1) === '/') {\n if (typeof this.onTagClose === 'function') {\n this.onTagClose(match[0].substring(2, match[0].length - 1).trim());\n }\n } else if (typeof this.onTagOpen === 'function') {\n const attr1 = this.mapAttrs(match[0].match(/([\\w\\-_]+)=\"([^\"]*)\"/g));\n const attr2 = this.mapAttrs(match[0].match(/([\\w\\-_]+)='([^']*)'/g));\n const tag = match[0].match(/^<(\\w+)/);\n this.onTagOpen(tag[1].toLowerCase(), {...attr1, ...attr2});\n }\n content = content.substring(index + match[0].length);\n } else {\n if (typeof this.onText === 'function') {\n this.onText(content);\n }\n this.chunk = content;\n content = '';\n }\n }\n };\n this.getChunk = function() {\n return this.chunk;\n };\n this.mapAttrs = function(attrs) {\n let res = {};\n if (attrs) {\n for (let i = 0; i < attrs.length; i++) {\n let [k, v] = attrs[i].split('=');\n res[k] = v ? v.substring(1, v.length) : null;\n }\n }\n return res;\n };\n }\n}\n\nexport const parseEditorContent = function(html) {\n let newHtml = '';\n let mlang = 0;\n let inClose = false;\n const parser = new HTMLParser();\n parser.onTagOpen = function(tag, attr) {\n if (tag === 'span' && attr.class && attr.class.indexOf('multilang-begin') > -1) {\n mlang++;\n } else if (tag === 'span' && attr.class && attr.class.indexOf('multilang-end') > -1) {\n mlang--;\n inClose = true;\n }\n newHtml += parser.getChunk();\n };\n parser.onTagClose = function(tag) {\n if (blockTags.indexOf(tag) > -1 && mlang != 0) {\n if (mlang > 0) {\n newHtml += spanMultilangEnd;\n mlang--;\n } else {\n const t = newHtml.lastIndexOf(spanMultilangEnd);\n newHtml = newHtml.substring(0, t)\n + spanMultilangBegin.replace(new RegExp('%lang', 'g'), 'other')\n + newHtml.substring(t);\n mlang++;\n }\n return;\n }\n if (tag === 'span' && inClose) {\n inClose = false;\n }\n newHtml += parser.getChunk();\n };\n parser.onText = function(text) {\n if (mlang > 0 || inClose) {\n newHtml += text;\n return;\n }\n const intermediateReplacements = [];\n // eslint-disable-next-line no-constant-condition\n while (1) {\n const m = text.match(new RegExp('{\\\\s*mlang(\\\\s+([^}]+?))?\\\\s*}', 'i'));\n if (!m) {\n break;\n }\n const textBefore = text.substring(0, m.index);\n const textAfter = text.substring(m.index + m[0].length);\n let r = m[0];\n if (!m[2]) {\n r = spanMultilangEnd;\n mlang--;\n } else {\n r = spanMultilangBegin.replace(new RegExp('%lang', 'g'), m[2]);\n mlang++;\n }\n intermediateReplacements.push(r);\n text = `${textBefore}___~~${intermediateReplacements.length}~~___${textAfter}`;\n }\n // Revert all placeholders back to the original {mlang} tags.\n for (let i = 0; i < intermediateReplacements.length; i++) {\n text = text.replace(`___~~${i + 1}~~___`, intermediateReplacements[i]);\n }\n newHtml += text;\n };\n parser.parse(html);\n return newHtml;\n};\n"],"names":["HTMLParser","constructor","onTagOpen","onTagClose","onText","chunk","parse","input","content","length","match","index","substring","this","charAt","trim","attr1","mapAttrs","attr2","tag","toLowerCase","getChunk","attrs","res","i","k","v","split","html","newHtml","mlang","inClose","parser","attr","class","indexOf","blockTags","spanMultilangEnd","t","lastIndexOf","spanMultilangBegin","replace","RegExp","text","intermediateReplacements","m","textBefore","textAfter","r","push"],"mappings":";;;;;;;;;MA8BMA,WACFC,mBACSC,UAAY,UACZC,WAAa,UACbC,OAAS,UACTC,MAAQ,QACRC,MAAQ,SAASC,WACdC,QAAUD,WACPC,QAAQC,OAAS,GAAG,KACnBC,MAAQF,QAAQE,MAAM,cACtBA,MAAO,KACHC,MAAQD,MAAMC,SACdA,MAAQ,SACHN,MAAQG,QAAQI,UAAU,EAAGD,OACP,mBAAhBE,KAAKT,aACPA,OAAOS,KAAKR,aAGpBA,MAAQK,MAAM,GACQ,MAAvBA,MAAM,GAAGI,OAAO,GACe,mBAApBD,KAAKV,iBACPA,WAAWO,MAAM,GAAGE,UAAU,EAAGF,MAAM,GAAGD,OAAS,GAAGM,aAE5D,GAA8B,mBAAnBF,KAAKX,UAA0B,OACvCc,MAAQH,KAAKI,SAASP,MAAM,GAAGA,MAAM,0BACrCQ,MAAQL,KAAKI,SAASP,MAAM,GAAGA,MAAM,0BACrCS,IAAMT,MAAM,GAAGA,MAAM,gBACtBR,UAAUiB,IAAI,GAAGC,cAAe,IAAIJ,SAAUE,QAEvDV,QAAUA,QAAQI,UAAUD,MAAQD,MAAM,GAAGD,YAElB,mBAAhBI,KAAKT,aACPA,OAAOI,cAEXH,MAAQG,QACbA,QAAU,UAIjBa,SAAW,kBACLR,KAAKR,YAEXY,SAAW,SAASK,WACjBC,IAAM,MACND,UACK,IAAIE,EAAI,EAAGA,EAAIF,MAAMb,OAAQe,IAAK,KAC9BC,EAAGC,GAAKJ,MAAME,GAAGG,MAAM,KAC5BJ,IAAIE,GAAKC,EAAIA,EAAEd,UAAU,EAAGc,EAAEjB,QAAU,YAGzCc,kCAKe,SAASK,UACnCC,QAAU,GACVC,MAAQ,EACRC,SAAU,QACRC,OAAS,IAAIhC,kBACnBgC,OAAO9B,UAAY,SAASiB,IAAKc,MACjB,SAARd,KAAkBc,KAAKC,OAASD,KAAKC,MAAMC,QAAQ,oBAAsB,EACzEL,QACe,SAARX,KAAkBc,KAAKC,OAASD,KAAKC,MAAMC,QAAQ,kBAAoB,IAC9EL,QACAC,SAAU,GAEdF,SAAWG,OAAOX,YAEtBW,OAAO7B,WAAa,SAASgB,QACrBiB,qBAAUD,QAAQhB,MAAQ,GAAc,GAATW,SAC3BA,MAAQ,EACRD,SAAWQ,4BACXP,YACG,OACGQ,EAAIT,QAAQU,YAAYF,6BAC9BR,QAAUA,QAAQjB,UAAU,EAAG0B,GACzBE,8BAAmBC,QAAQ,IAAIC,OAAO,QAAS,KAAM,SACrDb,QAAQjB,UAAU0B,GACxBR,YAII,SAARX,KAAkBY,UAClBA,SAAU,GAEdF,SAAWG,OAAOX,YAEtBW,OAAO5B,OAAS,SAASuC,SACjBb,MAAQ,GAAKC,oBACbF,SAAWc,YAGTC,yBAA2B,UAEvB,OACAC,EAAIF,KAAKjC,MAAM,IAAIgC,OAAO,iCAAkC,UAC7DG,cAGCC,WAAaH,KAAK/B,UAAU,EAAGiC,EAAElC,OACjCoC,UAAYJ,KAAK/B,UAAUiC,EAAElC,MAAQkC,EAAE,GAAGpC,YAC5CuC,EAAIH,EAAE,GACLA,EAAE,IAIHG,EAAIR,8BAAmBC,QAAQ,IAAIC,OAAO,QAAS,KAAMG,EAAE,IAC3Df,UAJAkB,EAAIX,4BACJP,SAKJc,yBAAyBK,KAAKD,GAC9BL,eAAUG,2BAAkBF,yBAAyBnC,uBAAcsC,eAGlE,IAAIvB,EAAI,EAAGA,EAAIoB,yBAAyBnC,OAAQe,IACjDmB,KAAOA,KAAKF,uBAAgBjB,EAAI,WAAUoB,yBAAyBpB,IAEvEK,SAAWc,MAEfX,OAAO1B,MAAMsB,MACNC"} \ No newline at end of file +{"version":3,"file":"htmlparser.min.js","sources":["../src/htmlparser.js"],"sourcesContent":["// This file is part of Moodle - https://moodle.org/\n//\n// Moodle is free software: you can redistribute it and/or modify\n// it under the terms of the GNU General Public License as published by\n// the Free Software Foundation, either version 3 of the License, or\n// (at your option) any later version.\n//\n// Moodle is distributed in the hope that it will be useful,\n// but WITHOUT ANY WARRANTY; without even the implied warranty of\n// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n// GNU General Public License for more details.\n//\n// You should have received a copy of the GNU General Public License\n// along with Moodle. If not, see .\n\n/**\n * Handling of the editor content to add and remove the visual styling and\n * helper nodes to modify language settings.\n *\n * @module tiny_multilang2\n * @copyright 2024 Stephan Robotta \n * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later\n */\n\nimport {spanMultilangBegin, spanMultilangEnd, blockTags} from './constants';\n\n/**\n * This class is used to parse HTML content and call a callback function\n * when a tag is opened, closed or text is found.\n */\nclass HTMLParser {\n constructor() {\n this.onTagOpen = null;\n this.onTagClose = null;\n this.onText = null;\n this.onComment = null;\n this.chunk = '';\n this.parse = function(input) {\n let content = input;\n while (content.length > 0) {\n let match = content.match(/<[^>]*>/);\n if (match) {\n let index = match.index;\n if (index > 0) {\n this.chunk = content.substring(0, index);\n content = content.substring(index);\n if (typeof this.onText === 'function') {\n this.onText(this.chunk);\n }\n }\n this.chunk = match[0];\n if (match[0].charAt(1) === '/') {\n if (typeof this.onTagClose === 'function') {\n const tag = match[0].substring(2, match[0].length - 1).trim().toLowerCase();\n this.onTagClose(tag);\n }\n } else if (match[0].indexOf('');\n if (end === -1) {\n end = content.length;\n } else {\n end += 3;\n }\n this.chunk = content.substring(0, end);\n if (typeof this.onComment === 'function') {\n this.onComment(this.chunk);\n }\n } else if (typeof this.onTagOpen === 'function') {\n const attr1 = this.mapAttrs(match[0].match(/([\\w\\-_]+)=\"([^\"]*)\"/g));\n const attr2 = this.mapAttrs(match[0].match(/([\\w\\-_]+)='([^']*)'/g));\n const tag = match[0].match(/^<(\\w+)/);\n this.onTagOpen(tag[1].toLowerCase(), {...attr1, ...attr2});\n }\n content = content.substring(this.chunk.length);\n } else {\n if (typeof this.onText === 'function') {\n this.onText(content);\n }\n this.chunk = content;\n content = '';\n }\n }\n };\n this.getChunk = function() {\n return this.chunk;\n };\n this.mapAttrs = function(attrs) {\n let res = {};\n if (attrs) {\n for (let i = 0; i < attrs.length; i++) {\n let [k, v] = attrs[i].split('=');\n res[k] = v ? v.substring(1, v.length) : null;\n }\n }\n return res;\n };\n }\n}\n\nexport const parseEditorContent = function(html) {\n let newHtml = '';\n let mlang = 0;\n let inClose = false;\n const parser = new HTMLParser();\n parser.onTagOpen = function(tag, attr) {\n if (tag === 'span' && attr.class && attr.class.indexOf('multilang-begin') > -1) {\n mlang++;\n } else if (tag === 'span' && attr.class && attr.class.indexOf('multilang-end') > -1) {\n mlang--;\n inClose = true;\n }\n newHtml += parser.getChunk();\n };\n parser.onTagClose = function(tag) {\n if (blockTags.indexOf(tag) > -1 && mlang != 0) {\n if (mlang > 0) {\n newHtml += spanMultilangEnd;\n mlang--;\n } else {\n const t = newHtml.lastIndexOf(spanMultilangEnd);\n newHtml = newHtml.substring(0, t)\n + spanMultilangBegin.replace(new RegExp('%lang', 'g'), 'other')\n + newHtml.substring(t);\n mlang++;\n }\n return;\n }\n if (tag === 'span' && inClose) {\n inClose = false;\n }\n newHtml += parser.getChunk();\n };\n parser.onText = function(text) {\n if (mlang > 0 || inClose) {\n newHtml += text;\n return;\n }\n const intermediateReplacements = [];\n // eslint-disable-next-line no-constant-condition\n while (1) {\n const m = text.match(new RegExp('{\\\\s*mlang(\\\\s+([^}]+?))?\\\\s*}', 'i'));\n if (!m) {\n break;\n }\n const textBefore = text.substring(0, m.index);\n const textAfter = text.substring(m.index + m[0].length);\n let r = m[0];\n if (!m[2]) {\n r = spanMultilangEnd;\n mlang--;\n } else {\n r = spanMultilangBegin.replace(new RegExp('%lang', 'g'), m[2]);\n mlang++;\n }\n intermediateReplacements.push(r);\n text = `${textBefore}___~~${intermediateReplacements.length}~~___${textAfter}`;\n }\n // Revert all placeholders back to the original {mlang} tags.\n for (let i = 0; i < intermediateReplacements.length; i++) {\n text = text.replace(`___~~${i + 1}~~___`, intermediateReplacements[i]);\n }\n newHtml += text;\n };\n parser.onComment = function(comment) {\n newHtml += comment;\n };\n parser.parse(html);\n return newHtml;\n};\n"],"names":["HTMLParser","constructor","onTagOpen","onTagClose","onText","onComment","chunk","parse","input","content","length","match","index","substring","this","charAt","tag","trim","toLowerCase","indexOf","end","attr1","mapAttrs","attr2","getChunk","attrs","res","i","k","v","split","html","newHtml","mlang","inClose","parser","attr","class","blockTags","spanMultilangEnd","t","lastIndexOf","spanMultilangBegin","replace","RegExp","text","intermediateReplacements","m","textBefore","textAfter","r","push","comment"],"mappings":";;;;;;;;;MA8BMA,WACFC,mBACSC,UAAY,UACZC,WAAa,UACbC,OAAS,UACTC,UAAY,UACZC,MAAQ,QACRC,MAAQ,SAASC,WACdC,QAAUD,WACPC,QAAQC,OAAS,GAAG,KACnBC,MAAQF,QAAQE,MAAM,cACtBA,MAAO,KACHC,MAAQD,MAAMC,SACdA,MAAQ,SACHN,MAAQG,QAAQI,UAAU,EAAGD,OAClCH,QAAUA,QAAQI,UAAUD,OACD,mBAAhBE,KAAKV,aACPA,OAAOU,KAAKR,aAGpBA,MAAQK,MAAM,GACQ,MAAvBA,MAAM,GAAGI,OAAO,OACe,mBAApBD,KAAKX,WAA2B,OACjCa,IAAML,MAAM,GAAGE,UAAU,EAAGF,MAAM,GAAGD,OAAS,GAAGO,OAAOC,mBACzDf,WAAWa,WAEjB,GAAiC,IAA7BL,MAAM,GAAGQ,QAAQ,WAAe,KACnCC,IAAMX,QAAQU,QAAQ,WACb,IAATC,IACAA,IAAMX,QAAQC,OAEdU,KAAO,OAENd,MAAQG,QAAQI,UAAU,EAAGO,KACJ,mBAAnBN,KAAKT,gBACPA,UAAUS,KAAKR,YAErB,GAA8B,mBAAnBQ,KAAKZ,UAA0B,OACvCmB,MAAQP,KAAKQ,SAASX,MAAM,GAAGA,MAAM,0BACrCY,MAAQT,KAAKQ,SAASX,MAAM,GAAGA,MAAM,0BACrCK,IAAML,MAAM,GAAGA,MAAM,gBACtBT,UAAUc,IAAI,GAAGE,cAAe,IAAIG,SAAUE,QAEvDd,QAAUA,QAAQI,UAAUC,KAAKR,MAAMI,YAEZ,mBAAhBI,KAAKV,aACPA,OAAOK,cAEXH,MAAQG,QACbA,QAAU,UAIjBe,SAAW,kBACLV,KAAKR,YAEXgB,SAAW,SAASG,WACjBC,IAAM,MACND,UACK,IAAIE,EAAI,EAAGA,EAAIF,MAAMf,OAAQiB,IAAK,KAC9BC,EAAGC,GAAKJ,MAAME,GAAGG,MAAM,KAC5BJ,IAAIE,GAAKC,EAAIA,EAAEhB,UAAU,EAAGgB,EAAEnB,QAAU,YAGzCgB,kCAKe,SAASK,UACnCC,QAAU,GACVC,MAAQ,EACRC,SAAU,QACRC,OAAS,IAAInC,kBACnBmC,OAAOjC,UAAY,SAASc,IAAKoB,MACjB,SAARpB,KAAkBoB,KAAKC,OAASD,KAAKC,MAAMlB,QAAQ,oBAAsB,EACzEc,QACe,SAARjB,KAAkBoB,KAAKC,OAASD,KAAKC,MAAMlB,QAAQ,kBAAoB,IAC9Ec,QACAC,SAAU,GAEdF,SAAWG,OAAOX,YAEtBW,OAAOhC,WAAa,SAASa,QACrBsB,qBAAUnB,QAAQH,MAAQ,GAAc,GAATiB,SAC3BA,MAAQ,EACRD,SAAWO,4BACXN,YACG,OACGO,EAAIR,QAAQS,YAAYF,6BAC9BP,QAAUA,QAAQnB,UAAU,EAAG2B,GACzBE,8BAAmBC,QAAQ,IAAIC,OAAO,QAAS,KAAM,SACrDZ,QAAQnB,UAAU2B,GACxBP,YAII,SAARjB,KAAkBkB,UAClBA,SAAU,GAEdF,SAAWG,OAAOX,YAEtBW,OAAO/B,OAAS,SAASyC,SACjBZ,MAAQ,GAAKC,oBACbF,SAAWa,YAGTC,yBAA2B,UAEvB,OACAC,EAAIF,KAAKlC,MAAM,IAAIiC,OAAO,iCAAkC,UAC7DG,cAGCC,WAAaH,KAAKhC,UAAU,EAAGkC,EAAEnC,OACjCqC,UAAYJ,KAAKhC,UAAUkC,EAAEnC,MAAQmC,EAAE,GAAGrC,YAC5CwC,EAAIH,EAAE,GACLA,EAAE,IAIHG,EAAIR,8BAAmBC,QAAQ,IAAIC,OAAO,QAAS,KAAMG,EAAE,IAC3Dd,UAJAiB,EAAIX,4BACJN,SAKJa,yBAAyBK,KAAKD,GAC9BL,eAAUG,2BAAkBF,yBAAyBpC,uBAAcuC,eAGlE,IAAItB,EAAI,EAAGA,EAAImB,yBAAyBpC,OAAQiB,IACjDkB,KAAOA,KAAKF,uBAAgBhB,EAAI,WAAUmB,yBAAyBnB,IAEvEK,SAAWa,MAEfV,OAAO9B,UAAY,SAAS+C,SACxBpB,SAAWoB,SAEfjB,OAAO5B,MAAMwB,MACNC"} \ No newline at end of file diff --git a/amd/src/htmlparser.js b/amd/src/htmlparser.js index 5a893a3..76ecdde 100644 --- a/amd/src/htmlparser.js +++ b/amd/src/htmlparser.js @@ -33,6 +33,7 @@ class HTMLParser { this.onTagOpen = null; this.onTagClose = null; this.onText = null; + this.onComment = null; this.chunk = ''; this.parse = function(input) { let content = input; @@ -42,6 +43,7 @@ class HTMLParser { let index = match.index; if (index > 0) { this.chunk = content.substring(0, index); + content = content.substring(index); if (typeof this.onText === 'function') { this.onText(this.chunk); } @@ -49,7 +51,19 @@ class HTMLParser { this.chunk = match[0]; if (match[0].charAt(1) === '/') { if (typeof this.onTagClose === 'function') { - this.onTagClose(match[0].substring(2, match[0].length - 1).trim()); + const tag = match[0].substring(2, match[0].length - 1).trim().toLowerCase(); + this.onTagClose(tag); + } + } else if (match[0].indexOf(''); + if (end === -1) { + end = content.length; + } else { + end += 3; + } + this.chunk = content.substring(0, end); + if (typeof this.onComment === 'function') { + this.onComment(this.chunk); } } else if (typeof this.onTagOpen === 'function') { const attr1 = this.mapAttrs(match[0].match(/([\w\-_]+)="([^"]*)"/g)); @@ -57,7 +71,7 @@ class HTMLParser { const tag = match[0].match(/^<(\w+)/); this.onTagOpen(tag[1].toLowerCase(), {...attr1, ...attr2}); } - content = content.substring(index + match[0].length); + content = content.substring(this.chunk.length); } else { if (typeof this.onText === 'function') { this.onText(content); @@ -147,6 +161,9 @@ export const parseEditorContent = function(html) { } newHtml += text; }; + parser.onComment = function(comment) { + newHtml += comment; + }; parser.parse(html); return newHtml; }; diff --git a/tests/js/htmlparser.test.js b/tests/js/htmlparser.test.js index dd69351..20ec12a 100644 --- a/tests/js/htmlparser.test.js +++ b/tests/js/htmlparser.test.js @@ -21,36 +21,74 @@ test('html 2', () => { }); test('html 3', () => { - const html = `

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

-

This is a multilang link: {mlang - de}de{mlang}{mlang en}other{mlang}

-

{mlang de}

-

ein Paragraf auf Deutch

-

-

{mlang}

-

{mlang other}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

-

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

-

{mlang en}English rules{mlang}

`; - const parsed = `

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

-

This is a multilang link: {mlang de}de{mlang}{mlang en}other{mlang}

-

{mlang de}{mlang} -

ein Paragraf auf Deutch

-

-

{mlang other}{mlang} -

{mlang other}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.{mlang} -

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

-

{mlang en}English rules{mlang}

`; + const html = '

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

\n' + + '

This is a multilang link: {mlang\n' + + ' de}de{mlang}{mlang en}other{mlang}

\n' + + '

{mlang de}

\n' + + '

ein Paragraf auf Deutch

\n' + + '

\n' + + '

{mlang}

\n' + + '

{mlang other}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

\n' + + '

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

\n' + + '

{mlang en}English rules' + + '{mlang}

'; + const parsed = '

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

\n' + + '

This is a multilang link: ' + + '{mlang de}de{mlang}{mlang en}other{mlang}

\n' + + '

{mlang de}{mlang}\n' + + '

ein Paragraf auf Deutch

\n' + + '

\n' + + '

{mlang other}{mlang}\n' + + '

{mlang other}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.' + + '{mlang}\n' + + '

{mlang en}This is a test{mlang}{mlang de}Das ist ein Test{mlang}.

\n' + + '

{mlang en}English rules{mlang}

'; expect(parseEditorContent(html)).toEqual(parsed); }); test('html 4', () => { - const html = `

{mlang en}English rules{mlang}

`; + const html = '

{mlang en}English rules' + + '{mlang}' + + '

'; const parsed = '

{mlang en}English rules{mlang}

'; expect(parseEditorContent(html)).toEqual(parsed); +}); +test('html 5', () => { + const html = '

\n' + + '

{mlang other}Hello{mlang}

\n' + + '

Done

\n'; + const parsed = '

\n' + + '

{mlang other}Hello' + + '{mlang}

\n' + + '

Done

\n'; + expect(parseEditorContent(html)).toEqual(parsed); }); \ No newline at end of file