From a469a6510122356a7cae3fb1259e999e6cc34c94 Mon Sep 17 00:00:00 2001 From: Jamie Peabody Date: Sun, 16 Jun 2024 20:36:17 +0100 Subject: [PATCH] feat: Supports unicode diacritical marks when rendering line diff (fixes #169) (#197) --- src/diff.js | 8 +++- src/vdoc.js | 20 ++++++++- test/markup.spec.js | 104 +++++++++++++++++++++++++++++++++++++++++++- webpack.dev.js | 5 ++- 4 files changed, 130 insertions(+), 7 deletions(-) diff --git a/src/diff.js b/src/diff.js index 72e5a55..4211b3c 100644 --- a/src/diff.js +++ b/src/diff.js @@ -244,7 +244,9 @@ function CodeifyText(lhs, rhs, options) { if (typeof lhs === 'string') { if (this.options.split === 'chars') { - this.lhs = lhs.split(''); + // split characters and include their diacritical marks + this.lhs = lhs.match(/\p{Letter}\p{Mark}*|\p{White_Space}/gu) || []; + // this.lhs = [...lhs]; } else if (this.options.split === 'words') { this.lhs = lhs.split(/\s/); } else if (this.options.split === 'lines') { @@ -255,7 +257,9 @@ function CodeifyText(lhs, rhs, options) { } if (typeof rhs === 'string') { if (this.options.split === 'chars') { - this.rhs = rhs.split(''); + // split characters and include their diacritical marks + this.rhs = rhs.match(/\p{Letter}\p{Mark}*|\p{White_Space}/gu) || []; + // this.rhs = [...rhs]; } else if (this.options.split === 'words') { this.rhs = rhs.split(/\s/); } else if (this.options.split === 'lines') { diff --git a/src/vdoc.js b/src/vdoc.js index 190927c..9ee8366 100644 --- a/src/vdoc.js +++ b/src/vdoc.js @@ -2,6 +2,8 @@ const diff = require('./diff'); const trace = console.log; +const expLetters = new RegExp(/\p{Letter}\p{Mark}*|\p{White_Space}/gu); + class VDoc { constructor(options) { this.options = options; @@ -275,15 +277,18 @@ class VLine { editor.setGutterMarker(this.id, name, item); } if (this.markup.length) { + // while Mergely diffs unicode chars (letters+mark), CM is by character, + // so diffs need to be mapped. + const mapped = mapLettersToChars(editor.getValue()); for (const markup of this.markup) { const [ charFrom, charTo, className ] = markup; const fromPos = { line: this.id }; const toPos = { line: this.id }; if (charFrom >= 0) { - fromPos.ch = charFrom; + fromPos.ch = mapped[charFrom]; } if (charTo >= 0) { - toPos.ch = charTo; + toPos.ch = mapped[charTo]; } this._clearMarkup.push( editor.markText(fromPos, toPos, { className })); @@ -334,4 +339,15 @@ function getExtents(side, change) { }; } +function mapLettersToChars(text) { + let match; + let mapped = {}; + let index = 0; + expLetters.lastIndex = 0; + while ((match = expLetters.exec(text)) !== null) { + mapped[index++] = match.index; + } + return mapped; +} + module.exports = VDoc; diff --git a/test/markup.spec.js b/test/markup.spec.js index 6e2be09..61c3341 100644 --- a/test/markup.spec.js +++ b/test/markup.spec.js @@ -233,7 +233,109 @@ describe('markup', () => { expect(rhs_spans[1].innerText).to.equal('h'); expect(rhs_spans[2].innerText).to.equal('ir'); } - } + }, + { + name: 'single word single diacritic non-spacing marks', + lhs: 'كلمة', + rhs: 'كَلمة', + check: (editor) => { + expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1); + const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0'); + expect(lhs_spans).to.have.length(1); + expect(lhs_spans[0].innerText).to.equal('ك'); + const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0'); + expect(rhs_spans).to.have.length(1); + expect(rhs_spans[0].innerText).to.equal('كَ'); + } + }, + { + name: 'single word multiple diacritic non-spacing marks', + lhs: ['\u006E', '\u0061', '\u0314', '\u0065'].join(''), // na̔e + rhs: ['\u006E', '\u0061', '\u0314', '\u034A', '\u0065'].join(''), // na̔͊e + check: (editor) => { + expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1); + const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0'); + expect(lhs_spans).to.have.length(1); + expect(lhs_spans[0].innerText).to.equal(['\u0061', '\u0314'].join('')); + const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0'); + expect(rhs_spans).to.have.length(1); + expect(rhs_spans[0].innerText).to.equal('a̔͊'); + } + }, + { + name: 'multiple words diacritic non-spacing marks', + lhs: 'كلمة اخرى', + rhs: 'كْلمة اخرى', + check: (editor) => { + expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1); + const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0'); + expect(lhs_spans).to.have.length(1); + expect(lhs_spans[0].innerText).to.equal('ك'); + const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0'); + expect(rhs_spans).to.have.length(1); + expect(rhs_spans[0].innerText).to.equal('كْ'); + } + }, + { + name: 'nonnormalizable diacritic non-spacing marks', + lhs: 'naeg', + // there are 2 marks on 'e', tilde (0303) and x (0353) + rhs: ['\u006E', '\u0061', '\u0353', '\u0065', '\u0353', '\u0303', '\u0067'].join(''), + check: (editor) => { + expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1); + const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0'); + expect(lhs_spans).to.have.length(1); + expect(lhs_spans[0].innerText).to.equal('ae'); + const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0'); + expect(rhs_spans).to.have.length(1); + expect(rhs_spans[0].innerText).to.equal( + ['\u0061', '\u0353', '\u0065', '\u0353', '\u0303'].join('') + ); + } + }, + { + name: 'nonnormalizable diacritic non-spacing marks', + lhs: [ + '\u0065', '\u0353', '\u0303', + '\u0065', '\u0353', '\u0303', + '\u0065', '\u0353', '\u0303', + 'x', + '\u0065', '\u0353', '\u0303', + ].join(''), + // there are 2 marks on 'e', tilde (0303) and x (0353) + rhs: [ + '\u0065', '\u0353', '\u0303', + '\u0065', '\u0353', '\u0303', + '\u0065', '\u0353', '\u0303', + 'y', + '\u0065', '\u0353', '\u0303', + ].join(''), + check: (editor) => { + expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1); + expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1); + const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0'); + expect(lhs_spans).to.have.length(1); + expect(lhs_spans[0].innerText).to.equal('x'); + const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0'); + expect(rhs_spans).to.have.length(1); + expect(rhs_spans[0].innerText).to.equal('y'); + } + }, + ]; // to debug, add `only: true` to the test `opts` above, and run `npm run debug` diff --git a/webpack.dev.js b/webpack.dev.js index aeddaad..9d6178a 100644 --- a/webpack.dev.js +++ b/webpack.dev.js @@ -1,4 +1,5 @@ const path = require('path') +const chalk = require('chalk'); const HtmlWebpackPlugin = require('html-webpack-plugin'); module.exports = { @@ -50,8 +51,8 @@ module.exports = { compiler.hooks.entryOption.tap('MyPlugin', (context, entry) => { console.log('-'.repeat(78)); console.log('Applications:'); - console.log('http://localhost:8080/app.html'); - console.log('http://localhost:8080/app-styles.html'); + console.log(chalk.bold(chalk.underline(chalk.cyan('http://localhost:8080/app.html')))); + console.log(chalk.bold(chalk.underline(chalk.cyan('http://localhost:8080/app-styles.html')))); console.log('-'.repeat(78)); }); }