Skip to content

Commit

Permalink
feat: Supports unicode diacritical marks when rendering line diff (fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
wickedest committed Jun 16, 2024
1 parent 65c71be commit 5f3034f
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 7 deletions.
8 changes: 6 additions & 2 deletions src/diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ function CodeifyText(lhs, rhs, options) {

if (typeof lhs === 'string') {
if (this.options.split === 'chars') {
this.lhs = lhs.split('');
// split characters and include their diacritical marks
this.lhs = lhs.match(/\p{Letter}\p{Mark}*|\p{White_Space}/gu) || [];
// this.lhs = [...lhs];
} else if (this.options.split === 'words') {
this.lhs = lhs.split(/\s/);
} else if (this.options.split === 'lines') {
Expand All @@ -255,7 +257,9 @@ function CodeifyText(lhs, rhs, options) {
}
if (typeof rhs === 'string') {
if (this.options.split === 'chars') {
this.rhs = rhs.split('');
// split characters and include their diacritical marks
this.rhs = rhs.match(/\p{Letter}\p{Mark}*|\p{White_Space}/gu) || [];
// this.rhs = [...rhs];
} else if (this.options.split === 'words') {
this.rhs = rhs.split(/\s/);
} else if (this.options.split === 'lines') {
Expand Down
20 changes: 18 additions & 2 deletions src/vdoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ const diff = require('./diff');

const trace = console.log;

const expLetters = new RegExp(/\p{Letter}\p{Mark}*|\p{White_Space}/gu);

class VDoc {
constructor(options) {
this.options = options;
Expand Down Expand Up @@ -275,15 +277,18 @@ class VLine {
editor.setGutterMarker(this.id, name, item);
}
if (this.markup.length) {
// while Mergely diffs unicode chars (letters+mark), CM is by character,
// so diffs need to be mapped.
const mapped = mapLettersToChars(editor.getValue());
for (const markup of this.markup) {
const [ charFrom, charTo, className ] = markup;
const fromPos = { line: this.id };
const toPos = { line: this.id };
if (charFrom >= 0) {
fromPos.ch = charFrom;
fromPos.ch = mapped[charFrom];
}
if (charTo >= 0) {
toPos.ch = charTo;
toPos.ch = mapped[charTo];
}
this._clearMarkup.push(
editor.markText(fromPos, toPos, { className }));
Expand Down Expand Up @@ -334,4 +339,15 @@ function getExtents(side, change) {
};
}

function mapLettersToChars(text) {
let match;
let mapped = {};
let index = 0;
expLetters.lastIndex = 0;
while ((match = expLetters.exec(text)) !== null) {
mapped[index++] = match.index;
}
return mapped;
}

module.exports = VDoc;
104 changes: 103 additions & 1 deletion test/markup.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,109 @@ describe('markup', () => {
expect(rhs_spans[1].innerText).to.equal('h');
expect(rhs_spans[2].innerText).to.equal('ir');
}
}
},
{
name: 'single word single diacritic non-spacing marks',
lhs: 'كلمة',
rhs: 'كَلمة',
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(1);
expect(lhs_spans[0].innerText).to.equal('ك');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(1);
expect(rhs_spans[0].innerText).to.equal('كَ');
}
},
{
name: 'single word multiple diacritic non-spacing marks',
lhs: ['\u006E', '\u0061', '\u0314', '\u0065'].join(''), // na̔e
rhs: ['\u006E', '\u0061', '\u0314', '\u034A', '\u0065'].join(''), // na̔͊e
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(1);
expect(lhs_spans[0].innerText).to.equal(['\u0061', '\u0314'].join(''));
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(1);
expect(rhs_spans[0].innerText).to.equal('a̔͊');
}
},
{
name: 'multiple words diacritic non-spacing marks',
lhs: 'كلمة اخرى',
rhs: 'كْلمة اخرى',
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(1);
expect(lhs_spans[0].innerText).to.equal('ك');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(1);
expect(rhs_spans[0].innerText).to.equal('كْ');
}
},
{
name: 'nonnormalizable diacritic non-spacing marks',
lhs: 'naeg',
// there are 2 marks on 'e', tilde (0303) and x (0353)
rhs: ['\u006E', '\u0061', '\u0353', '\u0065', '\u0353', '\u0303', '\u0067'].join(''),
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(1);
expect(lhs_spans[0].innerText).to.equal('ae');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(1);
expect(rhs_spans[0].innerText).to.equal(
['\u0061', '\u0353', '\u0065', '\u0353', '\u0303'].join('')
);
}
},
{
name: 'nonnormalizable diacritic non-spacing marks',
lhs: [
'\u0065', '\u0353', '\u0303',
'\u0065', '\u0353', '\u0303',
'\u0065', '\u0353', '\u0303',
'x',
'\u0065', '\u0353', '\u0303',
].join(''),
// there are 2 marks on 'e', tilde (0303) and x (0353)
rhs: [
'\u0065', '\u0353', '\u0303',
'\u0065', '\u0353', '\u0303',
'\u0065', '\u0353', '\u0303',
'y',
'\u0065', '\u0353', '\u0303',
].join(''),
check: (editor) => {
expect(editor.querySelectorAll(LHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(LHS_CHANGE_END + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_START + '.cid-0')).to.have.length(1);
expect(editor.querySelectorAll(RHS_CHANGE_END + '.cid-0')).to.have.length(1);
const lhs_spans = editor.querySelectorAll(LHS_INLINE_TEXT + '.cid-0');
expect(lhs_spans).to.have.length(1);
expect(lhs_spans[0].innerText).to.equal('x');
const rhs_spans = editor.querySelectorAll(RHS_INLINE_TEXT + '.cid-0');
expect(rhs_spans).to.have.length(1);
expect(rhs_spans[0].innerText).to.equal('y');
}
},

];

// to debug, add `only: true` to the test `opts` above, and run `npm run debug`
Expand Down
5 changes: 3 additions & 2 deletions webpack.dev.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const path = require('path')
const chalk = require('chalk');
const HtmlWebpackPlugin = require('html-webpack-plugin');

module.exports = {
Expand Down Expand Up @@ -50,8 +51,8 @@ module.exports = {
compiler.hooks.entryOption.tap('MyPlugin', (context, entry) => {
console.log('-'.repeat(78));
console.log('Applications:');
console.log('http://localhost:8080/app.html');
console.log('http://localhost:8080/app-styles.html');
console.log(chalk.bold(chalk.underline(chalk.cyan('http://localhost:8080/app.html'))));
console.log(chalk.bold(chalk.underline(chalk.cyan('http://localhost:8080/app-styles.html'))));
console.log('-'.repeat(78));
});
}
Expand Down

0 comments on commit 5f3034f

Please sign in to comment.