Skip to content

Commit

Permalink
Handle section breaks when pasting text from other sources (#210)
Browse files Browse the repository at this point in the history
* On paste, convert a line containing triple dashes into a section break

* Convert typed '---' live into a section break.

* Support section breaks from Word Desktop

* Rename

* Don't reimplement inputRules

* Add some documentation

* Small tweak to avoid too many section breaks

* Doc tweak

* Trying to fix PW failure
  • Loading branch information
bosschaert authored Sep 12, 2024
1 parent ec51c5c commit 499103e
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 11 deletions.
37 changes: 37 additions & 0 deletions blocks/edit/prose/index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
/* eslint-disable max-classes-per-file */
import {
DOMParser,
EditorState,
EditorView,
Schema,
TextSelection,
baseSchema,
history,
buildKeymap,
Expand All @@ -17,6 +19,8 @@ import {
liftListItem,
sinkListItem,
gapCursor,
InputRule,
inputRules,
Y,
WebsocketProvider,
ySyncPlugin,
Expand All @@ -31,6 +35,7 @@ import prose2aem from '../../shared/prose2aem.js';
import menu from './plugins/menu.js';
import imageDrop from './plugins/imageDrop.js';
import linkConverter from './plugins/linkConverter.js';
import sectionPasteHandler from './plugins/sectionPasteHandler.js';
import { COLLAB_ORIGIN, getDaAdmin } from '../../shared/constants.js';
import { addLocNodes, getLocClass } from './loc-utils.js';

Expand Down Expand Up @@ -201,6 +206,36 @@ function generateColor(name, hRange = [0, 360], sRange = [60, 80], lRange = [40,
return `#${f(0)}${f(8)}${f(4)}`;
}

/**
 * Build the input rule that replaces three dashes followed by a newline with the
 * nodes parsed from an <hr> element (a horizontal rule is what this editor
 * interprets as a section break).
 *
 * The handler does not return a transaction: it builds one and passes it directly
 * to dispatchTransaction.
 * NOTE(review): dispatchTransaction is not defined in this function; presumably it
 * is declared elsewhere in this file - confirm.
 *
 * @returns {InputRule} rule whose pattern matches exactly `---` plus a newline
 */
function getDashesInputRule() {
  const dashesThenNewline = /^---[\n]$/;

  const replaceWithRule = (state, match, start, end) => {
    // Parse a <div><hr></div> fragment with the editor schema to obtain the nodes to insert.
    const wrapper = document.createElement('div');
    const rule = document.createElement('hr');
    wrapper.append(rule);
    const parsed = DOMParser.fromSchema(state.schema).parse(wrapper);

    // Select the matched range so that the parsed nodes replace the typed dashes.
    const matchedRange = TextSelection.create(state.doc, start, end);
    const tr = state.tr.setSelection(matchedRange).replaceSelectionWith(parsed);
    dispatchTransaction(tr);
  };

  return new InputRule(dashesThenNewline, replaceWithRule);
}

/**
 * Create the input rules plugin for the dashes rule and extend it with a keydown
 * handler, so that the rule is also evaluated when the Enter key is pressed.
 *
 * On Enter with a cursor selection, the plugin's own handleTextInput is invoked as
 * if a newline had been typed at the cursor position; its result is returned.
 * Any other key, or a selection without a cursor, yields false.
 *
 * @returns the inputRules plugin with an added handleKeyDown prop
 */
function getInputRulesPlugin() {
  const plugin = inputRules({ rules: [getDashesInputRule()] });

  // Add the handleKeyDown function
  plugin.props.handleKeyDown = (view, event) => {
    if (event.key === 'Enter') {
      const { $cursor: cursor } = view.state.selection;
      if (cursor) {
        return plugin.props.handleTextInput(view, cursor.pos, cursor.pos, '\n');
      }
    }
    return false;
  };

  return plugin;
}

export default function initProse({ editor, path }) {
const schema = getSchema();

Expand Down Expand Up @@ -258,8 +293,10 @@ export default function initProse({ editor, path }) {
menu,
imageDrop(schema),
linkConverter(schema),
sectionPasteHandler(schema),
columnResizing(),
tableEditing(),
getInputRulesPlugin(),
keymap(buildKeymap(schema)),
keymap(baseKeymap),
keymap({
Expand Down
153 changes: 153 additions & 0 deletions blocks/edit/prose/plugins/sectionPasteHandler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import { Plugin, Slice } from 'da-y-wrapper';

/**
 * Flush the nodes collected so far into a new paragraph.
 *
 * When paraContent holds at least one node, a paragraph (in JSON form) containing a
 * copy of those nodes is appended to newContent and paraContent is emptied in place.
 * When paraContent is empty nothing happens.
 *
 * @param {Array} paraContent nodes collected for the current paragraph; emptied on flush
 * @param {Array} newContent output list that receives the new paragraph
 */
function closeParagraph(paraContent, newContent) {
  if (paraContent.length === 0) return;

  newContent.push({ type: 'paragraph', content: Array.from(paraContent) });
  // Reset the accumulator itself (not a copy) so the caller can keep collecting into it.
  paraContent.length = 0;
}

/**
 * Insert a horizontal rule after each section found in HTML pasted from desktop Word.
 *
 * Desktop Word represents each section as a top-level div element directly under
 * the body element. The HTML is only treated as a Word document when it carries a
 * <meta name="ProgId"> element whose content is 'Word.Document'.
 *
 * @param {string} html the pasted HTML
 * @returns {string} the serialized document with hr elements added, or the original
 *   html when it is not a Word document, nothing was added, or an error occurred
 */
function handleDesktopWordSectionBreaks(html) {
  try {
    const doc = new DOMParser().parseFromString(html, 'text/html');

    const progId = doc.querySelector('meta[name="ProgId"]')?.content;
    if (progId !== 'Word.Document') {
      // Not pasted from desktop Word: leave the HTML alone.
      return html;
    }

    let addedRules = 0;
    for (const section of doc.querySelectorAll('body > div')) {
      // A section that is not followed by anything needs no break after it.
      if (section.nextElementSibling) {
        section.after(doc.createElement('hr'));
        addedRules += 1;
      }
    }

    if (addedRules === 0) {
      return html;
    }
    return new XMLSerializer().serializeToString(doc);
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error handling desktop Word section breaks:', error);
    return html;
  }
}

/**
 * Decide whether the raw value of a data-ccp-props attribute marks a section break,
 * i.e. whether it is JSON holding an object (or array) with 'single' among its values.
 *
 * @param {string|null} rawProps the attribute value
 * @returns {boolean} true when one of the JSON values is the string 'single'
 */
function hasSingleCcpProp(rawProps) {
  let props;
  try {
    props = JSON.parse(rawProps);
  } catch {
    // Not JSON, so this span cannot carry the marker. Only this span is ignored;
    // the remaining spans of the paste are still inspected.
    return false;
  }
  if (props === null || typeof props !== 'object') return false;
  return Object.values(props).includes('single');
}

/**
 * Find section breaks in HTML pasted from Word online and add a horizontal rule
 * after each one.
 * In Word online section breaks are quite hard to identify, but it turns out that
 * they seem to be indicated by a span element with a data-ccp-props attribute, of
 * which one of the values is 'single'. This is quite brittle but right now seems
 * to be the only way to find them. In the future Word online might provide a
 * better way to identify section breaks.
 *
 * A span whose data-ccp-props value is not a JSON object is skipped on its own, so
 * that one unexpected attribute does not discard the section breaks found in the
 * rest of the pasted content.
 *
 * @param {string} html the pasted HTML
 * @returns {string} the serialized document with hr elements added, or the original
 *   html when no section break was found or an error occurred
 */
function handleWordOnlineSectionBreaks(html) {
  try {
    const doc = new DOMParser().parseFromString(html, 'text/html');

    let modified = false;
    // The span[data-ccp-props] elements are the magic indicator: if one of the JSON
    // values in there is the word 'single', a section break follows the paragraph.
    doc.querySelectorAll('div > p > span[data-ccp-props]').forEach((span) => {
      if (hasSingleCcpProp(span.getAttribute('data-ccp-props'))) {
        span.parentNode.after(doc.createElement('hr'));
        modified = true;
      }
    });

    if (!modified) {
      return html;
    }

    return new XMLSerializer().serializeToString(doc);
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error handling Word online section breaks:', error);
    return html;
  }
}

/**
 * When text is pasted, handle section breaks.
 *
 * @param schema the editor schema used to rebuild the pasted slice from JSON
 * @returns {Plugin} a plugin providing transformPastedHTML and transformPasted props
 */
export default function sectionPasteHandler(schema) {
  // A text node that, ignoring surrounding whitespace, consists of exactly three dashes.
  const isDashes = (node) => node.type === 'text' && node.text.trim() === '---';

  return new Plugin({
    props: {
      /* A section break entered in Word is not kept in the text of the document, but
       * buried in the HTML that is pasted. This function uses highly specific ways to find
       * these section breaks and adds a <hr/> element for them.
       */
      transformPastedHTML: (html) => {
        const withDesktopBreaks = handleDesktopWordSectionBreaks(html);
        return handleWordOnlineSectionBreaks(withDesktopBreaks);
      },

      /* Convert 3 dashes on a line by itself (top level only) to a horizontal rule,
       * which is then interpreted as a section break.
       */
      transformPasted: (slice) => {
        // ProseMirror serializes an empty slice to null, so the content has to be
        // read defensively instead of destructured.
        const content = slice.toJSON()?.content;
        if (!content) return slice;

        const newContent = [];
        let changed = false;

        for (const el of content) {
          // Anything that is not a paragraph containing a dashes line is passed through
          // untouched. This covers empty paragraphs, which are serialized without a
          // content property, and keeps any attributes of unaffected paragraphs.
          if (el.type !== 'paragraph' || !el.content?.some(isDashes)) {
            newContent.push(el);
          } else {
            changed = true;
            const newParaCont = [];

            for (const pc of el.content) {
              if (isDashes(pc)) {
                // End the paragraph collected so far and put the break after it.
                closeParagraph(newParaCont, newContent);
                newContent.push({ type: 'horizontal_rule' });
              } else {
                newParaCont.push(pc);
              }
            }

            closeParagraph(newParaCont, newContent);
          }
        }

        // Nothing was converted: hand back the pasted slice as it was.
        if (!changed) return slice;

        return Slice.fromJSON(schema, {
          content: newContent,
          openStart: slice.openStart,
          openEnd: slice.openEnd,
        });
      },
    },
  });
}
Loading

0 comments on commit 499103e

Please sign in to comment.