Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle section breaks when pasting text from other sources #210

Merged
merged 10 commits into from
Sep 12, 2024
37 changes: 37 additions & 0 deletions blocks/edit/prose/index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
/* eslint-disable max-classes-per-file */
import {
DOMParser,
EditorState,
EditorView,
Schema,
TextSelection,
baseSchema,
history,
buildKeymap,
Expand All @@ -17,6 +19,8 @@ import {
liftListItem,
sinkListItem,
gapCursor,
InputRule,
inputRules,
Y,
WebsocketProvider,
ySyncPlugin,
Expand All @@ -31,6 +35,7 @@ import prose2aem from '../../shared/prose2aem.js';
import menu from './plugins/menu.js';
import imageDrop from './plugins/imageDrop.js';
import linkConverter from './plugins/linkConverter.js';
import sectionPasteHandler from './plugins/sectionPasteHandler.js';
import { COLLAB_ORIGIN, getDaAdmin } from '../../shared/constants.js';
import { addLocNodes, getLocClass } from './loc-utils.js';

Expand Down Expand Up @@ -201,6 +206,36 @@ function generateColor(name, hRange = [0, 360], sRange = [60, 80], lRange = [40,
return `#${f(0)}${f(8)}${f(4)}`;
}

/**
 * Create the input rule that turns a line holding exactly three dashes into
 * whatever the schema parses out of an `<hr>` element.
 *
 * The rule matches `---` followed by a newline. Its handler selects the matched
 * range, replaces that selection with the parsed nodes and hands the resulting
 * transaction to `dispatchTransaction`; the handler itself returns nothing.
 *
 * @returns {InputRule} the configured input rule
 */
function getDashesInputRule() {
  const dashesThenNewline = /^---[\n]$/;

  const replaceWithRule = (state, match, start, end) => {
    // Parse <div><hr></div> through the editor schema to get the replacement nodes.
    const wrapper = document.createElement('div');
    wrapper.append(document.createElement('hr'));
    const parsedNodes = DOMParser.fromSchema(state.schema).parse(wrapper);

    // Select the matched text so that it is what gets replaced.
    const matchedRange = TextSelection.create(state.doc, start, end);
    const transaction = state.tr.setSelection(matchedRange).replaceSelectionWith(parsedNodes);
    dispatchTransaction(transaction);
  };

  return new InputRule(dashesThenNewline, replaceWithRule);
}

/**
 * Build an input-rules plugin that also fires on the Enter key.
 *
 * The plugin is created with the dashes rule and then given a `handleKeyDown`
 * prop. On Enter, with a cursor selection, that handler forwards to the plugin's
 * own `handleTextInput` as if a newline had been typed at the cursor, so the
 * rule's regex can match the text followed by '\n'.
 *
 * @returns {object} the inputRules plugin with the extra `handleKeyDown` prop
 */
function getInputRulesPlugin() {
  const plugin = inputRules({ rules: [getDashesInputRule()] });

  // Attach the Enter-key hook directly to the plugin's props.
  plugin.props.handleKeyDown = (view, event) => {
    if (event.key === 'Enter') {
      const { $cursor } = view.state.selection;
      if ($cursor) {
        return plugin.props.handleTextInput(view, $cursor.pos, $cursor.pos, '\n');
      }
    }
    return false;
  };

  return plugin;
}

export default function initProse({ editor, path }) {
const schema = getSchema();

Expand Down Expand Up @@ -258,8 +293,10 @@ export default function initProse({ editor, path }) {
menu,
imageDrop(schema),
linkConverter(schema),
sectionPasteHandler(schema),
columnResizing(),
tableEditing(),
getInputRulesPlugin(),
keymap(buildKeymap(schema)),
keymap(baseKeymap),
keymap({
Expand Down
153 changes: 153 additions & 0 deletions blocks/edit/prose/plugins/sectionPasteHandler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import { Plugin, Slice } from 'da-y-wrapper';

/**
 * Flush the inline nodes collected so far into a paragraph.
 *
 * When `paraContent` holds at least one node, a paragraph JSON object wrapping a
 * copy of those nodes is appended to `newContent` and `paraContent` is emptied
 * in place so the caller can keep collecting. An empty `paraContent` is a no-op.
 *
 * @param {Array<object>} paraContent - pending inline nodes (emptied in place)
 * @param {Array<object>} newContent - output list that receives the paragraph
 */
function closeParagraph(paraContent, newContent) {
  if (!(paraContent.length > 0)) return;

  const collected = Array.from(paraContent);
  newContent.push({ type: 'paragraph', content: collected });

  // Reset the caller's buffer rather than handing back a new one.
  paraContent.length = 0;
}

/**
 * Mark section breaks in HTML pasted from desktop Word with horizontal rules.
 *
 * Desktop Word represents each section as a top-level div directly under the
 * body element. The HTML is recognised as Word output by its
 * `<meta name="ProgId" content="Word.Document">` element; anything else is
 * returned untouched. An hr is inserted after every top-level div that has a
 * following element sibling, and the document is re-serialized only when at
 * least one hr was added. Any error is logged and the original HTML is returned.
 *
 * @param {string} html - the pasted HTML
 * @returns {string} the HTML with hr elements added, or the original HTML
 */
function handleDesktopWordSectionBreaks(html) {
  try {
    const doc = new DOMParser().parseFromString(html, 'text/html');

    const progId = doc.querySelector('meta[name="ProgId"]')?.content;
    if (progId !== 'Word.Document') {
      // Not produced by desktop Word, leave it alone.
      return html;
    }

    let insertedCount = 0;
    for (const section of doc.querySelectorAll('body > div')) {
      // A trailing section has nothing to be separated from, so it gets no hr.
      if (section.nextElementSibling) {
        section.after(doc.createElement('hr'));
        insertedCount += 1;
      }
    }

    if (insertedCount === 0) {
      return html;
    }

    return new XMLSerializer().serializeToString(doc);
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error handling desktop Word section breaks:', error);
    return html;
  }
}

/**
 * Tell whether a span's data-ccp-props attribute flags a section break, i.e.
 * whether it parses as JSON and one of its values is the string 'single'.
 * Spans whose attribute is not valid JSON, or does not hold an object, are
 * simply not markers.
 *
 * @param {Element} span - a span element carrying a data-ccp-props attribute
 * @returns {boolean} true when one of the JSON values is 'single'
 */
function isWordOnlineSectionMarker(span) {
  let props;
  try {
    props = JSON.parse(span.getAttribute('data-ccp-props'));
  } catch {
    // Unparseable props cannot identify a section break. Skip only this span so
    // the remaining spans of the paste are still examined.
    return false;
  }
  if (props === null || typeof props !== 'object') return false;
  return Object.values(props).includes('single');
}

/**
 * Find section breaks in HTML pasted from Word online and add a horizontal rule
 * after each one.
 * In Word online section breaks are quite hard to identify, but it turns out that
 * they seem to be indicated by a span element with a data-ccp-props attribute, of
 * which one of the values is 'single'. This is quite brittle but right now seems
 * to be the only way to find them. In the future Word online might provide a
 * better way to identify section breaks.
 *
 * Each span is checked on its own: a span with malformed props is ignored and
 * does not stop section breaks elsewhere in the paste from being detected.
 *
 * @param {string} html - the pasted HTML
 * @returns {string} the re-serialized HTML when at least one hr was added,
 *   otherwise (or on any error, which is logged) the original HTML
 */
function handleWordOnlineSectionBreaks(html) {
  try {
    const parser = new DOMParser();
    const doc = parser.parseFromString(html, 'text/html');

    let modified = false;
    // The span[data-ccp-props] are the magic indicator: if one of the JSON values in there is
    // the word 'single' then we need to add a section break.
    const candidates = doc.querySelectorAll('div > p > span[data-ccp-props]');
    candidates.forEach((span) => {
      if (isWordOnlineSectionMarker(span)) {
        // The rule goes after the paragraph that contains the marker span.
        span.parentNode.after(doc.createElement('hr'));
        modified = true;
      }
    });

    if (!modified) {
      return html;
    }

    const serializer = new XMLSerializer();
    return serializer.serializeToString(doc);
  } catch (error) {
    // eslint-disable-next-line no-console
    console.error('Error handling Word online section breaks:', error);
    return html;
  }
}

/**
 * When text is pasted, handle section breaks.
 *
 * @param {object} schema - the editor schema used to rebuild the pasted slice
 * @returns {Plugin} a plugin providing transformPastedHTML and transformPasted props
 */
export default function sectionPasteHandler(schema) {
  // A text node that, ignoring surrounding whitespace, consists of three dashes.
  const isDashesText = (node) => node.type === 'text' && node.text.trim() === '---';

  return new Plugin({
    props: {
      /* A section break entered in Word is not kept in the text of the document, but
       * buried in the HTML that is pasted. This function uses highly specific ways to find
       * these section breaks and adds a <hr/> element for them.
       */
      transformPastedHTML: (html) => {
        const desktopHandled = handleDesktopWordSectionBreaks(html);
        return handleWordOnlineSectionBreaks(desktopHandled);
      },

      /* Convert 3 dashes on a line by itself (top level only) to a horizontal rule,
       * which is then interpreted as a section break.
       */
      transformPasted: (slice) => {
        // toJSON() yields null for an empty slice, so guard before reading content.
        const content = slice.toJSON()?.content;
        if (!content) return slice;

        const newContent = [];

        for (const el of content) {
          // An empty paragraph serializes without a content key.
          const children = el.content ?? [];
          const needsSplit = el.type === 'paragraph' && children.some(isDashesText);

          if (!needsSplit) {
            // Nothing to convert: keep the node exactly as pasted.
            newContent.push(el);
          } else {
            const pending = [];

            for (const child of children) {
              if (isDashesText(child)) {
                // Close what came before the dashes, then emit the rule.
                closeParagraph(pending, newContent);
                newContent.push({ type: 'horizontal_rule' });
              } else {
                pending.push(child);
              }
            }

            closeParagraph(pending, newContent);
          }
        }

        return Slice.fromJSON(schema, {
          content: newContent,
          openStart: slice.openStart,
          openEnd: slice.openEnd,
        });
      },
    },
  });
}
Loading
Loading