Skip to content

Commit

Permalink
Optimize span and anchor parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
davivcu committed Dec 28, 2023
1 parent ee0fd42 commit 2280610
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 66 deletions.
25 changes: 12 additions & 13 deletions src/app/services/xml-parsers/basic-parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import {
Addition, Analogue, Anchor, Attributes, Damage, Deletion, Gap, GenericElement, Lb, Milestone, Note, NoteLayout,
Paragraph, PlacementType, Ptr, QuoteEntry, Span, SpanGrp, Supplied, Term, Text, Verse, VersesGroup, Word, XMLElement,
} from '../../models/evt-models';
import { isNestedInElem, xpath } from '../../utils/dom-utils';
import { getContentBetweenElementAndId, getExternalElements, isAnalogue, isSource, replaceMultispaces } from '../../utils/xml-utils';
import { getElementsBetweenTreeNode, isNestedInElem, xpath } from '../../utils/dom-utils';
import { getExternalElements, isAnalogue, isSource, replaceMultispaces } from '../../utils/xml-utils';
import { createParser, getClass, getDefaultN, getID, parseChildren, ParseFn, Parser } from './parser-models';
import { AppConfig } from 'src/app/app.config';
import { AnalogueParser } from './analogue-parser';
Expand Down Expand Up @@ -429,15 +429,17 @@ export class TermParser extends GenericElemParser implements Parser<XMLElement>
export class MilestoneParser extends GenericElemParser implements Parser<XMLElement> {
parse(xml: XMLElement): Milestone {

const elements = getContentBetweenElementAndId(xml, xml.getAttribute('spanTo'));
const parsedElements = elements.elements.map((x) => super.parse(x));
const endElement = (xml.getAttribute('spanTo')) ? getExternalElements(xml, ['spanTo'], 'xml:id', 'anchor') : [];
const includedElements = (endElement.length !== 0) ? getElementsBetweenTreeNode(xml, endElement[0]) : [];
const parsedElements = (includedElements.length !== 0) ?
includedElements.map((x: XMLElement) => (x.nodeType !== 3 && x.nodeType !== 8) ? super.parse(x) : x) : [];

return {
type: Milestone,
id: xml.getAttribute('xml:id'),
attributes: this.attributeParser.parse(xml),
unit: xml.getAttribute('unit'),
spanText: elements.text,
spanText: '',
spanElements: parsedElements,
content: parseChildren(xml, this.genericParse),
};
Expand Down Expand Up @@ -481,21 +483,18 @@ export class SpanParser extends GenericElemParser implements Parser<XMLElement>
}

} else if (xml.tagName === 'span') {
let included = { text: '', elements: [] };
let parsedElements = [];
const startingElement = getExternalElements(xml, ['from'], 'xml:id', 'anchor');
if (startingElement.length > 0) {
included = getContentBetweenElementAndId(startingElement[0], xml.getAttribute('to'));
parsedElements = included.elements.map((x) => super.parse(x));
}
const endElement = (xml.getAttribute('spanTo')) ? getExternalElements(xml, ['from'], 'xml:id', 'anchor') : [];
const includedElements = (endElement.length !== 0) ? getElementsBetweenTreeNode(xml, endElement[0]) : [];
const parsedElements = (includedElements.length !== 0) ?
includedElements.map((x: XMLElement) => (x.nodeType !== 3 && x.nodeType !== 8) ? super.parse(x) : x) : [];

return <Span> {
type: Span,
id: xml.getAttribute('xml:id'),
attributes: this.attributeParser.parse(xml),
from: xml.getAttribute('from'),
to: xml.getAttribute('to'),
includedText: included.text,
includedText: '',
includedElements: parsedElements,
content: parseChildren(xml, this.genericParse),
};
Expand Down
53 changes: 0 additions & 53 deletions src/app/utils/xml-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,56 +107,3 @@ export function isSource(elem: XMLElement, attrs: string[]): boolean {

return (validAttrs);
}


/**
 * Retrieve the text content and the elements found between the provided element and the
 * element whose `xml:id` matches the provided id.
 *
 * The walk starts at the element following `fromElement` (or, when there is none, at the
 * element following its parent), then moves forward through next element siblings, climbing
 * to the parent's next element sibling whenever a sibling chain ends. It stops when the target
 * id is found, when there is nothing left to visit, or when the step budget is exhausted.
 *
 * NOTE(review): when the node right after `fromElement` is itself an element, its text is
 * added once as "text after the start" and once again when the walk visits it — confirm
 * whether callers rely on this.
 *
 * @param fromElement element the walk starts from (typically a milestone or anchor)
 * @param toXMLID id of the closing element; a leading `#` is stripped before comparing
 * @param maxSteps step budget for the walk; each visited element costs one step and each
 * climb to a parent's sibling costs one more
 * @returns the concatenated text and the visited elements (the matching element included);
 * both are empty when `fromElement` or `toXMLID` is null or undefined
 */
export function getContentBetweenElementAndId(
  fromElement: XMLElement,
  toXMLID: string,
  maxSteps = 50,
): { text: string, elements: any } {

  if (fromElement === null || fromElement === undefined || toXMLID === null || toXMLID === undefined) {
    return { text: '', elements: [] };
  }

  const cleanID = toXMLID.replace('#', '');

  // A detached or root element has no parent: treat it as "no element after the parent"
  // instead of dereferencing null.
  const parent = fromElement.parentElement;
  const parentNextElement = (parent !== null && parent !== undefined) ? parent.nextElementSibling : null;

  // text after the starting element but still inside the parent element
  let foundText = (fromElement.nextSibling !== null) ? fromElement.nextSibling.textContent : '';

  let next: XMLElement | null = (fromElement.nextElementSibling !== null)
    ? fromElement.nextElementSibling as XMLElement
    : parentNextElement as XMLElement | null;

  // creating a fake element for the partial text included from the starting element on
  const foundElements: any[] = [];
  if (parentNextElement !== null) {
    const nextEdited = parentNextElement.cloneNode(true);
    nextEdited.textContent = foundText;
    foundElements.push(nextEdited);
  }

  // The budget may drop by two in a single pass, so it has to be tested with `>` rather than
  // `!== 0`: an inequality test would let it jump from 1 to -1 and never stop the walk.
  let budget = maxSteps;

  while (next !== null && budget > 0) {
    foundElements.push(next);
    foundText = foundText + next.textContent;

    if (next.getAttribute('xml:id') === cleanID) {
      break;
    }

    budget--;
    if (next.nextElementSibling !== null) {
      next = next.nextElementSibling as XMLElement;
    } else if (next.parentElement !== null) {
      // end of this sibling chain: continue with what follows the parent
      next = next.parentElement.nextElementSibling as XMLElement | null;
      budget--;
    } else {
      next = null;
    }
  }

  return { 'text': foundText, 'elements': foundElements };
}

0 comments on commit 2280610

Please sign in to comment.