Skip to content

Commit

Permalink
clean up lxml code
Browse files: browse the repository at this point in the history
  • Loading branch information
kba committed Feb 12, 2024
1 parent 62c3e2f commit f23bb05
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 13 deletions.
8 changes: 3 additions & 5 deletions src/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import re
import typing
from typing_extensions import Optional
- from lxml import etree as ET # type: ignore
- from copy import deepcopy
+ from lxml import etree as ET
from warnings import warn

from ocrd_utils import (
getLogger,
- deprecation_warning,
generate_range,
VERSION,
REGEX_PREFIX,
Expand Down Expand Up @@ -188,6 +186,7 @@ def unique_identifier(self, purl):
break
if id_el is None:
mods = self._tree.getroot().find('.//mods:mods', NS)
+ assert mods is not None
id_el = ET.SubElement(mods, TAG_MODS_IDENTIFIER)
id_el.set('type', 'purl')
id_el.text = purl
Expand Down Expand Up @@ -482,7 +481,6 @@ def add_file(self, fileGrp, mimetype=None, url=None, ID=None, pageId=None, force
raise ValueError("Invalid syntax for mets:file/@ID %s (not an xs:ID)" % ID)
if not REGEX_FILE_ID.fullmatch(fileGrp):
raise ValueError("Invalid syntax for mets:fileGrp/@USE %s (not an xs:ID)" % fileGrp)
- log = getLogger('ocrd.models.ocrd_mets.add_file')

el_fileGrp = self.add_file_group(fileGrp)
if not ignore:
Expand Down Expand Up @@ -566,7 +564,7 @@ def remove_one_file(self, ID, fileGrp=None):
if self._cache_flag:
del self._fptr_cache[page_div.get('ID')][ID]
# delete empty pages
- if not page_div.getchildren():
+ if not list(page_div):
log.debug("Delete empty page %s", page_div)
page_div.getparent().remove(page_div)
# Delete the empty pages from caches as well
Expand Down
2 changes: 2 additions & 0 deletions src/ocrd_models/ocrd_page_generateds.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# core
#

+ # type: ignore

from itertools import zip_longest
import os
import sys
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd_models/ocrd_xml_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Base class for XML documents loaded from either content or filename.
"""
from os.path import exists
- from lxml import etree as ET # type: ignore
+ from lxml import etree as ET

from .constants import NAMESPACES
from .utils import xmllint_format
Expand All @@ -29,11 +29,11 @@ def __init__(self, filename=None, content=None, cache_flag=False):
elif content:
self._tree = ET.ElementTree(ET.XML(content, parser=ET.XMLParser(encoding='utf-8')))
else:
- self._tree = ET.ElementTree()
+ assert filename
filename = filename.replace('file://', '')
if not exists(filename):
raise Exception('File does not exist: %s' % filename)
- self._tree.parse(filename)
+ self._tree = ET.parse(filename)

# Cache enabled - True/False
self._cache_flag = cache_flag
Expand Down
1 change: 0 additions & 1 deletion src/ocrd_models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def xmllint_format(xml):
Arguments:
xml (string): Serialized XML
"""
- log = getLogger('ocrd.models.utils.xmllint_format')
parser = ET.XMLParser(resolve_entities=False, strip_cdata=False, remove_blank_text=True)
document = ET.fromstring(xml, parser)
return ('%s\n%s' % ('<?xml version="1.0" encoding="UTF-8"?>',
Expand Down
8 changes: 4 additions & 4 deletions tests/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from contextlib import contextmanager
import shutil
from logging import StreamHandler
- import lxml
+ from lxml import etree as ET

from tests.base import (
main,
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_physical_pages(sbb_sample_01):
assert len(sbb_sample_01.physical_pages) == 3, '3 physical pages'
assert isinstance(sbb_sample_01.physical_pages, list)
assert isinstance(sbb_sample_01.physical_pages[0], str)
- assert not isinstance(sbb_sample_01.physical_pages[0], lxml.etree._ElementUnicodeResult)
+ assert not isinstance(sbb_sample_01.physical_pages[0], ET._ElementUnicodeResult)

def test_physical_pages_from_empty_mets():
mets = OcrdMets(content="<mets></mets>")
Expand Down Expand Up @@ -236,9 +236,9 @@ def test_metshdr():
Test whether metsHdr is created on-demand
"""
mets = OcrdMets(content="<mets></mets>")
- assert not mets._tree.getroot().getchildren()
+ assert not list(mets._tree.getroot())
mets.add_agent()
- assert len(mets._tree.getroot().getchildren()) == 1
+ assert len(mets._tree.getroot()) == 1


def test_nocontent_nofilename_exception():
Expand Down

0 comments on commit f23bb05

Please sign in to comment.