Skip to content

Commit

Permalink
set @type as well as TAGREFS from PAGE @type, #4
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Apr 13, 2021
1 parent 5cac144 commit aea4424
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
* [x] TextStyle
* [x] ParagraphStyle
* [x] table regions
* [ ] recursive regions
* [ ] recursive regions, #
* [ ] rotation
* [x] reading order
* [x] input PAGE-XML not having words
* [x] input PAGE-XML not having words #5
* [x] multiple pc:TextEquivs
* [x] language
* [X] ~~script~~ no equivalent in ALTO :(
Expand Down
1 change: 1 addition & 0 deletions ocrd_page_to_alto/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,6 @@ def __init__(self):
def set_alto_tag_from_type(self, reg_alto, reg_page):
    """Copy the PAGE region's type onto the ALTO element.

    If ``reg_page`` offers a ``get_type()`` accessor and it returns a
    truthy value, that value is written to the ``TYPE`` attribute of
    ``reg_alto``, and ``TAGREFS`` is set to whatever
    ``self.get_id(label=...)`` returns for it (presumably the ID of the
    layout tag carrying that label -- confirm against ``get_id``).

    Elements without ``get_type`` or with an empty type are left untouched.
    """
    # Not every PAGE element exposes a type; nothing to do for those.
    if not hasattr(reg_page, 'get_type'):
        return
    page_type = reg_page.get_type()
    if not page_type:
        return
    reg_alto.set('TYPE', page_type)
    reg_alto.set('TAGREFS', self.get_id(label=page_type))

2 changes: 2 additions & 0 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def test_layouttag():
c = OcrdPageAltoConverter(page_filename='tests/data/layouttag.page.xml').convert()
tree = ET.fromstring(str(c).encode('utf-8'))
assert [x.get('LABEL') for x in tree.xpath('//alto:Tags/alto:LayoutTag', namespaces=NAMESPACES)] == ['paragraph']
assert len(tree.xpath('//*[@TYPE="paragraph"]')) == 1
assert len(tree.xpath('//*[@TYPE="catch-word"]')) == 0 # @TYPE only allowed for BlockType

def test_pararaphstyle():
c = OcrdPageAltoConverter(page_filename='tests/data/align.page.xml').convert()
Expand Down

0 comments on commit aea4424

Please sign in to comment.