From 847e1a5dfd226bbad30a5c86b9b077dabb08fce0 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com>
Date: Mon, 7 Feb 2022 11:11:53 +0100
Subject: [PATCH 1/3] convert_textlines: implement by-index order option

---
 ocrd_page_to_alto/convert.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/ocrd_page_to_alto/convert.py b/ocrd_page_to_alto/convert.py
index 2d2f5a5..df2af11 100644
--- a/ocrd_page_to_alto/convert.py
+++ b/ocrd_page_to_alto/convert.py
@@ -69,6 +69,7 @@ def __init__(
         textequiv_index=0,
         textequiv_fallback_strategy='last',
         region_order='document',
+        textline_order='document',
         page_filename=None,
         dummy_textline=True,
         dummy_word=True,
@@ -86,6 +87,7 @@ def __init__(
             textequiv_index (int): @index of the TextEquiv to choose
             textequiv_fallback_strategy ("raise"|"first"|"last"): Strategy to handle case of no matchin TextEquiv by textequiv_index
             region_order ("document"|"reading-order"|"reading-order-only"): The order in which to iterate over regions.
+            textline_order ("document"|"index"|"textline-order"): The order in which to iterate over textlines.
             dummy_textline (boolean): Whether to create a TextLine for regions that have TextEquiv/Unicode but no TextLine
             dummy_word (boolean): Whether to create a Word for TextLine that have TextEquiv/Unicode but no Word
         """
@@ -98,6 +100,7 @@ def __init__(
         self.trailing_dash_to_hyp = trailing_dash_to_hyp
         self.dummy_textline = dummy_textline
         self.region_order = region_order
+        self.textline_order = textline_order
         self.dummy_word = dummy_word
         self.logger = logger if logger else getLogger('page-to-alto')
         if pcgts:
@@ -274,7 +277,19 @@ def convert_metadata(self):
     def _convert_textlines(self, reg_alto, reg_page):
         if self.dummy_textline:
             self.set_dummy_line_for_region(reg_page)
-        for line_page in reg_page.get_TextLine():
+        lines = reg_page.get_TextLine()
+        if self.textline_order == 'document':
+            :
+        elif self.textline_order == 'index':
+            def by_index(line):
+                if line.index is None:
+                    return 0 # keep order
+                return line.index
+            lines = sorted(lines, key=by_index)
+        elif self.textline_order == 'textline-order':
+            # TODO: honour @textLineOrder here (reg_page.textLineOrder, or else the parent's textLineOrder) once its semantics are defined
+            raise Exception("@textLineOrder semantics not implemented; cf. PRImA-Research-Lab/PAGE-XML#26")
+        for line_page in lines:
             is_empty_line = not(line_page.get_TextEquiv() and line_page.get_TextEquiv()[0].get_Unicode()) and not(line_page.get_Word())
             if is_empty_line and self.skip_empty_lines:
                 self.logger.debug("Skipping empty line '%s'", line_page.id)

From d81e8e68553205277be8a9cec78660c2ce513563 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com>
Date: Mon, 7 Feb 2022 11:14:20 +0100
Subject: [PATCH 2/3] add textline-order option

---
 ocrd_page_to_alto/cli.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ocrd_page_to_alto/cli.py b/ocrd_page_to_alto/cli.py
index fd8f3f7..ea30800 100644
--- a/ocrd_page_to_alto/cli.py
+++ b/ocrd_page_to_alto/cli.py
@@ -18,10 +18,12 @@
 @click.option('--textequiv-index', default=0, help='If multiple textequiv, use the n-th TextEquiv by @index')
 @click.option('--textequiv-fallback-strategy', default='last', type=click.Choice(['raise', 'first', 'last']), help="What to do if nth textequiv isn't available. 'raise' will lead to a runtime error, 'first' will use the first TextEquiv, 'last' will use the last TextEquiv on the element")
 @click.option('--region-order', default='document', help="Order in which to iterate over the regions", type=click.Choice(['document', 'reading-order', 'reading-order-only']))
+@click.option('--textline-order', default='document', help="Order in which to iterate over the textlines", type=click.Choice(['document', 'index', 'textline-order']))
 @click.option('-O', '--output-file', default='-', help='Output filename (or "-" for standard output, the default)',
               type=click.Path(dir_okay=False, writable=True, exists=False, allow_dash=True))
 @click.argument('filename',  type=click.Path(dir_okay=False, exists=True))
-def main(log_level, alto_version, check_words, check_border, skip_empty_lines, trailing_dash_to_hyp, dummy_textline, dummy_word, textequiv_index, textequiv_fallback_strategy, region_order, output_file, filename):
+def main(log_level, alto_version, check_words, check_border, skip_empty_lines, trailing_dash_to_hyp, dummy_textline, dummy_word, 
+         textequiv_index, textequiv_fallback_strategy, region_order, textline_order, output_file, filename):
     """
     Convert PAGE to ALTO
     """
@@ -37,7 +39,8 @@ def main(log_level, alto_version, check_words, check_border, skip_empty_lines, t
         dummy_word=dummy_word,
         textequiv_index=textequiv_index,
         textequiv_fallback_strategy=textequiv_fallback_strategy,
-        region_order=region_order
+        region_order=region_order,
+        textline_order=textline_order
     )
     converter.convert()
     with open(1 if output_file == '-' else output_file, 'w') as output:

From 5c974a10546f6ffb08edf91925c5f123c5455e41 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com>
Date: Mon, 7 Feb 2022 11:16:10 +0100
Subject: [PATCH 3/3] fix typo

---
 ocrd_page_to_alto/convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocrd_page_to_alto/convert.py b/ocrd_page_to_alto/convert.py
index df2af11..06dc868 100644
--- a/ocrd_page_to_alto/convert.py
+++ b/ocrd_page_to_alto/convert.py
@@ -279,7 +279,7 @@ def _convert_textlines(self, reg_alto, reg_page):
             self.set_dummy_line_for_region(reg_page)
         lines = reg_page.get_TextLine()
         if self.textline_order == 'document':
-            :
+            pass
         elif self.textline_order == 'index':
             def by_index(line):
                 if line.index is None: