forked from epoz/textbase
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
107 lines (92 loc) · 4.27 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from collections import OrderedDict
import textbase
# Fixture for test_main: UTF-8 encoded text holding three "$"-terminated
# chunks -- two populated records and a final chunk that contains only a
# comment line.
# NOTE(review): the STORY continuation lines start at column 0 here -- confirm
# that this matches the continuation syntax TextBase expects.
example = """# Here is a comment
NAME John Doe
DOB 19720601
COLOURS red
; green
; mauve
STORY Once upon a time there lived a frog in a
far away land. He was a very sprightly frog
and could jump around all day long.
$
NAME Peter Piper
DOB 19840101
COLOURS black
; purple
; white
$
# ANd this is effectively an empty record.
$
""".encode("utf-8")
def test_main():
    """Check that parsing ``example`` yields exactly the two populated records.

    Every field value is expected as a list of strings, with the ``;``
    lines of COLOURS appearing as additional list items. The comment-only
    chunk at the end of the fixture is expected to contribute no record.
    """
    john = {
        "NAME": ["John Doe"],
        "DOB": ["19720601"],
        "COLOURS": ["red", "green", "mauve"],
        "STORY": [
            "Once upon a time there lived a frog in a "
            "far away land. He was a very sprightly frog "
            "and could jump around all day long."
        ],
    }
    peter = {
        "NAME": ["Peter Piper"],
        "DOB": ["19840101"],
        "COLOURS": ["black", "purple", "white"],
    }

    parsed = [record for record in textbase.TextBase(example)]
    assert parsed == [john, peter]
# Fixture for test_multiline: a single "$"-terminated record, UTF-8 encoded,
# with a seven-value INDEX field and a TEXT value that spans two lines and
# contains non-ASCII (Greek, typographic quote) characters.
example_multiline = """
ID seg_entry_0000001
TYPE entry
TITLE Athens (in museum). Titulum antiquissimum editum a Studniczka, Ath. Mitt.
ENTRYTYPE new reading
HEADING Titulum antiquissimum editum a Studniczka, Ath. Mitt.
LABEL 1-1
VOL 1
ENTRY 1
BIBL seg_bibl_100001
INDEX seg_index_0071817
; seg_index_0090033
; seg_index_0090034
; seg_index_0090035
; seg_index_0090036
; seg_index_0090037
; seg_index_0090038
TEXT # Titulum antiquissimum editum a Studniczka
Titulum antiquissimum editum a Studniczka, _Ath. Mitt._ XVIII, 225, denuo tractaverunt W. Brandenstein et E. Kalinka, _Klio_ XVII, 262 sq., 267 sq. — Kalinka ipse Athenis titulum in vase inscriptum examinavit atque testatur post verba ὃς νῦν ὀρχηστῶν πάντων ἀταλώτατα παίζει· non scriptum esse τοῦτο δεκᾶν μιν sed τοῦτον ἐκαύμιν, litteram autem paenultimam potius ε quam ι esse. Verba vertit: „qui nunc omnium saltatorum optime saltavit. eius amore incensus sum.” — Etiam W. Brandenstein inde profectus est ut litteram δ post τοῦτο non recte lectam esse putaret, verum pro δεκᾶν legendum esse ἑκᾶν i. e.: „dieses Gefäsz soll ihn erfreuen,” quod minime nobis persuasit. — Contra utrumque Studniczka lectionem suam defendit _Arch. Αnz._ XXXVI, 341 sq.
$
""".encode("utf-8")
def test_multiline():
    """Check parsing of ``example_multiline`` into one ordered record.

    The expected record lists its fields in the order they appear in the
    fixture, carries all seven INDEX values in a single list, and keeps the
    line break between the two lines of the TEXT value.
    """
    text_value = (
        "# Titulum antiquissimum editum a Studniczka\n"
        "Titulum antiquissimum editum a Studniczka, _Ath. Mitt._ XVIII, 225, denuo tractaverunt W. Brandenstein et E. Kalinka, _Klio_ XVII, 262 sq., 267 sq. — Kalinka ipse Athenis titulum in vase inscriptum examinavit atque testatur post verba ὃς νῦν ὀρχηστῶν πάντων ἀταλώτατα παίζει· non scriptum esse τοῦτο δεκᾶν μιν sed τοῦτον ἐκαύμιν, litteram autem paenultimam potius ε quam ι esse. Verba vertit: „qui nunc omnium saltatorum optime saltavit. eius amore incensus sum.” — Etiam W. Brandenstein inde profectus est ut litteram δ post τοῦτο non recte lectam esse putaret, verum pro δεκᾶν legendum esse ἑκᾶν i. e.: „dieses Gefäsz soll ihn erfreuen,” quod minime nobis persuasit. — Contra utrumque Studniczka lectionem suam defendit _Arch. Αnz._ XXXVI, 341 sq."
    )
    index_values = [
        "seg_index_0071817",
        "seg_index_0090033",
        "seg_index_0090034",
        "seg_index_0090035",
        "seg_index_0090036",
        "seg_index_0090037",
        "seg_index_0090038",
    ]
    record = OrderedDict(
        [
            ("ID", ["seg_entry_0000001"]),
            ("TYPE", ["entry"]),
            ("TITLE", ["Athens (in museum). Titulum antiquissimum editum a Studniczka, Ath. Mitt."]),
            ("ENTRYTYPE", ["new reading"]),
            ("HEADING", ["Titulum antiquissimum editum a Studniczka, Ath. Mitt."]),
            ("LABEL", ["1-1"]),
            ("VOL", ["1"]),
            ("ENTRY", ["1"]),
            ("BIBL", ["seg_bibl_100001"]),
            ("INDEX", index_values),
            ("TEXT", [text_value]),
        ]
    )

    parsed = [entry for entry in textbase.TextBase(example_multiline)]
    assert parsed == [record]
if __name__ == "__main__":
    # Allow the checks to be run directly as a script, in definition order,
    # without going through a test runner.
    for check in (test_main, test_multiline):
        check()