forked from LibreTranslate/Locomotive
-
Notifications
You must be signed in to change notification settings - Fork 0
/
transforms.py
60 lines (53 loc) · 1.94 KB
/
transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def _remove_unpaired_quotes_and_brackets(line):
# Deleting unpaired quotation marks
if (line.count('“') + line.count('”')) % 2 != 0:
line = line.replace('“', '')
line = line.replace('”', '')
if (line.count('«') + line.count('»')) % 2 != 0:
line = line.replace('«', '')
line = line.replace('»', '')
while line.count('"') % 2 != 0:
line = line.replace('"', '', 1)
# Deleting unpaired square brackets
while line.count('[') != line.count(']'):
if '[' in line:
line = line.replace('[', '', 1)
if ']' in line:
line = line.replace(']', '', 1)
# Deleting unpaired parentheses
while line.count('(') != line.count(')'):
if '(' in line:
line = line.replace('(', '', 1)
if ')' in line:
line = line.replace(')', '', 1)
# Deleting unpaired curly braces
while line.count('{') != line.count('}'):
if '{' in line:
line = line.replace('{', '', 1)
if '}' in line:
line = line.replace('}', '', 1)
return line
def remove_unpaired_quotes_and_brackets(src, tgt):
    """
    Removes unmatched quotations (“, ”, ", «, ») and parentheses ((, ), [, ], {, })

    The source and the target sentence are cleaned independently of each other.

    :param str src: Source sentence
    :param str tgt: Target sentence
    :return: The cleaned (src, tgt) pair
    """
    cleaned_src = _remove_unpaired_quotes_and_brackets(src)
    cleaned_tgt = _remove_unpaired_quotes_and_brackets(tgt)
    return cleaned_src, cleaned_tgt
def remove_chars(src, tgt, chars=None):
    """
    Remove these characters or words

    Every entry of ``chars`` is deleted from both sentences, in the order
    given. A plain string is iterated character by character, so ``'ab'``
    removes every ``a`` and every ``b`` rather than the substring ``ab``.

    :param str src: Source sentence
    :param str tgt: Target sentence
    :param list(str)|str chars: List of characters or words; ``None`` (the
        default) or an empty value leaves both sentences unchanged
    :return: The (src, tgt) pair with the entries removed
    """
    # A None sentinel replaces the former mutable `[]` default.
    for c in chars or ():
        src = src.replace(c, '')
        tgt = tgt.replace(c, '')
    return src, tgt
def first_case_normalize(src, tgt):
    """
    Normalize the case of the first letter

    When both sentences start with an alphabetic character, the first
    character of the target is upper- or lower-cased to follow the source.
    The source is never modified. If either sentence is empty, the pair is
    returned unchanged.

    :param str src: Source sentence
    :param str tgt: Target sentence
    :return: The (src, tgt) pair with the target's first letter adjusted
    """
    # Guard against empty strings: indexing [0] would raise IndexError.
    if not src or not tgt:
        return src, tgt

    src_first, tgt_first = src[0], tgt[0]
    if src_first.isalpha() and tgt_first.isalpha():
        if src_first.isupper() and not tgt_first.isupper():
            tgt = tgt_first.upper() + tgt[1:]
        elif src_first.islower() and not tgt_first.islower():
            tgt = tgt_first.lower() + tgt[1:]
    return src, tgt