-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
66 lines (53 loc) · 2.45 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from entity.word import *
import re
# Sort the list, make words which are the same state together, for example:
# ----------------------------------------------------------
# This is ==another== ~~a~~ ==example== ~~sample~~ sentence.
# This is ~~a~~ ~~sample~~ ==another== ==example== sentence.
def sort_corrected_words(words):
# first, find whether has ==?== before ~~?~~, if has then exchange
for i in range(1, len(words)):
if (words[i - 1].state == State.MISS
and words[i].state == State.REDUNDANT):
red_start = red_end = i
miss_end = miss_start = i - 1
# try to find consecutive REDUNDANT and MISS area
while red_end + 1 < len(words) \
and words[red_end + 1].state == State.REDUNDANT:
red_end = red_end + 1
while miss_start - 1 >= 0 \
and words[miss_start - 1].state == State.MISS:
miss_start = miss_start - 1
# exchange
exchange_words(words, miss_start, miss_end, red_start, red_end)
# second, make words which are the same state together
# main focus "miss redundant miss" and "redundant miss redundant" two types
for i in range(2, len(words)):
# "miss redundant miss"
if ((words[i].state == State.MISS
and words[i - 1].state == State.REDUNDANT
and words[i - 2].state == State.MISS)
# "redundant miss redundant"
or (words[i].state == State.REDUNDANT
and words[i - 1].state == State.MISS
and words[i - 2].state == State.REDUNDANT)):
exchange_word(words, i, i - 1)
# Warning: be sure the index i1/i2 are valid and consecutive
def exchange_word(words, i1, i2):
exchange_words(words, i1, i1, i2, i2)
# exchange the [start1, end1] and [start2, end2]
# index order: start1 < end1 < start2 < end2; and index are consecutive
def exchange_words(words, start1, end1, start2, end2):
words[start2:end2 + 1], words[start1:end1 + 1] \
= words[start1:end1 + 1], words[start2:end2 + 1]
for i in range(start1, end2 + 1):
words[i].index = i
# bug fix: add the procession towards $
# Markdown highlight: ==test==
def md_highlight(content):
content = re.sub(r'\$', r'\\$', content)
return f"=={content}=="
# Markdown strikethrough: ~~test~~
def md_strikethrough(content):
content = re.sub(r'\$', r'\\$', content)
return f"~~{content}~~"