Skip to content

Commit

Permalink
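修复成对符号出现在一行对白内时,不进行时间重叠搜索的bug
(English: Fix the bug where the time-overlap search was skipped when a paired symbol appears within a single line of dialogue.)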
Browse files (browse the repository at this point in the history)
  • Loading branch information
barryZZJ committed Apr 26, 2023
1 parent 665e870 commit 3465d2d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
24 changes: 15 additions & 9 deletions SubCleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from utils.conf import conf
from utils.mydialogue import MyDialogue

VER = 'v3.0.0'
VER = 'v3.0.2'

DESCRIPTION = '字幕清理器\n' + \
'对ts源中提取出的ass字幕进行处理,包括合并多行对白、清理各种不必要的符号、说话人备注、转换假名半角等,输出ass或txt\n' + \
Expand Down Expand Up @@ -61,21 +61,21 @@ def postProcess(event: Dialogue) -> Dialogue:
return event


def findMergeStart(event: Dialogue) -> tuple[MergeType, str]:
def findMergeStart(event: Dialogue, ignored_mergetypes: list[MergeType]) -> tuple[MergeType, str]:
"""
:returns: mergetype and matched symbol
"""
if conf.merge.pair:
if conf.merge.pair and MergeType.Pair not in ignored_mergetypes:
for pairleft in pairs:
if pairleft in event.text:
return MergeType.Pair, pairleft

if conf.merge.singlesuf:
if conf.merge.singlesuf and MergeType.Singlesuf not in ignored_mergetypes:
for suf in singlesufs:
if event.text.endswith(suf):
return MergeType.Singlesuf, suf

if conf.merge.time:
if conf.merge.time and MergeType.Time not in ignored_mergetypes:
return MergeType.Time, ''

return MergeType.No, ''
Expand Down Expand Up @@ -126,12 +126,12 @@ def findMergeEnd(events: list[Dialogue],
raise NotImplementedError('Unexpected mergetype ' + str(mergetype))


def findMergeInterval(events: list[Dialogue], start: int) -> tuple[int, str, MergeType]:
def findMergeInterval(events: list[Dialogue], start: int, ignored_mergetypes: list[MergeType]) -> tuple[int, str, MergeType]:
"""
:returns: end index of merged events; merge reason (or warning msg if end index == -1); Mergetype
"""
eventL = events[start]
mergetype, symb = findMergeStart(eventL)
mergetype, symb = findMergeStart(eventL, ignored_mergetypes)

reason = ''
if mergetype == MergeType.No:
Expand Down Expand Up @@ -159,17 +159,23 @@ def mergeEvents(events: list[Dialogue],
"""
merge_list = [events[start]] # 所有需要合并的对白
start_ = start
ignored_mergetypes = [] # 不考虑的mergetype
while True:
# 考虑到存在下一行时间仍相同,或者出现新的标识符的情况,故不断搜索直到没有合并的情况
end, reason, mergetype = findMergeInterval(events, start_)
end, reason, mergetype = findMergeInterval(events, start_, ignored_mergetypes)

if end == -1:
warning(reason)
warnings.append((procid, reason))
end = start_

if end == start_:
break
# 只有MergeType.No或MergeType.Pair时才会出现此情况,
# 后者时应考虑存在其他的mergetype,故需要排除掉pair后continue
if mergetype == MergeType.No:
break
ignored_mergetypes.append(mergetype)
continue

log_reason.append(reason)
merge_list.extend(MyDialogue(event, mergetype) for event in events[start_+1:end+1])
Expand Down
1 change: 1 addition & 0 deletions utils/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
(re.compile(r'~'), ''), (re.compile(r'~'), ''), (re.compile(r'∼'), ''),
(re.compile(r'・'), ''),
(re.compile(r'♪'), ''),
(re.compile(r'≫'), ''),
# 顿号、改为半角空格
(re.compile(r'、'), ' '), (re.compile(r'、'), ' '),
# 双引号改为单引号(取消)
Expand Down
5 changes: 3 additions & 2 deletions utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ def overlaps(e1: Dialogue, e2: Dialogue) -> bool:


def extendEvent(e1: Dialogue, e2: Dialogue, sep: str, ignore_sep_on_pairs=True) -> Dialogue:
"""ignore_sep_on_pairs: do not add sep after left symbol / before end symbol, i.e. `[text]` not `[ text ]`"""
if e2.text:
# extend end time
e1.end = e2.end
# extend text
if ignore_sep_on_pairs and any(e1.text.endswith(left) or e1.text.endswith(right) or e2.text.startswith(right) for left, right in pairs.items()):
# extend text, but do not add sep inside pairs, i.e. `[text]` not `[ text ]`
if ignore_sep_on_pairs and any(e1.text.endswith(left) or e2.text.startswith(right) for left, right in pairs.items()):
e1.text = e1.text + e2.text
else:
e1.text = e1.text + sep + e2.text
Expand Down

0 comments on commit 3465d2d

Please sign in to comment.