Skip to content

Commit

Permalink
patch minor UD bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed May 24, 2024
1 parent dc31c9d commit ca65e5c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 5 deletions.
7 changes: 5 additions & 2 deletions batchalign/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,12 @@ def __len__(self):

def __str__(self):
if self.text != None:
- return self.text
+ t = self.text
else:
- return self._detokenize()
+ t = self._detokenize()
+
+ t = t.replace(". . .", "+...")
+ return t

def __repr__(self):
return str(self)
Expand Down
2 changes: 1 addition & 1 deletion batchalign/pipelines/asr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def process_generation(output, lang="eng", utterance_engine=None):

final_utterances.append(Utterance(
tier=participant,
- content = words
+ content=words
))

doc.content = final_utterances
Expand Down
3 changes: 3 additions & 0 deletions batchalign/pipelines/morphosyntax/ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,10 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
for j in i]

retokenized_ut = " ".join(i for i in chunks_backplate if i.strip() not in ["(", ")"])
retokenized_ut = retokenized_ut.replace("^", "")
retokenized_ut = re.sub(r" +", " ", retokenized_ut)
retokenized_ut = retokenized_ut.replace("+ \"", "+\"")
retokenized_ut = retokenized_ut.replace(" >", ">")
# pray to everyone that it works---this will simply crash and ignore
# the utterance if it didn't work, so we are doing this as a sanity
# check rather than needing the parsed result
Expand Down
4 changes: 2 additions & 2 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
- 0.7.1-beta.9
+ 0.7.1-beta.10
May 21st, 2024
- minute %umor implementation changes
+ patch minor ud bugs

0 comments on commit ca65e5c

Please sign in to comment.