Skip to content

Commit

Permalink
[ci skip] fix comma
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jemoka committed Jul 7, 2024
1 parent 7953e11 commit 7ff64f8
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 11 deletions.
1 change: 1 addition & 0 deletions batchalign/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def __str__(self):
# t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
t = re.sub(r",", " , ", t.strip()).strip()
t = re.sub(r" +", " ", t.strip()).strip()
t = t.replace("+ ,", "+,").strip()
return t

def __repr__(self):
Expand Down
1 change: 1 addition & 0 deletions batchalign/pipelines/morphosyntax/ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
retokenized_ut = retokenized_ut.replace(" >", ">")
retokenized_ut = retokenized_ut.replace("< ", "<")
retokenized_ut = retokenized_ut.replace(" :", ":")
retokenized_ut = retokenized_ut.replace("+ ,", "+,")
retokenized_ut = retokenized_ut.replace(": <", ": <")
retokenized_ut = retokenized_ut.replace(" ↑", "↑")
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
Expand Down
2 changes: 1 addition & 1 deletion batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.7.3-beta.13
0.7.3-beta.14
July 6th, 2024
UD Fixes
21 changes: 11 additions & 10 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# json.dump(Document.model_json_schema(), df, indent=4)

########### The Batchalign Core Test Harness ###########
# from batchalign.formats.chat.parser import chat_parse_utterance
from batchalign.formats.chat.parser import chat_parse_utterance

# ng = NgramRetraceEngine()
# disf = DisfluencyReplacementEngine()
Expand Down Expand Up @@ -100,19 +100,20 @@

# text = "ice ice cream ice cream"

# function = "morphosyntax"
# lang = "cym"
# num_speakers = 1
function = "morphosyntax"
lang = "ron"
num_speakers = 1

# forms, delim = chat_parse_utterance("<ポン@o ポン@o> [/] ポン@o .", None, None, None, None)
# utterance = Utterance(content=forms, delim=delim, text="<ポン@o ポン@o> [/] ポン@o .")
forms, delim = chat_parse_utterance("+, culoarea galbenă. ", None, None, None, None)
utterance = Utterance(content=forms, delim=delim, text="+, culoarea galbenă .")

# ut = Document(content=[utterance], langs=["jpn"])

# pipeline = BatchalignPipeline.new("morphosyntax", lang="jpn")
# res = pipeline(ut, retokenize=True)
ut = Document(content=[utterance], langs=["ron"])

# print(str(CHATFile(doc=res)))
pipeline = BatchalignPipeline.new("morphosyntax", lang="ron")
res = pipeline(ut, retokenize=True)

print(str(CHATFile(doc=res)))


########### The Batchalign Individual Engine Harness ###########
Expand Down

0 comments on commit 7ff64f8

Please sign in to comment.