Skip to content

Commit

Permalink
feat: add lowercasing functionality to merge_tokens_into_text and col…
Browse files Browse the repository at this point in the history
…lect_svo_triples
  • Loading branch information
nxgeo committed Sep 27, 2024
1 parent 9ba08c6 commit d7e3d81
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
14 changes: 7 additions & 7 deletions id_svo_extractor/utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from spacy.tokens import Doc, Token


def merge_tokens_into_text(tokens: list[Token], lower: bool = False) -> str:
    """Join tokens into a single string, dropping trailing whitespace.

    Each token contributes its ``text_with_ws`` value, so the spacing
    between tokens is whatever the tokens themselves carry.

    Args:
        tokens: Tokens to concatenate, in order.
        lower: If true, return the merged text lowercased. Defaults to
            ``False``, which leaves the casing untouched.

    Returns:
        The concatenated text with trailing whitespace stripped; an empty
        string when ``tokens`` is empty.
    """
    # Only the right-hand side is stripped: the last token's trailing
    # whitespace is removed while leading/interior whitespace is kept.
    text = "".join(token.text_with_ws for token in tokens).rstrip()
    return text.lower() if lower else text


def collect_svo_triples(doc: Doc, lower: bool = False) -> list[tuple[str, str, str]]:
    """Collect the SVO triples of every sentence in ``doc`` as plain strings.

    Args:
        doc: Document whose sentences are iterated via ``doc.sents``.
        lower: If true, every returned string is lowercased. Defaults to
            ``False``.

    Returns:
        One ``(subject, verb, object)`` tuple of strings per triple, in
        sentence order.
    """
    svo_triples = []
    for sentence in doc.sents:
        # NOTE(review): `_.svo_triples` is presumably a custom extension
        # attribute registered elsewhere in the package; each entry is
        # unpacked into three token sequences here -- confirm at the
        # registration site.
        for s, v, o in sentence._.svo_triples:
            s = merge_tokens_into_text(s, lower)
            v = merge_tokens_into_text(v, lower)
            o = merge_tokens_into_text(o, lower)
            svo_triples.append((s, v, o))
    return svo_triples
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "id-svo-extractor"
version = "0.2.0"
version = "0.3.0"
requires-python = ">=3.10"
description = "id-svo-extractor: Extract SVO triples from Indonesian text."
readme = "README.md"
Expand Down

0 comments on commit d7e3d81

Please sign in to comment.