Skip to content

Commit

Permalink
chore: lint
Browse files Browse the repository at this point in the history
  • Loading branch information
jamescalam committed May 11, 2024
1 parent 83725db commit c597fc3
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion semantic_chunkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
"StatisticalChunker",
]

__version__ = "0.0.2"
__version__ = "0.0.2"
4 changes: 3 additions & 1 deletion semantic_chunkers/chunkers/statistical.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,9 @@ def plot_sentence_similarity_scores(
sentence after a similarity score below
a specified threshold.
"""
sentences = [sentence for doc in docs for sentence in sentence.regex_splitter(doc)]
sentences = [
sentence for doc in docs for sentence in sentence.regex_splitter(doc)
]
encoded_sentences = self._encode_documents(sentences)
similarity_scores = []

Expand Down
2 changes: 1 addition & 1 deletion semantic_chunkers/splitters/sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ def regex_splitter(text: str) -> list[str]:
"""
sentences = regex.split(regex_pattern, text, flags=regex.VERBOSE)
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
return sentences
return sentences
12 changes: 6 additions & 6 deletions tests/unit/test_splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from semantic_router.encoders.base import BaseEncoder
from semantic_router.encoders.cohere import CohereEncoder
from semantic_chunkers import BaseChunker
from semantic_chunkers import ConsecutiveSimSplitter
from semantic_chunkers import CumulativeSimSplitter
from semantic_chunkers import ConsecutiveChunker
from semantic_chunkers import CumulativeChunker


def test_consecutive_sim_splitter():
Expand All @@ -21,7 +21,7 @@ def test_consecutive_sim_splitter():
input_type="",
)
# Instantiate the ConsecutiveChunker, then swap in the mock encoder
splitter = ConsecutiveSimSplitter(encoder=cohere_encoder, score_threshold=0.9)
splitter = ConsecutiveChunker(encoder=cohere_encoder, score_threshold=0.9)
splitter.encoder = mock_encoder

# Define some documents
Expand Down Expand Up @@ -55,7 +55,7 @@ def test_cumulative_sim_splitter():
cohere_api_key="a",
input_type="",
)
splitter = CumulativeSimSplitter(encoder=cohere_encoder, score_threshold=0.9)
splitter = CumulativeChunker(encoder=cohere_encoder, score_threshold=0.9)
splitter.encoder = mock_encoder

# Define some documents
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_consecutive_similarity_splitter_single_doc():
# Assuming any return value since it should not reach the point of using the encoder
mock_encoder.return_value = np.array([[0.5, 0]])

splitter = ConsecutiveSimSplitter(encoder=mock_encoder, score_threshold=0.5)
splitter = ConsecutiveChunker(encoder=mock_encoder, score_threshold=0.5)

docs = ["doc1"]
with pytest.raises(ValueError) as excinfo:
Expand All @@ -96,7 +96,7 @@ def test_cumulative_similarity_splitter_single_doc():
# Assuming any return value since it should not reach the point of using the encoder
mock_encoder.return_value = np.array([[0.5, 0]])

splitter = CumulativeSimSplitter(encoder=mock_encoder, score_threshold=0.5)
splitter = CumulativeChunker(encoder=mock_encoder, score_threshold=0.5)

docs = ["doc1"]
with pytest.raises(ValueError) as excinfo:
Expand Down

0 comments on commit c597fc3

Please sign in to comment.