Skip to content

Commit

Permalink
added value error for when there is no spm_model
Browse files (browse the repository at this point in the history)
  • Loading branch information
CaptainVee committed Sep 21, 2023
1 parent 2ac3362 commit c2f66cd
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
4 changes: 4 additions & 0 deletions laser_encoders/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ def __call__(self, sentences):
if self.spm_model:
sentences = self.tokenizer(sentences)
return self.encode_sentences(sentences)
+ else:
+ raise ValueError(
+ "Either initialize the encoder with an spm_model or pre-tokenize and use the encode_sentences method."
+ )

def _process_batch(self, batch):
tokens = batch.tokens
Expand Down
2 changes: 1 addition & 1 deletion laser_encoders/test_laser_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,5 +173,5 @@ def test_sentence_encoder(
sentence_embedding = sentence_encoder.encode_sentences([tokenized_text])

assert isinstance(sentence_embedding, np.ndarray)
- # assert sentence_embedding.shape == (1, 1024)
+ assert sentence_embedding.shape == (1, 1024)
assert np.allclose(expected_array, sentence_embedding[:, :10], atol=1e-3)

0 comments on commit c2f66cd

Please sign in to comment.