-
Notifications
You must be signed in to change notification settings - Fork 135
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #214 from JohnSnowLabs/sod_embedding_integration
InstructorEmbeddings integration
Showing
11 changed files
with
1,604 additions
and
0 deletions.
There are no files selected for viewing
1,502 changes: 1,502 additions & 0 deletions
1,502
...les/colab/component_examples/sentence_embeddings/NLU_INSTRUCTOR_sentence_embeddings.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
20 changes: 20 additions & 0 deletions
20
nlu/components/embeddings/instructor_sentence/InstructorEmbeddings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from sparknlp.annotator import InstructorEmbeddings | ||
|
||
|
||
class Instructor:
    """Factory helpers that build configured ``InstructorEmbeddings`` annotators."""

    @staticmethod
    def _apply_defaults(annotator):
        """Apply the shared instruction and column settings to ``annotator``.

        Sets the instruction to the literal ``"Instruction here: "``, the
        input columns to ``["documents"]`` and the output column to
        ``"instructor"``, then returns the result of the final setter call.
        """
        return (
            annotator
            .setInstruction("Instruction here: ")
            .setInputCols(["documents"])
            .setOutputCol("instructor")
        )

    @staticmethod
    def get_default_model():
        """Return ``InstructorEmbeddings.pretrained()`` with the shared settings applied."""
        return Instructor._apply_defaults(InstructorEmbeddings.pretrained())

    @staticmethod
    def get_pretrained_model(name, language, bucket=None):
        """Return a specific pretrained model with the shared settings applied.

        ``name``, ``language`` and ``bucket`` are forwarded positionally, in
        that order, to ``InstructorEmbeddings.pretrained``.
        """
        pretrained = InstructorEmbeddings.pretrained(name, language, bucket)
        return Instructor._apply_defaults(pretrained)
|
||
|
||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
...u_core_tests/component_tests/embed_tests/sentence_embeddings/sentence_instructor_tests.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import unittest | ||
|
||
from nlu import * | ||
|
||
|
||
class TestInstructorSentenceEmbeddings(unittest.TestCase):
    """Smoke tests that run the instructor base and large pipelines and print results.

    No assertions are made on the output; a test fails only if loading,
    configuring or predicting raises.
    """

    _INSTRUCTION = "Represent the Amazon title for retrieving relevant reviews: "
    _TEXT = "Loved it! It is Exciting, interesting, and even including information about the space program."

    # (NLU model reference, key of the embeddings component inside the loaded pipe),
    # listed in the order the models are exercised.
    _MODELS = (
        (
            "en.embed_sentence.instructor_base",
            'instructor_sentence_embeddings@INSTRUCTOR_EMBEDDINGS_1c5e51202650',
        ),
        (
            "en.embed_sentence.instructor_large",
            'instructor_sentence_embeddings@INSTRUCTOR_EMBEDDINGS_46e0451abc97',
        ),
    )

    def _predict_and_print(self, output_level):
        """Load each model, set its instruction, predict at ``output_level`` and print."""
        for model_ref, component_key in self._MODELS:
            pipe = nlu.load(model_ref, verbose=True)
            pipe[component_key].setInstruction(self._INSTRUCTION)
            res = pipe.predict(self._TEXT, output_level=output_level)

            # Iterating the result yields keys (presumably column names); print each entry.
            for column in res:
                print(res[column])

    def test_instructor_embeds_sentence_level(self):
        """Run both models with ``output_level='sentence'``."""
        self._predict_and_print('sentence')

    def test_instructor_embeds_document_level(self):
        """Run both models with ``output_level='document'``."""
        self._predict_and_print('document')
|
||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()