-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into scipy-ml-fixup
- Loading branch information
Showing
9 changed files
with
152 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# Copies the scipy-ml-notebook image from Docker Hub to the GitHub Container
# Registry (ghcr.io).
name: migrate scipy

env:
  DOCKER_CLIENT_TIMEOUT: "300"
  COMPOSE_HTTP_TIMEOUT: "300"
  REGISTRY: ghcr.io

on:
  push:
    paths:
      - "images/**"
      - "model/**"
      - "scripts/**"
      - "dodo.py"
      - ".github/workflows/main.yml"

  pull_request:
    branches: [ main ]
    paths:
      - "images/**"
      - "model/**"
      - "scripts/**"
      - "dodo.py"
      - ".github/workflows/main.yml"

  workflow_dispatch:

jobs:
  docker-pipeline:
    runs-on: ubuntu-latest
    # The job logs in to ghcr.io with GITHUB_TOKEN and pushes an image, so the
    # token needs package write access; checkout only needs to read contents.
    permissions:
      contents: read
      packages: write
    # Skip the job when the head commit message or the PR title contains "skip ci".
    if: >
      !contains(github.event.head_commit.message , 'skip ci') &&
      !contains(github.event.pull_request.title, 'skip ci')
    steps:
      # Runs before the free-space report and cleanup steps below.
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Check Free Space 0
        run: |
          echo "Free space:"
          df -h
      # Prune Docker data and delete preinstalled toolchains from the runner.
      - name: Docker/ENV cleanup
        run: |
          docker image prune -a -f
          docker container prune -f
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      # The login action is pinned to a full commit SHA.
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Pull the image from Docker Hub, retag it for ghcr.io, and push it there.
      - name: Docker push
        run: |
          docker pull ucsdets/scipy-ml-notebook:2021.3-stable
          docker tag docker.io/ucsdets/scipy-ml-notebook:2021.3-stable ghcr.io/ucsd-ets/scipy-ml-notebook:2021.3-stable
          docker push ghcr.io/ucsd-ets/scipy-ml-notebook:2021.3-stable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
TensorFlow compatibility matrix: https://www.tensorflow.org/install/source?hl=en#gpu |
76 changes: 76 additions & 0 deletions
76
images/scipy-ml-notebook/workflow_tests/test_huggingface.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
### THESE TESTS WILL DOWNLOAD A BUNCH OF MODELS TO YOUR .CACHE DIR | ||
### IF MANUALLY RUN, DELETE THEM AFTER TO SAVE SPACE | ||
|
||
# The results of these tests are somewhat subject to randomness. It's possible that values will change as models change. You can always run these from the container to see what's wrong with them. | ||
|
||
from transformers import pipeline | ||
from transformers import AutoTokenizer | ||
|
||
import pytest | ||
|
||
# test basic sentiment analysis | ||
def get_sentiment_analysis(string):
    """Run the default "sentiment-analysis" pipeline on *string*.

    A fresh pipeline object is built on every call and its output is
    returned unchanged. The tests in this file take element ``[0]`` of the
    result and read its ``"label"`` and ``"score"`` keys.
    """
    sentiment_pipeline = pipeline("sentiment-analysis")
    return sentiment_pipeline(string)
|
||
def test_positive_sent():
    """Check that "I love you" is labelled POSITIVE with a score above 0.9."""
    prediction = get_sentiment_analysis("I love you")[0]

    assert "POSITIVE" == prediction["label"]
    assert .9 < prediction["score"]
|
||
def test_negative_sent():
    """Check that the hostile sentence is labelled NEGATIVE with a score above 0.9."""
    prediction = get_sentiment_analysis("I hate you you")[0]

    assert "NEGATIVE" == prediction["label"]
    assert .9 < prediction["score"]
|
||
# basic transcription, don't specify a model if you care about the space in your .cache dir | ||
def test_transcribe_mlk():
    """Transcribe a hosted speech sample and look for the phrase "HAVE A DREAM".

    The pipeline is created by task name only, so the library picks the model.
    """
    audio_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"
    speech_to_text = pipeline(task="automatic-speech-recognition")

    transcript = speech_to_text(audio_url)["text"]

    assert "HAVE A DREAM" in transcript
|
||
def test_cat_recognition():
    """Classify a hosted cat photo and require at least one label containing "cat".

    Only the predicted labels matter for this check, so the scores are not
    post-processed. The substring match is case-sensitive.
    """
    vision_classifier = pipeline(model="google/vit-base-patch16-224")
    preds = vision_classifier(
        images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
    )
    labels = [pred["label"] for pred in preds]

    # Include the labels in the failure message so a mismatch is diagnosable.
    assert any("cat" in label for label in labels), f"no 'cat' label in {labels}"
|
||
def test_zero_shot_class():
    """Zero-shot classify a support message and expect "urgent" as the first label.

    The first returned label must be "urgent" and the first returned score
    must exceed 0.4.
    """
    message = "I have a problem with my iphone that needs to be resolved asap!!"
    labels = ["urgent", "not urgent", "phone", "tablet", "computer"]

    zero_shot = pipeline(
        task="zero-shot-classification",
        model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
    )
    outcome = zero_shot(message, candidate_labels=labels)

    assert "urgent" == outcome["labels"][0]
    assert .4 < outcome["scores"][0]
|
||
# the function will return a bunch of nonsense that we can't assert but will verify that | ||
# tensorflow probably works fine with transformer | ||
def test_tf_tokenizer():
    """Tokenize a small batch with ``return_tensors="tf"`` and check the tensor type.

    The token values themselves are not asserted; only the type of
    ``input_ids`` is compared against the expected eager-tensor class name.
    """
    sentences = [
        "But what about second breakfast?",
        "Don't think he knows about second breakfast, Pip.",
        "What about elevensies?",
    ]
    expected_type = "<class 'tensorflow.python.framework.ops.EagerTensor'>"

    bert_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    encoding = bert_tokenizer(sentences, padding=True, truncation=True, return_tensors="tf")

    assert expected_type == str(type(encoding["input_ids"]))
|
||
# the function will return a bunch of nonsense that we can't assert but will verify that | ||
# pytorch probably works fine with transformer | ||
def test_pytorch_tokenizer():
    """Tokenize a small batch with ``return_tensors="pt"`` and check the tensor type.

    The token values themselves are not asserted; the test only verifies that
    ``input_ids`` comes back as a ``torch.Tensor``.
    """
    # Imported locally so the type check can use the real class instead of
    # comparing against the string form of the type.
    import torch

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")

    batch_sentences = [
        "But what about second breakfast?",
        "Don't think he knows about second breakfast, Pip.",
        "What about elevensies?",
    ]
    encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt")
    # Kept for debugging: pytest shows captured output when the test fails.
    print(encoded_input)

    input_ids = encoded_input["input_ids"]
    assert isinstance(input_ids, torch.Tensor), f"expected torch.Tensor, got {type(input_ids)!r}"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters