chore: merge main

trigaten · Nov 2, 2023 · 42fdd14 · 42fdd14
2 parents e4129f0 + 8a9df74
commit 42fdd14
Show file tree

Hide file tree

Showing 7 changed files with 158 additions and 19 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -11,6 +11,7 @@ on:
 
 jobs:
   build:
+    environment: HF
 
     runs-on: ubuntu-latest
     strategy:
@@ -36,5 +37,7 @@ jobs:
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with pytest
+      env:
+        # Exposed to the step's shell through `env`; the script needs no inline export of the secret.
+        HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }}
       run: |
         pytest
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 __pycache__
 _build
 .DS_Store
-build/
+build/
+*.egg-info/
diff --git a/data/prompt_engineering_reviewed.csv b/data/prompt_engineering_reviewed.csv
@@ -1,17 +1,21 @@
-Title, Link, Labels
- "Don't Complete It! Preventing Unhelpful Code Completion for Productive and Sustainable Neural Code Completion Systems", https://arxiv.org/pdf/2209.05948v2.pdf, Analysis of Technique;New Model;Applying Prompts/prompt Engineering
- "Unleashing the potential of prompt engineering in Large Language Models: a comprehensive review", https://arxiv.org/abs/2310.08129v1, Survey;Analysis of Technique;Applying Prompts/prompt Engineering;Agents
- "LoGoPrompt: Synthetic Text Images Can Be Good Visual Prompts for Vision-Language Models", https://arxiv.org/pdf/2309.01155v2.pdf, Survey;Analysis of Technique;Applying Promtps/Prompt Engineering
- "Prompt Engineering For Students of Medicine and Their Teachers", https://arxiv.org/pdf/2308.11628v1.pdf, Survey;Analysis of Technique;Applying prompts/prompt engineering
- "A Systematic Survey of Prompt Engineering on Vision-Language Foundation Models", https://arxiv.org/pdf/2307.12980v1.pdf, Survey;Other modality than text;Applying Prompts/prompt Engineering
-"Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models", https://arxiv.org/abs/2106.13353v2, New Technique;Analysis of     Technique;Applying Prompts/Prompt Engineering
-"Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting", https://arxiv.org/abs/2310.08129v1, New Technique;Analysis of Technique;Other Modality than Text;Applying Prompts/Prompt Engineering
-"Large Language Models in the Workplace: A Case Study on Prompt Engineering for Job Type Classification", https://arxiv.org/abs/2303.07142v3, Applying Promtps/Prompt Engineering;Analysis of Model
-"Open-Ended Instructable Embodied Agents with Memory-Augmented Large Language Models", https://arxiv.org/abs/2310.15127v1, New model;New Technique;Agents
-"Addressing Compiler Errors: Stack Overflow or Large Language Models?", https://arxiv.org/abs/2307.10793v1, Analysis of model;Applying Prompts/Prompt Engineering
-"Prompt Learning for Action Recognition", http://arxiv.org/abs/2305.12437v1, New Technique; Agents; Analysis of technique; Applying prompts/prompt engineering; Other modality than text
-"Effective Structured Prompting by Meta-Learning and Representative Verbalizer",http://arxiv.org/abs/2306.00618v1, New Technique; Analysis of technique
-"Invalid Logic, Equivalent Gains: The Bizarreness of Reasoning in Language Model Prompting",http://arxiv.org/abs/2307.10573v2, Analysis of technique
-"High-Fidelity Lake Extraction via Two-Stage Prompt Enhancement: Establishing a Novel Baseline and Benchmark",http://arxiv.org/abs/2308.08443v1, Analysis of technique; Other modality than text; Applying prompts/prompt engineering
-Structured Prompt Tuning,http://arxiv.org/abs/2205.12309v1, New Technique
-
+Title, Link, Labels
+"The Creativity of Text-to-Image Generation", http://dx.doi.org/10.1145/3569219.3569352, Applying prompts/prompt engineering
+"Beyond Traditional Teaching: The Potential of Large Language Models and Chatbots in Graduate Engineering Education", http://dx.doi.org/10.32388/MD04B0, Analysis of model;Applying prompts/prompt engineering;Agents;Ethics/Consequences
+"Collective Creativity in Crowd Ideation", http://dx.doi.org/10.1145/3411764.3445782, Survey;Analysis of technique;Applying prompts/prompt engineering;Agents
+"Massive prompt cusps: A new signature of warm dark matter", http://dx.doi.org/10.1093/mnrasl/slad043, Analysis of technique;Analysis of model
+"Scientific Literature Text Mining and the Case for Open Access", http://dx.doi.org/10.21428/14888, New model
+ "Don't Complete It! Preventing Unhelpful Code Completion for Productive and Sustainable Neural Code Completion Systems", https://arxiv.org/pdf/2209.05948v2.pdf, Analysis of Technique;New Model;Applying Prompts/prompt Engineering
+ "Unleashing the potential of prompt engineering in Large Language Models: a comprehensive review", https://arxiv.org/abs/2310.08129v1, Survey;Analysis of Technique;Applying Prompts/prompt Engineering;Agents
+ "LoGoPrompt: Synthetic Text Images Can Be Good Visual Prompts for Vision-Language Models", https://arxiv.org/pdf/2309.01155v2.pdf, Survey;Analysis of Technique;Applying Prompts/Prompt Engineering
+ "Prompt Engineering For Students of Medicine and Their Teachers", https://arxiv.org/pdf/2308.11628v1.pdf, Survey;Analysis of Technique;Applying prompts/prompt engineering
+ "A Systematic Survey of Prompt Engineering on Vision-Language Foundation Models", https://arxiv.org/pdf/2307.12980v1.pdf, Survey;Other modality than text;Applying Prompts/prompt Engineering
+"Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models", https://arxiv.org/abs/2106.13353v2, New Technique;Analysis of Technique;Applying Prompts/Prompt Engineering
+"Tailored Visions: Enhancing Text-to-Image Generation with Personalized Prompt Rewriting", https://arxiv.org/abs/2310.08129v1, New Technique;Analysis of Technique;Other Modality than Text;Applying Prompts/Prompt Engineering
+"Large Language Models in the Workplace: A Case Study on Prompt Engineering for Job Type Classification", https://arxiv.org/abs/2303.07142v3, Applying Prompts/Prompt Engineering;Analysis of Model
+"Open-Ended Instructable Embodied Agents with Memory-Augmented Large Language Models", https://arxiv.org/abs/2310.15127v1, New model;New Technique;Agents
+"Addressing Compiler Errors: Stack Overflow or Large Language Models?", https://arxiv.org/abs/2307.10793v1, Analysis of model;Applying Prompts/Prompt Engineering
+"Prompt Learning for Action Recognition", http://arxiv.org/abs/2305.12437v1, New Technique; Agents; Analysis of technique; Applying prompts/prompt engineering; Other modality than text
+"Effective Structured Prompting by Meta-Learning and Representative Verbalizer",http://arxiv.org/abs/2306.00618v1, New Technique; Analysis of technique
+"Invalid Logic, Equivalent Gains: The Bizarreness of Reasoning in Language Model Prompting",http://arxiv.org/abs/2307.10573v2, Analysis of technique
+"High-Fidelity Lake Extraction via Two-Stage Prompt Enhancement: Establishing a Novel Baseline and Benchmark",http://arxiv.org/abs/2308.08443v1, Analysis of technique; Other modality than text; Applying prompts/prompt engineering
+Structured Prompt Tuning,http://arxiv.org/abs/2205.12309v1, New Technique
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,9 @@ numpy
 pandas
 requests
 matplotlib
+datasets
+huggingface_hub
+pytest
 jellyfish
 black
 sphinx

diff --git a/setup.cfg b/setup.cfg
@@ -31,6 +31,9 @@ install_requires =
     requests
     matplotlib
     jellyfish
+    datasets
+    huggingface_hub
+    black
 
 [options.packages.find]
 where=src
diff --git a/src/prompt_systematic_review/pipeline.py b/src/prompt_systematic_review/pipeline.py
@@ -0,0 +1,61 @@
+from huggingface_hub import HfFileSystem, login
+import pandas as pd
+from io import StringIO
+import os
+
+
+"""
+Background reading for this module (GitHub Actions secrets, HfFileSystem guide):
+https://docs.github.com/en/actions/security-guides/using-secrets-in-github-actions
+https://huggingface.co/docs/huggingface_hub/v0.18.0.rc0/guides/hf_file_system
+
+"""
+
+
+class Pipeline:
+    """Client for the Prompt Systematic Review dataset repo on the Hugging Face Hub.
+
+    Wraps an ``HfFileSystem`` together with a root URL that points at one
+    revision of the dataset repository, and offers helpers to list, read and
+    write files under that root.
+    """
+
+    # Single source of truth for the root URL; filled in with the revision.
+    _ROOT_TEMPLATE = "hf://datasets/PromptSystematicReview/Prompt_Systematic_Review_Dataset@{revision}/"
+
+    def __init__(self, token=None, revision="main"):
+        """Create a client for ``revision``, logging in when ``token`` is given.
+
+        Args:
+            token: Hugging Face access token, or ``None`` for anonymous access.
+            revision: Revision of the dataset repo to address (default ``"main"``).
+        """
+        self.token = token
+        self.root = self._ROOT_TEMPLATE.format(revision=revision)
+        if token is not None:
+            self.fs = HfFileSystem(token=token)
+            login(token=token)
+        else:
+            self.fs = HfFileSystem()
+        self.revision = revision
+
+    def is_logged_in(self):
+        """Return ``True`` when a token has been stored on this client."""
+        return self.token is not None
+
+    def get_revision(self):
+        """Return the revision this client currently addresses."""
+        return self.revision
+
+    def set_revision(self, revision):
+        """Point the client at another revision and rebuild the root URL.
+
+        Args:
+            revision: New revision name; must be an alphanumeric string.
+
+        Raises:
+            ValueError: If ``revision`` is not an alphanumeric string.
+        """
+        # Explicit check instead of `assert`: asserts are removed under
+        # `python -O`, which would silently disable the validation.
+        if not (isinstance(revision, str) and revision.isalnum()):
+            raise ValueError("Revision must be alphanumeric")
+        self.revision = revision
+        self.root = self._ROOT_TEMPLATE.format(revision=revision)
+
+    def login(self, token):
+        """Authenticate a client that was created without a token.
+
+        Args:
+            token: Hugging Face access token.
+
+        Raises:
+            ValueError: If the client already holds a token.
+        """
+        if self.token is not None:
+            raise ValueError("Already Logged In")
+        # Use the token passed in: self.token is still None at this point, so
+        # building the filesystem from it would leave it unauthenticated.
+        self.fs = HfFileSystem(token=token)
+        login(token=token)
+        self.token = token
+
+    def get_all_files(self):
+        """Return the paths listed directly under the root (no detail dicts)."""
+        return self.fs.ls(self.root, detail=False, revision=self.revision)
+
+    def get_all_data_files(self):
+        """Return the paths under the root that match the ``**.csv`` glob."""
+        return self.fs.glob(self.root + "**.csv", revision=self.revision)
+
+    def read_from_file(self, fileName):
+        """Read ``fileName`` (relative to the root) into a pandas DataFrame."""
+        return pd.read_csv(os.path.join(self.root, fileName))
+
+    def write_to_file(self, fileName, dataFrame):
+        """Write ``dataFrame`` as CSV to ``fileName`` under the root.
+
+        The DataFrame index is not written.
+
+        Raises:
+            ValueError: If the client is not logged in.
+        """
+        if not self.is_logged_in():
+            raise ValueError("Not Logged In")
+        path = os.path.join(self.root, fileName)
+        dataFrame.to_csv(path, index=False)
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
@@ -0,0 +1,64 @@
+# test_pipeline.py
+import pytest
+import os
+from prompt_systematic_review.pipeline import *
+from huggingface_hub import delete_file
+import random
+import time
+import hashlib
+
+
+def hashString(bytes):
+    """Return the hexadecimal MD5 digest of ``bytes`` as a string."""
+    digest = hashlib.md5(bytes)
+    return str(digest.hexdigest())
+
+
+@pytest.fixture
+def client():
+    """Provide a Pipeline on the "test" revision, logged in with HF_AUTH_TOKEN."""
+    authToken = os.environ["HF_AUTH_TOKEN"]
+    return Pipeline(token=authToken, revision="test")
+
+
+def test_login():
+    """A client created without a token becomes logged in after login()."""
+    testClient = Pipeline(revision="test")
+    assert not testClient.is_logged_in()
+    testClient.login(os.environ["HF_AUTH_TOKEN"])
+    assert testClient.is_logged_in()
+
+
+def test_get_all_files(client):
+    """The root listing of the test revision is not empty."""
+    listedFiles = client.get_all_files()
+    assert len(listedFiles) > 0
+
+
+def test_get_all_data_files(client):
+    """The data-file listing is non-empty and contains only .csv paths."""
+    # Fetch once so both assertions look at the same remote snapshot.
+    dataFiles = client.get_all_data_files()
+    assert len(dataFiles) > 0
+    assert all(x.endswith(".csv") for x in dataFiles)
+
+
+def test_read_from_file(client):
+    """test.csv loads as a non-empty two-column frame whose mean "Age" is 21."""
+    # Download the file once instead of once per assertion.
+    df = client.read_from_file("test.csv")
+    assert len(df) > 0
+    assert len(df.columns) == 2
+    assert df["Age"].mean() == 21
+
+
+def test_write_to_file(client):
+    """A written CSV can be read back, and deleting it restores the file count."""
+    lenOfFiles = len(client.get_all_files())
+    # Random bytes plus a timestamp give a file name unlikely to collide.
+    randString = random.randbytes(100) + str(time.time()).encode()
+    fileName = f"{hashString(randString)[:10]}_test.csv"
+    csvDict = {"test": [1, 3], "test2": [2, 4]}
+    client.write_to_file(fileName, pd.DataFrame(csvDict))
+    try:
+        # NOTE(review): presumably gives the Hub time to register the upload
+        # before reading it back; confirm the pause is still needed.
+        time.sleep(1)
+        df = client.read_from_file(fileName)
+        assert df["test"].sum() == 4
+        assert df["test2"].sum() == 6
+    finally:
+        # Always remove the temporary file, even when an assertion above
+        # fails, so failed runs do not leave files behind in the repo.
+        delete_file(
+            fileName,
+            "PromptSystematicReview/Prompt_Systematic_Review_Dataset",
+            repo_type="dataset",
+            revision="test",
+        )
+
+    assert len(client.get_all_files()) == lenOfFiles