diff --git a/python/graphy/apps/demo_app.py b/python/graphy/apps/demo_app.py index ae73d1b03..b025b8522 100644 --- a/python/graphy/apps/demo_app.py +++ b/python/graphy/apps/demo_app.py @@ -1,7 +1,3 @@ -import torchtext - -torchtext.disable_torchtext_deprecation_warning() - from workflow import SurveyPaperReading, ThreadPoolWorkflowExecutor from graph.nodes.paper_reading_nodes import ProgressInfo from config import WF_UPLOADS_DIR, WF_OUTPUT_DIR, WF_DATA_DIR, WF_VECTDB_DIR diff --git a/python/graphy/models/__init__.py b/python/graphy/models/__init__.py index 540f13791..fe14a2cfa 100644 --- a/python/graphy/models/__init__.py +++ b/python/graphy/models/__init__.py @@ -10,7 +10,6 @@ TextEmbedding, DefaultEmbedding, TfidfEmbedding, - GloveEmbedding, SentenceTransformerEmbedding, ) @@ -21,7 +20,6 @@ "TextEmbedding", "DefaultEmbedding", "TfidfEmbedding", - "GloveEmbedding", "SentenceTransformerEmbedding", ] diff --git a/python/graphy/models/embedding_model.py b/python/graphy/models/embedding_model.py index 8b38082f6..be9e77247 100644 --- a/python/graphy/models/embedding_model.py +++ b/python/graphy/models/embedding_model.py @@ -2,7 +2,6 @@ from typing import List from chromadb.utils import embedding_functions from sentence_transformers import SentenceTransformer -from torchtext.vocab import GloVe from sklearn.feature_extraction.text import TfidfVectorizer import numpy as np @@ -57,36 +56,6 @@ def get_name(self): return "TF-IDF" -class GloveEmbedding(TextEmbedding): - def __init__(self): - # TODO: the parameters can be configurable if necessary - self.embeddings = GloVe(name="6B", dim=100) - self.max_length = 100 - self.embedding_dim = 100 - - def embed(self, text_data: List[str]): - def sentence_embedding(sentence): - words = sentence.split() - num_words = min(len(words), self.max_length) - embedding_sentence = np.zeros((self.max_length, self.embedding_dim)) - for i in range(num_words): - word = words[i] - if word in self.embeddings.stoi: - embedding_sentence[i] = self.embeddings.vectors[ - self.embeddings.stoi[word] - ] - return embedding_sentence.flatten() - - return np.vstack([sentence_embedding(data) for data in text_data]) - - def chroma_embedding_model(self): - # TODO - return None - - def get_name(self): - return "GloVe" - - class SentenceTransformerEmbedding(TextEmbedding): def __init__(self, embedding_model_name: str = ""): if not embedding_model_name: diff --git a/python/graphy/paper_scrapper.py b/python/graphy/paper_scrapper.py index ce74be13e..580207525 100644 --- a/python/graphy/paper_scrapper.py +++ b/python/graphy/paper_scrapper.py @@ -1,7 +1,3 @@ -import torchtext - -torchtext.disable_torchtext_deprecation_warning() - import ray from workflow import ThreadPoolWorkflowExecutor, SurveyPaperReading diff --git a/python/graphy/requirements.txt b/python/graphy/requirements.txt index 3eaaee735..7ecca125b 100644 --- a/python/graphy/requirements.txt +++ b/python/graphy/requirements.txt @@ -38,7 +38,6 @@ spacy==3.7.4 tiktoken>=0.8.0 tools>=0.1.9 torch==2.3.0 -torchtext==0.18.0 transformers==4.41 webdriver-manager>=4.0 Werkzeug>=3.0.3 diff --git a/python/graphy/tests/workflow/inspector_navigator_test.py b/python/graphy/tests/workflow/inspector_navigator_test.py index 7efd6b1f2..cba882ab5 100644 --- a/python/graphy/tests/workflow/inspector_navigator_test.py +++ b/python/graphy/tests/workflow/inspector_navigator_test.py @@ -1,7 +1,3 @@ -import torchtext - -torchtext.disable_torchtext_deprecation_warning() - import pytest from models import DEFAULT_LLM_MODEL_CONFIG from workflow import ThreadPoolWorkflowExecutor diff --git a/python/graphy/tests/workflow/paper_inspector_test.py b/python/graphy/tests/workflow/paper_inspector_test.py index 19097b590..8d568f3b2 100644 --- a/python/graphy/tests/workflow/paper_inspector_test.py +++ b/python/graphy/tests/workflow/paper_inspector_test.py @@ -1,7 +1,3 @@ -import torchtext - -torchtext.disable_torchtext_deprecation_warning() - import pytest from unittest.mock import MagicMock, create_autospec from graph import BaseGraph diff --git a/python/graphy/utils/text_clustering.py b/python/graphy/utils/text_clustering.py index 311ac025c..d1f39cde3 100644 --- a/python/graphy/utils/text_clustering.py +++ b/python/graphy/utils/text_clustering.py @@ -1,7 +1,3 @@ -import torchtext - -torchtext.disable_torchtext_deprecation_warning() - import json import os import numpy as np