Upgrade of ruff, adding codespell, toml-sort (#251)

* Adds toml-sort, codespell, removes isort in favor of ruff
* Consolidates tools into pyproject.toml, and adds tool configs
* Fixed all codespell errors
* All ruff autofixes
* Other ruff errors using --add-noqa
Future-House · Mar 8, 2024 · 7b28a34 · 7b28a34
1 parent a50dff1
commit 7b28a34
Show file tree

Hide file tree

Showing 14 changed files with 430 additions and 290 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,15 +2,23 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
-      - id: trailing-whitespace
+      - id: check-added-large-files
+      - id: check-byte-order-marker
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-symlinks
+      - id: check-toml
       - id: check-yaml
+      - id: debug-statements
+      - id: detect-private-key
       - id: end-of-file-fixer
       - id: mixed-line-ending
-      - id: check-added-large-files
+      - id: trailing-whitespace
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.0.270"
+    rev: v0.3.1
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
@@ -24,14 +32,18 @@ repos:
       - id: mypy
         args: [--pretty, --ignore-missing-imports]
         additional_dependencies: [types-requests, types-setuptools]
-  - repo: https://github.com/PyCQA/isort
-    rev: "5.12.0"
-    hooks:
-      - id: isort
-        args: [--profile=black, "--skip=__init__.py", "--filter-files"]
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v3.1.0
     hooks:
       - id: prettier
         additional_dependencies:
           - prettier@3.1.0 # SEE: https://github.com/pre-commit/pre-commit/issues/3133
+  - repo: https://github.com/pappasam/toml-sort
+    rev: v0.23.1
+    hooks:
+      - id: toml-sort-fix
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        additional_dependencies: [".[toml]"]
diff --git a/.ruff.toml b/.ruff.toml
diff --git a/README.md b/README.md
@@ -378,7 +378,7 @@ It's not that different! This is similar to the tree response method in LlamaInd
 
 ### How is this different from LangChain?
 
-There has been some great work on retrievers in langchain and you could say this is an example of a retreiver.
+There has been some great work on retrievers in langchain and you could say this is an example of a retriever.
 
 ### Can I save or load?
 

diff --git a/paperqa/__init__.py b/paperqa/__init__.py
@@ -1,21 +1,21 @@
-from .docs import Answer, Docs, PromptCollection, Doc, Text, Context, print_callback
-from .version import __version__
+from .docs import Answer, Context, Doc, Docs, PromptCollection, Text, print_callback
 from .llms import (
-    LLMModel,
+    AnthropicLLMModel,
     EmbeddingModel,
+    HybridEmbeddingModel,
     LangchainEmbeddingModel,
-    OpenAIEmbeddingModel,
     LangchainLLMModel,
-    OpenAILLMModel,
-    AnthropicLLMModel,
+    LangchainVectorStore,
     LlamaEmbeddingModel,
-    HybridEmbeddingModel,
-    SparseEmbeddingModel,
+    LLMModel,
+    LLMResult,
     NumpyVectorStore,
-    LangchainVectorStore,
+    OpenAIEmbeddingModel,
+    OpenAILLMModel,
     SentenceTransformerEmbeddingModel,
-    LLMResult,
+    SparseEmbeddingModel,
 )
+from .version import __version__
 
 __all__ = [
     "Docs",

diff --git a/paperqa/contrib/zotero.py b/paperqa/contrib/zotero.py
@@ -9,15 +9,15 @@
 try:
     from pyzotero import zotero
 except ImportError:
-    raise ImportError("Please install pyzotero: `pip install pyzotero`")
+    raise ImportError("Please install pyzotero: `pip install pyzotero`")  # noqa: B904
 from ..paths import PAPERQA_DIR
 from ..utils import StrPath, count_pdf_pages
 
 
 class ZoteroPaper(BaseModel):
     """A paper from Zotero.
 
-    Attributes
+    Attributes:
     ----------
     key : str
         The citation key.
@@ -65,9 +65,9 @@ def __init__(
         self,
         *,
         library_type: str = "user",
-        library_id: Optional[str] = None,
-        api_key: Optional[str] = None,
-        storage: Optional[StrPath] = None,
+        library_id: Optional[str] = None,  # noqa: FA100
+        api_key: Optional[str] = None,  # noqa: FA100
+        storage: Optional[StrPath] = None,  # noqa: FA100
         **kwargs,
     ):
         self.logger = logging.getLogger("ZoteroDB")
@@ -81,7 +81,7 @@ def __init__(
                     " from the text 'Your userID for use in API calls is [XXXXXX]'."
                     " Then, set the environment variable ZOTERO_USER_ID to this value."
                 )
-            else:
+            else:  # noqa: RET506
                 library_id = os.environ["ZOTERO_USER_ID"]
 
         if api_key is None:
@@ -93,7 +93,7 @@ def __init__(
                     " with access to your library."
                     " Then, set the environment variable ZOTERO_API_KEY to this value."
                 )
-            else:
+            else:  # noqa: RET506
                 api_key = os.environ["ZOTERO_API_KEY"]
 
         self.logger.info(f"Using library ID: {library_id} with type: {library_type}.")
@@ -108,7 +108,7 @@ def __init__(
             library_type=library_type, library_id=library_id, api_key=api_key, **kwargs
         )
 
-    def get_pdf(self, item: dict) -> Union[Path, None]:
+    def get_pdf(self, item: dict) -> Union[Path, None]:  # noqa: FA100
         """Gets a filename for a given Zotero key for a PDF.
 
         If the PDF is not found locally, the PDF will be downloaded to a local file at the correct key.
@@ -120,7 +120,7 @@ def get_pdf(self, item: dict) -> Union[Path, None]:
             An item from `pyzotero`. Should have a `key` field, and also have an entry
             `links->attachment->attachmentType == application/pdf`.
         """
-        if type(item) != dict:
+        if type(item) != dict:  # noqa: E721
             raise TypeError("Pass the full item of the paper. The item must be a dict.")
 
         pdf_key = _extract_pdf_key(item)
@@ -137,17 +137,17 @@ def get_pdf(self, item: dict) -> Union[Path, None]:
 
         return pdf_path
 
-    def iterate(
+    def iterate(  # noqa: C901, PLR0912
         self,
         limit: int = 25,
         start: int = 0,
-        q: Optional[str] = None,
-        qmode: Optional[str] = None,
-        since: Optional[str] = None,
-        tag: Optional[str] = None,
-        sort: Optional[str] = None,
-        direction: Optional[str] = None,
-        collection_name: Optional[str] = None,
+        q: Optional[str] = None,  # noqa: FA100
+        qmode: Optional[str] = None,  # noqa: FA100
+        since: Optional[str] = None,  # noqa: FA100
+        tag: Optional[str] = None,  # noqa: FA100
+        sort: Optional[str] = None,  # noqa: FA100
+        direction: Optional[str] = None,  # noqa: FA100
+        collection_name: Optional[str] = None,  # noqa: FA100
     ):
         """Given a search query, this will lazily iterate over papers in a Zotero library, downloading PDFs as needed.
 
@@ -210,8 +210,8 @@ def iterate(
 
         max_limit = 100
 
-        items: List = []
-        pdfs: List[Path] = []
+        items: List = []  # noqa: FA100
+        pdfs: List[Path] = []  # noqa: FA100
         i = 0
         actual_i = 0
         num_remaining = limit
@@ -247,7 +247,7 @@ def iterate(
                 if no_pdf or is_duplicate:
                     continue
                 pdf = cast(Path, pdf)
-                title = item["data"]["title"] if "title" in item["data"] else ""
+                title = item["data"].get("title", "")
                 if len(items) >= start:
                     yield ZoteroPaper(
                         key=_get_citation_key(item),
@@ -277,12 +277,12 @@ def _get_collection_id(self, collection_name: str) -> str:
         """Get the collection id for a given collection name
             Raises ValueError if collection not found
         Args:
-            collection_name (str): The name of the collection
+            collection_name (str): The name of the collection.
 
         Returns:
             str: collection id
-        """
-        # specfic collection
+        """  # noqa: D205
+        # specific collection
         collections = self.collections()
         collection_id = ""
 
@@ -326,9 +326,8 @@ def _get_citation_key(item: dict) -> str:
     return f"{last_name}_{short_title}_{date}_{item['key']}".replace(" ", "")
 
 
-def _extract_pdf_key(item: dict) -> Union[str, None]:
+def _extract_pdf_key(item: dict) -> Union[str, None]:  # noqa: FA100
     """Extract the PDF key from a Zotero item."""
-
     if "links" not in item:
         return None
 
@@ -337,7 +336,7 @@ def _extract_pdf_key(item: dict) -> Union[str, None]:
 
     attachments = item["links"]["attachment"]
 
-    if type(attachments) != dict:
+    if type(attachments) != dict:  # noqa: E721
         # Find first attachment with attachmentType == application/pdf:
         for attachment in attachments:
             # TODO: This assumes there's only one PDF attachment.