Skip to content

Commit

Permalink
Upgrade of ruff, adding codespell, toml-sort (#251)
Browse files Browse the repository at this point in the history
* Adds toml-sort, codespell, removes isort in favor of ruff

* Consolidates tools into pyproject.toml, and adds tool configs

* Fixed all codespell errors

* All ruff autofixes

* Other ruff errors using --add-noqa
  • Loading branch information
jamesbraza authored Mar 8, 2024
1 parent a50dff1 commit 7b28a34
Show file tree
Hide file tree
Showing 14 changed files with 430 additions and 290 deletions.
30 changes: 21 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@ default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
- id: check-byte-order-marker
- id: check-case-conflict
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: debug-statements
- id: detect-private-key
- id: end-of-file-fixer
- id: mixed-line-ending
- id: check-added-large-files
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.0.270"
rev: v0.3.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand All @@ -24,14 +32,18 @@ repos:
- id: mypy
args: [--pretty, --ignore-missing-imports]
additional_dependencies: [types-requests, types-setuptools]
- repo: https://github.com/PyCQA/isort
rev: "5.12.0"
hooks:
- id: isort
args: [--profile=black, "--skip=__init__.py", "--filter-files"]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
hooks:
- id: prettier
additional_dependencies:
- prettier@3.1.0 # SEE: https://github.com/pre-commit/pre-commit/issues/3133
- repo: https://github.com/pappasam/toml-sort
rev: v0.23.1
hooks:
- id: toml-sort-fix
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
additional_dependencies: [".[toml]"]
2 changes: 0 additions & 2 deletions .ruff.toml

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ It's not that different! This is similar to the tree response method in LlamaInd

### How is this different from LangChain?

There has been some great work on retrievers in langchain and you could say this is an example of a retreiver.
There has been some great work on retrievers in langchain and you could say this is an example of a retriever.

### Can I save or load?

Expand Down
20 changes: 10 additions & 10 deletions paperqa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from .docs import Answer, Docs, PromptCollection, Doc, Text, Context, print_callback
from .version import __version__
from .docs import Answer, Context, Doc, Docs, PromptCollection, Text, print_callback
from .llms import (
LLMModel,
AnthropicLLMModel,
EmbeddingModel,
HybridEmbeddingModel,
LangchainEmbeddingModel,
OpenAIEmbeddingModel,
LangchainLLMModel,
OpenAILLMModel,
AnthropicLLMModel,
LangchainVectorStore,
LlamaEmbeddingModel,
HybridEmbeddingModel,
SparseEmbeddingModel,
LLMModel,
LLMResult,
NumpyVectorStore,
LangchainVectorStore,
OpenAIEmbeddingModel,
OpenAILLMModel,
SentenceTransformerEmbeddingModel,
LLMResult,
SparseEmbeddingModel,
)
from .version import __version__

__all__ = [
"Docs",
Expand Down
51 changes: 25 additions & 26 deletions paperqa/contrib/zotero.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
try:
from pyzotero import zotero
except ImportError:
raise ImportError("Please install pyzotero: `pip install pyzotero`")
raise ImportError("Please install pyzotero: `pip install pyzotero`") # noqa: B904
from ..paths import PAPERQA_DIR
from ..utils import StrPath, count_pdf_pages


class ZoteroPaper(BaseModel):
"""A paper from Zotero.
Attributes
Attributes:
----------
key : str
The citation key.
Expand Down Expand Up @@ -65,9 +65,9 @@ def __init__(
self,
*,
library_type: str = "user",
library_id: Optional[str] = None,
api_key: Optional[str] = None,
storage: Optional[StrPath] = None,
library_id: Optional[str] = None, # noqa: FA100
api_key: Optional[str] = None, # noqa: FA100
storage: Optional[StrPath] = None, # noqa: FA100
**kwargs,
):
self.logger = logging.getLogger("ZoteroDB")
Expand All @@ -81,7 +81,7 @@ def __init__(
" from the text 'Your userID for use in API calls is [XXXXXX]'."
" Then, set the environment variable ZOTERO_USER_ID to this value."
)
else:
else: # noqa: RET506
library_id = os.environ["ZOTERO_USER_ID"]

if api_key is None:
Expand All @@ -93,7 +93,7 @@ def __init__(
" with access to your library."
" Then, set the environment variable ZOTERO_API_KEY to this value."
)
else:
else: # noqa: RET506
api_key = os.environ["ZOTERO_API_KEY"]

self.logger.info(f"Using library ID: {library_id} with type: {library_type}.")
Expand All @@ -108,7 +108,7 @@ def __init__(
library_type=library_type, library_id=library_id, api_key=api_key, **kwargs
)

def get_pdf(self, item: dict) -> Union[Path, None]:
def get_pdf(self, item: dict) -> Union[Path, None]: # noqa: FA100
"""Gets a filename for a given Zotero key for a PDF.
If the PDF is not found locally, the PDF will be downloaded to a local file at the correct key.
Expand All @@ -120,7 +120,7 @@ def get_pdf(self, item: dict) -> Union[Path, None]:
An item from `pyzotero`. Should have a `key` field, and also have an entry
`links->attachment->attachmentType == application/pdf`.
"""
if type(item) != dict:
if type(item) != dict: # noqa: E721
raise TypeError("Pass the full item of the paper. The item must be a dict.")

pdf_key = _extract_pdf_key(item)
Expand All @@ -137,17 +137,17 @@ def get_pdf(self, item: dict) -> Union[Path, None]:

return pdf_path

def iterate(
def iterate( # noqa: C901, PLR0912
self,
limit: int = 25,
start: int = 0,
q: Optional[str] = None,
qmode: Optional[str] = None,
since: Optional[str] = None,
tag: Optional[str] = None,
sort: Optional[str] = None,
direction: Optional[str] = None,
collection_name: Optional[str] = None,
q: Optional[str] = None, # noqa: FA100
qmode: Optional[str] = None, # noqa: FA100
since: Optional[str] = None, # noqa: FA100
tag: Optional[str] = None, # noqa: FA100
sort: Optional[str] = None, # noqa: FA100
direction: Optional[str] = None, # noqa: FA100
collection_name: Optional[str] = None, # noqa: FA100
):
"""Given a search query, this will lazily iterate over papers in a Zotero library, downloading PDFs as needed.
Expand Down Expand Up @@ -210,8 +210,8 @@ def iterate(

max_limit = 100

items: List = []
pdfs: List[Path] = []
items: List = [] # noqa: FA100
pdfs: List[Path] = [] # noqa: FA100
i = 0
actual_i = 0
num_remaining = limit
Expand Down Expand Up @@ -247,7 +247,7 @@ def iterate(
if no_pdf or is_duplicate:
continue
pdf = cast(Path, pdf)
title = item["data"]["title"] if "title" in item["data"] else ""
title = item["data"].get("title", "")
if len(items) >= start:
yield ZoteroPaper(
key=_get_citation_key(item),
Expand Down Expand Up @@ -277,12 +277,12 @@ def _get_collection_id(self, collection_name: str) -> str:
"""Get the collection id for a given collection name
Raises ValueError if collection not found
Args:
collection_name (str): The name of the collection
collection_name (str): The name of the collection.
Returns:
str: collection id
"""
# specfic collection
""" # noqa: D205
# specific collection
collections = self.collections()
collection_id = ""

Expand Down Expand Up @@ -326,9 +326,8 @@ def _get_citation_key(item: dict) -> str:
return f"{last_name}_{short_title}_{date}_{item['key']}".replace(" ", "")


def _extract_pdf_key(item: dict) -> Union[str, None]:
def _extract_pdf_key(item: dict) -> Union[str, None]: # noqa: FA100
"""Extract the PDF key from a Zotero item."""

if "links" not in item:
return None

Expand All @@ -337,7 +336,7 @@ def _extract_pdf_key(item: dict) -> Union[str, None]:

attachments = item["links"]["attachment"]

if type(attachments) != dict:
if type(attachments) != dict: # noqa: E721
# Find first attachment with attachmentType == application/pdf:
for attachment in attachments:
# TODO: This assumes there's only one PDF attachment.
Expand Down
Loading

0 comments on commit 7b28a34

Please sign in to comment.