remove config and omegaconf dependency #8

Merged · 2 commits · Jan 2, 2024
16 changes: 14 additions & 2 deletions examples/load_datasets.py
@@ -1,11 +1,21 @@
 # %%

 from hdxms_datasets import DataVault
+from pathlib import Path


-# Creating a DataVault without giving a cache path name uses $home/.hdxms_datasets by default
-vault = DataVault()
+# %%
+# create a data vault, specify cache_dir to download datasets to
+cache_dir = Path.home() / ".hdxms_datasets"
+vault = DataVault(cache_dir=cache_dir)
+vault

 # %%
 # Download a specific HDX dataset
 vault.fetch_dataset("20221007_1530_SecA_Krishnamurthy")
+vault.datasets
+
+# %%
+# Load the dataset
+ds = vault.load_dataset("20221007_1530_SecA_Krishnamurthy")

@@ -24,3 +34,5 @@
 # States can also be referenced by their index, used here to load the peptides corresponding to
 # the experiment.
 peptides = ds.load_peptides(0, "experiment")
+
+# %%
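
Reviewer's note: the example now spells out at the call site what the deleted config previously provided. A minimal sketch of the resulting workflow, using only the calls visible in this diff (the dataset ID is the one from the example):

```python
from pathlib import Path

from hdxms_datasets import DataVault

# the old cfg default, now written out explicitly by the caller
cache_dir = Path.home() / ".hdxms_datasets"
vault = DataVault(cache_dir=cache_dir)  # per datavault.py below, the directory is created if missing

vault.fetch_dataset("20221007_1530_SecA_Krishnamurthy")  # download into cache_dir
ds = vault.load_dataset("20221007_1530_SecA_Krishnamurthy")
```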
2 changes: 1 addition & 1 deletion examples/load_from_yaml.py
@@ -10,7 +10,7 @@
 hdx_spec = yaml.safe_load((data_pth / data_id / "hdx_spec.yaml").read_text())
 metadata = yaml.safe_load((data_pth / data_id / "metadata.yaml").read_text())

-#%%
+# %%

 dataset = HDXDataSet.from_spec(hdx_spec, data_dir=data_pth / data_id, metadata=metadata)

4 changes: 2 additions & 2 deletions examples/load_to_pyhdx.py
@@ -6,10 +6,10 @@
 test_pth = Path("../tests").resolve()
 data_pth = test_pth / "datasets"

-vault = DataVault()
+vault = DataVault(cache_dir=data_pth)
 ds = vault.load_dataset("20221007_1530_SecB_Krishnamurthy")

-# Not implemented yet
+# not yet in latest release
 hdxm = HDXMeasurement.from_dataset(ds)

 print(hdxm)
1 change: 0 additions & 1 deletion hdxms_datasets/__init__.py
@@ -1,7 +1,6 @@
 """Top-level package for HDXMS Datasets."""

 from hdxms_datasets.__version__ import __version__
-from hdxms_datasets.config import cfg
 from hdxms_datasets.datasets import HDXDataSet, DataFile
 from hdxms_datasets.datavault import DataVault
 from hdxms_datasets.process import (
131 changes: 0 additions & 131 deletions hdxms_datasets/config.py

This file was deleted.

6 changes: 0 additions & 6 deletions hdxms_datasets/config.yaml

This file was deleted.

10 changes: 5 additions & 5 deletions hdxms_datasets/datasets.py
@@ -11,7 +11,6 @@
 import pandas as pd
 import yaml

-from hdxms_datasets.config import cfg
 from hdxms_datasets.process import filter_peptides, convert_temperature, parse_data_files
 from hdxms_datasets.reader import read_dynamx

@@ -24,13 +23,14 @@ class DataFile(object):

     filepath_or_buffer: Union[Path, StringIO]

+    time_conversion: tuple[Literal["h", "min", "s"], Literal["h", "min", "s"]] = ("min", "s")
+    # from, to time conversion
+
     @cached_property
     def data(self) -> pd.DataFrame:
         # TODO convert time after reading
         if self.format == "DynamX":
-            # from, to time conversion
-            time_conversion = (cfg.dynamx.time_unit, cfg.time_unit)
-
-            data = read_dynamx(self.filepath_or_buffer, time_conversion=time_conversion)
+            data = read_dynamx(self.filepath_or_buffer, time_conversion=self.time_conversion)
         else:
             raise ValueError(f"Invalid format {self.format!r}")
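
Since `time_conversion` moves from `cfg` onto the `DataFile` itself, construction would look roughly like the sketch below. This assumes `DataFile` is a dataclass accepting these fields as keyword arguments (the `format` field is implied by `self.format` in the hunk above); the file path is hypothetical:

```python
from pathlib import Path

from hdxms_datasets import DataFile

data_file = DataFile(
    filepath_or_buffer=Path("data/SecA_state_data.csv"),  # hypothetical path
    format="DynamX",               # DataFile.data raises ValueError for other formats
    time_conversion=("min", "s"),  # convert exposure times from minutes to seconds
)

peptides = data_file.data  # cached DataFrame, parsed via read_dynamx
```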
22 changes: 11 additions & 11 deletions hdxms_datasets/datavault.py
@@ -10,22 +10,22 @@
 import requests
 import yaml

-from hdxms_datasets.config import cfg
 from hdxms_datasets.datasets import HDXDataSet


+DATABASE_URL = "https://raw.githubusercontent.com/Jhsmit/HDX-MS-datasets/master/datasets/"
+
+
 class DataVault(object):
     def __init__(
         self,
-        cache_dir: Optional[Union[Path[str], str]] = None,
+        cache_dir: Union[Path, str],
+        remote_url: str = DATABASE_URL,
     ):
-        if cache_dir is None:
-            self.cache_dir = cfg.database_dir
-            self.cache_dir.mkdir(exist_ok=True, parents=True)
-        else:
-            self.cache_dir: Path = Path(cache_dir)
-            if not self.cache_dir.exists():
-                raise FileNotFoundError(f"Cache directory '{self.cache_dir}' does not exist")
+        self.cache_dir = Path(cache_dir)
+        self.cache_dir.mkdir(exist_ok=True, parents=True)
+
+        self.remote_url = remote_url

     def filter(self, *spec: dict):
         # filters list of available datasets

@@ -35,7 +35,7 @@
     def remote_index(self) -> list[str]:
         """List of available datasets in the remote database"""

-        url = urllib.parse.urljoin(cfg.database_url, "index.txt")
+        url = urllib.parse.urljoin(self.remote_url, "index.txt")
         response = requests.get(url)
         if response.ok:
             index = response.text.split("\n")[1:]

@@ -90,7 +90,7 @@ def fetch_dataset(self, data_id: str) -> bool:
         else:
             output_pth.mkdir()

-        dataset_url = urllib.parse.urljoin(cfg.database_url, data_id + "/")
+        dataset_url = urllib.parse.urljoin(self.remote_url, data_id + "/")

         files = ["hdx_spec.yaml", "metadata.yaml"]
         optional_files = ["CITATION.cff"]
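
With `cfg.database_url` gone, the remote is per-instance state, which also makes it easy to point a vault at a fork or local mirror in tests. A small sketch using only calls visible in this diff (the alternative URL is illustrative, not a real mirror):

```python
from pathlib import Path

from hdxms_datasets import DataVault

# default remote (DATABASE_URL)
vault = DataVault(cache_dir=Path.home() / ".hdxms_datasets")
vault.fetch_dataset("20221007_1530_SecA_Krishnamurthy")

# or override the remote per instance
test_vault = DataVault(
    cache_dir=Path("/tmp/hdxms_cache"),
    remote_url="https://example.com/my-datasets/",  # illustrative URL
)
```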
6 changes: 3 additions & 3 deletions hdxms_datasets/process.py
@@ -5,8 +5,6 @@

 import pandas as pd

-from hdxms_datasets.config import cfg
-
 if TYPE_CHECKING:
     from hdxms_datasets import DataFile

@@ -71,6 +69,7 @@ def filter_peptides(
     exposure: Optional[dict] = None,
     query: Optional[list[str]] = None,
     dropna: bool = True,
+    time_unit: str = "s",
 ) -> pd.DataFrame:
     """
     Convenience function to filter a peptides DataFrame.

@@ -82,6 +81,7 @@
             exposure value, and "unit" for the time unit.
         query: Additional queries to pass to [pandas.DataFrame.query][].
         dropna: Drop rows with `NaN` uptake entries.
+        time_unit: Time unit of the exposure column in the supplied dataframe.

     Examples:
         Filter peptides for a specific protein state and exposure time:

@@ -97,7 +97,7 @@
         df = df[df["state"] == state]

     if exposure is not None:
-        t_val = convert_time(exposure, target_unit=cfg.time_unit)
+        t_val = convert_time(exposure, time_unit)  # type: ignore
         if isinstance(t_val, list):
             df = df[df["exposure"].isin(t_val)]
         else:
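
A usage sketch for the new `time_unit` parameter, assuming the leading parameters of `filter_peptides` are the dataframe and the state (as the body above suggests); the toy table and state names are made up:

```python
import pandas as pd

from hdxms_datasets.process import filter_peptides

# toy peptide table; real ones come from DataFile.data
df = pd.DataFrame(
    {
        "state": ["SecA_WT", "SecA_WT", "SecA_mono"],  # made-up state names
        "exposure": [10.0, 30.0, 10.0],                # exposure times in seconds
        "uptake": [1.2, 2.3, 0.9],
    }
)

# the exposure spec carries its own unit; time_unit declares the unit of the
# dataframe's exposure column so the two can be matched after conversion
peptides = filter_peptides(
    df,
    state="SecA_WT",
    exposure={"value": 0.5, "unit": "min"},  # 0.5 min == 30 s
    time_unit="s",
)
```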
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -19,7 +19,6 @@ classifiers = [
 ]

 dependencies = [
-    "omegaconf",
     "pandas",
     "PyYAML",
     "requests",

@@ -73,3 +72,4 @@ line-length = 100

 [tool.ruff]
 line-length = 100
+target-version = "py310"
26 changes: 10 additions & 16 deletions requirements/requirements-macOS-latest-3.10.txt
@@ -4,35 +4,29 @@
 #
 # pip-compile --output-file=requirements-macOS-latest-3.10.txt pyproject.toml
 #
-antlr4-python3-runtime==4.9.3
-    # via omegaconf
-certifi==2023.7.22
+certifi==2023.11.17
     # via requests
-charset-normalizer==3.2.0
+charset-normalizer==3.3.2
     # via requests
-idna==3.4
+idna==3.6
     # via requests
-numpy==1.25.2
+numpy==1.26.2
     # via pandas
-omegaconf==2.3.0
+packaging==23.2
     # via hdxms-datasets (pyproject.toml)
-packaging==23.1
-    # via hdxms-datasets (pyproject.toml)
-pandas==2.0.3
+pandas==2.1.4
     # via hdxms-datasets (pyproject.toml)
 python-dateutil==2.8.2
     # via pandas
-pytz==2023.3
+pytz==2023.3.post1
     # via pandas
 pyyaml==6.0.1
-    # via
-    #   hdxms-datasets (pyproject.toml)
-    #   omegaconf
+    # via hdxms-datasets (pyproject.toml)
 requests==2.31.0
     # via hdxms-datasets (pyproject.toml)
 six==1.16.0
     # via python-dateutil
-tzdata==2023.3
+tzdata==2023.4
     # via pandas
-urllib3==2.0.4
+urllib3==2.1.0
     # via requests