Skip to content

Commit

Permalink
Bump hdxms datasets (#333)
Browse files Browse the repository at this point in the history
* update install docs

* comment

* bump hdxms-datasets version

* bump pinned requirements

* chore: fmt black
  • Loading branch information
Jhsmit authored Aug 25, 2023
1 parent 880227b commit 11aaf17
Show file tree
Hide file tree
Showing 18 changed files with 138 additions and 95 deletions.
16 changes: 10 additions & 6 deletions docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ This will start a Dask cluster on the scheduler address as specified in the PyHD
## Install from source


Create a new conda environment:
Create a new conda environment in a local `.venv` folder:

```bash
conda create --name py39_pyhdx python=3.9
conda activate py39_pyhdx
conda create --prefix ./.venv python=3.9
conda activate ./.venv
```

Clone the GitHub repository:
Expand All @@ -75,13 +75,17 @@ git clone https://github.com/Jhsmit/PyHDX
cd PyHDX
```

Dependencies can then be installed with `poetry`
You can install the dependencies from the pinned requirement files for your OS with pip:

```bash
$ poetry install --all-extras
$ pip install -r requirements/requirements-<operating-system>-3.9.txt
```

Use `--all-extras` if you plan to use the web interface.
Then install PyHDX in editable mode:

```bash
$ pip install -e .
```


### Running from source
Expand Down
9 changes: 6 additions & 3 deletions pyhdx/batch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ class DataFile(object):
filepath_or_buffer: Union[Path, StringIO]

def __post_init__(self):
warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)
warnings.warn(
"Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning
)

@cached_property
def data(self) -> pd.DataFrame:
Expand Down Expand Up @@ -62,8 +64,9 @@ def __init__(
# filter_kwargs: Optional[dict[str, Any]] = None,
# correction_kwargs: Optional[dict[str, Any]] = None,
) -> None:

warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)
warnings.warn(
"Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning
)
self.hdx_spec = hdx_spec
self.data_files: dict[str, DataFile] = {}

Expand Down
6 changes: 6 additions & 0 deletions pyhdx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from pyhdx.datasets import DataVault

app = typer.Typer()


@app.command()
def serve(
scheduler_address: Optional[str] = typer.Option(None, help="Address for dask scheduler to use"),
Expand Down Expand Up @@ -61,6 +63,8 @@ def serve(


datasets_app = typer.Typer(help="Manage HDX datasets")


@datasets_app.command()
def fetch(num: int = typer.Option(10, min=1, help="Maximum number of datasets to download")):
"""Update the datasets from the PyHDX repository"""
Expand All @@ -72,6 +76,7 @@ def fetch(num: int = typer.Option(10, min=1, help="Maximum number of datasets to
for data_id in tqdm(todo):
vault.fetch_dataset(data_id)


@datasets_app.command()
def clear():
"""Clear the local dataset cache"""
Expand All @@ -81,6 +86,7 @@ def clear():

app.add_typer(datasets_app, name="datasets")


@app.callback()
def callback():
pass
Expand Down
1 change: 1 addition & 0 deletions pyhdx/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def context(self, settings: dict) -> Generator[PyHDXConfig, None, None]:
finally:
cfg.conf = original_config


def valid_config() -> bool:
"""Checks if the current config file in the user home directory is a valid config
file for the current pyhdx version.
Expand Down
2 changes: 1 addition & 1 deletion pyhdx/datasets.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from hdxms_datasets import *
from hdxms_datasets import *
4 changes: 3 additions & 1 deletion pyhdx/fileIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def read_header(file_obj: Union[TextIO, BinaryIO], comment: str = "#") -> List[s
return header


def parse_header(filepath_or_buffer: Union[Path[str], str, StringIO, BytesIO], comment: str = "#") -> dict:
def parse_header(
filepath_or_buffer: Union[Path[str], str, StringIO, BytesIO], comment: str = "#"
) -> dict:
"""
Reads the header from a file and returns JSON metadata from header lines marked as comment.
Expand Down
2 changes: 1 addition & 1 deletion pyhdx/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,6 @@ def linear_bars(
sort=False,
**figure_kwargs,
):

# input data should always be 3 levels
# grouping is done by the first level
# second level gives each bar
Expand Down Expand Up @@ -1142,6 +1141,7 @@ def get(self, item, default=None):
return default


# should be a frozen dataclass
CMAP_NORM_DEFAULTS = ColorTransforms()


Expand Down
4 changes: 3 additions & 1 deletion pyhdx/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ def filter_peptides(
"""

warnings.warn("`filter_peptides` will be moved to the `hdxms-datasets` package", DeprecationWarning)
warnings.warn(
"`filter_peptides` will be moved to the `hdxms-datasets` package", DeprecationWarning
)
if state:
df = df[df["state"] == state]

Expand Down
1 change: 1 addition & 0 deletions pyhdx/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,7 @@ def array_intersection(arrays: Iterable[np.ndarray], fields: Iterable[str]) -> l

return selected


# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties
def rsetattr(obj, attr, val):
pre, _, post = attr.rpartition(".")
Expand Down
43 changes: 24 additions & 19 deletions pyhdx/web/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
from pyhdx.datasets import HDXDataSet, DataVault, DataFile
from pyhdx.fitting_torch import TorchFitResultSet
from pyhdx.models import (
PeptideUptakeModel, HDXMeasurement,
PeptideUptakeModel,
HDXMeasurement,
)
from pyhdx.plot import (
dG_scatter_figure,
Expand Down Expand Up @@ -293,7 +294,9 @@ class PeptideFileInputControl(PyHDXControlPanel):

batch_file = param.Parameter(doc="Batch file input:")

dataset_id = param.Selector(label="Dataset ID", doc="Dataset ID to load from hdxms-datasets database")
dataset_id = param.Selector(
label="Dataset ID", doc="Dataset ID to load from hdxms-datasets database"
)

nd_control = param.Boolean(
default=False, precedence=-1, doc="Whether to allow users to input a ND control"
Expand Down Expand Up @@ -410,7 +413,7 @@ def __init__(self, parent, **params):
todo = list(missing_datasets)[:num]
for data_id in tqdm(todo):
self.data_vault.fetch_dataset(data_id)
self.param['dataset_id'].objects = self.data_vault.datasets
self.param["dataset_id"].objects = self.data_vault.datasets
if self.data_vault.datasets:
self.dataset_id = self.data_vault.datasets[0]

Expand Down Expand Up @@ -512,19 +515,17 @@ def _update_mode(self):
"measurement_name",
"download_spec_button",
},
"Batch": {
"input_files_label",
"input_files",
"batch_file",
"batch_file_label"
},
"Batch": {"input_files_label", "input_files", "batch_file", "batch_file_label"},
"Database": {
"dataset_id",
}
},
}

#widget_dict.pop(self.input_mode)
excluded = set.union(*(v for k, v in widget_dict.items() if k != self.input_mode)) - widget_dict[self.input_mode]
# widget_dict.pop(self.input_mode)
excluded = (
set.union(*(v for k, v in widget_dict.items() if k != self.input_mode))
- widget_dict[self.input_mode]
)
#
#
# if self.input_mode == "Manual":
Expand Down Expand Up @@ -814,7 +815,9 @@ def _action_load_datasets(self) -> None:
"""Load all specified HDX measurements"""
if self.input_mode == "Manual":
data_src = self.data_file_history
dataset = HDXDataSet(data_id=uuid.uuid4().hex, data_files=data_src, hdx_spec=self.hdx_spec)
dataset = HDXDataSet(
data_id=uuid.uuid4().hex, data_files=data_src, hdx_spec=self.hdx_spec
)
elif self.input_mode == "Batch":
if self.hdxm_list:
self.parent.logger.info("Cannot add data in batch after manually inputting data")
Expand All @@ -831,7 +834,9 @@ def _action_load_datasets(self) -> None:
self.state_spec = hdx_spec["states"]
self.data_spec = hdx_spec["data_files"]

dataset = HDXDataSet(data_id=uuid.uuid4().hex, data_files=data_src, hdx_spec=self.hdx_spec)
dataset = HDXDataSet(
data_id=uuid.uuid4().hex, data_files=data_src, hdx_spec=self.hdx_spec
)
self.param["hdxm_list"].objects = dataset.states
elif self.input_mode == "Database":
if self.dataset_id is None:
Expand All @@ -841,18 +846,18 @@ def _action_load_datasets(self) -> None:
self.parent.logger.info(f"Loaded dataset {dataset.data_id} from hdxms database")

try:
authors = ", ".join([author['name'] for author in dataset.metadata['authors']])
authors = ", ".join([author["name"] for author in dataset.metadata["authors"]])
self.parent.logger.info(f"Author(s): {authors}")
except KeyError:
pass

publications = dataset.metadata.get('publications', [])
publications = dataset.metadata.get("publications", [])
if publications:
for pub in publications:
pub_str = pub['title']
if 'DOI' in pub:
pub_str = pub["title"]
if "DOI" in pub:
pub_str += f' ([{pub["DOI"]}](https://doi.org/{pub["DOI"]}))'
elif 'URL' in pub:
elif "URL" in pub:
pub_str += f' ([URL]({pub["URL"]}))'
self.parent.logger.info("Publication: " + pub_str)
else:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies = [
"param",
"pyyaml",
"omegaconf",
"hdxms-datasets>=0.1.2",
"hdxms-datasets>=0.1.3",
]
dynamic = ["version"]

Expand Down
26 changes: 14 additions & 12 deletions requirements/requirements-macOS-latest-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ certifi==2023.7.22
# via requests
charset-normalizer==3.2.0
# via requests
click==8.1.6
click==8.1.7
# via
# dask
# distributed
Expand All @@ -33,21 +33,21 @@ colorcet==3.0.1
# pyhdx (pyproject.toml)
cycler==0.11.0
# via matplotlib
dask==2023.8.0
dask==2023.8.1
# via
# distributed
# pyhdx (pyproject.toml)
distributed==2023.8.0
distributed==2023.8.1
# via pyhdx (pyproject.toml)
filelock==3.12.2
# via torch
fsspec==2023.6.0
# via dask
hdxms-datasets==0.1.2
hdxms-datasets==0.1.3
# via pyhdx (pyproject.toml)
hdxrate==0.2.0
# via pyhdx (pyproject.toml)
holoviews==1.17.0
holoviews==1.17.1
# via
# hvplot
# pyhdx (pyproject.toml)
Expand All @@ -66,7 +66,7 @@ jinja2==3.1.2
# bokeh
# distributed
# torch
kiwisolver==1.4.4
kiwisolver==1.4.5
# via matplotlib
lazy-loader==0.3
# via scikit-image
Expand Down Expand Up @@ -110,7 +110,7 @@ omegaconf==2.3.0
# via
# hdxms-datasets
# pyhdx (pyproject.toml)
packaging==22.0
packaging==23.1
# via
# bokeh
# dask
Expand All @@ -120,7 +120,7 @@ packaging==22.0
# hvplot
# pyhdx (pyproject.toml)
# scikit-image
pandas==1.5.3
pandas==2.0.3
# via
# hdxms-datasets
# holoviews
Expand Down Expand Up @@ -163,7 +163,7 @@ python-dateutil==2.8.2
# pandas
pytz==2023.3
# via pandas
pyviz-comms==2.3.2
pyviz-comms==3.0.0
# via
# holoviews
# panel
Expand All @@ -183,7 +183,7 @@ requests==2.31.0
# panel
scikit-image==0.21.0
# via pyhdx (pyproject.toml)
scipy==1.11.1
scipy==1.11.2
# via
# pyhdx (pyproject.toml)
# scikit-image
Expand All @@ -202,7 +202,7 @@ sympy==1.12
# torch
tblib==2.0.0
# via distributed
tifffile==2023.7.18
tifffile==2023.8.12
# via scikit-image
toolz==0.12.0
# via
Expand All @@ -217,7 +217,7 @@ tornado==6.3.3
# via
# bokeh
# distributed
tqdm==4.65.0
tqdm==4.66.1
# via
# panel
# pyhdx (pyproject.toml)
Expand All @@ -229,6 +229,8 @@ typing-extensions==4.7.1
# panel
# torch
# typer
tzdata==2023.3
# via pandas
urllib3==2.0.4
# via
# distributed
Expand Down
Loading

0 comments on commit 11aaf17

Please sign in to comment.