Skip to content

Commit

Permalink
Merge pull request #489 from pepkit/minor_fixes
Browse files Browse the repository at this point in the history
Release 0.40.3
  • Loading branch information
khoroshevskyi authored Jul 17, 2024
2 parents fc15511 + 395f12f commit a1bedea
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 44 deletions.
2 changes: 1 addition & 1 deletion peppy/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.40.2"
__version__ = "0.40.3"
7 changes: 4 additions & 3 deletions peppy/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
SAMPLE_TABLE_INDEX_KEY = "sample_table_index"
SUBSAMPLE_TABLE_INDEX_KEY = "subsample_table_index"
CONFIG_KEY = "_config"
ORIGINAL_CONFIG_KEY = "_original_config"
PROJECT_TYPENAME = "Project"
SAMPLE_MODS_KEY = "sample_modifiers"
PROJ_MODS_KEY = "project_modifiers"
NAME_TABLE_ATTR = "sample_table"
CONSTANT_KEY = "append"
APPEND_KEY = "append"
REMOVE_KEY = "remove"
DUPLICATED_KEY = "duplicate"
DERIVED_KEY = "derive"
Expand All @@ -33,11 +34,11 @@
AMENDMENTS_KEY = "amend"
ACTIVE_AMENDMENTS_KEY = "_" + AMENDMENTS_KEY
SAMPLE_EDIT_FLAG_KEY = "_samples_touched"
SAMPLE_MODIFIERS = [CONSTANT_KEY, IMPLIED_KEY, DERIVED_KEY, DUPLICATED_KEY, REMOVE_KEY]
SAMPLE_MODIFIERS = [APPEND_KEY, IMPLIED_KEY, DERIVED_KEY, DUPLICATED_KEY, REMOVE_KEY]
PROJECT_MODIFIERS = [CFG_IMPORTS_KEY, AMENDMENTS_KEY]
PROJECT_CONSTANTS = [
"REQUIRED_VERSION",
"CONSTANT_KEY",
"APPEND_KEY",
"DERIVED_SOURCES_KEY",
"DERIVED_KEY",
"SAMPLE_MODS_KEY",
Expand Down
38 changes: 28 additions & 10 deletions peppy/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from rich.console import Console
from rich.progress import track
from ubiquerg import is_url
from copy import deepcopy

from .const import (
ACTIVE_AMENDMENTS_KEY,
Expand All @@ -27,7 +28,7 @@
CONFIG_FILE_KEY,
CONFIG_KEY,
CONFIG_VERSION_KEY,
CONSTANT_KEY,
APPEND_KEY,
DERIVED_ATTRS_KEY,
DERIVED_KEY,
DERIVED_SOURCES_KEY,
Expand Down Expand Up @@ -59,6 +60,7 @@
SUBSAMPLE_RAW_LIST_KEY,
SUBSAMPLE_TABLE_INDEX_KEY,
SUBSAMPLE_TABLES_FILE_KEY,
ORIGINAL_CONFIG_KEY,
)
from .exceptions import (
InvalidSampleTableFileException,
Expand Down Expand Up @@ -137,7 +139,6 @@ def __init__(

self._samples = []
self[SAMPLE_EDIT_FLAG_KEY] = False
self.is_private = False
self.progressbar = False

# table indexes can be specified in config or passed to the object constructor
Expand Down Expand Up @@ -200,6 +201,19 @@ def from_pandas(
)
return tmp_obj

@classmethod
def from_pephub(cls, registry_path: str) -> "Project":
    """
    Load a project from PEPhub by delegating to pephubclient.

    :param registry_path: PEPhub registry path
    :return: the object returned by PEPHubClient.load_project
        (annotated as a peppy Project)
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid an import cycle with pephubclient - confirm
    from pephubclient import PEPHubClient

    return PEPHubClient().load_project(project_registry_path=registry_path)

@classmethod
def from_dict(cls, pep_dictionary: dict):
"""
Expand Down Expand Up @@ -317,13 +331,13 @@ def to_dict(
else:
sub_df = None
try:
self[CONFIG_KEY][NAME_KEY] = self.name
self[ORIGINAL_CONFIG_KEY][NAME_KEY] = self.name
except NotImplementedError:
self[CONFIG_KEY][NAME_KEY] = "unnamed"
self[CONFIG_KEY][DESC_KEY] = self.description
self[ORIGINAL_CONFIG_KEY][NAME_KEY] = "unnamed"
self[ORIGINAL_CONFIG_KEY][DESC_KEY] = self.description
p_dict = {
SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(orient=orient),
CONFIG_KEY: dict(self[CONFIG_KEY]),
CONFIG_KEY: dict(self[ORIGINAL_CONFIG_KEY]),
SUBSAMPLE_RAW_LIST_KEY: sub_df,
}
else:
Expand Down Expand Up @@ -438,6 +452,7 @@ def parse_config_file(
)

self[CONFIG_KEY].update(**config)
self[ORIGINAL_CONFIG_KEY] = deepcopy(self[CONFIG_KEY])
# Parse yaml into the project.config attributes
_LOGGER.debug("Adding attributes: {}".format(", ".join(config)))
# Overwrite any config entries with entries in the amendments
Expand Down Expand Up @@ -587,8 +602,8 @@ def attr_constants(self):
Update each Sample with constants declared by a Project.
If Project does not declare constants, no update occurs.
"""
if self._modifier_exists(CONSTANT_KEY):
to_append = self[CONFIG_KEY][SAMPLE_MODS_KEY][CONSTANT_KEY]
if self._modifier_exists(APPEND_KEY):
to_append = self[CONFIG_KEY][SAMPLE_MODS_KEY][APPEND_KEY]
_LOGGER.debug("Applying constant attributes: {}".format(to_append))

for s in track(
Expand Down Expand Up @@ -1309,14 +1324,17 @@ def _read_sample_data(self):
_LOGGER.info("No config key in Project, or reading project from dict")
return
if CFG_SAMPLE_TABLE_KEY not in self[CONFIG_KEY]:
_LOGGER.debug("no {} found".format(CFG_SAMPLE_TABLE_KEY))
_LOGGER.debug(f"No {CFG_SAMPLE_TABLE_KEY} found in config file")
return
st = self[CONFIG_KEY][CFG_SAMPLE_TABLE_KEY]

if self[SUBSAMPLE_TABLES_FILE_KEY] is not None:
sst = self[SUBSAMPLE_TABLES_FILE_KEY]
else:
if CONFIG_KEY in self and CFG_SUBSAMPLE_TABLE_KEY in self[CONFIG_KEY]:
if (
CONFIG_KEY in self
and self[CONFIG_KEY].get(CFG_SUBSAMPLE_TABLE_KEY) is not None
):
sst = make_list(self[CONFIG_KEY][CFG_SUBSAMPLE_TABLE_KEY], str)
else:
sst = None
Expand Down
45 changes: 27 additions & 18 deletions peppy/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from copy import copy as cp
from logging import getLogger
from string import Formatter
from typing import Optional, Union

import pandas as pd
import yaml
Expand Down Expand Up @@ -134,31 +135,39 @@ def _obj2dict(obj, name=None):
serial.update({"prj": grab_project_data(self[PRJ_REF])})
return serial

def to_yaml(
    self, path: Optional[str] = None, add_prj_ref=False
) -> Union[str, None]:
    """
    Serializes itself in YAML format.

    Writes to file if path is provided, else returns string representation.

    :param str path: A file path to write yaml to (environment variables
        are expanded); defaults to None, in which case nothing is written
        and the YAML string is returned
    :param bool add_prj_ref: whether the project reference bound to the
        Sample object should be included in the YAML representation
    :return str | None: string representation of sample yaml when no path
        is given; None when a path is given (whether or not the file could
        be written)
    :raise yaml.representer.RepresenterError: if the sample data cannot be
        represented in YAML
    """
    serial = self.to_dict(add_prj_ref=add_prj_ref)
    if not path:
        return yaml.safe_dump(serial, stream=None, default_flow_style=False)

    path = os.path.expandvars(path)
    # A bare file name has an empty dirname; that means "current directory",
    # which must not be reported as a missing directory.
    target_dir = os.path.dirname(path) or os.curdir
    if not os.path.exists(target_dir):
        _LOGGER.warning(
            "Could not write sample data to: {}. "
            "Directory does not exist".format(path)
        )
        return None

    # Serialize before opening the file: opening in "w" mode truncates it, so
    # a failed dump would otherwise leave an empty file behind.
    try:
        yaml_data = yaml.safe_dump(serial, default_flow_style=False)
    except yaml.representer.RepresenterError:
        _LOGGER.error("Serialized sample data: {}".format(serial))
        raise
    with open(path, "w") as outfile:
        outfile.write(yaml_data)
    _LOGGER.debug("Sample data written to: {}".format(path))
    return None

def derive_attribute(self, data_sources, attr_name):
"""
Expand Down
3 changes: 3 additions & 0 deletions peppy/simple_attr_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def __getattr__(self, item):
except KeyError:
raise AttributeError(f"Attribute not found: {item}")

def __eq__(self, other: object) -> bool:
    """
    Compare two attribute maps by their underlying attribute mappings.

    :param other: object to compare against
    :return: result of comparing the `_mapped_attr` of both objects;
        NotImplemented when `other` exposes no `_mapped_attr`, so that
        Python falls back to its default comparison instead of raising
        AttributeError (e.g. for `obj == None`)
    """
    try:
        other_attrs = other._mapped_attr
    except AttributeError:
        return NotImplemented
    return self._mapped_attr == other_attrs

@property
def attributes(self):
return self._mapped_attr
6 changes: 4 additions & 2 deletions peppy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import os
from typing import Dict, Mapping
from typing import Dict, Mapping, Type, Union
from urllib.request import urlopen

import yaml
Expand Down Expand Up @@ -76,14 +76,16 @@ def grab_project_data(prj):
raise KeyError("Project lacks section '{}'".format(CONFIG_KEY))


def make_list(arg, obj_class):
def make_list(arg: Union[list, str], obj_class: Type) -> list:
"""
Convert an object of predefined class to a list of objects of that class or
ensure a list is a list of objects of that class
:param list[obj] | obj arg: string or a list of strings to listify
:param Type obj_class: the class of interest
:return list: list of objects of the predefined class
:raise TypeError: if a faulty argument was provided
"""

Expand Down
1 change: 1 addition & 0 deletions requirements/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pyyaml
rich>=10.3.0
ubiquerg>=0.6.2
numpy
pephubclient>=0.4.2
5 changes: 0 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ def example_pep_cfg_noname_path(request):
return get_path_to_example_file(EPB, "noname", request.param)


@pytest.fixture
def example_pep_cfg_custom_index(request):
return get_path_to_example_file(EPB, "custom_index", request.param)


@pytest.fixture
def example_peps_cfg_paths(request):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pep_version: "2.0.0"
sample_table: "samplesheet.csv"
subsample_table: "subsamplesheet.csv"
sample_table_index: "sample"
subsample_table_index: "sample"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample,strandedness
WT_REP1,reverse
WT_REP2,reverse
RAP1_UNINDUCED_REP1,reverse
RAP1_UNINDUCED_REP2,reverse
RAP1_IAA_30M_REP1,reverse
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
sample,instrument_platform,run_accession,fastq_1,fastq_2
WT_REP1,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz
WT_REP1,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz
WT_REP2,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz
RAP1_UNINDUCED_REP1,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,
RAP1_UNINDUCED_REP2,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,
RAP1_UNINDUCED_REP2,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,
RAP1_IAA_30M_REP1,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pep_version: "2.0.0"
sample_table: sample_table.csv
subsample_table: null
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample_name,protocol,file
frog_1,anySampleType,multi
frog_2,anySampleType,multi
frog_3,anySampleType,multi
31 changes: 31 additions & 0 deletions tests/smoketests/test_Sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ def test_str_repr_correctness(self, example_pep_cfg_path):
assert example_pep_cfg_path in str_repr
assert "Sample '{}'".format(sample["sample_name"]) in str_repr

@pytest.mark.parametrize("example_pep_cfg_path", EXAMPLE_TYPES, indirect=True)
def test_sample_to_yaml_no_path(self, example_pep_cfg_path):
    """
    Check that to_yaml called without a path returns a YAML representation
    for every sample of the project.
    """
    project = Project(cfg=example_pep_cfg_path)
    for current_sample in project.samples:
        assert "sample_name" in current_sample.to_yaml()

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path):
"""
Expand All @@ -77,3 +87,24 @@ def test_pickle_in_samples(self, example_pep_cfg_path):
unpickled_sample = pickle.loads(pickled_data)

assert sample.to_dict() == unpickled_sample.to_dict()

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_equals_samples(self, example_pep_cfg_path):
    """
    First samples of two projects built from the same config compare equal.
    """
    first_project = Project(cfg=example_pep_cfg_path)
    second_project = Project(cfg=example_pep_cfg_path)

    assert first_project.samples[0] == second_project.samples[0]

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_not_equals_samples(self, example_pep_cfg_path):
    """
    A sample does not compare equal to a modified copy of itself
    nor to a different sample.
    """
    left_project = Project(cfg=example_pep_cfg_path)
    right_project = Project(cfg=example_pep_cfg_path)
    baseline = left_project.samples[0]
    modified, different = right_project.samples[0], right_project.samples[1]

    # Adding an attribute must break equality with the untouched counterpart
    modified.new = "something"
    assert not baseline == modified
    assert not baseline == different
26 changes: 21 additions & 5 deletions tests/test_Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,14 +330,12 @@ def test_missing_sample_name_custom_index(self, example_pep_cfg_noname_path):
p = Project(cfg=example_pep_cfg_noname_path, sample_table_index="id")
assert p.sample_name_colname == "id"

@pytest.mark.parametrize(
"example_pep_cfg_custom_index", ["project_config.yaml"], indirect=True
)
def test_sample_name_custom_index(self, example_pep_cfg_custom_index):
@pytest.mark.parametrize("example_pep_cfg_path", ["custom_index"], indirect=True)
def test_sample_name_custom_index(self, example_pep_cfg_path):
"""
Verify that sample_name attribute becomes st_index from cfg
"""
p = Project(cfg=example_pep_cfg_custom_index)
p = Project(cfg=example_pep_cfg_path)
assert p.sample_name_colname == "NOT_SAMPLE_NAME"
assert p.samples[0].sample_name == "frog_1"

Expand Down Expand Up @@ -726,3 +724,21 @@ def test_sample_len(self, example_pep_cfg_path):
"""
p = Project(cfg=example_pep_cfg_path)
assert len(p.samples[0]) == 4

@pytest.mark.parametrize("example_pep_cfg_path", ["subsamples_none"], indirect=True)
def test_config_with_subsample_null(self, example_pep_cfg_path):
    """
    A config that sets subsample_table to null loads, and the project
    reports no subsample table.
    """
    project = Project(cfg=example_pep_cfg_path)
    assert project.subsample_table is None

@pytest.mark.parametrize(
    "example_pep_cfg_path", ["nextflow_subsamples"], indirect=True
)
def test_nextflow_subsamples(self, example_pep_cfg_path):
    """
    Tests if a Project can be created from the nextflow_subsamples example config
    """
    p = Project(cfg=example_pep_cfg_path)
    assert isinstance(p, Project)

0 comments on commit a1bedea

Please sign in to comment.