Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.40.3 #489

Merged
merged 11 commits into from
Jul 17, 2024
7 changes: 4 additions & 3 deletions peppy/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
SAMPLE_TABLE_INDEX_KEY = "sample_table_index"
SUBSAMPLE_TABLE_INDEX_KEY = "subsample_table_index"
CONFIG_KEY = "_config"
ORIGINAL_CONFIG_KEY = "_original_config"
PROJECT_TYPENAME = "Project"
SAMPLE_MODS_KEY = "sample_modifiers"
PROJ_MODS_KEY = "project_modifiers"
NAME_TABLE_ATTR = "sample_table"
CONSTANT_KEY = "append"
APPEND_KEY = "append"
REMOVE_KEY = "remove"
DUPLICATED_KEY = "duplicate"
DERIVED_KEY = "derive"
Expand All @@ -33,11 +34,11 @@
AMENDMENTS_KEY = "amend"
ACTIVE_AMENDMENTS_KEY = "_" + AMENDMENTS_KEY
SAMPLE_EDIT_FLAG_KEY = "_samples_touched"
SAMPLE_MODIFIERS = [CONSTANT_KEY, IMPLIED_KEY, DERIVED_KEY, DUPLICATED_KEY, REMOVE_KEY]
SAMPLE_MODIFIERS = [APPEND_KEY, IMPLIED_KEY, DERIVED_KEY, DUPLICATED_KEY, REMOVE_KEY]
PROJECT_MODIFIERS = [CFG_IMPORTS_KEY, AMENDMENTS_KEY]
PROJECT_CONSTANTS = [
"REQUIRED_VERSION",
"CONSTANT_KEY",
"APPEND_KEY",
"DERIVED_SOURCES_KEY",
"DERIVED_KEY",
"SAMPLE_MODS_KEY",
Expand Down
25 changes: 15 additions & 10 deletions peppy/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from rich.console import Console
from rich.progress import track
from ubiquerg import is_url
from copy import deepcopy

from .const import (
ACTIVE_AMENDMENTS_KEY,
Expand All @@ -27,7 +28,7 @@
CONFIG_FILE_KEY,
CONFIG_KEY,
CONFIG_VERSION_KEY,
CONSTANT_KEY,
APPEND_KEY,
DERIVED_ATTRS_KEY,
DERIVED_KEY,
DERIVED_SOURCES_KEY,
Expand Down Expand Up @@ -59,6 +60,7 @@
SUBSAMPLE_RAW_LIST_KEY,
SUBSAMPLE_TABLE_INDEX_KEY,
SUBSAMPLE_TABLES_FILE_KEY,
ORIGINAL_CONFIG_KEY,
)
from .exceptions import (
InvalidSampleTableFileException,
Expand Down Expand Up @@ -137,7 +139,6 @@ def __init__(

self._samples = []
self[SAMPLE_EDIT_FLAG_KEY] = False
self.is_private = False
self.progressbar = False

# table indexes can be specified in config or passed to the object constructor
Expand Down Expand Up @@ -317,13 +318,13 @@ def to_dict(
else:
sub_df = None
try:
self[CONFIG_KEY][NAME_KEY] = self.name
self[ORIGINAL_CONFIG_KEY][NAME_KEY] = self.name
except NotImplementedError:
self[CONFIG_KEY][NAME_KEY] = "unnamed"
self[CONFIG_KEY][DESC_KEY] = self.description
self[ORIGINAL_CONFIG_KEY][NAME_KEY] = "unnamed"
self[ORIGINAL_CONFIG_KEY][DESC_KEY] = self.description
p_dict = {
SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(orient=orient),
CONFIG_KEY: dict(self[CONFIG_KEY]),
CONFIG_KEY: dict(self[ORIGINAL_CONFIG_KEY]),
SUBSAMPLE_RAW_LIST_KEY: sub_df,
}
else:
Expand Down Expand Up @@ -438,6 +439,7 @@ def parse_config_file(
)

self[CONFIG_KEY].update(**config)
self[ORIGINAL_CONFIG_KEY] = deepcopy(self[CONFIG_KEY])
# Parse yaml into the project.config attributes
_LOGGER.debug("Adding attributes: {}".format(", ".join(config)))
# Overwrite any config entries with entries in the amendments
Expand Down Expand Up @@ -587,8 +589,8 @@ def attr_constants(self):
Update each Sample with constants declared by a Project.
If Project does not declare constants, no update occurs.
"""
if self._modifier_exists(CONSTANT_KEY):
to_append = self[CONFIG_KEY][SAMPLE_MODS_KEY][CONSTANT_KEY]
if self._modifier_exists(APPEND_KEY):
to_append = self[CONFIG_KEY][SAMPLE_MODS_KEY][APPEND_KEY]
_LOGGER.debug("Applying constant attributes: {}".format(to_append))

for s in track(
Expand Down Expand Up @@ -1309,14 +1311,17 @@ def _read_sample_data(self):
_LOGGER.info("No config key in Project, or reading project from dict")
return
if CFG_SAMPLE_TABLE_KEY not in self[CONFIG_KEY]:
_LOGGER.debug("no {} found".format(CFG_SAMPLE_TABLE_KEY))
_LOGGER.debug(f"No {CFG_SAMPLE_TABLE_KEY} found in config file")
return
st = self[CONFIG_KEY][CFG_SAMPLE_TABLE_KEY]

if self[SUBSAMPLE_TABLES_FILE_KEY] is not None:
sst = self[SUBSAMPLE_TABLES_FILE_KEY]
else:
if CONFIG_KEY in self and CFG_SUBSAMPLE_TABLE_KEY in self[CONFIG_KEY]:
if (
CONFIG_KEY in self
and self[CONFIG_KEY].get(CFG_SUBSAMPLE_TABLE_KEY) is not None
):
sst = make_list(self[CONFIG_KEY][CFG_SUBSAMPLE_TABLE_KEY], str)
else:
sst = None
Expand Down
45 changes: 27 additions & 18 deletions peppy/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from copy import copy as cp
from logging import getLogger
from string import Formatter
from typing import Optional, Union

import pandas as pd
import yaml
Expand Down Expand Up @@ -134,31 +135,39 @@ def _obj2dict(obj, name=None):
serial.update({"prj": grab_project_data(self[PRJ_REF])})
return serial

def to_yaml(self, path, add_prj_ref=False):
def to_yaml(
self, path: Optional[str] = None, add_prj_ref=False
) -> Union[str, None]:
"""
Serializes itself in YAML format.
Serializes itself in YAML format. Writes to file if path is provided, else returns string representation.

:param str path: A file path to write yaml to; provide this or
the subs_folder_path
the subs_folder_path, defaults to None
:param bool add_prj_ref: whether the project reference bound to the
Sample object should be included in the YAML representation
:return str | None: returns string representation of sample yaml or None
"""
serial = self.to_dict(add_prj_ref=add_prj_ref)
path = os.path.expandvars(path)
if not os.path.exists(os.path.dirname(path)):
_LOGGER.warning(
"Could not write sample data to: {}. "
"Directory does not exist".format(path)
)
return
with open(path, "w") as outfile:
try:
yaml_data = yaml.safe_dump(serial, default_flow_style=False)
except yaml.representer.RepresenterError:
_LOGGER.error("Serialized sample data: {}".format(serial))
raise
outfile.write(yaml_data)
_LOGGER.debug("Sample data written to: {}".format(path))
if path:
path = os.path.expandvars(path)
if os.path.exists(os.path.dirname(path)):
with open(path, "w") as outfile:
try:
yaml_data = yaml.safe_dump(serial, default_flow_style=False)
except yaml.representer.RepresenterError:
_LOGGER.error("Serialized sample data: {}".format(serial))
raise
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saving config to yaml file

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It saves the Sample to yaml

outfile.write(yaml_data)
_LOGGER.debug("Sample data written to: {}".format(path))
else:
_LOGGER.warning(
"Could not write sample data to: {}. "
"Directory does not exist".format(path)
)
return
else:
yaml_data = yaml.safe_dump(serial, stream=None, default_flow_style=False)
return yaml_data

def derive_attribute(self, data_sources, attr_name):
"""
Expand Down
3 changes: 3 additions & 0 deletions peppy/simple_attr_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def __getattr__(self, item):
except KeyError:
raise AttributeError(f"Attribute not found: {item}")

def __eq__(self, other: "SimpleAttMap"):
    """
    Compare two attribute maps by their underlying attribute dictionaries.

    :param SimpleAttMap other: object to compare with this one
    :return bool | NotImplemented: result of comparing the two
        ``_mapped_attr`` dictionaries, or ``NotImplemented`` if ``other``
        does not carry a ``_mapped_attr`` attribute
    """
    try:
        other_attrs = other._mapped_attr
    except AttributeError:
        # Not an attribute map (e.g. None or a plain dict): defer to Python's
        # default comparison fallback instead of raising from ``==``.
        return NotImplemented
    return self._mapped_attr == other_attrs
Comment on lines +49 to +50
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests for these? Is the mapped_attr just a dictionary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I added tests. it is just dict:)


@property
def attributes(self):
return self._mapped_attr
6 changes: 4 additions & 2 deletions peppy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import os
from typing import Dict, Mapping
from typing import Dict, Mapping, Type, Union
from urllib.request import urlopen

import yaml
Expand Down Expand Up @@ -76,14 +76,16 @@ def grab_project_data(prj):
raise KeyError("Project lacks section '{}'".format(CONFIG_KEY))


def make_list(arg, obj_class):
def make_list(arg: Union[list, str], obj_class: Type) -> list:
"""
Convert an object of predefined class to a list of objects of that class or
ensure a list is a list of objects of that class

:param list[obj] | obj arg: string or a list of strings to listify
:param type obj_class: the class of interest

:return list: list of objects of the predefined class

:raise TypeError: if a faulty argument was provided
"""

Expand Down
5 changes: 0 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ def example_pep_cfg_noname_path(request):
return get_path_to_example_file(EPB, "noname", request.param)


@pytest.fixture
def example_pep_cfg_custom_index(request):
return get_path_to_example_file(EPB, "custom_index", request.param)


@pytest.fixture
def example_peps_cfg_paths(request):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pep_version: "2.0.0"
sample_table: "samplesheet.csv"
subsample_table: "subsamplesheet.csv"
sample_table_index: "sample"
subsample_table_index: "sample"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sample,strandedness
WT_REP1,reverse
WT_REP2,reverse
RAP1_UNINDUCED_REP1,reverse
RAP1_UNINDUCED_REP2,reverse
RAP1_IAA_30M_REP1,reverse
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
sample,instrument_platform,run_accession,fastq_1,fastq_2
WT_REP1,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz
WT_REP1,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz
WT_REP2,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz
RAP1_UNINDUCED_REP1,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,
RAP1_UNINDUCED_REP2,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,
RAP1_UNINDUCED_REP2,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,
RAP1_IAA_30M_REP1,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pep_version: "2.0.0"
sample_table: sample_table.csv
subsample_table: null
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample_name,protocol,file
frog_1,anySampleType,multi
frog_2,anySampleType,multi
frog_3,anySampleType,multi
31 changes: 31 additions & 0 deletions tests/smoketests/test_Sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ def test_str_repr_correctness(self, example_pep_cfg_path):
assert example_pep_cfg_path in str_repr
assert "Sample '{}'".format(sample["sample_name"]) in str_repr

@pytest.mark.parametrize("example_pep_cfg_path", EXAMPLE_TYPES, indirect=True)
def test_sample_to_yaml_no_path(self, example_pep_cfg_path):
    """
    Check that to_yaml, called without a path, returns a YAML string
    for every sample in the project.
    """
    project = Project(cfg=example_pep_cfg_path)
    assert all("sample_name" in sample.to_yaml() for sample in project.samples)

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path):
"""
Expand All @@ -77,3 +87,24 @@ def test_pickle_in_samples(self, example_pep_cfg_path):
unpickled_sample = pickle.loads(pickled_data)

assert sample.to_dict() == unpickled_sample.to_dict()

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_equals_samples(self, example_pep_cfg_path):
    """
    Check that the first samples of two projects built from the same
    config compare equal.
    """
    left_project, right_project = (
        Project(cfg=example_pep_cfg_path) for _ in range(2)
    )
    assert left_project.samples[0] == right_project.samples[0]

@pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True)
def test_not_equals_samples(self, example_pep_cfg_path):
    """
    Check that a sample does not compare equal to a copy carrying an extra
    attribute, nor to a different sample of the same project.
    """
    baseline_project = Project(cfg=example_pep_cfg_path)
    altered_project = Project(cfg=example_pep_cfg_path)
    baseline = baseline_project.samples[0]
    altered = altered_project.samples[0]
    neighbour = altered_project.samples[1]

    # Add an attribute to one sample only; equality is then expected to fail.
    altered.new = "something"
    for candidate in (altered, neighbour):
        assert not baseline == candidate
26 changes: 21 additions & 5 deletions tests/test_Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,14 +330,12 @@ def test_missing_sample_name_custom_index(self, example_pep_cfg_noname_path):
p = Project(cfg=example_pep_cfg_noname_path, sample_table_index="id")
assert p.sample_name_colname == "id"

@pytest.mark.parametrize(
"example_pep_cfg_custom_index", ["project_config.yaml"], indirect=True
)
def test_sample_name_custom_index(self, example_pep_cfg_custom_index):
@pytest.mark.parametrize("example_pep_cfg_path", ["custom_index"], indirect=True)
def test_sample_name_custom_index(self, example_pep_cfg_path):
"""
Verify that sample_name attribute becomes st_index from cfg
"""
p = Project(cfg=example_pep_cfg_custom_index)
p = Project(cfg=example_pep_cfg_path)
assert p.sample_name_colname == "NOT_SAMPLE_NAME"
assert p.samples[0].sample_name == "frog_1"

Expand Down Expand Up @@ -726,3 +724,21 @@ def test_sample_len(self, example_pep_cfg_path):
"""
p = Project(cfg=example_pep_cfg_path)
assert len(p.samples[0]) == 4

@pytest.mark.parametrize("example_pep_cfg_path", ["subsamples_none"], indirect=True)
def test_config_with_subsample_null(self, example_pep_cfg_path):
    """
    Check that a project built from the "subsamples_none" example config
    reports no subsample table.
    """
    project = Project(cfg=example_pep_cfg_path)
    assert project.subsample_table is None

@pytest.mark.parametrize(
    "example_pep_cfg_path", ["nextflow_subsamples"], indirect=True
)
def test_nextflow_subsamples(self, example_pep_cfg_path):
    """
    Tests if a Project can be created from the "nextflow_subsamples"
    example config without raising
    """
    p = Project(cfg=example_pep_cfg_path)
    # Smoke check only: construction succeeded and produced a Project.
    assert isinstance(p, Project)
Loading