From e266bb03df8d567108c806f5da11a5f3dbecc711 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 24 May 2023 15:36:58 -0400 Subject: [PATCH 01/53] Fixed #447 (Removal of attmap dependency) --- .github/workflows/run-pytest.yml | 4 +- docs/changelog.md | 8 +- docs/hello-world.md | 2 +- peppy/_version.py | 2 +- peppy/parsers.py | 6 +- peppy/project.py | 157 +++++++++++++++++-------------- peppy/sample.py | 91 +++++++++--------- peppy/utils.py | 4 +- setup.py | 8 +- tests/conftest.py | 2 +- tests/smoketests/test_Project.py | 2 +- tests/smoketests/test_Sample.py | 10 +- tests/test_Project.py | 36 ++++--- 13 files changed, 183 insertions(+), 149 deletions(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 8918af5a..11bc86fe 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -4,14 +4,14 @@ on: push: branches: [dev] pull_request: - branches: [master] + branches: [master, dev] jobs: pytest: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.11"] os: [ubuntu-20.04] steps: diff --git a/docs/changelog.md b/docs/changelog.md index 22ea48b2..8132cc35 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.35.6] -- 2023-05-25 +### Changed +- Replaced attmap with MutableMapping. (which resulted in the removal of the attribute functionality previously available in attmap) +- Replaced OrderedDict with dict. +- Deprecated support for Python versions older than 2.7. 
+ ## [0.35.5] -- 2023-03-27 ### Fixed - A [bug](https://github.com/pepkit/peppy/issues/435) with custom sample ids @@ -18,8 +24,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [0.35.2] -- 2022-09-13 ### Fixed - - Returning `NaN` value within `to_dict` method was fixed and method now returns `None` instead + ## [0.35.1] -- 2022-09-07 ### Changed - Organization of test files. Separated unittests from smoketests. diff --git a/docs/hello-world.md b/docs/hello-world.md index d6cf503b..b8162e86 100644 --- a/docs/hello-world.md +++ b/docs/hello-world.md @@ -40,7 +40,7 @@ project = peppy.Project("example_basic/project_config.yaml") # instantiate in-me samples = project.samples # grab the list of Sample objects defined in this Project # Find the input file for the first sample in the project -samples[0].file +samples[0]["file"] ``` That's it! You've got `peppy` running on an example project. diff --git a/peppy/_version.py b/peppy/_version.py index 322d9f85..a51f1e97 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.35.5" +__version__ = "0.35.6" diff --git a/peppy/parsers.py b/peppy/parsers.py index 7276a6d0..961d6e17 100644 --- a/peppy/parsers.py +++ b/peppy/parsers.py @@ -1,7 +1,9 @@ +import os from typing import Any, Dict, List -from .exceptions import InvalidSampleTableFileException + import pandas as pd -import os + +from .exceptions import InvalidSampleTableFileException class TableParser: diff --git a/peppy/project.py b/peppy/project.py index a98be147..7acb13a9 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -1,23 +1,20 @@ """ Build a Project object. 
""" -import math -import os, sys -from collections.abc import Mapping +import os +import sys +from collections.abc import Mapping, MutableMapping from contextlib import suppress from logging import getLogger from typing import Dict, Iterable, List, Tuple, Union import numpy as np import pandas as pd -from attmap import PathExAttMap from pandas.core.common import flatten -from rich.progress import track from rich.console import Console +from rich.progress import track from ubiquerg import is_url -from peppy.sample import Sample - from .const import ( ACTIVE_AMENDMENTS_KEY, AMENDMENTS_KEY, @@ -78,7 +75,7 @@ @copy -class Project(PathExAttMap): +class Project(MutableMapping): """ A class to model a Project (collection of samples and metadata). @@ -114,6 +111,7 @@ def __init__( self.__class__.__name__, " from file {}".format(cfg) if cfg else "" ) ) + self._project_data = {} super(Project, self).__init__() is_cfg = is_cfg_or_anno(cfg) if is_cfg is None: @@ -244,11 +242,11 @@ def to_dict(self, expand: bool = False, extended: bool = False) -> dict: SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(), CONFIG_KEY: dict(self[CONFIG_KEY]), SUBSAMPLE_RAW_DICT_KEY: sub_df, - NAME_KEY: self[NAME_KEY], - DESC_KEY: self[DESC_KEY], + NAME_KEY: self.name, + DESC_KEY: self.description, } else: - p_dict = self.config.to_dict(expand=expand) + p_dict = self.config p_dict["_samples"] = [s.to_dict() for s in self.samples] return p_dict @@ -268,9 +266,13 @@ def _reinit(self): """ Clear all object attributes and initialize again """ - cfg_path = self[CONFIG_FILE_KEY] if CONFIG_FILE_KEY in self else None - for attr in self.keys(): - del self[attr] + if hasattr(self, "config_file"): + cfg_path = getattr(self, "config_file") + else: + cfg_path = None + obj_attributes = self.__dict__.copy().keys() + for attr in obj_attributes: + delattr(self, attr) self.__init__(cfg=cfg_path) def _get_table_from_samples(self, index, initial=False): @@ -286,7 +288,10 @@ def _get_table_from_samples(self, index, 
initial=False): # (there is no chance of manual sample edits) # and no sample_modifiers section is defined in the config, # then we can simply reuse the previously read anno sheet. - df = self[SAMPLE_DF_KEY] if hasattr(self, SAMPLE_DF_KEY) else pd.DataFrame() + if SAMPLE_DF_KEY in self: + df = self[SAMPLE_DF_KEY] + else: + df = pd.DataFrame() else: df = pd.DataFrame.from_dict([s.to_dict() for s in self.samples]) index = [index] if isinstance(index, str) else index @@ -313,7 +318,7 @@ def parse_config_file( :raises KeyError: if config file lacks required section(s) """ if CONFIG_KEY not in self: - self[CONFIG_KEY] = PathExAttMap() + self[CONFIG_KEY] = {} if not os.path.exists(cfg_path) and not is_url(cfg_path): raise OSError(f"Project config file path does not exist: {cfg_path}") config = load_yaml(cfg_path) @@ -324,7 +329,7 @@ def parse_config_file( config, type(config) ) - _LOGGER.debug("Raw ({}) config data: {}".format(cfg_path, config)) + _LOGGER.debug(f"Raw ({cfg_path}) config data: {config}") self.st_index = ( config[SAMPLE_TABLE_INDEX_KEY] if SAMPLE_TABLE_INDEX_KEY in config else None @@ -355,7 +360,7 @@ def parse_config_file( "External Project configuration does not" " exist: {}".format(i) ) - self[CONFIG_KEY].add_entries(config) + self[CONFIG_KEY].update(**config) # Parse yaml into the project.config attributes _LOGGER.debug("Adding attributes: {}".format(", ".join(config))) # Overwrite any config entries with entries in the amendments @@ -376,7 +381,7 @@ def parse_config_file( amendment, c[PROJ_MODS_KEY][AMENDMENTS_KEY] ) _LOGGER.debug("Updating with: {}".format(amends)) - self[CONFIG_KEY].add_entries(amends) + self[CONFIG_KEY].update(**amends) _LOGGER.info("Using amendments: {}".format(amendment)) else: raise MissingAmendmentError(amendment) @@ -423,13 +428,14 @@ def modify_samples(self): Perform any sample modifications defined in the config. 
""" if self._modifier_exists(): + # check for unrecognizable modification keys mod_diff = set(self[CONFIG_KEY][SAMPLE_MODS_KEY].keys()) - set( SAMPLE_MODIFIERS ) if len(mod_diff) > 0: _LOGGER.warning( - "Config '{}' section contains unrecognized " - "subsections: {}".format(SAMPLE_MODS_KEY, mod_diff) + f"Config '{SAMPLE_MODS_KEY}' section contains unrecognized " + f"subsections: {mod_diff}" ) self.attr_remove() self.attr_constants() @@ -471,7 +477,7 @@ def _del_if_in(obj, attr): if self._modifier_exists(REMOVE_KEY): to_remove = self[CONFIG_KEY][SAMPLE_MODS_KEY][REMOVE_KEY] - _LOGGER.debug("Removing attributes: {}".format(to_remove)) + _LOGGER.debug(f"Removing attributes: {to_remove}") for s in track( self.samples, description="Removing sample attributes", @@ -506,7 +512,7 @@ def attr_synonyms(self): """ if self._modifier_exists(DUPLICATED_KEY): synonyms = self[CONFIG_KEY][SAMPLE_MODS_KEY][DUPLICATED_KEY] - _LOGGER.debug("Applying synonyms: {}".format(synonyms)) + _LOGGER.debug(f"Applying synonyms: {synonyms}") for sample in track( self.samples, description="Applying synonymous sample attributes", @@ -515,7 +521,7 @@ def attr_synonyms(self): ): for attr, new in synonyms.items(): if attr in sample: - setattr(sample, new, getattr(sample, attr)) + sample[new] = sample[attr] else: _LOGGER.warning( f"The sample attribute to duplicate not found: {attr}" @@ -546,12 +552,12 @@ def _assert_samples_have_names(self): ) if self.st_index != SAMPLE_NAME_ATTR: try: - custom_sample_name = getattr(sample, self.st_index) - except AttributeError: + custom_sample_name = sample[self.st_index] + except KeyError: raise InvalidSampleTableFileException( f"Specified {CFG_SAMPLE_TABLE_KEY} index ({self.st_index}) does not exist" ) - setattr(sample, SAMPLE_NAME_ATTR, custom_sample_name) + sample[SAMPLE_NAME_ATTR] = custom_sample_name _LOGGER.warning( message + f"using specified {CFG_SAMPLE_TABLE_KEY} index ({self.st_index}) instead. 
" @@ -567,14 +573,15 @@ def _auto_merge_duplicated_names(self): :raises IllegalStateException: if both duplicated samples are detected and subsample_table is specified in the config """ - sample_names_list = [getattr(s, self.st_index) for s in self.samples] + sample_names_list = [s[self.st_index] for s in self.samples] duplicated_sample_ids = self._get_duplicated_sample_ids(sample_names_list) if not duplicated_sample_ids: return _LOGGER.info( - f"Found {len(duplicated_sample_ids)} samples with non-unique names: {duplicated_sample_ids}. Attempting to auto-merge." + f"Found {len(duplicated_sample_ids)} samples with non-unique names: {duplicated_sample_ids}. " + f"Attempting to auto-merge." ) if SUBSAMPLE_DF_KEY in self and self[SUBSAMPLE_DF_KEY] is not None: raise IllegalStateException( @@ -654,7 +661,7 @@ def _get_merged_attributes( for attr in sample_attributes: attribute_values = [] for sample in duplicated_samples: - attribute_value_for_sample = getattr(sample, attr, "") + attribute_value_for_sample = sample.get(attr, "") attribute_values.append(attribute_value_for_sample) merged_attributes[attr] = list(flatten(attribute_values)) @@ -725,12 +732,10 @@ def _select_new_attval(merged_attrs, attname, attval): for attname, attval in rowdata.items(): if attname == sample_colname or not attval: - _LOGGER.debug("Skipping KV: {}={}".format(attname, attval)) + _LOGGER.debug(f"Skipping KV: {attname}={attval}") continue _LOGGER.debug( - "merge: sample '{}'; '{}'='{}'".format( - sample[self.st_index], attname, attval - ) + f"merge: sample '{sample[self.st_index]}'; '{attname}'='{attval}'" ) merged_attrs[attname] = _select_new_attval( merged_attrs, attname, attval @@ -740,7 +745,7 @@ def _select_new_attval(merged_attrs, attname, attval): merged_attrs.pop(sample_colname, None) _LOGGER.debug( - "Updating Sample {}: {}".format(sample[self.st_index], merged_attrs) + f"Updating Sample {sample[self.st_index]}: {merged_attrs}" ) sample.update(merged_attrs) @@ -757,17 +762,13 @@ def 
attr_imply(self): implications = self[CONFIG_KEY][SAMPLE_MODS_KEY][IMPLIED_KEY] if not isinstance(implications, list): raise InvalidConfigFileException( - "{}.{} has to be a list of key-value pairs".format( - SAMPLE_MODS_KEY, IMPLIED_KEY - ) + f"{SAMPLE_MODS_KEY}.{IMPLIED_KEY} has to be a list of key-value pairs" ) - _LOGGER.debug("Sample attribute implications: {}".format(implications)) + _LOGGER.debug(f"Sample attribute implications: {implications}") for implication in implications: if not all([key in implication for key in IMPLIED_COND_KEYS]): raise InvalidConfigFileException( - "{}.{} section is invalid: {}".format( - SAMPLE_MODS_KEY, IMPLIED_KEY, implication - ) + f"{SAMPLE_MODS_KEY}.{IMPLIED_KEY} section is invalid: {implication}" ) for sample in track( self.samples, @@ -778,24 +779,19 @@ def attr_imply(self): for implication in implications: implier_attrs = list(implication[IMPLIED_IF_KEY].keys()) implied_attrs = list(implication[IMPLIED_THEN_KEY].keys()) - _LOGGER.debug( - "Setting Sample attributes implied by '{}'".format(implier_attrs) - ) + _LOGGER.debug(f"Setting Sample attributes implied by '{implier_attrs}'") for implier_attr in implier_attrs: implier_val = implication[IMPLIED_IF_KEY][implier_attr] if implier_attr not in sample: _LOGGER.debug( - "Sample lacks implier attr ({}), " - "skipping:".format(implier_attr) + f"Sample lacks implier attr ({implier_attr}), skipping:" ) break sample_val = sample[implier_attr] if sample_val not in implier_val: _LOGGER.debug( "Sample attr value does not match any of implier " - "requirements ({} not in {}), skipping".format( - sample_val, implier_val - ) + f"requirements ({sample_val} not in {implier_val}), skipping" ) break else: @@ -803,9 +799,7 @@ def attr_imply(self): for implied_attr in implied_attrs: imp_val = implication[IMPLIED_THEN_KEY][implied_attr] _LOGGER.debug( - "Setting implied attr: '{}={}'".format( - implied_attr, imp_val - ) + f"Setting implied attr: '{implied_attr}={imp_val}'" ) 
sample.__setitem__(implied_attr, imp_val) @@ -826,29 +820,26 @@ def attr_derive(self, attrs=None): console=Console(file=sys.stderr), ): for attr in derivations: - if not hasattr(sample, attr): - _LOGGER.debug("sample lacks '{}' attribute".format(attr)) + if not attr in sample: + _LOGGER.debug(f"sample lacks '{attr}' attribute") continue elif attr in sample._derived_cols_done: - _LOGGER.debug("'{}' has been derived".format(attr)) + _LOGGER.debug(f"'{attr}' has been derived") continue _LOGGER.debug( - "Deriving '{}' attribute for '{}'".format( - attr, sample[self.st_index] - ) + f"Deriving '{attr}' attribute for '{sample[self.st_index]}'" ) # Set {atr}_key, so the original source can also be retrieved - setattr(sample, ATTR_KEY_PREFIX + attr, getattr(sample, attr)) + sample[ATTR_KEY_PREFIX + attr] = sample[attr] derived_attr = sample.derive_attribute(ds, attr) if derived_attr: _LOGGER.debug("Setting '{}' to '{}'".format(attr, derived_attr)) - setattr(sample, attr, derived_attr) + sample[attr] = derived_attr else: _LOGGER.debug( - "Not setting null/empty value for data source" - " '{}': {}".format(attr, type(derived_attr)) + f"Not setting null/empty value for data source '{attr}': {type(derived_attr)}" ) sample._derived_cols_done.append(attr) @@ -952,14 +943,14 @@ def infer_name(self): """ if CONFIG_KEY not in self: return - if hasattr(self[CONFIG_KEY], "name"): - if " " in self[CONFIG_KEY].name: + if NAME_KEY in self[CONFIG_KEY]: + if " " in self[CONFIG_KEY][NAME_KEY]: raise InvalidConfigFileException( "Specified Project name ({}) contains whitespace".format( - self[CONFIG_KEY].name + self[CONFIG_KEY][NAME_KEY] ) ) - return self[CONFIG_KEY].name.replace(" ", "_") + return self[CONFIG_KEY][NAME_KEY].replace(" ", "_") if not self[CONFIG_FILE_KEY]: raise NotImplementedError( "Project name inference isn't supported " @@ -983,7 +974,7 @@ def get_description(self): """ if CONFIG_KEY not in self: return - if hasattr(self[CONFIG_KEY], DESC_KEY): + if DESC_KEY in 
self[CONFIG_KEY]: desc_str = str(self[CONFIG_KEY][DESC_KEY]) if not isinstance(desc_str, str): try: @@ -1015,7 +1006,7 @@ def __str__(self): num_samples = 0 if num_samples > 0: msg = f"{msg}\n{num_samples} samples" - sample_names = [getattr(s, self.st_index) for s in self.samples] + sample_names = [s[self.st_index] for s in self.samples] repr_names = sample_names[:MAX_PROJECT_SAMPLES_REPR] context = ( f" (showing first {MAX_PROJECT_SAMPLES_REPR})" @@ -1056,7 +1047,7 @@ def list_amendments(self): :return Iterable[str]: a list of available amendment names """ try: - return self[CONFIG_KEY][PROJ_MODS_KEY][AMENDMENTS_KEY].keys() + return list(self[CONFIG_KEY][PROJ_MODS_KEY][AMENDMENTS_KEY].keys()) except Exception as e: _LOGGER.debug( "Could not retrieve available amendments: {}".format( @@ -1073,7 +1064,7 @@ def config(self): :return Mapping: config. May be formatted to comply with the most recent version specifications """ - return self[CONFIG_KEY] if CONFIG_KEY in self else PathExAttMap() + return self[CONFIG_KEY] if CONFIG_KEY in self else {} @property def config_file(self): @@ -1341,6 +1332,30 @@ def get_samples(self, sample_names): """ return [s for s in self.samples if s[self.st_index] in sample_names] + def __setitem__(self, item, value): + self._project_data[item] = value + + def __getitem__(self, item): + """ + Fetch the value of given key. + + :param hashable item: key for which to fetch value + :return object: value mapped to given key, if available + :raise KeyError: if the requested key is unmapped. 
+ """ + return self._project_data[item] + + def __iter__(self): + return iter(self._project_data) + + def __len__(self): + return len(self._project_data) + + def __delitem__(self, key): + value = self[key] + del self._project_data[key] + self.pop(value, None) + def infer_delimiter(filepath): """ diff --git a/peppy/sample.py b/peppy/sample.py index bd65dc1e..9df3ef7c 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -1,14 +1,13 @@ import glob import os -from collections import OrderedDict -from collections.abc import Mapping +from collections.abc import Mapping, MutableMapping from copy import copy as cp from logging import getLogger from string import Formatter import pandas as pd import yaml -from attmap import AttMap, PathExAttMap +from pandas import Series, isnull from .const import ( CONFIG_FILE_KEY, @@ -31,7 +30,7 @@ def __missing__(self, key): @copy -class Sample(PathExAttMap): +class Sample(MutableMapping): """ Class to model Samples based on a pandas Series. @@ -40,9 +39,10 @@ class Sample(PathExAttMap): def __init__(self, series, prj=None): super(Sample, self).__init__() + self._sample_dict = {} - data = OrderedDict(series) - _LOGGER.debug("Sample data: {}".format(data)) + data = dict(series) + _LOGGER.debug("Sample data: {data}") # Attach Project reference try: @@ -50,12 +50,11 @@ def __init__(self, series, prj=None): except (AttributeError, KeyError): data_proj = None - self.add_entries(data) + self.update(**data) if data_proj and PRJ_REF not in self: self[PRJ_REF] = data_proj - typefam = PathExAttMap if PRJ_REF in self and prj: _LOGGER.warning( "Project data provided both in data and as separate" @@ -68,13 +67,10 @@ def __init__(self, series, prj=None): self[PRJ_REF] = None _LOGGER.debug("No project reference for sample") else: - prefix = "Project reference on a sample must be an instance of {}".format( - typefam.__name__ - ) + prefix = "Project reference on a sample must be an instance of dict" + if not isinstance(self[PRJ_REF], Mapping): - raise 
TypeError( - prefix + "; got {}".format(type(self[PRJ_REF]).__name__) - ) + raise TypeError(f"{prefix}; got {type(self[PRJ_REF]).__name__}") self._derived_cols_done = [] self._attributes = list(series.keys()) @@ -88,7 +84,7 @@ def get_sheet_dict(self): originally provided via the sample sheet (i.e., the a map-like representation of the instance, excluding derived items) """ - return OrderedDict([[k, getattr(self, k)] for k in self._attributes]) + return dict([[k, self[k]] for k in self._attributes]) def to_dict(self, add_prj_ref=False): """ @@ -108,18 +104,10 @@ def _obj2dict(obj, name=None): :param str name: name of the object to represent. :param Iterable[str] to_skip: names of attributes to ignore. """ - from pandas import Series, isnull - if name: - _LOGGER.log(5, "Converting to dict: {}".format(name)) + _LOGGER.log(5, "Converting to dict: {name}") if isinstance(obj, list): return [_obj2dict(i) for i in obj] - if isinstance(obj, AttMap): - return { - k: _obj2dict(v, name=k) - for k, v in obj.items() - if not k.startswith("_") - } elif isinstance(obj, Mapping): return { k: _obj2dict(v, name=k) @@ -269,7 +257,7 @@ def _glob_regex(patterns): return None sn = self[SAMPLE_NAME_ATTR] if SAMPLE_NAME_ATTR in self else "this sample" try: - source_key = getattr(self, attr_name) + source_key = self[attr_name] except AttributeError: reason = ( "'{attr}': to locate sample's derived attribute source, " @@ -282,31 +270,24 @@ def _glob_regex(patterns): try: regex = data_sources[source_key] - _LOGGER.debug("Data sources: {}".format(data_sources)) + _LOGGER.debug("Data sources: {data_sources}") except KeyError: _LOGGER.debug( - "{}: config lacks entry for {} key: " - "'{}' in column '{}'; known: {}".format( - sn, DERIVED_SOURCES_KEY, source_key, attr_name, data_sources.keys() - ) + f"{sn}: config lacks entry for {DERIVED_SOURCES_KEY} key: " + f"'{source_key}' in column '{attr_name}'; known: {data_sources.keys()}" ) return "" deriv_exc_base = ( - "In sample '{sn}' cannot 
correctly parse derived " - "attribute source: {r}.".format(sn=sn, r=regex) + f"In sample '{sn}' cannot correctly parse derived " + f"attribute source: {regex}." ) try: vals = _format_regex(regex, dict(self.items())) _LOGGER.debug("Formatted regex: {}".format(vals)) except KeyError as ke: - _LOGGER.warning( - deriv_exc_base + " Can't access {ke} attribute".format(ke=str(ke)) - ) + _LOGGER.warning(f"{deriv_exc_base} Can't access {str(ke)} attribute") except Exception as e: - _LOGGER.warning( - deriv_exc_base - + " Caught exception: {e}".format(e=getattr(e, "message", repr(e))) - ) + _LOGGER.warning(f"{deriv_exc_base} Caught exception: {str(e)}") else: return _glob_regex(vals) return None @@ -320,17 +301,37 @@ def project(self): """ return self[PRJ_REF] - def __setattr__(self, key, value): - self._try_touch_samples() - super(Sample, self).__setattr__(key, value) - def __delattr__(self, item): self._try_touch_samples() super(Sample, self).__delattr__(item) - def __setitem__(self, key, value): + def __setitem__(self, item, value): self._try_touch_samples() - super(Sample, self).__setitem__(key, value) + self._sample_dict[item] = value + + def __getitem__(self, item): + """ + Fetch the value of given key. + + :param hashable item: key for which to fetch value + :return object: value mapped to given key, if available + :raise KeyError: if the requested key is unmapped. + """ + return self._sample_dict[item] + + def __iter__(self): + return iter(self._sample_dict) + + def __len__(self): + return len(self._sample_dict) + + def __delitem__(self, key): + value = self[key] + del self._sample_dict[key] + self.pop(value, None) + + def __contains__(self, key): + return key in list(self.keys()) # The __reduce__ function provides an interface for # correct object serialization with the pickle module. 
diff --git a/peppy/utils.py b/peppy/utils.py index bc2a1838..123a2a0b 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -8,9 +8,8 @@ import yaml from ubiquerg import expandpath, is_url -from .exceptions import * - from .const import CONFIG_KEY, SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_TABLE_INDEX_KEY +from .exceptions import * _LOGGER = logging.getLogger(__name__) @@ -75,7 +74,6 @@ def grab_project_data(prj): return prj[CONFIG_KEY].to_dict() except KeyError: raise KeyError("Project lacks section '{}'".format(CONFIG_KEY)) - return data def make_list(arg, obj_class): diff --git a/setup.py b/setup.py index 75528d67..ac84c383 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ -import sys import os +import sys + from setuptools import find_packages, setup PACKAGE_NAME = "peppy" @@ -47,16 +48,15 @@ def get_static(name, condition=None): classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, metadata, bioinformatics, sequencing, ngs, workflow", url="https://github.com/pepkit/peppy/", - author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro", + author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro, Oleksandr Khoroshevskyi", license="BSD2", include_package_data=True, tests_require=(["pytest"]), diff --git a/tests/conftest.py b/tests/conftest.py index de98c0e3..90e93a94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,10 @@ """ Configuration for modules with independent tests of models. 
""" +import json import os import pandas as pd import pytest -import json __author__ = "Michal Stolarczyk" __email__ = "michal.stolarczyk@nih.gov" diff --git a/tests/smoketests/test_Project.py b/tests/smoketests/test_Project.py index eda46102..197ba9b0 100644 --- a/tests/smoketests/test_Project.py +++ b/tests/smoketests/test_Project.py @@ -44,7 +44,7 @@ def test_derive(self, example_pep_cfg_path): """ p = Project(cfg=example_pep_cfg_path) assert all(["file_path" in s for s in p.samples]) - assert all(["file_path" in s["_derived_cols_done"] for s in p.samples]) + assert all(["file_path" in s._derived_cols_done for s in p.samples]) @pytest.mark.parametrize("example_pep_cfg_path", ["remove"], indirect=True) def test_remove(self, example_pep_cfg_path): diff --git a/tests/smoketests/test_Sample.py b/tests/smoketests/test_Sample.py index 58de9a41..e1ba3c12 100644 --- a/tests/smoketests/test_Sample.py +++ b/tests/smoketests/test_Sample.py @@ -27,7 +27,7 @@ ] -class SampleTests: +class TestsSample: @pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True) def test_serialization(self, example_pep_cfg_path): """ @@ -37,9 +37,9 @@ def test_serialization(self, example_pep_cfg_path): fn = os.path.join(td, "serialized_sample.yaml") p = Project(cfg=example_pep_cfg_path) sample = p.samples[0] - sample.set = set(["set"]) - sample.dict = dict({"dict": "dict"}) - sample.list = list(["list"]) + sample["set"] = set("set") + sample["dict"] = dict({"dict": "dict"}) + sample["list"] = list(["list"]) sample.to_yaml(fn) with open(fn, "r") as f: contents = f.read() @@ -56,7 +56,7 @@ def test_str_repr_correctness(self, example_pep_cfg_path): for sample in p.samples: str_repr = sample.__str__(max_attr=100) assert example_pep_cfg_path in str_repr - assert "Sample '{}'".format(sample.sample_name) in str_repr + assert "Sample '{}'".format(sample["sample_name"]) in str_repr @pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True) def 
test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path): diff --git a/tests/test_Project.py b/tests/test_Project.py index b585e04a..9149a9be 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -65,9 +65,8 @@ def _cmp_all_samples_attr(p1, p2, attr): :param attr: attribute name to compare :type attr: str """ - - assert [getattr(s, attr, None) for s in p1.samples] == [ - getattr(s, attr, None) for s in p2.samples + assert [s1.get(attr, "") for s1 in p1.samples] == [ + s2.get(attr, "") for s2 in p2.samples ] @@ -245,7 +244,7 @@ def test_no_description(self, example_pep_cfg_path, defer): """ p = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer) assert isinstance(p, Project) - assert "description" in p and p.description is None + assert p.description is None @pytest.mark.parametrize("defer", [False, True]) @pytest.mark.parametrize("desc", ["desc1", "desc 2 123$!@#;11", 11, None]) @@ -264,7 +263,7 @@ def test_description(self, example_pep_cfg_path, desc, defer): dump(data, f) p = Project(cfg=temp_path_cfg, defer_samples_creation=defer) assert isinstance(p, Project) - assert "description" in p and p.description == str(desc) + assert p.description == str(desc) @pytest.mark.parametrize( "example_pep_cfg_noname_path", ["project_config.yaml"], indirect=True @@ -390,7 +389,7 @@ def test_peppy_initializes_samples_with_correct_attributes( self, example_pep_cfg_path, expected_attribute ): p = Project(example_pep_cfg_path, sample_table_index="sample") - assert all([hasattr(sample, expected_attribute) for sample in p.samples]) + assert all([expected_attribute in sample for sample in p.samples]) class TestProjectManipulationTests: @@ -543,8 +542,11 @@ def test_equality(self, example_pep_cfg_path): p2 = Project(cfg=example_pep_cfg_path) assert p1 == p2 - @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) - @pytest.mark.parametrize("example_pep_csv_path", ["derive"], indirect=True) + @pytest.mark.parametrize( + 
"example_pep_cfg_path, example_pep_csv_path", + [["append", "derive"]], + indirect=True, + ) def test_unequality(self, example_pep_cfg_path, example_pep_csv_path): """ Test equality function of two projects @@ -553,7 +555,11 @@ def test_unequality(self, example_pep_cfg_path, example_pep_csv_path): p2 = Project(cfg=example_pep_csv_path) assert not p1 == p2 - @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + @pytest.mark.parametrize( + "example_pep_cfg_path", + ["append", "derive", "duplicate", "subtable1"], + indirect=True, + ) def test_from_dict(self, example_pep_cfg_path): """ Test initializing project from dict @@ -564,13 +570,19 @@ def test_from_dict(self, example_pep_cfg_path): p2 = Project().from_dict(p1_dict) assert p1 == p2 - @pytest.mark.parametrize("config_with_pandas_obj", ["append"], indirect=True) - @pytest.mark.parametrize("example_pep_csv_path", ["append"], indirect=True) + @pytest.mark.parametrize( + "config_with_pandas_obj, example_pep_csv_path", + [ + ["append", "append"], + ["derive", "derive"], + ["subtable1", "subtable1"], + ], + indirect=True, + ) def test_from_pandas(self, config_with_pandas_obj, example_pep_csv_path): """ Test initializing project from dict """ p1 = Project().from_pandas(config_with_pandas_obj) p2 = Project(example_pep_csv_path) - assert p1 == p2 From 820619b53cb13d9f8ee5114a279d23b35547b3dd Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 11:54:25 -0400 Subject: [PATCH 02/53] updated requirements --- requirements/requirements-all.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 9436a3fb..65b60371 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,4 +1,3 @@ -attmap>=0.13.2 pandas>=0.24.2 pyyaml rich>=10.3.0 From 443a69b404d6f5bab160313fbbb14f1942c4f39d Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Thu, 25 May 2023 12:04:24 -0400 Subject: [PATCH 03/53] 
Update changelog.md --- docs/changelog.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 8132cc35..e4f19b57 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,11 +2,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.35.6] -- 2023-05-25 +## [0.40.0] -- 2023-05-25 + +**This version introduced backwards-incompatible changes.** + ### Changed - Replaced attmap with MutableMapping. (which resulted in the removal of the attribute functionality previously available in attmap) - Replaced OrderedDict with dict. -- Deprecated support for Python versions older than 2.7. +- Deprecated support for Python versions <= 3.6. ## [0.35.5] -- 2023-03-27 ### Fixed From a86f8aafacf6c5f0520fc11a695d7a7b2299af12 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 12:06:00 -0400 Subject: [PATCH 04/53] updated Python version --- .github/workflows/run-pytest.yml | 2 +- docs/changelog.md | 2 +- setup.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 11bc86fe..35328dff 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.7", "3.11"] os: [ubuntu-20.04] steps: diff --git a/docs/changelog.md b/docs/changelog.md index 8132cc35..c402cbdc 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -6,7 +6,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - Replaced attmap with MutableMapping. (which resulted in the removal of the attribute functionality previously available in attmap) - Replaced OrderedDict with dict. -- Deprecated support for Python versions older than 2.7. 
+- Deprecated support for Python versions older than 3.6. ## [0.35.5] -- 2023-03-27 ### Fixed diff --git a/setup.py b/setup.py index ac84c383..d69a3c05 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ def get_static(name, condition=None): classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From b0a503be314d8bbc06f5aed33d5e2dfdac174f01 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 May 2023 12:25:37 -0400 Subject: [PATCH 05/53] fixed variables naming and version --- peppy/_version.py | 2 +- peppy/project.py | 4 ++-- tests/smoketests/test_Sample.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/peppy/_version.py b/peppy/_version.py index a51f1e97..da7ed90a 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.35.6" +__version__ = "0.40.0" diff --git a/peppy/project.py b/peppy/project.py index 7acb13a9..1e3e3608 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -1332,8 +1332,8 @@ def get_samples(self, sample_names): """ return [s for s in self.samples if s[self.st_index] in sample_names] - def __setitem__(self, item, value): - self._project_data[item] = value + def __setitem__(self, key, value): + self._project_data[key] = value def __getitem__(self, item): """ diff --git a/tests/smoketests/test_Sample.py b/tests/smoketests/test_Sample.py index e1ba3c12..1fdf89f0 100644 --- a/tests/smoketests/test_Sample.py +++ b/tests/smoketests/test_Sample.py @@ -27,7 +27,7 @@ ] -class TestsSample: +class TestSample: @pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True) def test_serialization(self, example_pep_cfg_path): """ From 13857873fd7f691cab77b60c65f30fa159a9daa2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 30 May 2023 17:34:19 -0400 Subject: [PATCH 06/53] fixed 
recursive issue --- peppy/sample.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/peppy/sample.py b/peppy/sample.py index 9df3ef7c..b98f5ba8 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -39,7 +39,8 @@ class Sample(MutableMapping): def __init__(self, series, prj=None): super(Sample, self).__init__() - self._sample_dict = {} + super(Sample, self).__setattr__("_sample_dict", {}) + # self._sample_dict = {} data = dict(series) _LOGGER.debug("Sample data: {data}") @@ -309,6 +310,13 @@ def __setitem__(self, item, value): self._try_touch_samples() self._sample_dict[item] = value + def __getattr__(self, item): + # return super(Sample, self).__getattr__(item) + return self._sample_dict[item] + + def __setattr__(self, item, value): + self._sample_dict[item] = value + def __getitem__(self, item): """ Fetch the value of given key. From 2ac4b02846bd86faf1f0e1c2a78fb9efadbda65c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 31 May 2023 11:26:54 -0400 Subject: [PATCH 07/53] created simplified attrmap --- peppy/sample.py | 46 +++------------------------------------- peppy/simple_attr_map.py | 44 ++++++++++++++++++++++++++++++++++++++ tests/test_Project.py | 46 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 43 deletions(-) create mode 100644 peppy/simple_attr_map.py diff --git a/peppy/sample.py b/peppy/sample.py index b98f5ba8..92b3ab46 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -1,6 +1,6 @@ import glob import os -from collections.abc import Mapping, MutableMapping +from collections.abc import Mapping from copy import copy as cp from logging import getLogger from string import Formatter @@ -20,6 +20,7 @@ ) from .exceptions import InvalidSampleTableFileException from .utils import copy, grab_project_data +from .simple_attr_map import SimpleAttrMap _LOGGER = getLogger(PKG_NAME) @@ -30,7 +31,7 @@ def __missing__(self, key): @copy -class Sample(MutableMapping): +class Sample(SimpleAttrMap): """ Class 
to model Samples based on a pandas Series. @@ -39,8 +40,6 @@ class Sample(MutableMapping): def __init__(self, series, prj=None): super(Sample, self).__init__() - super(Sample, self).__setattr__("_sample_dict", {}) - # self._sample_dict = {} data = dict(series) _LOGGER.debug("Sample data: {data}") @@ -302,45 +301,6 @@ def project(self): """ return self[PRJ_REF] - def __delattr__(self, item): - self._try_touch_samples() - super(Sample, self).__delattr__(item) - - def __setitem__(self, item, value): - self._try_touch_samples() - self._sample_dict[item] = value - - def __getattr__(self, item): - # return super(Sample, self).__getattr__(item) - return self._sample_dict[item] - - def __setattr__(self, item, value): - self._sample_dict[item] = value - - def __getitem__(self, item): - """ - Fetch the value of given key. - - :param hashable item: key for which to fetch value - :return object: value mapped to given key, if available - :raise KeyError: if the requested key is unmapped. - """ - return self._sample_dict[item] - - def __iter__(self): - return iter(self._sample_dict) - - def __len__(self): - return len(self._sample_dict) - - def __delitem__(self, key): - value = self[key] - del self._sample_dict[key] - self.pop(value, None) - - def __contains__(self, key): - return key in list(self.keys()) - # The __reduce__ function provides an interface for # correct object serialization with the pickle module. def __reduce__(self): diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py new file mode 100644 index 00000000..ba9f40bf --- /dev/null +++ b/peppy/simple_attr_map.py @@ -0,0 +1,44 @@ +from collections.abc import MutableMapping + + +class SimpleAttrMap(MutableMapping): + """ + Simplified the AttrMap class, which enables storing key-value pairs in + a dictionary-like structure. + It allows assigning and accessing the values both through attributes and items. + In most cases used as SuperClass. 
+ """ + + def __init__(self): + super(SimpleAttrMap, self).__init__() + super(SimpleAttrMap, self).__setattr__("sample", {}) + + def __delitem__(self, key): + value = self[key] + del self.sample[key] + self.pop(value, None) + + def __setitem__(self, item, value): + self._try_touch_samples() + self.sample[item] = value + + def __getitem__(self, item): + return self.sample[item] + + def __iter__(self): + return iter(self.sample) + + def __len__(self): + return len(self.sample) + + def __contains__(self, key): + return key in list(self.keys()) + + def __delattr__(self, key): + del self[key] + + def __setattr__(self, item, value): + self.sample[item] = value + + def __getattr__(self, item): + return self.sample[item] diff --git a/tests/test_Project.py b/tests/test_Project.py index 9149a9be..5ce2cbf0 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -586,3 +586,49 @@ def test_from_pandas(self, config_with_pandas_obj, example_pep_csv_path): p1 = Project().from_pandas(config_with_pandas_obj) p2 = Project(example_pep_csv_path) assert p1 == p2 + + @pytest.mark.parametrize( + "config_with_pandas_obj, example_pep_csv_path", + [ + ["append", "append"], + ["derive", "derive"], + ["subtable1", "subtable1"], + ], + indirect=True, + ) + def test_from_pandas_unequal(self, config_with_pandas_obj, example_pep_csv_path): + """ + Test initializing project from pandas changing one of the samples + and checking inequality + """ + p1 = Project().from_pandas(config_with_pandas_obj) + + del p1.samples[0].sample_name + p2 = Project(example_pep_csv_path) + assert p1 != p2 + + +class TestSampleAttrMap: + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + def test_sample_getattr(self, example_pep_cfg_path): + """ + Verify that the getattr works + """ + p = Project(cfg=example_pep_cfg_path) + p1 = Project(cfg=example_pep_cfg_path) + + for s1, s2 in zip(p.samples, p1.samples): + assert s1.sample_name == s1["sample_name"] + assert s2.organism == 
s2["organism"] + + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + def test_sample_getattr(self, example_pep_cfg_path): + """ + Verify that the setattr works + """ + p = Project(cfg=example_pep_cfg_path) + new_name = "bingo" + p.samples[0].sample_name = new_name + + df = p.samples[0].to_dict() + assert df["sample_name"] == new_name From 5b16401ff107d4ffe56541836deea173e2abfa16 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 31 May 2023 11:57:22 -0400 Subject: [PATCH 08/53] added test --- peppy/sample.py | 3 +++ tests/test_Project.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/peppy/sample.py b/peppy/sample.py index 92b3ab46..98ce4483 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -312,6 +312,9 @@ def __reduce__(self): iter({PRJ_REF: self[PRJ_REF]}.items()), ) + def __len__(self): + return len(self.to_dict()) + def __str__(self, max_attr=10): """Representation in interpreter.""" if len(self) == 0: diff --git a/tests/test_Project.py b/tests/test_Project.py index 5ce2cbf0..6cd35148 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -632,3 +632,11 @@ def test_sample_getattr(self, example_pep_cfg_path): df = p.samples[0].to_dict() assert df["sample_name"] == new_name + + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + def test_sample_len(self, example_pep_cfg_path): + """ + Verify that the len works + """ + p = Project(cfg=example_pep_cfg_path) + assert len(p.samples[0]) == 4 From 0d97581de1fdff78dfe33deecb26d075d6f81809 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Jun 2023 11:43:26 -0400 Subject: [PATCH 09/53] added yacman --- peppy/project.py | 3 ++- peppy/sample.py | 4 ++-- peppy/simple_attr_map.py | 6 +++--- requirements/requirements-all.txt | 1 + 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 1e3e3608..ce4fc445 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -7,6 +7,7 @@ from 
contextlib import suppress from logging import getLogger from typing import Dict, Iterable, List, Tuple, Union +from yacman import YAMLConfigManager import numpy as np import pandas as pd @@ -318,7 +319,7 @@ def parse_config_file( :raises KeyError: if config file lacks required section(s) """ if CONFIG_KEY not in self: - self[CONFIG_KEY] = {} + self[CONFIG_KEY] = YAMLConfigManager() if not os.path.exists(cfg_path) and not is_url(cfg_path): raise OSError(f"Project config file path does not exist: {cfg_path}") config = load_yaml(cfg_path) diff --git a/peppy/sample.py b/peppy/sample.py index 98ce4483..c38ae6e4 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -20,7 +20,7 @@ ) from .exceptions import InvalidSampleTableFileException from .utils import copy, grab_project_data -from .simple_attr_map import SimpleAttrMap +from .simple_attr_map import SimpleAttMap _LOGGER = getLogger(PKG_NAME) @@ -31,7 +31,7 @@ def __missing__(self, key): @copy -class Sample(SimpleAttrMap): +class Sample(SimpleAttMap): """ Class to model Samples based on a pandas Series. diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py index ba9f40bf..007caec9 100644 --- a/peppy/simple_attr_map.py +++ b/peppy/simple_attr_map.py @@ -1,7 +1,7 @@ from collections.abc import MutableMapping -class SimpleAttrMap(MutableMapping): +class SimpleAttMap(MutableMapping): """ Simplified the AttrMap class, which enables storing key-value pairs in a dictionary-like structure. 
@@ -10,8 +10,8 @@ class SimpleAttrMap(MutableMapping): """ def __init__(self): - super(SimpleAttrMap, self).__init__() - super(SimpleAttrMap, self).__setattr__("sample", {}) + super(SimpleAttMap, self).__init__() + super(SimpleAttMap, self).__setattr__("sample", {}) def __delitem__(self, key): value = self[key] diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 65b60371..737868a9 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,3 +2,4 @@ pandas>=0.24.2 pyyaml rich>=10.3.0 ubiquerg>=0.6.2 +yacman>=0.9.0 From c5737a2ae3891c0c91b81adf67e69f11cade3ced Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 13 Jun 2023 17:15:42 -0400 Subject: [PATCH 10/53] Changed attmap naming --- peppy/simple_attr_map.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py index 007caec9..9da52c2d 100644 --- a/peppy/simple_attr_map.py +++ b/peppy/simple_attr_map.py @@ -11,25 +11,25 @@ class SimpleAttMap(MutableMapping): def __init__(self): super(SimpleAttMap, self).__init__() - super(SimpleAttMap, self).__setattr__("sample", {}) + super(SimpleAttMap, self).__setattr__("_mapped_attr", {}) def __delitem__(self, key): value = self[key] - del self.sample[key] + del self._mapped_attr[key] self.pop(value, None) def __setitem__(self, item, value): self._try_touch_samples() - self.sample[item] = value + self._mapped_attr[item] = value def __getitem__(self, item): - return self.sample[item] + return self._mapped_attr[item] def __iter__(self): - return iter(self.sample) + return iter(self._mapped_attr) def __len__(self): - return len(self.sample) + return len(self._mapped_attr) def __contains__(self, key): return key in list(self.keys()) @@ -38,7 +38,7 @@ def __delattr__(self, key): del self[key] def __setattr__(self, item, value): - self.sample[item] = value + self._mapped_attr[item] = value def __getattr__(self, item): - return 
self.sample[item] + return self._mapped_attr[item] From 562224e66721711bf96ac3e539e552bd430a850a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 13 Jun 2023 18:10:43 -0400 Subject: [PATCH 11/53] added repr + description and name setter --- peppy/project.py | 22 +++++++++++++++++++--- peppy/sample.py | 3 +++ tests/test_Project.py | 26 ++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index ce4fc445..3532da47 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -155,9 +155,6 @@ def __init__( or [SAMPLE_NAME_ATTR, SUBSAMPLE_NAME_ATTR] ) - self.name = self.infer_name() - self.description = self.get_description() - if not defer_samples_creation: self.create_samples(modify=False if self[SAMPLE_TABLE_FILE_KEY] else True) self._sample_table = self._get_table_from_samples( @@ -1333,6 +1330,22 @@ def get_samples(self, sample_names): """ return [s for s in self.samples if s[self.st_index] in sample_names] + @property + def description(self): + return self.get_description() + + @description.setter + def description(self, value): + self[CONFIG_KEY][DESC_KEY] = str(value) + + @property + def name(self): + return self.infer_name() + + @name.setter + def name(self, value): + self[CONFIG_KEY][NAME_KEY] = str(value) + def __setitem__(self, key, value): self._project_data[key] = value @@ -1357,6 +1370,9 @@ def __delitem__(self, key): del self._project_data[key] self.pop(value, None) + def __repr__(self): + return str(self) + def infer_delimiter(filepath): """ diff --git a/peppy/sample.py b/peppy/sample.py index c38ae6e4..80a47fcf 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -344,6 +344,9 @@ def __str__(self, max_attr=10): counter += 1 return head + "\n" + attrs + def __repr__(self): + return str(self) + def _excl_from_eq(self, k): """Exclude the Project reference from object comparison.""" return k == PRJ_REF or super(Sample, self)._excl_from_eq(k) diff --git a/tests/test_Project.py 
b/tests/test_Project.py index 6cd35148..c0f149ee 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -607,6 +607,32 @@ def test_from_pandas_unequal(self, config_with_pandas_obj, example_pep_csv_path) p2 = Project(example_pep_csv_path) assert p1 != p2 + @pytest.mark.parametrize( + "example_pep_cfg_path", + ["append"], + indirect=True, + ) + def test_description_setter(self, example_pep_cfg_path): + new_description = "new_description1" + p = Project(cfg=example_pep_cfg_path) + p.description = new_description + + assert p.description == new_description + assert p.to_dict(extended=True)["description"] == new_description + + @pytest.mark.parametrize( + "example_pep_cfg_path", + ["append"], + indirect=True, + ) + def test_name_setter(self, example_pep_cfg_path): + new_name = "new_name1" + p = Project(cfg=example_pep_cfg_path) + p.name = new_name + + assert p.name == new_name + assert p.to_dict(extended=True)["name"] == new_name + class TestSampleAttrMap: @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) From 1f228045c52a57231c244062154ea8961b138559 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 13 Jun 2023 19:51:42 -0400 Subject: [PATCH 12/53] fix sample render when a list has a None item --- peppy/sample.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/peppy/sample.py b/peppy/sample.py index 80a47fcf..7d39baf6 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -335,9 +335,13 @@ def __str__(self, max_attr=10): attrs = "" counter = 0 for k, v in pub_attrs.items(): - attrs += "\n{}{}".format( - (k + ":").ljust(maxlen), v if not isinstance(v, list) else ", ".join(v) - ) + key_to_show = (k + ":").ljust(maxlen) + if not isinstance(v, list): + try: + val_to_show = ", ".join(v) + except TypeError: + val_to_show = "None" + attrs += f"\n{key_to_show}{val_to_show}" if counter == max_attr: attrs += "\n\n...".ljust(maxlen) + f"(showing first {max_attr})" break From 1b76c05598e60eb47170ea69b725745660b64868 
Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 13 Jun 2023 19:55:22 -0400 Subject: [PATCH 13/53] accommodate nones in list with non-nones --- peppy/sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/sample.py b/peppy/sample.py index 7d39baf6..9e5e1a84 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -338,7 +338,7 @@ def __str__(self, max_attr=10): key_to_show = (k + ":").ljust(maxlen) if not isinstance(v, list): try: - val_to_show = ", ".join(v) + val_to_show = ", ".join([i for i in v if v is not None]) except TypeError: val_to_show = "None" attrs += f"\n{key_to_show}{val_to_show}" From 98ce55530dd1e67c524f146f3f789543d77649b5 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 13 Jun 2023 21:49:26 -0400 Subject: [PATCH 14/53] fix string repr --- peppy/sample.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/peppy/sample.py b/peppy/sample.py index 9e5e1a84..f8c1c725 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -337,6 +337,8 @@ def __str__(self, max_attr=10): for k, v in pub_attrs.items(): key_to_show = (k + ":").ljust(maxlen) if not isinstance(v, list): + val_to_show = v + else: try: val_to_show = ", ".join([i for i in v if v is not None]) except TypeError: From a0ad8929b51833dbb2afce2a24331a7cd8fbb9ac Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 14 Jun 2023 12:11:33 -0400 Subject: [PATCH 15/53] fixed to_dict method --- peppy/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/project.py b/peppy/project.py index 3532da47..9c41869c 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -244,7 +244,7 @@ def to_dict(self, expand: bool = False, extended: bool = False) -> dict: DESC_KEY: self.description, } else: - p_dict = self.config + p_dict = self.config.to_dict() p_dict["_samples"] = [s.to_dict() for s in self.samples] return p_dict From 473d3a7388d4a0bb6a113a4b961b4e58ac6ca2f5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 14 Jun 2023 12:53:54 -0400 Subject: [PATCH 
16/53] fixed config issues --- peppy/project.py | 5 ++--- peppy/sample.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 9c41869c..3ab864a9 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -7,7 +7,6 @@ from contextlib import suppress from logging import getLogger from typing import Dict, Iterable, List, Tuple, Union -from yacman import YAMLConfigManager import numpy as np import pandas as pd @@ -244,7 +243,7 @@ def to_dict(self, expand: bool = False, extended: bool = False) -> dict: DESC_KEY: self.description, } else: - p_dict = self.config.to_dict() + p_dict = self.config p_dict["_samples"] = [s.to_dict() for s in self.samples] return p_dict @@ -316,7 +315,7 @@ def parse_config_file( :raises KeyError: if config file lacks required section(s) """ if CONFIG_KEY not in self: - self[CONFIG_KEY] = YAMLConfigManager() + self[CONFIG_KEY] = {} if not os.path.exists(cfg_path) and not is_url(cfg_path): raise OSError(f"Project config file path does not exist: {cfg_path}") config = load_yaml(cfg_path) diff --git a/peppy/sample.py b/peppy/sample.py index f8c1c725..8e3c2767 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -342,7 +342,7 @@ def __str__(self, max_attr=10): try: val_to_show = ", ".join([i for i in v if v is not None]) except TypeError: - val_to_show = "None" + val_to_show = "None" attrs += f"\n{key_to_show}{val_to_show}" if counter == max_attr: attrs += "\n\n...".ljust(maxlen) + f"(showing first {max_attr})" From 03d2b2e71dea218731f59c61f3a9c76bb63931dd Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 16 Jun 2023 10:45:50 -0400 Subject: [PATCH 17/53] fixed init from dict issues --- peppy/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 3ab864a9..acf82a40 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -210,10 +210,10 @@ def from_dict(self, pep_dictionary: dict) -> "Project": for sub_a in 
pep_dictionary[SUBSAMPLE_RAW_DICT_KEY] ] if NAME_KEY in pep_dictionary: - self[NAME_KEY] = pep_dictionary[NAME_KEY] + self[CONFIG_KEY][NAME_KEY] = pep_dictionary[NAME_KEY] if DESC_KEY in pep_dictionary: - self[DESC_KEY] = pep_dictionary[DESC_KEY] + self[CONFIG_KEY][DESC_KEY] = pep_dictionary[DESC_KEY] self.create_samples(modify=False if self[SAMPLE_TABLE_FILE_KEY] else True) self._sample_table = self._get_table_from_samples( From abd41fea0628b7a942cfa0deeb87e934269104e4 Mon Sep 17 00:00:00 2001 From: Maria Knorps Date: Sun, 2 Jul 2023 19:33:39 +0200 Subject: [PATCH 18/53] Add numpy explicitly to requirements. --- requirements/requirements-all.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 9436a3fb..baaa089a 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,3 +3,4 @@ pandas>=0.24.2 pyyaml rich>=10.3.0 ubiquerg>=0.6.2 +numpy From d44665abf8b42a5243a1cc2e631bc84825dd6310 Mon Sep 17 00:00:00 2001 From: Oleksandr Date: Fri, 28 Jul 2023 17:33:46 -0400 Subject: [PATCH 19/53] fixed conflicts --- peppy/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/project.py b/peppy/project.py index 5015db26..c398be67 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -249,7 +249,7 @@ def to_dict( sub_df = None self[CONFIG_KEY][NAME_KEY] = self[NAME_KEY] self[CONFIG_KEY][DESC_KEY] = self[DESC_KEY] - p_dict = + p_dict = { SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(orient=orient), CONFIG_KEY: dict(self[CONFIG_KEY]), SUBSAMPLE_RAW_LIST_KEY: sub_df, From b7e2ee1e61ce3d1645adb68d06abf848f85fa0ae Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 28 Jul 2023 17:49:00 -0400 Subject: [PATCH 20/53] deprecated Python<=3.7 --- .github/workflows/run-pytest.yml | 2 +- docs/changelog.md | 2 +- setup.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-pytest.yml 
b/.github/workflows/run-pytest.yml index 35328dff..11bc86fe 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.7", "3.11"] + python-version: ["3.8", "3.11"] os: [ubuntu-20.04] steps: diff --git a/docs/changelog.md b/docs/changelog.md index 32aeb828..bd320c2f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,7 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - Replaced attmap with MutableMapping. (which resulted in the removal of the attribute functionality previously available in attmap) - Replaced OrderedDict with dict. -- Deprecated support for Python versions <= 3.6. +- Deprecated support for Python versions <= 3.7. ## [0.35.7] -- 2023-07-19 ### Fixed diff --git a/setup.py b/setup.py index d69a3c05..ac84c383 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,6 @@ def get_static(name, condition=None): classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From 16fd2ef433f838787f9a60024c5003ff16cd11d3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 28 Jul 2023 18:03:57 -0400 Subject: [PATCH 21/53] fixed incorrect from to dict initialization --- peppy/project.py | 8 ++++---- tests/test_Project.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index c398be67..27c5e054 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -210,10 +210,10 @@ def from_dict(self, pep_dictionary: dict) -> "Project": for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] if NAME_KEY in self[CONFIG_KEY]: - self[NAME_KEY] = self[CONFIG_KEY][NAME_KEY] + self.name = self[CONFIG_KEY][NAME_KEY] if DESC_KEY in self[CONFIG_KEY]: - self[DESC_KEY] = 
self[CONFIG_KEY][DESC_KEY] + self.description = self[CONFIG_KEY][DESC_KEY] self._set_indexes(self[CONFIG_KEY]) @@ -247,8 +247,8 @@ def to_dict( ] else: sub_df = None - self[CONFIG_KEY][NAME_KEY] = self[NAME_KEY] - self[CONFIG_KEY][DESC_KEY] = self[DESC_KEY] + self[CONFIG_KEY][NAME_KEY] = self.name + self[CONFIG_KEY][DESC_KEY] = self.description p_dict = { SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(orient=orient), CONFIG_KEY: dict(self[CONFIG_KEY]), diff --git a/tests/test_Project.py b/tests/test_Project.py index 8cf8f9f4..7a4f33c1 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -619,7 +619,7 @@ def test_description_setter(self, example_pep_cfg_path): p.description = new_description assert p.description == new_description - assert p.to_dict(extended=True)["description"] == new_description + assert p.to_dict(extended=True)["_config"]["description"] == new_description @pytest.mark.parametrize( "example_pep_cfg_path", @@ -632,7 +632,7 @@ def test_name_setter(self, example_pep_cfg_path): p.name = new_name assert p.name == new_name - assert p.to_dict(extended=True)["name"] == new_name + assert p.to_dict(extended=True)["_config"]["name"] == new_name class TestSampleAttrMap: From 76e126f7c9b5442c488d45a372d4dc5bb0276d44 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 2 Aug 2023 22:18:15 -0400 Subject: [PATCH 22/53] changed version to alpha --- docs/changelog.md | 2 +- peppy/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index bd320c2f..6efa0812 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. 
-## [0.40.0] -- 2023-05-25 +## [0.40.0] -- 2023-XX-XX **This version introduced backwards-incompatible changes.** diff --git a/peppy/_version.py b/peppy/_version.py index da7ed90a..738a9340 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0" +__version__ = "0.40.0a1" From 4ba9c2d0a2d7ec574ed25e4625d2132b4adf665b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 2 Aug 2023 22:45:21 -0400 Subject: [PATCH 23/53] added info to changelog --- docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 6efa0812..9e0b34e1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -11,6 +11,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Replaced OrderedDict with dict. - Deprecated support for Python versions <= 3.7. +_Due to the changes mentioned above, a few item functionalities may be disabled. For example, the `name` and `description` properties can now be accessed and modified using attribute functionality_ + ## [0.35.7] -- 2023-07-19 ### Fixed - incorrect setting of sample and subsample indexes using from_dict function (#452) From d63346446d25443910d2a46cfde59c8172431f47 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 8 Aug 2023 03:34:44 -0400 Subject: [PATCH 24/53] fixed util function --- peppy/_version.py | 2 +- peppy/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/peppy/_version.py b/peppy/_version.py index 738a9340..f0b083c2 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a1" +__version__ = "0.40.0a2" diff --git a/peppy/utils.py b/peppy/utils.py index 123a2a0b..23da5ea6 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -71,7 +71,7 @@ def grab_project_data(prj): return {} try: - return prj[CONFIG_KEY].to_dict() + return dict(prj[CONFIG_KEY]) except KeyError: raise KeyError("Project lacks section '{}'".format(CONFIG_KEY)) From 
2eacf23159264392c5d41fd2d456b0251c048a3d Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 14 Aug 2023 20:37:33 -0400 Subject: [PATCH 25/53] Respond correctly when samples is none --- peppy/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/project.py b/peppy/project.py index 27c5e054..05db0e45 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -1105,7 +1105,7 @@ def samples(self): :return Iterable[Sample]: Sample instance for each of this Project's samples """ - if self._samples: + if self._samples is not None: return self._samples if SAMPLE_DF_KEY not in self or self[SAMPLE_DF_KEY] is None: _LOGGER.debug("No samples are defined") From e21c547117f45c79f0c5921821d902c18d2f93c0 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 14 Aug 2023 22:14:31 -0400 Subject: [PATCH 26/53] return none instead of error on samples by default? --- peppy/simple_attr_map.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py index 9da52c2d..f47b3aee 100644 --- a/peppy/simple_attr_map.py +++ b/peppy/simple_attr_map.py @@ -41,4 +41,7 @@ def __setattr__(self, item, value): self._mapped_attr[item] = value def __getattr__(self, item): - return self._mapped_attr[item] + try: + return self._mapped_attr[item] + except KeyError: + return None From df76b04cf1490efd5ac51ed46d0b63c8c83ce7d9 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 15 Aug 2023 05:37:05 -0400 Subject: [PATCH 27/53] fixed #454 --- peppy/project.py | 3 +++ peppy/sample.py | 3 ++- tests/test_Project.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/peppy/project.py b/peppy/project.py index 05db0e45..3310c3a1 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -1397,6 +1397,9 @@ def __delitem__(self, key): def __repr__(self): return str(self) + # def __reduce__(self): + # return (self.__class__,) + def infer_delimiter(filepath): """ diff --git a/peppy/sample.py b/peppy/sample.py 
index 8e3c2767..eac6bc45 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -306,7 +306,8 @@ def project(self): def __reduce__(self): return ( self.__class__, - (self.as_series(),), + (pd.Series(self.to_dict()),), + # (self.as_series(),), (None, {}), iter([]), iter({PRJ_REF: self[PRJ_REF]}.items()), diff --git a/tests/test_Project.py b/tests/test_Project.py index 7a4f33c1..9833c0cb 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -8,7 +8,9 @@ import pytest from pandas import DataFrame from yaml import dump, safe_load +import pickle +import peppy from peppy import Project from peppy.const import SAMPLE_NAME_ATTR, SAMPLE_TABLE_FILE_KEY from peppy.exceptions import ( @@ -391,6 +393,15 @@ def test_peppy_initializes_samples_with_correct_attributes( p = Project(example_pep_cfg_path, sample_table_index="sample") assert all([expected_attribute in sample for sample in p.samples]) + @pytest.mark.parametrize("example_pep_cfg_path", ["basic", "imply"], indirect=True) + def test_correct_pickle(self, example_pep_cfg_path): + proj = Project(example_pep_cfg_path) + + pickled_data = pickle.dumps(proj) + unpickled_project = pickle.loads(pickled_data) + + assert proj == unpickled_project + class TestProjectManipulationTests: @pytest.mark.parametrize("example_pep_cfg_path", ["amendments1"], indirect=True) From 55d142e93896c6700be41b58d8bee1d3135a1036 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 15 Aug 2023 07:03:12 -0400 Subject: [PATCH 28/53] Fixed Sample serialization --- peppy/_version.py | 2 +- peppy/project.py | 3 ++- peppy/sample.py | 7 +++---- tests/smoketests/test_Sample.py | 11 +++++++++++ tests/test_Project.py | 4 +++- 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/peppy/_version.py b/peppy/_version.py index f0b083c2..da532ef5 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a2" +__version__ = "0.40.0a3" diff --git a/peppy/project.py b/peppy/project.py index 3310c3a1..090d37ed 100644 --- 
a/peppy/project.py +++ b/peppy/project.py @@ -226,7 +226,7 @@ def from_dict(self, pep_dictionary: dict) -> "Project": def to_dict( self, - expand: bool = False, + # expand: bool = False, # expand was used to expand paths. This functionality was removed, because of attmapp extended: bool = False, orient: Literal[ "dict", "list", "series", "split", "tight", "records", "index" @@ -1397,6 +1397,7 @@ def __delitem__(self, key): def __repr__(self): return str(self) + # # pickle now is impossible, because it's impossible to initialize Project class without using actual files # def __reduce__(self): # return (self.__class__,) diff --git a/peppy/sample.py b/peppy/sample.py index eac6bc45..61dcd38d 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -307,10 +307,9 @@ def __reduce__(self): return ( self.__class__, (pd.Series(self.to_dict()),), - # (self.as_series(),), - (None, {}), - iter([]), - iter({PRJ_REF: self[PRJ_REF]}.items()), + # (None, {}), + # iter([]), + # iter({PRJ_REF: self[PRJ_REF]}.items()), ) def __len__(self): diff --git a/tests/smoketests/test_Sample.py b/tests/smoketests/test_Sample.py index 1fdf89f0..a9a933bd 100644 --- a/tests/smoketests/test_Sample.py +++ b/tests/smoketests/test_Sample.py @@ -66,3 +66,14 @@ def test_sheet_dict_excludes_private_attrs(self, example_pep_cfg_path): p = Project(cfg=example_pep_cfg_path) for sample in p.samples: assert len(sample.get_sheet_dict()) == len(p.sample_table.columns) + + @pytest.mark.parametrize("example_pep_cfg_path", ["basic"], indirect=True) + def test_pickle_in_samples(self, example_pep_cfg_path): + import pickle + + p = Project(cfg=example_pep_cfg_path) + for sample in p.samples: + pickled_data = pickle.dumps(sample) + unpickled_sample = pickle.loads(pickled_data) + + assert sample.to_dict() == unpickled_sample.to_dict() diff --git a/tests/test_Project.py b/tests/test_Project.py index 9833c0cb..bf073918 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -393,10 +393,12 @@ def 
test_peppy_initializes_samples_with_correct_attributes( p = Project(example_pep_cfg_path, sample_table_index="sample") assert all([expected_attribute in sample for sample in p.samples]) + @pytest.mark.skip( + "skipping this test, because this functionality is unavailable now" + ) @pytest.mark.parametrize("example_pep_cfg_path", ["basic", "imply"], indirect=True) def test_correct_pickle(self, example_pep_cfg_path): proj = Project(example_pep_cfg_path) - pickled_data = pickle.dumps(proj) unpickled_project = pickle.loads(pickled_data) From ed1af891a9baac4c16235dc3e800e38ac2ae0efa Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 15 Aug 2023 11:04:40 -0400 Subject: [PATCH 29/53] added __reduce__ function to project --- peppy/project.py | 22 +++++++++++++++++++--- tests/test_Project.py | 6 +++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 090d37ed..987b0162 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -88,6 +88,9 @@ class Project(MutableMapping): :param str | Iterable[str] amendments: names of the amendments to activate :param Iterable[str] amendments: amendments to use within configuration file :param bool defer_samples_creation: whether the sample creation should be skipped + :param Dict[Any]: dict representation of the project {_config: str, + _samples: list | dict, + _subsamples: list[list | dict]} :Example: @@ -105,6 +108,7 @@ def __init__( sample_table_index: Union[str, Iterable[str]] = None, subsample_table_index: Union[str, Iterable[str]] = None, defer_samples_creation: bool = False, + from_dict: dict = None, ): _LOGGER.debug( "Creating {}{}".format( @@ -159,6 +163,8 @@ def __init__( self._sample_table = self._get_table_from_samples( index=self.st_index, initial=True ) + if from_dict: + self.from_dict(from_dict) def __eq__(self, other): return [s.to_dict() for s in self.samples] == [ @@ -1397,9 +1403,19 @@ def __delitem__(self, key): def __repr__(self): return str(self) - # # pickle 
now is impossible, because it's impossible to initialize Project class without using actual files - # def __reduce__(self): - # return (self.__class__,) + # pickle now is impossible, because it's impossible to initialize Project class without using actual files + def __reduce__(self): + return ( + self.__class__, + ( + None, + None, + None, + None, + False, + self.to_dict(extended=True, orient="records"), + ), + ) def infer_delimiter(filepath): diff --git a/tests/test_Project.py b/tests/test_Project.py index bf073918..b4a80242 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -393,9 +393,9 @@ def test_peppy_initializes_samples_with_correct_attributes( p = Project(example_pep_cfg_path, sample_table_index="sample") assert all([expected_attribute in sample for sample in p.samples]) - @pytest.mark.skip( - "skipping this test, because this functionality is unavailable now" - ) + # @pytest.mark.skip( + # "skipping this test, because this functionality is unavailable now" + # ) @pytest.mark.parametrize("example_pep_cfg_path", ["basic", "imply"], indirect=True) def test_correct_pickle(self, example_pep_cfg_path): proj = Project(example_pep_cfg_path) From d92867cee116c9e09c5dd919a9ead66318b21175 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 15 Aug 2023 18:51:27 -0400 Subject: [PATCH 30/53] Don't make up missing attrs, use the right error --- peppy/simple_attr_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py index f47b3aee..75303b2f 100644 --- a/peppy/simple_attr_map.py +++ b/peppy/simple_attr_map.py @@ -44,4 +44,4 @@ def __getattr__(self, item): try: return self._mapped_attr[item] except KeyError: - return None + raise AttributeError(f"Attribute not found: {item}") From 9a2595633bfaa2da0e504b435db44e72633eb118 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 15 Aug 2023 18:53:17 -0400 Subject: [PATCH 31/53] expand env vars for derived attrs ? 
--- peppy/sample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/peppy/sample.py b/peppy/sample.py index 61dcd38d..9f92a6ac 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -282,7 +282,8 @@ def _glob_regex(patterns): f"attribute source: {regex}." ) try: - vals = _format_regex(regex, dict(self.items())) + expanded_regex = os.path.expandvars(regex) + vals = _format_regex(expanded_regex, dict(self.items())) _LOGGER.debug("Formatted regex: {}".format(vals)) except KeyError as ke: _LOGGER.warning(f"{deriv_exc_base} Can't access {str(ke)} attribute") From e5046fdbbee1b11b5975a6b80a86f9dcc0dafb3d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 24 Aug 2023 12:50:57 +0200 Subject: [PATCH 32/53] fixed to_dict modification --- peppy/_version.py | 2 +- peppy/project.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/peppy/_version.py b/peppy/_version.py index da532ef5..4bbef18e 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a3" +__version__ = "0.40.0a4" diff --git a/peppy/project.py b/peppy/project.py index 090d37ed..ccdc7f04 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -255,7 +255,7 @@ def to_dict( SUBSAMPLE_RAW_LIST_KEY: sub_df, } else: - p_dict = self.config + p_dict = self.config.copy() p_dict["_samples"] = [s.to_dict() for s in self.samples] return p_dict From 64d5d57a8f7e4c14795e0da8c598d8adea71768e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 16 Nov 2023 10:30:48 -0500 Subject: [PATCH 33/53] Added public property in simpleAttrMap --- peppy/_version.py | 2 +- peppy/simple_attr_map.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/peppy/_version.py b/peppy/_version.py index 4bbef18e..3a51a04c 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a4" +__version__ = "0.40.0a5" diff --git a/peppy/simple_attr_map.py b/peppy/simple_attr_map.py index 75303b2f..50b1a089 100644 --- 
a/peppy/simple_attr_map.py +++ b/peppy/simple_attr_map.py @@ -45,3 +45,7 @@ def __getattr__(self, item): return self._mapped_attr[item] except KeyError: raise AttributeError(f"Attribute not found: {item}") + + @property + def attributes(self): + return self._mapped_attr From dcabab660b38e4fea5f203912882993eae30d42e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 16 Nov 2023 10:39:27 -0500 Subject: [PATCH 34/53] cleaning --- peppy/project.py | 15 +++++++++------ peppy/utils.py | 2 +- tests/conftest.py | 1 - 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index ccdc7f04..727ba9d5 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -6,7 +6,7 @@ from collections.abc import Mapping, MutableMapping from contextlib import suppress from logging import getLogger -from typing import Dict, Iterable, List, Tuple, Union, Literal, NoReturn +from typing import Iterable, List, Tuple, Union, Literal, NoReturn import numpy as np import pandas as pd @@ -58,13 +58,16 @@ SUBSAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_TABLES_FILE_KEY, ) -from .exceptions import * +from .exceptions import ( + InvalidSampleTableFileException, + MissingAmendmentError, + IllegalStateException, + InvalidConfigFileException, +) from .parsers import select_parser from .sample import Sample from .utils import ( copy, - extract_custom_index_for_sample_table, - extract_custom_index_for_subsample_table, is_cfg_or_anno, load_yaml, make_abs_via_cfg, @@ -198,7 +201,7 @@ def from_dict(self, pep_dictionary: dict) -> "Project": of an already processed PEP. :param dict pep_dictionary: in-memory dict representation of pep. 
""" - _LOGGER.info(f"Processing project from dictionary...") + _LOGGER.info("Processing project from dictionary...") self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] @@ -842,7 +845,7 @@ def attr_derive(self, attrs=None): console=Console(file=sys.stderr), ): for attr in derivations: - if not attr in sample: + if attr not in sample: _LOGGER.debug(f"sample lacks '{attr}' attribute") continue elif attr in sample._derived_cols_done: diff --git a/peppy/utils.py b/peppy/utils.py index 23da5ea6..2587e2c2 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -9,7 +9,7 @@ from ubiquerg import expandpath, is_url from .const import CONFIG_KEY, SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_TABLE_INDEX_KEY -from .exceptions import * +from .exceptions import RemoteYAMLError _LOGGER = logging.getLogger(__name__) diff --git a/tests/conftest.py b/tests/conftest.py index 90e93a94..bfaf5212 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ """ Configuration for modules with independent tests of models. """ -import json import os import pandas as pd From eb42458b425740b46931030e3bf8401e62f9762f Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Thu, 30 Nov 2023 08:06:40 -0500 Subject: [PATCH 35/53] typo --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 9e0b34e1..f6a077e7 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -79,7 +79,7 @@ _Due to the changes mentioned above, a few item functionalities may be disabled. ### Changed - Way of initialization project from dictionary. Now it's possible as follows: `Project().from_dict()` -- + ### Fixed - Fix error that was raised when duplicated sample in `sample_table` had different read types (single-end mixed with paired-end). 
From f0d5b76b33d1f8fad670e11667e00b50f10487cf Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 6 Dec 2023 17:08:22 -0500 Subject: [PATCH 36/53] fixed #443 --- peppy/project.py | 77 ++++++++++++++++++++----------------------- tests/test_Project.py | 6 ++-- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index e94097bb..4d439178 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -7,6 +7,7 @@ from contextlib import suppress from logging import getLogger from typing import Iterable, List, Tuple, Union, Literal, NoReturn +import yacman import numpy as np import pandas as pd @@ -91,9 +92,6 @@ class Project(MutableMapping): :param str | Iterable[str] amendments: names of the amendments to activate :param Iterable[str] amendments: amendments to use within configuration file :param bool defer_samples_creation: whether the sample creation should be skipped - :param Dict[Any]: dict representation of the project {_config: str, - _samples: list | dict, - _subsamples: list[list | dict]} :Example: @@ -111,7 +109,6 @@ def __init__( sample_table_index: Union[str, Iterable[str]] = None, subsample_table_index: Union[str, Iterable[str]] = None, defer_samples_creation: bool = False, - from_dict: dict = None, ): _LOGGER.debug( "Creating {}{}".format( @@ -166,72 +163,77 @@ def __init__( self._sample_table = self._get_table_from_samples( index=self.st_index, initial=True ) - if from_dict: - self.from_dict(from_dict) def __eq__(self, other): return [s.to_dict() for s in self.samples] == [ s.to_dict() for s in other.samples ] + @classmethod def from_pandas( - self, + cls, samples_df: pd.DataFrame, sub_samples_df: List[pd.DataFrame] = None, config: dict = None, - ) -> "Project": + ): """ Init a peppy project instance from a pandas Dataframe + :param samples_df: in-memory pandas DataFrame object of samples :param sub_samples_df: in-memory list of pandas DataFrame objects of sub-samples :param config: dict of yaml file """ + 
tmp_obj = cls() if not config: config = {CONFIG_VERSION_KEY: PEP_LATEST_VERSION} - self[SAMPLE_DF_KEY] = samples_df.replace(np.nan, "") - self[SUBSAMPLE_DF_KEY] = sub_samples_df + tmp_obj[SAMPLE_DF_KEY] = samples_df.replace(np.nan, "") + tmp_obj[SUBSAMPLE_DF_KEY] = sub_samples_df - self[SAMPLE_DF_LARGE] = self[SAMPLE_DF_KEY].shape[0] > 1000 + tmp_obj[SAMPLE_DF_LARGE] = tmp_obj[SAMPLE_DF_KEY].shape[0] > 1000 - self[CONFIG_KEY] = config + tmp_obj[CONFIG_KEY] = config - self.create_samples(modify=False if self[SAMPLE_TABLE_FILE_KEY] else True) - self._sample_table = self._get_table_from_samples( - index=self.st_index, initial=True + tmp_obj.create_samples(modify=False if tmp_obj[SAMPLE_TABLE_FILE_KEY] else True) + tmp_obj._sample_table = tmp_obj._get_table_from_samples( + index=tmp_obj.st_index, initial=True ) - return self + return tmp_obj - def from_dict(self, pep_dictionary: dict) -> "Project": + @classmethod + def from_dict(cls, pep_dictionary: dict): """ Init a peppy project instance from a dictionary representation of an already processed PEP. - :param dict pep_dictionary: in-memory dict representation of pep. 
+ + :param Dict[Any] pep_dictionary: dict representation of the project {_config: str, + _samples: list | dict, + _subsamples: list[list | dict]} """ _LOGGER.info("Processing project from dictionary...") - - self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) - self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] + tmp_obj = cls() + tmp_obj[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) + tmp_obj[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] if SUBSAMPLE_RAW_LIST_KEY in pep_dictionary: if pep_dictionary[SUBSAMPLE_RAW_LIST_KEY]: - self[SUBSAMPLE_DF_KEY] = [ + tmp_obj[SUBSAMPLE_DF_KEY] = [ pd.DataFrame(sub_a) for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] - if NAME_KEY in self[CONFIG_KEY]: - self.name = self[CONFIG_KEY][NAME_KEY] + if NAME_KEY in tmp_obj[CONFIG_KEY]: + tmp_obj.name = tmp_obj[CONFIG_KEY][NAME_KEY] - if DESC_KEY in self[CONFIG_KEY]: - self.description = self[CONFIG_KEY][DESC_KEY] + if DESC_KEY in tmp_obj[CONFIG_KEY]: + tmp_obj.description = tmp_obj[CONFIG_KEY][DESC_KEY] - self._set_indexes(self[CONFIG_KEY]) + tmp_obj._set_indexes(tmp_obj[CONFIG_KEY]) - self.create_samples(modify=False if self[SAMPLE_TABLE_FILE_KEY] else True) - self._sample_table = self._get_table_from_samples( - index=self.st_index, initial=True + tmp_obj.create_samples(modify=False if tmp_obj[SAMPLE_TABLE_FILE_KEY] else True) + tmp_obj._sample_table = tmp_obj._get_table_from_samples( + index=tmp_obj.st_index, initial=True ) - return self + return tmp_obj def to_dict( self, @@ -244,7 +246,6 @@ def to_dict( """ Convert the Project object to a dictionary. 
- :param bool expand: whether to expand the paths :param bool extended: whether to produce complete project dict (used to reinit the project) :param Literal orient: orientation of the returned df :return dict: a dictionary representation of the Project object @@ -1406,18 +1407,10 @@ def __delitem__(self, key): def __repr__(self): return str(self) - # pickle now is impossible, because it's impossible to initialize Project class without using actual files def __reduce__(self): return ( - self.__class__, - ( - None, - None, - None, - None, - False, - self.to_dict(extended=True, orient="records"), - ), + self.__class__.from_dict, + (self.to_dict(extended=True, orient="records"),), ) diff --git a/tests/test_Project.py b/tests/test_Project.py index b4a80242..d30952b8 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -337,7 +337,7 @@ def test_from_dict_instatiation(self, example_pep_cfg_path): representation. """ p1 = Project(cfg=example_pep_cfg_path) - p2 = Project().from_dict(p1.to_dict(extended=True)) + p2 = Project.from_dict(p1.to_dict(extended=True)) assert p1 == p2 def test_to_dict_does_not_create_nans(self, example_pep_nextflow_csv_path): @@ -581,7 +581,7 @@ def test_from_dict(self, example_pep_cfg_path, orient): p1 = Project(cfg=example_pep_cfg_path) p1_dict = p1.to_dict(extended=True, orient=orient) del p1_dict["_config"]["sample_table"] - p2 = Project().from_dict(p1_dict) + p2 = Project.from_dict(p1_dict) assert p1 == p2 @pytest.mark.parametrize( @@ -597,7 +597,7 @@ def test_from_pandas(self, config_with_pandas_obj, example_pep_csv_path): """ Test initializing project from dict """ - p1 = Project().from_pandas(config_with_pandas_obj) + p1 = Project.from_pandas(config_with_pandas_obj) p2 = Project(example_pep_csv_path) assert p1 == p2 From 922aba9ed2c5bfdb826fe2bdf051ea82d6e2b9af Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 6 Dec 2023 19:36:41 -0500 Subject: [PATCH 37/53] fixed #458 --- peppy/project.py | 14 ++++++++++++++ 
tests/conftest.py | 5 +++++ .../example_basic_sample_yaml/sample.yaml | 6 ++++++ tests/test_Project.py | 15 +++++++++++++++ 4 files changed, 40 insertions(+) create mode 100644 tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml diff --git a/peppy/project.py b/peppy/project.py index 4d439178..0f8ce808 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +import yaml from pandas.core.common import flatten from rich.console import Console from rich.progress import track @@ -235,6 +236,19 @@ def from_dict(cls, pep_dictionary: dict): return tmp_obj + @classmethod + def from_yaml(cls, yaml_file: str): + """ + Init a peppy project instance from a yaml file + + :param str yaml_file: path to yaml file + """ + _LOGGER.info("Processing project from yaml...") + with open(yaml_file, "r") as f: + prj_dict = yaml.safe_load(f) + pd_df = pd.DataFrame.from_dict(prj_dict) + return cls.from_pandas(pd_df) + def to_dict( self, # expand: bool = False, # expand was used to expand paths. 
This functionality was removed, because of attmapp diff --git a/tests/conftest.py b/tests/conftest.py index bfaf5212..50c7ffd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,6 +36,11 @@ def example_pep_csv_path(request): return get_path_to_example_file(EPB, request.param, "sample_table.csv") +@pytest.fixture +def example_yaml_sample_file(request): + return get_path_to_example_file(EPB, request.param, "sample.yaml") + + @pytest.fixture def example_pep_nextflow_csv_path(): return get_path_to_example_file(EPB, "nextflow_taxprofiler_pep", "samplesheet.csv") diff --git a/tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml b/tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml new file mode 100644 index 00000000..468a4eab --- /dev/null +++ b/tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml @@ -0,0 +1,6 @@ +- sample_name: sample1 + file: path/to/file.tsv +- sample_name: sample2 + file: path/to/2.tsv +- sample_name: sample3 + file: path/to/3.tsv \ No newline at end of file diff --git a/tests/test_Project.py b/tests/test_Project.py index d30952b8..eabc66b3 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -601,6 +601,21 @@ def test_from_pandas(self, config_with_pandas_obj, example_pep_csv_path): p2 = Project(example_pep_csv_path) assert p1 == p2 + @pytest.mark.parametrize( + "example_yaml_sample_file", + [ + "basic_sample_yaml", + ], + indirect=True, + ) + def test_from_yaml(self, example_yaml_sample_file): + """ + Test initializing project from dict + """ + p1 = Project.from_yaml(example_yaml_sample_file) + assert p1.samples[0].sample_name == "sample1" + assert len(p1.samples) == 3 + @pytest.mark.parametrize( "config_with_pandas_obj, example_pep_csv_path", [ From 2b3cc3936cd2d6bfe5130b6dd51580e72b56f7f5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 7 Dec 2023 12:09:49 -0500 Subject: [PATCH 38/53] added initializing docs --- docs/changelog.md | 4 ++++ docs/initialize.md | 46 
++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + peppy/_version.py | 2 +- peppy/project.py | 2 +- 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 docs/initialize.md diff --git a/docs/changelog.md b/docs/changelog.md index f6a077e7..f715aea4 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -13,6 +13,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm _Due to the changes mentioned above, a few item functionalities may be disabled. For example, the `name` and `description` properties can now be accessed and modified using attribute functionality_ +### Added +- Constructor methods: `from_dict`, `from_pandas`, `from_yaml` + + ## [0.35.7] -- 2023-07-19 ### Fixed - incorrect setting of sample and subsample indexes using from_dict function (#452) diff --git a/docs/initialize.md b/docs/initialize.md new file mode 100644 index 00000000..1ae33c8a --- /dev/null +++ b/docs/initialize.md @@ -0,0 +1,46 @@ +# How to initiate peppy using different methods + +peppy supports multiple ways to initiate a project. The most common way is to use a configuration file. +However, peppy also supports using a csv file (sample sheet), and a yaml file (sample sheet). +Additionally, peppy can be initiated using Python objects such as a pandas dataframe or a dictionary. + +## 1. Using a configuration file +```python +import peppy +project = peppy.Project("path/to/project/config.yaml") +``` + +## 2. Using csv file (sample sheet) +```python +import peppy +project = peppy.Project("path/to/project/sample_sheet.csv") +``` + +## 3. Using yaml sample sheet +```python +import peppy +project = peppy.Project.from_yaml("path/to/project/sample_sheet.yaml") +``` + + +## 4. Using a pandas dataframe +```python +import pandas as pd +import peppy +df = pd.read_csv("path/to/project/sample_sheet.csv") +project = peppy.Project.from_pandas(df) +``` + +## 5. 
Using a peppy generated dict +```python +import peppy +project = peppy.Project.from_dict({`_config`: str, + `_samples`: list | dict, + `_subsamples`: list[list | dict]}) +``` + +## 6. Using a csv file from a url +```python +import peppy +project = peppy.Project("example_url.csv") +``` diff --git a/mkdocs.yml b/mkdocs.yml index 0eea2c95..399a1cbe 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -14,6 +14,7 @@ nav: - How to use append sample modifier: feature1_append.md - How to use imply sample modifier: feature2_imply.md - How to validate a PEP: validating.md + - How to initialize a peppy: initialize.md - Reference: - API: autodoc_build/peppy.md - Support: support.md diff --git a/peppy/_version.py b/peppy/_version.py index 3a51a04c..3b158395 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a5" +__version__ = "0.40.0a6" diff --git a/peppy/project.py b/peppy/project.py index 0f8ce808..0174fa8c 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -290,7 +290,7 @@ def create_samples(self, modify: bool = False): """ self._samples: List[Sample] = self.load_samples() if self.samples is None: - _LOGGER.info("No samples found in the project.") + _LOGGER.debug("No samples found in the project.") if modify: self.modify_samples() From dd69b8e2f161ac21ffa29a58728647b3747db6e3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 7 Dec 2023 13:05:09 -0500 Subject: [PATCH 39/53] Fixed #464 --- peppy/project.py | 1 - peppy/utils.py | 5 +++-- requirements/requirements-all.txt | 2 +- tests/test_Project.py | 13 +++++++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 0174fa8c..742565a7 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -7,7 +7,6 @@ from contextlib import suppress from logging import getLogger from typing import Iterable, List, Tuple, Union, Literal, NoReturn -import yacman import numpy as np import pandas as pd diff --git a/peppy/utils.py b/peppy/utils.py index 
2587e2c2..0fab935b 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -5,6 +5,7 @@ from typing import Dict from urllib.request import urlopen +import yacman import yaml from ubiquerg import expandpath, is_url @@ -123,11 +124,11 @@ def load_yaml(filepath): ) else: data = response.read().decode("utf-8") - return yaml.safe_load(data) + return yacman.YAMLConfigManager(yamldata=data).exp else: with open(os.path.abspath(filepath), "r") as f: data = yaml.safe_load(f) - return data + return yacman.YAMLConfigManager(data).exp def is_cfg_or_anno(file_path, formats=None): diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 737868a9..32597577 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,4 +2,4 @@ pandas>=0.24.2 pyyaml rich>=10.3.0 ubiquerg>=0.6.2 -yacman>=0.9.0 +yacman>=0.9.2 diff --git a/tests/test_Project.py b/tests/test_Project.py index eabc66b3..685d7259 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -93,6 +93,19 @@ def test_instantiaion(self, example_pep_cfg_path, defer): p = Project(cfg=example_pep_cfg_path, defer_samples_creation=defer) assert isinstance(p, Project) + @pytest.mark.parametrize("defer", [False, True]) + @pytest.mark.parametrize("example_pep_cfg_path", ["amendments1"], indirect=True) + def test_expand_path(self, example_pep_cfg_path, defer): + """ + Verify output_path is expanded + """ + p = Project( + cfg=example_pep_cfg_path, + amendments="newLib", + defer_samples_creation=defer, + ) + assert not p.config["output_dir"].startswith("$") + @pytest.mark.parametrize( "config_path", [ From a59208c3cace159a6e8667b3e29c02f30abde3e9 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 11 Dec 2023 14:02:26 -0500 Subject: [PATCH 40/53] Removed yacman from dependencies --- peppy/utils.py | 30 +++++++++++++++++++++++++++--- requirements/requirements-all.txt | 1 - 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/peppy/utils.py b/peppy/utils.py 
index 0fab935b..e4fed739 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -2,7 +2,7 @@ import logging import os -from typing import Dict +from typing import Dict, Mapping from urllib.request import urlopen import yacman @@ -105,6 +105,30 @@ def _raise_faulty_arg(): _raise_faulty_arg() +def _expandpath(path: str): + """ + Expand a filesystem path that may or may not contain user/env vars. + + :param str path: path to expand + :return str: expanded version of input path + """ + return os.path.expandvars(os.path.expanduser(path)) + + +def expand_paths(x: dict) -> dict: + """ + Recursively expand paths in a dict. + + :param dict x: dict to expand + :return dict: dict with expanded paths + """ + if isinstance(x, str): + return expandpath(x) + elif isinstance(x, Mapping): + return {k: expand_paths(v) for k, v in x.items()} + return x + + def load_yaml(filepath): """ Load a local or remote YAML file into a Python dict @@ -124,11 +148,11 @@ def load_yaml(filepath): ) else: data = response.read().decode("utf-8") - return yacman.YAMLConfigManager(yamldata=data).exp + return expand_paths(yaml.safe_load(data)) else: with open(os.path.abspath(filepath), "r") as f: data = yaml.safe_load(f) - return yacman.YAMLConfigManager(data).exp + return expand_paths(data) def is_cfg_or_anno(file_path, formats=None): diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 32597577..65b60371 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,4 +2,3 @@ pandas>=0.24.2 pyyaml rich>=10.3.0 ubiquerg>=0.6.2 -yacman>=0.9.2 From a8dc90054098d8763f628af29a4e852c4f320ac7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 11 Dec 2023 14:28:56 -0500 Subject: [PATCH 41/53] fixed docs and added from_pep_config method --- docs/initialize.md | 27 +++++++++++++++++++++------ peppy/project.py | 31 +++++++++++++++++++++++++++++++ peppy/utils.py | 1 - tests/test_Project.py | 4 ++++ 4 files changed, 56 insertions(+), 7 deletions(-) 
diff --git a/docs/initialize.md b/docs/initialize.md index 1ae33c8a..a762045e 100644 --- a/docs/initialize.md +++ b/docs/initialize.md @@ -7,13 +7,13 @@ Additionally, peppy can be initiated using Python objects such as a pandas dataf ## 1. Using a configuration file ```python import peppy -project = peppy.Project("path/to/project/config.yaml") +project = peppy.Project.from_pep_config("path/to/project/config.yaml") ``` ## 2. Using csv file (sample sheet) ```python import peppy -project = peppy.Project("path/to/project/sample_sheet.csv") +project = peppy.Project.from_pep_config("path/to/project/sample_sheet.csv") ``` ## 3. Using yaml sample sheet @@ -34,13 +34,28 @@ project = peppy.Project.from_pandas(df) ## 5. Using a peppy generated dict ```python import peppy -project = peppy.Project.from_dict({`_config`: str, - `_samples`: list | dict, - `_subsamples`: list[list | dict]}) +project = peppy.Project.from_dict( + {'_config': {'description': None, + 'name': 'example_basic', + 'pep_version': '2.0.0', + 'sample_table': 'sample_table.csv',}, + '_sample_dict': [{'organism': 'pig', 'sample_name': 'pig_0h', 'time': '0'}, + {'organism': 'pig', 'sample_name': 'pig_1h', 'time': '1'}, + {'organism': 'frog', 'sample_name': 'frog_0h', 'time': '0'}, + {'organism': 'frog', 'sample_name': 'frog_1h', 'time': '1'}], + '_subsample_list': [[{'read1': 'frog1a_data.txt', + 'read2': 'frog1a_data2.txt', + 'sample_name': 'frog_0h'}, + {'read1': 'frog1b_data.txt', + 'read2': 'frog1b_data2.txt', + 'sample_name': 'pig_0h'}, + {'read1': 'frog1c_data.txt', + 'read2': 'frog1b_data2.txt', + 'sample_name': 'pig_0h'}]]}) ``` ## 6. 
Using a csv file from a url ```python import peppy -project = peppy.Project("example_url.csv") +project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") ``` diff --git a/peppy/project.py b/peppy/project.py index 742565a7..5fe05822 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -235,6 +235,37 @@ def from_dict(cls, pep_dictionary: dict): return tmp_obj + @classmethod + def from_pep_config( + cls, + cfg: str = None, + amendments: Union[str, Iterable[str]] = None, + sample_table_index: Union[str, Iterable[str]] = None, + subsample_table_index: Union[str, Iterable[str]] = None, + defer_samples_creation: bool = False, + ): + """ + Init a peppy project instance from a yaml file + + :param str cfg: Project config file (YAML) or sample table (CSV/TSV) + with one row per sample to constitute project + :param str | Iterable[str] sample_table_index: name of the columns to set + the sample_table index to + :param str | Iterable[str] subsample_table_index: name of the columns to set + the subsample_table index to + :param str | Iterable[str] amendments: names of the amendments to activate + :param Iterable[str] amendments: amendments to use within configuration file + :param bool defer_samples_creation: whether the sample creation should be skipped + """ + # TODO: this is just a copy of the __init__ method. 
It should be refactored + return cls( + cfg=cfg, + amendments=amendments, + sample_table_index=sample_table_index, + subsample_table_index=subsample_table_index, + defer_samples_creation=defer_samples_creation, + ) + @classmethod def from_yaml(cls, yaml_file: str): """ diff --git a/peppy/utils.py b/peppy/utils.py index e4fed739..35e6017a 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -5,7 +5,6 @@ from typing import Dict, Mapping from urllib.request import urlopen -import yacman import yaml from ubiquerg import expandpath, is_url diff --git a/tests/test_Project.py b/tests/test_Project.py index 685d7259..4aa14b27 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -350,6 +350,10 @@ def test_from_dict_instatiation(self, example_pep_cfg_path): representation. """ p1 = Project(cfg=example_pep_cfg_path) + ff = p1.to_dict(extended=True) + import pprint + + pprint.pprint(ff) p2 = Project.from_dict(p1.to_dict(extended=True)) assert p1 == p2 From fcf8193aee33405f719e9565834bc6ff6872fd62 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 11 Dec 2023 15:14:47 -0500 Subject: [PATCH 42/53] updated docs to and from dict --- docs/initialize.md | 12 ++++++++++++ peppy/project.py | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/initialize.md b/docs/initialize.md index a762045e..31822534 100644 --- a/docs/initialize.md +++ b/docs/initialize.md @@ -54,6 +54,18 @@ project = peppy.Project.from_dict( 'sample_name': 'pig_0h'}]]}) ``` +## 5.1 Generate dict from peppy and reuse it +```python +import peppy + +project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") +project_dict = project.to_dict(extended=True) +project_copy = peppy.Project.from_dict(project_dict) + +# now you can check if this project is the same as the original project +print(project_copy == project) +``` + ## 6. 
Using a csv file from a url ```python import peppy diff --git a/peppy/project.py b/peppy/project.py index 5fe05822..6c2d02e8 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -301,7 +301,10 @@ def to_dict( ] else: sub_df = None - self[CONFIG_KEY][NAME_KEY] = self.name + try: + self[CONFIG_KEY][NAME_KEY] = self.name + except NotImplementedError: + self[CONFIG_KEY][NAME_KEY] = "unnamed" self[CONFIG_KEY][DESC_KEY] = self.description p_dict = { SAMPLE_RAW_DICT_KEY: self[SAMPLE_DF_KEY].to_dict(orient=orient), From 11aa6c07b957d3bd4c11fbdb4421c820ffaa6e1b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 14 Dec 2023 11:44:56 -0500 Subject: [PATCH 43/53] Updated method naming --- docs/initialize.md | 4 +++- peppy/project.py | 2 +- tests/test_Project.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/initialize.md b/docs/initialize.md index 31822534..b0c14301 100644 --- a/docs/initialize.md +++ b/docs/initialize.md @@ -17,9 +17,11 @@ project = peppy.Project.from_pep_config("path/to/project/sample_sheet.csv") ``` ## 3. 
Using yaml sample sheet + ```python import peppy -project = peppy.Project.from_yaml("path/to/project/sample_sheet.yaml") + +project = peppy.Project.from_sample_yaml("path/to/project/sample_sheet.yaml") ``` diff --git a/peppy/project.py b/peppy/project.py index 6c2d02e8..f7056b40 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -267,7 +267,7 @@ def from_pep_config( ) @classmethod - def from_yaml(cls, yaml_file: str): + def from_sample_yaml(cls, yaml_file: str): """ Init a peppy project instance from a yaml file diff --git a/tests/test_Project.py b/tests/test_Project.py index 4aa14b27..b2241f2f 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -629,7 +629,7 @@ def test_from_yaml(self, example_yaml_sample_file): """ Test initializing project from dict """ - p1 = Project.from_yaml(example_yaml_sample_file) + p1 = Project.from_sample_yaml(example_yaml_sample_file) assert p1.samples[0].sample_name == "sample1" assert len(p1.samples) == 3 From e06b4a2ab6b19d29aa475eb8c8db9b7cbe21c545 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 14 Dec 2023 13:31:30 -0500 Subject: [PATCH 44/53] Looper fixes --- peppy/project.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index f7056b40..66e35fa7 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -205,35 +205,47 @@ def from_dict(cls, pep_dictionary: dict): Init a peppy project instance from a dictionary representation of an already processed PEP. 
- :param Dict[Any] pep_dictionary: dict representation of the project {_config: str, + :param Dict[Any] pep_dictionary: dict representation of the project {_config: dict, _samples: list | dict, _subsamples: list[list | dict]} """ _LOGGER.info("Processing project from dictionary...") - tmp_obj = cls() - tmp_obj[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) - tmp_obj[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] + temp_obj = cls() + return temp_obj._from_dict(pep_dictionary) + + def _from_dict(self, pep_dictionary) -> "Project": + """ + Initiate a peppy project instance from a dictionary representation of an already processed PEP. + + # This function is needed in looper to reinit the project after it was created from a dictionary representation. + + :param Dict[Any] pep_dictionary: dict representation of the project {_config: dict, + _samples: list | dict, + _subsamples: list[list | dict]} + """ + self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) + self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] if SUBSAMPLE_RAW_LIST_KEY in pep_dictionary: if pep_dictionary[SUBSAMPLE_RAW_LIST_KEY]: - tmp_obj[SUBSAMPLE_DF_KEY] = [ + self[SUBSAMPLE_DF_KEY] = [ pd.DataFrame(sub_a) for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] - if NAME_KEY in tmp_obj[CONFIG_KEY]: - tmp_obj.name = tmp_obj[CONFIG_KEY][NAME_KEY] + if NAME_KEY in self[CONFIG_KEY]: + self.name = self[CONFIG_KEY][NAME_KEY] - if DESC_KEY in tmp_obj[CONFIG_KEY]: - tmp_obj.description = tmp_obj[CONFIG_KEY][DESC_KEY] + if DESC_KEY in self[CONFIG_KEY]: + self.description = self[CONFIG_KEY][DESC_KEY] - tmp_obj._set_indexes(tmp_obj[CONFIG_KEY]) + self._set_indexes(self[CONFIG_KEY]) - tmp_obj.create_samples(modify=False if tmp_obj[SAMPLE_TABLE_FILE_KEY] else True) - tmp_obj._sample_table = tmp_obj._get_table_from_samples( - index=tmp_obj.st_index, initial=True + self.create_samples(modify=False if self[SAMPLE_TABLE_FILE_KEY] else True) + self._sample_table = self._get_table_from_samples( + 
index=self.st_index, initial=True ) - return tmp_obj + return self @classmethod def from_pep_config( From 4316596ecb79f4b29473c2e1f6c4ffa705a4b5b7 Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Mon, 18 Dec 2023 11:04:10 -0500 Subject: [PATCH 45/53] Update requirements-all.txt --- requirements/requirements-all.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index c0bb6d98..a0036b9c 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,3 @@ pyyaml rich>=10.3.0 ubiquerg>=0.6.2 numpy -yacman>=0.9.0 - From 3c64fc9861f146eac931fd2cd6ba23e682295ce9 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 11:08:34 -0500 Subject: [PATCH 46/53] test on python 3.12 --- .github/workflows/run-pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 11bc86fe..480eb4b2 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.11", "3.12"] os: [ubuntu-20.04] steps: From 534c17feaff3e0c88060543088441dfdd101220f Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 11:09:19 -0500 Subject: [PATCH 47/53] typo --- .github/workflows/run-pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 480eb4b2..b8b00391 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -22,10 +22,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install dev dependancies + - name: Install dev dependencies run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi - - name: Install test dependancies + - name: Install test 
dependencies run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi - name: Install package From 8be815bcfcfa49b1ecd3915c457b2b215dbf7940 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 11:10:21 -0500 Subject: [PATCH 48/53] support python 3.12 --- .github/workflows/run-pytest.yml | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index b8b00391..ee1d60cc 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11", "3.12"] + python-version: ["3.8", "3.12"] os: [ubuntu-20.04] steps: diff --git a/setup.py b/setup.py index ac84c383..d0d1551b 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def get_static(name, condition=None): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, metadata, bioinformatics, sequencing, ngs, workflow", From d66f3cbc89494fc1426e84c0d024dcdd57dfce45 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 11:36:21 -0500 Subject: [PATCH 49/53] docs updates --- docs/changelog.md | 4 +-- docs/initialize.md | 61 ++++++++++++++++++++++++++-------------------- mkdocs.yml | 2 +- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index f715aea4..86b54aca 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,14 +7,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm **This version introduced backwards-incompatible changes.** ### Changed -- Replaced attmap with MutableMapping. 
(which resulted in the removal of the attribute functionality previously available in attmap) +- Replaced attmap with MutableMapping (which removes some attribute) - Replaced OrderedDict with dict. - Deprecated support for Python versions <= 3.7. _Due to the changes mentioned above, a few item functionalities may be disabled. For example, the `name` and `description` properties can now be accessed and modified using attribute functionality_ ### Added -- Constructor methods: `from_dict`, `from_pandas`, `from_yaml` +- Constructor methods: `Project.from_dict`, `Project.from_pandas`, `Project.from_yaml` ## [0.35.7] -- 2023-07-19 diff --git a/docs/initialize.md b/docs/initialize.md index b0c14301..bf659440 100644 --- a/docs/initialize.md +++ b/docs/initialize.md @@ -1,22 +1,31 @@ # How to initiate peppy using different methods -peppy supports multiple ways to initiate a project. The most common way is to use a configuration file. -However, peppy also supports using a csv file (sample sheet), and a yaml file (sample sheet). -Additionally, peppy can be initiated using Python objects such as a pandas dataframe or a dictionary. +The primary use case of `peppy` is to create a `peppy.Project` object, which will give you an API for interacting with your project and sample metadata. There are multiple ways to instantiate a `peppy.Project`. +The most common is to use a configuration file; however, you can also use a `CSV` file (sample sheet), or a sample `YAML` file (sample sheet), or use Python objects directly, such as a `pandas` DataFrame, or a Python `dict`. + +## 1. From PEP configuration file -## 1. Using a configuration file ```python import peppy project = peppy.Project.from_pep_config("path/to/project/config.yaml") ``` -## 2. Using csv file (sample sheet) +## 2. From `CSV` file (sample sheet) + ```python import peppy project = peppy.Project.from_pep_config("path/to/project/sample_sheet.csv") ``` -## 3. 
Using yaml sample sheet +You can also instantiate directly from a URL to a CSV file: + +```python +import peppy +project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") +``` + + +## 3. From `YAML` sample sheet ```python import peppy @@ -25,7 +34,8 @@ project = peppy.Project.from_sample_yaml("path/to/project/sample_sheet.yaml") ``` -## 4. Using a pandas dataframe +## 4. From a `pandas` DataFrame + ```python import pandas as pd import peppy @@ -33,7 +43,24 @@ df = pd.read_csv("path/to/project/sample_sheet.csv") project = peppy.Project.from_pandas(df) ``` -## 5. Using a peppy generated dict +## 5. From a `peppy`-generated `dict` + +Store a `peppy.Project` object as a dict using `prj.to_dict()`. Then, load it with `Project.from_dict()`: + +```python +import peppy + +project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") +project_dict = project.to_dict(extended=True) +project_copy = peppy.Project.from_dict(project_dict) + +# now you can check if this project is the same as the original project +print(project_copy == project) +``` + +Or, you could generate an equivalent dictionary in some other way: + + ```python import peppy project = peppy.Project.from_dict( @@ -55,21 +82,3 @@ project = peppy.Project.from_dict( 'read2': 'frog1b_data2.txt', 'sample_name': 'pig_0h'}]]}) ``` - -## 5.1 Generate dict from peppy and reuse it -```python -import peppy - -project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") -project_dict = project.to_dict(extended=True) -project_copy = peppy.Project.from_dict(project_dict) - -# now you can check if this project is the same as the original project -print(project_copy == project) -``` - -## 6. 
Using a csv file from a url -```python -import peppy -project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") -``` diff --git a/mkdocs.yml b/mkdocs.yml index 399a1cbe..c2989c63 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,13 +8,13 @@ nav: - Introduction: README.md - Installing and Hello World: hello-world.md - How-to Guides: + - How to initialize a Project: initialize.md - How to use peppy: tutorial.md - How to use subsample table: feature4_subsample_table.md - How to use amendments: feature5_amend.md - How to use append sample modifier: feature1_append.md - How to use imply sample modifier: feature2_imply.md - How to validate a PEP: validating.md - - How to initialize a peppy: initialize.md - Reference: - API: autodoc_build/peppy.md - Support: support.md From f9d9ab4e6a0284b0ae5516faa6821123fb6fe825 Mon Sep 17 00:00:00 2001 From: Oleksandr Date: Mon, 18 Dec 2023 12:19:20 -0500 Subject: [PATCH 50/53] Update changelog.md --- docs/changelog.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 86b54aca..fbe23a1e 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,14 +7,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm **This version introduced backwards-incompatible changes.** ### Changed -- Replaced attmap with MutableMapping (which removes some attribute) -- Replaced OrderedDict with dict. -- Deprecated support for Python versions <= 3.7. +- Replaced `attmap` with `MutableMapping` (which removes some attributes) +- Replaced OrderedDict with dict +- Deprecated support for Python versions <= 3.7 -_Due to the changes mentioned above, a few item functionalities may be disabled. For example, the `name` and `description` properties can now be accessed and modified using attribute functionality_ +_Due to the changes mentioned above, a few functionalities may be disabled. 
For example, the `name` and `description` project properties can no longer be accessed with `getitem`; use the `getattr` syntax instead_ ### Added -- Constructor methods: `Project.from_dict`, `Project.from_pandas`, `Project.from_yaml` +- Constructor methods: `Project.from_dict`, `Project.from_pandas`, `Project.from_sample_yaml`, `Project.from_pep_config` ## [0.35.7] -- 2023-07-19 From b757ff6b9081848caed678a9bfd38db3781d4119 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 13:22:41 -0500 Subject: [PATCH 51/53] add format conversion image --- docs/initialize.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/initialize.md b/docs/initialize.md index bf659440..f8d74632 100644 --- a/docs/initialize.md +++ b/docs/initialize.md @@ -3,6 +3,13 @@ The primary use case of `peppy` is to create a `peppy.Project` object, which will give you an API for interacting with your project and sample metadata. There are multiple ways to instantiate a `peppy.Project`. The most common is to use a configuration file; however, you can also use a `CSV` file (sample sheet), or a sample `YAML` file (sample sheet), or use Python objects directly, such as a `pandas` DataFrame, or a Python `dict`. + +
+ +
peppy can read from and produce various metadata formats
+
+ + ## 1. From PEP configuration file ```python From 89e13f5e00e846c3831b77e10e870a0dfe57f240 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 13:22:53 -0500 Subject: [PATCH 52/53] format convert --- docs/img/format_convert.svg | 1275 +++++++++++++++++++++++++++++++++++ 1 file changed, 1275 insertions(+) create mode 100644 docs/img/format_convert.svg diff --git a/docs/img/format_convert.svg b/docs/img/format_convert.svg new file mode 100644 index 00000000..bc08ce4e --- /dev/null +++ b/docs/img/format_convert.svg @@ -0,0 +1,1275 @@ + + + +PEPpeppy.ProjectDataFramesamplesYAMLInput formatsOutput formatsCSVURLCSVDataFramesamplesYAMLPEP From a9f3fb40d426207ad41209ad9d17a5a2ecbbb54f Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 18 Dec 2023 13:23:18 -0500 Subject: [PATCH 53/53] version bump for release 0.40.0 --- peppy/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peppy/_version.py b/peppy/_version.py index 3b158395..da7ed90a 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0a6" +__version__ = "0.40.0"