From 08723524bfe73e2a817052f62f86c031f04f2f5a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 11 Jan 2024 12:31:17 -0500 Subject: [PATCH 1/3] Fixed NaN --- docs/changelog.md | 5 +++++ peppy/_version.py | 2 +- peppy/project.py | 6 ++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index e5597f3c..4a96c87f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.40.1] -- 2024-01-11 +### Fixed +- Initializing Project with `NaN` value instead of `None` in `from_pandas` method + + ## [0.40.0] -- 2023-12-18 **This version introduced backwards-incompatible changes.** diff --git a/peppy/_version.py b/peppy/_version.py index da7ed90a..01b570aa 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.0" +__version__ = "0.40.1" diff --git a/peppy/project.py b/peppy/project.py index 66e35fa7..9fb177aa 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -223,13 +223,15 @@ def _from_dict(self, pep_dictionary) -> "Project": _samples: list | dict, _subsamples: list[list | dict]} """ - self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]) + self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]).replace( + np.nan, "None" + ) self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] if SUBSAMPLE_RAW_LIST_KEY in pep_dictionary: if pep_dictionary[SUBSAMPLE_RAW_LIST_KEY]: self[SUBSAMPLE_DF_KEY] = [ - pd.DataFrame(sub_a) + pd.DataFrame(sub_a).replace(np.nan, "None") for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] if NAME_KEY in self[CONFIG_KEY]: From 49dccafee69f49937910eb5f21bf724af88cc56f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 11 Jan 2024 13:10:57 -0500 Subject: [PATCH 2/3] fixed bugs in nan tests --- peppy/__init__.py | 2 +- peppy/project.py | 9 +++++---- tests/test_Project.py | 7 +------ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/peppy/__init__.py b/peppy/__init__.py index bce94377..1b958c15 100644 --- a/peppy/__init__.py +++ b/peppy/__init__.py @@ -14,6 +14,6 @@ from .sample import Sample __classes__ = ["Project", "Sample"] -__all__ = __classes__ + ["PeppyError"] +__all__ = __classes__ + ["PeppyError", "__version__"] LOGGING_LEVEL = "INFO" diff --git a/peppy/project.py b/peppy/project.py index 9fb177aa..4bd7f685 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -6,7 +6,7 @@ from collections.abc import Mapping, MutableMapping from contextlib import suppress from logging import getLogger -from typing import Iterable, List, Tuple, Union, Literal, NoReturn +from typing import Iterable, List, Tuple, Union, Literal import numpy as np import pandas as pd @@ -224,14 +224,14 @@ def _from_dict(self, pep_dictionary) -> "Project": _subsamples: list[list | dict]} """ self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]).replace( - np.nan, "None" + np.nan, None ) self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] if SUBSAMPLE_RAW_LIST_KEY in pep_dictionary: if pep_dictionary[SUBSAMPLE_RAW_LIST_KEY]: self[SUBSAMPLE_DF_KEY] = [ - pd.DataFrame(sub_a).replace(np.nan, "None") + pd.DataFrame(sub_a).replace(np.nan, None) for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] if NAME_KEY in self[CONFIG_KEY]: @@ -469,7 +469,7 @@ def parse_config_file( relative_vars = [CFG_SAMPLE_TABLE_KEY, CFG_SUBSAMPLE_TABLE_KEY] _make_sections_absolute(self[CONFIG_KEY], relative_vars, cfg_path) - def _set_indexes(self, config: Mapping) -> NoReturn: + def _set_indexes(self, config: Mapping) -> None: """ Set sample and subsample indexes if they are different then Default @@ -485,6 +485,7 @@ def _set_indexes(self, config: Mapping) -> NoReturn: if SUBSAMPLE_TABLE_INDEX_KEY in config else SUBSAMPLE_NAME_ATTR ) + return None def load_samples(self): """ diff --git a/tests/test_Project.py b/tests/test_Project.py index b2241f2f..c77439ec 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -10,7 +10,6 @@ from yaml import dump, safe_load import pickle -import peppy from peppy import Project from peppy.const import SAMPLE_NAME_ATTR, SAMPLE_TABLE_FILE_KEY from peppy.exceptions import ( @@ -350,10 +349,6 @@ def test_from_dict_instatiation(self, example_pep_cfg_path): representation. """ p1 = Project(cfg=example_pep_cfg_path) - ff = p1.to_dict(extended=True) - import pprint - - pprint.pprint(ff) p2 = Project.from_dict(p1.to_dict(extended=True)) assert p1 == p2 @@ -694,7 +689,7 @@ def test_sample_getattr(self, example_pep_cfg_path): assert s2.organism == s2["organism"] @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) - def test_sample_getattr(self, example_pep_cfg_path): + def test_sample_settatr(self, example_pep_cfg_path): """ Verify that the setattr works """ From d57caad772baea5a4191866771a258a100de5a18 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 11 Jan 2024 18:39:11 -0500 Subject: [PATCH 3/3] fixed bugs in nan tests 2 --- peppy/project.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/peppy/project.py b/peppy/project.py index 4bd7f685..1e0ebb68 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -224,14 +224,14 @@ def _from_dict(self, pep_dictionary) -> "Project": _subsamples: list[list | dict]} """ self[SAMPLE_DF_KEY] = pd.DataFrame(pep_dictionary[SAMPLE_RAW_DICT_KEY]).replace( - np.nan, None + np.nan, "" ) self[CONFIG_KEY] = pep_dictionary[CONFIG_KEY] if SUBSAMPLE_RAW_LIST_KEY in pep_dictionary: if pep_dictionary[SUBSAMPLE_RAW_LIST_KEY]: self[SUBSAMPLE_DF_KEY] = [ - pd.DataFrame(sub_a).replace(np.nan, None) + pd.DataFrame(sub_a).replace(np.nan, "") for sub_a in pep_dictionary[SUBSAMPLE_RAW_LIST_KEY] ] if NAME_KEY in self[CONFIG_KEY]: @@ -1336,7 +1336,7 @@ def _read_sample_data(self): if st is not None: parser_class = select_parser(path=st) - self[SAMPLE_DF_KEY] = parser_class(path=st).table + self[SAMPLE_DF_KEY] = parser_class(path=st).table.replace(np.nan, "") self[SAMPLE_DF_LARGE] = self[SAMPLE_DF_KEY].shape[0] > 1000 else: _LOGGER.warning(no_metadata_msg.format(CFG_SAMPLE_TABLE_KEY))