From 28a45c768250dc8b5e12b1cfb262e866da230dfa Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 15 Jun 2021 13:12:45 -0400 Subject: [PATCH] exclude pandas.DataFrames from dict repr --- .pre-commit-config.yaml | 1 + peppy/project.py | 26 +++++++--- peppy/sample.py | 5 ++ requirements/requirements-all.txt | 2 +- requirements/requirements-dev.txt | 1 + setup.py | 80 +++++++------------------------ 6 files changed, 46 insertions(+), 69 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index de8f7bdf..fd759dcf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,6 +7,7 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace + - id: check-ast - repo: https://github.com/PyCQA/isort rev: 5.8.0 diff --git a/peppy/project.py b/peppy/project.py index c30f4fa8..81ac0135 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -88,6 +88,10 @@ def __init__( index=self.st_index, initial=True ) + def _excl_classes_from_todict(self): + """Exclude pandas.DataFrame from dict representation""" + return (pd.DataFrame,) + def create_samples(self, modify=False): """ Populate Project with Sample objects @@ -284,7 +288,7 @@ def _del_if_in(obj, attr): _LOGGER.debug("Removing attributes: {}".format(to_remove)) for s in track( self.samples, - description="Removing", + description="Removing sample attributes", disable=not self.is_sample_table_large, ): for attr in to_remove: @@ -301,7 +305,7 @@ def attr_constants(self): for s in track( self.samples, - description="Applying constants", + description="Applying constant sample attributes", disable=not self.is_sample_table_large, ): for attr, val in to_append.items(): @@ -317,7 +321,7 @@ def attr_synonyms(self): _LOGGER.debug("Applying synonyms: {}".format(synonyms)) for sample in track( self.samples, - description="Applying synonyms", + description="Applying synonymous sample attributes", disable=not self.is_sample_table_large, ): for attr, new in synonyms.items(): @@ -375,7 +379,17 @@ def _auto_merge_duplicated_names(self): specified in the config """ sample_names_list = [getattr(s, self.sample_name_colname) for s in self.samples] - dups_set = set([x for x in sample_names_list if sample_names_list.count(x) > 1]) + dups_set = set( + [ + x + for x in track( + sample_names_list, + description="Detecting duplicate sample names", + disable=not self.is_sample_table_large, + ) + if sample_names_list.count(x) > 1 + ] + ) if not dups_set: # all sample names are unique return @@ -521,7 +535,7 @@ def attr_imply(self): ) for sample in track( self.samples, - description=f"Implying", + description=f"Implying sample attributes", disable=not self.is_sample_table_large, ): for implication in implications: @@ -570,7 +584,7 @@ def attr_derive(self, attrs=None): _LOGGER.debug("Derivations to be done: {}".format(derivations)) for sample in track( self.samples, - description="Deriving", + description="Deriving sample attributes", disable=not self.is_sample_table_large, ): for attr in derivations: diff --git a/peppy/sample.py b/peppy/sample.py index 617ef898..b8ca765b 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -6,6 +6,7 @@ from logging import getLogger from string import Formatter +import pandas as pd import yaml from attmap import AttMap, PathExAttMap @@ -372,6 +373,10 @@ def _excl_from_repr(self, k, cls): """Exclude the Project reference from representation.""" return k.startswith("_") or super(Sample, self)._excl_from_repr(k, cls) + def _excl_classes_from_todict(self): + """Exclude pandas.DataFrame from dict representation""" + return (pd.DataFrame,) + def _try_touch_samples(self): """ Safely sets sample edited flag to true diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3a25eec1..3fcac2b2 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,4 +1,4 @@ -attmap>=0.12.5 +# attmap>=0.13.1 logmuse>=0.2 pandas>=0.24.2 pyyaml diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index e69de29b..971fce64 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -0,0 +1 @@ +-e git+git://github.com/pepkit/attmap@dev#egg=attmap diff --git a/setup.py b/setup.py index fd23e2a2..d83f0a7a 100644 --- a/setup.py +++ b/setup.py @@ -1,65 +1,24 @@ -#! /usr/bin/env python - -import os import sys from setuptools import setup -REQDIR = "requirements" - - -def read_reqs(reqs_name): - deps = [] - with open(os.path.join(REQDIR, "requirements-{}.txt".format(reqs_name)), "r") as f: - for l in f: - if not l.strip(): - continue - # deps.append(l.split("=")[0].rstrip("<>")) - deps.append(l) - return deps - - -# Additional keyword arguments for setup(). -extra = {} +PACKAGE_NAME = "peppy" # Ordinary dependencies -DEPENDENCIES = read_reqs("all") - -# numexpr for pandas -try: - import numexpr -except ImportError: - # No numexpr is OK for pandas. - pass -else: - # pandas 0.20.2 needs updated numexpr; the claim is 2.4.6, but that failed. - DEPENDENCIES.append("numexpr>=2.6.2") +DEPENDENCIES = [] +with open("requirements/requirements-all.txt", "r") as reqs_file: + for line in reqs_file: + if not line.strip(): + continue + # DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) + DEPENDENCIES.append(line) -# 2to3 +# Additional keyword arguments for setup(). +extra = {"install_requires": DEPENDENCIES} if sys.version_info >= (3,): extra["use_2to3"] = True -extra["install_requires"] = DEPENDENCIES - - -# Additional files to include with package -def get_static(name, condition=None): - static = [ - os.path.join(name, f) - for f in os.listdir( - os.path.join(os.path.dirname(os.path.realpath(__file__)), name) - ) - ] - if condition is None: - return static - else: - return [i for i in filter(lambda x: eval(condition), static)] - -# scripts to be added to the $PATH -# scripts = get_static("scripts", condition="'.' in x") -scripts = None - -with open("peppy/_version.py", "r") as versionfile: +with open(f"{PACKAGE_NAME}/_version.py", "r") as versionfile: version = versionfile.readline().split()[-1].strip("\"'\n") # Handle the pypi README formatting. @@ -67,15 +26,14 @@ def get_static(name, condition=None): import pypandoc long_description = pypandoc.convert_file("README.md", "rst") - print("Pandoc conversion succeeded") + msg = "\033[032mPandoc conversion succeeded.\033[0m" except (IOError, ImportError, OSError): - print("Warning: pandoc conversion failed!") + msg = "\033[0;31mWarning: pandoc conversion failed!\033[0m" long_description = open("README.md").read() - setup( - name="peppy", - packages=["peppy"], + name=PACKAGE_NAME, + packages=[PACKAGE_NAME], version=version, description="A python-based project metadata manager for portable encapsulated projects", long_description=long_description, @@ -91,14 +49,12 @@ def get_static(name, condition=None): ], keywords="project, metadata, bioinformatics, sequencing, ngs, workflow", url="https://github.com/pepkit/peppy/", - author=u"Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro", + author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro", license="BSD2", - scripts=scripts, include_package_data=True, - test_suite="tests", - tests_require=read_reqs("dev"), + tests_require=(["pytest"]), setup_requires=( ["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else [] ), - **extra + **extra, )