Exclude pandas.DataFrame objects from the dict representation

pepkit · Jun 15, 2021 · 28a45c7 · 28a45c7
1 parent 5cf6aca
commit 28a45c7
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 69 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,6 +7,7 @@ repos:
       - id: end-of-file-fixer
       - id: requirements-txt-fixer
       - id: trailing-whitespace
+      - id: check-ast
 
   - repo: https://github.com/PyCQA/isort
     rev: 5.8.0

diff --git a/peppy/project.py b/peppy/project.py
@@ -88,6 +88,10 @@ def __init__(
             index=self.st_index, initial=True
         )
 
+    def _excl_classes_from_todict(self):
+        """Exclude pandas.DataFrame from dict representation"""
+        return (pd.DataFrame,)
+
     def create_samples(self, modify=False):
         """
         Populate Project with Sample objects
@@ -284,7 +288,7 @@ def _del_if_in(obj, attr):
             _LOGGER.debug("Removing attributes: {}".format(to_remove))
             for s in track(
                 self.samples,
-                description="Removing",
+                description="Removing sample attributes",
                 disable=not self.is_sample_table_large,
             ):
                 for attr in to_remove:
@@ -301,7 +305,7 @@ def attr_constants(self):
 
             for s in track(
                 self.samples,
-                description="Applying constants",
+                description="Applying constant sample attributes",
                 disable=not self.is_sample_table_large,
             ):
                 for attr, val in to_append.items():
@@ -317,7 +321,7 @@ def attr_synonyms(self):
             _LOGGER.debug("Applying synonyms: {}".format(synonyms))
             for sample in track(
                 self.samples,
-                description="Applying synonyms",
+                description="Applying synonymous sample attributes",
                 disable=not self.is_sample_table_large,
             ):
                 for attr, new in synonyms.items():
@@ -375,7 +379,17 @@ def _auto_merge_duplicated_names(self):
             specified in the config
         """
         sample_names_list = [getattr(s, self.sample_name_colname) for s in self.samples]
-        dups_set = set([x for x in sample_names_list if sample_names_list.count(x) > 1])
+        dups_set = set(
+            [
+                x
+                for x in track(
+                    sample_names_list,
+                    description="Detecting duplicate sample names",
+                    disable=not self.is_sample_table_large,
+                )
+                if sample_names_list.count(x) > 1
+            ]
+        )
         if not dups_set:
             # all sample names are unique
             return
@@ -521,7 +535,7 @@ def attr_imply(self):
                 )
         for sample in track(
             self.samples,
-            description=f"Implying",
+            description=f"Implying sample attributes",
             disable=not self.is_sample_table_large,
         ):
             for implication in implications:
@@ -570,7 +584,7 @@ def attr_derive(self, attrs=None):
         _LOGGER.debug("Derivations to be done: {}".format(derivations))
         for sample in track(
             self.samples,
-            description="Deriving",
+            description="Deriving sample attributes",
             disable=not self.is_sample_table_large,
         ):
             for attr in derivations:

diff --git a/peppy/sample.py b/peppy/sample.py
@@ -6,6 +6,7 @@
 from logging import getLogger
 from string import Formatter
 
+import pandas as pd
 import yaml
 from attmap import AttMap, PathExAttMap
 
@@ -372,6 +373,10 @@ def _excl_from_repr(self, k, cls):
         """Exclude the Project reference from representation."""
         return k.startswith("_") or super(Sample, self)._excl_from_repr(k, cls)
 
+    def _excl_classes_from_todict(self):
+        """Exclude pandas.DataFrame from dict representation"""
+        return (pd.DataFrame,)
+
     def _try_touch_samples(self):
         """
         Safely sets sample edited flag to true

diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
@@ -1,4 +1,4 @@
-attmap>=0.12.5
+# attmap>=0.13.1
 logmuse>=0.2
 pandas>=0.24.2
 pyyaml

diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
@@ -0,0 +1 @@
+-e git+git://github.com/pepkit/attmap@dev#egg=attmap
diff --git a/setup.py b/setup.py
@@ -1,81 +1,39 @@
-#! /usr/bin/env python
-
-import os
 import sys
 
 from setuptools import setup
 
-REQDIR = "requirements"
-
-
-def read_reqs(reqs_name):
-    deps = []
-    with open(os.path.join(REQDIR, "requirements-{}.txt".format(reqs_name)), "r") as f:
-        for l in f:
-            if not l.strip():
-                continue
-            # deps.append(l.split("=")[0].rstrip("<>"))
-            deps.append(l)
-    return deps
-
-
-# Additional keyword arguments for setup().
-extra = {}
+PACKAGE_NAME = "peppy"
 
 # Ordinary dependencies
-DEPENDENCIES = read_reqs("all")
-
-# numexpr for pandas
-try:
-    import numexpr
-except ImportError:
-    # No numexpr is OK for pandas.
-    pass
-else:
-    # pandas 0.20.2 needs updated numexpr; the claim is 2.4.6, but that failed.
-    DEPENDENCIES.append("numexpr>=2.6.2")
+DEPENDENCIES = []
+with open("requirements/requirements-all.txt", "r") as reqs_file:
+    for line in reqs_file:
+        if not line.strip():
+            continue
+        # DEPENDENCIES.append(line.split("=")[0].rstrip("<>"))
+        DEPENDENCIES.append(line)
 
-# 2to3
+# Additional keyword arguments for setup().
+extra = {"install_requires": DEPENDENCIES}
 if sys.version_info >= (3,):
     extra["use_2to3"] = True
-extra["install_requires"] = DEPENDENCIES
-
-
-# Additional files to include with package
-def get_static(name, condition=None):
-    static = [
-        os.path.join(name, f)
-        for f in os.listdir(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), name)
-        )
-    ]
-    if condition is None:
-        return static
-    else:
-        return [i for i in filter(lambda x: eval(condition), static)]
 
-
-# scripts to be added to the $PATH
-# scripts = get_static("scripts", condition="'.' in x")
-scripts = None
-
-with open("peppy/_version.py", "r") as versionfile:
+with open(f"{PACKAGE_NAME}/_version.py", "r") as versionfile:
     version = versionfile.readline().split()[-1].strip("\"'\n")
 
 # Handle the pypi README formatting.
 try:
     import pypandoc
 
     long_description = pypandoc.convert_file("README.md", "rst")
-    print("Pandoc conversion succeeded")
+    msg = "\033[032mPandoc conversion succeeded.\033[0m"
 except (IOError, ImportError, OSError):
-    print("Warning: pandoc conversion failed!")
+    msg = "\033[0;31mWarning: pandoc conversion failed!\033[0m"
     long_description = open("README.md").read()
 
-
 setup(
-    name="peppy",
-    packages=["peppy"],
+    name=PACKAGE_NAME,
+    packages=[PACKAGE_NAME],
     version=version,
     description="A python-based project metadata manager for portable encapsulated projects",
     long_description=long_description,
@@ -91,14 +49,12 @@ def get_static(name, condition=None):
     ],
     keywords="project, metadata, bioinformatics, sequencing, ngs, workflow",
     url="https://github.com/pepkit/peppy/",
-    author=u"Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro",
+    author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro",
     license="BSD2",
-    scripts=scripts,
     include_package_data=True,
-    test_suite="tests",
-    tests_require=read_reqs("dev"),
+    tests_require=(["pytest"]),
     setup_requires=(
         ["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []
     ),
-    **extra
+    **extra,
 )
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
@@ -0,0 +1 @@
+-e git+git://github.com/pepkit/attmap@dev#egg=attmap