Skip to content

Commit

Permalink
Merge pull request #418 from pepkit/417-peppyprojectto_dict-creates-u…
Browse files Browse the repository at this point in the history
…nwanted-nan-values

Remove creating `NaN`s in `to_dict` method
  • Loading branch information
rafalstepien authored Sep 13, 2022
2 parents 7197c26 + 4148ec7 commit ca8aa90
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 3 deletions.
4 changes: 3 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Changelog

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.35.2] -- 2022-09-13
### Fixed
- The `to_dict` method no longer returns `NaN` for missing values; it now returns `None` instead
## [0.35.1] -- 2022-09-07
### Changed
- Organization of test files. Separated unittests from smoketests.
Expand Down
2 changes: 1 addition & 1 deletion peppy/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.35.1"
__version__ = "0.35.2"
2 changes: 1 addition & 1 deletion peppy/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def _obj2dict(obj, name=None):
elif isnull(obj):
# Missing values as evaluated by pandas.isnull().
# This gets correctly written into yaml.
return "NaN"
return None
else:
return obj

Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pandas as pd
import pytest
import json

__author__ = "Michal Stolarczyk"
__email__ = "[email protected]"
Expand Down Expand Up @@ -36,6 +37,11 @@ def example_pep_csv_path(request):
return get_path_to_example_file(EPB, request.param, "sample_table.csv")


@pytest.fixture
def example_pep_nextflow_csv_path():
    """
    Provide the location of the nextflow taxprofiler sample sheet.

    The location is looked up with ``get_path_to_example_file`` using the
    ``EPB`` constant, the "nextflow_taxprofiler_pep" example name and the
    "samplesheet.csv" file name.
    """
    example_name = "nextflow_taxprofiler_pep"
    file_name = "samplesheet.csv"
    return get_path_to_example_file(EPB, example_name, file_name)


@pytest.fixture
def example_pep_cfg_noname_path(request):
    """
    Provide the location of a file belonging to the "noname" example.

    The file name is taken from ``request.param``, so the fixture is meant
    to be parametrized by the test that uses it.
    """
    file_name = request.param
    return get_path_to_example_file(EPB, "noname", file_name)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pep_version: "2.1.0"
sample_table: "https://raw.githubusercontent.com/pepkit/example_peps/master/example_nextflow_taxprofiler_pep/samplesheet.csv"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta
2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz,
2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz,
ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Validation schema for the taxprofiler pipeline's samplesheet.csv.
# It imports the PEP 2.1.0 schema and adds per-sample property constraints.
description: A schema for validation of samplesheet.csv for taxprofiler pipeline.
imports:
- https://schema.databio.org/pep/2.1.0.yaml
properties:
samples:
type: array
items:
type: object
properties:
sample:
type: string
description: "Sample identifier."
# No whitespace characters are allowed in the identifier.
pattern: "^\\S*$"
run_accession:
type: string
description: "Run accession number."
instrument_platform:
type: string
description: "Name of the platform that sequenced the samples."
enum: ["ABI_SOLID", "BGISEQ", "CAPILLARY", "COMPLETE_GENOMICS", "DNBSEQ", "HELICOS", "ILLUMINA", "ION_TORRENT", "LS454", "OXFORD_NANOPORE", "PACBIO_SMRT"]
# NOTE(review): the accompanying samplesheet.csv header names these columns
# fastq_1 / fastq_2 -- confirm that fastq1 / fastq2 are the intended keys.
fastq1:
type: string
description: "FASTQ file for read 1."
# NOTE(review): the '.' before the group is unescaped, so it matches any
# single character -- confirm whether a literal dot was intended.
pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
fastq2:
type: string
description: "FASTQ file for read 2."
# NOTE(review): same unescaped '.' as in the fastq1 pattern.
pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
fasta:
type: string
description: "Path to FASTA file."
required:
- sample
- run_accession
- instrument_platform
# NOTE(review): these names must match the property keys declared above;
# see the fastq1 / fastq_1 note.
files:
- fastq1
- fastq2
- fasta
required:
- samples
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766176_B,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz,
2612,ERR5766180,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR5766181,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz,
ERR3201952,ERR3201952,OXFORD_NANOPORE,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,,

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions tests/test_Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import socket
import tempfile

import numpy as np
import pytest
from pandas import DataFrame
from yaml import dump, safe_load
Expand Down Expand Up @@ -338,6 +339,16 @@ def test_from_dict_instatiation(self, example_pep_cfg_path):
p2 = Project().from_dict(p1.to_dict(extended=True))
assert p1 == p2

def test_to_dict_does_not_create_nans(self, example_pep_nextflow_csv_path):
    """
    Verify that ``Project.to_dict`` does not emit NaN-like sample values.

    No sample attribute in the resulting dictionary may be the string
    "NaN" or "nan", nor a floating-point NaN.

    :param str example_pep_nextflow_csv_path: path to the sample table
        used to build the project
    """
    nan_strings = ("NaN", "nan")

    project_dict = Project(
        cfg=example_pep_nextflow_csv_path, sample_table_index="sample"
    ).to_dict()
    samples = project_dict.get("_samples")
    # Guard against a vacuous pass when no samples were loaded.
    assert samples, "expected at least one sample in the project dict"

    for sample in samples:
        for attribute, value in sample.items():
            if isinstance(value, str):
                assert (
                    value not in nan_strings
                ), f"attribute '{attribute}' holds NaN string {value!r}"
            elif isinstance(value, (float, np.floating)):
                # NaN never compares equal to itself, so a membership test
                # against a list containing np.nan only matches that exact
                # object; np.isnan detects every NaN instance.
                assert not np.isnan(
                    value
                ), f"attribute '{attribute}' holds a floating-point NaN"

@pytest.mark.parametrize("example_pep_cfg_path", ["missing_version"], indirect=True)
def test_missing_version(self, example_pep_cfg_path):
"""
Expand Down

0 comments on commit ca8aa90

Please sign in to comment.