diff --git a/docs/changelog.md b/docs/changelog.md index 1c1e417a..530d3f69 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,7 +1,9 @@ # Changelog This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. - +## [0.35.2] -- 2022-09-13 +### Fixed +- Fixed `to_dict` method returning `NaN` for missing values; the method now returns `None` instead ## [0.35.1] -- 2022-09-07 ### Changed - Organization of test files. Separated unittests from smoketests. diff --git a/peppy/_version.py b/peppy/_version.py index 98bb08f9..896d9888 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.35.1" +__version__ = "0.35.2" diff --git a/peppy/sample.py b/peppy/sample.py index d246220d..d2ec0de8 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -138,7 +138,7 @@ def _obj2dict(obj, name=None): elif isnull(obj): # Missing values as evaluated by pandas.isnull(). # This gets correctly written into yaml. 
- return "NaN" + return None else: return obj diff --git a/tests/conftest.py b/tests/conftest.py index 9b095f38..de98c0e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ import pandas as pd import pytest +import json __author__ = "Michal Stolarczyk" __email__ = "michal.stolarczyk@nih.gov" @@ -36,6 +37,11 @@ def example_pep_csv_path(request): return get_path_to_example_file(EPB, request.param, "sample_table.csv") +@pytest.fixture +def example_pep_nextflow_csv_path(): + return get_path_to_example_file(EPB, "nextflow_taxprofiler_pep", "samplesheet.csv") + + @pytest.fixture def example_pep_cfg_noname_path(request): return get_path_to_example_file(EPB, "noname", request.param) diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml new file mode 100644 index 00000000..c3763bc6 --- /dev/null +++ b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml @@ -0,0 +1,2 @@ +pep_version: "2.1.0" +sample_table: "https://raw.githubusercontent.com/pepkit/example_peps/master/example_nextflow_taxprofiler_pep/samplesheet.csv" diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv new file mode 100644 index 00000000..1b17b767 --- /dev/null +++ b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv @@ -0,0 +1,7 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta +2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz +2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, 
+2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, +2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml new file mode 100644 index 00000000..eedeba4a --- /dev/null +++ b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml @@ -0,0 +1,41 @@ +description: A schema for validation of samplesheet.csv for taxprofiler pipeline. +imports: + - https://schema.databio.org/pep/2.1.0.yaml +properties: + samples: + type: array + items: + type: object + properties: + sample: + type: string + description: "Sample identifier." + pattern: "^\\S*$" + run_accession: + type: string + description: "Run accession number." + instrument_platform: + type: string + description: "Name of the platform that sequenced the samples." + enum: ["ABI_SOLID", "BGISEQ", "CAPILLARY", "COMPLETE_GENOMICS", "DNBSEQ", "HELICOS", "ILLUMINA", "ION_TORRENT", "LS454", "OXFORD_NANOPORE", "PACBIO_SMRT"] + fastq1: + type: string + description: "FASTQ file for read 1." + pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$" + fastq2: + type: string + description: "FASTQ file for read 2." 
+ pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$" + fasta: + type: string + description: "Path to FASTA file." + required: + - sample + - run_accession + - instrument_platform + files: + - fastq1 + - fastq2 + - fasta +required: + - samples diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv new file mode 100644 index 00000000..0ec5a94d --- /dev/null +++ b/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv @@ -0,0 +1,7 @@ +sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta +2611,ERR5766174,ILLUMINA,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz +2612,ERR5766176,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, +2612,ERR5766176_B,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ERR5766180,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, +2613,ERR5766181,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,ERR3201952,OXFORD_NANOPORE,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, diff --git a/tests/data/example_peps-master/example_project_as_dictionary/project.json 
b/tests/data/example_peps-master/example_project_as_dictionary/project.json new file mode 100644 index 00000000..fb64dc6d --- /dev/null +++ b/tests/data/example_peps-master/example_project_as_dictionary/project.json @@ -0,0 +1 @@ +{"pep":{"pep_version":"2.1.0","project_name":"GSE124224","sample_table":"../pephub/examples/geo/GSE124224/GSE124224_samples.csv","sample_modifiers":{"append":{"output_file_path":"FILES"},"derive":{"sources":{"FILES":"/{GSE}/{file}"},"attributes":["output_file_path"]}}},"pep_version":"2.1.0","samples":[{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525976_Sample_DMSO_sg12_Control_1.bw","type":"BW","sgguide":"sgNT-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525976/suppl/GSM3525976_Sample_DMSO_sg12_Control_1.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621810","cell line":"MCF-7","file_size":"342675949","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg12_Control_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg12_Control_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525976","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer 
(v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 
0.01).","output_file_path":"/GSE124224/GSM3525976_Sample_DMSO_sg12_Control_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525977_Sample_DMSO_sg12_Control_2.bw","type":"BW","sgguide":"sgNT-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525977/suppl/GSM3525977_Sample_DMSO_sg12_Control_2.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621809","cell line":"MCF-7","file_size":"456007555","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg12_Control_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg12_Control_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525977","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 
(Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525977_Sample_DMSO_sg12_Control_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525978_Sample_DMSO_sg13_Control_1.bw","type":"BW","sgguide":"sgNT-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525978/suppl/GSM3525978_Sample_DMSO_sg13_Control_1.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621808","cell line":"MCF-7","file_size":"250158361","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg13_Control_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg13_Control_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, 
GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525978","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw 
reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525978_Sample_DMSO_sg13_Control_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525979_Sample_DMSO_sg13_Control_2.bw","type":"BW","sgguide":"sgNT-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525979/suppl/GSM3525979_Sample_DMSO_sg13_Control_2.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621807","cell line":"MCF-7","file_size":"257784654","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg13_Control_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg13_Control_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525979","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and 
then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 
0.01).","output_file_path":"/GSE124224/GSM3525979_Sample_DMSO_sg13_Control_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525980_Sample_DMSO_sg1_KO_1.bw","type":"BW","sgguide":"sgARID1A-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525980/suppl/GSM3525980_Sample_DMSO_sg1_KO_1.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621805","cell line":"MCF-7","file_size":"162113714","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg1_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg1_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525980","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and 
supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525980_Sample_DMSO_sg1_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525981_Sample_DMSO_sg1_KO_2.bw","type":"BW","sgguide":"sgARID1A-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525981/suppl/GSM3525981_Sample_DMSO_sg1_KO_2.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621804","cell line":"MCF-7","file_size":"172503888","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg1_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg1_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New 
York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525981","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 
TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525981_Sample_DMSO_sg1_KO_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525982_Sample_DMSO_sg2_KO_1.bw","type":"BW","sgguide":"sgARID1A-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525982/suppl/GSM3525982_Sample_DMSO_sg2_KO_1.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621802","cell line":"MCF-7","file_size":"194671815","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg2_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg2_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525982","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer 
Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525982_Sample_DMSO_sg2_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525983_Sample_DMSO_sg2_KO_2.bw","type":"BW","sgguide":"sgARID1A-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525983/suppl/GSM3525983_Sample_DMSO_sg2_KO_2.bw","genotype":"ARID1A 
KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621800","cell line":"MCF-7","file_size":"233406053","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg2_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg2_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525983","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq 
ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525983_Sample_DMSO_sg2_KO_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525984_Sample_DMSO_sg6_KO_1.bw","type":"BW","sgguide":"sgARID1A-3","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525984/suppl/GSM3525984_Sample_DMSO_sg6_KO_1.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621799","cell line":"MCF-7","file_size":"231716759","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg6_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg6_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525984","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York 
Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on 
individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525984_Sample_DMSO_sg6_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525985_Sample_DMSO_sg6_KO_2.bw","type":"BW","sgguide":"sgARID1A-3","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525985/suppl/GSM3525985_Sample_DMSO_sg6_KO_2.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621798","cell line":"MCF-7","file_size":"294828943","treatment":"DMSO","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_DMSO_sg6_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_DMSO_sg6_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525985","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 
(Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525985_Sample_DMSO_sg6_KO_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525986_Sample_FULV_sg12_Control_1.bw","type":"BW","sgguide":"sgNT-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525986/suppl/GSM3525986_Sample_FULV_sg12_Control_1.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621796","cell line":"MCF-7","file_size":"277432865","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg12_Control_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg12_Control_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, 
GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525986","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw 
reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525986_Sample_FULV_sg12_Control_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525987_Sample_FULV_sg12_Control_2.bw","type":"BW","sgguide":"sgNT-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525987/suppl/GSM3525987_Sample_FULV_sg12_Control_2.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621854","cell line":"MCF-7","file_size":"474080506","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg12_Control_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg12_Control_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525987","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 
and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 
0.01).","output_file_path":"/GSE124224/GSM3525987_Sample_FULV_sg12_Control_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525988_Sample_FULV_sg13_Control_1.bw","type":"BW","sgguide":"sgNT-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525988/suppl/GSM3525988_Sample_FULV_sg13_Control_1.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621853","cell line":"MCF-7","file_size":"315917318","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg13_Control_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg13_Control_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525988","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 
(Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525988_Sample_FULV_sg13_Control_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525989_Sample_FULV_sg13_Control_2.bw","type":"BW","sgguide":"sgNT-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525989/suppl/GSM3525989_Sample_FULV_sg13_Control_2.bw","genotype":"Control","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621852","cell line":"MCF-7","file_size":"291750803","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg13_Control_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg13_Control_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, 
GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525989","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw 
reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525989_Sample_FULV_sg13_Control_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525990_Sample_FULV_sg1_KO_1.bw","type":"BW","sgguide":"sgARID1A-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525990/suppl/GSM3525990_Sample_FULV_sg1_KO_1.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621851","cell line":"MCF-7","file_size":"184466440","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg1_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg1_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525990","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled 
using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 
0.01).","output_file_path":"/GSE124224/GSM3525990_Sample_FULV_sg1_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525991_Sample_FULV_sg1_KO_2.bw","type":"BW","sgguide":"sgARID1A-1","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525991/suppl/GSM3525991_Sample_FULV_sg1_KO_2.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621850","cell line":"MCF-7","file_size":"263762867","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg1_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg1_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525991","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and 
supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525991_Sample_FULV_sg1_KO_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525992_Sample_FULV_sg2_KO_1.bw","type":"BW","sgguide":"sgARID1A-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525992/suppl/GSM3525992_Sample_FULV_sg2_KO_1.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621848","cell line":"MCF-7","file_size":"150759675","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg2_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg2_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New 
York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525992","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 
TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525992_Sample_FULV_sg2_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525993_Sample_FULV_sg2_KO_2.bw","type":"BW","sgguide":"sgARID1A-2","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525993/suppl/GSM3525993_Sample_FULV_sg2_KO_2.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621847","cell line":"MCF-7","file_size":"295766583","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg2_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg2_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525993","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer 
Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525993_Sample_FULV_sg2_KO_2.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525994_Sample_FULV_sg6_KO_1.bw","type":"BW","sgguide":"sgARID1A-3","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525994/suppl/GSM3525994_Sample_FULV_sg6_KO_1.bw","genotype":"ARID1A 
KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621846","cell line":"MCF-7","file_size":"208899495","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg6_KO_1","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg6_KO_1","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525994","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's 
TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525994_Sample_FULV_sg6_KO_1.bw"},{"GSE":"GSE124224","SRA":"https://www.ncbi.nlm.nih.gov/sra?term","file":"GSM3525995_Sample_FULV_sg6_KO_2.bw","type":"BW","sgguide":"sgARID1A-3","file_url":"ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3525nnn/GSM3525995/suppl/GSM3525995_Sample_FULV_sg6_KO_2.bw","genotype":"ARID1A KO","BioSample":"https://www.ncbi.nlm.nih.gov/biosample/SAMN10621844","cell line":"MCF-7","file_size":"238580937","treatment":"Fulvestrant","Sample_type":"SRA","sample_name":"ATAC-Seq Sample_FULV_sg6_KO_2","Genome_build":"hg38","Sample_title":"ATAC-Seq Sample_FULV_sg6_KO_2","Sample_status":"Public on Nov 13 2019","Sample_series_id":"GSE124224, GSE124228","Sample_taxid_ch1":"9606","Sample_platform_id":"GPL11154","Sample_contact_city":"New York","Sample_contact_name":"Sagar,,Chhangawala","Sample_molecule_ch1":"genomic DNA","Sample_organism_ch1":"Homo sapiens","Sample_channel_count":"1","Sample_contact_email":"sagar.cornell@gmail.com","Sample_contact_state":"NY","Sample_geo_accession":"GSM3525995","Sample_data_row_count":"0","Sample_library_source":"genomic","Sample_contact_address":"1300 York 
Ave.","Sample_contact_country":"USA","Sample_data_processing":"Duplicate reads were then removed using MarkDuplicates (v2.9.0, REMOVE_DUPLICATES, In order to account for Tn5 shift, all positive strand reads in each sample were shifted by +4bps and all negative strand reads were shifted by -5bps., BigWig tracks were generated using MACS2 and then scaled using rtracklayer (v1.40.6).","Sample_source_name_ch1":"Breast Cancer Cell","Sample_submission_date":"Dec 20 2018","Sample_instrument_model":"Illumina HiSeq 2000","Sample_last_update_date":"Nov 15 2019","Sample_library_strategy":"ATAC-seq","Sample_contact_institute":"Weill Cornell Medical College","Sample_library_selection":"other","Sample_contact_department":"PBSB","Sample_growth_protocol_ch1":"MCF7 cells were obtained from ATCC and were cultured in DMEM/F-12 (Corning) and supplemented with 10% FBS, MEM non-essential amino acids (Corning), 50U/ml penicillin, and 50ng/ml streptomycin under normal oxygen conditions (5% CO2, 37 °C)","Sample_extract_protocol_ch1":"ATAC-seq was performed as described by Buenrostro et al, 2013 with the exception that 0.2% NP40 was used for cell lysis., ATAC-seq libraries were prepared using Illumina's TruSeq ChIP sample prep.Libraries were validated using the Agient Technologies 2100 Bioanalyzer and Qubit high sensitivity assay.","Sample_treatment_protocol_ch1":"MCF7 cells were treated with DMSO of Fulvestrant (100nM) for 24 hours.","Sample_contact_zip/postal_code":"10065","Supplementary_files_format_and_content":"normalized bw files","Raw reads were trimmed using trimmomatic (v0.35, Parameters":"TruSeq3-PE adapters, LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36).","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters":"-X2000 –local –mm --no-mixed --no-discordant).","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters":"--nomodel --extsize 150 --shift -75 --slocal 5000 --llocal 20000 -B --keep-dup all -p 0.05) and then on 
individual samples (-p 0.01).","output_file_path":"/GSE124224/GSM3525995_Sample_FULV_sg6_KO_2.bw"}],"sample_table_indx":"sample_name","sample_attributes":["GSE","SRA","file","type","sgguide","file_url","genotype","BioSample","cell line","file_size","treatment","Sample_type","sample_name","Genome_build","Sample_title","Sample_status","Sample_series_id","Sample_taxid_ch1","Sample_platform_id","Sample_contact_city","Sample_contact_name","Sample_molecule_ch1","Sample_organism_ch1","Sample_channel_count","Sample_contact_email","Sample_contact_state","Sample_geo_accession","Sample_data_row_count","Sample_library_source","Sample_contact_address","Sample_contact_country","Sample_data_processing","Sample_source_name_ch1","Sample_submission_date","Sample_instrument_model","Sample_last_update_date","Sample_library_strategy","Sample_contact_institute","Sample_library_selection","Sample_contact_department","Sample_growth_protocol_ch1","Sample_extract_protocol_ch1","Sample_treatment_protocol_ch1","Sample_contact_zip/postal_code","Supplementary_files_format_and_content","Raw reads were trimmed using trimmomatic (v0.35, Parameters","Each sample was aligned to hg38 genome using bowtie2 (v2.2.6, Parameters","Peak calling was first performed on after pooling all samples using MACS2 (v2.1.0, parameters"]} diff --git a/tests/test_Project.py b/tests/test_Project.py index a1c48a5f..4c46b707 100644 --- a/tests/test_Project.py +++ b/tests/test_Project.py @@ -4,6 +4,7 @@ import socket import tempfile +import numpy as np import pytest from pandas import DataFrame from yaml import dump, safe_load @@ -338,6 +339,16 @@ def test_from_dict_instatiation(self, example_pep_cfg_path): p2 = Project().from_dict(p1.to_dict(extended=True)) assert p1 == p2 + def test_to_dict_does_not_create_nans(self, example_pep_nextflow_csv_path): + wrong_values = ["NaN", np.nan, "nan"] + + p1 = Project( + cfg=example_pep_nextflow_csv_path, sample_table_index="sample" + ).to_dict() + for sample in p1.get("_samples"): + 
for attribute, value in sample.items(): + assert value not in wrong_values + @pytest.mark.parametrize("example_pep_cfg_path", ["missing_version"], indirect=True) def test_missing_version(self, example_pep_cfg_path): """