Skip to content

Commit

Permalink
Feat/meth array template (#408)
Browse files Browse the repository at this point in the history
* Add optional methylation array data props defined by GDC

* Add MethylationArrayTemplate

* Update channel prop

* Add template to menu

* Update throttle param

* Rebuild NF.jsonld, json

* Add data type values

* Lint

* Rebuild NF.jsonld, json

---------

Co-authored-by: nf-osi[bot] <[email protected]>
  • Loading branch information
anngvu and nfosi-service authored Mar 20, 2024
1 parent 1019021 commit 9dd7d7a
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 6 deletions.
163 changes: 162 additions & 1 deletion NF.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@
}, {
"@id" : "bts:Data",
"@type" : "rdfs:Class",
"rdfs:comment" : "TBD",
"rdfs:comment" : "What the data (file) contains.",
"rdfs:label" : "Data",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
Expand Down Expand Up @@ -2732,6 +2732,81 @@
"@id" : "bts:progressReportNumber"
} ],
"sms:displayName" : "UpdateMilestoneReport"
}, {
"rdfs:subClassOf" : [ {
"@id" : "bts:GenomicsArrayTemplate"
} ],
"@id" : "bts:MethylationArrayTemplate",
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:required" : "sms:false",
"sms:requiresComponent" : "",
"rdfs:label" : "MethylationArrayTemplate",
"rdfs:comment" : "Template for raw data files (idat) from DNA methylation arrays.",
"@type" : "rdfs:Class",
"sms:requiresDependency" : [ {
"@id" : "bts:Component"
}, {
"@id" : "bts:Filename"
}, {
"@id" : "bts:fileFormat"
}, {
"@id" : "bts:resourceType"
}, {
"@id" : "bts:dataType"
}, {
"@id" : "bts:dataSubtype"
}, {
"@id" : "bts:assay"
}, {
"@id" : "bts:individualID"
}, {
"@id" : "bts:species"
}, {
"@id" : "bts:sex"
}, {
"@id" : "bts:age"
}, {
"@id" : "bts:ageUnit"
}, {
"@id" : "bts:diagnosis"
}, {
"@id" : "bts:nf1Genotype"
}, {
"@id" : "bts:nf2Genotype"
}, {
"@id" : "bts:tumorType"
}, {
"@id" : "bts:modelSystemName"
}, {
"@id" : "bts:organ"
}, {
"@id" : "bts:comments"
}, {
"@id" : "bts:parentSpecimenID"
}, {
"@id" : "bts:specimenID"
}, {
"@id" : "bts:aliquotID"
}, {
"@id" : "bts:platform"
}, {
"@id" : "bts:nucleicAcidSource"
}, {
"@id" : "bts:specimenPreparationMethod"
}, {
"@id" : "bts:channel"
}, {
"@id" : "bts:chipID"
}, {
"@id" : "bts:chipPosition"
}, {
"@id" : "bts:plateName"
}, {
"@id" : "bts:plateWell"
} ],
"sms:displayName" : "MethylationArrayTemplate"
}, {
"rdfs:subClassOf" : [ {
"@id" : "bts:BiologicalAssayDataTemplate"
Expand Down Expand Up @@ -5523,6 +5598,17 @@
"rdfs:comment" : "(Legacy/deprecated annotation) Whether or not is paired-end sequencing (Yes; No).",
"@type" : "rdfs:Class",
"sms:displayName" : "isPairedEnd"
}, {
"@id" : "bts:plateName",
"@type" : "rdfs:Class",
"rdfs:comment" : "User-specified identifier of the plate used to prepare the sample for analysis.",
"rdfs:label" : "plateName",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "plateName",
"sms:required" : "sms:false"
}, {
"rdfs:subClassOf" : [ ],
"@id" : "bts:numberOfSchwannomas",
Expand Down Expand Up @@ -6129,6 +6215,22 @@
},
"sms:displayName" : "meanCoverage",
"sms:required" : "sms:false"
}, {
"rdfs:subClassOf" : [ ],
"@id" : "bts:channel",
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:required" : "sms:true",
"schema:rangeIncludes" : [ {
"@id" : "bts:Cy3"
}, {
"@id" : "bts:Cy5"
} ],
"rdfs:label" : "channel",
"rdfs:comment" : "Color channel used to generate data file.",
"@type" : "rdfs:Class",
"sms:displayName" : "channel"
}, {
"@id" : "bts:genePerturbed",
"@type" : "rdfs:Class",
Expand Down Expand Up @@ -7015,6 +7117,10 @@
"@id" : "bts:genomicVariants"
}, {
"@id" : "bts:rawcounts"
}, {
"@id" : "bts:RawIntensities"
}, {
"@id" : "bts:NormalizedIntensities"
}, {
"@id" : "bts:PharmacokineticStudy"
}, {
Expand Down Expand Up @@ -7675,6 +7781,28 @@
"rdfs:comment" : "Characterization of the manifestation of Dermal neurofibromas.",
"@type" : "rdfs:Class",
"sms:displayName" : "dermalNeurofibromas"
}, {
"@id" : "bts:plateWell",
"@type" : "rdfs:Class",
"rdfs:comment" : "User-specified identifier for the specific well of the plate used to prepare the sample for analysis.",
"rdfs:label" : "plateWell",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "plateWell",
"sms:required" : "sms:false"
}, {
"@id" : "bts:chipID",
"@type" : "rdfs:Class",
"rdfs:comment" : "User-specified identifier for the chip used to perform the methylation microarray.",
"rdfs:label" : "chipID",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "chipID",
"sms:required" : "sms:false"
}, {
"rdfs:subClassOf" : [ ],
"@id" : "bts:proteinExtractSource",
Expand Down Expand Up @@ -8890,6 +9018,17 @@
},
"sms:displayName" : "bisulfiteConversionKitID",
"sms:required" : "sms:false"
}, {
"@id" : "bts:chipPosition",
"@type" : "rdfs:Class",
"rdfs:comment" : "User-specified identifier for the specific position on the chip that the sample was loaded into to perform the methylation microarray.",
"rdfs:label" : "chipPosition",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "chipPosition",
"sms:required" : "sms:false"
}, {
"rdfs:subClassOf" : [ ],
"@id" : "bts:assay",
Expand Down Expand Up @@ -21150,6 +21289,17 @@
},
"sms:displayName" : "University of Washington, Seattle",
"sms:required" : "sms:false"
}, {
"@id" : "bts:NormalizedIntensities",
"@type" : "rdfs:Class",
"rdfs:comment" : "Normalized intensity values from the instrument/machine.",
"rdfs:label" : "NormalizedIntensities",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "NormalizedIntensities",
"sms:required" : "sms:false"
}, {
"@id" : "bts:LTQOrbitrapXL",
"@type" : "rdfs:Class",
Expand Down Expand Up @@ -21722,6 +21872,17 @@
},
"sms:displayName" : "bed gappedPeak",
"sms:required" : "sms:false"
}, {
"@id" : "bts:RawIntensities",
"@type" : "rdfs:Class",
"rdfs:comment" : "Raw intensity values from the instrument.",
"rdfs:label" : "RawIntensities",
"rdfs:subClassOf" : [ ],
"schema:isPartOf" : {
"@id" : "http://schema.biothings.io/"
},
"sms:displayName" : "RawIntensities",
"sms:required" : "sms:false"
}, {
"@id" : "bts:AnnotatedSomaticVariants",
"@type" : "rdfs:Class",
Expand Down
3 changes: 2 additions & 1 deletion dca-template-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
{"display_name": "Whole Exome Sequencing Assay", "schema_name": "WESTemplate", "type": "file"},
{"display_name": "RNA Sequencing Assay", "schema_name": "RNASeqTemplate", "type": "file"},
{"display_name": "Single-cell RNA Sequencing Assay", "schema_name": "ScRNASeqTemplate", "type": "file"},
{"display_name": "Epigenetics Assay", "schema_name": "EpigeneticsAssayTemplate", "type": "file"},
{"display_name": "Epigenetics Sequencing Assay", "schema_name": "EpigeneticsAssayTemplate", "type": "file"},
{"display_name": "Methylation Array", "schema_name": "MethylationArrayTemplate", "type": "file"},
{"display_name": "Other Genomics Assay", "schema_name": "GenomicsAssayTemplate", "type": "file"},
{"display_name": "Extended Genomics Assay", "schema_name": "GenomicsAssayTemplateExtended", "type": "file"},
{"display_name": "Imaging Assay", "schema_name": "ImagingAssayTemplate", "type": "file"},
Expand Down
7 changes: 6 additions & 1 deletion modules/Data/Data.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
enums:
Data:
description: What the data (file) contains.
permissible_values:
immunoassay:
description: Laboratory test involving interaction of antigens with specific antibodies.
Expand Down Expand Up @@ -60,6 +61,10 @@ enums:
raw counts:
description: The number or amount of something.
meaning: http://purl.obolibrary.org/obo/NCIT_C25463
RawIntensities:
description: Raw intensity values from the instrument.
NormalizedIntensities:
description: Normalized intensity values from the instrument/machine.
Pharmacokinetic Study:
description: A study of the process by which a drug is absorbed, distributed, metabolized, and eliminated by the body.
meaning: http://purl.obolibrary.org/obo/NCIT_C49663
Expand All @@ -71,7 +76,7 @@ enums:
meaning: https://en.wikipedia.org/wiki/Chromatin_remodeling
StructuralVariants:
description: Genomic variants data that covers deletions, duplications, CNVs, insertions, inversions, and translocations, which may be derived from specialized variant calling workflows.
source: hhttps://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation
source: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation
GermlineVariants:
description: Called germline variants
meaning: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_germline_variation
Expand Down
17 changes: 17 additions & 0 deletions modules/Template/Data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,23 @@ classes:
dataType:
- geneExpression

MethylationArrayTemplate:
is_a: GenomicsArrayTemplate
description: Template for raw data files (idat) from DNA methylation arrays.
source: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=raw_methylation_array
slots:
- chipID
- chipPosition
- plateName
- plateWell
annotations:
requiresComponent: ''
required: false
match:
assay:
- methylation array


#########################################################
# Biological Assay Data / Sequencing / Processed
#########################################################
Expand Down
22 changes: 22 additions & 0 deletions modules/props.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,20 @@ slots:
description: A cell type is a distinct morphological or functional form of cell.
range: Cell
required: false
channel:
description: Color channel used to generate data file.
required: true
range: ChannelEnum
source: Genomic Data Commons
chipID:
description: User-specified identifier for the chip used to perform the methylation microarray.
required: false
source: Genomic Data Commons
chipPosition:
description: User-specified identifier for the specific position on the chip that
the sample was loaded into to perform the methylation microarray.
required: false
source: Genomic Data Commons
citation:
description: Citation (e.g. doi) that usage of data or resource should be cited with.
required: false
Expand Down Expand Up @@ -751,6 +765,14 @@ slots:
range: PresenceEnum
required: false
title: Pheochromocytoma
plateName:
description: User-specified identifier of the plate used to prepare the sample for analysis.
required: false
source: Genomic Data Commons
plateWell:
description: User-specified identifier for the specific well of the plate used to prepare the sample for analysis.
required: false
source: Genomic Data Commons
platform:
description: A sequencing platform, microscope, spectroscope/plate reader, or
other platform for collecting data.
Expand Down
4 changes: 3 additions & 1 deletion registered-json-schemas/PortalDataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
},
"dataType": {
"items": {
"description": "",
"description": "What the data (file) contains.",
"enum": [
"immunoassay",
"behavior process",
Expand All @@ -217,6 +217,8 @@
"genomicFeatures",
"genomicVariants",
"raw counts",
"RawIntensities",
"NormalizedIntensities",
"Pharmacokinetic Study",
"mask image",
"chromatinActivity",
Expand Down
4 changes: 3 additions & 1 deletion registered-json-schemas/PortalStudy.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
},
"dataType": {
"items": {
"description": "",
"description": "What the data (file) contains.",
"enum": [
"immunoassay",
"behavior process",
Expand All @@ -45,6 +45,8 @@
"genomicFeatures",
"genomicVariants",
"raw counts",
"RawIntensities",
"NormalizedIntensities",
"Pharmacokinetic Study",
"mask image",
"chromatinActivity",
Expand Down
2 changes: 1 addition & 1 deletion tests/generate/basic_templates.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ CREDS=creds.json
DATA_MODEL_PATH=../../NF.jsonld
DATA_MODEL=NF.jsonld
LOG_DIR=logs
SLEEP_THROTTLE=17 # API rate-limiting, need to better figure out dynamically based on # of templates
SLEEP_THROTTLE=20 # Seconds to wait between requests for API rate limiting. TODO: derive this dynamically from the number of templates

# Setup for creds
# If testing locally, it might already be in folder;
Expand Down

0 comments on commit 9dd7d7a

Please sign in to comment.