From 9dd7d7a3e03cab3ab48015383e6de12f71cb25ab Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu <32753274+anngvu@users.noreply.github.com> Date: Wed, 20 Mar 2024 08:58:55 -0600 Subject: [PATCH] Feat/meth array template (#408) * Add optional methylation array data props defined by GDC * Add MethylationArrayTemplate * Update channel prop * Add template to menu * Update throttle param * Rebuild NF.jsonld, json * Add data type values * Lint * Rebuild NF.jsonld, json --------- Co-authored-by: nf-osi[bot] --- NF.jsonld | 163 ++++++++++++++++++++- dca-template-config.json | 3 +- modules/Data/Data.yaml | 7 +- modules/Template/Data.yaml | 17 +++ modules/props.yaml | 22 +++ registered-json-schemas/PortalDataset.json | 4 +- registered-json-schemas/PortalStudy.json | 4 +- tests/generate/basic_templates.sh | 2 +- 8 files changed, 216 insertions(+), 6 deletions(-) diff --git a/NF.jsonld b/NF.jsonld index 7d53b01e..e1c6c1e9 100644 --- a/NF.jsonld +++ b/NF.jsonld @@ -220,7 +220,7 @@ }, { "@id" : "bts:Data", "@type" : "rdfs:Class", - "rdfs:comment" : "TBD", + "rdfs:comment" : "What the data (file) contains.", "rdfs:label" : "Data", "rdfs:subClassOf" : [ ], "schema:isPartOf" : { @@ -2732,6 +2732,81 @@ "@id" : "bts:progressReportNumber" } ], "sms:displayName" : "UpdateMilestoneReport" + }, { + "rdfs:subClassOf" : [ { + "@id" : "bts:GenomicsArrayTemplate" + } ], + "@id" : "bts:MethylationArrayTemplate", + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:required" : "sms:false", + "sms:requiresComponent" : "", + "rdfs:label" : "MethylationArrayTemplate", + "rdfs:comment" : "Template for raw data files (idat) from DNA methylation arrays.", + "@type" : "rdfs:Class", + "sms:requiresDependency" : [ { + "@id" : "bts:Component" + }, { + "@id" : "bts:Filename" + }, { + "@id" : "bts:fileFormat" + }, { + "@id" : "bts:resourceType" + }, { + "@id" : "bts:dataType" + }, { + "@id" : "bts:dataSubtype" + }, { + "@id" : "bts:assay" + }, { + "@id" : "bts:individualID" + }, { + "@id" : "bts:species" + }, { + "@id" : "bts:sex" + }, { + "@id" : "bts:age" + }, { + "@id" : "bts:ageUnit" + }, { + "@id" : "bts:diagnosis" + }, { + "@id" : "bts:nf1Genotype" + }, { + "@id" : "bts:nf2Genotype" + }, { + "@id" : "bts:tumorType" + }, { + "@id" : "bts:modelSystemName" + }, { + "@id" : "bts:organ" + }, { + "@id" : "bts:comments" + }, { + "@id" : "bts:parentSpecimenID" + }, { + "@id" : "bts:specimenID" + }, { + "@id" : "bts:aliquotID" + }, { + "@id" : "bts:platform" + }, { + "@id" : "bts:nucleicAcidSource" + }, { + "@id" : "bts:specimenPreparationMethod" + }, { + "@id" : "bts:channel" + }, { + "@id" : "bts:chipID" + }, { + "@id" : "bts:chipPosition" + }, { + "@id" : "bts:plateName" + }, { + "@id" : "bts:plateWell" + } ], + "sms:displayName" : "MethylationArrayTemplate" }, { "rdfs:subClassOf" : [ { "@id" : "bts:BiologicalAssayDataTemplate" @@ -5523,6 +5598,17 @@ "rdfs:comment" : "(Legacy/deprecated annotation) Whether or not is paired-end sequencing (Yes; No).", "@type" : "rdfs:Class", "sms:displayName" : "isPairedEnd" + }, { + "@id" : "bts:plateName", + "@type" : "rdfs:Class", + "rdfs:comment" : "User-specified identifier of the plate used to prepare the sample for analysis.", + "rdfs:label" : "plateName", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "plateName", + "sms:required" : "sms:false" }, { "rdfs:subClassOf" : [ ], "@id" : "bts:numberOfSchwannomas", @@ -6129,6 +6215,22 @@ }, "sms:displayName" : "meanCoverage", "sms:required" : "sms:false" + }, { + "rdfs:subClassOf" : [ ], + "@id" : "bts:channel", + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:required" : "sms:true", + "schema:rangeIncludes" : [ { + "@id" : "bts:Cy3" + }, { + "@id" : "bts:Cy5" + } ], + "rdfs:label" : "channel", + "rdfs:comment" : "Color channel used to generate data file.", + "@type" : "rdfs:Class", + "sms:displayName" : "channel" }, { "@id" : "bts:genePerturbed", "@type" : "rdfs:Class", @@ -7015,6 +7117,10 @@ "@id" : "bts:genomicVariants" }, { "@id" : "bts:rawcounts" + }, { + "@id" : "bts:RawIntensities" + }, { + "@id" : "bts:NormalizedIntensities" }, { "@id" : "bts:PharmacokineticStudy" }, { @@ -7675,6 +7781,28 @@ "rdfs:comment" : "Characterization of the manifestation of Dermal neurofibromas.", "@type" : "rdfs:Class", "sms:displayName" : "dermalNeurofibromas" + }, { + "@id" : "bts:plateWell", + "@type" : "rdfs:Class", + "rdfs:comment" : "User-specified identifier for the specific well of the plate used to prepare the sample for analysis.", + "rdfs:label" : "plateWell", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "plateWell", + "sms:required" : "sms:false" + }, { + "@id" : "bts:chipID", + "@type" : "rdfs:Class", + "rdfs:comment" : "User-specified identifier for the chip used to perform the methylation microarray.", + "rdfs:label" : "chipID", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "chipID", + "sms:required" : "sms:false" }, { "rdfs:subClassOf" : [ ], "@id" : "bts:proteinExtractSource", @@ -8890,6 +9018,17 @@ }, "sms:displayName" : "bisulfiteConversionKitID", "sms:required" : "sms:false" + }, { + "@id" : "bts:chipPosition", + "@type" : "rdfs:Class", + "rdfs:comment" : "User-specified identifier for the specific position on the chip that the sample was loaded into to perform the methylation microarray.", + "rdfs:label" : "chipPosition", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "chipPosition", + "sms:required" : "sms:false" }, { "rdfs:subClassOf" : [ ], "@id" : "bts:assay", @@ -21150,6 +21289,17 @@ }, "sms:displayName" : "University of Washington, Seattle", "sms:required" : "sms:false" + }, { + "@id" : "bts:NormalizedIntensities", + "@type" : "rdfs:Class", + "rdfs:comment" : "TBD", + "rdfs:label" : "NormalizedIntensities", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "NormalizedIntensities", + "sms:required" : "sms:false" }, { "@id" : "bts:LTQOrbitrapXL", "@type" : "rdfs:Class", @@ -21722,6 +21872,17 @@ }, "sms:displayName" : "bed gappedPeak", "sms:required" : "sms:false" + }, { + "@id" : "bts:RawIntensities", + "@type" : "rdfs:Class", + "rdfs:comment" : "TBD", + "rdfs:label" : "RawIntensities", + "rdfs:subClassOf" : [ ], + "schema:isPartOf" : { + "@id" : "http://schema.biothings.io/" + }, + "sms:displayName" : "RawIntensities", + "sms:required" : "sms:false" }, { "@id" : "bts:AnnotatedSomaticVariants", "@type" : "rdfs:Class", diff --git a/dca-template-config.json b/dca-template-config.json index 2ee46760..3e1be751 100644 --- a/dca-template-config.json +++ b/dca-template-config.json @@ -4,7 +4,8 @@ {"display_name": "Whole Exome Sequencing Assay", "schema_name": "WESTemplate", "type": "file"}, {"display_name": "RNA Sequencing Assay", "schema_name": "RNASeqTemplate", "type": "file"}, {"display_name": "Single-cell RNA Sequencing Assay", "schema_name": "ScRNASeqTemplate", "type": "file"}, - {"display_name": "Epigenetics Assay", "schema_name": "EpigeneticsAssayTemplate", "type": "file"}, + {"display_name": "Epigenetics Sequencing Assay", "schema_name": "EpigeneticsAssayTemplate", "type": "file"}, + {"display_name": "Methylation Array", "schema_name": "MethylationArrayTemplate", "type": "file"}, {"display_name": "Other Genomics Assay", "schema_name": "GenomicsAssayTemplate", "type": "file"}, {"display_name": "Extended Genomics Assay", "schema_name": "GenomicsAssayTemplateExtended", "type": "file"}, {"display_name": "Imaging Assay", "schema_name": "ImagingAssayTemplate", "type": "file"}, diff --git a/modules/Data/Data.yaml b/modules/Data/Data.yaml index 43a81867..e4b6428a 100644 --- a/modules/Data/Data.yaml +++ b/modules/Data/Data.yaml @@ -1,5 +1,6 @@ enums: Data: + description: What the data (file) contains. permissible_values: immunoassay: description: Laboratory test involving interaction of antigens with specific antibodies. @@ -60,6 +61,10 @@ enums: raw counts: description: The number or amount of something. meaning: http://purl.obolibrary.org/obo/NCIT_C25463 + RawIntensities: + description: Raw intensity values from the instrument. + NormalizedIntensities: + description: Normalized intensity values from the instrument/machine. Pharmacokinetic Study: description: A study of the process by which a drug is absorbed, distributed, metabolized, and eliminated by the body. meaning: http://purl.obolibrary.org/obo/NCIT_C49663 @@ -71,7 +76,7 @@ enums: meaning: https://en.wikipedia.org/wiki/Chromatin_remodeling StructuralVariants: description: Genomic variants data that covers deletions, duplications, CNVs, insertions, inversions, and translocations, which may be derived from specialized variant calling workflows. - source: hhttps://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation + source: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=structural_variation GermlineVariants: description: Called germline variants meaning: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_germline_variation diff --git a/modules/Template/Data.yaml b/modules/Template/Data.yaml index 2c1aed79..716d0a63 100644 --- a/modules/Template/Data.yaml +++ b/modules/Template/Data.yaml @@ -285,6 +285,23 @@ classes: dataType: - geneExpression + MethylationArrayTemplate: + is_a: GenomicsArrayTemplate + description: Template for raw data files (idat) from DNA methylation arrays. + source: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=raw_methylation_array + slots: + - chipID + - chipPosition + - plateName + - plateWell + annotations: + requiresComponent: '' + required: false + match: + assay: + - methylation array + + ######################################################### # Biological Assay Data / Sequencing / Processed ######################################################### diff --git a/modules/props.yaml b/modules/props.yaml index eeeefcb5..70c703bc 100644 --- a/modules/props.yaml +++ b/modules/props.yaml @@ -172,6 +172,20 @@ slots: description: A cell type is a distinct morphological or functional form of cell. range: Cell required: false + channel: + description: Color channel used to generate data file. + required: true + range: ChannelEnum + source: Genomic Data Commons + chipID: + description: User-specified identifier for the chip used to perform the methylation microarray. + required: false + source: Genomic Data Commons + chipPosition: + description: User-specified identifier for the specific position on the chip that + the sample was loaded into to perform the methylation microarray. + required: false + source: Genomic Data Commons citation: description: Citation (e.g. doi) that usage of data or resource should be cited with. required: false @@ -751,6 +765,14 @@ slots: range: PresenceEnum required: false title: Pheochromocytoma + plateName: + description: User-specified identifier of the plate used to prepare the sample for analysis. + required: false + source: Genomic Data Commons + plateWell: + description: User-specified identifier for the specific well of the plate used to prepare the sample for analysis. + required: false + source: Genomic Data Commons platform: description: A sequencing platform, microscope, spectroscope/plate reader, or other platform for collecting data. diff --git a/registered-json-schemas/PortalDataset.json b/registered-json-schemas/PortalDataset.json index 26802419..29a3c0a8 100644 --- a/registered-json-schemas/PortalDataset.json +++ b/registered-json-schemas/PortalDataset.json @@ -196,7 +196,7 @@ }, "dataType": { "items": { - "description": "", + "description": "What the data (file) contains.", "enum": [ "immunoassay", "behavior process", @@ -217,6 +217,8 @@ "genomicFeatures", "genomicVariants", "raw counts", + "RawIntensities", + "NormalizedIntensities", "Pharmacokinetic Study", "mask image", "chromatinActivity", diff --git a/registered-json-schemas/PortalStudy.json b/registered-json-schemas/PortalStudy.json index b3192830..1dc9bbcb 100644 --- a/registered-json-schemas/PortalStudy.json +++ b/registered-json-schemas/PortalStudy.json @@ -24,7 +24,7 @@ }, "dataType": { "items": { - "description": "", + "description": "What the data (file) contains.", "enum": [ "immunoassay", "behavior process", @@ -45,6 +45,8 @@ "genomicFeatures", "genomicVariants", "raw counts", + "RawIntensities", + "NormalizedIntensities", "Pharmacokinetic Study", "mask image", "chromatinActivity", diff --git a/tests/generate/basic_templates.sh b/tests/generate/basic_templates.sh index 5785a96a..82b51743 100755 --- a/tests/generate/basic_templates.sh +++ b/tests/generate/basic_templates.sh @@ -6,7 +6,7 @@ CREDS=creds.json DATA_MODEL_PATH=../../NF.jsonld DATA_MODEL=NF.jsonld LOG_DIR=logs -SLEEP_THROTTLE=17 # API rate-limiting, need to better figure out dynamically based on # of templates +SLEEP_THROTTLE=20 # API rate-limiting, need to better figure out dynamically based on # of templates # Setup for creds # If testing locally, it might already be in folder;