-
Notifications
You must be signed in to change notification settings - Fork 4
VLPB: example SPARQL queries
Namespace prefixes
- some boilerplate (e.g. check prefix.cc lookup service)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
- domain-specific
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX pubmed: <http://identifiers.org/pubmed/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX prosite: <http://purl.uniprot.org/prosite/>
PREFIX prints: <http://purl.uniprot.org/prints/>
PREFIX pirsf: <http://purl.uniprot.org/pirsf/>
PREFIX superfamily: <http://purl.uniprot.org/supfam/>
PREFIX tigrfam: <http://purl.uniprot.org/tigrfams/>
PREFIX pfam: <http://purl.uniprot.org/pfam/>
PREFIX smart: <http://purl.uniprot.org/smart/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX transcript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX protein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX exon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX term: <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX taxon: <http://identifiers.org/taxonomy/>
1. Count genomic features in the (wild) tomato genome from the SGN and EnsemblPlants databases.
# Query 1: count genomic features per Sequence Ontology (SO) feature type in
# one genome graph, most frequent type first.
#
# Result columns:
#   ?feature_name  label of the SO term, as a plain string
#   ?feature_id    Markdown link "[SO:nnnnnnn](<SO term IRI>)"
#   ?n             number of "?feature a ?feature_type" matches for that term
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT
  (str(?feature_label) AS ?feature_name)
  ?feature_id
  (COUNT(*) AS ?n)
WHERE {
  # Alternative genome graphs that can be substituted for the one below:
  #   http://solgenomics.net/genome/Solanum_pennellii
  #   http://plants.ensembl.org/Solanum_lycopersicum
  GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
    ?feature a ?feature_type .
    # Keep only types from the SO namespace. The IRIs are converted with str()
    # because string functions are defined on literals, and a prefix test avoids
    # treating the dots of the namespace IRI as regex wildcards.
    FILTER strstarts(str(?feature_type), str(obo:SO_))
    # Strip everything up to the last '/', turn "SO_0000147" into "SO:0000147"
    # and wrap it in a Markdown link that points at the term IRI.
    BIND(concat('[', replace(replace(str(?feature_type), '.+\\/', ''), '_', ':'), '](', str(?feature_type), ')') AS ?feature_id)
  }
  GRAPH <http://purl.obolibrary.org/obo/so.owl> {
    ?feature_type rdfs:label ?feature_label
  }
}
GROUP BY ?feature_label ?feature_id
ORDER BY DESC(?n)
Solanum lycopersicum (SGN)
feature_name | feature_id | n |
---|---|---|
exon | SO:0000147 | 160001 |
CDS | SO:0000316 | 157233 |
intron | SO:0000188 | 125276 |
protein_coding_gene | SO:0001217 | 34725 |
protein_coding_primary_transcript | SO:0000120 | 34725 |
genetic_marker | SO:0001645 | 30718 |
three_prime_UTR | SO:0000205 | 15343 |
five_prime_UTR | SO:0000204 | 13548 |
chromosome | SO:0000340 | 13 |
genome | SO:0001026 | 1 |
Solanum pennellii (SGN)
feature_name | feature_id | n |
---|---|---|
exon | SO:0000147 | 278874 |
CDS | SO:0000316 | 252950 |
intron | SO:0000188 | 204027 |
protein_coding_primary_transcript | SO:0000120 | 48923 |
protein_coding_gene | SO:0001217 | 44965 |
genetic_marker | SO:0001645 | 2225 |
chromosome | SO:0000340 | 13 |
genome | SO:0001026 | 1 |
Solanum lycopersicum (EnsemblPlants)
feature_name | feature_id | n |
---|---|---|
exon | SO:0000147 | 162535 |
protein_coding_primary_transcript | SO:0000120 | 34725 |
protein_coding_gene | SO:0001217 | 33785 |
miRNA | SO:0000276 | 3153 |
miRNA_gene | SO:0001265 | 3153 |
tRNA_gene | SO:0001272 | 908 |
snoRNA | SO:0000275 | 390 |
snoRNA_gene | SO:0001267 | 390 |
snRNA_gene | SO:0001268 | 255 |
snRNA | SO:0000274 | 255 |
rRNA | SO:0000252 | 94 |
rRNA_gene | SO:0001637 | 94 |
pseudogenic_tRNA | SO:0000778 | 76 |
chromosome | SO:0000340 | 13 |
RNA | SO:0000356 | 2 |
Note: The chromosome counts include chr.00 (pseudomolecule).
2. Count protein accessions in the tomato proteome from the UniProt database.
# Query 2: count the resources typed as uniprot:Protein in the tomato
# proteome graph.
#
# Result column:
#   ?n  number of matches for "?protein a uniprot:Protein"
PREFIX uniprot: <http://purl.uniprot.org/core/>

SELECT (COUNT(*) AS ?n)
FROM <http://www.uniprot.org/proteomes/Solanum_lycopersicum>
WHERE {
  ?protein a uniprot:Protein .
}
n |
---|
33952 |
3. Count triples matching i) the phrase "fruit ripening", ii) the bag-of-words fruit AND ripening, or iii) the prefix keyword fruit*, using full-text search across all RDF graphs in SGN-LD.
# Query 3: count the triples, in any named graph, whose object matches a
# full-text search expression.
#
# bif:contains is not declared with a PREFIX here; the query relies on the
# endpoint (Virtuoso) providing the bif: namespace.
#
# Result column:
#   ?n  number of matching triples
SELECT (COUNT(*) AS ?n)
WHERE {
  GRAPH ?graph {
    ?subject ?predicate ?text .
    # Active search expression: the exact phrase. To run one of the other
    # searches, replace the string below with one of the commented-out ones.
    ?text bif:contains '"fruit ripening"'
    # '( fruit AND ripening )'
    # '"fruit*"'
  }
}
n |
---|
124 |
n |
---|
155 |
n |
---|
1481 |
4. List genes/proteins annotated with Gene Ontology (GO) terms containing the bag-of-words fruit AND ripening.
# Query 4: list genes/proteins annotated with GO terms whose text matches the
# full-text expression '( fruit AND ripening )'.
#
# Result columns:
#   ?gene_name    gene label from the UniProt graph
#   ?sgn_gene_id  Markdown link "[<gene id>](https://solgenomics.net/locus/<gene id>/view)"
#   ?uniprot_acc  Markdown link from the protein's dc:identifier to the protein IRI
#   ?uniprot_id   Markdown link from the protein's rdfs:label to the protein IRI
#   ?uniprot_des  protein dc:description
#   ?go_term      GO term label
#   ?go_id        Markdown link from the GO id to the GO term IRI
#   ?go_cat       value of oboInOwl:hasOBONamespace for the GO term
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX uniprot: <http://purl.uniprot.org/core/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX so: <http://purl.obolibrary.org/obo/so#>
PREFIX go: <http://www.geneontology.org/formats/oboInOwl#>

SELECT DISTINCT
  (str(?gene_label) AS ?gene_name)
  # The link target is built from the gene id of the current row; it was
  # previously a fixed locus (Solyc00g005000.2) for every row.
  (concat('[', str(?gene_id), '](https://solgenomics.net/locus/', str(?gene_id), '/view)') AS ?sgn_gene_id)
  (concat('[', str(?prot_acc), '](', str(?prot), ')') AS ?uniprot_acc)
  (concat('[', str(?prot_label), '](', str(?prot), ')') AS ?uniprot_id)
  (str(?prot_description) AS ?uniprot_des)
  (str(?go_label) AS ?go_term)
  (concat('[', str(?go_acc), '](', str(?go), ')') AS ?go_id)
  (str(?go_namespace) AS ?go_cat)
WHERE {
  # UniProt: protein -> GO classification and name of the encoding gene.
  GRAPH <http://www.uniprot.org/proteomes/Solanum_lycopersicum> {
    ?prot uniprot:classifiedWith ?go ;
          uniprot:encodedBy/skos:prefLabel ?gene_label
  }
  # EnsemblPlants: protein metadata, then protein <- CHECKSUM - Ensembl protein
  # <- translates_to - transcript - transcribed_from -> gene identifier.
  GRAPH <http://plants.ensembl.org/Solanum_lycopersicum> {
    ?prot dc:identifier ?prot_acc ;
          rdfs:label ?prot_label ;
          dc:description ?prot_description ;
          ^<http://rdf.ebi.ac.uk/terms/ensembl/CHECKSUM> ?ensembl_prot .
    ?ensembl_transcript so:translates_to ?ensembl_prot ;
                        so:transcribed_from/dc:identifier ?gene_id .
  }
  # GO: keep terms for which any property value matches the text search.
  GRAPH <http://purl.obolibrary.org/obo/go.owl> {
    ?go ?go_property ?go_text ;
        rdfs:label ?go_label ;
        go:id ?go_acc ;
        go:hasOBONamespace ?go_namespace .
    ?go_text bif:contains '( fruit AND ripening )' .
    # Restrict to IRIs in the GO namespace; str() is needed because string
    # functions operate on literals, not IRIs.
    FILTER strstarts(str(?go), str(obo:GO_))
  }
}
ORDER BY ?gene_name
gene_name | sgn_gene_id | uniprot_acc | uniprot_id | uniprot_des | go_term | go_id | go_cat |
---|---|---|---|---|---|---|---|
ACO1 | Solyc07g049530.2 | P05116 | ACCO1_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase 1 | fruit ripening | GO:0009835 | biological_process |
ACO3 | Solyc09g089580.2 | P10967 | ACCH3_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase homolog | fruit ripening | GO:0009835 | biological_process |
ACO4 | Solyc07g049550.2 | P24157 | ACCO4_SOLLC | 1-aminocyclopropane-1-carboxylate oxidase 4 | fruit ripening | GO:0009835 | biological_process |
ACS2 | Solyc01g095080.2 | P18485 | 1A12_SOLLC | 1-aminocyclopropane-1-carboxylate synthase 2 | fruit ripening | GO:0009835 | biological_process |
ACS3 | Solyc02g091990.2 | Q42881 | 1A13_SOLLC | 1-aminocyclopropane-1-carboxylate synthase 3 | fruit ripening | GO:0009835 | biological_process |
GP1 | Solyc05g005560.2 | Q40161 | GP1_SOLLC | Polygalacturonase-1 non-catalytic subunit beta | fruit ripening | GO:0009835 | biological_process |
PG2 | Solyc10g080210.1 | P05117 | PGLR_SOLLC | Polygalacturonase-2 | fruit ripening | GO:0009835 | biological_process |
PME1.9 | Solyc07g064170.2 | P14280 | PME1_SOLLC | Pectinesterase 1 | fruit ripening | GO:0009835 | biological_process |
PME2.1 | Solyc07g064180.2 | P09607 | PME21_SOLLC | Pectinesterase 2.1 | fruit ripening | GO:0009835 | biological_process |
5. Summarize tomato QTL data extracted from Europe PMC. Note: Not all QTLs could be mapped to chromosomal locations (via genetic markers) available in the SGN RDF graphs.
# Query 5: summarise the obo:SO_0000771 (QTL) resources in the Europe PMC
# graph.
#
# Result columns:
#   ?n_articles       number of distinct dcterms:isReferencedBy values
#   ?n_qtls           number of result rows with ?qtl bound
#   ?n_qtls_with_loc  number of result rows with a faldo:location bound
#
# NOTE(review): the last two counts are per result row, not per distinct QTL;
# a QTL with several locations or several referencing articles would be
# counted once per combination - confirm this cannot occur in the data.
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT
  (COUNT(DISTINCT ?article) AS ?n_articles)
  (COUNT(?qtl) AS ?n_qtls)
  (COUNT(?location) AS ?n_qtls_with_loc)
FROM <http://europepmc.org/articles>
WHERE {
  ?qtl dcterms:isReferencedBy ?article ;
       a obo:SO_0000771 .
  # The location is optional so that QTLs without one are still counted.
  OPTIONAL { ?qtl faldo:location ?location }
}
n_articles | n_qtls | n_qtls_with_loc |
---|---|---|
6 | 512 | 227 |
6. List traits (terms from PO, TO and SPTO ontologies) associated with the extracted QTLs.
# Query 6: list the traits linked to QTLs (obo:SO_0000771) through
# obo:RO_0003308, with the number of QTLs per trait, ordered by trait name.
#
# Result columns:
#   ?trait_id    Markdown link "[<ontology id>](<trait IRI>)"
#   ?trait_name  trait label as a plain string
#   ?n_qtls      number of QTL rows for the trait
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT DISTINCT
  (concat('[', str(?trait_acc), '](', str(?trait), ')') AS ?trait_id)
  (str(?trait_label) AS ?trait_name)
  (COUNT(?qtl) AS ?n_qtls)
FROM <http://europepmc.org/articles>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0003308 ?trait .
  {
    # Label and id of each trait, looked up in the three ontology graphs.
    # A sub-select is used because the outer dataset is the Europe PMC graph.
    SELECT
      ?trait
      ?trait_label
      ?trait_acc
    FROM <http://purl.obolibrary.org/obo/po.owl>
    FROM <http://purl.obolibrary.org/obo/to.owl>
    FROM <http://purl.bioontology.org/ontology/SPTO>
    WHERE {
      ?trait rdfs:label ?trait_label ;
             oboInOwl:id ?trait_acc .
    }
  }
}
# Explicit grouping: every non-aggregated value in the projection is derived
# from these variables, so the query no longer depends on implicit grouping.
GROUP BY ?trait ?trait_acc ?trait_label
ORDER BY ?trait_name
trait_id | trait_name | n_qtls |
---|---|---|
PO:0020043 | compound leaf | 12 |
SP:0000366 | days to fruit ripening | 16 |
PO:0009001 | fruit | 22 |
TO:0002626 | fruit length | 16 |
SP:0000087 | fruit perimeter | 8 |
TO:0002728 | fruit quality trait | 11 |
SP:0000038 | fruit shape | 8 |
TO:0002628 | fruit shape | 7 |
TO:0002625 | fruit size | 9 |
TO:0002746 | fruit weight | 16 |
TO:0002627 | fruit width | 8 |
TO:0002699 | lycopene content | 2 |
TO:0000174 | maturity trait | 11 |
SP:0000170 | pH | 9 |
TO:0020076 | phenolic compound content | 1 |
SP:0000236 | plant canopy | 16 |
TO:0000442 | plant fresh weight | 9 |
SP:0000003 | plant habit | 16 |
TO:0000207 | plant height | 16 |
TO:0000017 | plant morphology trait | 64 |
SP:0000002 | plant size | 16 |
TO:0006062 | plant width | 16 |
TO:0000181 | seed weight | 16 |
SP:0000345 | titratable acids | 1 |
SP:0000165 | total soluble solids | 14 |
SP:0000198 | yield | 19 |
7. List QTLs and associated gene/transcript IDs for the trait days to fruit ripening
# Query 7: list QTLs for the trait obo:SP_0000366 together with the genes they
# overlap and the transcripts of those genes (first 5 rows only).
#
# Result columns:
#   ?qtl_id        dcterms:identifier of the QTL
#   ?sgn_gene_id   dcterms:identifier of the overlapped gene
#   ?sgn_trans_id  dcterms:identifier of the gene's transcript
#   ?annot         rdfs:comment of the transcript
#
# NOTE(review): there is no ORDER BY, so which 5 rows are returned is up to
# the endpoint.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX so: <http://purl.obolibrary.org/obo/so#>

SELECT
  (str(?qtl_label) AS ?qtl_id)
  (str(?gene_label) AS ?sgn_gene_id)
  (str(?transcript_label) AS ?sgn_trans_id)
  (str(?transcript_comment) AS ?annot)
WHERE {
  # QTLs associated with the trait, and the genes they overlap.
  GRAPH <http://europepmc.org/articles> {
    ?qtl a obo:SO_0000771 ;
         obo:RO_0003308 obo:SP_0000366 ;
         so:overlaps ?gene ;
         dcterms:identifier ?qtl_label .
  }
  # Gene and transcript identifiers/annotation from the SGN genome graph.
  GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
    ?gene dcterms:identifier ?gene_label ;
          so:transcribed_to ?transcript .
    ?transcript dcterms:identifier ?transcript_label ;
                rdfs:comment ?transcript_comment
  }
}
LIMIT 5
qtl_id | sgn_gene_id | sgn_trans_id | annot |
---|---|---|---|
PMC4321030_2_36 | Solyc11g008770.1 | Solyc11g008770.1.1 | Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509 |
PMC4321030_2_54 | Solyc11g008770.1 | Solyc11g008770.1.1 | Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509 |
PMC4321030_2_54 | Solyc11g008780.1 | Solyc11g008780.1.1 | Name: Solyc11g008780.1.1; Note: Acetolactate synthase small subunit (AHRD V1 ***- Q9SMC2_NICPL); contains Interpro domain(s) IPR004789 Acetolactate synthase, small subunit ; Ontology_term: GO:0009082, GO:0008152; interpro2go_term: GO:0009082, GO:0008152 |
PMC4321030_2_54 | Solyc11g008790.1 | Solyc11g008790.1.1 | Name: Solyc11g008790.1.1; Note: ARV1 (AHRD V1 ***- Q5MK24_ARATH); contains Interpro domain(s) IPR007290 Arv1-like protein |
PMC4321030_2_54 | Solyc11g008800.1 | Solyc11g008800.1.1 | Name: Solyc11g008800.1.1; Note: Inositol 1 4 5-trisphosphate 5-phosphatase-like protein (AHRD V1 -- Q6H453_ORYSJ); contains Interpro domain(s) IPR000300 Inositol polyphosphate related phosphatase ; Ontology_term: GO:0004437; interpro2go_term: GO:0004437 |
ODEX4all