Skip to content

VLPB: example SPARQL queries

Arnold Kuzniar edited this page May 16, 2017 · 33 revisions

Namespace prefixes

  • commonly used prefixes (others can be looked up via the prefix.cc service)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
  • domain-specific
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX pubmed: <http://identifiers.org/pubmed/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX prosite: <http://purl.uniprot.org/prosite/>
PREFIX prints: <http://purl.uniprot.org/prints/>
PREFIX pirsf: <http://purl.uniprot.org/pirsf/>
PREFIX superfamily: <http://purl.uniprot.org/supfam/>
PREFIX tigrfam: <http://purl.uniprot.org/tigrfams/>
PREFIX pfam: <http://purl.uniprot.org/pfam/>
PREFIX smart: <http://purl.uniprot.org/smart/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX transcript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX protein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX exon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX term: <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX taxon: <http://identifiers.org/taxonomy/>

1. Count genomic features in the (wild) tomato genome from the SGN and EnsemblPlants databases.

PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# Count the instances of each Sequence Ontology (SO) feature type in one
# genome graph.
#
# Output columns:
#   ?feature_name  label of the SO class, taken from the so.owl graph
#   ?feature_id    Markdown link: text is the SO ID (e.g. SO:0000147),
#                  target is the SO class IRI
#   ?n             number of resources typed with that class
#
# NOTE: the un-parenthesized "expr AS ?var" projections are not standard
# SPARQL 1.1; presumably this targets a Virtuoso endpoint, which accepts them.
SELECT
   str(?feature_name) AS ?feature_name
   ?feature_id
   COUNT(*) AS ?n
WHERE {
   # To count features for another genome/database, swap the graph IRI for:
   #   http://solgenomics.net/genome/Solanum_pennellii
   #   http://plants.ensembl.org/Solanum_lycopersicum
   GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
      ?ft a ?feature_type .
      # Keep only SO classes. ?feature_type is an IRI, so it is converted with
      # str() before the string comparison instead of being passed to regex().
      FILTER strstarts(str(?feature_type), str(obo:SO_)) .
      # Drop everything up to the last '/' and turn e.g. 'SO_0000147' into
      # 'SO:0000147'; the link target is the class IRI as a plain string.
      BIND(concat('[', replace(replace(str(?feature_type), '.+\\/', ''), '_', ':'), '](', str(?feature_type), ')') AS ?feature_id)
   }
   # Human-readable class labels come from the Sequence Ontology graph.
   GRAPH <http://purl.obolibrary.org/obo/so.owl> {
      ?feature_type rdfs:label ?feature_name
   }
}
GROUP BY ?feature_name ?feature_id
ORDER BY DESC(?n)

Solanum lycopersicum (SGN)

feature_name feature_id n
exon SO:0000147 160001
CDS SO:0000316 157233
intron SO:0000188 125276
protein_coding_gene SO:0001217 34725
protein_coding_primary_transcript SO:0000120 34725
genetic_marker SO:0001645 30718
three_prime_UTR SO:0000205 15343
five_prime_UTR SO:0000204 13548
chromosome SO:0000340 13
genome SO:0001026 1

Solanum pennellii (SGN)

feature_name feature_id n
exon SO:0000147 278874
CDS SO:0000316 252950
intron SO:0000188 204027
protein_coding_primary_transcript SO:0000120 48923
protein_coding_gene SO:0001217 44965
genetic_marker SO:0001645 2225
chromosome SO:0000340 13
genome SO:0001026 1

Solanum lycopersicum (EnsemblPlants)

feature_name feature_id n
exon SO:0000147 162535
protein_coding_primary_transcript SO:0000120 34725
protein_coding_gene SO:0001217 33785
miRNA SO:0000276 3153
miRNA_gene SO:0001265 3153
tRNA_gene SO:0001272 908
snoRNA SO:0000275 390
snoRNA_gene SO:0001267 390
snRNA_gene SO:0001268 255
snRNA SO:0000274 255
rRNA SO:0000252 94
rRNA_gene SO:0001637 94
pseudogenic_tRNA SO:0000778 76
chromosome SO:0000340 13
RNA SO:0000356 2

Note: The chromosome counts include chr.00 (pseudomolecule).

2. Count protein accessions in the tomato proteome from the UniProt database.

PREFIX uniprot: <http://purl.uniprot.org/core/>

# Count the resources typed uniprot:Protein in the tomato proteome graph.
SELECT COUNT(*) AS ?n
FROM <http://www.uniprot.org/proteomes/Solanum_lycopersicum>
WHERE {
   ?protein a uniprot:Protein .
}
n
33952

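3. Count triples whose object matches a full-text search across all RDF graphs in SGN-LD, using i) the exact phrase "fruit ripening", ii) the bag-of-words fruit AND ripening, or iii) the wildcard keyword fruit*. The three results below are listed in that order.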

# Count the triples, in any named graph, whose object matches a full-text
# search expression.
# NOTE(review): the bif: prefix is not declared here; presumably it is
# predefined by the endpoint (Virtuoso built-in functions) -- confirm.
SELECT
   COUNT(*) AS ?n
WHERE {
   GRAPH ?graph {
      ?subject ?predicate ?text .
      # Alternative search expressions (swap in for the phrase used below):
      #   '( fruit AND ripening )'
      #   '"fruit*"'
      ?text bif:contains '"fruit ripening"'
   }
}
n
124
n
155
n
1481

4. List genes/proteins annotated with Gene Ontology (GO) terms containing fruit AND ripening bag-of-words.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX uniprot: <http://purl.uniprot.org/core/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX so: <http://purl.obolibrary.org/obo/so#>
PREFIX go: <http://www.geneontology.org/formats/oboInOwl#>

# List tomato genes/proteins classified with GO terms whose text matches
# "fruit AND ripening".
#
# The query joins three graphs:
#   - UniProt proteome: protein -> GO class, protein -> gene name
#   - EnsemblPlants:    protein accession/ID/description, and the path
#                       protein <- Ensembl protein <- transcript -> gene ID
#   - GO ontology:      label, ID and namespace of the GO class
#
# The ID columns are emitted as Markdown links ("[text](target)").
# NOTE: the un-parenthesized "expr AS ?var" projections and bif:contains are
# not standard SPARQL 1.1; presumably this targets a Virtuoso endpoint.
SELECT
   DISTINCT
   str(?gene_name) AS ?gene_name
   # Build the SGN locus link from the gene ID of the current row. The
   # original hard-coded Solyc00g005000.2, so every row linked to that locus.
   # NOTE(review): assumes SGN resolves /locus/<gene id>/view for any gene ID,
   # as the original example URL suggests -- confirm.
   concat('[', ?sgn_gene_id, '](https://solgenomics.net/locus/', ?sgn_gene_id, '/view)') AS ?sgn_gene_id
   concat('[', ?uniprot_acc, '](', str(?prot), ')') AS ?uniprot_acc
   concat('[', ?uniprot_id, '](', str(?prot), ')') AS ?uniprot_id
   str(?uniprot_des) AS ?uniprot_des
   str(?go_term) AS ?go_term
   concat('[', ?go_id, '](', str(?go), ')') AS ?go_id
   str(?go_cat) AS ?go_cat
WHERE {
   GRAPH <http://www.uniprot.org/proteomes/Solanum_lycopersicum> {
      ?prot uniprot:classifiedWith ?go ;
          uniprot:encodedBy/skos:prefLabel ?gene_name
   }
   GRAPH <http://plants.ensembl.org/Solanum_lycopersicum> {
      ?prot dc:identifier ?uniprot_acc ;
          rdfs:label ?uniprot_id ;
          dc:description ?uniprot_des ;
          # Inverse path: the Ensembl protein points at the UniProt entry.
          ^<http://rdf.ebi.ac.uk/terms/ensembl/CHECKSUM> ?ensembl_prot_id .
       ?ensembl_transcript_id so:translates_to ?ensembl_prot_id ;
          so:transcribed_from/dc:identifier ?sgn_gene_id .
   }
   GRAPH <http://purl.obolibrary.org/obo/go.owl> {
      # ?p/?o range over every property value of the GO class, so the text
      # search below can hit any of its literals, not only the label.
      ?go ?p ?o ;
         rdfs:label ?go_term ;
         go:id ?go_id ;
         go:hasOBONamespace ?go_cat .
      ?o bif:contains '( fruit AND ripening )' .
      # Keep only GO classes. ?go is an IRI, so it is converted with str()
      # before the string comparison instead of being passed to regex().
      FILTER strstarts(str(?go), str(obo:GO_))
   }
}
ORDER BY ?gene_name
gene_name sgn_gene_id uniprot_acc uniprot_id uniprot_des go_term go_id go_cat
ACO1 Solyc07g049530.2 P05116 ACCO1_SOLLC 1-aminocyclopropane-1-carboxylate oxidase 1 fruit ripening GO:0009835 biological_process
ACO3 Solyc09g089580.2 P10967 ACCH3_SOLLC 1-aminocyclopropane-1-carboxylate oxidase homolog fruit ripening GO:0009835 biological_process
ACO4 Solyc07g049550.2 P24157 ACCO4_SOLLC 1-aminocyclopropane-1-carboxylate oxidase 4 fruit ripening GO:0009835 biological_process
ACS2 Solyc01g095080.2 P18485 1A12_SOLLC 1-aminocyclopropane-1-carboxylate synthase 2 fruit ripening GO:0009835 biological_process
ACS3 Solyc02g091990.2 Q42881 1A13_SOLLC 1-aminocyclopropane-1-carboxylate synthase 3 fruit ripening GO:0009835 biological_process
GP1 Solyc05g005560.2 Q40161 GP1_SOLLC Polygalacturonase-1 non-catalytic subunit beta fruit ripening GO:0009835 biological_process
PG2 Solyc10g080210.1 P05117 PGLR_SOLLC Polygalacturonase-2 fruit ripening GO:0009835 biological_process
PME1.9 Solyc07g064170.2 P14280 PME1_SOLLC Pectinesterase 1 fruit ripening GO:0009835 biological_process
PME2.1 Solyc07g064180.2 P09607 PME21_SOLLC Pectinesterase 2.1 fruit ripening GO:0009835 biological_process

5. Summarize tomato QTL data extracted from Europe PMC. Note: Not all QTLs could be mapped to chromosomal locations (via genetic markers) available in the SGN RDF graphs.

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Summarize the QTLs (obo:SO_0000771) in the Europe PMC graph:
#   ?n_articles       distinct articles that reference a QTL
#   ?n_qtls           solutions with a bound QTL
#   ?n_qtls_with_loc  solutions whose QTL also has a faldo:location
SELECT
   COUNT(DISTINCT ?article) AS ?n_articles
   COUNT(?q) AS ?n_qtls
   COUNT(?position) AS ?n_qtls_with_loc
FROM <http://europepmc.org/articles>
WHERE {
   ?q a obo:SO_0000771 .
   ?q dcterms:isReferencedBy ?article .
   # The location is optional, so QTLs without one still count in ?n_qtls.
   OPTIONAL {
      ?q faldo:location ?position .
   }
}
n_articles n_qtls n_qtls_with_loc
6 512 227

6. List traits (terms from PO, TO and SPTO ontologies) associated with the extracted QTLs.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX dcterms: <http://purl.org/dc/terms/>

# List the ontology terms linked to QTLs (obo:SO_0000771) via obo:RO_0003308,
# with the number of QTLs per term.
#
# Output columns:
#   ?trait_id    Markdown link: text is the term ID, target is the term IRI
#   ?trait_name  term label
#   ?n_qtls      number of matching QTL solutions for the term
#
# Changes from the earlier form: the grouping is now explicit instead of
# relying on the endpoint to group implicitly, and the inner variables are
# named differently from the output columns so GROUP BY / ORDER BY are
# unambiguous.
# NOTE: un-parenthesized "expr AS ?var" and FROM inside a subquery are not
# standard SPARQL 1.1; presumably this targets a Virtuoso endpoint.
SELECT
   DISTINCT concat('[', ?trait_curie, '](', str(?trait), ')') AS ?trait_id
   str(?trait_label) AS ?trait_name
   COUNT(?qtl) AS ?n_qtls
FROM <http://europepmc.org/articles>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0003308 ?trait .
   {
      # Label and ID of each term, looked up in the three ontology graphs.
      SELECT
         ?trait
         ?trait_label
         ?trait_curie
      FROM <http://purl.obolibrary.org/obo/po.owl>
      FROM <http://purl.obolibrary.org/obo/to.owl>
      FROM <http://purl.bioontology.org/ontology/SPTO>
      WHERE {
         ?trait rdfs:label ?trait_label ;
            <http://www.geneontology.org/formats/oboInOwl#id> ?trait_curie .
      }
   }
}
GROUP BY ?trait ?trait_curie ?trait_label
ORDER BY ?trait_name
trait_id trait_name n_qtls
PO:0020043 compound leaf 12
SP:0000366 days to fruit ripening 16
PO:0009001 fruit 22
TO:0002626 fruit length 16
SP:0000087 fruit perimeter 8
TO:0002728 fruit quality trait 11
SP:0000038 fruit shape 8
TO:0002628 fruit shape 7
TO:0002625 fruit size 9
TO:0002746 fruit weight 16
TO:0002627 fruit width 8
TO:0002699 lycopene content 2
TO:0000174 maturity trait 11
SP:0000170 pH 9
TO:0020076 phenolic compound content 1
SP:0000236 plant canopy 16
TO:0000442 plant fresh weight 9
SP:0000003 plant habit 16
TO:0000207 plant height 16
TO:0000017 plant morphology trait 64
SP:0000002 plant size 16
TO:0006062 plant width 16
TO:0000181 seed weight 16
SP:0000345 titratable acids 1
SP:0000165 total soluble solids 14
SP:0000198 yield 19

7. List QTLs and associated gene/transcript IDs for the trait "days to fruit ripening" (SP:0000366); only the first five rows are shown.

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX so: <http://purl.obolibrary.org/obo/so#>

# For QTLs (obo:SO_0000771) linked to the trait obo:SP_0000366, list the
# overlapping genes together with their transcripts and transcript annotation.
#
# Output columns: ?qtl_id, ?sgn_gene_id, ?sgn_trans_id, ?annot (plain strings).
SELECT
   str(?qtl_id) AS ?qtl_id
   str(?sgn_gene_id) AS ?sgn_gene_id
   str(?sgn_trans_id) AS ?sgn_trans_id
   str(?annot) AS ?annot
WHERE {
   # QTL side: trait association, overlapping gene and QTL identifier.
   GRAPH <http://europepmc.org/articles> {
      ?qtl a obo:SO_0000771 .
      ?qtl obo:RO_0003308 ?trait .
      ?qtl so:overlaps ?locus .
      ?qtl dcterms:identifier ?qtl_id .
      FILTER(?trait = obo:SP_0000366)
   }
   # Genome side: gene identifier, its transcript and the transcript's
   # identifier and free-text annotation.
   GRAPH <http://solgenomics.net/genome/Solanum_lycopersicum> {
      ?locus so:transcribed_to ?mrna .
      ?locus dcterms:identifier ?sgn_gene_id .
      ?mrna rdfs:comment ?annot .
      ?mrna dcterms:identifier ?sgn_trans_id
   }
}
# There is no ORDER BY, so which five rows are returned is up to the endpoint.
LIMIT 5
qtl_id sgn_gene_id sgn_trans_id annot
PMC4321030_2_36 Solyc11g008770.1 Solyc11g008770.1.1 Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509
PMC4321030_2_54 Solyc11g008770.1 Solyc11g008770.1.1 Name: Solyc11g008770.1.1; Note: LETM1 and EF-hand domain-containing protein 1, mitochondrial (AHRD V1 *--- LETM1_CHICK); contains Interpro domain(s) IPR011685 LETM1-like ; Ontology_term: GO:0005509; interpro2go_term: GO:0005509
PMC4321030_2_54 Solyc11g008780.1 Solyc11g008780.1.1 Name: Solyc11g008780.1.1; Note: Acetolactate synthase small subunit (AHRD V1 ***- Q9SMC2_NICPL); contains Interpro domain(s) IPR004789 Acetolactate synthase, small subunit ; Ontology_term: GO:0009082, GO:0008152; interpro2go_term: GO:0009082, GO:0008152
PMC4321030_2_54 Solyc11g008790.1 Solyc11g008790.1.1 Name: Solyc11g008790.1.1; Note: ARV1 (AHRD V1 ***- Q5MK24_ARATH); contains Interpro domain(s) IPR007290 Arv1-like protein
PMC4321030_2_54 Solyc11g008800.1 Solyc11g008800.1.1 Name: Solyc11g008800.1.1; Note: Inositol 1 4 5-trisphosphate 5-phosphatase-like protein (AHRD V1 -- Q6H453_ORYSJ); contains Interpro domain(s) IPR000300 Inositol polyphosphate related phosphatase ; Ontology_term: GO:0004437; interpro2go_term: GO:0004437