Skip to content

Commit

Permalink
Merge pull request #248 from togoid/main
Browse files Browse the repository at this point in the history
release 2024-07-16
  • Loading branch information
sh-ikeda authored Jul 16, 2024
2 parents 23a626b + aaf516b commit 7759ade
Show file tree
Hide file tree
Showing 11 changed files with 2,245 additions and 2,346 deletions.
42 changes: 42 additions & 0 deletions bin/cellosaurus_labels.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
###

## Set up record parsing and validate the required command-line variables.
##
## Each input record ends with a line consisting solely of "//", and every
## line inside a record becomes one field ($1..$NF).
##
## Required -v variables:
##   label_filename    output file for "label <TAB> accession <TAB> taxons"
##   synonym_filename  output file for "synonym <TAB> accession <TAB> taxons"
BEGIN {
    # NOTE: a multi-character RS is not portable to every awk; POSIX leaves
    # it unspecified. gawk treats it as a regular expression.
    RS = "\n//\n"
    FS = "\n"
    if (!label_filename || !synonym_filename) {
        print "usage: awk -v label_filename=<FILENAME> -v synonym_filename=<FILENAME> -f cellosaurus_labels.awk" > "/dev/stderr"
        # Exit with a non-zero status so calling scripts can detect the
        # misconfiguration instead of silently continuing with no output.
        exit 1
    }
}

## Per-record rule: extract accession, label, taxonomy IDs and synonyms from
## one entry and append them to the two output files.
##
## Writes one line "label <TAB> ac <TAB> taxons" to label_filename and one
## line "synonym <TAB> ac <TAB> taxons" per synonym to synonym_filename.
## Multiple taxonomy IDs are joined with "|".
##
## The substr() offsets assume a 5-character line-code prefix (two-letter
## code plus padding) before the value -- TODO confirm against the input
## file format.
{
    # Reset all per-record state. Without this, a record lacking an AC or SY
    # line would inherit the accession / synonyms of the previous record.
    label = ""
    ac = ""
    taxons = ""
    split("", synonyms)    # portable way to empty an array

    for (i = 1; i <= NF; i++) {
        ## AC   CVCL_0030
        if ($i ~ /^AC/) {
            # Offset 11 also skips the "CVCL_" prefix, keeping only the
            # part of the accession that follows it.
            ac = substr($i, 11)
        }
        ## ID   HeLa
        else if ($i ~ /^ID/) {
            label = substr($i, 6)
        }
        ## OX   NCBI_TaxID=9606; ! Homo sapiens (Human)
        else if ($i ~ /^OX/) {
            # Take the text between "NCBI_TaxID=" and the first ";".
            match($i, /;/)
            taxon = substr($i, 17, RSTART-17)
            if (taxons)
                taxons = taxons "|" taxon
            else
                taxons = taxon
        }
        ## SY   HELA; Hela; He La; He-La; HeLa-CCL2; Henrietta Lacks cells; Helacyton gartleri
        else if ($i ~ /^SY/) {
            split(substr($i, 6), synonyms, "; ")
        }
    }

    print label "\t" ac "\t" taxons > label_filename
    for (k in synonyms)
        print synonyms[k] "\t" ac "\t" taxons > synonym_filename
}
15 changes: 15 additions & 0 deletions bin/sparql/chebi_labels.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Fetch the primary label and the exact/related synonyms of ChEBI terms.
#
# Result columns:
#   ?chebi_id  text following "CHEBI_" in the subject IRI
#   ?p         predicate the text was found under (label or synonym type)
#   ?label     the label / synonym literal
PREFIX chebi: <http://purl.obolibrary.org/obo/chebi/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX oboinowl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT DISTINCT ?chebi_id ?p ?label
FROM <http://rdf.ebi.ac.uk/dataset/chebi>
WHERE {
  # Predicates that carry a name for a term.
  VALUES ?p { rdfs:label oboinowl:hasExactSynonym oboinowl:hasRelatedSynonym }

  ?term ?p ?label .

  # NOTE: STRAFTER yields "" when the IRI does not contain "CHEBI_".
  BIND (STRAFTER(STR(?term), "CHEBI_") AS ?chebi_id)
}
21 changes: 21 additions & 0 deletions bin/sparql/mondo_labels.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Endpoint: https://rdfportal.org/bioportal/sparql
#
# Fetch the primary label and the exact/related/broad synonyms of MONDO
# classes.
#
# Result columns:
#   ?id     text following "MONDO:" in the class's oboInOwl:id
#   ?p      predicate the text was found under (label or synonym type)
#   ?label  the label / synonym literal
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?id ?p ?label
# FROM <http://rdfportal.org/ontology/mondo>
FROM <http://rdf.integbio.jp/dataset/bioportal/mondo>
WHERE {
  VALUES ?p {
    rdfs:label
    oboInOwl:hasExactSynonym
    oboInOwl:hasRelatedSynonym
    oboInOwl:hasBroadSynonym
  }

  # The literal must be matched through ?p. A hard-coded rdfs:label here
  # would leave ?p unjoined, repeating every label once per VALUES entry
  # and never returning any synonym.
  ?mondo a owl:Class ;
         ?p ?label ;
         oboInOwl:id ?mondo_id .

  # NOTE: STRAFTER yields "" when the id does not contain "MONDO:".
  BIND (STRAFTER(STR(?mondo_id), 'MONDO:') AS ?id)
}
4 changes: 2 additions & 2 deletions docs/help.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# TogoID ver. 1.1
Datasets last updated: 2024-07-09
Datasets last updated: 2024-07-16

## About
- [TogoID](https://togoid.dbcls.jp/) is an ID conversion service implementing unique features with an intuitive web interface and an API for programmatic access. TogoID supports datasets from various biological categories such as gene, protein, chemical compound, pathway, disease, etc. TogoID users can perform exploratory multistep conversions to find a path among IDs. To guide the interpretation of biological meanings in the conversions, we crafted an [ontology](https://togoid.dbcls.jp/ontology) that defines the semantics of the dataset relations.
Expand All @@ -22,7 +22,7 @@ Shuya Ikeda, Hiromasa Ono, Tazro Ohta, Hirokazu Chiba, Yuki Naito, Yuki Moriya,

- [API Documentation (Swagger)](https://togoid.dbcls.jp/apidoc/)

## Statistics (as of 2024-07-09)
## Statistics (as of 2024-07-16)
- Number of target datasets
- 105 (from 73 databases)
- For details on the target DBs and ID examples, please refer to the "DATASETS" tab.
Expand Down
4 changes: 2 additions & 2 deletions docs/help_ja.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# TogoID ver. 1.1
Datasets last updated: 2024-07-09
Datasets last updated: 2024-07-16

## About
- [TogoID](https://togoid.dbcls.jp/) は、直感的なインターフェースにより生命科学系データベース(DB)間のつながりを探索的に確認しながらID変換を行うことができるウェブアプリケーションです。同一の実体を指すID間の変換だけでなく、関連する別のカテゴリーのIDへの変換も可能です。また、直接リンクされていないDBのID間でも、他のDBを経由した変換を探索することができます。
Expand Down Expand Up @@ -28,7 +28,7 @@ Datasets last updated: 2024-07-09

- [API Documentation (Swagger)](https://togoid.dbcls.jp/apidoc/)

## 統計 (2024-07-09)
## 統計 (2024-07-16)
- 対象データセット数
- 105 (73 のデータベースに由来)
- 対象DBの詳細やID例については、"DATASETS" タブ からご覧いただけます。
Expand Down
3 changes: 3 additions & 0 deletions docs/news.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 2024-07-16
- Weekly update has been completed.

# 2024-07-09
- Weekly update has been completed.
- A new dataset "PMC" has been added.
Expand Down
28 changes: 14 additions & 14 deletions log/config-summary.tsv

Large diffs are not rendered by default.

45 changes: 0 additions & 45 deletions log/error.log
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,7 @@ Error: check_remote_file_time(input/homologene/homologene.data, https://ftp.ncbi
Error: Remote file is empty
# Error: output/tsv/chembl_target-ensembl_gene.tsv new file size per old 0 / 61845 = 0.0 < 0.5
# Error: Failed to create output/tsv/chembl_target-ensembl_gene.tsv or created file was empty
# Error: output/tsv/gea-bioproject.tsv new file size per old 150 / 3032 = 0.04947229551451187 < 0.5
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <html>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <head><title>502 Bad Gateway</title></head>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <body>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <center><h1>502 Bad Gateway</h1></center>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML </body>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML </html>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <html>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <head><title>502 Bad Gateway</title></head>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <body>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <center><h1>502 Bad Gateway</h1></center>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML </body>
# Error: output/tsv/gea-bioproject.tsv seems to contain HTML </html>
# Error: output/tsv/gea-biosample.tsv new file size per old 150 / 121104 = 0.0012386048355132778 < 0.5
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <html>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <head><title>502 Bad Gateway</title></head>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <body>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <center><h1>502 Bad Gateway</h1></center>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML </body>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML </html>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <html>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <head><title>502 Bad Gateway</title></head>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <body>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <center><h1>502 Bad Gateway</h1></center>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML </body>
# Error: output/tsv/gea-biosample.tsv seems to contain HTML </html>
# Error: output/tsv/glytoucan-doid.tsv new file size per old 1596 / 4376 = 0.3647166361974406 < 0.5
# Error: output/tsv/jga_study-jga_dataset.tsv new file size per old 160 / 4378 = 0.03654636820465966 < 0.5
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <html>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <head><title>504 Gateway Time-out</title></head>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <body>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <center><h1>504 Gateway Time-out</h1></center>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML </body>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML </html>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <html>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <head><title>504 Gateway Time-out</title></head>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <body>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <center><h1>504 Gateway Time-out</h1></center>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML <hr><center>nginx</center>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML </body>
# Error: output/tsv/jga_study-jga_dataset.tsv seems to contain HTML </html>
# Error: output/tsv/mondo-doid.tsv new file size per old 0 / 155460 = 0.0 < 0.5
# Error: Failed to create output/tsv/mondo-doid.tsv or created file was empty
# Error: output/tsv/mondo-hp_phenotype.tsv new file size per old 0 / 9184 = 0.0 < 0.5
Expand Down
102 changes: 51 additions & 51 deletions log/pair_count.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ assembly_insdc-bioproject.tsv 2283159
assembly_insdc-biosample.tsv 2225812
assembly_insdc-insdc_master.tsv 2150193
bioproject-biosample.tsv 2546485
bioproject-geo_series.tsv 211705
bioproject-pubmed.tsv 272971
bioproject-geo_series.tsv 212049
bioproject-pubmed.tsv 273297
bioproject_umbrella-bioproject.tsv 104586
biosample-bioproject.tsv 19169684
biosample-geo_sample.tsv 9215828
biosample-bioproject.tsv 19216359
biosample-geo_sample.tsv 9227772
cellosaurus-ncit_disease.tsv 71772
cellosaurus-orphanet_phenotype.tsv 39956
chebi-inchi_key.tsv 175447
Expand All @@ -28,13 +28,13 @@ chembl_target-pdb.tsv 34588
chembl_target-pfam.tsv 3549
chembl_target-reactome_pathway.tsv 8047
chembl_target-uniprot.tsv 8272
clinvar-dbsnp.tsv 2937519
clinvar-hgnc.tsv 2936147
clinvar-medgen.tsv 4028236
clinvar-mondo.tsv 1971740
clinvar-ncbigene.tsv 2936457
clinvar-omim_phenotype.tsv 1626491
clinvar-orphanet_phenotype.tsv 1837806
clinvar-dbsnp.tsv 2937515
clinvar-hgnc.tsv 2936239
clinvar-medgen.tsv 4028094
clinvar-mondo.tsv 1971709
clinvar-ncbigene.tsv 2936549
clinvar-omim_phenotype.tsv 1626562
clinvar-orphanet_phenotype.tsv 1837834
clinvar-uniprot.tsv 20794
cog-insdc.tsv 115826
cog-refseq_protein.tsv 3340026
Expand Down Expand Up @@ -96,9 +96,9 @@ interpro-uniprot.tsv 756717820
jga_study-jga_dataset.tsv 199
jga_study-nbdc_human_db.tsv 305
jga_study-pubmed.tsv 130
lipidmaps-chebi.tsv 12916
lipidmaps-inchi_key.tsv 48027
lipidmaps-swisslipids.tsv 12348
lipidmaps-chebi.tsv 12918
lipidmaps-inchi_key.tsv 48054
lipidmaps-swisslipids.tsv 12349
mbgd_gene-uniprot.tsv 137319849
mbgd_organism-taxonomy.tsv 6318
medgen-hp_phenotype.tsv 16674
Expand All @@ -109,35 +109,35 @@ medgen-omim_phenotype.tsv 11043
medgen-orphanet_phenotype.tsv 9063
mgi_gene-ensembl_gene.tsv 56430
mgi_gene-hgnc.tsv 24587
mgi_gene-mgi_allele.tsv 109628
mgi_gene-mgi_allele.tsv 109621
mgi_gene-ncbigene.tsv 55037
mgi_gene-uniprot.tsv 79047
mgi_genotype-doid.tsv 7817
mgi_genotype-mgi_allele.tsv 124273
mgi_genotype-mp.tsv 399386
mgi_gene-uniprot.tsv 79046
mgi_genotype-doid.tsv 7819
mgi_genotype-mgi_allele.tsv 124307
mgi_genotype-mp.tsv 399524
mondo-doid.tsv 10712
mondo-hp_phenotype.tsv 574
mondo-meddra.tsv 1486
mondo-mesh.tsv 8352
mondo-omim_phenotype.tsv 9693
mondo-orphanet_phenotype.tsv 10380
nando-mondo.tsv 2390
ncbigene-ensembl_gene.tsv 10966040
ncbigene-ensembl_protein.tsv 12715160
ncbigene-ensembl_transcript.tsv 13299982
ncbigene-ensembl_gene.tsv 11259466
ncbigene-ensembl_protein.tsv 13038087
ncbigene-ensembl_transcript.tsv 13685563
ncbigene-flybase_gene.tsv 25078
ncbigene-go.tsv 96594508
ncbigene-hgnc.tsv 43778
ncbigene-go.tsv 96809176
ncbigene-hgnc.tsv 43784
ncbigene-mgi_gene.tsv 71685
ncbigene-mirbase.tsv 17541
ncbigene-omim_gene.tsv 18546
ncbigene-refseq_genomic.tsv 211742
ncbigene-refseq_protein.tsv 64884739
ncbigene-refseq_rna.tsv 63570121
ncbigene-omim_gene.tsv 18550
ncbigene-refseq_genomic.tsv 211743
ncbigene-refseq_protein.tsv 65103357
ncbigene-refseq_rna.tsv 63775524
ncbigene-rgd.tsv 47293
ncbigene-sgd.tsv 6471
ncbigene-tair.tsv 32835
ncbigene-taxonomy.tsv 53348077
ncbigene-taxonomy.tsv 53503353
ncbigene-vgnc.tsv 112158
ncbigene-wormbase_gene.tsv 19842
ncbigene-xenbase_gene.tsv 46792
Expand All @@ -160,14 +160,14 @@ pdb-interpro.tsv 726658
pdb-pdb_ccd.tsv 530587
pdb-pfam.tsv 339565
pdb-uniprot.tsv 341431
pmc-pubmed.tsv 9328424
pmc-pubmed.tsv 9343016
prosite-prosite_prorule.tsv 1452
pubchem_compound-atc.tsv 4965
pubchem_compound-chebi.tsv 174843
pubchem_compound-chembl_compound.tsv 2372556
pubchem_compound-drugbank.tsv 10789
pubchem_compound-glytoucan.tsv 69203
pubchem_compound-inchi_key.tsv 118372532
pubchem_compound-inchi_key.tsv 118373866
pubchem_pathway-ncbigene.tsv 33861
pubchem_pathway-pathbank.tsv 69387
pubchem_pathway-pubchem_compound.tsv 1253395
Expand All @@ -185,7 +185,7 @@ reactome_reaction-go.tsv 3318
reactome_reaction-iuphar_ligand.tsv 14387
reactome_reaction-mirbase.tsv 194
reactome_reaction-uniprot.tsv 672687
refseq_protein-uniprot.tsv 167682921
refseq_protein-uniprot.tsv 167682920
refseq_rna-dbsnp.tsv 267451107
refseq_rna-hgnc.tsv 227363
refseq_rna-ncbigene.tsv 62318158
Expand All @@ -199,28 +199,28 @@ rhea-go.tsv 4429
rhea-pubmed.tsv 138072
rhea-reactome_reaction.tsv 1480
rhea-uniprot.tsv 40452196
sra_accession-bioproject.tsv 505360
sra_accession-biosample.tsv 30211016
sra_accession-bioproject.tsv 505724
sra_accession-biosample.tsv 30238529
sra_accession-sra_analysis.tsv 329141
sra_accession-sra_experiment.tsv 33537058
sra_accession-sra_project.tsv 590058
sra_accession-sra_run.tsv 38209139
sra_accession-sra_sample.tsv 32674631
sra_experiment-bioproject.tsv 29241743
sra_experiment-biosample.tsv 29583517
sra_experiment-sra_project.tsv 29585883
sra_experiment-sra_sample.tsv 29585715
sra_project-bioproject.tsv 515507
sra_run-bioproject.tsv 31104158
sra_run-biosample.tsv 31394635
sra_run-sra_experiment.tsv 31486545
sra_run-sra_project.tsv 31461072
sra_run-sra_sample.tsv 31396842
sra_sample-biosample.tsv 30282671
sra_accession-sra_experiment.tsv 33611256
sra_accession-sra_project.tsv 591444
sra_accession-sra_run.tsv 38286026
sra_accession-sra_sample.tsv 32743735
sra_experiment-bioproject.tsv 29264055
sra_experiment-biosample.tsv 29605027
sra_experiment-sra_project.tsv 29608195
sra_experiment-sra_sample.tsv 29608027
sra_project-bioproject.tsv 515871
sra_run-bioproject.tsv 31126525
sra_run-biosample.tsv 31416200
sra_run-sra_experiment.tsv 31508912
sra_run-sra_project.tsv 31483439
sra_run-sra_sample.tsv 31419209
sra_sample-biosample.tsv 30310184
swisslipids-chebi.tsv 4276
swisslipids-hmdb.tsv 26026
swisslipids-inchi_key.tsv 593209
taxonomy-pubmed.tsv 50039
taxonomy-pubmed.tsv 50050
togovar-clinvar.tsv 745335
togovar-dbsnp.tsv 66877211
togovar-ensembl_gene.tsv 72473309
Expand Down Expand Up @@ -259,4 +259,4 @@ wikipathways-hmdb.tsv 4103
wikipathways-lipidmaps.tsv 1397
wikipathways-ncbigene.tsv 30204
wikipathways-uniprot.tsv 33518
total 4941751747
total 4944099992
Loading

0 comments on commit 7759ade

Please sign in to comment.