forked from ontoportal/ncbo_cron
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4ea04c7
commit abfdc71
Showing
10 changed files
with
568 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# require 'bundler/setup' | ||
require 'pry' | ||
require 'benchmark' | ||
require 'ncbo_annotator' | ||
require 'ncbo_cron' | ||
require 'ontologies_linked_data' | ||
|
||
# Counts the triples stored in a named graph and reports how long the COUNT
# query took, optionally pointing Goo at a specific triple-store backend first.
#
# Positional arguments as read below: ARGV[1] = graph URI, ARGV[2] = backend
# profile ('ag', 'fs', 'vo' or 'gb').
# NOTE(review): ARGV[0] is never read by this script, so the first command-line
# argument is ignored — confirm this matches how the script is invoked.
graph = ARGV[1]
profile = ARGV[2]

if graph.nil?
  puts "Error: Missing arguments. Please provide the graph name."
  exit(1)
end

# Select the backend by exporting GOO_* variables before the project
# configuration is loaded further down.
case profile
when 'ag'
  # AllegroGraph backend
  ENV['GOO_BACKEND_NAME'] = 'allegrograph'
  ENV['GOO_PORT'] = '10035'
  ENV['GOO_PATH_QUERY'] = '/repositories/ontoportal_test'
  ENV['GOO_PATH_DATA'] = '/repositories/ontoportal_test/statements'
  ENV['GOO_PATH_UPDATE'] = '/repositories/ontoportal_test/statements'
  ENV['COMPOSE_PROFILES'] = 'ag'

when 'fs'
  # 4store backend
  ENV['GOO_PORT'] = '9000'
  ENV['COMPOSE_PROFILES'] = 'fs'

when 'vo'
  # Virtuoso backend
  ENV['GOO_BACKEND_NAME'] = 'virtuoso'
  ENV['GOO_PORT'] = '8890'
  ENV['GOO_PATH_QUERY'] = '/sparql'
  ENV['GOO_PATH_DATA'] = '/sparql'
  ENV['GOO_PATH_UPDATE'] = '/sparql'
  ENV['COMPOSE_PROFILES'] = 'vo'

when 'gb'
  # Graphdb backend
  ENV['GOO_BACKEND_NAME'] = 'graphdb'
  ENV['GOO_PORT'] = '7200'
  ENV['GOO_PATH_QUERY'] = '/repositories/ontoportal'
  ENV['GOO_PATH_DATA'] = '/repositories/ontoportal/statements'
  ENV['GOO_PATH_UPDATE'] = '/repositories/ontoportal/statements'

else
  puts "Will import to default config set in config/config.rb"
end

# Required only after the environment is prepared (presumably the config reads
# the GOO_* variables at load time — verify against config/config.rb).
require_relative '../../config/config'
count = 0
time = Benchmark.realtime do
  # The graph URI comes from the `graph` argument parsed above; the previous
  # code interpolated an undefined `graph_uri` and raised NameError.
  rs = Goo.sparql_query_client.query("SELECT (COUNT(?s) as ?count) FROM <#{graph}> WHERE { ?s ?p ?o }")
  rs = rs.solutions.first
  count = rs[:count].to_i if rs
end

puts 'Imported triples in ' + format("%.4f", time) + 's with total count: ' + count.to_s
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
require 'ontologies_linked_data' | ||
# Helpers that time common read operations (triple pagination, roots,
# children, concept tree) against the graph of an ontology submission.
#
# `Benchmark` (stdlib) must already be loaded by the caller; this module only
# references it.
module Benchmarks

  # Runs every read benchmark against +sub+ and prints one timing line each.
  # The concept benchmarks use fixed ITIS concept URIs.
  #
  # @param sub the ontology submission whose graph is queried
  def self.do_all_benchmarks(sub)
    bench("fetch triples") do
      paginate_all_triples(sub)
    end

    bench("get ontology Concept Roots") do
      ontology_roots(sub)
    end

    bench("concept children") do
      concept_children("http://terminologies.gfbio.org/ITIS/Taxa_0", sub)
    end

    bench("concept path to root") do
      concept_tree("http://terminologies.gfbio.org/ITIS/Taxa_6007", sub)
    end
  end

  # Times the given block and prints "Time to <label>: <seconds>" with the
  # elapsed wall-clock time rounded to two decimals.
  #
  # @param label [String] text inserted into the printed line
  def self.bench(label, &block)
    time = Benchmark.realtime do
      block.call
    end
    puts "Time to #{label}: " + time.round(2).to_s
  end

  # Deletes the graph named by +sub.id+ and appends the triples found in
  # +file_path+ to it.
  #
  # @param sub the submission whose id names the target graph
  # @param file_path [String] path of the triples file to load
  def self.import_nt_file(sub, file_path)
    Goo.sparql_data_client.delete_graph(sub.id)
    Goo.sparql_data_client.append_triples_no_bnodes(sub.id, file_path, nil)
  end

  # Fetches the triples of the graph +sub.id+ page by page and prints the
  # total number retrieved. Stops at the first empty page or after
  # +max_pages+ pages, whichever comes first; triples beyond that cap are
  # not fetched or counted.
  #
  # @param sub the submission whose id names the graph to read
  # @param pagesize [Integer] LIMIT used for each query
  # @param max_pages [Integer] maximum number of pages requested
  def self.paginate_all_triples(sub, pagesize: 10_000, max_pages: 99)
    total_count = 0
    (1..max_pages).each do |page|
      puts "Starting query for page #{page}"
      offset = (page - 1) * pagesize
      rs = Goo.sparql_query_client.query("SELECT ?s ?p ?o FROM <#{sub.id}> WHERE { ?s ?p ?o } LIMIT #{pagesize} OFFSET #{offset}")
      # `count` walks the solutions; `size` on an enumerator can be nil when
      # the length is not known up front.
      count = rs.each_solution.count
      total_count += count
      break if count.zero?
    end
    puts "Total triples: " + total_count.to_s
  end

  # Times two steps separately and prints the results: finding the roots of
  # +sub+, then loading them with the :unmapped include. Also prints the
  # number of roots found.
  #
  # @param sub the submission whose roots are fetched
  def self.ontology_roots(sub)
    load_attrs = LinkedData::Models::Class.goo_attrs_to_load([:all])
    roots = []
    time = Benchmark.realtime do
      roots = sub.roots(load_attrs)
    end
    puts "Time to find roots: " + time.round(2).to_s
    Goo.log_debug_file('roots')
    time = Benchmark.realtime do
      LinkedData::Models::Class.in(sub).models(roots).include(:unmapped).all
    end
    puts "Time to load roots: " + time.round(2).to_s
    Goo.log_debug_file('roots')
    puts "Roots count: " + roots.length.to_s
  end

  # Looks up the concept identified by +uri+ in +sub+ and prints how many
  # children are returned for the first page (100 per page).
  #
  # @param uri [String] URI of the parent concept
  # @param sub the submission to search in
  def self.concept_children(uri, sub)
    page, size = [1, 100]
    # Use the caller-supplied URI; it was previously ignored in favour of a
    # hard-coded ITIS concept.
    cls = LinkedData::Models::Class.find(RDF::URI.new(uri)).in(sub).first
    ld = LinkedData::Models::Class.goo_attrs_to_load([:all])
    children = sub.children(cls, includes_param: ld, page: page, size: size)
    puts "Children count: " + children.length.to_s
  end

  # Looks up the concept identified by +uri+ in +sub+ and builds its tree
  # from the submission roots.
  #
  # @param uri [String] URI of the concept
  # @param sub the submission to search in
  # @return whatever +cls.tree+ returns
  def self.concept_tree(uri, sub)
    # Use the caller-supplied URI; it was previously ignored in favour of a
    # hard-coded ITIS concept.
    cls = LinkedData::Models::Class.find(uri).in(sub).first
    # Same attribute set as before, with the repeated entries removed.
    extra_include = [:prefLabel, :hasChildren, :children, :obsolete, :subClassOf, :isInActiveScheme]

    roots = sub.roots(extra_include)
    # path = cls.path_to_root(roots)
    cls.tree(roots: roots)
  end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Benchmarks | ||
## Import all portal metadata | ||
|
||
## Import AGROVOC and query all triples by pages | ||
## Parse ITIS and do ontoportal operations | ||
```
ruby test/benchmarks/parse_and_do_ontoportal_operations.rb ITIS fs 47a57aa3-7b54-4f34-b695-dbb5f5b7363e https://data.biodivportal.gfbio.dev
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/usr/bin/env bash
# Starts the OntoPortal services for a backend profile, imports the metadata
# graphs into the store, then runs the count comparison script.
#
# Arguments:
#   $1  path to the metadata graph files
#   $2  backend profile passed through to the helper scripts
set -e

path_graphs_files=$1
profile=$2

# The profile is the last positional argument; bail out with usage if absent.
if [[ -z "$profile" ]]; then
  echo "Usage: $0 <path to path_graphs_files> <profile>"
  exit 1
fi

echo "###########################################################################"
./test/benchmarks/start_ontoportal_services.sh "$profile"
./bin/migrations/import_metadata_graphs_to_store "$path_graphs_files" "$profile"
echo 'All metadata graphs imported successfully.'
echo "###########################################################################"

ruby bin/migrations/compare_counts.rb "$path_graphs_files" "$profile"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
|
||
# Documentation:
# This script imports a large NT file into the triple store and then runs the
# read benchmarks (triple pagination, roots, children, concept tree) on it.
# It is used to compare import and fetch performance across backends.
#
# Positional arguments:
#   ARGV[0]  backend profile: 'ag', 'fs', 'vo' or 'gb'
#   ARGV[1]  path of the NT file to import
#   ARGV[2]  ontology acronym (optional, defaults to 'STY')

profile = ARGV[0]
file_path = ARGV[1]
acronym = ARGV[2] || 'STY' # Default to STY

# Validate the arguments first so services are not started for a run that
# cannot proceed.
if file_path.nil?
  puts "Error: Missing arguments. Please provide the file path."
  exit(1)
end

pwd = File.dirname(__FILE__)
# The argument-list form of `system` bypasses the shell, so argument values are
# passed verbatim. It returns false on a non-zero exit and nil when the
# command could not be run; both are treated as failure.
unless system("#{pwd}/start_ontoportal_services.sh", profile.to_s, acronym)
  puts "Error occurred during script execution."
  exit(1)
end

# NOTE(review): nothing has been parsed by this script at this point; the
# message presumably refers to work done by the services script — confirm.
puts "Finished parsing file"
# The GOO_* variables are exported only after the services script has run and
# before the project configuration is required below.
case profile
when 'ag'
  # AllegroGraph backend
  ENV['GOO_BACKEND_NAME'] = 'allegrograph'
  ENV['GOO_PORT'] = '10035'
  ENV['GOO_PATH_QUERY'] = '/repositories/ontoportal_test'
  ENV['GOO_PATH_DATA'] = '/repositories/ontoportal_test/statements'
  ENV['GOO_PATH_UPDATE'] = '/repositories/ontoportal_test/statements'
  ENV['COMPOSE_PROFILES'] = 'ag'

when 'fs'
  # 4store backend
  ENV['GOO_PORT'] = '9000'
  ENV['COMPOSE_PROFILES'] = 'fs'

when 'vo'
  # Virtuoso backend
  ENV['GOO_BACKEND_NAME'] = 'virtuoso'
  ENV['GOO_PORT'] = '8890'
  ENV['GOO_PATH_QUERY'] = '/sparql'
  ENV['GOO_PATH_DATA'] = '/sparql'
  ENV['GOO_PATH_UPDATE'] = '/sparql'
  ENV['COMPOSE_PROFILES'] = 'vo'

when 'gb'
  # Graphdb backend
  ENV['GOO_BACKEND_NAME'] = 'graphdb'
  ENV['GOO_PORT'] = '7200'
  ENV['GOO_PATH_QUERY'] = '/repositories/ontoportal'
  ENV['GOO_PATH_DATA'] = '/repositories/ontoportal/statements'
  ENV['GOO_PATH_UPDATE'] = '/repositories/ontoportal/statements'

else
  puts "Error: Unknown backend type. Please set BACKEND_TYPE to 'ag', 'fs', 'vo', or 'gb'."
  # The message reports an error, so stop instead of benchmarking an
  # unspecified backend.
  exit(1)
end

require 'bundler/setup'
require 'pry'
require 'benchmark'
require 'ncbo_annotator'
require 'ncbo_cron'
require 'ontologies_linked_data'
require_relative '../../config/config'
require_relative 'data_benchs'

puts "Starting to fetch triples"
ontology = LinkedData::Models::Ontology.find(acronym).first
if ontology.nil?
  puts "Error: Ontology '#{acronym}' not found."
  exit(1)
end

sub = ontology.latest_submission(status: :any)
if sub.nil?
  puts "Error: No submission found for ontology '#{acronym}'."
  exit(1)
end
sub.bring_remaining

Benchmarks.bench('Append triples') do
  Benchmarks.import_nt_file(sub, file_path)
end

Benchmarks.do_all_benchmarks(sub)
Oops, something went wrong.