Skip to content

Commit

Permalink
add benchmarking tests
Browse files Browse the repository at this point in the history
  • Loading branch information
syphax-bouazzouni committed Feb 1, 2025
1 parent 4ea04c7 commit abfdc71
Show file tree
Hide file tree
Showing 10 changed files with 568 additions and 2 deletions.
60 changes: 60 additions & 0 deletions bin/migrations/count_graph_triples.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# require 'bundler/setup'
require 'pry'
require 'benchmark'
require 'ncbo_annotator'
require 'ncbo_cron'
require 'ontologies_linked_data'

# Counts the triples stored in one named graph and reports how long the
# SPARQL COUNT query took.
#
# Arguments: ARGV[1] is the graph URI, ARGV[2] is the backend profile
# ('ag', 'fs', 'vo' or 'gb'). ARGV[0] is not read by this script.
# NOTE(review): Ruby's ARGV does not contain the script name, so the graph is
# expected as the *second* command-line argument - confirm against the caller.
graph = ARGV[1]
profile = ARGV[2]

if graph.nil?
  puts "Error: Missing arguments. Please provide the graph name."
  exit(1)
end

# Environment overrides for each known backend profile. They are applied
# before config/config.rb is loaded below (presumably that file reads them).
backend_env = {
  # AllegroGraph backend
  'ag' => {
    'GOO_BACKEND_NAME' => 'allegrograph',
    'GOO_PORT' => '10035',
    'GOO_PATH_QUERY' => '/repositories/ontoportal_test',
    'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements',
    'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements',
    'COMPOSE_PROFILES' => 'ag'
  },
  # 4store backend
  'fs' => {
    'GOO_PORT' => '9000',
    'COMPOSE_PROFILES' => 'fs'
  },
  # Virtuoso backend
  'vo' => {
    'GOO_BACKEND_NAME' => 'virtuoso',
    'GOO_PORT' => '8890',
    'GOO_PATH_QUERY' => '/sparql',
    'GOO_PATH_DATA' => '/sparql',
    'GOO_PATH_UPDATE' => '/sparql',
    'COMPOSE_PROFILES' => 'vo'
  },
  # Graphdb backend
  'gb' => {
    'GOO_BACKEND_NAME' => 'graphdb',
    'GOO_PORT' => '7200',
    'GOO_PATH_QUERY' => '/repositories/ontoportal',
    'GOO_PATH_DATA' => '/repositories/ontoportal/statements',
    'GOO_PATH_UPDATE' => '/repositories/ontoportal/statements'
  }
}

if backend_env.key?(profile)
  ENV.update(backend_env[profile])
else
  puts "Will import to default config set in config/config.rb"
end

require_relative '../../config/config'

count = 0
time = Benchmark.realtime do
  # Query the graph named on the command line (`graph`); there is no
  # `graph_uri` local in this script.
  rs = Goo.sparql_query_client.query("SELECT (COUNT(?s) as ?count) FROM <#{graph}> WHERE { ?s ?p ?o }")
  solution = rs.solutions.first
  # Leave the count at 0 when the store returns no solution row.
  count = solution[:count].to_i if solution
end

puts "Imported triples in #{format('%.4f', time)}s with total count: #{count}"
Empty file.
84 changes: 84 additions & 0 deletions test/benchmarks/data_benchs.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require 'benchmark'
require 'ontologies_linked_data'
# Helpers that time a handful of read operations against one ontology
# submission and print the results to standard output.
module Benchmarks
  # Runs every read benchmark in sequence for the given submission.
  #
  # The concept URIs used for the children/tree benchmarks are hard-coded ITIS
  # identifiers, so the submission is expected to contain them.
  #
  # @param sub the ontology submission to benchmark
  def self.do_all_benchmarks(sub)
    bench("fetch triples") { paginate_all_triples(sub) }

    bench("get ontology Concept Roots") { ontology_roots(sub) }

    bench("concept children") do
      concept_children("http://terminologies.gfbio.org/ITIS/Taxa_0", sub)
    end

    bench("concept path to root") do
      concept_tree("http://terminologies.gfbio.org/ITIS/Taxa_6007", sub)
    end
  end

  # Times the given block and prints "Time to <label>: <seconds>", with the
  # elapsed time rounded to two decimals.
  #
  # @param label [String] text inserted into the printed line
  # @yield the work to measure
  def self.bench(label, &block)
    elapsed = Benchmark.realtime(&block)
    puts "Time to #{label}: #{elapsed.round(2)}"
  end

  # Replaces the content of the submission's graph with the triples in
  # +file_path+: the graph is deleted first, then the file is appended.
  #
  # @param sub the submission whose id names the target graph
  # @param file_path [String] path of the triples file to load
  def self.import_nt_file(sub, file_path)
    graph_id = sub.id
    Goo.sparql_data_client.delete_graph(graph_id)
    Goo.sparql_data_client.append_triples_no_bnodes(graph_id, file_path, nil)
  end

  # Pages through the triples of the submission's graph, 10000 per query, and
  # prints the total number fetched. Stops at the first empty page or after
  # 99 pages, whichever comes first.
  #
  # @param sub the submission whose id names the graph to read
  def self.paginate_all_triples(sub)
    pagesize = 10_000
    last_page = 99
    total_count = 0

    (1..last_page).each do |page|
      puts "Starting query for page #{page}"
      offset = " OFFSET #{(page - 1) * pagesize}"
      rs = Goo.sparql_query_client.query("SELECT ?s ?p ?o FROM <#{sub.id}> WHERE { ?s ?p ?o } LIMIT #{pagesize} #{offset}")
      count = rs.each_solution.size
      total_count += count
      # An empty page means the previous one was the last with data.
      break unless count.positive?
    end

    puts "Total triples: #{total_count}"
  end

  # Times two steps separately and prints each duration plus the root count:
  # finding the submission's roots, then loading them with the :unmapped
  # attribute included.
  #
  # @param sub the submission whose roots are fetched
  def self.ontology_roots(sub)
    load_attrs = LinkedData::Models::Class.goo_attrs_to_load([:all])
    roots = []

    find_time = Benchmark.realtime { roots = sub.roots(load_attrs) }
    puts "Time to find roots: #{find_time.round(2)}"
    Goo.log_debug_file('roots')

    load_time = Benchmark.realtime do
      LinkedData::Models::Class.in(sub).models(roots).include(:unmapped).all
    end
    puts "Time to load roots: #{load_time.round(2)}"
    Goo.log_debug_file('roots')

    puts "Roots count: #{roots.length}"
  end

  # Fetches the first page (100 entries) of children of the concept identified
  # by +uri+ and prints how many were returned.
  #
  # @param uri [String] URI of the parent concept
  # @param sub the submission the concept belongs to
  def self.concept_children(uri, sub)
    page = 1
    size = 100
    # Look up the concept the caller asked for rather than a fixed one.
    cls = LinkedData::Models::Class.find(RDF::URI.new(uri)).in(sub).first
    ld = LinkedData::Models::Class.goo_attrs_to_load([:all])
    children = sub.children(cls, includes_param: ld, page: page, size: size)
    puts "Children count: #{children.length}"
  end

  # Builds the tree for the concept identified by +uri+, starting from the
  # submission's roots.
  #
  # @param uri [String] URI of the concept
  # @param sub the submission the concept belongs to
  # @return whatever +cls.tree+ returns
  def self.concept_tree(uri, sub)
    cls = LinkedData::Models::Class.find(uri).in(sub).first
    # Attributes requested on the roots; each one is listed once.
    extra_include = %i[prefLabel hasChildren children obsolete subClassOf isInActiveScheme]

    roots = sub.roots(extra_include)
    # path = cls.path_to_root(roots)
    cls.tree(roots: roots)
  end
end
6 changes: 6 additions & 0 deletions test/benchmarks/examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Benchmarks
## Import all portal metadata

## Import AGROVOC and query all triples by pages
## Parse ITIS and do ontoportal operations
```bash
ruby test/benchmarks/parse_and_do_ontoportal_operations.rb ITIS fs 47a57aa3-7b54-4f34-b695-dbb5f5b7363e https://data.biodivportal.gfbio.dev
```
17 changes: 17 additions & 0 deletions test/benchmarks/import_all_metadata_file.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Starts the OntoPortal services for a backend profile, runs the metadata
# graph import for the given path, then runs the count comparison script.
#
# Usage: import_all_metadata_file.sh <path_graphs_files> <profile>
#
# The helper scripts are invoked through relative paths, so this presumably
# has to be launched from the repository root - TODO confirm.
set -e

path_graphs_files=$1
profile=$2
banner="###########################################################################"

# The profile is the last positional argument, so an empty value also covers
# the case where no arguments were given at all.
if [[ -z "$profile" ]]; then
  echo "Usage: $0 <path to path_graphs_files> <profile>"
  exit 1
fi

echo "$banner"
./test/benchmarks/start_ontoportal_services.sh "$profile"
./bin/migrations/import_metadata_graphs_to_store "$path_graphs_files" "$profile"
echo 'All metadata graphs imported successfully.'
echo "$banner"

ruby bin/migrations/compare_counts.rb "$path_graphs_files" "$profile"
77 changes: 77 additions & 0 deletions test/benchmarks/import_and_fetch_all_triples_nt_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

# Documentation:
# This script is used to import a large NT file into the triple store
# and then fetch all the triples by paginating through the triples.
# The script is used to compare the performance of the import and fetch of different backends.

# Usage: ruby import_and_fetch_all_triples_nt_file.rb <profile> <file_path> [acronym]
profile = ARGV[0]
file_path = ARGV[1]
acronym = ARGV[2] || 'STY' # Default to STY

# Environment overrides for each supported backend profile. They are applied
# before config/config.rb is loaded below (presumably that file reads them).
backend_env = {
  # AllegroGraph backend
  'ag' => {
    'GOO_BACKEND_NAME' => 'allegrograph',
    'GOO_PORT' => '10035',
    'GOO_PATH_QUERY' => '/repositories/ontoportal_test',
    'GOO_PATH_DATA' => '/repositories/ontoportal_test/statements',
    'GOO_PATH_UPDATE' => '/repositories/ontoportal_test/statements',
    'COMPOSE_PROFILES' => 'ag'
  },
  # 4store backend
  'fs' => {
    'GOO_PORT' => '9000',
    'COMPOSE_PROFILES' => 'fs'
  },
  # Virtuoso backend
  'vo' => {
    'GOO_BACKEND_NAME' => 'virtuoso',
    'GOO_PORT' => '8890',
    'GOO_PATH_QUERY' => '/sparql',
    'GOO_PATH_DATA' => '/sparql',
    'GOO_PATH_UPDATE' => '/sparql',
    'COMPOSE_PROFILES' => 'vo'
  },
  # Graphdb backend
  'gb' => {
    'GOO_BACKEND_NAME' => 'graphdb',
    'GOO_PORT' => '7200',
    'GOO_PATH_QUERY' => '/repositories/ontoportal',
    'GOO_PATH_DATA' => '/repositories/ontoportal/statements',
    'GOO_PATH_UPDATE' => '/repositories/ontoportal/statements'
  }
}

# Validate the arguments before any service is started, so a bad invocation
# fails immediately instead of after the start-up script has run.
if file_path.nil?
  puts "Error: Missing arguments. Please provide the file path."
  exit(1)
end

unless backend_env.key?(profile)
  puts "Error: Unknown backend type. Please set the profile argument to 'ag', 'fs', 'vo', or 'gb'."
  exit(1)
end

# Pass the arguments as separate words (no shell involved) so a path with
# spaces or an unusual acronym cannot shift or split the argument list.
pwd = File.dirname(__FILE__)
unless system("#{pwd}/start_ontoportal_services.sh", profile, acronym)
  puts "Error occurred during script execution."
  exit(1)
end

puts "Finished parsing file"
ENV.update(backend_env[profile])

require 'bundler/setup'
require 'pry'
require 'benchmark'
require 'ncbo_annotator'
require 'ncbo_cron'
require 'ontologies_linked_data'
require_relative '../../config/config'
require_relative 'data_benchs'

puts "Starting to fetch triples"
ontology = LinkedData::Models::Ontology.find(acronym).first
if ontology.nil?
  puts "Error: Ontology #{acronym} was not found."
  exit(1)
end

sub = ontology.latest_submission(status: :any)
if sub.nil?
  puts "Error: Ontology #{acronym} has no submission."
  exit(1)
end
sub.bring_remaining

Benchmarks.bench('Append triples') do
  Benchmarks.import_nt_file(sub, file_path)
end

Benchmarks.do_all_benchmarks(sub)
Loading

0 comments on commit abfdc71

Please sign in to comment.