diff --git a/.github/workflows/ruby-unit-tests.yml b/.github/workflows/ruby-unit-tests.yml
index 1ade740..93a5407 100644
--- a/.github/workflows/ruby-unit-tests.yml
+++ b/.github/workflows/ruby-unit-tests.yml
@@ -9,8 +9,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        goo-slice: [ '20', '100', '500' ]
-        ruby-version: [ '2.7' ]
+        goo-slice: [ '100' ]
+        ruby-version: [ '3.2.0' ]
         triplestore: [ 'fs', 'ag', 'vo', 'gb' ]
     runs-on: ubuntu-latest
     steps:
@@ -34,7 +34,7 @@ jobs:
        # http://docs.codecov.io/docs/testing-with-docker
        run: |
          ci_env=`bash <(curl -s https://codecov.io/env)`
-          GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }} TESTOPTS="-v"
+          GOO_SLICES=${{ matrix.goo-slice }} bundle exec rake test:docker:${{ matrix.triplestore }}
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v3
        with:
diff --git a/.gitignore b/.gitignore
index a17ea1b..dadfffc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,4 +37,3 @@
 processed_files/
 queries.txt
 graph_comparison.csv
-
diff --git a/Capfile b/Capfile
index 42dd3a4..ea67e95 100644
--- a/Capfile
+++ b/Capfile
@@ -27,12 +27,14 @@ install_plugin Capistrano::SCM::Git
 # https://github.com/capistrano/passenger
 #
 # require "capistrano/rvm"
-# require "capistrano/rbenv"
+require "capistrano/rbenv"
 # require "capistrano/chruby"
-# require "capistrano/bundler"
+require "capistrano/bundler"
 # require "capistrano/rails/assets"
 # require "capistrano/rails/migrations"
 # require "capistrano/passenger"
+require 'capistrano/locally'
+#require 'new_relic/recipes' # announce deployments in NewRelic
 
 # Load custom tasks from `lib/capistrano/tasks` if you have any defined
 Dir.glob("lib/capistrano/tasks/*.rake").each { |r| import r }
diff --git a/Gemfile b/Gemfile
index 50a771b..bf0b2ec 100644
--- a/Gemfile
+++ b/Gemfile
@@ -22,23 +22,27 @@ gem 'sys-proctable'
 gem 'request_store'
 gem 'parallel'
 gem 'json-ld'
-gem 'ffi', '~> 1.16.3'
-gem 'activesupport', '~> 3.2.22.5'
+gem 'ffi'
+gem 'activesupport', '~> 5.0'
+gem 'rackup'
 
-# Monitoring
-gem 'cube-ruby', require: 'cube'
-gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'development'
+gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'feature/migrate-ruby-3.2'
+gem 'ontologies_linked_data', github: 'ontoportal-lirmm/ontologies_linked_data', branch: 'feature/migrate-ruby-3.2'
 gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'development'
-gem 'ontologies_linked_data', github: 'ontoportal-lirmm/ontologies_linked_data', branch: 'development'
 gem 'ncbo_annotator', github: 'ontoportal-lirmm/ncbo_annotator', branch: 'development'
+
 # Testing
 group :test do
   gem 'email_spec'
-  gem 'minitest', '< 5.0'
+  gem 'minitest'
   gem 'simplecov'
   gem 'simplecov-cobertura' # for codecov.io
-  gem 'test-unit-minitest'
+  # gem 'test-unit-minitest'
+  gem 'crack', '0.4.5'
+  gem 'webmock'
+  gem "minitest-hooks", "~> 1.5"
+  gem 'webrick'
 end
 
 group :development do
@@ -51,6 +55,8 @@ group :development do
   gem 'ed25519', '>= 1.2', '< 2.0', require: false
 end
 
+gem 'cube-ruby'
 gem "binding_of_caller", "~> 1.0"
+gem 'concurrent-ruby', '1.3.4'
 gem 'net-smtp'
 gem 'net-ftp'
diff --git a/Gemfile.lock b/Gemfile.lock
index 674dabf..1fd0155 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,12 +1,12 @@
 GIT
   remote: https://github.com/ontoportal-lirmm/goo.git
-  revision: 27300f28ca6c656c7e78af65013d88b792a6312f
-  branch: development
+  revision: dd3ea6c0f583c2044622a9f872a0bd18e898bb79
+  branch: feature/migrate-ruby-3.2
   specs:
     goo (0.0.2)
       addressable (~> 2.8)
       pry
-      rdf (= 3.2.11)
+      rdf
      rdf-raptor
      rdf-rdfxml
      rdf-vocab
@@ -29,8 +29,8 @@ GIT
 
 GIT
   remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
-  revision: 194fcfb9a1c4660dabef738d16f32c210a23c343
-  branch: development
+  revision: d5f5a439fdec58a9f0ac074f263dc18ff78f3df0
+  branch: feature/migrate-ruby-3.2
   specs:
     ontologies_linked_data (0.0.1)
       activesupport
@@ -69,19 +69,22 @@ PATH
       ncbo_annotator
       ontologies_linked_data
       redis
-      rufus-scheduler (~> 2.0.24)
+      rufus-scheduler
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (3.2.22.5)
-      i18n (~> 0.6, >= 0.6.4)
-      multi_json (~> 1.0)
+    activesupport (5.2.8.1)
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      i18n (>= 0.7, < 2)
+      minitest (~> 5.1)
+      tzinfo (~> 1.1)
     addressable (2.8.7)
       public_suffix (>= 2.0.2, < 7.0)
     airbrussh (1.5.3)
       sshkit (>= 1.6.1, != 1.7.0)
     base64 (0.2.0)
+    bcp47_spec (0.2.1)
     bcrypt (3.1.20)
     bcrypt_pbkdf (1.1.1)
     bcrypt_pbkdf (1.1.1-arm64-darwin)
@@ -102,8 +105,10 @@ GEM
       capistrano (~> 3.1)
       sshkit (~> 1.3)
     coderay (1.1.3)
-    concurrent-ruby (1.3.5)
+    concurrent-ruby (1.3.4)
     connection_pool (2.5.0)
+    crack (0.4.5)
+      rexml
     cube-ruby (0.0.3)
     dante (0.2.0)
     date (3.4.1)
@@ -116,27 +121,36 @@ GEM
       htmlentities (~> 4.3.3)
       launchy (~> 2.1)
       mail (~> 2.6)
-    faraday (2.8.1)
-      base64
-      faraday-net_http (>= 2.0, < 3.1)
-      ruby2_keywords (>= 0.0.4)
-    faraday-net_http (3.0.2)
+    et-orbi (1.2.11)
+      tzinfo
+    faraday (2.12.2)
+      faraday-net_http (>= 2.0, < 3.5)
+      json
+      logger
+    faraday-net_http (3.4.0)
+      net-http (>= 0.5.0)
     faraday-retry (2.2.1)
       faraday (~> 2.0)
-    ffi (1.16.3)
-    gapic-common (0.21.1)
+    ffi (1.17.1-arm64-darwin)
+    ffi (1.17.1-x86_64-linux-gnu)
+    fugit (1.11.1)
+      et-orbi (~> 1, >= 1.2.11)
+      raabro (~> 1.4)
+    gapic-common (0.25.0)
       faraday (>= 1.9, < 3.a)
       faraday-retry (>= 1.0, < 3.a)
-      google-protobuf (~> 3.18)
-      googleapis-common-protos (>= 1.4.0, < 2.a)
-      googleapis-common-protos-types (>= 1.11.0, < 2.a)
-      googleauth (~> 1.9)
-      grpc (~> 1.59)
+      google-cloud-env (~> 2.2)
+      google-logging-utils (~> 0.1)
+      google-protobuf (>= 3.25, < 5.a)
+      googleapis-common-protos (~> 1.6)
+      googleapis-common-protos-types (~> 1.15)
+      googleauth (~> 1.12)
+      grpc (~> 1.66)
     google-analytics-data (0.6.0)
       google-analytics-data-v1beta (>= 0.11, < 2.a)
       google-cloud-core (~> 1.6)
-    google-analytics-data-v1beta (0.14.0)
-      gapic-common (>= 0.21.1, < 2.a)
+    google-analytics-data-v1beta (0.16.0)
+      gapic-common (>= 0.25.0, < 2.a)
       google-cloud-errors (~> 1.0)
     google-apis-analytics_v3 (0.16.0)
       google-apis-core (>= 0.15.0, < 2.a)
@@ -151,10 +165,10 @@ GEM
     google-cloud-core (1.7.1)
       google-cloud-env (>= 1.0, < 3.a)
       google-cloud-errors (~> 1.0)
-    google-cloud-env (2.1.1)
+    google-cloud-env (2.2.1)
       faraday (>= 1.0, < 3.a)
     google-cloud-errors (1.4.0)
-    google-protobuf (3.25.3)
+    google-logging-utils (0.1.0)
     google-protobuf (3.25.3-arm64-darwin)
     google-protobuf (3.25.3-x86_64-linux)
     googleapis-common-protos (1.6.0)
@@ -163,38 +177,38 @@ GEM
       grpc (~> 1.41)
     googleapis-common-protos-types (1.18.0)
       google-protobuf (>= 3.18, < 5.a)
-    googleauth (1.11.2)
+    googleauth (1.13.1)
       faraday (>= 1.0, < 3.a)
-      google-cloud-env (~> 2.1)
+      google-cloud-env (~> 2.2)
+      google-logging-utils (~> 0.1)
       jwt (>= 1.4, < 3.0)
       multi_json (~> 1.11)
       os (>= 0.9, < 2.0)
       signet (>= 0.16, < 2.a)
-    grpc (1.65.2)
+    grpc (1.70.1-arm64-darwin)
       google-protobuf (>= 3.25, < 5.0)
       googleapis-common-protos-types (~> 1.0)
-    grpc (1.65.2-arm64-darwin)
-      google-protobuf (>= 3.25, < 5.0)
-      googleapis-common-protos-types (~> 1.0)
-    grpc (1.65.2-x86_64-linux)
+    grpc (1.70.1-x86_64-linux)
       google-protobuf (>= 3.25, < 5.0)
      googleapis-common-protos-types (~> 1.0)
+    hashdiff (1.1.2)
     htmlentities (4.3.4)
     http-accept (1.7.0)
     http-cookie (1.0.8)
       domain_name (~> 0.5)
     httpclient (2.8.3)
-    i18n (0.9.5)
+    i18n (1.14.7)
       concurrent-ruby (~> 1.0)
     json (2.9.1)
-    json-canonicalization (0.4.0)
-    json-ld (3.2.5)
+    json-canonicalization (1.0.0)
+    json-ld (3.3.2)
       htmlentities (~> 4.3)
-      json-canonicalization (~> 0.3, >= 0.3.2)
+      json-canonicalization (~> 1.0)
       link_header (~> 0.0, >= 0.0.8)
       multi_json (~> 1.15)
       rack (>= 2.2, < 4)
-      rdf (~> 3.2, >= 3.2.10)
+      rdf (~> 3.3)
+      rexml (~> 3.2)
     jwt (2.10.1)
       base64
     launchy (2.5.2)
@@ -212,7 +226,9 @@ GEM
       mime-types-data (~> 3.2015)
     mime-types-data (3.2025.0204)
     mini_mime (1.1.5)
-    minitest (4.7.5)
+    minitest (5.25.4)
+    minitest-hooks (1.5.2)
+      minitest (> 5.3)
     mlanett-redis-lock (0.2.7)
       redis
     multi_json (1.15.0)
@@ -220,6 +236,8 @@ GEM
     net-ftp (0.3.8)
       net-protocol
       time
+    net-http (0.6.0)
+      uri
     net-http-persistent (4.0.5)
       connection_pool (~> 2.2)
     net-protocol (0.2.2)
@@ -246,25 +264,30 @@ GEM
     pry (0.15.2)
       coderay (~> 1.1)
       method_source (~> 1.0)
-    public_suffix (5.1.1)
+    public_suffix (6.0.1)
+    raabro (1.4.0)
     rack (3.1.9)
     rack-test (2.2.0)
       rack (>= 1.3)
+    rackup (2.2.1)
+      rack (>= 3)
     rake (13.2.1)
-    rdf (3.2.11)
+    rdf (3.3.2)
+      bcp47_spec (~> 0.2)
+      bigdecimal (~> 3.1, >= 3.1.5)
       link_header (~> 0.0, >= 0.0.8)
     rdf-raptor (3.2.0)
       ffi (~> 1.15)
       rdf (~> 3.2)
-    rdf-rdfxml (3.2.2)
-      builder (~> 3.2)
+    rdf-rdfxml (3.3.0)
+      builder (~> 3.2, >= 3.2.4)
       htmlentities (~> 4.3)
-      rdf (~> 3.2)
-      rdf-xsd (~> 3.2)
-    rdf-vocab (3.2.7)
-      rdf (~> 3.2, >= 3.2.4)
-    rdf-xsd (3.2.1)
-      rdf (~> 3.2)
+      rdf (~> 3.3)
+      rdf-xsd (~> 3.3)
+    rdf-vocab (3.3.2)
+      rdf (~> 3.3)
+    rdf-xsd (3.3.0)
+      rdf (~> 3.3)
       rexml (~> 3.2)
     redis (5.3.0)
       redis-client (>= 0.22.0)
@@ -287,10 +310,9 @@ GEM
       builder (>= 2.1.2)
       faraday (>= 0.9, < 3, != 2.0.0)
     ruby-xxHash (0.4.0.2)
-    ruby2_keywords (0.0.5)
     rubyzip (2.4.1)
-    rufus-scheduler (2.0.24)
-      tzinfo (>= 0.3.22)
+    rufus-scheduler (3.9.2)
+      fugit (~> 1.1, >= 1.11.1)
     signet (0.19.0)
       addressable (~> 2.8)
       faraday (>= 0.17.5, < 3.a)
@@ -314,42 +336,49 @@ GEM
     sys-proctable (1.3.0)
       ffi (~> 1.1)
     systemu (2.6.5)
-    test-unit-minitest (0.9.1)
-      minitest (~> 4.7)
+    thread_safe (0.3.6)
     time (0.4.1)
       date
     timeout (0.4.3)
     trailblazer-option (0.1.2)
-    tzinfo (2.0.6)
-      concurrent-ruby (~> 1.0)
+    tzinfo (1.2.11)
+      thread_safe (~> 0.1)
     uber (0.1.0)
+    uri (1.0.2)
     uuid (2.3.9)
       macaddr (~> 1.0)
+    webmock (3.25.0)
+      addressable (>= 2.8.0)
+      crack (>= 0.3.2)
+      hashdiff (>= 0.4.0, < 2.0.0)
+    webrick (1.9.1)
 
 PLATFORMS
   arm64-darwin-24
-  ruby
   x86_64-linux
 
 DEPENDENCIES
-  activesupport (~> 3.2.22.5)
+  activesupport (~> 5.0)
   bcrypt_pbkdf (>= 1.0, < 2.0)
   binding_of_caller (~> 1.0)
   capistrano (~> 3)
   capistrano-bundler
   capistrano-locally
   capistrano-rbenv
+  concurrent-ruby (= 1.3.4)
+  crack (= 0.4.5)
   cube-ruby
   ed25519 (>= 1.2, < 2.0)
   email_spec
-  ffi (~> 1.16.3)
+  ffi
   goo!
   google-analytics-data (= 0.6.0)
   google-apis-analytics_v3
   google-protobuf (= 3.25.3)
   json-ld
   mail (= 2.6.6)
-  minitest (< 5.0)
+  minitest
+  minitest-hooks (~> 1.5)
   multi_json
   ncbo_annotator!
   ncbo_cron!
@@ -361,6 +390,7 @@ DEPENDENCIES
   parseconfig
   pony
   pry
+  rackup
   rake
   redis
   request_store
@@ -369,7 +399,8 @@ DEPENDENCIES
   simplecov-cobertura
   sparql-client!
  sys-proctable
-  test-unit-minitest
+  webmock
+  webrick
 
 BUNDLED WITH
    2.4.22
diff --git a/bin/migrations/compare_counts.rb b/bin/migrations/compare_counts.rb
index 11a7b3e..d1a5b2a 100755
--- a/bin/migrations/compare_counts.rb
+++ b/bin/migrations/compare_counts.rb
@@ -13,7 +13,6 @@
 PROCESSED_DIR = ARGV[0] || './processed_files'
 profile = ARGV[1]
-
 case profile
 when 'ag'
   # AllegroGraph backend
@@ -104,9 +103,10 @@ def compare_graphs_with_files(graph_triples)
     graph_filename = graphs_files[graph_uri]
     next csv << [graph_uri, triples_count, "Graph not found", "N/A"] unless graph_filename
-
+    # Construct the expected file name based on the graph URI
     file_name = "#{PROCESSED_DIR}/#{graph_filename}"
+    # puts "count lines of the file #{file_name} for the graph #{graph_uri}"
     if File.exist?(file_name)
       file_lines_count = count_file_lines(file_name)
diff --git a/config/config.rb.sample b/config/config.rb.sample
index 1387a9f..cad0762 100644
--- a/config/config.rb.sample
+++ b/config/config.rb.sample
@@ -92,3 +92,4 @@ NcboCron.config do |config|
 end
 Goo.use_cache = true
+Goo.slice_loading_size = GOO_SLICES.to_i
diff --git a/config/deploy.rb b/config/deploy.rb
index 0c95ee8..59c1170 100644
--- a/config/deploy.rb
+++ b/config/deploy.rb
@@ -1,26 +1,105 @@
-set :repo_url, "git@github.com:biodivportal/ncbo_cron.git"
-set :user, 'ontoportal'
+set :author, "ontoportal-lirmm"
+set :application, "ncbo_cron"
+set :repo_url, "https://github.com/#{fetch(:author)}/#{fetch(:application)}.git"
 
-set :deploy_to, '/srv/ontoportal/ncbo_cron_deployments'
+set :deploy_via, :remote_cache
+# Default branch is :master
+# ask :branch, `git rev-parse --abbrev-ref HEAD`.chomp
 
-set :stages, %w[appliance]
-set :default_stage, 'appliance'
-set :stage, 'appliance'
-set :application, 'cron'
+# Default deploy_to directory is /var/www/my_app_name
+set :deploy_to, "/srv/ontoportal/#{fetch(:application)}"
 
-# SSH parameters
-set :ssh_port, 22
-set :pty, true
+# Default value for :log_level is :debug
+set :log_level, :debug
 
-# Source code
-set :repository_cache, "git_cache"
-set :deploy_via, :remote_cache
-set :ssh_options, { :forward_agent => true }
+# Default value for :linked_files is []
+# append :linked_files, "config/database.yml", 'config/master.key'
+
+# Default value for linked_dirs is []
+# set :linked_dirs, %w{log tmp/pids tmp/cache tmp/sockets vendor/bundle public/system}
+set :linked_dirs, %w{log vendor/bundle tmp/pids tmp/sockets public/system}
+
+
+# Default value for keep_releases is 5
+set :keep_releases, 5
+set :config_folder_path, "#{fetch(:application)}/#{fetch(:stage)}"
+
+
+# SSH proxy configuration for deploying through a jumphost
+SSH_JUMPHOST = ENV.include?('SSH_JUMPHOST') ? ENV['SSH_JUMPHOST'] : 'jumpbox.hostname.com'
+SSH_JUMPHOST_USER = ENV.include?('SSH_JUMPHOST_USER') ? ENV['SSH_JUMPHOST_USER'] : 'username'
+
+JUMPBOX_PROXY = "#{SSH_JUMPHOST_USER}@#{SSH_JUMPHOST}"
+set :ssh_options, {
+  user: 'ontoportal',
+  forward_agent: 'true',
+  keys: %w(config/deploy_id_rsa),
+  auth_methods: %w(publickey),
+  # use ssh proxy if API servers are on a private network
+  proxy: Net::SSH::Proxy::Command.new("ssh #{JUMPBOX_PROXY} -W %h:%p")
+}
+
+# private git repo for configuration
+PRIVATE_CONFIG_REPO = ENV.include?('PRIVATE_CONFIG_REPO') ? ENV['PRIVATE_CONFIG_REPO'] : 'https://your_github_pat_token@github.com/your_organization/ontoportal-configs.git'
+
+desc "Check if agent forwarding is working"
+task :forwarding do
+  on roles(:all) do |h|
+    if test("env | grep SSH_AUTH_SOCK")
+      info "Agent forwarding is up to #{h}"
+    else
+      error "Agent forwarding is NOT up to #{h}"
+    end
+  end
+end
+
+# Smoke test for checking if the service is up
+desc 'Smoke test: Check if ncbo_cron service is running'
+task :smoke_test do
+  on roles(:app), in: :sequence, wait: 5 do
+    # Check if the service is running using systemctl
+    result = `systemctl is-active ncbo_cron`
+    if result.strip == 'active'
+      info "ncbo_cron service is up and running!"
+    else
+      error "ncbo_cron service failed to start."
+    end
+  end
+end
+
+namespace :deploy do
+
+  desc 'Incorporate the private repository content'
+  # Get configuration from repo if PRIVATE_CONFIG_REPO env var is set
+  # or get config from local directory if LOCAL_CONFIG_PATH env var is set
+  task :get_config do
+    if defined?(PRIVATE_CONFIG_REPO)
+      TMP_CONFIG_PATH = "/tmp/#{SecureRandom.hex(15)}".freeze
+      on roles(:app) do
+        execute "git clone -q #{PRIVATE_CONFIG_REPO} #{TMP_CONFIG_PATH}"
+        execute "rsync -av #{TMP_CONFIG_PATH}/#{fetch(:config_folder_path)}/ #{release_path}/"
+        execute "rm -rf #{TMP_CONFIG_PATH}"
+      end
+    elsif defined?(LOCAL_CONFIG_PATH)
+      on roles(:app) do
+        execute "rsync -av #{LOCAL_CONFIG_PATH}/#{fetch(:application)}/ #{release_path}/"
+      end
+    end
+  end
 
-# Linked files and directories
-append :linked_files, "config/config.rb"
-append :linked_dirs, 'logs', '.bundle'
-set :keep_releases, 2
+  desc 'Restart application'
+  task :restart do
+    on roles(:app), in: :sequence, wait: 5 do
+      # Your restart mechanism here, for example:
+      # execute :touch, release_path.join('tmp/restart.txt')
+      execute 'sudo systemctl restart ncbo_cron'
+      execute 'sleep 5'
+    end
+  end
 
+  after :updating, :get_config
+  after :publishing, :restart
+  after :restart, :smoke_test
+end
diff --git a/config/deploy/production.rb b/config/deploy/production.rb
deleted file mode 100644
index 4c8107a..0000000
--- a/config/deploy/production.rb
+++ /dev/null
@@ -1,13 +0,0 @@
-set :branch, 'master'
-set :server, '192.168.0.22'
-
-server fetch(:server), user: fetch(:user), roles: %w{web app}
-
-set :ssh_options, {
-  user: 'ontoportal',
-  forward_agent: 'true',
-  #keys: %w(config/deploy_id_rsa),
-  auth_methods: %w(publickey),
-  # use ssh proxy if UI servers are on a private network
-  proxy: Net::SSH::Proxy::Command.new('ssh guest@134.176.27.193 -W %h:%p')
-}
diff --git a/config/deploy/test.rb b/config/deploy/test.rb
index 2bdc1eb..8fc790c 100644
--- a/config/deploy/test.rb
+++ b/config/deploy/test.rb
@@ -1,14 +1,16 @@
-set :branch, 'master'
-set :server, 'biodivportal.gfbio.dev'
-
-server fetch(:server), user: fetch(:user), roles: %w{web app}
-
-set :ssh_options, {
-  user: 'ontoportal',
-  forward_agent: 'true',
-  port: 30082
-  #keys: %w(config/deploy_id_rsa),
-  #auth_methods: %w(publickey),
-  # use ssh proxy if UI servers are on a private network
-  #proxy: Net::SSH::Proxy::Command.new('ssh deployer@sshproxy.ontoportal.org -W %h:%p')
-}
+# Simple Role Syntax
+# ==================
+# Supports bulk-adding hosts to roles, the primary
+# server in each group is considered to be the first
+# unless any hosts have the primary property set.
+# Don't declare `role :all`, it's a meta role
+role :app, %w{testportal.lirmm.fr}
+set :branch, ENV.include?('BRANCH') ? ENV['BRANCH'] : 'development'
+# Extended Server Syntax
+# ======================
+# This can be used to drop a more detailed server
+# definition into the server list. The second argument,
+# something that quacks like a hash, can be used to set
+# extended properties on the server.
+#server 'example.com', user: 'deploy', roles: %w{web app}, my_property: :my_value
+set :log_level, :error
\ No newline at end of file
diff --git a/lib/ncbo_cron/config.rb b/lib/ncbo_cron/config.rb
index 37c70a4..23562aa 100644
--- a/lib/ncbo_cron/config.rb
+++ b/lib/ncbo_cron/config.rb
@@ -97,12 +97,12 @@ def config(&block)
   @settings.cron_dictionary_generation_cron_job ||= "30 3 * * *"
   @settings.log_level ||= :info
 
-  unless (@settings.log_path && File.exists?(@settings.log_path))
+  unless (@settings.log_path && File.exist?(@settings.log_path))
     log_dir = File.expand_path("../../../logs", __FILE__)
     FileUtils.mkdir_p(log_dir)
     @settings.log_path = "#{log_dir}/scheduler.log"
   end
-  if File.exists?("/var/run/ncbo_cron")
+  if File.exist?("/var/run/ncbo_cron")
     pid_path = File.expand_path("/var/run/ncbo_cron/ncbo_cron.pid", __FILE__)
   else
     pid_path = File.expand_path("../../../ncbo_cron.pid", __FILE__)
diff --git a/lib/ncbo_cron/ontology_helper.rb b/lib/ncbo_cron/ontology_helper.rb
index 2014b17..ecf45e2 100644
--- a/lib/ncbo_cron/ontology_helper.rb
+++ b/lib/ncbo_cron/ontology_helper.rb
@@ -72,7 +72,7 @@ def self.do_ontology_pull(ontology_acronym, enable_pull_umls = false, umls_downl
         file.close
         new_submission
       else
-        raise self::RemoteFileException.new(last)
+        raise RemoteFileException.new(last)
       end
     end
 
@@ -184,4 +184,4 @@ def self.new_file_exists?(file, last)
   end
 end
-end
\ No newline at end of file
+end
diff --git a/lib/ncbo_cron/ontology_submission_parser.rb b/lib/ncbo_cron/ontology_submission_parser.rb
index bd56699..55d4a29 100644
--- a/lib/ncbo_cron/ontology_submission_parser.rb
+++ b/lib/ncbo_cron/ontology_submission_parser.rb
@@ -156,7 +156,7 @@ def process_submission(logger, submission_id, actions=ACTIONS)
       if sub
         sub.bring_remaining
         sub.ontology.bring(:acronym)
-        FileUtils.mkdir_p(sub.data_folder) unless Dir.exists?(sub.data_folder)
+        FileUtils.mkdir_p(sub.data_folder) unless Dir.exist?(sub.data_folder)
         log_path = sub.parsing_log_path
         logger.info "Logging parsing output to #{log_path}"
         logger1 = Logger.new(log_path)
diff --git a/lib/ncbo_cron/scheduler.rb b/lib/ncbo_cron/scheduler.rb
index 75badc7..809987c 100644
--- a/lib/ncbo_cron/scheduler.rb
+++ b/lib/ncbo_cron/scheduler.rb
@@ -1,81 +1,104 @@
 require 'logger'
-
 # Scheduling/lock gems
 require 'redis-lock'
-require 'rufus/scheduler'
+require 'rufus-scheduler'
 
 module NcboCron
   class Scheduler
-    ##
-    # Schedule a job with redis-supported locking
-    # options:
-    #   life: length in seconds of the initial lock
-    #   job_name: the scheduled job's name
-    #   logger: a logger to track errors/debug output
-    #   relock_preiod: number of seconds to re-lock for, holds lock during job
-    #   redis_host: hostname where redis lock will be performed
-    #   redis_port: port for redis host
-    #   process: a proc that can be run
-    #   minutes_between: how many minutes between job runs (default: 5)
-    #   seconds_between: how many seconds between job runs (priority given to minutes if both passed)
-    #   block: block of code that is the scheduled job
-    def self.scheduled_locking_job(options = {}, &block)
-      lock_life = options[:life] || 10*60
-      job_name = options[:job_name] || "ncbo_cron"
-      logger = options[:logger] || Logger.new($stdout)
-      relock_period = options[:relock_period] || lock_life - 15
-      redis_host = options[:redis_host] || "localhost"
-      redis_port = options[:redis_port] || 6379
-      process = options[:process]
-      minutes_between = options[:minutes_between]
-      seconds_between = options[:seconds_between]
-      scheduler_type = options[:scheduler_type] || :every
-      cron_schedule = options[:cron_schedule]
-      cron_schedule_long = options[:cron_schedule_long]
+    class << self
+      attr_reader :scheduler
 
-      if scheduler_type == :every
-        # Minutes/seconds string prep
-        interval = "#{seconds_between*1000}" if seconds_between
-        interval = "#{minutes_between}m" if minutes_between
-        interval = "5m" unless interval
-      end
+      def scheduled_locking_job(options = {}, &block)
+        lock_life = options[:life] || 10*60
+        job_name = options[:job_name] || "ncbo_cron"
+        logger = options[:logger] || Logger.new($stdout)
+        relock_period = options[:relock_period] || lock_life - 15
+        redis_host = options[:redis_host] || "localhost"
+        redis_port = options[:redis_port] || 6379
+        process = options[:process]
+        minutes_between = options[:minutes_between]
+        seconds_between = options[:seconds_between]
+        scheduler_type = options[:scheduler_type] || :every
+        cron_schedule = options[:cron_schedule]
 
-      if scheduler_type == :cron
-        interval = cron_schedule
-      end
+        # Determine interval based on scheduler type
+        if scheduler_type == :every
+          interval = if seconds_between
+                       "#{seconds_between}s"
+                     elsif minutes_between
+                       "#{minutes_between}m"
+                     else
+                       "5m"
+                     end
+        elsif scheduler_type == :cron
+          interval = cron_schedule
+        end
+
+        redis = Redis.new(host: redis_host, port: redis_port)
+
+        # Initialize scheduler only if it's not already running
+        @scheduler ||= begin
+          s = Rufus::Scheduler.new(
+            lockfile: nil, # Disable file locking as we're using Redis
+            thread_name: "scheduler_#{job_name}"
+          )
 
-      redis = Redis.new(host: redis_host, port: redis_port)
-      scheduler = Rufus::Scheduler.start_new(:thread_name => job_name)
+          # Add shutdown hook for clean termination
+          Signal.trap('TERM') do
+            s.shutdown(:wait)
+            exit
+          end
 
-      scheduler.send(scheduler_type, interval, {:allow_overlapping => false}) do
-        redis.lock(job_name, life: lock_life, owner: "ncbo_cron") do
-          pid = fork do
-            $0 = job_name # rename the process
-            begin
-              logger.debug("#{job_name} -- Lock acquired"); logger.flush
+          s
+        end
 
-              # Spawn a thread to re-acquire the lock every 60 seconds
-              Thread.new do
-                sleep(relock_period) do
-                  logger.debug("Re-locking for #{lock_life}")
-                  lock.extend_life(lock_life)
+        begin
+          # Schedule the job based on type
+          @scheduler.send(scheduler_type, interval, job: true, overlap: false, tag: job_name) do
+            redis.lock(job_name, life: lock_life, owner: "ncbo_cron") do |lock|
+              pid = fork do
+                $0 = job_name # rename the process
+                begin
+                  logger.debug("#{job_name} -- Lock acquired")
+                  logger.flush
+
+                  # Create a thread for lock renewal
+                  renewal_thread = Thread.new do
+                    loop do
+                      sleep(relock_period)
+                      begin
+                        logger.debug("Re-locking for #{lock_life}")
+                        lock.extend_life(lock_life)
+                      rescue => e
+                        logger.error("Lock renewal failed: #{e.message}")
+                        break
+                      end
+                    end
+                  end
+
+                  # Run the process
+                  yield if block_given?
+                  process&.call
+
+                ensure
+                  renewal_thread&.kill
+                  Kernel.exit!
+                end
               end
-            end
 
-              # Run the process if we have a job
-              yield if block_given?
-              process.call if process
-            ensure
-              Kernel.exit!
+                logger.debug("#{job_name} -- running in pid #{pid}")
+                logger.flush
+                Process.wait(pid)
+              end
             end
-          end
-          logger.debug("#{job_name} -- running in pid #{pid}"); logger.flush
-          Process.wait(pid)
+        rescue Rufus::Scheduler::NotRunningError => e
+          logger.error("Failed to schedule job: #{e.message}")
+          raise
         end
-      end
 
-      # Wait for scheduling (don't exit)
-      scheduler.join
+        # Wait for scheduling
+        @scheduler.join unless @scheduler.nil?
+      end
     end
   end
-end
\ No newline at end of file
+end
diff --git a/ncbo_cron.gemspec b/ncbo_cron.gemspec
index 25e2f30..ec24931 100644
--- a/ncbo_cron.gemspec
+++ b/ncbo_cron.gemspec
@@ -23,5 +23,5 @@ Gem::Specification.new do |gem|
   gem.add_dependency("ncbo_annotator")
   gem.add_dependency("ontologies_linked_data")
   gem.add_dependency("redis")
-  gem.add_dependency("rufus-scheduler", "~> 2.0.24")
+  gem.add_dependency("rufus-scheduler")
 end
diff --git a/start_ontoportal_services.sh b/start_ontoportal_services.sh
deleted file mode 100755
index 9a8a982..0000000
--- a/start_ontoportal_services.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env bash
-profile=$1
-acronym=$2
-set -e
-
-
-if [ -z "$profile" ]; then
-  echo "Usage: $0 <profile>"
-  exit 1
-fi
-
-BACKEND_TYPE=$profile
-if [ "$BACKEND_TYPE" == "ag" ]; then
-  # AllegroGraph backend
-  export GOO_BACKEND_NAME="allegrograph"
-  export GOO_PORT="10035"
-  export GOO_PATH_QUERY="/repositories/ontoportal_test"
-  export GOO_PATH_DATA="/repositories/ontoportal_test/statements"
-  export GOO_PATH_UPDATE="/repositories/ontoportal_test/statements"
-  export COMPOSE_PROFILES="ag"
-
-elif [ "$BACKEND_TYPE" == "fs" ]; then
-  # 4store backend
-  export GOO_PORT="9000"
-  export COMPOSE_PROFILES="fs"
-
-elif [ "$BACKEND_TYPE" == "vo" ]; then
-  # Virtuoso backend
-  export GOO_BACKEND_NAME="virtuoso"
-  export GOO_PORT="8890"
-  export GOO_PATH_QUERY="/sparql"
-  export GOO_PATH_DATA="/sparql"
-  export GOO_PATH_UPDATE="/sparql"
-  export COMPOSE_PROFILES="vo"
-
-elif [ "$BACKEND_TYPE" == "gb" ]; then
-  # Graphdb backend
-  export GOO_BACKEND_NAME="graphdb"
-  export GOO_PORT="7200"
-  export GOO_PATH_QUERY="/repositories/ontoportal"
-  export GOO_PATH_DATA="/repositories/ontoportal/statements"
-  export GOO_PATH_UPDATE="/repositories/ontoportal/statements"
-else
-  echo "Error: Unknown backend type. Please set BACKEND_TYPE to 'ag', 'fs', or 'vo'."
-fi
-
-echo "###########################################################################"
-echo "Stop and remove all containers, networks, and volumes and start fresh"
-docker compose --profile fs --profile vo --profile gb --profile ag down --volumes --remove-orphans && docker compose --profile "$profile" up -d
-
-echo "Waiting for all Docker services to start..."
-
-while true; do
-  # Get the status of all containers
-  container_status=$(docker compose --profile "$profile" ps -a --format '{{.Names}} {{.State}}')
-
-  all_running=true
-  while read -r container state; do
-    if [ "$state" != "running" ] && [ "$state" != "exited" ]; then
-      all_running=false
-      break
-    fi
-  done <<< "$container_status"
-
-  # If all containers are running, exit the loop
-  if [ "$all_running" = true ]; then
-    echo "All containers are running!"
-    break
-  fi
-
-  # Wait before checking again
-  sleep 2
-done
-
-if [ -z "$acronym" ]; then
-  exit 0
-fi
-
-echo "###########################################################################"
-echo "Create a new user and make it an admin"
-bundle exec rake user:create[admin,admin@nodomain.org,password]
-bundle exec rake user:adminify[admin]
-echo "###########################################################################"
-echo "Create a new ontology $acronym and import it from a remote server"
-bin/ncbo_ontology_import --admin-user admin -o "$acronym" --from https://data.stageportal.lirmm.fr --from-apikey 82602563-4750-41be-9654-36f46056a0db
diff --git a/test/data/virtuoso.ini b/test/data/virtuoso.ini
deleted file mode 100644
index 2543ff2..0000000
--- a/test/data/virtuoso.ini
+++ /dev/null
@@ -1,61 +0,0 @@
-[Database]
-DatabaseFile = ./database/virtuoso.db
-ErrorLogFile = ./database/virtuoso.log
-TransactionFile = ./database/virtuoso.trx
-xa_persistent_file = ./database/virtuoso.pxa
-MaxCheckpointRemap = 200000
-CheckpointInterval = 60
-NumberOfBuffers = 2450000 ; Each buffer is 8KB, so ~19GB total
-MaxDirtyBuffers = 1837500 ; About 75% of NumberOfBuffers
-TransactionAfterImageLimit = 50000000
-; NumberOfBuffers = 1000000
-MaxStaticCursorRows = 5000
-Striping = 0
-TempStorage = .
-ErrorLogLevel = 7
-
-[HTTPServer]
-ServerPort = 8890
-ServerRoot = ./var/lib/virtuoso/vsp
-MaxClientConnections = 200
-MaxKeepAlives = 10
-KeepAliveTimeout = 10
-ServerThreads = 50
-HttpTimeout = 300
-MaxBody = 20000000
-EnableGzip = 1
-GzipMimeType = text/html, text/xml, text/plain, text/css, application/xml, application/xhtml+xml, application/rss+xml, application/javascript, application/x-javascript, image/svg+xml
-HTTPLogFile = ./http17012025.log
-
-[Parameters]
-ServerPort = 1111
-NumOfThreads = 100
-MaxMem = 20000000000 ; 5GB memory
-ResultSetMaxRows = 10000
-DirsAllowed = ., ./vad, ./virtuoso, ../migration-to-virtuoso,../migration-to-virtuoso/processed_files
-MaxQueryCostEstimationTime = 6000
-MaxQueryExecutionTime = 6000
-DynamicLocal = 1
-LogEnable = 2 ; Enable SPARQL query logging
-TraceOn = errors
-LogFile = virtuoso.log
-NumberOfBuffers = 2450000 ; Each buffer is 8KB, so ~19GB total
-MaxDirtyBuffers = 1837500 ; About 75% of NumberOfBuffers
-
-[VDB]
-ArrayOptimization = 0
-NumArrayParams = 0
-VDBDisconnectTimeout = 1000
-KeepAliveTimeout = 60
-RetryCount = 3
-ThreadCleanupInterval = 600
-
-[Replication]
-ServerName = virtuoso
-ServerEnable = 1
-
-[SPARQL]
-ResultSetMaxRows = 1000000000000
-MaxQueryExecutionTime = 6000
-DefaultGraph = http://localhost:8890/sparql
-MaxSortedTopRows = 10000
diff --git a/test/data/virtuso_grant_write_permission.sh b/test/data/virtuso_grant_write_permission.sh
deleted file mode 100755
index ac45665..0000000
--- a/test/data/virtuso_grant_write_permission.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-# Virtuoso database connection credentials
-DB_PORT=1111
-DB_USER="dba"
-DB_PASS="dba"
-VIRTUOSO_DIR=$1
-
-if [ "$#" -ne 1 ]; then
-  VIRTUOSO_DIR="/opt/virtuoso-opensource/"
-fi
-# Connect to Virtuoso using isql and grant EXECUTE permission
-echo "-- Granting EXECUTE permission on DB.DBA.SPARQL_INSERT_DICT_CONTENT..."
-
-$VIRTUOSO_DIR/bin/isql $DB_PORT $DB_USER $DB_PASS <<EOF
[...]
     @@redis = Redis.new(:host => NcboCron.settings.redis_host, :port => NcboCron.settings.redis_port)
     db_size = @@redis.dbsize
@@ -25,7 +25,7 @@ def self.before_suite
     @@ont_count, @@acronyms, @@ontologies = LinkedData::SampleData::Ontology.create_ontologies_and_submissions(ont_count: 2, submission_count: 2, process_submission: false)
   end
 
-  def self.after_suite
+  def after_suite
     @@redis.del NcboCron::Models::OntologySubmissionParser::QUEUE_HOLDER
     LinkedData::SampleData::Ontology.delete_ontologies_and_submissions
   end