From 0bef8b31b51f733fd252d41bca880a70181cb9f7 Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 15:04:44 -0300 Subject: [PATCH 1/7] Adds #not_found to GithubFecher::Resource --- app/models/github_fetcher/resource.rb | 4 ++++ test/unit/github_fetcher/resource_test.rb | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 test/unit/github_fetcher/resource_test.rb diff --git a/app/models/github_fetcher/resource.rb b/app/models/github_fetcher/resource.rb index cf2331705..560191f22 100644 --- a/app/models/github_fetcher/resource.rb +++ b/app/models/github_fetcher/resource.rb @@ -56,6 +56,10 @@ def success? false end + def not_found? + status == 404 ? true : false + end + def bad_token? if status == 401 && response.body.match?(/Bad credentials/) @error = @error_message = "Bad credentials" diff --git a/test/unit/github_fetcher/resource_test.rb b/test/unit/github_fetcher/resource_test.rb new file mode 100644 index 000000000..e1ae1bd73 --- /dev/null +++ b/test/unit/github_fetcher/resource_test.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require 'test_helper' + +class GithubFetcher::ResourceTest < ActiveSupport::TestCase + def status_validation(mocked_status, method, expected_response) + GithubFetcher::Resource.any_instance.stubs(:status).returns(mocked_status) + resource = GithubFetcher::Resource.new({}) + assert_equal expected_response, resource.send(method) + end + + test '#not_found? is true on 404 status responses' do + status_validation(404, :not_found?, true) + end + + test '#not_found? is false on non 404 status responses' do + [200, 201, 204, 400, 403, 500, 502].each { |status| status_validation(status, :not_found?, false) } + end +end From 86bac12ca8744ca436af3e3e623e60087a32b0d1 Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 17:18:19 -0300 Subject: [PATCH 2/7] Adds memoization to Repo#fetcher_json --- app/models/repo.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/repo.rb b/app/models/repo.rb index d9719b079..3ac0058ee 100644 --- a/app/models/repo.rb +++ b/app/models/repo.rb @@ -37,7 +37,7 @@ def fetcher end def fetcher_json - fetcher.as_json + @fetcher_json ||= fetcher.as_json end def issues_fetcher From c7a0755ccc26ee76c497db85475b1d54fb7914f5 Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 17:23:00 -0300 Subject: [PATCH 3/7] Adds logic to Repo#update_from_github handle more sittuations (like removeal of the repo and change of properties) --- app/models/repo.rb | 26 +++++++--- test/jobs/update_repo_info_job_test.rb | 68 ++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/app/models/repo.rb b/app/models/repo.rb index 3ac0058ee..921bf3a85 100644 --- a/app/models/repo.rb +++ b/app/models/repo.rb @@ -192,12 +192,26 @@ def self.exists_with_name?(name) end def update_from_github - json = fetcher.as_json - self.update( - language: json.fetch('language', language), - description: json.fetch('description', description)&.first(255), - stars_count: json.fetch('stargazers_count', stars_count) - ) + if fetcher.not_found? + self.update!(removed_from_github: true) + elsif fetcher.success? + repo_full_name = fetcher_json.fetch('full_name', full_name) + + if repo_full_name != full_name && self.class.exists_with_name?(repo_full_name) + # TODO: Add deduplication step + return + end + + repo_user_name, repo_name = repo_full_name.split("/") + self.update!( + name: repo_name, + user_name: repo_user_name, + language: fetcher_json.fetch('language', language), + description: fetcher_json.fetch('description', description)&.first(255), + full_name: repo_full_name, + removed_from_github: false + ) + end end def repo_path diff --git a/test/jobs/update_repo_info_job_test.rb b/test/jobs/update_repo_info_job_test.rb index 2fa76dfa1..d6b25f0f5 100644 --- a/test/jobs/update_repo_info_job_test.rb +++ b/test/jobs/update_repo_info_job_test.rb @@ -2,8 +2,70 @@ require 'test_helper' +# frozen_string_literal: true + +require 'test_helper' class UpdateRepoInfoJobTest < ActiveJob::TestCase - # test "the truth" do - # assert true - # end + test 'repo deleted or made private' do + GithubFetcher::Resource.any_instance.stubs(:status).returns(404) + @repo = repos(:node) + assert_changes -> { + [ + @repo.reload.removed_from_github, + ] + } do + UpdateRepoInfoJob.perform_now(@repo) + end + assert @repo.reload.removed_from_github + end + + test 'repo with information updated' do + GithubFetcher::Resource.any_instance.stubs(:status).returns(200) + GithubFetcher::Resource.any_instance.stubs(:as_json).returns( + { + 'full_name' => 'test_owner/test_repo', + 'language' => 'test_language', + 'description' => 'test_description' + } + ) + @repo = repos(:node) + assert_changes -> { + [ + @repo.reload.full_name, + @repo.reload.name, + @repo.reload.user_name, + @repo.reload.language, + @repo.reload.description, + ] + } do + UpdateRepoInfoJob.perform_now(@repo) + end + assert_equal false, @repo.reload.removed_from_github + assert_equal 'test_owner/test_repo', @repo.reload.full_name + assert_equal 'test_repo', @repo.reload.name + assert_equal 'test_owner', @repo.reload.user_name + assert_equal 'test_language', @repo.reload.language + assert_equal 'test_description', @repo.reload.description + end + + test 'repo rename conflict' do + GithubFetcher::Resource.any_instance.stubs(:status).returns(200) + GithubFetcher::Resource.any_instance.stubs(:as_json).returns( + { + 'full_name' => 'sinatra/sinatra', + } + ) + @repo = repos(:node) + assert_no_changes -> { + [ + @repo.reload.full_name, + @repo.reload.name, + @repo.reload.user_name, + @repo.reload.language, + @repo.reload.description, + ] + } do + UpdateRepoInfoJob.perform_now(@repo) + end + end end From 186e0c8b2b39261ad31c583a6080c9ced1968de2 Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 18:23:27 -0300 Subject: [PATCH 4/7] Increases the scope of the 'mark_removed_repos' task to a broader 'update_repos' one --- lib/tasks/schedule.rake | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/lib/tasks/schedule.rake b/lib/tasks/schedule.rake index 7c2d85873..04526236c 100644 --- a/lib/tasks/schedule.rake +++ b/lib/tasks/schedule.rake @@ -38,17 +38,12 @@ namespace :schedule do return GithubFetcher::Repo.new(user_name: "rails", name: "rails").success? end - desc "Checks if repos have been deleted on GitHub" - task mark_removed_repos: :environment do + desc "Update repos information" + task update_repos: :environment do raise "GITHUB API APPEARS TO BE DOWN" unless github_api_up? - Repo.select(:id, :user_name, :name).find_each(batch_size: 100) do |repo| - fetcher = GithubFetcher::Repo.new(user_name: repo.user_name, name: repo.name) - fetcher.call(retry_on_bad_token: 5) - - if fetcher.response.status == 404 - repo.update!(removed_from_github: true) - end + Repo.find_each(batch_size: 100) do |repo| + repo.update_repo_info! end end From c81a900b9fb366316fb515d4b3702f3a9d58d5c0 Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 18:48:41 -0300 Subject: [PATCH 5/7] Removed unecessary instance variable from UpdateRepoInfoJobTest tests --- test/jobs/update_repo_info_job_test.rb | 51 ++++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/test/jobs/update_repo_info_job_test.rb b/test/jobs/update_repo_info_job_test.rb index d6b25f0f5..7c3a76a78 100644 --- a/test/jobs/update_repo_info_job_test.rb +++ b/test/jobs/update_repo_info_job_test.rb @@ -8,15 +8,16 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase test 'repo deleted or made private' do GithubFetcher::Resource.any_instance.stubs(:status).returns(404) - @repo = repos(:node) + repo = repos(:node) assert_changes -> { [ - @repo.reload.removed_from_github, + repo.removed_from_github, ] } do - UpdateRepoInfoJob.perform_now(@repo) + UpdateRepoInfoJob.perform_now(repo) + repo.reload end - assert @repo.reload.removed_from_github + assert repo.removed_from_github end test 'repo with information updated' do @@ -28,24 +29,25 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase 'description' => 'test_description' } ) - @repo = repos(:node) + repo = repos(:node) assert_changes -> { [ - @repo.reload.full_name, - @repo.reload.name, - @repo.reload.user_name, - @repo.reload.language, - @repo.reload.description, + repo.full_name, + repo.name, + repo.user_name, + repo.language, + repo.description, ] } do - UpdateRepoInfoJob.perform_now(@repo) + UpdateRepoInfoJob.perform_now(repo) + repo.reload end - assert_equal false, @repo.reload.removed_from_github - assert_equal 'test_owner/test_repo', @repo.reload.full_name - assert_equal 'test_repo', @repo.reload.name - assert_equal 'test_owner', @repo.reload.user_name - assert_equal 'test_language', @repo.reload.language - assert_equal 'test_description', @repo.reload.description + assert_equal false, repo.removed_from_github + assert_equal 'test_owner/test_repo', repo.full_name + assert_equal 'test_repo', repo.name + assert_equal 'test_owner', repo.user_name + assert_equal 'test_language', repo.language + assert_equal 'test_description', repo.description end test 'repo rename conflict' do @@ -55,17 +57,18 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase 'full_name' => 'sinatra/sinatra', } ) - @repo = repos(:node) + repo = repos(:node) assert_no_changes -> { [ - @repo.reload.full_name, - @repo.reload.name, - @repo.reload.user_name, - @repo.reload.language, - @repo.reload.description, + repo.full_name, + repo.name, + repo.user_name, + repo.language, + repo.description, ] } do - UpdateRepoInfoJob.perform_now(@repo) + UpdateRepoInfoJob.perform_now(repo) + repo.reload end end end From 9154ac46520ec8c1f1fd746d6aeb7b48fb4bf8ac Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 18:59:58 -0300 Subject: [PATCH 6/7] Adds 'archived' column to repos table --- app/models/repo.rb | 3 ++- db/migrate/20221013212959_add_archived_column_to_repo.rb | 6 ++++++ db/schema.rb | 4 +++- test/jobs/update_repo_info_job_test.rb | 6 +++++- 4 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 db/migrate/20221013212959_add_archived_column_to_repo.rb diff --git a/app/models/repo.rb b/app/models/repo.rb index 921bf3a85..e574f7bbc 100644 --- a/app/models/repo.rb +++ b/app/models/repo.rb @@ -209,7 +209,8 @@ def update_from_github language: fetcher_json.fetch('language', language), description: fetcher_json.fetch('description', description)&.first(255), full_name: repo_full_name, - removed_from_github: false + removed_from_github: false, + archived: fetcher_json.fetch('archived', archived) ) end end diff --git a/db/migrate/20221013212959_add_archived_column_to_repo.rb b/db/migrate/20221013212959_add_archived_column_to_repo.rb new file mode 100644 index 000000000..9a0a769dc --- /dev/null +++ b/db/migrate/20221013212959_add_archived_column_to_repo.rb @@ -0,0 +1,6 @@ +class AddArchivedColumnToRepo < ActiveRecord::Migration[7.0] + def change + add_column :repos, :archived, :boolean, default: false + add_index :repos, :archived + end +end diff --git a/db/schema.rb b/db/schema.rb index 3d54d0a1d..3042837f7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2022_10_10_011048) do +ActiveRecord::Schema[7.0].define(version: 2022_10_13_212959) do # These are extensions that must be enabled in order to support this database enable_extension "pg_stat_statements" enable_extension "plpgsql" @@ -155,6 +155,8 @@ t.integer "subscribers_count", default: 0 t.integer "docs_subscriber_count", default: 0 t.boolean "removed_from_github", default: false + t.boolean "archived", default: false + t.index ["archived"], name: "index_repos_on_archived" t.index ["full_name"], name: "index_repos_on_full_name" t.index ["issues_count"], name: "index_repos_on_issues_count" t.index ["language"], name: "index_repos_on_language" diff --git a/test/jobs/update_repo_info_job_test.rb b/test/jobs/update_repo_info_job_test.rb index 7c3a76a78..420b048a1 100644 --- a/test/jobs/update_repo_info_job_test.rb +++ b/test/jobs/update_repo_info_job_test.rb @@ -26,7 +26,8 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase { 'full_name' => 'test_owner/test_repo', 'language' => 'test_language', - 'description' => 'test_description' + 'description' => 'test_description', + 'archived' => true } ) repo = repos(:node) @@ -37,6 +38,7 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase repo.user_name, repo.language, repo.description, + repo.archived ] } do UpdateRepoInfoJob.perform_now(repo) @@ -48,6 +50,7 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase assert_equal 'test_owner', repo.user_name assert_equal 'test_language', repo.language assert_equal 'test_description', repo.description + assert_equal true, repo.archived end test 'repo rename conflict' do @@ -65,6 +68,7 @@ class UpdateRepoInfoJobTest < ActiveJob::TestCase repo.user_name, repo.language, repo.description, + repo.archived ] } do UpdateRepoInfoJob.perform_now(repo) From 2c30b6f4e5c154f0d9fcabb54b97f32ed6c742dd Mon Sep 17 00:00:00 2001 From: Henrique Bontempo Date: Thu, 13 Oct 2022 19:02:52 -0300 Subject: [PATCH 7/7] Removed duplicated lines from 'update_repo_info_job_test.rb' --- test/jobs/update_repo_info_job_test.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/jobs/update_repo_info_job_test.rb b/test/jobs/update_repo_info_job_test.rb index 420b048a1..3a1b40a9d 100644 --- a/test/jobs/update_repo_info_job_test.rb +++ b/test/jobs/update_repo_info_job_test.rb @@ -1,9 +1,5 @@ # frozen_string_literal: true -require 'test_helper' - -# frozen_string_literal: true - require 'test_helper' class UpdateRepoInfoJobTest < ActiveJob::TestCase test 'repo deleted or made private' do