From 760647470e5cdbb15ac5e03c1ccd499d6d07152a Mon Sep 17 00:00:00 2001
From: h1alexbel
Date: Mon, 8 Jul 2024 17:56:13 +0300
Subject: [PATCH] feat(#228): files in clone.sh

---
NOTE(review): line breaks and indentation of this patch were reconstructed
from a flattened copy; verify context-line whitespace against the tree
before running `git am` (or apply with `git apply --ignore-whitespace`).
The call site in steps/clone.sh now invokes `files` (the function this
patch defines) instead of the undefined `count_files`, and the `find`
expression actually skips the repository's .idea and .git directories.

 steps/clone.sh                     | 16 ++++++++++++++++
 steps/discover-repos.rb            | 26 +-------------------------
 tests/steps/test-discover-repos.sh |  4 ++--
 3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/steps/clone.sh b/steps/clone.sh
index 5e6c140a..a0391810 100755
--- a/steps/clone.sh
+++ b/steps/clone.sh
@@ -37,6 +37,20 @@ total=$(wc -l < "${repos}" | xargs)
 
 "${LOCAL}/help/assert-tool.sh" git --version
 
+# Print "<repo_name>, <count>", where <count> is the number of regular
+# files under <repo_dir>, skipping its top-level .idea and .git directories.
+#   $1 - directory of the cloned repository
+#   $2 - repository name to put into the output line
+files() {
+    local repo_dir="$1"
+    local repo_name="$2"
+    local count
+    # Prune patterns must carry the start-point prefix, and -print must be
+    # explicit, otherwise the pruned directories themselves get printed.
+    count=$(find "${repo_dir}" \( -path "${repo_dir}/.idea" -o -path "${repo_dir}/.git" \) -prune -o -type f -print | wc -l | xargs)
+    echo "${repo_name}, ${count}"
+}
+
 declare -i repo=0
 sh="$(dirname "$0")/clone-repo.sh"
 while IFS=',' read -r r tag tail; do
@@ -45,6 +59,8 @@ while IFS=',' read -r r tag tail; do
     if [ "${tag}" = '.' ]; then tag='master'; fi
     if [ -e "${TARGET}/github/${r}" ]; then
         echo "${r}: Git repo is already here (${tail})"
+        count=$(files "${TARGET}/github/${r}" "${r}")
+        echo "${count}" >> repo_files.csv
     else
         printf "%s %s %s %s %s\n" "${sh@Q}" "${r@Q}" "${tag@Q}" "${repo@Q}" "${total@Q}" >> "${jobs}"
     fi
diff --git a/steps/discover-repos.rb b/steps/discover-repos.rb
index a021dcd5..94819ade 100755
--- a/steps/discover-repos.rb
+++ b/steps/discover-repos.rb
@@ -103,29 +103,6 @@ def cooldown(opts, found)
   sleep opts[:pause]
 end
 
-def fetch_contents(github, repo, ref, path)
-  contents = github.contents(repo, { path: path, ref: ref })
-  count = 0
-  contents.each do |content|
-    if content[:type] == 'file'
-      count += 1
-    elsif content[:type] == 'dir'
-      count += fetch_contents(github, repo, ref, content[:path])
-    end
-  end
-  count
-end
-
-def files_in_repo(github, repo, ref, path = '')
-  fetch_contents(github, repo, ref, path)
-rescue Octokit::NotFound
-  puts "There is no contents inside #{repo}"
-  0
-rescue Octokit::TooManyRequests
-  puts 'Rate limit to GitHub API exceeded, try to pass --token'
-  0
-end
-
 puts 'Not searching GitHub API, using mock repos' if opts[:dry]
 
 def fetch(config)
@@ -165,8 +142,7 @@ def fetch(config)
       size: i[:size],
       open_issues_count: i[:open_issues_count],
       description: "\"#{i[:description]}\"",
-      topics: Array(i[:topics]).join(' '),
-      files: files_in_repo(config[:github], i[:full_name], i[:default_branch], '')
+      topics: Array(i[:topics]).join(' ')
     }
     puts "Found #{i[:full_name].inspect} GitHub repo ##{found.count} \
 (#{i[:forks_count]} forks, #{i[:stargazers_count]} stars) with license: #{i[:license][:key]}"
diff --git a/tests/steps/test-discover-repos.sh b/tests/steps/test-discover-repos.sh
index 30504c9f..6b6bb960 100755
--- a/tests/steps/test-discover-repos.sh
+++ b/tests/steps/test-discover-repos.sh
@@ -37,7 +37,7 @@ tex=${TARGET}/foo.tex
     test -e "${csv}"
     test -s "${tex}"
     test "$(wc -l < "${csv}" | xargs)" = '4'
-    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '10'
+    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9'
 } > "${stdout}" 2>&1
 echo "👍🏻 Small repositories discovery test is succeed"
 
@@ -50,6 +50,6 @@ echo "👍🏻 Small repositories discovery test is succeed"
     test -e "${csv}"
     test -s "${tex}"
     test "$(wc -l < "${csv}" | xargs)" = '36'
-    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '10'
+    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9'
 } > "${stdout}" 2>&1
 echo "👍🏻 Medium repositories discovery test is succeed"