Skip to content

Commit

Permalink
Merge pull request #3 from CMUSTRUDEL/dev
Browse files Browse the repository at this point in the history
fix: accept github tokens as class arguments fixes: #1
  • Loading branch information
user2589 authored Jul 7, 2019
2 parents a64a73e + 644f81b commit 5ec5260
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 172 deletions.
20 changes: 14 additions & 6 deletions stscraper/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import random
import re
import six
import time
from typing import Iterable, Iterator, Optional
from functools import wraps
Expand Down Expand Up @@ -147,12 +148,15 @@ class VCSAPI(object):

def __new__(cls, *args, **kwargs): # Singleton
if not isinstance(cls._instance, cls):
cls._instance = super(VCSAPI, cls).__new__(cls, *args, **kwargs)
cls._instance = super(VCSAPI, cls).__new__(cls)
cls._instance.__init__(*args, **kwargs)
return cls._instance

def __init__(self, tokens=None, timeout=30):
# type: (Optional[Iterable], int) -> None
if tokens:
if isinstance(tokens, six.string_types):
tokens = tokens.split(",")
self.tokens = tuple(
self.token_class(t, timeout=timeout) for t in set(tokens))
self.logger = logging.getLogger('scraper.' + self.__class__.__name__)
Expand Down Expand Up @@ -259,17 +263,21 @@ def all_repos(self):
""" """
raise NotImplementedError

def repo_issues(self, repo_name):
def repo_info(self, repo_slug):
# type: (str) -> dict
raise NotImplementedError

def repo_issues(self, repo_slug):
# type: (str) -> Iterable[dict]
""" """
raise NotImplementedError

def repo_commits(self, repo_name):
def repo_commits(self, repo_slug):
# type: (str) -> Iterable[dict]
""" """
raise NotImplementedError

def repo_pulls(self, repo_name):
def repo_pulls(self, repo_slug):
# type: (str) -> Iterable[dict]
""" """
raise NotImplementedError
Expand Down Expand Up @@ -315,13 +323,13 @@ def org_repos(self, org):
raise NotImplementedError

@staticmethod
def project_exists(repo_name):
def project_exists(repo_slug):
# type: (str) -> bool
""" """
raise NotImplementedError

@staticmethod
def canonical_url(project_url):
def canonical_url(repo_slug):
# type: (str) -> str
""" """
raise NotImplementedError
Expand Down
10 changes: 5 additions & 5 deletions stscraper/generic.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@

from .base import *
from .github import GitHubAPI
from .gitlab import GitLabAPI
from .bitbucket import BitbucketAPI

"""
Standard interface to all supported code hosting platforms.
Expand All @@ -14,6 +9,11 @@
2. Returned objects are simplified to a common subset of fields
"""

from .base import *
from .github import GitHubAPI
from .gitlab import GitLabAPI
from .bitbucket import BitbucketAPI

PROVIDERS = {
"github.com": GitHubAPI,
# https://developer.atlassian.com/bitbucket/api/2/reference/resource/
Expand Down
52 changes: 29 additions & 23 deletions stscraper/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,50 +143,56 @@ def all_repos(self):
# https://developer.github.com/v3/repos/#list-all-public-repositories
return ()

@api('repos/%s')
def repo_info(self, repo_slug):
    # type: (Union[str, unicode]) -> dict
    """ Get metadata of a single repository.

    The request is not paginated; the accompanying test expects the
    result to be a single dict rather than an iterator of records.
    NOTE(review): presumably the `api` decorator substitutes the returned
    slug into the 'repos/%s' endpoint template - confirm against its
    definition.

    :param repo_slug: str, user_name/repo_name
    """
    # https://developer.github.com/v3/repos/#get
    return repo_slug

@api_filter(lambda issue: 'pull_request' not in issue)
@api('repos/%s/issues', paginate=True, state='all')
def repo_issues(self, repo_name):
def repo_issues(self, repo_slug):
# type: (Union[str, unicode]) -> Iterator[dict]
# https://developer.github.com/v3/issues/#list-issues-for-a-repository
return repo_name
return repo_slug

@api('repos/%s/issues/comments', paginate=True)
def repo_issue_comments(self, repo_name):
def repo_issue_comments(self, repo_slug):
# type: (Union[str, unicode]) -> Iterator[dict]
""" Get all comments in all issues and pull requests,
both open and closed.
"""
# https://developer.github.com/v3/issues/comments/#list-comments-in-a-repository
return repo_name
return repo_slug

@api('repos/%s/issues/events', paginate=True)
def repo_issue_events(self, repo_name):
def repo_issue_events(self, repo_slug):
# type: (Union[str, unicode]) -> Iterator[dict]
""" Get all events in all issues and pull requests,
both open and closed.
"""
# https://developer.github.com/v3/issues/events/#list-events-for-a-repository
return repo_name
return repo_slug

@api('repos/%s/commits', paginate=True)
def repo_commits(self, repo_name):
def repo_commits(self, repo_slug):
# type: (Union[str, unicode]) -> Iterator[dict]
# https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository
return repo_name
return repo_slug

@api('repos/%s/pulls', paginate=True, state='all')
def repo_pulls(self, repo_name):
def repo_pulls(self, repo_slug):
# type: (Union[str, unicode]) -> Iterator[dict]
# https://developer.github.com/v3/pulls/#list-pull-requests
return repo_name
return repo_slug

def repo_topics(self, repo_name):
def repo_topics(self, repo_slug):
return tuple(
next(self.request('repos/%s/topics' % repo_name)).get('names'))
next(self.request('repos/%s/topics' % repo_slug)).get('names'))

def repo_labels(self, repo_name):
def repo_labels(self, repo_slug):
return tuple(label['name'] for label in
self.request('repos/%s/labels' % repo_name, paginate=True))
self.request('repos/%s/labels' % repo_slug, paginate=True))

@api('repos/%s/pulls/%d/commits', paginate=True, state='all')
def pull_request_commits(self, repo, pr_id):
Expand Down Expand Up @@ -247,22 +253,22 @@ def issue_events(self, repo, issue_no):
# Non-API methods
# ===================================
@staticmethod
def project_exists(repo_name):
def project_exists(repo_slug):
for i in range(5):
try:
return bool(requests.head("https://github.com/" + repo_name))
return bool(requests.head("https://github.com/" + repo_slug))
except requests.RequestException:
time.sleep(2**i)

@staticmethod
def canonical_url(project_url):
def canonical_url(repo_slug):
# type: (str) -> str
""" Normalize URL
- remove trailing .git (IMPORTANT)
- lowercase (API is case insensitive, so lowercase to deduplicate)
- prepend "github.com"
:param project_url: str, user_name/repo_name
:param repo_slug: str, user_name/repo_name
:return: github.com/user_name/repo_name with both names normalized
>>> GitHubAPI.canonical_url("pandas-DEV/pandas")
Expand All @@ -272,7 +278,7 @@ def canonical_url(project_url):
>>> GitHubAPI.canonical_url("https://github.com/A/B/")
'github.com/a/b'
"""
url = project_url.split("//")[-1].lower()
url = repo_slug.split("//")[-1].lower()
for prefix in ("github.com",):
if url.startswith(prefix):
url = url[len(prefix):]
Expand All @@ -288,9 +294,9 @@ def v4(self, query, **params):
payload = json.dumps({"query": query, "variables": params})
return self.request("graphql", 'post', data=payload)

def repo_issues(self, repo_name, cursor=None):
def repo_issues(self, repo_slug, cursor=None):
# type: (str, str) -> Iterator[dict]
owner, repo = repo_name.split("/")
owner, repo = repo_slug.split("/")
query = """query ($owner: String!, $repo: String!, $cursor: String) {
repository(name: $repo, owner: $owner) {
hasIssuesEnabled
Expand Down Expand Up @@ -323,12 +329,12 @@ def repo_issues(self, repo_name, cursor=None):
if not data["issues"]["pageInfo"]["hasNextPage"]:
break

def repo_commits(self, repo_name, cursor=None):
def repo_commits(self, repo_slug, cursor=None):
# type: (str, str) -> Iterator[dict]
"""As of June 2017 GraphQL API does not allow to get commit parents
Until this issue is fixed, this method is kept only for reference.
Please use commits() instead."""
owner, repo = repo_name.split("/")
owner, repo = repo_slug.split("/")
query = """query ($owner: String!, $repo: String!, $cursor: String) {
repository(name: $repo, owner: $owner) {
ref(qualifiedName: "master") {
Expand Down
152 changes: 14 additions & 138 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,20 @@ def test_all_repos(self):
for prop in ('name', 'full_name', 'fork', 'owner'):
self.assertIn(prop, repo)

def test_repo_info(self):
    # Repository info must be a single dict exposing every field below.
    expected_props = (
        'id', 'name', 'full_name', 'owner', 'private', 'description',
        'fork', 'language', 'size', 'topics', 'license', 'default_branch',
        'forks_count', 'stargazers_count', 'watchers_count',
        'has_issues', 'has_projects', 'has_wiki', 'has_pages',
        'has_downloads', 'created_at', 'updated_at'
    )
    repo = self.api.repo_info(self.repo_address)
    self.assertIsInstance(repo, dict)
    for name in expected_props:
        # Build the failure message up front so the assertion stays short.
        message = ("Repository info is expected to have '%s' property,"
                   " but it doesn't" % name)
        self.assertIn(name, repo, message)

def test_repo_issues(self):
issues = self.api.repo_issues(self.repo_address)
self.assertIsInstance(issues, Generator)
Expand Down Expand Up @@ -390,144 +404,6 @@ def setUp(self):
self.api = stscraper.BitbucketAPI()
self.repo_address = 'zzzeek/sqlalchemy'

# def _test_commits(self, commit):
# for prop in ('sha', 'commit', 'author', 'committer', 'parents'):
# self.assertIn(prop, commit,
# "Commit object is expected to have '%s' property,"
# " but it doesn't" % prop)
# for prop in ('author', 'committer', 'message', 'comment_count'):
# self.assertIn(prop, commit['commit'],
# "Commit object is expected to have 'commit.%s' "
# "property, but it doesn't" % prop)
# for prop1 in ('author', 'committer'):
# for prop2 in ('name', 'email', 'date'):
# self.assertIn(prop2, commit['commit'][prop1])
#
# def _test_issue(self, issue):
# for prop in ('number', 'state', 'title', 'body', 'user', 'labels',
# 'assignee', 'closed_at', 'created_at',
# 'updated_at', 'author_association', 'locked'):
# self.assertIn(prop, issue,
# "Issue object is expected to have '%s' property,"
# " but it doesn't" % prop)
#
# def _test_issue_comments(self, comment):
# for prop in ('body', 'user', 'created_at', 'updated_at'):
# self.assertIn(prop, comment,
# "Issue comment is expected to have '%s' property,"
# " but it doesn't" % prop)
#
# def _test_repo(self, repo):
# for prop in ('name', 'full_name', 'fork', 'owner',
# 'has_issues', 'has_projects', 'has_wiki', 'has_pages',
# 'has_downloads', 'license',
# 'stargazers_count', 'forks_count', 'watchers_count',
# 'pushed_at', 'created_at', 'updated_at'):
# self.assertIn(prop, repo,
# "Repository object is expected to have '%s' property,"
# " but it doesn't" % prop)
#
# def test_all_users(self):
# users = self.api.all_users()
# self.assertIsInstance(users, Generator)
# user = next(users)
# self.assertIn('login', user)
#
# def test_all_repos(self):
# repos = self.api.all_repos()
# self.assertIsInstance(repos, Generator)
# repo = next(repos)
# for prop in ('name', 'full_name', 'fork', 'owner'):
# self.assertIn(prop, repo)
#
# def test_repo_issues(self):
# issues = self.api.repo_issues(self.repo_address)
# self.assertIsInstance(issues, Generator)
# issue = next(issues)
# self._test_issue(issue)
# # issues have this property while pull requests don't
# self.assertIn('comments', issue)
#
# def test_repo_commits(self):
# commits = self.api.repo_commits(self.repo_address)
# self.assertIsInstance(commits, Generator)
# commit = next(commits)
# self._test_commits(commit)
#
# def test_repo_pulls(self):
# pulls = self.api.repo_pulls(self.repo_address)
# self.assertIsInstance(pulls, Generator)
# pr = next(pulls)
# self._test_issue(pr)
# for prop in ('merged_at', 'head', 'base'):
# self.assertIn(prop, pr)
#
# def test_repo_topics(self):
# topics = self.api.repo_topics(self.repo_address)
# self.assertIsInstance(topics, list)
#
# def test_pull_request_commits(self):
# commits = self.api.pull_request_commits(self.repo_address, 22457)
# self.assertIsInstance(commits, Generator)
# commit = next(commits)
# self._test_commits(commit)
#
# def test_issue_comments(self):
# comments = self.api.issue_comments(self.repo_address, 22473)
# self.assertIsInstance(comments, Generator)
# comment = next(comments)
# self._test_issue_comments(comment)
#
# def test_review_comments(self):
# comments = self.api.review_comments(self.repo_address, 22457)
# self.assertIsInstance(comments, Generator)
# comment = next(comments)
# self._test_issue_comments(comment)
# for prop in ('diff_hunk', 'commit_id', 'position',
# 'original_position', 'path'):
# self.assertIn(prop, comment)
#
# def test_user_info(self):
# # Docs: https://developer.github.com/v3/users/#response
# user_info = self.api.user_info('pandas-dev')
# self.assertIsInstance(user_info, dict)
# for prop in ('login', 'type', 'name', 'company', 'blog', 'location',
# 'email', 'bio', 'public_repos', 'followers', 'following',
# 'created_at', 'updated_at'):
# self.assertIn(prop, user_info)
#
# def test_user_repos(self):
# """Get list of user repositories"""
# repos = self.api.user_repos('pandas-dev')
# self.assertIsInstance(repos, Generator)
# repo = next(repos)
# self._test_repo(repo)
#
# def test_user_orgs(self):
# orgs = self.api.user_orgs('user2589')
# self.assertIsInstance(orgs, Generator)
# org = next(orgs)
# for prop in ('login', 'description'):
# self.assertIn(prop, org)
#
# def test_org_members(self):
# members = self.api.org_members('cmustrudel')
# self.assertIsInstance(members, Generator)
# user = next(members)
# for prop in ('login', 'type'):
# self.assertIn(prop, user)
#
# def test_org_repos(self):
# repos = self.api.org_repos('cmustrudel')
# self.assertIsInstance(repos, Generator)
# repo = next(repos)
# self._test_repo(repo)
#
# def test_pagination(self):
# # 464 commits as of Aug 2018
# commits = list(self.api.repo_commits('benjaminp/six'))
# self.assertGreater(len(commits), 463)


class TestGeneric(unittest.TestCase):

Expand Down

0 comments on commit 5ec5260

Please sign in to comment.