Skip to content

Commit

Permalink
feat: support pagination in GitHub graphql APIs returning objects in …
Browse files Browse the repository at this point in the history
…'edges'
  • Loading branch information
user2589 committed Sep 25, 2020
1 parent c1120ca commit bb92758
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
21 changes: 12 additions & 9 deletions stscraper/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,28 +71,31 @@ def parse_url(url):
return None, None


def json_path(obj, path, raise_on_missing=False):
    """ Get a value from nested dicts by the specified path.

    `path` is an iterable of string keys, applied to `obj` one after another.
    A chunk starting with a comma (e.g. ',name') is special: the current value
    is iterated, the given key is taken from every item via `.get()`, and the
    stringified results are joined with commas. This form is supported only as
    the last chunk; anything after it is ignored.

    If the path cannot be followed - a key is absent, or an intermediate
    value is None or otherwise not subscriptable by a string key - the
    function returns None, or raises IndexError when `raise_on_missing` is
    true. A key that is present with a None value is not considered missing.

    >>> obj = {'author': {'name': 'John'}, 'committer': None,
    ...        'labels': [{'name': 'Bug'}, {'name': 'Good first issue'}]}
    >>> json_path(obj, ('author', 'name'))
    'John'
    >>> json_path(obj, ('committer', 'name')) is None
    True
    >>> json_path(obj, ('committer',)) is None
    True
    >>> json_path(obj, ('labels', ',name'))
    'Bug,Good first issue'
    >>> json_path(obj, ('author', 'email')) is None
    True
    """
    for chunk in path:
        if chunk.startswith(","):
            if obj is None:
                # nothing to iterate over; same outcome as a missing key
                if raise_on_missing:
                    raise IndexError('Path does not exist')
                return None
            key = chunk[1:]
            obj = ",".join(str(item.get(key)) for item in obj)
            # supported only for the last chunk in the path, so break
            break
        try:
            obj = obj[chunk]
        except (KeyError, TypeError):
            # KeyError: the key is absent from a dict.
            # TypeError: the current value is None, a list, a string, etc.
            # A membership test (`chunk in obj`) would blow up on None here,
            # so attempt the lookup and treat failure as a missing path.
            if raise_on_missing:
                raise IndexError('Path does not exist')
            return None
    return obj


Expand All @@ -107,7 +110,7 @@ def json_map(mapping, obj):
>>> json_map({"author_login": "author__name", 'foo': 'bar'}, obj)
{'author_login': 'John', 'foo': None}
"""
return {key: json_path(obj, *path.split("__"))
return {key: json_path(obj, path.split("__"))
for key, path in mapping.items()}


Expand Down
26 changes: 17 additions & 9 deletions stscraper/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def repo_contributors(self, repo_slug):
url = 'repos/%s/stats/contributors' % repo_slug
for contributor_stats in next(self.request(url)):
record = {w['w']: w['c'] for w in contributor_stats['weeks']}
record['user'] = json_path(contributor_stats, 'author', 'login')
record['user'] = json_path(contributor_stats, ('author', 'login'))
yield record

@api('repos/%s/pulls/%d/commits', paginate=True, state='all')
Expand Down Expand Up @@ -382,20 +382,28 @@ def v4(self, query, object_path=(), **params):
if 'data' not in res:
raise VCSError('API didn\'t return any data:\n' +
json.dumps(res, indent=4))
data = res['data']

objects = json_path(res['data'], *object_path)
if objects is None:
try:
objects = json_path(data, object_path, raise_on_missing=True)
except IndexError:
raise VCSError('Invalid object path "%s" in:\n %s' %
(object_path, json.dumps(res)))
if 'nodes' not in objects:
(object_path, json.dumps(data)))

has_next_page = json_path(objects, ('pageInfo', 'hasNextPage'))
if not json_path(objects, ('pageInfo', 'hasNextPage')):
yield objects
return
for obj in objects['nodes']:
# This is due to inconsistency in graphql API.
# In most cases, requests returning lists of objects put them in
# 'nodes', but in few legacy methods they use 'edges'
nodes = objects.get('nodes') or objects.get('edges')
if not nodes:
break
for obj in nodes:
yield obj
# the result is single page, or there are no more pages
if not json_path(objects, 'pageInfo', 'hasNextPage'):
return
params['cursor'] = json_path(objects, 'pageInfo', 'endCursor')
params['cursor'] = json_path(objects, ('pageInfo', 'endCursor'))

def repo_issues(self, repo_slug, cursor=None):
owner, repo = repo_slug.split('/')
Expand Down

0 comments on commit bb92758

Please sign in to comment.