From fc5df305c9ad2c9b927a577577a1f67e78380402 Mon Sep 17 00:00:00 2001 From: Marat Date: Wed, 23 Jan 2019 14:29:52 -0500 Subject: [PATCH] fix: support Created N other repositories record in full activity log --- .../record/created_other_repositories.html | 34 +++++++++++++++++++ fixtures/record/record_test.csv | 1 + stgithub.py | 18 +++++----- 3 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 fixtures/record/created_other_repositories.html diff --git a/fixtures/record/created_other_repositories.html b/fixtures/record/created_other_repositories.html new file mode 100644 index 0000000..fa5e477 --- /dev/null +++ b/fixtures/record/created_other_repositories.html @@ -0,0 +1,34 @@ +
+ + + +
diff --git a/fixtures/record/record_test.csv b/fixtures/record/record_test.csv index 536753f..2dbc7a7 100644 --- a/fixtures/record/record_test.csv +++ b/fixtures/record/record_test.csv @@ -6,6 +6,7 @@ "created_issues.html","{""user2589/Q"": {""issues"": 13}}" "created_repositories.html","{""vmarkovtsev/go-lcss"": {""created_repository"": 1}, ""vmarkovtsev/documentation"": {""created_repository"": 1}, ""vmarkovtsev/NMLM"": {""created_repository"": 1}, ""vmarkovtsev/pyupgrade-opt"": {""created_repository"": 1}, ""vmarkovtsev/etalpmet"": {""created_repository"": 1}, ""vmarkovtsev/rgf"": {""created_repository"": 1}}" "created_repository.html","{""user2589/Toggl.py"": {""created_repository"": 1}}" +"created_other_repositories.html","{""Vanuan/npapi-cpp-sdk"": {""created_repository"": 1}}" "joined_github.html","{}" "created_pull_requests.html","{""mgdg25/Sandbox"": {""pull_requests"": 1}}" "created_other_pull_requests.html","{""bblfsh/documentation"": {""pull_requests"": 1}, ""nantes-machine-learning-meetup/NMLM"": {""pull_requests"": 1}, ""spacetelescope/asdf"": {""pull_requests"": 4}, ""asottile/pyupgrade"": {""pull_requests"": 3}, ""src-d/style-analyzer"": {""pull_requests"": 10}, ""src-d/wmd-relax"": {""pull_requests"": 2}, ""vmarkovtsev/pyupgrade-opt"": {""pull_requests"": 2}, ""src-d/guide"": {""pull_requests"": 5}, ""src-d/hercules"": {""pull_requests"": 8}, ""RGF-team/rgf"": {""pull_requests"": 1}, ""src-d/lapjv"": {""pull_requests"": 1}, ""bblfsh/client-python"": {""pull_requests"": 1}, ""src-d/conferences"": {""pull_requests"": 1}, ""src-d/blog"": {""pull_requests"": 3}}" diff --git a/stgithub.py b/stgithub.py index 5596ac8..bd6f164 100755 --- a/stgithub.py +++ b/stgithub.py @@ -116,7 +116,7 @@ def _parse_timeline_update_record(record_div): # reviewed pull requests title = normalize_text(record_div.button.text) if re.match( - 'Reviewed \\d+ pull requests? in \\d+ repositor(y|ies)', title): + r'Reviewed \d+ pull requests? in \d+ repositor(y|ies)', title): for repo_div in record_div.find_all( 'div', class_='profile-rollup-summarized'): @@ -125,7 +125,7 @@ def _parse_timeline_update_record(record_div): count = int(count_span.text.split()[0]) record_data[repo]['reviews'] += count - elif re.match('Opened \\d+ (?:other )?issues? in \\d+ repositor(y|ies)', + elif re.match(r'Opened \d+ (?:other )?issues? in \d+ repositor(y|ies)', title): for repo_div in record_div.find_all( 'div', class_='profile-rollup-summarized'): @@ -137,13 +137,13 @@ def _parse_timeline_update_record(record_div): count += int(span.text) record_data[repo]['issues'] += count - elif re.match('Created \\d+ repositor(y|ies)', title): + elif re.match(r'Created \d+ (?:other )?repositor(y|ies)', title): for link in record_div.find_all( 'a', attrs={'data-hovercard-type': "repository"}): record_data[link.text]['created_repository'] = 1 - elif re.match('Opened \\d+ (?:other )?pull requests? ' - 'in \\d+ repositor(y|ies)', title): + elif re.match(r'Opened \d+ (?:other )?pull requests? ' + r'in \d+ repositor(y|ies)', title): for repo_div in record_div.find_all( 'div', class_='profile-rollup-summarized'): repo = repo_div.button.div.span.text @@ -154,7 +154,7 @@ def _parse_timeline_update_record(record_div): count += int(span.text) record_data[repo]['pull_requests'] += count - elif re.match('Created \\d+ commits? in \\d+ repositor(y|ies)', title): + elif re.match(r'Created \d+ commits? in \d+ repositor(y|ies)', title): for repo_li in record_div.ul.find_all('li', recursive=False): li_div = repo_li.div if not li_div: @@ -302,7 +302,6 @@ class Scraper(object): typically takes couple minutes. Use this "API" with caution as it might be extremely slow. - This class provides access to several functions available """ _instance = None # singleton instance @@ -426,9 +425,8 @@ def links_to_recent_user_activity(self, user): Args: user (str): The GitHub login of the user. - Returns: - Generator: A generator of two-tuples: - (<%Y-%m-%d date>, link to the activity) + Yields: + Tuple[str, str]: (<%Y-%m-%d date>, link to the activity) It seems like this feed only includes tags and commits >>> list(Scraper().links_to_recent_user_activity('user2589')) # doctest: +SKIP