From 244e897ba7d687b807f49c99ff8f5b1ff827902d Mon Sep 17 00:00:00 2001 From: cullzie Date: Fri, 11 Dec 2020 16:08:56 +0000 Subject: [PATCH 01/37] Wait for zipcode element to be present and for enroll to be clickable after --- core/udemy.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/core/udemy.py b/core/udemy.py index ea5a886..9cef150 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -1,8 +1,7 @@ import logging -import time from enum import Enum -from selenium.common.exceptions import NoSuchElementException +from selenium.common.exceptions import NoSuchElementException, TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.remote.webdriver import WebDriver, WebElement from selenium.webdriver.support import expected_conditions as EC @@ -146,17 +145,25 @@ def redeem(self, url: str) -> str: # Check if zipcode exists before doing this if self.settings.zip_code: - # Assume sometimes zip is not required because script was originally pushed without this + # zipcode is only required in certain regions (e.g USA) try: - zipcode_element = self.driver.find_element_by_id( - "billingAddressSecondaryInput" + element_present = EC.presence_of_element_located( + ( + By.ID, + "billingAddressSecondaryInput", + ) + ) + WebDriverWait(self.driver, 5).until(element_present).send_keys( + self.settings.zip_code ) - zipcode_element.send_keys(self.settings.zip_code) # After you put the zip code in, the page refreshes itself and disables the enroll button for a split # second. - time.sleep(1) - except NoSuchElementException: + enroll_button_is_clickable = EC.element_to_be_clickable( + (By.XPATH, enroll_button_xpath) + ) + WebDriverWait(self.driver, 5).until(enroll_button_is_clickable) + except (TimeoutException, NoSuchElementException): pass # Make sure the price has loaded From 8b0757a6e576768212108b2240fd6945f85f9b93 Mon Sep 17 00:00:00 2001 From: cullzie Date: Fri, 11 Dec 2020 19:10:18 +0000 Subject: [PATCH 02/37] Removing extra scripts and logic --- README.md | 23 +++++++------------ sample_settings.yaml | 1 + udemy_enroller.py | 20 ++++++++--------- udemy_enroller_chrome.py | 15 ------------- udemy_enroller_chromium.py | 15 ------------- udemy_enroller_edge.py | 15 ------------- udemy_enroller_firefox.py | 16 -------------- udemy_enroller_internet_explorer.py | 15 ------------- udemy_enroller_opera.py | 15 ------------- udemy_enroller_vanilla.py | 34 ----------------------------- 10 files changed, 18 insertions(+), 151 deletions(-) delete mode 100644 udemy_enroller_chrome.py delete mode 100644 udemy_enroller_chromium.py delete mode 100644 udemy_enroller_edge.py delete mode 100644 udemy_enroller_firefox.py delete mode 100644 udemy_enroller_internet_explorer.py delete mode 100644 udemy_enroller_opera.py delete mode 100644 udemy_enroller_vanilla.py diff --git a/README.md b/README.md index 2b153c5..eaedf35 100644 --- a/README.md +++ b/README.md @@ -79,41 +79,34 @@ get all the requirements installed in one go. 
Similar instructions applies for p - **Tested and works perfectly:** - Chrome: - [udemy_enroller_chrome.py](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_chrome.py) + [udemy_enroller.py --browser=chrome](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - Chromium: - [udemy_enroller_chromium.py](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_chromium.py) + [udemy_enroller.py --browser=chromium](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - Edge: - [udemy_enroller_edge.py](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_edge.py) + [udemy_enroller.py --browser=edge](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - **Has issues when run on custom kernel but works fine on vanilla OS:** - Firefox: - [udemy_enroller_firefox.py(might require manual driver installation)](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_firefox.py) + [udemy_enroller.py --browser=firefox (might require manual driver installation)](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - **Untested:** - Opera: - [udemy_enroller_opera.py](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_opera.py) - -- **Experimentation or other Browsers (especially Safari):** - - - [aka the old bot- requires manual driver setup](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_vanilla.py) - + [udemy_enroller.py --browser=opera](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + - **Use at your own risk:** - Vanilla - Internet Explorer: - [udemy_enroller_internet_explorer.py](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller_internet_explorer.py) + [udemy_enroller.py --browser=internet_explorer](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) 3 . The script can be passed arguments: - `--help`: View full list of arguments available - `--max-pages=`: Max number of pages to scrape from tutorialbar.com before exiting the script - `--browser=`: Run with a specific browser -- `--cache-hits=`: If we hit the cache this number of times in a row we will exit the script +- `--cache-hits=`: If we hit the cache this number of times in a row we will exit the script (default is 12) 4 . Run the chosen script in terminal like so: -- `python udemy_enroller_firefox.py` - - Or by using the generic script: - `python udemy_enroller.py --browser=firefox` 5 . The bot starts scraping the course links from the first **All Courses** page diff --git a/sample_settings.yaml b/sample_settings.yaml index 7b6acf1..94d90f3 100644 --- a/sample_settings.yaml +++ b/sample_settings.yaml @@ -3,3 +3,4 @@ udemy: password: "ExamplePa$$w0rd" # Enter your Udemy password here zipcode: "12345" # If Udemy requires a zipcode for your country, enter it here. 
languages: [] # If you want to limit the languages of courses to claim e.g ["French", "Spanish"] + categories: [] # If you want to limit the categories of courses to claim \ No newline at end of file diff --git a/udemy_enroller.py b/udemy_enroller.py index 5e68366..50c18c7 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -1,6 +1,3 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! import argparse from argparse import Namespace from typing import Union @@ -27,18 +24,15 @@ def run( :return: """ settings = Settings() - if driver is None: - dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) - driver = dm.driver - redeem_courses(driver, settings, max_pages, cache_hit_limit) + dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) + redeem_courses(dm.driver, settings, max_pages, cache_hit_limit) -def parse_args(browser=None, use_manual_driver=False) -> Namespace: +def parse_args(browser=None) -> Namespace: """ Parse args from the CLI or use the args passed in :param str browser: Name of the browser we want to create a driver for - :param bool use_manual_driver: If True don't create a web driver using web driver manager :return: Args to be used in the script """ parser = argparse.ArgumentParser(description="Udemy Enroller") @@ -65,13 +59,17 @@ def parse_args(browser=None, use_manual_driver=False) -> Namespace: args = parser.parse_args() - if args.browser is None and not use_manual_driver: + if args.browser is None: parser.print_help() else: return args -if __name__ == "__main__": +def main(): args = parse_args() if args: run(args.browser, args.max_pages, args.cache_hits) + + +if __name__ == "__main__": + main() diff --git a/udemy_enroller_chrome.py b/udemy_enroller_chrome.py deleted file mode 100644 index a10a265..0000000 --- a/udemy_enroller_chrome.py +++ /dev/null @@ -1,15 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! -import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "chrome" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_chromium.py b/udemy_enroller_chromium.py deleted file mode 100644 index bc21d33..0000000 --- a/udemy_enroller_chromium.py +++ /dev/null @@ -1,15 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! 
-import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "chromium" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_edge.py b/udemy_enroller_edge.py deleted file mode 100644 index e33eecf..0000000 --- a/udemy_enroller_edge.py +++ /dev/null @@ -1,15 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! -import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "edge" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_firefox.py b/udemy_enroller_firefox.py deleted file mode 100644 index 5d5d770..0000000 --- a/udemy_enroller_firefox.py +++ /dev/null @@ -1,16 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! For firefox you need to manually install the -# driver on Arch Linux (sudo pacman -S geckodriver). Untested on other platforms. -import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "firefox" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_internet_explorer.py b/udemy_enroller_internet_explorer.py deleted file mode 100644 index d1c8fa5..0000000 --- a/udemy_enroller_internet_explorer.py +++ /dev/null @@ -1,15 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! -import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "internet_explorer" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_opera.py b/udemy_enroller_opera.py deleted file mode 100644 index 8970d4a..0000000 --- a/udemy_enroller_opera.py +++ /dev/null @@ -1,15 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! 
-import warnings - -from udemy_enroller import parse_args, run - -if __name__ == "__main__": - browser = "opera" - warnings.warn( - f"Please use `udemy_enroller.py --browser={browser}` as this script will be removed soon", - DeprecationWarning, - ) - args = parse_args(browser) - run(args.browser, args.max_pages, args.cache_hits) diff --git a/udemy_enroller_vanilla.py b/udemy_enroller_vanilla.py deleted file mode 100644 index 8e52539..0000000 --- a/udemy_enroller_vanilla.py +++ /dev/null @@ -1,34 +0,0 @@ -# Install all the requirements by running requirements.py in IDLE or follow the alternate instructions at -# https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/ Make sure you have -# cleared all saved payment details on your Udemy account & the browser! -from selenium import webdriver - -from core import Settings -from udemy_enroller import parse_args, run - -"""### **Enter the path/location of your webdriver** -By default, the webdriver for Microsoft Edge browser has been chosen in the code below. - -Also, enter the location of your webdriver. -""" - - -if __name__ == "__main__": - args = parse_args(use_manual_driver=True) - - settings = Settings() - # On windows you need the r (raw string) in front of the string to deal with backslashes. - # Replace this string with the path for your webdriver - - path = r"..location\msedgedriver.exe" - driver = webdriver.Edge(path) - # driver = webdriver.Chrome(path) # Uncomment for Google Chrome driver - # driver = webdriver.Firefox(path) # Uncomment for Mozilla Firefox driver - # driver = webdriver.Edge(path) # Uncomment for Microsoft Edge driver - # driver = webdriver.Safari(path) # Uncomment for Apple Safari driver - - # Maximizes the browser window since Udemy has a responsive design and the code only works - # in the maximized layout - driver.maximize_window() - - run(args.browser, args.max_pages, args.cache_hits, driver=driver) From 06d4ffff340e57500aa82ff77fa0b1a3de3e6c17 Mon Sep 17 00:00:00 2001 From: fakeid30 Date: Sat, 12 Dec 2020 17:33:30 +0600 Subject: [PATCH 03/37] Thanked Gitlab --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eaedf35..6e637fd 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,12 @@ Thanks to [JetBrains](https://jetbrains.com/?from=udemy-free-course-enroller) fo ### GitBook -[![Gitbook](https://i.imgur.com/OkuB14I.jpg)](https://gitbook.com) +[![GitBook](https://i.imgur.com/OkuB14I.jpg)](https://gitbook.com) -Thanks to [Gitbook](https://gitbook.com) for supporting us. Gitbook is the best place to track personal notes and ideas for teams. If you think their product might help you, please support them. \ No newline at end of file +Thanks to [GitBook](https://gitbook.com) for supporting us. GitBook is the best place to track personal notes and ideas for teams. If you think their product might help you, please support them. + +### GitLab + +[![GitLab](https://i.imgur.com/aUWtSn4.png)](https://gitlab.com) + +Thanks to [GitLab](https://gitlab.com) for supporting us. GitLab is one of the main code hosting providers out there. They also have comprehensive offering for [open source](https://about.gitlab.com/solutions/open-source/). Please check them out. 
\ No newline at end of file From 7e0070fefe38904351bd470f8e378b535256fabb Mon Sep 17 00:00:00 2001 From: fakeid30 Date: Sat, 12 Dec 2020 17:40:20 +0600 Subject: [PATCH 04/37] Fix CI run --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 787034b..631ef3b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -56,4 +56,4 @@ jobs: UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller_chrome.py + poetry run python udemy_enroller.py --browser=chrome From 0a6a02761f772bc1cc863dc720b2d374da86a85a Mon Sep 17 00:00:00 2001 From: cullzie Date: Sat, 12 Dec 2020 21:15:53 +0000 Subject: [PATCH 05/37] Move to asyncio, create scraper manager and add comidoc source --- core/__init__.py | 2 +- core/scrapers/__init__.py | 0 core/scrapers/base_scraper.py | 72 +++++++++++++++++ core/scrapers/comidoc.py | 80 +++++++++++++++++++ core/scrapers/manager.py | 36 +++++++++ core/{ => scrapers}/tutorialbar.py | 79 ++++++++++-------- core/utils.py | 18 ++--- pyproject.toml | 5 +- requirements.txt | 2 +- tests/core/scrapers/__init__.py | 0 tests/core/{ => scrapers}/test_tutorialbar.py | 51 ++++++------ 11 files changed, 275 insertions(+), 70 deletions(-) create mode 100644 core/scrapers/__init__.py create mode 100644 core/scrapers/base_scraper.py create mode 100644 core/scrapers/comidoc.py create mode 100644 core/scrapers/manager.py rename core/{ => scrapers}/tutorialbar.py (61%) create mode 100644 tests/core/scrapers/__init__.py rename tests/core/{ => scrapers}/test_tutorialbar.py (78%) diff --git a/core/__init__.py b/core/__init__.py index cb59e9a..dac35e7 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -2,8 +2,8 @@ from .cache import CourseCache from .driver_manager import ALL_VALID_BROWSER_STRINGS, DriverManager +from .scrapers.manager import ScraperManager from .settings import Settings -from .tutorialbar import TutorialBarScraper from .udemy import UdemyActions logging.config.fileConfig("logconfig.ini", disable_existing_loggers=False) diff --git a/core/scrapers/__init__.py b/core/scrapers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/scrapers/base_scraper.py b/core/scrapers/base_scraper.py new file mode 100644 index 0000000..ce6fa37 --- /dev/null +++ b/core/scrapers/base_scraper.py @@ -0,0 +1,72 @@ +import datetime +import logging +from abc import ABC, abstractmethod +from enum import Enum + +logger = logging.getLogger("udemy_enroller") + + +class ScraperStates(Enum): + DISABLED = "DISABLED" + RUNNING = "RUNNING" + COMPLETE = "COMPLETE" + + +class BaseScraper(ABC): + def __init__(self): + self._state = None + self.scraper_name = None + + @abstractmethod + async def run(self): + return + + @abstractmethod + async def get_links(self): + return + + @property + def state(self): + return self._state + + @state.setter + def state(self, value): + if any([ss for ss in ScraperStates if ss.value == value]): + self._state = value + + def set_state_disabled(self): + self.state = ScraperStates.DISABLED.value + logger.info(f"{self.scraper_name} scraper disabled") + + def set_state_running(self): + self.state = ScraperStates.RUNNING.value + logger.info(f"{self.scraper_name} scraper is running") + + def set_state_complete(self): + self.state = ScraperStates.COMPLETE.value + logger.info(f"{self.scraper_name} scraper complete") + + def is_disabled(self): + return 
self.state == ScraperStates.DISABLED.value + + def is_complete(self): + return self.state == ScraperStates.COMPLETE.value + + def should_run(self): + should_run = not self.is_disabled() and not self.is_complete() + if should_run: + self.set_state_running() + return should_run + + @staticmethod + def time_run(func): + async def wrapper(self): + start_time = datetime.datetime.utcnow() + response = await func(self) + end_time = datetime.datetime.utcnow() + logger.info( + f"Got {len(response)} links from {self.DOMAIN} in {(end_time - start_time).total_seconds():.2f} seconds" + ) + return response + + return wrapper diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py new file mode 100644 index 0000000..c103bc9 --- /dev/null +++ b/core/scrapers/comidoc.py @@ -0,0 +1,80 @@ +import datetime +import json +import logging + +import aiohttp + +from core.scrapers.base_scraper import BaseScraper + +logger = logging.getLogger("udemy_enroller") + + +class ComidocScraper(BaseScraper): + """ + Contains any logic related to scraping of data from comidoc.net + """ + + DOMAIN = "https://comidoc.net" + HEADERS = { + "authority": "comidoc.net", + "accept-language": "en-US", + "content-type": "application/json", + "accept": "*/*", + "origin": DOMAIN, + "sec-fetch-site": "same-origin", + "sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + "referer": f"{DOMAIN}/daily", + } + + def __init__(self, days_offset=10, enabled=True): + super().__init__() + self.scraper_name = "comidoc" + if not enabled: + self.set_state_disabled() + self.days_offset = days_offset # Query the past months coupons + + @BaseScraper.time_run + async def run(self): + return await self.get_links() + + async def get_links(self): + links = [] + # TODO: Add try/except block to handle connection issues + data = await self.get_data() + if data: + self.set_state_complete() + links = [ + f"https://www.udemy.com/course{d['course']['cleanUrl']}?couponCode={d['code']}" + for d in data + ] + + return links + + async def get_data(self): + """ + + :return: + """ + url = f"{self.DOMAIN}/beta" + payload = { + "query": "query DAILY_COURSES_QUERY($myDate: DateTime) { coupons: " + 'coupons( where: { isValid: true createdAt_gte: $myDate discountValue_starts_with: "100%" } ' + "orderBy: createdAt_DESC) { code isValid createdAt discountValue discountPrice maxUses " + "remainingUses endTime course { udemyId cleanUrl createdAt updatedAt detail(last: 1) { title " + "isPaid lenghtTxt rating updated subscribers locale { locale } } } } }", + "variables": { + "myDate": ( + datetime.datetime.utcnow() + - datetime.timedelta(days=self.days_offset) + ).strftime("%Y-%m-%dT%H") + }, + } + + async with aiohttp.ClientSession() as session: + async with session.post( + url, headers=self.HEADERS, data=json.dumps(payload) + ) as response: + data = await response.json() + + return data["data"]["coupons"] diff --git a/core/scrapers/manager.py b/core/scrapers/manager.py new file mode 100644 index 0000000..640b20c --- /dev/null +++ b/core/scrapers/manager.py @@ -0,0 +1,36 @@ +import asyncio +from functools import reduce +from typing import List + +from core.scrapers.comidoc import ComidocScraper +from core.scrapers.tutorialbar import TutorialBarScraper + + +class ScraperManager: + def __init__(self, max_pages): + self.tbs = TutorialBarScraper(max_pages=max_pages) + self.cds = ComidocScraper() + self._scrapers = (self.tbs, self.cds) + + async def run(self) -> List: + """ + Runs any enabled scrapers and returns a list of links + + :return: list + """ + urls = [] + 
enabled_scrapers = self._enabled_scrapers() + if enabled_scrapers: + urls = reduce( + list.__add__, + await asyncio.gather(*map(lambda sc: sc.run(), enabled_scrapers)), + ) + return urls + + def _enabled_scrapers(self) -> List: + """ + Returns a list of scrapers that should run + + :return: + """ + return list(filter(lambda sc: sc.should_run(), self._scrapers)) diff --git a/core/tutorialbar.py b/core/scrapers/tutorialbar.py similarity index 61% rename from core/tutorialbar.py rename to core/scrapers/tutorialbar.py index 9eea724..21322dc 100644 --- a/core/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -1,14 +1,16 @@ +import asyncio import logging -from multiprocessing.dummy import Pool from typing import List -import requests +import aiohttp from bs4 import BeautifulSoup +from core.scrapers.base_scraper import BaseScraper + logger = logging.getLogger("udemy_enroller") -class TutorialBarScraper: +class TutorialBarScraper(BaseScraper): """ Contains any logic related to scraping of data from tutorialbar.com """ @@ -16,34 +18,45 @@ class TutorialBarScraper: DOMAIN = "https://www.tutorialbar.com" AD_DOMAINS = ("https://amzn",) - def __init__(self, max_pages=None): + def __init__(self, max_pages=None, enabled=True): + super().__init__() + self.scraper_name = "tutorialbar" + if not enabled: + self.set_state_disabled() self.current_page = 0 self.last_page = None self.links_per_page = 12 self.max_pages = max_pages - def run(self) -> List: + @BaseScraper.time_run + async def run(self) -> List: """ Runs the steps to scrape links from tutorialbar.com :return: list of udemy coupon links """ + links = await self.get_links() + self.max_pages_reached() + return links + + async def get_links(self): self.current_page += 1 - logger.info("Please Wait: Getting the course list from tutorialbar.com...") - course_links = self.get_course_links( + course_links = await self.get_course_links( f"{self.DOMAIN}/all-courses/page/{self.current_page}/" ) - logger.info(f"Page: {self.current_page} of {self.last_page} scraped") - udemy_links = self.gather_udemy_course_links(course_links) - filtered_udemy_links = self._filter_ad_domains(udemy_links) + logger.info( + f"Page: {self.current_page} of {self.last_page} scraped from tutorialbar.com" + ) + udemy_links = await self.gather_udemy_course_links(course_links) + links = self._filter_ad_domains(udemy_links) - for counter, course in enumerate(filtered_udemy_links): - logger.info(f"Received Link {counter + 1} : {course}") + for counter, course in enumerate(links): + logger.debug(f"Received Link {counter + 1} : {course}") - return filtered_udemy_links + return links - def script_should_run(self) -> bool: + def max_pages_reached(self) -> bool: """ Returns boolean of whether or not we should continue checking tutorialbar.com @@ -51,21 +64,17 @@ def script_should_run(self) -> bool: """ should_run = True + if self.max_pages is not None: should_run = self.max_pages > self.current_page + if not should_run: logger.info( f"Stopping loop. 
We have reached max number of pages to scrape: {self.max_pages}" ) - return should_run + self.set_state_disabled() - def is_first_loop(self) -> bool: - """ - Simple check to see if this is the first time we have executed - - :return: boolean showing if this is the first loop of the script - """ - return self.current_page == 1 + return should_run def _filter_ad_domains(self, udemy_links) -> List: """ @@ -83,16 +92,19 @@ def _filter_ad_domains(self, udemy_links) -> List: logger.info(f"Removing ad links from courses: {ad_links}") return list(set(udemy_links) - ad_links) - def get_course_links(self, url: str) -> List: + async def get_course_links(self, url: str) -> List: """ Gets the url of pages which contain the udemy link we want to get :param str url: The url to scrape data from :return: list of pages on tutorialbar.com that contain Udemy coupons """ - response = requests.get(url=url) - soup = BeautifulSoup(response.content, "html.parser") + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + text = await response.read() + + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") links = soup.find_all("h3") course_links = [link.find("a").get("href") for link in links] @@ -104,27 +116,26 @@ def get_course_links(self, url: str) -> List: return course_links @staticmethod - def get_udemy_course_link(url: str) -> str: + async def get_udemy_course_link(url: str) -> str: """ Gets the udemy course link :param str url: The url to scrape data from :return: Coupon link of the udemy course """ - response = requests.get(url=url) - soup = BeautifulSoup(response.content, "html.parser") + + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + text = await response.read() + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") udemy_link = soup.find("span", class_="rh_button_wrapper").find("a").get("href") return udemy_link - def gather_udemy_course_links(self, courses: List[str]) -> List: + async def gather_udemy_course_links(self, courses: List[str]): """ Threaded fetching of the udemy course links from tutorialbar.com :param list courses: A list of tutorialbar.com course links we want to fetch the udemy links for :return: list of udemy links """ - thread_pool = Pool() - results = thread_pool.map(self.get_udemy_course_link, courses) - thread_pool.close() - thread_pool.join() - return results + return await asyncio.gather(*map(self.get_udemy_course_link, courses)) diff --git a/core/utils.py b/core/utils.py index 6383083..a0e6b62 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,3 +1,4 @@ +import asyncio import logging from typing import Union @@ -8,7 +9,7 @@ ) from selenium.webdriver.remote.webdriver import WebDriver -from core import CourseCache, Settings, TutorialBarScraper, UdemyActions, exceptions +from core import CourseCache, ScraperManager, Settings, UdemyActions, exceptions logger = logging.getLogger("udemy_enroller") @@ -29,17 +30,15 @@ def _redeem_courses( :return: """ cache = CourseCache() - tb_scraper = TutorialBarScraper(max_pages) + scrapers = ScraperManager(max_pages) udemy_actions = UdemyActions(driver, settings) udemy_actions.login() # login once outside while loop + loop = asyncio.get_event_loop() current_cache_hits = 0 while True: - # Check if we should exit the loop - if not tb_scraper.script_should_run(): - break - udemy_course_links = tb_scraper.run() + udemy_course_links = loop.run_until_complete(scrapers.run()) for course_link in udemy_course_links: try: @@ -49,7 +48,7 @@ def _redeem_courses( # 
Reset cache hit count as we haven't scraped this page before current_cache_hits = 0 else: - logger.info(f"In cache: {course_link}") + logger.debug(f"In cache: {course_link}") # Increment the cache hit count since this link is in the cache current_cache_hits += 1 @@ -76,8 +75,9 @@ def _redeem_courses( logger.info("We have attempted to subscribe to 1 udemy course") logger.info("Ending test") return - - logger.info("Moving on to the next page of the course list on tutorialbar.com") + else: + logger.info("All scrapers complete") + return def _reached_cache_hit_limit(cache_hit_limit, cache_hits) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 54f9dc6..9aa30e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,23 @@ [tool.poetry] name = "automatic-udemy-course-enroller-get-paid-udemy-courses-for-free" -version = "0.3" +version = "1.0" description = "" authors = [""] [tool.poetry.dependencies] python = "^3.8" selenium = "^3.141.0" -requests = "^2.24.0" beautifulsoup4 = "^4.9.3" "ruamel.yaml" = "^0.16.12" webdriver-manager = "^3.2.2" +aiohttp = "^3.7.3" [tool.poetry.dev-dependencies] black = "^20.8b1" isort = "^5.6.4" pytest = "^6.1.2" pytest-cov = "^2.10.1" +pytest-asyncio = "^0.14.0" [build-system] requires = ["poetry-core>=1.0.0a5"] diff --git a/requirements.txt b/requirements.txt index 7f2e09c..2ff857f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -requests +aiohttp beautifulsoup4 ruamel.yaml selenium diff --git a/tests/core/scrapers/__init__.py b/tests/core/scrapers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/test_tutorialbar.py b/tests/core/scrapers/test_tutorialbar.py similarity index 78% rename from tests/core/test_tutorialbar.py rename to tests/core/scrapers/test_tutorialbar.py index 0cc3929..072eb97 100644 --- a/tests/core/test_tutorialbar.py +++ b/tests/core/scrapers/test_tutorialbar.py @@ -2,9 +2,28 @@ import pytest -from core import TutorialBarScraper +from core.scrapers.tutorialbar import TutorialBarScraper +class MockResponse: + def __init__(self, data, status): + self._data = data + self.status = status + + async def read(self): + return self._data + + async def json(self): + return self._data + + async def __aexit__(self, exc_type, exc, tb): + pass + + async def __aenter__(self): + return self + + +@pytest.mark.asyncio @pytest.mark.parametrize( "tutorialbar_course_page_link,tutorialbar_links,udemy_links", [ @@ -25,7 +44,7 @@ ) @mock.patch.object(TutorialBarScraper, "gather_udemy_course_links") @mock.patch.object(TutorialBarScraper, "get_course_links") -def test_run( +async def test_run( mock_get_course_links, mock_gather_udemy_course_links, tutorialbar_course_page_link, @@ -35,7 +54,7 @@ def test_run( mock_get_course_links.return_value = tutorialbar_links mock_gather_udemy_course_links.return_value = udemy_links tbs = TutorialBarScraper() - links = tbs.run() + links = await tbs.run() mock_get_course_links.assert_called_with(tutorialbar_course_page_link) mock_gather_udemy_course_links.assert_called_with(tutorialbar_links) @@ -43,29 +62,15 @@ def test_run( assert link in udemy_links -@pytest.mark.parametrize( - "page_number,is_first_page", - [(1, True), (2, False)], - ids=( - "First Page", - "Not first page", - ), -) -def test_check_page_number(page_number, is_first_page): - tbs = TutorialBarScraper() - tbs.current_page = page_number - assert tbs.is_first_loop() == is_first_page - - -@mock.patch("core.tutorialbar.requests") -def test_get_course_links(mock_requests, tutorialbar_main_page): 
+@pytest.mark.asyncio +@mock.patch("aiohttp.ClientSession.get") +async def test_get_course_links(mock_get, tutorialbar_main_page): url = "https://www.tutorialbar.com/main" - requests_response = mock.Mock() - requests_response.content = tutorialbar_main_page - mock_requests.get.return_value = requests_response + + mock_get.return_value = MockResponse(tutorialbar_main_page, 200) tbs = TutorialBarScraper() tbs.current_page = 1 - links = tbs.get_course_links(url) + links = await tbs.get_course_links(url) assert tbs.last_page == "601" assert links == [ From 943842499295efe69354edbba350afb70e5ba103 Mon Sep 17 00:00:00 2001 From: cullzie Date: Sat, 12 Dec 2020 22:05:31 +0000 Subject: [PATCH 06/37] Add debug option to cli --- core/scrapers/comidoc.py | 20 ++++++++++++++------ core/scrapers/tutorialbar.py | 2 +- core/udemy.py | 8 ++++---- udemy_enroller.py | 26 ++++++++++++++++++++++---- 4 files changed, 41 insertions(+), 15 deletions(-) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index c103bc9..2374ec7 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -1,6 +1,7 @@ import datetime import json import logging +from typing import Dict, List import aiohttp @@ -27,7 +28,7 @@ class ComidocScraper(BaseScraper): "referer": f"{DOMAIN}/daily", } - def __init__(self, days_offset=10, enabled=True): + def __init__(self, days_offset=5, enabled=True): super().__init__() self.scraper_name = "comidoc" if not enabled: @@ -35,10 +36,15 @@ def __init__(self, days_offset=10, enabled=True): self.days_offset = days_offset # Query the past months coupons @BaseScraper.time_run - async def run(self): + async def run(self) -> List: + """ + Called to gather the udemy links + + :return: List of udemy course links + """ return await self.get_links() - async def get_links(self): + async def get_links(self) -> List: links = [] # TODO: Add try/except block to handle connection issues data = await self.get_data() @@ -51,11 +57,13 @@ async def get_links(self): return links - async def get_data(self): + async def get_data(self) -> Dict: """ + Fetch data from comidoc endpoint - :return: + :return: dictionary containing data needed to build udemy free urls """ + url = f"{self.DOMAIN}/beta" payload = { "query": "query DAILY_COURSES_QUERY($myDate: DateTime) { coupons: " @@ -77,4 +85,4 @@ async def get_data(self): ) as response: data = await response.json() - return data["data"]["coupons"] + return data["data"]["coupons"] if data else {} diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index 21322dc..7a313f9 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -72,7 +72,7 @@ def max_pages_reached(self) -> bool: logger.info( f"Stopping loop. 
We have reached max number of pages to scrape: {self.max_pages}" ) - self.set_state_disabled() + self.set_state_complete() return should_run diff --git a/core/udemy.py b/core/udemy.py index 9cef150..6d549d9 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -90,7 +90,7 @@ def redeem(self, url: str) -> str: ) if element_text not in self.settings.languages: - logger.info(f"Course language not wanted: {element_text}") + logger.debug(f"Course language not wanted: {element_text}") return UdemyStatus.UNWANTED_LANGUAGE.value if self.settings.categories: @@ -109,7 +109,7 @@ def redeem(self, url: str) -> str: if category in breadcrumbs: break else: - logger.info("Skipping course as it does not have a wanted category") + logger.debug("Skipping course as it does not have a wanted category") return UdemyStatus.UNWANTED_CATEGORY.value # Enroll Now 1 @@ -124,7 +124,7 @@ def redeem(self, url: str) -> str: "//div[starts-with(@class, 'buy-box--purchased-text-banner')]" ) if self.driver.find_elements_by_xpath(already_purchased_xpath): - logger.info(f"Already enrolled in {course_name}") + logger.debug(f"Already enrolled in {course_name}") return UdemyStatus.ENROLLED.value # Click to enroll in the course @@ -185,7 +185,7 @@ def redeem(self, url: str) -> str: # This logic should work for different locales and currencies _numbers = "".join(filter(lambda x: x if x.isdigit() else None, _price)) if _numbers.isdigit() and int(_numbers) > 0: - logger.info( + logger.debug( f"Skipping course as it now costs {_price}: {course_name}" ) return UdemyStatus.EXPIRED.value diff --git a/udemy_enroller.py b/udemy_enroller.py index 50c18c7..4fd3ad1 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -1,18 +1,30 @@ import argparse +import logging from argparse import Namespace from typing import Union -from selenium.webdriver.remote.webdriver import WebDriver - from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings from core.utils import redeem_courses +logger = logging.getLogger("udemy_enroller") + + +def enable_debug_logging() -> None: + """ + Enable debug logging for the scripts + + :return: None + """ + logger.setLevel(logging.DEBUG) + for handlers in logger.handlers: + handlers.setLevel(logging.DEBUG) + logger.info(f"Enabled debug logging") + def run( browser: str, max_pages: Union[int, None], cache_hit_limit: int, - driver: WebDriver = None, ): """ Run the udemy enroller script @@ -20,7 +32,6 @@ def run( :param str browser: Name of the browser we want to create a driver for :param int or None max_pages: Max number of pages to scrape from tutorialbar.com :param int cache_hit_limit: If we hit the cache this many times in a row we exit the script - :param WebDriver driver: :return: """ settings = Settings() @@ -56,6 +67,11 @@ def parse_args(browser=None) -> Namespace: default=12, help="If we hit the cache this number of times in a row we will exit the script", ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug logging", + ) args = parser.parse_args() @@ -68,6 +84,8 @@ def parse_args(browser=None) -> Namespace: def main(): args = parse_args() if args: + if args.debug: + enable_debug_logging() run(args.browser, args.max_pages, args.cache_hits) From 541a115c8f5b6f2b4ee1fe9111dc9792d41209bf Mon Sep 17 00:00:00 2001 From: Fake ID Date: Mon, 14 Dec 2020 21:20:00 +0600 Subject: [PATCH 07/37] Added GitLab link --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e637fd..c9e26d2 100644 --- a/README.md +++ b/README.md @@ -15,6 
+15,8 @@ In case of any bugs or issues, please open an issue in github. Also, don't forget to **Fork & Star the repository if you like it!** +***We are also on [GitLab](https://gitlab.com/the-automators/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE)*** + **_Video Proof:_** [![Udemy Auto-Course-Enroller](https://img.youtube.com/vi/IW8CCtv2k2A/0.jpg)](https://www.youtube.com/watch?v=IW8CCtv2k2A "GET PAID UDEMY Courses for FREE, Automatically with this Python Script!") @@ -204,4 +206,4 @@ Thanks to [GitBook](https://gitbook.com) for supporting us. GitBook is the best [![GitLab](https://i.imgur.com/aUWtSn4.png)](https://gitlab.com) -Thanks to [GitLab](https://gitlab.com) for supporting us. GitLab is one of the main code hosting providers out there. They also have comprehensive offering for [open source](https://about.gitlab.com/solutions/open-source/). Please check them out. \ No newline at end of file +Thanks to [GitLab](https://gitlab.com) for supporting us. GitLab is one of the main code hosting providers out there. They also have comprehensive offering for [open source](https://about.gitlab.com/solutions/open-source/). Please check them out. From dffc0b2fc957eca3097e78ca0ca6ea083186f34d Mon Sep 17 00:00:00 2001 From: Fake ID Date: Tue, 15 Dec 2020 10:33:20 +0600 Subject: [PATCH 08/37] changed GitLab according to their request --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c9e26d2..1bd3dd5 100644 --- a/README.md +++ b/README.md @@ -206,4 +206,4 @@ Thanks to [GitBook](https://gitbook.com) for supporting us. GitBook is the best [![GitLab](https://i.imgur.com/aUWtSn4.png)](https://gitlab.com) -Thanks to [GitLab](https://gitlab.com) for supporting us. GitLab is one of the main code hosting providers out there. They also have comprehensive offering for [open source](https://about.gitlab.com/solutions/open-source/). Please check them out. +Thanks to [GitLab](https://gitlab.com) for supporting us. GitLab is one of the main code hosting and CI/CD providers out there. They support the open source community through their GitLab for [Open Source program](https://about.gitlab.com/solutions/open-source/). Please check them out. From 566373cdcf9565b96d3e776c67b2ba8f37666453 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 15:49:41 +0000 Subject: [PATCH 09/37] Remove cache-hit logic and update to have cli-args to run specific scrapers --- core/scrapers/comidoc.py | 9 +++- core/scrapers/manager.py | 10 ++-- core/scrapers/tutorialbar.py | 2 +- core/utils.py | 94 ++++++++++++++---------------------- udemy_enroller.py | 48 +++++++++++++----- 5 files changed, 87 insertions(+), 76 deletions(-) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index 2374ec7..be107b0 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -15,10 +15,16 @@ class ComidocScraper(BaseScraper): Contains any logic related to scraping of data from comidoc.net """ + # TODO: Not sure how often x-api-key changes in HEADERS. 
+ # Might need to fetch it dynamically DOMAIN = "https://comidoc.net" HEADERS = { "authority": "comidoc.net", + "sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"', "accept-language": "en-US", + "sec-ch-ua-mobile": "?0", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", + "x-api-key": "W8GX8OIG4MWCM9Vy16lGH1KDpGvinq66", "content-type": "application/json", "accept": "*/*", "origin": DOMAIN, @@ -26,9 +32,10 @@ class ComidocScraper(BaseScraper): "sec-fetch-mode": "cors", "sec-fetch-dest": "empty", "referer": f"{DOMAIN}/daily", + "cookie": "consent=true", } - def __init__(self, days_offset=5, enabled=True): + def __init__(self, enabled, days_offset=5): super().__init__() self.scraper_name = "comidoc" if not enabled: diff --git a/core/scrapers/manager.py b/core/scrapers/manager.py index 640b20c..1fecd3b 100644 --- a/core/scrapers/manager.py +++ b/core/scrapers/manager.py @@ -7,10 +7,12 @@ class ScraperManager: - def __init__(self, max_pages): - self.tbs = TutorialBarScraper(max_pages=max_pages) - self.cds = ComidocScraper() - self._scrapers = (self.tbs, self.cds) + def __init__(self, tutorialbar_enabled, comidoc_enabled, max_pages): + self.tutorialbar_scraper = TutorialBarScraper( + tutorialbar_enabled, max_pages=max_pages + ) + self.comidoc_scraper = ComidocScraper(comidoc_enabled) + self._scrapers = (self.tutorialbar_scraper, self.comidoc_scraper) async def run(self) -> List: """ diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index 7a313f9..125442f 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -18,7 +18,7 @@ class TutorialBarScraper(BaseScraper): DOMAIN = "https://www.tutorialbar.com" AD_DOMAINS = ("https://amzn",) - def __init__(self, max_pages=None, enabled=True): + def __init__(self, enabled, max_pages=None): super().__init__() self.scraper_name = "tutorialbar" if not enabled: diff --git a/core/utils.py b/core/utils.py index a0e6b62..628a07e 100644 --- a/core/utils.py +++ b/core/utils.py @@ -17,100 +17,76 @@ def _redeem_courses( driver: WebDriver, settings: Settings, - max_pages: Union[int, None], - cache_hit_limit: int, + scrapers: ScraperManager, ) -> None: """ Method to scrape courses from tutorialbar.com and enroll in them on udemy :param WebDriver driver: Webdriver used to enroll in Udemy courses :param Settings settings: Core settings used for Udemy - :param int max_pages: Max pages to scrape from tutorialbar.com - :param int cache_hit_limit: If we hit the cache this many times in a row we exit the script + :param ScraperManager scrapers: :return: """ cache = CourseCache() - scrapers = ScraperManager(max_pages) udemy_actions = UdemyActions(driver, settings) udemy_actions.login() # login once outside while loop loop = asyncio.get_event_loop() - current_cache_hits = 0 - while True: udemy_course_links = loop.run_until_complete(scrapers.run()) - for course_link in udemy_course_links: - try: - if course_link not in cache: - status = udemy_actions.redeem(course_link) - cache.add(course_link, status) - # Reset cache hit count as we haven't scraped this page before - current_cache_hits = 0 - else: - logger.debug(f"In cache: {course_link}") - - # Increment the cache hit count since this link is in the cache - current_cache_hits += 1 - - # Exit the loop if we have reached the cache hit limit - if _reached_cache_hit_limit(cache_hit_limit, current_cache_hits): - return - except NoSuchElementException as e: - logger.error(e) - except 
TimeoutException: - logger.error(f"Timeout on link: {course_link}") - except WebDriverException: - logger.error(f"Webdriver exception on link: {course_link}") - except KeyboardInterrupt: - logger.error("Exiting the script") - raise - except exceptions.RobotException as e: - logger.error(e) - raise - except Exception as e: - logger.error(f"Unexpected exception: {e}") - finally: - if settings.is_ci_build: - logger.info("We have attempted to subscribe to 1 udemy course") - logger.info("Ending test") - return + if udemy_course_links: + for course_link in udemy_course_links: + try: + if course_link not in cache: + status = udemy_actions.redeem(course_link) + cache.add(course_link, status) + else: + logger.debug(f"In cache: {course_link}") + except NoSuchElementException as e: + logger.error(e) + except TimeoutException: + logger.error(f"Timeout on link: {course_link}") + except WebDriverException: + logger.error(f"Webdriver exception on link: {course_link}") + except KeyboardInterrupt: + logger.error("Exiting the script") + raise + except exceptions.RobotException as e: + logger.error(e) + raise + except Exception as e: + logger.error(f"Unexpected exception: {e}") + finally: + if settings.is_ci_build: + logger.info("We have attempted to subscribe to 1 udemy course") + logger.info("Ending test") + return else: logger.info("All scrapers complete") return -def _reached_cache_hit_limit(cache_hit_limit, cache_hits) -> bool: - """ - Check if we have reached the cache hit limit - - :param int cache_hit_limit: Limit on the number of cache hits in a row to allow - :param int cache_hits: Current number of cache hits in a row - :return: - """ - reached_hit_limit = cache_hit_limit <= cache_hits - if reached_hit_limit: - logger.info(f"Hit cache {cache_hits} times in a row. 
Exiting script") - return reached_hit_limit - - def redeem_courses( driver: WebDriver, settings: Settings, + tutorialbar_enabled: bool, + comidoc_enabled: bool, max_pages: Union[int, None], - cache_hit_limit: int, ) -> None: """ Wrapper of _redeem_courses so we always close browser on completion :param WebDriver driver: Webdriver used to enroll in Udemy courses :param Settings settings: Core settings used for Udemy + :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run + :param bool comidoc_enabled: Boolean signifying if comidoc scraper should run :param int max_pages: Max pages to scrape from tutorialbar.com - :param int cache_hit_limit: If we hit the cache this many times in a row we exit the script :return: """ try: - _redeem_courses(driver, settings, max_pages, cache_hit_limit) + scrapers = ScraperManager(tutorialbar_enabled, comidoc_enabled, max_pages) + _redeem_courses(driver, settings, scrapers) finally: logger.info("Closing browser") driver.quit() diff --git a/udemy_enroller.py b/udemy_enroller.py index 4fd3ad1..d58c4a1 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -21,22 +21,39 @@ def enable_debug_logging() -> None: logger.info(f"Enabled debug logging") +def determine_if_scraper_enabled( + tutorialbar_enabled: bool, + comidoc_enabled: bool, +): + """ + + + :return: None + """ + if not tutorialbar_enabled and not comidoc_enabled: + # Set both to True since user has not enabled a specific scraper i.e Run all scrapers + tutorialbar_enabled, comidoc_enabled = True, True + return tutorialbar_enabled, comidoc_enabled + + def run( browser: str, + tutorialbar_enabled: bool, + comidoc_enabled: bool, max_pages: Union[int, None], - cache_hit_limit: int, ): """ Run the udemy enroller script :param str browser: Name of the browser we want to create a driver for + :param bool tutorialbar_enabled: + :param bool comidoc_enabled: :param int or None max_pages: Max number of pages to scrape from tutorialbar.com - :param int cache_hit_limit: If we hit the cache this many times in a row we exit the script :return: """ settings = Settings() dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) - redeem_courses(dm.driver, settings, max_pages, cache_hit_limit) + redeem_courses(dm.driver, settings, tutorialbar_enabled, comidoc_enabled, max_pages) def parse_args(browser=None) -> Namespace: @@ -56,16 +73,22 @@ def parse_args(browser=None) -> Namespace: help="Browser to use for Udemy Enroller", ) parser.add_argument( - "--max-pages", - type=int, - default=None, - help="Max pages to scrape from tutorialbar.com", + "--tutorialbar", + action="store_true", + default=False, + help="Run tutorialbar scraper", ) parser.add_argument( - "--cache-hits", + "--comidoc", + action="store_true", + default=False, + help="Run comidoc scraper", + ) + parser.add_argument( + "--max-pages", type=int, - default=12, - help="If we hit the cache this number of times in a row we will exit the script", + default=5, + help=f"Max pages to scrape from tutorialbar (Default is 5)", ) parser.add_argument( "--debug", @@ -86,7 +109,10 @@ def main(): if args: if args.debug: enable_debug_logging() - run(args.browser, args.max_pages, args.cache_hits) + tutorialbar_enabled, comidoc_enabled = determine_if_scraper_enabled( + args.tutorialbar, args.comidoc + ) + run(args.browser, tutorialbar_enabled, comidoc_enabled, args.max_pages) if __name__ == "__main__": From faa7f5f6bf0d4b2c3b6a09af281b16cdd8ed43e2 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 16:10:54 +0000 Subject: 
[PATCH 10/37] Fixing unittests --- tests/core/scrapers/test_tutorialbar.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/core/scrapers/test_tutorialbar.py b/tests/core/scrapers/test_tutorialbar.py index 072eb97..2a9b8b6 100644 --- a/tests/core/scrapers/test_tutorialbar.py +++ b/tests/core/scrapers/test_tutorialbar.py @@ -53,7 +53,7 @@ async def test_run( ): mock_get_course_links.return_value = tutorialbar_links mock_gather_udemy_course_links.return_value = udemy_links - tbs = TutorialBarScraper() + tbs = TutorialBarScraper(enabled=True) links = await tbs.run() mock_get_course_links.assert_called_with(tutorialbar_course_page_link) @@ -68,7 +68,7 @@ async def test_get_course_links(mock_get, tutorialbar_main_page): url = "https://www.tutorialbar.com/main" mock_get.return_value = MockResponse(tutorialbar_main_page, 200) - tbs = TutorialBarScraper() + tbs = TutorialBarScraper(enabled=True) tbs.current_page = 1 links = await tbs.get_course_links(url) @@ -87,3 +87,19 @@ async def test_get_course_links(mock_get, tutorialbar_main_page): "https://www.tutorialbar.com/quickbooks-pro-desktop-bookkeeping-business-easy-way/", "https://www.tutorialbar.com/quickbooks-online-bank-feeds-credit-card-feeds-2020/", ] + + +@pytest.mark.parametrize( + "enabled", + [ + (True,), + (False,), + ], + ids=("Test enabled", "Test disabled"), +) +def test_enable_status( + enabled, +): + + tbs = TutorialBarScraper(enabled=enabled) + assert tbs.is_disabled() is not enabled From 614b6df7e00e7a23f177d7e3e32b406e1ebb6165 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 16:22:43 +0000 Subject: [PATCH 11/37] Update README --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6e637fd..c2d882d 100644 --- a/README.md +++ b/README.md @@ -102,9 +102,11 @@ get all the requirements installed in one go. Similar instructions applies for p 3 . The script can be passed arguments: - `--help`: View full list of arguments available -- `--max-pages=`: Max number of pages to scrape from tutorialbar.com before exiting the script - `--browser=`: Run with a specific browser -- `--cache-hits=`: If we hit the cache this number of times in a row we will exit the script (default is 12) +- `--comidoc`: Run the comidoc scraper only +- `--tutorialbar`: Run the tutorialbar scraper only +- `--max-pages=`: Max number of pages to scrape from tutorialbar.com before exiting the script (default is 5) +- `--debug`: Enable debug logging 4 . 
Run the chosen script in terminal like so: - `python udemy_enroller.py --browser=firefox` From 68fa43f00b44b4210f5ff3e3ef0b1a1db0c56de1 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 16:27:47 +0000 Subject: [PATCH 12/37] Adding debug arg to CI call --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 631ef3b..bd902d3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -56,4 +56,4 @@ jobs: UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller.py --browser=chrome + poetry run python udemy_enroller.py --browser=chrome --debug From 79602e91527bd6d4cb43858dd41a43541074ffc3 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 16:38:54 +0000 Subject: [PATCH 13/37] Fixing docstring and return type --- udemy_enroller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/udemy_enroller.py b/udemy_enroller.py index d58c4a1..e5d8663 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -1,7 +1,7 @@ import argparse import logging from argparse import Namespace -from typing import Union +from typing import Tuple, Union from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings from core.utils import redeem_courses @@ -24,11 +24,11 @@ def enable_debug_logging() -> None: def determine_if_scraper_enabled( tutorialbar_enabled: bool, comidoc_enabled: bool, -): +) -> Tuple[bool, bool]: """ + Determine what scrapers should be enabled and disabled - - :return: None + :return: tuple containing boolean of what scrapers should run """ if not tutorialbar_enabled and not comidoc_enabled: # Set both to True since user has not enabled a specific scraper i.e Run all scrapers From 6535a0b237a5f046756e5e09ddd8b0a8eed31e1f Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 19:04:32 +0000 Subject: [PATCH 14/37] Wait for profile options to appear after login --- core/udemy.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/udemy.py b/core/udemy.py index 6d549d9..8bfb5c4 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -66,7 +66,10 @@ def login(self, is_retry=False) -> None: raise RobotException("I am a bot!") raise e else: - # TODO: Verify successful login + user_dropdown_xpath = "//a[@data-purpose='user-dropdown']" + WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.XPATH, user_dropdown_xpath)) + ) self.logged_in = True def redeem(self, url: str) -> str: From f1b8e9159f925f38ee4d7eea8f1a9d79dc100fc3 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 15 Dec 2020 20:02:48 +0000 Subject: [PATCH 15/37] Get dynamic API key from comidoc js files --- core/scrapers/comidoc.py | 42 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index be107b0..463b780 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -1,9 +1,11 @@ import datetime import json import logging +import re from typing import Dict, List import aiohttp +from bs4 import BeautifulSoup from core.scrapers.base_scraper import BaseScraper @@ -15,8 +17,6 @@ class ComidocScraper(BaseScraper): Contains any logic related to scraping of data from comidoc.net """ - # TODO: Not sure how often x-api-key changes in HEADERS. 
- # Might need to fetch it dynamically DOMAIN = "https://comidoc.net" HEADERS = { "authority": "comidoc.net", @@ -24,7 +24,6 @@ class ComidocScraper(BaseScraper): "accept-language": "en-US", "sec-ch-ua-mobile": "?0", "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-api-key": "W8GX8OIG4MWCM9Vy16lGH1KDpGvinq66", "content-type": "application/json", "accept": "*/*", "origin": DOMAIN, @@ -49,6 +48,7 @@ async def run(self) -> List: :return: List of udemy course links """ + await self.set_api_key() return await self.get_links() async def get_links(self) -> List: @@ -93,3 +93,39 @@ async def get_data(self) -> Dict: data = await response.json() return data["data"]["coupons"] if data else {} + + async def set_api_key(self) -> None: + """ + Retrieves the api key from comidoc and updates our Headers with that value + + :return: None + """ + async with aiohttp.ClientSession() as session: + async with session.get( + self.DOMAIN, + headers=self.HEADERS, + ) as response: + text = await response.read() + js_links = [] + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") + + # There is no way to identify which js file has the api key + # so we gather all js files imported at top of the page + for i in soup.find_all("script"): + src = i.get("src", "") + if ( + src.startswith("https://cdn.comidoc.net/_next/static/chunks/") + and src.endswith(".js") + and "-" not in src + ): + js_links.append(src) + + # Loop through js files until we get the X-API-KEY + for js_url in reversed(js_links): + async with aiohttp.ClientSession() as session: + async with session.get(js_url) as response: + text = await response.read() + if "X-API-KEY" in str(text): + match = re.search('(?<=X-API-KEY":")(.*?)(?=")', str(text)) + self.HEADERS["x-api-key"] = match.group() + break From 627b8f60fee39303ae108fcc8f6fb73dc22b48d1 Mon Sep 17 00:00:00 2001 From: cullzie Date: Wed, 16 Dec 2020 16:31:58 +0000 Subject: [PATCH 16/37] Store settings,cache and logs in central user directory --- core/__init__.py | 5 +-- core/cache.py | 4 +- core/driver_manager.py | 9 ++-- core/logging.py | 33 ++++++++++++++ core/runner.py | 92 +++++++++++++++++++++++++++++++++++++++ core/scrapers/comidoc.py | 2 +- core/settings.py | 8 ++-- core/udemy.py | 4 +- core/utils.py | 93 ++++------------------------------------ logconfig.ini | 2 +- udemy_enroller.py | 9 ++-- 11 files changed, 156 insertions(+), 105 deletions(-) create mode 100644 core/logging.py create mode 100644 core/runner.py diff --git a/core/__init__.py b/core/__init__.py index dac35e7..b231ff8 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,9 +1,8 @@ -import logging.config - from .cache import CourseCache from .driver_manager import ALL_VALID_BROWSER_STRINGS, DriverManager +from .logging import load_logging_config from .scrapers.manager import ScraperManager from .settings import Settings from .udemy import UdemyActions -logging.config.fileConfig("logconfig.ini", disable_existing_loggers=False) +load_logging_config() diff --git a/core/cache.py b/core/cache.py index c3683e0..d12da2b 100644 --- a/core/cache.py +++ b/core/cache.py @@ -2,6 +2,8 @@ import json import os +from core.utils import get_app_dir + class CourseCache: """ @@ -9,7 +11,7 @@ class CourseCache: """ def __init__(self, file_name=".course_cache"): - self._file_name = file_name + self._file_name = os.path.join(get_app_dir(), file_name) self._cache = [] self._load_cache() diff --git a/core/driver_manager.py b/core/driver_manager.py index 
b2b7506..a4ca600 100644 --- a/core/driver_manager.py +++ b/core/driver_manager.py @@ -1,5 +1,3 @@ -import logging - from selenium import webdriver from selenium.webdriver.chrome.options import Options as ChromeOptions from webdriver_manager.chrome import ChromeDriverManager @@ -8,6 +6,10 @@ from webdriver_manager.opera import OperaDriverManager from webdriver_manager.utils import ChromeType +from core.logging import get_logger + +logger = get_logger() + VALID_FIREFOX_STRINGS = {"ff", "firefox"} VALID_CHROME_STRINGS = {"chrome", "google-chrome"} VALID_CHROMIUM_STRINGS = {"chromium"} @@ -25,9 +27,6 @@ ) -logger = logging.getLogger("udemy_enroller") - - class DriverManager: def __init__(self, browser: str, is_ci_build: bool = False): self.driver = None diff --git a/core/logging.py b/core/logging.py new file mode 100644 index 0000000..585ffe6 --- /dev/null +++ b/core/logging.py @@ -0,0 +1,33 @@ +import logging +import logging.config +import os + +from core.utils import get_app_dir + + +class CustomFileHandler(logging.FileHandler): + """ + Allows us to log to the app directory + """ + + def __init__(self, file_name, mode): + log_file_path = os.path.join(get_app_dir(), file_name) + super(CustomFileHandler, self).__init__(log_file_path, mode) + + +def load_logging_config() -> None: + """ + Load logging configuration from file + + :return: None + """ + logging.config.fileConfig("logconfig.ini", disable_existing_loggers=False) + + +def get_logger() -> logging.Logger: + """ + Convenience method to load the app logger + + :return: An instance of the app logger + """ + return logging.getLogger("udemy_enroller") diff --git a/core/runner.py b/core/runner.py new file mode 100644 index 0000000..085f1fe --- /dev/null +++ b/core/runner.py @@ -0,0 +1,92 @@ +import asyncio +from typing import Union + +from selenium.common.exceptions import ( + NoSuchElementException, + TimeoutException, + WebDriverException, +) +from selenium.webdriver.remote.webdriver import WebDriver + +from core import CourseCache, ScraperManager, Settings, UdemyActions, exceptions +from core.logging import get_logger + +logger = get_logger() + + +def _redeem_courses( + driver: WebDriver, + settings: Settings, + scrapers: ScraperManager, +) -> None: + """ + Method to scrape courses from tutorialbar.com and enroll in them on udemy + + :param WebDriver driver: Webdriver used to enroll in Udemy courses + :param Settings settings: Core settings used for Udemy + :param ScraperManager scrapers: + :return: + """ + cache = CourseCache() + udemy_actions = UdemyActions(driver, settings) + udemy_actions.login() # login once outside while loop + loop = asyncio.get_event_loop() + + while True: + udemy_course_links = loop.run_until_complete(scrapers.run()) + + if udemy_course_links: + for course_link in udemy_course_links: + try: + if course_link not in cache: + status = udemy_actions.redeem(course_link) + cache.add(course_link, status) + else: + logger.debug(f"In cache: {course_link}") + except NoSuchElementException as e: + logger.error(e) + except TimeoutException: + logger.error(f"Timeout on link: {course_link}") + except WebDriverException: + logger.error(f"Webdriver exception on link: {course_link}") + except KeyboardInterrupt: + logger.error("Exiting the script") + raise + except exceptions.RobotException as e: + logger.error(e) + raise + except Exception as e: + logger.error(f"Unexpected exception: {e}") + finally: + if settings.is_ci_build: + logger.info("We have attempted to subscribe to 1 udemy course") + logger.info("Ending test") + return + 
else: + logger.info("All scrapers complete") + return + + +def redeem_courses( + driver: WebDriver, + settings: Settings, + tutorialbar_enabled: bool, + comidoc_enabled: bool, + max_pages: Union[int, None], +) -> None: + """ + Wrapper of _redeem_courses so we always close browser on completion + + :param WebDriver driver: Webdriver used to enroll in Udemy courses + :param Settings settings: Core settings used for Udemy + :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run + :param bool comidoc_enabled: Boolean signifying if comidoc scraper should run + :param int max_pages: Max pages to scrape from tutorialbar.com + :return: + """ + try: + scrapers = ScraperManager(tutorialbar_enabled, comidoc_enabled, max_pages) + _redeem_courses(driver, settings, scrapers) + finally: + logger.info("Closing browser") + driver.quit() diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index 463b780..e6014b3 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -39,7 +39,7 @@ def __init__(self, enabled, days_offset=5): self.scraper_name = "comidoc" if not enabled: self.set_state_disabled() - self.days_offset = days_offset # Query the past months coupons + self.days_offset = days_offset # Query the past x days coupons @BaseScraper.time_run async def run(self) -> List: diff --git a/core/settings.py b/core/settings.py index a3bcf9b..9c217b3 100644 --- a/core/settings.py +++ b/core/settings.py @@ -1,12 +1,14 @@ import getpass -import logging import os.path from distutils.util import strtobool from typing import Dict, List from ruamel.yaml import YAML, dump -logger = logging.getLogger("udemy_enroller") +from core.logging import get_logger +from core.utils import get_app_dir + +logger = get_logger() class Settings: @@ -21,7 +23,7 @@ def __init__(self, settings_path="settings.yaml"): self.languages = [] self.categories = [] - self._settings_path = settings_path + self._settings_path = os.path.join(get_app_dir(), settings_path) self.is_ci_build = strtobool(os.environ.get("CI_TEST", "False")) self._init_settings() diff --git a/core/udemy.py b/core/udemy.py index 8bfb5c4..f316019 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -1,4 +1,3 @@ -import logging from enum import Enum from selenium.common.exceptions import NoSuchElementException, TimeoutException @@ -8,9 +7,10 @@ from selenium.webdriver.support.ui import WebDriverWait from core.exceptions import RobotException +from core.logging import get_logger from core.settings import Settings -logger = logging.getLogger("udemy_enroller") +logger = get_logger() class UdemyStatus(Enum): diff --git a/core/utils.py b/core/utils.py index 628a07e..4f183da 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,92 +1,15 @@ -import asyncio -import logging -from typing import Union +import os -from selenium.common.exceptions import ( - NoSuchElementException, - TimeoutException, - WebDriverException, -) -from selenium.webdriver.remote.webdriver import WebDriver -from core import CourseCache, ScraperManager, Settings, UdemyActions, exceptions - -logger = logging.getLogger("udemy_enroller") - - -def _redeem_courses( - driver: WebDriver, - settings: Settings, - scrapers: ScraperManager, -) -> None: +def get_app_dir() -> str: """ - Method to scrape courses from tutorialbar.com and enroll in them on udemy + Gets the app directory where all data related to the script is stored - :param WebDriver driver: Webdriver used to enroll in Udemy courses - :param Settings settings: Core settings used for Udemy - :param 
ScraperManager scrapers: :return: """ - cache = CourseCache() - udemy_actions = UdemyActions(driver, settings) - udemy_actions.login() # login once outside while loop - loop = asyncio.get_event_loop() - - while True: - udemy_course_links = loop.run_until_complete(scrapers.run()) - - if udemy_course_links: - for course_link in udemy_course_links: - try: - if course_link not in cache: - status = udemy_actions.redeem(course_link) - cache.add(course_link, status) - else: - logger.debug(f"In cache: {course_link}") - except NoSuchElementException as e: - logger.error(e) - except TimeoutException: - logger.error(f"Timeout on link: {course_link}") - except WebDriverException: - logger.error(f"Webdriver exception on link: {course_link}") - except KeyboardInterrupt: - logger.error("Exiting the script") - raise - except exceptions.RobotException as e: - logger.error(e) - raise - except Exception as e: - logger.error(f"Unexpected exception: {e}") - finally: - if settings.is_ci_build: - logger.info("We have attempted to subscribe to 1 udemy course") - logger.info("Ending test") - return - else: - logger.info("All scrapers complete") - return - - -def redeem_courses( - driver: WebDriver, - settings: Settings, - tutorialbar_enabled: bool, - comidoc_enabled: bool, - max_pages: Union[int, None], -) -> None: - """ - Wrapper of _redeem_courses so we always close browser on completion + app_dir = os.path.join(os.path.expanduser("~"), ".udemy_enroller") - :param WebDriver driver: Webdriver used to enroll in Udemy courses - :param Settings settings: Core settings used for Udemy - :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run - :param bool comidoc_enabled: Boolean signifying if comidoc scraper should run - :param int max_pages: Max pages to scrape from tutorialbar.com - :return: - """ - try: - scrapers = ScraperManager(tutorialbar_enabled, comidoc_enabled, max_pages) - _redeem_courses(driver, settings, scrapers) - finally: - logger.info("Closing browser") - driver.quit() + if not os.path.isdir(app_dir): + # If the app data dir does not exist create it + os.mkdir(app_dir) + return app_dir diff --git a/logconfig.ini b/logconfig.ini index 5e628ec..08ff049 100644 --- a/logconfig.ini +++ b/logconfig.ini @@ -19,7 +19,7 @@ qualname=udemy_enroller propagate=0 [handler_defaultHandler] -class=FileHandler +class=core.logging.CustomFileHandler formatter=defaultFormatter args=("app.log", "a") diff --git a/udemy_enroller.py b/udemy_enroller.py index e5d8663..605e488 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -4,9 +4,10 @@ from typing import Tuple, Union from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings -from core.utils import redeem_courses +from core.logging import get_logger +from core.runner import redeem_courses -logger = logging.getLogger("udemy_enroller") +logger = get_logger() def enable_debug_logging() -> None: @@ -16,8 +17,8 @@ def enable_debug_logging() -> None: :return: None """ logger.setLevel(logging.DEBUG) - for handlers in logger.handlers: - handlers.setLevel(logging.DEBUG) + for handler in logger.handlers: + handler.setLevel(logging.DEBUG) logger.info(f"Enabled debug logging") From c5a868f23d60924ab3943e75c97171533fef9c04 Mon Sep 17 00:00:00 2001 From: cullzie Date: Wed, 16 Dec 2020 16:38:05 +0000 Subject: [PATCH 17/37] Updating tests to pass --- tests/conftest.py | 14 +++++++++----- tests/core/test_settings.py | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 
0f306f9..045f1cb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,17 +3,21 @@ import pytest +from core.utils import get_app_dir + @pytest.fixture(scope="session", autouse=True) def test_file_dir(): + app_dir = get_app_dir() test_file_dir = "test_tmp" + full_dir = os.path.join(app_dir, test_file_dir) # Try to delete directory in case it wasn't deleted after last test run - if os.path.isdir(test_file_dir): - shutil.rmtree(test_file_dir) - yield os.mkdir(test_file_dir) + if os.path.isdir(full_dir): + shutil.rmtree(full_dir) + yield os.mkdir(full_dir) # Delete directory after all tests completed - if os.path.isdir(test_file_dir): - shutil.rmtree(test_file_dir) + if os.path.isdir(full_dir): + shutil.rmtree(full_dir) @pytest.fixture() diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py index 5fdcd12..db4e88b 100644 --- a/tests/core/test_settings.py +++ b/tests/core/test_settings.py @@ -5,6 +5,7 @@ from ruamel.yaml import YAML from core import Settings +from core.utils import get_app_dir @pytest.mark.parametrize( @@ -59,7 +60,7 @@ def test_settings(email, password, zip_code, languages, categories, save, file_n "builtins.input", side_effect=[email, zip_code, languages, categories, save] ): with mock.patch("getpass.getpass", return_value=password): - settings_path = f"test_tmp/{file_name}" + settings_path = os.path.join(get_app_dir(), f"test_tmp/{file_name}") settings = Settings(settings_path) assert settings.email == email assert settings.password == password From 3456563638fb881db5c92850fe07cfe7665e6385 Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 17 Dec 2020 12:41:32 +0000 Subject: [PATCH 18/37] Fixes for comidoc API --- core/scrapers/comidoc.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index e6014b3..049a6db 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -52,15 +52,14 @@ async def run(self) -> List: return await self.get_links() async def get_links(self) -> List: - links = [] # TODO: Add try/except block to handle connection issues data = await self.get_data() - if data: - self.set_state_complete() - links = [ - f"https://www.udemy.com/course{d['course']['cleanUrl']}?couponCode={d['code']}" - for d in data - ] + + self.set_state_complete() + links = [ + f"https://www.udemy.com/course{d['course']['cleanUrl']}?couponCode={d['code']}" + for d in data + ] return links @@ -90,9 +89,17 @@ async def get_data(self) -> Dict: async with session.post( url, headers=self.HEADERS, data=json.dumps(payload) ) as response: - data = await response.json() + if response.ok: + data = await response.read() + else: + logger.error(f"Response not ok comidoc: {response}") - return data["data"]["coupons"] if data else {} + if data: + data = json.loads(data)["data"]["coupons"] + else: + data = {} + logger.warning(f"Empty response from comidoc. 
API may have changed") + return data async def set_api_key(self) -> None: """ @@ -113,11 +120,9 @@ async def set_api_key(self) -> None: # so we gather all js files imported at top of the page for i in soup.find_all("script"): src = i.get("src", "") - if ( - src.startswith("https://cdn.comidoc.net/_next/static/chunks/") - and src.endswith(".js") - and "-" not in src - ): + if src.startswith( + "https://cdn.comidoc.net/_next/static/chunks/pages/_app" + ) and src.endswith(".js"): js_links.append(src) # Loop through js files until we get the X-API-KEY From 3b329a21f3ce2daac93264ad206487b96a75b05b Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 17 Dec 2020 15:53:51 +0000 Subject: [PATCH 19/37] Login error handling, another comidoc fix and already enrolled fix --- core/exceptions.py | 8 +++++ core/runner.py | 2 ++ core/scrapers/comidoc.py | 73 +++++++++++++--------------------------- core/udemy.py | 30 +++++++++++------ 4 files changed, 53 insertions(+), 60 deletions(-) diff --git a/core/exceptions.py b/core/exceptions.py index 68821fb..ba632a2 100644 --- a/core/exceptions.py +++ b/core/exceptions.py @@ -4,3 +4,11 @@ class RobotException(Exception): """ pass + + +class LoginException(Exception): + """ + You have failed to login to the Udemy site + """ + + pass diff --git a/core/runner.py b/core/runner.py index 085f1fe..04c8121 100644 --- a/core/runner.py +++ b/core/runner.py @@ -87,6 +87,8 @@ def redeem_courses( try: scrapers = ScraperManager(tutorialbar_enabled, comidoc_enabled, max_pages) _redeem_courses(driver, settings, scrapers) + except exceptions.LoginException as e: + logger.error(str(e)) finally: logger.info("Closing browser") driver.quit() diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index e6014b3..8c81e61 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -1,7 +1,5 @@ -import datetime import json import logging -import re from typing import Dict, List import aiohttp @@ -34,12 +32,11 @@ class ComidocScraper(BaseScraper): "cookie": "consent=true", } - def __init__(self, enabled, days_offset=5): + def __init__(self, enabled): super().__init__() self.scraper_name = "comidoc" if not enabled: self.set_state_disabled() - self.days_offset = days_offset # Query the past x days coupons @BaseScraper.time_run async def run(self) -> List: @@ -48,7 +45,6 @@ async def run(self) -> List: :return: List of udemy course links """ - await self.set_api_key() return await self.get_links() async def get_links(self) -> List: @@ -71,61 +67,38 @@ async def get_data(self) -> Dict: :return: dictionary containing data needed to build udemy free urls """ - url = f"{self.DOMAIN}/beta" - payload = { - "query": "query DAILY_COURSES_QUERY($myDate: DateTime) { coupons: " - 'coupons( where: { isValid: true createdAt_gte: $myDate discountValue_starts_with: "100%" } ' - "orderBy: createdAt_DESC) { code isValid createdAt discountValue discountPrice maxUses " - "remainingUses endTime course { udemyId cleanUrl createdAt updatedAt detail(last: 1) { title " - "isPaid lenghtTxt rating updated subscribers locale { locale } } } } }", - "variables": { - "myDate": ( - datetime.datetime.utcnow() - - datetime.timedelta(days=self.days_offset) - ).strftime("%Y-%m-%dT%H") - }, - } - - async with aiohttp.ClientSession() as session: - async with session.post( - url, headers=self.HEADERS, data=json.dumps(payload) - ) as response: - data = await response.json() - - return data["data"]["coupons"] if data else {} - - async def set_api_key(self) -> None: - """ - Retrieves the api key from comidoc and 
updates our Headers with that value - - :return: None - """ async with aiohttp.ClientSession() as session: async with session.get( self.DOMAIN, headers=self.HEADERS, ) as response: text = await response.read() - js_links = [] soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - # There is no way to identify which js file has the api key - # so we gather all js files imported at top of the page + # We get the url hash needed from the path of the _buildManifest.js file + path_js = None for i in soup.find_all("script"): src = i.get("src", "") - if ( - src.startswith("https://cdn.comidoc.net/_next/static/chunks/") - and src.endswith(".js") - and "-" not in src + if src.startswith("https://cdn.comidoc.net/_next/static/") and src.endswith( + "_buildManifest.js" ): - js_links.append(src) + path_js = src.split("/")[-2] + break + + data = {} + # Fetch the daily courses if the path has been correctly resolved + if path_js is not None: + daily_json_link = f"{self.DOMAIN}/_next/data/{path_js}/daily.json" - # Loop through js files until we get the X-API-KEY - for js_url in reversed(js_links): async with aiohttp.ClientSession() as session: - async with session.get(js_url) as response: - text = await response.read() - if "X-API-KEY" in str(text): - match = re.search('(?<=X-API-KEY":")(.*?)(?=")', str(text)) - self.HEADERS["x-api-key"] = match.group() - break + async with session.get( + daily_json_link, + headers=self.HEADERS, + ) as response: + data = await response.read() + if data: + data = json.loads(data)["pageProps"]["coupons"] + else: + data = {} + logger.warning(f"Empty response from comidoc. API may have changed!") + return data diff --git a/core/udemy.py b/core/udemy.py index f316019..e69ed39 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -6,7 +6,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from core.exceptions import RobotException +from core.exceptions import LoginException, RobotException from core.logging import get_logger from core.settings import Settings @@ -58,7 +58,7 @@ def login(self, is_retry=False) -> None: is_robot = self._check_if_robot() if is_robot and not is_retry: input( - "Please solve the captcha before proceeding. Hit enter once solved " + "Before login. Please solve the captcha before proceeding. Hit enter once solved " ) self.login(is_retry=True) return @@ -67,9 +67,21 @@ def login(self, is_retry=False) -> None: raise e else: user_dropdown_xpath = "//a[@data-purpose='user-dropdown']" - WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.XPATH, user_dropdown_xpath)) - ) + try: + WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.XPATH, user_dropdown_xpath)) + ) + except TimeoutException: + is_robot = self._check_if_robot() + if is_robot and not is_retry: + input( + "After login. Please solve the captcha before proceeding. Hit enter once solved " + ) + if self._check_if_robot(): + raise RobotException("I am a bot!") + self.logged_in = True + return + raise LoginException("Udemy user failed to login") self.logged_in = True def redeem(self, url: str) -> str: @@ -122,11 +134,9 @@ def redeem(self, url: str) -> str: EC.element_to_be_clickable((By.XPATH, buy_course_button_xpath)) ) - # Check if already enrolled - already_purchased_xpath = ( - "//div[starts-with(@class, 'buy-box--purchased-text-banner')]" - ) - if self.driver.find_elements_by_xpath(already_purchased_xpath): + # Check if already enrolled. 
If add to cart is available we have not yet enrolled + add_to_cart_xpath = "//div[@data-purpose='add-to-cart']" + if not self.driver.find_elements_by_xpath(add_to_cart_xpath): logger.debug(f"Already enrolled in {course_name}") return UdemyStatus.ENROLLED.value From 5f8c7dc1e910e9aae7be8181f7b21c6cf33ff9 Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 17 Dec 2020 16:12:57 +0000 Subject: [PATCH 20/37] Apply black after merging upstream/develop --- core/scrapers/comidoc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index 0c0b7a2..631cf56 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -66,7 +66,6 @@ async def get_data(self) -> Dict: :return: dictionary containing data needed to build udemy free urls """ - async with aiohttp.ClientSession() as session: async with session.get( self.DOMAIN, headers=self.HEADERS, ) as response: text = await response.read() From 9dbf89b7bcf46697860ab6d1641a9a9e44c3b2dc Mon Sep 17 00:00:00 2001 From: Fake ID Date: Fri, 18 Dec 2020 20:17:17 +0600 Subject: [PATCH 21/37] Trying to see how it runs without --debug --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index bd902d3..631ef3b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -56,4 +56,4 @@ jobs: UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller.py --browser=chrome --debug + poetry run python udemy_enroller.py --browser=chrome From 659654c35033bd8dd65fc13137424c263b65f538 Mon Sep 17 00:00:00 2001 From: cullzie Date: Fri, 18 Dec 2020 21:03:11 +0000 Subject: [PATCH 22/37] Check if element is displayed --- core/udemy.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/udemy.py b/core/udemy.py index e69ed39..364cdf0 100644 --- a/core/udemy.py +++ b/core/udemy.py @@ -136,7 +136,10 @@ def redeem(self, url: str) -> str: # Check if already enrolled.
If add to cart is available we have not yet enrolled add_to_cart_xpath = "//div[@data-purpose='add-to-cart']" - if not self.driver.find_elements_by_xpath(add_to_cart_xpath): + add_to_cart_elements = self.driver.find_elements_by_xpath(add_to_cart_xpath) + if not add_to_cart_elements or ( + add_to_cart_elements and not add_to_cart_elements[0].is_displayed() + ): logger.debug(f"Already enrolled in {course_name}") return UdemyStatus.ENROLLED.value From 9645f7277b2bf45fbd05b03d1bbe77283d160f08 Mon Sep 17 00:00:00 2001 From: cullzie Date: Fri, 18 Dec 2020 21:21:17 +0000 Subject: [PATCH 23/37] Add exception around scraper main method --- core/scrapers/base_scraper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/scrapers/base_scraper.py b/core/scrapers/base_scraper.py index ce6fa37..0b8a37e 100644 --- a/core/scrapers/base_scraper.py +++ b/core/scrapers/base_scraper.py @@ -62,7 +62,11 @@ def should_run(self): def time_run(func): async def wrapper(self): start_time = datetime.datetime.utcnow() - response = await func(self) + try: + response = await func(self) + except Exception as e: + logger.error(f"Error while running {self.scraper_name} scraper: {e}") + self.is_complete() end_time = datetime.datetime.utcnow() logger.info( f"Got {len(response)} links from {self.DOMAIN} in {(end_time - start_time).total_seconds():.2f} seconds" From 8a7c82843bf3d06aa1bea7e3d6a5e5c678c8b154 Mon Sep 17 00:00:00 2001 From: cullzie Date: Fri, 18 Dec 2020 21:48:31 +0000 Subject: [PATCH 24/37] Adding accept-language to headless options --- core/driver_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/driver_manager.py b/core/driver_manager.py index a4ca600..d10d72e 100644 --- a/core/driver_manager.py +++ b/core/driver_manager.py @@ -86,6 +86,7 @@ def _build_ci_options_chrome(): # We need to run headless when using github CI options.add_argument("--headless") options.add_argument("user-agent={0}".format(user_agent)) + options.add_argument("accept-language=en-GB,en-US;q=0.9,en;q=0.8") options.add_argument("--window-size=1325x744") logger.info("This is a CI run") return options From 34f02553f605646e94338edb901d789efd9a1203 Mon Sep 17 00:00:00 2001 From: Fake ID Date: Sat, 19 Dec 2020 18:25:51 +0600 Subject: [PATCH 25/37] Added debug back --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 631ef3b..bd902d3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -56,4 +56,4 @@ jobs: UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller.py --browser=chrome + poetry run python udemy_enroller.py --browser=chrome --debug From a326046d77c239203034948e3dd2d4ab3e306f16 Mon Sep 17 00:00:00 2001 From: Fake ID Date: Sat, 19 Dec 2020 18:25:51 +0600 Subject: [PATCH 26/37] Revert "Added debug back" This reverts commit 34f02553f605646e94338edb901d789efd9a1203.
--- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index bd902d3..631ef3b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -56,4 +56,4 @@ jobs: UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller.py --browser=chrome --debug + poetry run python udemy_enroller.py --browser=chrome From a247328909275a89798395ee86564eb6a2563372 Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 24 Dec 2020 10:46:33 +0000 Subject: [PATCH 27/37] Fix non-latin url issue --- core/http.py | 21 ++++++++++++++++++++ core/scrapers/base_scraper.py | 1 + core/scrapers/comidoc.py | 17 ++++------------- core/scrapers/tutorialbar.py | 36 ++++++++++++++++------------------- 4 files changed, 42 insertions(+), 33 deletions(-) create mode 100644 core/http.py diff --git a/core/http.py b/core/http.py new file mode 100644 index 0000000..547a1fd --- /dev/null +++ b/core/http.py @@ -0,0 +1,21 @@ +import aiohttp +import logging + +logger = logging.getLogger("udemy_enroller") + + +async def get(url, headers={}): + """ + Send REST get request to the url passed in + + :param url: The url to send the GET request to + :param headers: The headers to pass with the get request + :return: data if any exists + """ + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + text = await response.read() + return text + except Exception as e: + logger.error(f"Error in get request: {e}") diff --git a/core/scrapers/base_scraper.py b/core/scrapers/base_scraper.py index 0b8a37e..4df40ce 100644 --- a/core/scrapers/base_scraper.py +++ b/core/scrapers/base_scraper.py @@ -67,6 +67,7 @@ async def wrapper(self): except Exception as e: logger.error(f"Error while running {self.scraper_name} scraper: {e}") self.is_complete() + return [] end_time = datetime.datetime.utcnow() logger.info( f"Got {len(response)} links from {self.DOMAIN} in {(end_time - start_time).total_seconds():.2f} seconds" diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index 631cf56..c18b98c 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -6,6 +6,7 @@ import aiohttp from bs4 import BeautifulSoup from core.scrapers.base_scraper import BaseScraper +from core.http import get logger = logging.getLogger("udemy_enroller") @@ -66,12 +67,7 @@ async def get_data(self) -> Dict: :return: dictionary containing data needed to build udemy free urls """ - async with aiohttp.ClientSession() as session: - async with session.get( - self.DOMAIN, - headers=self.HEADERS, - ) as response: - text = await response.read() + text = await get(self.DOMAIN, headers=self.HEADERS) soup = BeautifulSoup(text.decode("utf-8"), "html.parser") # We get the url hash needed from the path of the _buildManifest.js file @@ -88,14 +84,9 @@ async def get_data(self) -> Dict: # Fetch the daily courses if the path has been correctly resolved if path_js is not None: daily_json_link = f"{self.DOMAIN}/_next/data/{path_js}/daily.json" + data = await get(daily_json_link, headers=self.HEADERS) - async with aiohttp.ClientSession() as session: - async with session.get( - daily_json_link, - headers=self.HEADERS, - ) as response: - data = await response.read() - if data: + if data is not None: data = json.loads(data)["pageProps"]["coupons"] else: data = {} logger.warning(f"Empty response from comidoc. API may have changed") return data diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index
125442f..34f17a8 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -2,10 +2,10 @@ import logging from typing import List -import aiohttp from bs4 import BeautifulSoup from core.scrapers.base_scraper import BaseScraper +from core.http import get logger = logging.getLogger("udemy_enroller") @@ -99,21 +99,18 @@ async def get_course_links(self, url: str) -> List: :param str url: The url to scrape data from :return: list of pages on tutorialbar.com that contain Udemy coupons """ + text = await get(url) + if text is not None: + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - text = await response.read() + links = soup.find_all("h3") + course_links = [link.find("a").get("href") for link in links] - soup = BeautifulSoup(text.decode("utf-8"), "html.parser") + self.last_page = ( + soup.find("li", class_="next_paginate_link").find_previous_sibling().text + ) - links = soup.find_all("h3") - course_links = [link.find("a").get("href") for link in links] - - self.last_page = ( - soup.find("li", class_="next_paginate_link").find_previous_sibling().text - ) - - return course_links + return course_links @staticmethod async def get_udemy_course_link(url: str) -> str: @@ -124,12 +121,11 @@ async def get_udemy_course_link(url: str) -> str: :return: Coupon link of the udemy course """ - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - text = await response.read() - soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - udemy_link = soup.find("span", class_="rh_button_wrapper").find("a").get("href") - return udemy_link + text = await get(url) + if text is not None: + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") + udemy_link = soup.find("span", class_="rh_button_wrapper").find("a").get("href") + return udemy_link async def gather_udemy_course_links(self, courses: List[str]): """ @@ -138,4 +134,4 @@ async def gather_udemy_course_links(self, courses: List[str]): :param list courses: A list of tutorialbar.com course links we want to fetch the udemy links for :return: list of udemy links """ - return await asyncio.gather(*map(self.get_udemy_course_link, courses)) + return [link for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) if link is not None] From 0d9c0ddaf279004383672bb6dda1c68b3c7ef272 Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 24 Dec 2020 10:57:03 +0000 Subject: [PATCH 28/37] Run through black and isort --- core/http.py | 3 ++- core/scrapers/comidoc.py | 2 +- core/scrapers/tutorialbar.py | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/core/http.py b/core/http.py index 547a1fd..970f159 100644 --- a/core/http.py +++ b/core/http.py @@ -1,6 +1,7 @@ -import aiohttp import logging +import aiohttp + logger = logging.getLogger("udemy_enroller") diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index c18b98c..e42028e 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -5,8 +5,8 @@ import aiohttp from bs4 import BeautifulSoup -from core.scrapers.base_scraper import BaseScraper from core.http import get +from core.scrapers.base_scraper import BaseScraper logger = logging.getLogger("udemy_enroller") diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index 34f17a8..cd31992 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -4,8 +4,8 @@ from bs4 import BeautifulSoup -from 
core.scrapers.base_scraper import BaseScraper from core.http import get +from core.scrapers.base_scraper import BaseScraper logger = logging.getLogger("udemy_enroller") @@ -107,7 +107,9 @@ async def get_course_links(self, url: str) -> List: course_links = [link.find("a").get("href") for link in links] self.last_page = ( - soup.find("li", class_="next_paginate_link").find_previous_sibling().text + soup.find("li", class_="next_paginate_link") + .find_previous_sibling() + .text ) return course_links @@ -124,7 +126,9 @@ async def get_udemy_course_link(url: str) -> str: text = await get(url) if text is not None: soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - udemy_link = soup.find("span", class_="rh_button_wrapper").find("a").get("href") + udemy_link = ( + soup.find("span", class_="rh_button_wrapper").find("a").get("href") + ) return udemy_link async def gather_udemy_course_links(self, courses: List[str]): @@ -134,4 +138,8 @@ async def gather_udemy_course_links(self, courses: List[str]): :param list courses: A list of tutorialbar.com course links we want to fetch the udemy links for :return: list of udemy links """ - return [link for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) if link is not None] + return [ + link + for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) + if link is not None + ] From c6a175a7b324c71fa0603429c0d7623c965286f6 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 29 Dec 2020 22:52:21 +0000 Subject: [PATCH 29/37] Package the repo using setup.py --- core/runner.py | 4 +- core/scrapers/comidoc.py | 53 +++++++++------- scripts/__init__.py | 0 scripts/udemy_enroller.py | 116 ++++++++++++++++++++++++++++++++++ setup.py | 40 ++++++++++++ tests/test_udemy_enroller.py | 2 +- udemy_enroller.py | 118 +---------------------------------- 7 files changed, 189 insertions(+), 144 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/udemy_enroller.py create mode 100644 setup.py diff --git a/core/runner.py b/core/runner.py index 04c8121..5267a71 100644 --- a/core/runner.py +++ b/core/runner.py @@ -51,10 +51,10 @@ def _redeem_courses( logger.error(f"Webdriver exception on link: {course_link}") except KeyboardInterrupt: logger.error("Exiting the script") - raise + return except exceptions.RobotException as e: logger.error(e) - raise + return except Exception as e: logger.error(f"Unexpected exception: {e}") finally: diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index e42028e..04a8f07 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -2,7 +2,6 @@ import logging from typing import Dict, List -import aiohttp from bs4 import BeautifulSoup from core.http import get @@ -68,27 +67,33 @@ async def get_data(self) -> Dict: """ text = await get(self.DOMAIN, headers=self.HEADERS) - soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - - # We get the url hash needed from the path of the _buildManifest.js file - path_js = None - for i in soup.find_all("script"): - src = i.get("src", "") - if src.startswith("https://cdn.comidoc.net/_next/static/") and src.endswith( - "_buildManifest.js" - ): - path_js = src.split("/")[-2] - break - - data = {} - # Fetch the daily courses if the path has been correctly resolved - if path_js is not None: - daily_json_link = f"{self.DOMAIN}/_next/data/{path_js}/daily.json" - data = await get(daily_json_link, headers=self.HEADERS) - - if data is not None: - data = json.loads(data)["pageProps"]["coupons"] - else: - data = {} - 
logger.warning(f"Empty response from comidoc. API may have changed!") + if text is not None: + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") + + # We get the url hash needed from the path of the _buildManifest.js file + path_js = None + for i in soup.find_all("script"): + src = i.get("src", "") + if src.startswith( + "https://cdn.comidoc.net/_next/static/" + ) and src.endswith("_buildManifest.js"): + path_js = src.split("/")[-2] + break + + data = {} + # Fetch the daily courses if the path has been correctly resolved + if path_js is not None: + daily_json_link = f"{self.DOMAIN}/_next/data/{path_js}/daily.json" + data = await get(daily_json_link, headers=self.HEADERS) + + if data is not None: + data = json.loads(data)["pageProps"]["coupons"] + else: + data = {} + logger.warning( + f"Empty response from comidoc. API may have changed!" + ) + else: + data = {} + logger.warning("Error while fetching data from comidoc") return data diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/udemy_enroller.py b/scripts/udemy_enroller.py new file mode 100644 index 0000000..77249fb --- /dev/null +++ b/scripts/udemy_enroller.py @@ -0,0 +1,116 @@ +import argparse +import logging +from argparse import Namespace +from typing import Tuple, Union + +from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings +from core.logging import get_logger +from core.runner import redeem_courses + +logger = get_logger() + + +def enable_debug_logging() -> None: + """ + Enable debug logging for the scripts + + :return: None + """ + logger.setLevel(logging.DEBUG) + for handler in logger.handlers: + handler.setLevel(logging.DEBUG) + logger.info(f"Enabled debug logging") + + +def determine_if_scraper_enabled( + tutorialbar_enabled: bool, + comidoc_enabled: bool, +) -> Tuple[bool, bool]: + """ + Determine what scrapers should be enabled and disabled + + :return: tuple containing boolean of what scrapers should run + """ + if not tutorialbar_enabled and not comidoc_enabled: + # Set both to True since user has not enabled a specific scraper i.e Run all scrapers + tutorialbar_enabled, comidoc_enabled = True, True + return tutorialbar_enabled, comidoc_enabled + + +def run( + browser: str, + tutorialbar_enabled: bool, + comidoc_enabled: bool, + max_pages: Union[int, None], +): + """ + Run the udemy enroller script + + :param str browser: Name of the browser we want to create a driver for + :param bool tutorialbar_enabled: + :param bool comidoc_enabled: + :param int or None max_pages: Max number of pages to scrape from tutorialbar.com + :return: + """ + settings = Settings() + dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) + redeem_courses(dm.driver, settings, tutorialbar_enabled, comidoc_enabled, max_pages) + + +def parse_args(browser=None) -> Namespace: + """ + Parse args from the CLI or use the args passed in + + :param str browser: Name of the browser we want to create a driver for + :return: Args to be used in the script + """ + parser = argparse.ArgumentParser(description="Udemy Enroller") + + parser.add_argument( + "--browser", + type=str, + default=browser, + choices=ALL_VALID_BROWSER_STRINGS, + help="Browser to use for Udemy Enroller", + ) + parser.add_argument( + "--tutorialbar", + action="store_true", + default=False, + help="Run tutorialbar scraper", + ) + parser.add_argument( + "--comidoc", + action="store_true", + default=False, + help="Run comidoc scraper", + ) + parser.add_argument( + "--max-pages", + type=int, 
+ default=5, + help=f"Max pages to scrape from tutorialbar (Default is 5)", + ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug logging", + ) + + args = parser.parse_args() + + if args.browser is None: + parser.print_help() + else: + return args + + +def main(): + args = parse_args() + if args: + if args.debug: + enable_debug_logging() + tutorialbar_enabled, comidoc_enabled = determine_if_scraper_enabled( + args.tutorialbar, args.comidoc + ) + run(args.browser, tutorialbar_enabled, comidoc_enabled, args.max_pages) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..af53bc9 --- /dev/null +++ b/setup.py @@ -0,0 +1,40 @@ +import pathlib + +from setuptools import find_packages, setup + +here = pathlib.Path(__file__).parent.resolve() + +long_description = (here / "README.md").read_text(encoding="utf-8") + +with open("requirements.txt") as f: + install_reqs = f.read().splitlines() + +setup( + name="udemy-enroller", + version="2.0.0", + long_description=long_description, + long_description_content_type="text/markdown", + author="", + author_email="", + url="https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE", + classifiers=[ + "Development Status :: 5 - Stable", + "Intended Audience :: Education", + "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", + "Programming Language :: Python :: 3.8", + ], + keywords="udemy, education, enroll", + packages=find_packages(exclude=["pyproject.toml"]), + python_requires=">=3.8, <4", + install_requires=install_reqs, + setup_requires=["pytest-runner"], + extras_require={ + "dev": ["black", "isort"], + "test": ["pytest", "pytest-cov"], + }, + entry_points={ + "console_scripts": [ + "udemy_enroller=scripts.udemy_enroller:main", + ], + }, +) diff --git a/tests/test_udemy_enroller.py b/tests/test_udemy_enroller.py index 9e478ca..33812f5 100644 --- a/tests/test_udemy_enroller.py +++ b/tests/test_udemy_enroller.py @@ -3,7 +3,7 @@ import pytest -from udemy_enroller import parse_args +from scripts.udemy_enroller import parse_args @pytest.mark.parametrize( diff --git a/udemy_enroller.py b/udemy_enroller.py index 605e488..c982e64 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -1,120 +1,4 @@ -import argparse -import logging -from argparse import Namespace -from typing import Tuple, Union - -from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings -from core.logging import get_logger -from core.runner import redeem_courses - -logger = get_logger() - - -def enable_debug_logging() -> None: - """ - Enable debug logging for the scripts - - :return: None - """ - logger.setLevel(logging.DEBUG) - for handler in logger.handlers: - handler.setLevel(logging.DEBUG) - logger.info(f"Enabled debug logging") - - -def determine_if_scraper_enabled( - tutorialbar_enabled: bool, - comidoc_enabled: bool, -) -> Tuple[bool, bool]: - """ - Determine what scrapers should be enabled and disabled - - :return: tuple containing boolean of what scrapers should run - """ - if not tutorialbar_enabled and not comidoc_enabled: - # Set both to True since user has not enabled a specific scraper i.e Run all scrapers - tutorialbar_enabled, comidoc_enabled = True, True - return tutorialbar_enabled, comidoc_enabled - - -def run( - browser: str, - tutorialbar_enabled: bool, - comidoc_enabled: bool, - max_pages: Union[int, None], -): - """ - Run the udemy enroller script - - :param str browser: Name of the browser we want to create a driver for - :param bool tutorialbar_enabled: - 
:param bool comidoc_enabled: - :param int or None max_pages: Max number of pages to scrape from tutorialbar.com - :return: - """ - settings = Settings() - dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) - redeem_courses(dm.driver, settings, tutorialbar_enabled, comidoc_enabled, max_pages) - - -def parse_args(browser=None) -> Namespace: - """ - Parse args from the CLI or use the args passed in - - :param str browser: Name of the browser we want to create a driver for - :return: Args to be used in the script - """ - parser = argparse.ArgumentParser(description="Udemy Enroller") - - parser.add_argument( - "--browser", - type=str, - default=browser, - choices=ALL_VALID_BROWSER_STRINGS, - help="Browser to use for Udemy Enroller", - ) - parser.add_argument( - "--tutorialbar", - action="store_true", - default=False, - help="Run tutorialbar scraper", - ) - parser.add_argument( - "--comidoc", - action="store_true", - default=False, - help="Run comidoc scraper", - ) - parser.add_argument( - "--max-pages", - type=int, - default=5, - help=f"Max pages to scrape from tutorialbar (Default is 5)", - ) - parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging", - ) - - args = parser.parse_args() - - if args.browser is None: - parser.print_help() - else: - return args - - -def main(): - args = parse_args() - if args: - if args.debug: - enable_debug_logging() - tutorialbar_enabled, comidoc_enabled = determine_if_scraper_enabled( - args.tutorialbar, args.comidoc - ) - run(args.browser, tutorialbar_enabled, comidoc_enabled, args.max_pages) - +from scripts.udemy_enroller import main if __name__ == "__main__": main() From df8fbcd1322f1c96f4ced3015b95ac7fcd4eec4c Mon Sep 17 00:00:00 2001 From: cullzie Date: Wed, 6 Jan 2021 18:58:39 +0000 Subject: [PATCH 30/37] Fixing comidoc again --- core/runner.py | 2 +- core/scrapers/base_scraper.py | 29 ++++++++++ core/scrapers/comidoc.py | 99 ++++++++++++++--------------------- core/scrapers/manager.py | 2 +- core/scrapers/tutorialbar.py | 27 ++-------- scripts/udemy_enroller.py | 4 +- 6 files changed, 76 insertions(+), 87 deletions(-) diff --git a/core/runner.py b/core/runner.py index 5267a71..1a65335 100644 --- a/core/runner.py +++ b/core/runner.py @@ -81,7 +81,7 @@ def redeem_courses( :param Settings settings: Core settings used for Udemy :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run :param bool comidoc_enabled: Boolean signifying if comidoc scraper should run - :param int max_pages: Max pages to scrape from tutorialbar.com + :param int max_pages: Max pages to scrape from sites (if pagination exists) :return: """ try: diff --git a/core/scrapers/base_scraper.py b/core/scrapers/base_scraper.py index 4df40ce..55a06d9 100644 --- a/core/scrapers/base_scraper.py +++ b/core/scrapers/base_scraper.py @@ -16,6 +16,9 @@ class BaseScraper(ABC): def __init__(self): self._state = None self.scraper_name = None + self.max_pages = None + self.last_page = None + self.current_page = 0 @abstractmethod async def run(self): @@ -75,3 +78,29 @@ async def wrapper(self): return response return wrapper + + def max_pages_reached(self) -> bool: + """ + Returns boolean of whether or not we should continue checking tutorialbar.com + + :return: + """ + + should_run = True + + if self.max_pages is not None: + should_run = self.max_pages > self.current_page + + if not should_run: + logger.info( + f"Stopping loop. 
We have reached max number of pages to scrape: {self.max_pages}" + ) + self.set_state_complete() + + if self.last_page == self.current_page: + logger.info( + f"Stopping loop. We have reached the last page to scrape: {self.last_page}" + ) + self.set_state_complete() + + return should_run diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index 04a8f07..bbf540d 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -1,6 +1,5 @@ -import json import logging -from typing import Dict, List +from typing import List from bs4 import BeautifulSoup @@ -16,27 +15,13 @@ class ComidocScraper(BaseScraper): """ DOMAIN = "https://comidoc.net" - HEADERS = { - "authority": "comidoc.net", - "sec-ch-ua": '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"', - "accept-language": "en-US", - "sec-ch-ua-mobile": "?0", - "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "content-type": "application/json", - "accept": "*/*", - "origin": DOMAIN, - "sec-fetch-site": "same-origin", - "sec-fetch-mode": "cors", - "sec-fetch-dest": "empty", - "referer": f"{DOMAIN}/daily", - "cookie": "consent=true", - } - - def __init__(self, enabled): + + def __init__(self, enabled, max_pages=None): super().__init__() self.scraper_name = "comidoc" if not enabled: self.set_state_disabled() + self.max_pages = max_pages @BaseScraper.time_run async def run(self) -> List: @@ -45,55 +30,47 @@ async def run(self) -> List: :return: List of udemy course links """ - return await self.get_links() + links = await self.get_links() + logger.info( + f"Page: {self.current_page} of {self.last_page} scraped from comidoc.net" + ) + self.max_pages_reached() + return links async def get_links(self) -> List: - # TODO: Add try/except block to handle connection issues - data = await self.get_data() + """ + Scrape udemy links from comidoc.net - self.set_state_complete() - links = [ - f"https://www.udemy.com/course{d['course']['cleanUrl']}?couponCode={d['code']}" - for d in data - ] + :return: List of udemy course urls + """ + links = [] + self.current_page += 1 + coupons_data = await get(f"{self.DOMAIN}/coupons?page={self.current_page}") + soup = BeautifulSoup(coupons_data.decode("utf-8"), "html.parser") + for course_card in soup.find_all("div", class_="MuiPaper-root"): + all_links = course_card.find_all("a") + if len(all_links) > 2: + links.append(all_links[2].get("href")) + + self.last_page = self._get_last_page(soup) return links - async def get_data(self) -> Dict: + @staticmethod + def _get_last_page(soup: BeautifulSoup) -> int: """ - Fetch data from comidoc endpoint + Extract the last page number to scrape - :return: dictionary containing data needed to build udemy free urls + :param soup: + :return: The last page number to scrape """ + all_pages = [] + for page_link in soup.find("ul", class_="MuiPagination-ul").find_all("li"): + pagination = page_link.find("a") + + if pagination: + page_number = pagination["aria-label"].split()[-1] + if page_number.isdigit(): + all_pages.append(int(page_number)) - text = await get(self.DOMAIN, headers=self.HEADERS) - if text is not None: - soup = BeautifulSoup(text.decode("utf-8"), "html.parser") - - # We get the url hash needed from the path of the _buildManifest.js file - path_js = None - for i in soup.find_all("script"): - src = i.get("src", "") - if src.startswith( - "https://cdn.comidoc.net/_next/static/" - ) and src.endswith("_buildManifest.js"): - path_js = src.split("/")[-2] - break - - data = {} - 
# Fetch the daily courses if the path has been correctly resolved - if path_js is not None: - daily_json_link = f"{self.DOMAIN}/_next/data/{path_js}/daily.json" - data = await get(daily_json_link, headers=self.HEADERS) - - if data is not None: - data = json.loads(data)["pageProps"]["coupons"] - else: - data = {} - logger.warning( - f"Empty response from comidoc. API may have changed!" - ) - else: - data = {} - logger.warning("Error while fetching data from comidoc") - return data + return max(all_pages) diff --git a/core/scrapers/manager.py b/core/scrapers/manager.py index 1fecd3b..1b5dc80 100644 --- a/core/scrapers/manager.py +++ b/core/scrapers/manager.py @@ -11,7 +11,7 @@ def __init__(self, tutorialbar_enabled, comidoc_enabled, max_pages): self.tutorialbar_scraper = TutorialBarScraper( tutorialbar_enabled, max_pages=max_pages ) - self.comidoc_scraper = ComidocScraper(comidoc_enabled) + self.comidoc_scraper = ComidocScraper(comidoc_enabled, max_pages=max_pages) self._scrapers = (self.tutorialbar_scraper, self.comidoc_scraper) async def run(self) -> List: diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index cd31992..a5133a9 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -23,9 +23,7 @@ def __init__(self, enabled, max_pages=None): self.scraper_name = "tutorialbar" if not enabled: self.set_state_disabled() - self.current_page = 0 self.last_page = None - self.links_per_page = 12 self.max_pages = max_pages @BaseScraper.time_run @@ -40,6 +38,11 @@ async def run(self) -> List: return links async def get_links(self): + """ + Scrape udemy links from tutorialbar.com + + :return: List of udemy course urls + """ self.current_page += 1 course_links = await self.get_course_links( f"{self.DOMAIN}/all-courses/page/{self.current_page}/" @@ -56,26 +59,6 @@ async def get_links(self): return links - def max_pages_reached(self) -> bool: - """ - Returns boolean of whether or not we should continue checking tutorialbar.com - - :return: - """ - - should_run = True - - if self.max_pages is not None: - should_run = self.max_pages > self.current_page - - if not should_run: - logger.info( - f"Stopping loop. 
We have reached max number of pages to scrape: {self.max_pages}" - ) - self.set_state_complete() - - return should_run - def _filter_ad_domains(self, udemy_links) -> List: """ Filter out any known ad domains from the links scraped diff --git a/scripts/udemy_enroller.py b/scripts/udemy_enroller.py index 77249fb..767dffb 100644 --- a/scripts/udemy_enroller.py +++ b/scripts/udemy_enroller.py @@ -49,7 +49,7 @@ def run( :param str browser: Name of the browser we want to create a driver for :param bool tutorialbar_enabled: :param bool comidoc_enabled: - :param int or None max_pages: Max number of pages to scrape from tutorialbar.com + :param int max_pages: Max pages to scrape from sites (if pagination exists) :return: """ settings = Settings() @@ -89,7 +89,7 @@ def parse_args(browser=None) -> Namespace: "--max-pages", type=int, default=5, - help=f"Max pages to scrape from tutorialbar (Default is 5)", + help=f"Max pages to scrape from sites (if pagination exists) (Default is 5)", ) parser.add_argument( "--debug", From 1d4855e15f8cb4bf982444a50260f36731575adc Mon Sep 17 00:00:00 2001 From: cullzie Date: Wed, 6 Jan 2021 19:09:44 +0000 Subject: [PATCH 31/37] Updating setup.py with needed info --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index af53bc9..f0f1eab 100644 --- a/setup.py +++ b/setup.py @@ -14,11 +14,12 @@ version="2.0.0", long_description=long_description, long_description_content_type="text/markdown", - author="", - author_email="", + author="aapatre", + author_email="udemyenroller@gmail.com", + maintainer="fakeid cullzie", url="https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE", classifiers=[ - "Development Status :: 5 - Stable", + "Development Status :: 4 - Beta", "Intended Audience :: Education", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", "Programming Language :: Python :: 3.8", From 5ab5de2655670d64bc03b29062a78b1e8b85a8cf Mon Sep 17 00:00:00 2001 From: cullzie Date: Wed, 6 Jan 2021 19:30:01 +0000 Subject: [PATCH 32/37] Updating docs and bumping to v2.0.0 --- CHANGELOG.md | 12 ++++++++++++ README.md | 36 ++++++++++++++++-------------------- pyproject.toml | 2 +- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0913438..fdfc2bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.0.0] - 2021-01-09 + +### Added + +- New coupon source from comidoc.net +- Refactored to have generic scrapers and manager +- Improved performance (asyncio) +- Packaged and published to PyPI +- Added cli args --debug, --tutorialbar, --comidoc +- Removed unpopular cli arg -> --cache-hits +- Write settings/cache to home folder so we can persist settings between versions (installed from PyPI) + ## [1.0.0] - 2020-12-09 ### Added diff --git a/README.md b/README.md index 5d19ca2..942cb59 100644 --- a/README.md +++ b/README.md @@ -64,57 +64,53 @@ get all the requirements installed in one go. Similar instructions applies for p --- ## Instructions - -1 . Make sure to install all the requirements above. +1 .
Install from PyPI `pip install udemy_enroller` - Run the script and the CLI will guide you through the required settings -- Otherwise you can rename the following file - [sample_settings.yaml](sample_settings.yaml) to **settings.yaml** and edit it - using a text editor and insert your **Udemy registered email in the email - section**, your **Udemy password in the password section**, and the **ZIP Code - in the zipcode section (if you reside in the United States or any other region - where Udemy asks for ZIP Code as Billing Info, else enter a random number)** - Additionally you can add your preferred languages and course categories. +- If you decide to save the settings they will be stored in your home directory:
+**Windows**: + C:/Users/CurrentUserName/.udemy_enroller
+**Linux**: + /home/username/.udemy_enroller -2 . Choose the appropriate file for your browser (from the list below): +2 . Choose the appropriate command for your browser (from the list below): - **Tested and works perfectly:** - Chrome: - [udemy_enroller.py --browser=chrome](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=chrome](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - Chromium: - [udemy_enroller.py --browser=chromium](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=chromium](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - Edge: - [udemy_enroller.py --browser=edge](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=edge](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - **Has issues when run on custom kernel but works fine on vanilla OS:** - Firefox: - [udemy_enroller.py --browser=firefox (might require manual driver installation)](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=firefox (might require manual driver installation)](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - **Untested:** - Opera: - [udemy_enroller.py --browser=opera](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=opera](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - **Use at your own risk:** - - Vanilla - Internet Explorer: - [udemy_enroller.py --browser=internet_explorer](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) + [udemy_enroller --browser=internet_explorer](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) 3 . The script can be passed arguments: - `--help`: View full list of arguments available - `--browser=`: Run with a specific browser - `--comidoc`: Run the comidoc scraper only - `--tutorialbar`: Run the tutorialbar scraper only -- `--max-pages=`: Max number of pages to scrape from tutorialbar.com before exiting the script (default is 5) +- `--max-pages=`: Max number of pages to scrape from sites before exiting the script (default is 5) - `--debug`: Enable debug logging 4 . Run the chosen script in terminal like so: -- `python udemy_enroller.py --browser=firefox` +- `udemy_enroller --browser=firefox` 5 . The bot starts scraping the course links from the first **All Courses** page -on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1) and starts +on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1) and [Comidoc](https://www.comidoc.net/coupons) and starts enrolling you to Udemy courses. After it has enrolled you to courses from the first page, it then moves to the next Tutorial Bar page and the cycle continues. 
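+
+For example, a typical first run on a machine with Chrome installed (a minimal sketch; it simply combines the install command from step 1 with the flags documented in step 3) could look like:
+
+- `pip install udemy_enroller`
+- `udemy_enroller --browser=chrome --max-pages=10 --debug`
+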
diff --git a/pyproject.toml b/pyproject.toml index 9aa30e7..7efb67c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "automatic-udemy-course-enroller-get-paid-udemy-courses-for-free" -version = "1.0" +version = "2.0.0" description = "" authors = [""] From d55b9ad3ec9e1a1494149bf2b5939dc314902638 Mon Sep 17 00:00:00 2001 From: cullzie Date: Mon, 11 Jan 2021 16:48:42 +0000 Subject: [PATCH 33/37] Fixing comidoc.net --- core/scrapers/base_scraper.py | 17 ++++++++++++++++ core/scrapers/comidoc.py | 37 ++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/core/scrapers/base_scraper.py b/core/scrapers/base_scraper.py index 55a06d9..bc86d02 100644 --- a/core/scrapers/base_scraper.py +++ b/core/scrapers/base_scraper.py @@ -1,7 +1,9 @@ import datetime import logging +import re from abc import ABC, abstractmethod from enum import Enum +from typing import Optional logger = logging.getLogger("udemy_enroller") @@ -104,3 +106,18 @@ def max_pages_reached(self) -> bool: self.set_state_complete() return should_run + + @staticmethod + def validate_coupon_url(url) -> Optional[str]: + """ + Validate the udemy coupon url passed in + If it matches the pattern it is returned else it returns None + + :param url: The url to check the udemy coupon pattern for + :return: The validated url or None + """ + url_pattern = r"^https:\/\/www.udemy.com.*couponCode=.*$" + matching = re.match(url_pattern, url) + if matching is not None: + matching = matching.group() + return matching diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index bbf540d..b4d69e1 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -1,3 +1,4 @@ +import asyncio import logging from typing import List @@ -43,19 +44,49 @@ async def get_links(self) -> List: :return: List of udemy course urls """ - links = [] + comidoc_links = [] self.current_page += 1 coupons_data = await get(f"{self.DOMAIN}/coupons?page={self.current_page}") soup = BeautifulSoup(coupons_data.decode("utf-8"), "html.parser") for course_card in soup.find_all("div", class_="MuiPaper-root"): all_links = course_card.find_all("a") - if len(all_links) > 2: - links.append(all_links[2].get("href")) + if len(all_links) == 2: + comidoc_links.append(f"{self.DOMAIN}{all_links[1].get('href')}") + links = await self.gather_udemy_course_links(comidoc_links) self.last_page = self._get_last_page(soup) return links + @classmethod + async def get_udemy_course_link(cls, url: str) -> str: + """ + Gets the udemy course link + + :param str url: The url to scrape data from + :return: Coupon link of the udemy course + """ + + data = await get(url) + soup = BeautifulSoup(data.decode("utf-8"), "html.parser") + for link in soup.find_all("a", href=True): + udemy_link = cls.validate_coupon_url(link["href"]) + if udemy_link is not None: + return udemy_link + + async def gather_udemy_course_links(self, courses: List[str]): + """ + Threaded fetching of the udemy course links from tutorialbar.com + + :param list courses: A list of comidoc.net course links we want to fetch the udemy links for + :return: list of udemy links + """ + return [ + link + for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) + if link is not None + ] + @staticmethod def _get_last_page(soup: BeautifulSoup) -> int: """ From 763198ac9fd2e5f37ab5f4f52ca0919c3311b971 Mon Sep 17 00:00:00 2001 From: cullzie Date: Thu, 14 Jan 2021 22:56:55 +0000 Subject: [PATCH 34/37] Remove comidoc and add DiscUdemy --- CHANGELOG.md | 4 
+- README.md | 13 +++-- core/runner.py | 6 +- core/scrapers/comidoc.py | 2 +- core/scrapers/discudemy.py | 108 +++++++++++++++++++++++++++++++++++ core/scrapers/manager.py | 10 ++-- core/scrapers/tutorialbar.py | 2 +- scripts/udemy_enroller.py | 26 +++++---- 8 files changed, 142 insertions(+), 29 deletions(-) create mode 100644 core/scrapers/discudemy.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fdfc2bf..495c772 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,11 @@ and this project adheres to ### Added -- New coupon source from comidoc.net +- New coupon source from discudemy.com - Refactored to have generic scrapers and manager - Improved performance (asyncio) - Packaged and published to PyPI -- Added cli args --debug, --tutorialbar, --comidoc +- Added cli args --debug, --tutorialbar, --discudemy - Removed unpopular cli arg -> --cache-hits - Write settings/cache to home folder so we can persist settings between versions (installed from PyPI) diff --git a/README.md b/README.md index 4d8de09..574d9c9 100644 --- a/README.md +++ b/README.md @@ -8,8 +9,9 @@ Do you want to LEARN NEW STUFF for FREE? Don't worry, with the power of web-scraping and automation, this script will find the necessary Udemy Coupons & enroll you to PAID UDEMY COURSES, ABSOLUTELY FREE! -The code scrapes course links and coupons from -[tutorialbar.com](https://tutorialbar.com) +The code scrapes course links and coupons from: + - [tutorialbar.com](https://tutorialbar.com) + - [discudemy.com](https://discudemy.com) In case of any bugs or issues, please open an issue in github. @@ -101,7 +102,7 @@ get all the requirements installed in one go. Similar instructions applies for p 3 . The script can be passed arguments: - `--help`: View full list of arguments available - `--browser=`: Run with a specific browser -- `--comidoc`: Run the comidoc scraper only +- `--discudemy`: Run the discudemy scraper only - `--tutorialbar`: Run the tutorialbar scraper only - `--max-pages=`: Max number of pages to scrape from sites before exiting the script (default is 5) - `--debug`: Enable debug logging @@ -110,9 +111,9 @@ get all the requirements installed in one go. Similar instructions applies for p - `udemy_enroller --browser=firefox` 5 . The bot starts scraping the course links from the first **All Courses** page -on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1) and [Comidoc](https://www.comidoc.net/coupons) and starts +on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1) and [DiscUdemy](https://www.discudemy.com/all) and starts enrolling you to Udemy courses. After it has enrolled you to courses from the -first page, it then moves to the next Tutorial Bar page and the cycle continues. +first page, it then moves to the next site page and the cycle continues. - Stop the script by pressing ctrl+c in terminal to stop the enrollment process. @@ -132,7 +133,7 @@ which of course I got for free! :) ### 2. How does the bot work? -The bot retrieves coupon links from Tutorial Bar's list to cut the prices and +The bot retrieves coupon links from Tutorial Bar's and DiscUdemy's lists to cut the prices and then uses Selenium's Browser automation features to login and enroll to the courses. 
Think of it this way: Epic Games & other clients like Steam provide you a handful of games each week, for free; Only in this case, we need a coupon code diff --git a/core/runner.py b/core/runner.py index 1a65335..3745288 100644 --- a/core/runner.py +++ b/core/runner.py @@ -71,7 +71,7 @@ def redeem_courses( driver: WebDriver, settings: Settings, tutorialbar_enabled: bool, - comidoc_enabled: bool, + discudemy_enabled: bool, max_pages: Union[int, None], ) -> None: """ @@ -80,12 +80,12 @@ def redeem_courses( :param WebDriver driver: Webdriver used to enroll in Udemy courses :param Settings settings: Core settings used for Udemy :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run - :param bool comidoc_enabled: Boolean signifying if comidoc scraper should run + :param bool discudemy_enabled: Boolean signifying if discudemy scraper should run :param int max_pages: Max pages to scrape from sites (if pagination exists) :return: """ try: - scrapers = ScraperManager(tutorialbar_enabled, comidoc_enabled, max_pages) + scrapers = ScraperManager(tutorialbar_enabled, discudemy_enabled, max_pages) _redeem_courses(driver, settings, scrapers) except exceptions.LoginException as e: logger.error(str(e)) diff --git a/core/scrapers/comidoc.py b/core/scrapers/comidoc.py index b4d69e1..82ec72e 100644 --- a/core/scrapers/comidoc.py +++ b/core/scrapers/comidoc.py @@ -76,7 +76,7 @@ async def get_udemy_course_link(cls, url: str) -> str: async def gather_udemy_course_links(self, courses: List[str]): """ - Threaded fetching of the udemy course links from tutorialbar.com + Async fetching of the udemy course links from comidoc.net :param list courses: A list of comidoc.net course links we want to fetch the udemy links for :return: list of udemy links diff --git a/core/scrapers/discudemy.py b/core/scrapers/discudemy.py new file mode 100644 index 0000000..a4b253c --- /dev/null +++ b/core/scrapers/discudemy.py @@ -0,0 +1,108 @@ +import asyncio +import logging +from typing import List + +from bs4 import BeautifulSoup + +from core.http import get +from core.scrapers.base_scraper import BaseScraper + +logger = logging.getLogger("udemy_enroller") + + +class DiscUdemyScraper(BaseScraper): + """ + Contains any logic related to scraping of data from discudemy.com + """ + + DOMAIN = "https://discudemy.com" + + def __init__(self, enabled, max_pages=None): + super().__init__() + self.scraper_name = "discudemy" + if not enabled: + self.set_state_disabled() + self.max_pages = max_pages + + @BaseScraper.time_run + async def run(self) -> List: + """ + Called to gather the udemy links + + :return: List of udemy course links + """ + links = await self.get_links() + logger.info( + f"Page: {self.current_page} of {self.last_page} scraped from discudemy.com" + ) + self.max_pages_reached() + return links + + async def get_links(self) -> List: + """ + Scrape udemy links from discudemy.com + + :return: List of udemy course urls + """ + discudemy_links = [] + self.current_page += 1 + coupons_data = await get(f"{self.DOMAIN}/all/{self.current_page}") + soup = BeautifulSoup(coupons_data.decode("utf-8"), "html.parser") + for course_card in soup.find_all("a", class_="card-header"): + url_end = course_card["href"].split("/")[-1] + discudemy_links.append(f"{self.DOMAIN}/go/{url_end}") + + links = await self.gather_udemy_course_links(discudemy_links) + + for counter, course in enumerate(links): + logger.debug(f"Received Link {counter + 1} : {course}") + + self.last_page = self._get_last_page(soup) + + return links + + 
@classmethod + async def get_udemy_course_link(cls, url: str) -> str: + """ + Gets the udemy course link + + :param str url: The url to scrape data from + :return: Coupon link of the udemy course + """ + + data = await get(url) + soup = BeautifulSoup(data.decode("utf-8"), "html.parser") + for link in soup.find_all("a", href=True): + udemy_link = cls.validate_coupon_url(link["href"]) + if udemy_link is not None: + return udemy_link + + async def gather_udemy_course_links(self, courses: List[str]): + """ + Async fetching of the udemy course links from discudemy.com + + :param list courses: A list of discudemy.com course links we want to fetch the udemy links for + :return: list of udemy links + """ + return [ + link + for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) + if link is not None + ] + + @staticmethod + def _get_last_page(soup: BeautifulSoup) -> int: + """ + Extract the last page number to scrape + + :param soup: + :return: The last page number to scrape + """ + + return max( + [ + int(i.text) + for i in soup.find("ul", class_="pagination3").find_all("li") + if i.text.isdigit() + ] + ) diff --git a/core/scrapers/manager.py b/core/scrapers/manager.py index 1b5dc80..60b9e6e 100644 --- a/core/scrapers/manager.py +++ b/core/scrapers/manager.py @@ -2,17 +2,19 @@ from functools import reduce from typing import List -from core.scrapers.comidoc import ComidocScraper +from core.scrapers.discudemy import DiscUdemyScraper from core.scrapers.tutorialbar import TutorialBarScraper class ScraperManager: - def __init__(self, tutorialbar_enabled, comidoc_enabled, max_pages): + def __init__(self, tutorialbar_enabled, discudemy_enabled, max_pages): self.tutorialbar_scraper = TutorialBarScraper( tutorialbar_enabled, max_pages=max_pages ) - self.comidoc_scraper = ComidocScraper(comidoc_enabled, max_pages=max_pages) - self._scrapers = (self.tutorialbar_scraper, self.comidoc_scraper) + self.discudemy_scraper = DiscUdemyScraper( + discudemy_enabled, max_pages=max_pages + ) + self._scrapers = (self.tutorialbar_scraper, self.discudemy_scraper) async def run(self) -> List: """ diff --git a/core/scrapers/tutorialbar.py b/core/scrapers/tutorialbar.py index a5133a9..6e09c97 100644 --- a/core/scrapers/tutorialbar.py +++ b/core/scrapers/tutorialbar.py @@ -116,7 +116,7 @@ async def get_udemy_course_link(url: str) -> str: async def gather_udemy_course_links(self, courses: List[str]): """ - Threaded fetching of the udemy course links from tutorialbar.com + Async fetching of the udemy course links from tutorialbar.com :param list courses: A list of tutorialbar.com course links we want to fetch the udemy links for :return: list of udemy links diff --git a/scripts/udemy_enroller.py b/scripts/udemy_enroller.py index 767dffb..c9d8487 100644 --- a/scripts/udemy_enroller.py +++ b/scripts/udemy_enroller.py @@ -24,23 +24,23 @@ def enable_debug_logging() -> None: def determine_if_scraper_enabled( tutorialbar_enabled: bool, - comidoc_enabled: bool, + discudemy_enabled: bool, ) -> Tuple[bool, bool]: """ Determine what scrapers should be enabled and disabled :return: tuple containing boolean of what scrapers should run """ - if not tutorialbar_enabled and not comidoc_enabled: + if not tutorialbar_enabled and not discudemy_enabled: # Set both to True since user has not enabled a specific scraper i.e Run all scrapers - tutorialbar_enabled, comidoc_enabled = True, True - return tutorialbar_enabled, comidoc_enabled + tutorialbar_enabled, discudemy_enabled = True, True + return tutorialbar_enabled, 
discudemy_enabled def run( browser: str, tutorialbar_enabled: bool, - comidoc_enabled: bool, + discudemy_enabled: bool, max_pages: Union[int, None], ): """ @@ -48,13 +48,15 @@ def run( :param str browser: Name of the browser we want to create a driver for :param bool tutorialbar_enabled: - :param bool comidoc_enabled: + :param bool discudemy_enabled: :param int max_pages: Max pages to scrape from sites (if pagination exists) :return: """ settings = Settings() dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) - redeem_courses(dm.driver, settings, tutorialbar_enabled, comidoc_enabled, max_pages) + redeem_courses( + dm.driver, settings, tutorialbar_enabled, discudemy_enabled, max_pages + ) def parse_args(browser=None) -> Namespace: @@ -80,10 +82,10 @@ def parse_args(browser=None) -> Namespace: help="Run tutorialbar scraper", ) parser.add_argument( - "--comidoc", + "--discudemy", action="store_true", default=False, - help="Run comidoc scraper", + help="Run discudemy scraper", ) parser.add_argument( "--max-pages", @@ -110,7 +112,7 @@ def main(): if args: if args.debug: enable_debug_logging() - tutorialbar_enabled, comidoc_enabled = determine_if_scraper_enabled( - args.tutorialbar, args.comidoc + tutorialbar_enabled, discudemy_enabled = determine_if_scraper_enabled( + args.tutorialbar, args.discudemy ) - run(args.browser, tutorialbar_enabled, comidoc_enabled, args.max_pages) + run(args.browser, tutorialbar_enabled, discudemy_enabled, args.max_pages) From 1d771935cc863ed7da41f1cd73dff08be2055ef5 Mon Sep 17 00:00:00 2001 From: cullzie Date: Sat, 16 Jan 2021 00:22:08 +0000 Subject: [PATCH 35/37] Re-organizing for release to pypi --- MANIFEST.in | 1 + core/logging.py | 33 ------------- logconfig.ini | 36 -------------- pyproject.toml | 4 +- scripts/__init__.py | 0 setup.py | 17 ++++--- tests/conftest.py | 2 +- tests/core/scrapers/test_tutorialbar.py | 2 +- tests/core/test_cache.py | 8 ++-- tests/core/test_driver_manager.py | 36 +++++++------- tests/core/test_settings.py | 4 +- tests/test_udemy_enroller.py | 2 +- udemy_enroller.py | 2 +- {core => udemy_enroller}/__init__.py | 0 {core => udemy_enroller}/cache.py | 2 +- .../cli.py | 6 +-- {core => udemy_enroller}/driver_manager.py | 5 +- {core => udemy_enroller}/exceptions.py | 0 {core => udemy_enroller}/http.py | 6 +-- udemy_enroller/logging.py | 48 +++++++++++++++++++ {core => udemy_enroller}/runner.py | 12 ++--- {core => udemy_enroller}/scrapers/__init__.py | 0 .../scrapers/base_scraper.py | 0 {core => udemy_enroller}/scrapers/comidoc.py | 4 +- .../scrapers/discudemy.py | 4 +- {core => udemy_enroller}/scrapers/manager.py | 4 +- .../scrapers/tutorialbar.py | 4 +- {core => udemy_enroller}/settings.py | 4 +- {core => udemy_enroller}/udemy.py | 6 +-- {core => udemy_enroller}/utils.py | 0 30 files changed, 117 insertions(+), 135 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 core/logging.py delete mode 100644 logconfig.ini delete mode 100644 scripts/__init__.py rename {core => udemy_enroller}/__init__.py (100%) rename {core => udemy_enroller}/cache.py (97%) rename scripts/udemy_enroller.py => udemy_enroller/cli.py (94%) rename {core => udemy_enroller}/driver_manager.py (95%) rename {core => udemy_enroller}/exceptions.py (100%) rename {core => udemy_enroller}/http.py (88%) create mode 100644 udemy_enroller/logging.py rename {core => udemy_enroller}/runner.py (90%) rename {core => udemy_enroller}/scrapers/__init__.py (100%) rename {core => udemy_enroller}/scrapers/base_scraper.py (100%) rename {core => 
udemy_enroller}/scrapers/comidoc.py (97%) rename {core => udemy_enroller}/scrapers/discudemy.py (96%) rename {core => udemy_enroller}/scrapers/manager.py (89%) rename {core => udemy_enroller}/scrapers/tutorialbar.py (97%) rename {core => udemy_enroller}/settings.py (98%) rename {core => udemy_enroller}/udemy.py (98%) rename {core => udemy_enroller}/utils.py (100%) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..582d6a5 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-exclude tests * \ No newline at end of file diff --git a/core/logging.py b/core/logging.py deleted file mode 100644 index 585ffe6..0000000 --- a/core/logging.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging -import logging.config -import os - -from core.utils import get_app_dir - - -class CustomFileHandler(logging.FileHandler): - """ - Allows us to log to the app directory - """ - - def __init__(self, file_name, mode): - log_file_path = os.path.join(get_app_dir(), file_name) - super(CustomFileHandler, self).__init__(log_file_path, mode) - - -def load_logging_config() -> None: - """ - Load logging configuration from file - - :return: None - """ - logging.config.fileConfig("logconfig.ini", disable_existing_loggers=False) - - -def get_logger() -> logging.Logger: - """ - Convenience method to load the app logger - - :return: An instance of the app logger - """ - return logging.getLogger("udemy_enroller") diff --git a/logconfig.ini b/logconfig.ini deleted file mode 100644 index 08ff049..0000000 --- a/logconfig.ini +++ /dev/null @@ -1,36 +0,0 @@ -[loggers] -keys=root,udemy_enroller - -[handlers] -keys=defaultHandler,consoleHandler - -[formatters] -keys=defaultFormatter,consoleFormatter - -[logger_root] -level=INFO -handlers=defaultHandler -qualname=root - -[logger_udemy_enroller] -level=INFO -handlers=defaultHandler,consoleHandler -qualname=udemy_enroller -propagate=0 - -[handler_defaultHandler] -class=core.logging.CustomFileHandler -formatter=defaultFormatter -args=("app.log", "a") - -[handler_consoleHandler] -class=StreamHandler -level=INFO -formatter=consoleFormatter -args=(sys.stdout,) - -[formatter_defaultFormatter] -format=%(asctime)s - %(name)s - %(levelname)s - %(module)s : %(message)s - -[formatter_consoleFormatter] -format=%(message)s \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7efb67c..a5b9523 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,5 +20,5 @@ pytest-cov = "^2.10.1" pytest-asyncio = "^0.14.0" [build-system] -requires = ["poetry-core>=1.0.0a5"] -build-backend = "poetry.core.masonry.api" +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/scripts/__init__.py b/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/setup.py b/setup.py index f0f1eab..1d30fb2 100644 --- a/setup.py +++ b/setup.py @@ -6,9 +6,6 @@ long_description = (here / "README.md").read_text(encoding="utf-8") -with open("requirements.txt") as f: - install_reqs = f.read().splitlines() - setup( name="udemy-enroller", version="2.0.0", @@ -25,9 +22,17 @@ "Programming Language :: Python :: 3.8", ], keywords="udemy, education, enroll", - packages=find_packages(exclude=["pyproject.toml"]), + packages=find_packages( + exclude=["*tests*"], + ), python_requires=">=3.8, <4", - install_requires=install_reqs, + install_requires=[ + "aiohttp", + "beautifulsoup4", + "ruamel.yaml", + "selenium", + "webdriver-manager", + ], setup_requires=["pytest-runner"], extras_require={ "dev": ["black", "isort"], @@ -35,7 +40,7 
@@ }, entry_points={ "console_scripts": [ - "udemy_enroller=scripts.udemy_enroller:main", + "udemy_enroller=udemy_enroller.cli:main", ], }, ) diff --git a/tests/conftest.py b/tests/conftest.py index 045f1cb..0c44aa3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ import pytest -from core.utils import get_app_dir +from udemy_enroller.utils import get_app_dir @pytest.fixture(scope="session", autouse=True) diff --git a/tests/core/scrapers/test_tutorialbar.py b/tests/core/scrapers/test_tutorialbar.py index 2a9b8b6..2c9f902 100644 --- a/tests/core/scrapers/test_tutorialbar.py +++ b/tests/core/scrapers/test_tutorialbar.py @@ -2,7 +2,7 @@ import pytest -from core.scrapers.tutorialbar import TutorialBarScraper +from udemy_enroller.scrapers.tutorialbar import TutorialBarScraper class MockResponse: diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py index a2e04ba..7500488 100644 --- a/tests/core/test_cache.py +++ b/tests/core/test_cache.py @@ -3,8 +3,8 @@ import pytest -from core import CourseCache -from core.udemy import UdemyStatus +from udemy_enroller import CourseCache +from udemy_enroller.udemy import UdemyStatus @pytest.mark.parametrize( @@ -78,7 +78,7 @@ ], ids=("Initialize cache and add data",), ) -@mock.patch("core.cache.datetime") +@mock.patch("udemy_enroller.cache.datetime") def test_cache( mock_dt, cache_file_name, @@ -171,7 +171,7 @@ def test_cache( ], ids=("Initialize cache and add data",), ) -@mock.patch("core.cache.datetime") +@mock.patch("udemy_enroller.cache.datetime") def test_cache_load( mock_dt, cache_file_name, diff --git a/tests/core/test_driver_manager.py b/tests/core/test_driver_manager.py index 50d5823..d7c5fc0 100644 --- a/tests/core/test_driver_manager.py +++ b/tests/core/test_driver_manager.py @@ -2,14 +2,12 @@ import pytest -from core import DriverManager -from core.driver_manager import ( - ALL_VALID_BROWSER_STRINGS, - VALID_EDGE_STRINGS, - VALID_FIREFOX_STRINGS, - VALID_INTERNET_EXPLORER_STRINGS, - VALID_OPERA_STRINGS, -) +from udemy_enroller import DriverManager +from udemy_enroller.driver_manager import (ALL_VALID_BROWSER_STRINGS, + VALID_EDGE_STRINGS, + VALID_FIREFOX_STRINGS, + VALID_INTERNET_EXPLORER_STRINGS, + VALID_OPERA_STRINGS) @pytest.mark.parametrize( @@ -33,13 +31,13 @@ "unsupported browser", ), ) -@mock.patch("core.driver_manager.webdriver") -@mock.patch("core.driver_manager.ChromeDriverManager") -@mock.patch("core.driver_manager.GeckoDriverManager") -@mock.patch("core.driver_manager.EdgeChromiumDriverManager") -@mock.patch("core.driver_manager.IEDriverManager") -@mock.patch("core.driver_manager.OperaDriverManager") -@mock.patch("core.driver_manager.ChromeType") +@mock.patch("udemy_enroller.driver_manager.webdriver") +@mock.patch("udemy_enroller.driver_manager.ChromeDriverManager") +@mock.patch("udemy_enroller.driver_manager.GeckoDriverManager") +@mock.patch("udemy_enroller.driver_manager.EdgeChromiumDriverManager") +@mock.patch("udemy_enroller.driver_manager.IEDriverManager") +@mock.patch("udemy_enroller.driver_manager.OperaDriverManager") +@mock.patch("udemy_enroller.driver_manager.ChromeType") def test_driver_manager_init( _, mock_opera_driver_manager, @@ -95,10 +93,10 @@ def test_driver_manager_init( ], ids=("chrome is ci build", "chrome is not ci build"), ) -@mock.patch("core.driver_manager.webdriver") -@mock.patch("core.driver_manager.ChromeOptions") -@mock.patch("core.driver_manager.ChromeDriverManager") -@mock.patch("core.driver_manager.ChromeType") +@mock.patch("udemy_enroller.driver_manager.webdriver") 
+@mock.patch("udemy_enroller.driver_manager.ChromeOptions") +@mock.patch("udemy_enroller.driver_manager.ChromeDriverManager") +@mock.patch("udemy_enroller.driver_manager.ChromeType") def test_driver_manager_ci_build( _, mock_chrome_driver_manager, diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py index db4e88b..b0a680a 100644 --- a/tests/core/test_settings.py +++ b/tests/core/test_settings.py @@ -4,8 +4,8 @@ import pytest from ruamel.yaml import YAML -from core import Settings -from core.utils import get_app_dir +from udemy_enroller import Settings +from udemy_enroller.utils import get_app_dir @pytest.mark.parametrize( diff --git a/tests/test_udemy_enroller.py b/tests/test_udemy_enroller.py index 33812f5..e88bad1 100644 --- a/tests/test_udemy_enroller.py +++ b/tests/test_udemy_enroller.py @@ -3,7 +3,7 @@ import pytest -from scripts.udemy_enroller import parse_args +from udemy_enroller.cli import parse_args @pytest.mark.parametrize( diff --git a/udemy_enroller.py b/udemy_enroller.py index c982e64..a870b46 100644 --- a/udemy_enroller.py +++ b/udemy_enroller.py @@ -1,4 +1,4 @@ -from scripts.udemy_enroller import main +from udemy_enroller.cli import main if __name__ == "__main__": main() diff --git a/core/__init__.py b/udemy_enroller/__init__.py similarity index 100% rename from core/__init__.py rename to udemy_enroller/__init__.py diff --git a/core/cache.py b/udemy_enroller/cache.py similarity index 97% rename from core/cache.py rename to udemy_enroller/cache.py index d12da2b..6b648ed 100644 --- a/core/cache.py +++ b/udemy_enroller/cache.py @@ -2,7 +2,7 @@ import json import os -from core.utils import get_app_dir +from udemy_enroller.utils import get_app_dir class CourseCache: diff --git a/scripts/udemy_enroller.py b/udemy_enroller/cli.py similarity index 94% rename from scripts/udemy_enroller.py rename to udemy_enroller/cli.py index c9d8487..dc8fb1c 100644 --- a/scripts/udemy_enroller.py +++ b/udemy_enroller/cli.py @@ -3,9 +3,9 @@ from argparse import Namespace from typing import Tuple, Union -from core import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings -from core.logging import get_logger -from core.runner import redeem_courses +from udemy_enroller import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings +from udemy_enroller.logging import get_logger +from udemy_enroller.runner import redeem_courses logger = get_logger() diff --git a/core/driver_manager.py b/udemy_enroller/driver_manager.py similarity index 95% rename from core/driver_manager.py rename to udemy_enroller/driver_manager.py index d10d72e..4458018 100644 --- a/core/driver_manager.py +++ b/udemy_enroller/driver_manager.py @@ -2,11 +2,12 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.firefox import GeckoDriverManager -from webdriver_manager.microsoft import EdgeChromiumDriverManager, IEDriverManager +from webdriver_manager.microsoft import (EdgeChromiumDriverManager, + IEDriverManager) from webdriver_manager.opera import OperaDriverManager from webdriver_manager.utils import ChromeType -from core.logging import get_logger +from udemy_enroller.logging import get_logger logger = get_logger() diff --git a/core/exceptions.py b/udemy_enroller/exceptions.py similarity index 100% rename from core/exceptions.py rename to udemy_enroller/exceptions.py diff --git a/core/http.py b/udemy_enroller/http.py similarity index 88% rename from core/http.py rename to udemy_enroller/http.py index 970f159..5ea7f95 
100644 --- a/core/http.py +++ b/udemy_enroller/http.py @@ -1,8 +1,8 @@ -import logging - import aiohttp -logger = logging.getLogger("udemy_enroller") +from udemy_enroller.logging import get_logger + +logger = get_logger() async def get(url, headers={}): diff --git a/udemy_enroller/logging.py b/udemy_enroller/logging.py new file mode 100644 index 0000000..f0e5eac --- /dev/null +++ b/udemy_enroller/logging.py @@ -0,0 +1,48 @@ +import logging +import logging.config +import os + +from udemy_enroller.utils import get_app_dir + + +class CustomFileHandler(logging.FileHandler): + """ + Allows us to log to the app directory + """ + + def __init__(self, file_name="app.log", mode="a"): + log_file_path = os.path.join(get_app_dir(), file_name) + super(CustomFileHandler, self).__init__(log_file_path, mode) + + +def load_logging_config() -> None: + """ + Load logging configuration + + :return: None + """ + + my_logger = logging.getLogger("udemy_enroller") + my_logger.setLevel(logging.INFO) + + # File handler + file_handler = CustomFileHandler() + log_format = "%(asctime)s - %(name)s - %(levelname)s - %(module)s : %(message)s" + formatter = logging.Formatter(fmt=log_format) + file_handler.setFormatter(formatter) + my_logger.addHandler(file_handler) + + # Basic format for streamhandler + stream_handler = logging.StreamHandler() + simple_format = logging.Formatter(fmt="%(message)s") + stream_handler.setFormatter(simple_format) + my_logger.addHandler(stream_handler) + + +def get_logger() -> logging.Logger: + """ + Convenience method to load the app logger + + :return: An instance of the app logger + """ + return logging.getLogger("udemy_enroller") diff --git a/core/runner.py b/udemy_enroller/runner.py similarity index 90% rename from core/runner.py rename to udemy_enroller/runner.py index 3745288..5f48d00 100644 --- a/core/runner.py +++ b/udemy_enroller/runner.py @@ -1,15 +1,13 @@ import asyncio from typing import Union -from selenium.common.exceptions import ( - NoSuchElementException, - TimeoutException, - WebDriverException, -) +from selenium.common.exceptions import (NoSuchElementException, + TimeoutException, WebDriverException) from selenium.webdriver.remote.webdriver import WebDriver -from core import CourseCache, ScraperManager, Settings, UdemyActions, exceptions -from core.logging import get_logger +from udemy_enroller import (CourseCache, ScraperManager, Settings, + UdemyActions, exceptions) +from udemy_enroller.logging import get_logger logger = get_logger() diff --git a/core/scrapers/__init__.py b/udemy_enroller/scrapers/__init__.py similarity index 100% rename from core/scrapers/__init__.py rename to udemy_enroller/scrapers/__init__.py diff --git a/core/scrapers/base_scraper.py b/udemy_enroller/scrapers/base_scraper.py similarity index 100% rename from core/scrapers/base_scraper.py rename to udemy_enroller/scrapers/base_scraper.py diff --git a/core/scrapers/comidoc.py b/udemy_enroller/scrapers/comidoc.py similarity index 97% rename from core/scrapers/comidoc.py rename to udemy_enroller/scrapers/comidoc.py index 82ec72e..2685d4b 100644 --- a/core/scrapers/comidoc.py +++ b/udemy_enroller/scrapers/comidoc.py @@ -4,8 +4,8 @@ from bs4 import BeautifulSoup -from core.http import get -from core.scrapers.base_scraper import BaseScraper +from udemy_enroller.http import get +from udemy_enroller.scrapers.base_scraper import BaseScraper logger = logging.getLogger("udemy_enroller") diff --git a/core/scrapers/discudemy.py b/udemy_enroller/scrapers/discudemy.py similarity index 96% rename from 
core/scrapers/discudemy.py rename to udemy_enroller/scrapers/discudemy.py index a4b253c..fdb791f 100644 --- a/core/scrapers/discudemy.py +++ b/udemy_enroller/scrapers/discudemy.py @@ -4,8 +4,8 @@ from bs4 import BeautifulSoup -from core.http import get -from core.scrapers.base_scraper import BaseScraper +from udemy_enroller.http import get +from udemy_enroller.scrapers.base_scraper import BaseScraper logger = logging.getLogger("udemy_enroller") diff --git a/core/scrapers/manager.py b/udemy_enroller/scrapers/manager.py similarity index 89% rename from core/scrapers/manager.py rename to udemy_enroller/scrapers/manager.py index 60b9e6e..849b5b9 100644 --- a/core/scrapers/manager.py +++ b/udemy_enroller/scrapers/manager.py @@ -2,8 +2,8 @@ from functools import reduce from typing import List -from core.scrapers.discudemy import DiscUdemyScraper -from core.scrapers.tutorialbar import TutorialBarScraper +from udemy_enroller.scrapers.discudemy import DiscUdemyScraper +from udemy_enroller.scrapers.tutorialbar import TutorialBarScraper class ScraperManager: diff --git a/core/scrapers/tutorialbar.py b/udemy_enroller/scrapers/tutorialbar.py similarity index 97% rename from core/scrapers/tutorialbar.py rename to udemy_enroller/scrapers/tutorialbar.py index 6e09c97..6158b6d 100644 --- a/core/scrapers/tutorialbar.py +++ b/udemy_enroller/scrapers/tutorialbar.py @@ -4,8 +4,8 @@ from bs4 import BeautifulSoup -from core.http import get -from core.scrapers.base_scraper import BaseScraper +from udemy_enroller.http import get +from udemy_enroller.scrapers.base_scraper import BaseScraper logger = logging.getLogger("udemy_enroller") diff --git a/core/settings.py b/udemy_enroller/settings.py similarity index 98% rename from core/settings.py rename to udemy_enroller/settings.py index 9c217b3..6c9f51b 100644 --- a/core/settings.py +++ b/udemy_enroller/settings.py @@ -5,8 +5,8 @@ from ruamel.yaml import YAML, dump -from core.logging import get_logger -from core.utils import get_app_dir +from udemy_enroller.logging import get_logger +from udemy_enroller.utils import get_app_dir logger = get_logger() diff --git a/core/udemy.py b/udemy_enroller/udemy.py similarity index 98% rename from core/udemy.py rename to udemy_enroller/udemy.py index 364cdf0..a3a2908 100644 --- a/core/udemy.py +++ b/udemy_enroller/udemy.py @@ -6,9 +6,9 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from core.exceptions import LoginException, RobotException -from core.logging import get_logger -from core.settings import Settings +from udemy_enroller.exceptions import LoginException, RobotException +from udemy_enroller.logging import get_logger +from udemy_enroller.settings import Settings logger = get_logger() diff --git a/core/utils.py b/udemy_enroller/utils.py similarity index 100% rename from core/utils.py rename to udemy_enroller/utils.py From 8690436860e079e3f35eed2e764377a63be4f63e Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Sat, 16 Jan 2021 00:26:37 +0000 Subject: [PATCH 36/37] style(black): apply code style --- tests/core/test_driver_manager.py | 12 +++++++----- udemy_enroller/driver_manager.py | 3 +-- udemy_enroller/runner.py | 16 ++++++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/core/test_driver_manager.py b/tests/core/test_driver_manager.py index d7c5fc0..35ec783 100644 --- a/tests/core/test_driver_manager.py +++ b/tests/core/test_driver_manager.py @@ -3,11 +3,13 @@ import pytest from udemy_enroller import 
DriverManager -from udemy_enroller.driver_manager import (ALL_VALID_BROWSER_STRINGS, - VALID_EDGE_STRINGS, - VALID_FIREFOX_STRINGS, - VALID_INTERNET_EXPLORER_STRINGS, - VALID_OPERA_STRINGS) +from udemy_enroller.driver_manager import ( + ALL_VALID_BROWSER_STRINGS, + VALID_EDGE_STRINGS, + VALID_FIREFOX_STRINGS, + VALID_INTERNET_EXPLORER_STRINGS, + VALID_OPERA_STRINGS, +) @pytest.mark.parametrize( diff --git a/udemy_enroller/driver_manager.py b/udemy_enroller/driver_manager.py index 4458018..c8542ea 100644 --- a/udemy_enroller/driver_manager.py +++ b/udemy_enroller/driver_manager.py @@ -2,8 +2,7 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.firefox import GeckoDriverManager -from webdriver_manager.microsoft import (EdgeChromiumDriverManager, - IEDriverManager) +from webdriver_manager.microsoft import EdgeChromiumDriverManager, IEDriverManager from webdriver_manager.opera import OperaDriverManager from webdriver_manager.utils import ChromeType diff --git a/udemy_enroller/runner.py b/udemy_enroller/runner.py index 5f48d00..c09e1ac 100644 --- a/udemy_enroller/runner.py +++ b/udemy_enroller/runner.py @@ -1,12 +1,20 @@ import asyncio from typing import Union -from selenium.common.exceptions import (NoSuchElementException, - TimeoutException, WebDriverException) +from selenium.common.exceptions import ( + NoSuchElementException, + TimeoutException, + WebDriverException, +) from selenium.webdriver.remote.webdriver import WebDriver -from udemy_enroller import (CourseCache, ScraperManager, Settings, - UdemyActions, exceptions) +from udemy_enroller import ( + CourseCache, + ScraperManager, + Settings, + UdemyActions, + exceptions, +) from udemy_enroller.logging import get_logger logger = get_logger() From de40156dae78f5a8e5c10e5ba1a9f08d1eb0f357 Mon Sep 17 00:00:00 2001 From: cullzie Date: Tue, 19 Jan 2021 13:38:21 +0000 Subject: [PATCH 37/37] Update CHANGELOG date and release link --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 495c772..12eb3ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [2.0.0] - 2021-01-09 +## [2.0.0] - 2021-01-19 ### Added @@ -64,6 +64,8 @@ can continue as normal project running locally. Suitable for users who are not looking forward to contribute. +[2.0.0]: + https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/releases/tag/v2.0.0 [1.0.0]: https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/releases/tag/v1.0.0 [0.3]: