diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 26ed628..95b81c1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -50,10 +50,10 @@ jobs: - name: Run unittests run: | poetry run pytest - - name: Make sure chrome crawler is working (Attempt to subscribe to 1 course) + - name: Make sure crawler is working (Attempt to subscribe to 1 course) env: UDEMY_EMAIL: ${{ secrets.UDEMY_EMAIL }} UDEMY_PASSWORD: ${{ secrets.UDEMY_PASSWORD }} CI_TEST: "True" run: | - poetry run python udemy_enroller.py --browser=chrome + poetry run python udemy_enroller.py --debug diff --git a/CHANGELOG.md b/CHANGELOG.md index 12eb3ab..b355c34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [3.0.0] - 2021-03-03 + +### Added + +- Enrollment now relies on REST requests +- New coupon source from coursevania + +### Removed +- No longer supporting browser enrollment (Bot captcha was unsolvable) + ## [2.0.0] - 2021-01-19 ### Added @@ -64,6 +75,8 @@ can continue as normal project running locally. Suitable for users who are not looking forward to contribute. 
+[3.0.0]: + https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/releases/tag/v3.0.0 [2.0.0]: https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/releases/tag/v2.0.0 [1.0.0]: diff --git a/README.md b/README.md index 4df36e4..a030233 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ [![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/it-works-why.svg)](https://forthebadge.com) +# ALPHA IS A PRE DEVELOPMENT BRANCH, DO NOT EXPECT USER FACING ISSUES TO BE ADDRESSED IN THIS BRANCH! # Udemy Coupon Grabber & Course Enroller: Grab FREE Coupons! @@ -15,6 +16,7 @@ web-scraping and automation, this script will find the necessary Udemy Coupons The code scrapes course links and coupons from: - [tutorialbar.com](https://tutorialbar.com) - [discudemy.com](https://discudemy.com) + - [coursevania.com](https://coursevania.com) In case of any bugs or issues, please open an issue in github. @@ -52,20 +54,6 @@ Download a release of this project or clone the repository then navigate to the folder where you placed the files on. Type `pip install -r requirements.txt` to get all the requirements installed in one go. Similar instructions applies for poetry. -- **Webdrivers are now automatically installed! But here are some links in case - you are using the vanilla script or the Safari Browser:** - -* Edge- https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/ -* Chrome- https://chromedriver.chromium.org/ -* Firefox- https://github.com/mozilla/geckodriver/releases/ -* Safari- - https://developer.apple.com/documentation/webkit/about_webdriver_for_safari/ -* Opera- https://github.com/operasoftware/operachromiumdriver/releases -* Internet Explorer- - [Find it on your own accord](https://www.selenium.dev/downloads/) - -**Note:** Make sure that the driver version matches your browser. 
- --- ## Instructions @@ -85,44 +73,20 @@ Props to Davidd Sargent for making a super simple video tutorial. If you prefer **The values in settings.yaml should be in the same language as the site you are browsing on** -2 . Choose the appropriate command for your browser (from the list below): - -- **Tested and works perfectly:** - - - Chrome: - [udemy_enroller --browser=chrome](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - - Chromium: - [udemy_enroller --browser=chromium](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - - Edge: - [udemy_enroller --browser=edge](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - -- **Has issues when run on custom kernel but works fine on vanilla OS:** - - - Firefox: - [udemy_enroller --browser=firefox (might require manual driver installation)](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - -- **Untested:** - - - Opera: - [udemy_enroller --browser=opera](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - -- **Use at your own risk:** - - Internet Explorer: - [udemy_enroller --browser=internet_explorer](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/blob/master/udemy_enroller.py) - -3 . The script can be passed arguments: +2 . 
The script can be passed arguments: - `--help`: View full list of arguments available -- `--browser=`: Run with a specific browser - `--discudemy`: Run the discudemy scraper only +- `--coursevania`: Run the coursevania scraper only - `--tutorialbar`: Run the tutorialbar scraper only - `--max-pages=`: Max number of pages to scrape from sites before exiting the script (default is 5) +- `--delete-settings`: Delete existing settings file - `--debug`: Enable debug logging -4 . Run the chosen script in terminal like so: -- `udemy_enroller --browser=firefox` +3 . Run the script in terminal like so: +- `udemy_enroller` -5 . The bot starts scraping the course links from the first **All Courses** page -on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1) and [DiscUdemy](https://www.discudemy.com/all) and starts +4 . The bot starts scraping the course links from the first **All Courses** page +on [Tutorial Bar](https://www.tutorialbar.com/all-courses/page/1), [DiscUdemy](https://www.discudemy.com/all) and [Coursevania](https://coursevania.com) and starts enrolling you to Udemy courses. After it has enrolled you to courses from the first page, it then moves to the next site page and the cycle continues. @@ -144,8 +108,8 @@ which of course I got for free! :) ### 2. How does the bot work? -The bot retrieves coupon links from Tutorial Bar's and DiscUdemy list to cut the prices and -then uses Selenium's Browser automation features to login and enroll to the +The bot retrieves coupon links from Tutorial Bar, DiscUdemy and Coursevania's lists to cut the prices and +then uses REST requests to authenticate and enroll to the courses. Think of it this way: Epic Games & other clients like Steam provide you a handful of games each week, for free; Only in this case, we need a coupon code to make those courses free. @@ -167,30 +131,23 @@ task that took around 15 minutes, for 10 courses. 
And then I suddenly got the idea to automate it, after I found the automation course mentioned above. I bet, it will save your precious time too! :) -### 5. Udemy has detected that I'm using automation tools to browse the website! What should I do? - -![](https://i.imgur.com/pwseilE.jpg) Relax! This happens when you run the script -several times in a short interval of time. Solve the captcha, hit enter in the terminal window you are running -the script from and allow the script to continue as normal. -Easy peasy lemon squeezy! 🍋🙃 - -### 6. The code compiles successfully but it's taking too long to work! IS there any way to fix that? +### 5. The code compiles successfully, but it's taking too long to work! Is there any way to fix that? Since we are heavily dependent on a third-party site to retrieve coupons links, there may be issues when the site is down. Needless to mention the connectivity issues too. If everything is working fine, you can see the courses being retrieved in the Python console/shell, which may take a while. -### 7. Which is the best way to run the script? +### 6. Which is the best way to run the script? It is recommended to run the script using your terminal and system python. -### 8. Which branch to commit against? +### 7. Which branch to commit against? Pull request should be made on "develop" branch. -### 9. What's the roadmap? +### 8. What's the roadmap? 
Take a look at our [Roadmap here](https://github.com/aapatre/Automatic-Udemy-Course-Enroller-GET-PAID-UDEMY-COURSES-for-FREE/projects/1) diff --git a/pyproject.toml b/pyproject.toml index a5b9523..af92d84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,16 @@ [tool.poetry] name = "automatic-udemy-course-enroller-get-paid-udemy-courses-for-free" -version = "2.0.0" +version = "3.0.0" description = "" authors = [""] [tool.poetry.dependencies] python = "^3.8" -selenium = "^3.141.0" beautifulsoup4 = "^4.9.3" "ruamel.yaml" = "^0.16.12" -webdriver-manager = "^3.2.2" -aiohttp = "^3.7.3" +cloudscraper = "^1.2.56" +requests = "^2.25.1" +aiohttp = {extras = ["speedups"], version = "^3.7.3"} [tool.poetry.dev-dependencies] black = "^20.8b1" diff --git a/requirements.py b/requirements.py deleted file mode 100644 index 7bd503d..0000000 --- a/requirements.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import subprocess -import sys - - -def install(package): - os.system("pip install " + str(package)) - reqs = subprocess.check_output([sys.executable, "-m", "pip", "show", str(package)]) - - print(str(reqs) + "\n") - print("Installed " + package.upper() + "\n") - - -install("requests") -install("beautifulsoup4") -install("ruamel.yaml") -install("selenium") -install("webdriver_manager") diff --git a/requirements.txt b/requirements.txt index 2ff857f..3f150d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -aiohttp +aiohttp[speedups] beautifulsoup4 ruamel.yaml -selenium -webdriver-manager \ No newline at end of file +requests +cloudscraper diff --git a/setup.py b/setup.py index 1d30fb2..3b0a43b 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="udemy-enroller", - version="2.0.0", + version="3.0.0", long_description=long_description, long_description_content_type="text/markdown", author="aapatre", @@ -27,11 +27,11 @@ ), python_requires=">=3.8, <4", install_requires=[ - "aiohttp", + "aiohttp[speedups]", "beautifulsoup4", "ruamel.yaml", - 
"selenium", - "webdriver-manager", + "requests", + "cloudscraper", ], setup_requires=["pytest-runner"], extras_require={ diff --git a/tests/core/test_driver_manager.py b/tests/core/test_driver_manager.py deleted file mode 100644 index 35ec783..0000000 --- a/tests/core/test_driver_manager.py +++ /dev/null @@ -1,120 +0,0 @@ -from unittest import mock - -import pytest - -from udemy_enroller import DriverManager -from udemy_enroller.driver_manager import ( - ALL_VALID_BROWSER_STRINGS, - VALID_EDGE_STRINGS, - VALID_FIREFOX_STRINGS, - VALID_INTERNET_EXPLORER_STRINGS, - VALID_OPERA_STRINGS, -) - - -@pytest.mark.parametrize( - "browser_name", - [ - ("chrome"), - ("chromium"), - ("edge"), - ("firefox"), - ("opera"), - ("internet_explorer"), - ("tor"), - ], - ids=( - "create driver chrome", - "create driver chromium", - "create driver edge", - "create driver firefox", - "create driver opera", - "create driver internet_explorer", - "unsupported browser", - ), -) -@mock.patch("udemy_enroller.driver_manager.webdriver") -@mock.patch("udemy_enroller.driver_manager.ChromeDriverManager") -@mock.patch("udemy_enroller.driver_manager.GeckoDriverManager") -@mock.patch("udemy_enroller.driver_manager.EdgeChromiumDriverManager") -@mock.patch("udemy_enroller.driver_manager.IEDriverManager") -@mock.patch("udemy_enroller.driver_manager.OperaDriverManager") -@mock.patch("udemy_enroller.driver_manager.ChromeType") -def test_driver_manager_init( - _, - mock_opera_driver_manager, - mock_internet_explorer_driver_manager, - mock_edge_driver_manager, - mock_firefox_driver_manager, - mock_chrome_driver_manager, - mock_selenium_web_driver, - browser_name, -): - try: - dm = DriverManager(browser_name) - except ValueError: - assert browser_name not in ALL_VALID_BROWSER_STRINGS - else: - if browser_name in ("chrome",): - mock_selenium_web_driver.Chrome.assert_called_once_with( - mock_chrome_driver_manager().install(), options=None - ) - assert dm.driver == mock_selenium_web_driver.Chrome() - elif 
browser_name in ("chromium",): - mock_selenium_web_driver.Chrome.assert_called_once_with( - mock_chrome_driver_manager().install() - ) - assert dm.driver == mock_selenium_web_driver.Chrome() - elif browser_name in VALID_FIREFOX_STRINGS: - mock_selenium_web_driver.Firefox.assert_called_once_with( - executable_path=mock_firefox_driver_manager().install() - ) - assert dm.driver == mock_selenium_web_driver.Firefox() - elif browser_name in VALID_OPERA_STRINGS: - mock_selenium_web_driver.Opera.assert_called_once_with( - executable_path=mock_opera_driver_manager().install() - ) - assert dm.driver == mock_selenium_web_driver.Opera() - elif browser_name in VALID_EDGE_STRINGS: - mock_selenium_web_driver.Edge.assert_called_once_with( - mock_edge_driver_manager().install() - ) - assert dm.driver == mock_selenium_web_driver.Edge() - elif browser_name in VALID_INTERNET_EXPLORER_STRINGS: - mock_selenium_web_driver.Ie.assert_called_once_with( - mock_internet_explorer_driver_manager().install() - ) - assert dm.driver == mock_selenium_web_driver.Ie() - - -@pytest.mark.parametrize( - "browser_name,is_ci_build", - [ - ("chrome", True), - ("chrome", False), - ], - ids=("chrome is ci build", "chrome is not ci build"), -) -@mock.patch("udemy_enroller.driver_manager.webdriver") -@mock.patch("udemy_enroller.driver_manager.ChromeOptions") -@mock.patch("udemy_enroller.driver_manager.ChromeDriverManager") -@mock.patch("udemy_enroller.driver_manager.ChromeType") -def test_driver_manager_ci_build( - _, - mock_chrome_driver_manager, - mock_chrome_options, - mock_selenium_web_driver, - browser_name, - is_ci_build, -): - - dm = DriverManager(browser_name, is_ci_build=is_ci_build) - - if is_ci_build: - options = mock_chrome_options() - else: - options = None - mock_selenium_web_driver.Chrome.assert_called_once_with( - mock_chrome_driver_manager().install(), options=options - ) - assert dm.driver == mock_selenium_web_driver.Chrome() diff --git a/tests/core/test_settings.py 
b/tests/core/test_settings.py index b0a680a..6be8ca9 100644 --- a/tests/core/test_settings.py +++ b/tests/core/test_settings.py @@ -61,7 +61,7 @@ def test_settings(email, password, zip_code, languages, categories, save, file_n ): with mock.patch("getpass.getpass", return_value=password): settings_path = os.path.join(get_app_dir(), f"test_tmp/{file_name}") - settings = Settings(settings_path) + settings = Settings(False, settings_path) assert settings.email == email assert settings.password == password assert settings.zip_code == zip_code @@ -86,7 +86,7 @@ def test_settings(email, password, zip_code, languages, categories, save, file_n else categories ) # Load settings just created - Settings(settings_path) + Settings(False, settings_path) else: assert os.path.isdir(settings_path) is False @@ -146,10 +146,10 @@ def test_load_existing_settings( ): with mock.patch("getpass.getpass", return_value=password): settings_path = f"test_tmp/{file_name}" - Settings(settings_path) + Settings(False, settings_path) # Load existing settings - settings = Settings(settings_path) + settings = Settings(False, settings_path) assert settings.email == email assert settings.password == password assert settings.zip_code == zip_code @@ -181,7 +181,7 @@ def test_load_ci_settings(_, monkeypatch, is_ci_run, email, password): monkeypatch.setenv("CI_TEST", str(is_ci_run)) monkeypatch.setenv("UDEMY_EMAIL", email) monkeypatch.setenv("UDEMY_PASSWORD", password) - settings = Settings("") + settings = Settings(False, "") if is_ci_run: assert settings.email == email assert settings.password == password diff --git a/tests/test_udemy_enroller.py b/tests/test_udemy_enroller.py deleted file mode 100644 index e88bad1..0000000 --- a/tests/test_udemy_enroller.py +++ /dev/null @@ -1,50 +0,0 @@ -import argparse -from unittest import mock - -import pytest - -from udemy_enroller.cli import parse_args - - -@pytest.mark.parametrize( - "browser_cli,max_pages_cli,expected_browser,expected_max_pages,print_help", - [ 
- ("chrome", None, "chrome", None, False), - ("firefox", None, "firefox", None, False), - ("chromium", None, "chromium", None, False), - ("internet_explorer", None, "internet_explorer", None, False), - ("opera", None, "opera", None, False), - ("edge", None, "edge", None, False), - (None, None, None, None, True), - ("firefox", 10, "firefox", 10, False), - ], - ids=( - "Test chrome via cli", - "Test firefox via cli", - "Test chromium via cli", - "Test internet_explorer via cli", - "Test opera via cli", - "Test edge via cli", - "No browser selected print help", - "Pass max pages via cli", - ), -) -@mock.patch("argparse.ArgumentParser.print_help") -def test_argparse( - mock_print_help, - browser_cli, - max_pages_cli, - expected_browser, - expected_max_pages, - print_help, -): - with mock.patch( - "argparse.ArgumentParser.parse_args", - return_value=argparse.Namespace(browser=browser_cli, max_pages=max_pages_cli), - ): - args = parse_args() - if print_help: - assert mock_print_help.call_count == 1 - else: - assert args.browser == expected_browser - assert args.max_pages is expected_max_pages diff --git a/udemy_enroller/__init__.py b/udemy_enroller/__init__.py index b231ff8..d984a4d 100644 --- a/udemy_enroller/__init__.py +++ b/udemy_enroller/__init__.py @@ -1,5 +1,4 @@ from .cache import CourseCache -from .driver_manager import ALL_VALID_BROWSER_STRINGS, DriverManager from .logging import load_logging_config from .scrapers.manager import ScraperManager from .settings import Settings diff --git a/udemy_enroller/cli.py b/udemy_enroller/cli.py index dc8fb1c..8f5f308 100644 --- a/udemy_enroller/cli.py +++ b/udemy_enroller/cli.py @@ -3,7 +3,7 @@ from argparse import Namespace from typing import Tuple, Union -from udemy_enroller import ALL_VALID_BROWSER_STRINGS, DriverManager, Settings +from udemy_enroller import Settings from udemy_enroller.logging import get_logger from udemy_enroller.runner import redeem_courses @@ -25,56 +25,50 @@ def enable_debug_logging() -> None: def 
determine_if_scraper_enabled( tutorialbar_enabled: bool, discudemy_enabled: bool, -) -> Tuple[bool, bool]: + coursevania_enabled: bool, +) -> Tuple[bool, bool, bool]: """ Determine what scrapers should be enabled and disabled :return: tuple containing boolean of what scrapers should run """ - if not tutorialbar_enabled and not discudemy_enabled: - # Set both to True since user has not enabled a specific scraper i.e Run all scrapers - tutorialbar_enabled, discudemy_enabled = True, True - return tutorialbar_enabled, discudemy_enabled + if not tutorialbar_enabled and not discudemy_enabled and not coursevania_enabled: + # Set all to True + tutorialbar_enabled, discudemy_enabled, coursevania_enabled = True, True, True + return tutorialbar_enabled, discudemy_enabled, coursevania_enabled def run( - browser: str, tutorialbar_enabled: bool, discudemy_enabled: bool, + coursevania_enabled: bool, max_pages: Union[int, None], + delete_settings: bool, ): """ Run the udemy enroller script - :param str browser: Name of the browser we want to create a driver for :param bool tutorialbar_enabled: :param bool discudemy_enabled: + :param bool coursevania_enabled: :param int max_pages: Max pages to scrape from sites (if pagination exists) + :param bool delete_settings: Determines if we should delete old settings file :return: """ - settings = Settings() - dm = DriverManager(browser=browser, is_ci_build=settings.is_ci_build) + settings = Settings(delete_settings) redeem_courses( - dm.driver, settings, tutorialbar_enabled, discudemy_enabled, max_pages + settings, tutorialbar_enabled, discudemy_enabled, coursevania_enabled, max_pages ) -def parse_args(browser=None) -> Namespace: +def parse_args() -> Namespace: """ Parse args from the CLI or use the args passed in - :param str browser: Name of the browser we want to create a driver for :return: Args to be used in the script """ parser = argparse.ArgumentParser(description="Udemy Enroller") - parser.add_argument( - "--browser", - type=str, - 
default=browser, - choices=ALL_VALID_BROWSER_STRINGS, - help="Browser to use for Udemy Enroller", - ) parser.add_argument( "--tutorialbar", action="store_true", @@ -87,12 +81,24 @@ def parse_args(browser=None) -> Namespace: default=False, help="Run discudemy scraper", ) + parser.add_argument( + "--coursevania", + action="store_true", + default=False, + help="Run coursevania scraper", + ) parser.add_argument( "--max-pages", type=int, default=5, help=f"Max pages to scrape from sites (if pagination exists) (Default is 5)", ) + parser.add_argument( + "--delete-settings", + action="store_true", + default=False, + help="Delete any existing settings file", + ) parser.add_argument( "--debug", action="store_true", @@ -101,10 +107,7 @@ def parse_args(browser=None) -> Namespace: args = parser.parse_args() - if args.browser is None: - parser.print_help() - else: - return args + return args def main(): @@ -112,7 +115,17 @@ def main(): if args: if args.debug: enable_debug_logging() - tutorialbar_enabled, discudemy_enabled = determine_if_scraper_enabled( - args.tutorialbar, args.discudemy + ( + tutorialbar_enabled, + discudemy_enabled, + coursevania_enabled, + ) = determine_if_scraper_enabled( + args.tutorialbar, args.discudemy, args.coursevania + ) + run( + tutorialbar_enabled, + discudemy_enabled, + coursevania_enabled, + args.max_pages, + args.delete_settings, ) - run(args.browser, tutorialbar_enabled, discudemy_enabled, args.max_pages) diff --git a/udemy_enroller/driver_manager.py b/udemy_enroller/driver_manager.py deleted file mode 100644 index c8542ea..0000000 --- a/udemy_enroller/driver_manager.py +++ /dev/null @@ -1,92 +0,0 @@ -from selenium import webdriver -from selenium.webdriver.chrome.options import Options as ChromeOptions -from webdriver_manager.chrome import ChromeDriverManager -from webdriver_manager.firefox import GeckoDriverManager -from webdriver_manager.microsoft import EdgeChromiumDriverManager, IEDriverManager -from webdriver_manager.opera import 
OperaDriverManager -from webdriver_manager.utils import ChromeType - -from udemy_enroller.logging import get_logger - -logger = get_logger() - -VALID_FIREFOX_STRINGS = {"ff", "firefox"} -VALID_CHROME_STRINGS = {"chrome", "google-chrome"} -VALID_CHROMIUM_STRINGS = {"chromium"} -VALID_INTERNET_EXPLORER_STRINGS = {"internet_explorer", "ie"} -VALID_OPERA_STRINGS = {"opera"} -VALID_EDGE_STRINGS = {"edge"} - -ALL_VALID_BROWSER_STRINGS = ( - VALID_FIREFOX_STRINGS.union(VALID_CHROME_STRINGS) - .union(VALID_CHROMIUM_STRINGS) - .union(VALID_CHROMIUM_STRINGS) - .union(VALID_INTERNET_EXPLORER_STRINGS) - .union(VALID_OPERA_STRINGS) - .union(VALID_EDGE_STRINGS) -) - - -class DriverManager: - def __init__(self, browser: str, is_ci_build: bool = False): - self.driver = None - self.options = None - self.browser = browser - self.is_ci_build = is_ci_build - self._init_driver() - - def _init_driver(self): - """ - Initialize the correct web driver based on the users requested browser - - :return: None - """ - - if self.browser.lower() in VALID_CHROME_STRINGS: - if self.is_ci_build: - self.options = self._build_ci_options_chrome() - self.driver = webdriver.Chrome( - ChromeDriverManager().install(), options=self.options - ) - elif self.browser.lower() in VALID_CHROMIUM_STRINGS: - self.driver = webdriver.Chrome( - ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install() - ) - elif self.browser.lower() in VALID_EDGE_STRINGS: - self.driver = webdriver.Edge(EdgeChromiumDriverManager().install()) - elif self.browser.lower() in VALID_FIREFOX_STRINGS: - self.driver = webdriver.Firefox( - executable_path=GeckoDriverManager().install() - ) - elif self.browser.lower() in VALID_OPERA_STRINGS: - self.driver = webdriver.Opera( - executable_path=OperaDriverManager().install() - ) - elif self.browser.lower() in VALID_INTERNET_EXPLORER_STRINGS: - self.driver = webdriver.Ie(IEDriverManager().install()) - else: - raise ValueError("No matching browser found") - - # Maximize the browser - 
self.driver.maximize_window() - - @staticmethod - def _build_ci_options_chrome(): - """ - Build chrome options required to run in CI - - :return: - """ - # Having the user-agent with Headless param was always leading to robot check - user_agent = ( - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 " - "Safari/537.36" - ) - options = ChromeOptions() - # We need to run headless when using github CI - options.add_argument("--headless") - options.add_argument("user-agent={0}".format(user_agent)) - options.add_argument("accept-language=en-GB,en-US;q=0.9,en;q=0.8") - options.add_argument("--window-size=1325x744") - logger.info("This is a CI run") - return options diff --git a/udemy_enroller/runner.py b/udemy_enroller/runner.py index c09e1ac..db1d717 100644 --- a/udemy_enroller/runner.py +++ b/udemy_enroller/runner.py @@ -1,41 +1,27 @@ import asyncio +import time from typing import Union -from selenium.common.exceptions import ( - NoSuchElementException, - TimeoutException, - WebDriverException, -) -from selenium.webdriver.remote.webdriver import WebDriver - -from udemy_enroller import ( - CourseCache, - ScraperManager, - Settings, - UdemyActions, - exceptions, -) +from udemy_enroller import CourseCache, ScraperManager, Settings, UdemyActions from udemy_enroller.logging import get_logger logger = get_logger() def _redeem_courses( - driver: WebDriver, settings: Settings, scrapers: ScraperManager, ) -> None: """ Method to scrape courses from tutorialbar.com and enroll in them on udemy - :param WebDriver driver: Webdriver used to enroll in Udemy courses :param Settings settings: Core settings used for Udemy :param ScraperManager scrapers: :return: """ cache = CourseCache() - udemy_actions = UdemyActions(driver, settings) - udemy_actions.login() # login once outside while loop + udemy_actions = UdemyActions(settings) + udemy_actions.login() loop = asyncio.get_event_loop() while True: @@ -45,22 +31,14 @@ def _redeem_courses( for 
course_link in udemy_course_links: try: if course_link not in cache: - status = udemy_actions.redeem(course_link) + status = udemy_actions.enroll(course_link) cache.add(course_link, status) + time.sleep(2) # Try to avoid udemy throttling else: logger.debug(f"In cache: {course_link}") - except NoSuchElementException as e: - logger.error(e) - except TimeoutException: - logger.error(f"Timeout on link: {course_link}") - except WebDriverException: - logger.error(f"Webdriver exception on link: {course_link}") except KeyboardInterrupt: logger.error("Exiting the script") return - except exceptions.RobotException as e: - logger.error(e) - return except Exception as e: logger.error(f"Unexpected exception: {e}") finally: @@ -74,27 +52,26 @@ def _redeem_courses( def redeem_courses( - driver: WebDriver, settings: Settings, tutorialbar_enabled: bool, discudemy_enabled: bool, + coursevania_enabled: bool, max_pages: Union[int, None], ) -> None: """ Wrapper of _redeem_courses so we always close browser on completion - :param WebDriver driver: Webdriver used to enroll in Udemy courses :param Settings settings: Core settings used for Udemy :param bool tutorialbar_enabled: Boolean signifying if tutorialbar scraper should run :param bool discudemy_enabled: Boolean signifying if discudemy scraper should run + :param bool coursevania_enabled: Boolean signifying if coursevania scraper should run :param int max_pages: Max pages to scrape from sites (if pagination exists) :return: """ try: - scrapers = ScraperManager(tutorialbar_enabled, discudemy_enabled, max_pages) - _redeem_courses(driver, settings, scrapers) - except exceptions.LoginException as e: - logger.error(str(e)) - finally: - logger.info("Closing browser") - driver.quit() + scrapers = ScraperManager( + tutorialbar_enabled, discudemy_enabled, coursevania_enabled, max_pages + ) + _redeem_courses(settings, scrapers) + except Exception as e: + logger.error(f"Exception in redeem courses: {e}") diff --git 
a/udemy_enroller/scrapers/base_scraper.py b/udemy_enroller/scrapers/base_scraper.py index bc86d02..57ce7f8 100644 --- a/udemy_enroller/scrapers/base_scraper.py +++ b/udemy_enroller/scrapers/base_scraper.py @@ -70,7 +70,7 @@ async def wrapper(self): try: response = await func(self) except Exception as e: - logger.error(f"Error while running {self.scraper_name} scrapper: {e}") + logger.error(f"Error while running {self.scraper_name} scraper: {e}") self.is_complete() return [] end_time = datetime.datetime.utcnow() diff --git a/udemy_enroller/scrapers/coursevania.py b/udemy_enroller/scrapers/coursevania.py new file mode 100644 index 0000000..9c54175 --- /dev/null +++ b/udemy_enroller/scrapers/coursevania.py @@ -0,0 +1,127 @@ +import asyncio +import json +import logging +from typing import List +from urllib.parse import urlencode + +from bs4 import BeautifulSoup + +from udemy_enroller.http import get +from udemy_enroller.scrapers.base_scraper import BaseScraper + +logger = logging.getLogger("udemy_enroller") + + +class CoursevaniaScraper(BaseScraper): + """ + Contains any logic related to scraping of data from coursevania.com + """ + + DOMAIN = "https://coursevania.com" + + def __init__(self, enabled, max_pages=None): + super().__init__() + self.scraper_name = "coursevania" + if not enabled: + self.set_state_disabled() + self.last_page = None + self.max_pages = max_pages + + @BaseScraper.time_run + async def run(self) -> List: + """ + Called to gather the udemy links + + :return: List of udemy course links + """ + links = await self.get_links() + logger.info( + f"Page: {self.current_page} of {self.last_page} scraped from coursevania.com" + ) + self.max_pages_reached() + return links + + async def get_links(self): + """ + Scrape udemy links from coursevania.com + + :return: List of udemy course urls + """ + self.current_page += 1 + course_links = await self.get_course_links() + + links = await self.gather_udemy_course_links(course_links) + + for counter, course in 
enumerate(links): + logger.debug(f"Received Link {counter + 1} : {course}") + + return links + + async def get_course_links(self) -> List: + """ + Gets the url of pages which contain the udemy link we want to get + + :param int page: The page number to scrape data from + :return: list of pages on coursevania.com that contain Udemy coupons + """ + query_params = { + "offset": self.current_page - 1, + "template": "courses/grid", + "args": '{"image_d":"img-480-380","per_row":"4","posts_per_page":"12","class":"archive_grid"}', + "action": "stm_lms_load_content", + "sort": "date_high", + } + headers = { + "Host": "coursevania.com", + "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0", + "Accept": "application/json, text/javascript, */*; q=0.01", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "X-Requested-With": "XMLHttpRequest", + "Connection": "keep-alive", + "Referer": "https://coursevania.com/courses/", + "TE": "Trailers", + } + query_string = urlencode(query_params) + response = await get( + f"{self.DOMAIN}/wp-admin/admin-ajax.php?{query_string}", headers=headers + ) + if response is not None: + json_data = json.loads(response) + coupons_data = json_data.get("content") + soup = BeautifulSoup(coupons_data, "html.parser") + links = soup.find_all("a", class_="heading_font") + course_links = list({link["href"] for link in links}) + + self.last_page = json_data.get("pages") + + return course_links + + @staticmethod + async def get_udemy_course_link(url: str) -> str: + """ + Gets the udemy course link + + :param str url: The url to scrape data from + :return: Coupon link of the udemy course + """ + text = await get(url) + if text is not None: + soup = BeautifulSoup(text.decode("utf-8"), "html.parser") + udemy_link = ( + soup.find("div", class_="stm-lms-buy-buttons").find("a").get("href") + ) + return udemy_link + + async def gather_udemy_course_links(self, courses: List[str]): + """ + Async 
fetching of the udemy course links from coursevania.com + + :param list courses: A list of coursevania.com course links we want to fetch the udemy links for + :return: list of udemy links + """ + return [ + link + for link in await asyncio.gather(*map(self.get_udemy_course_link, courses)) + if link is not None + ] diff --git a/udemy_enroller/scrapers/manager.py b/udemy_enroller/scrapers/manager.py index 849b5b9..ae54e45 100644 --- a/udemy_enroller/scrapers/manager.py +++ b/udemy_enroller/scrapers/manager.py @@ -2,19 +2,29 @@ from functools import reduce from typing import List +from udemy_enroller.scrapers.coursevania import CoursevaniaScraper from udemy_enroller.scrapers.discudemy import DiscUdemyScraper from udemy_enroller.scrapers.tutorialbar import TutorialBarScraper class ScraperManager: - def __init__(self, tutorialbar_enabled, discudemy_enabled, max_pages): + def __init__( + self, tutorialbar_enabled, discudemy_enabled, coursevania_enabled, max_pages + ): self.tutorialbar_scraper = TutorialBarScraper( tutorialbar_enabled, max_pages=max_pages ) self.discudemy_scraper = DiscUdemyScraper( discudemy_enabled, max_pages=max_pages ) - self._scrapers = (self.tutorialbar_scraper, self.discudemy_scraper) + self.coursevania_scraper = CoursevaniaScraper( + coursevania_enabled, max_pages=max_pages + ) + self._scrapers = ( + self.tutorialbar_scraper, + self.discudemy_scraper, + self.coursevania_scraper, + ) async def run(self) -> List: """ diff --git a/udemy_enroller/scrapers/tutorialbar.py b/udemy_enroller/scrapers/tutorialbar.py index 6158b6d..b293d44 100644 --- a/udemy_enroller/scrapers/tutorialbar.py +++ b/udemy_enroller/scrapers/tutorialbar.py @@ -16,7 +16,7 @@ class TutorialBarScraper(BaseScraper): """ DOMAIN = "https://www.tutorialbar.com" - AD_DOMAINS = ("https://amzn",) + AD_DOMAINS = ("https://amzn", "https://bit.ly") def __init__(self, enabled, max_pages=None): super().__init__() diff --git a/udemy_enroller/settings.py b/udemy_enroller/settings.py index 
6c9f51b..3b0741d 100644 --- a/udemy_enroller/settings.py +++ b/udemy_enroller/settings.py @@ -16,7 +16,7 @@ class Settings: Contains all logic related to the scripts settings """ - def __init__(self, settings_path="settings.yaml"): + def __init__(self, delete_settings, settings_path="settings.yaml"): self.email = None self.password = None self.zip_code = None @@ -25,6 +25,8 @@ def __init__(self, settings_path="settings.yaml"): self._settings_path = os.path.join(get_app_dir(), settings_path) self.is_ci_build = strtobool(os.environ.get("CI_TEST", "False")) + if delete_settings: + self.delete() self._init_settings() def _init_settings(self) -> None: @@ -169,3 +171,19 @@ def _save_settings(self) -> None: logger.info(f"Saved your settings in {self._settings_path}") else: logger.info("Not saving your settings as requested") + + def delete(self) -> None: + """ + Delete the settings file + + :return: None + """ + if os.path.isfile(self._settings_path): + delete_settings = input( + "Please confirm that you want to delete your saved settings (Y/N): " + ) + if delete_settings.lower() == "y": + os.remove(self._settings_path) + logger.info(f"Settings file deleted: {self._settings_path}") + else: + logger.info("No settings to delete") diff --git a/udemy_enroller/udemy.py b/udemy_enroller/udemy.py index a3a2908..ab9213b 100644 --- a/udemy_enroller/udemy.py +++ b/udemy_enroller/udemy.py @@ -1,14 +1,17 @@ +import json +import os +import re +import time from enum import Enum +from typing import Dict, List -from selenium.common.exceptions import NoSuchElementException, TimeoutException -from selenium.webdriver.common.by import By -from selenium.webdriver.remote.webdriver import WebDriver, WebElement -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait +import requests +from bs4 import BeautifulSoup +from cloudscraper import create_scraper -from udemy_enroller.exceptions import LoginException, RobotException from 
udemy_enroller.logging import get_logger from udemy_enroller.settings import Settings +from udemy_enroller.utils import get_app_dir logger = get_logger() @@ -25,230 +28,373 @@ class UdemyStatus(Enum): class UdemyActions: - """ - Contains any logic related to interacting with udemy website - """ + LOGIN_URL = "https://www.udemy.com/join/login-popup/?locale=en_US" + MY_COURSES = ( + "https://www.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-last_accessed&fields[" + "course]=@min,enrollment_time,published_title,&fields[user]=@min" + ) + CHECKOUT_URL = "https://www.udemy.com/payment/checkout-submit/" + CHECK_PRICE = "https://www.udemy.com/api-2.0/course-landing-components/{}/me/?couponCode={}&components=price_text,deal_badge,discount_expiration" + COURSE_DETAILS = "https://www.udemy.com/api-2.0/courses/{}/?fields[course]=context_info,primary_category,primary_subcategory,avg_rating_recent,visible_instructors,locale,estimated_content_length,num_subscribers" + + HEADERS = { + "origin": "https://www.udemy.com", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 " + "Safari/537.36", + "accept": "application/json, text/plain, */*", + "accept-encoding": "gzip, deflate, br", + "content-type": "application/json;charset=UTF-8", + "x-requested-with": "XMLHttpRequest", + "x-checkout-version": "2", + "referer": "https://www.udemy.com/", + } + + def __init__(self, settings: Settings, cookie_file_name: str = ".cookie"): + self.settings = settings + self.user_has_preferences = self.settings.categories or self.settings.languages + self.session = requests.Session() + self.udemy_scraper = create_scraper() + self._cookie_file = os.path.join(get_app_dir(), cookie_file_name) + self._enrolled_course_info = [] + self._currency_symbol = None + self._currency = None + + def login(self, retry=False) -> None: + """ + Login to Udemy using REST api + Saves login cookies for future use - DOMAIN = "https://www.udemy.com" + :return: 
None + """ + cookie_details = self._load_cookies() + if cookie_details is None: + response = self.udemy_scraper.get(self.LOGIN_URL) + soup = BeautifulSoup(response.content, "html.parser") + csrf_token = soup.find("input", {"name": "csrfmiddlewaretoken"})["value"] + _form_data = { + "email": self.settings.email, + "password": self.settings.password, + "csrfmiddlewaretoken": csrf_token, + } + self.udemy_scraper.headers.update({"Referer": self.LOGIN_URL}) + auth_response = self.udemy_scraper.post( + self.LOGIN_URL, data=_form_data, allow_redirects=False + ) + if auth_response.status_code != 302: + raise Exception( + f"Could not login. Code: {auth_response.status_code} Text: {auth_response.text}" + ) + else: + cookie_details = { + "csrf_token": csrf_token, + "access_token": auth_response.cookies["access_token"], + "client_id": auth_response.cookies["client_id"], + } + self._cache_cookies(cookie_details) + + bearer_token = f"Bearer {cookie_details['access_token']}" + self.session.headers = self.HEADERS + self.session.headers.update( + { + "authorization": bearer_token, + "x-udemy-authorization": bearer_token, + "x-csrftoken": cookie_details["csrf_token"], + } + ) + self.session.cookies.update({"access_token": cookie_details["access_token"]}) + self.session.cookies.update({"client_id": cookie_details["client_id"]}) - def __init__(self, driver: WebDriver, settings: Settings): - self.driver = driver - self.settings = settings - self.logged_in = False + try: + self._enrolled_course_info = self.load_my_courses() + except Exception as e: + if not retry: + logger.info("Retrying login") + self._delete_cookies() + return self.login(retry=True) + else: + logger.error( + "Unable to fetch existing courses. 
Login was not successful" + ) + raise e - def login(self, is_retry=False) -> None: + def load_my_courses(self) -> List: """ - Login to your udemy account + Loads users currently enrolled courses from Udemy - :param bool is_retry: Is this is a login retry and we still have captcha raise RobotException + :return: List of logged in users courses + """ + logger.info("Loading existing course details") + all_courses = list() + page_size = 100 + + my_courses = self.my_courses(1, page_size) + all_courses.extend(my_courses["results"]) + total_pages = my_courses["count"] // page_size + for page in range(2, total_pages + 2): + my_courses = self.my_courses(page, page_size) + if "results" in my_courses: + all_courses.extend(my_courses["results"]) + time.sleep(1) + logger.info(f"Currently enrolled in {len(all_courses)} courses") + return all_courses + + def is_enrolled(self, course_id: int) -> bool: + """ + Check if the user is currently enrolled in the course based on course_id passed in - :return: None + :param int course_id: Check if the course_id is in the users current courses + :return: """ - if not self.logged_in: - self.driver.get(f"{self.DOMAIN}/join/login-popup/") - try: - email_element = self.driver.find_element_by_name("email") - email_element.send_keys(self.settings.email) - - password_element = self.driver.find_element_by_name("password") - password_element.send_keys(self.settings.password) - - self.driver.find_element_by_name("submit").click() - except NoSuchElementException as e: - is_robot = self._check_if_robot() - if is_robot and not is_retry: - input( - "Before login. Please solve the captcha before proceeding. 
Hit enter once solved " - ) - self.login(is_retry=True) - return - if is_robot and is_retry: - raise RobotException("I am a bot!") - raise e - else: - user_dropdown_xpath = "//a[@data-purpose='user-dropdown']" - try: - WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.XPATH, user_dropdown_xpath)) - ) - except TimeoutException: - is_robot = self._check_if_robot() - if is_robot and not is_retry: - input( - "After login. Please solve the captcha before proceeding. Hit enter once solved " - ) - if self._check_if_robot(): - raise RobotException("I am a bot!") - self.logged_in = True - return - raise LoginException("Udemy user failed to login") - self.logged_in = True - - def redeem(self, url: str) -> str: - """ - Redeems the course url passed in - - :param str url: URL of the course to redeem - :return: A string detailing course status - """ - self.driver.get(url) - - course_name = self.driver.title - - # If the user has configured languages check it is a supported option - if self.settings.languages: - locale_xpath = "//div[@data-purpose='lead-course-locale']" - element_text = ( - WebDriverWait(self.driver, 10) - .until(EC.presence_of_element_located((By.XPATH, locale_xpath))) - .text - ) + enrolled = False + all_course_ids = [course["id"] for course in self._enrolled_course_info] + if course_id in all_course_ids: + enrolled = True - if element_text not in self.settings.languages: - logger.debug(f"Course language not wanted: {element_text}") - return UdemyStatus.UNWANTED_LANGUAGE.value + return enrolled - if self.settings.categories: - # If the wanted categories are specified, get all the categories of the course by - # scraping the breadcrumbs on the top + def is_coupon_valid(self, course_id: int, coupon_code: str) -> bool: + """ + Check if the coupon is valid for a course - breadcrumbs_path = "udlite-breadcrumb" - breadcrumbs_text_path = "udlite-heading-sm" - breadcrumbs: WebElement = self.driver.find_element_by_class_name( - breadcrumbs_path 
+ :param int course_id: Id of the course to check the coupon against + :param str coupon_code: Coupon to apply to the course + :return: + """ + coupon_valid = True + coupon_details = self.coupon_details(course_id, coupon_code) + current_price = coupon_details["price_text"]["data"]["pricing_result"]["price"][ + "amount" + ] + if self._currency_symbol is None and self._currency is None: + self._currency_symbol = coupon_details["price_text"]["data"][ + "pricing_result" + ]["price"]["currency_symbol"] + self._currency = coupon_details["price_text"]["data"]["pricing_result"][ + "price" + ]["currency"] + + if bool(current_price): + logger.debug( + f"Skipping course as it now costs {self._currency_symbol}{current_price}" ) - breadcrumbs = breadcrumbs.find_elements_by_class_name(breadcrumbs_text_path) - breadcrumbs = [bc.text for bc in breadcrumbs] # Get only the text + coupon_valid = False + if not bool( + coupon_details["price_text"]["data"]["pricing_result"]["list_price"][ + "amount" + ] + ): + logger.debug("Skipping course as it is always FREE") + coupon_valid = False - for category in self.settings.categories: - if category in breadcrumbs: - break - else: - logger.debug("Skipping course as it does not have a wanted category") - return UdemyStatus.UNWANTED_CATEGORY.value - - # Enroll Now 1 - buy_course_button_xpath = "//button[@data-purpose='buy-this-course-button']" - # We need to wait for this element to be clickable before checking if already purchased - WebDriverWait(self.driver, 10).until( - EC.element_to_be_clickable((By.XPATH, buy_course_button_xpath)) - ) + return coupon_valid + + def is_preferred_language(self, course_details: Dict) -> bool: + """ + Check if the course is in one of the languages preferred by the user + + :param dict course_details: Dictionary containing course details from Udemy + :return: boolean + """ + is_preferred_language = True + course_language = course_details["locale"]["simple_english_title"] + if course_language not in 
self.settings.languages: + logger.debug(f"Course language not wanted: {course_language}") + is_preferred_language = False - # Check if already enrolled. If add to cart is available we have not yet enrolled - add_to_cart_xpath = "//div[@data-purpose='add-to-cart']" - add_to_cart_elements = self.driver.find_elements_by_xpath(add_to_cart_xpath) - if not add_to_cart_elements or ( - add_to_cart_elements and not add_to_cart_elements[0].is_displayed() + return is_preferred_language + + def is_preferred_category(self, course_details: Dict) -> bool: + """ + Check if the course is in one of the categories preferred by the user + + :param dict course_details: Dictionary containing course details from Udemy + :return: boolean + """ + is_preferred_category = True + + if ( + course_details["primary_category"]["title"] not in self.settings.categories + and course_details["primary_subcategory"]["title"] + not in self.settings.categories ): - logger.debug(f"Already enrolled in {course_name}") - return UdemyStatus.ENROLLED.value + logger.debug("Skipping course as it does not have a wanted category") + is_preferred_category = False + return is_preferred_category - # Click to enroll in the course - element_present = EC.presence_of_element_located( - (By.XPATH, buy_course_button_xpath) - ) - WebDriverWait(self.driver, 10).until(element_present).click() - - enroll_button_xpath = "//*[@class='udemy pageloaded']/div[1]/div[2]/div/div/div/div[2]/form/div[2]/div/div[4]/button" - # Enroll Now 2 - element_present = EC.presence_of_element_located( - ( - By.XPATH, - enroll_button_xpath, - ) - ) - WebDriverWait(self.driver, 10).until(element_present) - - # Check if zipcode exists before doing this - if self.settings.zip_code: - # zipcode is only required in certain regions (e.g USA) - try: - element_present = EC.presence_of_element_located( - ( - By.ID, - "billingAddressSecondaryInput", - ) - ) - WebDriverWait(self.driver, 5).until(element_present).send_keys( - self.settings.zip_code - ) + def 
my_courses(self, page: int, page_size: int) -> Dict: + """ + Load the current logged in users courses - # After you put the zip code in, the page refreshes itself and disables the enroll button for a split - # second. - enroll_button_is_clickable = EC.element_to_be_clickable( - (By.XPATH, enroll_button_xpath) - ) - WebDriverWait(self.driver, 5).until(enroll_button_is_clickable) - except (TimeoutException, NoSuchElementException): - pass - - # Make sure the price has loaded - price_class_loading = "udi-circle-loader" - WebDriverWait(self.driver, 10).until_not( - EC.presence_of_element_located((By.CLASS_NAME, price_class_loading)) + :param int page: page number to load + :param int page_size: number of courses to load per page + :return: dict containing the current users courses + """ + response = self.session.get( + self.MY_COURSES + f"&page={page}&page_size={page_size}" ) + return response.json() - # Make sure the course is Free - price_xpath = "//span[@data-purpose='total-price']//span" - price_elements = self.driver.find_elements_by_xpath(price_xpath) - # We get elements here as one of there are 2 matches for this xpath - - for price_element in price_elements: - # We are only interested in the element which is displaying the price details - if price_element.is_displayed(): - _price = price_element.text - # Extract the numbers from the price text - # This logic should work for different locales and currencies - _numbers = "".join(filter(lambda x: x if x.isdigit() else None, _price)) - if _numbers.isdigit() and int(_numbers) > 0: - logger.debug( - f"Skipping course as it now costs {_price}: {course_name}" - ) - return UdemyStatus.EXPIRED.value - - # Check if state/province element exists - billing_state_element_id = "billingAddressSecondarySelect" - billing_state_elements = self.driver.find_elements_by_id( - billing_state_element_id - ) - if billing_state_elements: - # If we are here it means a state/province element exists and needs to be filled - # Open the 
dropdown menu - billing_state_elements[0].click() - - # Pick the first element in the state/province dropdown - first_state_xpath = ( - "//select[@id='billingAddressSecondarySelect']//option[2]" - ) - element_present = EC.presence_of_element_located( - (By.XPATH, first_state_xpath) - ) - WebDriverWait(self.driver, 10).until(element_present).click() + def coupon_details(self, course_id: int, coupon_code: str) -> Dict: + """ + Check that the coupon is valid for the current course - # Hit the final Enroll now button - enroll_button_is_clickable = EC.element_to_be_clickable( - (By.XPATH, enroll_button_xpath) - ) - WebDriverWait(self.driver, 10).until(enroll_button_is_clickable).click() + :param int course_id: Id of the course to check the coupon against + :param str coupon_code: The coupon_code to check against the course + :return: dictionary containing the course pricing details + """ + response = requests.get(self.CHECK_PRICE.format(course_id, coupon_code)) + return response.json() - # Wait for success page to load - success_element_class = "alert-success" - WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, success_element_class)) - ) + def course_details(self, course_id: int) -> Dict: + """ + Retrieves details relating to the course passed in - logger.info(f"Successfully enrolled in: {course_name}") - return UdemyStatus.ENROLLED.value + :param int course_id: Id of the course to get the details of + :return: dictionary containing the course details + """ + response = requests.get(self.COURSE_DETAILS.format(course_id)) + return response.json() - def _check_if_robot(self) -> bool: + def enroll(self, course_link: str) -> str: """ - Simply checks if the captcha element is present on login if email/password elements are not + Enroll the current user in the course provided - :return: Bool + :param str course_link: Link to the course with valid coupon attached + :return: str representing the status of the enrolment """ - is_robot = True 
- try: - self.driver.find_element_by_id("px-captcha") - except NoSuchElementException: - is_robot = False - return is_robot + str_check = "?couponCode=" + if str_check in course_link: + url, coupon_code = course_link.split(str_check) + course_id = self._get_course_id(url) + + if self.is_enrolled(course_id): + logger.info(f"Already enrolled in {url}") + return UdemyStatus.ENROLLED.value + + if self.user_has_preferences: + course_details = self.course_details(course_id) + if self.settings.languages: + if not self.is_preferred_language(course_details): + return UdemyStatus.UNWANTED_LANGUAGE.value + if self.settings.categories: + if not self.is_preferred_category(course_details): + return UdemyStatus.UNWANTED_CATEGORY.value + + if not self.is_coupon_valid(course_id, coupon_code): + return UdemyStatus.EXPIRED.value + + return self._checkout(course_id, coupon_code, url) + else: + raise Exception(f"Malformed url passed in: {course_link}") + + def _get_course_id(self, url: str) -> int: + """ + Get the course id from the url provided + + :param str url: Udemy url to fetch the course from + :return: int representing the course id + """ + response = self.session.get(url) + soup = BeautifulSoup(response.content, "html.parser") + + return int(soup.find("body")["data-clp-course-id"]) + + def _checkout( + self, course_id: int, coupon_code: str, url: str, retry: bool = False + ) -> str: + """ + Checkout process for the course and coupon provided + + :param int course_id: The course id of the course to enroll in + :param str coupon_code: The coupon code to apply on checkout + :param str url: Udemy url used in logging + :param str retry: If this is a retried checkout raise exception if not successful + :return: + """ + payload = self._build_checkout_payload(course_id, coupon_code) + checkout_result = self.session.post(self.CHECKOUT_URL, json=payload) + if not checkout_result.ok: + if not retry: + seconds = int(re.search("\\d+", checkout_result.text).group()) + 1 + logger.info( + 
f"Script has been rate limited. Sleeping for {seconds} seconds" + ) + time.sleep(seconds) + self._checkout(course_id, coupon_code, url, retry=True) + else: + raise Exception( + f"Checkout failed: Code: {checkout_result.status_code} Text: {checkout_result.text}" + ) + else: + result = checkout_result.json() + if result["status"] == "succeeded": + logger.info(f"Successfully enrolled: {url}") + return UdemyStatus.ENROLLED.value + elif result["status"] == "failed": + logger.warning(f"Checkout failed: {url}") + logger.debug(f"Checkout payload: {payload}") + # TODO: Shouldn't happen. Need to monitor if it does + return UdemyStatus.EXPIRED.value + + def _build_checkout_payload(self, course_id: int, coupon_code: str) -> Dict: + """ + Build the payload for checkout + + :param int course_id: The course id to checkout + :param str coupon_code: The coupon code to use at checkout + :return: dict representing the checkout payload + """ + return { + "checkout_event": "Submit", + "shopping_cart": { + "items": [ + { + "discountInfo": {"code": coupon_code}, + "purchasePrice": { + "amount": 0, + "currency": self._currency, + "price_string": "Free", + "currency_symbol": self._currency_symbol, + }, + "buyableType": "course", + "buyableId": course_id, + "buyableContext": {}, + } + ], + "is_cart": True, + }, + "payment_info": {"payment_vendor": "Free", "payment_method": "free-method"}, + } + + def _cache_cookies(self, cookies: Dict) -> None: + """ + Caches cookies for future logins + + :param cookies: + :return: + """ + logger.info("Caching cookies for future use") + with open(self._cookie_file, "a+") as f: + f.write(json.dumps(cookies)) + + def _load_cookies(self) -> Dict: + """ + Loads existing cookie file + + :return: + """ + cookies = None + logger.info("Loading cookies from file") + if os.path.isfile(self._cookie_file): + with open(self._cookie_file) as f: + cookies = json.loads(f.read()) + return cookies + + def _delete_cookies(self) -> None: + """ + Remove existing cookie file + + 
:return: + """ + logger.info("Deleting cookies") + os.remove(self._cookie_file)