From 1079b01b2479e5964bf90c1d9f81595859bfb075 Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 14:24:14 +0100 Subject: [PATCH 1/6] Added support for reading credentials from netrc file on *nix systems Code for handling netrc has been adapted from the coursera-dl/coursera-dl repo. --- README.md | 11 ++++ edx_dl/edx_dl.py | 163 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 165 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 75ef31c1..7ff512f2 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,17 @@ Your downloaded videos will be placed in a new directory called `Downloaded`, inside your current directory, but you can also choose another destination with the `-o` argument. +On \*nix platforms, the use of a `~/.netrc` file is a good alternative to +specifying both your username (i.e., your email address) and password every +time on the command line. To use it, simply add a line like the one below to +a file named `.netrc` in your home directory with contents like: + + machine edx-dl login <user> password <pass> + +Then, simply invoke your command using the `-n`, like: + + edx-dl -n --list-courses + To see all available options and a brief description of what they do, simply execute: diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 64486674..232a0955 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -14,6 +14,12 @@ import pickle import re import sys +import platform + +try: + import netrc +except ImportError: + netrc = None from functools import partial from multiprocessing.dummy import Pool as ThreadPool @@ -57,6 +63,14 @@ ) +class CredentialsError(BaseException): + """ + Class to be thrown if the credentials are not found. + """ + + pass + + OPENEDX_SITES = { 'edx': { 'url': 'https://courses.edx.org', @@ -210,6 +224,135 @@ def edx_get_subtitle(url, headers, return None + +def get_config_paths(config_name): # pragma: no test + """ + [Code from https://github.com/coursera-dl/coursera-dl/...] 
+ Return a list of config file paths to try in order, given config file + name and possibly a user-specified path. + + For Windows platforms, there are several paths that can be tried to + retrieve the netrc file. There is, however, no "standard way" of doing + things. + + A brief recap of the situation (all file paths are written in Unix + convention): + + 1. By default, Windows does not define a $HOME path. However, some + people might define one manually, and many command-line tools imported + from Unix will search the $HOME environment variable first. This + includes MSYSGit tools (bash, ssh, ...) and Emacs. + + 2. Windows defines two 'user paths': $USERPROFILE, and the + concatenation of the two variables $HOMEDRIVE and $HOMEPATH. Both of + these paths point by default to the same location, e.g. + C:\\Users\\Username + + 3. $USERPROFILE cannot be changed, however $HOMEDRIVE and $HOMEPATH + can be changed. They are originally intended to be the equivalent of + the $HOME path, but there are many known issues with them. + + 4. As for the name of the file itself, most of the tools ported from + Unix will use the standard '.dotfile' scheme, but some of these will + instead use "_dotfile". Of the latter, the two notable exceptions are + vim, which will first try '_vimrc' before '.vimrc' (but it will try + both) and git, which will require the user to name its netrc file + '_netrc'. + + Relevant links : + http://markmail.org/message/i33ldu4xl5aterrr + http://markmail.org/message/wbzs4gmtvkbewgxi + http://stackoverflow.com/questions/6031214/ + + Because the whole thing is a mess, I suggest we try various sensible + defaults until we succeed or have depleted all possibilities. 
+ """ + + if platform.system() != 'Windows': + return [None] + + # Now, we only treat the case of Windows + env_vars = [["HOME"], + ["HOMEDRIVE", "HOMEPATH"], + ["USERPROFILE"], + ["SYSTEMDRIVE"]] + + env_dirs = [] + for var_list in env_vars: + + var_values = [_getenv_or_empty(var) for var in var_list] + + directory = ''.join(var_values) + if not directory: + logging.debug('Environment var(s) %s not defined, skipping', + var_list) + else: + env_dirs.append(directory) + + additional_dirs = ["C:", ""] + + all_dirs = env_dirs + additional_dirs + + leading_chars = [".", "_"] + + res = [''.join([directory, os.sep, lc, config_name]) + for directory in all_dirs + for lc in leading_chars] + + return res + + +def authenticate_through_netrc(path=None): + """ + [Base code from https://github.com/coursera-dl/coursera-dl/...] + Return the tuple user / password given a path for the .netrc file. + + Raises CredentialsError if no valid netrc file is found. + """ + errors = [] + netrc_machine = 'edx-dl' + paths = [path] if path else get_config_paths("netrc") + for path in paths: + try: + logging.debug('Trying netrc file %s', path) + auths = netrc.netrc(path).authenticators(netrc_machine) + except (IOError, netrc.NetrcParseError) as e: + errors.append(e) + else: + if auths is None: + errors.append('Didn\'t find any credentials for ' + + netrc_machine) + else: + return auths[0], auths[2] + + error_messages = '\n'.join(str(e) for e in errors) + raise CredentialsError( + 'Did not find valid netrc file:\n' + error_messages + + '\nPlease run this command: chmod og-rw ~/.netrc') + + +def get_credentials(username=None, password=None, netrc=None): + """ + Return valid username, password tuple. + + Raises CredentialsError if username or password is missing. + """ + if netrc: + path = None if netrc is True else netrc + return authenticate_through_netrc(path) + + if username: + # Query password, if not alredy passed by command line or not found in any netrc file. 
+ if not password: + password = getpass.getpass(stream=sys.stderr) + + if not username or not password: + logging.error("You must supply username and password to log-in, or provide them in a netrc file") + exit(ExitCode.MISSING_CREDENTIALS) + + return username, password + + def edx_login(url, headers, username, password): """ Log in user into the openedx website. @@ -246,7 +389,7 @@ def parse_args(): # optional parser.add_argument('-u', '--username', - required=True, + default=None, action='store', help='your edX username (email)') @@ -256,6 +399,13 @@ def parse_args(): help='your edX password, ' 'beware: it might be visible to other users on your system') + parser.add_argument( + '-n', + '--netrc', + action='store_true', + help='use netrc for reading passwords, uses default' + ' location if no path specified. Only for *nix systems.') + parser.add_argument('-f', '--format', dest='format', @@ -988,14 +1138,9 @@ def main(): change_openedx_site(args.platform) - # Query password, if not alredy passed by command line. - if not args.password: - args.password = getpass.getpass(stream=sys.stderr) - - if not args.username or not args.password: - logging.error("You must supply username and password to log-in") - exit(ExitCode.MISSING_CREDENTIALS) - + # Query password, if not already passed by command line or if no netrc file provided. 
+ args.username, args.password = get_credentials(args.username, args.password, args.netrc) + # Prepare Headers headers = edx_get_headers() From 0074258a77382e0ead91456554403b15d5a4a230 Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 14:28:06 +0100 Subject: [PATCH 2/6] README change --- README.md | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7ff512f2..e2d735cf 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,18 @@ Your downloaded videos will be placed in a new directory called `Downloaded`, inside your current directory, but you can also choose another destination with the `-o` argument. +To see all available options and a brief description of what they do, simply +execute: + + edx-dl --help + +*Important Note:* To use sites other than <edx.org>, you **have** to specify the +site along with the `-x` option. For example, `-x stanford`, if the course +that you want to get is hosted on Stanford's site. + + +*Alternative to providing username and password every time:* + On \*nix platforms, the use of a `~/.netrc` file is a good alternative to specifying both your username (i.e., your email address) and password every time on the command line. To use it, simply add a line like the one below to @@ -69,18 +81,10 @@ a file named `.netrc` in your home directory with contents like: machine edx-dl login <user> password <pass> -Then, simply invoke your command using the `-n`, like: +Then, simply invoke your command using the `-n` option like: edx-dl -n --list-courses -To see all available options and a brief description of what they do, simply -execute: - - edx-dl --help - -*Important Note:* To use sites other than <edx.org>, you **have** to specify the -site along with the `-x` option. For example, `-x stanford`, if the course -that you want to get is hosted on Stanford's site. 
# Docker container From f4e60578d2f66f719f9f0fe71046f180fde65abc Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 15:46:44 +0100 Subject: [PATCH 3/6] Improving the netrc option: the user can specify a custom netrc file --- edx_dl/edx_dl.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 232a0955..c9b54a08 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -399,10 +399,17 @@ def parse_args(): help='your edX password, ' 'beware: it might be visible to other users on your system') + # the netrc option can be passed a path to the netrc file or + # it can be used with nothing in order to use the default + # netrc file location parser.add_argument( '-n', '--netrc', - action='store_true', + dest='netrc', + nargs='?', + action='store', + const=True, + default=False, help='use netrc for reading passwords, uses default' ' location if no path specified. Only for *nix systems.') From e990b9333e1e7770cc5f163a01c4240176d47813 Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 15:47:00 +0100 Subject: [PATCH 4/6] Proposing a test for the new `netrc` login option Since login with actual credentials is not allowed, maybe we can add a test that takes the netrc file and its credentials and logs in. The netrc file does not have to be --- test_edx_dl.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test_edx_dl.py b/test_edx_dl.py index 5e716fcb..b86d2be0 100644 --- a/test_edx_dl.py +++ b/test_edx_dl.py @@ -12,6 +12,17 @@ def test_failed_login(): assert not resp.get('success', False) +# def test_netrc_login(): +# """ +# Testing with the login with a local netrc file. +# This way, no login/password gets shown. 
+# """ +# username, password = get_credentials(netrc=True) +# resp = edx_dl.edx_login( +# edx_dl.LOGIN_API, edx_dl.edx_get_headers(), username, password) +# assert not resp.get('success', True) + + def test_remove_repeated_urls(): url = "test/html/multiple_units.html" site = 'https://courses.edx.org' From b1bf9463dffa01c1c034f8377f7e2aea584b0861 Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 15:52:20 +0100 Subject: [PATCH 5/6] Conforming to PEP8 --- edx_dl/edx_dl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index c9b54a08..4080274b 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -224,7 +224,6 @@ def edx_get_subtitle(url, headers, return None - def get_config_paths(config_name): # pragma: no test """ [Code from https://github.com/coursera-dl/coursera-dl/...] @@ -342,12 +341,14 @@ def get_credentials(username=None, password=None, netrc=None): return authenticate_through_netrc(path) if username: - # Query password, if not alredy passed by command line or not found in any netrc file. + # Query password, if not alredy passed by command line + # or not found in any netrc file. 
if not password: password = getpass.getpass(stream=sys.stderr) if not username or not password: - logging.error("You must supply username and password to log-in, or provide them in a netrc file") + logging.error("You must supply username and password to log-in" + + ", or provide them in a netrc file") exit(ExitCode.MISSING_CREDENTIALS) return username, password From 8e58984ed14657cbdab1ed3720494bf847ae3168 Mon Sep 17 00:00:00 2001 From: Jeffrey Mvutu Date: Thu, 20 Dec 2018 16:07:15 +0100 Subject: [PATCH 6/6] Added tests for new NETRC feature --- test/auth/netrc | 1 + test/auth/not_netrc | 1 + test_edx_dl.py | 41 ++++++++++++++++++++++++++++++----------- 3 files changed, 32 insertions(+), 11 deletions(-) create mode 100644 test/auth/netrc create mode 100644 test/auth/not_netrc diff --git a/test/auth/netrc b/test/auth/netrc new file mode 100644 index 00000000..3032f98f --- /dev/null +++ b/test/auth/netrc @@ -0,0 +1 @@ +machine edx-dl login user@mail.com password secret \ No newline at end of file diff --git a/test/auth/not_netrc b/test/auth/not_netrc new file mode 100644 index 00000000..f0f877ce --- /dev/null +++ b/test/auth/not_netrc @@ -0,0 +1 @@ +fake \ No newline at end of file diff --git a/test_edx_dl.py b/test_edx_dl.py index b86d2be0..ac76e264 100644 --- a/test_edx_dl.py +++ b/test_edx_dl.py @@ -1,28 +1,47 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import os + import pytest from edx_dl import edx_dl, parsing from edx_dl.common import Unit, Video, DEFAULT_FILE_FORMATS +NETRC = \ + os.path.join(os.path.dirname(__file__), + "test", "auth", "netrc") + +NOT_NETRC = \ + os.path.join(os.path.dirname(__file__), + "test", "auth", "not_netrc") + + +def test_authenticate_through_netrc_with_given_path(): + username, password = edx_dl.authenticate_through_netrc(NETRC) + assert username == 'user@mail.com' + assert password == 'secret' + + +def test_authenticate_through_netrc_raises_exception(): + pytest.raises( + edx_dl.CredentialsError, + 
edx_dl.authenticate_through_netrc, + NOT_NETRC) + + +def test_get_credentials_with_netrc(): + username, password = edx_dl.get_credentials(netrc=NETRC) + assert username == 'user@mail.com' + assert password == 'secret' + + def test_failed_login(): resp = edx_dl.edx_login( edx_dl.LOGIN_API, edx_dl.edx_get_headers(), "guest", "guest") assert not resp.get('success', False) -# def test_netrc_login(): -# """ -# Testing with the login with a local netrc file. -# This way, no login/password gets shown. -# """ -# username, password = get_credentials(netrc=True) -# resp = edx_dl.edx_login( -# edx_dl.LOGIN_API, edx_dl.edx_get_headers(), username, password) -# assert not resp.get('success', True) - - def test_remove_repeated_urls(): url = "test/html/multiple_units.html" site = 'https://courses.edx.org'