# pa_interactor.py

#!/usr/bin/env python3
import re
import os
import sys
import requests
import argparse
import youtube_dl
from bs4 import BeautifulSoup

# Root URL of the Pentester Academy site, prefixed to the relative paths
# requested by the functions below.
baseurl = "https://www.pentesteracademy.com"

def namecleaner(setname):
    """Return *setname* with ":", "?" and "." removed and "/" turned into "-".

    The result is used to build directory and file names, so the path
    separator is replaced rather than dropped.
    """
    # One translate() pass replaces the four find()/slice loops, which
    # rescanned the string for every single character they removed.
    return setname.translate(str.maketrans("/", "-", ":?."))

def courselister(get_old):
    """Print every course linked from the topics page with its video count.

    Each <h3> heading of the "/topics" page that carries a link is treated
    as a course; its page is fetched and the "/getstats?videoid=N" entries
    found in it are counted as videos.

    get_old: when true, the first video of each course is also fetched and,
        if it embeds a YouTube player, the upload date of that YouTube video
        is printed as the date the course started.

    A final line reports how many courses were listed and the total number
    of videos found.
    """
    courses = requests.get(baseurl + "/topics").text
    courselist = BeautifulSoup(courses, 'html.parser').find_all("h3")
    totalcourses = 0
    totalvideos = 0
    for page in courselist:
        # Headings without a link are not courses; skip them.
        try:
            courseurl = baseurl + page.a["href"]
        except (TypeError, KeyError):
            continue
        getcpage = requests.get(courseurl).text
        listvids = re.findall(r'(\/getstats\?videoid=[0-9]*)', getcpage)
        title = page.a.get_text()
        started = None
        # A course page without videos has no first video to inspect.
        if get_old and listvids:
            checkvid = requests.get(baseurl + "/video?id" + re.findall(r'(=[0-9]*)', listvids[0])[0]).text
            embed = BeautifulSoup(checkvid, 'html.parser').embed
            if embed:
                # partition() keeps the whole id when the embed URL has no
                # query string; slicing up to find("?") == -1 would drop
                # the last character of the id.
                videoid = embed["src"].partition("/v/")[2].partition("?")[0]
                yturl = "https://www.youtube.com/watch?v=" + videoid
                try:
                    ydl_opts = {"no_warnings": True, "quiet": True}
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        meta = ydl.extract_info(yturl, download=False)
                except youtube_dl.utils.DownloadError:
                    print ("Youtube video seems down, can\'t get upload time")
                else:
                    # The metadata may lack an upload date; fall back to
                    # the plain line instead of failing on None.
                    rawdate = (meta or {}).get('upload_date')
                    if rawdate:
                        started = rawdate[0:4] + " - " + rawdate[4:6] + " - " + rawdate[6:]
        if started:
            print ('{}: {} videos, started on {}'.format(title, len(listvids), started))
        else:
            print ('{}: {} videos'.format(title, len(listvids)))
        # Count only headings that were actually listed as courses.
        totalcourses += 1
        totalvideos += len(listvids)
    print ('There are {} courses with a total of {} videos in Pentester Academy at the moment.'.format(totalcourses, totalvideos))

def possilester(gotcookie):
    """Print how many videos can still be downloaded and which courses fit.

    The account logs page is requested with *gotcookie* sent as the
    "SACSID" cookie. The remaining amount is the difference between the
    two numbers read from that page. Every course linked from the topics
    page whose video count does not exceed that amount is printed with
    its URL.

    gotcookie: value of the session cookie.

    Exits with a message when the two numbers cannot be read from the
    account logs page.
    """
    currcookie = {"SACSID" : gotcookie}
    acclogs = requests.get(baseurl + "/members?options=accountlogs", cookies=currcookie).text
    paragraphs = BeautifulSoup(acclogs, 'html.parser').find_all("p")
    # NOTE(review): coursedownl reads the allotted amount from the second
    # <b> of the second <p>, while this reads the first <b> of the first
    # <p> -- confirm which one matches the real page.
    try:
        allorep = int(paragraphs[0].b.get_text())
        usedrep = int(paragraphs[2].b.get_text())
    except (IndexError, AttributeError, ValueError):
        # The page did not have the expected layout; report it instead of
        # failing with a traceback.
        sys.exit("Could not read the account logs page, check that the session cookie is valid")
    leftrep = allorep - usedrep
    print ('You can download {} videos \nHere is a list of the courses that you can download\n'.format(str(leftrep)))
    courses = requests.get(baseurl + "/topics").text
    courselist = BeautifulSoup(courses, 'html.parser').find_all("h3")
    for page in courselist:
        # Headings without a link are not courses; skip them the same way
        # courselister does instead of crashing on them.
        try:
            courseurl = baseurl + page.a["href"]
        except (TypeError, KeyError):
            continue
        getcpage = requests.get(courseurl).text
        listvids = re.findall(r'(\/getstats\?videoid=[0-9]*)', getcpage)
        if len(listvids) <= leftrep:
            print ('{}: {} videos\n{}'.format(page.a.get_text(), len(listvids), courseurl))
def _youtube_watch_url(embed_src):
    """Build the YouTube watch URL for the video id that follows "/v/" in *embed_src*."""
    # partition() keeps the whole id when the embed URL has no query string;
    # slicing up to find("?") == -1 would drop the last character of the id.
    videoid = embed_src.partition("/v/")[2].partition("?")[0]
    return "https://www.youtube.com/watch?v=" + videoid

def _save_from_academy(session, idsuffix, filename):
    """Request the accounting URL for *idsuffix* with *session* and write the response body to *filename*."""
    paurl = "https://www.pentesteracademy.com/accounting?id" + idsuffix
    getvideo = session.get(paurl, allow_redirects=True)
    print (getvideo.url)
    # The context manager closes the file even if the write fails.
    with open(filename, 'wb') as outfile:
        outfile.write(getvideo.content)

def _stats_path(link):
    """Turn a video link into the "/getstats?videoid=N" form used on course pages."""
    ids = re.findall(r'(=[0-9]*)', link)
    if not ids:
        # A link without "=<digits>" cannot match any video of the course.
        sys.exit("The video inserted wasn't found on the course, exiting")
    return "/getstats?videoid" + ids[0]

def coursedownl(gotcookie, clink, sflink, filink, nmkdir, convert):
    """Download the videos of a course into numbered files.

    gotcookie: value stored as the "SACSID" cookie of the session used for
        the account and video requests.
    clink: URL of the course page.
    sflink: link of the first video to download, or None to start at the
        first video of the course.
    filink: link of the last video to download, or None to go to the end.
    nmkdir: when true no directory is created; otherwise a directory named
        after the course page title is created and entered.
    convert: when true, videos downloaded through youtube_dl get an
        FFmpegVideoConvertor post-processor with mp4 as preferred format.

    Videos whose page embeds a YouTube player are fetched with youtube_dl
    and, if that fails, from the site's accounting URL; all other videos
    are fetched from the accounting URL directly.

    Exits when a start/finish link is not part of the course, when the
    answer to the confirmation prompt is invalid, when a video page is
    titled "Account Logs", when the account logs page cannot be read, or
    when the course directory already exists.
    """
    if clink == "https://www.pentesteracademy.com/course?id=10":
        msfconf = input("Looks like you are trying to download Metasploit course, it is available for free on https://www.pentesteracademy.com/course?id=10 (you need to be logged in), are you sure you want to continue? y/n :")
        if msfconf in ("n", "N"):
            sys.exit(0)
    navig = requests.session()
    newcookie = requests.cookies.create_cookie(domain='www.pentesteracademy.com',name='SACSID',value=gotcookie)
    navig.cookies.set_cookie(newcookie)
    acclogs = navig.get(baseurl + "/members?options=accountlogs").text
    paragraphs = BeautifulSoup(acclogs, 'html.parser').find_all("p")
    try:
        allorep = int(paragraphs[1].find_all("b")[1].get_text())
        usedrep = int(paragraphs[2].b.get_text())
    except (IndexError, AttributeError, ValueError):
        # The page did not have the expected layout; report it instead of
        # failing with a traceback.
        sys.exit("Could not read the account logs page, check that the session cookie is valid")
    leftrep = allorep - usedrep
    coursesource = requests.get(clink).text
    pcoursesource = BeautifulSoup(coursesource, 'html.parser')
    if not nmkdir:
        cleanname = namecleaner(pcoursesource.title.get_text())
        try:
            os.mkdir(cleanname)
        except FileExistsError:
            sys.exit('The directory "{}" already exists, run with -dm inside it to download there'.format(cleanname))
        os.chdir(cleanname)
    listvids = re.findall(r'(\/getstats\?videoid=[0-9]*)', coursesource)
    # Pad the file numbers to the width of the whole course so that partial
    # downloads (-sf / -fi) are numbered consistently with a full one.
    padwidth = len(str(len(listvids)))
    vidnum = 1
    if sflink is not None:
        vid = _stats_path(sflink)
        try:
            startat = listvids.index(vid)
        except ValueError:
            sys.exit("The video inserted wasn't found on the course, exiting")
        vidnum = startat + 1
        listvids = listvids[startat:]
    if filink is not None:
        vid = _stats_path(filink)
        try:
            listvids = listvids[:listvids.index(vid) + 1]
        except ValueError:
            sys.exit("The video inserted wasn't found on the course, exiting")
    ques = "y"
    if len(listvids) > leftrep:
        ques = input("Looks like you don\'t have enough plays remaining to download the whole course, do you want to continue y/n?: ")
        if ques not in ("y", "Y", "n", "N"):
            sys.exit("Invalid answer")
    if ques in ("y", "Y"):
        leftvids = len(listvids)
        for video in listvids:
            idsuffix = re.findall(r'(=[0-9]*)', video)[0]
            vidurl = baseurl + "/video?id" + idsuffix
            pcheckvid = BeautifulSoup(navig.get(vidurl).text, 'html.parser')
            cname = namecleaner(pcheckvid.h2.get_text().strip())
            cnum = str(vidnum).zfill(padwidth)
            # Landing on the account logs page instead of a video page is
            # treated as having no plays left.
            if cname == "Account Logs":
                sys.exit('You don\'t have plays left, execute \"{} -d {} -sf {} -dm\" inside the corresponding direcotory the next month'.format(sys.argv[0], clink, vidurl))
            filename = cnum + " - " + cname
            if pcheckvid.embed:
                yturl = _youtube_watch_url(pcheckvid.embed["src"])
                print (yturl)
                try:
                    ydl_opts = {"outtmpl" : filename}
                    if convert:
                        ydl_opts['postprocessors'] = [{'key':'FFmpegVideoConvertor','preferedformat': 'mp4'}]
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        ydl.download([yturl])
                except youtube_dl.utils.DownloadError:
                    print ("Youtube video seems down, downloading from Pentester Academy")
                    _save_from_academy(navig, idsuffix, filename + ".mp4")
            else:
                print ('Getting {}'.format(cname))
                _save_from_academy(navig, idsuffix, filename + ".mp4")
                print ('{} gotten'.format(cname))
            vidnum += 1
            leftvids -= 1
            print ("{} more videos to be downloaded".format(leftvids))
        print ("All videos where downloaded")

def main():
    """Parse the command line and run the selected mode.

    Exactly one of -l (list courses), -c (check what can be downloaded) or
    -d (download a course) is required. -c and -d prompt for the session
    cookie value first. Options that the help text ties to one mode are
    rejected with a usage error when given with another mode.
    """
    parser = argparse.ArgumentParser(description="Script for downloading courses from Pentester Academy and getting some other information related to it")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-l", "--list", help="Check current courses and amount of videos in Pentester Academy", action="store_true")
    parser.add_argument("-sd", "--start_date", help="Try to get the date when the course started, has to be used with -l option", action="store_true")
    group.add_argument("-c", "--check", help="Check how many courses you can download and list the courses that you can get", action="store_true")
    group.add_argument("-d", "--download", metavar="COURSE_URL", help="Link to the course that you want to download", action="store")
    parser.add_argument("-sf", "--start_from", metavar="VIDEO_URL",help="Link to the video from where you want to start downloading the course, has to be used with -d option", action="store")
    parser.add_argument("-fi", "--finish_in", metavar="VIDEO_URL", help="Link to the video where you want to stop downloading the course, has to be used with -d options", action="store")
    parser.add_argument("-dm", help="Don\'t make a directory, the videos will be downloaded in the current directory", action="store_true")
    parser.add_argument("-cm", "--convert", help="Videos downloaded from Youtube will be converted to mp4, note that using this options will take more time due the convertion proccess", action="store_true")
    args = parser.parse_args()
    # The help text restricts these options to a single mode; fail loudly
    # instead of silently ignoring them in the other modes.
    if args.start_date and not args.list:
        parser.error("-sd/--start_date has to be used with -l")
    if (args.start_from is not None or args.finish_in is not None) and args.download is None:
        parser.error("-sf/--start_from and -fi/--finish_in have to be used with -d")
    if args.list:
        courselister(args.start_date)
        return
    # Only the check and download modes need the session cookie.
    readcookie = input('Please enter a valid session cookie value: ')
    if args.check:
        possilester(readcookie)
    elif args.download:
        coursedownl(readcookie, args.download, args.start_from, args.finish_in,args.dm,args.convert)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()