From 8ea4f3d36813ec1c46e03e4211485548705216f8 Mon Sep 17 00:00:00 2001 From: Lionel Young Date: Mon, 24 Apr 2017 14:52:53 +0800 Subject: [PATCH] Add argparse, python3 support, refactor (#1) * Add Linux and Mac support, previous version of Windows only * Add Python3 Support * Refactor for readability * Add temp and output folders --- .gitignore | 2 + README.md | 30 ++++-- requirements.txt | 13 +++ vimeo-download.py | 239 +++++++++++++++++++++++++++++++--------------- 4 files changed, 198 insertions(+), 86 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt mode change 100644 => 100755 vimeo-download.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3569f8c --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +output/ +temp/ diff --git a/README.md b/README.md index 1faacb3..da02be8 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,37 @@ Downloads segmented audio+video from Vimeo and saves as .mp4 -Usage: 'python vimeo_downloader.py http://...master.json?base64_init=1 optional_name' +This script is useful for cases where youtube-dl is unable to find the master url, +for example on pages that require login or cookies. +Install +======= -to use this script, the master url needs to be manually extracted from the page. +Install requirements with `pip install -r requirements.txt` -for a more convenient experience use youtube-dl ( http://rg3.github.io/youtube-dl/ ) +Usage +===== -this script is useful for cases where youtube-dl is unable to find the master url, -for example on pages that require login or cookies. +To use this script, the master url needs to be manually extracted from the page: + `python vimeo-download.py --url "http://...master.json?base64_init=1" --output <optional_name>` -to get the master url: +To get the master url: 1. Open the network tab in the inspector 2. Find the url of a request to the master.json file 3. 
Run the script with the url as argument +Acknowledgements +======= + +Code merges the following gists: -code merges the following gists: +- https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd +- https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f +- https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df -https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd +Alternatives +============ -https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f +For a more convenient experience use youtube-dl ( http://rg3.github.io/youtube-dl/ ) if youtube-dl is able to find the url -https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5476138 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +asn1crypto==0.22.0 +cffi==1.10.0 +cryptography==1.8.1 +enum34==1.1.6 +idna==2.5 +ipaddress==1.0.18 +packaging==16.8 +pycparser==2.17 +pyOpenSSL==17.0.0 +pyparsing==2.2.0 +requests==2.13.0 +six==1.10.0 +tqdm==4.11.2 diff --git a/vimeo-download.py b/vimeo-download.py old mode 100644 new mode 100755 index 2c06aa7..ab37a05 --- a/vimeo-download.py +++ b/vimeo-download.py @@ -1,81 +1,168 @@ +#!/usr/bin/env python +# Downloads the video and audio streams from the master json url and recombines +# it into a single file +from __future__ import print_function import requests import base64 from tqdm import tqdm import sys import subprocess as sp - -FFMPEG_BIN = 'ffmpeg.exe' - -master_json_url = sys.argv[1] -base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5] - -resp = requests.get(master_json_url) -content = resp.json() - -heights = [(i, d['height']) for (i, d) in enumerate(content['video'])] -idx, _ = max(heights, key=lambda (_, h): h) -video = content['video'][idx] -video_base_url = base_url + 'video/' + video['base_url'] -print 'base url:', video_base_url - -filename = 'v.mp4' -video_filename = filename 
-print 'saving to %s' % filename - -video_file = open(filename, 'wb') - -init_segment = base64.b64decode(video['init_segment']) -video_file.write(init_segment) - -for segment in tqdm(video['segments']): - segment_url = video_base_url + segment['url'] - resp = requests.get(segment_url, stream=True) - if resp.status_code != 200: - print 'not 200!' - print resp - print segment_url - break - for chunk in resp: - video_file.write(chunk) - -video_file.flush() -video_file.close() - -audio = content['audio'][0] -audio_base_url = base_url + audio['base_url'][3:] -print 'base url:', audio_base_url - -filename = 'a.mp3' -audio_filename = filename -print 'saving to %s' % filename - -audio_file = open(filename, 'wb') - -init_segment = base64.b64decode(audio['init_segment']) -audio_file.write(init_segment) - -for segment in tqdm(audio['segments']): - segment_url = audio_base_url + segment['url'] - resp = requests.get(segment_url, stream=True) - if resp.status_code != 200: - print 'not 200!' - print resp - print segment_url - break - for chunk in resp: - audio_file.write(chunk) - -audio_file.flush() -audio_file.close() - -filename = sys.argv[2] + '.mp4' if sys.argv[2] else 'video.mp4' - -command = [ FFMPEG_BIN, - '-y', # (optional) overwrite output file if it exists - '-i', audio_filename, - '-i',video_filename, - '-acodec', 'copy', - '-vcodec', 'h264', - filename ] - -sp.call(command, shell=True) \ No newline at end of file +import os +import distutils +import argparse +import datetime + + +# Prefix for this run +TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + +# Create temp and output paths based on where the executable is located +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) +TEMP_DIR = os.path.join(BASE_DIR, "temp") +OUTPUT_DIR = os.path.join(BASE_DIR, "output") +for directory in (TEMP_DIR, OUTPUT_DIR): + if not os.path.exists(directory): + print("Creating {}...".format(directory)) + os.makedirs(directory) + +# create temp directory right before we 
need it
+INSTANCE_TEMP = os.path.join(TEMP_DIR, TIMESTAMP)
+
+# Check operating system
+OS_WIN = os.name == "nt"
+
+# Find ffmpeg executable. A bare `import distutils` does not load the
+# `distutils.spawn` submodule, so import the lookup function explicitly:
+# shutil.which on Python 3, distutils.spawn.find_executable on Python 2.
+try:
+    from shutil import which as _find_executable
+except ImportError:
+    from distutils.spawn import find_executable as _find_executable
+FFMPEG_BIN = 'ffmpeg.exe' if OS_WIN else _find_executable("ffmpeg")
+
+
+def _download_stream(stream, stream_base_url, filename):
+    """Writes one stream (init segment plus every media segment) to filename.
+
+    Creates INSTANCE_TEMP first if it does not exist.
+
+    Returns True when every segment answered with HTTP 200, False when the
+    download stopped early; the file then only holds the segments fetched
+    so far.
+    """
+    # Create INSTANCE_TEMP if it doesn't exist
+    if not os.path.exists(INSTANCE_TEMP):
+        print("Creating {}...".format(INSTANCE_TEMP))
+        os.makedirs(INSTANCE_TEMP)
+
+    print('saving to %s' % filename)
+
+    # `with` closes the file even when a request raises part-way through
+    with open(filename, 'wb') as stream_file:
+        stream_file.write(base64.b64decode(stream['init_segment']))
+
+        for segment in tqdm(stream['segments']):
+            segment_url = stream_base_url + segment['url']
+            resp = requests.get(segment_url, stream=True)
+            if resp.status_code != 200:
+                print('not 200!')
+                print(resp)
+                print(segment_url)
+                return False
+            # Iterating the response object itself yields 128-byte chunks;
+            # ask for larger ones to cut down on tiny writes.
+            for chunk in resp.iter_content(chunk_size=64 * 1024):
+                stream_file.write(chunk)
+
+    return True
+
+
+def download_video(base_url, content):
+    """Downloads the tallest video stream of the content into the INSTANCE_TEMP folder.
+
+    base_url is the prefix derived from the master json url and content is
+    the decoded master json. The stream is saved as v.mp4.
+
+    Returns True if every segment was downloaded, False otherwise.
+    """
+    # The former `lambda (_, h): h` is a SyntaxError on Python 3 (PEP 3113);
+    # max() still returns the first entry with the greatest height.
+    video = max(content['video'], key=lambda stream: stream['height'])
+    video_base_url = base_url + 'video/' + video['base_url']
+    print('video base url:', video_base_url)
+
+    return _download_stream(video, video_base_url,
+                            os.path.join(INSTANCE_TEMP, "v.mp4"))
+
+
+def download_audio(base_url, content):
+    """Downloads the first audio stream of the content into the INSTANCE_TEMP folder.
+
+    base_url is the prefix derived from the master json url and content is
+    the decoded master json. The stream is saved as a.mp3.
+
+    Returns True if every segment was downloaded, False otherwise.
+    """
+    audio = content['audio'][0]
+    # NOTE(review): [3:] presumably drops a leading '../' from the audio
+    # base_url - confirm against a real master.json
+    audio_base_url = base_url + audio['base_url'][3:]
+    print('audio base url:', audio_base_url)
+
+    return _download_stream(audio, audio_base_url,
+                            os.path.join(INSTANCE_TEMP, "a.mp3"))
+
+
+def merge_audio_video(input_timestamp, output_filename):
+    """Combines a.mp3 and v.mp4 of one run into output_filename using ffmpeg.
+
+    input_timestamp names the sub-folder of TEMP_DIR that holds the two
+    downloaded streams.
+
+    Returns the ffmpeg exit code. Raises RuntimeError when no ffmpeg
+    executable was found.
+    """
+    if not FFMPEG_BIN:
+        raise RuntimeError("ffmpeg executable not found on PATH")
+
+    # Use the timestamp that was passed in rather than the module global
+    audio_filename = os.path.join(TEMP_DIR, input_timestamp, "a.mp3")
+    video_filename = os.path.join(TEMP_DIR, input_timestamp, "v.mp4")
+    command = [ FFMPEG_BIN,
+            '-i', audio_filename,
+            '-i', video_filename,
+            '-acodec', 'copy',
+            '-vcodec', 'h264',
+            output_filename ]
+    print("ffmpeg command is:", command)
+
+    # An argument list needs no shell on any platform, Windows included, and
+    # skipping the shell keeps special characters in filenames intact.
+    return sp.call(command)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-u", "--url", action="store", help="master json url")
+    parser.add_argument("-o", "--output", action="store",
+                        help="output video filename without extension (mp4)",
+                        default=None)
+    parser.add_argument("-s", "--skip-download", action="store",
+                        help="merges video and audio output of already downloaded streams",
+                        metavar="TIMESTAMP")
+    parser.add_argument("--skip-merge", action="store_true",
+                        help="downloads only and doesn't merge")
+    args = parser.parse_args()
+
+    # Without this check a missing --url fails later on None.rfind()
+    if not args.skip_download and not args.url:
+        parser.error("--url is required unless --skip-download is given")
+
+    # Set output filename depending on defaults
+    if args.output:
+        output_filename = os.path.join(OUTPUT_DIR, args.output + '.mp4')
+    else:
+        output_filename = os.path.join(OUTPUT_DIR, '{}_video.mp4'.format(TIMESTAMP))
+    print("Output filename set to:", output_filename)
+
+    if not args.skip_download:
+        # parse the base_url
+        # NOTE(review): the -26 / -5 offsets appear to assume a url ending in
+        # '.../video/<id>/master.json?base64_init=1' - confirm for other urls
+        master_json_url = args.url
+        base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5]
+
+        # get the content, failing early on an HTTP error instead of trying
+        # to decode an error page as json
+        resp = requests.get(master_json_url)
+        resp.raise_for_status()
+        content = resp.json()
+
+        # Download the components of the stream
+        video_ok = download_video(base_url, content)
+        audio_ok = download_audio(base_url, content)
+        if not (video_ok and audio_ok):
+            print("Warning: some segments failed to download, the result may be truncated")
+
+    # Overwrite timestamp if skipping download
+    if args.skip_download:
+        TIMESTAMP = args.skip_download
+        print("Overriding timestamp with:", TIMESTAMP)
+
+    # Combine audio and video
+    if not args.skip_merge:
+        merge_audio_video(TIMESTAMP, output_filename)