Add argparse, python3 support, refactor (eMBee#1)

* Add Linux and Mac support, previous version of Windows only * Add Python3 Support * Refactor for readability * Add temp and output folders
dmitry-lyutenko · Apr 24, 2017 · 8ea4f3d · 8ea4f3d
1 parent d0b7560
commit 8ea4f3d
Show file tree

Hide file tree

Showing 4 changed files with 198 additions and 86 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+output/
+temp/
diff --git a/README.md b/README.md
@@ -1,27 +1,37 @@
 Downloads segmented audio+video from Vimeo and saves as .mp4
 
-Usage: 'python vimeo_downloader.py http://...master.json?base64_init=1 optional_name'
+This script is useful for cases where youtube-dl is unable to find the master url, 
+for example on pages that require login or cookies.
 
+Install
+=======
 
-to use this script, the master url needs to be manually extracted from the page. 
+Install requirements with `pip install -r requirements.txt`
 
-for a more convenient experience use youtube-dl ( http://rg3.github.io/youtube-dl/ )
+Usage
+=====
 
-this script is useful for cases where youtube-dl is unable to find the master url, 
-for example on pages that require login or cookies.
+To use this script, the master url needs to be manually extracted from the page: 
 
+   `python vimeo-download.py --url "http://...master.json?base64_init=1" --output <optional_name>`
 
-to get the master url:
+To get the master url:
 
    1. Open the network tab in the inspector
    2. Find the url of a request to the master.json file
    3. Run the script with the url as argument
 
+Acknowledgements
+=======
+
+Code merges the following gists:
 
-code merges the following gists:
+- https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd
+- https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f
+- https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df
 
-https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd
+Alternatives
+============
 
-https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f
+For a more convenient experience use youtube-dl ( http://rg3.github.io/youtube-dl/ ) if youtube-dl is able to find the url
 
-https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,13 @@
+asn1crypto==0.22.0
+cffi==1.10.0
+cryptography==1.8.1
+enum34==1.1.6
+idna==2.5
+ipaddress==1.0.18
+packaging==16.8
+pycparser==2.17
+pyOpenSSL==17.0.0
+pyparsing==2.2.0
+requests==2.13.0
+six==1.10.0
+tqdm==4.11.2
diff --git a/vimeo-download.py b/vimeo-download.py
@@ -1,81 +1,168 @@
+#!/usr/bin/env python
+# Downloads the video and audio streams from the master json url and recombines
+# it into a single file
+from __future__ import print_function
 import requests
 import base64
 from tqdm import tqdm
 import sys
 import subprocess as sp
-
-FFMPEG_BIN = 'ffmpeg.exe'
-
-master_json_url = sys.argv[1]
-base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5]
-
-resp = requests.get(master_json_url)
-content = resp.json()
-
-heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
-idx, _ = max(heights, key=lambda (_, h): h)
-video = content['video'][idx]
-video_base_url = base_url + 'video/' + video['base_url']
-print 'base url:', video_base_url
-
-filename = 'v.mp4'
-video_filename = filename
-print 'saving to %s' % filename
-
-video_file = open(filename, 'wb')
-
-init_segment = base64.b64decode(video['init_segment'])
-video_file.write(init_segment)
-
-for segment in tqdm(video['segments']):
-    segment_url = video_base_url + segment['url']
-    resp = requests.get(segment_url, stream=True)
-    if resp.status_code != 200:
-        print 'not 200!'
-        print resp
-        print segment_url
-        break
-    for chunk in resp:
-        video_file.write(chunk)
-
-video_file.flush()
-video_file.close()
-
-audio = content['audio'][0]
-audio_base_url = base_url + audio['base_url'][3:]
-print 'base url:', audio_base_url
-
-filename = 'a.mp3'
-audio_filename = filename
-print 'saving to %s' % filename
-
-audio_file = open(filename, 'wb')
-
-init_segment = base64.b64decode(audio['init_segment'])
-audio_file.write(init_segment)
-
-for segment in tqdm(audio['segments']):
-    segment_url = audio_base_url + segment['url']
-    resp = requests.get(segment_url, stream=True)
-    if resp.status_code != 200:
-        print 'not 200!'
-        print resp
-        print segment_url
-        break
-    for chunk in resp:
-        audio_file.write(chunk)
-
-audio_file.flush()
-audio_file.close()
-
-filename = sys.argv[2] + '.mp4' if sys.argv[2] else 'video.mp4'
-
-command = [ FFMPEG_BIN,
-        '-y', # (optional) overwrite output file if it exists
-        '-i', audio_filename,
-        '-i',video_filename,
-        '-acodec', 'copy',
-        '-vcodec', 'h264',
-        filename ]
-
-sp.call(command, shell=True)
+import os
+import distutils
+import argparse
+import datetime
+
+
+# Prefix for this run
+TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+
+# Create temp and output paths based on where the executable is located
+BASE_DIR = os.path.dirname(os.path.realpath(__file__))
+TEMP_DIR = os.path.join(BASE_DIR, "temp")
+OUTPUT_DIR = os.path.join(BASE_DIR, "output")
+for directory in (TEMP_DIR, OUTPUT_DIR):
+    if not os.path.exists(directory):
+        print("Creating {}...".format(directory))
+        os.makedirs(directory)
+
+# Path of this run's temp directory; the directory itself is only created
+# by the download functions right before they need it
+INSTANCE_TEMP = os.path.join(TEMP_DIR, TIMESTAMP)
+
+# Check operating system
+OS_WIN = os.name == "nt"
+
+# Find ffmpeg executable. A bare `import distutils` does not load the spawn
+# submodule, so import it explicitly before using find_executable.
+import distutils.spawn
+FFMPEG_BIN = 'ffmpeg.exe' if OS_WIN else distutils.spawn.find_executable("ffmpeg")
+
+def download_video(base_url, content):
+    """Download the video portion of the content into the INSTANCE_TEMP folder.
+
+    Selects the entry of content['video'] with the largest 'height', then
+    writes its base64-decoded init segment followed by every downloaded
+    segment to INSTANCE_TEMP/v.mp4. Stops at the first segment whose HTTP
+    status is not 200, leaving a partial file behind.
+    """
+    # A plain one-argument lambda works on Python 2 and 3; the former
+    # tuple-unpacking lambda is a SyntaxError on Python 3 (PEP 3113).
+    # max() still returns the first stream with the greatest height.
+    video = max(content['video'], key=lambda stream: stream['height'])
+    video_base_url = base_url + 'video/' + video['base_url']
+    print('video base url:', video_base_url)
+
+    # Create INSTANCE_TEMP if it doesn't exist
+    if not os.path.exists(INSTANCE_TEMP):
+        print("Creating {}...".format(INSTANCE_TEMP))
+        os.makedirs(INSTANCE_TEMP)
+
+    # Download the video portion of the stream
+    filename = os.path.join(INSTANCE_TEMP, "v.mp4")
+    print('saving to %s' % filename)
+
+    # The context manager closes the file even if a request raises
+    with open(filename, 'wb') as video_file:
+        video_file.write(base64.b64decode(video['init_segment']))
+
+        for segment in tqdm(video['segments']):
+            segment_url = video_base_url + segment['url']
+            resp = requests.get(segment_url, stream=True)
+            if resp.status_code != 200:
+                print('not 200!')
+                print(resp)
+                print(segment_url)
+                break
+            for chunk in resp:
+                video_file.write(chunk)
+
+
+
+def download_audio(base_url, content):
+    """Download the audio portion of the content into the INSTANCE_TEMP folder.
+
+    Uses the first entry of content['audio'], then writes its base64-decoded
+    init segment followed by every downloaded segment to INSTANCE_TEMP/a.mp3.
+    Stops at the first segment whose HTTP status is not 200, leaving a
+    partial file behind.
+    """
+    audio = content['audio'][0]
+    # The first three characters of the audio base_url are dropped
+    # (presumably a leading '../' -- confirm against the master json)
+    audio_base_url = base_url + audio['base_url'][3:]
+    print('audio base url:', audio_base_url)
+
+    # Create INSTANCE_TEMP if it doesn't exist
+    if not os.path.exists(INSTANCE_TEMP):
+        print("Creating {}...".format(INSTANCE_TEMP))
+        os.makedirs(INSTANCE_TEMP)
+
+    # Download the audio portion of the stream
+    filename = os.path.join(INSTANCE_TEMP, "a.mp3")
+    print('saving to %s' % filename)
+
+    # The context manager closes the file even if a request raises
+    with open(filename, 'wb') as audio_file:
+        audio_file.write(base64.b64decode(audio['init_segment']))
+
+        for segment in tqdm(audio['segments']):
+            segment_url = audio_base_url + segment['url']
+            resp = requests.get(segment_url, stream=True)
+            if resp.status_code != 200:
+                print('not 200!')
+                print(resp)
+                print(segment_url)
+                break
+            for chunk in resp:
+                audio_file.write(chunk)
+
+def merge_audio_video(input_timestamp, output_filename):
+    """Merge previously downloaded audio and video streams with ffmpeg.
+
+    Reads a.mp3 and v.mp4 from TEMP_DIR/<input_timestamp> and invokes ffmpeg
+    with '-acodec copy' and '-vcodec h264', writing to output_filename.
+
+    Raises RuntimeError when no ffmpeg executable was located.
+    """
+    # Fail with a clear message instead of an opaque error inside subprocess
+    if not FFMPEG_BIN:
+        raise RuntimeError("ffmpeg executable not found")
+
+    # Honour the timestamp passed by the caller rather than the module global
+    stream_dir = os.path.join(TEMP_DIR, input_timestamp)
+    audio_filename = os.path.join(stream_dir, "a.mp3")
+    video_filename = os.path.join(stream_dir, "v.mp4")
+    command = [ FFMPEG_BIN,
+            '-i', audio_filename,
+            '-i', video_filename,
+            '-acodec', 'copy',
+            '-vcodec', 'h264',
+            output_filename ]
+    print("ffmpeg command is:", command)
+
+    # As before, the command goes through the shell on Windows only
+    sp.call(command, shell=OS_WIN)
+
+if __name__ == "__main__":
+    # Command line entry point: download the streams, then merge them
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-u", "--url", action="store", help="master json url")
+    parser.add_argument("-o", "--output", action="store",
+                        help="output video filename without extension (mp4)",
+                        default=None)
+    parser.add_argument("-s", "--skip-download", action="store",
+                        help="merges video and audio output of already downloaded streams",
+                        metavar="TIMESTAMP")
+    parser.add_argument("--skip-merge", action="store_true",
+                        help="downloads only and doesn't merge")
+    args = parser.parse_args()
+
+    # The url may only be omitted when re-merging an earlier download;
+    # without this check a missing url fails later with an AttributeError
+    if not args.skip_download and not args.url:
+        parser.error("--url is required unless --skip-download is given")
+
+    # Set output filename depending on defaults
+    if args.output:
+        output_filename = os.path.join(OUTPUT_DIR, args.output + '.mp4')
+    else:
+        output_filename = os.path.join(OUTPUT_DIR, '{}_video.mp4'.format(TIMESTAMP))
+    print("Output filename set to:", output_filename)
+
+    if args.skip_download:
+        # Overwrite timestamp so the merge picks up the earlier run's streams
+        TIMESTAMP = args.skip_download
+        print("Overriding timestamp with:", TIMESTAMP)
+    else:
+        # parse the base_url
+        master_json_url = args.url
+        base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5]
+
+        # get the content
+        resp = requests.get(master_json_url)
+        content = resp.json()
+
+        # Download the components of the stream
+        download_video(base_url, content)
+        download_audio(base_url, content)
+
+    # Combine audio and video
+    if not args.skip_merge:
+        merge_audio_video(TIMESTAMP, output_filename)