Skip to content

Commit

Permalink
Add argparse, python3 support, refactor (eMBee#1)
Browse files Browse the repository at this point in the history
 * Add Linux and Mac support, previous version of Windows only
 * Add Python3 Support
 * Refactor for readability
 * Add temp and output folders
  • Loading branch information
lionelyoung authored and eMBee committed Apr 24, 2017
1 parent d0b7560 commit 8ea4f3d
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 86 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
output/
temp/
30 changes: 20 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,27 +1,37 @@
Downloads segmented audio+video from Vimeo and saves as .mp4

Usage: 'python vimeo_downloader.py http://...master.json?base64_init=1 optional_name'
This script is useful for cases where youtube-dl is unable to find the master url,
for example on pages that require login or cookies.

Install
=======

to use this script, the master url needs to be manually extracted from the page.
Install requirements with `pip install -r requirements.txt`

for a more convenient experience use youtube-dl ( http://rg3.github.io/youtube-dl/ )
Usage
=====

this script is useful for cases where youtube-dl is unable to find the master url,
for example on pages that require login or cookies.
To use this script, the master url needs to be manually extracted from the page:

`python vimeo-download.py --url "http://...master.json?base64_init=1" --output <optional_name>`

to get the master url:
To get the master url:

1. Open the network tab in the inspector
2. Find the url of a request to the master.json file
3. Run the script with the url as argument

Acknowledgements
=======

Code merges the following gists:

code merges the following gists:
- https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd
- https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f
- https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df

https://gist.github.com/alexeygrigorev/a1bc540925054b71e1a7268e50ad55cd
Alternatives
============

https://gist.github.com/tayiorbeii/d78c7e4b338b031ce8090b30b395a46f
For a more convenient experience, use youtube-dl ( http://rg3.github.io/youtube-dl/ ) whenever it is able to find the master url on its own.

https://gist.github.com/paschoaletto/7f65b7e36b76ccde9fe52b74b62ab9df
13 changes: 13 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
asn1crypto==0.22.0
cffi==1.10.0
cryptography==1.8.1
enum34==1.1.6
idna==2.5
ipaddress==1.0.18
packaging==16.8
pycparser==2.17
pyOpenSSL==17.0.0
pyparsing==2.2.0
requests==2.13.0
six==1.10.0
tqdm==4.11.2
239 changes: 163 additions & 76 deletions vimeo-download.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,81 +1,168 @@
#!/usr/bin/env python
# Downloads the video and audio streams from the master json url and recombines
# it into a single file
from __future__ import print_function
import requests
import base64
from tqdm import tqdm
import sys
import subprocess as sp

# NOTE(review): this flat script appears to be the pre-refactor version shown
# on the removed side of the diff; indentation was lost in extraction.
# It downloads the video and audio segments listed in the master json and
# hands both files to ffmpeg.
FFMPEG_BIN = 'ffmpeg.exe'

# The master json url is the first command line argument.
master_json_url = sys.argv[1]
# Find the last '/' that lies before the final 26 characters of the url and
# cut 5 more characters in front of it.
# NOTE(review): presumably this strips a trailing
# "video/<id>/master.json?base64_init=1" -- confirm against a real url.
base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5]

resp = requests.get(master_json_url)
content = resp.json()

# Select the entry of content['video'] with the largest 'height'.
heights = [(i, d['height']) for (i, d) in enumerate(content['video'])]
# NOTE(review): a tuple parameter in a lambda is Python 2-only syntax.
idx, _ = max(heights, key=lambda (_, h): h)
video = content['video'][idx]
video_base_url = base_url + 'video/' + video['base_url']
print 'base url:', video_base_url

filename = 'v.mp4'
video_filename = filename
print 'saving to %s' % filename

video_file = open(filename, 'wb')

# The init segment is embedded in the json as base64 and is written first.
init_segment = base64.b64decode(video['init_segment'])
video_file.write(init_segment)

# Append every segment in order; stop at the first response that is not 200.
for segment in tqdm(video['segments']):
segment_url = video_base_url + segment['url']
resp = requests.get(segment_url, stream=True)
if resp.status_code != 200:
print 'not 200!'
print resp
print segment_url
break
for chunk in resp:
video_file.write(chunk)

video_file.flush()
video_file.close()

# Only the first entry of content['audio'] is used.
audio = content['audio'][0]
# The first three characters of the audio base_url are dropped.
# NOTE(review): presumably a leading "../" -- confirm.
audio_base_url = base_url + audio['base_url'][3:]
print 'base url:', audio_base_url

filename = 'a.mp3'
audio_filename = filename
print 'saving to %s' % filename

audio_file = open(filename, 'wb')

init_segment = base64.b64decode(audio['init_segment'])
audio_file.write(init_segment)

# Same segment loop as for the video stream above.
for segment in tqdm(audio['segments']):
segment_url = audio_base_url + segment['url']
resp = requests.get(segment_url, stream=True)
if resp.status_code != 200:
print 'not 200!'
print resp
print segment_url
break
for chunk in resp:
audio_file.write(chunk)

audio_file.flush()
audio_file.close()

# NOTE(review): sys.argv[2] is indexed unconditionally, so omitting the name
# raises IndexError before the 'video.mp4' fallback can apply.
filename = sys.argv[2] + '.mp4' if sys.argv[2] else 'video.mp4'

# Audio is copied as-is, video is encoded with the h264 codec.
command = [ FFMPEG_BIN,
'-y', # (optional) overwrite output file if it exists
'-i', audio_filename,
'-i',video_filename,
'-acodec', 'copy',
'-vcodec', 'h264',
filename ]

sp.call(command, shell=True)
import os
import distutils
import argparse
import datetime


# Prefix for this run; it also names the per-run folder inside TEMP_DIR
TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Create temp and output paths based on where the executable is located
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
TEMP_DIR = os.path.join(BASE_DIR, "temp")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
for directory in (TEMP_DIR, OUTPUT_DIR):
    if not os.path.exists(directory):
        print("Creating {}...".format(directory))
        os.makedirs(directory)

# Only the path is computed here; the directory itself is created by the
# download functions right before they need it
INSTANCE_TEMP = os.path.join(TEMP_DIR, TIMESTAMP)

# Check operating system
OS_WIN = os.name == "nt"

# Find ffmpeg executable.
# A bare "import distutils" does not load the distutils.spawn submodule, and
# distutils no longer exists on Python 3.12+, so prefer shutil.which and fall
# back to distutils only on interpreters that lack it (Python 2).
try:
    from shutil import which as _find_executable
except ImportError:
    from distutils.spawn import find_executable as _find_executable

# Both lookups return None when ffmpeg is not on PATH
FFMPEG_BIN = 'ffmpeg.exe' if OS_WIN else _find_executable("ffmpeg")

def download_video(base_url, content):
    """Download the video portion of the content into the INSTANCE_TEMP folder.

    The entry of ``content['video']`` with the largest ``height`` is chosen.
    Its base64 ``init_segment`` is written first, followed by every segment
    in order, into ``v.mp4`` inside INSTANCE_TEMP.

    Args:
        base_url: url prefix that the stream's own ``base_url`` is appended to.
        content: parsed master json; must provide a ``video`` list whose
            entries have ``height``, ``base_url``, ``init_segment`` and
            ``segments`` (each segment having a ``url``).

    The download stops at the first segment whose HTTP status is not 200,
    after printing the response and url; the file then holds only the
    segments fetched so far.
    """
    # max() with a plain key function works on Python 2 and 3 alike (tuple
    # parameters in a lambda are Python 2 only); ties keep the first entry.
    video = max(content['video'], key=lambda rendition: rendition['height'])
    video_base_url = base_url + 'video/' + video['base_url']
    print('video base url:', video_base_url)

    # Create INSTANCE_TEMP if it doesn't exist
    if not os.path.exists(INSTANCE_TEMP):
        print("Creating {}...".format(INSTANCE_TEMP))
        os.makedirs(INSTANCE_TEMP)

    # Download the video portion of the stream
    filename = os.path.join(INSTANCE_TEMP, "v.mp4")
    print('saving to %s' % filename)

    # The context manager closes the file even if a request raises
    with open(filename, 'wb') as video_file:
        video_file.write(base64.b64decode(video['init_segment']))

        for segment in tqdm(video['segments']):
            segment_url = video_base_url + segment['url']
            resp = requests.get(segment_url, stream=True)
            if resp.status_code != 200:
                print('not 200!')
                print(resp)
                print(segment_url)
                break
            for chunk in resp:
                video_file.write(chunk)



def download_audio(base_url, content):
    """Download the audio portion of the content into the INSTANCE_TEMP folder.

    Only the first entry of ``content['audio']`` is used. Its base64
    ``init_segment`` is written first, followed by every segment in order,
    into ``a.mp3`` inside INSTANCE_TEMP.

    Args:
        base_url: url prefix that the stream's own ``base_url`` is appended to.
        content: parsed master json; must provide an ``audio`` list whose
            first entry has ``base_url``, ``init_segment`` and ``segments``
            (each segment having a ``url``).

    The download stops at the first segment whose HTTP status is not 200,
    after printing the response and url; the file then holds only the
    segments fetched so far.
    """
    audio = content['audio'][0]
    # The first three characters of the stream's base_url are dropped
    # (presumably a leading "../" -- verify against a real master json)
    audio_base_url = base_url + audio['base_url'][3:]
    print('audio base url:', audio_base_url)

    # Create INSTANCE_TEMP if it doesn't exist
    if not os.path.exists(INSTANCE_TEMP):
        print("Creating {}...".format(INSTANCE_TEMP))
        os.makedirs(INSTANCE_TEMP)

    # Download the audio portion of the stream
    filename = os.path.join(INSTANCE_TEMP, "a.mp3")
    print('saving to %s' % filename)

    # The context manager closes the file even if a request raises
    with open(filename, 'wb') as audio_file:
        audio_file.write(base64.b64decode(audio['init_segment']))

        for segment in tqdm(audio['segments']):
            segment_url = audio_base_url + segment['url']
            resp = requests.get(segment_url, stream=True)
            if resp.status_code != 200:
                print('not 200!')
                print(resp)
                print(segment_url)
                break
            for chunk in resp:
                audio_file.write(chunk)

def merge_audio_video(input_timestamp, output_filename):
    """Combine a downloaded audio and video stream into one file with ffmpeg.

    Args:
        input_timestamp: name of the folder inside TEMP_DIR that holds the
            ``a.mp3`` and ``v.mp4`` files of the run to merge.
        output_filename: path of the merged file that ffmpeg writes.
    """
    # Honour the argument instead of the module-level TIMESTAMP so the caller
    # decides which run gets merged
    instance_dir = os.path.join(TEMP_DIR, input_timestamp)
    audio_filename = os.path.join(instance_dir, "a.mp3")
    video_filename = os.path.join(instance_dir, "v.mp4")

    # Audio is copied unchanged; video is encoded with the h264 codec
    command = [FFMPEG_BIN,
               '-i', audio_filename,
               '-i', video_filename,
               '-acodec', 'copy',
               '-vcodec', 'h264',
               output_filename]
    print("ffmpeg command is:", command)

    # On Windows the command is run through the shell, as before
    if OS_WIN:
        sp.call(command, shell=True)
    else:
        sp.call(command)

if __name__ == "__main__":
    # Command line entry point: download the streams, then merge them.
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--url", action="store", help="master json url")
    parser.add_argument("-o", "--output", action="store",
                        help="output video filename without extension (mp4)",
                        default=None)
    parser.add_argument("-s", "--skip-download", action="store",
                        help="merges video and audio output of already downloaded streams",
                        metavar="TIMESTAMP")
    parser.add_argument("--skip-merge", action="store_true",
                        help="downloads only and doesn't merge")
    args = parser.parse_args()

    # Without a url there is nothing to download; report it as a usage error
    # instead of failing later with a TypeError while slicing None
    if not args.skip_download and not args.url:
        parser.error("--url is required unless --skip-download is given")

    # Set output filename depending on defaults
    if args.output:
        output_filename = os.path.join(OUTPUT_DIR, args.output + '.mp4')
    else:
        output_filename = os.path.join(OUTPUT_DIR, '{}_video.mp4'.format(TIMESTAMP))
    print("Output filename set to:", output_filename)

    if not args.skip_download:
        # parse the base_url: cut at the last '/' found before the final 26
        # characters of the url, minus 5 more characters
        master_json_url = args.url
        base_url = master_json_url[:master_json_url.rfind('/', 0, -26) - 5]

        # get the content
        resp = requests.get(master_json_url)
        content = resp.json()

        # Download the components of the stream
        download_video(base_url, content)
        download_audio(base_url, content)

    # Overwrite timestamp if skipping download, so the merge step reads the
    # files of the earlier run named on the command line
    if args.skip_download:
        TIMESTAMP = args.skip_download
        print("Overriding timestamp with:", TIMESTAMP)

    # Combine audio and video
    if not args.skip_merge:
        merge_audio_video(TIMESTAMP, output_filename)

0 comments on commit 8ea4f3d

Please sign in to comment.