-
Notifications
You must be signed in to change notification settings - Fork 124
/
vimeo-download.py
executable file
·189 lines (156 loc) · 5.99 KB
/
vimeo-download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python
# Downloads the video and audio streams from the master json url and recombines
# them into a single file
from __future__ import print_function
import requests
import base64
from tqdm import tqdm
import sys
import subprocess as sp
import os
import distutils.core
import argparse
import urlparse
import datetime
import random
import string
import re
# Prefix for this run: a timestamp followed by three random digits
TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
SALT = ''.join(random.choice(string.digits) for _ in range(3))
OUT_PREFIX = TIMESTAMP + '-' + SALT

# Create temp and output paths based on where the executable is located
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
TEMP_DIR = os.path.join(BASE_DIR, "temp")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
for directory in (TEMP_DIR, OUTPUT_DIR):
    if not os.path.exists(directory):
        print("Creating {}...".format(directory))
        os.makedirs(directory)

# Per-run temp directory; it is only created right before we need it
INSTANCE_TEMP = os.path.join(TEMP_DIR, OUT_PREFIX)

# Check operating system
OS_WIN = os.name == "nt"

# Find ffmpeg executable
if OS_WIN:
    FFMPEG_BIN = 'ffmpeg.exe'
else:
    try:
        # Import the submodule explicitly: `import distutils.core` does not
        # promise that `distutils.spawn` is available as an attribute.
        import distutils.spawn
        FFMPEG_BIN = distutils.spawn.find_executable("ffmpeg")
    except (ImportError, AttributeError):
        FFMPEG_BIN = None
    # find_executable() returns None when ffmpeg is not on PATH; fall back to
    # the bare name so the later subprocess call gets a string, not None.
    if not FFMPEG_BIN:
        FFMPEG_BIN = 'ffmpeg'
def download_video(base_url, content):
    """Download the tallest video stream into the INSTANCE_TEMP folder.

    Args:
        base_url: URL that the stream's own ``base_url`` is resolved against.
        content: list of video stream descriptions. Each entry is read for
            ``height``, ``base_url``, ``init_segment`` (base64-encoded) and
            ``segments`` (a list of dicts with a ``url`` key).

    Returns:
        True if every segment answered with HTTP 200. False as soon as one
        segment returns any other status; the partially written ``v.mp4`` is
        left in place.
    """
    # Pick the stream with the greatest height (the first one wins on ties)
    video = max(content, key=lambda stream: stream['height'])
    video_base_url = urlparse.urljoin(base_url, video['base_url'])
    print('video base url:', video_base_url)

    # Create INSTANCE_TEMP if it doesn't exist
    if not os.path.exists(INSTANCE_TEMP):
        print("Creating {}...".format(INSTANCE_TEMP))
        os.makedirs(INSTANCE_TEMP)

    # Download the video portion of the stream
    filename = os.path.join(INSTANCE_TEMP, "v.mp4")
    print('saving to %s' % filename)

    # `with` guarantees the file is closed even if a request or write raises
    with open(filename, 'wb') as video_file:
        video_file.write(base64.b64decode(video['init_segment']))

        for segment in tqdm(video['segments']):
            segment_url = video_base_url + segment['url']
            resp = requests.get(segment_url, stream=True)
            if resp.status_code != 200:
                print('not 200!')
                print(resp)
                print(segment_url)
                # Release the streaming connection before giving up
                resp.close()
                return False
            # Iterating the response object directly yields 128-byte chunks;
            # ask for larger ones to avoid thousands of tiny writes.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                video_file.write(chunk)

    return True
def download_audio(base_url, content):
    """Download the first listed audio stream into the INSTANCE_TEMP folder.

    Args:
        base_url: URL that the stream's own ``base_url`` is resolved against.
        content: list of audio stream descriptions; only ``content[0]`` is
            used. It is read for ``base_url``, ``init_segment``
            (base64-encoded) and ``segments`` (a list of dicts with a ``url``
            key).

    Returns:
        True if every segment answered with HTTP 200. False as soon as one
        segment returns any other status; the partially written ``a.mp3`` is
        left in place.
    """
    audio = content[0]
    audio_base_url = urlparse.urljoin(base_url, audio['base_url'])
    print('audio base url:', audio_base_url)

    # Create INSTANCE_TEMP if it doesn't exist
    if not os.path.exists(INSTANCE_TEMP):
        print("Creating {}...".format(INSTANCE_TEMP))
        os.makedirs(INSTANCE_TEMP)

    # Download the audio portion of the stream
    filename = os.path.join(INSTANCE_TEMP, "a.mp3")
    print('saving to %s' % filename)

    # `with` guarantees the file is closed even if a request or write raises
    with open(filename, 'wb') as audio_file:
        audio_file.write(base64.b64decode(audio['init_segment']))

        for segment in tqdm(audio['segments']):
            segment_url = audio_base_url + segment['url']
            resp = requests.get(segment_url, stream=True)
            if resp.status_code != 200:
                print('not 200!')
                print(resp)
                print(segment_url)
                # Release the streaming connection before giving up
                resp.close()
                return False
            # Iterating the response object directly yields 128-byte chunks;
            # ask for larger ones to avoid thousands of tiny writes.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                audio_file.write(chunk)

    return True
def merge_audio_video(output_filename):
    """Combine the downloaded audio and video files into one file with ffmpeg.

    Reads ``a.mp3`` and ``v.mp4`` from ``TEMP_DIR/OUT_PREFIX`` and passes both
    to ffmpeg with ``-acodec copy -vcodec copy``.

    Args:
        output_filename: path of the file ffmpeg should write.

    Returns:
        The exit status reported by ``subprocess.call`` for the ffmpeg run, so
        callers can detect a failed merge.
    """
    instance_dir = os.path.join(TEMP_DIR, OUT_PREFIX)
    audio_filename = os.path.join(instance_dir, "a.mp3")
    video_filename = os.path.join(instance_dir, "v.mp4")
    command = [
        FFMPEG_BIN,
        '-i', audio_filename,
        '-i', video_filename,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        output_filename,
    ]
    print("ffmpeg command is:", command)
    # The shell is only used on Windows, exactly as before.
    # NOTE(review): shell=True passes output_filename through cmd.exe;
    # confirm it is still needed to locate ffmpeg.exe.
    if OS_WIN:
        return sp.call(command, shell=True)
    return sp.call(command)
if __name__ == "__main__":
    # Command-line entry point: download the streams described by the master
    # json (unless --skip-download) and merge them (unless --skip-merge).
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--url", action="store", help="master json url")
    parser.add_argument("-o", "--output", action="store",
                        help="output video filename without extension (mp4)",
                        default=None)
    parser.add_argument("-s", "--skip-download", action="store",
                        help="merges video and audio output of already downloaded streams",
                        metavar="TIMESTAMP")
    parser.add_argument("--skip-merge", action="store_true",
                        help="downloads only and doesn't merge")
    args = parser.parse_args()

    # Without a url there is nothing to download; fail with a usage message
    # instead of a traceback from requests.
    if not args.skip_download and not args.url:
        parser.error("-u/--url is required unless -s/--skip-download is given")

    # Set output filename depending on defaults
    if args.output:
        output_filename = os.path.join(OUTPUT_DIR, args.output + '.mp4')
    else:
        output_filename = os.path.join(OUTPUT_DIR, '{}_video.mp4'.format(OUT_PREFIX))
    print("Output filename set to:", output_filename)

    if not args.skip_download:
        master_json_url = args.url

        # get the content
        resp = requests.get(master_json_url)
        if resp.status_code != 200:
            match = re.search(r'<TITLE>(.+)</TITLE>', resp.content, re.IGNORECASE)
            # Error pages without a <TITLE> used to crash on match.group()
            title = match.group(1) if match else str(resp.reason)
            print('HTTP error (' + str(resp.status_code) + '): ' + title)
            sys.exit(1)
        content = resp.json()
        base_url = urlparse.urljoin(master_json_url, content['base_url'])

        # Download the components of the stream; report failure to the shell
        if not download_video(base_url, content['video']) or not download_audio(base_url, content['audio']):
            sys.exit(1)

    # Overwrite timestamp if skipping download
    if args.skip_download:
        TIMESTAMP = args.skip_download
        # merge_audio_video() looks in TEMP_DIR/OUT_PREFIX, so the prefix has
        # to be rebound as well; changing TIMESTAMP alone has no effect.
        # The value must therefore be the name of the run's folder under temp/.
        OUT_PREFIX = args.skip_download
        INSTANCE_TEMP = os.path.join(TEMP_DIR, OUT_PREFIX)
        print("Overriding timestamp with:", TIMESTAMP)

    # Combine audio and video
    if not args.skip_merge:
        merge_audio_video(output_filename)