forked from ApexWeed/orpheusbetter-crawler
-
Notifications
You must be signed in to change notification settings - Fork 2
/
transcode.py
executable file
·456 lines (407 loc) · 22.1 KB
/
transcode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
#!/usr/bin/env python3
import errno
import multiprocessing
import os
import pipes
import re
import shlex
import shutil
import signal
import subprocess
import sys
import mutagen.flac
import tagging
# Supported output formats, keyed by format name (these keys are also the
# accepted values of the command-line output_format argument). Each entry
# holds:
#   'enc'  -- which encoder family handles the format ('lame' or 'flac')
#   'ext'  -- file extension given to the transcoded files
#   'opts' -- option string inserted into the encoder command line
encoders = {
    '320': {
        'enc': 'lame',
        'ext': '.mp3',
        'opts': '-h -b 320 --ignore-tag-errors',
    },
    'V0': {
        'enc': 'lame',
        'ext': '.mp3',
        'opts': '-V 0 --vbr-new --ignore-tag-errors',
    },
    'V2': {
        'enc': 'lame',
        'ext': '.mp3',
        'opts': '-V 2 --vbr-new --ignore-tag-errors',
    },
    'FLAC': {
        'enc': 'flac',
        'ext': '.flac',
        'opts': '--best',
    },
}
class TranscodeException(Exception):
    '''
    Base class for the errors raised by this module while transcoding.
    '''


class TranscodeDownmixException(TranscodeException):
    '''
    Raised for a FLAC file with more than 2 channels, which is unsupported.
    '''


class UnknownSampleRateException(TranscodeException):
    '''
    Raised when a FLAC file needs resampling but its sample rate is not a
    multiple of 44100 or 48000.
    '''
# In most Unix shells, pipelines only report the return code of the
# last process. We need to know if any process in the transcode
# pipeline fails, not just the last one.
#
# This function constructs a pipeline of processes from a chain of
# commands just like a shell does, but it returns the status code (and
# stderr) of every process in the pipeline, not just the last one. The
# results are returned as a list of (code, stderr) pairs, one pair per
# process.
def run_pipeline(cmds):
    '''
    Runs cmds as a pipeline, each command's stdout feeding the next
    command's stdin.

    cmds is a sequence of command strings; each one is split with
    shlex.split() and started without a shell.

    Returns a list of (returncode, stderr) pairs in the same order as
    cmds, where stderr is the raw bytes the process wrote. An empty cmds
    yields an empty list.
    '''
    # The Python executable (and its children) ignore SIGPIPE. (See
    # http://bugs.python.org/issue1652) Our subprocesses need to see
    # it.
    sigpipe_handler = signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    stdin = None
    last_proc = None
    procs = []
    try:
        for cmd in cmds:
            proc = subprocess.Popen(shlex.split(cmd), stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if last_proc is not None:
                # Ensure last_proc receives SIGPIPE if proc exits first
                last_proc.stdout.close()
            procs.append(proc)
            stdin = proc.stdout
            last_proc = proc
    finally:
        # Restore our own handler whether or not every process started.
        signal.signal(signal.SIGPIPE, sigpipe_handler)

    if last_proc is None:
        # No commands were given, so there is nothing to wait for.
        return []

    # Drains the final stage's stdout/stderr and waits for it to exit.
    last_stderr = last_proc.communicate()[1]

    # NOTE(review): the stderr of the earlier stages is only read below,
    # after the last stage has finished; a stage that writes enough to
    # fill its stderr pipe could presumably stall the pipeline.
    results = []
    for proc in procs[:-1]:
        proc.wait()
        # Close the pipe once read so the descriptor is released promptly.
        with proc.stderr:
            results.append((proc.returncode, proc.stderr.read()))
    results.append((last_proc.returncode, last_stderr))
    return results
def locate(root, match_function, ignore_dotfiles=True):
    '''
    Walks root recursively and yields the absolute path of each file whose
    bare filename is accepted by match_function.

    When ignore_dotfiles is true, files whose name starts with '.' are
    left out even if match_function accepts them.
    '''
    for dirpath, _subdirs, names in os.walk(root):
        for name in names:
            if not match_function(name):
                continue
            if ignore_dotfiles and name.startswith('.'):
                continue
            yield os.path.abspath(os.path.join(dirpath, name))
def ext_matcher(*extensions):
    '''
    Builds a predicate that tells whether a filename's extension (compared
    in lower case, leading dot included) is one of the given extensions.
    '''
    def has_wanted_extension(filename):
        suffix = os.path.splitext(filename)[-1]
        return suffix.lower() in extensions

    return has_wanted_extension
def is_24bit(flac_dir):
    '''
    Returns True as soon as one FLAC found under flac_dir has more than
    16 bits per sample, False if none does.
    '''
    for flac_path in locate(flac_dir, ext_matcher('.flac')):
        if mutagen.flac.FLAC(flac_path).info.bits_per_sample > 16:
            return True
    return False
def is_multichannel(flac_dir):
    '''
    Returns True as soon as one FLAC found under flac_dir has more than
    2 channels, False if none does.
    '''
    for flac_path in locate(flac_dir, ext_matcher('.flac')):
        if mutagen.flac.FLAC(flac_path).info.channels > 2:
            return True
    return False
def needs_resampling(flac_dir):
    '''
    Reports whether the release in flac_dir has to be resampled when it
    is transcoded.

    The answer is based on bit depth only: it is True exactly when
    is_24bit(flac_dir) is.
    '''
    # NOTE(review): transcode() also resamples files above 48 kHz, which
    # this check does not look at -- confirm whether that is intended.
    has_high_bit_depth = is_24bit(flac_dir)
    return has_high_bit_depth
def resample_rate(flac_dir):
    '''
    Returns the rate to which the release should be resampled.

    The highest sample rate among the FLACs under flac_dir decides the
    result: 44100 if it is a multiple of 44100, otherwise 48000 if it is
    a multiple of 48000. Returns None when neither applies or when
    flac_dir contains no FLAC files.
    '''
    flac_paths = locate(flac_dir, ext_matcher('.flac'))
    sample_rates = [mutagen.flac.FLAC(flac_path).info.sample_rate for flac_path in flac_paths]
    if not sample_rates:
        # max() would raise ValueError on an empty sequence.
        return None

    original_rate = max(sample_rates)
    for base_rate in (44100, 48000):
        if original_rate % base_rate == 0:
            return base_rate
    return None
def transcode_commands(output_format, resample, needed_sample_rate, flac_file, transcode_file):
    '''
    Return a list of transcode steps (one command per list element),
    which can be used to create a transcode pipeline for flac_file ->
    transcode_file using the specified output_format, plus any
    resampling, if needed.

    output_format must be a key of encoders (KeyError otherwise).
    needed_sample_rate is only inserted into the commands when resample
    is true. Both file names are shell-quoted so that each command can
    later be split with shlex.split().
    '''
    transcode_args = {
        'FLAC': shlex.quote(flac_file),
        'FILE': shlex.quote(transcode_file),
        'OPTS': encoders[output_format]['opts'],
        'SAMPLERATE': needed_sample_rate,
    }

    if output_format == 'FLAC' and resample:
        # sox reads and writes FLAC itself, so one step does everything.
        steps = ['sox {FLAC} -G -b 16 {FILE} rate -v -L {SAMPLERATE} dither']
    else:
        if resample:
            decoder = 'sox {FLAC} -G -b 16 -t wav - rate -v -L {SAMPLERATE} dither'
        else:
            decoder = 'flac -dcs -- {FLAC}'
        steps = [decoder]

        # The encoder step reads the decoded audio from stdin ('-').
        encoder = encoders[output_format]['enc']
        if encoder == 'lame':
            steps.append('lame -S {OPTS} - {FILE}')
        elif encoder == 'flac':
            steps.append('flac {OPTS} -o {FILE} -')

    # Always a real list, never a lazy map object.
    return [step.format(**transcode_args) for step in steps]
# Pool.map() can't pickle lambdas, so we need a helper function.
def pool_transcode(args):
    '''
    Calls transcode() with the positional arguments packed in args and
    returns its result.
    '''
    transcoded_file = transcode(*args)
    return transcoded_file
def transcode(flac_file, output_dir, output_format):
    '''
    Transcodes a FLAC file into another format.

    The result is written to output_dir (created if missing) under the
    FLAC's base name, with the characters ? < > \\ * | " replaced by '_'
    and the extension of output_format. Files above 48 kHz or 16 bits are
    resampled to 44100 or 48000 Hz. Tags are copied to the new file and
    then checked.

    Returns the path of the transcoded file.

    Raises UnknownSampleRateException if resampling is needed but the
    sample rate is not a multiple of 44100 or 48000,
    TranscodeDownmixException if the file has more than 2 channels, and
    TranscodeException if a pipeline step fails or the tag check fails.
    '''
    # gather metadata from the flac file
    flac_info = mutagen.flac.FLAC(flac_file)
    sample_rate = flac_info.info.sample_rate
    bits_per_sample = flac_info.info.bits_per_sample
    resample = sample_rate > 48000 or bits_per_sample > 16

    # if resampling isn't needed then needed_sample_rate will not be used.
    needed_sample_rate = None
    if resample:
        if sample_rate % 44100 == 0:
            needed_sample_rate = '44100'
        elif sample_rate % 48000 == 0:
            needed_sample_rate = '48000'
        else:
            raise UnknownSampleRateException('FLAC file "{0}" has a sample rate {1}, which is not 88.2, 176.4, 96, or 192kHz but needs resampling, this is unsupported'.format(flac_file, sample_rate))

    if flac_info.info.channels > 2:
        raise TranscodeDownmixException('FLAC file "{0}" has more than 2 channels, unsupported'.format(flac_file))

    # determine the new filename
    transcode_basename = os.path.splitext(os.path.basename(flac_file))[0]
    transcode_basename = re.sub(r'[\?<>\\*\|"]', '_', transcode_basename)
    transcode_file = os.path.join(output_dir, transcode_basename)
    transcode_file += encoders[output_format]['ext']

    # exist_ok makes the race with other transcode processes creating the
    # same directory harmless.
    os.makedirs(os.path.dirname(transcode_file), exist_ok=True)

    commands = list(transcode_commands(output_format, resample, needed_sample_rate, flac_file, transcode_file))
    results = run_pipeline(commands)

    # Check for problems. Because it's a pipeline, the earliest one is
    # usually the source. The exception is -SIGPIPE, which is caused
    # by "backpressure" due to a later command failing: ignore those
    # unless no other problem is found.
    sigpipe_seen = False
    for (code, stderr) in results:
        if not code:
            continue
        if code == -signal.SIGPIPE:
            sigpipe_seen = True
            continue
        if isinstance(stderr, bytes):
            # Show the tool's output as text rather than as a b'...' repr.
            stderr = stderr.decode('utf-8', 'replace')
        raise TranscodeException('Transcode of file "{0}" failed: {1}'.format(flac_file, stderr))
    if sigpipe_seen:
        # XXX: this should probably never happen....
        raise TranscodeException('Transcode of file "{0}" failed: SIGPIPE'.format(flac_file))

    tagging.copy_tags(flac_file, transcode_file)
    (ok, msg) = tagging.check_tags(transcode_file)
    if not ok:
        raise TranscodeException('Tag check failed on transcoded file: {0}'.format(msg))

    return transcode_file
def get_transcode_dir(flac_dir, output_dir, output_format, resample):
    '''
    Returns the path, inside output_dir, of the directory a transcode of
    flac_dir to output_format should be written to.

    The name is derived from the last component of flac_dir:

    * The first usable format marker found in it (e.g. "HD FLAC",
      "24bit FLAC", "FLAC") is replaced, case-insensitively, by
      output_format. If there is none, " (<output_format>)" is appended.
    * If resample is true, the "24" and source-rate parts of the name are
      rewritten to "16" and the target rate ("44" or "48"), or a
      " [16-44]" / " [16-48]" suffix is appended when the name does not
      carry them. The target rate comes from resample_rate(flac_dir).
    '''
    full_flac_dir = flac_dir
    source_name = os.path.basename(flac_dir)
    upper_name = source_name.upper()

    # This is what happens when you spend your time transcoding 24 bit to 16 for
    # perfect FLACs.
    #
    # (marker, guarded) pairs, tried in this order; the first usable one
    # wins. Guarded markers are only used when guard_ok (below) holds.
    format_markers = (
        ('HD FLAC', False),
        ('FLAC HD', False),
        ('FLAC 24-BIT', True),
        ('FLAC-24BIT', True),
        ('FLAC-24', True),
        ('FLAC 24BIT', True),
        ('FLAC 24 BIT', True),
        ('FLAC, 24BIT', True),
        ('FLAC, 24 BIT', True),
        ('FLAC, 24-BIT', True),
        ('FLAC 24', True),
        ('FLAC24', True),
        ('FLAC96', True),
        ('24-BIT FLAC', True),
        ('24-BIT LOSSLESS FLAC', True),
        ('24BIT FLAC', True),
        ('24 BIT FLAC', True),
        ('24FLAC', True),
        ('24 FLAC', True),
        ('FLAC', False),
        ('24 BITS', True),
        ('24-BITS', True),
        ('24BITS', True),
        ('24BIT', True),
        ('24 BIT', True),
        ('24-BIT', True),
    )

    # A guarded marker is usable when "24" occurs at least twice in the
    # name, or when the name contains none of these sample-rate digits.
    rate_hints = ('44', '88', '176', '48', '96', '192')
    guard_ok = (
        upper_name.count('24') >= 2
        or not any(hint in source_name for hint in rate_hints)
    )

    for marker, guarded in format_markers:
        if marker in upper_name and (guard_ok or not guarded):
            transcode_dir = re.sub(re.compile(marker, re.I), output_format, source_name)
            break
    else:
        transcode_dir = source_name + " (" + output_format + ")"
        if output_format != 'FLAC':
            # Kept from the original; this branch is only reached when the
            # name has no "FLAC" marker, so nothing is expected to match.
            transcode_dir = re.sub(re.compile('FLAC', re.I), '', transcode_dir)

    if resample:
        # target rate -> (source-rate spellings to look for, in order;
        #                 text that replaces them; suffix used otherwise)
        rate_naming = {
            44100: (('176.4', '176 4', '176', '88.2', '88 2', '88', '44.1', '44 1'), '44', ' [16-44]'),
            48000: (('192', '96'), '48', " [16-48]"),
        }
        plan = rate_naming.get(resample_rate(full_flac_dir))
        if plan is not None:
            spellings, new_rate, suffix = plan
            has_depth = '24' in source_name
            old_rate = None
            if has_depth:
                old_rate = next((s for s in spellings if s in source_name), None)

            if old_rate is not None:
                # XXX: theoretically, this could replace part of the album title too.
                # e.g. "24 days in 96 castles - [24-96]" would become "16 days in 44 castles - [16-44]"
                transcode_dir = transcode_dir.replace('24', '16')
                transcode_dir = transcode_dir.replace(old_rate, new_rate)
            elif has_depth and new_rate in source_name:
                # The rate is already the target one; only the depth changes.
                transcode_dir = transcode_dir.replace('24', '16')
            else:
                transcode_dir += suffix

        # The 24 -> 16 replacement above also turns the year 2024 into
        # 2016; put it back when the source name contained 2024.
        if re.search(r'\b2016\b', transcode_dir) and re.search(r'\b2024\b', source_name):
            transcode_dir = re.sub(r'\b2016\b', '2024', transcode_dir)

    return os.path.join(output_dir, transcode_dir)
# To ensure that a terminated pool subprocess terminates its
# children, we make each pool subprocess a process group leader,
# and handle SIGTERM by killing the process group. This will
# ensure there are no lingering processes when a transcode fails
# or is interrupted.
#
# Defined at module level (not inside transcode_release) so that it can
# be pickled when the multiprocessing start method is not 'fork'.
def _pool_initializer():
    os.setsid()

    def sigterm_handler(signum, frame):
        # We're about to SIGTERM the group, including us; ignore
        # it so we can finish this handler.
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        pgid = os.getpgid(0)
        os.killpg(pgid, signal.SIGTERM)
        sys.exit(-signal.SIGTERM)

    signal.signal(signal.SIGTERM, sigterm_handler)


def transcode_release(flac_dir, output_dir, output_format, max_threads=None):
    '''
    Transcode a FLAC release into another format.

    Every FLAC under flac_dir is transcoded in a pool of max_threads
    worker processes (None lets multiprocessing choose), keeping the
    sub-directory layout, and the accompanying files with an allowed
    extension (.cue, .log, images, ...) are copied alongside.

    Returns the directory holding the result. This is flac_dir itself
    when output_format is 'FLAC' and no resampling is needed, and the
    existing directory, untouched, when the target directory already
    exists.

    If anything fails, the newly created directory is removed and the
    exception is re-raised.
    '''
    flac_dir = os.path.abspath(flac_dir)
    output_dir = os.path.abspath(output_dir)

    # check if we need to resample
    resample = needs_resampling(flac_dir)

    # check if we need to encode
    if output_format == 'FLAC' and not resample:
        # XXX: if output_dir is not the same as flac_dir, this may not
        # do what the user expects.
        if output_dir != os.path.dirname(flac_dir):
            print("Warning: no encode necessary, so files won't be placed in", output_dir)
        return flac_dir

    # make a new directory for the transcoded files
    #
    # NB: The cleanup code that follows this block assumes that
    # transcode_dir is a new directory created exclusively for this
    # transcode. Do not change this assumption without considering the
    # consequences!
    transcode_dir = get_transcode_dir(flac_dir, output_dir, output_format, resample)
    print(transcode_dir)
    if os.path.exists(transcode_dir):
        # An existing directory is returned as-is instead of being
        # treated as an error.
        return transcode_dir
    os.makedirs(transcode_dir)

    try:
        # One (source file, target directory, format) job per FLAC; the
        # target directory mirrors the file's location under flac_dir.
        flac_files = locate(flac_dir, ext_matcher('.flac'))
        jobs = [
            (filename, os.path.dirname(filename).replace(flac_dir, transcode_dir), output_format)
            for filename in flac_files
        ]

        # create transcoding threads
        #
        # Use Pool.map() rather than Pool.apply_async() as it will raise
        # exceptions synchronously. (Don't want to waste any more time
        # when a transcode breaks.)
        #
        # XXX: actually, use Pool.map_async() and then get() the result
        # with a large timeout, as a workaround for a KeyboardInterrupt in
        # Pool.join(). c.f.,
        # http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool?rq=1
        pool = multiprocessing.Pool(max_threads, initializer=_pool_initializer)
        try:
            result = pool.map_async(pool_transcode, jobs)
            result.get(60 * 60 * 12)
            pool.close()
        except BaseException:
            # Includes KeyboardInterrupt: stop the workers before re-raising.
            pool.terminate()
            raise
        finally:
            pool.join()

        # copy other files
        allowed_extensions = ['.cue', '.gif', '.jpeg', '.jpg', '.log', '.md5', '.nfo', '.pdf', '.png', '.sfv', '.txt']
        allowed_files = locate(flac_dir, ext_matcher(*allowed_extensions))
        for filename in allowed_files:
            new_dir = os.path.dirname(filename).replace(flac_dir, transcode_dir)
            os.makedirs(new_dir, exist_ok=True)
            shutil.copy(filename, new_dir)

        return transcode_dir

    except BaseException:
        # Cleanup.
        #
        # ASSERT: transcode_dir was created by this function and does
        # not contain anything other than the transcoded files!
        shutil.rmtree(transcode_dir)
        raise
def make_torrent(input_dir, output_dir, tracker, passkey, source):
    '''
    Creates a private torrent of input_dir with the mktorrent program.

    The torrent is written to output_dir (created if missing) and named
    after the last component of input_dir plus ".torrent". The announce
    URL is "<tracker><passkey>/announce". When source is not None it is
    passed to mktorrent with -s.

    Returns the path of the torrent file. Raises
    subprocess.CalledProcessError if mktorrent exits with an error.
    '''
    torrent = os.path.join(output_dir, os.path.basename(input_dir)) + ".torrent"

    torrent_dir = os.path.dirname(torrent)
    if torrent_dir:
        # os.makedirs, not os.path.makedirs (which does not exist).
        os.makedirs(torrent_dir, exist_ok=True)

    tracker_url = '{tracker}{passkey}/announce'.format(
        tracker=tracker, passkey=passkey)

    command = ["mktorrent", "-p"]
    if source is not None:
        command += ["-s", source]
    command += ["-a", tracker_url, "-o", torrent, input_dir]

    subprocess.check_output(command, stderr=subprocess.STDOUT)
    return torrent
def main():
    '''
    Command-line entry point: parses input_dir, output_dir, output_format
    and the optional -j/--threads count (default: the CPU count), then
    transcodes the release.
    '''
    import argparse

    cli = argparse.ArgumentParser()
    for positional in ('input_dir', 'output_dir'):
        cli.add_argument(positional)
    cli.add_argument('output_format', choices=encoders.keys())
    cli.add_argument('-j', '--threads', type=int, default=multiprocessing.cpu_count())
    options = cli.parse_args()

    # Allow "~" in both directory arguments.
    source = os.path.expanduser(options.input_dir)
    destination = os.path.expanduser(options.output_dir)
    transcode_release(source, destination, options.output_format, options.threads)


if __name__ == "__main__":
    main()