Skip to content

Commit

Permalink
Tweaks and bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
SeaHOH committed May 13, 2022
1 parent e5eb915 commit f0966a2
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 68 deletions.
9 changes: 5 additions & 4 deletions cykdl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,12 @@ def main(argv=None):
video = input('YKDL> ').strip()
except KeyboardInterrupt:
sys.exit()
if not video:
continue
if video == 'exit':
sys.exit()
try:
if video:
if video == 'exit':
sys.exit()
handle_video(video)
handle_video(video)
except KeyboardInterrupt:
logger.warning('\nInterrupted by Ctrl-C, press Ctrl-C again to exit YKDL.')
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions ykdl/extractors/douban/music.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def prepare(self):
return info

def list_only(self):
    '''Report whether `self.url` matches the playlist URL forms.

    Returns a truthy value when the URL contains 'site.douban' and has no
    `s=<digits>` part, or when it contains `sid=<digits>,<digit>`; otherwise
    a falsy value. The result is either a bool or whatever match() returns.
    '''
    # Raw strings keep `\d` as a regex escape instead of an invalid string
    # escape sequence; the pattern text handed to match() is unchanged.
    return ('site.douban' in self.url and not match(self.url, r's=\d+')
            or match(self.url, r'sid=\d+,\d'))

def prepare_list(self):

Expand Down
31 changes: 16 additions & 15 deletions ykdl/extractors/generalsimple.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# TODO: subtitles support
# REF: https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API

pattern1 = r'''(?ix)
pattern_ext = r'''(?ix)
["'](
(?:https?:|\\?/)[^"'#]+?\.
(
Expand All @@ -22,13 +22,15 @@
/?(?:\?.+?)?
)["'#]
'''
pattern2 = r'''(?ix)
<(?:video|audio|source)[^>]+?src=["'](
(?:https?:|\\?/)[^"']+
)["']
[^>]+?(?:type=["'](
(?:video|audio)/[^"']+
)["'])?
pattern_src = r'''(?ix)
<(?:video|audio|source)[^>]+?
src=["']((?:https?:|\\?/)[^"']+)["']
[^>]+?
(?:
type=["']((?:video|audio|application)/[^"']+)["']
|
[^>](?!type)*>
)
'''

class GeneralSimple(Extractor):
Expand All @@ -44,11 +46,9 @@ def prepare(self):

ext = ctype = None
for i in range(2):
_ = match(html, pattern1)
url, ext = _ and _ or (_, _)
url, ctype = matchm(html, pattern_src)
if url is None:
_ = match(html, pattern2)
url, ctype = _ and _ or (_, _)
url, ext = matchm(html, pattern_ext)
if url:
if not i:
url = unescape(url)
Expand All @@ -63,11 +63,12 @@ def prepare(self):
url = self.url[:self.url.find('/')] + url
elif url[0] == '/':
url = self.url[:self.url.find('/', 9)] + url
if ext is None:
if ext is None or ctype:
ctype = str(ctype).lower()
ext = contentTypes.get(ctype) or url_info(url)[1] or (
str(ctype).lower().startswith('audio') and 'mp3' or 'mp4')
ctype.startswith('audio') and 'mp3' or 'mp4')
if ext[:3] == 'm3u':
info.streams = load_m3u8_playlist(url)
info.streams = load_m3u8_playlist(url, headers={'Referer': self.url})
else:
info.streams['current'] = {
'container': ext,
Expand Down
2 changes: 1 addition & 1 deletion ykdl/extractors/iqilu.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def init(self):
self.url_pattern = '"mp4-wrapper"[^"]+"(http[^"]+)"'

def l_assert(self):
    '''Assert that `self.url` contains a `v.iqilu.com` page address.

    Raises AssertionError when the URL does not match.
    '''
    # Raw string: `\.` and `\w` are regex escapes, not string escapes.
    assert match(self.url, r'https?://v\.iqilu\.com/\w+')

def reprocess(self):
self.info.title = '[{self.info.artist}] {self.info.title}'.format(**vars())
Expand Down
4 changes: 2 additions & 2 deletions ykdl/extractors/iqiyi/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ def prepare(self):
info = MediaInfo(self.name)

if self.url and not self.vid:
vid = match(self.url, 'curid=([^_]+)_([\w]+)')
if vid:
vid = matchm(self.url, 'curid=([^_]+)_([\w]+)')
if vid[0]:
self.vid = vid
try:
info_json = get_response(
Expand Down
2 changes: 1 addition & 1 deletion ykdl/extractors/ku6.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def init(self):
pass

def list_only(self):
    '''Report whether `self.url` contains a ku6 detail page address.

    Returns whatever match() returns for the pattern: truthy on a hit,
    None otherwise.
    '''
    # Dots in the host name are escaped so they only match a literal '.',
    # and the raw string keeps `\d` from being read as a string escape.
    return match(self.url, r'https://www\.ku6\.com/detail/\d+')

def prepare_list(self):
html = get_content(self.url)
Expand Down
2 changes: 1 addition & 1 deletion ykdl/extractors/le/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get_extractor(url):

if 'lunbo' in url:
from . import lunbo as s
elif match(url, '(live[\./]|/izt/)'):
elif match(url, 'live[\./]|/izt/'):
from . import live as s
elif 'bcloud' in url:
from . import letvcloud as s
Expand Down
10 changes: 5 additions & 5 deletions ykdl/extractors/lizhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ def prepare(self):
info = MediaInfo(self.name)

html = get_content(self.url)
self.vid, info.artist, _, info.title = \
match(html, 'data-hidden-ph\s?=\s?"(.+?)" '
'data-user-name\s?=\s?"(.+?)" '
'data-radio-name\s?=\s?"(.+?)" '
'data-title\s?=\s?"(.+?)"')
self.vid, info.artist, _, info.title = matchm(html,
'data-hidden-ph\s?=\s?"(.+?)" '
'data-user-name\s?=\s?"(.+?)" '
'data-radio-name\s?=\s?"(.+?)" '
'data-title\s?=\s?"(.+?)"')
data = get_response('https://www.lizhi.fm/hidden_ph/' +
self.vid).json()
assert data['rcode'] == 0, data['msg']
Expand Down
2 changes: 1 addition & 1 deletion ykdl/util/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def unbrotli(data):

def get_response(url, headers={}, data=None, params=None, method='GET',
max_redirections=None, encoding=None,
default_headers=fake_headers, cache=True):
default_headers=fake_headers, cache=CACHED):
'''Fetch the response of giving URL.
Params: both `params` and `data` always use "UTF-8" as encoding.
Expand Down
34 changes: 17 additions & 17 deletions ykdl/util/human.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def _format_str(s):
s = s.decode()
s = s.lower()
n = match1(s, '^((?:0x)?[0-9a-f])$') #
if n and match(n, '([a-fx])'): # only convert which is unambiguous
if n and match(n, '[a-fx]'): # only convert which is unambiguous
return str(int(n, 16))
return s

Expand All @@ -33,22 +33,22 @@ def human_size(n, unit=None):
if isinstance(n, (str, bytes)):
n = _format_str(n)
try:
n, nu = match(n, '''(?x)
(?:
^ | # start
\De | # no scientific notation
[^\-\.\de] # no negative, dot, number
)
(
\d+ # integer
(?:\.\d+)? # float
(?!\.) # bad float
(?:e\d+)? # scientific notation
(?![\.\de]) # bad scientific notation
)
\s*
(?:([kmgt])i?b)? # unit
''')
n, nu = matchm(n, '''(?x)
(?:
^ | # start
\De | # no scientific notation
[^\-\.\de] # no negative, dot, number
)
(
\d+ # integer
(?:\.\d+)? # float
(?!\.) # bad float
(?:e\d+)? # scientific notation
(?![\.\de]) # bad scientific notation
)
\s*
(?:([kmgt])i?b)? # unit
''')
except TypeError:
raise ValueError('invalid literal for human_size(): %r' % n)
f = float(n)
Expand Down
45 changes: 26 additions & 19 deletions ykdl/util/match.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re


__all__ = ['match', 'match1', 'matchall']
__all__ = ['match', 'match1', 'matchm', 'matchall']

def _format_str(pattern, string):
'''Format the target which will be scanned, makes the worker happy.'''
Expand Down Expand Up @@ -33,49 +33,56 @@ def _format_str(pattern, string):
return string

def match(obj, *patterns):
    '''Search an object with several regex patterns and return the first hit.

    Params:
        `obj`, any object which contains string data.
        `patterns`, arbitrary number of regex patterns, tried in order.

    Returns the Match object of the first pattern that matches, or None when
    no pattern matches (or no pattern was given).
    '''
    for pattern in patterns:
        # The target is re-formatted for each pattern before it is searched.
        string = _format_str(pattern, obj)
        # Named `m` so the local does not shadow this function's own name.
        m = re.search(pattern, string)
        if m:
            return m
    return None

def match1(obj, *patterns):
    '''Return the first captured substring of the first matching pattern.

    Params: same as match()

    Returns the text of the first capture group (None if that group did not
    take part in the match), or None when nothing matches or the matching
    pattern defines no capture group.
    '''
    m = match(obj, *patterns)
    if m is None:
        return None
    groups = m.groups()
    # A pattern without capture groups yields an empty tuple; report that as
    # "nothing captured" instead of raising IndexError.
    return groups[0] if groups else None

def matchm(obj, *patterns):
    '''Return every captured substring of the first matching pattern.

    Params: same as match()

    Returns the tuple of all capture groups of the first Match object. When
    nothing matches, or the match has no capture groups, returns a tuple of
    None objects whose length is the number of capture groups in the FIRST
    pattern. Callers that unpack the result should therefore give every
    pattern the same number of capture groups.
    '''
    m = match(obj, *patterns)
    if m:
        groups = m.groups()
        if groups:
            return groups
    # Fallback: one None per capture group of the first pattern.
    return (None,) * re.compile(patterns[0]).groups


def matchall(obj, *patterns):
    '''Collect the matches of every pattern, in pattern order.

    Params: same as match()

    Returns a list of everything re.findall() finds for each pattern, or an
    empty list. Each item has the shape re.findall() gives for its pattern
    (a string, or a tuple when the pattern has several capture groups), so
    if a uniform shape is expected, all the regex patterns MUST use the same
    number of capture groups.
    '''
    ret = []
    for pattern in patterns:
        string = _format_str(pattern, obj)
        # Append this pattern's results exactly once.
        ret.extend(re.findall(pattern, string))
    return ret

0 comments on commit f0966a2

Please sign in to comment.