-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathparse_iheart_json.py
182 lines (152 loc) · 7.4 KB
/
parse_iheart_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#! /usr/bin/env python3
import urllib.request
import urllib.parse
import json
class autodetectError (ValueError):
pass
def station_search (search_keywords, tls = True):
'''Returns a dict containing iHeartRadio search results for search_keyword
search_keyword is a string (radio call letters, for instance -
URL-illegal characters will be escaped) which will be submitted to
iHeartRadio, matching against stations in their database. This function
will return a (potentially very large) dict containing detailed search
results, including station ID numbers, names, short descriptions,
locations, radio band/frequency, call letters, URLs to logo images, the
search algorithm's "score", and potentially other data. In addition
there seems to be some summary information, such as the number of search
results and other metadata. For now detailed documentation of this dict
is not available, manual inspection of results will be highly
illuminating.
'''
# This information is derived from deep packet inspection of the
# official iHeartRadio Android app's HTTP traffic. Exhaustive analysis
# has not been done as of this writing.
#
# iheartradio has a simple API for running searches against its station
# database. The response to a GET request to a URL of the form:
#
# http://api.iheart.com/api/v1/catalog/searchAll?&keywords=<keywords>
#
# where <keywords> is a search term, will be UTF-8 encoded JSON giving
# detailed search results; containing both search metadata and detailed
# information on all search results. Importantly, ID numbers for search
# results are included.
#
# There are a great many additional parameters that the official app
# includes. A full API request URL from the official app follows:
#
# http://api2.iheart.com/api/v1/catalog/searchAll?&keywords=wjr&bestMatch=True&queryStation=True&queryArtist=True&queryTrack=True&queryTalkShow=True&startIndex=0&maxRows=12&queryFeaturedStation=False&queryBundle=False&queryTalkTheme=False&_version=4.11.0
#
# Analysis of these parameters will be conducted some other time (TM),
# however it appears only &keywords is necessary; it is the only one
# this program uses.
# Base URL for our API request
iheart_base_url = '%s://api.iheart.com/api/v1/catalog/searchAll?'
paramaters = urllib.parse.urlencode ({ 'keywords':search_keywords })
response = urllib.request.urlopen ((iheart_base_url % ("https" if tls else "http")) + paramaters)
# We assume we're dealing with UTF-8 encoded JSON, if we aren't the API
# has probably changed in ways we can't deal with.
assert (response.getheader ('Content-Type').casefold () == 'application/json;charset=UTF-8'.casefold ())
results = json.loads (response.read ().decode ("utf-8"))
if (results['errors']):
raise RuntimeError (results['errors'])
else:
return results
def station_info (station_id, tls = True):
''' Returns a dict containing all available information about station_id
station_id is a five-digit number assigned to an iHeartRadio station.
No publicly documented method of obtaining a list of valid station_ids
is currently available, however see the function station_search in this
file.
'''
# The iheartradio API is not publicly documented, to my knowledge.
# There have been several historical versions of this function using
# obsolete APIs, their descriptions may be found in the 'iheart-api'
# document in the source distribution.
#
# AGAIN, in early 2017 the iheart API broke; the new URL comes from
# Nathan Rajlich's node.js library
# (https://github.com/TooTallNate/iheart). Deep-packet analysis of
# official iHeartRadio apps remains to be done.
# The base URL for our API request (%s is for string substitution)
iheart_base_url = "%s://api.iheart.com/api/v2/content/liveStations/%s"
response = urllib.request.urlopen (iheart_base_url % (("https" if tls else "http"), station_id))
# We assume we're dealing with UTF-8 encoded JSON, if we aren't the API
# has probably changed in ways we can't deal with.
(response.getheader ('Content-Type') == 'application/json;charset=UTF-8')
# TODO: investigate this new "hits" structure, perhaps searching and
# stream-fetching can be unified
station = json.loads (response.read ().decode('utf-8'))['hits'][0]
if (not station['streams']):
raise RuntimeError("station streams list empty")
else:
return station
def depls (pls_url):
'''PLS to HTTP audio stream
Assuming pls_url is the location of a PLS stream, rip out File1 and
return the contents. This is meant to be used to cut through the PLS
junk returned by some iheartradio stations as a "stream" URL. As a
warning, if this function gets confused it may well just pass the
original URL back.
'''
response = urllib.request.urlopen (pls_url)
# Failure of this assert is marginally more likely than it is for the
# iheart API functions, and may simply indicate that there is variation
# between different servers serving PLS playlists for iheartradio;
# I'd like to know the station id for any station which causes this to
# be tripped.
assert (response.getheader ('Content-Type') == 'audio/x-scpls; charset=UTF-8')
playlist_file = response.read ().decode('utf-8').splitlines ()
# We simply pull out the first entry (File1) with brute force.
for candidate in playlist_file:
if ("File1" in candidate):
# The format of these lines will be FileN=<url> or so
return candidate[len ("File1="):]
else:
# We fell through, something went wrong in the oh-so-robust
# "parsing" logic above. It won't hurt things too bad to just
# toss the original URL back to the caller.
return pls_url
def get_station_url (station, tls = True):
'''Takes a station dictionary and returns a URL.
'''
def streamcmp (x, y):
'''By analogy with C strcmp(), this returns >0 if x is more
preferred than y, >0 if it is less preferred, or 0 if x and y
are equal.
'''
# We prefer shoutcast over pls over rtmp (and we prefer no
# stream at all over stw_stream), and unless otherwise specified
# we prefer tls ("secure_") variants. Note that we will also
# never pick an https stream if told not to, even if that means
# picking no valid stream.
order = { 'secure_shoutcast_stream': (3.1 if tls else -1),
'shoutcast_stream': 3,
'secure_pls_stream': (2.1 if tls else -1),
'pls_stream': 2,
'secure_hls_stream': (2.1 if tls else -1),
'hls_stream': 2,
'secure_rtmp_stream': (1.1 if tls else -1),
'rtmp_stream': 1,
None: 0,
# appears to be a normal HLS stream fed through an
# (HTTPS) redirector hosted by iHeart; since it seems
# to occur alongside non-redirected HLS streams,
# prefer those.
'pivot_hls_stream': (0.2 if tls else -1),
# one station observed with this stream type (github
# issue #6), but no URL for it
'flv_stream': 0.1,
'stw_stream': -1 }
if order[x] > order[y]:
return 1
elif order[x] == order[y]: # should never happen
return 0
elif order[x] < order[y]:
return -1
preferred_stream = None
for stream in station['streams'].keys ():
# stations with blank URLs have been observed
if (streamcmp (stream, preferred_stream) > 0) and len(station['streams'][stream]) > 0:
preferred_stream = stream
return (station['streams'][preferred_stream])