-
Notifications
You must be signed in to change notification settings - Fork 0
/
rssreader.cl
315 lines (263 loc) · 7.78 KB
/
rssreader.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;; read rss feed
;; from jkf on 5/26/2005
(eval-when (compile load eval)
(require :aserve)
(require :pxml-sax))
(defpackage :net.rss.rdf
(:export #:about
#:RDF
#:Seq))
(defpackage :net.rss.dc
(:export #:date))
(defpackage :net.rss.sy
(:export #:sy))
(defpackage :net.rss.admin
)
(defpackage :net.rss (:use :common-lisp :excl :net.xml.parser
:net.aserve
:net.aserve.client
)
(:export #:feed-rss-p
#:feed-slot-value
#:read-feed
; important tags
#:all-items
#:channel
#:description
#:item
#:link
#:pubDate
#:rss
#:title
#:version
#:feed-error
#:feed-error-ignore
;; for EZTV:
#:torrent
#:fileName
#:*uri-to-package*
))
(in-package :net.rss)
(defparameter *uri-to-package*
(list
(cons "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
:net.rss.rdf)
(cons "http://purl.org/dc/elements/1.1/"
:net.rss.dc)
(cons "http://purl.org/rss/1.0/modules/syndication/"
:net.rss.sy)
(cons "http://webns.net/mvcb/"
:net.rss.admin)
(cons "http://purl.org/rss/1.0/"
:net.rss)
;; For EZTV
(cons "http://xmlns.ezrss.it/0.1/"
:net.rss)
;; Yet another hack for EZTV. WTF??
(cons "//xmlns.ezrss.it/0.1/"
:net.rss)
))
(define-condition feed-error (error)
((http-code :initarg :http-code :reader feed-error-httpcode
:initform nil)))
(define-condition feed-error-ignore (feed-error) ())
(defun read-feed (url &key timeout verbose error-func
&aux (host (net.uri:uri-host (net.uri:parse-uri url))))
;;
;;* exported
;;
;; read the feed given by the url and return a feed value
;;
(multiple-value-bind (content code headers)
(handler-case (do-http-request
url :timeout timeout
:headers '(("User-Agent" . "Wget/1.12")))
(excl::ssl-error (c)
(when verbose
(format t "~&;; SSL error reading ~a feed:~%~a~%"
host c))
(signal 'feed-error-ignore)
(return-from read-feed))
(socket-error (c)
(when verbose
(format t "~&;; socket error reading ~a feed:~%~a~%"
host c))
(signal 'feed-error-ignore)
(return-from read-feed))
(error (c)
(error 'feed-error
:format-control "Error from do-http-request to ~a: ~a"
:format-arguments (list host c))))
(declare (ignore headers))
(if* (or (eq 502 code) (eq 522 code))
then (signal 'feed-error-ignore)
(return-from read-feed)
elseif (not (eq 200 code))
then (if* verbose
then (error
'feed-error
:http-code code
:format-control "Accessing URL ~s gave response ~s"
:format-arguments (list url code))
else (error
'feed-error
:http-code code
:format-control "Accessing feed from ~s gave response ~s"
:format-arguments (list host code))))
(handler-case (parse-feed content)
(error ()
(when error-func (funcall error-func content))
(signal 'feed-error-ignore)
(return-from read-feed)))))
(defun parse-feed (content)
(when (or (and (< (length content) 100)
(match-re "database error" content :case-fold t))
(match-re "down for .?maintenance.?"
content
:end (or (min (length content) 1024))
:case-fold t))
(error 'feed-error-ignore))
(setq content
;; WTF? Control chars in feeds? Cripes, what'll they think of next?
(remove-if (lambda (c) (<= (char-code c) #.(char-code #\^z)))
content))
;;
;;* exported
;;
;; read the feed given by the url and return a feed value
;;
(let ((body (car
;; parse-xml was crapping out on EZTV, so I switched to
;; parse-to-lxml on the advice of mm. When I say
;; "crapping out" I mean crashing the lisp.
#+ignore
(let ((*package* (find-package :net.rss)))
(parse-xml content
;;The eztv.it feed requires this
:external nil
:uri-to-package *uri-to-package*))
(let ((*package* (find-package :net.rss)))
(net.xml.sax:parse-to-lxml
content
;; needed for parse-to-lxml not parse-xml
:package *package*
;;The eztv.it feed requires this
:external nil
:uri-to-package *uri-to-package*)))))
;;(pprint body)
;;(setq *body* body)
(if* (and (consp (car body))
(eq 'rss (caar body)))
then `(rss
(version ,(getf (cdar body) 'version))
,@(process-rss-body (cdr body) nil))
elseif (and (consp (car body))
(eq 'net.rss.rdf:RDF (caar body)))
then ;; rss 1.0
`(rss (version "1.0") ,@(process-rss-body (cdr body) 'rdf)))))
(defun process-rss-body (body rdfp)
(mapcar
(lambda (ch)
`(channel
,@(mapcar
(lambda (it)
`(,(car it) ,@(big-stringify (cdr it))))
(find-not-items 'item (cdr ch)))
(all-items
,@(mapcar
(lambda (it
&aux (torrent (find-items 'torrent it))
;; EZTV hack:
(fileName
(when torrent
(car (find-items 'fileName (car torrent))))))
`(item
,@(mapcar
(lambda (vv)
`(,(car vv)
,@(big-stringify (cdr vv))))
(find-conses (cdr it)))
,@(when fileName (list fileName))))
(find-items 'item
(if* rdfp
then ;; found in main body
(cdr body)
else ;; found in channel
(cdr ch)))))))
(find-items 'channel body)))
(defun find-items (key objs)
;; find all items beginning with key
(let (res)
(dolist (obj objs)
(if* (and (consp obj) (or (eq key (car obj))
(and (consp (car obj))
(eq key (caar obj)))))
then (push obj res)))
(nreverse res)))
(defun find-not-items (key objs)
;; find conses not beginning with key
(let (res)
(dolist (obj objs)
(if* (and (consp obj)
(not (or (eq key (car obj))
(and (consp (car obj))
(eq key (caar obj))))))
then (push obj res)))
(nreverse res)))
(defun find-conses (objs)
;; find all things that are conses
(let (res)
(dolist (obj objs)
(if* (consp obj)
then (push obj res)))
(nreverse res)))
(defun big-stringify (objs)
;; concatenate all consecutie strings in objs
(let (res thisres)
(do* ((xx objs (cdr xx))
(val (car xx) (car xx)))
((null xx)
(if* thisres
then (push (apply #'concatenate 'string (nreverse thisres))
res))
(nreverse res))
(if* (stringp val)
then (push val thisres)
elseif thisres
then (push (apply #'concatenate 'string (nreverse thisres))
res)
(setq thisres nil)
(push val res)))))
;; functions for searching the feed results
(defun feed-rss-p (feed &optional version)
;; test if is feed is an rss feed (and if version is given test
;; that it is of that particular version).
;;
;; return the version number if true (or return t if it's an rss
;; feed but the version isn't known).
;;
(and (consp feed)
(eq 'rss (car feed))
(if* version
then (if* (equal (feed-slot-value feed 'version) version)
then version)
else (or(feed-slot-value feed 'version) t))))
(defun feed-slot-value (feed slot-name)
(dolist (ent feed)
(if* (consp ent)
then (if* (consp (car ent))
then (if* (eq slot-name (caar ent))
then (return (cdr ent)))
elseif (eq slot-name (car ent))
then (return (cdr ent))))))
(defun testit ()
(format t "0.91 feed ~%")
(pprint (read-feed "http://www.franz.com/rss091_main.xml"))
(format t "2.0 feed ~%")
(pprint (read-feed "http://www.franz.com/rss20_main.xml"))
(format t "1.0 feed ~%")
(pprint (read-feed "http://www.franz.com/rss10_main.xml")))
(provide :rssreader)