-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgnip.py
687 lines (501 loc) · 22.5 KB
/
gnip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
import activity
import filter
import datetime
import davclient
import iso8601
import time
import httplib2
import gzip
import StringIO
import publisher
from xml.dom.minidom import parseString
class Gnip:
    """Client for the Gnip HTTP API.

    Wraps an authenticated davclient connection and exposes helpers for
    publishing activities, managing filters and publishers, and fetching
    activity / notification documents, either as raw XML strings or as
    parsed objects.
    """

    def __init__(self, username, password, gnip_server="s.gnipcentral.com"):
        """Initialize the client.

        @type username string
        @param username The Gnip account username
        @type password string
        @param password The Gnip account password
        @type gnip_server string
        @param gnip_server The Gnip server host name to connect to

        Builds the HTTPS base URL from gnip_server and configures a
        DAVClient with basic auth and the default XML / gzip headers.
        """
        # Determine base Gnip URL
        self.base_url = "https://" + gnip_server

        # Configure authentication and the headers sent with every request.
        # Request bodies are gzip-compressed by do_http_post / do_http_put,
        # which is why Content-Encoding is set here.
        self.client = davclient.DAVClient(self.base_url)
        self.client.set_basic_auth(username, password)
        self.client.headers['Accept'] = 'application/xml'
        self.client.headers['User-Agent'] = 'Gnip-Client-Python/2.0.1'
        self.client.headers['Content-Encoding'] = 'gzip'
        self.client.headers['Content-Type'] = 'application/xml'

    def compress_with_gzip(self, string):
        """Compress a string with GZIP.

        @type string string
        @param string The data to compress
        @return string gzipped data
        """
        zbuf = StringIO.StringIO()
        zfile = gzip.GzipFile(mode='wb', fileobj=zbuf, compresslevel=9)
        try:
            zfile.write(string)
        finally:
            # close() flushes the gzip trailer into zbuf, so it must run
            # before the buffer contents are read back.
            zfile.close()
        return zbuf.getvalue()

    def decompress_gzip(self, string):
        """Decompress a string encoded with GZIP.

        @type string string
        @param string The data to decompress
        @return string unzipped data
        """
        zbuf = StringIO.StringIO(string)
        zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
        try:
            return zfile.read()
        finally:
            zfile.close()

    def do_http_delete(self, url_path):
        """Do a HTTP DELETE.

        @type url_path string
        @param url_path The path (relative to the base URL) to DELETE
        @return string containing the response body from the server
        """
        self.client.delete(self.base_url + url_path)
        return self.client.response.body

    def do_http_get(self, url_path):
        """Do a HTTP GET.

        @type url_path string
        @param url_path The path (relative to the base URL) to GET
        @return string containing the response body from the server
        """
        self.client.get(self.base_url + url_path)
        return self.client.response.body

    def do_http_head(self):
        """Do a HTTP HEAD against the base URL of the Gnip server.

        @return response object of the HEAD request
        """
        self.client.head(self.base_url)
        return self.client.response

    def do_http_post(self, url_path, data):
        """Do a HTTP POST.

        @type url_path string
        @param url_path The path (relative to the base URL) to POST to
        @type data string in XML format
        @param data Formatted POST data; gzip-compressed before sending
        @return string containing the response body from the server
        """
        self.client.post(self.base_url + url_path,
                         self.compress_with_gzip(data))
        return self.client.response.body

    def do_http_put(self, url_path, data):
        """Do a HTTP PUT.

        @type url_path string
        @param url_path The path (relative to the base URL) to PUT to
        @type data string in XML format
        @param data Formatted PUT data; gzip-compressed before sending
        @return string containing the response body from the server
        """
        self.client.put(self.base_url + url_path,
                        self.compress_with_gzip(data))
        return self.client.response.body

    def sync_clock(self, time):
        """Adjust a time so that it corresponds with Gnip time.

        @type time datetime
        @param time The time to adjust
        @return datetime object containing the corrected time

        Issues a HEAD request, reads the server's Date header, compares
        it with the local UTC clock and shifts the passed in time by the
        difference. Note that the parameter name hides the time module
        inside this method.
        """
        # Do HTTP HEAD request
        resp = self.do_http_head()

        # Get local time, before we do any other processing
        # so that we can get the two times as close as possible
        local_time = datetime.datetime.utcnow()

        # Get time from headers and parse into python format
        gnip_time = datetime.datetime.strptime(
            resp.getheader("Date"), "%a, %d %b %Y %H:%M:%S %Z")

        # Shift the caller's time by the server/local clock difference
        return time + (gnip_time - local_time)

    def time_to_string(self, time):
        """Convert the time to a formatted string.

        @type time datetime
        @param time The time to convert to a string
        @return string of the form YYYYMMDDHHMM
        """
        return str(time.strftime("%Y%m%d%H%M"))

    def _bucket_url_path(self, base_path, date_and_time):
        """Build the document path for the current or a timestamped bucket.

        @type base_path string
        @param base_path Path of the activity / notification collection
        @type date_and_time datetime
        @param date_and_time The requested time, or None for "current"
        @return string path ending in current.xml or YYYYMMDDHHMM.xml
        """
        if date_and_time is None:
            return base_path + "/current.xml"
        # Only timestamped requests need the (HEAD request based) clock sync.
        corrected_time = self.sync_clock(date_and_time)
        return base_path + "/" + self.time_to_string(corrected_time) + ".xml"

    def _parse_activities(self, xml):
        """Turn an activities XML document into Activity objects.

        @type xml string
        @param xml XML document whose root contains activity elements
        @return list of Activity objects, one per activity element
        """
        root = parseString(xml).documentElement
        activities = []
        for node in root.childNodes:
            # Whitespace between elements shows up as text nodes, which
            # have no tagName attribute; only elements are of interest.
            if node.nodeType != node.ELEMENT_NODE:
                continue
            if node.tagName == 'activity':
                an_activity = activity.Activity()
                an_activity.from_node(node)
                activities.append(an_activity)
        return activities

    def publish_activities(self, publisher_name, activities):
        """Publish activities.

        @type publisher_name string
        @param publisher_name The name of the publisher
        @type activities list of Activity objects
        @param activities The activities to publish
        @return string containing response from the server

        Serializes each activity with to_xml(), wraps the result in an
        activities document and sends it to the Gnip server.
        """
        activity_xml = (
            '<?xml version="1.0" encoding="UTF-8"?><activities>'
            + ''.join(item.to_xml() for item in activities)
            + '</activities>')
        return self.publish_xml(publisher_name, activity_xml)

    def publish_xml(self, publisher_name, activity_xml):
        """Publish activities from a ready-made XML document.

        @type publisher_name string
        @param publisher_name The name of the publisher
        @type activity_xml string
        @param activity_xml XML document formatted to Gnip schema
        @return string containing response from the server
        """
        url_path = "/publishers/" + publisher_name + "/activity.xml"
        return self.do_http_post(url_path, activity_xml)

    def create_filter(self, publisher_name, filter):
        """Create a Gnip filter.

        @type publisher_name string
        @param publisher_name The publisher to create filter for
        @type filter Filter
        @param filter A populated Filter object
        @return string containing response from the server
        """
        return self.create_filter_from_xml(publisher_name, filter.to_xml())

    def create_filter_from_xml(self, publisher_name, data):
        """Create a Gnip filter from its XML representation.

        @type publisher_name string
        @param publisher_name The publisher to create filter for
        @type data string
        @param data XML formatted to Gnip filter schema
        @return string containing response from the server
        """
        url_path = "/publishers/" + publisher_name + "/filters.xml"
        return self.do_http_post(url_path, data)

    def delete_filter(self, publisher_name, name):
        """Delete a Gnip filter.

        @type publisher_name string
        @param publisher_name The publisher that owns the filter
        @type name string
        @param name The name of the filter to delete
        @return string containing response from the server
        """
        url_path = ("/publishers/" + publisher_name
                    + "/filters/" + name + ".xml")
        return self.do_http_delete(url_path)

    def find_filter(self, publisher_name, name):
        """Find a Gnip filter.

        @type publisher_name string
        @param publisher_name The publisher that owns the filter
        @type name string
        @param name The name of the filter to find
        @return Filter built from the server response, or None when the
                response contains an <error> element
        """
        xml = self.find_filter_xml(publisher_name, name)
        if "<error>" in xml:
            return None
        the_filter = filter.Filter()
        the_filter.from_xml(xml)
        return the_filter

    def find_filter_xml(self, publisher_name, name):
        """Find a Gnip filter and return its XML representation.

        @type publisher_name string
        @param publisher_name The publisher that owns the filter
        @type name string
        @param name The name of the filter to find
        @return string containing response from the server
        """
        url_path = ("/publishers/" + publisher_name
                    + "/filters/" + name + ".xml")
        return self.do_http_get(url_path)

    def get_publisher_activities(self, publisher_name, date_and_time=None):
        """Get the activities for a publisher.

        @type publisher_name string
        @param publisher_name The publisher of the data
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return list of Activity objects, one for each activity retrieved

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        xml = self.get_publisher_activities_xml(publisher_name, date_and_time)
        return self._parse_activities(xml)

    def get_publisher_activities_xml(self, publisher_name, date_and_time=None):
        """Get the activities for a publisher as XML.

        @type publisher_name string
        @param publisher_name The publisher of the data
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return string containing response from the server

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        url_path = self._bucket_url_path(
            "/publishers/" + publisher_name + "/activity", date_and_time)
        return self.do_http_get(url_path)

    def get_filter_activities(self, publisher_name, name, date_and_time=None):
        """Get the activities for a filter.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type name string
        @param name The name of the filter to get
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return list of Activity objects, one for each activity retrieved

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        xml = self.get_filter_activities_xml(
            publisher_name, name, date_and_time)
        return self._parse_activities(xml)

    def get_filter_activities_xml(self, publisher_name, name,
                                  date_and_time=None):
        """Get the activities for a filter as XML.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type name string
        @param name The name of the filter to get
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return string containing response from the server

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        url_path = self._bucket_url_path(
            "/publishers/" + publisher_name + "/filters/" + name
            + "/activity", date_and_time)
        return self.do_http_get(url_path)

    def get_publisher_notifications(self, publisher_name, date_and_time=None):
        """Get the notifications for a publisher.

        @type publisher_name string
        @param publisher_name The publisher of the data
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return list of Activity objects, one for each activity retrieved

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        xml = self.get_publisher_notifications_xml(
            publisher_name, date_and_time)
        return self._parse_activities(xml)

    def get_publisher_notifications_xml(self, publisher_name,
                                        date_and_time=None):
        """Get the notifications for a publisher as XML.

        @type publisher_name string
        @param publisher_name The publisher of the data
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return string containing response from the server

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        url_path = self._bucket_url_path(
            "/publishers/" + publisher_name + "/notification", date_and_time)
        return self.do_http_get(url_path)

    def get_filter_notifications(self, publisher_name, name,
                                 date_and_time=None):
        """Get the notifications for a filter.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type name string
        @param name The name of the filter to get
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return list of Activity objects, one for each activity retrieved

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        xml = self.get_filter_notifications_xml(
            publisher_name, name, date_and_time)
        return self._parse_activities(xml)

    def get_filter_notifications_xml(self, publisher_name, name,
                                     date_and_time=None):
        """Get the notifications for a filter as XML.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type name string
        @param name The name of the filter to get
        @type date_and_time datetime
        @param date_and_time The time for which data should be retrieved
        @return string containing response from the server

        If date_and_time is not passed in, the "current" document is
        requested. Note that all times need to be in UTC.
        """
        # The time is used as-is after clock correction, exactly like the
        # other three *_xml getters (this class defines no rounding helper).
        url_path = self._bucket_url_path(
            "/publishers/" + publisher_name + "/filters/" + name
            + "/notification", date_and_time)
        return self.do_http_get(url_path)

    def update_filter(self, publisher_name, filter_to_update):
        """Update a Gnip filter.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type filter_to_update Filter
        @param filter_to_update A populated Filter object; its name
               attribute selects the filter to update
        @return string containing response from the server
        """
        return self.update_filter_from_xml(
            publisher_name, filter_to_update.name, filter_to_update.to_xml())

    def update_filter_from_xml(self, publisher_name, name, data):
        """Update a Gnip filter from its XML representation.

        @type publisher_name string
        @param publisher_name The publisher of the filter
        @type name string
        @param name The name of the filter to update
        @type data string
        @param data XML data formatted to Gnip filter schema
        @return string containing response from the server
        """
        url_path = ("/publishers/" + publisher_name
                    + "/filters/" + name + ".xml")
        return self.do_http_put(url_path, data)

    def create_publisher(self, publisher):
        """Create a Gnip publisher.

        @type publisher Publisher
        @param publisher A populated Publisher object
        @return string containing response from the server
        """
        url_path = "/publishers"
        return self.do_http_post(url_path, publisher.to_xml())

    def create_publisher_from_xml(self, name, data):
        """Create a Gnip publisher from its XML representation.

        @type name string
        @param name The name of the publisher to create (not used when
               building the request; only data is sent)
        @type data string
        @param data XML formatted to Gnip publisher schema
        @return string containing response from the server
        """
        url_path = "/publishers"
        return self.do_http_post(url_path, data)

    def get_publisher(self, name):
        """Get a Gnip publisher.

        @type name string
        @param name Name of the publisher to get
        @return Publisher object based on response from the server
        """
        xml = self.get_publisher_xml(name)
        pub = publisher.Publisher()
        pub.from_xml(xml)
        return pub

    def get_publisher_xml(self, name):
        """Get a Gnip publisher as XML.

        @type name string
        @param name Name of the publisher to get
        @return string containing response from the server
        """
        url_path = "/publishers/" + name + ".xml"
        return self.do_http_get(url_path)

    def update_publisher(self, publisher):
        """Update a Gnip publisher.

        @type publisher Publisher
        @param publisher A populated Publisher object; its name attribute
               selects the publisher to update
        @return string containing response from the server
        """
        return self.update_publisher_from_xml(
            publisher.name, publisher.to_xml())

    def update_publisher_from_xml(self, name, data):
        """Update a Gnip publisher from its XML representation.

        @type name string
        @param name The name of the publisher to update
        @type data string
        @param data XML data formatted to Gnip publisher schema
        @return string containing response from the server
        """
        url_path = "/publishers/" + name + ".xml"
        return self.do_http_put(url_path, data)
if __name__ == "__main__":
    # This module is a library; when run as a script it only prints a
    # usage hint. Each line is printed with a single parenthesised
    # argument, so the empty string yields the blank separator line.
    for usage_line in (
            "This module was not designed to be called directly.",
            "",
            "Try 'from gnip import Gnip'",
            "or 'from gnip import *'"):
        print(usage_line)