Skip to content

Commit

Permalink
solved utf problems
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed Oct 15, 2012
1 parent c6f8985 commit e16fd85
Showing 1 changed file with 25 additions and 3 deletions.
28 changes: 25 additions & 3 deletions pystemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import os
import smtplib
import random
import json
from BeautifulSoup import BeautifulSoup
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
Expand Down Expand Up @@ -135,7 +136,7 @@ def savePastie(self, directory):
if not self.pastie_content:
raise SystemExit('BUG: Content not set, sannot save')
f = open(directory + os.sep + self.id, 'w')
f.write(self.pastie_content) # TODO error checking
f.write(self.pastie_content.encode('utf8')) # TODO error checking

def fetchAndProcessPastie(self):
# double check if the pastie was already downloaded, and remember that we've seen it
Expand Down Expand Up @@ -235,6 +236,26 @@ def fetchPastie(self):
return self.pastie_content


class PastieCdvLt(Pastie):
    '''
    Custom Pastie class for the cdv.lt site.
    This class overloads the fetchPastie function to unwrap the JSONP
    response ("json1(...)") returned by the site and extract the raw pastie.
    '''
    def __init__(self, site, pastie_id):
        Pastie.__init__(self, site, pastie_id)

    def fetchPastie(self):
        downloaded_page, headers = downloadUrl(self.url)
        # downloadUrl may return nothing on failure; only parse a real response
        # NOTE(review): assumes the wrapper is exactly 'json1(' ... ');' — the
        # [6:-2] slice strips 6 leading and 2 trailing characters. Confirm
        # against the live site response.
        if downloaded_page:
            # make the json valid: strip the json1( ... ) JSONP wrapper
            downloaded_page = u'[' + downloaded_page[6:-2] + u']'
            # convert to a json object
            json_pastie = json.loads(downloaded_page)
            if json_pastie:
                # and extract the code
                self.pastie_content = json_pastie[0]['code_record']
        return self.pastie_content


class ThreadPasties(threading.Thread):
'''
Instances of these threads are responsible to download all the individual pastes
Expand Down Expand Up @@ -359,16 +380,17 @@ def downloadUrl(url, data=None, cookie=None):
opener = urllib2.build_opener(NoRedirectHandler())
# Random User-Agent if set in config
user_agent = getRandomUserAgent()
opener.addheaders = [('Accept-Charset', 'utf-8')]
if user_agent:
opener.addheaders = [('User-Agent', user_agent)]
opener.addheaders.append([('User-Agent', user_agent)])
if cookie:
opener.addheaders.append(('Cookie', cookie))
logger.debug("Downloading url: {url} with proxy:{proxy} and user-agent:{ua}".format(url=url, proxy=random_proxy, ua=user_agent))
if data:
response = opener.open(url, data)
else:
response = opener.open(url)
htmlPage = response.read()
htmlPage = unicode(response.read(), errors='replace')
# If we receive a "slow down" message, follow Pastebin recommendation!
if 'Please slow down' in htmlPage:
logger.warn("Slow down message received. Waiting 5 seconds")
Expand Down

0 comments on commit e16fd85

Please sign in to comment.