Commit

added more paste-sites
cvandeplas committed Oct 15, 2012
1 parent e16fd85 commit 771b8ba
Showing 2 changed files with 58 additions and 23 deletions.
29 changes: 26 additions & 3 deletions pystemon.py
@@ -107,7 +107,7 @@ def seenPastie(self, pastie_id):
# look on the filesystem. # LATER remove this filesystem lookup as it will give problems on long term
if yamlconfig['archive']['save-all']:
# check if the pastie was already saved on the disk
if os.path.exists(self.archive_dir + os.sep + pastie_id):
if os.path.exists(self.archive_dir + os.sep + self.pastieIdToFilename(pastie_id)):
return True

def seenPastieAndRemember(self, pastie_id):
@@ -120,6 +120,9 @@ def seenPastieAndRemember(self, pastie_id):
self.seen_pasties.appendleft(pastie_id)
return False

def pastieIdToFilename(self, pastie_id):
return pastie_id.replace('/', '_')


class Pastie():
def __init__(self, site, pastie_id):
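
The helper added above exists because some of the new sites expose pastie ids that contain slashes — snipt.net, for instance, appears to use user/slug style ids judging by its archive-regex further down — and a raw id would otherwise be treated as a sub-directory when seenPastie() and savePastie() build file paths. A minimal standalone sketch of the mapping (the function mirrors the committed pastieIdToFilename(); the example ids are made up for illustration):

import os

def pastie_id_to_filename(pastie_id):
    # Same idea as Site.pastieIdToFilename() above: replace the path
    # separator so every id maps to a single flat file name.
    return pastie_id.replace('/', '_')

# Made-up ids for illustration; the second one mimics a slash-containing id
for pastie_id in ('Ab12Cd', 'someuser/some-snippet'):
    print(os.path.join('archive', pastie_id_to_filename(pastie_id)))
# archive/Ab12Cd
# archive/someuser_some-snippet
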
@@ -135,7 +138,7 @@ def fetchPastie(self):
def savePastie(self, directory):
if not self.pastie_content:
raise SystemExit('BUG: Content not set, cannot save')
f = open(directory + os.sep + self.id, 'w')
f = open(directory + os.sep + self.site.pastieIdToFilename(self.id), 'w')
f.write(self.pastie_content.encode('utf8')) # TODO error checking

def fetchAndProcessPastie(self):
@@ -238,7 +241,7 @@ def fetchPastie(self):

class PastieCdvLt(Pastie):
'''
Custom Pastie class for the pastesite.com site
Custom Pastie class for the cdv.lt site
This class overloads the fetchPastie function to do the form submit to get the raw pastie
'''
def __init__(self, site, pastie_id):
@@ -256,6 +259,26 @@ def fetchPastie(self):
return self.pastie_content


class PastieSniptNet(Pastie):
'''
Custom Pastie class for the snipt.net site
This class overloads the fetchPastie function to do the form submit to get the raw pastie
'''
def __init__(self, site, pastie_id):
Pastie.__init__(self, site, pastie_id)

def fetchPastie(self):
downloaded_page, headers = downloadUrl(self.url)
htmlDom = BeautifulSoup(downloaded_page)
# search for <textarea class="raw">
textarea = htmlDom.first('textarea', {'class': 'raw'})
if textarea:
# replace html entities like &gt;
decoded = BeautifulSoup(textarea.contents[0], convertEntities=BeautifulSoup.HTML_ENTITIES)
self.pastie_content = decoded.contents[0]
return self.pastie_content
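
The new PastieSniptNet class pulls the paste body out of the <textarea class="raw"> element and decodes HTML entities such as &gt; before storing it. Below is a minimal standalone sketch of the same extraction written against the BeautifulSoup 4 API (the committed code uses the older BeautifulSoup 3 calls first() and convertEntities, so treat the BS4 calls as an illustrative alternative, not the project's code); the page snippet is made up:

from bs4 import BeautifulSoup

# Made-up snipt.net-style fragment for illustration only
sample_page = '<textarea class="raw">print(&quot;hi&quot;) &gt; out.txt</textarea>'

soup = BeautifulSoup(sample_page, 'html.parser')
textarea = soup.find('textarea', {'class': 'raw'})
if textarea:
    # BeautifulSoup 4 already decodes entities like &gt; while parsing,
    # which is the step convertEntities performs in the code above
    pastie_content = textarea.get_text()
    print(pastie_content)  # print("hi") > out.txt
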


class ThreadPasties(threading.Thread):
'''
Instances of these threads are responsible for downloading all the individual pastes
52 changes: 32 additions & 20 deletions pystemon.yaml
@@ -1,6 +1,6 @@
archive:
save: yes # Keep
save-all: yes # Keep a copy of all pasties
save-all: no # Keep a copy of all pasties
dir: "alerts" # Directory where matching pasties should be kept
dir-all: "archive" # Directory where all pasties should be kept (if save-all is set to yes)
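
This commit also flips save-all from yes to no, so by default only matching pasties are archived. A minimal sketch of how these settings are meant to combine, judging by the comments above (the yamlconfig dict mirrors the parsed file; the helper itself is hypothetical, not pystemon's code):

yamlconfig = {'archive': {'save': True, 'save-all': False,
                          'dir': 'alerts', 'dir-all': 'archive'}}

def target_directories(matched):
    # Pasties matching a search rule are kept under 'dir' when save is on;
    # with save-all enabled every downloaded pastie also goes to 'dir-all'.
    dirs = []
    if matched and yamlconfig['archive']['save']:
        dirs.append(yamlconfig['archive']['dir'])
    if yamlconfig['archive']['save-all']:
        dirs.append(yamlconfig['archive']['dir-all'])
    return dirs

print(target_directories(matched=True))   # ['alerts']
print(target_directories(matched=False))  # []
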

@@ -50,30 +50,22 @@ site:
archive-url: 'http://pastie.org/pastes'
archive-regex: '<a href="http://pastie.org/pastes/(\d{7})">'
download-url: 'http://pastie.org/pastes/{id}/text'
update-max: 20
update-min: 10

nopaste.me:
archive-url: 'http://nopaste.me/recent'
archive-regex: '<a href="http://nopaste.me/paste/([a-zA-Z0-9]+)">'
download-url: 'http://nopaste.me/download/{id}.txt'
update-max: 20
update-min: 10

slexy.org:
archive-url: 'http://slexy.org/recent'
archive-regex: '<a href="/view/([a-zA-Z0-9]+)">View paste</a>'
download-url: 'http://slexy.org/raw/{id}'
update-max: 20
update-min: 10

pastesite.com:
pastie-classname: PastiePasteSiteCom
archive-url: 'http://pastesite.com/recent'
archive-regex: '<a href="(\d+)" title="View this Paste'
download-url: 'http://pastesite.com/plain/{id}.txt'
update-max: 20
update-min: 10

gist.github.com:
archive-url: 'https://gist.github.com/gists'
@@ -85,20 +77,40 @@ site:
archive-regex: '<a href="http://codepad.org/([a-zA-Z0-9]+)">view'
download-url: 'http://codepad.org/{id}/raw.txt'

cdv.lt: # FIXME write custom class to extract data from textarea
pastie-classname: PastieCdvLt
archive-url: 'http://cdv.lt/snippets'
archive-regex: '<a href="/cv/([a-zA-Z0-9]+)">'
download-url: 'http://cdv.lt/getData?sn={id}&callback=json1'

snipt.net:
pastie-classname: PastieSniptNet
#archive-url: 'https://snipt.net/'
#archive-regex: '<h1><a href="/([a-zA-Z0-9-_/]+)/">'
archive-url: 'https://snipt.net/?rss'
archive-regex: '<link>https://snipt.net/(.+)/</link>'
download-url: 'https://snipt.net/{id}/'

# safebin.net: # FIXME not finished
# archive-url: 'http://safebin.net/?archive'
# archive-regex: '<a title="[a-zA-Z0-9 :,]+" href="/([0-9]+)">'
# download-url: 'http://safebin.net/{id}'
# update-max: 60
# update-min: 50
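
Each site entry above follows the same pattern: archive-url points at the recent-pastes listing, the first capture group of archive-regex yields a pastie id, and that id is substituted into the {id} placeholder of download-url, with an optional pastie-classname when a custom fetch such as PastieCdvLt or PastieSniptNet is needed and optional update-max/update-min values that appear to bound how often the archive page is polled. A minimal sketch of that id-extraction step, assuming the capture-group-to-{id} substitution implied by the entries above; the HTML snippet is made up for illustration:

import re

archive_regex = r'<a href="http://nopaste.me/paste/([a-zA-Z0-9]+)">'
download_url = 'http://nopaste.me/download/{id}.txt'

# Made-up archive page fragment for illustration
sample_archive_html = (
    '<a href="http://nopaste.me/paste/Ab12Cd">paste one</a>'
    '<a href="http://nopaste.me/paste/Zz99Xx">paste two</a>'
)

for pastie_id in re.findall(archive_regex, sample_archive_html):
    # Build the raw-download URL for every id found on the archive page
    print(download_url.format(id=pastie_id))
# http://nopaste.me/download/Ab12Cd.txt
# http://nopaste.me/download/Zz99Xx.txt
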


# TODO
# http://hastebin.com/ # no list of last pastes
# http://www.safebin.net/ # more complex site
# http://www.heypasteit.com/ # http://www.heypasteit.com/clip/0IZA => incremental

# http://hastebin.com/ # no list of last pastes
# http://sebsauvage.net/paste/ # no list of last pastes
# https://snipt.net/
# http://www.safebin.net/
# http://cdv.lt/
# http://tny.cz/
# https://pastee.org/
# http://slexy.org/
# http://paste2.org/
# http://0bin.net/
# http://markable.in/
# http://www.heypasteit.com/
# http://tny.cz/ # no list of last pastes
# https://pastee.org/ # no list of last pastes
# http://paste2.org/ # no list of last pastes
# http://0bin.net/ # no list of last pastes
# http://markable.in/ # no list of last pastes


#####
# Configuration section to configure proxies
