# pystemon.yaml

#network:		# Network settings
#  ip: '1.1.1.1'	# Specify source IP address if you want to bind on a specific one

# Local storage of downloaded pasties.
# Booleans are written as true/false so they parse the same under YAML 1.1 and 1.2.
archive:
  save: true            # Keep matching pasties (counterpart of save-all; see 'dir')
  save-all: false       # Keep a copy of all pasties
  dir: 'alerts'         # Directory where matching pasties should be kept
  dir-all: 'archive'    # Directory where all pasties should be kept (if save-all is set to true)
  compress: true        # Store the pasties compressed

# Database back-ends for pastie metadata.
db:
  sqlite3:              # Store information about the pastie in a database
    enable: false       # Activate this DB engine   # NOT FULLY IMPLEMENTED
    file: 'db.sqlite3'  # The filename of the database

# Optional redis integration.
redis:
  queue: false          # Toggle PUSH to redis queue
  server: 'localhost'   # Redis server to connect to
  port: 6379            # Redis server port
  database: 10          # NOTE(review): presumably the numeric redis database index - confirm

# Email alerting.
# String values are quoted so no parser can retype them.
email:
  alert: false          # Enable/disable email alerts
  from: 'alert@example.com'  # Sender address of the alert emails
  to: 'alert@example.com'    # Recipient address of the alert emails
  server: '127.0.0.1'   # Address of the server (hostname or IP)
  port: 25              # Outgoing SMTP port: 25, 587, ...
  username: ''          # (optional) Username for authentication. Leave blank for no authentication.
  password: ''          # (optional) Password for authentication. Leave blank for no authentication.
  subject: '[pystemon] - {subject}'  # Subject template; {subject} is a placeholder

#####
# Definition of regular expressions to search for in the pasties
#
search:
# Template for an entry (all keys except 'search' are optional):
#
#  - description: ''    # (optional) Human-readable description (used in alerting)
#    search: ''         # The regular expression to search for
#    count: ''          # (optional) How many hits it should have to be interesting
#    exclude: ''        # (optional) Do not alert if this regular expression matches
#    regex-flags: ''    # (optional) Regular expression flags to give to the find function.
#                       #            Default = re.IGNORECASE
#                       #            Set to 0 to have no flags set
#                       #            See http://docs.python.org/2/library/re.html#re.DEBUG for more info.
#                       #            Warning: setting this overrides the default
#                       #  example: 're.MULTILINE + re.DOTALL + re.IGNORECASE'
#    to: ''             # (optional) Additional recipients for email alert, comma-separated list

  # example.com preceded by a non-alphanumeric character
  - search: '[^a-zA-Z0-9]example\.com'

  # foobar.com preceded by a non-alphanumeric character
  - search: '[^a-zA-Z0-9]foobar\.com'

  # Entry using the optional description, count and exclude keys
  - description: 'Download (non-porn)'
    search: 'download'
    count: 4
    exclude: 'porn|sex|teen'

#####
# Configuration section for the paste sites
#
threads: 1              # Number of download threads per site
site:
# Template for a site entry:
#
#  example.com:
#    archive-url:       # The URL where the list of latest pasties is present
#                       # example: 'http://pastebin.com/archive'
#    archive-regex:     # A regular expression to extract the pastie-id from the page.
#                       # Do not forget the () to extract the pastie-id
#                       # example: '<a href="/(\w{8})">.+</a></td>'
#    download-url:      # URL for the raw pastie.
#                       # Should contain {id} at the place where the ID of the pastie needs to be inserted
#                       # example: 'http://pastebin.com/raw.php?i={id}'
#    update-max: 40     # Check for new pasties every X seconds, where X is a random
#    update-min: 30     # number chosen between update-min and update-max
#    pastie-classname:  # OPTIONAL: The name of a custom class that inherits from Pastie
#                       # This is practical for sites that require custom fetchPastie() functions

  # pastebin.com, using the public archive page.
  pastebin.com:
    enable: true
    archive-url: 'http://pastebin.com/archive'            # Page listing the latest pasties
    archive-regex: '<a href="/(\w{8})">.+</a></td>'       # Group 1 captures the pastie-id
    download-url: 'http://pastebin.com/raw.php?i={id}'    # {id} is replaced by the pastie-id
    update-max: 50                                        # Upper bound (seconds) of the random update interval
    update-min: 40                                        # Lower bound (seconds) of the random update interval

# See https://pastebin.com/api_scraping_faq , you will need a pro account on pastebin
# You need to whitelist your source IP: "Our scraping API is only available for LIFETIME PRO members, and only for those who have their IP whitelisted."
  # pastebin.com via the scraping API (requires a whitelisted source IP).
  pastebin.com_pro:
    enable: false
    archive-url: 'https://scrape.pastebin.com/api_scraping.php?limit=500'   # Listing of the latest pasties
    archive-regex: '"key": "(.+)",'                                         # Group 1 captures the pastie key
    # Item endpoint on the same HTTPS scraping host as archive-url
    # (previously plain HTTP on pastebin.com).
    download-url: 'https://scrape.pastebin.com/api_scrape_item.php?i={id}'
    update-max: 50                                                          # Upper bound (seconds) of the random update interval
    update-min: 40                                                          # Lower bound (seconds) of the random update interval

  # slexy.org; no update-min/update-max set for this site.
  slexy.org:
    enable: true
    archive-url: 'http://slexy.org/recent'                            # Page listing the latest pasties
    archive-regex: '<a href="/view/([a-zA-Z0-9]+)">View paste</a>'    # Group 1 captures the pastie-id
    download-url: 'http://slexy.org/raw/{id}'                         # {id} is replaced by the pastie-id

  # gist.github.com (disabled); no update-min/update-max set for this site.
  gist.github.com:
    enable: false
    archive-url: 'https://gist.github.com/gists'          # Page listing the latest gists
    archive-regex: '<span><a href="/([0-9]+)">gist'       # Group 1 captures the numeric gist id
    download-url: 'https://raw.github.com/gist/{id}'      # {id} is replaced by the gist id

  # codepad.org (disabled); no update-min/update-max set for this site.
  codepad.org:
    enable: false
    archive-url: 'http://codepad.org/recent'                              # Page listing the latest pasties
    archive-regex: '<a href="http://codepad.org/([a-zA-Z0-9]+)">view'     # Group 1 captures the pastie-id
    download-url: 'http://codepad.org/{id}/raw.txt'                       # {id} is replaced by the pastie-id

  safebin.net:  # FIXME not finished
    enable: false
    archive-url: 'http://safebin.net/?archive'                          # Page listing the latest pasties
    archive-regex: '<a title="[a-zA-Z0-9 :,]+" href="/([0-9]+)">'       # Group 1 captures the numeric pastie-id
    download-url: 'http://safebin.net/{id}'                             # {id} is replaced by the pastie-id
    update-max: 60                                                      # Upper bound (seconds) of the random update interval
    update-min: 50                                                      # Lower bound (seconds) of the random update interval


# TODO
# http://www.safebin.net/       # more complex site
# http://www.heypasteit.com/    # http://www.heypasteit.com/clip/0IZA => incremental

# http://hastebin.com/          # no list of last pastes
# http://sebsauvage.net/paste/  # no list of last pastes
# http://tny.cz/                # no list of last pastes
# https://pastee.org/           # no list of last pastes
# http://paste2.org/            # no list of last pastes
# http://0bin.net/              # no list of last pastes
# http://markable.in/           # no list of last pastes


#####
# Configuration section to configure proxies
# Currently only HTTP proxies are permitted
#
proxy:
  random: false         # NOTE(review): presumably enables picking a random proxy from 'file' - confirm
  file: 'proxies.txt'   # File containing the proxies

#####
# Configuration section for User-Agents
#
user-agent:
  random: false             # NOTE(review): presumably enables picking a random User-Agent from 'file' - confirm
  file: 'user-agents.txt'   # File containing the User-Agent strings