Skip to content

Commit

Permalink
Try to reduce memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
kelvinn committed Oct 28, 2023
1 parent 5678297 commit 1538197
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ project.egg-info
eggs-404627.zip
.vscode
node_modules
crawls/
15 changes: 15 additions & 0 deletions price_monitor/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@

BOT_NAME = 'Outdoor Price Monitor Side Project - [email protected]'

# Quieter logging: INFO suppresses Scrapy's per-request DEBUG lines.
LOG_LEVEL = "INFO"

# Disable the cookies middleware entirely; no per-session state is tracked.
COOKIES_ENABLED = False

# Failed requests are not retried.
RETRY_ENABLED = False

# HTTP 3xx responses are not followed.
REDIRECT_ENABLED = False

# A positive DEPTH_PRIORITY combined with FIFO disk/memory queues switches the
# crawl from the default depth-first order to breadth-first.
# NOTE(review): this is the combination Scrapy's "Broad Crawls" guide suggests
# for keeping memory usage down, which matches this commit's stated intent
# ("Try to reduce memory usage") — confirm against the Scrapy docs for the
# version pinned by this project.
DEPTH_PRIORITY = 1
SCHEDULER_DISK_QUEUE = "scrapy.squeues.PickleFifoDiskQueue"
SCHEDULER_MEMORY_QUEUE = "scrapy.squeues.FifoMemoryQueue"

DOWNLOADER_MIDDLEWARES = {
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 100,
'scrapy_useragents.downloadermiddlewares.useragents.UserAgentsMiddleware': 500,
Expand Down Expand Up @@ -67,3 +79,6 @@
# Cached responses are considered fresh for one day (86400 s).
HTTPCACHE_EXPIRATION_SECS = 86400
# Store every response in the cache, even ones the policy would normally skip.
HTTPCACHE_ALWAYS_STORE = True
# Ignore these Cache-Control directives from servers so responses are cached anyway.
HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS = ["no-store", "no-cache", "must-revalidate"]

# NOTE(review): hardcoded telnet-console credentials committed to the repo.
# Scrapy's telnet console exposes an interactive Python shell into the running
# crawler, and 'scrapy'/'scrapy' is trivially guessable. Prefer reading these
# from environment variables (or disabling TELNETCONSOLE_ENABLED) — confirm
# the console is bound to localhost only before deploying.
TELNETCONSOLE_USERNAME = 'scrapy'
TELNETCONSOLE_PASSWORD = 'scrapy'
3 changes: 2 additions & 1 deletion price_monitor/spiders/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

class AmazonSpider(BaseSpider):
name = "amazon.com"

custom_settings = {'JOBDIR': f'crawls/{name}'}

def parse_detail_page(self, response):
item = response.meta.get('item', {})
item['url'] = response.url
Expand Down
3 changes: 2 additions & 1 deletion price_monitor/spiders/backcountry.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
class BackcountrySpider(CrawlSpider):
name = "backcountry.com"
link_extractor = LinkExtractor()

custom_settings = {'JOBDIR': f'crawls/{name}'}

allowed_domains = ['backcountry.com']
base_url = "https://www.backcountry.com/"
start_urls = [
Expand Down
7 changes: 4 additions & 3 deletions price_monitor/spiders/montbell.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
class MontbellSpider(CrawlSpider):
name = "montbell.us"
link_extractor = LinkExtractor()

custom_settings = {'JOBDIR': f'crawls/{name}'}

allowed_domains = ['montbell.us']
base_url = "https://www.montbell.us/"
start_urls = [
Expand All @@ -27,8 +28,8 @@ class MontbellSpider(CrawlSpider):

def parse_detail_page(self, response):
    """Extract a product item from a Montbell detail page.

    Returns a plain dict with 'url', 'title' and 'price' keys. The diff
    rendering had left both the pre- and post-commit assignment lines in
    place (duplicated 'url'/'title' assignments); only the post-commit
    str(...)-wrapped versions are kept here.
    """
    # Fresh dict each call; the previous response.meta carry-over was
    # deliberately dropped by the author (see the inline comment it replaced).
    item = {}
    # str(...) forces plain-str copies rather than holding parsel/Scrapy
    # objects — presumably part of this commit's memory-reduction effort;
    # TODO(review) confirm.
    item['url'] = str(response.url)
    item['title'] = str(response.css(TITLE_SELECTOR).extract_first("").strip())
    item['price'] = self.get_price(response)

    return item
Expand Down
1 change: 1 addition & 0 deletions price_monitor/spiders/patagonia.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
class PatagoniaSpider(CrawlSpider):
name = "patagonia.com"
link_extractor = LinkExtractor()
custom_settings = {'JOBDIR': f'crawls/{name}'}

allowed_domains = ['patagonia.com']
base_url = "https://www.patagonia.com/"
Expand Down
3 changes: 2 additions & 1 deletion price_monitor/spiders/rei.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
class ReiSpider(CrawlSpider):
name = "rei.com"
link_extractor = LinkExtractor()

custom_settings = {'JOBDIR': f'crawls/{name}'}

allowed_domains = ['rei.com']
base_url = "https://www.rei.com/"
start_urls = [
Expand Down
3 changes: 2 additions & 1 deletion price_monitor/spiders/trekkinn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
class TrekkinnSpider(CrawlSpider):
name = "trekkinn.com"
link_extractor = LinkExtractor()

custom_settings = {'JOBDIR': f'crawls/{name}'}

allowed_domains = ['www.trekkinn.com']
base_url = "https://www.trekkinn.com/"
start_urls = [
Expand Down

0 comments on commit 1538197

Please sign in to comment.