Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AttributeError: 'BossSpider' object has no attribute 'settings' #301

Open
gaye746560359 opened this issue Sep 29, 2024 · 0 comments
Open

Comments

@gaye746560359
Copy link

gaye746560359 commented Sep 29, 2024

Description

AttributeError: 'BossSpider' object has no attribute 'settings'

scrapy v2.11.2
scrapy-redis v0.9.1
python v3.12

Step to Reproduce

`from scrapy import signals
import scrapy
from DrissionPage._pages.chromium_page import ChromiumPage
from scrapy import cmdline
from scrapy_redis.spiders import RedisSpider

class BossSpider(RedisSpider):
name = "boss"
# start_urls = [
# "https://www.zhipin.com/web/geek/job?city=101250100&position=100309,100301,100302,100303,100305,100304,100703&page=1",
# ]
redis_key = 'boss:start_urls' # Redis 中的请求队列键

def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.edge = ChromiumPage()  # 实例化浏览器

def __del__(self):
    self.edge.quit()

@classmethod
def from_crawler(cls, crawler):
    # This method is used by Scrapy to create your spiders.
    s = cls()
    crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
    return s

def spider_opened(self, spider):
    spider.logger.info("启用DrissionPage浏览器.")

def spider_closed(self, spider):
    self.edge.quit()
    spider.logger.info("关闭DrissionPage浏览器.")

def parse(self, response):
    job_list = self.edge.eles("x://li[@class='job-card-wrapper']")
    print("job元素数:", len(job_list))
    for job in job_list:
        job_area = job.ele("x://span[contains(@class, 'job-area')]").text
        salary = job.ele("x://span[@class='salary']").text
        tags = job.ele("x://ul[contains(@class, 'tag-list')]/li").text
        info_public = job.ele("x://div[@class='info-public']").text
        company_name = job.ele("x://h3[@class='company-name']/a").text
        info_desc = job.ele("x://div[contains(@class, 'info-desc')]").text
        job_name_ele = job.ele("x://span[@class='job-name']")
        job_name = job_name_ele.text
        job_detail_page = job_name_ele.click.for_new_tab()
        job_detail_page.wait.load_start()
        job_detail = job_detail_page.ele("x://div[@class='job-sec-text']").text
        login_ico = job_detail_page.ele("x://i[@class='icon-close']")
        if login_ico:
            login_ico.click()
        job_detail_page.close()


        yield {
            'job_name': job_name,
            'job_detail': job_detail,
            'job_area': job_area,
            'salary': salary,
            'tags': tags,
            'info_public': info_public,
            'company_name': company_name,
            'info_desc': info_desc,
        }

    pageNum = int(self.edge.ele("x://div[@class='options-pages']/a[last()-1]").text) - 1
    # 提取当前页面的页码
    current_page = int(response.url.split('page=')[-1])
    # 构造下一页的 URL,增加页码
    next_page = current_page + 1
    print(f"正在爬取第{next_page}页数据")
    if next_page <= pageNum:
        next_page_url = f"{next_page}"
        yield scrapy.Request(url=next_page_url, callback=self.parse)

cmdline.execute('scrapy crawl boss'.split())
`

Error log

`2024-09-29 20:28:22 [scrapy.core.engine] INFO: Spider opened
2024-09-29 20:28:22 [scrapy.core.engine] INFO: Closing spider (shutdown)
2024-09-29 20:28:22 [scrapy.core.engine] ERROR: Scraper close failure
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 1074, in _runCallbacks
current.result = callback( # type: ignore[misc]
File "D:\pyspider\venv\Lib\site-packages\scrapy\core\engine.py", line 439, in
dfd.addBoth(lambda _: self.scraper.close_spider(spider))
File "D:\pyspider\venv\Lib\site-packages\scrapy\core\scraper.py", line 125, in close_spider
raise RuntimeError("Scraper slot not assigned")
RuntimeError: Scraper slot not assigned
2024-09-29 20:28:23 [boss] INFO: 关闭DrissionPage浏览器.
2024-09-29 20:28:23 [scrapy.utils.signal] ERROR: Error caught on signal handler: <bound method CoreStats.spider_closed of <scrapy.extensions.corestats.CoreStats object at 0x0000018CFA7E0980>>
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\utils\defer.py", line 348, in maybeDeferred_coro
result = f(*args, **kw)
File "D:\pyspider\venv\Lib\site-packages\pydispatch\robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "D:\pyspider\venv\Lib\site-packages\scrapy\extensions\corestats.py", line 30, in spider_closed
elapsed_time = finish_time - self.start_time
TypeError: unsupported operand type(s) for -: 'datetime.datetime' and 'NoneType'
2024-09-29 20:28:23 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'log_count/DEBUG': 3, 'log_count/ERROR': 2, 'log_count/INFO': 8}
2024-09-29 20:28:23 [scrapy.core.engine] INFO: Spider closed (shutdown)
Unhandled error in Deferred:
2024-09-29 20:28:23 [twisted] CRITICAL: Unhandled error in Deferred:

Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 265, in crawl
return self._crawl(crawler, *args, **kwargs)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 269, in _crawl
d = crawler.crawl(*args, **kwargs)
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2287, in unwindGenerator
return _cancellableInlineCallbacks(gen)
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2197, in _cancellableInlineCallbacks
_inlineCallbacks(None, gen, status, _copy_context())
--- ---
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2014, in _inlineCallbacks
result = context.run(gen.send, result)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
builtins.AttributeError: 'BossSpider' object has no attribute 'settings'

2024-09-29 20:28:23 [twisted] CRITICAL:
Traceback (most recent call last):
File "D:\pyspider\venv\Lib\site-packages\twisted\internet\defer.py", line 2014, in _inlineCallbacks
result = context.run(gen.send, result)
File "D:\pyspider\venv\Lib\site-packages\scrapy\crawler.py", line 160, in crawl
yield self.engine.open_spider(self.spider, start_requests)
AttributeError: 'BossSpider' object has no attribute 'settings'`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant