diff --git a/.travis.yml b/.travis.yml
index 6f57fbc..8434d19 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,15 +1,21 @@
 sudo: false
-language: node_js
-cache:
-  yarn: true
-  directories:
-    - node_modules
-notifications:
-  email: false
-node_js:
-  - 'stable'
+# NOTE(review): Travis expects a single `language` value; confirm that both
+# runtimes are actually provisioned when a list is given here.
+language:
+  - python
+  - node_js
 before_script:
-  - npm test
-branches:
-  except:
-    - /^v\d+\.\d+\.\d+$/
+  - npm install
+python:
+  - 3.5
+install:
+  - pip install -r requirements.txt
+# Travis reads the build steps from `script` (singular).
+script:
+  - cd dream11/
+  # Start the Scrapyd daemon; the curl call below targets its API on port 6800.
+  # NOTE(review): scrapyd is not listed in requirements.txt; confirm it is installed.
+  - scrapyd &
+  - cd ..
+  - npm start &
+  - curl http://localhost:6800/schedule.json -d project=default -d spider=linkspider
\ No newline at end of file
diff --git a/crawler.py b/crawler.py
new file mode 100644
index 0000000..1e5e3f8
--- /dev/null
+++ b/crawler.py
@@ -0,0 +1,38 @@
+"""Scrapy spider that records the smallest code found on each crawled page."""
+import scrapy
+from heapq import heappush, heappop
+
+# Module-level min-heap holding the smallest code seen on each parsed page.
+code_list = []
+
+
+class MyBaseSpider(scrapy.Spider):
+    """Follow every link from the start URL and track the smallest code."""
+
+    name = 'spider'
+    start_urls = ['http://localhost:8080']
+    custom_settings = {
+        # A real boolean; string values such as 'false' are not reliably parsed.
+        'LOG_ENABLED': False,
+        'CONCURRENT_REQUESTS': 2,
+        'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
+    }
+
+    def __init__(self, url=None, **kwargs):
+        """Store the optional ``url`` argument and run Scrapy's own setup."""
+        super(MyBaseSpider, self).__init__(**kwargs)
+        self.something = url
+
+    def parse(self, response):
+        """Record the page's smallest code, then follow every link."""
+        page_codes = response.css('div.codes > h1 ::text').extract()
+        # Pages without codes are skipped instead of raising IndexError.
+        if page_codes:
+            heappush(code_list, min(page_codes))
+        for next_page in response.css('a'):
+            yield response.follow(next_page, callback=self.parse)
+
+    def closed(self, reason):
+        """Print the smallest collected code, if any, when the crawl ends."""
+        if code_list:
+            print(heappop(code_list))
diff --git a/crawler.test.py b/crawler.test.py
new file mode 100644
index 0000000..596b520
--- /dev/null
+++ b/crawler.test.py
@@ -0,0 +1,19 @@
+import unittest
+from scrapy.crawler import CrawlerProcess
+from crawler import MyBaseSpider
+
+
+crawlerProcess = CrawlerProcess()
+# crawlerProcess.install()
+# crawlerProcess.configure()
+
+
+class TestStringMethods(unittest.TestCase):
+
+    def test_isupper(self):
+        """Run MyBaseSpider in-process; the test passes if nothing raises."""
+        crawlerProcess.crawl(MyBaseSpider)
+        crawlerProcess.start()
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a247d0e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Scrapy == 1.4.0
\ No newline at end of file