This repository has been archived by the owner on Jun 13, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5679d99
commit c024748
Showing
8 changed files
with
2,560 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import config | ||
import re | ||
import math | ||
import json | ||
import random | ||
import requests | ||
import time | ||
|
||
class ImageHandler(object):
    """Download remote images and store them in a local directory.

    Keyword Args:
        path: Prefix prepended to every generated file name. It is joined by
            plain string concatenation, so it should end with a path
            separator. Defaults to ``config.img_base_path``.
    """

    # Alphabet and length used for randomly generated file ids.
    _ID_CHARS = 'QWERTYUPLKJHGFDSAZXCVBNMqwertyuplkjhgfdsazxcvbnm123456789'
    _ID_LENGTH = 15
    # Captures the value of the ``wx_fmt`` parameter found in the image URL.
    _FORMAT_RE = re.compile(r'wx_fmt=(\w+)')

    def __init__(self, **kwargs):
        super(ImageHandler, self).__init__()
        self.path = kwargs.get('path', config.img_base_path)

    def write_image(self, image):
        """Fetch ``image`` and save it under ``self.path``.

        Args:
            image: URL of the image to download. Any falsy value (``None``,
                ``''``) means "nothing to download".

        Returns:
            ``'/imgs' + <id> + <extension>`` for the stored image, or ``''``
            when ``image`` is falsy.
        """
        if not image:
            return ''

        _id = self._generate_image_id()
        end = self._image_extension(image)
        print(image)

        # Fetch before opening the file so that a failed request does not
        # leave an empty file behind.
        r = requests.get(image)
        # ``r.content`` is bytes, so the file must be opened in binary mode;
        # the context manager closes it even if the write fails.
        with open(self.path + str(_id) + end, 'wb') as f_img:
            f_img.write(r.content)

        # Pause between downloads (presumably to throttle requests -- confirm).
        time.sleep(3)

        # If you want to change where the images are put, change the config
        # file and this returned path together.
        # NOTE(review): there is no separator between '/imgs' and the id
        # (result looks like '/imgsAbC....png'); kept as-is because callers
        # may rely on it -- confirm whether '/imgs/' was intended.
        return '/imgs' + str(_id) + end

    @staticmethod
    def _image_extension(image):
        """Return the file extension (with leading dot) for an image URL.

        The extension is taken from the first ``wx_fmt=<value>`` occurrence in
        the URL. When the parameter is missing, or its value is ``1``, the
        extension falls back to ``'.png'``.
        """
        found = ImageHandler._FORMAT_RE.search(image)
        if found is None or found.group(1) == '1':
            return '.png'
        return '.' + found.group(1)

    def _generate_image_id(self):
        """Return a random 15-character id drawn from ``_ID_CHARS``."""
        return ''.join(
            random.choice(self._ID_CHARS) for _ in range(self._ID_LENGTH)
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,4 @@ | ||
# WechatScraper | ||
A Python crawler for WeChat official accounts, using the Sogou search engine. | ||
|
||
### Needed | ||
Python + Selenium + PhantomJS + MySQL | ||
|
||
### MySQL | ||
sql folder is for mysql. | ||
|
||
Connect to MySQL and type ``` source ```, then drag the SQL file into the command line; the table will be created automatically. | ||
# A Python crawler for WeChat official accounts, using the Sogou search engine. | ||
# selenium needed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,20 @@ | ||
# -*- coding: utf-8 -*- | ||
# config file | ||
|
||
import pymysql.cursors | ||
|
||
# Sogou WeChat search URL templates.
# NOTE(review): the bare `query` and `page` parameters carry no value here;
# presumably the caller fills them in before requesting -- confirm.
# `type=2` is used for article search and `type=1` for the "gzh" search
# (presumably official accounts -- confirm).
article_search_url = 'http://weixin.sogou.com/weixin?oq=&query&_sug_type_=1&sut=0&lkt=0%2C0%2C0&s_from=input&ri=17&_sug_=n&type=2&sst0=1499142762773&page&ie=utf8&p=40040108&dp=1&w=01015002&dr=1'

gzh_search_url = 'http://weixin.sogou.com/weixin?type=1&s_from=input&query&ie=utf8&_sug_=n&_sug_type_=&w=01019900&sut=10144&sst0=1500528877030&lkt=1%2C1500528876895%2C1500528876895&page'

# test path
# Directory prefix for downloaded images; keep the trailing slash because the
# value is concatenated directly with the file name.
img_base_path = './imgs/'

# MySQL connection settings (presumably passed to `pymysql.connect` -- confirm).
# Replace the placeholder credentials and database name with your own.
db_config = {
    'host': '127.0.0.1',  # host
    'port': 3306,  # port
    'user': 'root',  # database username
    'password': '123456',  # database password
    # Only one 'db' entry is kept: with a duplicated key the last value wins,
    # so the earlier entry was dead.
    'db': 'DATABASE',  # database name
    'charset': 'utf8mb4',  # database charset
    'cursorclass': pymysql.cursors.DictCursor
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.