Skip to content

Commit

Permalink
fix bug
Browse files: browse the repository at this point in the history
  • Loading branch information
kangvcar committed Jul 20, 2020
1 parent 4dc74de commit 24209ce
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 299 deletions.
2 changes: 1 addition & 1 deletion Spiders/bilibili/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def get_all_bili_history(self):
headers = self.get_header()
# history = {'all': []}
history = []
for page_num in self.MAX_PAGE:
for page_num in range(self.MAX_PAGE):
time.sleep(0.6)
url = 'https://api.bilibili.com/x/v2/history?pn={pn}&ps={ps}&jsonp=jsonp'.format(pn=page_num, ps=self.PAGE_PER_NUM)
result = self.req_get(headers, url)
Expand Down
4 changes: 2 additions & 2 deletions Spiders/cloudmusic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def user_login_as_cellphone(self):
else:
print('登录失败')
userid = response.json()['account']['id']
print('userid = ' + str(userid))
# print('userid = ' + str(userid))
return userid

## 使用 ‘邮箱’ + ‘密码’ 登录网易云音乐
Expand All @@ -49,7 +49,7 @@ def user_login_as_email(self):
else:
print('登录失败')
userid = response.json()['account']['id']
print('userid = ' + str(userid))
# print('userid = ' + str(userid))
return userid

## 把获取的个人信息写入json文件
Expand Down
52 changes: 26 additions & 26 deletions Spiders/qqfriend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bs4 import BeautifulSoup
import lxml
# import openpyxl
from openpyxl import Workbook
# from openpyxl import Workbook

class Qqfriend(object):
def __init__(self):
Expand All @@ -32,32 +32,32 @@ def __init__(self):
self.root.mainloop()

# 存储为excel
def callback_excel(self):
self.driver.switch_to_frame('webpay-iframe')
iframe = self.driver.find_element_by_xpath('//*[@id="midas-webpay-main-1450000186"]/div[2]/div[1]/iframe')
self.driver.switch_to_frame(iframe)
html = self.driver.page_source
soup = BeautifulSoup(html, "lxml")
a = soup.find_all(attrs={'class': 'icon-friend-s'})
wb = Workbook()
ws = wb.active
ws.append(["raw", "group", "view_name", "qqnumber"])
for i in a:
if i.next_sibling != ' {{el.name}}({{el.qq}})':
k = 0
for x in i.next_sibling:
if x == '(':
f = k
if x == ')':
l = k
k = k + 1
ws.append([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
print([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
wb.save(asksaveasfilename(defaultextension='.xlsx', filetypes=[('Excel 工作簿', '*.xlsx')]))
# def callback_excel(self):
# self.driver.switch_to_frame('webpay-iframe')
# iframe = self.driver.find_element_by_xpath('//*[@id="midas-webpay-main-1450000186"]/div[2]/div[1]/iframe')
# self.driver.switch_to_frame(iframe)
# html = self.driver.page_source
# soup = BeautifulSoup(html, "lxml")
# a = soup.find_all(attrs={'class': 'icon-friend-s'})
# wb = Workbook()
# ws = wb.active
# ws.append(["raw", "group", "view_name", "qqnumber"])
# for i in a:
# if i.next_sibling != ' {{el.name}}({{el.qq}})':
# k = 0
# for x in i.next_sibling:
# if x == '(':
# f = k
# if x == ')':
# l = k
# k = k + 1
# ws.append([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
# attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
# print([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
# attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
# wb.save(asksaveasfilename(defaultextension='.xlsx', filetypes=[('Excel 工作簿', '*.xlsx')]))

return 0
# return 0

# 存储为json
def callback_json(self):
Expand Down
88 changes: 44 additions & 44 deletions Spiders/qqqun/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,53 +41,53 @@ def delNT(self, s):
s = s[:-1]
return s

def callback_excel(self):
a = self.driver.find_elements_by_class_name('icon-def-gicon')
Num = len(a)
time_start = time.time()
for i in range(0, Num):
# 点击进入具体群
a = self.driver.find_elements_by_class_name('icon-def-gicon')
# time.sleep(0.5)
a[i].click()
time.sleep(1)
html = self.driver.page_source
soup = BeautifulSoup(html, "lxml")
groupTit = self.delNT(soup.find(attrs={'id': 'groupTit'}).text)
groupMemberNum = self.delNT(soup.find(attrs={'id': 'groupMemberNum'}).text)
# def callback_excel(self):
# a = self.driver.find_elements_by_class_name('icon-def-gicon')
# Num = len(a)
# time_start = time.time()
# for i in range(0, Num):
# # 点击进入具体群
# a = self.driver.find_elements_by_class_name('icon-def-gicon')
# # time.sleep(0.5)
# a[i].click()
# time.sleep(1)
# html = self.driver.page_source
# soup = BeautifulSoup(html, "lxml")
# groupTit = self.delNT(soup.find(attrs={'id': 'groupTit'}).text)
# groupMemberNum = self.delNT(soup.find(attrs={'id': 'groupMemberNum'}).text)

while len(soup.find_all(attrs={'class': 'td-no'})) < int(groupMemberNum):
self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
time.sleep(0.1)
html = self.driver.page_source
soup = BeautifulSoup(html, "lxml")
# while len(soup.find_all(attrs={'class': 'td-no'})) < int(groupMemberNum):
# self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
# time.sleep(0.1)
# html = self.driver.page_source
# soup = BeautifulSoup(html, "lxml")

res_elements = etree.HTML(html)
table = res_elements.xpath('//*[@id="groupMember"]')
table = etree.tostring(table[0], encoding='utf-8').decode()
df = pandas.read_html(table, encoding='utf-8', header=0)[0]
try:
print(str(int((time.time() - time_start) / 60)) + ':' + str(int((time.time() - time_start) % 60)),
'第' + str(i + 1) + '群,' + str(int((i + 1) / Num * 100)) + '% ' + groupTit + ' 此表完成')
writer = pandas.ExcelWriter(self.path + '/' + groupTit + '.xlsx')
df.to_excel(writer, 'Sheet1')
writer.save()
except:
k = 0
for v in groupTit:
if v == '(':
f = k
if v == ')':
l = k
k = k + 1
# res_elements = etree.HTML(html)
# table = res_elements.xpath('//*[@id="groupMember"]')
# table = etree.tostring(table[0], encoding='utf-8').decode()
# df = pandas.read_html(table, encoding='utf-8', header=0)[0]
# try:
# print(str(int((time.time() - time_start) / 60)) + ':' + str(int((time.time() - time_start) % 60)),
# '第' + str(i + 1) + '群,' + str(int((i + 1) / Num * 100)) + '% ' + groupTit + ' 此表完成')
# writer = pandas.ExcelWriter(self.path + '/' + groupTit + '.xlsx')
# df.to_excel(writer, 'Sheet1')
# writer.save()
# except:
# k = 0
# for v in groupTit:
# if v == '(':
# f = k
# if v == ')':
# l = k
# k = k + 1

writer = pandas.ExcelWriter(self.path + '/' + groupTit[f + 1:l] + '.xlsx')
df.to_excel(writer, 'Sheet1')
writer.save()
self.driver.find_element_by_id('changeGroup').click()
time.sleep(1)
self.close_chrome()
return 0
# writer = pandas.ExcelWriter(self.path + '/' + groupTit[f + 1:l] + '.xlsx')
# df.to_excel(writer, 'Sheet1')
# writer.save()
# self.driver.find_element_by_id('changeGroup').click()
# time.sleep(1)
# self.close_chrome()
# return 0

def callback_json(self):
a = self.driver.find_elements_by_class_name('icon-def-gicon')
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ pandas==1.0.1
wxpy==0.3.9.8
beautifulsoup4==4.9.1
Pillow==7.2.0
python_dateutil==2.8.1
python_dateutil==2.8.1
97 changes: 0 additions & 97 deletions tests/github/main.py

This file was deleted.

Empty file removed tests/qq/main.py
Empty file.
23 changes: 0 additions & 23 deletions tests/qq/snip.py

This file was deleted.

Loading

0 comments on commit 24209ce

Please sign in to comment.