Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

서버통신부분 신뢰성 개선 #2

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ Crawler for dcinside.com
디씨 UI 변경 후 작동 안되는 갤창랭킹 프로그램 수정함.
Python version 3.6.4
필요 모듈: bs4, requests

v1.3b
마이너 갤러리 지원.
exe 패키지 지원
26 changes: 19 additions & 7 deletions gallchangranking.ver.1.3.py → gallchangranking.ver.1.3b.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,33 @@ def request(url):
url_get = requests.get(url, headers=header)
return url_get

def gall_check(gall):
recept = request("http://gall.dcinside.com/board/lists/?id=%s" %gall)
def gall_check(minor_string, gall):
recept = request("http://gall.dcinside.com/%sboard/lists/?id=%s" %(minor_string, gall))
soup = BeautifulSoup(recept.text, "html.parser")
meta_data = soup.find_all("meta", {"name": "title"})
comp = re.findall("\"(.*갤러리)", str(meta_data))
comp = re.findall("\"(.+갤러리)", str(meta_data))
if comp == []:
return None
gall_name = comp[0]
return gall_name


def main():

is_minor = input("마이너 갤러리입니까? (y/n): ")

if is_minor == 'y':
minor_string = "mgallery/"
elif is_minor == 'n':
minor_string = ""

else:
print("y나 n으로 입력부탁")
main()

gall = input("갤러리 id?(ex:mlp): ")
if gall_check(gall):
print(gall_check(gall))
if gall_check(minor_string, gall):
print(gall_check(minor_string, gall))
else:
print("id 잘못 입력한듯")
main()
Expand All @@ -43,7 +55,7 @@ def main():

for page in range(init_page, final_page + 1):
print("\rWorking page={}/{}".format(page, final_page), end="")
recept = request("http://gall.dcinside.com/board/lists/?id=%s&page=%d" %(gall, page))
recept = request("http://gall.dcinside.com/%sboard/lists/?id=%s&page=%d" %(minor_string, gall, page))
soup = BeautifulSoup(recept.text, "html.parser")
nick_list = soup.find_all('td', {'class': "gall_writer ub-writer"})

Expand Down Expand Up @@ -142,7 +154,7 @@ def edit_nick():


if __name__ == "__main__":
print("갤창랭킹 made by hanel2527, mlp갤")
print("갤창랭킹 made by hanel2527, mlp갤 \n\n 마갤 지원 패치 by Prince, \n(**데자와는 갓음료입니다**)\n\n")
if input("갤창랭킹/편집(g/e): ") == "g":
main()
edit_nick() #닉변처리
160 changes: 160 additions & 0 deletions gallchangranking.ver.1.3c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import requests
from bs4 import BeautifulSoup
import operator
import time
import re
import os

def request(url, timeout=10):
    """Fetch *url* from gall.dcinside.com using browser-like headers.

    Args:
        url: Absolute URL to GET.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        The ``requests.Response`` returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'gall.dcinside.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64;x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    # requests applies no timeout by default, so a stalled connection would
    # block the crawler forever; always bound the wait.
    return requests.get(url, headers=header, timeout=timeout)

def gall_check(minor_string, gall):
    """Look up the gallery title for *gall*.

    Fetches the board list page, reads its ``<meta name="title">`` tags and
    returns the first text ending in "갤러리" that follows a double quote.
    Returns None when no such text is found.
    """
    list_url = "http://gall.dcinside.com/%sboard/lists/?id=%s" % (minor_string, gall)
    page = BeautifulSoup(request(list_url).text, "html.parser")
    title_tags = page.find_all("meta", {"name": "title"})
    found = re.search("\"(.+갤러리)", str(title_tags))
    return found.group(1) if found else None


def main():
    """Interactively crawl a gallery's list pages and save a poster ranking.

    Asks whether the gallery is a minor gallery, asks for the gallery id
    (re-asking both until ``gall_check`` finds a title), asks for the page
    range, counts posts per ``nick(uid+ip)`` over those pages, then sorts the
    counts with ``dict_sorter`` and saves them with ``file_writer``.
    """
    # Retry with a loop instead of calling main() recursively: the recursive
    # call used to return into the half-finished outer call, which then went
    # on with an unbound minor_string or with the rejected gallery id.
    while True:
        is_minor = input("마이너 갤러리입니까? (y/n): ")
        if is_minor == 'y':
            minor_string = "mgallery/"
        elif is_minor == 'n':
            minor_string = ""
        else:
            print("y나 n으로 입력부탁")
            continue

        gall = input("갤러리 id?(ex:mlp): ")
        # Look the title up once and reuse it (the check costs a request).
        gall_name = gall_check(minor_string, gall)
        if gall_name:
            print(gall_name)
            break
        print("id 잘못 입력한듯")

    init_page = int(input("시작 페이지?: "))
    final_page = int(input("마지막 페이지?: "))
    nick_dic = dict()

    for page in range(init_page, final_page + 1):
        print("\rWorking page={}/{}".format(page, final_page), end="")
        recept = request("http://gall.dcinside.com/%sboard/lists/?id=%s&page=%d" %(minor_string, gall, page))
        soup = BeautifulSoup(recept.text, "html.parser")
        writer_cells = soup.find_all('td', {'class': "gall_writer ub-writer"})

        for cell in writer_cells:
            try:
                nick = cell.attrs['data-nick']
                uid = cell.attrs['data-uid']
                ip = cell.attrs['data-ip']
            except KeyError:
                # Cells without the writer attributes are skipped.
                continue
            if nick == "운영자":  # notice posts
                continue
            nick_str = nick + "(" + uid + ip + ")"
            nick_dic[nick_str] = nick_dic.get(nick_str, 0) + 1

    # NOTE(review): SOURCE lost its indentation; ranking and saving are
    # assumed to run once, after all pages have been counted.
    nick_list = dict_sorter(nick_dic)
    file_writer(gall, nick_list)  # save


def dict_sorter(nick_dic):
    """Return the (key, value) pairs of *nick_dic* ordered by value, largest first.

    The pairs are sorted ascending and then reversed, so pairs with equal
    values come out in the reverse of their dictionary order.
    """
    ascending = sorted(nick_dic.items(), key=lambda pair: pair[1])
    return ascending[::-1]


def nick_change(nick_list):
    """Interactively merge ranking entries that belong to the same person.

    Prints the ranking, then repeatedly reads two comma-separated ranks.
    The entry at the first rank is renamed ``"first=second"`` and receives
    the sum of both counts; the entry at the second rank is set to a count
    of 0. Entering a line whose first field is ``0`` ends the loop.

    Args:
        nick_list: List of ``(nick, count)`` tuples; modified in place.

    Returns:
        The merged entries re-sorted by ``dict_sorter``.
    """
    print("랭킹\t닉\t글수")
    for rank, (nick, count) in enumerate(nick_list, start=1):
        print("%d\t%s\t%s" % (rank, nick, count))
    print("닉변 처리(ex)1위와 10위가 동일닉일 시 1,10 한번에 두개씩만, 종료는 0,0")

    size = len(nick_list)
    while True:
        change = input("닉변?: ")
        rankings = change.split(",")
        if rankings[0].strip() == "0":
            break
        # Malformed input (missing comma, non-numeric, out of range, or the
        # same rank twice) used to crash or drop an entry; ask again instead.
        try:
            keep, drop = (int(field) - 1 for field in rankings)
        except ValueError:
            print("잘못된 입력")
            continue
        if keep == drop or not (0 <= keep < size and 0 <= drop < size):
            print("잘못된 입력")
            continue

        keep_nick, keep_count = nick_list[keep]
        drop_nick, drop_count = nick_list[drop]
        nick_list[keep] = (keep_nick + "=" + drop_nick, keep_count + drop_count)
        nick_list[drop] = (drop_nick, 0)

    # NOTE(review): SOURCE lost its indentation; the re-sort is assumed to
    # happen once after the loop so the printed ranks stay valid while merging.
    return dict_sorter(dict(nick_list))


def file_writer(gall, nick_list):
    """Write the ranking report and its editable companion file.

    Creates two UTF-8 files in the current directory, named from *gall* and
    the current local time (minute resolution):

    * ``<gall>_gall-<time>.txt``: header, total post count and one row per
      entry with rank, nick, count and share in percent.
    * ``edit_<gall>_gall-<time>.txt``: ``nick<TAB>count`` per entry.

    Entries with a count of 0 are left out of both files.

    Args:
        gall: Gallery id used in the file names.
        nick_list: Sequence of ``(nick, count)`` tuples in ranking order.
    """
    timestr = time.strftime("%Y_%m_%d-%H_%M")
    file_name = "%s_gall-%s.txt" %(gall, timestr)
    edit_file_name = "edit_%s_gall-%s.txt" %(gall, timestr)
    print(file_name)

    total = sum(count for _, count in nick_list)

    # Context managers close both files even if a write raises.
    with open(file_name, 'w', encoding='utf-8') as f, \
            open(edit_file_name, "w", encoding='utf-8') as ef:
        f.write("갤창랭킹 made by hanel2527, 마이 리틀 포니 갤러리\n\n Fix by Prince \n (**마셔보세요 데자와**)\n\n")
        f.write("총 글수: %d\n" %total)
        f.write("랭킹\t\t닉\t\t\t\t글 수\t\t갤 지분(%)\n")

        for rank, (nick, count) in enumerate(nick_list, start=1):
            if count == 0:
                # Zero-count entries are skipped; this also means the
                # division below never sees total == 0.
                continue
            f.write("%d\t\t%s\t\t\t\t%d\t\t%.2f\n" %(rank, nick, count, (count / total * 100)))
            ef.write("%s\t%d\n" %(nick, count))

def edit_nick():
    """Let the user pick an ``edit_*.txt`` file and merge nicknames in it.

    Lists the matching files in the current directory, loads the chosen
    file's ``nick<TAB>count`` lines, runs ``nick_change`` on them and saves
    the result with ``file_writer``. Finally offers to delete the chosen
    edit file and the ranking file it was derived from.
    """
    filename_list = [
        name for name in os.listdir() if re.match(r"^edit_.*\.txt", name)
    ]
    if not filename_list:
        # Nothing to edit; indexing the empty list below would crash.
        print("edit_*.txt 파일이 없음")
        return
    for n, filename in enumerate(filename_list, start=1):
        print(n, filename)

    num = int(input("맞는 것 번호?: "))
    file_name = filename_list[num - 1]
    print(file_name)

    nick_list = list()
    # The context manager closes the file before it may be deleted below.
    with open(file_name, "r", encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            # Split on the last tab so the count is always the final field.
            nick, count = line.rsplit('\t', 1)
            nick_list.append((nick, int(count)))

    nick_list = nick_change(nick_list)
    gall = re.findall(r"^edit_(.*)_gall", file_name)[0]
    file_writer(gall, nick_list)

    original = re.findall(r"^edit_(.*\.txt)", file_name)[0]
    # NOTE(review): SOURCE lost its indentation; both removals are assumed
    # to be conditional on the user's confirmation.
    if input("원본파일 삭제?(y/n): ") == "y":
        for stale in (original, file_name):  # original file, then the edit file
            try:
                os.remove(stale)
            except FileNotFoundError:
                # Already gone; still remove the other one.
                pass


if __name__ == "__main__":
    # Script entry point: show the banner, then ask which mode to run.
    print("갤창랭킹 made by hanel2527, mlp갤 \n\n 마갤 지원 패치 by Prince, \n(**데자와는 갓음료입니다**)\n\n")
    mode = input("갤창랭킹/편집(g/e): ")
    if mode == "g":
        main()
    # NOTE(review): SOURCE lost its indentation; the edit step is assumed to
    # run in both modes (after the crawl for "g", on its own otherwise).
    edit_nick()  # nickname-change handling
112 changes: 112 additions & 0 deletions gallchangranking.ver.1.3c2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import requests
from bs4 import BeautifulSoup
import operator
import time
import re
import os

def request(url, timeout=10):
    """GET *url* with the fixed browser-like header set used for dcinside.

    Args:
        url: Absolute URL to fetch.
        timeout: Value forwarded to ``requests.get(timeout=...)``, in
            seconds. Defaults to 10.

    Returns:
        The ``requests.Response`` produced by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If no response arrives within *timeout*.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'gall.dcinside.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64;x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    # A request without a timeout can wait forever on a dead connection,
    # freezing the whole crawl; bound it explicitly.
    url_get = requests.get(url, headers=header, timeout=timeout)
    return url_get

def gall_check(minor_string, gall):
    """Return the gallery title found on the list page of *gall*, else None.

    The title is the first text ending in "갤러리" that follows a double
    quote in the string form of the page's ``<meta name="title">`` tags.
    """
    response = request(
        "http://gall.dcinside.com/%sboard/lists/?id=%s" % (minor_string, gall)
    )
    document = BeautifulSoup(response.text, "html.parser")
    meta_text = str(document.find_all("meta", {"name": "title"}))
    hit = re.search("\"(.+갤러리)", meta_text)
    if hit is None:
        return None
    return hit.group(1)


def main():
    """Interactively crawl a gallery's list pages and save a poster ranking.

    Prompts for the gallery kind (minor or not) and the gallery id until
    ``gall_check`` finds a title, then prompts for the first and last page,
    counts posts per ``nick(uid+ip)`` across those pages, and passes the
    counts sorted by ``dict_sorter`` to ``file_writer``.
    """
    # A loop replaces the recursive main() retry: after the recursive call
    # returned, the outer call kept running with minor_string unbound (bad
    # y/n answer) or with the gallery id that had just been rejected.
    while True:
        is_minor = input("마이너 갤러리입니까? (y/n): ")
        if is_minor == 'y':
            minor_string = "mgallery/"
        elif is_minor == 'n':
            minor_string = ""
        else:
            print("y나 n으로 입력부탁")
            continue

        gall = input("갤러리 id?(ex:mlp): ")
        # One lookup is enough; the result is reused for the printout.
        gall_name = gall_check(minor_string, gall)
        if gall_name:
            print(gall_name)
            break
        print("id 잘못 입력한듯")

    init_page = int(input("시작 페이지?: "))
    final_page = int(input("마지막 페이지?: "))
    nick_dic = dict()

    for page in range(init_page, final_page + 1):
        print("\rWorking page={}/{}".format(page, final_page), end="")
        recept = request("http://gall.dcinside.com/%sboard/lists/?id=%s&page=%d" %(minor_string, gall, page))
        soup = BeautifulSoup(recept.text, "html.parser")
        writer_cells = soup.find_all('td', {'class': "gall_writer ub-writer"})

        for cell in writer_cells:
            try:
                nick = cell.attrs['data-nick']
                uid = cell.attrs['data-uid']
                ip = cell.attrs['data-ip']
            except KeyError:
                # Cells lacking the writer attributes are not counted.
                continue
            if nick == "운영자":  # notice posts
                continue
            nick_str = nick + "(" + uid + ip + ")"
            nick_dic[nick_str] = nick_dic.get(nick_str, 0) + 1

    # NOTE(review): SOURCE lost its indentation; sorting and saving are
    # assumed to happen once, after the page loop.
    nick_list = dict_sorter(nick_dic)
    file_writer(gall, nick_list)  # save


def dict_sorter(nick_dic):
    """Order the items of *nick_dic* by value, highest first, as a list.

    Implemented as an ascending sort followed by a reversal, so items that
    share a value appear in the reverse of their dictionary order.
    """
    pairs = list(nick_dic.items())
    pairs.sort(key=lambda item: item[1])
    return list(reversed(pairs))



def file_writer(gall, nick_list):
    """Write the ranking report for *gall* to a timestamped UTF-8 text file.

    The file ``<gall>_gall-<time>.txt`` (local time, minute resolution) is
    created in the current directory with a header, the total post count and
    one row per entry: rank, nick, count and share in percent. Entries with
    a count of 0 are omitted. The file name and the number of entries are
    printed to stdout.

    Args:
        gall: Gallery id used in the file name.
        nick_list: Sequence of ``(nick, count)`` tuples in ranking order.
    """
    timestr = time.strftime("%Y_%m_%d-%H_%M")
    file_name = "%s_gall-%s.txt" %(gall, timestr)
    print(file_name)

    total = sum(count for _, count in nick_list)

    # The context manager closes the file even if a write raises.
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write("갤창랭킹 made by hanel2527, 마이 리틀 포니 갤러리\n\n Fix by Prince \n (**마셔보세요 데자와**)\n\n")
        f.write("총 글수: %d\n" %total)
        f.write("랭킹\t\t닉\t\t\t\t글 수\t\t갤 지분(%)\n")
        people = len(nick_list)
        print("%d" %people)

        for rank, (nick, count) in enumerate(nick_list, start=1):
            if count == 0:
                # Skipping zero counts also guarantees total != 0 below.
                continue
            f.write("%d\t\t%s\t\t\t\t%d\t\t%.2f\n" %(rank, nick, count, (count / total * 100)))

if __name__ == "__main__":
    # Script entry point: show the banner and crawl only when the user answers "g".
    print("갤창랭킹 made by hanel2527, mlp갤 \n\n 마갤 지원 패치 by Prince, \n(**데자와는 갓음료입니다**)\n\n")
    choice = input("랭킹 (g) : ")
    if choice == "g":
        main()