Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mission/huiduw/m3.1 #111

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions works/huiduw/M1.1/cf_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import requests
import json
import sys
from lxml import etree
def get_user_info(username):
    """Scrape a Codeforces profile page and print the user's info as JSON.

    On success a JSON object is written to stdout and the process exits
    with status 0.  The object is ``{"handle", "rating", "rank"}`` when a
    rating element is found on the page, otherwise just ``{"handle"}``.

    Args:
        username: Codeforces handle to look up.

    Raises:
        SystemExit: always.  Status 0 after printing the JSON, status 1
            after writing "No such handle" (no rank element on the page)
            or "network anomaly" (request failed / non-200 response) to
            stderr.
    """
    # Build the profile URL.
    url = 'https://codeforces.com/profile/{}/'.format(username)
    # Browser-like User-Agent header.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
    }
    try:
        # headers must be passed by keyword: the second positional
        # parameter of requests.get() is ``params`` (the query string).
        res = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException:
        sys.stderr.write("network anomaly")
        sys.exit(1)
    if res.status_code != 200:
        sys.stderr.write("network anomaly")
        sys.exit(1)

    html = etree.HTML(res.content.decode('utf-8'))
    # Match the rank span whatever its colour class is, so that users of
    # every rank (not only "user-legendary") are recognised.
    rank_nodes = []
    if html is not None:
        rank_nodes = html.xpath("//div[@class='user-rank']/span/text()")
    if not rank_nodes:
        sys.stderr.write("No such handle")
        sys.exit(1)

    user_info = {"handle": username}
    rating_nodes = html.xpath("//div[@class='info']/ul[1]/li[1]/span[1]/text()")
    if rating_nodes:
        # Rating and rank are only reported together.
        user_info["rating"] = rating_nodes[0]
        # The rank text carries trailing whitespace in the page markup.
        user_info["rank"] = rank_nodes[0].strip()

    sys.stdout.write(json.dumps(user_info))
    sys.exit(0)

if __name__ == '__main__':
    # The handle is the first command-line argument; fail with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: cf_spider.py <handle>\n")
        sys.exit(1)
    get_user_info(sys.argv[1])
58 changes: 58 additions & 0 deletions works/huiduw/M1.1/record.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# code

```python
import requests
import json
import sys
from lxml import etree
def get_user_info(username):
    """Scrape a Codeforces profile page and print the user's info as JSON.

    On success a JSON object is written to stdout and the process exits
    with status 0.  The object is ``{"handle", "rating", "rank"}`` when a
    rating element is found on the page, otherwise just ``{"handle"}``.

    Args:
        username: Codeforces handle to look up.

    Raises:
        SystemExit: always.  Status 0 after printing the JSON, status 1
            after writing "No such handle" (no rank element on the page)
            or "network anomaly" (request failed / non-200 response) to
            stderr.
    """
    # Build the profile URL.
    url = 'https://codeforces.com/profile/{}/'.format(username)
    # Browser-like User-Agent header.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
    }
    try:
        # headers must be passed by keyword: the second positional
        # parameter of requests.get() is ``params`` (the query string).
        res = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException:
        sys.stderr.write("network anomaly")
        sys.exit(1)
    if res.status_code != 200:
        sys.stderr.write("network anomaly")
        sys.exit(1)

    html = etree.HTML(res.content.decode('utf-8'))
    # Match the rank span whatever its colour class is, so that users of
    # every rank (not only "user-legendary") are recognised.
    rank_nodes = []
    if html is not None:
        rank_nodes = html.xpath("//div[@class='user-rank']/span/text()")
    if not rank_nodes:
        sys.stderr.write("No such handle")
        sys.exit(1)

    user_info = {"handle": username}
    rating_nodes = html.xpath("//div[@class='info']/ul[1]/li[1]/span[1]/text()")
    if rating_nodes:
        # Rating and rank are only reported together.
        user_info["rating"] = rating_nodes[0]
        # The rank text carries trailing whitespace in the page markup.
        user_info["rank"] = rank_nodes[0].strip()

    sys.stdout.write(json.dumps(user_info))
    sys.exit(0)

if __name__ == '__main__':
    # The handle is the first command-line argument; fail with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: cf_spider.py <handle>\n")
        sys.exit(1)
    get_user_info(sys.argv[1])
```

## summary

简单爬虫库的使用

python的标准输入输出流

json库的使用
58 changes: 58 additions & 0 deletions works/huiduw/M1.2/cf_spider_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import requests
import json
import sys


def get_user_info(username):
    """Query the Codeforces ``user.info`` API and print one JSON line per user.

    For every entry in the API result a JSON object is written to stdout:
    ``{"handle", "rating", "rank"}`` when the entry has a ``rank`` field,
    otherwise just ``{"handle"}``.

    Args:
        username: value sent as the ``handles`` query parameter.

    Raises:
        SystemExit: with status 1 after writing a message to stderr when
            the API reports ``FAILED``, the request fails, or the
            response cannot be parsed as the expected JSON structure.
    """
    url = "https://codeforces.com/api/user.info"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
    }
    # Query-string parameters.
    params = {
        "handles": username
    }
    try:
        # Without an explicit timeout requests waits indefinitely and the
        # Timeout handler below could never fire.
        res = requests.get(url, params=params, headers=headers, timeout=10)
        info_json = res.json()
        if info_json["status"] == "OK":
            for user in info_json["result"]:
                # Label each entry with its own handle; fall back to the
                # query value if the entry carries none.
                handle = user["handle"] if "handle" in user else username
                if "rank" not in user:
                    user_info = {"handle": handle}
                else:
                    user_info = {
                        "handle": handle,
                        "rating": user["rating"],
                        "rank": user["rank"]
                    }
                sys.stdout.write(json.dumps(user_info) + "\n")
        elif info_json["status"] == "FAILED":
            sys.stderr.write("no such handle\n")
            sys.exit(1)
    except requests.exceptions.Timeout as e:
        sys.stderr.write(f"请求超时: {e}\n")
        sys.exit(1)
    except requests.exceptions.HTTPError as e:
        sys.stderr.write(f"HTTP错误: {e}\n")
        sys.exit(1)
    except requests.exceptions.ConnectionError as e:
        sys.stderr.write(f"连接错误: {e}\n")
        sys.exit(1)
    except KeyError as e:
        sys.stderr.write(f"API返回的数据不完整或不正确: {e}\n")
        sys.exit(1)
    except TypeError as e:
        sys.stderr.write(f"返回的数据类型不正确: {e}\n")
        sys.exit(1)
    except ValueError as e:
        # res.json() raises a ValueError subclass when the body is not JSON.
        sys.stderr.write(f"API返回的不是合法的JSON: {e}\n")
        sys.exit(1)
    except requests.exceptions.RequestException as e:
        # Any other requests failure not covered by the handlers above.
        sys.stderr.write(f"请求失败: {e}\n")
        sys.exit(1)

def main():
    """Command-line entry point: look up the handle given as first argument.

    Raises:
        SystemExit: with status 1 when no handle argument was supplied
            (a message is written to stderr first).
    """
    # Check the argument explicitly rather than wrapping the whole lookup
    # in ``except IndexError``, which would misreport any IndexError raised
    # inside get_user_info() as a missing username.
    if len(sys.argv) < 2:
        sys.stderr.write("用户名不能为空!\n")
        sys.exit(1)
    get_user_info(sys.argv[1])


if __name__ == '__main__':
    main()
89 changes: 89 additions & 0 deletions works/huiduw/M2.1/cf_spider_back_V0.1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from flask import Flask,request,jsonify
import requests
import json
import sys
app = Flask(__name__) # create the Flask application instance

# Error messages returned to the client, keyed by string error code.
ERROR_MESSAGES = {
    # The Codeforces API reported a failed lookup.
    '1': 'no such handle',
    # No usable HTTP response came back.
    '2': 'No valid HTTP response was received',
    # The upstream response had an unexpected client-error status.
    '3': 'Abnormal HTTP response',
    # The upstream server failed.
    '4': 'Internal Server Error',
}


def _error_response(code):
    """Build the failure payload for the given ERROR_MESSAGES code."""
    return {"success": False, "message": ERROR_MESSAGES[code]}


def get_user_info(username):
    """Look up one handle through the Codeforces ``user.info`` API.

    Args:
        username: handle sent as the ``handles`` query parameter.

    Returns:
        ``{"success": True, "result": {...}}`` where the result holds
        ``handle`` plus ``rating`` and ``rank`` when the API entry has a
        ``rank`` field, or ``{"success": False, "message": <text>}`` with
        a text taken from ERROR_MESSAGES.
    """
    url = "https://codeforces.com/api/user.info"
    # Query-string parameters.
    params = {
        "handles": username
    }

    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.exceptions.RequestException:
        # Connection error, timeout, etc.: no HTTP response at all.
        return _error_response('2')

    status = res.status_code
    if status == 200 or status == 400:
        # Only these statuses are expected to carry an API JSON body, so
        # the body is parsed here rather than before the status check.
        try:
            info_json = res.json()
            if info_json["status"] == "FAILED":
                return _error_response('1')
            if info_json["status"] == "OK":
                user = info_json["result"][0]
                user_info = {"handle": username}
                if "rank" in user:
                    user_info["rating"] = user["rating"]
                    user_info["rank"] = user["rank"]
                # Return the dict itself: the caller serialises it, and a
                # pre-dumped string would end up double-encoded.
                return {"success": True, "result": user_info}
        except (ValueError, KeyError, IndexError, TypeError):
            # Body was not JSON or did not have the expected structure.
            pass
        return _error_response('3')
    if status == 404:
        return _error_response('2')
    if status // 100 == 5:
        # 5xx: upstream server error.
        return _error_response('4')
    # Remaining 4xx and any other unexpected status.
    return _error_response('3')

@app.route('/',methods =['GET'])
def query():
    """Handle ``GET /?handles=a,b,c`` and return one result per handle.

    Returns:
        A JSON array with the get_user_info() result for each
        comma-separated handle, or a plain-text hint when the ``handles``
        parameter is missing or contains no handle.
    """
    usernames = request.args.get('handles')
    # Test for the missing parameter explicitly instead of relying on the
    # AttributeError that None.split() would raise.
    if not usernames:
        return '传个参数先啦'

    # Ignore surrounding whitespace and empty items such as "a,,b".
    query_list = [name.strip() for name in usernames.split(',') if name.strip()]
    if not query_list:
        return '传个参数先啦'

    data = []
    for username in query_list:
        try:
            result = get_user_info(username)
        except Exception:
            # Request boundary: log the real error and report a failure
            # for this handle only, so the other handles are still served.
            app.logger.exception("lookup failed for handle %r", username)
            result = {"success": False, "message": ERROR_MESSAGES['4']}
        data.append(result)

    return jsonify(data)


if __name__ == "__main__":
    # Start the development server on the loopback address, port 2333.
    app.run(host='127.0.0.1', port=2333)

35 changes: 35 additions & 0 deletions works/huiduw/M2.1/record.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
## Flask + http学习

flask真是便便又利利呢

## 总结

**HTTP 协议的基本组成部分有**:请求行、请求头、请求体和响应行、响应头、响应体。

请求行中包含方法、请求 URI 和 HTTP 版本。例如:GET /users/ahz HTTP/1.1。

请求头中包含请求头名称和请求头参数。例如:Accept-Language: zh-CN,zh;q=0.9。

请求体中包含请求数据。例如:{"username": "ahz", "password": "password"}。

响应行(状态行)中包含 HTTP 版本、状态码和原因短语。例如:HTTP/1.1 200 OK。

响应头中包含响应头名称和响应头参数。例如:Content-Type: application/json。

响应体中包含响应数据。例如:{"success": true, "result": {"handle": "ahz"}}。

**在 HTTP 请求中,常见的传参方式有:**

1. Query Parameters:在 URL 的查询参数部分传递参数。例如:/users?handle=ahz。

2. Body Parameters:将参数放在请求体中。例如:POST /users { "username": "ahz", "password": "password" }。

3. Header Parameters:将参数作为请求头参数传递。例如:GET /users/ahz HTTP/1.1 Host: example.com。

4. Path Parameters:将参数放在请求 URI 的路径部分。例如:GET /users/{handle}。

## 优化方案

1. 多线程
2. 加缓存
3. ~~想不到力~~
Loading