Merge pull request #106 from Guovin/dev
Release: v1.1.2
Guovin authored May 7, 2024
2 parents 5278408 + 086e66d commit 0b4518c
Showing 5 changed files with 71 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -18,7 +18,7 @@ jobs:
steps:
- name: Set branch name
id: vars
run: echo ::set-output name=branch::${{ github.ref == 'refs/heads/gd' && 'gd' || 'master' }}
run: echo ::set-output name=branch::${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}
- uses: actions/checkout@v3
with:
ref: ${{ steps.vars.outputs.branch }}
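Note on the workflow change above: the branch selector now keys off github.repository_owner instead of github.ref, so the scheduled run targets the gd branch only in the upstream Guovin repository and falls back to master in forks, which is the gd-branch auto-update fix (#105) listed in the changelog. A minimal Python sketch of the same selection logic, for illustration only (the select_branch helper is hypothetical, not part of this repo):

def select_branch(repository_owner: str) -> str:
    # Mirrors the Actions expression:
    # ${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}
    return "gd" if repository_owner == "Guovin" else "master"

assert select_branch("Guovin") == "gd"                # upstream repo updates the gd branch
assert select_branch("some-fork-owner") == "master"   # forks stay on master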
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# 更新日志(Changelog)

## v1.1.2

### 2024/5/7

- 重构接口获取方法,增强通用性,适应结构变更(Refactored the method for obtaining the interface, enhanced its universality, and adapted to structural changes)
- 修复 gd 分支自动更新问题(#105)(Fixed the automatic update issue of the gd branch (#105))
- 优化自定义接口源获取,接口去重(Optimized the acquisition of custom interface sources and removed duplicate interfaces)

## v1.1.1

### 2024/4/29
43 changes: 32 additions & 11 deletions main.py
@@ -8,12 +8,13 @@
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
getChannelUrl,
getChannelInfo,
sortUrlsBySpeedAndResolution,
getTotalUrls,
filterUrlsByPatterns,
@@ -25,6 +26,7 @@
from logging.handlers import RotatingFileHandler
import os
from tqdm import tqdm
import re

handler = RotatingFileHandler("result_new.log", encoding="utf-8")
logging.basicConfig(
@@ -114,18 +116,37 @@ async def visitPage(self, channelItems):
self.driver.execute_script(
"arguments[0].click();", page_link
)
soup = BeautifulSoup(self.driver.page_source, "html.parser")
results = (
soup.find_all("div", class_="result") if soup else []
source = re.sub(
r"<!--.*?-->",
"",
self.driver.page_source,
flags=re.DOTALL,
)
for result in results:
try:
url, date, resolution = getUrlInfo(result)
soup = BeautifulSoup(source, "html.parser")
if soup:
results = []
for element in soup.descendants:
if isinstance(element, NavigableString):
url = getChannelUrl(element)
if url and not any(
item[0] == url for item in results
):
url_element = soup.find(
lambda tag: tag.get_text(strip=True)
== url
)
if url_element:
info_element = (
url_element.find_next_sibling()
)
date, resolution = getChannelInfo(
info_element
)
results.append((url, date, resolution))
for result in results:
url, date, resolution = result
if url and checkUrlByPatterns(url):
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on result {result}: {e}")
continue
except Exception as e:
print(f"Error on page {page}: {e}")
continue
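A note on the main.py change above: instead of parsing fixed div.result blocks with getUrlInfo, visitPage now strips HTML comments from the page source, walks every text node (NavigableString) in the parsed document, extracts a channel URL from each text node with getChannelUrl, skips duplicates, locates the tag whose text equals that URL, and reads the date and resolution from its next sibling via getChannelInfo, so the scraper no longer depends on one specific page structure. A standalone sketch of that scan, assuming getChannelUrl and getChannelInfo behave as defined in utils.py (extract_results and its parameters are illustrative, not part of the repo):

import re
from bs4 import BeautifulSoup, NavigableString

def extract_results(page_source, get_channel_url, get_channel_info):
    # Drop HTML comments so commented-out markup cannot contribute stale URLs.
    source = re.sub(r"<!--.*?-->", "", page_source, flags=re.DOTALL)
    soup = BeautifulSoup(source, "html.parser")
    results = []
    for element in soup.descendants:
        if not isinstance(element, NavigableString):
            continue
        url = get_channel_url(element)
        if not url or any(item[0] == url for item in results):
            continue  # no URL in this text node, or already collected
        url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
        info_element = url_element.find_next_sibling() if url_element else None
        if info_element:
            # The date/resolution text is expected in the element right after the URL.
            date, resolution = get_channel_info(info_element)
            results.append((url, date, resolution))
    return results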
51 changes: 29 additions & 22 deletions utils.py
@@ -91,7 +91,8 @@ async def getChannelsByExtendBaseUrls(channel_names):
url = re.match(pattern, line).group(2)
value = (url, None, resolution)
if key in link_dict:
link_dict[key].append(value)
if value not in link_dict[key]:
link_dict[key].append(value)
else:
link_dict[key] = [value]
found_channels = []
@@ -137,31 +138,37 @@ def updateFile(final_file, old_file):
os.replace(old_file, final_file)


def getUrlInfo(result):
def getChannelUrl(element):
"""
Get the url, date and resolution
"""
url = date = resolution = None
result_div = [div for div in result.children if div.name == "div"]
if 1 < len(result_div):
channel_text = result_div[1].get_text(strip=True)
url_match = re.search(
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
channel_text,
url = None
urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
url_search = re.search(
urlRegex,
element.get_text(strip=True),
)
if url_search:
url = url_search.group()
return url


def getChannelInfo(element):
"""
Get the channel info
"""
date, resolution = None, None
info_text = element.get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
if url_match is not None:
url = url_match.group()
info_text = result_div[-1].get_text(strip=True)
if info_text:
date, resolution = (
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
return url, date, resolution
return date, resolution


async def getSpeed(url, urlTimeout=5):
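A note on the utils.py changes above: the single getUrlInfo helper, which assumed a result div with at least two child divs, is replaced by two smaller helpers: getChannelUrl, which regex-searches an element's text for the first http(s) URL, and getChannelInfo, which splits an info string of the form "<date> ... • <resolution>" into its date and resolution parts; getChannelsByExtendBaseUrls also now skips duplicate (url, date, resolution) entries before appending, which is the interface de-duplication mentioned in the changelog. A rough self-contained sketch of the two helpers' behavior (the function names and sample strings below are illustrative, not the repo's code):

import re

URL_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"

def channel_url_from_text(text):
    # Like getChannelUrl: return the first URL found in the text, else None.
    match = re.search(URL_REGEX, text)
    return match.group() if match else None

def channel_info_from_text(info_text):
    # Like getChannelInfo: the text before the first space is the date,
    # the text after the first "•" is the resolution; missing parts become None.
    date = info_text.partition(" ")[0] or None
    resolution = info_text.partition(" ")[2].partition("•")[2] or None
    return date, resolution

print(channel_url_from_text("CCTV-1 http://192.168.1.1:8000/tv.m3u8 updated"))  # the URL only
print(channel_info_from_text("2024-05-07 checked • 1920x1080"))                 # ('2024-05-07', ' 1920x1080')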
2 changes: 1 addition & 1 deletion version.json
@@ -1,3 +1,3 @@
{
"version": "1.1.1"
"version": "1.1.2"
}
