From da38436cb87eb0f92cf4eb87a5bf6904f13c76e9 Mon Sep 17 00:00:00 2001 From: "guorong.zheng" <360996299@qq.com> Date: Wed, 8 May 2024 10:12:40 +0800 Subject: [PATCH 1/4] chore: deprecated command --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f0b1bacefcf..61d78702ded 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Set branch name id: vars - run: echo ::set-output name=branch::${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }} + run: echo "branch=${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}" >> "$GITHUB_OUTPUT" - uses: actions/checkout@v3 with: ref: ${{ steps.vars.outputs.branch }} From 9881b5de34be9ffd827e563eefa45df92606bf5b Mon Sep 17 00:00:00 2001 From: "guorong.zheng" <360996299@qq.com> Date: Wed, 8 May 2024 10:16:00 +0800 Subject: [PATCH 2/4] security: tqdm --- Pipfile | 2 +- Pipfile.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Pipfile b/Pipfile index 93a87c463c4..137214eac29 100644 --- a/Pipfile +++ b/Pipfile @@ -16,7 +16,7 @@ selenium = "4.19.0" selenium-stealth = "1.0.6" aiohttp = ">=3.9.4" bs4 = "0.0.2" -tqdm = "4.66.2" +tqdm = ">=4.66.3" async-timeout = "4.0.3" [requires] diff --git a/Pipfile.lock b/Pipfile.lock index e2b04bec7ba..2930a962d4c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b685dbe95c06aa87c889dec13e6a7c0100fdb362864a76063470bddd20a93bce" + "sha256": "95be8fbb1ed76f8a0bde1e1a098159813b8b736dbbcbb907c9586a1f9b142ce8" }, "pipfile-spec": 6, "requires": { @@ -603,12 +603,12 @@ }, "tqdm": { "hashes": [ - "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9", - "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531" + "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644", + 
"sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==4.66.2" + "version": "==4.66.4" }, "trio": { "hashes": [ From 82e0080226615ed5a0a4bd9b2874445e4e26bf1e Mon Sep 17 00:00:00 2001 From: "guorong.zheng" <360996299@qq.com> Date: Wed, 8 May 2024 12:00:46 +0800 Subject: [PATCH 3/4] fix:name match(#99) --- main.py | 25 +++---------------------- utils.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 32 deletions(-) diff --git a/main.py b/main.py index 864248130be..a2c1d1dbd8f 100644 --- a/main.py +++ b/main.py @@ -8,13 +8,12 @@ from selenium.webdriver.support import expected_conditions as EC from selenium_stealth import stealth import asyncio -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup from utils import ( getChannelItems, updateChannelUrlsTxt, updateFile, - getChannelUrl, - getChannelInfo, + getResultsFromSoup, sortUrlsBySpeedAndResolution, getTotalUrls, filterUrlsByPatterns, @@ -124,25 +123,7 @@ async def visitPage(self, channelItems): ) soup = BeautifulSoup(source, "html.parser") if soup: - results = [] - for element in soup.descendants: - if isinstance(element, NavigableString): - url = getChannelUrl(element) - if url and not any( - item[0] == url for item in results - ): - url_element = soup.find( - lambda tag: tag.get_text(strip=True) - == url - ) - if url_element: - info_element = ( - url_element.find_next_sibling() - ) - date, resolution = getChannelInfo( - info_element - ) - results.append((url, date, resolution)) + results = getResultsFromSoup(soup, name) for result in results: url, date, resolution = result if url and checkUrlByPatterns(url): diff --git a/utils.py b/utils.py index 685e48d4b85..164ce5bba1a 100644 --- a/utils.py +++ b/utils.py @@ -13,6 +13,7 @@ from urllib.parse import urlparse import requests import re +from bs4 import NavigableString def getChannelItems(): @@ -44,16 
+45,12 @@ def getChannelItems(): # This is a url, add it to the list of urls for the current channel. match = re.search(pattern, line) if match is not None: - if match.group(1) not in channels[current_category]: - channels[current_category][match.group(1)] = [match.group(2)] - elif ( - match.group(2) - and match.group(2) - not in channels[current_category][match.group(1)] - ): - channels[current_category][match.group(1)].append( - match.group(2) - ) + name = match.group(1).strip() + url = match.group(2).strip() + if name not in channels[current_category]: + channels[current_category][name] = [url] + elif url and url not in channels[current_category][name]: + channels[current_category][name].append(url) return channels finally: f.close() @@ -171,6 +168,45 @@ def getChannelInfo(element): return date, resolution +def checkNameMatch(name, result_name): + pattern = r"[a-zA-Z]+[_\-+]|cctv" + if re.search( + pattern, + result_name, + re.IGNORECASE, + ): + print( + "Name test match:", + name.lower(), + result_name.lower(), + name.lower() == result_name.lower(), + ) + return name.lower() == result_name.lower() + else: + return True + + +def getResultsFromSoup(soup, name): + """ + Get the results from the soup + """ + results = [] + for element in soup.descendants: + if isinstance(element, NavigableString): + url = getChannelUrl(element) + if url and not any(item[0] == url for item in results): + url_element = soup.find(lambda tag: tag.get_text(strip=True) == url) + if url_element: + name_element = url_element.find_previous_sibling() + if name_element: + channel_name = name_element.get_text(strip=True) + if checkNameMatch(name, channel_name): + info_element = url_element.find_next_sibling() + date, resolution = getChannelInfo(info_element) + results.append((url, date, resolution)) + return results + + async def getSpeed(url, urlTimeout=5): """ Get the speed of the url From 048d0ad2e184e5e2870bac233f8f77127f0b7fd9 Mon Sep 17 00:00:00 2001 From: "guorong.zheng" 
<360996299@qq.com> Date: Wed, 8 May 2024 13:38:38 +0800 Subject: [PATCH 4/4] release:v1.1.3 --- CHANGELOG.md | 8 ++++++++ version.json | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d944daa95e6..e7f50c1d32e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # 更新日志(Changelog) +## v1.1.3 + +### 2024/5/8 + +- 优化频道接口不对应问题(#99)(Optimize the mismatch problem of the channel interface (#99)) +- 处理 tqdm 安全问题(Handle the security issue of tqdm) +- 修改即将被废弃的命令(Modify the commands that are about to be deprecated) + ## v1.1.2 ### 2024/5/7 diff --git a/version.json b/version.json index be442f96056..d03151bcd56 100644 --- a/version.json +++ b/version.json @@ -1,3 +1,3 @@ { - "version": "1.1.2" + "version": "1.1.3" } \ No newline at end of file