diff --git a/CHANGELOG.md b/CHANGELOG.md index 6de4969eb2..781515dc25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,12 @@ -# Changelog +# 更新日志(Changelog) + +## v1.0.3 + +### 2024/4/7 + +- 新增接口域名黑名单(Add interface domain blacklist) +- 新增接口关键字黑名单(Add interface keyword blacklist) +- 调整过滤逻辑执行顺序,提升工作流更新效率(Adjust the execution order of the filtering logic to improve workflow update efficiency) ## v1.0.2 diff --git a/README-EN.md b/README-EN.md index 8b365e3600..ce9b8b7712 100644 --- a/README-EN.md +++ b/README-EN.md @@ -17,18 +17,20 @@ Customize channel menus, automatically fetch and update the latest live source i ## Config -| Configuration Item | Default Value | Description | -| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ | -| source_file | "demo.txt" | Template file name | -| final_file | "result.txt" | Generated file name | -| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) | -| favorite_page_num | 6 | Page retrieval quantity for favorite channels | -| default_page_num | 4 | Page retrieval quantity for regular channels | -| urls_limit | 15 | Number of interfaces per channel | -| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) | -| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) | -| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues | -| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" | +| Configuration Item | Default Value | Description | +| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ | +| source_file | "demo.txt" | Template file name | +| final_file | "result.txt" | Generated file name | +| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) | +| favorite_page_num | 6 | Page retrieval quantity for favorite channels | +| default_page_num | 4 | Page retrieval quantity for regular channels | +| urls_limit | 15 | Number of interfaces per channel | +| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) | +| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) | +| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues | +| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" | +| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains | +| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters | ## Quick Start diff --git a/README.md b/README.md index 594c836549..fac3c3d152 100644 --- a/README.md +++ b/README.md @@ -17,18 +17,20 @@ ## 配置 -| 配置项 | 默认值 | 描述 | -| -------------------- | ------------------ | ------------------------------------------------------------------ | -| source_file | "demo.txt" | 模板文件名称 | -| final_file | "result.txt" | 生成文件名称 | -| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) | -| favorite_page_num | 6 | 关注频道获取分页数量 | -| default_page_num | 4 | 常规频道获取分页数量 | -| urls_limit | 15 | 单个频道接口数量 | -| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) | -| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) | -| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 | -| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" | +| 配置项 | 默认值 | 描述 | +| ---------------------- | ------------------ | ------------------------------------------------------------------ | +| source_file | "demo.txt" | 模板文件名称 | +| final_file | "result.txt" | 生成文件名称 | +| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) | +| favorite_page_num | 6 | 关注频道获取分页数量 | +| default_page_num | 4 | 常规频道获取分页数量 | +| urls_limit | 15 | 单个频道接口数量 | +| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) | +| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) | +| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 | +| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" | +| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 | +| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 | ## 快速上手 diff --git a/config.py b/config.py index edb56da30b..66f4a03875 100644 --- a/config.py +++ b/config.py @@ -22,3 +22,5 @@ resolution_weight = 0.5 recent_days = 30 ipv_type = "ipv4" +domain_blacklist = ["epg.pw"] +url_keywords_blacklist = [] diff --git a/docs/tutorial-EN.md b/docs/tutorial-EN.md index 310723cb20..9387bda42a 100644 --- a/docs/tutorial-EN.md +++ b/docs/tutorial-EN.md @@ -57,18 +57,20 @@ Similar to editing the template, modify the running configuration Adjust the configuration as needed. Below is the default configuration explanation: -| Configuration Item | Default Value | Description | -| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ | -| source_file | "demo.txt" | Template file name | -| final_file | "result.txt" | Generated file name | -| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) | -| favorite_page_num | 6 | Page retrieval quantity for favorite channels | -| default_page_num | 4 | Page retrieval quantity for regular channels | -| urls_limit | 15 | Number of interfaces per channel | -| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) | -| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) | -| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues | -| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" | +| Configuration Item | Default Value | Description | +| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ | +| source_file | "demo.txt" | Template file name | +| final_file | "result.txt" | Generated file name | +| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) | +| favorite_page_num | 6 | Page retrieval quantity for favorite channels | +| default_page_num | 4 | Page retrieval quantity for regular channels | +| urls_limit | 15 | Number of interfaces per channel | +| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) | +| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) | +| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues | +| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" | +| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains | +| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters | ## Step 4: Enable Auto-update diff --git a/docs/tutorial.md b/docs/tutorial.md index 72c8bf8326..fbcf81c700 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -68,6 +68,8 @@ | resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) | | recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 | | ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" | +| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 | +| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 | ## 步骤四:开启自动更新 diff --git a/main.py b/main.py index 93a205a3ac..5e6550753c 100644 --- a/main.py +++ b/main.py @@ -16,8 +16,10 @@ getUrlInfo, compareSpeedAndResolution, getTotalUrls, - filterSortedDataByIPVType, - filterByIPVType, + checkUrlIPVType, + checkByDomainBlacklist, + checkByURLKeywordsBlacklist, + filterUrlsByPatterns, ) import logging import os @@ -84,7 +86,12 @@ async def visitPage(self, channelItems): for result in results: try: url, date, resolution = getUrlInfo(result) - if url: + if ( + url + and checkUrlIPVType(url) + and checkByDomainBlacklist(url) + and checkByURLKeywordsBlacklist(url) + ): infoList.append((url, date, resolution)) except Exception as e: print(f"Error on result {result}: {e}") @@ -94,17 +101,16 @@ async def visitPage(self, channelItems): continue try: sorted_data = await compareSpeedAndResolution(infoList) - ipvSortedData = filterSortedDataByIPVType(sorted_data) - if ipvSortedData: + if sorted_data: channelUrls[name] = ( - getTotalUrls(ipvSortedData) or channelObj[name] + getTotalUrls(sorted_data) or channelObj[name] ) - for (url, date, resolution), response_time in ipvSortedData: + for (url, date, resolution), response_time in sorted_data: logging.info( f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms" ) else: - channelUrls[name] = filterByIPVType(channelObj[name]) + channelUrls[name] = filterUrlsByPatterns(channelObj[name]) except Exception as e: print(f"Error on sorting: {e}") continue diff --git a/utils.py b/utils.py index 9f74fdf75b..ae8a5ac512 100644 --- a/utils.py +++ b/utils.py @@ -10,6 +10,7 @@ import os import urllib.parse import ipaddress +from urllib.parse import urlparse # 在这里使用 some_config_variable @@ -215,35 +216,43 @@ def is_ipv6(url): return False -def filterSortedDataByIPVType(sorted_data): +def checkUrlIPVType(url): """ - Filter sorted data by ipv type + Check if the url is compatible with the ipv type in the config """ ipv_type = getattr(config, "ipv_type", "ipv4") if ipv_type == "ipv4": - return [ - ((url, date, resolution), response_time) - for (url, date, resolution), response_time in sorted_data - if not is_ipv6(url) - ] + return not is_ipv6(url) elif ipv_type == "ipv6": - return [ - ((url, date, resolution), response_time) - for (url, date, resolution), response_time in sorted_data - if is_ipv6(url) - ] + return is_ipv6(url) else: - return sorted_data + return True -def filterByIPVType(urls): +def checkByDomainBlacklist(url): """ - Filter by ipv type + Check by domain blacklist """ - ipv_type = getattr(config, "ipv_type", "ipv4") - if ipv_type == "ipv4": - return [url for url in urls if not is_ipv6(url)] - elif ipv_type == "ipv6": - return [url for url in urls if is_ipv6(url)] - else: - return urls + domain_blacklist = [ + urlparse(domain).netloc if urlparse(domain).scheme else domain + for domain in getattr(config, "domain_blacklist", []) + ] + return urlparse(url).netloc not in domain_blacklist + + +def checkByURLKeywordsBlacklist(url): + """ + Check by URL blacklist keywords + """ + url_keywords_blacklist = getattr(config, "url_keywords_blacklist", []) + return not any(keyword in url for keyword in url_keywords_blacklist) + + +def filterUrlsByPatterns(urls): + """ + Filter urls by patterns + """ + urls = [url for url in urls if checkUrlIPVType(url)] + urls = [url for url in urls if checkByDomainBlacklist(url)] + urls = [url for url in urls if checkByURLKeywordsBlacklist(url)] + return urls diff --git a/version.json b/version.json index ef6dca9c65..196d98f585 100644 --- a/version.json +++ b/version.json @@ -1,3 +1,3 @@ { - "version": "1.0.2" + "version": "1.0.3" } \ No newline at end of file