From 4f5c12b80a0ba0ce99e9d42e5823ee627c459186 Mon Sep 17 00:00:00 2001
From: "guorong.zheng" <360996299@qq.com>
Date: Fri, 17 May 2024 15:19:22 +0800
Subject: [PATCH] release: v1.1.5
---
CHANGELOG.md | 10 +++
README-EN.md | 41 +++++----
README.md | 41 +++++----
config.py | 1 -
docs/tutorial-EN.md | 41 +++++----
docs/tutorial.md | 5 +-
main.py | 118 +++++++------------------
utils.py | 211 ++++++++++++++++++++++++++++++--------------
version.json | 2 +-
9 files changed, 251 insertions(+), 219 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d2cbbc4f9e..ded3746b18f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
# 更新日志(Changelog)
+## v1.1.5
+
+### 2024/5/17
+
+- 增加模糊匹配规则,适配在线检索、订阅源、组播源(Add fuzzy matching rules for online search, subscription sources, and multicast sources)
+- 增加订阅源、组播源更新进度条(Added the update progress bar for subscription sources and multicast sources)
+- 优化组播源更新可能出现的无匹配结果情况(Optimize the possible situation of no match results in multicast source updates)
+- 移除部分错误日志打印(Removes some error log prints)
+- 移除严格匹配配置(Removes strict matching configurations)
+
## v1.1.4
### 2024/5/15
diff --git a/README-EN.md b/README-EN.md
index 4a59d60fd46..5e3e5e3068b 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -20,27 +20,26 @@ Customize channel menus and automatically obtain and update the latest live sour
## Config
-| Configuration Item | Default Value | Description |
-| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| source_file | "demo.txt" | Template file name |
-| final_file | "result.txt" | Generated file name |
-| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
-| open_online_search | True | Enable online search source feature |
-| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
-| default_page_num | 3 | Page retrieval quantity for regular channels |
-| urls_limit | 10 | Number of interfaces per channel |
-| open_sort | True | Enable the sorting test function, it is recommended to turn it off if you are not using online search |
-| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
-| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
-| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
-| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
-| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
-| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
-| open_subscribe | True | Enable subscription source feature |
-| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | Subscription source list |
-| open_multicast | True | Enable multicast source function |
-| region_list | ["广东"] | Multicast source region list, for more regions please see the fofa_map.py file |
-| strict_match | False | Strict matching, when enabled, can minimize the issue of channel interface mismatch to the greatest extent, but at the same time, some fuzzy matching results may be lost |
+| Configuration Item | Default Value | Description |
+| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
+| source_file | "demo.txt" | Template file name |
+| final_file | "result.txt" | Generated file name |
+| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
+| open_online_search | True | Enable online search source feature |
+| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
+| default_page_num | 3 | Page retrieval quantity for regular channels |
+| urls_limit | 10 | Number of interfaces per channel |
+| open_sort | True | Enable the sorting function (response speed, date, resolution), or turn it off if it takes a long time to execute |
+| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
+| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
+| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
+| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
+| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
+| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
+| open_subscribe | True | Enable subscription source feature |
+| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | Subscription source list |
+| open_multicast | True | Enable multicast source function |
+| region_list | ["广东"] | Multicast source region list, [more regions](./fofa_map.py) |
## Quick Start
diff --git a/README.md b/README.md
index 4e30db5e95e..70ff3c28be5 100644
--- a/README.md
+++ b/README.md
@@ -20,27 +20,26 @@
## 配置
-| 配置项 | 默认值 | 描述 |
-| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
-| source_file | "demo.txt" | 模板文件名称 |
-| final_file | "result.txt" | 生成文件名称 |
-| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
-| open_online_search | True | 开启线上检索源功能 |
-| favorite_page_num | 5 | 关注频道获取分页数量 |
-| default_page_num | 3 | 常规频道获取分页数量 |
-| urls_limit | 10 | 单个频道接口数量 |
-| open_sort | True | 开启排序测试功能,若没有使用线上检索建议关闭 |
-| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
-| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
-| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
-| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
-| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
-| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
-| open_subscribe | True | 开启订阅源功能 |
-| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | 订阅源列表 |
-| open_multicast | True | 开启组播源功能 |
-| region_list | ["广东"] | 组播源地区列表,更多地区请见 fofa_map.py 文件 |
-| strict_match | False | 严格匹配,开启可最大程度减少频道接口不匹配问题,同时会丢失部分模糊匹配结果 |
+| 配置项 | 默认值 | 描述 |
+| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- |
+| source_file | "demo.txt" | 模板文件名称 |
+| final_file | "result.txt" | 生成文件名称 |
+| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
+| open_online_search | True | 开启线上检索源功能 |
+| favorite_page_num | 5 | 关注频道获取分页数量 |
+| default_page_num | 3 | 常规频道获取分页数量 |
+| urls_limit | 10 | 单个频道接口数量 |
+| open_sort              | True                                                                                                                         | 开启排序功能(响应速度、日期、分辨率),若执行时间较长可关闭此功能                                  |
+| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
+| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
+| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
+| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
+| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
+| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
+| open_subscribe | True | 开启订阅源功能 |
+| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | 订阅源列表 |
+| open_multicast | True | 开启组播源功能 |
+| region_list | ["广东"] | 组播源地区列表,[更多地区](./fofa_map.py) |
## 快速上手
diff --git a/config.py b/config.py
index 00fbfb71c96..d9b745d1a55 100644
--- a/config.py
+++ b/config.py
@@ -32,4 +32,3 @@
]
open_multicast = True
region_list = ["广东"]
-strict_match = False
diff --git a/docs/tutorial-EN.md b/docs/tutorial-EN.md
index ba6aa53c0f6..f5ed0287c00 100644
--- a/docs/tutorial-EN.md
+++ b/docs/tutorial-EN.md
@@ -57,27 +57,26 @@ Similar to editing the template, modify the running configuration
Adjust the configuration as needed. Below is the default configuration explanation:
-| Configuration Item | Default Value | Description |
-| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| source_file | "demo.txt" | Template file name |
-| final_file | "result.txt" | Generated file name |
-| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
-| open_online_search | True | Enable online search source feature |
-| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
-| default_page_num | 3 | Page retrieval quantity for regular channels |
-| urls_limit | 10 | Number of interfaces per channel |
-| open_sort | True | Enable the sorting test function, it is recommended to turn it off if you are not using online search |
-| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
-| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
-| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
-| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
-| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
-| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
-| open_subscribe | True | Enable subscription source feature |
-| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | Subscription source list |
-| open_multicast | True | Enable multicast source function |
-| region_list | ["广东"] | Multicast source region list, for more regions please see the fofa_map.py file |
-| strict_match | False | Strict matching, when enabled, can minimize the issue of channel interface mismatch to the greatest extent, but at the same time, some fuzzy matching results may be lost |
+| Configuration Item | Default Value | Description |
+| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
+| source_file | "demo.txt" | Template file name |
+| final_file | "result.txt" | Generated file name |
+| favorite_list | ["广东珠江","CCTV-1","CCTV-5","CCTV-5+","CCTV-13","广东体育","广东卫视","大湾区卫视","浙江卫视","湖南卫视","翡翠台"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
+| open_online_search | True | Enable online search source feature |
+| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
+| default_page_num | 3 | Page retrieval quantity for regular channels |
+| urls_limit | 10 | Number of interfaces per channel |
+| open_sort | True | Enable the sorting function (response speed, date, resolution), or turn it off if it takes a long time to execute |
+| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
+| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
+| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
+| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
+| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
+| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
+| open_subscribe | True | Enable subscription source feature |
+| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | Subscription source list |
+| open_multicast | True | Enable multicast source function |
+| region_list | ["广东"] | Multicast source region list, [more regions](./fofa_map.py) |
## Step 4: Run Updates Locally (Recommended, Stable, Supports a large number of channel updates)
diff --git a/docs/tutorial.md b/docs/tutorial.md
index 821db14a104..3166003ac76 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -65,7 +65,7 @@
| favorite_page_num | 5 | 关注频道获取分页数量 |
| default_page_num | 3 | 常规频道获取分页数量 |
| urls_limit | 10 | 单个频道接口数量 |
-| open_sort | True | 开启排序测试功能,若没有使用线上检索建议关闭 |
+| open_sort              | True                                                                                                                         | 开启排序功能(响应速度、日期、分辨率),若执行时间较长可关闭此功能                |
| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
@@ -75,8 +75,7 @@
| open_subscribe | True | 开启订阅源功能 |
| subscribe_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt"] | 订阅源列表 |
| open_multicast | True | 开启组播源功能 |
-| region_list | ["广东"] | 组播源地区列表,更多地区请见 fofa_map.py 文件 |
-| strict_match | False | 严格匹配,开启可最大程度减少频道接口不匹配问题,同时会丢失部分模糊匹配结果 |
+| region_list | ["广东"] | 组播源地区列表,[更多地区](./fofa_map.py) |
## 步骤四:本地运行更新(推荐,稳定,支持大量频道更新)
diff --git a/main.py b/main.py
index b7cdd7c86f5..d18855c4a24 100644
--- a/main.py
+++ b/main.py
@@ -3,28 +3,25 @@
except ImportError:
import config
from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
# from selenium_stealth import stealth
import asyncio
-from bs4 import BeautifulSoup
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
- getResultsFromSoup,
sortUrlsBySpeedAndResolution,
- getTotalUrls,
+ getTotalUrlsFromInfoList,
+ getTotalUrlsFromSortedData,
filterUrlsByPatterns,
useAccessibleUrl,
- getChannelsByExtendBaseUrls,
+ getChannelsBySubscribeUrls,
checkUrlByPatterns,
getFOFAUrlsFromRegionList,
getChannelsByFOFA,
mergeObjects,
- getTotalUrlsFromInfoList,
+ getChannelsInfoListByOnlineSearch,
+ formatChannelName,
)
import logging
from logging.handlers import RotatingFileHandler
@@ -71,9 +68,8 @@ async def visitPage(self, channelItems):
name for _, channelObj in channelItems.items() for name in channelObj.keys()
]
if config.open_subscribe:
- extendResults = await getChannelsByExtendBaseUrls(channelNames)
+ extendResults = await getChannelsBySubscribeUrls(channelNames)
if config.open_multicast:
- print(f"Getting channels by FOFA...")
fofa_urls = getFOFAUrlsFromRegionList()
fofa_results = {}
for url in fofa_urls:
@@ -88,7 +84,6 @@ async def visitPage(self, channelItems):
total_channels = len(channelNames)
pbar = tqdm(total=total_channels)
pageUrl = await useAccessibleUrl() if config.open_online_search else None
- wait = WebDriverWait(self.driver, 10)
for cate, channelObj in channelItems.items():
channelUrls = {}
channelObjKeys = channelObj.keys()
@@ -96,95 +91,46 @@ async def visitPage(self, channelItems):
pbar.set_description(
f"Processing {name}, {total_channels - pbar.n} channels remaining"
)
+ format_name = formatChannelName(name)
info_list = []
if config.open_subscribe:
- for url, date, resolution in extendResults.get(name, []):
+ for url, date, resolution in extendResults.get(format_name, []):
if url and checkUrlByPatterns(url):
info_list.append((url, None, resolution))
if config.open_multicast:
- for url in fofa_results.get(name, []):
+ for url in fofa_results.get(format_name, []):
if url and checkUrlByPatterns(url):
info_list.append((url, None, None))
if config.open_online_search and pageUrl:
- self.driver.get(pageUrl)
- search_box = wait.until(
- EC.presence_of_element_located(
- (By.XPATH, '//input[@type="text"]')
- )
- )
- search_box.clear()
- search_box.send_keys(name)
- submit_button = wait.until(
- EC.element_to_be_clickable(
- (By.XPATH, '//input[@type="submit"]')
- )
- )
- self.driver.execute_script("arguments[0].click();", submit_button)
- isFavorite = name in config.favorite_list
- pageNum = (
- config.favorite_page_num
- if isFavorite
- else config.default_page_num
+ online_info_list = getChannelsInfoListByOnlineSearch(
+ self.driver, pageUrl, format_name
)
- for page in range(1, pageNum + 1):
- try:
- if page > 1:
- page_link = wait.until(
- EC.element_to_be_clickable(
- (
- By.XPATH,
- f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
- )
- )
- )
- self.driver.execute_script(
- "arguments[0].click();", page_link
- )
- source = re.sub(
- r"",
- "",
- self.driver.page_source,
- flags=re.DOTALL,
- )
- soup = BeautifulSoup(source, "html.parser")
- if soup:
- results = getResultsFromSoup(soup, name)
- for result in results:
- url, date, resolution = result
- if url and checkUrlByPatterns(url):
- info_list.append((url, date, resolution))
- except Exception as e:
- print(f"Error on page {page}: {e}")
- continue
+ if online_info_list:
+ info_list.extend(online_info_list)
try:
+ channelUrls[name] = filterUrlsByPatterns(
+ getTotalUrlsFromInfoList(info_list)
+ )
github_actions = os.environ.get("GITHUB_ACTIONS")
- if not github_actions or (
- pbar.n <= 200 and github_actions == "true"
+ if (
+ config.open_sort
+ and not github_actions
+ or (pbar.n <= 200 and github_actions == "true")
):
- if config.open_sort:
- sorted_data = await sortUrlsBySpeedAndResolution(info_list)
- if sorted_data:
- channelUrls[name] = getTotalUrls(sorted_data)
- for (
- url,
- date,
- resolution,
- ), response_time in sorted_data:
- logging.info(
- f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms"
- )
- else:
- channelUrls[name] = filterUrlsByPatterns(
- channelObj[name]
+ sorted_data = await sortUrlsBySpeedAndResolution(info_list)
+ if sorted_data:
+ channelUrls[name] = getTotalUrlsFromSortedData(sorted_data)
+ for (
+ url,
+ date,
+ resolution,
+ ), response_time in sorted_data:
+ logging.info(
+ f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms"
)
- else:
- channelUrls[name] = filterUrlsByPatterns(
- getTotalUrlsFromInfoList(info_list)
- )
- else:
+ if len(channelUrls[name]) == 0:
channelUrls[name] = filterUrlsByPatterns(channelObj[name])
- except Exception as e:
- print(f"Error on sorting: {e}")
+ except:
continue
finally:
pbar.update()
diff --git a/utils.py b/utils.py
index 782098078ce..2b7fa3410d4 100644
--- a/utils.py
+++ b/utils.py
@@ -13,9 +13,56 @@
from urllib.parse import urlparse
import requests
import re
+from bs4 import BeautifulSoup
from bs4 import NavigableString
import fofa_map
from collections import defaultdict
+from tqdm import tqdm
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+
+def formatChannelName(name):
+ """
+ Format the channel name with sub and replace and lower
+ """
+ sub_pattern = (
+ r"-|_|\((.*?)\)|\[(.*?)\]| |频道|标清|高清|HD|hd|超清|超高|超高清|中央|央视|台"
+ )
+ name = re.sub(sub_pattern, "", name)
+ name = name.replace("plus", "+")
+ name = name.replace("PLUS", "+")
+ name = name.replace("+", "+")
+ name = name.replace("CCTV1综合", "CCTV1")
+ name = name.replace("CCTV2财经", "CCTV2")
+ name = name.replace("CCTV3综艺", "CCTV3")
+ name = name.replace("CCTV4国际", "CCTV4")
+ name = name.replace("CCTV4中文国际", "CCTV4")
+ name = name.replace("CCTV4欧洲", "CCTV4")
+ name = name.replace("CCTV5体育", "CCTV5")
+ name = name.replace("CCTV5+体育赛视", "CCTV5+")
+ name = name.replace("CCTV5+体育赛事", "CCTV5+")
+ name = name.replace("CCTV5+体育", "CCTV5+")
+ name = name.replace("CCTV6电影", "CCTV6")
+ name = name.replace("CCTV7军事", "CCTV7")
+ name = name.replace("CCTV7军农", "CCTV7")
+ name = name.replace("CCTV7农业", "CCTV7")
+ name = name.replace("CCTV7国防军事", "CCTV7")
+ name = name.replace("CCTV8电视剧", "CCTV8")
+ name = name.replace("CCTV9记录", "CCTV9")
+ name = name.replace("CCTV9纪录", "CCTV9")
+ name = name.replace("CCTV10科教", "CCTV10")
+ name = name.replace("CCTV11戏曲", "CCTV11")
+ name = name.replace("CCTV12社会与法", "CCTV12")
+ name = name.replace("CCTV13新闻", "CCTV13")
+ name = name.replace("CCTV新闻", "CCTV13")
+ name = name.replace("CCTV14少儿", "CCTV14")
+ name = name.replace("CCTV15音乐", "CCTV15")
+ name = name.replace("CCTV16奥林匹克", "CCTV16")
+ name = name.replace("CCTV17农业农村", "CCTV17")
+ name = name.replace("CCTV17农业", "CCTV17")
+ return name.lower()
def getChannelItems():
@@ -52,16 +99,19 @@ def getChannelItems():
return channels
-async def getChannelsByExtendBaseUrls(channel_names):
+async def getChannelsBySubscribeUrls(channel_names):
"""
- Get the channels by extending the base urls
+ Get the channels by subscribe urls
"""
channels = {}
pattern = r"^(.*?),(?!#genre#)(.*?)$"
- sub_pattern = r"_\((.*?)\)|_\[(.*?)\]|频道"
+ subscribe_urls_len = len(config.subscribe_urls)
+ pbar = tqdm(total=subscribe_urls_len)
for base_url in config.subscribe_urls:
try:
- print(f"Processing extend base url: {base_url}")
+ pbar.set_description(
+ f"Processing subscribe {base_url}, {subscribe_urls_len - pbar.n} urls remaining"
+ )
try:
response = requests.get(base_url, timeout=30)
except requests.exceptions.Timeout:
@@ -70,7 +120,6 @@ async def getChannelsByExtendBaseUrls(channel_names):
content = response.text
if content:
lines = content.split("\n")
- link_dict = {}
for line in lines:
if re.match(pattern, line) is not None:
key = re.match(pattern, line).group(1)
@@ -80,37 +129,73 @@ async def getChannelsByExtendBaseUrls(channel_names):
if resolution_match is not None
else None
)
- key = re.sub(sub_pattern, "", key).lower()
+ key = formatChannelName(key)
url = re.match(pattern, line).group(2)
value = (url, None, resolution)
- if key in link_dict:
- if value not in link_dict[key]:
- link_dict[key].append(value)
+ if key in channels:
+ if value not in channels[key]:
+ channels[key].append(value)
else:
- link_dict[key] = [value]
- found_channels = []
- for channel_name in channel_names:
- sub_channel_name = (
- channel_name.lower()
- if config.strict_match
- else re.sub(sub_pattern, "", channel_name).lower()
- )
- values = link_dict.get(sub_channel_name)
- if values:
- if channel_name in channels:
- channels[channel_name] += values
- else:
- channels[channel_name] = values
- found_channels.append(channel_name)
- if found_channels:
- print(f"{base_url} found channels: {','.join(found_channels)}")
+ channels[key] = [value]
except Exception as e:
print(f"Error on {base_url}: {e}")
continue
- print("Finished processing extend base urls")
+ finally:
+ pbar.update()
+ print("Finished processing subscribe urls")
+ pbar.close()
return channels
+def getChannelsInfoListByOnlineSearch(driver, pageUrl, name):
+ """
+ Get the channels info list by online search
+ """
+ wait = WebDriverWait(driver, 10)
+ driver.get(pageUrl)
+ search_box = wait.until(
+ EC.presence_of_element_located((By.XPATH, '//input[@type="text"]'))
+ )
+ search_box.clear()
+ search_box.send_keys(name)
+ submit_button = wait.until(
+ EC.element_to_be_clickable((By.XPATH, '//input[@type="submit"]'))
+ )
+ driver.execute_script("arguments[0].click();", submit_button)
+ isFavorite = name in config.favorite_list
+ pageNum = config.favorite_page_num if isFavorite else config.default_page_num
+ info_list = []
+ for page in range(1, pageNum + 1):
+ try:
+ if page > 1:
+ page_link = wait.until(
+ EC.element_to_be_clickable(
+ (
+ By.XPATH,
+ f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
+ )
+ )
+ )
+ driver.execute_script("arguments[0].click();", page_link)
+ source = re.sub(
+ r"",
+ "",
+ driver.page_source,
+ flags=re.DOTALL,
+ )
+ soup = BeautifulSoup(source, "html.parser")
+ if soup:
+ results = getResultsFromSoup(soup, name)
+ for result in results:
+ url, date, resolution = result
+ if url and checkUrlByPatterns(url):
+ info_list.append((url, date, resolution))
+ except Exception as e:
+ # print(f"Error on page {page}: {e}")
+ continue
+ return info_list
+
+
def updateChannelUrlsTxt(cate, channelUrls):
"""
Update the category and channel urls to the final file
@@ -168,18 +253,6 @@ def getChannelInfo(element):
return date, resolution
-def checkNameMatch(name, result_name):
- pattern = r"[a-zA-Z]+[_\-+]|cctv"
- if re.search(
- pattern,
- result_name,
- re.IGNORECASE,
- ):
- return name.lower() == result_name.lower()
- else:
- return True
-
-
def getResultsFromSoup(soup, name):
"""
Get the results from the soup
@@ -194,7 +267,7 @@ def getResultsFromSoup(soup, name):
name_element = url_element.find_previous_sibling()
if name_element:
channel_name = name_element.get_text(strip=True)
- if checkNameMatch(name, channel_name):
+ if name == formatChannelName(channel_name):
info_element = url_element.find_next_sibling()
date, resolution = getChannelInfo(info_element)
results.append((url, date, resolution))
@@ -267,49 +340,49 @@ def filterByDate(data):
"""
Filter by date and limit
"""
- default_recent_days = 60
- use_recent_days = getattr(config, "recent_days", 60)
- if (
- not isinstance(use_recent_days, int)
- or use_recent_days <= 0
- or use_recent_days > 365
- ):
+ default_recent_days = 30
+ use_recent_days = getattr(config, "recent_days", 30)
+ if not isinstance(use_recent_days, int) or use_recent_days <= 0:
use_recent_days = default_recent_days
start_date = datetime.datetime.now() - datetime.timedelta(days=use_recent_days)
recent_data = []
unrecent_data = []
for (url, date, resolution), response_time in data:
+ item = ((url, date, resolution), response_time)
if date:
date = datetime.datetime.strptime(date, "%m-%d-%Y")
if date >= start_date:
- recent_data.append(((url, date, resolution), response_time))
+ recent_data.append(item)
else:
- unrecent_data.append(((url, date, resolution), response_time))
- if len(recent_data) < config.urls_limit:
+ unrecent_data.append(item)
+ else:
+ unrecent_data.append(item)
+ recent_data_len = len(recent_data)
+ if recent_data_len == 0:
+ recent_data = unrecent_data
+ elif recent_data_len < config.urls_limit:
recent_data.extend(unrecent_data[: config.urls_limit - len(recent_data)])
- return recent_data[: config.urls_limit]
+ return recent_data
-def getTotalUrls(data):
+def getTotalUrlsFromInfoList(infoList):
"""
- Get the total urls with filter by date and depulicate
+ Get the total urls from info list
+ """
+ total_urls = [url for url, _, _ in infoList]
+ return list(dict.fromkeys(total_urls))[: config.urls_limit]
+
+
+def getTotalUrlsFromSortedData(data):
+ """
+    Get the total urls with filter by date and deduplicate from sorted data
"""
total_urls = []
if len(data) > config.urls_limit:
total_urls = [url for (url, _, _), _ in filterByDate(data)]
else:
total_urls = [url for (url, _, _), _ in data]
- return list(dict.fromkeys(total_urls))
-
-
-def getTotalUrlsFromInfoList(infoList):
- """
- Get the total urls from info list
- """
- total_urls = [
- url for url, _, _ in infoList[: min(len(infoList), config.urls_limit)]
- ]
- return list(dict.fromkeys(total_urls))
+ return list(dict.fromkeys(total_urls))[: config.urls_limit]
def is_ipv6(url):
@@ -412,8 +485,13 @@ def getChannelsByFOFA(source):
"""
urls = set(re.findall(r"https?://[\w\.-]+:\d+", source))
channels = {}
+ urls_len = len(urls)
+ pbar = tqdm(total=urls_len)
for url in urls:
try:
+ pbar.set_description(
+ f"Processing multicast {url}, {urls_len - pbar.n} urls remaining"
+ )
response = requests.get(url + "/iptv/live/1000.json?key=txiptv", timeout=2)
try:
json_data = response.json()
@@ -421,7 +499,7 @@ def getChannelsByFOFA(source):
try:
for item in json_data["data"]:
if isinstance(item, dict):
- item_name = item.get("name").strip()
+ item_name = formatChannelName(item.get("name"))
item_url = item.get("url").strip()
if item_name and item_url:
total_url = url + item_url
@@ -438,6 +516,9 @@ def getChannelsByFOFA(source):
except Exception as e:
# print(f"{url}: {e}")
continue
+ finally:
+ pbar.update()
+ pbar.close()
return channels
diff --git a/version.json b/version.json
index 8c0f63aecc1..e613e8adaa6 100644
--- a/version.json
+++ b/version.json
@@ -1,3 +1,3 @@
{
- "version": "1.1.4"
+ "version": "1.1.5"
}
\ No newline at end of file