def _parse_sensitive_words(text):
    """Return the set of non-empty words in *text*, one candidate per line.

    Each line is stripped of surrounding whitespace, then of trailing
    commas, then of any whitespace that sat between the word and the comma.
    Lines that are empty after this cleaning are discarded.
    """
    words = set()
    for line in text.splitlines():  # splitlines() also handles "\r\n"
        word = line.strip().rstrip(",").strip()
        # Filter AFTER cleaning: a line such as "," or " , " must not
        # produce an empty string in the word list.
        if word:
            words.add(word)
    return words


def fetch_and_merge_sensitive_words(urls, *, timeout=10):
    """Download every word list in *urls* and merge them into one list.

    Relies on the ``requests`` package being imported by the enclosing
    module.

    Args:
        urls: Iterable of URLs; each response body is read as text with one
            word per line, optionally followed by a trailing comma.
        timeout: Value passed to ``requests.get`` as ``timeout`` so that a
            stalled server cannot block the caller forever.

    Returns:
        A sorted list of the unique, non-empty words collected from all
        URLs that could be fetched. URLs that fail are reported with
        ``print`` and skipped; the function does not raise for them.
    """
    words = set()  # a set de-duplicates across and within word lists
    for url in urls:
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # treat HTTP error statuses as failures
        except requests.RequestException as e:
            # Best effort: one unreachable list must not discard the others.
            print(f"Failed to fetch sensitive words from {url}: {e}")
            continue
        words |= _parse_sensitive_words(response.text)
    # Sorted so the result is deterministic; the previous list(set(...))
    # order was arbitrary, so callers cannot depend on it.
    return sorted(words)