-
-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f5c66e0
commit 2cf5b36
Showing
11 changed files
with
175 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,5 @@ fakeredis==2.20.0 | |
tqdm==4.65.0 | ||
meilisearch==0.28.1 | ||
coloredlogs==15.0.1 | ||
pymongo==4.6.0 | ||
zhconv==1.4.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#!/usr/bin/env python3 | ||
# coding: utf-8 | ||
|
||
# SearchGram - engine.py | ||
# 2023-11-18 16:34 | ||
|
||
import configparser | ||
import contextlib | ||
import json | ||
|
||
# Module-wide parser used for sync.ini; allow_no_value permits bare keys
# (entries written without "= value").
config = configparser.ConfigParser(allow_no_value=True)
# Keep option names exactly as written instead of the default lower-casing.
config.optionxform = str
|
||
|
||
class BasicSearchEngine:
    """Common base for the search back ends.

    Provides the shared helpers (``set_uid``, ``check_ignore``,
    ``_clean_user``) and declares the operations each back end overrides
    (``upsert``, ``search``, ``ping``, ``clear_db``).
    """

    @staticmethod
    def set_uid(message) -> dict:
        """Tag *message* with a unique ID and timestamp, then return it as a dict.

        The ID is ``"<chat id>-<message id>"`` and the timestamp is the
        integer epoch of ``message.date``. Both are set as attributes on
        *message* before it is serialised.

        NOTE(review): relies on ``str(message)`` producing a JSON document
        (presumably a Pyrogram message object) - confirm against callers.
        """
        message.ID = f"{message.chat.id}-{message.id}"
        message.timestamp = int(message.date.timestamp())
        return json.loads(str(message))

    @staticmethod
    def check_ignore(message) -> bool:
        """Return True when *message* must not be indexed.

        ``sync.ini`` is re-read on every call. A chat is identified by its
        numeric id (as a string), its username, or its chat type wrapped in
        backticks (e.g. ```PRIVATE```). When the whitelist is non-empty,
        anything not on it is ignored; otherwise anything on the blacklist
        is ignored.

        Raises:
            configparser.NoSectionError: if ``sync.ini`` lacks a
                ``blacklist`` or ``whitelist`` section.
        """
        config.read("sync.ini")
        blacklist = config.options("blacklist")
        whitelist = config.options("whitelist")

        chat = message.chat
        chat_type = chat.type.name  # upper case
        identifiers = (str(chat.id), getattr(chat, "username", None), f"`{chat_type}`")

        if whitelist and not any(item in whitelist for item in identifiers):
            return True
        # Explicit bool instead of falling off the end with None.
        return any(item in blacklist for item in identifiers)

    @staticmethod
    def _clean_user(user: "str"):
        """Normalise a user reference given on the command line.

        Returns ``None`` for ``None``, an ``int`` when *user* is numeric,
        otherwise the string with a leading ``@`` or ``https://t.me/``
        prefix removed.
        """
        if user is None:
            return None
        with contextlib.suppress(TypeError, ValueError):
            return int(user)
        if user.startswith("@"):
            return user[1:]
        prefix = "https://t.me/"
        if user.startswith(prefix):
            return user[len(prefix):]
        return user

    # A double-underscore name is mangled per class, so a subclass calling
    # ``self.__clean_user`` would look up ``_<SubclassName>__clean_user`` and
    # fail with AttributeError. Keep the original mangled name and add the
    # spelling used by the ``SearchEngine`` subclasses as compatibility
    # aliases; new code should call ``_clean_user``.
    __clean_user = _clean_user
    _SearchEngine__clean_user = _clean_user

    def upsert(self, message):
        """Index *message*; back ends override this. The base does nothing."""
        pass

    def search(self, keyword, _type=None, user=None, page=1, mode=None):
        """Search for *keyword*; back ends override this. The base returns None."""
        pass

    def ping(self) -> str:
        """Report back-end status; back ends override this. The base returns None."""
        pass

    def clear_db(self):
        """Remove all indexed data; back ends override this. The base does nothing."""
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,22 +7,16 @@ | |
|
||
__author__ = "Benny <[email protected]>" | ||
|
||
import configparser | ||
import contextlib | ||
import json | ||
import logging | ||
from engine import BasicSearchEngine | ||
|
||
import meilisearch | ||
|
||
from utils import setup_logger, sizeof_fmt | ||
from utils import sizeof_fmt | ||
from config import MEILI_HOST, MEILI_PASS | ||
|
||
setup_logger() | ||
config = configparser.ConfigParser(allow_no_value=True) | ||
config.optionxform = lambda option: option | ||
|
||
|
||
class SearchEngine: | ||
class SearchEngine(BasicSearchEngine): | ||
def __init__(self): | ||
# ["BOT", "CHANNEL", "GROUP", "PRIVATE", "SUPERGROUP"] | ||
try: | ||
|
@@ -36,48 +30,12 @@ def __init__(self): | |
except: | ||
logging.critical("Failed to connect to MeiliSearch") | ||
|
||
@staticmethod | ||
def set_uid(message) -> "dict": | ||
uid = f"{message.chat.id}-{message.id}" | ||
timestamp = int(message.date.timestamp()) | ||
setattr(message, "ID", uid) | ||
setattr(message, "timestamp", timestamp) | ||
|
||
data = json.loads(str(message)) | ||
return data | ||
|
||
@staticmethod | ||
def check_ignore(message): | ||
config.read("sync.ini") | ||
blacklist = config.options("blacklist") | ||
whitelist = config.options("whitelist") | ||
uid = str(message.chat.id) | ||
chat_type = message.chat.type.name # upper case | ||
username = getattr(message.chat, "username", None) | ||
if whitelist and not (uid in whitelist or username in whitelist or f"`{chat_type}`" in whitelist): | ||
return True | ||
|
||
if username in blacklist or uid in blacklist or f"`{chat_type}`" in blacklist: | ||
return True | ||
|
||
def upsert(self, message): | ||
if self.check_ignore(message): | ||
return | ||
data = self.set_uid(message) | ||
self.client.index("telegram").add_documents([data], primary_key="ID") | ||
|
||
@staticmethod | ||
def __clean_user(user: "str"): | ||
if user is None: | ||
return None | ||
with contextlib.suppress(Exception): | ||
return int(user) | ||
if user.startswith("@"): | ||
return user[1:] | ||
if user.startswith("https://t.me/"): | ||
return user[13:] | ||
return user | ||
|
||
def search(self, keyword, _type=None, user=None, page=1, mode=None): | ||
if mode: | ||
keyword = f'"{keyword}"' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/env python3 | ||
# coding: utf-8 | ||
|
||
# SearchGram - Mongo.py | ||
# 2023-11-18 16:31 | ||
|
||
__author__ = "Benny <[email protected]>" | ||
|
||
import contextlib | ||
import json | ||
import re | ||
|
||
import pymongo | ||
import zhconv | ||
|
||
from config import MONGO_HOST | ||
from utils import sizeof_fmt | ||
|
||
from engine import BasicSearchEngine | ||
|
||
|
||
class SearchEngine(BasicSearchEngine):
    """MongoDB-backed search engine.

    Uses the ``chat`` and ``history`` collections of the ``telegram``
    database on ``MONGO_HOST``.
    """

    def __init__(self):
        # connect=False defers the actual connection until first use.
        self.client = pymongo.MongoClient(
            host=MONGO_HOST,
            connect=False,
            connectTimeoutMS=5000,
            serverSelectionTimeoutMS=5000,
        )
        self.db = self.client["telegram"]
        self.col = self.db["chat"]
        self.history = self.db["history"]

    def __del__(self):
        # __init__ may have failed before ``client`` was assigned.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

    def upsert(self, message):
        """Not implemented yet for this back end."""
        pass

    def search(self, keyword, _type=None, user=None, page=1, mode=None):
        """Not implemented yet for this back end; see ``search2``."""
        pass

    @staticmethod
    def _contains_pattern(text) -> str:
        """Build a "contains" regex: whitespace-separated terms, in order, any gap.

        Each term is escaped so user input is matched literally rather than
        being interpreted as regex syntax.
        """
        terms = (re.escape(term) for term in str(text).split())
        return ".*" + ".*".join(terms) + ".*"

    def search2(self, keyword, _type=None, user=None, page=1, mode=None):
        """Return matching documents from the ``chat`` collection, newest first.

        The keyword is matched case-insensitively against ``text`` in both
        its Simplified and Traditional Chinese forms; runs of whitespace in
        the keyword act as wildcards (fuzzy search). When *user* is given,
        results are further restricted to documents whose sender or chat
        matches it by id, username or first name.

        ``_type``, ``page`` and ``mode`` are accepted for interface
        compatibility but are not used here.

        Returns:
            list of documents (dicts) with the Mongo ``_id`` field removed.
        """
        hans = zhconv.convert(keyword, "zh-hans")
        hant = zhconv.convert(keyword, "zh-hant")
        filter_ = {
            "$or": [
                {"text": {"$regex": self._contains_pattern(hans), "$options": "i"}},
                {"text": {"$regex": self._contains_pattern(hant), "$options": "i"}},
            ]
        }
        if user:
            # ``self.__clean_user`` would be mangled to
            # ``_SearchEngine__clean_user``, which the base class does not
            # define; address the helper by its real (base-class) name.
            user = self._BasicSearchEngine__clean_user(user)
            user_pattern = self._contains_pattern(user)
            filter_["$and"] = [
                {
                    "$or": [
                        {"from_user.id": user},
                        {"from_user.username": {"$regex": user_pattern, "$options": "i"}},
                        {"from_user.first_name": {"$regex": user_pattern, "$options": "i"}},
                        {"chat.id": user},
                        {"chat.username": {"$regex": user_pattern, "$options": "i"}},
                        {"chat.first_name": {"$regex": user_pattern, "$options": "i"}},
                    ]
                }
            ]

        results = []
        for hit in self.col.find(filter_).sort("date", pymongo.DESCENDING):
            hit.pop("_id")  # drop Mongo's internal id from the returned document
            results.append(hit)
        return results

    def ping(self) -> str:
        """Return a summary: document count of ``chat`` and storage size of the db."""
        count = self.col.count_documents({})
        size = self.db.command("dbstats")["storageSize"]
        return f"{count} messages, {sizeof_fmt(size)}"

    def clear_db(self):
        """Not implemented yet for this back end."""
        pass
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test against the configured MongoDB instance.
    tges = SearchEngine()
    # search() may still be a stub returning None; iterate an empty list
    # in that case instead of raising TypeError.
    for i in tges.search("干扰项") or []:
        # Not every stored message carries both fields.
        print(i.get("text"), i.get("mention"))