Skip to content

Commit

Permalink
add adaptor
Browse files Browse the repository at this point in the history
  • Loading branch information
BennyThink committed Nov 18, 2023
1 parent f5c66e0 commit 2cf5b36
Show file tree
Hide file tree
Showing 11 changed files with 175 additions and 90 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/builder.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ jobs:
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ steps.dh_string.outputs.lowercase }}:ng
ghcr.io/${{ steps.ghcr_string.outputs.lowercase }}:ng
${{ steps.dh_string.outputs.lowercase }}
ghcr.io/${{ steps.ghcr_string.outputs.lowercase }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
Expand Down
34 changes: 0 additions & 34 deletions docker-compose.legacy.yml

This file was deleted.

16 changes: 10 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,20 @@ version: '3.1'

services:
client:
image: bennythink/searchgram:ng
image: bennythink/searchgram
env_file:
- env/gram.env
restart: always
depends_on:
- meili
volumes:
- ./sg_data/session:/SearchGram/searchgram/session
command:
- client.py

bot:
image: bennythink/searchgram:ng
image: bennythink/searchgram
env_file:
- env/gram.env
restart: always
depends_on:
- meili
volumes:
- ./sg_data/session:/SearchGram/searchgram/session
command:
Expand All @@ -34,3 +30,11 @@ services:
- env/gram.env
ports:
- "127.0.0.1:7700:7700"

mongo:
image: mongo:6
restart: always
volumes:
- ./sg_data/mongodb:/data/db
logging:
driver: none
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ fakeredis==2.20.0
tqdm==4.65.0
meilisearch==0.28.1
coloredlogs==15.0.1
pymongo==4.6.0
zhconv==1.4.3
7 changes: 7 additions & 0 deletions searchgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@

# SearchGram - __init__.py
# 2023-11-18 16:26

from config import ENGINE

# Bind ``SearchEngine`` to the back-end implementation named by ENGINE so that
# callers can simply ``from searchgram import SearchEngine``.
if ENGINE == "meili":
    from meili import SearchEngine
elif ENGINE == "mongo":
    from mongo import SearchEngine
else:
    # Without this branch an unknown value leaves SearchEngine undefined and
    # the failure only shows up later as an opaque "cannot import name" error.
    raise ValueError(f"Unsupported ENGINE {ENGINE!r}; expected 'meili' or 'mongo'")
5 changes: 3 additions & 2 deletions searchgram/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup

from config import OWNER_ID, TOKEN
from meili import SearchEngine
from searchgram import SearchEngine
from init_client import get_client
from utils import setup_logger

tgdb = SearchEngine()

setup_logger()
app = get_client(TOKEN)
chat_types = [i for i in dir(enums.ChatType) if not i.startswith("_")]
tgdb = SearchEngine()
parser = argparse.ArgumentParser()
parser.add_argument("keyword", help="the keyword to be searched")
parser.add_argument("-t", "--type", help="the type of message", default=None)
Expand Down
2 changes: 1 addition & 1 deletion searchgram/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pyrogram import Client, filters, types

from config import BOT_ID
from meili import SearchEngine
from searchgram import SearchEngine
from init_client import get_client
from utils import setup_logger

Expand Down
6 changes: 6 additions & 0 deletions searchgram/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@
# Every setting is read from an environment variable; the literal passed as the
# second argument is only a fallback used when the variable is unset.
APP_ID = int(os.getenv("APP_ID", 321232123))
APP_HASH = os.getenv("APP_HASH", "23231321")
TOKEN = os.getenv("TOKEN", "1234") # id:hash

# MeiliSearch endpoint and key; the key falls back to TOKEN when
# MEILI_MASTER_KEY is not set.
MEILI_HOST = os.getenv("MEILI_HOST", "http://meili:7700")
MEILI_PASS = os.getenv("MEILI_MASTER_KEY", TOKEN)

# Host used by the MongoDB engine.
MONGO_HOST = os.getenv("MONGO_HOST", "mongo")

# Name of the search back-end to load: "meili" (default) or "mongo".
ENGINE = os.getenv("ENGINE", "meili")

# NOTE: kept as a string; no int conversion is applied here.
OWNER_ID = os.getenv("OWNER_ID", "260260121")
# The part of TOKEN before the first ":".
BOT_ID = TOKEN.split(":")[0]

Expand Down
62 changes: 62 additions & 0 deletions searchgram/engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# coding: utf-8

# SearchGram - engine.py
# 2023-11-18 16:34

import configparser
import contextlib
import json

# Module-level INI parser: bare keys without "= value" are accepted, and option
# names keep their original case (the default transform would lower-case them).
config = configparser.ConfigParser(allow_no_value=True)
config.optionxform = str


class BasicSearchEngine:
    """Common helpers and the method contract shared by the search back-ends.

    Concrete engines subclass this and override :meth:`upsert`,
    :meth:`search`, :meth:`ping` and :meth:`clear_db`; the implementations
    here do nothing and return ``None``.
    """

    def __init_subclass__(cls, **kwargs):
        """Make ``self.__clean_user(...)`` resolvable inside subclasses.

        Python rewrites ``self.__clean_user`` written in a subclass body to
        ``self._<SubclassName>__clean_user``, which does not exist on this
        base class. Publishing the helper under that mangled name keeps such
        calls working instead of raising ``AttributeError``.
        """
        super().__init_subclass__(**kwargs)
        mangled = f"_{cls.__name__.lstrip('_')}__clean_user"
        # Respect a subclass that deliberately defines its own private helper.
        if mangled not in cls.__dict__:
            setattr(cls, mangled, staticmethod(cls._clean_user))

    @staticmethod
    def set_uid(message) -> dict:
        """Tag *message* with ``ID`` and ``timestamp`` and return it as a dict.

        ``ID`` is ``"<chat id>-<message id>"`` and ``timestamp`` is the
        message date as integer epoch seconds. Both are set as attributes on
        *message* (which is mutated) before it is serialised.

        The dict is produced by parsing ``str(message)`` as JSON, so
        *message* must stringify to a JSON document.
        """
        message.ID = f"{message.chat.id}-{message.id}"
        message.timestamp = int(message.date.timestamp())
        return json.loads(str(message))

    @staticmethod
    def check_ignore(message) -> bool:
        """Return ``True`` when *message* must be skipped according to sync.ini.

        A chat is identified by its numeric id (as a string), its username,
        or its chat type wrapped in backticks (e.g. ```GROUP```).

        * If the ``whitelist`` section has entries, any chat not matching one
          of them is ignored.
        * Otherwise (or in addition) a chat matching a ``blacklist`` entry is
          ignored.

        ``sync.ini`` is parsed afresh on every call so edits, including
        removed entries, take effect immediately. A missing file or section
        is treated as an empty list.
        """
        # A fresh parser per call: ConfigParser.read() merges into existing
        # state, so a long-lived parser would never forget deleted entries.
        parser = configparser.ConfigParser(allow_no_value=True)
        parser.optionxform = str  # keep usernames / ids case-sensitive
        parser.read("sync.ini")

        def entries(section):
            return parser.options(section) if parser.has_section(section) else []

        whitelist = entries("whitelist")
        blacklist = entries("blacklist")

        chat = message.chat
        identities = (
            str(chat.id),
            getattr(chat, "username", None),
            f"`{chat.type.name}`",  # type name is upper case
        )

        if whitelist and not any(ident in whitelist for ident in identities):
            return True
        return any(ident in blacklist for ident in identities)

    @staticmethod
    def _clean_user(user: "str"):
        """Normalise a user reference.

        Returns ``None`` for ``None``, an ``int`` when *user* is numeric, and
        otherwise the string with a leading ``@`` or ``https://t.me/`` prefix
        removed.
        """
        if user is None:
            return None
        try:
            return int(user)
        except (TypeError, ValueError):
            pass
        for prefix in ("@", "https://t.me/"):
            if user.startswith(prefix):
                return user[len(prefix):]
        return user

    # Original private name, kept so ``_BasicSearchEngine__clean_user`` still
    # resolves for any existing caller.
    __clean_user = _clean_user

    def upsert(self, message):
        """Store or update *message* in the index (no-op in the base class)."""
        pass

    def search(self, keyword, _type=None, user=None, page=1, mode=None):
        """Search for *keyword* (no-op in the base class, returns ``None``)."""
        pass

    def ping(self) -> str:
        """Report back-end status (no-op in the base class, returns ``None``)."""
        pass

    def clear_db(self):
        """Remove all stored data (no-op in the base class)."""
        pass
48 changes: 3 additions & 45 deletions searchgram/meili.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,16 @@

__author__ = "Benny <[email protected]>"

import configparser
import contextlib
import json
import logging
from engine import BasicSearchEngine

import meilisearch

from utils import setup_logger, sizeof_fmt
from utils import sizeof_fmt
from config import MEILI_HOST, MEILI_PASS

setup_logger()
config = configparser.ConfigParser(allow_no_value=True)
config.optionxform = lambda option: option


class SearchEngine:
class SearchEngine(BasicSearchEngine):
def __init__(self):
# ["BOT", "CHANNEL", "GROUP", "PRIVATE", "SUPERGROUP"]
try:
Expand All @@ -36,48 +30,12 @@ def __init__(self):
except:
logging.critical("Failed to connect to MeiliSearch")

@staticmethod
def set_uid(message) -> "dict":
uid = f"{message.chat.id}-{message.id}"
timestamp = int(message.date.timestamp())
setattr(message, "ID", uid)
setattr(message, "timestamp", timestamp)

data = json.loads(str(message))
return data

@staticmethod
def check_ignore(message):
config.read("sync.ini")
blacklist = config.options("blacklist")
whitelist = config.options("whitelist")
uid = str(message.chat.id)
chat_type = message.chat.type.name # upper case
username = getattr(message.chat, "username", None)
if whitelist and not (uid in whitelist or username in whitelist or f"`{chat_type}`" in whitelist):
return True

if username in blacklist or uid in blacklist or f"`{chat_type}`" in blacklist:
return True

def upsert(self, message):
if self.check_ignore(message):
return
data = self.set_uid(message)
self.client.index("telegram").add_documents([data], primary_key="ID")

@staticmethod
def __clean_user(user: "str"):
if user is None:
return None
with contextlib.suppress(Exception):
return int(user)
if user.startswith("@"):
return user[1:]
if user.startswith("https://t.me/"):
return user[13:]
return user

def search(self, keyword, _type=None, user=None, page=1, mode=None):
if mode:
keyword = f'"{keyword}"'
Expand Down
79 changes: 79 additions & 0 deletions searchgram/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
# coding: utf-8

# SearchGram - Mongo.py
# 2023-11-18 16:31

__author__ = "Benny <[email protected]>"

import contextlib
import json
import re

import pymongo
import zhconv

from config import MONGO_HOST
from utils import sizeof_fmt

from engine import BasicSearchEngine


class SearchEngine(BasicSearchEngine):
    """Search back-end backed by MongoDB.

    Uses the ``telegram`` database with the ``chat`` and ``history``
    collections. ``upsert``, ``search`` and ``clear_db`` are still stubs;
    the working query lives in :meth:`search2`.
    """

    def __init__(self):
        self.client = pymongo.MongoClient(
            host=MONGO_HOST,
            connect=False,
            connectTimeoutMS=5000,
            serverSelectionTimeoutMS=5000,
        )
        self.db = self.client["telegram"]
        self.col = self.db["chat"]
        self.history = self.db["history"]

    def __del__(self):
        # __init__ may have failed before ``client`` was assigned; never let
        # the finalizer raise because of that.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

    def upsert(self, message):
        """Store *message* (not implemented yet)."""
        pass

    def search(self, keyword, _type=None, user=None, page=1, mode=None):
        """Search for *keyword* (not implemented yet, returns ``None``)."""
        pass

    @staticmethod
    def _fuzzy_pattern(text) -> str:
        """Build a regex that matches the whitespace-separated words of *text* in order.

        Each word is escaped so regex metacharacters typed by the user are
        matched literally instead of breaking (or hijacking) the query.
        """
        words = [re.escape(word) for word in text.split()]
        return ".*" + ".*".join(words) + ".*"

    def search2(self, keyword, _type=None, user=None, page=1, mode=None):
        """Return stored messages whose ``text`` fuzzily matches *keyword*.

        Whitespace in *keyword* matches any run of characters, and both the
        Simplified and Traditional Chinese renderings of the keyword are
        tried. When *user* is given, results are limited to messages whose
        sender or chat matches it by id, username or first name.

        ``_type``, ``page`` and ``mode`` are accepted for interface
        compatibility but are not used yet.

        Returns a list of documents (without ``_id``), newest first.
        """
        hans = self._fuzzy_pattern(zhconv.convert(keyword, "zh-hans"))
        hant = self._fuzzy_pattern(zhconv.convert(keyword, "zh-hant"))
        filter_ = {
            "$or": [
                {"text": {"$regex": hans, "$options": "i"}},
                {"text": {"$regex": hant, "$options": "i"}},
            ]
        }

        if user:
            # ``self.__clean_user`` would be mangled to
            # ``_SearchEngine__clean_user`` here; the helper is defined on the
            # base class, so address it by its actual (base-mangled) name.
            user = self._BasicSearchEngine__clean_user(user)
            user_pattern = ".*" + re.escape(str(user)) + ".*"
            filter_["$and"] = [
                {
                    "$or": [
                        {"from_user.id": user},
                        {"from_user.username": {"$regex": user_pattern, "$options": "i"}},
                        {"from_user.first_name": {"$regex": user_pattern, "$options": "i"}},
                        {"chat.id": user},
                        {"chat.username": {"$regex": user_pattern, "$options": "i"}},
                        {"chat.first_name": {"$regex": user_pattern, "$options": "i"}},
                    ]
                }
            ]

        results = []
        for hit in self.col.find(filter_).sort("date", pymongo.DESCENDING):
            hit.pop("_id", None)
            results.append(hit)
        return results

    def ping(self) -> str:
        """Return a short summary: document count and storage size."""
        count = self.col.count_documents({})
        size = self.db.command("dbstats")["storageSize"]
        return f"{count} messages, {sizeof_fmt(size)}"

    def clear_db(self):
        """Remove all stored data (not implemented yet)."""
        pass


if __name__ == "__main__":
    # Manual smoke test: run one query and print what comes back.
    tges = SearchEngine()
    # ``search`` is still a stub that returns None (iterating it raises
    # TypeError); ``search2`` holds the working implementation.
    for i in tges.search2("干扰项"):
        # .get() so a document lacking either field does not abort the loop.
        print(i.get("text"), i.get("mention"))

0 comments on commit 2cf5b36

Please sign in to comment.