From dbe57d8441b6730e8bc4e01c5c783235b1311c91 Mon Sep 17 00:00:00 2001 From: Benny Date: Mon, 1 Jan 2024 17:20:24 +0100 Subject: [PATCH] add zinc --- Docker.md | 8 +++-- docker-compose.yml | 9 ++--- searchgram/__init__.py | 9 +++-- searchgram/config.py | 6 ++++ searchgram/mongo.py | 4 ++- searchgram/zinc.py | 81 +++++++++++++++++++++++++++++++++++++++--- 6 files changed, 102 insertions(+), 15 deletions(-) diff --git a/Docker.md b/Docker.md index cd09b77..febf10d 100644 --- a/Docker.md +++ b/Docker.md @@ -109,11 +109,13 @@ To get started with SearchGram, you'll need to # 4. Modify env file -The MEILI_MASTER_KEY is a credential used to access the Web UI of MeiliSearch. +All the environment variables are stored in `env/gram.env` and you can see the comments in `config.py` for more details. -To simplify things, you can use your bot token instead. +Make sure they're correct before you start the container. You can cross-check them with `docker-compose.yml` -All the environment variables are stored in `env/gram.env` and you can see the comments in `config.py` for more details. 
+ + +An example of `env/gram.env` is shown below: ```shell # vim env/gram.env diff --git a/docker-compose.yml b/docker-compose.yml index 25c4cc7..92cde70 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,13 +43,14 @@ services: zinc: image: public.ecr.aws/zinclabs/zincsearch:latest - ports: - - "127.0.0.1:4080:4080" environment: ZINC_DATA_PATH: "/data" - # ZINC_FIRST_ADMIN_USER: "admin" - # ZINC_FIRST_ADMIN_PASSWORD: "Complexpass#123" + # GIN_MODE: "release" + ZINC_FIRST_ADMIN_USER: "root" + ZINC_FIRST_ADMIN_PASSWORD: "root" env_file: - env/gram.env volumes: - ./sg_data/zinc:/data + ports: + - "127.0.0.1:4080:4080" diff --git a/searchgram/__init__.py b/searchgram/__init__.py index 53f5b04..a48991b 100644 --- a/searchgram/__init__.py +++ b/searchgram/__init__.py @@ -6,9 +6,7 @@ from config import ENGINE -AVAILABLE_ENGINES = ["meili", "mongo"] -if ENGINE not in AVAILABLE_ENGINES: - raise ValueError(f"Unsupported engine {ENGINE}, available engines are {AVAILABLE_ENGINES}") +AVAILABLE_ENGINES = ["meili", "mongo", "zinc"] if ENGINE == "meili": print("Using MeiliSearch as search engine") @@ -16,3 +14,8 @@ elif ENGINE == "mongo": print("Using MongoDB as search engine") from mongo import SearchEngine +elif ENGINE == "zinc": + print("Using Zinc as search engine") + from zinc import SearchEngine +else: + raise ValueError(f"Unsupported engine {ENGINE}, available engines are {AVAILABLE_ENGINES}") diff --git a/searchgram/config.py b/searchgram/config.py index 7f7af8d..127b026 100644 --- a/searchgram/config.py +++ b/searchgram/config.py @@ -13,6 +13,7 @@ APP_HASH = os.getenv("APP_HASH", "23231321") TOKEN = os.getenv("TOKEN", "1234") # id:hash +######### search engine settings ######### # MeiliSearch, by default it's meili in docker-compose MEILI_HOST = os.getenv("MEILI_HOST", "http://meili:7700") # Using bot token for simplicity @@ -24,6 +25,11 @@ # available values: meili, mongo, zinc, default: meili ENGINE = os.getenv("ENGINE", "meili").lower() +ZINC_HOST 
= os.getenv("ZINC_HOST", "http://zinc:4080") +ZINC_USER = os.getenv("ZINC_USER", "root") +ZINC_PASS = os.getenv("ZINC_PASS", "root") + +#################################### # Your own user id, for example: 260260121 OWNER_ID = os.getenv("OWNER_ID", "260260121") BOT_ID = int(TOKEN.split(":")[0]) diff --git a/searchgram/mongo.py b/searchgram/mongo.py index dcea0a1..ad65b26 100644 --- a/searchgram/mongo.py +++ b/searchgram/mongo.py @@ -18,7 +18,9 @@ class SearchEngine(BasicSearchEngine): def __init__(self): - self.client = pymongo.MongoClient(host=MONGO_HOST, connect=False, connectTimeoutMS=5000, serverSelectionTimeoutMS=5000) + self.client = pymongo.MongoClient( + host=MONGO_HOST, connect=False, connectTimeoutMS=5000, serverSelectionTimeoutMS=5000 + ) self.db = self.client["telegram"] self.chat = self.db["chat"] diff --git a/searchgram/zinc.py b/searchgram/zinc.py index 2301007..3b3c697 100644 --- a/searchgram/zinc.py +++ b/searchgram/zinc.py @@ -4,24 +4,97 @@ # SearchGram - zinc.py # 2023-11-18 18:04 +import math + import zincsearch_sdk +from zincsearch_sdk.api import document, index, search +from zincsearch_sdk.model.meta_bool_query import MetaBoolQuery +from zincsearch_sdk.model.meta_match_query import MetaMatchQuery +from zincsearch_sdk.model.meta_query import MetaQuery +from zincsearch_sdk.model.meta_query_string_query import MetaQueryStringQuery +from zincsearch_sdk.model.meta_zinc_query import MetaZincQuery +from config import ZINC_HOST, ZINC_PASS, ZINC_USER from engine import BasicSearchEngine from utils import sizeof_fmt +configuration = zincsearch_sdk.Configuration(host=ZINC_HOST, username=ZINC_USER, password=ZINC_PASS) + +api_client = zincsearch_sdk.ApiClient(configuration) +INDEX = "telegram" + class SearchEngine(BasicSearchEngine): def upsert(self, message): if self.check_ignore(message): return data = self.set_uid(message) - # self.client.index("telegram").add_documents([data], primary_key="ID") + api_instance = document.Document(api_client) + 
api_instance.index_with_id(INDEX, data.get("ID"), data) def search(self, keyword, _type=None, user=None, page=1, mode=None) -> dict: - pass + query = MetaZincQuery( + query=MetaQuery( + bool=MetaBoolQuery( + must=[ + MetaQuery( + query_string=MetaQueryStringQuery(query=keyword), + ), + ], + ), + ), + sort=["-@timestamp"], + _from=(page - 1) * 10, + size=10, + track_total_hits=True, + ) + + user = self.clean_user(user) + if user: + query.query.bool.must.append( + MetaQuery( + bool=MetaBoolQuery( + should=[ + MetaQuery(match={"chat.username": MetaMatchQuery(query=str(user))}), + MetaQuery(match={"chat.id": MetaMatchQuery(query=str(user))}), + ] + ) + ) + ) + if _type: + query.query.bool.must.append(MetaQuery(match={"chat.type": MetaMatchQuery(query=f"ChatType.{_type}")})) + + if mode: + pass + # TODO exact match, use term query? + + api_instance = search.Search(api_client) + results = api_instance.search(INDEX, query) + total_hits = results.hits.total.value + total_pages = math.ceil(total_hits / 10) + return { + "hits": results.hits.hits, + "query": keyword, + "hitsPerPage": 10, + "page": page, + "totalPages": total_pages, + "totalHits": total_hits, + } def ping(self) -> str: - pass + api_instance = index.Index(api_client) + api_response = api_instance.get_index(INDEX) + size = api_response["stats"]["storage_size"] + count = api_response["stats"]["doc_num"] + return f"{count} messages, {sizeof_fmt(size)}" def clear_db(self): - pass + api_instance = index.Index(api_client) + api_instance.delete(INDEX) + + +if __name__ == "__main__": + engine = SearchEngine() + r = engine.search("天才啊", _type="PRIVATE") + print(engine.ping()) + engine.clear_db()