Skip to content

Commit

Permalink
add zinc
Browse files Browse the repository at this point in the history
  • Loading branch information
BennyThink committed Jan 1, 2024
1 parent cbdbd34 commit dbe57d8
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 15 deletions.
8 changes: 5 additions & 3 deletions Docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,13 @@ To get started with SearchGram, you'll need to
# 4. Modify env file
The MEILI_MASTER_KEY is a credential used to access the Web UI of MeiliSearch.
All the environment variables are stored in `env/gram.env` and you can see the comments in `config.py` for more details.
To simplify things, you can use your bot token instead.
Make sure they're correct before you start the container. You can cross-check them with `docker-compose.yml`

All the environment variables are stored in `env/gram.env` and you can see the comments in `config.py` for more details.
```shell
An example of `env/gram.env` is shown below:
```shell
# vim env/gram.env
Expand Down
9 changes: 5 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ services:

zinc:
image: public.ecr.aws/zinclabs/zincsearch:latest
ports:
- "127.0.0.1:4080:4080"
environment:
ZINC_DATA_PATH: "/data"
# ZINC_FIRST_ADMIN_USER: "admin"
# ZINC_FIRST_ADMIN_PASSWORD: "Complexpass#123"
# GIN_MODE: "release"
ZINC_FIRST_ADMIN_USER: "root"
ZINC_FIRST_ADMIN_PASSWORD: "root"
env_file:
- env/gram.env
volumes:
- ./sg_data/zinc:/data
ports:
- "127.0.0.1:4080:4080"
9 changes: 6 additions & 3 deletions searchgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@

from config import ENGINE

# Search backends that can be selected through the ENGINE setting.
AVAILABLE_ENGINES = ["meili", "mongo", "zinc"]

# Fail fast on an unknown engine, before any backend module is imported.
if ENGINE not in AVAILABLE_ENGINES:
    raise ValueError(f"Unsupported engine {ENGINE}, available engines are {AVAILABLE_ENGINES}")

# Each backend module is imported only when it is the selected one; all of
# them expose the same class name, SearchEngine.
if ENGINE == "meili":
    print("Using MeiliSearch as search engine")
    from meili import SearchEngine
elif ENGINE == "mongo":
    print("Using MongoDB as search engine")
    from mongo import SearchEngine
else:  # ENGINE == "zinc", guaranteed by the membership check above
    print("Using Zinc as search engine")
    from zinc import SearchEngine
6 changes: 6 additions & 0 deletions searchgram/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
APP_HASH = os.getenv("APP_HASH", "23231321")
TOKEN = os.getenv("TOKEN", "1234") # id:hash

######### search engine settings #########
# MeiliSearch, by default it's meili in docker-compose
MEILI_HOST = os.getenv("MEILI_HOST", "http://meili:7700")
# Using bot token for simplicity
Expand All @@ -24,6 +25,11 @@
# available values: meili, mongo, zinc, default: meili
ENGINE = os.getenv("ENGINE", "meili").lower()

# ZincSearch, by default it's zinc in docker-compose
ZINC_HOST = os.getenv("ZINC_HOST", "http://zinc:4080")
# The defaults mirror ZINC_FIRST_ADMIN_USER / ZINC_FIRST_ADMIN_PASSWORD
# set for the zinc service in docker-compose.yml
ZINC_USER = os.getenv("ZINC_USER", "root")
ZINC_PASS = os.getenv("ZINC_PASS", "root")

####################################
# Your own user id, for example: 260260121
OWNER_ID = os.getenv("OWNER_ID", "260260121")
BOT_ID = int(TOKEN.split(":")[0])
Expand Down
4 changes: 3 additions & 1 deletion searchgram/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

class SearchEngine(BasicSearchEngine):
def __init__(self):
self.client = pymongo.MongoClient(host=MONGO_HOST, connect=False, connectTimeoutMS=5000, serverSelectionTimeoutMS=5000)
self.client = pymongo.MongoClient(
host=MONGO_HOST, connect=False, connectTimeoutMS=5000, serverSelectionTimeoutMS=5000
)
self.db = self.client["telegram"]
self.chat = self.db["chat"]

Expand Down
81 changes: 77 additions & 4 deletions searchgram/zinc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,97 @@
# SearchGram - zinc.py
# 2023-11-18 18:04

import math

import zincsearch_sdk
from zincsearch_sdk.api import document, index, search
from zincsearch_sdk.model.meta_bool_query import MetaBoolQuery
from zincsearch_sdk.model.meta_match_query import MetaMatchQuery
from zincsearch_sdk.model.meta_query import MetaQuery
from zincsearch_sdk.model.meta_query_string_query import MetaQueryStringQuery
from zincsearch_sdk.model.meta_zinc_query import MetaZincQuery

from config import ZINC_HOST, ZINC_PASS, ZINC_USER
from engine import BasicSearchEngine
from utils import sizeof_fmt

# Name of the ZincSearch index every operation in this module targets.
INDEX = "telegram"

# One API client shared by the whole module, built from the connection
# settings imported from config.
configuration = zincsearch_sdk.Configuration(
    host=ZINC_HOST,
    username=ZINC_USER,
    password=ZINC_PASS,
)
api_client = zincsearch_sdk.ApiClient(configuration)


class SearchEngine(BasicSearchEngine):
    """Search backend that stores and queries messages in ZincSearch.

    Every request goes through the module-level ``api_client`` and targets
    the ``INDEX`` index.
    """

    def upsert(self, message):
        """Index ``message`` under its ``ID`` field.

        Nothing is written when ``self.check_ignore(message)`` is truthy.
        ``check_ignore`` and ``set_uid`` are not defined in this class
        (presumably inherited from ``BasicSearchEngine``).
        """
        if self.check_ignore(message):
            return
        data = self.set_uid(message)
        document.Document(api_client).index_with_id(INDEX, data.get("ID"), data)

    def search(self, keyword, _type=None, user=None, page=1, mode=None) -> dict:
        """Run a paginated query-string search and return one page of hits.

        Args:
            keyword: Text handed to Zinc as a ``query_string`` query.
            _type: Optional chat type name; when set, ``chat.type`` must
                match ``ChatType.<_type>``.
            user: Optional user filter. It is normalised with
                ``self.clean_user`` and, when still truthy, must match
                either ``chat.username`` or ``chat.id``.
            page: 1-based page number. Values below 1 are treated as 1 so
                the result offset never becomes negative.
            mode: Currently ignored (exact matching is not implemented).

        Returns:
            A dict with the keys ``hits``, ``query``, ``hitsPerPage``,
            ``page``, ``totalPages`` and ``totalHits``.
        """
        per_page = 10
        # A page below 1 would produce a negative ``_from`` offset.
        page = max(page, 1)

        # Every clause in ``must`` has to match.
        must = [MetaQuery(query_string=MetaQueryStringQuery(query=keyword))]

        user = self.clean_user(user)
        if user:
            # The filter value may be a username or a numeric id, so accept
            # a match on either field.
            must.append(
                MetaQuery(
                    bool=MetaBoolQuery(
                        should=[
                            MetaQuery(match={"chat.username": MetaMatchQuery(query=str(user))}),
                            MetaQuery(match={"chat.id": MetaMatchQuery(query=str(user))}),
                        ]
                    )
                )
            )
        if _type:
            must.append(MetaQuery(match={"chat.type": MetaMatchQuery(query=f"ChatType.{_type}")}))

        # TODO exact match, use term query? Until then ``mode`` has no effect.

        query = MetaZincQuery(
            query=MetaQuery(bool=MetaBoolQuery(must=must)),
            sort=["-@timestamp"],
            _from=(page - 1) * per_page,
            size=per_page,
            track_total_hits=True,
        )

        results = search.Search(api_client).search(INDEX, query)
        total_hits = results.hits.total.value
        return {
            "hits": results.hits.hits,
            "query": keyword,
            "hitsPerPage": per_page,
            "page": page,
            "totalPages": math.ceil(total_hits / per_page),
            "totalHits": total_hits,
        }

    def ping(self) -> str:
        """Return a summary of the index: document count and formatted storage size."""
        stats = index.Index(api_client).get_index(INDEX)["stats"]
        size = stats["storage_size"]
        count = stats["doc_num"]
        return f"{count} messages, {sizeof_fmt(size)}"

    def clear_db(self):
        """Delete the whole ``INDEX`` index."""
        index.Index(api_client).delete(INDEX)


if __name__ == "__main__":
    # Manual smoke test against the configured Zinc server.
    engine = SearchEngine()
    print(engine.search("天才啊", _type="PRIVATE"))
    print(engine.ping())
    # clear_db() is deliberately not called here: it deletes the whole index,
    # which must not happen as a side effect of running this module directly.

0 comments on commit dbe57d8

Please sign in to comment.