diff --git a/.gitignore b/.gitignore index 327eae423..306cd6985 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ *.session *.session-journal .vscode -*test.py +*test*.py setup.cfg # Byte-compiled / optimized / DLL files diff --git a/README.md b/README.md index 7b454dcd6..2be4467f2 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,37 @@ -## [Media Search bot](https://github.com/Mahesh0253/Media-Search-bot) +# [Media Search bot](https://github.com/Mahesh0253/Media-Search-bot) -* Index channel/group files for inline search. -* When you going to post file on telegram channel/group this bot will save that in database, So you can search that easily in inline mode. -* Supports document, video and audio file formats with caption. +* Index channel or group files for inline search. +* When you post file on telegram channel or group this bot will save that file in database, so you can search easily in inline mode. +* Supports document, video and audio file formats with caption support. -### Installation +## Installation -#### Easy Way +### Watch this video to create bot - https://youtu.be/dsuTn4qV2GA +### Easy Way [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) -#### Watch this video to create bot - https://youtu.be/dsuTn4qV2GA -#### Hard Way +### Hard Way -```sh +```bash +# Create virtual environment python3 -m venv env -. 
./env/bin/activate + +# Activate virtual environment +env\Scripts\activate.bat # For Windows +source env/bin/activate # For Linux or MacOS + +# Install Packages pip3 install -r requirements.txt -# Edit info.py with variables as given below + +# Edit info.py with variables as given below then run bot python3 bot.py ``` -Check `sample_info.py` before editing `info.py` file +Check [`sample_info.py`](sample_info.py) before editing [`info.py`](info.py) file -#### Variables +## Variables -##### Required Variables +### Required Variables * `BOT_TOKEN`: Create a bot using [@BotFather](https://telegram.dog/BotFather), and get the Telegram API token. * `API_ID`: Get this value from [telegram.org](https://my.telegram.org/apps) * `API_HASH`: Get this value from [telegram.org](https://my.telegram.org/apps) @@ -33,30 +40,37 @@ Check `sample_info.py` before editing `info.py` file * `DATABASE_URI`: [mongoDB](https://www.mongodb.com) URI. Get this value from [mongoDB](https://www.mongodb.com). For more help watch this [video](https://youtu.be/dsuTn4qV2GA) * `DATABASE_NAME`: Name of the database in [mongoDB](https://www.mongodb.com). For more help watch this [video](https://youtu.be/dsuTn4qV2GA) -##### Optional Variables +### Optional Variables * `COLLECTION_NAME`: Name of the collections. Defaults to Telegram_files. If you going to use same database, then use different collection name for each bot * `MAX_RESULTS`: Maximum limit for inline search results * `CACHE_TIME`: The maximum amount of time in seconds that the result of the inline query may be cached on the server * `USE_CAPTION_FILTER`: Whether bot should use captions to improve search results. (True/False) * `AUTH_USERS`: Username or ID of users to give access of inline search. Separate multiple users by space. Leave it empty if you don't want to restrict bot usage. +* `AUTH_CHANNEL`: Username or ID of channel. Without subscribing this channel users cannot use bot. +* `INVITE_MSG`: Auth channel invitation message. 
+* `USERBOT_STRING_SESSION`: String session of the user bot account. Needed only for the `index` command. -### Admin commands +## Admin commands ``` channel - Get basic infomation about channels total - Show total of saved files delete - Delete file from database +index - Index all files from channel or group logger - Get log file ``` -### Tips -* Run [one_time_indexer.py](one_time_indexer.py) file to save old files in the database that are not indexed yet. +## Tips +* Use the `index` command or run the [one_time_indexer.py](one_time_indexer.py) script to save older files that have not been indexed in the database yet. * You can use `|` to separate query and file type while searching for specific type of file. For example: `Avengers | video` * If you don't want to create a channel or group, use your chat ID / username as the channel ID. When you send a file to a bot, it will be saved in the database. -### Contributions +## Contributions Contributions are welcome. -### Thanks to [Pyrogram](https://github.com/pyrogram/pyrogram) +## Thanks to [Pyrogram](https://github.com/pyrogram/pyrogram) + +## Support +[Update Channel](https://t.me/botxupdates) and [Support Group](https://t.me/botxsupport) -### License +## License Code released under [The GNU General Public License](LICENSE). diff --git a/app.json b/app.json index efe135154..ea4aa4254 100644 --- a/app.json +++ b/app.json @@ -19,6 +19,11 @@ "description": "Your bot token.", "value": "" }, + "USERBOT_STRING_SESSION": { + "description": "User bot string session.", + "value": "", + "required": false + }, "API_ID": { "description": "Get this value from https://my.telegram.org", "value": "" @@ -40,6 +45,16 @@ "value": "", "required": false }, + "AUTH_CHANNEL": { + "description": "Username or ID of channel. Without subscribing this channel users cannot use bot.", + "value": "", + "required": false + }, + "INVITE_MSG": { + "description": "Auth channel invitation message", + "value": "'Please join @.... 
to use this bot'", + "required": false + }, "USE_CAPTION_FILTER": { "description": "Whether bot should use captions to improve search results. (True False)", "value": "False", diff --git a/info.py b/info.py index 085f3ba9f..79f9ce0e1 100644 --- a/info.py +++ b/info.py @@ -1,12 +1,15 @@ import re from os import environ +id_pattern = re.compile(r'^-?\d+$') + # Bot information SESSION = environ.get('SESSION', 'Media_search') USER_SESSION = environ.get('USER_SESSION', 'User_Bot') API_ID = int(environ['API_ID']) API_HASH = environ['API_HASH'] BOT_TOKEN = environ['BOT_TOKEN'] +USERBOT_STRING_SESSION = environ.get('USERBOT_STRING_SESSION') # Bot settings MAX_RESULTS = int(environ.get('MAX_RESULTS', 10)) @@ -14,10 +17,12 @@ USE_CAPTION_FILTER = bool(environ.get('USE_CAPTION_FILTER', False)) # Admins, Channels & Users -ADMINS = [int(admin) if re.search('^\d+$', admin) else admin for admin in environ['ADMINS'].split()] -CHANNELS = [int(ch) if re.search('^.\d+$', ch) else ch for ch in environ['CHANNELS'].split()] -auth_users = [int(user) if re.search('^\d+$', user) else user for user in environ.get('AUTH_USERS', '').split()] +ADMINS = [int(admin) if id_pattern.search(admin) else admin for admin in environ['ADMINS'].split()] +CHANNELS = [int(ch) if id_pattern.search(ch) else ch for ch in environ['CHANNELS'].split()] +auth_users = [int(user) if id_pattern.search(user) else user for user in environ.get('AUTH_USERS', '').split()] AUTH_USERS = (auth_users + ADMINS) if auth_users else [] +auth_channel = environ.get('AUTH_CHANNEL') +AUTH_CHANNEL = int(auth_channel) if auth_channel and id_pattern.search(auth_channel) else auth_channel # MongoDB information DATABASE_URI = environ['DATABASE_URI'] @@ -32,3 +37,4 @@ """ SHARE_BUTTON_TEXT = 'Checkout {username} for searching files' +INVITE_MSG = environ.get('INVITE_MSG', 'Please join @.... 
to use this bot') \ No newline at end of file diff --git a/plugins/commands.py b/plugins/commands.py index dc7b85691..f55e4c712 100644 --- a/plugins/commands.py +++ b/plugins/commands.py @@ -2,7 +2,7 @@ import logging from pyrogram import Client, filters from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup -from info import START_MSG, CHANNELS, ADMINS +from info import START_MSG, CHANNELS, ADMINS, INVITE_MSG from utils import Media logger = logging.getLogger(__name__) @@ -11,12 +11,15 @@ @Client.on_message(filters.command('start')) async def start(bot, message): """Start command handler""" - buttons = [[ - InlineKeyboardButton('Search Here', switch_inline_query_current_chat=''), - InlineKeyboardButton('Go Inline', switch_inline_query=''), - ]] - reply_markup = InlineKeyboardMarkup(buttons) - await message.reply(START_MSG, reply_markup=reply_markup) + if len(message.command) > 1 and message.command[1] == 'subscribe': + await message.reply(INVITE_MSG) + else: + buttons = [[ + InlineKeyboardButton('Search Here', switch_inline_query_current_chat=''), + InlineKeyboardButton('Go Inline', switch_inline_query=''), + ]] + reply_markup = InlineKeyboardMarkup(buttons) + await message.reply(START_MSG, reply_markup=reply_markup) @Client.on_message(filters.command('channel') & filters.user(ADMINS)) @@ -29,17 +32,24 @@ async def channel_info(bot, message): else: raise ValueError("Unexpected type of CHANNELS") + text = '📑 **Indexed channels/groups**\n' for channel in channels: - channel_info = await bot.get_chat(channel) - string = str(channel_info) - if len(string) > 4096: - filename = (channel_info.title or channel_info.first_name) + ".txt" - with open(filename, 'w') as f: - f.write(string) - await message.reply_document(filename) - os.remove(filename) + chat = await bot.get_chat(channel) + if chat.username: + text += '\n@' + chat.username else: - await message.reply(str(channel_info)) + text += '\n' + (chat.title or chat.first_name) + + text += f'\n\n**Total:** 
{len(channels)}' + + if len(text) < 4096: + await message.reply(text) + else: + file = 'Indexed channels.txt' + with open(file, 'w', encoding='utf-8') as f: + f.write(text) + await message.reply_document(file) + os.remove(file) @Client.on_message(filters.command('total') & filters.user(ADMINS)) diff --git a/plugins/inline.py b/plugins/inline.py index 6d2c5429a..bdea77058 100644 --- a/plugins/inline.py +++ b/plugins/inline.py @@ -1,14 +1,28 @@ +import logging from urllib.parse import quote + from pyrogram import Client, emoji, filters +from pyrogram.errors import UserNotParticipant from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup, InlineQueryResultCachedDocument + from utils import get_search_results -from info import MAX_RESULTS, CACHE_TIME, SHARE_BUTTON_TEXT, AUTH_USERS +from info import MAX_RESULTS, CACHE_TIME, SHARE_BUTTON_TEXT, AUTH_USERS, AUTH_CHANNEL + +logger = logging.getLogger(__name__) +cache_time = 0 if AUTH_USERS or AUTH_CHANNEL else CACHE_TIME @Client.on_inline_query(filters.user(AUTH_USERS) if AUTH_USERS else None) async def answer(bot, query): """Show search results for given inline query""" + if AUTH_CHANNEL and not await is_subscribed(bot, query): + await query.answer(results=[], + cache_time=0, + switch_pm_text='You have to subscribe channel', + switch_pm_parameter="subscribe") + return + results = [] if '|' in query.query: string, file_type = query.query.split('|', maxsplit=1) @@ -19,7 +33,7 @@ async def answer(bot, query): file_type = None offset = int(query.offset or 0) - reply_markup = get_reply_markup(bot.username) + reply_markup = get_reply_markup(bot.username, query=string) files, next_offset = await get_search_results(string, file_type=file_type, max_results=MAX_RESULTS, @@ -40,7 +54,7 @@ async def answer(bot, query): switch_pm_text += f" for {string}" await query.answer(results=results, - cache_time=CACHE_TIME, + cache_time=cache_time, switch_pm_text=switch_pm_text, switch_pm_parameter="start", next_offset=str(next_offset)) @@ -51,15 
+65,15 @@ async def answer(bot, query): switch_pm_text += f' for "{string}"' await query.answer(results=[], - cache_time=CACHE_TIME, + cache_time=cache_time, switch_pm_text=switch_pm_text, switch_pm_parameter="okay") -def get_reply_markup(username): +def get_reply_markup(username, query): url = 't.me/share/url?url=' + quote(SHARE_BUTTON_TEXT.format(username=username)) buttons = [[ - InlineKeyboardButton('Search again', switch_inline_query_current_chat=''), + InlineKeyboardButton('Search again', switch_inline_query_current_chat=query), InlineKeyboardButton('Share bot', url=url), ]] return InlineKeyboardMarkup(buttons) @@ -75,3 +89,17 @@ def get_size(size): i += 1 size /= 1024.0 return "%.2f %s" % (size, units[i]) + + +async def is_subscribed(bot, query): + try: + user = await bot.get_chat_member(AUTH_CHANNEL, query.from_user.id) + except UserNotParticipant: + pass + except Exception as e: + logger.exception(e) + else: + if not user.status == 'kicked': + return True + + return False diff --git a/plugins/userbot.py b/plugins/userbot.py new file mode 100644 index 000000000..bc87d5720 --- /dev/null +++ b/plugins/userbot.py @@ -0,0 +1,64 @@ +import logging +import asyncio +from pyrogram import Client, filters +from pyrogram.errors import FloodWait +from info import USERBOT_STRING_SESSION, API_ID, API_HASH, ADMINS, id_pattern +from utils import save_file + +logger = logging.getLogger(__name__) +lock = asyncio.Lock() + + +@Client.on_message(filters.command(['index', 'indexfiles']) & filters.user(ADMINS)) +async def index_files(bot, message): + """Save channel or group files with the help of user bot""" + + if not USERBOT_STRING_SESSION: + await message.reply('Set `USERBOT_STRING_SESSION` in info.py file or in environment variables.') + elif len(message.command) == 1: + await message.reply('Please specify channel username or id in command.\n\n' + 'Example: `/index -10012345678`') + elif lock.locked(): + await message.reply('Wait until previous process complete.') + else: + 
msg = await message.reply('Processing...⏳') + raw_data = message.command[1:] + user_bot = Client(USERBOT_STRING_SESSION, API_ID, API_HASH) + chats = [int(chat) if id_pattern.search(chat) else chat for chat in raw_data] + total_files = 0 + + async with lock: + try: + async with user_bot: + for chat in chats: + + async for user_message in user_bot.iter_history(chat): + try: + message = await bot.get_messages( + chat, + user_message.message_id, + replies=0, + ) + except FloodWait as e: + await asyncio.sleep(e.x) + message = await bot.get_messages( + chat, + user_message.message_id, + replies=0, + ) + + for file_type in ("document", "video", "audio"): + media = getattr(message, file_type, None) + if media is not None: + break + else: + continue + media.file_type = file_type + media.caption = message.caption + await save_file(media) + total_files += 1 + except Exception as e: + logger.exception(e) + await msg.edit(f'Error: {e}') + else: + await msg.edit(f'Total {total_files} checked!') diff --git a/requirements.txt b/requirements.txt index b2766d8c1..dca51ff03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ -https://github.com/Mahesh0253/pyrogram/archive/beta.zip -tgcrypto -umongo==2.3.0 -motor==2.3.0 -dnspython +https://github.com/Mahesh0253/pyrogram/archive/inline.zip +tgcrypto==1.2.2 +umongo[motor]==3.0.0 +dnspython==2.1.0 diff --git a/sample_info.py b/sample_info.py index f26b8f09c..071bfc32f 100644 --- a/sample_info.py +++ b/sample_info.py @@ -4,6 +4,7 @@ API_ID = 12345 API_HASH = '0123456789abcdef0123456789abcdef' BOT_TOKEN = '123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11' +USERBOT_STRING_SESSION = '' # Bot settings MAX_RESULTS = 10 @@ -14,6 +15,7 @@ ADMINS = [12345789, 'admin123', 98765432] CHANNELS = [-10012345678, -100987654321, 'channelusername'] AUTH_USERS = [] +AUTH_CHANNEL = None # MongoDB information DATABASE_URI = "mongodb://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb]?retryWrites=true&w=majority" @@ -27,4 +29,5 
@@ Here you can search files in inline mode. Just press follwing buttons and start searching. """ -SHARE_BUTTON_TEXT = 'Checkout {username} for searching files' \ No newline at end of file +SHARE_BUTTON_TEXT = 'Checkout {username} for searching files' +INVITE_MSG = 'Please join @.... to use this bot' \ No newline at end of file diff --git a/utils.py b/utils.py index 8d8630573..64d4e98c1 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,14 @@ import re +import base64 import logging +from struct import pack + +from pyrogram.file_id import FileId from pymongo.errors import DuplicateKeyError from umongo import Instance, Document, fields from motor.motor_asyncio import AsyncIOMotorClient from marshmallow.exceptions import ValidationError + from info import DATABASE_URI, DATABASE_NAME, COLLECTION_NAME, USE_CAPTION_FILTER logger = logging.getLogger(__name__) @@ -11,7 +16,7 @@ client = AsyncIOMotorClient(DATABASE_URI) db = client[DATABASE_NAME] -instance = Instance(db) +instance = Instance.from_db(db) @instance.register @@ -31,10 +36,13 @@ class Meta: async def save_file(media): """Save file in database""" + # TODO: Find better way to get same file_id for same media to avoid duplicates + file_id, file_ref = unpack_new_file_id(media.file_id) + try: file = Media( - file_id=media.file_id, - file_ref=media.file_ref, + file_id=file_id, + file_ref=file_ref, file_name=media.file_name, file_size=media.file_size, file_type=media.file_type, @@ -58,9 +66,13 @@ async def save_file(media): async def get_search_results(query, file_type=None, max_results=10, offset=0): """For given query return (results, next_offset)""" - raw_pattern = query.lower().strip().replace(' ', '.*') - if not raw_pattern: + query = query.strip() + if not query: raw_pattern = '.' 
+ elif ' ' not in query: + raw_pattern =r'(\b|[\.\+\-_])' + query + r'(\b|[\.\+\-_])' + else: + raw_pattern = query.replace(' ', r'[\s\.\+\-_]') try: regex = re.compile(raw_pattern, flags=re.IGNORECASE) @@ -90,3 +102,40 @@ async def get_search_results(query, file_type=None, max_results=10, offset=0): files = await cursor.to_list(length=max_results) return files, next_offset + + +def encode_file_id(s: bytes) -> str: + r = b"" + n = 0 + + for i in s + bytes([22]) + bytes([4]): + if i == 0: + n += 1 + else: + if n: + r += b"\x00" + bytes([n]) + n = 0 + + r += bytes([i]) + + return base64.urlsafe_b64encode(r).decode().rstrip("=") + + +def encode_file_ref(file_ref: bytes) -> str: + return base64.urlsafe_b64encode(file_ref).decode().rstrip("=") + + +def unpack_new_file_id(new_file_id): + """Return file_id, file_ref""" + decoded = FileId.decode(new_file_id) + file_id = encode_file_id( + pack( + "