-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
77 lines (63 loc) · 2.55 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import re
import logging
from typing import Optional, List
from urllib.parse import urlparse
from limits.storage import MemoryStorage
from limits.strategies import MovingWindowRateLimiter
from config import settings
def setup_logging():
    """Configure application-wide logging and return this module's logger.

    Calls ``logging.basicConfig`` with an INFO threshold and a timestamped
    ``time - name - level - message`` line format, then raises the
    ``slack_bolt`` logger to WARNING so its routine output is suppressed.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    line_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(
        format=line_format,
        datefmt=timestamp_format,
        level=logging.INFO,
    )

    # Quieten chatty third-party loggers.
    # TODO decide if we want to add the httpx override back:
    # logging.getLogger('httpx').setLevel(logging.WARNING)
    slack_bolt_logger = logging.getLogger('slack_bolt')
    slack_bolt_logger.setLevel(logging.WARNING)

    module_logger = logging.getLogger(__name__)
    return module_logger
def setup_rate_limiter():
    """Build a moving-window rate limiter on top of a new ``MemoryStorage``.

    Returns:
        A ``MovingWindowRateLimiter`` wrapping a freshly created
        ``MemoryStorage`` instance.
    """
    storage = MemoryStorage()
    limiter = MovingWindowRateLimiter(storage)
    return limiter
def get_trigger_emojis() -> Optional[List[str]]:
    """Return the trigger emojis configured in ``settings.TRIGGER_EMOJIS``.

    The setting is read as a comma-separated string; each entry has its
    surrounding whitespace stripped.

    Returns:
        The list of emoji names, or ``None`` when the setting is falsy
        (unset or empty).
    """
    configured = settings.TRIGGER_EMOJIS
    if not configured:
        return None

    names = []
    for entry in configured.split(','):
        names.append(entry.strip())
    return names
def sanitize_url(url: str) -> str:
    """Normalise a raw URL string extracted from a message.

    Surrounding whitespace and any trailing ``>`` characters are removed,
    and ``http://`` is prepended when the value does not already start with
    an ``http://`` or ``https://`` scheme.

    Args:
        url: The raw URL text, possibly with a trailing ``>`` and/or
            surrounding whitespace.

    Returns:
        The cleaned URL, always beginning with an http(s) scheme.
    """
    # Strip whitespace first so a '>' followed by trailing whitespace is
    # still removed, then drop any remaining mix of '>' and whitespace.
    sanitized = url.strip().rstrip('> \t\r\n\f\v')

    # URL schemes are case-insensitive, so compare on a lowercased copy to
    # avoid turning 'HTTPS://host' into 'http://HTTPS://host'.
    if not sanitized.lower().startswith(('http://', 'https://')):
        sanitized = 'http://' + sanitized
    return sanitized
def is_valid_url(url: str) -> bool:
    """Report whether *url* parses with both a scheme and a network location.

    Args:
        url: The URL string to check.

    Returns:
        ``True`` when ``urlparse`` yields a non-empty scheme and netloc;
        ``False`` otherwise, including when parsing raises ``ValueError``.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs outright.
        return False

    has_scheme = bool(parts.scheme)
    has_host = bool(parts.netloc)
    return has_scheme and has_host
def extract_and_validate_url(message: dict) -> Optional[str]:
    """Pull the first URL out of *message*, clean it, and validate it.

    Args:
        message: The message payload passed to ``extract_url_from_message``.

    Returns:
        The sanitized URL when one is found and ``is_valid_url`` accepts it;
        otherwise ``None``.
    """
    candidate = extract_url_from_message(message)
    if not candidate:
        return None

    cleaned = sanitize_url(candidate)
    if not is_valid_url(cleaned):
        return None
    return cleaned
# Single shared matcher for http(s) URLs: the scheme followed by one or more
# permitted characters or percent-encoded bytes. Compiled once so all three
# lookups below use the same, valid pattern.
# Note: the `$-_` range is broad and includes '>', so a link written as
# `<https://host>` is returned with its trailing '>'.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)


def _first_url(text) -> Optional[str]:
    """Return the first URL found in *text*, or ``None`` if there is none."""
    if not isinstance(text, str):
        return None
    found = _URL_PATTERN.search(text)
    return found.group(0) if found else None


def extract_url_from_message(message: dict) -> Optional[str]:
    """Return the first http(s) URL found in a message payload.

    The search order is: the message's top-level ``"text"``, then the
    ``"text"`` of each entry in ``"attachments"``, then the nested
    ``text.text`` of each ``"blocks"`` entry whose ``"type"`` is
    ``"section"``. The first match wins.

    Args:
        message: A dict that may contain ``"text"``, ``"attachments"`` and
            ``"blocks"`` keys. Missing keys, ``None`` values and non-string
            text are treated as containing no URL.

    Returns:
        The matched URL exactly as it appears in the text, or ``None`` when
        no URL is found anywhere in the message.
    """
    url = _first_url(message.get("text"))
    if url:
        return url

    for attachment in message.get("attachments") or []:
        url = _first_url(attachment.get("text"))
        if url:
            return url

    for block in message.get("blocks") or []:
        if block.get("type") != "section":
            continue
        # A section block may have no "text" object at all.
        section_text = block.get("text") or {}
        url = _first_url(section_text.get("text"))
        if url:
            return url

    return None