Skip to content

Commit

Permalink
we do not need no stinkin redis
Browse files Browse the repository at this point in the history
  • Loading branch information
ilude committed May 10, 2024
1 parent 549088d commit feccac6
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 54 deletions.
1 change: 0 additions & 1 deletion app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import Any
from services.link_tracker import link_tracker
from utils import copy_default_to_configs
from models.utils import pwd

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARN)

Expand Down
14 changes: 8 additions & 6 deletions app/services/favicon_store.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import re
import logging
from services.redis_store import RedisStore
from services.favicon_utils import base, download_favicon, get_favicon_filename, normalize_domain
from services.favicon_utils import base, download_favicon, favicon_failed_filename, favicon_filename
from models.scheduler import Scheduler
from models.utils import pwd

Expand All @@ -16,7 +15,6 @@ def __init__(self, icon_dir='static/assets/icons'):
self.icon_dir = pwd.joinpath(icon_dir).resolve()
self.icon_dir.mkdir(parents=True, exist_ok=True)

self.redis_store = RedisStore()
self.ip_pattern = re.compile(
r"^(?:(?:https?://)?(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::\d{1,5})?(?:\/)?$"
Expand All @@ -30,14 +28,18 @@ def icon_path(self, url) -> str:
if not url:
return None

favicon_filename = get_favicon_filename(url)
favicon_relative_path = f"{self.relative_icon_dir}/{favicon_filename}"
filename = favicon_filename(url)
favicon_relative_path = f"{self.relative_icon_dir}/{filename}"

if pwd.joinpath(favicon_relative_path).exists():
return f"/{favicon_relative_path}"
else:
return None

def favicon_failed(self, url) -> bool:
    """Report whether a failure marker file exists for ``url``.

    The marker's name comes from ``favicon_failed_filename(url)`` and is
    looked up under ``self.relative_icon_dir``, resolved against ``pwd``.
    """
    marker_name = favicon_failed_filename(url)
    marker_path = pwd.joinpath(self.relative_icon_dir, marker_name)
    return marker_path.exists()

def fetch_favicons_from(self, urls):
base_urls = sorted(set(map(lambda url: base(url), urls)))
processable_urls = set(filter(lambda url: self.should_processed(url), base_urls))
Expand Down Expand Up @@ -71,5 +73,5 @@ def should_processed(self, url):
not url
or bool(self.ip_pattern.match(url))
or self.icon_path(url)
or self.redis_store.is_domain_processed(normalize_domain(url))
or self.favicon_failed(url)
)
32 changes: 18 additions & 14 deletions app/services/favicon_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os
import requests
from bs4 import BeautifulSoup
from services.redis_store import RedisStore
from urllib.parse import urljoin, urlparse
from models.utils import pwd

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand All @@ -25,19 +25,23 @@ def base(url):
return url


def get_favicon_filename(url):
def favicon_filename(url):
    """Return the icon file name for ``url``: ``<normalized domain>.favicon.ico``."""
    domain = normalize_domain(url)
    return f"{domain}.favicon.ico"


def favicon_failed_filename(url):
    """Return the failure-marker file name for ``url``: ``<normalized domain>.failed``."""
    domain = normalize_domain(url)
    return f"{domain}.failed"


def make_request(url):
    """Issue a GET for ``url`` with a Chrome-on-Windows User-Agent header.

    Redirects are followed and the call times out after 5 seconds; the
    ``requests`` response object is returned as-is.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    return requests.get(
        url,
        headers={'User-Agent': user_agent},
        allow_redirects=True,
        timeout=5,
    )


def favicon_path(icon_path, url):
favicon_filename = get_favicon_filename(url)
def favicon_path(icon_path, favicon_filename):
    """Join an icon directory and a favicon file name into one path.

    Args:
        icon_path: Directory that holds the icon files.
        favicon_filename: File name of the icon inside ``icon_path``,
            already computed by the caller.

    Returns:
        ``os.path.join(icon_path, favicon_filename)``.
    """
    # The previous body re-derived the name from an undefined ``url`` and
    # called the ``favicon_filename`` argument as if it were a function, so
    # every call raised; the argument is now used as the file name directly.
    return os.path.join(icon_path, favicon_filename)


Expand Down Expand Up @@ -66,22 +70,22 @@ def download_favicon(url, icon_dir):


def _mark_favicon_failed(url, icon_dir, reason):
    """Write the ``.failed`` marker for ``url`` into ``icon_dir`` with ``reason`` as its text."""
    filename = pwd.joinpath(icon_dir, favicon_failed_filename(url))
    # Text mode on purpose: ``reason`` is a str, and a file opened with 'wb'
    # rejects str with a TypeError.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(reason)
    logger.debug(f"Marking {url} as failed with {filename}")


def _download(url, icon_dir, icon_url):
    """Fetch ``icon_url`` and store it as the favicon for ``url``.

    A 200 response whose content-type starts with ``image/`` is written to
    ``icon_dir`` under ``favicon_filename(url)``. Any other response, or any
    exception raised along the way, leaves a ``.failed`` marker file in
    ``icon_dir`` holding the reason instead.

    Args:
        url: Site URL the favicon belongs to; determines the stored file name.
        icon_dir: Directory (joined onto ``pwd``) that receives the file.
        icon_url: URL the icon bytes are requested from.
    """
    try:
        response = make_request(icon_url)
        content_type = response.headers.get('content-type', '')

        if response.status_code == 200 and content_type.lower().startswith('image/'):
            filename = pwd.joinpath(icon_dir, favicon_filename(url))
            with open(filename, 'wb') as file:
                file.write(response.content)
            logger.debug(f"saving {url} as {filename}")
        else:
            _mark_favicon_failed(
                url,
                icon_dir,
                f'response_code: {response.status_code} content-type: {content_type}',
            )
    except Exception as ex:
        # Deliberately broad: any failure is recorded as a marker file rather
        # than propagated to the caller.
        _mark_favicon_failed(url, icon_dir, f'Error: {ex}')

    logger.debug(f"_download({icon_url}) completed")
33 changes: 0 additions & 33 deletions app/services/redis_store.py

This file was deleted.

0 comments on commit feccac6

Please sign in to comment.