diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c13ff62..836066a 100755 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -48,7 +48,8 @@ "redhat.ansible", "redhat.vscode-yaml", "ms-python.black-formatter", - "ms-python.isort" + "ms-python.isort", + "ms-python.autopep8" ], "settings": { "dev.containers.copyGitConfig": false, diff --git a/app/app.py b/app/app.py index 6e549ca..644220b 100644 --- a/app/app.py +++ b/app/app.py @@ -1,5 +1,5 @@ +import models.layout import asyncio -import json import logging import os import secrets @@ -20,8 +20,7 @@ logger.setLevel(logging.DEBUG) copy_default_to_configs() - -from models.layout import layout +layout = models.layout.Layout() app = Flask(__name__) app.config['JSONIFY_PRETTYPRINT_REGULAR'] = True @@ -29,89 +28,83 @@ if os.environ.get("FLASK_DEBUG", "False") == "True": - cache_config={ - 'CACHE_TYPE': 'null' - } + cache_config = { + 'CACHE_TYPE': 'null' + } else: - # 600 seconds = 10 minutes - cache_config={ - 'CACHE_TYPE': 'simple', - 'CACHE_DEFAULT_TIMEOUT': 600 - } - from flask_minify import Minify - Minify(app=app, html=True, js=True, cssless=True) - + # 600 seconds = 10 minutes + cache_config = { + 'CACHE_TYPE': 'simple', + 'CACHE_DEFAULT_TIMEOUT': 600 + } + from flask_minify import Minify + Minify(app=app, html=True, js=True, cssless=True) + cache = Cache(app, config=cache_config) page_timeout = int(os.environ.get('ONBOARD_PAGE_TIMEOUT', 600)) assets = Environment(app) css = Bundle( - 'css/*.css', - filters="cssmin", - output="assets/common.css" + 'css/*.css', + filters="cssmin", + output="assets/common.css" ) assets.register('css_all', css) css.build() - @app.context_processor def inject_current_date(): - return { - 'today_date': datetime.now(), - 'site_title': os.environ.get('ONBOARD_SITE_TITLE', 'OnBoard'), - } - -def load_bookmarks(): - try: - with open(pwd.joinpath('configs/bookmarks.json'), 'r', encoding='utf-8') as f: - return json.load(f) - except Exception as ex: - logger.error(f"Error: {ex} loading bookmarks") - return None + favicon_finder = models.layout.FaviconFinder() + return { + 'today_date': datetime.now(), + 'site_title': os.environ.get('ONBOARD_SITE_TITLE', 'OnBoard'), + 'favicon_exists': favicon_finder.favicon_exists, + } + @app.route('/') @app.route('/tab/') @cache.cached(timeout=page_timeout, unless=lambda: layout.is_modified) def index(tab_name=None): - # Load feeds and bookmarks - if layout.is_modified(): - layout.reload() + # Load feeds and bookmarks + if layout.is_modified(): + layout.reload() - bookmarks = load_bookmarks() + return render_template('index.html', layout=layout, tab_name=tab_name, skip_htmx=False) - return render_template('index.html', bookmarks = bookmarks, layout=layout, tab_name=tab_name, skip_htmx=False) @app.route('/feed/') def feed(feed_id): - feed = layout.get_feed(feed_id) - #logger.debug(f"{feed.name} - {feed.display_items[0].title}") - return render_template(feed.template, widget=feed, skip_htmx=True) + feed = layout.get_feed(feed_id) + # logger.debug(f"{feed.name} - {feed.display_items[0].title}") + return render_template(feed.template, widget=feed, skip_htmx=True) @app.route('/click_events') def click_events(): - df = link_tracker.get_click_events() - html = df.to_html(classes='data', index=False) - response = make_response(html) - response.headers["Content-Type"] = "text/html" - return response + df = link_tracker.get_click_events() + html = df.to_html(classes='data', index=False) + response = make_response(html) + response.headers["Content-Type"] = "text/html" + return response + @app.route('/redirect//') def track(feed_id, link_id): - link = layout.get_link(feed_id, link_id) - - link_tracker.track_click_event(feed_id, link_id, link) - - logger.info(f"redirecting to {link}") - return redirect(link, code=302) + link = layout.get_link(feed_id, link_id) + + link_tracker.track_click_event(feed_id, link_id, link) + + logger.info(f"redirecting to {link}") + return redirect(link, code=302) @app.route('/feed//refresh') def refresh(feed_id): - layout.refresh_feed(feed_id) - return redirect('/', code=302) + layout.refresh_feed(feed_id) + return redirect('/', code=302) ############################################################################### @@ -120,41 +113,40 @@ def refresh(feed_id): # ############################################################################### -if __name__ == '__main__': - port = int(os.environ.get("FLASK_PORT", os.environ.get("ONBOARD_PORT", 9830))) - development = bool(os.environ.get("FLASK_ENV", "development") == "development") - if development: - app.run(port=port, debug=bool(os.environ.get("FLASK_DEBUG", "True"))) - if bool(os.environ.get('WERKZEUG_RUN_MAIN')): - print("") - layout.stop_scheduler() - app.logger.info("Shutting down...") - - sys.exit() - else: - try: - from hypercorn.config import Config - from hypercorn.asyncio import serve - - shutdown_event = asyncio.Event() - - def _signal_handler(*_: Any) -> None: - logger.info("Shutting down...") - layout.stop_scheduler() - shutdown_event.set() - - config = Config() - config.accesslog="-" - config.errorlog="-" - config.loglevel="DEBUG" - config.bind = f"0.0.0.0:{port}" - loop = asyncio.new_event_loop() - loop.add_signal_handler(signal.SIGTERM, _signal_handler) - loop.run_until_complete( - serve(app, config, shutdown_trigger=shutdown_event.wait) - ) - except KeyboardInterrupt: - logger.info("\nShutting down...") - layout.stop_scheduler() - sys.exit() - \ No newline at end of file +if __name__ == '__main__': + port = int(os.environ.get("FLASK_PORT", os.environ.get("ONBOARD_PORT", 9830))) + development = bool(os.environ.get("FLASK_ENV", "development") == "development") + if development: + app.run(port=port, debug=bool(os.environ.get("FLASK_DEBUG", "True"))) + if bool(os.environ.get('WERKZEUG_RUN_MAIN')): + print("") + layout.stop_scheduler() + app.logger.info("Shutting down...") + + sys.exit() + else: + try: + from hypercorn.config import Config + from hypercorn.asyncio import serve + + shutdown_event = asyncio.Event() + + def _signal_handler(*_: Any) -> None: + logger.info("Shutting down...") + layout.stop_scheduler() + shutdown_event.set() + + config = Config() + config.accesslog = "-" + config.errorlog = "-" + config.loglevel = "DEBUG" + config.bind = f"0.0.0.0:{port}" + loop = asyncio.new_event_loop() + loop.add_signal_handler(signal.SIGTERM, _signal_handler) + loop.run_until_complete( + serve(app, config, shutdown_trigger=shutdown_event.wait) + ) + except KeyboardInterrupt: + logger.info("\nShutting down...") + layout.stop_scheduler() + sys.exit() diff --git a/app/archive/file_data.py b/app/archive/file_data.py deleted file mode 100644 index bcc8a45..0000000 --- a/app/archive/file_data.py +++ /dev/null @@ -1,7 +0,0 @@ -class FileData: - def __init__(self, last_modified=0, contents=None): - self.last_modified = last_modified - self.contents = contents - - def __getitem__(self, key, default=None): - return self.contents.get(key, default) \ No newline at end of file diff --git a/app/archive/layout.py b/app/archive/layout.py deleted file mode 100644 index c626cfb..0000000 --- a/app/archive/layout.py +++ /dev/null @@ -1,87 +0,0 @@ -import os -from pathlib import Path -from rss_feed_manager import RssFeedManager -import yaml -from models.utils import pwd -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -logger.propagate = False - -# create console handler -consoleHandler = logging.StreamHandler() -consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) - -class Layout: - def __init__(self, file_path="configs/layout.yml"): - self.file_path = os.path.join(pwd, file_path) - self.mtime = os.path.getmtime(self.file_path) - self.feed_manager = RssFeedManager(self, cache_dir='cache') - self.reload() - - @property - def tabs(self): - return self.contents['tabs'] - - @property - def headers(self): - return self.contents['headers'] - - def feed(self, feed_name): - return self.feed_manager.find(feed_name) - - def columns(self, tab_name): - current_tab = self.tabs[0] if tab_name is None else next((tab for tab in self.tabs if tab['name'].lower() == tab_name.lower()), self.tabs[0]) - return current_tab['columns'] - - def save_articles(self): - self.feed_manager.save_articles() - - def reload(self): - logger.debug("Reloading layout") - with open(self.file_path, 'r') as file: - self.contents = yaml.safe_load(file) - self.mtime = os.path.getmtime(self.file_path) - - feed_widgets = [] - for tab in self.tabs: - for column in tab['columns']: - for widget in column['widgets']: - if widget['type'] == 'feed': - feed_widgets.append(widget) - - logger.debug('Initializing feed manager with {} feeds'.format(len(feed_widgets))) - self.feed_manager.initialize(feed_widgets) - - for tab in self.tabs: - for column in tab['columns']: - if not column['widgets']: - next - for widget in column['widgets']: - widget['summary_enabled'] = widget.get('summary_enabled', True) - match widget['type']: - case 'bookmarks': - widget['articles'] = [{'title': entry['title'], 'link': entry['url']} for entry in widget['bookmarks']] - case 'feed': - widget['hx-get'] = '/rss/' + widget['name'] - self.feed_manager.load(widget) - case 'docker_containers': - widget['hx-get'] = '/docker_containers' - widget['template'] = 'docker_containers.html' - case _: - if (template_path := Path('templates', f'{widget["type"]}.html')).exists(): - widget['template'] = template_path.name - - logger.debug("========== Layout reloaded") - - def is_modified(self): - result = os.path.getmtime(self.file_path) > self.mtime - logger.debug("========== Layout modified: " + str(result)) - return result - - def current_tab(self, tab_name): - current_tab = self.tabs[0] if tab_name is None else next((tab for tab in self.tabs if tab['name'].lower() == tab_name.lower()), self.tabs[0]) - return current_tab - -layout = Layout() \ No newline at end of file diff --git a/app/archive/post_processor.py b/app/archive/post_processor.py deleted file mode 100644 index f5cd895..0000000 --- a/app/archive/post_processor.py +++ /dev/null @@ -1,82 +0,0 @@ -import html -import importlib -import re -import os - -from bs4 import BeautifulSoup - -class NoOpClass: - def process(self, data): - return data - -class PostProcessor: - def __init__(self): - self.loaded_classes = {} - self.pwd = os.path.dirname(os.path.abspath(__file__)) - - def to_snake_case(self, input_string): - # Replace non-alphanumeric characters and apostrophes with spaces and split the string into words - words = re.findall(r"[a-zA-Z0-9]+(?:'[a-zA-Z0-9]+)?", input_string) - - # Remove apostrophes from the words - words = [word.replace("'", "") for word in words] - - # Convert words to lowercase and join them with underscores - snake_case_string = '_'.join(word.lower() for word in words) - - return snake_case_string - - def process(self, widget): - # if 'processed' in widget and widget['processed'] and not bool(os.environ.get('FLASK_DEBUG')): - # print (f"Widget {widget['name']} already processed.") - # return widget - - self.normalize(widget) - - # Check if the class has already been loaded - class_name = self.to_snake_case(widget['name']) - if class_name in self.loaded_classes: - instance = self.loaded_classes[class_name] - else: - # Construct file path to the "processors" subdirectory - file_path = os.path.join(self.pwd, "processors", class_name + ".py") - if os.path.exists(file_path): - module = importlib.import_module(f"processors.{class_name}") - cls = getattr(module, ''.join(word.title() for word in class_name.split('_'))) - instance = cls() - else: - instance = NoOpClass() - - self.loaded_classes[class_name] = instance - - # Call process() method of the instance with the provided data - widget = instance.process(widget) - widget['processed'] = True - return widget - - def normalize(self, widget): - for article in widget['articles']: - article['title'] = article['original_title'].strip() - article['title'] = re.sub(r'\s+', ' ', article['title']) - - if not article['original_summary']: - continue - else: - article['summary'] = article['original_summary'] - - article['summary'] = article['summary'].replace('\n', ' ').replace('\r', ' ').strip() - article['summary'] = BeautifulSoup(html.unescape(article['summary']), 'lxml').text - # strip [...] from the end of the summary - article['summary'] = re.sub(r'\[[\.+|…\]].*$', '', article['summary']) - - if article['summary'] == article['title']: - article['summary'] = None - elif (article['title'] in article['summary'] and len(article['title'])/len(article['summary']) > 0.64): - article['title'] = article['summary'] - article['summary'] = None - elif (article['summary'] in article['title']): - article['summary'] = article['title'] - article['title'] = None - -# Instantiate loader when the module is imported -post_processor = PostProcessor() diff --git a/app/archive/rss.py b/app/archive/rss.py deleted file mode 100644 index e3ddecb..0000000 --- a/app/archive/rss.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -import time -import aiohttp -from cachelib import FileSystemCache -import feedparser -import requests - -from post_processor import post_processor -from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning -import warnings - -warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) - -class Rss: - def __init__(self): - cache_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'feed_cache') - if not os.path.exists(cache_dir): - os.makedirs(cache_dir) - self.feed_cache = FileSystemCache(cache_dir, default_timeout=60*15) - - async def load_feed(self, widget): - start_time = time.time() - - cached_widget = self.feed_cache.get(widget['name']) - - # check if feed is in self.feeds and that the last updated time is less than 15 minutes ago - if cached_widget and 'last_updated' in cached_widget and (start_time - cached_widget['last_updated']) < 60 * 15: - widget['articles'] = cached_widget['articles'] - #print(f"Loaded {widget['name']} from cache") - else: - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'} - async with aiohttp.ClientSession() as session: - async with session.get(widget['url'], allow_redirects=True, headers=headers) as response: - if(response.status != 200): - print(f"Failed to load {widget['name']} from {widget['url']} with Status Code: {response.status} Response follows:") - print(await response.text()) - else: - #print(f"Loaded {widget['name']} with Status Code: {response.status}") - article_limit = widget.get('article_limit', 10) - parsed_feed = feedparser.parse(await response.text()) - - widget['articles'] = [{ - 'original_title': entry.get('title', 'No Title').strip(), - 'link': entry.link, - 'original_summary': entry.get('summary', None) - } for entry in parsed_feed.entries[:article_limit]] if 'entries' in parsed_feed else [] - - widget['last_updated'] = start_time - self.feed_cache.set(widget['name'], widget) - - post_processor.process(widget) - self.feed_cache.set(widget['name'], widget) - - return widget - - def find_feed_links(self, url): - response = requests.get(url) - - if response.status_code == 200: - soup = BeautifulSoup(response.content, 'html.parser') - links = soup.find_all('link', type=["application/rss+xml", "application/atom+xml"]) - - feed_links = [] - for link in links: - feed_links.append(link.get('href')) - - return feed_links - else: - print(f"Failed to retrieve content from {url}") - return None - -rss = Rss() diff --git a/app/archive/rss_feed.py b/app/archive/rss_feed.py deleted file mode 100644 index d5a2e87..0000000 --- a/app/archive/rss_feed.py +++ /dev/null @@ -1,184 +0,0 @@ - -from itertools import islice -import feedparser -import json -import os -import re -from dataclasses import dataclass -from datetime import datetime -from email.utils import formatdate -from pathlib import Path -from typing import List -from models.feed_article import FeedArticle - -@dataclass -class RssFeed: - widget: dict - data_dir: Path - - - def __post_init__(self): - self.title = self.widget['name'] - self.summary_enabled = self.widget.get('summary_enabled', True) - self.feed_url = self.widget['url'] - self._articles = [] - self._last_updated = None - - default_article_display_limit = int(os.environ.get("ONBOARD_DEFAULT_ARTICLE_DISPLAY_LIMIT", 10)) - self.display_limit = self.widget.get('display_limit', default_article_display_limit) - - self.update() - - - @property - def articles(self) -> List[FeedArticle]: - return self._articles - - @articles.setter - def articles(self, articles: List[FeedArticle]): - # filter articles - filtered_articles = self.apply_filters(articles) - # limit articles to display_limit - self._articles = [article for article in islice(filtered_articles, self.display_limit)] - - def __getattr__(self, key): - if key in self.widget: - return self.widget[key] - else: - return None - - def get(self, key, default=None): - return self.widget.get(key, default) - - @property - def json_file(self): - filename = f"{self.to_snake_case(self.title)}.json" - json_file = os.path.join(self.data_dir, filename) - return Path(json_file) - - @property - def loaded(self): - return len(self.articles) > 0 - - def updated_recently(self): - return (datetime.now() - self._last_updated).total_seconds() <= 60 * 45 - - def save(self): - self.save_articles(self.articles) - - @staticmethod - def load_articles(filename: Path) -> list[FeedArticle]: - articles =[] - if filename.exists(): - with open(filename, 'r') as f: - json_articles = json.load(f)['articles'] - - for article in json_articles: - articles.append( - FeedArticle( - original_title = article['title'], - link = article['link'], - description = article['description'], - pub_date = article['pub_date'] - ) - ) - - return articles - - def apply_filters(self, articles: list[FeedArticle]) -> list[FeedArticle]: - # using article.id remove duplicates from articles - articles = list(dict((article.id, article) for article in articles).values()) - - if 'filters' in self.widget: - for article in articles[:]: - for filter_type in self.widget['filters']: - for filter in self.widget['filters'][filter_type]: - for attribute in filter: - filter_text = filter[attribute] - if not hasattr(article, attribute): - next - match filter_type: - case 'remove': - if re.search(filter_text, getattr(article, attribute), re.IGNORECASE): - articles.remove(article) - case 'strip': - pattern = re.compile(filter_text) - result = re.sub(pattern, '', getattr(article, attribute)) - setattr(article, attribute, result) - case _: - pass - - # sort articles in place by pub_date newest to oldest - articles.sort(key=lambda a: a.pub_date, reverse=True) - - return articles - - @staticmethod - def download(feed_url: str) -> list[FeedArticle]: - articles = [] - feed = feedparser.parse(feed_url) - for entry in feed.entries: - articles.append( - FeedArticle( - original_title = entry.title, - link = entry.link, - description = entry.description, - pub_date = entry.get('published', entry.get('updated', formatdate())) - ) - ) - - return articles - - - def update(self): - if len(self.articles) == 0 and self.json_file.exists(): - self.articles = self.load_articles(self.json_file) - self._last_updated = datetime.fromtimestamp(os.path.getmtime(self.json_file)) - - if len(self.articles) == 0 or not self.updated_recently(): - downloaded_articles = self.download(self.feed_url) - self.articles += downloaded_articles - self._last_updated = datetime.now() - self.save_articles(downloaded_articles) - print(f"[{datetime.now()}] Updated {self.title}") - - else: - print(f"[{datetime.now()}] Not updating {self.title}") - - def save_articles(self, articles: list[FeedArticle]): - # load all existing articles from the json file, and add the new ones - # then apply the filters - all_articles = self.load_articles(self.json_file) + articles - all_articles = self.apply_filters(all_articles) - - data = { - 'title': self.title, - 'link': self.link, - 'articles': [ - { - 'original_title': article.original_title, - 'title': article.title, - 'link': article.link, - 'description': article.description, - 'pub_date': article.pub_date, - 'id': article.id - } for article in all_articles - ] - } - with open(self.json_file, 'w') as f: - json.dump(data, f, indent=2) - - print(f"[{datetime.now()}] Saved {len(all_articles)} articles to {str(self.json_file.resolve())}") - - @staticmethod - def to_snake_case(input_string): - # Replace non-alphanumeric characters and apostrophes with spaces and split the string into words - words = re.findall(r"[a-zA-Z0-9]+(?:'[a-zA-Z0-9]+)?", input_string) - - # Remove apostrophes from the words - words = [word.replace("'", "") for word in words] - - # Convert words to lowercase and join them with underscores - snake_case_string = '_'.join(word.lower() for word in words) - - return snake_case_string \ No newline at end of file diff --git a/app/archive/rss_feed_manager.py b/app/archive/rss_feed_manager.py deleted file mode 100644 index 8a074ab..0000000 --- a/app/archive/rss_feed_manager.py +++ /dev/null @@ -1,68 +0,0 @@ -import importlib -from datetime import datetime -from rss_feed import RssFeed -from pathlib import Path -from apscheduler.schedulers.background import BackgroundScheduler - -class NoOpClass: - def process(self, data): - return data - -class RssFeedManager: - def __init__(self, layout, cache_dir: str = 'data'): - self.feeds = {} - self.layout = layout - self.data_dir = Path(cache_dir) - self.data_dir.mkdir(parents=True, exist_ok=True) - self.scheduler = BackgroundScheduler() - - def initialize(self, feed_widgets: list): - self.scheduler.remove_all_jobs() - - for widget in feed_widgets: - feed = self.load(widget) - self.scheduler.add_job(feed.update, 'cron', hour='*', jitter=20) - - if not self.scheduler.running: - print('Starting scheduler...') - self.scheduler.start() - - print('feed_manager initialized.') - - def find(self, widget_name: str) -> RssFeed: - if widget_name in self.feeds: - return self.feeds[widget_name] - else: - return None - - def save_articles(self): - for feed in self.feeds.values(): - feed.save() - - def load(self, widget: dict) -> RssFeed: - # check if widget is dict - if isinstance(widget, dict): - widget_name = widget['name'] - else: - widget_name = widget - widget = self.layout.widget(widget_name) - - if widget_name in self.feeds: - return self.feeds[widget_name] - else: - self.feeds[widget['name']] = RssFeed(widget, self.data_dir) - return self.feeds[widget['name']] - - def process(self): - if 'processor' in self.widget: - for processor in self.widget['processor']: - processor_name = processor['name'] - processor_path = Path(os.path.join("app","processors", processor_name + ".py")) - if processor_path.exists(): - module = importlib.import_module(f"processors.{processor_name}") - processor_class = getattr(module, ''.join(word.title() for word in processor_name.split('_'))) - processor_instance = processor_class() - else: - processor_instance = NoOpClass() - - articles = processor_instance.process(articles) diff --git a/app/models/layout.py b/app/models/layout.py index 7181ed3..9f5b8b5 100644 --- a/app/models/layout.py +++ b/app/models/layout.py @@ -1,5 +1,9 @@ +from asyncio import tasks +import asyncio +import json import logging import os +from services.favicon_finder import FaviconFinder import yaml from models.bookmark import Bookmark from models.row import Row @@ -14,112 +18,130 @@ class Layout: - id: str = 'layout' - headers: list[Bookmark] = [] - tabs: list[Tab] = [] - - def __init__(self, config_file: str = "configs/layout.yml"): - self.config_path = pwd.joinpath(config_file) - self.reload() - - - def stop_scheduler(self): - Scheduler.shutdown() - - - def is_modified(self): - modified = self.mtime > self.last_reload - logger.info(f"Layout modified?: {modified}") - return modified - - - @property - def mtime(self): - return os.path.getmtime(self.config_path) - - - def reload(self): - logger.debug("Beginning Layout reload...") - Scheduler.clear_jobs() - - with open(self.config_path, 'r') as file: - content = yaml.safe_load(file) - self.tabs = from_list(Tab.from_dict, content.get('tabs', [])) - self.headers = from_list(Bookmark.from_dict, content.get('headers', []), self) - - self.last_reload = self.mtime - self.feed_hash = {} - logger.debug("Completed Layout reload!") - - def tab(self, name: str) -> Tab: - if name is None: - return self.tabs[0] - - return next((tab for tab in self.tabs if tab.name.lower() == name.lower()), self.tabs[0]) - - - def get_feeds(self, columns: Column) -> list[Feed]: - feeds = [] - if columns.rows: - for row in columns.rows: - for column in row.columns: - feeds += self.process_rows(column) - - for widget in columns.widgets: - if widget.type == 'feed': - feeds.append(widget) - - return feeds - - - def get_feed(self, feed_id: str) -> Feed: - if not self.feed_hash: - feeds = [] - for tab in self.tabs: - for row in tab.rows: - for column in row.columns: - feeds += self.get_feeds(column) - - for feed in feeds: - self.feed_hash[feed.id] = feed - - return self.feed_hash[feed_id] - - def refresh_feeds(self, feed_id: str): - feed = self.get_feed(feed_id) - feed.refresh() - - def find_link(self, row: Row, widget_id: str, link_id: str) -> str: - for column in row.columns: - if column.rows: - for row in column.rows: - link = self.find_link(column, widget_id, link_id) - if link: - return link - else: - for widget in column.widgets: - if widget.id == widget_id: - for item in widget: - if item.id == link_id: - return item.link - - return None - - - # TODO: Brute force is best force - def get_link(self, feed_id: str, link_id: str): - if feed_id == self.id: - for header in self.headers: - if header.id == link_id: - return header.link - - for tab in self.tabs: - for row in tab.rows: - link = self.find_link(row, feed_id, link_id) - if link: - return link - - return None - - -layout = Layout() \ No newline at end of file + id: str = 'layout' + headers: list[Bookmark] = [] + tabs: list[Tab] = [] + bookmark_bar: list[dict] = [] + + def __init__(self, config_file: str = "configs/layout.yml"): + self.config_path = pwd.joinpath(config_file) + self.favicon_finder = FaviconFinder() + self.reload() + + def load_bookmarks(self): + try: + with open(pwd.joinpath('configs/bookmarks.json'), 'r', encoding='utf-8') as f: + return json.load(f) + except Exception as ex: + logger.error(f"Error: {ex} loading bookmarks") + return None + + def stop_scheduler(self): + Scheduler.shutdown() + + def is_modified(self): + modified = self.mtime > self.last_reload + logger.info(f"Layout modified?: {modified}") + return modified + + @property + def mtime(self): + return os.path.getmtime(self.config_path) + + def bookmark_iterator(self, bookmarks, urls=[]): + for bookmark in bookmarks: + if 'contents' in bookmark: + self.bookmark_iterator(bookmark['contents'], urls) + elif 'href' in bookmark: + urls.append(bookmark['href']) + return urls + + def reload(self): + logger.debug("Beginning Layout reload...") + Scheduler.clear_jobs() + + with open(self.config_path, 'r') as file: + content = yaml.safe_load(file) + self.tabs = from_list(Tab.from_dict, content.get('tabs', [])) + self.headers = from_list(Bookmark.from_dict, content.get('headers', []), self) + + self.last_reload = self.mtime + self.feed_hash = {} + + self.bookmark_bar = self.load_bookmarks() + + bookmarks = self.bookmark_iterator(self.bookmark_bar) + + print(f"================= Found {len(bookmarks)} bookmarks") + + self.favicon_finder.fetch_from_iterator(bookmarks) + + logger.debug("Completed Layout reload!") + + def tab(self, name: str) -> Tab: + if name is None: + return self.tabs[0] + + return next((tab for tab in self.tabs if tab.name.lower() == name.lower()), self.tabs[0]) + + def get_feeds(self, columns: Column) -> list[Feed]: + feeds = [] + if columns.rows: + for row in columns.rows: + for column in row.columns: + feeds += self.process_rows(column) + + for widget in columns.widgets: + if widget.type == 'feed': + feeds.append(widget) + + return feeds + + def get_feed(self, feed_id: str) -> Feed: + if not self.feed_hash: + feeds = [] + for tab in self.tabs: + for row in tab.rows: + for column in row.columns: + feeds += self.get_feeds(column) + + for feed in feeds: + self.feed_hash[feed.id] = feed + + return self.feed_hash[feed_id] + + def refresh_feeds(self, feed_id: str): + feed = self.get_feed(feed_id) + feed.refresh() + + def find_link(self, row: Row, widget_id: str, link_id: str) -> str: + for column in row.columns: + if column.rows: + for row in column.rows: + link = self.find_link(column, widget_id, link_id) + if link: + return link + else: + for widget in column.widgets: + if widget.id == widget_id: + for item in widget: + if item.id == link_id: + return item.link + + return None + + # TODO: Brute force is best force + + def get_link(self, feed_id: str, link_id: str): + if feed_id == self.id: + for header in self.headers: + if header.id == link_id: + return header.link + + for tab in self.tabs: + for row in tab.rows: + link = self.find_link(row, feed_id, link_id) + if link: + return link + + return None diff --git a/app/models/scheduler.py b/app/models/scheduler.py index 004fce8..a4d57ea 100644 --- a/app/models/scheduler.py +++ b/app/models/scheduler.py @@ -3,32 +3,42 @@ import logging + class Scheduler: - __scheduler = None - - @staticmethod - def shutdown(): - scheduler = Scheduler.getScheduler() - if scheduler and scheduler.running: - scheduler.shutdown() - - @staticmethod - def clear_jobs(): - Scheduler.getScheduler().remove_all_jobs() - - @staticmethod - def start(): - Scheduler.__scheduler.start() - - @staticmethod - def getScheduler() -> BackgroundScheduler: - if Scheduler.__scheduler == None: - Scheduler.__scheduler = BackgroundScheduler() - - if bool(os.environ.get("FLASK_ENV", "development") == "development"): - if bool(os.environ.get('WERKZEUG_RUN_MAIN')): - Scheduler.start() - elif not Scheduler.__scheduler.running: - Scheduler.start() - - return Scheduler.__scheduler + __scheduler = None + + @staticmethod + def shutdown(): + scheduler = Scheduler.getScheduler() + if scheduler and scheduler.running: + scheduler.shutdown() + + @staticmethod + def clear_jobs(): + Scheduler.getScheduler().remove_all_jobs() + + @staticmethod + def start(): + Scheduler.__scheduler.start() + + @staticmethod + def getScheduler() -> BackgroundScheduler: + if Scheduler.__scheduler == None: + Scheduler.__scheduler = BackgroundScheduler({ + 'apscheduler.executors.default': { + 'class': 'apscheduler.executors.pool:ThreadPoolExecutor', + 'max_workers': '20' + }, + 'apscheduler.executors.processpool': { + 'type': 'processpool', + 'max_workers': '12' + } + }) + + if bool(os.environ.get("FLASK_ENV", "development") == "development"): + if bool(os.environ.get('WERKZEUG_RUN_MAIN')): + Scheduler.start() + elif not Scheduler.__scheduler.running: + Scheduler.start() + + return Scheduler.__scheduler diff --git a/app/models/utils.py b/app/models/utils.py index baa71fb..89c0212 100644 --- a/app/models/utils.py +++ b/app/models/utils.py @@ -1,6 +1,5 @@ import base64 import hashlib -import logging import os from pathlib import Path import re @@ -11,68 +10,71 @@ pwd = Path(os.path.dirname(os.path.realpath(__file__))).parent + def from_str(x: Any) -> str: - assert isinstance(x, str) - return x + assert isinstance(x, str) + return x def from_list(f: Callable[[Any], T], x: Any, parent: Any = None) -> List[T]: - assert isinstance(x, list) - if parent is None: - return [f(y) for y in x] - else: - return [f(y, parent) for y in x] - + assert isinstance(x, list) + if parent is None: + return [f(y) for y in x] + else: + return [f(y, parent) for y in x] def from_none(x: Any) -> Any: - assert x is None - return x + assert x is None + return x def from_union(fs, x): - for f in fs: - try: - return f(x) - except: - pass - assert False + for f in fs: + try: + return f(x) + except: + pass + assert False def to_class(c: Type[T], x: Any) -> dict: - assert isinstance(x, c) - return cast(Any, x).to_dict() + assert isinstance(x, c) + return cast(Any, x).to_dict() def from_bool(x: Any) -> bool: - assert isinstance(x, bool) - return x + assert isinstance(x, bool) + return x def from_int(x: Any) -> int: - assert isinstance(x, int) and not isinstance(x, bool) - return x + assert isinstance(x, int) and not isinstance(x, bool) + return x + def normalize_text(text: str) -> str: - text = unidecode.unidecode(text) - text = re.sub(r'“|”', '"', text) - text = re.sub(r'’|‘', "'", text) - return re.sub(r'\s+|\n|\r', ' ',text).strip() + text = unidecode.unidecode(text) + text = re.sub(r'“|”', '"', text) + text = re.sub(r'’|‘', "'", text) + return re.sub(r'\s+|\n|\r', ' ', text).strip() + def calculate_sha1_hash(value: str) -> str: - sha1 = hashlib.sha1() - sha1.update(value.encode('utf-8')) - hash = base64.urlsafe_b64encode(sha1.digest()).decode('ascii') - return ''.join(filter(str.isalnum, hash)) + sha1 = hashlib.sha1() + sha1.update(value.encode('utf-8')) + hash = base64.urlsafe_b64encode(sha1.digest()).decode('ascii') + return ''.join(filter(str.isalnum, hash)) + def to_snake_case(input_string): - # Replace non-alphanumeric characters and apostrophes with spaces and split the string into words - words = re.findall(r"[a-zA-Z0-9]+(?:'[a-zA-Z0-9]+)?", input_string) + # Replace non-alphanumeric characters and apostrophes with spaces and split the string into words + words = re.findall(r"[a-zA-Z0-9]+(?:'[a-zA-Z0-9]+)?", input_string) - # Remove apostrophes from the words - words = [word.replace("'", "") for word in words] + # Remove apostrophes from the words + words = [word.replace("'", "") for word in words] - # Convert words to lowercase and join them with underscores - snake_case_string = '_'.join(word.lower() for word in words) + # Convert words to lowercase and join them with underscores + snake_case_string = '_'.join(word.lower() for word in words) - return snake_case_string + return snake_case_string diff --git a/app/models/widget.py b/app/models/widget.py index 70a9f8a..8cbb5ac 100644 --- a/app/models/widget.py +++ b/app/models/widget.py @@ -1,7 +1,5 @@ from datetime import datetime import logging -import os -from pathlib import Path from models.scheduler import Scheduler from models.exceptions import IDException from models.utils import calculate_sha1_hash, pwd @@ -9,103 +7,103 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) + class Widget: - widget: dict - display_limit: int = None - template: str = 'widget.html' - link: str = None - id: str = None - - def __init__(self, widget): - self.widget = widget - - self.display_limit = widget.get('display_limit', None) - - template_path = pwd.joinpath('templates', self.__class__.__name__.lower() + '.html') - if template_path.exists(): - self.template = template_path.name - - if not self.id: - if 'link' in self.widget: - id = self.widget['link'] - elif 'name' in self.widget: - id = self.widget['name'] - else: - raise IDException("No ID found for widget") - self.id = calculate_sha1_hash(id) - - @property - def loaded(self): - return self.items and len(self.items) > 0 - - @property - def scheduler(self): - return Scheduler.getScheduler() - - @property - def last_updated(self): - return self._last_updated or None - - @property - def items(self): - return self._items or [] - - @items.setter - def items(self, items): - self._items = items - self._last_updated = datetime.now() - - def __iter__(self): - for item in self.items: - yield item - - @property - def display_items(self): - if self.display_limit: - for item in self.items[:self.display_limit]: - yield item - else: - for item in self.items: - yield item - - @property - def name(self): - return self.widget.get('name', '') - - @property - def type(self): - return self.widget.get('type', '') - - @property - def link(self): - return self.widget.get('link', '') - - @property - def display_header(self): - return self.widget.get('display_header', True) - - def hasattr(self, name): - return hasattr(self, name) or name in self.widget - - def get(self, key, default=None): - if hasattr(self, key): - return getattr(self, key) or default - - return self.widget.get(key, default) - - @staticmethod - def from_dict(widget: dict) -> 'Widget': - from models.bookmarks import Bookmarks - from models.iframe import Iframe - from models.feed import Feed - - match widget['type']: - case 'feed': - return Feed(widget) - case 'bookmarks': - return Bookmarks(widget) - case 'iframe': - return Iframe(widget) - case _: - return Widget(widget) - \ No newline at end of file + widget: dict + display_limit: int = None + template: str = 'widget.html' + link: str = None + id: str = None + + def __init__(self, widget): + self.widget = widget + + self.display_limit = widget.get('display_limit', None) + + template_path = pwd.joinpath('templates', self.__class__.__name__.lower() + '.html') + if template_path.exists(): + self.template = template_path.name + + if not self.id: + if 'link' in self.widget: + id = self.widget['link'] + elif 'name' in self.widget: + id = self.widget['name'] + else: + raise IDException("No ID found for widget") + self.id = calculate_sha1_hash(id) + + @property + def loaded(self): + return self.items and len(self.items) > 0 + + @property + def scheduler(self): + return Scheduler.getScheduler() + + @property + def last_updated(self): + return self._last_updated or None + + @property + def items(self): + return self._items or [] + + @items.setter + def items(self, items): + self._items = items + self._last_updated = datetime.now() + + def __iter__(self): + for item in self.items: + yield item + + @property + def display_items(self): + if self.display_limit: + for item in self.items[:self.display_limit]: + yield item + else: + for item in self.items: + yield item + + @property + def name(self): + return self.widget.get('name', '') + + @property + def type(self): + return self.widget.get('type', '') + + @property + def link(self): + return self.widget.get('link', '') + + @property + def display_header(self): + return self.widget.get('display_header', True) + + def hasattr(self, name): + return hasattr(self, name) or name in self.widget + + def get(self, key, default=None): + if hasattr(self, key): + return getattr(self, key) or default + + return self.widget.get(key, default) + + @staticmethod + def from_dict(widget: dict) -> 'Widget': + from models.bookmarks import Bookmarks + from models.iframe import Iframe + from models.feed import Feed + + match widget['type']: + case 'feed': + return Feed(widget) + case 'bookmarks': + return Bookmarks(widget) + case 'iframe': + return Iframe(widget) + case _: + return Widget(widget) diff --git a/app/archive/docker_app.py b/app/services/docker_app.py similarity index 100% rename from app/archive/docker_app.py rename to app/services/docker_app.py diff --git a/app/services/favicon_finder.py b/app/services/favicon_finder.py new file mode 100644 index 0000000..51fb76e --- /dev/null +++ b/app/services/favicon_finder.py @@ -0,0 +1,93 @@ +import logging +import os +import requests +from urllib.parse import urljoin, urlparse +from bs4 import BeautifulSoup +from models.utils import pwd +from models.scheduler import Scheduler + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class FaviconFinder: + def __init__(self, cache_dir='static/assets/icons'): + self.full_cache_path = pwd.joinpath(cache_dir) + self.full_cache_path.mkdir(parents=True, exist_ok=True) + self.relative_cache_path = f"/{cache_dir}" + + def favicon_exists(self, url): + if not url: + return False + favicon_filename = self.get_favicon_filename(url) + favicon_path = os.path.join(self.full_cache_path, favicon_filename) + return os.path.exists(favicon_path) + + def favicon_relative_path(self, url): + return f"{self.relative_cache_path}/{self.get_favicon_filename(url)}" + + def get_favicon_filename(self, url): + domain_parts = urlparse(url).netloc.split('.')[-2:] + return '.'.join(domain_parts) + '.favicon.ico' + + @property + def scheduler(self): + return Scheduler.getScheduler() + + def fetch_from_iterator(self, urls): + for url in urls: + self.scheduler.add_job(self._get_favicon, args=[url], misfire_grace_time=None, executor='processpool') + + def _get_favicon(self, url): + favicon_filename = self.get_favicon_filename(url) + favicon_path = os.path.join(self.full_cache_path, favicon_filename) + + if not os.path.exists(favicon_path): + icon_url = self.find_favicon_url(url) + + if not icon_url: + # If favicon URL is not found for the original URL, try the base URL + base_url = self.get_base(url) + icon_url = self.find_favicon_url(base_url) + + if icon_url: + self.download_favicon(url, icon_url) + else: + logger.warn(f'Favicon not found for {url} or {self.get_base(url)}') + + def get_base(self, url): + parsed_url = urlparse(url) + base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" + return base_url + + def find_favicon_url(self, url): + try: + response = requests.get(url) + if response.status_code == 200: + soup = BeautifulSoup(response.text, 'html.parser') + icon_link = soup.find('link', rel=['icon', 'shortcut icon']) + if icon_link: + icon_url = icon_link['href'] + if not icon_url.startswith('http'): + icon_url = urljoin(url, icon_url) + return icon_url + else: + return None + else: + return None + except requests.exceptions.RequestException: + return None + + def download_favicon(self, url, icon_url): + try: + response = requests.get(icon_url) + if response.status_code == 200: + favicon_filename = self.get_favicon_filename(url) + favicon_path = os.path.join(self.full_cache_path, favicon_filename) + with open(favicon_path, 'wb') as file: + file.write(response.content) + logger.debug(f'Favicon for {self.get_base(url)} downloaded and saved as {favicon_path}') + else: + logger.warn(f'Failed to download the favicon for {self.get_base(url)}') + except requests.exceptions.RequestException as ex: + logger.error(f'An error occurred while downloading the favicon for {self.get_base(url)}', ex) diff --git a/app/static/css/bookmark_bar.css b/app/static/css/bookmark_bar.css index 71981ff..b9ac04a 100644 --- a/app/static/css/bookmark_bar.css +++ b/app/static/css/bookmark_bar.css @@ -5,7 +5,7 @@ #bookmarkBar { background-color: #2c2c2c; width: 100%; - position: fixed; + position: absolute; top: 0; left: 0; z-index: 1000; @@ -26,6 +26,7 @@ border-bottom: 0; margin: 0; padding: 4px 0px; + height: var(--bookmark-bar-height); } #bookmarkBar a { @@ -40,8 +41,21 @@ margin-right: 5px; } +#bookmarkBar li:hover > a { + + background-color: #525252; +} + #bookmarkBar a .fa-folder { color: #eece00; + padding-right: 2px; +} + +#bookmarkBar a img { + max-width: 12px; + max-height: 12px; + padding-right: 2px; + vertical-align: text-bottom; } #bookmarkBar ul ul { @@ -65,4 +79,5 @@ #bookmarkBar li:hover > ul { display: block; + background-color: #2c2c2c; } \ No newline at end of file diff --git a/app/static/css/layout.css b/app/static/css/layout.css index f1951ee..4eff833 100644 --- a/app/static/css/layout.css +++ b/app/static/css/layout.css @@ -22,7 +22,7 @@ body { display: flex; flex: 1 0 auto; flex-direction: column; - min-height: calc(100vh - var(--bookmark-bar-height)); /* Reference the variable */ + min-height: calc(100vh); /* Reference the variable */ } /* Row */ diff --git a/app/templates/bookmark_bar.html b/app/templates/bookmark_bar.html index 799f55c..5625db7 100644 --- a/app/templates/bookmark_bar.html +++ b/app/templates/bookmark_bar.html @@ -1,22 +1,25 @@ {% if bookmarks %} -{% endif %} +{% endif %} \ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html index 435fbc1..1318eca 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -32,7 +32,9 @@ {% include('flash_message.html') %}
- {% include 'bookmark_bar.html' ignore missing with context %} + {% with bookmarks = layout.bookmark_bar %} + {% include 'bookmark_bar.html' ignore missing with context %} + {% endwith %}
diff --git a/notebooks/bookmark-processing-1.ipynb b/notebooks/bookmark-processing-1.ipynb index d879168..aff2fd6 100644 --- a/notebooks/bookmark-processing-1.ipynb +++ b/notebooks/bookmark-processing-1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 158, + "execution_count": 11, "metadata": { "metadata": {} }, @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 12, "metadata": { "metadata": {} }, @@ -30,7 +30,22 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.parse import urlparse\n", + "\n", + "def get_favicon_filename(url):\n", + " parsed_url = urlparse(url)\n", + " domain_parts = parsed_url.netloc.split('.')[-2:]\n", + " favicon_filename = '.'.join(domain_parts) + '.favicon.ico'\n", + " return f'/static/assets/icons/{favicon_filename}'" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": { "metadata": {} }, @@ -40,13 +55,14 @@ " return {\n", " 'name': a_tag.text,\n", " 'href': a_tag['href'],\n", + " 'favicon': get_favicon_filename(a_tag['href']),\n", " 'add_date': a_tag['add_date']\n", " }" ] }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 15, "metadata": { "metadata": {} },