diff --git a/.dockerignore b/.dockerignore index e7756ec..1580697 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ -app/config \ No newline at end of file +app/config +app/archive \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 9b781f3..8a50cf8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -14,6 +14,7 @@ "Instapundit", "JSONIFY", "langchain", + "lastrun", "llms", "mikestead", "Monero", diff --git a/app/app.py b/app/app.py index 56b217a..9ee5f03 100644 --- a/app/app.py +++ b/app/app.py @@ -12,7 +12,18 @@ from services.link_tracker import link_tracker from utils import copy_default_to_configs +logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARN) + logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +logger.propagate = False + +# create console handler +consoleHandler = logging.StreamHandler() +consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) + +# Add console handler to logger +logger.addHandler(consoleHandler) copy_default_to_configs() @@ -105,7 +116,7 @@ def track(feed_id, link_id): development = bool(os.environ.get("FLASK_ENV", "development") == "development") if development: app.run(port=port, debug=bool(os.environ.get("FLASK_DEBUG", "True"))) - print ("Shutting down...") + logger.info("Shutting down...") layout.stop_scheduler() sys.exit() else: @@ -116,7 +127,7 @@ def track(feed_id, link_id): shutdown_event = asyncio.Event() def _signal_handler(*_: Any) -> None: - print ("Shutting down...") + logger.info("Shutting down...") layout.stop_scheduler() shutdown_event.set() @@ -131,7 +142,7 @@ def _signal_handler(*_: Any) -> None: serve(app, config, shutdown_trigger=shutdown_event.wait) ) except KeyboardInterrupt: - print ("\nShutting down...") + logger.info("\nShutting down...") layout.stop_scheduler() sys.exit() \ No newline at end of file diff --git a/app/archive/layout.py b/app/archive/layout.py index 90e4458..c626cfb 100644 --- a/app/archive/layout.py +++ b/app/archive/layout.py @@ -3,6 +3,15 @@ from rss_feed_manager import RssFeedManager import yaml from models.utils import pwd +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.propagate = False + +# create console handler +consoleHandler = logging.StreamHandler() +consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) class Layout: def __init__(self, file_path="configs/layout.yml"): @@ -30,7 +39,7 @@ def save_articles(self): self.feed_manager.save_articles() def reload(self): - print("Reloading layout") + logger.debug("Reloading layout") with open(self.file_path, 'r') as file: self.contents = yaml.safe_load(file) self.mtime = os.path.getmtime(self.file_path) @@ -42,7 +51,7 @@ def reload(self): if widget['type'] == 'feed': feed_widgets.append(widget) - print('Initializing feed manager with {} feeds'.format(len(feed_widgets))) + logger.debug('Initializing feed manager with {} feeds'.format(len(feed_widgets))) self.feed_manager.initialize(feed_widgets) for tab in self.tabs: @@ -64,11 +73,11 @@ def reload(self): if (template_path := Path('templates', f'{widget["type"]}.html')).exists(): widget['template'] = template_path.name - print("========== Layout reloaded") + logger.debug("========== Layout reloaded") def is_modified(self): result = os.path.getmtime(self.file_path) > self.mtime - print("========== Layout modified: " + str(result)) + logger.debug("========== Layout modified: " + str(result)) return result def current_tab(self, tab_name): diff --git a/app/models/feed.py b/app/models/feed.py index 16d6e36..edb1bb3 100644 --- a/app/models/feed.py +++ b/app/models/feed.py @@ -18,8 +18,10 @@ from models.feed_article import FeedArticle from models.noop_feed_processor import NoOpFeedProcessor +#logger = getLogger(__name__, logging.DEBUG) logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) +logger.setLevel(logging.INFO) + class Feed(Widget): feed_url: str @@ -55,16 +57,17 @@ def __init__(self, widget) -> None: else: self._last_updated = None - logger.debug(f"creating cron job for {self.name}") - job = self.scheduler.add_job(self.update, 'cron', name=f'{self.id} - {self.name} - cron', hour='*', jitter=20, max_instances=1) - - if self.needs_update or self.old_cache_path.exists() or self.name == "Instapundit": - # schedule job to run right now - logging.debug(f"{self.name} scheduled {self.name} for immediate update now!") - job.modify(next_run_time=datetime.now()) - #else: - # logger.debug(f"scheduled for {self.name} immediate processing now") - # self.scheduler.add_job(self.process, 'date', name=f'{self.id} - {self.name} - process', run_date=datetime.now(), max_instances=1) + if self.scheduler.running: + job = self.scheduler.add_job(self.update, 'cron', name=f'{self.id} - {self.name} - cron', hour='*', jitter=20, max_instances=1) + logger.debug(f"{logging.getLevelName(logger.level)} creating cron job for {self.name} {job.id}") + + if self.needs_update or self.old_cache_path.exists() or self.name == "Instapundit": + # schedule job to run right now + logging.debug(f"{logging.getLevelName(logger.level)} {self.name} scheduled {self.name} for immediate update now!") + job.modify(next_run_time=datetime.now()) + #else: + # logger.debug(f"scheduled for {self.name} immediate processing now") + # self.scheduler.add_job(self.process, 'date', name=f'{self.id} - {self.name} - process', run_date=datetime.now(), max_instances=1) @property def needs_update(self): @@ -229,6 +232,6 @@ def save_articles(self, articles: list[FeedArticle]): with open(self.cache_path, 'w') as f: json.dump(data, f, indent=2) - logger.debug(f"Saved {len(all_articles)} articles for {self.name} to cache file {self.cache_path}") + logger.info(f"Saved {len(all_articles)} articles for {self.name} to cache file {self.cache_path}") return all_articles \ No newline at end of file diff --git a/app/models/feed_article.py b/app/models/feed_article.py index 7d5fb8a..acb766a 100644 --- a/app/models/feed_article.py +++ b/app/models/feed_article.py @@ -30,7 +30,7 @@ def __init__(self, original_title: str, title: str, link: str, description: str, else: self.title = self.original_title - self.description = description + self.description = normalize_text(description) self.pub_date = pub_date self.processed = processed diff --git a/app/models/layout.py b/app/models/layout.py index 05d2c95..10af1e5 100644 --- a/app/models/layout.py +++ b/app/models/layout.py @@ -10,6 +10,16 @@ from models.utils import from_list, pwd logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.propagate = False + +# create console handler +consoleHandler = logging.StreamHandler() +consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) + +# Add console handler to logger +logger.addHandler(consoleHandler) + class Layout: id: str = 'layout' @@ -17,7 +27,6 @@ class Layout: tabs: list[Tab] = [] def __init__(self, config_file: str = "configs/layout.yml"): - logger.setLevel(logging.DEBUG) self.config_path = pwd.joinpath(config_file) self.reload() @@ -38,6 +47,7 @@ def mtime(self): def reload(self): + logger.debug("==== Starting reload...") Scheduler.clear_jobs() with open(self.config_path, 'r') as file: @@ -47,9 +57,7 @@ def reload(self): self.last_reload = self.mtime self.feed_hash = {} - logging.debug("Layout reloaded!") - - + logger.debug("==== Layout reloaded!") def tab(self, name: str) -> Tab: if name is None: diff --git a/app/models/scheduler.py b/app/models/scheduler.py index 189a6b5..c596e19 100644 --- a/app/models/scheduler.py +++ b/app/models/scheduler.py @@ -1,13 +1,14 @@ -from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR -from apscheduler.schedulers.background import Event, BackgroundScheduler +import os +from apscheduler.schedulers.background import BackgroundScheduler import logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + class Scheduler: __scheduler = None - - def scheduler(self) -> BackgroundScheduler: - return Scheduler.getScheduler() @staticmethod def shutdown(): @@ -21,20 +22,11 @@ def clear_jobs(): @staticmethod def getScheduler() -> BackgroundScheduler: - if Scheduler.__scheduler == None: - Scheduler.__scheduler = BackgroundScheduler() - Scheduler.__scheduler.start() - logging.info('Scheduler started') + if Scheduler.__scheduler == None: + Scheduler.__scheduler = BackgroundScheduler() - # async def listener(event: Event) -> None: - # print(f"Received {event.__class__.__name__}") - - # def my_listener(event): - # if event.exception: - # print('The job crashed :(') - # else: - # print('The job worked :)' + str(event.job_id)) - - # SchedulerWidget.__scheduler.add_listener(my_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR) - - return Scheduler.__scheduler + if os.environ.get('WERKZEUG_RUN_MAIN') == 'true': + Scheduler.__scheduler.start() + logger.info('Scheduler started!') + + return Scheduler.__scheduler diff --git a/app/models/utils.py b/app/models/utils.py index 28bd5e0..f9ea839 100644 --- a/app/models/utils.py +++ b/app/models/utils.py @@ -1,5 +1,6 @@ import base64 import hashlib +import logging import os from pathlib import Path import re diff --git a/app/processors/title_editor.py b/app/processors/title_editor.py index 9b20084..e307746 100644 --- a/app/processors/title_editor.py +++ b/app/processors/title_editor.py @@ -1,8 +1,6 @@ -import base64 -import hashlib import logging import os -from functools import cached_property +from pathlib import Path from models.feed_article import FeedArticle from langchain_community.llms import Ollama from langchain.prompts import ChatPromptTemplate, PromptTemplate, HumanMessagePromptTemplate @@ -10,12 +8,21 @@ from langchain.output_parsers import ResponseSchema, StructuredOutputParser from models.utils import calculate_sha1_hash + logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +logger.propagate = False + +# create console handler +consoleHandler = logging.StreamHandler() +consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) + +# Add console handler to logger +logger.addHandler(consoleHandler) class TitleEditor: def __init__(self): self.ollama_url = os.getenv('OLLAMA_URL') - logger.setLevel(logging.DEBUG) if self.ollama_url: parser = StructuredOutputParser.from_response_schemas( [ResponseSchema(name="title", description="title of the article")] @@ -36,14 +43,15 @@ def __init__(self): You are an expert news article title editor. Use the provided title and summary to write a concise and accurate title that is informative and avoids sounding like clickbait. Do not include links or urls in the title. - Title should be as short as possible, aim to be less that 70 characters long. - Title should have an absolute minimum of punctuation and use at most one all capitalized word at the start of the title. + Do not editorialize the title, even if the title and description do. + Title must be as short as possible, aim to be less that 70 characters long. + Title must have an absolute minimum of punctuation and NOT use words that are all upper case. """)) user_prompt = HumanMessagePromptTemplate(prompt=prompt) chat_prompt = ChatPromptTemplate.from_messages([system_prompt, user_prompt]) model_name = "dolphin-mistral" - model_temp = 0.0 + model_temp = 0.2 model = Ollama(base_url=self.ollama_url, model=model_name, keep_alive=5, temperature=model_temp) self.chain = chat_prompt | model | parser @@ -57,12 +65,12 @@ def process(self, articles: list[FeedArticle]) -> list[FeedArticle]: total = len(needs_processed) for count, article in enumerate(needs_processed, start=1): try: - logger.debug(f"{count}/{total}: {article.processed != self.script_hash} current hash: {self.script_hash} processed hash: {article.processed}") + logger.info(f"Processing title {count}/{total}: {article.original_title}") result = self.chain.invoke({"title": article.original_title, "summary": article.description}) article.title = result['title'] article.processed = self.script_hash except Exception as ex: - print(f"Error: {ex} for {article.original_title}") - needs_processed.remove(article) + logger.error(f"Error: {ex} for {article.original_title}") + #needs_processed.remove(article) return articles diff --git a/app/static/css/widgets.css b/app/static/css/widgets.css index a06a7cb..4c9beb0 100644 --- a/app/static/css/widgets.css +++ b/app/static/css/widgets.css @@ -82,7 +82,8 @@ ul li:last-child { vertical-align: middle; font-family: 'Font Awesome 5 Free'; font-weight: 900; /* This ensures the solid style is used */ - font-size: 0.8em; + font-size: 0.8em; + color: #5e5e5e; } .fa-chevron-down { diff --git a/app/utils.py b/app/utils.py index dc96b89..b230659 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,8 +1,20 @@ +import logging import os import shutil from pathlib import Path from models.utils import pwd +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.propagate = False + +# create console handler +consoleHandler = logging.StreamHandler() +consoleHandler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(message)s')) + +# Add console handler to logger +logger.addHandler(consoleHandler) + def copy_default_to_configs(): default_dir = os.path.join(pwd, 'defaults') config_dir = os.path.join(pwd, 'configs') @@ -19,10 +31,8 @@ def copy_default_to_configs(): dst = os.path.join(config_dir, file) shutil.copy2(src, dst) files_copied += 1 - print(f"File {file} copied successfully from {default_dir} to {config_dir}.") + logger.info(f"File {file} copied successfully from {default_dir} to {config_dir}.") - if files_copied > 0: - print(f"Default files synced from {default_dir} to {config_dir}.") - else: - print(f"No files copied from {default_dir} to {config_dir}.") + if files_copied == 0: + logger.info(f"No files copied from {default_dir} to {config_dir}.")