Skip to content
This repository has been archived by the owner on Apr 29, 2024. It is now read-only.

Commit

Permalink
Merge pull request #17 from toptal/devx-3855-refactor-DBs
Browse files Browse the repository at this point in the history
Refactor Database folder
  • Loading branch information
sasha370 authored Apr 11, 2024
2 parents cc46d11 + d51a9df commit 56a7e87
Show file tree
Hide file tree
Showing 31 changed files with 522 additions and 627 deletions.
1 change: 1 addition & 0 deletions configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def get_project_root() -> str:

chart_folder_path = os.path.join(project_path, "content", "charts")
sql_file_path = os.path.join(project_path, "content", "database", "confluence_pages_sql.db")
db_url = 'sqlite:///' + sql_file_path
vector_folder_path = os.path.join(project_path, "content", "vectors", "confluence_pages")
interactions_folder_path = os.path.join(project_path, "content", "vectors", "confluence_interactions")

Expand Down
12 changes: 5 additions & 7 deletions confluence/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import time

from configuration import api_host, api_port
from database.page_manager import get_page_ids_missing_embeds, store_pages_data
from database.page_manager import PageManager
from database.space_manager import SpaceManager
from vector.create_vector_db import add_embeds_to_vector_db

from .client import ConfluenceClient
from .retriever import retrieve_space


# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

Expand All @@ -34,7 +33,7 @@ def submit_embedding_creation_request(page_id: str):
def generate_missing_page_embeddings(retry_limit: int = 3, wait_time: int = 5) -> None:
for attempt in range(retry_limit):
# Retrieve the IDs of pages that are still missing embeddings.
page_ids = get_page_ids_missing_embeds()
page_ids = PageManager().get_page_ids_missing_embeds()
# If there are no pages missing embeddings, exit the loop and end the process.
if not page_ids:
logging.info("All pages have embeddings. Process complete.")
Expand All @@ -52,13 +51,12 @@ def generate_missing_page_embeddings(retry_limit: int = 3, wait_time: int = 5) -

# After waiting, retrieve the list of pages still missing embeddings to see if the list has decreased.
# This retrieval is crucial to ensure that the loop only continues if there are still pages that need processing.
if (page_ids := get_page_ids_missing_embeds()):
if page_ids := PageManager().get_page_ids_missing_embeds():
logging.info(f"After attempt {attempt + 1}, {len(page_ids)} pages are still missing embeds.")
else:
logging.info("All pages now have embeddings. Process complete.")
break # Break out of the loop if there are no more pages missing embeddings.


# After exhausting the retry limit, check if there are still pages without embeddings.
if page_ids:
logging.info("Some pages still lack embeddings after all attempts.")
Expand All @@ -82,10 +80,10 @@ def tui_choose_space():


def import_space(space_key, space_name):
import_date=datetime.now().strftime('%Y-%m-%d %H:%M:%S')
import_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

pages = retrieve_space(space_key)
store_pages_data(space_key, pages)
PageManager().store_pages_data(space_key, pages)
generate_missing_page_embeddings()

SpaceManager().upsert_space_info(
Expand Down
29 changes: 10 additions & 19 deletions database/bookmarked_conversation_manager.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,32 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError
from configuration import sql_file_path
from database.bookmarked_conversation import BookmarkedConversation, Base
from models.bookmarked_conversation import BookmarkedConversation
from datetime import datetime, timezone
from database.database import Database


class BookmarkedConversationManager:
def __init__(self):
self.engine = create_engine('sqlite:///' + sql_file_path)
Base.metadata.create_all(self.engine)
self.Session = sessionmaker(bind=self.engine)
self.db = Database()

def add_bookmarked_conversation(self, title, body, thread_id):
try:
with self.Session() as session:
new_conversation = BookmarkedConversation(title=title, body=body, thread_id=thread_id)
session.add(new_conversation)
session.commit()
return new_conversation.id
except SQLAlchemyError as e:
print(f"Error adding bookmarked conversation: {e}")
return None
new_conversation = BookmarkedConversation(title=title, body=body, thread_id=thread_id)
self.db.add_object(new_conversation)

def update_posted_on_confluence(self, thread_id):
try:
with self.Session() as session:
with self.db.get_session() as session:
conversation = session.query(BookmarkedConversation).filter_by(thread_id=thread_id).first()
if conversation:
conversation.posted_on_confluence = datetime.now(timezone.utc)
session.commit()
except SQLAlchemyError as e:
print(f"Error updating conversation with Confluence timestamp: {e}")
return None

def get_unposted_conversations(self):
try:
with self.Session() as session:
with self.db.get_session() as session:
return session.query(BookmarkedConversation).filter_by(posted_on_confluence=None).all()
except SQLAlchemyError as e:
print(f"Error getting unposted conversations: {e}")
return None
return None
90 changes: 90 additions & 0 deletions database/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError
from configuration import db_url
from models.qa_interaction import QAInteraction
from models.space_info import SpaceInfo
from models.page_data import PageData
from models.bookmarked_conversation import BookmarkedConversation
from models.quiz_question import QuizQuestion
from models.user_score import UserScore


class Database:
    """
    Process-wide access point to the SQLAlchemy database.

    The class is a singleton: the first ``Database()`` call builds the engine
    and session factory from ``configuration.db_url`` and creates any missing
    tables; every later call returns that same instance.

    Attributes:
        engine: The engine returned by ``create_engine(db_url)``.
        Session: The ``sessionmaker`` factory bound to ``engine``. Call it
            (or use ``get_session()``) to obtain a new session.
    """

    _instance = None

    def __new__(cls):
        """
        Return the shared instance, creating and initializing it on first use.

        The instance is published to ``cls._instance`` only after the engine
        and tables were set up successfully, so a failed first attempt does
        not leave a half-initialized singleton behind; the next call retries.

        Returns:
            Database: The single shared instance of the class.
        """
        if cls._instance is None:
            instance = super().__new__(cls)
            instance._init_engine()
            instance._create_tables()
            # Publish last: anything above may raise.
            cls._instance = instance
        return cls._instance

    def _create_tables(self):
        """
        Create the tables of all known models if they do not exist yet.
        """
        models = [QAInteraction, SpaceInfo, PageData, BookmarkedConversation, QuizQuestion, UserScore]
        # Models declared on the same base share one MetaData object, and
        # create_all() already covers every table registered on it, so each
        # distinct MetaData only needs to be processed once.
        seen_metadata_ids = set()
        for model in models:
            metadata = model.metadata
            if id(metadata) in seen_metadata_ids:
                continue
            seen_metadata_ids.add(id(metadata))
            metadata.create_all(self.engine)

    def _init_engine(self):
        """
        Create the engine for ``db_url`` and the session factory bound to it.
        """
        self.engine = create_engine(db_url)
        self.Session = sessionmaker(bind=self.engine)

    def get_session(self):
        """
        Open a new database session.

        Returns:
            A new session produced by the ``Session`` factory. The caller is
            responsible for closing it, e.g. by using it as a context manager.
        """
        return self.Session()

    def add_object(self, obj):
        """
        Add the given object to the database and commit.

        Args:
            obj: The mapped object to persist.

        Returns:
            The same ``obj`` on success, ``None`` if a ``SQLAlchemyError``
            occurred (the error is printed, not raised).

        NOTE(review): the session is closed before returning, so ``obj`` is
        detached; with the factory's default settings its attributes are
        presumably expired by the commit - confirm before reading e.g.
        ``obj.id`` from the returned object.
        """
        try:
            with self.get_session() as session:
                try:
                    session.add(obj)
                    session.commit()
                except SQLAlchemyError:
                    # Roll back while the session is still open and bound;
                    # the outer handler does the reporting.
                    session.rollback()
                    raise
                return obj
        except SQLAlchemyError as e:
            # Also reached when get_session() itself fails, in which case
            # there is no session to roll back.
            class_name = obj.__class__.__name__
            print(f"Error adding object of type {class_name}: {e}")
            return None
Loading

0 comments on commit 56a7e87

Please sign in to comment.