From 16006be43d6e90d1e44e58bfaf0ee19241339c93 Mon Sep 17 00:00:00 2001 From: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:23:33 -0500 Subject: [PATCH] feat: Add alembic and indexes --- CHANGELOG.md | 5 + alembic.ini | 117 ++++++++++++++++++ migrations/README | 1 + migrations/env.py | 95 ++++++++++++++ migrations/script.py.mako | 26 ++++ ...28084f472_add_indexes_for_messages_and_.py | 60 +++++++++ pyproject.toml | 1 + src/agent.py | 2 +- src/db.py | 14 +-- uv.lock | 28 +++++ 10 files changed, 339 insertions(+), 10 deletions(-) create mode 100644 alembic.ini create mode 100644 migrations/README create mode 100644 migrations/env.py create mode 100644 migrations/script.py.mako create mode 100644 migrations/versions/c3828084f472_add_indexes_for_messages_and_.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0236b80..8986711 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [0.0.15] +### Added + +- Alembic for handling database migrations +- Additional indexes for reading Messages and Metamessages + ### Fixed - Dialectic Streaming Endpoint properly sends text in `StreamingResponse` diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..421e3fe --- /dev/null +++ b/alembic.ini @@ -0,0 +1,117 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = migrations + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. 
+# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 
+ +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/migrations/README b/migrations/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. 
"""Alembic environment script for this project.

Configures the migration context and runs migrations either "offline"
(SQL is emitted to the script output) or "online" (applied over a live
connection). The database URL is taken from the ``CONNECTION_URI``
environment variable instead of the placeholder ``sqlalchemy.url`` in
``alembic.ini``.
"""

import os
import sys
from logging.config import fileConfig
from pathlib import Path

from alembic import context
from dotenv import load_dotenv
from sqlalchemy import engine_from_config, pool

# Add the project root (the parent of this ``migrations`` directory) to the
# import path *before* importing project modules; doing it afterwards cannot
# help the ``src`` import below.
sys.path.append(str(Path(__file__).resolve().parents[1]))

# Import your models
from src.db import Base  # noqa: E402  (must follow the sys.path tweak above)

# Load environment variables
load_dotenv()

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# disable_existing_loggers=False keeps loggers that were configured before
# this script ran.
if config.config_file_name is not None:
    fileConfig(config.config_file_name, disable_existing_loggers=False)

# MetaData object used for 'autogenerate' support.
target_metadata = Base.metadata


def get_url() -> str:
    """Return the database URL from the ``CONNECTION_URI`` environment variable.

    Raises:
        RuntimeError: If ``CONNECTION_URI`` is unset or empty, so the failure
            is reported here rather than deep inside engine creation.
    """
    url = os.getenv("CONNECTION_URI")
    if not url:
        raise RuntimeError(
            "CONNECTION_URI environment variable is not set; "
            "cannot determine the database URL for migrations"
        )
    return url


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    # The URL comes from the environment, not from alembic.ini.
    context.configure(
        url=get_url(),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.

    """
    # Fall back to an empty mapping when the ini section is missing, and copy
    # the result so injecting the URL only affects this local dict.
    configuration = dict(config.get_section(config.config_ini_section, {}))
    configuration["sqlalchemy.url"] = get_url()

    # NullPool: no pooled connections are kept by this engine.
    connectable = engine_from_config(
        configuration,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
"""Add indexes for messages and metamessages for reads

Revision ID: c3828084f472
Revises:
Create Date: 2024-12-12 13:41:40.156095

"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy import text
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "c3828084f472"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# One entry per index, in creation order:
# (index name, table name, key columns, extra keyword arguments).
# NOTE(review): the two INCLUDE lists copy "content" and "metadata" into every
# index entry; PostgreSQL B-tree index rows have a size cap, so very large
# values could be rejected on insert -- confirm against the column sizes.
_INDEX_SPECS = (
    ("idx_users_app_lookup", "users", ["app_id", "public_id"], {}),
    ("idx_sessions_user_lookup", "sessions", ["user_id", "public_id"], {}),
    (
        "idx_messages_session_lookup",
        "messages",
        ["session_id", "id"],
        {
            "postgresql_include": [
                "public_id",
                "is_user",
                "content",
                "metadata",
                "created_at",
            ],
        },
    ),
    (
        "idx_metamessages_lookup",
        "metamessages",
        ["metamessage_type", sa.text("id DESC")],
        {
            "postgresql_include": [
                "public_id",
                "content",
                "message_id",
                "created_at",
                "metadata",
            ],
        },
    ),
)


def upgrade() -> None:
    """Create the lookup indexes on users, sessions, messages and metamessages."""
    for index_name, table, key_columns, extra in _INDEX_SPECS:
        op.create_index(index_name, table, key_columns, **extra)


def downgrade() -> None:
    """Drop every index created by ``upgrade``, in the same order they were listed."""
    for index_name, table, _key_columns, _extra in _INDEX_SPECS:
        op.drop_index(index_name, table_name=table)
""" return self.client.messages.stream( - model="claude-3-5-sonnet-20240620", + model="claude-3-5-sonnet-20241022", messages=[ { "role": "user", diff --git a/src/db.py b/src/db.py index 2066921..aa5f91a 100644 --- a/src/db.py +++ b/src/db.py @@ -1,7 +1,9 @@ import os +from alembic import command +from alembic.config import Config from dotenv import load_dotenv -from sqlalchemy import MetaData, create_engine +from sqlalchemy import MetaData from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.orm import declarative_base @@ -43,11 +45,5 @@ def scaffold_db(): """use a sync engine for scaffolding the database. ddl operations are unavailable with async engines """ - print(os.environ["CONNECTION_URI"]) - engine = create_engine( - os.environ["CONNECTION_URI"], - pool_pre_ping=True, - echo=True, - ) - Base.metadata.create_all(bind=engine) - engine.dispose() + alembic_cfg = Config("alembic.ini") + command.upgrade(alembic_cfg, "head") diff --git a/uv.lock b/uv.lock index 765af8c..626f838 100644 --- a/uv.lock +++ b/uv.lock @@ -5,6 +5,20 @@ resolution-markers = [ "python_full_version >= '3.13'", ] +[[package]] +name = "alembic" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/1e/8cb8900ba1b6360431e46fb7a89922916d3a1b017a8908a7c0499cc7e5f6/alembic-1.14.0.tar.gz", hash = "sha256:b00892b53b3642d0b8dbedba234dbf1924b69be83a9a769d5a624b01094e304b", size = 1916172 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/06/8b505aea3d77021b18dcbd8133aa1418f1a1e37e432a465b14c46b2c0eaa/alembic-1.14.0-py3-none-any.whl", hash = "sha256:99bd884ca390466db5e27ffccff1d179ec5c05c965cfefc0607e69f9e411cb25", size = 233482 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -428,6 +442,7 @@ name = "honcho" version = "0.0.15" source = { virtual = "." 
} dependencies = [ + { name = "alembic" }, { name = "anthropic" }, { name = "fastapi", extra = ["standard"] }, { name = "fastapi-pagination" }, @@ -455,6 +470,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "alembic", specifier = ">=1.14.0" }, { name = "anthropic", specifier = ">=0.36.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.111.0" }, { name = "fastapi-pagination", specifier = ">=0.12.24" }, @@ -685,6 +701,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/ee/6d9873144f860391fd1130be0e1e5a1dbd7e9d128da1c7baf1ae71babb99/jiter-0.6.1-cp39-none-win_amd64.whl", hash = "sha256:d465db62d2d10b489b7e7a33027c4ae3a64374425d757e963f86df5b5f2e7fc5", size = 202278 }, ] +[[package]] +name = "mako" +version = "1.3.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/d9/8518279534ed7dace1795d5a47e49d5299dd0994eed1053996402a8902f9/mako-1.3.8.tar.gz", hash = "sha256:577b97e414580d3e088d47c2dbbe9594aa7a5146ed2875d4dfa9075af2dd3cc8", size = 392069 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/bf/7a6a36ce2e4cafdfb202752be68850e22607fccd692847c45c1ae3c17ba6/Mako-1.3.8-py3-none-any.whl", hash = "sha256:42f48953c7eb91332040ff567eb7eea69b22e7a4affbc5ba8e845e8f730f6627", size = 78569 }, +] + [[package]] name = "markdown-it-py" version = "3.0.0"