Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Droplet scaling #294

Merged
merged 10 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ target/
profile_default/
ipython_config.py

# repo secrets
civiclens/deploy/droplet_config.yml

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
Expand Down
Empty file added civiclens/deploy/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions civiclens/deploy/launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Script to autodeploy CivicLens NLP jobs"""
import pydo

from civiclens.utils.constants import DIGITAL_OCEAN, SSH_ID


# DigitalOcean API client, authenticated with the token read from the
# environment by civiclens.utils.constants.
do_client = pydo.Client(token=DIGITAL_OCEAN)

# Contents of the droplet config file, sent as the droplet's "user_data".
# The path is relative, so it resolves against the current working directory
# (launch_nlp.sh changes into the repository root before running this module).
with open("civiclens/deploy/droplet_config.yml", "r", encoding="utf-8") as f:
    cloud_config = f.read()

# Request body for the droplet-creation call.
droplet_data = {
    "name": "civiclens-nlp",
    "region": "nyc3",
    "size": "s-4vcpu-8gb",
    "image": "ubuntu-24-04-x64",
    "user_data": cloud_config,
    "ssh_keys": [SSH_ID],
    "monitoring": True,
}

# launch instance
try:
    do_client.droplets.create(body=droplet_data)
except Exception as e:
    # Report the failure on stderr and exit with a non-zero status so the
    # calling shell script / cron job can tell that no instance was launched
    # (previously the error was printed and the script still exited 0).
    raise SystemExit(f"Failed to launch instance: {e}") from e
else:
    print("Instance launched!")
7 changes: 7 additions & 0 deletions civiclens/deploy/launch_nlp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Entry point for the scheduled job: starts the CivicLens NLP droplet by
# running the civiclens.deploy.launch module through poetry.

# Stop here if the project directory is missing, so poetry is never run from
# an unrelated working directory.
cd /home/civiclens/CivicLens || exit 1
poetry run python3 -m civiclens.deploy.launch

# crontab command to run the nlp update at midnight on Wednesday every week
# 0 0 * * 3 /home/civiclens/CivicLens/civiclens/deploy/launch_nlp.sh
# NOTE(review): path above is derived from the cd target in this script;
# confirm it matches the actual location on the linux server.
25 changes: 25 additions & 0 deletions civiclens/deploy/run_civiclens_nlp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Runs the CivicLens NLP update on the droplet and records the droplet's own
# ID so the pipeline (--cloud) can destroy the instance when it finishes.

# set paths
PROJECT_DIR="/home/civiclens-nlp/CivicLens/"

# fetch droplet ID for instance deletion
# -s silences the progress meter; -f makes an HTTP error produce no output
# instead of capturing an error page as the "ID".
DROPLET_ID=$(curl -sf "http://169.254.169.254/metadata/v1/id")
if [ -z "$DROPLET_ID" ]; then
    # Keep going: the NLP job can still run, but the instance will not be
    # able to delete itself afterwards.
    echo "Warning: could not fetch droplet ID from metadata service." >&2
fi
export DROPLET_ID
echo "export DROPLET_ID=$DROPLET_ID" >> ~/.zshenv
# NOTE(review): ~/.zshenv is sourced from bash here; presumably it also holds
# the other environment variables the pipeline needs — confirm it contains
# only bash-compatible syntax.
source ~/.zshenv

echo "===================================="
echo "Running NLP Update..."

# go to project directory if exists
if [ -d "$PROJECT_DIR" ]; then
    cd "$PROJECT_DIR" || exit 1
else
    echo "Project directory does not exist."
    exit 1
fi

# install dependencies; without them the pipeline cannot start
poetry install || exit 1

# Run NLP update
poetry run python3 -m civiclens.nlp.pipeline --cloud
8 changes: 8 additions & 0 deletions civiclens/nlp/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
from functools import partial

import polars as pl
from pydo import Client

from civiclens.nlp import titles
from civiclens.nlp.comments import get_doc_comments, rep_comment_analysis
from civiclens.nlp.models import sentence_transformer, sentiment_pipeline
from civiclens.nlp.tools import RepComments, sentiment_analysis
from civiclens.nlp.topics import HDAModel, LabelChain, topic_comment_analysis
from civiclens.utils import constants
from civiclens.utils.database_access import Database, pull_data, upload_comments


Expand All @@ -33,6 +35,7 @@

# Command-line interface for the pipeline: two optional boolean switches that
# default to False when omitted.
parser = argparse.ArgumentParser()
_switch_options = {"action": "store_true", "required": False}
# --refresh: consumed elsewhere in this module.
parser.add_argument("--refresh", **_switch_options)
# --cloud: when set, the droplet is destroyed once the job has finished.
parser.add_argument("--cloud", **_switch_options)


def doc_generator(df: pl.DataFrame, doc_idx: int = 0):
Expand Down Expand Up @@ -152,3 +155,8 @@ def docs_have_titles():

logger.info(f"Proccessed document: {doc_id}")
upload_comments(Database(), comment_data)

if args.cloud:
# kill instance after job finishes
do_client = Client(token=constants.DIGITAL_OCEAN)
do_client.droplets.destroy(droplet_id=constants.DROPLET_ID)
5 changes: 5 additions & 0 deletions civiclens/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
DATABASE_PORT = os.environ.get("DATABASE_PORT")
DATABASE_SSLMODE = os.environ.get("DATABASE_SSLMODE")
# Read the correctly spelled variable first; fall back to the historical
# misspelling ("DATBASE_TEST") so environments that still define only the old
# name keep working.
DATABASE_TEST = os.environ.get(
    "DATABASE_TEST", os.environ.get("DATBASE_TEST")
)
# SSH key identifier attached to newly created droplets.
SSH_ID = os.environ.get("SSH_ID")

# DigitalOcean
# API token used to create and destroy droplets.
DIGITAL_OCEAN = os.environ.get("DIGITAL_OCEAN")
# ID of the droplet the NLP job runs on; used to destroy it after the job.
DROPLET_ID = os.environ.get("DROPLET_ID")

# Regulations.GOV
REG_GOV_API_KEY = os.environ.get("REG_GOV_API_KEY")
44 changes: 0 additions & 44 deletions civiclens/utils/run_civiclens_nlp.sh

This file was deleted.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ transformers = "^4.41.0"
sentencepiece = "^0.2.0"
pytest-env = "^1.1.3"
lxml = "^5.2.2"
pydo = "^0.3.0"
azure-core = "^1.30.1"


[tool.poetry.group.dev.dependencies]
Expand Down
Loading