Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Submission form workflow #54

Merged
merged 35 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
85dc0e8
Add forms for data submission upload
akuny Apr 3, 2024
9f7b005
Add status to DataSubmission entity and model
akuny Apr 3, 2024
06a304e
Adding tags
akuny Apr 3, 2024
6839458
Format
akuny Apr 3, 2024
a559a11
Update tests
akuny Apr 3, 2024
8551ca7
Testing
akuny Apr 3, 2024
ae378ad
Demo workflow
akuny Apr 3, 2024
e876c5c
Working through demo
akuny Apr 3, 2024
7e7d0f8
Fix casing
akuny Apr 3, 2024
66f2ec0
Stub out FileValidator class
akuny Apr 8, 2024
6fe3df0
Merge branch 'main' into submission-form-workflow
akuny Apr 8, 2024
89dcb92
Add file validation logic
akuny Apr 9, 2024
aea02ad
Shift dependency from dev only
akuny Apr 9, 2024
cda0218
Tweak alpine.js component functions to emphasize similarities for futu…
akuny Apr 11, 2024
b8090e8
Fix typo
akuny Apr 11, 2024
a0423e9
Update data submission entity to include name property
akuny Apr 12, 2024
2779918
Update model and repository
akuny Apr 12, 2024
e97a109
Update view model
akuny Apr 12, 2024
6008411
Remove unused cli function
akuny Apr 12, 2024
429e780
Add migration for data submission name
akuny Apr 12, 2024
2504e2a
Update storage upload method to also handle file in addition to path
akuny Apr 12, 2024
870b0d8
Add migration to change data_submissions field name
akuny Apr 12, 2024
69eb98b
Update templates
akuny Apr 12, 2024
2b97d23
Update seed script
akuny Apr 12, 2024
7932642
Update required fields
akuny Apr 12, 2024
226667f
Update seed script
akuny Apr 12, 2024
1ae79cd
Update tests
akuny Apr 12, 2024
df76153
Update test data to reflect new required fields criteria
akuny Apr 12, 2024
60b4732
Update test cases for data submission use cases
akuny Apr 12, 2024
0d0e18e
Fix issue with shapefile config
akuny Apr 12, 2024
b12714c
Format
akuny Apr 12, 2024
776ff9d
Merge branch 'main' into submission-form-workflow
akuny Apr 15, 2024
fe629cd
Update poetry.lock file
akuny Apr 15, 2024
79c3251
Add back in gdb file
akuny Apr 15, 2024
38fa230
Merge branch 'main' into submission-form-workflow
akuny May 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[flake8]
max-line-length = 88
exclude = venv,.git,__pycache__,docs/source/conf.py,old,build,dist,node_modules
ignore = F403, F401, F405, W503
ignore = F403, F401, F405, W503, E704
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Add name field to data_submissions table

Revision ID: 3ff8e29d705e
Revises: a511ca087149
Create Date: 2024-04-12 12:50:48.903157

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "3ff8e29d705e"
down_revision: Union[str, None] = "a511ca087149"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply: add a non-nullable ``name`` column to ``data_submissions``."""
    # NOTE(review): a NOT NULL column with no server_default will fail to
    # apply if data_submissions already holds rows — confirm the table is
    # empty (or backfill first) in every environment this runs against.
    name_column = sa.Column("name", sa.String, nullable=False)
    op.add_column("data_submissions", name_column)


def downgrade() -> None:
    """Revert: drop the ``name`` column from ``data_submissions``."""
    op.drop_column("data_submissions", "name")
27 changes: 27 additions & 0 deletions alembic/versions/558f8d429963_rename_data_submissions_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Rename data_submissions field

Revision ID: 558f8d429963
Revises: 3ff8e29d705e
Create Date: 2024-04-12 13:18:47.612471

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "558f8d429963"
down_revision: Union[str, None] = "3ff8e29d705e"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply: rename ``data_submissions.filename`` to ``file_path``."""
    op.alter_column("data_submissions", "filename", new_column_name="file_path")


def downgrade() -> None:
    """Revert: rename ``data_submissions.file_path`` back to ``filename``."""
    op.alter_column("data_submissions", "file_path", new_column_name="filename")
55 changes: 55 additions & 0 deletions alembic/versions/a511ca087149_add_datasubmission_status_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Add DataSubmission status column

Revision ID: a511ca087149
Revises: 9bb8e29b98fa
Create Date: 2024-03-29 09:30:08.502456

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision: str = "a511ca087149"
down_revision: Union[str, None] = "9bb8e29b98fa"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade():
    """Create the ``data_submission_status`` enum type, then add a
    non-nullable ``status`` column (default ``PENDING_SUBMISSION``) to
    ``data_submissions``."""
    status_values = (
        "PENDING_SUBMISSION",
        "CANCELED",
        "PENDING_VALIDATION",
        "FAILED",
        "VALIDATED",
    )
    status_enum = postgresql.ENUM(*status_values, name="data_submission_status")
    # checkfirst avoids an error if the type already exists in the database.
    status_enum.create(op.get_bind(), checkfirst=True)

    status_column = sa.Column(
        "status",
        status_enum,
        nullable=False,
        server_default="PENDING_SUBMISSION",
    )
    op.add_column("data_submissions", status_column)


def downgrade():
    """Drop the ``status`` column first, then remove the now-unused
    ``data_submission_status`` enum type."""
    op.drop_column("data_submissions", "status")
    status_enum = postgresql.ENUM(
        "PENDING_SUBMISSION",
        "CANCELED",
        "PENDING_VALIDATION",
        "FAILED",
        "VALIDATED",
        name="data_submission_status",
    )
    # checkfirst makes the drop a no-op if the type is already gone.
    status_enum.drop(op.get_bind(), checkfirst=True)
14 changes: 14 additions & 0 deletions nad_ch/application/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,17 @@ class OAuth2TokenError(NadChError):

def __init__(self, message="OAuth2 token retrieval failed."):
super().__init__(message)


class InvalidDataSubmissionFileError(NadChError):
    """Raised when an uploaded data submission file fails validation."""

    def __init__(self, message="Invalid data submission file."):
        super().__init__(message)


class InvalidSchemaError(NadChError):
    """Raised when a submission's schema does not match the expected mapping."""

    def __init__(self, message="Invalid schema."):
        super().__init__(message)
44 changes: 16 additions & 28 deletions nad_ch/application/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,21 @@


class Logger(Protocol):
def info(self, message):
...
def info(self, message): ...

def error(self, message):
...
def error(self, message): ...

def warning(self, message):
...
def warning(self, message): ...


class Storage(Protocol):
def upload(self, source: str, destination: str) -> bool:
...
def upload(self, source: str, destination: str) -> bool: ...

def delete(self, key: str) -> bool:
...
def delete(self, key: str) -> bool: ...

def download_temp(self, key: str) -> Optional[DownloadResult]:
...
def download_temp(self, key: str) -> Optional[DownloadResult]: ...

def cleanup_temp_dir(self, temp_dir: str) -> bool:
...
def cleanup_temp_dir(self, temp_dir: str) -> bool: ...


class TaskQueue(Protocol):
Expand All @@ -40,30 +33,25 @@ def run_load_and_validate(
submission_id: int,
path: str,
column_map: Dict[str, str],
):
...
): ...


class Authentication(Protocol):
def fetch_oauth2_token(self, provider_name: str, code: str) -> str | None:
...
def fetch_oauth2_token(self, provider_name: str, code: str) -> str | None: ...

def fetch_user_email_from_login_provider(
self, provider_name: str, oauth2_token: str
) -> str | list[str] | None:
...
) -> str | list[str] | None: ...

def get_logout_url(self, provider_name: str) -> str:
...
def get_logout_url(self, provider_name: str) -> str: ...

def make_login_url(self, provider_name: str, state_token: str) -> str | None:
...
def make_login_url(self, provider_name: str, state_token: str) -> str | None: ...

def make_logout_url(self, provider_name: str) -> str | None:
...
def make_logout_url(self, provider_name: str) -> str | None: ...

def user_email_address_has_permitted_domain(self, email: str | list[str]) -> bool:
...
def user_email_address_has_permitted_domain(
self, email: str | list[str]
) -> bool: ...


class ApplicationContext:
Expand Down
127 changes: 85 additions & 42 deletions nad_ch/application/use_cases/data_submissions.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,20 @@
import os
from typing import List
from typing import List, IO
from nad_ch.application.dtos import DownloadResult
from nad_ch.application.exceptions import (
InvalidDataSubmissionFileError,
InvalidSchemaError,
)
from nad_ch.application.interfaces import ApplicationContext
from nad_ch.application.validation import FileValidator
from nad_ch.application.view_models import (
get_view_model,
DataSubmissionViewModel,
)
from nad_ch.core.entities import DataSubmission, ColumnMap
from nad_ch.core.entities import DataSubmissionStatus, DataSubmission, ColumnMap
from nad_ch.config import LANDING_ZONE


def ingest_data_submission(
ctx: ApplicationContext, file_path: str, producer_name: str
) -> DataSubmissionViewModel:
if not file_path:
ctx.logger.error("File path required")
return

_, file_extension = os.path.splitext(file_path)
if file_extension.lower() not in [".zip", ".csv"]:
ctx.logger.error("Invalid file format. Only ZIP or CSV files are accepted.")
return

producer = ctx.producers.get_by_name(producer_name)
if not producer:
ctx.logger.error("Producer with that name does not exist")
return

try:
filename = DataSubmission.generate_filename(file_path, producer)
ctx.storage.upload(file_path, filename)

# TODO: Finish logic for obtaining column map from user
column_map = ColumnMap("placeholder", producer, 1)

submission = DataSubmission(filename, producer, column_map)
saved_submission = ctx.submissions.add(submission)
ctx.logger.info(f"Submission added: {saved_submission.filename}")

return get_view_model(saved_submission)
except Exception as e:
ctx.storage.delete(filename)
ctx.logger.error(f"Failed to process submission: {e}")


def get_data_submission(
ctx: ApplicationContext, submission_id: int
) -> DataSubmissionViewModel:
Expand All @@ -55,7 +26,7 @@ def get_data_submission(
return get_view_model(submission)


def list_data_submissions_by_producer(
def get_data_submissions_by_producer(
ctx: ApplicationContext, producer_name: str
) -> List[DataSubmissionViewModel]:
producer = ctx.producers.get_by_name(producer_name)
Expand All @@ -66,28 +37,32 @@ def list_data_submissions_by_producer(
submissions = ctx.submissions.get_by_producer(producer)
ctx.logger.info(f"Data submissions for {producer.name}")
for s in submissions:
ctx.logger.info(f"{s.producer.name}: {s.filename}")
ctx.logger.info(f"{s.producer.name}: {s.name}")

return get_view_model(submissions)


def validate_data_submission(
ctx: ApplicationContext, filename: str, column_map_name: str
ctx: ApplicationContext, file_path: str, column_map_name: str
):
submission = ctx.submissions.get_by_filename(filename)
submission = ctx.submissions.get_by_file_path(file_path)
if not submission:
ctx.logger.error("Data submission with that filename does not exist")
return

download_result: DownloadResult = ctx.storage.download_temp(filename)
download_result: DownloadResult = ctx.storage.download_temp(file_path)
if not download_result:
ctx.logger.error("Data extration error")
return

column_map = ctx.column_maps.get_by_name_and_version(column_map_name, 1)
if column_map is None:
ctx.logger.error("Column map not found")
return

# Using version 1 for column maps for now, may add feature for user to select
# version later
try:
column_map = ctx.column_maps.get_by_name_and_version(column_map_name, 1)
mapped_data_local_dir = submission.get_mapped_data_dir(
download_result.extracted_dir, LANDING_ZONE
)
Expand All @@ -112,3 +87,71 @@ def validate_data_submission(
finally:
ctx.storage.cleanup_temp_dir(download_result.temp_dir)
ctx.storage.cleanup_temp_dir(mapped_data_local_dir)


def validate_file_before_submission(
    ctx: ApplicationContext, file: IO[bytes], column_map_id: int
) -> bool:
    """Validate an uploaded file before a data submission is created.

    Checks, in order: the referenced column map exists; the file is a ZIP
    archive; the archive contains a supported format (Shapefile or
    Geodatabase); and the file's schema aligns with the column map.

    Args:
        ctx: application context providing repositories.
        file: the uploaded binary file object.
        column_map_id: id of the column map to validate the schema against.

    Returns:
        True when every check passes.

    Raises:
        ValueError: if no column map exists with ``column_map_id``.
        InvalidDataSubmissionFileError: if the file is not a ZIP or its
            contents are not a supported format.
        InvalidSchemaError: if the file's schema does not match the mapping.
    """
    column_map = ctx.column_maps.get_by_id(column_map_id)
    if column_map is None:
        raise ValueError("Column map not found")

    # BUG FIX: the original read ``file.filename`` here but ``file.name``
    # below; an ``IO[bytes]`` object only exposes ``.name``, so use one
    # attribute consistently. NOTE(review): if callers actually pass a
    # Werkzeug FileStorage, ``.filename`` is the uploaded name — confirm
    # which object reaches this function and adjust both call sites together.
    file_name = file.name
    _, file_extension = os.path.splitext(file_name)
    if file_extension.lower() != ".zip":
        raise InvalidDataSubmissionFileError(
            "Invalid file format. Only ZIP files are accepted."
        )

    file_validator = FileValidator(file, file_name)
    if not file_validator.validate_file():
        raise InvalidDataSubmissionFileError(
            "Invalid file format. Only Shapefiles and Geodatabase files are accepted."
        )

    if not file_validator.validate_schema(column_map):
        raise InvalidSchemaError(
            "Invalid schema. The schema of the file must align with the schema of the \
selected mapping."
        )

    return True


def create_data_submission(
    ctx: ApplicationContext,
    user_id: int,
    column_map_id: int,
    submission_name: str,
    file: IO[bytes],
):
    """Create a data submission record and upload its file to storage.

    Args:
        ctx: application context providing repositories, storage and logging.
        user_id: id of the submitting user; the user's producer is used.
        column_map_id: id of the column map associated with the submission.
        submission_name: human-readable name for the new submission.
        file: the uploaded binary file object to store.

    Returns:
        A view model for the saved submission, or ``None`` when processing
        fails after the lookup checks (the error is logged, not re-raised).

    Raises:
        ValueError: if the user, the user's producer, or the column map
            cannot be found.
    """
    user = ctx.users.get_by_id(user_id)
    if user is None:
        raise ValueError("User not found")

    producer = user.producer
    if producer is None:
        raise ValueError("Producer not found")

    column_map = ctx.column_maps.get_by_id(column_map_id)
    if column_map is None:
        raise ValueError("Column map not found")

    # BUG FIX: file_path must exist before the try block; the original
    # referenced it unconditionally in the except handler, which raised
    # NameError whenever generate_zipped_file_path itself failed.
    file_path = None
    try:
        file_path = DataSubmission.generate_zipped_file_path(submission_name, producer)
        submission = DataSubmission(
            submission_name,
            file_path,
            DataSubmissionStatus.PENDING_SUBMISSION,
            producer,
            column_map,
        )
        saved_submission = ctx.submissions.add(submission)

        ctx.storage.upload(file, file_path)

        ctx.logger.info(f"Submission added: {saved_submission.file_path}")

        return get_view_model(saved_submission)
    except Exception as e:
        # Best-effort cleanup of the uploaded object only when a path was
        # actually generated. NOTE(review): the repository record added
        # above is not rolled back on a later upload failure — confirm
        # whether an orphaned submission row is acceptable here.
        if file_path is not None:
            ctx.storage.delete(file_path)
        ctx.logger.error(f"Failed to process submission: {e}")
Loading
Loading