Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Uploads #9

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
__pycache__/
data/*
.venv
log/*
.coverage
uv.lock
116 changes: 116 additions & 0 deletions api/endpoints/upload_route.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Contains routes for uploading files and accessing uploaded files."""

from __future__ import annotations

import logging
from typing import Annotated

from fastapi import APIRouter, File, Form, HTTPException, UploadFile

from api.utils.upload_helper import (
calculate_md5_checksum,
get_all_filenames,
reassemble_file,
save_chunk,
save_file,
)

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router collecting every endpoint in this module under the "/upload" prefix.
router = APIRouter(prefix="/upload", tags=["upload"])


@router.post("/chunk")
async def upload_chunk(
    file: Annotated[UploadFile, File()],
    chunk_number: Annotated[int, Form()],
    total_chunks: Annotated[int, Form()],
    chunk_hash: Annotated[str, Form()],
):
    """
    Allow individual chunks to be uploaded and later reassembled.

    Parameters
    ----------
    file : UploadFile
        The chunk file to be uploaded.
    chunk_number : int
        The zero-based number of the chunk being uploaded.
    total_chunks : int
        The total number of chunks for the file.
    chunk_hash : str
        The MD5 hash of the chunk, as computed by the client.

    Returns
    -------
    None
        The endpoint sends no payload on success.

    Raises
    ------
    HTTPException
        With status 400 if the chunk does not match ``chunk_hash``, or with
        status 500 if any other error occurs during the upload process.
    """
    logger.info(f"Received chunk {chunk_number} of {total_chunks}")
    try:
        file_content = await file.read()
        hash_match = calculate_md5_checksum(file_content, chunk_hash)
        logger.info(f"Hash matches: {hash_match}")

        # A failed integrity check must stop the upload; otherwise a corrupted
        # chunk would be written and silently end up in the reassembled file.
        if not hash_match:
            raise HTTPException(
                status_code=400,
                detail=f"Checksum mismatch for chunk {chunk_number}",
            )

        save_chunk(file_content, chunk_number, file.filename)

        # NOTE(review): reassembly is triggered by the highest-numbered chunk,
        # which assumes all earlier chunks have already arrived - confirm that
        # clients upload chunks in order.
        if chunk_number == total_chunks - 1:
            reassemble_file(total_chunks, file.filename)
        return None
    except HTTPException:
        # Keep deliberate client errors (e.g. the 400 above) intact instead of
        # letting the generic handler below rewrap them as a 500.
        raise
    except Exception as e:
        logger.error(f"Error during chunk upload: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/single")
async def upload_single(
    file: Annotated[UploadFile, File()],
    file_hash: Annotated[str, Form()],
):
    """
    Upload a single file.

    Parameters
    ----------
    file : UploadFile
        The file to be uploaded.
    file_hash : str
        The MD5 hash of the file, as computed by the client.

    Returns
    -------
    None
        The endpoint sends no payload on success.

    Raises
    ------
    HTTPException
        With status 400 if the file does not match ``file_hash``, or with
        status 500 if any other error occurs during the upload process.
    """
    try:
        file_content = await file.read()
        hash_match = calculate_md5_checksum(file_content, file_hash)
        logger.info(f"Hash matches: {hash_match}")

        # Refuse to store content that failed its integrity check.
        if not hash_match:
            raise HTTPException(status_code=400, detail="Checksum mismatch for file")

        # The read above left the stream at end-of-file. save_file reads from
        # the same underlying handle, so rewind first or it writes 0 bytes.
        await file.seek(0)
        save_file(file)
        return None
    except HTTPException:
        # Keep deliberate client errors intact instead of rewrapping as a 500.
        raise
    except Exception as e:
        logger.error(f"Error during file upload: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/files")
async def get_files() -> list[str]:
    """
    List the names of all uploaded files.

    Returns
    -------
    list[str]
        The filenames reported by ``get_all_filenames`` for its default
        directory.
    """
    uploaded_filenames = get_all_filenames()
    return uploaded_filenames
125 changes: 125 additions & 0 deletions api/utils/upload_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Helper functions for uploading files and accessing uploaded files."""

from __future__ import annotations

import hashlib
from pathlib import Path

# Default storage location; used as the default ``directory`` argument of the
# helpers in this module.
# NOTE(review): this is a hard-coded absolute path - confirm it exists on every
# deployment target, or make it configurable.
DATA_DIR = Path("/home/ubuntu/janus-api/janus-web/data")


def save_chunk(
    file: bytes,
    chunk_number: int,
    original_filename: str,
    directory: Path = DATA_DIR,
) -> str:
    """
    Save a chunk of a file to the specified directory.

    Parameters
    ----------
    file : bytes
        The content of the chunk to be saved.
    chunk_number : int
        The number of the chunk being saved.
    original_filename : str
        The original filename of the file being chunked. Only its final path
        component is used.
    directory : Path, optional
        The directory where the chunk will be saved (default is DATA_DIR).

    Returns
    -------
    str
        The path where the chunk was saved.
    """
    # The filename comes from the client. Drop any directory components so a
    # name such as "../../etc/x" cannot place the chunk outside ``directory``.
    safe_name = Path(original_filename).name

    directory.mkdir(parents=True, exist_ok=True)
    chunk_path = directory / f"{safe_name}_chunk_{chunk_number}"
    chunk_path.write_bytes(file)
    return str(chunk_path)


def reassemble_file(
    total_chunks: int,
    original_filename: str,
    directory: Path = DATA_DIR,
) -> str:
    """
    Reassemble a file from its chunks.

    Chunks are expected at ``<directory>/<name>_chunk_<i>`` for every ``i`` in
    ``range(total_chunks)``. They are concatenated in index order and removed
    once the complete file has been written.

    Parameters
    ----------
    total_chunks : int
        The total number of chunks.
    original_filename : str
        The original filename of the file being reassembled. Only its final
        path component is used.
    directory : Path, optional
        The directory where the chunks are stored (default is DATA_DIR).

    Returns
    -------
    str
        The path where the reassembled file was saved.

    Raises
    ------
    ValueError
        If ``original_filename`` does not contain a usable file name.
    FileNotFoundError
        If one or more chunks are missing; nothing is written or deleted.
    """
    # The filename comes from the client. Keep only the final component so the
    # output cannot be written outside ``directory``.
    safe_name = Path(original_filename).name
    if safe_name in {"", ".", ".."}:
        raise ValueError(f"Invalid filename: {original_filename!r}")

    chunk_paths = [
        directory / f"{safe_name}_chunk_{index}" for index in range(total_chunks)
    ]

    # Check up front so a missing chunk cannot leave a truncated output file
    # behind together with already-deleted earlier chunks.
    missing = [path.name for path in chunk_paths if not path.is_file()]
    if missing:
        raise FileNotFoundError(
            f"Missing chunks for {safe_name!r}: {', '.join(missing)}"
        )

    output_path = directory / safe_name
    with output_path.open("wb") as complete_file:
        for chunk_path in chunk_paths:
            complete_file.write(chunk_path.read_bytes())

    # Remove the chunks only after the whole file was written successfully.
    for chunk_path in chunk_paths:
        chunk_path.unlink()
    return str(output_path)
Cbameron12 marked this conversation as resolved.
Show resolved Hide resolved


def save_file(file, directory: Path = DATA_DIR) -> str:
    """
    Save a file to the specified directory.

    Parameters
    ----------
    file : UploadFile
        The file to be saved. Its ``filename`` supplies the target name (only
        the final path component is used) and its ``file`` attribute supplies
        the content.
    directory : Path, optional
        The directory where the file will be saved (default is DATA_DIR).

    Returns
    -------
    str
        The path where the file was saved.

    Raises
    ------
    ValueError
        If ``file.filename`` does not contain a usable file name.
    """
    # The filename comes from the client. Keep only the final component so the
    # file cannot be written outside ``directory``.
    safe_name = Path(file.filename).name
    if safe_name in {"", ".", ".."}:
        raise ValueError(f"Invalid filename: {file.filename!r}")

    directory.mkdir(parents=True, exist_ok=True)
    file_path = directory / safe_name

    # Callers may already have read the upload (e.g. to hash it). Rewind so
    # the complete content is stored rather than whatever is left.
    file.file.seek(0)

    block_size = 1024 * 1024
    with file_path.open("wb") as buffer:
        # Copy in fixed-size blocks so large uploads are not held in memory.
        while block := file.file.read(block_size):
            buffer.write(block)
    return str(file_path)
Cbameron12 marked this conversation as resolved.
Show resolved Hide resolved


def calculate_md5_checksum(file_chunk: bytes, received_hash: str) -> bool:
    """
    Check the MD5 checksum of a file chunk against an expected value.

    Parameters
    ----------
    file_chunk : bytes
        The content of the file chunk.
    received_hash : str
        The hexadecimal MD5 digest calculated before the file was uploaded.
        Letter case and surrounding whitespace are ignored.

    Returns
    -------
    bool
        True if the hash matches.
    """
    calculated_hash = hashlib.md5(file_chunk).hexdigest()
    # hexdigest() is always lower-case. Normalise the received value so an
    # upper-case or whitespace-padded digest is not reported as a mismatch.
    return calculated_hash == received_hash.strip().lower()


def get_all_filenames(directory: Path = DATA_DIR) -> list[str]:
    """
    Get a list of all filenames in the data directory.

    Parameters
    ----------
    directory : Path
        Directory to get the filenames from.

    Returns
    -------
    list[str]
        The names of the regular files directly inside ``directory``, sorted
        alphabetically. If the directory does not exist or contains no files,
        the single-element list ``["No files found"]`` is returned.
    """
    # The directory is only created by the first upload, so it may not exist.
    # Treat that the same as an empty directory instead of raising.
    if not directory.is_dir():
        return ["No files found"]

    filenames = sorted(entry.name for entry in directory.iterdir() if entry.is_file())
    return filenames or ["No files found"]
Cbameron12 marked this conversation as resolved.
Show resolved Hide resolved
38 changes: 38 additions & 0 deletions logging_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Config setup for janus-api."""

from __future__ import annotations

from logging.config import dictConfig
from pathlib import Path

# Location of the log file; the parent directory is created at import time so
# the file handler configured below can open it.
log_file_path = Path("log/janus-api.log")
log_file_path.parent.mkdir(parents=True, exist_ok=True)

# dictConfig schema: messages at INFO and above go to both the log file and
# the console, using one shared format.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "default": {
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        },
    },
    "handlers": {
        "file": {
            "level": "INFO",
            "class": "logging.FileHandler",
            # Derived from log_file_path so the directory created above and
            # the file opened here cannot drift apart.
            "filename": str(log_file_path),
            "formatter": "default",
        },
        "console": {
            "level": "INFO",
            "class": "logging.StreamHandler",
            "formatter": "default",
        },
    },
    "root": {
        "level": "INFO",
        "handlers": ["file", "console"],
    },
}

# Applied as an import side effect: importing this module configures logging.
dictConfig(LOGGING_CONFIG)
19 changes: 19 additions & 0 deletions main.py
Cbameron12 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Main module for the application."""

from __future__ import annotations

from fastapi import FastAPI

from api.endpoints import upload_route
import logging_config

# Application instance exposing the upload endpoints.
app = FastAPI()
app.include_router(upload_route.router)

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "log_config": logging_config.LOGGING_CONFIG,
    }
    uvicorn.run(app, **server_options)
Loading
Loading