Uploads #9

Open · wants to merge 10 commits into main
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
__pycache__/
data/*
.venv
log/*
.coverage
uv.lock
104 changes: 104 additions & 0 deletions api/endpoints/upload_route.py
@@ -0,0 +1,104 @@
"""Contains routes for uploading files and accessing uploaded files."""

from __future__ import annotations

import logging
from typing import Annotated

from fastapi import APIRouter, File, Form, HTTPException, UploadFile

from api.utils.upload_helper import (
calculate_md5_checksum,
get_all_filenames,
reassemble_file,
save_chunk,
save_file,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/upload", tags=["upload"])


@router.post("/chunk")
async def upload_chunk(
file: Annotated[UploadFile, File()],
chunk_number: Annotated[int, Form()],
total_chunks: Annotated[int, Form()],
chunk_hash: Annotated[str, Form()],
) -> None:
"""
Allow individual chunks to be uploaded and later reassembled.

Parameters
----------
file : UploadFile
The chunk file to be uploaded.
chunk_number : int
The number of the chunk being uploaded.
total_chunks : int
The total number of chunks for the file.
chunk_hash : str
The MD5 hash of the chunk.

Raises
------
HTTPException
If there is an error during the upload process.
"""
logger.info(f"Received chunk {chunk_number} of {total_chunks}")
try:
file_content = await file.read()
hash_match = calculate_md5_checksum(file_content, chunk_hash)
logger.info(f"Hash matches: {hash_match}")

save_chunk(file_content, chunk_number, file.filename)

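        # Chunks are zero-indexed; the arrival of the final chunk triggers reassembly.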
if chunk_number == total_chunks - 1:
reassemble_file(total_chunks, file.filename)
except Exception as e:
logger.error(f"Error during chunk upload: {e}")
raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/single")
async def upload_single(
file: Annotated[UploadFile, File()],
file_hash: Annotated[str, Form()],
) -> None:
"""
Upload a single file.

Parameters
----------
file : UploadFile
The file to be uploaded.
file_hash : str
The MD5 hash of the file.

Raises
------
HTTPException
If there is an error during the upload process.
"""
try:
file_content = await file.read()
logger.info(f"Hash matches: {calculate_md5_checksum(file_content, file_hash)}")

save_file(file)
except Exception as e:
logger.error(f"Error during file upload: {e}")
raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/files")
async def get_files() -> list[str]:
"""
Get a list of all uploaded files.

Returns
-------
list of str
A list of filenames of all uploaded files.
"""
return get_all_filenames()
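
For reference, a minimal client-side sketch of the chunking protocol these routes expect, assuming the API is served at http://localhost:8000 (the URL, chunk size, and use of requests are illustrative assumptions, not part of this PR):

import hashlib
from pathlib import Path

import requests  # assumed client dependency, not part of this PR

CHUNK_SIZE = 1024 * 1024  # 1 MiB per chunk; an arbitrary illustrative choice
BASE_URL = "http://localhost:8000/upload"  # assumed local dev server


def upload_in_chunks(path: Path) -> None:
    """Split a file into chunks and POST each with its index, count, and MD5."""
    data = path.read_bytes()
    chunks = [data[i : i + CHUNK_SIZE] for i in range(0, len(data), CHUNK_SIZE)]
    for number, chunk in enumerate(chunks):
        response = requests.post(
            f"{BASE_URL}/chunk",
            files={"file": (path.name, chunk)},
            data={
                "chunk_number": number,
                "total_chunks": len(chunks),
                "chunk_hash": hashlib.md5(chunk).hexdigest(),
            },
            timeout=30,
        )
        response.raise_for_status()


upload_in_chunks(Path("example.txt"))
print(requests.get(f"{BASE_URL}/files", timeout=30).json())

The form-field names here are exactly what the Annotated[..., Form()] parameters above bind to.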
129 changes: 129 additions & 0 deletions api/utils/upload_helper.py
@@ -0,0 +1,129 @@
"""Helper functions for uploading files and accessing uploaded files."""

from __future__ import annotations

import hashlib
from pathlib import Path

from fastapi import UploadFile

DATA_DIR = Path("/home/ubuntu/janus-api/janus-web/data")


def save_chunk(
file: bytes, chunk_number: int, original_filename: str, directory: Path = DATA_DIR
) -> Path:
"""
Save a chunk of a file to the specified directory.

Parameters
----------
file : bytes
The content of the chunk to be saved.
chunk_number : int
The number of the chunk being saved.
original_filename : str
The original filename of the file being chunked.
directory : Path, optional
The directory where the chunk will be saved (default is DATA_DIR).

Returns
-------
    Path
The path where the chunk was saved.
"""
directory.mkdir(parents=True, exist_ok=True)
chunk_path = directory / f"{original_filename}_chunk_{chunk_number}"
chunk_path.write_bytes(file)
return chunk_path


def reassemble_file(
total_chunks: int, original_filename: str, directory: Path = DATA_DIR
) -> Path:
"""
Reassemble a file from its chunks.

Parameters
----------
total_chunks : int
The total number of chunks.
original_filename : str
The original filename of the file being reassembled.
directory : Path, optional
The directory where the chunks are stored (default is DATA_DIR).

Returns
-------
    Path
The path where the reassembled file was saved.
"""
output_path = directory / original_filename
with output_path.open("wb") as complete_file:
for i in range(total_chunks):
chunk_path = directory / f"{original_filename}_chunk_{i}"
with chunk_path.open("rb") as chunk_file:
complete_file.write(chunk_file.read())
chunk_path.unlink()
return output_path
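

# reassemble_file assumes every chunk 0..total_chunks-1 is already on disk.
# A hypothetical pre-check helper (a sketch, not part of this PR) could report gaps:
def missing_chunks(
    total_chunks: int, original_filename: str, directory: Path = DATA_DIR
) -> list[int]:
    """Hypothetical sketch: return the chunk indices that never arrived."""
    return [
        i
        for i in range(total_chunks)
        if not (directory / f"{original_filename}_chunk_{i}").exists()
    ]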


def save_file(file: UploadFile, directory: Path = DATA_DIR) -> Path:
"""
Save a file to the specified directory.

Parameters
----------
    file : UploadFile
        The uploaded file to be saved.
directory : Path, optional
The directory where the file will be saved (default is DATA_DIR).

Returns
-------
    Path
The path where the file was saved.
"""
directory.mkdir(parents=True, exist_ok=True)
file_path = directory / file.filename
with file_path.open("wb") as buffer:
buffer.write(file.file.read())
return file_path


def calculate_md5_checksum(file_chunk: bytes, received_hash: str) -> bool:
"""
    Calculate the MD5 checksum of a file chunk and compare it to the received hash.

Parameters
----------
file_chunk : bytes
The content of the file chunk.
received_hash : str
The hash calculated before the file was uploaded.

Returns
-------
bool
True if the hash matches.
"""
md5 = hashlib.md5()
md5.update(file_chunk)
calculated_hash = md5.hexdigest()
return calculated_hash == received_hash
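

# For large uploads one might hash incrementally instead of holding the whole
# payload in memory; a hypothetical sketch (not part of this PR):
def md5_of_file(path: Path, block_size: int = 64 * 1024) -> str:
    """Hypothetical sketch: stream a file through MD5 in fixed-size blocks."""
    md5 = hashlib.md5()
    with path.open("rb") as handle:
        while block := handle.read(block_size):
            md5.update(block)
    return md5.hexdigest()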


def get_all_filenames(directory: Path = DATA_DIR) -> list[str]:
"""
Get a list of all filenames in the data directory.

Parameters
----------
    directory : Path, optional
        The directory to list filenames from (default is DATA_DIR).

Returns
-------
list of str
A list of filenames in the data directory.
"""
    return [file.name for file in directory.iterdir() if file.is_file()]
38 changes: 38 additions & 0 deletions logging_config.py
@@ -0,0 +1,38 @@
"""Config setup for janus-api."""

from __future__ import annotations

from logging.config import dictConfig
from pathlib import Path

log_file_path = Path("log/janus-api.log")
log_file_path.parent.mkdir(parents=True, exist_ok=True)

LOGGING_CONFIG = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"default": {
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
},
},
"handlers": {
"file": {
"level": "INFO",
"class": "logging.FileHandler",
"filename": "log/janus-api.log",
"formatter": "default",
},
"console": {
"level": "INFO",
"class": "logging.StreamHandler",
"formatter": "default",
},
},
"root": {
"level": "INFO",
"handlers": ["file", "console"],
},
}

dictConfig(LOGGING_CONFIG)
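
Because dictConfig is applied at import time and both handlers hang off the root logger, any module picks them up simply by requesting a named logger. A minimal usage sketch under those assumptions:

import logging

import logging_config  # noqa: F401  # importing applies dictConfig

logger = logging.getLogger(__name__)
logger.info("Written to both log/janus-api.log and the console.")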
19 changes: 19 additions & 0 deletions main.py
@@ -0,0 +1,19 @@
"""Main module for the application."""

from __future__ import annotations

from fastapi import FastAPI

import logging_config  # importing applies dictConfig; configure logging first
from api.endpoints import upload_route

app = FastAPI()

app.include_router(upload_route.router)

if __name__ == "__main__":
import uvicorn

uvicorn.run(
app, host="0.0.0.0", port=8000, log_config=logging_config.LOGGING_CONFIG
)
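
The routes can also be exercised without a running server through FastAPI's TestClient; a sketch assuming the layout above (note that save_file writes to the hard-coded DATA_DIR, so this only succeeds where that path is writable):

import hashlib

from fastapi.testclient import TestClient

from main import app

client = TestClient(app)
content = b"hello"
response = client.post(
    "/upload/single",
    files={"file": ("hello.txt", content)},
    data={"file_hash": hashlib.md5(content).hexdigest()},
)
assert response.status_code == 200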
85 changes: 85 additions & 0 deletions tests/test_upload_helper.py
@@ -0,0 +1,85 @@
"""Tests for upload helper functions."""

from __future__ import annotations

import hashlib
from pathlib import Path

from api.utils.upload_helper import (
calculate_md5_checksum,
get_all_filenames,
reassemble_file,
save_chunk,
save_file,
)


def test_get_all_filenames(tmp_path):
"""Test if getter returns all of the filenames correctly."""
filenames = ["file1.txt", "file2.txt"]
for filename in filenames:
(tmp_path / filename).write_text("Test content")

result = get_all_filenames(tmp_path)

assert sorted(result) == sorted(filenames)


def test_save_chunk(tmp_path):
"""Test if save chunk function saves a given chunk in the correct directory."""
file_chunk = b"Test chunk data"
chunk_number = 0
original_filename = "testfile.txt"

chunk_path = save_chunk(file_chunk, chunk_number, original_filename, tmp_path)

assert Path(chunk_path).exists()
assert Path(chunk_path).read_bytes() == file_chunk


def test_reassemble_file(tmp_path):
"""Test if file is correctly rebuilt when function called."""
original_filename = "testfile.txt"
total_chunks = 2
file_chunks = [b"Test chunk 1", b"Test chunk 2"]

for i, chunk in enumerate(file_chunks):
save_chunk(chunk, i, original_filename, tmp_path)

reassembled_file_path = reassemble_file(total_chunks, original_filename, tmp_path)

assert Path(reassembled_file_path).exists()
assert Path(reassembled_file_path).read_bytes() == b"".join(file_chunks)


def test_save_file(tmp_path):
"""Test if save file function saves a given file in the correct directory."""
from io import BytesIO

from fastapi import UploadFile

file_content = b"Test file content"
file = UploadFile(filename="testfile.txt", file=BytesIO(file_content))

file_path = save_file(file, tmp_path)

assert Path(file_path).exists()
assert Path(file_path).read_bytes() == file_content


def test_calculate_md5_checksum():
"""Test for checksum check when the hash should match."""
file_chunk = b"Test data for checksum"
received_hash = hashlib.md5(file_chunk).hexdigest()
result = calculate_md5_checksum(file_chunk, received_hash)

assert result


def test_calculate_md5_checksum_mismatch():
"""Test for checksum check when the hash is incorrect."""
file_chunk = b"Test data for checksum"
received_hash = "incorrecthash"
result = calculate_md5_checksum(file_chunk, received_hash)

assert not result