Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bacpop-207 Add info_csv file for metadata #54

Merged
merged 24 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ae2bc78
test: small csv from nicj into microreact
absternator Dec 9, 2024
2f89e6f
feat: add db_metadata_file field to args.json for enhanced configuration
absternator Jan 7, 2025
be7808a
{commit_message}
absternator Jan 8, 2025
7316728
test: add unit test for DatabaseFileStore metadata handling
absternator Jan 9, 2025
652ee21
style: format code for better readability in utils.py and test_unit.py
absternator Jan 9, 2025
65f2155
style: improve code readability in build_subgraph function
absternator Jan 9, 2025
6d4ca06
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 9, 2025
cf52a3f
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 9, 2025
ecefc1b
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 9, 2025
4831d26
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 9, 2025
f5136c2
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 10, 2025
743d5a7
style: clean up whitespace in test_unit.py
absternator Jan 10, 2025
705983f
feat: add timestamp to cluster name in microreact metadata
absternator Jan 10, 2025
716e87e
style: format long line in generate_microreact_url_internal function
absternator Jan 10, 2025
45abf25
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 13, 2025
a8d984a
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 14, 2025
4d467b9
Merge branch 'bacpop-205-fix-network' of https://github.com/bacpop/be…
absternator Jan 14, 2025
e344aee
fix: update species_args type to Any in setup_db_file_stores function
absternator Jan 14, 2025
cc18973
test: update URL assertion in test_results_microreact for cluster mat…
absternator Jan 14, 2025
39020b9
refactor: introduce dataClasses for ClusteringConfig and SpeciesConfi…
absternator Jan 14, 2025
f379993
style: add blank lines for improved readability in assignClusters and…
absternator Jan 14, 2025
ae26808
Merge branch 'bacpop-186-v9-db-support' of https://github.com/bacpop/…
absternator Jan 14, 2025
e300688
Merge branch 'bacpop-186-v9-db-support' of https://github.com/bacpop/…
absternator Jan 15, 2025
4b8662c
Merge branch 'bacpop-186-v9-db-support' of https://github.com/bacpop/…
absternator Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion beebop/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
import json
import requests
import pickle
from datetime import datetime
from typing import Any

from beebop import versions, assignClusters, visualise
from beebop.filestore import PoppunkFileStore, DatabaseFileStore
from beebop.utils import get_args, get_cluster_num
from PopPUNK.sketchlib import getKmersFromReferenceDatabase
import beebop.schemas
from beebop.dataClasses import SpeciesConfig
schemas = beebop.schemas.Schema()

redis_host = os.environ.get("REDIS_HOST")
Expand Down Expand Up @@ -326,7 +329,7 @@ def run_poppunk_internal(sketches: dict,


def setup_db_file_stores(
species_args: dict,
species_args: SpeciesConfig,
) -> tuple[DatabaseFileStore, DatabaseFileStore]:
"""
[Initializes the reference and full database file stores
Expand All @@ -340,12 +343,14 @@ def setup_db_file_stores(
ref_db_fs = DatabaseFileStore(
f"{dbs_location}/{species_args.refdb}",
species_args.external_clusters_file,
species_args.db_metadata_file,
)

if os.path.exists(f"{dbs_location}/{species_args.fulldb}"):
full_db_fs = DatabaseFileStore(
f"{dbs_location}/{species_args.fulldb}",
species_args.external_clusters_file,
species_args.db_metadata_file,
)
else:
full_db_fs = ref_db_fs
Expand Down Expand Up @@ -595,6 +600,9 @@ def generate_microreact_url_internal(microreact_api_new_url: str,
with open(path_json, 'rb') as microreact_file:
json_microreact = json.load(microreact_file)

json_microreact["meta"][
"name"
] = f"Cluster {cluster_num} - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
# generate URL from microreact API
headers = {"Content-type": "application/json; charset=UTF-8",
"Access-Token": api_token}
Expand Down
16 changes: 1 addition & 15 deletions beebop/assignClusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from beebop.poppunkWrapper import PoppunkWrapper
from beebop.filestore import PoppunkFileStore, DatabaseFileStore
import shutil
from typing import Optional, Any
from dataclasses import dataclass
from beebop.dataClasses import ClusteringConfig


def hex_to_decimal(sketches_dict) -> None:
Expand All @@ -33,19 +32,6 @@ def hex_to_decimal(sketches_dict) -> None:
sample[key] = list(map(lambda x: int(x, 16), value))


@dataclass
class ClusteringConfig:
species: str
p_hash: str
args: dict
external_clusters_prefix: Optional[str]
fs: PoppunkFileStore
full_db_fs: DatabaseFileStore
ref_db_fs: DatabaseFileStore
db_funcs: dict[str, Any]
out_dir: str


def get_clusters(
hashes_list: list,
p_hash: str,
Expand Down
42 changes: 42 additions & 0 deletions beebop/dataClasses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from dataclasses import dataclass
from typing import Optional, Any
from beebop.filestore import PoppunkFileStore, DatabaseFileStore


@dataclass
class ClusteringConfig:
    """
    Plain data container grouping the values and file-store handles that
    are passed around together for a clustering run.

    Field order is part of the generated ``__init__`` signature, so new
    fields should be appended rather than inserted.
    """
    # Species identifier.
    # NOTE(review): presumably a key of the species section in args.json
    # - confirm against the caller.
    species: str
    # NOTE(review): presumably the project hash used to locate per-project
    # output - confirm against the caller.
    p_hash: str
    # Untyped argument mapping; its schema is not visible from this file.
    args: dict
    # May be None.
    # NOTE(review): presumably None when a species has no external
    # clusters - confirm.
    external_clusters_prefix: Optional[str]
    # File store for PopPUNK working/output files.
    fs: PoppunkFileStore
    # File store for the full database.
    full_db_fs: DatabaseFileStore
    # File store for the reference database.
    ref_db_fs: DatabaseFileStore
    # Mapping of names to database helper objects; the value types are
    # intentionally left as Any.
    db_funcs: dict[str, Any]
    # Output directory path.
    out_dir: str


@dataclass
class Qc:
    """
    Plain data container for quality-control settings.

    NOTE(review): the field names look like they mirror PopPUNK's QC
    options; the exact meaning and units of each value are not visible
    from this file - confirm against the PopPUNK documentation before
    relying on the comments below.
    """
    # Whether QC is run at all.
    run_qc: bool
    # Optional type isolate name; may be None.
    type_isolate: Optional[str]
    # Numeric QC thresholds (see NOTE in the class docstring).
    max_a_dist: float
    max_pi_dist: float
    prop_zero: float
    prop_n: float
    max_merge: int
    # Boolean QC switches (see NOTE in the class docstring).
    betweenness: bool
    retain_failures: bool
    no_remove: bool
    # NOTE(review): presumably a [lower, upper] pair - confirm.
    length_range: list[int]
    # Optional integer limit; may be None.
    upper_n: Optional[int]


@dataclass
class SpeciesConfig:
    """
    Plain data container describing the per-species database settings.

    Field order is part of the generated ``__init__`` signature, so new
    fields should be appended rather than inserted.
    """
    # Name of the reference database; joined onto the databases location
    # to build the reference DatabaseFileStore path.
    refdb: str
    # Name of the full database; joined onto the databases location. When
    # that path does not exist the reference database store is used in its
    # place.
    fulldb: str
    # NOTE(review): spelled "cluster" here but "clusters" in
    # ClusteringConfig.external_clusters_prefix - confirm which spelling
    # the configuration file uses before renaming either one.
    external_cluster_prefix: str
    # File name handed to DatabaseFileStore as external_clusters_file.
    # NOTE(review): DatabaseFileStore accepts None for this value, so the
    # field may need to be Optional[str] - confirm whether every species
    # defines it.
    external_clusters_file: str
    # File name handed to DatabaseFileStore as db_metadata_file, where it
    # is resolved under beebop/resources; a falsy value results in no
    # metadata path.
    # NOTE(review): same Optional[str] question as above.
    db_metadata_file: str
    # Quality-control settings for this species.
    qc_dict: Qc
10 changes: 9 additions & 1 deletion beebop/filestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,10 @@ class DatabaseFileStore:
"""

def __init__(
self, full_path: str, external_clusters_file: Optional[str] = None
self,
full_path: str,
external_clusters_file: Optional[str] = None,
db_metadata_file: Optional[str] = None,
):
"""
:param full_path: [path to database]
Expand All @@ -311,3 +314,8 @@ def __init__(
if external_clusters_file
else None
)
self.metadata = (
str(PurePath("beebop", "resources", db_metadata_file))
if db_metadata_file
else None
)
4 changes: 2 additions & 2 deletions beebop/poppunkWrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def create_microreact(self, cluster: str, internal_cluster: str) -> None:
previous_distances=None,
network_file=None,
gpu_graph=self.args.visualise.gpu_graph,
info_csv=self.args.visualise.info_csv,
rapidnj=shutil.which("rapidnj"),
info_csv=self.db_fs.metadata,
rapidnj=shutil.which('rapidnj'),
api_key=None,
tree=self.args.visualise.tree,
mst_distances=self.args.visualise.mst_distances,
Expand Down
Loading
Loading