Skip to content

Commit

Permalink
Merge branch 'main' of github.com:nhsx/NHSSynth
Browse files Browse the repository at this point in the history
  • Loading branch information
HarrisonWilde committed Sep 4, 2023
2 parents 5694137 + df96b68 commit eaa4508
Show file tree
Hide file tree
Showing 8 changed files with 832 additions and 798 deletions.
1,012 changes: 487 additions & 525 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ sdmetrics = "^0.11.0"
tornado = "^6.3.3"
streamlit = "^1.25.0"
plotly = "^5.16.1"
umap-learn = "^0.5.3"

[tool.poetry.scripts]
nhssynth = "nhssynth.cli.__main__:__main__"
Expand Down
74 changes: 31 additions & 43 deletions src/nhssynth/modules/dashboard/Upload.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
import argparse
import os
import pickle
from typing import Any

import pandas as pd
import streamlit as st
from nhssynth.modules.dataloader.metatransformer import TypedDataset
from nhssynth.modules.evaluation.utils import EvalBundle
from nhssynth.modules.dataloader.io import TypedDataset
from nhssynth.modules.evaluation.io import Evaluations
from nhssynth.modules.model.io import Experiments, SyntheticDatasets


def parse_args() -> argparse.Namespace:
    """Parse the dashboard's command-line options.

    Each flag lets an artefact be pre-loaded from disk instead of (or as well
    as) the corresponding upload widget; the attribute names (`evaluations`,
    `experiments`, `synthetic_datasets`, `typed`) are the keys `get_component`
    reads via `getattr(args, name)`.

    Returns:
        The populated namespace; each attribute is a path string or None.
    """
    parser = argparse.ArgumentParser(description="NHSSynth Evaluation Dashboard")
    # NOTE(review): the superseded `--evaluation-bundle` flag from the pre-merge
    # version has been dropped; nothing reads `args.evaluation_bundle` any more.
    parser.add_argument("--evaluations", type=str, help="Path to a set of evaluations.")
    parser.add_argument("--experiments", type=str, help="Path to a set of experiments.")
    parser.add_argument("--synthetic-datasets", type=str, help="Path to a set of synthetic datasets.")
    parser.add_argument("--typed", type=str, help="Path to a typed real dataset.")
    return parser.parse_args()


def get_component(name: str, component_type: Any, text: str) -> None:
    """Offer one dashboard artefact via CLI path and/or upload widget, then stash it in session state.

    Reads the CLI argument matching `name` first, then any file uploaded through
    the widget (an upload overrides the CLI path because it is assigned later).
    On success, `loaded.contents` is stored under `st.session_state[name]`.

    Args:
        name: Session-state key; also the attribute looked up on the module-level `args`.
        component_type: Expected type of the unpickled object.
        text: Human-readable description used in the widget label and messages.
    """
    # Fix: `loaded` was previously only assigned inside the two branches, so
    # supplying neither a CLI path nor an upload raised NameError on the
    # `loaded is not None` check below.
    loaded = None
    uploaded = st.file_uploader(f"Upload a pickle file containing a {text}", type="pkl")
    if getattr(args, name):
        with open(os.getcwd() + "/" + getattr(args, name), "rb") as f:
            loaded = pickle.load(f)
    if uploaded is not None:
        # SECURITY(review): pickle.load on a user-supplied upload executes
        # arbitrary code on unpickling — acceptable only for a trusted,
        # locally-run dashboard; do not expose this page publicly.
        loaded = pickle.load(uploaded)
    if loaded is not None:
        assert isinstance(loaded, component_type), f"Uploaded file does not contain a {text}!"
        st.session_state[name] = loaded.contents
        st.success(f"Loaded {text}!")


# NOTE(review): this span is a GitHub unified-diff rendering, not runnable
# source — removed (pre-merge) and added (post-merge) lines are interleaved
# with no +/- markers, and indentation has been stripped by the paste.
# Comments below mark which version each run of lines belongs to.
if __name__ == "__main__":
# Entry point: parse CLI flags so artefacts can be pre-loaded without the widgets.
args = parse_args()

# NOTE(review): the next line is diff-viewer residue (a collapsed-hunk marker);
# hidden lines of the real file fall inside this fold.
Expand All @@ -25,43 +41,15 @@ def parse_args() -> argparse.Namespace:
"Welcome! Upload an evaluation bundle below to get started (optionally also the typed real dataset and bundle of experiments containing the synthetic datasets).\n\nUse the menu on the left to navigate the dashboard."
)

# NOTE(review): everything from here down to the `hide_streamlit_style`
# assignment is the REMOVED pre-merge version: it references EvalBundle, whose
# import was deleted in this commit, and was replaced by the four
# get_component(...) calls at the bottom of the file.
uploaded_eval_bundle = st.file_uploader("Upload a pickle file containing an evaluation bundle", type="pkl")
if args.evaluation_bundle:
with open(os.getcwd() + "/" + args.evaluation_bundle, "rb") as f:
eval_bundle = pickle.load(f)
if uploaded_eval_bundle is not None:
eval_bundle = pickle.load(uploaded_eval_bundle)
if eval_bundle is not None:
assert isinstance(eval_bundle, EvalBundle), "Uploaded file does not contain an evaluation bundle!"
st.session_state["evaluations"], st.session_state["experiments"] = (
eval_bundle.evaluations,
eval_bundle.experiments,
)
st.success(f"Loaded evaluation bundle!")

uploaded_experiments = st.file_uploader("Upload a pickle file containing a set of experiments", type="pkl")
if args.experiments:
with open(os.getcwd() + "/" + args.experiments, "rb") as f:
experiments = pickle.load(f)
if uploaded_experiments is not None:
experiments = pickle.load(uploaded_experiments)
if experiments is not None:
experiments = pd.DataFrame(experiments)
assert (
"dataset" in experiments.columns and "id" in experiments.columns
), "Uploaded file does not contain a set of experiments!"
st.session_state["synthetic_data"] = experiments[["id", "dataset"]]
st.success(f"Loaded synthetic datasets from experiments!")

uploaded_typed = st.file_uploader(
"Upload a pickle file containing the typed (by the dataloader module) real dataset", type="pkl"
)
if args.typed:
with open(os.getcwd() + "/" + args.typed, "rb") as f:
typed = pickle.load(f)
if uploaded_typed is not None:
typed = pickle.load(uploaded_typed)
if typed is not None:
assert isinstance(typed, TypedDataset), "Uploaded file does not contain a typed real dataset!"
st.session_state["real_data"] = typed
st.success(f"Loaded real dataset!")
# ADDED (post-merge): hide Streamlit's footer and deploy button via injected CSS.
hide_streamlit_style = """
<style>
footer {visibility: hidden;}
.stDeployButton {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# ADDED (post-merge): one uploader per artefact, replacing the removed blocks above.
get_component("evaluations", Evaluations, "bundle of evaluations")
get_component("experiments", Experiments, "bundle of experiments")
get_component("synthetic_datasets", SyntheticDatasets, "bundle of synthetic datasets")
get_component("typed", TypedDataset, "typed real dataset")
33 changes: 22 additions & 11 deletions src/nhssynth/modules/dashboard/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,38 @@


def check_input_paths(
    dir_experiment: str,
    fn_dataset: str,
    fn_typed: str,
    fn_experiments: str,
    fn_synthetic_datasets: str,
    fn_evaluations: str,
) -> tuple:
    """
    Sets up the input paths for the files the dashboard loads.

    Note (review): this block arrived as diff residue merging the pre- and
    post-merge versions (duplicate parameter lists, duplicated calls, a second
    unreachable return); this is the reconstructed post-merge version. The
    original `-> str` annotation was also wrong — a 4-tuple of paths is
    returned — and `fn_typed` was missing from the docstring.

    Args:
        dir_experiment: The path to the experiment directory.
            NOTE(review): used with the `/` operator below, so presumably a
            pathlib.Path despite the `str` annotation — confirm at call sites.
        fn_dataset: The base name of the dataset.
        fn_typed: The filename of the typed real dataset.
        fn_experiments: The filename of the collection of experiments.
        fn_synthetic_datasets: The filename of the collection of synthetic datasets.
        fn_evaluations: The filename of the collection of evaluations.

    Returns:
        The paths to the typed dataset, experiments, synthetic datasets and
        evaluations files, in that order.
    """
    fn_dataset = Path(fn_dataset).stem
    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = consistent_endings(
        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations]
    )
    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = potential_suffixes(
        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], fn_dataset
    )
    warn_if_path_supplied([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)
    check_exists([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)
    return (
        dir_experiment / fn_typed,
        dir_experiment / fn_experiments,
        dir_experiment / fn_synthetic_datasets,
        dir_experiment / fn_evaluations,
    )
Loading

0 comments on commit eaa4508

Please sign in to comment.