Skip to content

Commit

Permalink
Merge branch 'main' of github.com:nhsx/NHSSynth
Browse files Browse the repository at this point in the history
  • Loading branch information
HarrisonWilde committed Sep 4, 2023
2 parents 5694137 + df96b68 commit eaa4508
Show file tree
Hide file tree
Showing 8 changed files with 832 additions and 798 deletions.
1,012 changes: 487 additions & 525 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ sdmetrics = "^0.11.0"
tornado = "^6.3.3"
streamlit = "^1.25.0"
plotly = "^5.16.1"
umap-learn = "^0.5.3"

[tool.poetry.scripts]
nhssynth = "nhssynth.cli.__main__:__main__"
Expand Down
74 changes: 31 additions & 43 deletions src/nhssynth/modules/dashboard/Upload.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
import argparse
import os
import pickle
from typing import Any

import pandas as pd
import streamlit as st
from nhssynth.modules.dataloader.metatransformer import TypedDataset
from nhssynth.modules.evaluation.utils import EvalBundle
from nhssynth.modules.dataloader.io import TypedDataset
from nhssynth.modules.evaluation.io import Evaluations
from nhssynth.modules.model.io import Experiments, SyntheticDatasets


def parse_args() -> argparse.Namespace:
    """Parse the dashboard's command-line options.

    Each flag lets an artefact be pre-loaded from disk instead of (or as well
    as) the corresponding upload widget; the attribute names (`evaluations`,
    `experiments`, `synthetic_datasets`, `typed`) are the keys `get_component`
    reads via `getattr(args, name)`.

    Returns:
        The populated namespace; each attribute is a path string or None.
    """
    parser = argparse.ArgumentParser(description="NHSSynth Evaluation Dashboard")
    # NOTE(review): the superseded `--evaluation-bundle` flag from the pre-merge
    # version has been dropped; nothing reads `args.evaluation_bundle` any more.
    parser.add_argument("--evaluations", type=str, help="Path to a set of evaluations.")
    parser.add_argument("--experiments", type=str, help="Path to a set of experiments.")
    parser.add_argument("--synthetic-datasets", type=str, help="Path to a set of synthetic datasets.")
    parser.add_argument("--typed", type=str, help="Path to a typed real dataset.")
    return parser.parse_args()


def get_component(name: str, component_type: Any, text: str) -> None:
    """Offer one dashboard artefact via CLI path and/or upload widget, then stash it in session state.

    Reads the CLI argument matching `name` first, then any file uploaded through
    the widget (an upload overrides the CLI path because it is assigned later).
    On success, `loaded.contents` is stored under `st.session_state[name]`.

    Args:
        name: Session-state key; also the attribute looked up on the module-level `args`.
        component_type: Expected type of the unpickled object.
        text: Human-readable description used in the widget label and messages.
    """
    # Fix: `loaded` was previously only assigned inside the two branches, so
    # supplying neither a CLI path nor an upload raised NameError on the
    # `loaded is not None` check below.
    loaded = None
    uploaded = st.file_uploader(f"Upload a pickle file containing a {text}", type="pkl")
    if getattr(args, name):
        with open(os.getcwd() + "/" + getattr(args, name), "rb") as f:
            loaded = pickle.load(f)
    if uploaded is not None:
        # SECURITY(review): pickle.load on a user-supplied upload executes
        # arbitrary code on unpickling — acceptable only for a trusted,
        # locally-run dashboard; do not expose this page publicly.
        loaded = pickle.load(uploaded)
    if loaded is not None:
        assert isinstance(loaded, component_type), f"Uploaded file does not contain a {text}!"
        st.session_state[name] = loaded.contents
        st.success(f"Loaded {text}!")


# NOTE(review): this span is a GitHub unified-diff rendering, not runnable
# source — removed (pre-merge) and added (post-merge) lines are interleaved
# with no +/- markers, and indentation has been stripped by the paste.
# Comments below mark which version each run of lines belongs to.
if __name__ == "__main__":
# Entry point: parse CLI flags so artefacts can be pre-loaded without the widgets.
args = parse_args()

# NOTE(review): the next line is diff-viewer residue (a collapsed-hunk marker);
# hidden lines of the real file fall inside this fold.
Expand All @@ -25,43 +41,15 @@ def parse_args() -> argparse.Namespace:
"Welcome! Upload an evaluation bundle below to get started (optionally also the typed real dataset and bundle of experiments containing the synthetic datasets).\n\nUse the menu on the left to navigate the dashboard."
)

# NOTE(review): everything from here down to the `hide_streamlit_style`
# assignment is the REMOVED pre-merge version: it references EvalBundle, whose
# import was deleted in this commit, and was replaced by the four
# get_component(...) calls at the bottom of the file.
uploaded_eval_bundle = st.file_uploader("Upload a pickle file containing an evaluation bundle", type="pkl")
if args.evaluation_bundle:
with open(os.getcwd() + "/" + args.evaluation_bundle, "rb") as f:
eval_bundle = pickle.load(f)
if uploaded_eval_bundle is not None:
eval_bundle = pickle.load(uploaded_eval_bundle)
if eval_bundle is not None:
assert isinstance(eval_bundle, EvalBundle), "Uploaded file does not contain an evaluation bundle!"
st.session_state["evaluations"], st.session_state["experiments"] = (
eval_bundle.evaluations,
eval_bundle.experiments,
)
st.success(f"Loaded evaluation bundle!")

uploaded_experiments = st.file_uploader("Upload a pickle file containing a set of experiments", type="pkl")
if args.experiments:
with open(os.getcwd() + "/" + args.experiments, "rb") as f:
experiments = pickle.load(f)
if uploaded_experiments is not None:
experiments = pickle.load(uploaded_experiments)
if experiments is not None:
experiments = pd.DataFrame(experiments)
assert (
"dataset" in experiments.columns and "id" in experiments.columns
), "Uploaded file does not contain a set of experiments!"
st.session_state["synthetic_data"] = experiments[["id", "dataset"]]
st.success(f"Loaded synthetic datasets from experiments!")

uploaded_typed = st.file_uploader(
"Upload a pickle file containing the typed (by the dataloader module) real dataset", type="pkl"
)
if args.typed:
with open(os.getcwd() + "/" + args.typed, "rb") as f:
typed = pickle.load(f)
if uploaded_typed is not None:
typed = pickle.load(uploaded_typed)
if typed is not None:
assert isinstance(typed, TypedDataset), "Uploaded file does not contain a typed real dataset!"
st.session_state["real_data"] = typed
st.success(f"Loaded real dataset!")
# ADDED (post-merge): hide Streamlit's footer and deploy button via injected CSS.
hide_streamlit_style = """
<style>
footer {visibility: hidden;}
.stDeployButton {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# ADDED (post-merge): one uploader per artefact, replacing the removed blocks above.
get_component("evaluations", Evaluations, "bundle of evaluations")
get_component("experiments", Experiments, "bundle of experiments")
get_component("synthetic_datasets", SyntheticDatasets, "bundle of synthetic datasets")
get_component("typed", TypedDataset, "typed real dataset")
33 changes: 22 additions & 11 deletions src/nhssynth/modules/dashboard/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,38 @@


def check_input_paths(
    dir_experiment: str,
    fn_dataset: str,
    fn_typed: str,
    fn_experiments: str,
    fn_synthetic_datasets: str,
    fn_evaluations: str,
) -> tuple:
    """
    Sets up the input paths for the files the dashboard loads.

    Note (review): this block arrived as diff residue merging the pre- and
    post-merge versions (duplicate parameter lists, duplicated calls, a second
    unreachable return); this is the reconstructed post-merge version. The
    original `-> str` annotation was also wrong — a 4-tuple of paths is
    returned — and `fn_typed` was missing from the docstring.

    Args:
        dir_experiment: The path to the experiment directory.
            NOTE(review): used with the `/` operator below, so presumably a
            pathlib.Path despite the `str` annotation — confirm at call sites.
        fn_dataset: The base name of the dataset.
        fn_typed: The filename of the typed real dataset.
        fn_experiments: The filename of the collection of experiments.
        fn_synthetic_datasets: The filename of the collection of synthetic datasets.
        fn_evaluations: The filename of the collection of evaluations.

    Returns:
        The paths to the typed dataset, experiments, synthetic datasets and
        evaluations files, in that order.
    """
    fn_dataset = Path(fn_dataset).stem
    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = consistent_endings(
        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations]
    )
    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = potential_suffixes(
        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], fn_dataset
    )
    warn_if_path_supplied([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)
    check_exists([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)
    return (
        dir_experiment / fn_typed,
        dir_experiment / fn_experiments,
        dir_experiment / fn_synthetic_datasets,
        dir_experiment / fn_evaluations,
    )
Loading

0 comments on commit eaa4508

Please sign in to comment.