From c20ec3545714deb085ce69c92592f28422bd62b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 May 2024 01:25:44 +0000 Subject: [PATCH 01/22] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .binder/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.binder/requirements.txt b/.binder/requirements.txt index 050c27af1..e00dd4ac9 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -281,7 +281,7 @@ referencing==0.34.0 # jsonschema # jsonschema-specifications # jupyter-events -requests==2.31.0 +requests==2.32.2 # via jupyterlab-server rfc3339-validator==0.1.4 # via From eda9e93343a6ce2b8ad38856f688db6bfdb1c050 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Wed, 22 May 2024 23:38:10 +1000 Subject: [PATCH 02/22] Readme capitalise HDF5 (#415) Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 88481f41d..bc56c1a8e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl `scores` not only includes common scores (e.g. MAE, RMSE), it includes novel scores not commonly found elsewhere (e.g. FIRM, Flip-Flop Index), complex scores (e.g. threshold weighted CRPS), and statistical tests (such as the Diebold Mariano test). Additionally, it provides pre-processing tools for preparing data for scores in a variety of formats including cumulative distribution functions (CDF). `scores` provides its own implementations where relevant to avoid extensive dependencies. -`scores` primarily supports xarray datatypes for Earth system data allowing it to work with NetCDF4, hdf5, Zarr and GRIB data sources among others. 
`scores` uses Dask for scaling and performance. Some metrics work with pandas and we will aim to expand this capability. +`scores` primarily supports xarray datatypes for Earth system data allowing it to work with NetCDF4, HDF5, Zarr and GRIB data sources among others. `scores` uses Dask for scaling and performance. Some metrics work with pandas and we will aim to expand this capability. All of the scores and metrics in this package have undergone a thorough scientific review. Every score has a companion Jupyter Notebook tutorial that demonstrates its use in practice. From ec877a2582c9c715530953300e7f8c5c132f17fd Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Sat, 25 May 2024 22:37:48 +1000 Subject: [PATCH 03/22] Remove FAR acronym as it's better known as the abbreviation for False Alarm Ratio --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc56c1a8e..01ae90f8d 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl |----------------------- |----------------- |-------------- | | **[Continuous](https://scores.readthedocs.io/en/latest/included.html#continuous)** |Scores for evaluating single-valued continuous forecasts. |Mean Absolute Error (MAE), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Additive Bias, Multiplicative Bias, Pearson's Correlation Coefficient, Flip-Flop Index, Quantile loss, Murphy score. | | **[Probability](https://scores.readthedocs.io/en/latest/included.html#probability)** |Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events. |Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Function (CDF), Threshold weighted CRPS for CDF, CRPS for ensembles, Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams). 
| -| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Rate (FAR), Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, FIxed Risk Multicategorical (FIRM) Score. | +| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Rate, Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, FIxed Risk Multicategorical (FIRM) Score. | | **[Statistical Tests](https://scores.readthedocs.io/en/latest/included.html#statistical-tests)** |Tools to conduct statistical tests and generate confidence intervals. |Diebold Mariano. | | **[Processing Tools](https://scores.readthedocs.io/en/latest/included.html#processing-tools-for-preparing-data)** |Tools to pre-process data. |Data matching, Discretisation, Cumulative Density Function Manipulation. 
| From ebd06a5a491f865dadd932a83f409c53ff700989 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Thu, 30 May 2024 19:57:00 +1000 Subject: [PATCH 04/22] Adding reference to Gilbert Skill Score & Equitable Threat Score docstrings (#420) * Adding reference to Gilbert Skill Score * Ran linting over code --- src/scores/categorical/contingency_impl.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 964cd676c..97dfa468a 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -313,9 +313,14 @@ def equitable_threat_score(self): Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. + + References: + + Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. Equitability revisited: Why the “equitable threat score” is not equitable. - Weather and Forecasting, 25(2), pp.710-726. + Weather and Forecasting, 25(2), pp.710-726. https://doi.org/10.1175/2009WAF2222350.1 """ cd = self.counts hits_random = (cd["tp_count"] + cd["fn_count"]) * (cd["tp_count"] + cd["fp_count"]) / cd["total_count"] @@ -332,9 +337,13 @@ def gilberts_skill_score(self): Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. + References: + + Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. Equitability revisited: Why the “equitable threat score” is not equitable. - Weather and Forecasting, 25(2), pp.710-726. + Weather and Forecasting, 25(2), pp.710-726. 
https://doi.org/10.1175/2009WAF2222350.1 """ return self.equitable_threat_score() From 00c8f842c3b7332da668950a5633361dccef7ef1 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Fri, 31 May 2024 15:57:28 +1000 Subject: [PATCH 05/22] Updates maintainer.md to include information about rendering in readthedocs (#427) --- docs/maintainer.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/maintainer.md b/docs/maintainer.md index 5dba10c18..540c93ebb 100644 --- a/docs/maintainer.md +++ b/docs/maintainer.md @@ -77,3 +77,43 @@ Information relevant for package maintenance | api.md | a new function is added | each function must be added individually | included.md | a new function is added | each function (and each variation of the function name) must be added individually | Explanation.ipynb | a new tutorial is added | navigation throughout the docs + +## This section covers checking the documentation renders properly in readthedocs + +### What documentation needs checking in readthedocs + +Each time an existing function is modified or a new function is added to `scores`, the rendering in readthedocs for any modified or newly created documentation must be checked. + +This applies to each of the following documents: + + - included.md + - API Documentation + - Tutorials (see also [tutorial rendering](#Tutorial-rendering) further below) + - (If applicable) README + +### Common rendering issues in readthedocs + +Frequent issues include: + +- Lists (including lists that use bullets, dot points, hyphens, numbers, letters etc.) 
+ - Check **each** list appears and renders properly + - Check **all** indented lists/sub-lists for proper indentation +- Figures: check **each** figure appears and renders properly +- Plots: check **each** plot appears and renders properly +- Tables: check **each** table appears and renders properly +- Formulae: check **each** formula appears and renders properly + +### Tutorial rendering + +Things that render well in JupyterLab do not always render properly in readthedocs. Additionally, fixes that work well when built locally, don't always work when merged into the codebase. + +To check the rendering of tutorials in readthedocs: + - Compare the tutorial in readthedocs against a version running in JupyterLab (as not everything renders in GitHub). + - Check the entirety of the tutorial (sometimes things will render properly in one section, while not rendering properly in a different section of the same tutorial). + - If you make any changes to the code cells, re-execute the Notebook in JupyterLab before committing, otherwise some things (e.g. some plots) won't render in readthedocs. Then re-check the tutorial in readthedocs to ensure the tutorial is still rendering properly. + +Ideally, also check the tutorial renders properly in nbviewer (there is a link at the top of each tutorial page in readthedocs). 
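One of the checks above — that notebooks were re-executed before committing — can be partly automated. A minimal sketch, assuming nbformat v4 JSON (the helper below is illustrative only and not part of `scores`): flag code cells whose stored `outputs` are empty, since an empty outputs list usually means the cell was never run and its plots will be missing in readthedocs. Cells that legitimately produce no output will also be flagged, so treat the result as a hint rather than a failure.

```python
import json

def unexecuted_code_cells(notebook_json: str) -> list:
    """Return indices of code cells with no stored outputs (nbformat v4 JSON)."""
    nb = json.loads(notebook_json)
    return [
        i
        for i, cell in enumerate(nb.get("cells", []))
        if cell.get("cell_type") == "code" and not cell.get("outputs")
    ]

# Tiny example: one executed code cell, one that was never run.
nb = {
    "nbformat": 4,
    "cells": [
        {"cell_type": "markdown", "source": ["# Tutorial"]},
        {"cell_type": "code", "source": ["1 + 1"],
         "outputs": [{"output_type": "execute_result"}]},
        {"cell_type": "code", "source": ["plot()"], "outputs": []},
    ],
}
print(unexecuted_code_cells(json.dumps(nb)))  # [2]
```

In practice the function could be mapped over `tutorials/*.ipynb` before a commit, as a cheap complement to the manual readthedocs checks.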
+ + + + From 2bb4d6fe69a16d0a53c0d5523b28443c3e5955ba Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Sun, 2 Jun 2024 11:41:21 +1000 Subject: [PATCH 06/22] Update Contributing Guide link in README.md (#431) Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 01ae90f8d..4c921de46 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl All of the scores and metrics in this package have undergone a thorough scientific review. Every score has a companion Jupyter Notebook tutorial that demonstrates its use in practice. ## Contributing -To find out more about contributing, see our [contributor's guide](https://github.com/nci/scores/blob/main/docs/contributing.md). +To find out more about contributing, see our [Contributing Guide](https://scores.readthedocs.io/en/latest/contributing.html). All interactions in discussions, issues, emails and code (e.g. merge requests, code comments) will be managed according to the expectations outlined in the [ code of conduct ](https://github.com/nci/scores/blob/main/CODE_OF_CONDUCT.md) and in accordance with all relevant laws and obligations. This project is an inclusive, respectful and open project with high standards for respectful behaviour and language. The code of conduct is the Contributor Covenant, adopted by over 40,000 open source projects. Any concerns will be dealt with fairly and respectfully, with the processes described in the code of conduct. 
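Several of the categorical metrics named in the README table above — and documented or corrected in later patches in this series — reduce to simple ratios of the four 2x2 contingency counts. The sketch below uses plain Python numbers rather than the package's xarray-based `BasicContingencyManager`; the function names are illustrative, and the counts (tp=9, fp=2, fn=1, tn=6) are the ones used in the series' test-suite example.

```python
# Standalone contingency-table ratios. Counts: tp = hits, fp = false alarms,
# fn = misses, tn = correct negatives.

def accuracy(tp: int, fp: int, fn: int, tn: int) -> float:
    # Fraction of all forecasts that were correct: (hits + correct negatives) / total
    return (tp + tn) / (tp + fp + fn + tn)

def probability_of_detection(tp: int, fn: int) -> float:
    # Hit rate: hits / (hits + misses)
    return tp / (tp + fn)

def probability_of_false_detection(fp: int, tn: int) -> float:
    # False alarm *rate*: false alarms / (false alarms + correct negatives)
    return fp / (fp + tn)

def false_alarm_ratio(tp: int, fp: int) -> float:
    # Not the same quantity as the false alarm rate:
    # false alarms / (hits + false alarms)
    return fp / (tp + fp)

def peirce_skill_score(tp: int, fp: int, fn: int, tn: int) -> float:
    # POD minus POFD, matching the corrected formula in patch 11
    return probability_of_detection(tp, fn) - probability_of_false_detection(fp, tn)

print(peirce_skill_score(9, 2, 1, 6))  # 9/10 - 2/8 = 0.65
```

Keeping the false alarm *ratio* and false alarm *rate* as separate functions mirrors the "not to be confused with" warnings added to the docstrings later in the series.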
From 28717b73dd847e46e386b6de3a6525c88b1181e7 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Sun, 2 Jun 2024 12:19:19 +1000 Subject: [PATCH 07/22] Remove un-necessary mkdocs yaml file --- mkdocs.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 mkdocs.yml diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index 80824d392..000000000 --- a/mkdocs.yml +++ /dev/null @@ -1,17 +0,0 @@ -site_name: Scores Verification Package - -plugins: - - mkdocstrings - - -theme: - name: readthedocs - highlightjs: true - -nav: - - Documentation Index: index.md - - API reference: api.md - - User Guide: userguide.md - - Data Guide: data.md - - Contributor Guide: contributing.md - \ No newline at end of file From 4bb556454053a3f8e468399470416a4541cc94ce Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Sun, 2 Jun 2024 12:44:18 +1000 Subject: [PATCH 08/22] Update download links for data fetching (#434) --- tutorials/First_Data_Fetching.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorials/First_Data_Fetching.ipynb b/tutorials/First_Data_Fetching.ipynb index 2648fad47..be91f114e 100644 --- a/tutorials/First_Data_Fetching.ipynb +++ b/tutorials/First_Data_Fetching.ipynb @@ -105,9 +105,9 @@ }, "outputs": [], "source": [ - "forecast_url = 'https://dapds00.nci.org.au/thredds/fileServer/wr45/ops_aps3/access-g/1/20221120/0000/fc/sfc/temp_scrn.nc'\n", + "forecast_url = 'https://thredds.nci.org.au/thredds/fileServer/wr45/ops_aps3/access-g/1/20221120/0000/fc/sfc/temp_scrn.nc'\n", "forecast_hash = '7956d95ea3a7edee2a01c989b1f9e089199da5b1924b4c2d4611088713fbcb44' # Recorded on 13/5/2023\n", - "analysis_url = 'https://dapds00.nci.org.au/thredds/fileServer/wr45/ops_aps3/access-g/1/20221124/0000/an/sfc/temp_scrn.nc'\n", + "analysis_url = 'https://thredds.nci.org.au/thredds/fileServer/wr45/ops_aps3/access-g/1/20221124/0000/an/sfc/temp_scrn.nc'\n", "analysis_hash = 
'163c5de55e721ad2a76518242120044bedfec805e3397cfb0008435521630042' # Recorded on 13/5/2023" ] }, @@ -3011,7 +3011,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.3" } }, "nbformat": 4, From ccd83700ad2f3e46810e6cf1549d0774027fbd03 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 16 May 2024 17:45:17 +1000 Subject: [PATCH 09/22] Set up versioning for release 0.8.1 --- docs/conf.py | 2 +- src/scores/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6cddeff3f..b6f9d35bf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.9" +release = "0.8.1" version = __version__ diff --git a/src/scores/__init__.py b/src/scores/__init__.py index 88cf80ab7..5da442923 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import scores.stats.statistical_tests # noqa: F401 -__version__ = "0.9" +__version__ = "0.8.1" __all__ = [ "scores.categorical", From d069aaa7ae39057bd1207cebc6eec739612eeafc Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Tue, 21 May 2024 11:33:37 +1000 Subject: [PATCH 10/22] Update version for minor release update --- docs/conf.py | 2 +- src/scores/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b6f9d35bf..567c628c3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.8.1" +release = "0.8.2" version = __version__ diff --git a/src/scores/__init__.py b/src/scores/__init__.py index 5da442923..85a636a18 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import 
scores.stats.statistical_tests # noqa: F401 -__version__ = "0.8.1" +__version__ = "0.8.2" __all__ = [ "scores.categorical", From 73c0c1ed7db4addd5649d20fc7e4322bb3e03d55 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Sun, 2 Jun 2024 17:21:12 +1000 Subject: [PATCH 11/22] Correction to Peirce Skill Score formula (#439) --- docs/conf.py | 2 +- src/scores/__init__.py | 2 +- src/scores/categorical/contingency_impl.py | 2 +- tests/categorical/test_contingency.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 567c628c3..f582d6848 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.8.2" +release = "0.8.3" version = __version__ diff --git a/src/scores/__init__.py b/src/scores/__init__.py index 85a636a18..40601b4d8 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import scores.stats.statistical_tests # noqa: F401 -__version__ = "0.8.2" +__version__ = "0.8.3" __all__ = [ "scores.categorical", diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 97dfa468a..fbd390ae3 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -241,7 +241,7 @@ def peirce_skill_score(self): """ cd = self.counts component_a = cd["tp_count"] / (cd["tp_count"] + cd["fn_count"]) - component_b = cd["fn_count"] / (cd["fn_count"] + cd["tn_count"]) + component_b = cd["fp_count"] / (cd["fp_count"] + cd["tn_count"]) skill_score = component_a - component_b return skill_score diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index 7fc68955a..668a34ce7 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -202,7 +202,7 @@ def 
test_categorical_table(): assert table.yules_q() == table.odds_ratio_skill_score() peirce_component_a = 9 / (9 + 1) - peirce_component_b = 1 / (1 + 6) + peirce_component_b = 2 / (2 + 6) peirce_expected = peirce_component_a - peirce_component_b assert table.peirce_skill_score() == peirce_expected From 8101c92a8a6c9a7a10b8f995a07f6573c85a2cad Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Mon, 3 Jun 2024 09:40:14 +1000 Subject: [PATCH 12/22] Removed text (looks like it was a copy/paste error originally) (#435) * Removed text (looks like it was a copy/paste error originally) * Remove reference to Flip Flop Index entirely - this method does not appear to be specialised for the Flip Flop Index and it is unclear to me why it was even mentioned * Update src/scores/processing/discretise.py Co-authored-by: Nicholas Loveday <48701367+nicholasloveday@users.noreply.github.com> Signed-off-by: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> --- src/scores/processing/discretise.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/scores/processing/discretise.py b/src/scores/processing/discretise.py index 028ef049d..f68ee5070 100644 --- a/src/scores/processing/discretise.py +++ b/src/scores/processing/discretise.py @@ -188,9 +188,8 @@ def proportion_exceeding( Args: data (xarray.Dataset or xarray.DataArray): The data from which to calculate the proportion exceeding `thresholds` - thresholds (iterable): The proportion of Flip-Flop Index results + thresholds (iterable): The proportion of values equal to or exceeding these thresholds will be calculated. - the Flip-Flop Index. reduce_dims: Dimensions to reduce. preserve_dims: Dimensions to preserve. @@ -222,9 +221,8 @@ def binary_discretise_proportion( data: The data to convert into 0 and 1 according the thresholds before calculating the proportion. 
- thresholds: The proportion of Flip-Flop Index results + thresholds: The proportion of values equal to or exceeding these thresholds will be calculated. - the Flip-Flop Index. mode: Specifies the required relation of `data` to `thresholds` for a value to fall in the 'event' category (i.e. assigned to 1). Allowed modes are: From 8753623a0a0cebbc7b1e297e7069d32fa1c75ca3 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:01:50 +1000 Subject: [PATCH 13/22] (one of five) Add mathjax and improved docstrings to several scores (#441) * Add mathjax and improved docstrings to several scores * Add return type hints and make them more specific --- src/scores/categorical/contingency_impl.py | 100 +++++++++++++++++---- 1 file changed, 83 insertions(+), 17 deletions(-) diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index fbd390ae3..003b2855f 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -26,7 +26,7 @@ import xarray as xr import scores.utils -from scores.typing import FlexibleArrayType, FlexibleDimensionTypes +from scores.typing import FlexibleArrayType, FlexibleDimensionTypes, XarrayLike DEFAULT_PRECISION = 8 @@ -90,22 +90,49 @@ def get_table(self): """ return self.xr_table - def accuracy(self): + def accuracy(self) -> xr.DataArray: """ - The proportion of forecasts which are true + Accuracy calculates the proportion of forecasts which are true. - https://www.cawcr.gov.au/projects/verification/#ACC + Returns: + xr.DataArray: A DataArray containing the accuracy score + + .. math:: + \\text{accuracy} = \\frac{\\text{true positives} + \\text{true negatives}}{\\text{total count}} + + Notes: + + - Range: 0 to 1, where 0 indicates no skill, and 1 indicates a perfect score. 
+        - True positives is the same as hits
+        - False negatives is the same as misses
+
+        References:
+            https://www.cawcr.gov.au/projects/verification/#ACC
         """
         count_dictionary = self.counts
         correct_count = count_dictionary["tp_count"] + count_dictionary["tn_count"]
         ratio = correct_count / count_dictionary["total_count"]
         return ratio
 
-    def frequency_bias(self):
+    def frequency_bias(self) -> xr.DataArray:
         """
         How did the forecast frequency of "yes" events compare to the observed frequency of "yes" events?
 
-        https://www.cawcr.gov.au/projects/verification/#BIAS
+        Returns:
+            xr.DataArray: An xarray object containing the frequency bias
+
+        .. math::
+            \\text{frequency bias} = \\frac{\\text{true positives} + \\text{false positives}}{\\text{true positives} + \\text{false negatives}}
+
+        Notes:
+
+        - Range: 0 to ∞ (infinity), where 1 indicates a perfect score
+        - "True positives" is the same as "hits"
+        - "False positives" is the same as "false alarms"
+        - "False negatives" is the same as "misses"
+
+        References:
+            https://www.cawcr.gov.au/projects/verification/#BIAS
         """
         # Note - bias_score calls this method
         cd = self.counts
@@ -113,31 +113,70 @@ def frequency_bias(self):
 
         return freq_bias
 
-    def bias_score(self):
+    def bias_score(self) -> xr.DataArray:
         """
         How did the forecast frequency of "yes" events compare to the observed frequency of "yes" events?
 
-        https://www.cawcr.gov.au/projects/verification/#BIAS
+        Returns:
+            xr.DataArray: An xarray object containing the bias score
+
+        .. math::
+            \\text{frequency bias} = \\frac{\\text{true positives} + \\text{false positives}}{\\text{true positives} + \\text{false negatives}}
+
+        Notes:
+
+        - Range: 0 to ∞ (infinity), where 1 indicates a perfect score
+        - "True positives" is the same as "hits"
+        - "False positives" is the same as "false alarms"
+        - "False negatives" is the same as "misses"
+
+        References:
+            https://www.cawcr.gov.au/projects/verification/#BIAS
         """
         return self.frequency_bias()
 
-    def hit_rate(self):
+    def hit_rate(self) -> xr.DataArray:
         """
-        What proportion of the observed events where correctly forecast? Identical to probability_of_detection
-        Range: 0 to 1. Perfect score: 1.
-        https://www.cawcr.gov.au/projects/verification/#POD
+        Identical to probability_of_detection
+
+        Calculates the proportion of the observed events that were correctly forecast.
+
+        Returns:
+            xr.DataArray: An xarray object containing the hit rate
+
+        .. math::
+            \\text{hit rate} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}}
+
+        Notes:
+
+        - Range: 0 to 1. Perfect score: 1.
+        - "True positives" is the same as "hits"
+        - "False negatives" is the same as "misses"
+
+        References:
+            https://www.cawcr.gov.au/projects/verification/#POD
         """
         return self.probability_of_detection()
 
-    def probability_of_detection(self):
+    def probability_of_detection(self) -> xr.DataArray:
         """
-        What proportion of the observed events where correctly forecast?
-        Identical to hit_rate
-        Range: 0 to 1. Perfect score: 1.
+        Identical to hit rate
 
-        https://www.cawcr.gov.au/projects/verification/#POD
+        Calculates the proportion of the observed events that were correctly forecast.
+
+        Returns:
+            xr.DataArray: An xarray object containing the probability of detection
+
+        .. math::
+            \\text{hit rate} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}}
+
+        Notes:
+        - Range: 0 to 1. Perfect score: 1.
+ - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + + References: + https://www.cawcr.gov.au/projects/verification/#POD """ # Note - hit_rate and sensitiviy call this function cd = self.counts From 038db512d87813ff3902bb3ce79b908e176b6630 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:39:37 +1000 Subject: [PATCH 14/22] Adds true negative rate to API and updates included.md accordingly & adds more detail to specificity docstring (#436) * Adds True Negative Rate to API and updates included.md accordingly * Change type from XarrayLike to datarray --- docs/included.md | 8 ++--- src/scores/categorical/contingency_impl.py | 40 ++++++++++++++++++++-- tests/categorical/test_contingency.py | 1 + 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/docs/included.md b/docs/included.md index 32ec85a8d..d4568bc5e 100644 --- a/docs/included.md +++ b/docs/included.md @@ -408,13 +408,13 @@ - [Threat score (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#CSI) * - - - True Negative Rate, *see Specificity* + - True Negative Rate (Specificity) - - — + [API](https://scores.readthedocs.io/en/latest/api.html#scores.categorical.BasicContingencyManager.true_negative_rate) - - — + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - - — + [Wikipedia](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) * - - True Positive Rate (Hit Rate, Probability of Detection (POD), Sensitivity, Recall) - diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 003b2855f..b8cd38c2a 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -337,14 +337,50 @@ def sensitivity(self): """ return self.probability_of_detection() - def specificity(self): + def specificity(self) -> 
xr.DataArray: """ - https://en.wikipedia.org/wiki/Sensitivity_and_specificity + Identical to true negative rate. + + The probability that an observed non-event will be correctly predicted. + + Returns: + xr.DataArray: An xarray object containing the true negative rate (specificity). + + .. math:: + \\text{specificity} = \\frac{\\text{true negatives}}{\\text{true negatives} + \\text{false positives}} + + Notes: + - "True negatives" is the same as "correct negatives". + - "False positives" is the same as "false alarms". + + Reference: + https://en.wikipedia.org/wiki/Sensitivity_and_specificity """ cd = self.counts s = cd["tn_count"] / (cd["tn_count"] + cd["fp_count"]) return s + def true_negative_rate(self) -> xr.DataArray: + """ + Identical to specificity. + + The probability that an observed non-event will be correctly predicted. + + Returns: + xr.DataArray: An xarray object containing the true negative rate. + + .. math:: + \\text{true negative rate} = \\frac{\\text{true negatives}}{\\text{true negatives} + \\text{false positives}} + + Notes: + - "True negatives" is the same as "correct negatives". + - "False positives" is the same as "false alarms". + + Reference: + https://en.wikipedia.org/wiki/Sensitivity_and_specificity + """ + return self.specificity() + def recall(self): """ Identical to probability of detection. 
diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index 668a34ce7..a8cb44563 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -200,6 +200,7 @@ def test_categorical_table(): assert table.gilberts_skill_score() == table.equitable_threat_score() assert table.cohens_kappa() == table.heidke_skill_score() assert table.yules_q() == table.odds_ratio_skill_score() + assert table.specificity() == table.true_negative_rate() peirce_component_a = 9 / (9 + 1) peirce_component_b = 2 / (2 + 6) From fd3659b5d8bb6c8f1e54db7f6c77f3d295053db1 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:12:22 +1000 Subject: [PATCH 15/22] Adds fraction correct (alternative term to accuracy) to API and updates included table (#437) Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --- docs/included.md | 8 +++---- src/scores/categorical/contingency_impl.py | 25 ++++++++++++++++++++++ tests/categorical/test_contingency.py | 1 + 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/docs/included.md b/docs/included.md index d4568bc5e..2c0826694 100644 --- a/docs/included.md +++ b/docs/included.md @@ -272,13 +272,13 @@ - [False alarm ratio (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#FAR) * - - - Fraction Correct, *see Accuracy* + - Fraction Correct (Accuracy) - - — + [API](https://scores.readthedocs.io/en/latest/api.html#scores.categorical.BasicContingencyManager.fraction_correct) - - — + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - - — + [Accuracy (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ACC) * - - Frequency Bias (Bias Score) - diff --git a/src/scores/categorical/contingency_impl.py 
b/src/scores/categorical/contingency_impl.py index b8cd38c2a..55d04a864 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -92,6 +92,8 @@ def get_table(self): def accuracy(self) -> xr.DataArray: """ + Identical to fraction correct. + Accuracy calculates the proportion of forecasts which are true. Returns: @@ -114,6 +116,29 @@ def accuracy(self) -> xr.DataArray: ratio = correct_count / count_dictionary["total_count"] return ratio + def fraction_correct(self) -> xr.DataArray: + """ + Identical to accuracy. + + Fraction correct calculates the proportion of forecasts which are correct. + + Returns: + xr.DataArray: An xarray object containing the fraction correct. + + .. math:: + \\text{fraction correct} = \\frac{\\text{true positives} + \\text{true negatives}}{\\text{total count}} + + Notes: + + - Range: 0 to 1, where 1 indicates a perfect score. + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + + References: + https://www.cawcr.gov.au/projects/verification/#ACC + """ + return self.accuracy() + def frequency_bias(self) -> xr.DataArray: """ How did the forecast frequency of "yes" events compare to the observed frequency of "yes" events? 
diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index a8cb44563..21df3b591 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -200,6 +200,7 @@ def test_categorical_table(): assert table.gilberts_skill_score() == table.equitable_threat_score() assert table.cohens_kappa() == table.heidke_skill_score() assert table.yules_q() == table.odds_ratio_skill_score() + assert table.accuracy() == table.fraction_correct() assert table.specificity() == table.true_negative_rate() peirce_component_a = 9 / (9 + 1) From e147485c44abc2cdb9c469a7bf2ce8b60dd20e29 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:03:01 +1000 Subject: [PATCH 16/22] (two of five) Add mathjax and fix docstrings to various functions (#442) * Add mathjax and improved docstrings to several scores * Standardise contingency table terminology * Correction to threat score formula --- src/scores/categorical/contingency_impl.py | 142 +++++++++++++++------ tests/categorical/test_contingency.py | 2 +- 2 files changed, 107 insertions(+), 37 deletions(-) diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 55d04a864..1f872eba0 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -26,7 +26,7 @@ import xarray as xr import scores.utils -from scores.typing import FlexibleArrayType, FlexibleDimensionTypes, XarrayLike +from scores.typing import FlexibleArrayType, FlexibleDimensionTypes DEFAULT_PRECISION = 8 @@ -39,8 +39,8 @@ class BasicContingencyManager: # pylint: disable=too-many-public-methods to the actual event tables in their full dimensionality. The event count data is much smaller than the full event tables, particularly when - considering very large data sets like NWP data, which could be terabytes to petabytes - in size. 
+ considering very large data sets like Numerical Weather Prediction (NWP) data, which + could be terabytes to petabytes in size. """ def __init__(self, counts: dict): @@ -104,7 +104,7 @@ def accuracy(self) -> xr.DataArray: Notes: - - Range: 0 to 1, where 0 indicates no skill, and 1 indicates a perfect score. + - Range: 0 to 1, where 1 indicates a perfect score. - True positives is the same at hits - False negatives is the same as misses @@ -236,36 +236,73 @@ def probability_of_detection(self) -> xr.DataArray: return pod - def true_positive_rate(self): + def true_positive_rate(self) -> xr.DataArray: """ - What proportion of the observed events where correctly forecast? Identical to probability_of_detection - Range: 0 to 1. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#POD + What proportion of the observed events were correctly forecast? + + Returns: + An xarray object containing the true positive rate + + .. math:: + \\text{true positive rate} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} + + Notes: + - Range: 0 to 1. Perfect score: 1. + - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + + References: + https://www.cawcr.gov.au/projects/verification/#POD """ return self.probability_of_detection() - def false_alarm_ratio(self): + def false_alarm_ratio(self) -> xr.DataArray: """ What fraction of the predicted "yes" events actually did not occur (i.e., were false alarms)? - Range: 0 to 1. Perfect score: 0. - https://www.cawcr.gov.au/projects/verification/#FAR + + Returns: + xr.DataArray: An xarray object containing the false alarm ratio + + .. math:: + \\text{false alarm ratio} = \\frac{\\text{false positives}}{\\text{true positives} + \\text{false positives}} + + Notes: + - Range: 0 to 1. Perfect score: 0.
+ - Not to be confused with the False Alarm Rate + - "False positives" is the same as "false alarms" + - "True positives" is the same as "hits" + + References: + https://www.cawcr.gov.au/projects/verification/#FAR """ cd = self.counts far = cd["fp_count"] / (cd["tp_count"] + cd["fp_count"]) return far - def false_alarm_rate(self): + def false_alarm_rate(self) -> xr.DataArray: """ - What fraction of the non-events were incorrectly predicted? Identical to probability_of_false_detection - Range: 0 to 1. Perfect score: 0. - https://www.cawcr.gov.au/projects/verification/#POFD + What fraction of the non-events were incorrectly predicted? + + Returns: + xr.DataArray: An xarray object containing the false alarm rate + + .. math:: + \\text{false alarm rate} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} + + Notes: + - Range: 0 to 1. Perfect score: 0. + - Not to be confused with the false alarm ratio + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#POFD """ # Note - probability of false detection calls this function cd = self.counts @@ -273,48 +310,78 @@ def false_alarm_rate(self): return far - def probability_of_false_detection(self): + def probability_of_false_detection(self) -> xr.DataArray: """ - What fraction of the non-events were incorrectly predicted? Identical to false_alarm_rate - Range: 0 to 1. Perfect score: 0. - https://www.cawcr.gov.au/projects/verification/#POFD + What fraction of the non-events were incorrectly predicted? + + Returns: + An xarray object containing the probability of false detection + + .. math:: + \\text{probability of false detection} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} + + Notes: + - Range: 0 to 1. Perfect score: 0. 
+ - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#POFD """ return self.false_alarm_rate() - def success_ratio(self): + def success_ratio(self) -> xr.DataArray: """ What proportion of the forecast events actually eventuated? - Range: 0 to 1. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#SR + Returns: + An xarray object containing the success ratio + + .. math:: + \\text{success ratio} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives}} + + Notes: + - Range: 0 to 1. Perfect score: 1. + - "True positives" is the same as "hits" + - "False positives" is the same as "misses" + + References: + https://www.cawcr.gov.au/projects/verification/#SR """ cd = self.counts sr = cd["tp_count"] / (cd["tp_count"] + cd["fp_count"]) return sr - def threat_score(self): + def threat_score(self) -> xr.DataArray: """ - How well did the forecast "yes" events correspond to the observed "yes" events? Identical to critical_success_index - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#CSI + Returns: + xr.DataArray: An xarray object containing the threat score + + .. math:: + \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} + + Notes: + - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. + + References: + https://www.cawcr.gov.au/projects/verification/#CSI """ # Note - critical success index just calls this method cd = self.counts - ts = cd["tp_count"] / (cd["tp_count"] + cd["fp_count"] + cd["tn_count"]) + ts = cd["tp_count"] / (cd["tp_count"] + cd["fp_count"] + cd["fn_count"]) return ts def critical_success_index(self): """ Often known as CSI. - How well did the forecast "yes" events correspond to the observed "yes" events? 
Identical to threat_score Range: 0 to 1, 0 indicates no skill. Perfect score: 1. @@ -553,18 +620,21 @@ def yules_q(self): class BinaryContingencyManager(BasicContingencyManager): """ + At each location, the value will either be: - - A true positive (hit) - - A false positive (false alarm) - - A true negative (correct negative) - - A false negative (miss) + + - A true positive (hit) + - A false positive (false alarm) + - A true negative (correct negative) + - A false negative (miss) + It will be common to want to operate on masks of these values, such as: - - Plotting these attributes on a map - - Calculating the total number of these attributes - - Calculating various ratios of these attributes, potentially - masked by geographical area (e.g. accuracy in a region) + + - Plotting these attributes on a map + - Calculating the total number of these attributes + - Calculating various ratios of these attributes, potentially masked by geographical area (e.g. accuracy in a region) As such, the per-pixel information is useful as well as the overall ratios involved. 
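The threat-score correction in the patch above (the denominator uses the false-negative count, not the true-negative count) can be sanity-checked with a small plain-Python sketch. The counts here are hypothetical, chosen to mirror the values used in the package's tests; this is an illustration of the formulas, not the `scores` API itself:

```python
# Hypothetical 2x2 contingency counts:
# tp = hits, fp = false alarms, fn = misses, tn = correct negatives.
tp, fp, fn, tn = 9, 2, 1, 6

# Corrected threat score (critical success index): tp / (tp + fp + fn).
threat_score = tp / (tp + fp + fn)        # 9 / 12 = 0.75

# The pre-patch bug used the true-negative count in the denominator.
buggy_threat_score = tp / (tp + fp + tn)  # 9 / 17

# Peirce skill score: hit rate minus probability of false detection.
peirce = tp / (tp + fn) - fp / (fp + tn)  # 9/10 - 2/8 = 0.65

print(threat_score, buggy_threat_score, peirce)
```

With these counts the corrected formula reproduces the updated test expectation `9 / (9 + 2 + 1)`.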
diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index 21df3b591..4f62a30ba 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -173,7 +173,7 @@ def test_categorical_table(): assert table.accuracy() == (9 + 6) / 18 assert table.probability_of_detection() == 9 / (9 + 1) assert table.false_alarm_rate() == 2 / (2 + 6) - assert table.threat_score() == 9 / (9 + 2 + 6) + assert table.threat_score() == 9 / (9 + 2 + 1) assert table.frequency_bias() == (9 + 2) / (9 + 1) assert table.hit_rate() == 9 / (9 + 1) assert table.probability_of_false_detection() == 2 / (6 + 2) From 93e4a8b8f8f132b6b289cd17a3ca06bba547a5bc Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:02:30 +1000 Subject: [PATCH 17/22] (three of five) Add mathjax and fix docstring templating for various functions (#443) * Further mathjax and docstring templating --------- Signed-off-by: Tennessee Leeuwenburg <134973832+tennlee@users.noreply.github.com> Co-authored-by: Nicholas Loveday <48701367+nicholasloveday@users.noreply.github.com> --- src/scores/categorical/contingency_impl.py | 135 +++++++++++++++++---- 1 file changed, 113 insertions(+), 22 deletions(-) diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 1f872eba0..981bb9d56 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -94,7 +94,7 @@ def accuracy(self) -> xr.DataArray: """ Identical to fraction correct. - Accuracy calculates the proportion of forecasts which are true. + Accuracy calculates the proportion of forecasts which are correct. Returns: xr.DataArray: A DataArray containing the accuracy score @@ -105,8 +105,8 @@ def accuracy(self) -> xr.DataArray: Notes: - Range: 0 to 1, where 1 indicates a perfect score. 
- - True positives is the same at hits - - False negatives is the same as misses + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#ACC @@ -368,6 +368,9 @@ def threat_score(self) -> xr.DataArray: Notes: - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. + - "True positives" is the same as "hits" + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" References: https://www.cawcr.gov.au/projects/verification/#CSI @@ -378,24 +381,49 @@ def threat_score(self) -> xr.DataArray: ts = cd["tp_count"] / (cd["tp_count"] + cd["fp_count"] + cd["fn_count"]) return ts - def critical_success_index(self): + def critical_success_index(self) -> xr.DataArray: """ - Often known as CSI. - Identical to threat_score - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#CSI + Identical to threat_score + + .. math:: + \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} + + Returns: + An xarray object containing the critical success index + + Notes: + - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. + - Often known as CSI. + - "True positives" is the same as "hits" + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#CSI """ return self.threat_score() - def peirce_skill_score(self): + def peirce_skill_score(self) -> xr.DataArray: """ - Hanssen and Kuipers discriminant (true skill statistic, Peirce's skill score) + Identical to Hanssen and Kuipers discriminant and the true skill statistic + How well did the forecast separate the "yes" events from the "no" events? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1.
- https://www.cawcr.gov.au/projects/verification/#HK + Returns: + An xarray object containing the Peirce Skill Score + + .. math:: + \\text{Peirce skill score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#HK """ cd = self.counts component_a = cd["tp_count"] / (cd["tp_count"] + cd["fn_count"]) @@ -403,29 +431,77 @@ def peirce_skill_score(self): skill_score = component_a - component_b return skill_score - def true_skill_statistic(self): + def true_skill_statistic(self) -> xr.DataArray: """ Identical to Peirce's skill score and to Hanssen and Kuipers discriminant + How well did the forecast separate the "yes" events from the "no" events? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#HK + Returns: + xr.DataArray: An xarray object containing the true skill statistic + + .. math:: + \\text{true skill statistic} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. 
+ - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#HK """ return self.peirce_skill_score() - def hanssen_and_kuipers_discriminant(self): + def hanssen_and_kuipers_discriminant(self) -> xr.DataArray: """ Identical to Peirce's skill score and to true skill statistic + How well did the forecast separate the "yes" events from the "no" events? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - https://www.cawcr.gov.au/projects/verification/#HK + Returns: + xr.DataArray: An xarray object containing Hanssen and Kuipers' Discriminant + + .. math:: + \\text{HK} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + + Where :math:`\\text{HK}` is Hansen and Kuipers Discriminant + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + - "False positives" is the same as "false alarms" + - "True negatives" is the same as "correct negatives" + + References: + https://www.cawcr.gov.au/projects/verification/#HK """ return self.peirce_skill_score() - def sensitivity(self): + def sensitivity(self) -> xr.DataArray: """ - https://en.wikipedia.org/wiki/Sensitivity_and_specificity + Identical to probability of detection and hit_rate + + Calculates the proportion of the observed events that were correctly forecast. + + Returns: + An xarray object containing the probability of detection + + .. math:: + \\text{sensitivity} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} + + Notes: + - Range: 0 to 1. Perfect score: 1. 
+ - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + + References: + - https://www.cawcr.gov.au/projects/verification/#POD + - https://en.wikipedia.org/wiki/Sensitivity_and_specificity + """ return self.probability_of_detection() @@ -473,11 +549,26 @@ def true_negative_rate(self) -> xr.DataArray: """ return self.specificity() - def recall(self): + def recall(self) -> xr.DataArray: """ Identical to probability of detection. - https://en.wikipedia.org/wiki/Precision_and_recall + Calculates the proportion of the observed events that were correctly forecast. + + Returns: + An xarray object containing the probability of detection + + .. math:: + \\text{recall} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} + + Notes: + - Range: 0 to 1. Perfect score: 1. + - "True positives" is the same as "hits" + - "False negatives" is the same as "misses" + + References: + - https://www.cawcr.gov.au/projects/verification/#POD + - https://en.wikipedia.org/wiki/Precision_and_recall """ return self.probability_of_detection() From 24f707448f05618b330269533ffa681a057a64f1 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:04:23 +1000 Subject: [PATCH 18/22] Renaming explanation to tutorial gallery (#451) * Update README.md replacing Explanation with Tutorial Gallery * Update index.md, maintainer.md and tutorial readme.md * Renaming tutorial gallery file (#17) * Renaming Explanation.ipynb to Tutorial_Gallery.ipynb --------- Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --------- Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --- .github/pull_request_template.md | 2 +- README.md | 2 +- docs/index.md | 2 +- docs/maintainer.md | 2 +- tutorials/README.md | 2 +- tutorials/{Explanation.ipynb => Tutorial_Gallery.ipynb} | 0 6 files changed, 5 insertions(+), 5 deletions(-) rename 
tutorials/{Explanation.ipynb => Tutorial_Gallery.ipynb} (100%) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 70960e3e6..64712112e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -20,7 +20,7 @@ Please work through the following checklists. Delete anything that isn't relevan - [ ] Short introduction to why you would use that metric and what it tells you - [ ] A link to a reference - [ ] A "things to try next" section at the end -- [ ] Add notebook to [Explanation.ipynb](https://github.com/nci/scores/blob/develop/tutorials/Explanation.ipynb) +- [ ] Add notebook to [Tutorial_Gallery.ipynb](https://github.com/nci/scores/blob/develop/tutorials/Tutorial_Gallery.ipynb) - [ ] Optional - a detailed discussion of how the metric works at the end of the notebook ## Documentation diff --git a/README.md b/README.md index 4c921de46..c33ad3dc1 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Documentation is hosted at [scores.readthedocs.io](https://scores.readthedocs.io). Source code is hosted at [github.com/nci/scores](https://github.com/nci/scores). -The tutorial gallery is hosted at [as part of the documentation, here](https://scores.readthedocs.io/en/latest/tutorials/Explanation.html). +The tutorial gallery is hosted at [as part of the documentation, here](https://scores.readthedocs.io/en/latest/tutorials/Tutorial_Gallery.html). 
## Overview Here is a **curated selection** of the metrics, tools and statistical tests included in `scores`: diff --git a/docs/index.md b/docs/index.md index 21de3eb1a..ee9f47ff8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -18,6 +18,6 @@ api contributing data maintainer -tutorials/Explanation +tutorials/Tutorial_Gallery related_works ``` diff --git a/docs/maintainer.md b/docs/maintainer.md index 540c93ebb..69cdb0a1a 100644 --- a/docs/maintainer.md +++ b/docs/maintainer.md @@ -76,7 +76,7 @@ Information relevant for package maintenance | README | a new is score added | in case it deserves a mention | api.md | a new function is added | each function must be added individually | included.md | a new function is added | each function (and each variation of the function name) must be added individually -| Explanation.ipynb | a new tutorial is added | navigation throughout the docs +| Tutorial_Gallery.ipynb | a new tutorial is added | navigation throughout the docs ## This section covers checking the documentation renders properly in readthedocs diff --git a/tutorials/README.md b/tutorials/README.md index 19abc213d..7401a1261 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -1,5 +1,5 @@ # Tutorial Notebooks -[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nci/scores/main?labpath=tutorials%2FExplanation.ipynb) +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nci/scores/main?labpath=tutorials%2FTutorial_Gallery.ipynb) Some notebooks included here for tutorial purposes make use of `plotly` for interactive visualisation. They will not show in github, so a Binder environment is provided. 
\ No newline at end of file diff --git a/tutorials/Explanation.ipynb b/tutorials/Tutorial_Gallery.ipynb similarity index 100% rename from tutorials/Explanation.ipynb rename to tutorials/Tutorial_Gallery.ipynb From 655af7c6fbfe8b4e281a07e7c693311bdd140604 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:10:07 +1000 Subject: [PATCH 19/22] Relocates maintainer docs in readthedocs sidebar and adds one dot point to maintainer doc (#447) Signed-off-by: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> --- docs/index.md | 2 +- docs/maintainer.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index ee9f47ff8..2fe669333 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,7 @@ included api contributing data -maintainer tutorials/Tutorial_Gallery related_works +maintainer ``` diff --git a/docs/maintainer.md b/docs/maintainer.md index 69cdb0a1a..81bef723b 100644 --- a/docs/maintainer.md +++ b/docs/maintainer.md @@ -102,6 +102,7 @@ Frequent issues include: - Plots: check **each** plot appears and renders properly - Tables: check **each** table appears and renders properly - Formulae: check **each** formula appears and renders properly +- API Documentation: in addition to checking the above items, also confirm "Returns" and "Return Type" are rendering as expected ### Tutorial rendering From fea72e29e85d6a2ea5ab06391fbec1e604ff3043 Mon Sep 17 00:00:00 2001 From: Stephanie Chong <168800785+Steph-Chong@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:10:44 +1000 Subject: [PATCH 20/22] Contributing guide (a) adds reference to providing feedback and (b) adds code of conduct section (#449) --- docs/contributing.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 95ef9d90c..bd9e7cdbe 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -4,7 +4,7 @@ Thank 
you for considering contributing to `scores`. Contributions of all kinds a These guidelines describe how to collaborate effectively. -Types of contributions include bug reports, feature requests and pull requests. Contributions which are in line with the roadmap will be prioritised. The roadmap outlines the intentions for this package. +Types of contributions include bug reports, feature requests, feedback and pull requests. Contributions which are in line with the roadmap will be prioritised. The roadmap outlines the intentions for this package. ## Roadmap - Addition of more scores, metrics and statistical techniques @@ -13,9 +13,13 @@ Types of contributions include bug reports, feature requests and pull requests. - Increased support for machine learning library integration - Additional notebooks exploring complex use cases in depth -## Bug Reports and Feature Requests +## Code of Conduct and Respectful Behaviour -Please submit bug reports and feature requests as issues in GitHub: [https://github.com/nci/scores/issues](https://github.com/nci/scores/issues). +All interactions in discussions, issues, emails and code (e.g. pull requests, code comments) will be managed according to the expectations outlined in the [code of conduct](https://github.com/nci/scores/blob/main/CODE_OF_CONDUCT.md) and in accordance with all relevant laws and obligations. This project is an inclusive, respectful and open project with high standards for respectful behaviour and language. The code of conduct is the Contributor Covenant, adopted by over 40,000 open source projects. Any concerns will be dealt with fairly and respectfully, with the processes described in the code of conduct. + +## Bug Reports, Feature Requests and Feedback + +Please submit bug reports, feature requests and feedback as issues in GitHub: [https://github.com/nci/scores/issues](https://github.com/nci/scores/issues). 
## Handling Security Concerns From 0408fb057468aa3084c65b832f533978c3134445 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Mon, 3 Jun 2024 17:55:34 +1000 Subject: [PATCH 21/22] Add notice about testing to the readme --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c33ad3dc1..41a0fe195 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,8 @@ # Scores: Verification and Evaluation for Forecasts and Models + +> **Notice:** +> **`scores` is undergoing final testing and review. When this is completed, this notice will be removed.** + > > **A list of over 50 metrics, statistical techniques and data processing tools contained in `scores` is [available here](https://scores.readthedocs.io/en/latest/included.html).** @@ -6,7 +10,7 @@ Documentation is hosted at [scores.readthedocs.io](https://scores.readthedocs.io). Source code is hosted at [github.com/nci/scores](https://github.com/nci/scores). -The tutorial gallery is hosted at [as part of the documentation, here](https://scores.readthedocs.io/en/latest/tutorials/Tutorial_Gallery.html). +The tutorial gallery is hosted at [as part of the documentation, here](https://scores.readthedocs.io/en/latest/tutorials/Tutorial_Gallery.html). ## Overview Here is a **curated selection** of the metrics, tools and statistical tests included in `scores`: @@ -15,7 +19,7 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl |----------------------- |----------------- |-------------- | | **[Continuous](https://scores.readthedocs.io/en/latest/included.html#continuous)** |Scores for evaluating single-valued continuous forecasts. |Mean Absolute Error (MAE), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Additive Bias, Multiplicative Bias, Pearson's Correlation Coefficient, Flip-Flop Index, Quantile loss, Murphy score. 
| | **[Probability](https://scores.readthedocs.io/en/latest/included.html#probability)** |Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events. |Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Function (CDF), Threshold weighted CRPS for CDF, CRPS for ensembles, Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams). | -| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Rate, Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, FIxed Risk Multicategorical (FIRM) Score. | +| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Ratio (FAR), Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, FIxed Risk Multicategorical (FIRM) Score. | | **[Statistical Tests](https://scores.readthedocs.io/en/latest/included.html#statistical-tests)** |Tools to conduct statistical tests and generate confidence intervals. |Diebold Mariano. | | **[Processing Tools](https://scores.readthedocs.io/en/latest/included.html#processing-tools-for-preparing-data)** |Tools to pre-process data. |Data matching, Discretisation, Cumulative Density Function Manipulation. 
| From c94114b5e88dfc87299ef3356e182a61b5c432f3 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Mon, 3 Jun 2024 17:59:24 +1000 Subject: [PATCH 22/22] Update package version to 0.8.4 --- docs/conf.py | 2 +- src/scores/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f582d6848..23dc7aedc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.8.3" +release = "0.8.4" version = __version__ diff --git a/src/scores/__init__.py b/src/scores/__init__.py index 40601b4d8..78c68e504 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import scores.stats.statistical_tests # noqa: F401 -__version__ = "0.8.3" +__version__ = "0.8.4" __all__ = [ "scores.categorical",
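The docstrings added across this patch series repeatedly stress that the false alarm *ratio* and the false alarm *rate* are different quantities with different denominators. That distinction can be sketched in plain Python; the counts are hypothetical, mirroring the test values used earlier in the series, and this is an illustration of the formulas rather than the `scores` API:

```python
# Hypothetical 2x2 contingency counts:
# tp = hits, fp = false alarms, fn = misses, tn = correct negatives.
tp, fp, fn, tn = 9, 2, 1, 6

# False alarm ratio: of the forecast "yes" events, the fraction that
# did not occur. Denominator is the number of forecast events.
false_alarm_ratio = fp / (tp + fp)  # 2 / 11

# False alarm rate (probability of false detection): of the observed
# non-events, the fraction incorrectly forecast as events.
# Denominator is the number of observed non-events.
false_alarm_rate = fp / (tn + fp)   # 2 / 8 = 0.25

print(false_alarm_ratio, false_alarm_rate)
```

With these counts the rate reproduces the test expectation `2 / (2 + 6)`, while the ratio is `2 / 11` — confusing the two gives a different answer.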