diff --git a/NAMESPACE b/NAMESPACE
index ca72b0436..e6c530dd3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -87,6 +87,7 @@ importFrom(checkmate,assert_list)
 importFrom(checkmate,assert_logical)
 importFrom(checkmate,assert_number)
 importFrom(checkmate,assert_numeric)
+importFrom(checkmate,assert_vector)
 importFrom(checkmate,check_atomic_vector)
 importFrom(checkmate,check_data_frame)
 importFrom(checkmate,check_function)
diff --git a/R/check-inputs-scoring-functions.R b/R/check-inputs-scoring-functions.R
index 0e4467e3c..2c36071ad 100644
--- a/R/check-inputs-scoring-functions.R
+++ b/R/check-inputs-scoring-functions.R
@@ -161,25 +161,21 @@ check_input_interval <- function(observed, lower, upper, range) {
 #' that `predicted` represents the probability that the observed value is equal
 #' to the highest factor level.
 #' @param predicted Input to be checked. `predicted` should be a vector of
-#' length n, holding probabilities. Values represent the probability that
+#' length n, holding probabilities. Alternatively, `predicted` can be a matrix
+#' of size n x 1. Values represent the probability that
 #' the corresponding value in `observed` will be equal to the highest
 #' available factor level.
 #' @importFrom checkmate assert assert_factor
 #' @inherit document_assert_functions return
 #' @keywords check-inputs
 assert_input_binary <- function(observed, predicted) {
-  if (length(observed) != length(predicted)) {
-    stop("`observed` and `predicted` need to be ",
-         "of same length when scoring binary forecasts")
-  }
-  assert_factor(observed, n.levels = 2)
-  levels <- levels(observed)
-  assert(
-    check_numeric_vector(predicted, min.len = 1, lower = 0, upper = 1)
-  )
+  assert_factor(observed, n.levels = 2, min.len = 1)
+  assert_numeric(predicted, lower = 0, upper = 1)
+  assert_dims_ok_point(observed, predicted)
   return(invisible(NULL))
 }
 
+
 #' @title Check that inputs are correct for binary forecast
 #' @inherit assert_input_binary params description
 #' @inherit document_check_functions return
@@ -200,12 +196,9 @@ check_input_binary <- function(observed, predicted) {
 #' @inherit document_assert_functions return
 #' @keywords check-inputs
 assert_input_point <- function(observed, predicted) {
-  assert(check_numeric_vector(observed, min.len = 1))
-  assert(check_numeric_vector(predicted, min.len = 1))
-  if (length(observed) != length(predicted)) {
-    stop("`observed` and `predicted` need to be ",
-         "of same length when scoring point forecasts")
-  }
+  assert(check_numeric(observed))
+  assert(check_numeric(predicted))
+  assert(check_dims_ok_point(observed, predicted))
   return(invisible(NULL))
 }
 
@@ -217,3 +210,48 @@ check_input_point <- function(observed, predicted) {
   result <- check_try(assert_input_point(observed, predicted))
   return(result)
 }
+
+
+#' @title Assert Inputs Have Matching Dimensions
+#' @description Function assesses whether input dimensions match. In the
+#' following, n is the number of observations / forecasts. Scalar values may
+#' be repeated to match the length of the other input.
+#' Allowed options are therefore
+#' - `observed` is vector of length 1 or length n
+#' - `predicted` is
+#'     - a vector of of length 1 or length n
+#'     - a matrix with n rows and 1 column
+#' @inherit assert_input_binary
+#' @inherit document_assert_functions return
+#' @importFrom checkmate assert_vector check_matrix check_vector assert
+#' @keywords check-inputs
+assert_dims_ok_point <- function(observed, predicted) {
+  assert_vector(observed, min.len = 1)
+  n_obs <- length(observed)
+  assert(
+    check_vector(predicted, min.len = 1, strict = TRUE),
+    check_matrix(predicted, ncols = 1, nrows = n_obs)
+  )
+  dim_p <- dim(predicted)
+  if (!is.null(dim_p) && (length(dim_p) > 1) && (dim_p[2] > 1)) {
+    stop("`predicted` must be a vector or a matrix with one column. Found ",
+         dim(predicted)[2], " columns")
+  }
+  n_pred <- length(as.vector(predicted))
+  # check that both are either of length 1 or of equal length
+  if ((n_obs != 1) && (n_pred != 1) && (n_obs != n_pred)) {
+    stop("`observed` and `predicted` must either be of length 1 or ",
+         "of equal length. Found ", n_obs, " and ", n_pred)
+  }
+  return(invisible(NULL))
+}
+
+
+#' @title Check Inputs Have Matching Dimensions
+#' @inherit assert_dims_ok_point params description
+#' @inherit document_check_functions return
+#' @keywords check-inputs
+check_dims_ok_point <- function(observed, predicted) {
+  result <- check_try(assert_dims_ok_point(observed, predicted))
+  return(result)
+}
diff --git a/man/assert_dims_ok_point.Rd b/man/assert_dims_ok_point.Rd
new file mode 100644
index 000000000..4f6bd3bd0
--- /dev/null
+++ b/man/assert_dims_ok_point.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check-inputs-scoring-functions.R
+\name{assert_dims_ok_point}
+\alias{assert_dims_ok_point}
+\title{Assert Inputs Have Matching Dimensions}
+\usage{
+assert_dims_ok_point(observed, predicted)
+}
+\arguments{
+\item{observed}{Input to be checked. Should be a factor of length n with
+exactly two levels, holding the observed values.
+The highest factor level is assumed to be the reference level. This means
+that \code{predicted} represents the probability that the observed value is equal
+to the highest factor level.}
+
+\item{predicted}{Input to be checked. \code{predicted} should be a vector of
+length n, holding probabilities. Alternatively, \code{predicted} can be a matrix
+of size n x 1. Values represent the probability that
+the corresponding value in \code{observed} will be equal to the highest
+available factor level.}
+}
+\value{
+Returns NULL invisibly if the assertion was successful and throws an
+error otherwise.
+}
+\description{
+Function assesses whether input dimensions match. In the
+following, n is the number of observations / forecasts. Scalar values may
+be repeated to match the length of the other input.
+Allowed options are therefore
+\itemize{
+\item \code{observed} is vector of length 1 or length n
+\item \code{predicted} is
+\itemize{
+\item a vector of of length 1 or length n
+\item a matrix with n rows and 1 column
+}
+}
+}
+\keyword{check-inputs}
diff --git a/man/assert_input_binary.Rd b/man/assert_input_binary.Rd
index 4ca8f7883..5b4d8b2bf 100644
--- a/man/assert_input_binary.Rd
+++ b/man/assert_input_binary.Rd
@@ -14,7 +14,8 @@ that \code{predicted} represents the probability that the observed value is equa
 to the highest factor level.}
 
 \item{predicted}{Input to be checked. \code{predicted} should be a vector of
-length n, holding probabilities. Values represent the probability that
+length n, holding probabilities. Alternatively, \code{predicted} can be a matrix
+of size n x 1. Values represent the probability that
 the corresponding value in \code{observed} will be equal to the highest
 available factor level.}
 }
diff --git a/man/check_dims_ok_point.Rd b/man/check_dims_ok_point.Rd
new file mode 100644
index 000000000..1269a6725
--- /dev/null
+++ b/man/check_dims_ok_point.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check-inputs-scoring-functions.R
+\name{check_dims_ok_point}
+\alias{check_dims_ok_point}
+\title{Check Inputs Have Matching Dimensions}
+\usage{
+check_dims_ok_point(observed, predicted)
+}
+\arguments{
+\item{observed}{Input to be checked. Should be a factor of length n with
+exactly two levels, holding the observed values.
+The highest factor level is assumed to be the reference level. This means
+that \code{predicted} represents the probability that the observed value is equal
+to the highest factor level.}
+
+\item{predicted}{Input to be checked. \code{predicted} should be a vector of
+length n, holding probabilities. Alternatively, \code{predicted} can be a matrix
+of size n x 1. Values represent the probability that
+the corresponding value in \code{observed} will be equal to the highest
+available factor level.}
+}
+\value{
+Returns TRUE if the check was successful and a string with an
+error message otherwise
+}
+\description{
+Function assesses whether input dimensions match. In the
+following, n is the number of observations / forecasts. Scalar values may
+be repeated to match the length of the other input.
+Allowed options are therefore
+\itemize{
+\item \code{observed} is vector of length 1 or length n
+\item \code{predicted} is
+\itemize{
+\item a vector of of length 1 or length n
+\item a matrix with n rows and 1 column
+}
+}
+}
+\keyword{check-inputs}
diff --git a/man/check_input_binary.Rd b/man/check_input_binary.Rd
index 5b206f35b..10d6c36ea 100644
--- a/man/check_input_binary.Rd
+++ b/man/check_input_binary.Rd
@@ -14,7 +14,8 @@ that \code{predicted} represents the probability that the observed value is equa
 to the highest factor level.}
 
 \item{predicted}{Input to be checked. \code{predicted} should be a vector of
-length n, holding probabilities. Values represent the probability that
+length n, holding probabilities. Alternatively, \code{predicted} can be a matrix
+of size n x 1. Values represent the probability that
 the corresponding value in \code{observed} will be equal to the highest
 available factor level.}
 }
diff --git a/tests/testthat/test-metrics-binary.R b/tests/testthat/test-metrics-binary.R
index 9c49e7050..79f56c535 100644
--- a/tests/testthat/test-metrics-binary.R
+++ b/tests/testthat/test-metrics-binary.R
@@ -7,121 +7,158 @@ df <- data.table(
   id = 1:10
 )
 
-# test input handling
-test_that("function throws an error when missing observed or predicted", {
-  observed <- sample(c(0, 1), size = 10, replace = TRUE)
-  predicted <- replicate(
-    20,
-    sample(c(0, 1), size = 10, replace = TRUE)
-  )
+observed_point <- rnorm(10)
+predicted_point <- rnorm(10)
 
-  expect_error(
-    brier_score(predicted = predicted),
-    'argument "observed" is missing, with no default'
-  )
+# ==============================================================================
+# Test Input Checks - this also checks point inputs where functions are similar
+# ==============================================================================
+test_that("correct input works", {
+  expect_no_condition(assert_input_binary(observed, predicted))
+  expect_no_condition(assert_input_point(observed_point, predicted_point))
 
-  expect_error(
-    brier_score(observed = observed),
-    'argument "predicted" is missing, with no default'
+  # observed is a single number and does not have the same length as predicted
+  expect_no_condition(
+    assert_input_binary(factor(1, levels = c(0, 1)), predicted)
+  )
+  expect_no_condition(
+    assert_input_point(1, predicted_point)
   )
-})
-
 
+  # predicted is a single number and does not have the same length as observed
+  expect_no_condition(assert_input_binary(observed, predicted = 0.2))
+  expect_no_condition(assert_input_point(observed_point, predicted = 0.2))
 
-test_that("function throws an error for wrong format of `observed`", {
-  observed <- factor(rpois(10, lambda = 1:10))
-  predicted <- runif(10, min = 0, max = 1)
+  # predicted is a matrix with nrow equal to observed
+  expect_no_condition(assert_input_binary(observed, matrix(predicted)))
+  expect_no_condition(assert_input_point(observed_point, matrix(predicted_point)))
+})
 
+# test input handling
+test_that("function throws an error for wrong input formats", {
+  # observed value not as expected
   expect_error(
-    brier_score(
-      observed = observed,
-      predicted = predicted
-    ),
-    "Assertion on 'observed' failed: Must have exactly 2 levels."
+    assert_input_binary(observed = rnorm(10), predicted = predicted),
+    "Assertion on 'observed' failed: Must be of type 'factor', not 'double'."
   )
-
-  observed <- rnorm(10)
   expect_error(
-    brier_score(
-      observed = observed,
-      predicted = predicted
-    ),
-    "Assertion on 'observed' failed: Must be of type 'factor', not 'double'."
+    assert_input_binary(1:10, predicted),
+    "Assertion on 'observed' failed: Must be of type 'factor', not 'integer'."
   )
-})
-
-test_that("function throws an error for wrong format of predictions", {
-  predicted <- runif(10, min = 0, max = 1)
   expect_error(
-    brier_score(
-      observed = observed,
-      predicted = as.list(predicted)
-    ),
+    assert_input_binary(observed = observed, predicted = as.list(predicted)),
+    "Assertion on 'predicted' failed: Must be of type 'numeric', not 'list'."
+  )
+  expect_error(
+    assert_input_point(observed = factor(rnorm(10)), predicted = predicted),
+    "Assertion on 'observed' failed: Must be of type 'numeric', not 'factor'."
+  )
+  expect_error(
+    assert_input_point(observed = observed_point, list(predicted_point)),
     "Assertion on 'predicted' failed: Must be of type 'numeric', not 'list'."
   )
 
-  predicted <- runif(15, min = 0, max = 1)
+  # observed value has not 2 levels
   expect_error(
-    brier_score(
-      observed = observed,
-      predicted = predicted
-    ),
-    "`observed` and `predicted` need to be of same length when scoring binary forecasts",
-    # "Arguments to the following function call: 'brier_score(observed = observed, predicted = predicted)' should have the same length (or length one). Actual lengths: 10, 15",
-    fixed = TRUE
+    assert_input_binary(factor(1:10), predicted),
+    "Assertion on 'observed' failed: Must have exactly 2 levels."
   )
-})
 
-test_that("Input checking for binary forecasts works", {
-  # everything correct
-  expect_no_condition(
-    scoringutils:::assert_input_binary(observed, predicted)
+  # wrong length
+  expect_error(
+    assert_input_binary(observed = observed, predicted = runif(15, min = 0, max = 1)),
+    "`observed` and `predicted` must either be of length 1 or of equal length. Found 10 and 15",
+    fixed = TRUE
+  )
+  expect_error(
+    assert_input_point(observed_point, runif(15, min = 0, max = 1)),
+    "Assertion on 'observed' failed: `observed` and `predicted` must either be of length 1 or of equal length. Found 10 and 15.",
+    fixed = TRUE
   )
 
   # predicted > 1
   expect_error(
-    scoringutils:::assert_input_binary(observed, predicted + 1),
+    assert_input_binary(observed, predicted + 1),
     "Assertion on 'predicted' failed: Element 1 is not <= 1."
   )
 
   # predicted < 0
   expect_error(
-    scoringutils:::assert_input_binary(observed, predicted - 1),
+    assert_input_binary(observed, predicted - 1),
     "Assertion on 'predicted' failed: Element 1 is not >= 0."
   )
 
-  # observed value not factor
+  # predicted is a matrix with one row
   expect_error(
-    scoringutils:::assert_input_binary(1:10, predicted),
-    "Assertion on 'observed' failed: Must be of type 'factor', not 'integer'."
+    assert_input_binary(observed, predicted = matrix(0.2)),
+    "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows",
+    fixed = TRUE)
+  expect_error(
+    assert_input_point(observed_point, predicted = matrix(0.2)),
+    "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows",
+    fixed = TRUE)
+
+  # predicted is a matrix with 2 rows
+  expect_error(
+    assert_input_binary(observed, matrix(rep(predicted, 2), ncol = 2)),
+    "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 1 cols, but has 2 cols",
+    fixed = TRUE
   )
+})
 
-  # observed value has not 2 levels
+
+# ==============================================================================
+# Test Binary Metrics
+# ==============================================================================
+
+test_that("function throws an error when missing observed or predicted", {
   expect_error(
-    scoringutils:::assert_input_binary(factor(1:10), predicted),
-    "Assertion on 'observed' failed: Must have exactly 2 levels."
+    brier_score(predicted = predicted),
+    'argument "observed" is missing, with no default'
   )
 
+  expect_error(
+    brier_score(observed = observed),
+    'argument "predicted" is missing, with no default'
+  )
+})
+
+test_that("Brier score works with different inputs", {
   # observed is a single number and does not have the same length as predicted
+  expect_equal(
+    brier_score(factor(1, levels = c(0, 1)), predicted),
+    (1 - predicted)^2
+  )
+
+  # predicted is a single number and does not have the same length as observed
+  expect_equal(
+    brier_score(observed, predicted = 0.2),
+    ifelse(observed == 1, (1 - 0.2)^2, (0.2)^2)
+  )
+
+  # predicted is a matrix with 1 row
   expect_error(
-    scoringutils:::assert_input_binary(factor(1), predicted),
-    "`observed` and `predicted` need to be of same length when scoring binary forecasts",
+    brier_score(observed, predicted = matrix(0.2)),
+    "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'matrix'\n * check_matrix(predicted): Must have exactly 10 rows, but has 1 rows",
     fixed = TRUE
   )
 
-  # predicted is a matrix
+  # predicted is an array
   expect_error(
-    scoringutils:::assert_input_binary(observed, matrix(predicted)),
-    "Assertion on 'predicted' failed: Must be of type 'atomic vector', not 'matrix'."
+    brier_score(observed, predicted = array(0.2)),
+    "Assertion failed. One of the following must apply:\n * check_vector(predicted): Must be of type 'vector', not 'array'\n * check_matrix(predicted): Must be of type 'matrix', not 'array'",
+    fixed = TRUE
   )
 })
 
+
 test_that("Binary metrics work within and outside of `score()`", {
   result <- score(df)
   expect_equal(
     brier_score(observed, predicted),
     result$brier_score
   )
+
   expect_equal(
     logs_binary(observed, predicted),
     result$log_score