diff --git a/docs/examples/custom/README.ipynb b/docs/examples/custom/README.ipynb index e479c4fce..5d1cea3ab 100644 --- a/docs/examples/custom/README.ipynb +++ b/docs/examples/custom/README.ipynb @@ -177,8 +177,7 @@ "\n", " self._predictive = Predictive(self._model, self._samples)\n", "\n", - " self.ready = True\n", - " return self.ready\n", + " return True\n", "\n", " @decode_args\n", " async def predict(\n", diff --git a/docs/examples/custom/README.md b/docs/examples/custom/README.md index 269429d6b..3294926c1 100644 --- a/docs/examples/custom/README.md +++ b/docs/examples/custom/README.md @@ -6,7 +6,7 @@ To support this scenario, MLServer makes it really easy to create your own exten ## Overview -In this example, we will train a [`numpyro` model](http://num.pyro.ai/en/stable/). +In this example, we will train a [`numpyro` model](http://num.pyro.ai/en/stable/). The `numpyro` library streamlines the implementation of probabilistic models, abstracting away advanced inference and training algorithms. Out of the box, `mlserver` doesn't provide an inference runtime for `numpyro`. @@ -19,7 +19,6 @@ This will be a very simple bayesian regression model, based on an example provid Since this is a probabilistic model, during training we will compute an approximation to the posterior distribution of our model using MCMC. - ```python # Original source code and more details can be found in: # https://nbviewer.jupyter.org/github/pyro-ppl/numpyro/blob/master/notebooks/source/bayesian_regression.ipynb @@ -79,7 +78,6 @@ Note that, since this is a probabilistic model, we will only need to save the tr This will get saved in a `numpyro-divorce.json` file. - ```python import json @@ -95,7 +93,7 @@ with open(model_file_name, "w") as model_file: ## Serving -The next step will be to serve our model using `mlserver`. +The next step will be to serve our model using `mlserver`. For that, we will first implement an extension which serve as the _runtime_ to perform inference using our custom `numpyro` model. ### Custom inference runtime @@ -105,8 +103,6 @@ Our custom inference wrapper should be responsible of: - Loading the model from the set samples we saved previously. - Running inference using our model structure, and the posterior approximated from the samples. - - ```python # %load models.py import json @@ -134,8 +130,7 @@ class NumpyroModel(MLModel): self._predictive = Predictive(self._model, self._samples) - self.ready = True - return self.ready + return True @decode_args async def predict( @@ -170,14 +165,13 @@ class NumpyroModel(MLModel): ### Settings files -The next step will be to create 2 configuration files: +The next step will be to create 2 configuration files: - `settings.json`: holds the configuration of our server (e.g. ports, log level, etc.). - `model-settings.json`: holds the configuration of our model (e.g. input type, runtime to use, etc.). #### `settings.json` - ```python # %load settings.json { @@ -188,7 +182,6 @@ The next step will be to create 2 configuration files: #### `model-settings.json` - ```python # %load model-settings.json { @@ -213,13 +206,11 @@ Since this command will start the server and block the terminal, waiting for req ### Send test inference request - We now have our model being served by `mlserver`. To make sure that everything is working as expected, let's send a request from our test set. For that, we can use the Python types that `mlserver` provides out of box, or we can build our request manually. 
- ```python import requests import numpy as np @@ -245,10 +236,9 @@ response.json() Now that we have written and tested our custom model, the next step is to deploy it. With that goal in mind, the rough outline of steps will be to first build a custom image containing our code, and then deploy it. - ### Specifying requirements -MLServer will automatically find your requirements.txt file and install necessary python packages +MLServer will automatically find your requirements.txt file and install necessary python packages ```python # %load requirements.txt @@ -262,15 +252,13 @@ jaxlib==0.3.7 ### Building a custom image ```{note} -This section expects that Docker is available and running in the background. +This section expects that Docker is available and running in the background. ``` MLServer offers helpers to build a custom Docker image containing your code. In this example, we will use the `mlserver build` subcommand to create an image, which we'll be able to deploy later. - -Note that this section expects that Docker is available and running in the background, as well as a functional cluster with Seldon Core installed and some familiarity with `kubectl`. - +Note that this section expects that Docker is available and running in the background, as well as a functional cluster with Seldon Core installed and some familiarity with `kubectl`. ```bash %%bash @@ -283,7 +271,6 @@ To ensure that the image is fully functional, we can spin up a container and the docker run -it --rm -p 8080:8080 my-custom-numpyro-server:0.1.0 ``` - ```python import numpy as np @@ -308,7 +295,7 @@ As we should be able to see, the server running within our Docker image responds ### Deploying our custom image ```{note} -This section expects access to a functional Kubernetes cluster with Seldon Core installed and some familiarity with `kubectl`. +This section expects access to a functional Kubernetes cluster with Seldon Core installed and some familiarity with `kubectl`. ``` Now that we've built a custom image and verified that it works as expected, we can move to the next step and deploy it. @@ -316,13 +303,12 @@ There is a large number of tools out there to deploy images. However, for our example, we will focus on deploying it to a cluster running [Seldon Core](https://docs.seldon.io/projects/seldon-core/en/latest/). ```{note} -Also consider that depending on your Kubernetes installation Seldon Core might expect to get the container image from a public container registry like [Docker hub](https://hub.docker.com/) or [Google Container Registry](https://cloud.google.com/container-registry). For that you need to do an extra step of pushing the container to the registry using `docker tag /` and `docker push /` and also updating the `image` section of the yaml file to `/`. +Also consider that depending on your Kubernetes installation Seldon Core might expect to get the container image from a public container registry like [Docker hub](https://hub.docker.com/) or [Google Container Registry](https://cloud.google.com/container-registry). For that you need to do an extra step of pushing the container to the registry using `docker tag /` and `docker push /` and also updating the `image` section of the yaml file to `/`. ``` For that, we will need to create a `SeldonDeployment` resource which instructs Seldon Core to deploy a model embedded within our custom image and compliant with the [V2 Inference Protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2). This can be achieved by _applying_ (i.e. 
`kubectl apply`) a `SeldonDeployment` manifest to the cluster, similar to the one below: - ```python %%writefile seldondeployment.yaml apiVersion: machinelearning.seldon.io/v1 @@ -343,7 +329,6 @@ spec: image: my-custom-numpyro-server:0.1.0 ``` - ```python ``` diff --git a/docs/examples/custom/models.py b/docs/examples/custom/models.py index 8e97943af..9d19914e4 100644 --- a/docs/examples/custom/models.py +++ b/docs/examples/custom/models.py @@ -23,8 +23,7 @@ async def load(self) -> bool: self._predictive = Predictive(self._model, self._samples) - self.ready = True - return self.ready + return True @decode_args async def predict( diff --git a/docs/user-guide/custom.md b/docs/user-guide/custom.md index 585725ae1..4a2edb608 100644 --- a/docs/user-guide/custom.md +++ b/docs/user-guide/custom.md @@ -29,7 +29,7 @@ and then overriding those methods with your custom logic. ```{code-block} python --- -emphasize-lines: 7-8, 13-14 +emphasize-lines: 7-8, 12-13 --- from mlserver import MLModel from mlserver.types import InferenceRequest, InferenceResponse @@ -39,8 +39,7 @@ class MyCustomRuntime(MLModel): async def load(self) -> bool: # TODO: Replace for custom logic to load a model artifact self._model = load_my_custom_model() - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: # TODO: Replace for custom logic to run inference @@ -86,7 +85,7 @@ following custom runtime: ```{code-block} python --- -emphasize-lines: 2, 12-13 +emphasize-lines: 2, 11-12 --- from mlserver import MLModel from mlserver.codecs import decode_args @@ -96,8 +95,7 @@ class MyCustomRuntime(MLModel): async def load(self) -> bool: # TODO: Replace for custom logic to load a model artifact self._model = load_my_custom_model() - self.ready = True - return self.ready + return True @decode_args async def predict(self, questions: List[str], context: List[str]) -> np.ndarray: diff --git a/docs/user-guide/metrics.md b/docs/user-guide/metrics.md index da000869c..822c88659 100644 --- a/docs/user-guide/metrics.md +++ b/docs/user-guide/metrics.md @@ -71,7 +71,7 @@ Custom metrics will generally be registered in the {func}`load() ```{code-block} python --- -emphasize-lines: 1, 8, 13 +emphasize-lines: 1, 8, 12 --- import mlserver @@ -81,8 +81,7 @@ class MyCustomRuntime(mlserver.MLModel): async def load(self) -> bool: self._model = load_my_custom_model() mlserver.register("my_custom_metric", "This is a custom metric example") - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: mlserver.log(my_custom_metric=34) diff --git a/mlserver/errors.py b/mlserver/errors.py index 2f5eb92d7..81cc65efb 100644 --- a/mlserver/errors.py +++ b/mlserver/errors.py @@ -26,6 +26,15 @@ def __init__(self, name: str, version: Optional[str] = None): super().__init__(msg, status.HTTP_404_NOT_FOUND) +class ModelNotReady(MLServerError): + def __init__(self, name: str, version: Optional[str] = None): + msg = f"Model {name} is not ready yet." + if version is not None: + msg = f"Model {name} with version {version} is not ready yet." 
+ + super().__init__(msg, status.HTTP_400_BAD_REQUEST) + + class InferenceError(MLServerError): def __init__(self, msg: str): super().__init__(msg, status.HTTP_400_BAD_REQUEST) diff --git a/mlserver/handlers/dataplane.py b/mlserver/handlers/dataplane.py index 72dab8540..15103862d 100644 --- a/mlserver/handlers/dataplane.py +++ b/mlserver/handlers/dataplane.py @@ -4,6 +4,7 @@ ) from typing import Optional +from ..errors import ModelNotReady from ..metrics import model_context from ..settings import Settings from ..registry import MultiModelRegistry @@ -92,6 +93,8 @@ async def infer( payload.id = generate_uuid() model = await self._model_registry.get_model(name, version) + if not model.ready: + raise ModelNotReady(name, version) self._inference_middleware.request_middleware(payload, model.settings) diff --git a/mlserver/model.py b/mlserver/model.py index 448517228..f446afed3 100644 --- a/mlserver/model.py +++ b/mlserver/model.py @@ -67,8 +67,7 @@ async def load(self) -> bool: **This method should be overriden to implement your custom load logic.** """ - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: """ diff --git a/mlserver/registry.py b/mlserver/registry.py index 580b3b314..ed5d7ecf5 100644 --- a/mlserver/registry.py +++ b/mlserver/registry.py @@ -162,7 +162,7 @@ async def _load_model(self, model: MLModel): # Register model again to ensure we save version modified by hooks self._register(model) - await model.load() + model.ready = await model.load() logger.info(f"Loaded model '{model.name}' succesfully.") except Exception: @@ -180,7 +180,7 @@ async def _reload_model(self, old_model: MLModel, new_model: MLModel): # Loading the model before unloading the old one - this will ensure # that at least one is available (sort of mimicking a rolling # deployment) - await new_model.load() + new_model.ready = await new_model.load() self._register(new_model) if old_model == self.default: diff --git a/runtimes/alibi-detect/mlserver_alibi_detect/runtime.py b/runtimes/alibi-detect/mlserver_alibi_detect/runtime.py index 30b05bf5c..1adb2187a 100644 --- a/runtimes/alibi-detect/mlserver_alibi_detect/runtime.py +++ b/runtimes/alibi-detect/mlserver_alibi_detect/runtime.py @@ -72,8 +72,7 @@ async def load(self) -> bool: f"Invalid configuration for model {self._settings.name}: {e}" ) from e - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: # If batch is not configured, run the detector and return the output diff --git a/runtimes/alibi-detect/tests/conftest.py b/runtimes/alibi-detect/tests/conftest.py index 837e5031c..56870786d 100644 --- a/runtimes/alibi-detect/tests/conftest.py +++ b/runtimes/alibi-detect/tests/conftest.py @@ -105,7 +105,7 @@ async def outlier_detector( outlier_detector_settings: ModelSettings, ) -> AlibiDetectRuntime: model = AlibiDetectRuntime(outlier_detector_settings) - await model.load() + model.ready = await model.load() return model @@ -140,6 +140,6 @@ def drift_detector_uri(tmp_path: str) -> str: @pytest.fixture async def drift_detector(drift_detector_settings: ModelSettings) -> AlibiDetectRuntime: model = AlibiDetectRuntime(drift_detector_settings) - await model.load() + model.ready = await model.load() return model diff --git a/runtimes/alibi-explain/mlserver_alibi_explain/explainers/black_box_runtime.py b/runtimes/alibi-explain/mlserver_alibi_explain/explainers/black_box_runtime.py index 8cefdbc29..94b0c9028 100644 --- 
a/runtimes/alibi-explain/mlserver_alibi_explain/explainers/black_box_runtime.py +++ b/runtimes/alibi-explain/mlserver_alibi_explain/explainers/black_box_runtime.py @@ -49,8 +49,7 @@ async def load(self) -> bool: else: self._model = await self._load_from_uri(self._infer_impl) - self.ready = True - return self.ready + return True def _explain_impl(self, input_data: Any, explain_parameters: Dict) -> Explanation: if not self.alibi_explain_settings.explainer_batch: diff --git a/runtimes/alibi-explain/mlserver_alibi_explain/explainers/white_box_runtime.py b/runtimes/alibi-explain/mlserver_alibi_explain/explainers/white_box_runtime.py index 3342d8df9..03011a8ca 100644 --- a/runtimes/alibi-explain/mlserver_alibi_explain/explainers/white_box_runtime.py +++ b/runtimes/alibi-explain/mlserver_alibi_explain/explainers/white_box_runtime.py @@ -39,8 +39,7 @@ async def load(self) -> bool: else: self._model = await self._load_from_uri(self._inference_model) - self.ready = True - return self.ready + return True async def _get_inference_model(self) -> Any: raise NotImplementedError diff --git a/runtimes/alibi-explain/tests/helpers/tf_model.py b/runtimes/alibi-explain/tests/helpers/tf_model.py index c8fb593fb..64051cd21 100644 --- a/runtimes/alibi-explain/tests/helpers/tf_model.py +++ b/runtimes/alibi-explain/tests/helpers/tf_model.py @@ -34,8 +34,7 @@ async def predict(self, payload: InferenceRequest) -> InferenceResponse: async def load(self) -> bool: self._model = tf.keras.models.load_model(get_tf_mnist_model_uri()) - self.ready = True - return self.ready + return True def _train_tf_mnist() -> None: diff --git a/runtimes/huggingface/mlserver_huggingface/runtime.py b/runtimes/huggingface/mlserver_huggingface/runtime.py index 0c62272b8..624a1a5db 100644 --- a/runtimes/huggingface/mlserver_huggingface/runtime.py +++ b/runtimes/huggingface/mlserver_huggingface/runtime.py @@ -74,8 +74,7 @@ async def load(self) -> bool: self._model = load_pipeline_from_settings(self.hf_settings, self.settings) self._merge_metadata() print("model has been loaded!") - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: """ diff --git a/runtimes/huggingface/tests/conftest.py b/runtimes/huggingface/tests/conftest.py index a5235e6f6..e278270ea 100644 --- a/runtimes/huggingface/tests/conftest.py +++ b/runtimes/huggingface/tests/conftest.py @@ -41,7 +41,7 @@ def model_settings() -> ModelSettings: @pytest.fixture(scope="module") async def runtime(model_settings: ModelSettings) -> HuggingFaceRuntime: runtime = HuggingFaceRuntime(model_settings) - await runtime.load() + runtime.ready = await runtime.load() return runtime diff --git a/runtimes/lightgbm/mlserver_lightgbm/lightgbm.py b/runtimes/lightgbm/mlserver_lightgbm/lightgbm.py index 210794340..5b8d5a5e5 100644 --- a/runtimes/lightgbm/mlserver_lightgbm/lightgbm.py +++ b/runtimes/lightgbm/mlserver_lightgbm/lightgbm.py @@ -21,8 +21,7 @@ async def load(self) -> bool: self._model = lgb.Booster(model_file=model_uri) - self.ready = True - return self.ready + return True async def predict(self, payload: types.InferenceRequest) -> types.InferenceResponse: decoded = self.decode_request(payload, default_codec=NumpyRequestCodec) diff --git a/runtimes/lightgbm/tests/conftest.py b/runtimes/lightgbm/tests/conftest.py index b09cda0eb..dc83ce5fa 100644 --- a/runtimes/lightgbm/tests/conftest.py +++ b/runtimes/lightgbm/tests/conftest.py @@ -50,7 +50,7 @@ def model_settings(model_uri: str) -> ModelSettings: @pytest.fixture async def 
model(model_settings: ModelSettings) -> LightGBMModel: model = LightGBMModel(model_settings) - await model.load() + model.ready = await model.load() return model diff --git a/runtimes/lightgbm/tests/test_lightgbm.py b/runtimes/lightgbm/tests/test_lightgbm.py index 4f32f7502..046f686e1 100644 --- a/runtimes/lightgbm/tests/test_lightgbm.py +++ b/runtimes/lightgbm/tests/test_lightgbm.py @@ -24,7 +24,7 @@ async def test_load_folder(fname, model_uri: str, model_settings: ModelSettings) model_settings.parameters.uri = model_path # type: ignore model = LightGBMModel(model_settings) - await model.load() + model.ready = await model.load() assert model.ready assert type(model._model) == lgb.Booster diff --git a/runtimes/mlflow/mlserver_mlflow/runtime.py b/runtimes/mlflow/mlserver_mlflow/runtime.py index dcdd3b064..896d2bf6c 100644 --- a/runtimes/mlflow/mlserver_mlflow/runtime.py +++ b/runtimes/mlflow/mlserver_mlflow/runtime.py @@ -158,8 +158,7 @@ async def load(self) -> bool: self._signature = self._model.metadata.signature self._sync_metadata() - self.ready = True - return self.ready + return True def _sync_metadata(self) -> None: # Update metadata from model signature (if present) diff --git a/runtimes/mlflow/tests/conftest.py b/runtimes/mlflow/tests/conftest.py index 77de57267..3c4aa3748 100644 --- a/runtimes/mlflow/tests/conftest.py +++ b/runtimes/mlflow/tests/conftest.py @@ -110,7 +110,7 @@ def model_settings_pytorch_fixed(pytorch_model_uri) -> ModelSettings: @pytest.fixture async def runtime(model_settings: ModelSettings) -> MLflowRuntime: model = MLflowRuntime(model_settings) - await model.load() + model.ready = await model.load() return model @@ -118,7 +118,7 @@ async def runtime(model_settings: ModelSettings) -> MLflowRuntime: @pytest.fixture async def runtime_pytorch(model_settings_pytorch_fixed: ModelSettings) -> MLflowRuntime: model = MLflowRuntime(model_settings_pytorch_fixed) - await model.load() + model.ready = await model.load() return model diff --git a/runtimes/mllib/mlserver_mllib/mllib.py b/runtimes/mllib/mlserver_mllib/mllib.py index 91b0c5756..35287fd68 100644 --- a/runtimes/mllib/mlserver_mllib/mllib.py +++ b/runtimes/mllib/mlserver_mllib/mllib.py @@ -18,8 +18,7 @@ async def load(self) -> bool: self._model = model_load(sc, model_uri) - self.ready = True - return self.ready + return True async def predict(self, payload: types.InferenceRequest) -> types.InferenceResponse: payload = self._check_request(payload) diff --git a/runtimes/sklearn/mlserver_sklearn/sklearn.py b/runtimes/sklearn/mlserver_sklearn/sklearn.py index 4c959ca38..e722eb926 100644 --- a/runtimes/sklearn/mlserver_sklearn/sklearn.py +++ b/runtimes/sklearn/mlserver_sklearn/sklearn.py @@ -36,8 +36,7 @@ async def load(self) -> bool: ) self._model = joblib.load(model_uri) - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: payload = self._check_request(payload) diff --git a/runtimes/sklearn/tests/conftest.py b/runtimes/sklearn/tests/conftest.py index 9e419e855..d90d26154 100644 --- a/runtimes/sklearn/tests/conftest.py +++ b/runtimes/sklearn/tests/conftest.py @@ -58,7 +58,7 @@ def model_settings(model_uri: str) -> ModelSettings: @pytest.fixture async def model(model_settings: ModelSettings) -> SKLearnModel: model = SKLearnModel(model_settings) - await model.load() + model.ready = await model.load() return model @@ -89,7 +89,7 @@ async def regression_model(tmp_path) -> SKLearnModel: ) model = SKLearnModel(settings) - await model.load() + 
model.ready = await model.load() return model @@ -140,7 +140,7 @@ def pandas_model_settings(pandas_model_uri: str) -> ModelSettings: @pytest.fixture async def pandas_model(pandas_model_settings: ModelSettings) -> SKLearnModel: model = SKLearnModel(pandas_model_settings) - await model.load() + model.ready = await model.load() return model @@ -186,7 +186,7 @@ async def dataframe_model(model_settings: ModelSettings) -> SKLearnModel: dummy = DummyDataframeModel() model = SKLearnModel(model_settings) - await model.load() + model.ready = await model.load() # Replace internal model with dummy model model._model = dummy @@ -241,6 +241,6 @@ async def pandas_preprocessor( pandas_preprocessor_settings: ModelSettings, ) -> SKLearnModel: model = SKLearnModel(pandas_preprocessor_settings) - await model.load() + model.ready = await model.load() return model diff --git a/runtimes/sklearn/tests/test_sklearn.py b/runtimes/sklearn/tests/test_sklearn.py index 6515f41e6..e90f8c890 100644 --- a/runtimes/sklearn/tests/test_sklearn.py +++ b/runtimes/sklearn/tests/test_sklearn.py @@ -42,7 +42,7 @@ async def test_load_folder(fname, model_uri: str, model_settings: ModelSettings) model_settings.parameters.uri = model_path # type: ignore model = SKLearnModel(model_settings) - await model.load() + model.ready = await model.load() assert model.ready assert type(model._model) == DummyClassifier diff --git a/runtimes/xgboost/mlserver_xgboost/xgboost.py b/runtimes/xgboost/mlserver_xgboost/xgboost.py index 57bd6fb83..ba4f0f44d 100644 --- a/runtimes/xgboost/mlserver_xgboost/xgboost.py +++ b/runtimes/xgboost/mlserver_xgboost/xgboost.py @@ -46,8 +46,7 @@ async def load(self) -> bool: self._model = _load_sklearn_interface(model_uri) - self.ready = True - return self.ready + return True def _check_request(self, payload: InferenceRequest) -> InferenceRequest: if not payload.outputs: diff --git a/runtimes/xgboost/tests/conftest.py b/runtimes/xgboost/tests/conftest.py index 0253503f0..e7525332c 100644 --- a/runtimes/xgboost/tests/conftest.py +++ b/runtimes/xgboost/tests/conftest.py @@ -68,7 +68,7 @@ def model_settings(model_uri: str) -> ModelSettings: @pytest.fixture async def model(model_settings: ModelSettings) -> XGBoostModel: model = XGBoostModel(model_settings) - await model.load() + model.ready = await model.load() return model @@ -81,7 +81,7 @@ async def classifier(classifier_uri: str) -> XGBoostModel: parameters=ModelParameters(uri=classifier_uri, version="v1.2.3"), ) model = XGBoostModel(model_settings) - await model.load() + model.ready = await model.load() return model diff --git a/runtimes/xgboost/tests/test_xgboost.py b/runtimes/xgboost/tests/test_xgboost.py index 886f4af2b..39847d08d 100644 --- a/runtimes/xgboost/tests/test_xgboost.py +++ b/runtimes/xgboost/tests/test_xgboost.py @@ -36,7 +36,7 @@ async def test_load_folder(fname, model_uri: str, model_settings: ModelSettings) model_settings.parameters.uri = model_path # type: ignore model = XGBoostModel(model_settings) - await model.load() + model.ready = await model.load() assert model.ready assert type(model._model) == xgb.XGBRegressor diff --git a/tests/cli/test_build_cases.py b/tests/cli/test_build_cases.py index 40bc4666b..f8bccd303 100644 --- a/tests/cli/test_build_cases.py +++ b/tests/cli/test_build_cases.py @@ -4,7 +4,7 @@ from typing import List -from ..conftest import TESTDATA_PATH +from ..conftest import TESTS_PATH, TESTDATA_PATH def _copy_test_files( @@ -26,9 +26,12 @@ def case_no_custom_env(tmp_path: str) -> str: """ Custom model with no custom 
environment required. """ - to_copy = ["models.py"] - model_settings = {"name": "no_custom_env", "implementation": "models.SumModel"} - return _copy_test_files(tmp_path, model_settings, to_copy=to_copy) + src = os.path.join(TESTS_PATH, "fixtures.py") + dst = os.path.join(tmp_path, "fixtures.py") + shutil.copyfile(src, dst) + + model_settings = {"name": "no_custom_env", "implementation": "fixtures.SumModel"} + return _copy_test_files(tmp_path, model_settings, to_copy=[]) def case_environment_yml(tmp_path: str) -> str: diff --git a/tests/cli/test_start_cases.py b/tests/cli/test_start_cases.py index e5e3982d7..642b7f714 100644 --- a/tests/cli/test_start_cases.py +++ b/tests/cli/test_start_cases.py @@ -6,7 +6,7 @@ from mlserver.cli.serve import DEFAULT_SETTINGS_FILENAME from mlserver.repository import DEFAULT_MODEL_SETTINGS_FILENAME -from ..conftest import TESTDATA_PATH +from ..conftest import TESTS_PATH, TESTDATA_PATH def _init_mlserver_folder(tmp_path: str, settings: Settings): @@ -15,9 +15,9 @@ def _init_mlserver_folder(tmp_path: str, settings: Settings): with open(settings_path, "w") as settings_file: settings_file.write(settings.json()) - # Copy models.py module - src_path = os.path.join(TESTDATA_PATH, "models.py") - dst_path = os.path.join(tmp_path, "models.py") + # Copy fixtures.py module + src_path = os.path.join(TESTS_PATH, "fixtures.py") + dst_path = os.path.join(tmp_path, "fixtures.py") shutil.copy(src_path, dst_path) # Write SlowModel's model-settings.json @@ -25,7 +25,7 @@ def _init_mlserver_folder(tmp_path: str, settings: Settings): os.makedirs(model_folder) model_settings_path = os.path.join(model_folder, DEFAULT_MODEL_SETTINGS_FILENAME) with open(model_settings_path, "w") as model_settings_file: - model_settings = {"name": "slow-model", "implementation": "models.SlowModel"} + model_settings = {"name": "slow-model", "implementation": "fixtures.SlowModel"} model_settings_file.write(json.dumps(model_settings)) @@ -52,8 +52,8 @@ def case_custom_module( model_folder = os.path.join(tmp_path, sum_model_settings.name) os.makedirs(model_folder) - # Copy models.py module - src_path = os.path.join(TESTDATA_PATH, "models.py") + # Copy fixtures.py module + src_path = os.path.join(TESTS_PATH, "fixtures.py") dst_path = os.path.join(model_folder, "custom.py") shutil.copy(src_path, dst_path) diff --git a/tests/fixtures.py b/tests/fixtures.py index 46c0aa83b..d9cca9cc9 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -66,8 +66,7 @@ async def load(self) -> bool: if load_error: raise MLServerError(self.error_message) - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: raise MLServerError(self.error_message) @@ -82,8 +81,7 @@ async def predict(self, foo: np.ndarray, bar: List[str]) -> np.ndarray: class SlowModel(MLModel): async def load(self) -> bool: await asyncio.sleep(10) - self.ready = True - return self.ready + return True async def infer(self, payload: InferenceRequest) -> InferenceResponse: await asyncio.sleep(10) @@ -93,8 +91,7 @@ async def infer(self, payload: InferenceRequest) -> InferenceResponse: class EnvModel(MLModel): async def load(self): self._sklearn_version = sklearn.__version__ - self.ready = True - return self.ready + return True async def predict(self, inference_request: InferenceRequest) -> InferenceResponse: return InferenceResponse( diff --git a/tests/handlers/test_dataplane.py b/tests/handlers/test_dataplane.py index b8d6479e7..705c672e7 100644 --- a/tests/handlers/test_dataplane.py +++ 
b/tests/handlers/test_dataplane.py @@ -1,6 +1,7 @@ import pytest import uuid +from mlserver.errors import ModelNotReady from mlserver.settings import ModelSettings, ModelParameters from mlserver.types import MetadataTensor @@ -95,6 +96,16 @@ async def test_infer(data_plane, sum_model, inference_request): assert prediction.outputs[0].data.__root__ == [6] +async def test_infer_error_not_ready(data_plane, sum_model, inference_request): + sum_model.ready = False + with pytest.raises(ModelNotReady): + await data_plane.infer(payload=inference_request, name=sum_model.name) + + sum_model.ready = True + prediction = await data_plane.infer(payload=inference_request, name=sum_model.name) + assert len(prediction.outputs) == 1 + + async def test_infer_generates_uuid(data_plane, sum_model, inference_request): inference_request.id = None prediction = await data_plane.infer( diff --git a/tests/metrics/test_custom.py b/tests/metrics/test_custom.py index 8bf02f813..022d9409f 100644 --- a/tests/metrics/test_custom.py +++ b/tests/metrics/test_custom.py @@ -20,9 +20,8 @@ class CustomMetricsModel(MLModel): async def load(self) -> bool: metrics.register(CUSTOM_METRIC_NAME, "Test custom counter") - self.ready = True self.reqs = 0 - return self.ready + return True async def predict(self, req: InferenceRequest) -> InferenceResponse: self.reqs += 1 diff --git a/tests/repository/test_load.py b/tests/repository/test_load.py index 1aff0a118..890b8a3e2 100644 --- a/tests/repository/test_load.py +++ b/tests/repository/test_load.py @@ -9,16 +9,16 @@ from mlserver.repository.load import load_model_settings from mlserver.settings import ModelSettings -from ..conftest import TESTDATA_PATH +from ..conftest import TESTS_PATH @pytest.fixture def custom_module_settings_path( sum_model_settings: ModelSettings, tmp_path: str ) -> str: - # Copy models.py, which acts as custom module - src = os.path.join(TESTDATA_PATH, "models.py") - dst = os.path.join(tmp_path, "models.py") + # Copy fixtures.py, which acts as custom module + src = os.path.join(TESTS_PATH, "fixtures.py") + dst = os.path.join(tmp_path, "fixtures.py") shutil.copyfile(src, dst) # Add modified settings, pointing to local module @@ -26,7 +26,7 @@ def custom_module_settings_path( with open(model_settings_path, "w") as f: settings_dict = sum_model_settings.dict() # Point to local module - settings_dict["implementation"] = "models.SumModel" + settings_dict["implementation"] = "fixtures.SumModel" f.write(json.dumps(settings_dict)) return model_settings_path @@ -70,4 +70,4 @@ async def test_load_custom_module( assert pre_sys_path == post_sys_path assert model_settings.name == sum_model_settings.name - assert model_settings.implementation_ == "models.SumModel" + assert model_settings.implementation_ == "fixtures.SumModel" diff --git a/tests/test_registry.py b/tests/test_registry.py index b0aecae31..9c2d73fe0 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -67,6 +67,7 @@ async def test_get_model_not_found(model_registry, name, version): ) async def test_get_model(model_registry, sum_model, name, version): found_model = await model_registry.get_model(name, version) + assert found_model.ready assert found_model == sum_model @@ -76,6 +77,8 @@ async def test_model_hooks( sum_model_settings.name = "sum-model-2" sum_model = await model_registry.load(sum_model_settings) + assert sum_model.ready + for callback in model_registry._on_model_load: callback.assert_called_once_with(sum_model) @@ -93,6 +96,7 @@ async def test_reload_model( reloaded_model = await 
model_registry.get_model(sum_model_settings.name) assert new_model != existing_model assert new_model == reloaded_model + assert reloaded_model.ready for callback in model_registry._on_model_load: callback.assert_not_called() @@ -114,6 +118,7 @@ async def test_load_multi_version( new_model_settings = sum_model_settings.copy(deep=True) new_model_settings.parameters.version = "v2.0.0" new_model = await model_registry.load(new_model_settings) + assert new_model.ready # Ensure latest model is now the default one default_model = await model_registry.get_model(sum_model_settings.name) diff --git a/tests/test_settings.py b/tests/test_settings.py index 28a194284..3d97eaffb 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -6,7 +6,7 @@ from mlserver.settings import CORSSettings, Settings, ModelSettings, ModelParameters from mlserver.repository import DEFAULT_MODEL_SETTINGS_FILENAME -from .conftest import TESTDATA_PATH +from .conftest import TESTDATA_PATH, TESTS_PATH def test_settings_from_env(monkeypatch): @@ -60,9 +60,9 @@ def test_model_settings_from_env(monkeypatch): ({"name": "foo", "implementation": "tests.fixtures.SumModel"}), ( { - "_source": os.path.join(TESTDATA_PATH, DEFAULT_MODEL_SETTINGS_FILENAME), + "_source": os.path.join(TESTS_PATH, DEFAULT_MODEL_SETTINGS_FILENAME), "name": "foo", - "implementation": "models.SumModel", + "implementation": "fixtures.SumModel", } ), ], diff --git a/tests/testdata/env_models.py b/tests/testdata/env_models.py index 55aa7e71c..1192990e3 100644 --- a/tests/testdata/env_models.py +++ b/tests/testdata/env_models.py @@ -23,8 +23,7 @@ async def load(self) -> bool: self._model = DummyClassifier(strategy="prior") self._model.fit(X, y) - self.ready = True - return self.ready + return True async def predict(self, payload: InferenceRequest) -> InferenceResponse: decoded = self.decode_request(payload, default_codec=NumpyRequestCodec) diff --git a/tests/testdata/models.py b/tests/testdata/models.py deleted file mode 100644 index 9d6dfe9a0..000000000 --- a/tests/testdata/models.py +++ /dev/null @@ -1,26 +0,0 @@ -import asyncio - -from mlserver import MLModel - -from mlserver.codecs import NumpyCodec -from mlserver.types import InferenceRequest, InferenceResponse - - -class SumModel(MLModel): - async def predict(self, payload: InferenceRequest) -> InferenceResponse: - decoded = self.decode(payload.inputs[0]) - total = decoded.sum(axis=1, keepdims=True) - - output = NumpyCodec().encode(name="total", payload=total) - return InferenceResponse(id=payload.id, model_name=self.name, outputs=[output]) - - -class SlowModel(MLModel): - async def load(self) -> bool: - await asyncio.sleep(10) - self.ready = True - return self.ready - - async def infer(self, payload: InferenceRequest) -> InferenceResponse: - await asyncio.sleep(10) - return InferenceResponse(id=payload.id, model_name=self.name, outputs=[])
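
The changes above converge on a single contract: a runtime's `load()` simply returns `True` on success, the `MultiModelRegistry` records that result via `model.ready = await model.load()`, and the data plane rejects inference against a model whose `ready` flag is still `False` by raising the new `ModelNotReady` error (HTTP 400). Below is a minimal sketch of a custom runtime written against that contract; the `EchoModel` class, its settings, and the request payload are hypothetical and only illustrate the pattern, mirroring the fixtures and conftest updates in this diff.

```python
import asyncio

import numpy as np

from mlserver import MLModel
from mlserver.codecs import decode_args
from mlserver.settings import ModelSettings
from mlserver.types import InferenceRequest, RequestInput


class EchoModel(MLModel):
    async def load(self) -> bool:
        # Custom loading logic would go here; there is no need to set
        # self.ready any more -- just report success.
        return True

    @decode_args
    async def predict(self, foo: np.ndarray) -> np.ndarray:
        # Echo the decoded input back as the prediction.
        return foo


async def main():
    # Hypothetical settings for the illustrative EchoModel above.
    settings = ModelSettings(name="echo-model", implementation=EchoModel)
    model = EchoModel(settings)

    # Mirror what MultiModelRegistry._load_model() now does: the registry,
    # not the runtime, flips the ready flag based on load()'s return value.
    model.ready = await model.load()
    assert model.ready

    payload = InferenceRequest(
        inputs=[
            RequestInput(name="foo", shape=[1, 3], datatype="FP32", data=[1.0, 2.0, 3.0])
        ]
    )
    response = await model.predict(payload)
    print(response.outputs[0].data)

    # If model.ready were still False, DataPlane.infer() would raise
    # mlserver.errors.ModelNotReady instead of running predict().


if __name__ == "__main__":
    asyncio.run(main())
```

In test code, the same assignment (`model.ready = await model.load()`) replaces the bare `await model.load()` calls so that assertions such as `assert model.ready` keep passing without each runtime mutating its own readiness state.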