Skip to content

Commit

Permalink
Improvements after PR review.
Browse files Browse the repository at this point in the history
  • Loading branch information
mvlvrd committed Nov 21, 2024
1 parent 1eb1435 commit 4c5e3ea
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 118 deletions.
1 change: 1 addition & 0 deletions doc/api/nonparametric.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Non-parametric Estimators

CensoringDistributionEstimator
SurvivalFunctionEstimator
cumulative_incidence_competing_risks
ipc_weights
kaplan_meier_estimator
nelson_aalen_estimator
83 changes: 24 additions & 59 deletions sksurv/nonparametric.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"nelson_aalen_estimator",
"ipc_weights",
"SurvivalFunctionEstimator",
"km_cumulative_incidence_competing_risks",
"cumulative_incidence_competing_risks",
]


Expand Down Expand Up @@ -55,7 +55,7 @@ def _compute_counts(event, time, order=None):
n_events : array
Number of events at each time point.
2D array (unique time points, n risks + 1) in the case of competing risks.
2D array with shape `(n_unique_time_points, n_risks + 1)` in the case of competing risks.
n_at_risk : array
Number of samples that have not been censored or have not had an event at each time point.
Expand Down Expand Up @@ -601,49 +601,26 @@ def predict_ipcw(self, y):
return weights


def km_cumulative_incidence_competing_risks(
event,
time_exit,
time_enter=None,
time_min=None,
reverse=None,
conf_level=None,
conf_type=None
):
"""Kaplan-Meier estimator of Cumulative Incidence function in the case of competing risks.
def cumulative_incidence_competing_risks(event, time_exit, time_min=None):
"""Non-parametric estimator of Cumulative Incidence function in the case of competing risks.
See [1]_ for further description.
Parameters
----------
event : array-like, shape = (n_samples)
event : array-like, shape = (n_samples,)
Contains event indicators.
time_exit : array-like, shape = (n_samples,)
Contains event/censoring times.
time_enter : array-like, shape = (n_samples,), optional
Contains time when each individual entered the study for
left truncated survival data.
Not implemented
Contains event/censoring times. '0' indicates right-censoring.
Positive integers (between 1 and n_risks, n_risks being the total number of different risks)
indicate the possible different risks.
It assumes there are events for all possible risks.
time_min : float, optional
time_min : float, optional, default: None
Compute estimator conditional on survival at least up to
the specified time.
reverse : bool, optional, default: False
Whether to estimate the censoring distribution.
Not implemented for competing risks.
Can be obtained using regular Kaplan-Meier.
conf_level : float, optional, default: 0.95
The level for a two-sided confidence interval on the survival curves.
conf_type : None or {'log-log'}, optional, default: None.
The type of confidence intervals to estimate.
If `None`, no confidence intervals are estimated.
Not implemented.
Returns
-------
time : array, shape = (n_times,)
Expand All @@ -656,20 +633,18 @@ def km_cumulative_incidence_competing_risks(
Examples
--------
Creating a Kaplan-Meier curve:
>>> x, y = km_cumulative_incidence_competing_risks(event, time)
>>> for i=range(1, n_risks + 1):
>>> plt.step(x, y[i], where="post", label=i)
Creating cumulative incidence curves:
>>> bmt_df = pd.read_csv("tests/data/bmt.csv", sep=";", skiprows=4)
>>> event = bmt_df["status"].values
>>> time = bmt_df["ftime"].values
>>> x, y = cumulative_incidence_competing_risks(event, time)
>>> plt.step(x, y[0], where="post", label="Total risk")
>>> for i in range(1, n_risks + 1):
>>> plt.step(x, y[i], where="post", label=f"{i}-risk")
>>> plt.ylim(0, 1)
>>> plt.legend()
>>> plt.show()
See also
--------
sksurv.nonparametric.SurvivalFunctionEstimator
Estimator API of the Kaplan-Meier estimator.
References
----------
.. [1] Kalbfleisch, J.D. and Prentice, R.L. (2002)
Expand All @@ -678,15 +653,6 @@ def km_cumulative_incidence_competing_risks(
event, time_exit = check_y_survival(event, time_exit, allow_all_censored=True, competing_risks=True)
check_consistent_length(event, time_exit)

if time_enter is not None:
raise NotImplementedError("Left censored data is not implemented.")

if conf_level is not None or conf_type is not None:
raise NotImplementedError("Confidence error estimation is not implemented")

if reverse is not None:
raise NotImplementedError("Not implemented for competing risks. Use kaplan_meier_estimator instead.")

n_risks = event.max()
uniq_times, n_events_cr, n_at_risk, _n_censored = _compute_counts(event, time_exit)

Expand All @@ -696,20 +662,19 @@ def km_cumulative_incidence_competing_risks(
n_events_cr,
n_at_risk[..., np.newaxis],
out=np.zeros((n_t, n_risks + 1), dtype=float),
where=n_events_cr != 0
where=n_events_cr != 0,
)

if time_min is not None:
mask = uniq_times >= time_min
uniq_times = np.compress(mask, uniq_times)
ratio = np.compress(mask, ratio, axis=0)

kpe_cum_inc = np.empty((n_risks + 1, n_t), dtype=float)
kpe = np.cumprod(1.0 - ratio[:, 0])
kpe_prime = np.ones(shape=kpe.shape, dtype=kpe.dtype)
kpe_prime = np.ones_like(kpe)
kpe_prime[1:] = kpe[:-1]
for i in range(1, n_risks + 1):
kpe_cum_inc[i] = np.cumsum(ratio[:, i] * kpe_prime)
kpe_cum_inc[0] = 1.0 - kpe
cum_inc = np.empty((n_risks + 1, n_t), dtype=float)
cum_inc[1 : n_risks + 1] = np.cumsum((ratio[:, 1 : n_risks + 1].T * kpe_prime), axis=1)
cum_inc[0] = 1.0 - kpe

return uniq_times, kpe_cum_inc
return uniq_times, cum_inc
11 changes: 7 additions & 4 deletions sksurv/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False, allow_time_zer
as first field, and time of event or time of censoring as
second field. Otherwise, it is assumed that a boolean array
representing the event indicator is passed.
If competing_risks is True it should be a non-negative valued integer array.
If competing_risks is True it should be a non-negative valued integer array,
also all risks must appear at least once in the event array.
*args : list of array-likes
Any number of array-like objects representing time information.
Expand Down Expand Up @@ -148,6 +149,8 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False, allow_time_zer

event = check_array(y_event, ensure_2d=False)
check_event_dtype(event, competing_risks)
if competing_risks and not np.all(np.isin(range(1, np.max(event) + 1), event)):
raise ValueError("Some risks do not appear in the event array.")

if not (allow_all_censored or np.any(event)):
raise ValueError("all samples are censored")
Expand Down Expand Up @@ -178,13 +181,13 @@ def check_y_survival(y_or_event, *args, allow_all_censored=False, allow_time_zer

def check_event_dtype(event, competing_risks=False):
"""Check that the event array has the correct dtypes:
Boolean for the general case and Intger in the case of competing risks.
Boolean for the general case and Integer in the case of competing risks.
Parameters
----------
event : numpy array
event : array, shape=[n_samples,], dtype=bool|integer
Array containing the censoring events.
competing_risks : boolean
competing_risks : bool, optional, default: False
Boolean that indicates the case of competing risks.
"""
if competing_risks:
Expand Down
Loading

0 comments on commit 4c5e3ea

Please sign in to comment.