Skip to content

Commit

Permalink
Move txt data file to arff format.
Browse files Browse the repository at this point in the history
  • Loading branch information
mvlvrd committed Dec 6, 2024
1 parent da316fc commit 5ef4d0a
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 120 deletions.
21 changes: 9 additions & 12 deletions sksurv/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,17 +544,14 @@ def load_cgvhd():
.. [2] https://drive.google.com/file/d/1FPM264pE\_-F8DB7lvFeLB1yQ3HDo7c-i/view
https://sites.google.com/view/melaniapintiliemscstatistics/home/statistics
"""
full_path = _get_data_path("cgvhd.txt")

df = pd.read_csv(full_path)
df["ftime"] = df[["survtime", "reltime", "cgvhtime"]].min(axis=1)
df["status"] = (
((df["ftime"] == df["cgvhtime"]) & (df["cgvh"] == 1)).astype(int)
+ 2 * ((df["ftime"] == df["reltime"]) & (df["rcens"] == 1)).astype(int)
+ 3 * ((df["ftime"] == df["survtime"]) & (df["stat"] == 1)).astype(int)
full_path = _get_data_path("cgvhd.arff")
data = loadarff(full_path)
data["ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1)
data["status"] = (
((data["ftime"] == data["cgvhtime"]) & (data["cgvh"] == "1")).astype(int)
+ 2 * ((data["ftime"] == data["reltime"]) & (data["rcens"] == "1")).astype(int)
+ 3 * ((data["ftime"] == data["survtime"]) & (data["stat"] == "1")).astype(int)
)
df = df[["ftime", "status", "tx"]]
ftime, event = df["ftime"].values, df["status"].values
data = data[["ftime", "status", "dx", "tx", "extent", "age"]]

# return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True)
return None, {"ftime": ftime, "status": event}
return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True)
118 changes: 118 additions & 0 deletions sksurv/datasets/data/cgvhd.arff
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
@RELATION CGVHD

@ATTRIBUTE dx {CML, AML}
@ATTRIBUTE tx {PB, BM}
@ATTRIBUTE extent {L, E}
@ATTRIBUTE agvhdgd NUMERIC
@ATTRIBUTE age NUMERIC
@ATTRIBUTE survtime NUMERIC
@ATTRIBUTE reltime NUMERIC
@ATTRIBUTE agvhtime NUMERIC
@ATTRIBUTE cgvhtime NUMERIC
@ATTRIBUTE stat {0, 1}
@ATTRIBUTE rcens {0, 1}
@ATTRIBUTE agvh {1, 0}
@ATTRIBUTE cgvh {1, 0}
@ATTRIBUTE stnum NUMERIC

@DATA
CML,PB,L,1,36,4.895,4.895,0.099,0.52,0,0,1,1,1
AML,PB,L,3,57,3.474,0.753,0.101,0.408,1,1,1,1,2
CML,PB,L,0,48,4.95,4.95,4.95,0.348,0,0,0,1,3
AML,PB,L,2,52,4.643,4.643,0.057,0.482,0,0,1,1,4
AML,PB,L,3,45,4.066,4.066,0.137,0.378,0,0,1,1,5
AML,PB,L,3,47,1.558,0.416,0.055,1.558,1,1,1,0,6
CML,PB,L,1,40,4.512,4.512,0.09,0.381,0,0,1,1,7
AML,PB,L,3,38,4.041,4.041,0.082,0.914,0,0,1,1,8
AML,PB,L,2,41,4.164,4.164,0.055,0.923,0,0,1,1,9
CML,PB,L,0,50,4.011,4.011,4.011,0.397,0,0,0,1,10
CML,PB,L,1,56,3.945,3.945,0.047,0.479,0,0,1,1,11
CML,PB,L,2,56,4.361,4.361,0.079,0.991,0,0,1,1,12
AML,PB,L,1,54,0.841,0.654,0.077,0.474,1,1,1,1,13
CML,PB,L,3,25,2.951,2.951,0.164,0.339,0,0,1,1,14
CML,PB,L,4,40,0.586,0.586,0.055,0.277,1,0,1,1,15
CML,PB,L,0,41,3.559,3.559,3.559,0.367,0,0,0,1,16
CML,PB,L,2,57,3.422,3.422,0.131,0.742,0,0,1,1,17
CML,PB,L,3,62,0.408,0.408,0.408,0.408,0,0,1,1,18
CML,PB,L,1,29,3.428,3.428,0.09,0.958,0,0,1,1,19
AML,PB,L,1,44,0.063,0.063,0.014,0.063,1,0,1,0,20
CML,PB,L,2,40,1.572,1.572,0.09,0.282,1,0,1,1,21
CML,PB,L,1,54,1.013,1.013,0.093,0.413,1,0,1,1,22
AML,PB,L,2,37,3.023,3.023,0.074,0.394,0,0,1,1,23
AML,PB,L,1,58,2.979,2.979,0.079,0.342,0,0,1,1,24
CML,PB,L,3,39,2.817,2.817,0.049,0.367,0,0,1,1,25
CML,PB,L,2,31,2.804,2.804,0.137,0.277,0,0,1,1,26
CML,PB,L,2,45,2.609,2.609,0.252,0.367,0,0,1,1,27
AML,PB,L,0,48,2.508,2.508,2.508,0.331,0,0,0,1,28
CML,PB,L,0,53,0.665,0.665,0.665,0.32,1,0,0,1,29
CML,PB,L,0,29,2.497,2.497,2.497,0.329,0,0,0,1,30
CML,PB,L,0,27,1.799,1.799,1.799,0.444,1,0,0,1,31
AML,PB,L,3,45,0.471,0.438,0.071,0.471,1,1,1,0,32
CML,PB,L,1,39,2.031,2.031,0.112,0.964,0,0,1,1,33
CML,PB,L,3,49,2.073,2.073,0.063,0.564,0,0,1,1,34
AML,PB,L,1,37,0.999,0.75,0.274,0.402,1,1,1,1,35
AML,PB,L,3,53,0.427,0.427,0.055,0.277,1,0,1,1,36
CML,PB,L,1,48,1.766,1.766,0.216,0.4,0,0,1,1,37
AML,PB,L,1,59,1.555,1.555,0.178,0.446,0,0,1,1,38
CML,PB,L,2,33,1.67,1.67,0.11,0.474,0,0,1,1,39
CML,PB,L,0,38,1.607,1.607,1.607,0.329,0,0,0,1,40
CML,PB,L,4,37,1.511,1.511,0.055,0.323,0,0,1,1,41
AML,PB,L,3,41,1.287,1.287,0.049,0.392,0,0,1,1,42
AML,PB,E,1,64,1.227,1.227,0.23,0.496,0,0,1,1,43
CML,PB,L,3,32,1.3,1.3,0.063,0.63,0,0,1,1,44
CML,PB,L,0,41,1.27,1.27,1.27,0.383,0,0,0,1,45
AML,PB,E,1,56,1.205,1.205,0.074,1.205,0,0,1,0,46
CML,PB,L,1,50,1.147,1.147,0.131,0.361,0,0,1,1,47
CML,PB,L,3,37,1.109,1.109,0.055,0.277,0,0,1,1,48
CML,PB,L,0,27,0.994,0.994,0.994,0.287,0,0,0,1,49
CML,BM,L,3,45,4.572,4.572,0.066,0.619,0,0,1,1,50
AML,BM,L,3,45,4.616,4.616,0.101,0.452,0,0,1,1,51
AML,BM,L,2,42,4.0,4.0,0.027,0.29,0,0,1,1,52
CML,BM,L,0,22,4.238,4.238,4.238,0.479,0,0,0,1,53
AML,BM,L,4,47,0.11,0.11,0.074,0.11,1,0,1,0,54
AML,BM,L,2,48,4.03,4.03,0.101,0.857,0,0,1,1,55
AML,BM,L,2,49,3.124,2.527,0.115,1.993,1,1,1,1,56
CML,BM,L,2,38,0.515,0.515,0.079,0.463,1,0,1,1,57
CML,BM,L,1,39,4.222,3.149,0.085,0.496,0,1,1,1,58
CML,BM,L,3,41,4.027,4.027,0.104,0.422,0,0,1,1,59
CML,BM,L,2,46,1.969,1.969,0.038,0.307,1,0,1,1,60
AML,BM,L,0,24,3.792,3.792,3.792,0.701,0,0,0,1,61
AML,BM,L,3,32,0.427,0.427,0.041,0.279,1,0,1,1,62
CML,BM,L,0,36,3.34,3.34,3.34,0.419,0,0,0,1,63
CML,BM,L,1,53,3.504,0.72,0.112,0.616,0,1,1,1,64
CML,BM,L,0,52,3.685,3.685,3.685,0.331,0,0,0,1,65
CML,BM,L,1,59,0.181,0.181,0.049,0.181,1,0,1,0,66
CML,BM,L,3,42,0.736,0.736,0.09,0.567,1,0,1,1,67
CML,BM,L,1,65,0.287,0.287,0.052,0.287,1,0,1,0,68
CML,BM,E,0,60,0.057,0.057,0.057,0.057,0,0,0,0,69
CML,BM,L,2,61,3.107,3.107,0.088,0.764,0,0,1,1,70
CML,BM,L,1,55,3.088,3.088,0.11,0.381,0,0,1,1,71
AML,BM,E,0,48,0.446,0.274,0.446,0.446,1,1,0,0,72
AML,BM,E,0,49,2.776,2.776,2.776,2.776,0,0,0,0,73
CML,BM,L,0,36,0.693,0.172,0.693,0.635,1,1,0,1,74
AML,BM,L,1,48,2.01,2.01,0.077,0.553,0,0,1,1,75
CML,BM,L,0,47,2.374,2.374,2.374,0.287,0,0,0,1,76
AML,BM,L,3,43,1.079,1.079,0.088,0.345,1,0,1,1,77
CML,BM,L,0,56,2.604,2.604,2.604,0.375,0,0,0,1,78
CML,BM,L,1,56,2.478,2.478,0.17,0.517,0,0,1,1,79
CML,BM,L,0,36,2.338,2.338,2.338,0.457,0,0,0,1,80
CML,BM,L,2,52,2.3,2.3,0.049,0.345,0,0,1,1,81
CML,BM,E,1,44,0.219,0.219,0.145,0.219,1,0,1,0,82
AML,BM,L,3,32,2.127,2.127,0.118,0.422,0,0,1,1,83
AML,BM,L,1,44,2.034,2.034,0.096,0.479,0,0,1,1,84
CML,BM,L,0,45,2.034,2.034,2.034,0.29,0,0,0,1,85
AML,BM,L,3,48,2.007,2.007,0.088,0.35,0,0,1,1,86
CML,BM,L,0,48,1.183,1.183,1.183,0.372,0,0,0,1,87
AML,BM,L,3,42,0.375,0.375,0.096,0.277,1,0,1,1,88
AML,BM,E,2,24,0.353,0.301,0.096,0.353,1,1,1,0,89
CML,BM,L,2,26,1.566,1.566,0.137,0.474,0,0,1,1,90
CML,BM,L,2,34,1.588,1.588,0.129,0.465,0,0,1,1,91
CML,BM,L,0,57,1.243,1.243,1.243,0.433,0,0,0,1,92
CML,BM,L,3,51,1.555,1.555,0.09,0.359,0,0,1,1,93
AML,BM,L,2,54,1.202,1.202,0.192,1.202,0,0,1,0,94
AML,BM,E,0,20,1.251,1.251,1.251,0.408,0,0,0,1,95
AML,BM,L,2,39,1.114,1.114,0.074,0.402,0,0,1,1,96
AML,BM,L,0,49,1.15,1.15,1.15,0.35,0,0,0,1,97
CML,BM,L,1,42,0.997,0.997,0.142,0.411,0,0,1,1,98
CML,BM,L,0,44,1.057,1.057,1.057,0.301,0,0,0,1,99
CML,BM,L,1,56,1.125,1.125,0.129,0.32,0,0,1,1,100
101 changes: 0 additions & 101 deletions sksurv/datasets/data/cgvhd.txt

This file was deleted.

14 changes: 7 additions & 7 deletions sksurv/nonparametric.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,11 +740,11 @@ def cumulative_incidence_competing_risks(
return uniq_times, cum_inc

if var_type == "Dinse":
var = var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc)
var = _var_dinse(n_events_cr, kpe_prime, n_at_risk, cum_inc)

Check warning on line 743 in sksurv/nonparametric.py

View check run for this annotation

Codecov / codecov/patch

sksurv/nonparametric.py#L743

Added line #L743 was not covered by tests
elif var_type == "Dinse_Approx":
var = var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc)
var = _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc)
elif var_type == "Aalen":
var = var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc)
var = _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc)
else:
raise ValueError(f"{var_type=} not implemented.")

Check warning on line 749 in sksurv/nonparametric.py

View check run for this annotation

Codecov / codecov/patch

sksurv/nonparametric.py#L749

Added line #L749 was not covered by tests

Expand All @@ -756,11 +756,11 @@ def cumulative_incidence_competing_risks(
return uniq_times, cum_inc, ci


def var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc):
def _var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc):
"""
Variance estimator from Dinse and Larson, Biometrika (1986), 379
See Section 4, Eqs. 6.
This is an approximation from the var_dinse, so that one should be preferred.
This is an approximation of _var_dinse, so _var_dinse should be preferred.
However, this approximation seems to be more common in the literature.
"""
dr = n_events_cr[:, 0]
Expand All @@ -776,7 +776,7 @@ def var_dinse_approx(n_events_cr, kpe_prime, n_at_risk, cum_inc):
return var


def var_dinse(n_events_cr, kpe_prime, n_at_risk):
def _var_dinse(n_events_cr, kpe_prime, n_at_risk):
"""
Variance estimator from Dinse and Larson, Biometrika (1986), 379
See Section 4, Eqs. 4 and 5
Expand Down Expand Up @@ -804,7 +804,7 @@ def var_dinse(n_events_cr, kpe_prime, n_at_risk):
return var

Check warning on line 804 in sksurv/nonparametric.py

View check run for this annotation

Codecov / codecov/patch

sksurv/nonparametric.py#L804

Added line #L804 was not covered by tests


def var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc):
def _var_aalen(n_events_cr, kpe_prime, n_at_risk, cum_inc):
"""
Variance estimator from Aalen
Aalen, O. (1978a). Nonparametric estimation of partial transition
Expand Down

0 comments on commit 5ef4d0a

Please sign in to comment.