From b962f131bbc5c489d298f1077bfcc05da023a729 Mon Sep 17 00:00:00 2001 From: Brian Healy <42810347+bfhealy@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:33:32 -0400 Subject: [PATCH] Enable weighted_std_dev feature (#324) --- cesium/features/graphs.py | 4 ++++ cesium/features/tests/data/expected_features.csv | 8 ++++---- cesium/features/tests/test_general_features.py | 12 ++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/cesium/features/graphs.py b/cesium/features/graphs.py index 71ae7cdb..4217d53f 100644 --- a/cesium/features/graphs.py +++ b/cesium/features/graphs.py @@ -22,6 +22,7 @@ kurtosis, std, weighted_average, + weighted_std_dev, anderson_darling, shapiro_wilk, ) @@ -151,6 +152,7 @@ "stetson_k", "shapiro_wilk", "weighted_average", + "weighted_std_dev", ], "Lomb-Scargle (Periodic)": [ "fold2P_slope_10percentile", @@ -283,6 +285,7 @@ "stetson_k": (stetson_k, "m"), "shapiro_wilk": (shapiro_wilk, "m", "e"), "weighted_average": (weighted_average, "m", "e"), + "weighted_std_dev": (weighted_std_dev, "m", "e"), # QSO model features "qso_model": (qso_fit, "t", "m", "e"), "qso_log_chi2_qsonu": (get_qso_log_chi2_qsonu, "qso_model"), @@ -457,6 +460,7 @@ def generate_dask_graph(t, m, e): "stetson_k": ["Astronomy", "General"], "shapiro_wilk": ["Astronomy", "General"], "weighted_average": ["Astronomy", "General"], + "weighted_std_dev": ["Astronomy", "General"], # QSO model features "qso_model": ["Astronomy"], "qso_log_chi2_qsonu": ["Astronomy"], diff --git a/cesium/features/tests/data/expected_features.csv b/cesium/features/tests/data/expected_features.csv index ec39dcd1..f03d3973 100644 --- a/cesium/features/tests/data/expected_features.csv +++ b/cesium/features/tests/data/expected_features.csv @@ -1,4 +1,4 @@ -amplitude,anderson_darling,flux_percentile_ratio_mid20,flux_percentile_ratio_mid35,flux_percentile_ratio_mid50,flux_percentile_ratio_mid65,flux_percentile_ratio_mid80,fold2P_slope_10percentile,fold2P_slope_90percentile,freq1_amplitude1,freq1_amplitude2,freq1_amplitude3,freq1_amplitude4,freq1_freq,freq1_lambda,freq1_rel_phase2,freq1_rel_phase3,freq1_rel_phase4,freq1_signif,freq2_amplitude1,freq2_amplitude2,freq2_amplitude3,freq2_amplitude4,freq2_freq,freq2_rel_phase2,freq2_rel_phase3,freq2_rel_phase4,freq3_amplitude1,freq3_amplitude2,freq3_amplitude3,freq3_amplitude4,freq3_freq,freq3_rel_phase2,freq3_rel_phase3,freq3_rel_phase4,freq_amplitude_ratio_21,freq_amplitude_ratio_31,freq_frequency_ratio_21,freq_frequency_ratio_31,freq_model_max_delta_mags,freq_model_min_delta_mags,freq_model_phi1_phi2,freq_n_alias,freq_signif_ratio_21,freq_signif_ratio_31,freq_varrat,freq_y_offset,linear_trend,max_slope,maximum,median,median_absolute_deviation,medperc90_2p_p,minimum,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,percent_amplitude,percent_beyond_1_std,percent_close_to_median,percent_difference_flux_percentile,period_fast,qso_log_chi2_qsonu,qso_log_chi2nuNULL_chi2nu,scatter_res_raw,shapiro_wilk,skew,std,stetson_j,stetson_k,weighted_average -0.4695,5.398344616118379,0.1391191698,0.255495667,0.3933558399,0.5357113476,0.7345991397,-3.4444531503,3.3307906791,0.1013563889,0.0142452789,0.0005442693,0.0010724211,6.0688970237,5.4934900906,-1.8107758352,2.090252784,1.3995008795,11.2681277508,0.0315886229,0.0018222864,0.0006995279,0.0002360676,2.3250069312,0.1572123843,1.9347274633,1.8081434543,0.0290885372,0.0009549442,0.0004769511,0.0001788471,9.1142277619,-0.372819872,2.4993977233,-1.5869169231,0.3116589224,0.2869926359,0.3831020566,1.5017931144,8.52055231926E-11,3.05807826284E-09,0.3927107357,0,0.4208495586,0.4030330277,6.88293697906E-05,-0.0029369825,2.4986485735E-05,0.31574689,13.869,13.295,0.088,0.9631850179,12.93,0.7409749222,1.2556818182,1.0397727273,1.7838983953,0.4106137498,0.29263158,0.5305263158,0.4140128815,27.4480915,1.9335536941,0.1127558354,0.7102264604,0.9654412865638733,0.5536755309,0.1392362364,0.1863107801,0.958934476,13.30343644 -0.365,0.8747606698646564,0.1773462194,0.3114327492,0.436904049,0.5882967978,0.7672402192,-1.92492705,2.0255490809,0.0415961966,0.0009118138,0.0005114711,0.0002122479,8.3859538513,8.6831998581,-1.5722344384,-2.6650322107,-1.8396554581,12.0229612753,0.0093384569,0.0009404195,0.0001609345,3.64872550248E-05,2.1464473663,-1.9173684556,2.0252829445,1.2310896164,0.0108355446,0.0013703744,0.0002473447,0.0001088843,10.5167443839,-2.2392773646,-2.7672119788,0.9033347702,0.2245026632,0.260493639,0.2559574503,1.2540904196,2.122405654E-10,8.91044751872E-10,0.3559372874,0,0.327293423,0.3407100021,1.80627698458E-05,0.0012543862,1.97883821107E-06,15.90909091,10.46,9.997,0.028,0.9540692349,9.73,0.7226474127,1.3214285714,0.8214285714,1.8128582015,0.3471701558,0.186,0.94,0.1139420046,22.91634888,0.3587730502,0.2151954704,0.5705804968,0.9896652698516846,2.96462376,0.0554298829,-0.8531922756,0.7876747619,10.00258434 -2.1945,2.136304480106446,0.2857724132,0.4855634926,0.6426319146,0.7897408686,0.923373012,-0.3117055105,0.2927787224,0.4290081032,1.6849492872,0.1463171964,0.1229216884,0.001432796,0.0242805022,-0.7604340529,-2.8975835688,2.3142690591,17.872747262,0.1252429352,0.0211855055,0.0184750321,0.00283892,0.999571743,2.5602804971,1.7676877105,-0.8397250706,0.0955682377,0.0722449483,0.0120842339,0.009488304,0.0011792038,-0.303170517,-1.9002231169,1.7726147378,0.2919360597,0.222765577,697.637168142,0.8230088496,0.1609657391,0.9035285273,0.1879728818,1,0.5854892756,0.5377487349,0.0003623603,0.1330412794,-5.56786497429E-05,76.71641791,12.278,9.3305,1.0895,2.1390951062,7.889,0.62737528,0.0541532813,0.0761817347,0.1064545744,2.7722459419,0.40248963,0.1742738589,3.2994822291,348.58243204,3.453919777,3.3395417584,0.05925052,0.9789755940437317,0.4301775459,1.279772667,11.6164598093,0.9751156768,9.49116371 +amplitude,anderson_darling,flux_percentile_ratio_mid20,flux_percentile_ratio_mid35,flux_percentile_ratio_mid50,flux_percentile_ratio_mid65,flux_percentile_ratio_mid80,fold2P_slope_10percentile,fold2P_slope_90percentile,freq1_amplitude1,freq1_amplitude2,freq1_amplitude3,freq1_amplitude4,freq1_freq,freq1_lambda,freq1_rel_phase2,freq1_rel_phase3,freq1_rel_phase4,freq1_signif,freq2_amplitude1,freq2_amplitude2,freq2_amplitude3,freq2_amplitude4,freq2_freq,freq2_rel_phase2,freq2_rel_phase3,freq2_rel_phase4,freq3_amplitude1,freq3_amplitude2,freq3_amplitude3,freq3_amplitude4,freq3_freq,freq3_rel_phase2,freq3_rel_phase3,freq3_rel_phase4,freq_amplitude_ratio_21,freq_amplitude_ratio_31,freq_frequency_ratio_21,freq_frequency_ratio_31,freq_model_max_delta_mags,freq_model_min_delta_mags,freq_model_phi1_phi2,freq_n_alias,freq_signif_ratio_21,freq_signif_ratio_31,freq_varrat,freq_y_offset,linear_trend,max_slope,maximum,median,median_absolute_deviation,medperc90_2p_p,minimum,p2p_scatter_2praw,p2p_scatter_over_mad,p2p_scatter_pfold_over_mad,p2p_ssqr_diff_over_var,percent_amplitude,percent_beyond_1_std,percent_close_to_median,percent_difference_flux_percentile,period_fast,qso_log_chi2_qsonu,qso_log_chi2nuNULL_chi2nu,scatter_res_raw,shapiro_wilk,skew,std,stetson_j,stetson_k,weighted_average,weighted_std_dev +0.4695,5.398344616118379,0.1391191698,0.255495667,0.3933558399,0.5357113476,0.7345991397,-3.4444531503,3.3307906791,0.1013563889,0.0142452789,0.0005442693,0.0010724211,6.0688970237,5.4934900906,-1.8107758352,2.090252784,1.3995008795,11.2681277508,0.0315886229,0.0018222864,0.0006995279,0.0002360676,2.3250069312,0.1572123843,1.9347274633,1.8081434543,0.0290885372,0.0009549442,0.0004769511,0.0001788471,9.1142277619,-0.372819872,2.4993977233,-1.5869169231,0.3116589224,0.2869926359,0.3831020566,1.5017931144,8.52055231926E-11,3.05807826284E-09,0.3927107357,0,0.4208495586,0.4030330277,6.88293697906E-05,-0.0029369825,2.4986485735E-05,0.31574689,13.869,13.295,0.088,0.9631850179,12.93,0.7409749222,1.2556818182,1.0397727273,1.7838983953,0.4106137498,0.29263158,0.5305263158,0.4140128815,27.4480915,1.9335536941,0.1127558354,0.7102264604,0.9654412865638733,0.5536755309,0.1392362364,0.1863107801,0.958934476,13.30343644,0.136455 +0.365,0.8747606698646564,0.1773462194,0.3114327492,0.436904049,0.5882967978,0.7672402192,-1.92492705,2.0255490809,0.0415961966,0.0009118138,0.0005114711,0.0002122479,8.3859538513,8.6831998581,-1.5722344384,-2.6650322107,-1.8396554581,12.0229612753,0.0093384569,0.0009404195,0.0001609345,3.64872550248E-05,2.1464473663,-1.9173684556,2.0252829445,1.2310896164,0.0108355446,0.0013703744,0.0002473447,0.0001088843,10.5167443839,-2.2392773646,-2.7672119788,0.9033347702,0.2245026632,0.260493639,0.2559574503,1.2540904196,2.122405654E-10,8.91044751872E-10,0.3559372874,0,0.327293423,0.3407100021,1.80627698458E-05,0.0012543862,1.97883821107E-06,15.90909091,10.46,9.997,0.028,0.9540692349,9.73,0.7226474127,1.3214285714,0.8214285714,1.8128582015,0.3471701558,0.186,0.94,0.1139420046,22.91634888,0.3587730502,0.2151954704,0.5705804968,0.9896652698516846,2.96462376,0.0554298829,-0.8531922756,0.7876747619,10.00258434,0.050120 +2.1945,2.136304480106446,0.2857724132,0.4855634926,0.6426319146,0.7897408686,0.923373012,-0.3117055105,0.2927787224,0.4290081032,1.6849492872,0.1463171964,0.1229216884,0.001432796,0.0242805022,-0.7604340529,-2.8975835688,2.3142690591,17.872747262,0.1252429352,0.0211855055,0.0184750321,0.00283892,0.999571743,2.5602804971,1.7676877105,-0.8397250706,0.0955682377,0.0722449483,0.0120842339,0.009488304,0.0011792038,-0.303170517,-1.9002231169,1.7726147378,0.2919360597,0.222765577,697.637168142,0.8230088496,0.1609657391,0.9035285273,0.1879728818,1,0.5854892756,0.5377487349,0.0003623603,0.1330412794,-5.56786497429E-05,76.71641791,12.278,9.3305,1.0895,2.1390951062,7.889,0.62737528,0.0541532813,0.0761817347,0.1064545744,2.7722459419,0.40248963,0.1742738589,3.2994822291,348.58243204,3.453919777,3.3395417584,0.05925052,0.9789755940437317,0.4301775459,1.279772667,11.6164598093,0.9751156768,9.49116371,1.267892 diff --git a/cesium/features/tests/test_general_features.py b/cesium/features/tests/test_general_features.py index 4c8631d8..7ffc16c8 100644 --- a/cesium/features/tests/test_general_features.py +++ b/cesium/features/tests/test_general_features.py @@ -303,3 +303,15 @@ def test_weighted_average(): npt.assert_equal( f["percent_beyond_1_std"], np.mean(np.abs(stds_from_weighted_avg) > 1.0) ) + + +def test_weighted_std_dev(): + """Test weighted std dev.""" + times, values, errors = irregular_random() + f = generate_features( + times, values, errors, ["weighted_average", "weighted_std_dev"] + ) + weighted_std_dev = np.sqrt( + np.average((values - f["weighted_average"]) ** 2, weights=1.0 / (errors**2)) + ) + npt.assert_allclose(f["weighted_std_dev"], weighted_std_dev)