From 188a31b5b38684388b6863d1889f88505c4d7014 Mon Sep 17 00:00:00 2001 From: Tanzim Farhan <52883108+tanzim10@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:25:49 +0600 Subject: [PATCH] Mobility model alpha parameter regression from data (#15) * Fixed PR changes for params_regression * Fixed Unittest for param_regression according to PR change * Fixed the mobility notebook and radp_library file(PR changes done) * Resolved test_param_regression unittest cases * Updated Param Regression and radp library as instructed in the PR review * Changed the scipy dependency to solve dependency error * Changed how seed value works especially made it user friendly * Refactored functions and changed the imports * Resolved test_param_regression imports --- notebooks/coo_with_radp_digital_twin.ipynb | 2 +- notebooks/mobility_model.ipynb | 362 +++++++++++++++++ notebooks/radp_library.py | 370 +++++++++++++++--- .../digital_twin/mobility/param_regression.py | 189 +++++++++ .../mobility/tests/test_param_regression.py | 90 +++++ radp/digital_twin/requirements.txt | 3 + 6 files changed, 963 insertions(+), 53 deletions(-) create mode 100644 notebooks/mobility_model.ipynb create mode 100644 radp/digital_twin/mobility/param_regression.py create mode 100644 radp/digital_twin/mobility/tests/test_param_regression.py diff --git a/notebooks/coo_with_radp_digital_twin.ipynb b/notebooks/coo_with_radp_digital_twin.ipynb index ecff4e1..45ac095 100644 --- a/notebooks/coo_with_radp_digital_twin.ipynb +++ b/notebooks/coo_with_radp_digital_twin.ipynb @@ -522,7 +522,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.5" }, "varInspector": { "cols": { diff --git a/notebooks/mobility_model.ipynb b/notebooks/mobility_model.ipynb new file mode 100644 index 0000000..0f0a578 --- /dev/null +++ b/notebooks/mobility_model.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ec7ab4ae", + "metadata": {}, + "source": [ + "# Alpha Optimization in a Gauss-Markov Mobility Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "754a70e3", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "sys.path.append(f\"{Path().absolute().parent}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3d3ca1a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import scipy\n", + "import numpy as np\n", + "from radp_library import *\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cm\n", + "from radp.digital_twin.mobility.param_regression import get_predicted_alpha,preprocess_ue_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "309ead1f", + "metadata": {}, + "outputs": [], + "source": [ + "params = {\n", + " \"ue_tracks_generation\": {\n", + " \"params\": {\n", + " \"simulation_duration\": 3600,\n", + " \"simulation_time_interval_seconds\": 0.01,\n", + " \"num_ticks\": 50,\n", + " \"num_batches\": 1,\n", + " \"ue_class_distribution\": {\n", + " \"stationary\": {\n", + " \"count\": 10,\n", + " \"velocity\": 0,\n", + " \"velocity_variance\": 1\n", + " },\n", + " \"pedestrian\": {\n", + " \"count\": 5,\n", + " \"velocity\": 2,\n", + " \"velocity_variance\": 1\n", + " },\n", + " \"cyclist\": {\n", + " \"count\": 5,\n", + " \"velocity\": 5,\n", + " \"velocity_variance\": 1\n", + " },\n", + " \"car\": {\n", + " \"count\": 12,\n", + " \"velocity\": 20,\n", + " \"velocity_variance\": 1\n", + " }\n", + " },\n", + " \"lat_lon_boundaries\": {\n", + " \"min_lat\": -90,\n", + " \"max_lat\": 90,\n", + " \"min_lon\": -180,\n", + " \"max_lon\": 180\n", + " },\n", + " \"gauss_markov_params\": {\n", + " \"alpha\": 0.5,\n", + " \"variance\": 0.8,\n", + " \"rng_seed\": 42,\n", + " \"lon_x_dims\": 100,\n", + " \"lon_y_dims\": 100,\n", + " \"// TODO\": \"Account for supporting the user choosing the anchor_loc and cov_around_anchor.\",\n", + " \"// Current implementation\": \"the UE Tracks generator will not be using these values.\",\n", + " \"// anchor_loc\": {},\n", + " \"// cov_around_anchor\": {}\n", + " }\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "d5d3f4fb", + "metadata": {}, + "source": [ + "## Generate Data Set 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf32a451", + "metadata": {}, + "outputs": [], + "source": [ + "data1 = get_ue_data(params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0217fb7d", + "metadata": {}, + "outputs": [], + "source": [ + "data1.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "0f112187", + "metadata": {}, + "source": [ + "## Plot Dataset 1 " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57d3c99b", + "metadata": {}, + "outputs": [], + "source": [ + "plot_ue_tracks(data1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "507f5c0b", + "metadata": {}, + "outputs": [], + "source": [ + "velocity = preprocess_ue_data(data1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60b0bc3b", + "metadata": {}, + "outputs": [], + "source": [ + "velocity" + ] + }, + { + "cell_type": "markdown", + "id": "3e4713fc", + "metadata": {}, + "source": [ + "## Alpha Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee3e00c1", + "metadata": {}, + "outputs": [], + "source": [ + "alpha0 = np.random.choice(np.arange(0, 1.1, 0.1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "119f1534", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Alpha0:\",alpha0)" + ] + }, + { + "cell_type": "markdown", + "id": "97a8c1ec", + "metadata": {}, + "source": [ + "## Regress to Find Alpha 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12fdaf4b", + "metadata": {}, + "outputs": [], + "source": [ + "alpha1 = get_predicted_alpha(data1,alpha0,seed=42) # Adding seed for reproducibility" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39eed46d", + "metadata": {}, + "outputs": [], + "source": [ + "alpha1" + ] + }, + { + "cell_type": "markdown", + "id": "1dc52c9d", + "metadata": {}, + "source": [ + "## Generating new data using alpha 1\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46d7e685", + "metadata": {}, + "outputs": [], + "source": [ + "#Changing alpha value to alpha1 in the params dictionary\n", + "params['ue_tracks_generation']['params']['gauss_markov_params']['alpha'] = alpha1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab4eb7cd", + "metadata": {}, + "outputs": [], + "source": [ + "print(params['ue_tracks_generation']['params']['gauss_markov_params']['alpha'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c700855", + "metadata": {}, + "outputs": [], + "source": [ + "data2 = get_ue_data(params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a2b294", + "metadata": {}, + "outputs": [], + "source": [ + "data2" + ] + }, + { + "cell_type": "markdown", + "id": "07a4c3aa", + "metadata": {}, + "source": [ + "## Plot Dataset 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df4b1b11", + "metadata": {}, + "outputs": [], + "source": [ + "plot_ue_tracks(data2)" + ] + }, + { + "cell_type": "markdown", + "id": "b727758e", + "metadata": {}, + "source": [ + "## Regress to Find Alpha 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a268312", + "metadata": {}, + "outputs": [], + "source": [ + "alpha2 = get_predicted_alpha(data1,alpha1,seed=42) # Adding seed for reproducibility" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02575be1", + "metadata": {}, + "outputs": [], + "source": [ + "alpha2" + ] + }, + { + "cell_type": "markdown", + "id": "da3b378b", + "metadata": {}, + "source": [ + "## Comparison Plot of Dataset 1 and Dataset2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0798532d", + "metadata": {}, + "outputs": [], + "source": [ + "plot_ue_tracks_side_by_side(data1, data2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c61c293", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/radp_library.py b/notebooks/radp_library.py index c0cfe24..9d44da4 100644 --- a/notebooks/radp_library.py +++ b/notebooks/radp_library.py @@ -12,6 +12,7 @@ import fastkml import matplotlib.animation as animation import matplotlib.pyplot as plt +import matplotlib.cm as cm import numpy as np import pandas as pd import rasterio.features @@ -20,8 +21,14 @@ from shapely import geometry from radp.digital_twin.mobility.mobility import gauss_markov -from radp.digital_twin.rf.bayesian.bayesian_engine import BayesianDigitalTwin, NormMethod +from radp.digital_twin.rf.bayesian.bayesian_engine import ( + BayesianDigitalTwin, + NormMethod, +) from radp.digital_twin.utils.gis_tools import GISTools +from radp.digital_twin.mobility.ue_tracks import UETracksGenerator +from radp.digital_twin.mobility.ue_tracks_params import UETracksGenerationParams + Boundary = Union[geometry.Polygon, geometry.MultiPolygon] KML_NS = "{http://www.opengis.net/kml/2.2}" @@ -77,7 +84,9 @@ def _setup_kml_obj( styles = [] k = fastkml.KML() - doc = fastkml.Document(ns=KML_NS, name=(name or "Shapes"), description=(desc or ""), styles=styles) + doc = fastkml.Document( + ns=KML_NS, name=(name or "Shapes"), description=(desc or ""), styles=styles + ) k.append(doc) return k, doc @@ -106,7 +115,9 @@ def _add_shape_to_folder( ) -> None: if desc is None: desc = name - shape_placemark = fastkml.Placemark(ns=KML_NS, name=name, description=desc, styles=styles) + shape_placemark = fastkml.Placemark( + ns=KML_NS, name=name, description=desc, styles=styles + ) shape_placemark.geometry = shape folder.append(shape_placemark) @@ -181,15 +192,21 @@ def shape_dict_to_kmz( obj_style = styles if descriptions_dict is not None and name in descriptions_dict: - obj_desc = ShapesKMLWriter._build_description_from_prop_dict(descriptions_dict[name]) + obj_desc = ShapesKMLWriter._build_description_from_prop_dict( + descriptions_dict[name] + ) else: obj_desc = name if isinstance(obj, geometry.base.BaseGeometry): - cls._add_shape_to_folder(cur_folder, obj, name, styles=obj_style, desc=obj_desc) + cls._add_shape_to_folder( + cur_folder, obj, name, styles=obj_style, desc=obj_desc + ) else: # isinstance(obj, dict)) - child_folder = fastkml.Folder(ns=KML_NS, name=name, styles=obj_style) + child_folder = fastkml.Folder( + ns=KML_NS, name=name, styles=obj_style + ) cur_folder.append(child_folder) fringe.append((child_folder, obj)) @@ -214,17 +231,22 @@ def get_percell_data( data_in_sampled = data_in - data_in_sampled.columns = [col.replace("_1", "") if col.endswith("_1") else col for col in data_in_sampled.columns] + data_in_sampled.columns = [ + col.replace("_1", "") if col.endswith("_1") else col + for col in data_in_sampled.columns + ] # filter out invalid values data_cell_valid = data_in_sampled[data_in_sampled.cell_rxpwr_dbm != invalid_value] if choose_strongest_samples_percell: - data_cell_sampled = data_cell_valid.sort_values("cell_rxpwr_dbm", ascending=False).head( - n=min(n_samples, len(data_cell_valid)) - ) + data_cell_sampled = data_cell_valid.sort_values( + "cell_rxpwr_dbm", ascending=False + ).head(n=min(n_samples, len(data_cell_valid))) else: # get n_samples independent random samples inside training groups - data_cell_sampled = data_cell_valid.sample(n=min(n_samples, len(data_cell_valid)), random_state=(seed)) + data_cell_sampled = data_cell_valid.sample( + n=min(n_samples, len(data_cell_valid)), random_state=(seed) + ) # logging.info(f"n_samples={n_samples}, len(data_cell_valid)={len(data_cell_valid)}") # plt.scatter(y=data_cell_sampled.loc_y, x=data_cell_sampled.loc_x, s=10) @@ -276,7 +298,11 @@ def bing_tile_to_center(x, y, level, tile_pixels=256): xwidth = 360.0 / zoom_factor out = [] out.append( - (y_to_latitude(True, y, zoom_factor, tile_pixels) + y_to_latitude(False, y, zoom_factor, tile_pixels)) / 2 + ( + y_to_latitude(True, y, zoom_factor, tile_pixels) + + y_to_latitude(False, y, zoom_factor, tile_pixels) + ) + / 2 ) out.append(xwidth * (x + 0.5) - 180) return out @@ -329,7 +355,9 @@ def latitude_to_world_pixel(latitude, zoom_factor, tile_pixels=256): latitude = map_clip(latitude, -85.05112878, 85.05112878) sin_latitude = np.sin(latitude * np.pi / 180.0) - pixel_y = (0.5 - np.log((1 + sin_latitude) / (1 - sin_latitude)) / (4 * np.pi)) * (tile_pixels * zoom_factor) + pixel_y = (0.5 - np.log((1 + sin_latitude) / (1 - sin_latitude)) / (4 * np.pi)) * ( + tile_pixels * zoom_factor + ) return pixel_y @@ -363,12 +391,16 @@ def lon_lat_to_bing_tile_df_row(row, level): return row -def get_lonlat_from_xy_idxs(xy: np.ndarray, lower_left: Tuple[float, float]) -> np.ndarray: +def get_lonlat_from_xy_idxs( + xy: np.ndarray, lower_left: Tuple[float, float] +) -> np.ndarray: return xy * SRTM_STEP + lower_left def find_closest(data_df, lat, lon): - dist = data_df.apply(lambda row: GISTools.dist((row.loc_y, row.loc_x), (lat, lon)), axis=1) + dist = data_df.apply( + lambda row: GISTools.dist((row.loc_y, row.loc_x), (lat, lon)), axis=1 + ) if dist.min() < 100: return dist.idxmin() else: @@ -410,8 +442,12 @@ def get_track_samples( xy_lonlat = get_lonlat_from_xy_idxs(xy, (min_lon, min_lat)) xy_lonlat_ue_tracks.extend(xy_lonlat) - all_track_pts_df = pd.DataFrame(columns=["loc_x", "loc_y"], data=xy_lonlat_ue_tracks) - all_track_pts_sampled_df = all_track_pts_df.apply(lambda row: find_closest(data_df, row.loc_y, row.loc_x), axis=1) + all_track_pts_df = pd.DataFrame( + columns=["loc_x", "loc_y"], data=xy_lonlat_ue_tracks + ) + all_track_pts_sampled_df = all_track_pts_df.apply( + lambda row: find_closest(data_df, row.loc_y, row.loc_x), axis=1 + ) return data_df.loc[all_track_pts_sampled_df] @@ -498,7 +534,9 @@ def bdt( axs[1].set_yticks([]) for i in range(len(desired_idxs)): train_cell_id = idx_cell_id_mapping[i + 1] - training_data[train_cell_id] = pd.concat([tilt_per_cell_df[i] for tilt_per_cell_df in percell_data_list]) + training_data[train_cell_id] = pd.concat( + [tilt_per_cell_df[i] for tilt_per_cell_df in percell_data_list] + ) if track_sampling: training_data[train_cell_id] = get_track_samples( training_data[train_cell_id], @@ -515,19 +553,27 @@ def bdt( ) for train_cell_id, training_data_idx in training_data.items(): training_data_idx["cell_id"] = train_cell_id - training_data_idx["cell_lat"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["cell_lat"].values[0] - training_data_idx["cell_lon"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["cell_lon"].values[0] - training_data_idx["cell_az_deg"] = site_config_df[site_config_df["cell_id"] == train_cell_id][ - "cell_az_deg" - ].values[0] - training_data_idx["cell_txpwr_dbm"] = site_config_df[site_config_df["cell_id"] == train_cell_id][ - "cell_txpwr_dbm" - ].values[0] - training_data_idx["hTx"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["hTx"].values[0] - training_data_idx["hRx"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["hRx"].values[0] - training_data_idx["cell_carrier_freq_mhz"] = site_config_df[site_config_df["cell_id"] == train_cell_id][ - "cell_carrier_freq_mhz" - ].values[0] + training_data_idx["cell_lat"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["cell_lat"].values[0] + training_data_idx["cell_lon"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["cell_lon"].values[0] + training_data_idx["cell_az_deg"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["cell_az_deg"].values[0] + training_data_idx["cell_txpwr_dbm"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["cell_txpwr_dbm"].values[0] + training_data_idx["hTx"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["hTx"].values[0] + training_data_idx["hRx"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["hRx"].values[0] + training_data_idx["cell_carrier_freq_mhz"] = site_config_df[ + site_config_df["cell_id"] == train_cell_id + ]["cell_carrier_freq_mhz"].values[0] training_data_idx["log_distance"] = [ GISTools.get_log_distance( training_data_idx["cell_lat"].values[0], @@ -572,7 +618,10 @@ def bdt( training_data_idx = training_data_idx.drop( training_data_idx[ (training_data_idx["cell_rxpwr_dbm"] < filter_out_samples_dbm_threshold) - & (training_data_idx["log_distance"] > np.log(1000 * filter_out_samples_kms_threshold)) + & ( + training_data_idx["log_distance"] + > np.log(1000 * filter_out_samples_kms_threshold) + ) ].index ) if plot_loss_vs_iter: @@ -632,19 +681,27 @@ def bdt( for test_cell_id, test_data_idx in test_data.items(): test_data_idx["cell_id"] = test_cell_id - test_data_idx["cell_lat"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_lat"].values[0] - test_data_idx["cell_lon"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_lon"].values[0] - test_data_idx["cell_az_deg"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_az_deg"].values[ - 0 - ] - test_data_idx["cell_txpwr_dbm"] = site_config_df[site_config_df["cell_id"] == test_cell_id][ - "cell_txpwr_dbm" - ].values[0] - test_data_idx["hTx"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["hTx"].values[0] - test_data_idx["hRx"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["hRx"].values[0] - test_data_idx["cell_carrier_freq_mhz"] = site_config_df[site_config_df["cell_id"] == test_cell_id][ - "cell_carrier_freq_mhz" - ].values[0] + test_data_idx["cell_lat"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["cell_lat"].values[0] + test_data_idx["cell_lon"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["cell_lon"].values[0] + test_data_idx["cell_az_deg"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["cell_az_deg"].values[0] + test_data_idx["cell_txpwr_dbm"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["cell_txpwr_dbm"].values[0] + test_data_idx["hTx"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["hTx"].values[0] + test_data_idx["hRx"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["hRx"].values[0] + test_data_idx["cell_carrier_freq_mhz"] = site_config_df[ + site_config_df["cell_id"] == test_cell_id + ]["cell_carrier_freq_mhz"].values[0] test_data_idx["log_distance"] = [ GISTools.get_log_distance( test_data_idx["cell_lat"].values[0], @@ -686,10 +743,16 @@ def bdt( test_data_percell = test_data_percell.drop( test_data_percell[ (test_data_percell["cell_rxpwr_dbm"] < filter_out_samples_dbm_threshold) - & (test_data_percell["log_distance"] > np.log(1000 * filter_out_samples_kms_threshold)) + & ( + test_data_percell["log_distance"] + > np.log(1000 * filter_out_samples_kms_threshold) + ) ].index ) - (pred_means_percell, _,) = bayesian_digital_twins[idx].predict_distributed_gpmodel( + ( + pred_means_percell, + _, + ) = bayesian_digital_twins[idx].predict_distributed_gpmodel( prediction_dfs=[test_data_percell], ) logging.info(f"merging cell at idx = : {idx}") @@ -700,11 +763,15 @@ def bdt( ) full_prediction_frame = ( pd.concat([full_prediction_frame, test_data_percell_bing_tile]) - .groupby(["loc_x", "loc_y"], as_index=False)[["cell_rxpwr_dbm", "pred_means"]] + .groupby(["loc_x", "loc_y"], as_index=False)[ + ["cell_rxpwr_dbm", "pred_means"] + ] .max() ) # re-convert to lat/lon - full_prediction_frame = full_prediction_frame.apply(bing_tile_to_center_df_row, level=bing_tile_level, axis=1) + full_prediction_frame = full_prediction_frame.apply( + bing_tile_to_center_df_row, level=bing_tile_level, axis=1 + ) # compute RSRP as maximum over predicted rx powers pred_rsrp = np.array(full_prediction_frame.pred_means) @@ -751,7 +818,9 @@ def bdt( axs[1].set_xticks([]) axs[1].set_yticks([]) - plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.0, hspace=0.1) + plt.subplots_adjust( + left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.0, hspace=0.1 + ) plt.show() return ( @@ -825,7 +894,9 @@ def init(): def animate(i): plt.clf() _init_plt(axs) - pred_rsrp_points = axs[1].scatter(lons, lats, c=pred_rsrp_list[i], cmap=cmap, s=25) + pred_rsrp_points = axs[1].scatter( + lons, lats, c=pred_rsrp_list[i], cmap=cmap, s=25 + ) axs[1].set_title( f"Predicted RSRP \n MAE = {MAE_list[i]:0.1f} dB" f"\nmax_training_iterations = {maxiter_list[i]} | " @@ -839,7 +910,9 @@ def animate(i): return [true_rsrp_points, pred_rsrp_points] # call the animator. blit=True means only re-draw the parts that have changed. - anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(pred_rsrp_list), blit=True) + anim = animation.FuncAnimation( + fig, animate, init_func=init, frames=len(pred_rsrp_list), blit=True + ) writervideo = animation.FFMpegWriter(fps=4) anim.save(filename, writer=writervideo) @@ -867,3 +940,196 @@ def rfco_to_best_server_shapes( key=lambda x: x[1], ) return shapes + + +# Mobility Model helper functions + + +def get_ue_data(params: dict) -> pd.DataFrame: + """ + Generates user equipment (UE) tracks data using specified simulation parameters. + + This function initializes a UETracksGenerationParams object using the provided parameters + and then iterates over batches generated by the UETracksGenerator. Each batch of UE tracks + data is consolidated into a single DataFrame which captures mobility tracks across multiple + ticks and batches, as per the defined parameters. + + Using the UETracksGenerator, the UE tracks are returned in form of a dataframe + The Dataframe is arranged as follows: + + +------------+------------+-----------+------+ + | mock_ue_id | lon | lat | tick | + +============+============+===========+======+ + | 0 | 102.219377 | 33.674572 | 0 | + | 1 | 102.415954 | 33.855534 | 0 | + | 2 | 102.545935 | 33.878075 | 0 | + | 0 | 102.297766 | 33.575942 | 1 | + | 1 | 102.362725 | 33.916477 | 1 | + | 2 | 102.080675 | 33.832793 | 1 | + +------------+------------+-----------+------+ + """ + + # Initialize the UE data + ue_tracks_params = UETracksGenerationParams(params) + + ue_tracks_generation = pd.DataFrame() # Initialize an empty DataFrame + for ue_tracks_generation_batch in UETracksGenerator.generate_as_lon_lat_points( + rng_seed=ue_tracks_params.rng_seed, + lon_x_dims=ue_tracks_params.lon_x_dims, + lon_y_dims=ue_tracks_params.lon_y_dims, + num_ticks=ue_tracks_params.num_ticks, + num_UEs=ue_tracks_params.num_UEs, + num_batches=ue_tracks_params.num_batches, + alpha=ue_tracks_params.alpha, + variance=ue_tracks_params.variance, + min_lat=ue_tracks_params.min_lat, + max_lat=ue_tracks_params.max_lat, + min_lon=ue_tracks_params.min_lon, + max_lon=ue_tracks_params.max_lon, + mobility_class_distribution=ue_tracks_params.mobility_class_distribution, + mobility_class_velocities=ue_tracks_params.mobility_class_velocities, + mobility_class_velocity_variances=ue_tracks_params.mobility_class_velocity_variances, + ): + # Append each batch to the main DataFrame + ue_tracks_generation = pd.concat( + [ue_tracks_generation, ue_tracks_generation_batch], ignore_index=True + ) + + return ue_tracks_generation + + +def plot_ue_tracks(df: pd.DataFrame) -> None: + """ + Plots the movement tracks of unique UE IDs on a grid of subplots. + """ + + # Initialize an empty list to store batch indices + batch_indices = [] + + # Identify where tick resets and mark the indices + for i in range(1, len(df)): + if df.loc[i, "tick"] == 0 and df.loc[i - 1, "tick"] != 0: + batch_indices.append(i) + + # Add the final index to close the last batch + batch_indices.append(len(df)) + + # Now, iterate over the identified batches + start_idx = 0 + for batch_num, end_idx in enumerate(batch_indices): + batch_data = df.iloc[start_idx:end_idx] + + # Create a new figure + plt.figure(figsize=(10, 6)) + + # Generate a color map with different colors for each ue_id + color_map = cm.get_cmap("tab20", len(batch_data["mock_ue_id"].unique())) + + # Plot each ue_id's movement over ticks in this batch + for idx, ue_id in enumerate(batch_data["mock_ue_id"].unique()): + ue_data = batch_data[batch_data["mock_ue_id"] == ue_id] + color = color_map(idx) # Get a unique color for each ue_id + + # Plot the path with arrows + for i in range(len(ue_data) - 1): + x_start = ue_data.iloc[i]["lon"] + y_start = ue_data.iloc[i]["lat"] + x_end = ue_data.iloc[i + 1]["lon"] + y_end = ue_data.iloc[i + 1]["lat"] + + # Calculate the direction vector + dx = x_end - x_start + dy = y_end - y_start + + # Plot the line with an arrow with reduced width and unique color + plt.quiver( + x_start, + y_start, + dx, + dy, + angles="xy", + scale_units="xy", + scale=1, + color=color, + width=0.002, + headwidth=3, + headlength=5, + ) + + # Plot starting points as circles with the same color + plt.scatter( + ue_data["lon"].iloc[0], + ue_data["lat"].iloc[0], + color=color, + label=f"Start UE {ue_id}", + ) + + # Set plot title and labels + plt.title(f"UE Tracks with Direction for Batch {batch_num + 1}") + plt.xlabel("Longitude") + plt.ylabel("Latitude") + plt.legend(loc="upper right", bbox_to_anchor=(1.2, 1)) + + # Display the plot + plt.show() + + # Update start_idx for the next batch + start_idx = end_idx + + +def plot_ue_tracks_side_by_side(df1: pd.DataFrame, df2: pd.DataFrame) -> None: + """ + Plots the movement tracks of unique UE IDs from two DataFrames side by side. + """ + # Set up subplots with 2 columns for side by side plots + fig, axes = plt.subplots(1, 2, figsize=(25, 10)) # 2 rows, 2 columns (side by side) + + # Plot the first DataFrame + plot_ue_tracks_on_axis(df1, axes[0], title="DataFrame 1") + + # Plot the second DataFrame + plot_ue_tracks_on_axis(df2, axes[1], title="DataFrame 2") + + # Adjust layout and show + plt.tight_layout() + plt.show() + + +def plot_ue_tracks_on_axis(df: pd.DataFrame, ax, title: str) -> None: + """ + Helper function to plot UE tracks on a given axis. + """ + data = df + unique_ids = data["mock_ue_id"].unique() + num_plots = len(unique_ids) + + color_map = cm.get_cmap("tab20", num_plots) + + for idx, ue_id in enumerate(unique_ids): + ue_data = data[data["mock_ue_id"] == ue_id] + + for i in range(len(ue_data) - 1): + x_start = ue_data.iloc[i]["lon"] + y_start = ue_data.iloc[i]["lat"] + x_end = ue_data.iloc[i + 1]["lon"] + y_end = ue_data.iloc[i + 1]["lat"] + + dx = x_end - x_start + dy = y_end - y_start + ax.quiver( + x_start, + y_start, + dx, + dy, + angles="xy", + scale_units="xy", + scale=1, + color=color_map(idx), + ) + + ax.scatter( + ue_data["lon"], ue_data["lat"], color=color_map(idx), label=f"UE {ue_id}" + ) + + ax.set_title(title) + ax.legend() diff --git a/radp/digital_twin/mobility/param_regression.py b/radp/digital_twin/mobility/param_regression.py new file mode 100644 index 0000000..72c8f03 --- /dev/null +++ b/radp/digital_twin/mobility/param_regression.py @@ -0,0 +1,189 @@ +import numpy as np +import pandas as pd +from scipy import optimize +from typing import Tuple, List, Any +from radp.digital_twin.utils.gis_tools import GISTools + + +def _initialize( + df: pd.DataFrame, seed: int +) -> Tuple[np.ndarray, np.ndarray, float, float, np.random.Generator]: + """ + Initializes and preprocesses data from a DataFrame for mobility modeling. + + Args: + df (pd.DataFrame): DataFrame containing 'velocity' and 'mock_ue_id' columns. + + """ + v_t_full_data = df["velocity"].to_numpy() + + # CONSTANTS + num_users = df["mock_ue_id"].nunique() + rng = np.random.default_rng(seed) + + # Data + f = np.poly1d([8, 7, 5, 1]) + v_t_full = v_t_full_data + v_t = v_t_full[:-1] + v_t_next = v_t_full[1:] + + theta_t_full = f(v_t_full) + 6 * rng.normal(size=len(v_t_full)) + theta_t = theta_t_full[:-1] + theta_t_next = theta_t_full[1:] + + t_array = np.array((v_t, theta_t)) + t_next_array = np.array((v_t_next, theta_t_next)) + + velocity_mean = np.mean(v_t_full_data) + variance = np.var(v_t_full_data) + + return t_array, t_next_array, velocity_mean, variance, rng + + +def _next( + alpha: float, + x: np.ndarray, + velocity_mean: float, + variance: float, + rng: np.random.Generator, +) -> np.ndarray: + """ + Computes the next state of velocity and angle using the current state and alpha. + + Args: + alpha (float): The regression parameter. + x (np.ndarray): Current state vector [velocity, angle]. + velocity_mean (float): Mean velocity used for normalization. + variance (float): Variance of the velocity. + rng (np.random.Generator): Random number generator for noise. + + """ + v_t, theta_t = x[0], x[1] + alpha2 = 1.0 - alpha + alpha3 = np.sqrt(1.0 - alpha * alpha) * variance + v_t_next = alpha * v_t + alpha2 * velocity_mean + alpha3 * rng.normal() + angle_mean = theta_t # Simplified model without margin correction + theta_t_next = alpha * theta_t + alpha2 * angle_mean + alpha3 * rng.normal() + return np.array([v_t_next, theta_t_next]) + + +def _residual_vector( + alpha: float, + t: np.ndarray, + t_next: np.ndarray, + velocity_mean: float, + variance: float, + rng: np.random.Generator, +) -> np.ndarray: + """ + Computes residuals for optimization by comparing predicted and actual next states. + + Args: + alpha (float): The regression parameter. + t (np.ndarray): Current state array [velocity, angle]. + t_next (np.ndarray): Actual next state array [next_velocity, next_angle]. + velocity_mean (float): Mean velocity used for normalization. + variance (float): Variance of the velocity. + rng (np.random.Generator): Random number generator for noise. + + """ + predicted = _next(alpha, t, velocity_mean, variance, rng) + return (predicted - t_next).flatten() + + +def optimize_alpha( + alpha0: float, + t_array: np.ndarray, + t_next_array: np.ndarray, + velocity_mean: float, + variance: float, + rng: np.random.Generator, +) -> Tuple[float, Any]: + """ + Optimizes alpha using least squares based on the provided mobility data. + + Args: + alpha0 (float): Initial guess for alpha. + t_array (np.ndarray): Array of current state values [velocity, angle]. + t_next_array (np.ndarray): Array of next state values [next_velocity, next_angle]. + velocity_mean (float): Mean of velocity. + variance (float): Variance of velocity. + rng (np.random.Generator): Random number generator for noise. + + """ + popt, pcov = optimize.leastsq( + _residual_vector, + alpha0, + args=(t_array, t_next_array, velocity_mean, variance, rng), + ) + return popt, pcov + + +def calculate_distances_and_velocities(group: pd.DataFrame) -> pd.DataFrame: + """Calculating distances and velocities for each UE based on sorted data by ticks.""" + group["prev_longitude"] = group["lon"].shift(1) + group["prev_latitude"] = group["lat"].shift(1) + group["distance"] = group.apply( + lambda row: GISTools.get_log_distance( + row["prev_latitude"], row["prev_longitude"], row["lat"], row["lon"] + ) + if not pd.isna(row["prev_longitude"]) + else 0, + axis=1, + ) + # Assuming time interval between ticks is 1 unit, adjust below if different + group["velocity"] = ( + group["distance"] / 1 + ) # Convert to m/s by dividing by the seconds per tick, here assumed to be 1s + return group + + +def preprocess_ue_data(df: pd.DataFrame) -> pd.DataFrame: + """Preprocess the UE data by calculating distances and velocities.""" + df.sort_values(by=["mock_ue_id", "tick"], inplace=True) + + df = df.groupby("mock_ue_id").apply(calculate_distances_and_velocities) + # Drop the temporary columns + df.drop(["prev_longitude", "prev_latitude"], axis=1, inplace=True) + + return df + + +def get_predicted_alpha(data: pd.DataFrame, alpha0: float,seed: int) -> float: + """ + Get the predicted alpha value for the given UE (User Equipment) data. + + This function serves as the main API entry point for predicting the alpha parameter, + which represents a key aspect of mobility behavior derived from UE tracks. + + Alpha is predicted based on the movement patterns observed in the UE tracks, + which are represented by the following columns in the input DataFrame: + + Parameters: + - data (DataFrame): The input data containing UE tracks, structured as follows: + + +------------+------------+-----------+------+ + | mock_ue_id | lon | lat | tick | + +============+============+===========+======+ + | 0 | 102.219377 | 33.674572 | 0 | + | 1 | 102.415954 | 33.855534 | 0 | + | 2 | 102.545935 | 33.878075 | 0 | + | 0 | 102.297766 | 33.575942 | 1 | + | 1 | 102.362725 | 33.916477 | 1 | + | 2 | 102.080675 | 33.832793 | 1 | + +------------+------------+-----------+------+ + +""" + # Extract the data after preprocessing + velocity = preprocess_ue_data(data) + + # Initialize and unpack all outputs from the initialize function + t_array, t_next_array, velocity_mean, variance, rng = _initialize(velocity,seed) + + # Optimize alpha using the unpacked values + popt, pcov = optimize_alpha( + alpha0, t_array, t_next_array, velocity_mean, variance, rng + ) + + # Return the optimized alpha value + return popt[0] diff --git a/radp/digital_twin/mobility/tests/test_param_regression.py b/radp/digital_twin/mobility/tests/test_param_regression.py new file mode 100644 index 0000000..932f9c6 --- /dev/null +++ b/radp/digital_twin/mobility/tests/test_param_regression.py @@ -0,0 +1,90 @@ +import unittest +import numpy as np +import pandas as pd +from scipy import optimize +from radp.digital_twin.mobility.param_regression import ( + _initialize, + _next, + _residual_vector, + optimize_alpha, +) + + +class TestParameterFunctions(unittest.TestCase): + """ + Set up a mock DataFrame for testing the ParameterRegression class. + + Initializes a DataFrame with 2 'mock_ue_id' and 5 'velocity' values each, + and creates an instance of ParameterRegression using this DataFrame. + + How to Run: + ------------ + To run these tests, execute the following command in your terminal: + + python3 -m unittest radp/digital_twin/mobility/tests/test_parameter_regression.py + + """ + + def setUp(self): + # Setup the DataFrame for testing with 2 distinct ue_ids and 5 velocities each + data = { + "velocity": [1.2, 2.5, 3.0, 4.1, 5.7, 1.5, 2.0, 3.5, 4.2, 5.0], + "mock_ue_id": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1], + } + self.df = pd.DataFrame(data) + + # Initialize the test data + ( + self.t_array, + self.t_next_array, + self.velocity_mean, + self.variance, + self.rng, + ) = _initialize(self.df, 42) + self.alpha0 = 0.5 + + def test_initialize(self): + # Check if initialization returns correct data types and values + self.assertIsInstance(self.t_array, np.ndarray) + self.assertIsInstance(self.t_next_array, np.ndarray) + + # Check if the length of arrays matches the input data + self.assertEqual(self.t_array.shape[1], len(self.df) - 1) + self.assertEqual(self.t_next_array.shape[1], len(self.df) - 1) + + def test_next(self): + # Test if the next function returns correct shape of results + alpha = 0.5 + result = _next( + alpha, self.t_array, 0, 1, self.rng + ) # Passing arbitrary mean and variance + + # Check if result has the correct shape + self.assertEqual(result.shape[0], 2) + self.assertEqual(result.shape[1], self.t_array.shape[1]) + + # Check if result is a numpy array + self.assertIsInstance(result, np.ndarray) + + def test_residual_vector(self): + # Test if the residuals are calculated correctly + alpha = 0.5 + residuals = _residual_vector( + alpha, self.t_array, self.t_next_array, 0, 1, self.rng + ) # Passing arbitrary mean and variance + + # Check if residuals are a flat array + self.assertEqual(residuals.shape[0], 2 * self.t_array.shape[1]) + self.assertIsInstance(residuals, np.ndarray) + + def test_optimize_alpha(self): + # Test the optimization process for alpha + popt, pcov = optimize_alpha( + self.alpha0, self.t_array, self.t_next_array, 0, 1, self.rng + ) # Passing arbitrary mean and variance + + # Check if popt is a numpy array and pcov is None (leastsq returns None for covariance) + self.assertIsInstance(popt, np.ndarray) + + # Check that optimized alpha is within a reasonable range of the calculated alpha + self.assertTrue(np.allclose(popt[0], 0.49999419)) diff --git a/radp/digital_twin/requirements.txt b/radp/digital_twin/requirements.txt index a998257..2bc090b 100644 --- a/radp/digital_twin/requirements.txt +++ b/radp/digital_twin/requirements.txt @@ -5,3 +5,6 @@ pyarrow==13.0.0 scikit-learn==1.2.1 torch==2.0.0 torchvision==0.15.1 +scipy==1.10.1 + +