From 188a31b5b38684388b6863d1889f88505c4d7014 Mon Sep 17 00:00:00 2001
From: Tanzim Farhan <52883108+tanzim10@users.noreply.github.com>
Date: Fri, 11 Oct 2024 11:25:49 +0600
Subject: [PATCH] Mobility model alpha parameter regression from data (#15)

* Fixed PR changes for params_regression

* Fixed Unittest for param_regression according to PR change

* Fixed the mobility notebook and radp_library file(PR changes done)

* Resolved test_param_regression unittest cases

* Updated Param Regression and radp library as instructed in the PR review

* Changed the scipy dependency to solve dependency error

* Changed how seed value works especially made it user friendly

* Refactored functions and changed the imports

* Resolved test_param_regression imports
---
 notebooks/coo_with_radp_digital_twin.ipynb    |   2 +-
 notebooks/mobility_model.ipynb                | 362 +++++++++++++++++
 notebooks/radp_library.py                     | 370 +++++++++++++++---
 .../digital_twin/mobility/param_regression.py | 189 +++++++++
 .../mobility/tests/test_param_regression.py   |  90 +++++
 radp/digital_twin/requirements.txt            |   3 +
 6 files changed, 963 insertions(+), 53 deletions(-)
 create mode 100644 notebooks/mobility_model.ipynb
 create mode 100644 radp/digital_twin/mobility/param_regression.py
 create mode 100644 radp/digital_twin/mobility/tests/test_param_regression.py

diff --git a/notebooks/coo_with_radp_digital_twin.ipynb b/notebooks/coo_with_radp_digital_twin.ipynb
index ecff4e1..45ac095 100644
--- a/notebooks/coo_with_radp_digital_twin.ipynb
+++ b/notebooks/coo_with_radp_digital_twin.ipynb
@@ -522,7 +522,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.11.5"
   },
   "varInspector": {
    "cols": {
diff --git a/notebooks/mobility_model.ipynb b/notebooks/mobility_model.ipynb
new file mode 100644
index 0000000..0f0a578
--- /dev/null
+++ b/notebooks/mobility_model.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ec7ab4ae",
+   "metadata": {},
+   "source": [
+    "# Alpha Optimization in a Gauss-Markov Mobility Model\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "754a70e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "sys.path.append(f\"{Path().absolute().parent}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3d3ca1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import scipy\n",
+    "import numpy as np\n",
+    "from radp_library import *\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.cm as cm\n",
+    "from radp.digital_twin.mobility.param_regression import get_predicted_alpha,preprocess_ue_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "309ead1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {\n",
+    "    \"ue_tracks_generation\": {\n",
+    "            \"params\": {\n",
+    "                \"simulation_duration\": 3600,\n",
+    "                \"simulation_time_interval_seconds\": 0.01,\n",
+    "                \"num_ticks\": 50,\n",
+    "                \"num_batches\": 1,\n",
+    "                \"ue_class_distribution\": {\n",
+    "                    \"stationary\": {\n",
+    "                        \"count\": 10,\n",
+    "                        \"velocity\": 0,\n",
+    "                        \"velocity_variance\": 1\n",
+    "                    },\n",
+    "                    \"pedestrian\": {\n",
+    "                        \"count\": 5,\n",
+    "                        \"velocity\": 2,\n",
+    "                        \"velocity_variance\": 1\n",
+    "                    },\n",
+    "                    \"cyclist\": {\n",
+    "                        \"count\": 5,\n",
+    "                        \"velocity\": 5,\n",
+    "                        \"velocity_variance\": 1\n",
+    "                    },\n",
+    "                    \"car\": {\n",
+    "                        \"count\": 12,\n",
+    "                        \"velocity\": 20,\n",
+    "                        \"velocity_variance\": 1\n",
+    "                    }\n",
+    "                },\n",
+    "                \"lat_lon_boundaries\": {\n",
+    "                    \"min_lat\": -90,\n",
+    "                    \"max_lat\": 90,\n",
+    "                    \"min_lon\": -180,\n",
+    "                    \"max_lon\": 180\n",
+    "                },\n",
+    "                \"gauss_markov_params\": {\n",
+    "                    \"alpha\": 0.5,\n",
+    "                    \"variance\": 0.8,\n",
+    "                    \"rng_seed\": 42,\n",
+    "                    \"lon_x_dims\": 100,\n",
+    "                    \"lon_y_dims\": 100,\n",
+    "                    \"// TODO\": \"Account for supporting the user choosing the anchor_loc and cov_around_anchor.\",\n",
+    "                    \"// Current implementation\": \"the UE Tracks generator will not be using these values.\",\n",
+    "                    \"// anchor_loc\": {},\n",
+    "                    \"// cov_around_anchor\": {}\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5d3f4fb",
+   "metadata": {},
+   "source": [
+    "## Generate Data Set 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf32a451",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data1 = get_ue_data(params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0217fb7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data1.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0f112187",
+   "metadata": {},
+   "source": [
+    "## Plot Dataset 1 "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57d3c99b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_ue_tracks(data1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "507f5c0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "velocity = preprocess_ue_data(data1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60b0bc3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "velocity"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3e4713fc",
+   "metadata": {},
+   "source": [
+    "## Alpha Initialization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee3e00c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha0 = np.random.choice(np.arange(0, 1.1, 0.1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "119f1534",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Alpha0:\",alpha0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "97a8c1ec",
+   "metadata": {},
+   "source": [
+    "## Regress to Find Alpha 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12fdaf4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha1 = get_predicted_alpha(data1,alpha0,seed=42) # Adding seed for reproducibility"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39eed46d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1dc52c9d",
+   "metadata": {},
+   "source": [
+    "## Generating new data using alpha 1\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46d7e685",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Changing alpha value to alpha1 in the params dictionary\n",
+    "params['ue_tracks_generation']['params']['gauss_markov_params']['alpha'] = alpha1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab4eb7cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(params['ue_tracks_generation']['params']['gauss_markov_params']['alpha'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c700855",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data2 = get_ue_data(params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18a2b294",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07a4c3aa",
+   "metadata": {},
+   "source": [
+    "## Plot Dataset 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df4b1b11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_ue_tracks(data2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b727758e",
+   "metadata": {},
+   "source": [
+    "## Regress to Find Alpha 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a268312",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha2 =  get_predicted_alpha(data1,alpha1,seed=42) # Adding seed for reproducibility"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02575be1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alpha2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da3b378b",
+   "metadata": {},
+   "source": [
+    "## Comparison Plot of Dataset 1 and Dataset2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0798532d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_ue_tracks_side_by_side(data1, data2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c61c293",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/radp_library.py b/notebooks/radp_library.py
index c0cfe24..9d44da4 100644
--- a/notebooks/radp_library.py
+++ b/notebooks/radp_library.py
@@ -12,6 +12,7 @@
 import fastkml
 import matplotlib.animation as animation
 import matplotlib.pyplot as plt
+import matplotlib.cm as cm
 import numpy as np
 import pandas as pd
 import rasterio.features
@@ -20,8 +21,14 @@
 from shapely import geometry
 
 from radp.digital_twin.mobility.mobility import gauss_markov
-from radp.digital_twin.rf.bayesian.bayesian_engine import BayesianDigitalTwin, NormMethod
+from radp.digital_twin.rf.bayesian.bayesian_engine import (
+    BayesianDigitalTwin,
+    NormMethod,
+)
 from radp.digital_twin.utils.gis_tools import GISTools
+from radp.digital_twin.mobility.ue_tracks import UETracksGenerator
+from radp.digital_twin.mobility.ue_tracks_params import UETracksGenerationParams
+
 
 Boundary = Union[geometry.Polygon, geometry.MultiPolygon]
 KML_NS = "{http://www.opengis.net/kml/2.2}"
@@ -77,7 +84,9 @@ def _setup_kml_obj(
             styles = []
 
         k = fastkml.KML()
-        doc = fastkml.Document(ns=KML_NS, name=(name or "Shapes"), description=(desc or ""), styles=styles)
+        doc = fastkml.Document(
+            ns=KML_NS, name=(name or "Shapes"), description=(desc or ""), styles=styles
+        )
         k.append(doc)
 
         return k, doc
@@ -106,7 +115,9 @@ def _add_shape_to_folder(
     ) -> None:
         if desc is None:
             desc = name
-        shape_placemark = fastkml.Placemark(ns=KML_NS, name=name, description=desc, styles=styles)
+        shape_placemark = fastkml.Placemark(
+            ns=KML_NS, name=name, description=desc, styles=styles
+        )
         shape_placemark.geometry = shape
         folder.append(shape_placemark)
 
@@ -181,15 +192,21 @@ def shape_dict_to_kmz(
                     obj_style = styles
 
                 if descriptions_dict is not None and name in descriptions_dict:
-                    obj_desc = ShapesKMLWriter._build_description_from_prop_dict(descriptions_dict[name])
+                    obj_desc = ShapesKMLWriter._build_description_from_prop_dict(
+                        descriptions_dict[name]
+                    )
                 else:
                     obj_desc = name
 
                 if isinstance(obj, geometry.base.BaseGeometry):
-                    cls._add_shape_to_folder(cur_folder, obj, name, styles=obj_style, desc=obj_desc)
+                    cls._add_shape_to_folder(
+                        cur_folder, obj, name, styles=obj_style, desc=obj_desc
+                    )
                 else:
                     # isinstance(obj, dict))
-                    child_folder = fastkml.Folder(ns=KML_NS, name=name, styles=obj_style)
+                    child_folder = fastkml.Folder(
+                        ns=KML_NS, name=name, styles=obj_style
+                    )
                     cur_folder.append(child_folder)
                     fringe.append((child_folder, obj))
 
@@ -214,17 +231,22 @@ def get_percell_data(
 
     data_in_sampled = data_in
 
-    data_in_sampled.columns = [col.replace("_1", "") if col.endswith("_1") else col for col in data_in_sampled.columns]
+    data_in_sampled.columns = [
+        col.replace("_1", "") if col.endswith("_1") else col
+        for col in data_in_sampled.columns
+    ]
 
     # filter out invalid values
     data_cell_valid = data_in_sampled[data_in_sampled.cell_rxpwr_dbm != invalid_value]
     if choose_strongest_samples_percell:
-        data_cell_sampled = data_cell_valid.sort_values("cell_rxpwr_dbm", ascending=False).head(
-            n=min(n_samples, len(data_cell_valid))
-        )
+        data_cell_sampled = data_cell_valid.sort_values(
+            "cell_rxpwr_dbm", ascending=False
+        ).head(n=min(n_samples, len(data_cell_valid)))
     else:
         # get n_samples independent random samples inside training groups
-        data_cell_sampled = data_cell_valid.sample(n=min(n_samples, len(data_cell_valid)), random_state=(seed))
+        data_cell_sampled = data_cell_valid.sample(
+            n=min(n_samples, len(data_cell_valid)), random_state=(seed)
+        )
 
     # logging.info(f"n_samples={n_samples}, len(data_cell_valid)={len(data_cell_valid)}")
     # plt.scatter(y=data_cell_sampled.loc_y, x=data_cell_sampled.loc_x, s=10)
@@ -276,7 +298,11 @@ def bing_tile_to_center(x, y, level, tile_pixels=256):
     xwidth = 360.0 / zoom_factor
     out = []
     out.append(
-        (y_to_latitude(True, y, zoom_factor, tile_pixels) + y_to_latitude(False, y, zoom_factor, tile_pixels)) / 2
+        (
+            y_to_latitude(True, y, zoom_factor, tile_pixels)
+            + y_to_latitude(False, y, zoom_factor, tile_pixels)
+        )
+        / 2
     )
     out.append(xwidth * (x + 0.5) - 180)
     return out
@@ -329,7 +355,9 @@ def latitude_to_world_pixel(latitude, zoom_factor, tile_pixels=256):
     latitude = map_clip(latitude, -85.05112878, 85.05112878)
     sin_latitude = np.sin(latitude * np.pi / 180.0)
 
-    pixel_y = (0.5 - np.log((1 + sin_latitude) / (1 - sin_latitude)) / (4 * np.pi)) * (tile_pixels * zoom_factor)
+    pixel_y = (0.5 - np.log((1 + sin_latitude) / (1 - sin_latitude)) / (4 * np.pi)) * (
+        tile_pixels * zoom_factor
+    )
     return pixel_y
 
 
@@ -363,12 +391,16 @@ def lon_lat_to_bing_tile_df_row(row, level):
     return row
 
 
-def get_lonlat_from_xy_idxs(xy: np.ndarray, lower_left: Tuple[float, float]) -> np.ndarray:
+def get_lonlat_from_xy_idxs(
+    xy: np.ndarray, lower_left: Tuple[float, float]
+) -> np.ndarray:
     return xy * SRTM_STEP + lower_left
 
 
 def find_closest(data_df, lat, lon):
-    dist = data_df.apply(lambda row: GISTools.dist((row.loc_y, row.loc_x), (lat, lon)), axis=1)
+    dist = data_df.apply(
+        lambda row: GISTools.dist((row.loc_y, row.loc_x), (lat, lon)), axis=1
+    )
     if dist.min() < 100:
         return dist.idxmin()
     else:
@@ -410,8 +442,12 @@ def get_track_samples(
         xy_lonlat = get_lonlat_from_xy_idxs(xy, (min_lon, min_lat))
         xy_lonlat_ue_tracks.extend(xy_lonlat)
 
-    all_track_pts_df = pd.DataFrame(columns=["loc_x", "loc_y"], data=xy_lonlat_ue_tracks)
-    all_track_pts_sampled_df = all_track_pts_df.apply(lambda row: find_closest(data_df, row.loc_y, row.loc_x), axis=1)
+    all_track_pts_df = pd.DataFrame(
+        columns=["loc_x", "loc_y"], data=xy_lonlat_ue_tracks
+    )
+    all_track_pts_sampled_df = all_track_pts_df.apply(
+        lambda row: find_closest(data_df, row.loc_y, row.loc_x), axis=1
+    )
     return data_df.loc[all_track_pts_sampled_df]
 
 
@@ -498,7 +534,9 @@ def bdt(
         axs[1].set_yticks([])
     for i in range(len(desired_idxs)):
         train_cell_id = idx_cell_id_mapping[i + 1]
-        training_data[train_cell_id] = pd.concat([tilt_per_cell_df[i] for tilt_per_cell_df in percell_data_list])
+        training_data[train_cell_id] = pd.concat(
+            [tilt_per_cell_df[i] for tilt_per_cell_df in percell_data_list]
+        )
         if track_sampling:
             training_data[train_cell_id] = get_track_samples(
                 training_data[train_cell_id],
@@ -515,19 +553,27 @@ def bdt(
             )
     for train_cell_id, training_data_idx in training_data.items():
         training_data_idx["cell_id"] = train_cell_id
-        training_data_idx["cell_lat"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["cell_lat"].values[0]
-        training_data_idx["cell_lon"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["cell_lon"].values[0]
-        training_data_idx["cell_az_deg"] = site_config_df[site_config_df["cell_id"] == train_cell_id][
-            "cell_az_deg"
-        ].values[0]
-        training_data_idx["cell_txpwr_dbm"] = site_config_df[site_config_df["cell_id"] == train_cell_id][
-            "cell_txpwr_dbm"
-        ].values[0]
-        training_data_idx["hTx"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["hTx"].values[0]
-        training_data_idx["hRx"] = site_config_df[site_config_df["cell_id"] == train_cell_id]["hRx"].values[0]
-        training_data_idx["cell_carrier_freq_mhz"] = site_config_df[site_config_df["cell_id"] == train_cell_id][
-            "cell_carrier_freq_mhz"
-        ].values[0]
+        training_data_idx["cell_lat"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["cell_lat"].values[0]
+        training_data_idx["cell_lon"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["cell_lon"].values[0]
+        training_data_idx["cell_az_deg"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["cell_az_deg"].values[0]
+        training_data_idx["cell_txpwr_dbm"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["cell_txpwr_dbm"].values[0]
+        training_data_idx["hTx"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["hTx"].values[0]
+        training_data_idx["hRx"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["hRx"].values[0]
+        training_data_idx["cell_carrier_freq_mhz"] = site_config_df[
+            site_config_df["cell_id"] == train_cell_id
+        ]["cell_carrier_freq_mhz"].values[0]
         training_data_idx["log_distance"] = [
             GISTools.get_log_distance(
                 training_data_idx["cell_lat"].values[0],
@@ -572,7 +618,10 @@ def bdt(
         training_data_idx = training_data_idx.drop(
             training_data_idx[
                 (training_data_idx["cell_rxpwr_dbm"] < filter_out_samples_dbm_threshold)
-                & (training_data_idx["log_distance"] > np.log(1000 * filter_out_samples_kms_threshold))
+                & (
+                    training_data_idx["log_distance"]
+                    > np.log(1000 * filter_out_samples_kms_threshold)
+                )
             ].index
         )
         if plot_loss_vs_iter:
@@ -632,19 +681,27 @@ def bdt(
 
     for test_cell_id, test_data_idx in test_data.items():
         test_data_idx["cell_id"] = test_cell_id
-        test_data_idx["cell_lat"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_lat"].values[0]
-        test_data_idx["cell_lon"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_lon"].values[0]
-        test_data_idx["cell_az_deg"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["cell_az_deg"].values[
-            0
-        ]
-        test_data_idx["cell_txpwr_dbm"] = site_config_df[site_config_df["cell_id"] == test_cell_id][
-            "cell_txpwr_dbm"
-        ].values[0]
-        test_data_idx["hTx"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["hTx"].values[0]
-        test_data_idx["hRx"] = site_config_df[site_config_df["cell_id"] == test_cell_id]["hRx"].values[0]
-        test_data_idx["cell_carrier_freq_mhz"] = site_config_df[site_config_df["cell_id"] == test_cell_id][
-            "cell_carrier_freq_mhz"
-        ].values[0]
+        test_data_idx["cell_lat"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["cell_lat"].values[0]
+        test_data_idx["cell_lon"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["cell_lon"].values[0]
+        test_data_idx["cell_az_deg"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["cell_az_deg"].values[0]
+        test_data_idx["cell_txpwr_dbm"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["cell_txpwr_dbm"].values[0]
+        test_data_idx["hTx"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["hTx"].values[0]
+        test_data_idx["hRx"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["hRx"].values[0]
+        test_data_idx["cell_carrier_freq_mhz"] = site_config_df[
+            site_config_df["cell_id"] == test_cell_id
+        ]["cell_carrier_freq_mhz"].values[0]
         test_data_idx["log_distance"] = [
             GISTools.get_log_distance(
                 test_data_idx["cell_lat"].values[0],
@@ -686,10 +743,16 @@ def bdt(
         test_data_percell = test_data_percell.drop(
             test_data_percell[
                 (test_data_percell["cell_rxpwr_dbm"] < filter_out_samples_dbm_threshold)
-                & (test_data_percell["log_distance"] > np.log(1000 * filter_out_samples_kms_threshold))
+                & (
+                    test_data_percell["log_distance"]
+                    > np.log(1000 * filter_out_samples_kms_threshold)
+                )
             ].index
         )
-        (pred_means_percell, _,) = bayesian_digital_twins[idx].predict_distributed_gpmodel(
+        (
+            pred_means_percell,
+            _,
+        ) = bayesian_digital_twins[idx].predict_distributed_gpmodel(
             prediction_dfs=[test_data_percell],
         )
         logging.info(f"merging cell at idx =  : {idx}")
@@ -700,11 +763,15 @@ def bdt(
         )
         full_prediction_frame = (
             pd.concat([full_prediction_frame, test_data_percell_bing_tile])
-            .groupby(["loc_x", "loc_y"], as_index=False)[["cell_rxpwr_dbm", "pred_means"]]
+            .groupby(["loc_x", "loc_y"], as_index=False)[
+                ["cell_rxpwr_dbm", "pred_means"]
+            ]
             .max()
         )
     # re-convert to lat/lon
-    full_prediction_frame = full_prediction_frame.apply(bing_tile_to_center_df_row, level=bing_tile_level, axis=1)
+    full_prediction_frame = full_prediction_frame.apply(
+        bing_tile_to_center_df_row, level=bing_tile_level, axis=1
+    )
 
     # compute RSRP as maximum over predicted rx powers
     pred_rsrp = np.array(full_prediction_frame.pred_means)
@@ -751,7 +818,9 @@ def bdt(
         axs[1].set_xticks([])
         axs[1].set_yticks([])
 
-        plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.0, hspace=0.1)
+        plt.subplots_adjust(
+            left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.0, hspace=0.1
+        )
         plt.show()
 
     return (
@@ -825,7 +894,9 @@ def init():
     def animate(i):
         plt.clf()
         _init_plt(axs)
-        pred_rsrp_points = axs[1].scatter(lons, lats, c=pred_rsrp_list[i], cmap=cmap, s=25)
+        pred_rsrp_points = axs[1].scatter(
+            lons, lats, c=pred_rsrp_list[i], cmap=cmap, s=25
+        )
         axs[1].set_title(
             f"Predicted RSRP \n MAE = {MAE_list[i]:0.1f} dB"
             f"\nmax_training_iterations = {maxiter_list[i]} | "
@@ -839,7 +910,9 @@ def animate(i):
         return [true_rsrp_points, pred_rsrp_points]
 
     # call the animator.  blit=True means only re-draw the parts that have changed.
-    anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(pred_rsrp_list), blit=True)
+    anim = animation.FuncAnimation(
+        fig, animate, init_func=init, frames=len(pred_rsrp_list), blit=True
+    )
 
     writervideo = animation.FFMpegWriter(fps=4)
     anim.save(filename, writer=writervideo)
@@ -867,3 +940,196 @@ def rfco_to_best_server_shapes(
         key=lambda x: x[1],
     )
     return shapes
+
+
+# Mobility Model helper functions
+
+
+def get_ue_data(params: dict) -> pd.DataFrame:
+    """
+    Generates user equipment (UE) tracks data using specified simulation parameters.
+
+    This function initializes a UETracksGenerationParams object using the provided parameters
+    and then iterates over batches generated by the UETracksGenerator. Each batch of UE tracks
+    data is consolidated into a single DataFrame which captures mobility tracks across multiple
+    ticks and batches, as per the defined parameters.
+
+    Using the UETracksGenerator, the UE tracks are returned in form of a dataframe
+    The Dataframe is arranged as follows:
+
+        +------------+------------+-----------+------+
+        | mock_ue_id | lon        | lat       | tick |
+        +============+============+===========+======+
+        |   0        | 102.219377 | 33.674572 |   0  |
+        |   1        | 102.415954 | 33.855534 |   0  |
+        |   2        | 102.545935 | 33.878075 |   0  |
+        |   0        | 102.297766 | 33.575942 |   1  |
+        |   1        | 102.362725 | 33.916477 |   1  |
+        |   2        | 102.080675 | 33.832793 |   1  |
+        +------------+------------+-----------+------+
+    """
+
+    # Initialize the UE data
+    ue_tracks_params = UETracksGenerationParams(params)
+
+    ue_tracks_generation = pd.DataFrame()  # Initialize an empty DataFrame
+    for ue_tracks_generation_batch in UETracksGenerator.generate_as_lon_lat_points(
+        rng_seed=ue_tracks_params.rng_seed,
+        lon_x_dims=ue_tracks_params.lon_x_dims,
+        lon_y_dims=ue_tracks_params.lon_y_dims,
+        num_ticks=ue_tracks_params.num_ticks,
+        num_UEs=ue_tracks_params.num_UEs,
+        num_batches=ue_tracks_params.num_batches,
+        alpha=ue_tracks_params.alpha,
+        variance=ue_tracks_params.variance,
+        min_lat=ue_tracks_params.min_lat,
+        max_lat=ue_tracks_params.max_lat,
+        min_lon=ue_tracks_params.min_lon,
+        max_lon=ue_tracks_params.max_lon,
+        mobility_class_distribution=ue_tracks_params.mobility_class_distribution,
+        mobility_class_velocities=ue_tracks_params.mobility_class_velocities,
+        mobility_class_velocity_variances=ue_tracks_params.mobility_class_velocity_variances,
+    ):
+        # Append each batch to the main DataFrame
+        ue_tracks_generation = pd.concat(
+            [ue_tracks_generation, ue_tracks_generation_batch], ignore_index=True
+        )
+
+    return ue_tracks_generation
+
+
+def plot_ue_tracks(df: pd.DataFrame) -> None:
+    """
+    Plots the movement tracks of unique UE IDs on a grid of subplots.
+    """
+
+    # Initialize an empty list to store batch indices
+    batch_indices = []
+
+    # Identify where tick resets and mark the indices
+    for i in range(1, len(df)):
+        if df.loc[i, "tick"] == 0 and df.loc[i - 1, "tick"] != 0:
+            batch_indices.append(i)
+
+    # Add the final index to close the last batch
+    batch_indices.append(len(df))
+
+    # Now, iterate over the identified batches
+    start_idx = 0
+    for batch_num, end_idx in enumerate(batch_indices):
+        batch_data = df.iloc[start_idx:end_idx]
+
+        # Create a new figure
+        plt.figure(figsize=(10, 6))
+
+        # Generate a color map with different colors for each ue_id
+        color_map = cm.get_cmap("tab20", len(batch_data["mock_ue_id"].unique()))
+
+        # Plot each ue_id's movement over ticks in this batch
+        for idx, ue_id in enumerate(batch_data["mock_ue_id"].unique()):
+            ue_data = batch_data[batch_data["mock_ue_id"] == ue_id]
+            color = color_map(idx)  # Get a unique color for each ue_id
+
+            # Plot the path with arrows
+            for i in range(len(ue_data) - 1):
+                x_start = ue_data.iloc[i]["lon"]
+                y_start = ue_data.iloc[i]["lat"]
+                x_end = ue_data.iloc[i + 1]["lon"]
+                y_end = ue_data.iloc[i + 1]["lat"]
+
+                # Calculate the direction vector
+                dx = x_end - x_start
+                dy = y_end - y_start
+
+                # Plot the line with an arrow with reduced width and unique color
+                plt.quiver(
+                    x_start,
+                    y_start,
+                    dx,
+                    dy,
+                    angles="xy",
+                    scale_units="xy",
+                    scale=1,
+                    color=color,
+                    width=0.002,
+                    headwidth=3,
+                    headlength=5,
+                )
+
+            # Plot starting points as circles with the same color
+            plt.scatter(
+                ue_data["lon"].iloc[0],
+                ue_data["lat"].iloc[0],
+                color=color,
+                label=f"Start UE {ue_id}",
+            )
+
+        # Set plot title and labels
+        plt.title(f"UE Tracks with Direction for Batch {batch_num + 1}")
+        plt.xlabel("Longitude")
+        plt.ylabel("Latitude")
+        plt.legend(loc="upper right", bbox_to_anchor=(1.2, 1))
+
+        # Display the plot
+        plt.show()
+
+        # Update start_idx for the next batch
+        start_idx = end_idx
+
+
+def plot_ue_tracks_side_by_side(df1: pd.DataFrame, df2: pd.DataFrame) -> None:
+    """
+    Plots the movement tracks of unique UE IDs from two DataFrames side by side.
+    """
+    # Set up subplots with 2 columns for side by side plots
+    fig, axes = plt.subplots(1, 2, figsize=(25, 10))  # 2 rows, 2 columns (side by side)
+
+    # Plot the first DataFrame
+    plot_ue_tracks_on_axis(df1, axes[0], title="DataFrame 1")
+
+    # Plot the second DataFrame
+    plot_ue_tracks_on_axis(df2, axes[1], title="DataFrame 2")
+
+    # Adjust layout and show
+    plt.tight_layout()
+    plt.show()
+
+
+def plot_ue_tracks_on_axis(df: pd.DataFrame, ax, title: str) -> None:
+    """
+    Helper function to plot UE tracks on a given axis.
+    """
+    data = df
+    unique_ids = data["mock_ue_id"].unique()
+    num_plots = len(unique_ids)
+
+    color_map = cm.get_cmap("tab20", num_plots)
+
+    for idx, ue_id in enumerate(unique_ids):
+        ue_data = data[data["mock_ue_id"] == ue_id]
+
+        for i in range(len(ue_data) - 1):
+            x_start = ue_data.iloc[i]["lon"]
+            y_start = ue_data.iloc[i]["lat"]
+            x_end = ue_data.iloc[i + 1]["lon"]
+            y_end = ue_data.iloc[i + 1]["lat"]
+
+            dx = x_end - x_start
+            dy = y_end - y_start
+            ax.quiver(
+                x_start,
+                y_start,
+                dx,
+                dy,
+                angles="xy",
+                scale_units="xy",
+                scale=1,
+                color=color_map(idx),
+            )
+
+        ax.scatter(
+            ue_data["lon"], ue_data["lat"], color=color_map(idx), label=f"UE {ue_id}"
+        )
+
+    ax.set_title(title)
+    ax.legend()
diff --git a/radp/digital_twin/mobility/param_regression.py b/radp/digital_twin/mobility/param_regression.py
new file mode 100644
index 0000000..72c8f03
--- /dev/null
+++ b/radp/digital_twin/mobility/param_regression.py
@@ -0,0 +1,189 @@
+import numpy as np
+import pandas as pd
+from scipy import optimize
+from typing import Tuple, List, Any
+from radp.digital_twin.utils.gis_tools import GISTools
+
+
+def _initialize(
+    df: pd.DataFrame, seed: int
+) -> Tuple[np.ndarray, np.ndarray, float, float, np.random.Generator]:
+    """
+    Initializes and preprocesses data from a DataFrame for mobility modeling.
+
+    Args:
+    df (pd.DataFrame): DataFrame containing 'velocity' and 'mock_ue_id' columns.
+
+    """
+    v_t_full_data = df["velocity"].to_numpy()
+
+    # CONSTANTS
+    num_users = df["mock_ue_id"].nunique()
+    rng = np.random.default_rng(seed)
+
+    # Data
+    f = np.poly1d([8, 7, 5, 1])
+    v_t_full = v_t_full_data
+    v_t = v_t_full[:-1]
+    v_t_next = v_t_full[1:]
+
+    theta_t_full = f(v_t_full) + 6 * rng.normal(size=len(v_t_full))
+    theta_t = theta_t_full[:-1]
+    theta_t_next = theta_t_full[1:]
+
+    t_array = np.array((v_t, theta_t))
+    t_next_array = np.array((v_t_next, theta_t_next))
+
+    velocity_mean = np.mean(v_t_full_data)
+    variance = np.var(v_t_full_data)
+
+    return t_array, t_next_array, velocity_mean, variance, rng
+
+
+def _next(
+    alpha: float,
+    x: np.ndarray,
+    velocity_mean: float,
+    variance: float,
+    rng: np.random.Generator,
+) -> np.ndarray:
+    """
+    Computes the next state of velocity and angle using the current state and alpha.
+
+    Args:
+    alpha (float): The regression parameter.
+    x (np.ndarray): Current state vector [velocity, angle].
+    velocity_mean (float): Mean velocity used for normalization.
+    variance (float): Variance of the velocity.
+    rng (np.random.Generator): Random number generator for noise.
+
+    """
+    v_t, theta_t = x[0], x[1]
+    alpha2 = 1.0 - alpha
+    alpha3 = np.sqrt(1.0 - alpha * alpha) * variance
+    v_t_next = alpha * v_t + alpha2 * velocity_mean + alpha3 * rng.normal()
+    angle_mean = theta_t  # Simplified model without margin correction
+    theta_t_next = alpha * theta_t + alpha2 * angle_mean + alpha3 * rng.normal()
+    return np.array([v_t_next, theta_t_next])
+
+
+def _residual_vector(
+    alpha: float,
+    t: np.ndarray,
+    t_next: np.ndarray,
+    velocity_mean: float,
+    variance: float,
+    rng: np.random.Generator,
+) -> np.ndarray:
+    """
+    Computes residuals for optimization by comparing predicted and actual next states.
+
+    Args:
+    alpha (float): The regression parameter.
+    t (np.ndarray): Current state array [velocity, angle].
+    t_next (np.ndarray): Actual next state array [next_velocity, next_angle].
+    velocity_mean (float): Mean velocity used for normalization.
+    variance (float): Variance of the velocity.
+    rng (np.random.Generator): Random number generator for noise.
+
+    """
+    predicted = _next(alpha, t, velocity_mean, variance, rng)
+    return (predicted - t_next).flatten()
+
+
+def optimize_alpha(
+    alpha0: float,
+    t_array: np.ndarray,
+    t_next_array: np.ndarray,
+    velocity_mean: float,
+    variance: float,
+    rng: np.random.Generator,
+) -> Tuple[float, Any]:
+    """
+    Optimizes alpha using least squares based on the provided mobility data.
+
+    Args:
+    alpha0 (float): Initial guess for alpha.
+    t_array (np.ndarray): Array of current state values [velocity, angle].
+    t_next_array (np.ndarray): Array of next state values [next_velocity, next_angle].
+    velocity_mean (float): Mean of velocity.
+    variance (float): Variance of velocity.
+    rng (np.random.Generator): Random number generator for noise.
+
+    """
+    popt, pcov = optimize.leastsq(
+        _residual_vector,
+        alpha0,
+        args=(t_array, t_next_array, velocity_mean, variance, rng),
+    )
+    return popt, pcov
+
+
+def calculate_distances_and_velocities(group: pd.DataFrame) -> pd.DataFrame:
+    """Calculating distances and velocities for each UE based on sorted data by ticks."""
+    group["prev_longitude"] = group["lon"].shift(1)
+    group["prev_latitude"] = group["lat"].shift(1)
+    group["distance"] = group.apply(
+        lambda row: GISTools.get_log_distance(
+            row["prev_latitude"], row["prev_longitude"], row["lat"], row["lon"]
+        )
+        if not pd.isna(row["prev_longitude"])
+        else 0,
+        axis=1,
+    )
+    # Assuming time interval between ticks is 1 unit, adjust below if different
+    group["velocity"] = (
+        group["distance"] / 1
+    )  # Convert to m/s by dividing by the seconds per tick, here assumed to be 1s
+    return group
+
+
+def preprocess_ue_data(df: pd.DataFrame) -> pd.DataFrame:
+    """Preprocess the UE data by calculating distances and velocities."""
+    df.sort_values(by=["mock_ue_id", "tick"], inplace=True)
+
+    df = df.groupby("mock_ue_id").apply(calculate_distances_and_velocities)
+    # Drop the temporary columns
+    df.drop(["prev_longitude", "prev_latitude"], axis=1, inplace=True)
+
+    return df
+
+
+def get_predicted_alpha(data: pd.DataFrame, alpha0: float,seed: int) -> float:
+    """
+    Get the predicted alpha value for the given UE (User Equipment) data.
+
+    This function serves as the main API entry point for predicting the alpha parameter, 
+    which represents a key aspect of mobility behavior derived from UE tracks.  
+
+    Alpha is predicted based on the movement patterns observed in the UE tracks, 
+    which are represented by the following columns in the input DataFrame:
+
+    Parameters:
+    - data (DataFrame): The input data containing UE tracks, structured as follows:
+  
+        +------------+------------+-----------+------+
+        | mock_ue_id | lon        | lat       | tick |
+        +============+============+===========+======+
+        |     0      | 102.219377 | 33.674572 |   0  |
+        |     1      | 102.415954 | 33.855534 |   0  |
+        |     2      | 102.545935 | 33.878075 |   0  |
+        |     0      | 102.297766 | 33.575942 |   1  |
+        |     1      | 102.362725 | 33.916477 |   1  |
+        |     2      | 102.080675 | 33.832793 |   1  |
+        +------------+------------+-----------+------+
+
+"""
+    # Extract the data after preprocessing
+    velocity = preprocess_ue_data(data)
+
+    # Initialize and unpack all outputs from the initialize function
+    t_array, t_next_array, velocity_mean, variance, rng = _initialize(velocity,seed)
+
+    # Optimize alpha using the unpacked values
+    popt, pcov = optimize_alpha(
+        alpha0, t_array, t_next_array, velocity_mean, variance, rng
+    )
+
+    # Return the optimized alpha value
+    return popt[0]
diff --git a/radp/digital_twin/mobility/tests/test_param_regression.py b/radp/digital_twin/mobility/tests/test_param_regression.py
new file mode 100644
index 0000000..932f9c6
--- /dev/null
+++ b/radp/digital_twin/mobility/tests/test_param_regression.py
@@ -0,0 +1,90 @@
+import unittest
+import numpy as np
+import pandas as pd
+from scipy import optimize
+from radp.digital_twin.mobility.param_regression import (
+    _initialize,
+    _next,
+    _residual_vector,
+    optimize_alpha,
+)
+
+
+class TestParameterFunctions(unittest.TestCase):
+    """
+     Set up a mock DataFrame for testing the ParameterRegression class.
+
+     Initializes a DataFrame with 2 'mock_ue_id' and 5 'velocity' values each,
+     and creates an instance of ParameterRegression using this DataFrame.
+
+     How to Run:
+     ------------
+     To run these tests, execute the following command in your terminal:
+
+    python3 -m unittest radp/digital_twin/mobility/tests/test_parameter_regression.py
+
+    """
+
+    def setUp(self):
+        # Setup the DataFrame for testing with 2 distinct ue_ids and 5 velocities each
+        data = {
+            "velocity": [1.2, 2.5, 3.0, 4.1, 5.7, 1.5, 2.0, 3.5, 4.2, 5.0],
+            "mock_ue_id": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
+        }
+        self.df = pd.DataFrame(data)
+
+        # Initialize the test data
+        (
+            self.t_array,
+            self.t_next_array,
+            self.velocity_mean,
+            self.variance,
+            self.rng,
+        ) = _initialize(self.df, 42)
+        self.alpha0 = 0.5
+
+    def test_initialize(self):
+        # Check if initialization returns correct data types and values
+        self.assertIsInstance(self.t_array, np.ndarray)
+        self.assertIsInstance(self.t_next_array, np.ndarray)
+
+        # Check if the length of arrays matches the input data
+        self.assertEqual(self.t_array.shape[1], len(self.df) - 1)
+        self.assertEqual(self.t_next_array.shape[1], len(self.df) - 1)
+
+    def test_next(self):
+        # Test if the next function returns correct shape of results
+        alpha = 0.5
+        result = _next(
+            alpha, self.t_array, 0, 1, self.rng
+        )  # Passing arbitrary mean and variance
+
+        # Check if result has the correct shape
+        self.assertEqual(result.shape[0], 2)
+        self.assertEqual(result.shape[1], self.t_array.shape[1])
+
+        # Check if result is a numpy array
+        self.assertIsInstance(result, np.ndarray)
+
+    def test_residual_vector(self):
+        # Test if the residuals are calculated correctly
+        alpha = 0.5
+        residuals = _residual_vector(
+            alpha, self.t_array, self.t_next_array, 0, 1, self.rng
+        )  # Passing arbitrary mean and variance
+
+        # Check if residuals are a flat array
+        self.assertEqual(residuals.shape[0], 2 * self.t_array.shape[1])
+        self.assertIsInstance(residuals, np.ndarray)
+
+    def test_optimize_alpha(self):
+        # Test the optimization process for alpha
+        popt, pcov = optimize_alpha(
+            self.alpha0, self.t_array, self.t_next_array, 0, 1, self.rng
+        )  # Passing arbitrary mean and variance
+
+        # Check if popt is a numpy array and pcov is None (leastsq returns None for covariance)
+        self.assertIsInstance(popt, np.ndarray)
+
+        # Check that optimized alpha is within a reasonable range of the calculated alpha
+        self.assertTrue(np.allclose(popt[0], 0.49999419))
diff --git a/radp/digital_twin/requirements.txt b/radp/digital_twin/requirements.txt
index a998257..2bc090b 100644
--- a/radp/digital_twin/requirements.txt
+++ b/radp/digital_twin/requirements.txt
@@ -5,3 +5,6 @@ pyarrow==13.0.0
 scikit-learn==1.2.1
 torch==2.0.0
 torchvision==0.15.1
+scipy==1.10.1
+
+