diff --git a/scripts/chloropleth.ipynb b/scripts/chloropleth.ipynb new file mode 100644 index 0000000..784888b --- /dev/null +++ b/scripts/chloropleth.ipynb @@ -0,0 +1,1578 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from zipfile import ZipFile\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "from shapely.affinity import scale, translate\n", + "import matplotlib.patheffects as pe\n", + "\n", + "# Directory to store downloaded data\n", + "local_data_dir = \"./data/\"\n", + "os.makedirs(local_data_dir, exist_ok=True)\n", + "\n", + "# eGRID Data URLs\n", + "egrid_urls = {\n", + " 2019: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/sites/default/files/2021-02/egrid2019_data.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/sites/default/files/2021-02/egrid2019_subregions.kmz',\n", + " },\n", + " 2020: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2022-09/eGRID2020_Data_v2.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/system/files/other-files/2022-01/egrid2020_subregions.kmz',\n", + " },\n", + " 2021: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2023-01/eGRID2021_data.xlsx',\n", + " \"subregions_shp\": 'https://gaftp.epa.gov/EPADataCommons/ORD/egrid2021/eGRID2021%20Subregions%20Shapefile.zip',\n", + " },\n", + " 2022: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2023-01/egrid2022_data.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/system/files/other-files/2023-01/egrid2022_subregions.kmz',\n", + " },\n", + "}\n", + "\n", + "# Conversion factor from pounds to kilograms\n", + "LBS_PER_KG = 0.45359237\n", + "\n", + "# Function to download files\n", + "def download_file(url, local_path):\n", + " if not os.path.exists(local_path):\n", + " print(f\"Downloading from {url}...\")\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " with open(local_path, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded to {local_path}.\")\n", + " else:\n", + " print(f\"Failed to download {url}. Status code: {response.status_code}\")\n", + " else:\n", + " print(f\"{local_path} already downloaded.\")\n", + "\n", + "# Function to extract data for a given year\n", + "def process_year(year, urls):\n", + " print(f\"\\nProcessing data for year {year}...\")\n", + "\n", + " # Paths for Excel and subregions files\n", + " egrid_xlsx_url = urls['egrid_xlsx']\n", + " egrid_xlsx_path = os.path.join(local_data_dir, f\"egrid{year}_data.xlsx\")\n", + "\n", + " # Determine the subregions file type and path\n", + " if 'subregions_kml' in urls:\n", + " subregions_url = urls['subregions_kml']\n", + " subregions_ext = 'kml'\n", + " elif 'subregions_kmz' in urls:\n", + " subregions_url = urls['subregions_kmz']\n", + " subregions_ext = 'kmz'\n", + " elif 'subregions_shp' in urls:\n", + " subregions_url = urls['subregions_shp']\n", + " subregions_ext = 'zip'\n", + " else:\n", + " print(f\"No subregions file provided for year {year}. 
Skipping...\")\n", + " return None\n", + "\n", + " subregions_path = os.path.join(local_data_dir, f\"egrid{year}_subregions.{subregions_ext}\")\n", + "\n", + " # Download files\n", + " download_file(egrid_xlsx_url, egrid_xlsx_path)\n", + " download_file(subregions_url, subregions_path)\n", + "\n", + " # Load carbon intensity data\n", + " try:\n", + " sheet_name = f'SRL{str(year)[-2:]}'\n", + " egrid_intensity_df = pd.read_excel(egrid_xlsx_path, sheet_name=sheet_name)\n", + "\n", + " # Use the relevant column for CO2 intensity\n", + " co2_column_name = [col for col in egrid_intensity_df.columns if 'CO2 total output emission rate (lb/MWh)' in col]\n", + " if co2_column_name:\n", + " co2_column_name = co2_column_name[0]\n", + " else:\n", + " raise ValueError(\"CO2 emission rate column not found.\")\n", + "\n", + " # Extract subregion acronym and carbon intensity\n", + " region_intensity = egrid_intensity_df[['eGRID subregion acronym', co2_column_name]].copy()\n", + " region_intensity.columns = ['SUBRGN', 'SRC2ERTA']\n", + "\n", + " # Convert to numeric and handle NaNs\n", + " region_intensity['SRC2ERTA'] = pd.to_numeric(region_intensity['SRC2ERTA'], errors='coerce')\n", + " region_intensity = region_intensity.dropna(subset=['SRC2ERTA'])\n", + "\n", + " # Convert lbs to kg\n", + " region_intensity['kg_per_mwh'] = region_intensity['SRC2ERTA'] * LBS_PER_KG\n", + " region_intensity = region_intensity[['SUBRGN', 'kg_per_mwh']]\n", + " region_intensity['SUBRGN'] = region_intensity['SUBRGN'].astype(str)\n", + "\n", + " print(f\"Extracted intensity data for {year}:\")\n", + " print(region_intensity.head())\n", + " print(f\"{year} - Available subregions in intensity data:\", region_intensity['SUBRGN'].unique())\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read eGRID data for {year}: {e}\")\n", + " return None\n", + "\n", + " # Load subregions into GeoDataFrame\n", + " try:\n", + " if subregions_ext == 'kmz':\n", + " with ZipFile(subregions_path, 'r') as kmz:\n", + " kml_filename = [name for name in kmz.namelist() if name.endswith('.kml')][0]\n", + " kmz.extract(kml_filename, local_data_dir)\n", + " kml_path = os.path.join(local_data_dir, kml_filename)\n", + " gdf = gpd.read_file(kml_path, driver='KML')\n", + " os.remove(kml_path)\n", + " elif subregions_ext == 'kml':\n", + " gdf = gpd.read_file(subregions_path, driver='KML')\n", + " elif subregions_ext == 'zip':\n", + " # Extract the zip file\n", + " with ZipFile(subregions_path, 'r') as zip_ref:\n", + " zip_ref.extractall(local_data_dir)\n", + " # Find the shapefile (.shp)\n", + " shapefile_name = [name for name in zip_ref.namelist() if name.endswith('.shp')][0]\n", + " shapefile_path = os.path.join(local_data_dir, shapefile_name)\n", + " gdf = gpd.read_file(shapefile_path)\n", + " else:\n", + " print(f\"Unsupported subregions file format for year {year}.\")\n", + " return None\n", + "\n", + " # Check for 'SUBRGN' or 'Name' column and rename to 'Name'\n", + " if 'SUBRGN' in gdf.columns:\n", + " gdf = gdf.rename(columns={'SUBRGN': 'Name'})\n", + " elif 'Name' not in gdf.columns:\n", + " raise ValueError(\"Subregion name column not found in geospatial data.\")\n", + "\n", + " # Ensure geometries are in WGS84 CRS (EPSG:4326)\n", + " if gdf.crs != 'EPSG:4326':\n", + " print(f\"Reprojecting geometries to WGS84 for year {year}.\")\n", + " gdf = gdf.to_crs(epsg=4326)\n", + "\n", + " # Merge with carbon intensity data using 'Name' as the identifier\n", + " gdf = gdf.merge(region_intensity, left_on='Name', right_on='SUBRGN', 
how='left')\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read subregions for {year}: {e}\")\n", + " return None\n", + "\n", + " # Adjust regions and return the processed GeoDataFrame\n", + " combined_gdf = adjust_regions(gdf, year)\n", + " return combined_gdf\n", + "\n", + "# Function to adjust regions (without plotting)\n", + "def adjust_regions(gdf, year):\n", + " # Define subregions for Alaska and Hawaii\n", + " alaska_subregions = ['AKGD', 'AKMS']\n", + " hawaii_subregions = ['HIMS', 'HIOA', 'HIMS/HIOA']\n", + " puerto_rico_subregions = ['PR', 'PRVI', 'PRMS']\n", + "\n", + " # Attempt to find Puerto Rico subregions in the data\n", + " pr_subregions = gdf[gdf['Name'].str.contains('PR', case=False)]['Name'].unique()\n", + " if len(pr_subregions) == 0:\n", + " # Try to identify PR subregions differently\n", + " pr_subregions = [name for name in gdf['Name'].unique() if 'PR' in name.upper()]\n", + " print(f\"Puerto Rico subregions in data: {pr_subregions}\")\n", + "\n", + " # Separate the regions\n", + " mainland_gdf = gdf[~gdf['Name'].isin(alaska_subregions + hawaii_subregions + list(pr_subregions))]\n", + " alaska_gdf = gdf[gdf['Name'].isin(alaska_subregions)]\n", + " hawaii_gdf = gdf[gdf['Name'].isin(hawaii_subregions)]\n", + " puerto_rico_gdf = gdf[gdf['Name'].isin(pr_subregions)]\n", + "\n", + " # Alaska: Scale and translate\n", + " if not alaska_gdf.empty:\n", + " alaska_gdf = alaska_gdf.copy()\n", + "\n", + " # Compute the centroid of Alaska (combined geometries)\n", + " alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Desired location for Alaska after translation\n", + " alaska_desired_location = (-120, 27) # Adjust as needed\n", + "\n", + " # Apply scaling with the common origin (alaska_centroid)\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=0.41, yfact=0.5, origin=alaska_centroid)\n", + " )\n", + "\n", + " # After scaling, compute the new centroid\n", + " scaled_alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Compute translation amounts based on the desired location\n", + " delta_x = alaska_desired_location[0] - scaled_alaska_centroid.x\n", + " delta_y = alaska_desired_location[1] - scaled_alaska_centroid.y\n", + "\n", + " # Apply translation to each geometry\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Alaska data is not available.\")\n", + "\n", + " # Hawaii: Scale and translate\n", + " if not hawaii_gdf.empty:\n", + " hawaii_gdf = hawaii_gdf.copy()\n", + " hawaii_centroid = hawaii_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=1.5, yfact=1.5, origin=hawaii_centroid)\n", + " )\n", + "\n", + " # Desired location for Hawaii after translation\n", + " hawaii_desired_location = (-105, 22) # Adjust as needed\n", + " delta_x = hawaii_desired_location[0] - hawaii_centroid.x\n", + " delta_y = hawaii_desired_location[1] - hawaii_centroid.y\n", + "\n", + " # Apply translation\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Hawaii data is not available.\")\n", + "\n", + " # Puerto Rico: Scale and translate\n", + " if not puerto_rico_gdf.empty:\n", + " puerto_rico_gdf = puerto_rico_gdf.copy()\n", + " 
pr_centroid = puerto_rico_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=1.5, yfact=1.5, origin=pr_centroid)\n", + " )\n", + "\n", + " # Desired location for Puerto Rico after translation\n", + " pr_desired_location = (-75, 22) # Adjust as needed\n", + " delta_x = pr_desired_location[0] - pr_centroid.x\n", + " delta_y = pr_desired_location[1] - pr_centroid.y\n", + "\n", + " # Apply translation\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Puerto Rico data is not available.\")\n", + " # Create an empty GeoDataFrame to prevent errors during concatenation\n", + " puerto_rico_gdf = gpd.GeoDataFrame(columns=gdf.columns)\n", + "\n", + " # Combine all regions back into a single GeoDataFrame\n", + " combined_gdf = pd.concat(\n", + " [mainland_gdf, alaska_gdf, hawaii_gdf, puerto_rico_gdf], ignore_index=True\n", + " )\n", + "\n", + " # Remove any rows with missing kg_per_mwh\n", + " combined_gdf = combined_gdf.dropna(subset=['kg_per_mwh'])\n", + "\n", + " return combined_gdf\n", + "\n", + "# Process data for selected years and store GeoDataFrames\n", + "years_to_process = [2019, 2020, 2021, 2022]\n", + "processed_data = {}\n", + "for year in years_to_process:\n", + " if year in egrid_urls:\n", + " gdf = process_year(year, egrid_urls[year])\n", + " if gdf is not None:\n", + " processed_data[year] = gdf\n", + " else:\n", + " print(f\"No data available for year {year}.\")\n", + "\n", + "# Now plot subplots\n", + "num_years = len(processed_data)\n", + "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n", + "axes = axes.flatten()\n", + "\n", + "# Define common vmax and vmin for consistent color scaling\n", + "vmin = min(gdf['kg_per_mwh'].min() for gdf in processed_data.values())\n", + "vmax = max(gdf['kg_per_mwh'].max() for gdf in processed_data.values())\n", + "\n", + "for ax, (year, combined_gdf) in zip(axes, sorted(processed_data.items())):\n", + " combined_gdf.plot(\n", + " column='kg_per_mwh',\n", + " cmap='YlGnBu',\n", + " linewidth=0.8,\n", + " ax=ax,\n", + " edgecolor='0.8',\n", + " vmax=vmax,\n", + " vmin=vmin\n", + " )\n", + "\n", + " # Add labels for each subregion\n", + " combined_gdf['coords'] = combined_gdf['geometry'].apply(lambda x: x.representative_point().coords[0])\n", + " for idx, row in combined_gdf.iterrows():\n", + " x, y = row['coords']\n", + " # Manually adjust positions for overlapping labels\n", + " if row['Name'] == 'NYLI':\n", + " x_offset = 0.9\n", + " y_offset = -0.9\n", + " elif row['Name'] == 'NYCW':\n", + " x_offset = 0.5\n", + " y_offset = 1\n", + " elif row['Name'] == 'PRMS':\n", + " x_offset = 0\n", + " y_offset = 0.7\n", + " else:\n", + " x_offset = 0\n", + " y_offset = 0\n", + " ax.annotate(\n", + " text=row['Name'],\n", + " xy=(x + x_offset, y + y_offset),\n", + " horizontalalignment='center',\n", + " fontsize=8,\n", + " color='white',\n", + " path_effects=[pe.withStroke(linewidth=2, foreground=\"black\")]\n", + " )\n", + "\n", + " # Adjust plot limits to include the new regions\n", + " ax.set_xlim(-130, -65)\n", + " ax.set_ylim(20, 50)\n", + "\n", + " # Simplify the title to display only the year\n", + " ax.set_title(f\"{year}\", fontsize=15)\n", + "\n", + " ax.axis('off')\n", + "\n", + "\n", + "# Adjust layout and add a colorbar\n", + "plt.tight_layout()\n", + "fig.subplots_adjust(right=0.85, hspace=0.05) 
# Set hspace close to zero for minimal vertical spacing\n", + "\n", + "# Additional adjustment to overlap rows if needed\n", + "# Manually adjust each axis position to achieve a slight negative spacing effect\n", + "for i, ax in enumerate(axes):\n", + " pos = ax.get_position()\n", + " if i >= 2: # Only move the bottom row up\n", + " ax.set_position([pos.x0, pos.y0 + 0.12, pos.width, pos.height]) # Adjust as needed to control overlap\n", + "\n", + "cbar_ax = fig.add_axes([0.88, 0.24, 0.03, 0.7]) # Move up by increasing the bottom and adjust height as needed\n", + "\n", + "sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))\n", + "sm._A = [] # Empty array for the scalar mappable\n", + "cbar = fig.colorbar(sm, cax=cbar_ax)\n", + "cbar.set_label('kg CO₂e/MWh', fontsize=12)\n", + "\n", + "plt.savefig(\"egrid_carbon_intensity_subplots.png\", dpi=300, bbox_inches='tight')\n", + "plt.savefig(\"egrid_carbon_intensity_subplots.pdf\", dpi=150, bbox_inches='tight')\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from zipfile import ZipFile\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "from shapely.affinity import scale, translate\n", + "import matplotlib.patheffects as pe\n", + "\n", + "# Directory to store downloaded data\n", + "local_data_dir = \"./data/\"\n", + "os.makedirs(local_data_dir, exist_ok=True)\n", + "\n", + "# URLs for the 2014 shapefile and data file\n", + "data_file_url = \"https://www.epa.gov/system/files/other-files/2023-01/egrid_historical_files_1996-2016.zip\"\n", + "shapefile_url = \"https://www.epa.gov/sites/default/files/2017-01/egrid_subregions.zip\"\n", + "\n", + "# Paths for downloaded ZIP files\n", + "data_file_zip_path = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016.zip\")\n", + "shapefile_zip_path = os.path.join(local_data_dir, \"egrid_subregions.zip\")\n", + "\n", + "# Directory paths for extracted files\n", + "data_file_dir = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016/egrid_historical_files_1996-2016\") # Updated to inner directory\n", + "shapefile_dir = os.path.join(local_data_dir, \"egrid_subregions\")\n", + "\n", + "# Download function\n", + "def download_file(url, local_path):\n", + " if not os.path.exists(local_path):\n", + " print(f\"Downloading from {url}...\")\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " with open(local_path, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded to {local_path}.\")\n", + " else:\n", + " print(f\"Failed to download {url}. Status code: {response.status_code}\")\n", + " else:\n", + " print(f\"{local_path} already downloaded.\")\n", + "\n", + "# Extract function\n", + "def extract_zip(zip_path, extract_to):\n", + " if not os.path.exists(extract_to):\n", + " os.makedirs(extract_to, exist_ok=True)\n", + " with ZipFile(zip_path, 'r') as zip_ref:\n", + " zip_ref.extractall(extract_to)\n", + " print(f\"Extracted {zip_path} to {extract_to}.\")\n", + " else:\n", + " print(f\"{extract_to} already exists. 
Extraction skipped.\")\n", + "\n", + "# Download and extract data files\n", + "download_file(data_file_url, data_file_zip_path)\n", + "extract_zip(data_file_zip_path, os.path.join(local_data_dir, \"egrid_historical_files_1996-2016\"))\n", + "\n", + "# Download and extract shapefile\n", + "download_file(shapefile_url, shapefile_zip_path)\n", + "extract_zip(shapefile_zip_path, shapefile_dir)\n", + "\n", + "# Path to the 2014 data file\n", + "data_file_path = os.path.join(data_file_dir, \"eGRID2014_Data_v2.xlsx\")\n", + "\n", + "# Load carbon intensity data from the Excel file\n", + "if not os.path.exists(data_file_path):\n", + " print(f\"Data file {data_file_path} not found. Available files in the directory:\")\n", + " for root, dirs, files in os.walk(data_file_dir):\n", + " for file in files:\n", + " print(os.path.join(root, file))\n", + "else:\n", + " # Read the specific worksheet and columns\n", + " intensity_df = pd.read_excel(data_file_path, sheet_name=\"SRL14\", usecols=[\"SUBRGN\", \"SRC2ERTA\"], skiprows=1)\n", + " intensity_df[\"SRC2ERTA\"] = pd.to_numeric(intensity_df[\"SRC2ERTA\"], errors=\"coerce\") * 0.45359237 # Convert lbs to kg\n", + " intensity_df = intensity_df.dropna(subset=[\"SRC2ERTA\"])\n", + "\n", + " print(\"Sample of carbon intensity data:\")\n", + " print(intensity_df.head())\n", + "\n", + "\n", + " # Load the 2014 shapefile\n", + " shapefile_path = os.path.join(shapefile_dir, \"eGRID2014_subregions.shp\")\n", + " if not os.path.exists(shapefile_path):\n", + " print(f\"Shapefile {shapefile_path} not found.\")\n", + " else:\n", + " gdf = gpd.read_file(shapefile_path)\n", + "\n", + " # Print available columns to identify the correct subregion code column\n", + " print(\"Columns in shapefile:\", gdf.columns)\n", + "\n", + " # Ensure geometries are in WGS84 CRS (EPSG:4326)\n", + " if gdf.crs != 'EPSG:4326':\n", + " print(\"Reprojecting geometries to WGS84 (EPSG:4326).\")\n", + " gdf = gdf.to_crs(epsg=4326)\n", + "\n", + " # Assuming 'SUBRGN' is the correct column name based on printed columns\n", + " # Adjust the variable name if needed\n", + " subregion_column = 'SUBRGN' # Replace 'SUBRGN' if another name is used for subregions\n", + "\n", + " # Print a sample of the 'zips_for_G' column to inspect its contents\n", + " print(\"Sample values in 'zips_for_G':\")\n", + " print(gdf['zips_for_G'].head())\n", + "\n", + " # If 'zips_for_G' contains subregion codes, use it as the subregion column for merging\n", + " subregion_column = 'zips_for_G' # Update this if necessary after inspecting the values\n", + "\n", + " # Merge shapefile with carbon intensity data\n", + " gdf = gdf.merge(intensity_df, left_on=subregion_column, right_on=\"SUBRGN\", how=\"left\")\n", + "\n", + " # The rest of the processing and plotting logic follows...\n", + "\n", + " # Define subregions for Alaska, Hawaii, and Puerto Rico\n", + " alaska_subregions = ['AKGD', 'AKMS']\n", + " hawaii_subregions = ['HIMS', 'HIOA', 'HIMS/HIOA']\n", + " puerto_rico_subregions = ['PR', 'PRVI', 'PRMS']\n", + "\n", + " # Separate regions for specific adjustments\n", + " mainland_gdf = gdf[~gdf[subregion_column].isin(alaska_subregions + hawaii_subregions + puerto_rico_subregions)]\n", + " alaska_gdf = gdf[gdf[subregion_column].isin(alaska_subregions)]\n", + " hawaii_gdf = gdf[gdf[subregion_column].isin(hawaii_subregions)]\n", + " puerto_rico_gdf = gdf[gdf[subregion_column].isin(puerto_rico_subregions)]\n", + "\n", + " # Alaska: Scale and translate\n", + " if not alaska_gdf.empty:\n", + " alaska_centroid = 
alaska_gdf.geometry.unary_union.centroid\n", + " alaska_desired_location = (-120, 27)\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: translate(scale(geom, xfact=0.41, yfact=0.5, origin=alaska_centroid),\n", + " xoff=alaska_desired_location[0] - alaska_centroid.x,\n", + " yoff=alaska_desired_location[1] - alaska_centroid.y)\n", + " )\n", + "\n", + " # Hawaii: Scale and translate\n", + " if not hawaii_gdf.empty:\n", + " hawaii_centroid = hawaii_gdf.geometry.unary_union.centroid\n", + " hawaii_desired_location = (-105, 22)\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: translate(scale(geom, xfact=1.5, yfact=1.5, origin=hawaii_centroid),\n", + " xoff=hawaii_desired_location[0] - hawaii_centroid.x,\n", + " yoff=hawaii_desired_location[1] - hawaii_centroid.y)\n", + " )\n", + "\n", + " # Puerto Rico: Scale and translate\n", + " if not puerto_rico_gdf.empty:\n", + " pr_centroid = puerto_rico_gdf.geometry.unary_union.centroid\n", + " pr_desired_location = (-75, 22)\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: translate(scale(geom, xfact=1.5, yfact=1.5, origin=pr_centroid),\n", + " xoff=pr_desired_location[0] - pr_centroid.x,\n", + " yoff=pr_desired_location[1] - pr_centroid.y)\n", + " )\n", + "\n", + " # Combine all regions back into a single GeoDataFrame\n", + " combined_gdf = pd.concat([mainland_gdf, alaska_gdf, hawaii_gdf, puerto_rico_gdf], ignore_index=True)\n", + "\n", + " # Plot\n", + " fig, ax = plt.subplots(figsize=(12, 8))\n", + " combined_gdf.plot(column='SRC2ERTA', cmap='YlGnBu', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)\n", + "\n", + " # Add labels for each subregion\n", + " combined_gdf['coords'] = combined_gdf['geometry'].apply(lambda x: x.representative_point().coords[0])\n", + " for idx, row in combined_gdf.iterrows():\n", + " x, y = row['coords']\n", + " ax.annotate(\n", + " text=row[subregion_column],\n", + " xy=(x, y),\n", + " horizontalalignment='center',\n", + " fontsize=8,\n", + " color='white',\n", + " path_effects=[pe.withStroke(linewidth=2, foreground=\"black\")]\n", + " )\n", + "\n", + " # Customize plot appearance\n", + " # Adjust plot limits to include the new regions\n", + " ax.set_xlim(-130, -65)\n", + " ax.set_ylim(20, 50)\n", + "\n", + " ax.set_title(\"eGRID Subregions - 2014 CO₂ Intensity (kg/MWh)\", fontsize=15)\n", + " ax.axis('off')\n", + " plt.tight_layout()\n", + " plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# gdf = extract_to_geojson(kml_path, year)\n", + "# print(gdf.head()) # Debugging line\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# print(gdf['Description'].head(10)) # Print the first 10 descriptions to see what they look like\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# new 2014" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from zipfile import ZipFile\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "from shapely.affinity import scale, translate\n", + "import matplotlib.patheffects as pe\n", + "\n", + "# Directory to store downloaded data\n", + "local_data_dir = \"./data/\"\n", + "os.makedirs(local_data_dir, exist_ok=True)\n", + "\n", + "# eGRID Data URLs\n", + 
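"# Note: the 2014 release is packaged differently from later years. Its data xlsx lives\n", + "# inside a historical-files ZIP and its subregions come as a zipped shapefile, whereas\n", + "# 2019+ provide a standalone xlsx plus a KMZ (or zipped shapefile) of subregions.\n", +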
"egrid_urls = {\n", + " 2014: {\n", + " \"data_zip\": \"https://www.epa.gov/system/files/other-files/2023-01/egrid_historical_files_1996-2016.zip\",\n", + " \"shapefile_zip\": \"https://www.epa.gov/sites/default/files/2017-01/egrid_subregions.zip\",\n", + " },\n", + " 2019: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/sites/default/files/2021-02/egrid2019_data.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/sites/default/files/2021-02/egrid2019_subregions.kmz',\n", + " },\n", + " 2021: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2023-01/eGRID2021_data.xlsx',\n", + " \"subregions_shp\": 'https://gaftp.epa.gov/EPADataCommons/ORD/egrid2021/eGRID2021%20Subregions%20Shapefile.zip',\n", + " },\n", + " 2022: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2023-01/egrid2022_data.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/system/files/other-files/2023-01/egrid2022_subregions.kmz',\n", + " },\n", + "}\n", + "\n", + "# Conversion factor from pounds to kilograms\n", + "LBS_PER_KG = 0.45359237\n", + "\n", + "# Function to download files\n", + "def download_file(url, local_path):\n", + " if not os.path.exists(local_path):\n", + " print(f\"Downloading from {url}...\")\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " with open(local_path, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded to {local_path}.\")\n", + " else:\n", + " print(f\"Failed to download {url}. Status code: {response.status_code}\")\n", + " else:\n", + " print(f\"{local_path} already downloaded.\")\n", + "\n", + "# Function to extract ZIP files\n", + "def extract_zip(zip_path, extract_to):\n", + " if not os.path.exists(extract_to):\n", + " os.makedirs(extract_to, exist_ok=True)\n", + " with ZipFile(zip_path, 'r') as zip_ref:\n", + " zip_ref.extractall(extract_to)\n", + " print(f\"Extracted {zip_path} to {extract_to}.\")\n", + " else:\n", + " print(f\"{extract_to} already exists. 
Extraction skipped.\")\n", + "\n", + "# Function to process data for 2014\n", + "def process_2014_data(urls):\n", + " print(\"Processing 2014 data with specialized logic...\")\n", + "\n", + " # URLs for the 2014 shapefile and data file\n", + " data_file_url = urls[\"data_zip\"]\n", + " shapefile_url = urls[\"shapefile_zip\"]\n", + "\n", + " # Paths for downloaded ZIP files\n", + " data_file_zip_path = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016.zip\")\n", + " shapefile_zip_path = os.path.join(local_data_dir, \"egrid_subregions.zip\")\n", + "\n", + " # Directory paths for extracted files\n", + " data_file_dir = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016\")\n", + " shapefile_dir = os.path.join(local_data_dir, \"egrid_subregions\")\n", + "\n", + " # Download and extract data files\n", + " download_file(data_file_url, data_file_zip_path)\n", + " extract_zip(data_file_zip_path, data_file_dir)\n", + "\n", + " # Download and extract shapefile\n", + " download_file(shapefile_url, shapefile_zip_path)\n", + " extract_zip(shapefile_zip_path, shapefile_dir)\n", + "\n", + " # Adjust data_file_dir to account for nested directories\n", + " nested_data_dir = os.path.join(data_file_dir, \"egrid_historical_files_1996-2016\")\n", + " if os.path.exists(nested_data_dir):\n", + " data_file_dir = nested_data_dir\n", + "\n", + " # Path to the 2014 data file\n", + " data_file_path = os.path.join(data_file_dir, \"eGRID2014_Data_v2.xlsx\")\n", + "\n", + " # Verify that the data file exists\n", + " if not os.path.exists(data_file_path):\n", + " print(f\"Data file {data_file_path} not found. Available files in the directory:\")\n", + " for root, dirs, files in os.walk(data_file_dir):\n", + " for file in files:\n", + " print(os.path.join(root, file))\n", + " return None\n", + "\n", + " # Load carbon intensity data from the Excel file\n", + " try:\n", + " # Read the specific worksheet and columns\n", + " intensity_df = pd.read_excel(data_file_path, sheet_name=\"SRL14\", usecols=[\"SUBRGN\", \"SRC2ERTA\"], skiprows=1)\n", + " intensity_df[\"SRC2ERTA\"] = pd.to_numeric(intensity_df[\"SRC2ERTA\"], errors=\"coerce\")\n", + " intensity_df = intensity_df.dropna(subset=[\"SRC2ERTA\"])\n", + "\n", + " # Convert lbs to kg\n", + " intensity_df[\"kg_per_mwh\"] = intensity_df[\"SRC2ERTA\"] * LBS_PER_KG\n", + "\n", + " print(\"Sample of carbon intensity data:\")\n", + " print(intensity_df.head())\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read eGRID data for 2014: {e}\")\n", + " return None\n", + "\n", + " # Load the 2014 shapefile\n", + " try:\n", + " shapefile_path = os.path.join(shapefile_dir, \"eGRID2014_subregions.shp\")\n", + " gdf = gpd.read_file(shapefile_path)\n", + "\n", + " # Determine the subregion column\n", + " if 'SUBRGN' in gdf.columns:\n", + " subregion_column = 'SUBRGN'\n", + " elif 'zips_for_G' in gdf.columns:\n", + " subregion_column = 'zips_for_G'\n", + " else:\n", + " print(\"Subregion column not found in shapefile.\")\n", + " return None\n", + "\n", + " # Ensure geometries are in WGS84 CRS (EPSG:4326)\n", + " if gdf.crs != 'EPSG:4326':\n", + " print(\"Reprojecting geometries to WGS84 (EPSG:4326).\")\n", + " gdf = gdf.to_crs(epsg=4326)\n", + "\n", + " # Merge shapefile with carbon intensity data\n", + " gdf = gdf.merge(intensity_df[['SUBRGN', 'kg_per_mwh']], left_on=subregion_column, right_on=\"SUBRGN\", how=\"left\")\n", + "\n", + " # Rename the subregion column to 'Name' to match other years\n", + " gdf = gdf.rename(columns={subregion_column: 
'Name'})\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read subregions for 2014: {e}\")\n", + " return None\n", + "\n", + " # Adjust regions and return the processed GeoDataFrame\n", + " combined_gdf = adjust_regions(gdf, 2014)\n", + " return combined_gdf\n", + "\n", + "# Function to process data for other years\n", + "def process_year(year, urls):\n", + " print(f\"\\nProcessing data for year {year}...\")\n", + "\n", + " if year == 2014:\n", + " return process_2014_data(urls)\n", + " else:\n", + " # Paths for Excel and subregions files\n", + " egrid_xlsx_url = urls['egrid_xlsx']\n", + " egrid_xlsx_path = os.path.join(local_data_dir, f\"egrid{year}_data.xlsx\")\n", + "\n", + " # Determine the subregions file type and path\n", + " if 'subregions_kml' in urls:\n", + " subregions_url = urls['subregions_kml']\n", + " subregions_ext = 'kml'\n", + " elif 'subregions_kmz' in urls:\n", + " subregions_url = urls['subregions_kmz']\n", + " subregions_ext = 'kmz'\n", + " elif 'subregions_shp' in urls:\n", + " subregions_url = urls['subregions_shp']\n", + " subregions_ext = 'zip'\n", + " else:\n", + " print(f\"No subregions file provided for year {year}. Skipping...\")\n", + " return None\n", + "\n", + " subregions_path = os.path.join(local_data_dir, f\"egrid{year}_subregions.{subregions_ext}\")\n", + "\n", + " # Download files\n", + " download_file(egrid_xlsx_url, egrid_xlsx_path)\n", + " download_file(subregions_url, subregions_path)\n", + "\n", + " # Load carbon intensity data\n", + " try:\n", + " sheet_name = f'SRL{str(year)[-2:]}'\n", + " egrid_intensity_df = pd.read_excel(egrid_xlsx_path, sheet_name=sheet_name)\n", + "\n", + " # Use the relevant column for CO₂ intensity\n", + " co2_column_name = [col for col in egrid_intensity_df.columns if 'CO2 total output emission rate (lb/MWh)' in col]\n", + " if co2_column_name:\n", + " co2_column_name = co2_column_name[0]\n", + " else:\n", + " raise ValueError(\"CO₂ emission rate column not found.\")\n", + "\n", + " # Extract subregion acronym and carbon intensity\n", + " region_intensity = egrid_intensity_df[['eGRID subregion acronym', co2_column_name]].copy()\n", + " region_intensity.columns = ['SUBRGN', 'SRC2ERTA']\n", + "\n", + " # Convert to numeric and handle NaNs\n", + " region_intensity['SRC2ERTA'] = pd.to_numeric(region_intensity['SRC2ERTA'], errors='coerce')\n", + " region_intensity = region_intensity.dropna(subset=['SRC2ERTA'])\n", + "\n", + " # Convert lbs to kg\n", + " region_intensity['kg_per_mwh'] = region_intensity['SRC2ERTA'] * LBS_PER_KG\n", + " region_intensity = region_intensity[['SUBRGN', 'kg_per_mwh']]\n", + " region_intensity['SUBRGN'] = region_intensity['SUBRGN'].astype(str)\n", + "\n", + " print(f\"Extracted intensity data for {year}:\")\n", + " print(region_intensity.head())\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read eGRID data for {year}: {e}\")\n", + " return None\n", + "\n", + " # Load subregions into GeoDataFrame\n", + " try:\n", + " if subregions_ext == 'kmz':\n", + " with ZipFile(subregions_path, 'r') as kmz:\n", + " kml_filename = [name for name in kmz.namelist() if name.endswith('.kml')][0]\n", + " kmz.extract(kml_filename, local_data_dir)\n", + " kml_path = os.path.join(local_data_dir, kml_filename)\n", + " gdf = gpd.read_file(kml_path, driver='KML')\n", + " os.remove(kml_path)\n", + " elif subregions_ext == 'kml':\n", + " gdf = gpd.read_file(subregions_path, driver='KML')\n", + " elif subregions_ext == 'zip':\n", + " # Extract the zip file\n", + " with 
ZipFile(subregions_path, 'r') as zip_ref:\n", + " zip_ref.extractall(local_data_dir)\n", + " # Find the shapefile (.shp)\n", + " shapefile_name = [name for name in zip_ref.namelist() if name.endswith('.shp')][0]\n", + " shapefile_path = os.path.join(local_data_dir, shapefile_name)\n", + " gdf = gpd.read_file(shapefile_path)\n", + " else:\n", + " print(f\"Unsupported subregions file format for year {year}.\")\n", + " return None\n", + "\n", + " # Check for 'SUBRGN' or 'Name' column and rename to 'Name'\n", + " if 'SUBRGN' in gdf.columns:\n", + " gdf = gdf.rename(columns={'SUBRGN': 'Name'})\n", + " elif 'Name' not in gdf.columns:\n", + " raise ValueError(\"Subregion name column not found in geospatial data.\")\n", + "\n", + " # Ensure geometries are in WGS84 CRS (EPSG:4326)\n", + " if gdf.crs != 'EPSG:4326':\n", + " print(f\"Reprojecting geometries to WGS84 for year {year}.\")\n", + " gdf = gdf.to_crs(epsg=4326)\n", + "\n", + " # Merge with carbon intensity data using 'Name' as the identifier\n", + " gdf = gdf.merge(region_intensity, left_on='Name', right_on='SUBRGN', how='left')\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read subregions for {year}: {e}\")\n", + " return None\n", + "\n", + " # Adjust regions and return the processed GeoDataFrame\n", + " combined_gdf = adjust_regions(gdf, year)\n", + " return combined_gdf\n", + "\n", + "# Function to adjust regions (without plotting)\n", + "def adjust_regions(gdf, year):\n", + " # Define subregions for Alaska and Hawaii\n", + " alaska_subregions = ['AKGD', 'AKMS']\n", + " hawaii_subregions = ['HIMS', 'HIOA', 'HIMS/HIOA']\n", + " puerto_rico_subregions = ['PR', 'PRVI', 'PRMS']\n", + "\n", + " # Attempt to find Puerto Rico subregions in the data\n", + " pr_subregions = gdf[gdf['Name'].str.contains('PR', case=False, na=False)]['Name'].unique()\n", + " if len(pr_subregions) == 0:\n", + " # Try to identify PR subregions differently\n", + " pr_subregions = [name for name in gdf['Name'].unique() if 'PR' in str(name).upper()]\n", + " print(f\"Puerto Rico subregions in data for {year}: {pr_subregions}\")\n", + "\n", + " # Separate the regions\n", + " mainland_gdf = gdf[~gdf['Name'].isin(alaska_subregions + hawaii_subregions + list(pr_subregions))]\n", + " alaska_gdf = gdf[gdf['Name'].isin(alaska_subregions)]\n", + " hawaii_gdf = gdf[gdf['Name'].isin(hawaii_subregions)]\n", + " puerto_rico_gdf = gdf[gdf['Name'].isin(pr_subregions)]\n", + "\n", + " # Alaska: Scale and translate\n", + " if not alaska_gdf.empty:\n", + " alaska_gdf = alaska_gdf.copy()\n", + "\n", + " # Compute the centroid of Alaska (combined geometries)\n", + " alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Desired location for Alaska after translation\n", + " alaska_desired_location = (-120, 27) # Adjust as needed\n", + "\n", + " # Apply scaling with the common origin (alaska_centroid)\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=0.41, yfact=0.5, origin=alaska_centroid)\n", + " )\n", + "\n", + " # After scaling, compute the new centroid\n", + " scaled_alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Compute translation amounts based on the desired location\n", + " delta_x = alaska_desired_location[0] - scaled_alaska_centroid.x\n", + " delta_y = alaska_desired_location[1] - scaled_alaska_centroid.y\n", + "\n", + " # Apply translation to each geometry\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, 
xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(f\"Alaska data is not available for {year}.\")\n", + "\n", + " # Hawaii: Scale and translate\n", + " if not hawaii_gdf.empty:\n", + " hawaii_gdf = hawaii_gdf.copy()\n", + " hawaii_centroid = hawaii_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=1.5, yfact=1.5, origin=hawaii_centroid)\n", + " )\n", + "\n", + " # Desired location for Hawaii after translation\n", + " hawaii_desired_location = (-105, 22) # Adjust as needed\n", + " delta_x = hawaii_desired_location[0] - hawaii_centroid.x\n", + " delta_y = hawaii_desired_location[1] - hawaii_centroid.y\n", + "\n", + " # Apply translation\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(f\"Hawaii data is not available for {year}.\")\n", + "\n", + " # Puerto Rico: Scale and translate\n", + " if not puerto_rico_gdf.empty:\n", + " puerto_rico_gdf = puerto_rico_gdf.copy()\n", + " pr_centroid = puerto_rico_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=1.5, yfact=1.5, origin=pr_centroid)\n", + " )\n", + "\n", + " # Desired location for Puerto Rico after translation\n", + " pr_desired_location = (-75, 22) # Adjust as needed\n", + " delta_x = pr_desired_location[0] - pr_centroid.x\n", + " delta_y = pr_desired_location[1] - pr_centroid.y\n", + "\n", + " # Apply translation\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(f\"Puerto Rico data is not available for {year}.\")\n", + "\n", + " # Combine all regions back into a single GeoDataFrame\n", + " combined_gdf = pd.concat(\n", + " [mainland_gdf, alaska_gdf, hawaii_gdf, puerto_rico_gdf], ignore_index=True\n", + " )\n", + "\n", + " # Remove any rows with missing kg_per_mwh\n", + " combined_gdf = combined_gdf.dropna(subset=['kg_per_mwh'])\n", + "\n", + " return combined_gdf\n", + "\n", + "# Process data for selected years and store GeoDataFrames\n", + "years_to_process = [2014, 2019, 2021, 2022]\n", + "processed_data = {}\n", + "for year in years_to_process:\n", + " if year in egrid_urls:\n", + " gdf = process_year(year, egrid_urls[year])\n", + " if gdf is not None:\n", + " processed_data[year] = gdf\n", + " else:\n", + " print(f\"No data available for year {year}.\")\n", + "\n", + "# Now plot subplots\n", + "num_years = len(processed_data)\n", + "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n", + "axes = axes.flatten()\n", + "\n", + "# Define common vmax and vmin for consistent color scaling\n", + "vmin = min(gdf['kg_per_mwh'].min() for gdf in processed_data.values())\n", + "vmax = max(gdf['kg_per_mwh'].max() for gdf in processed_data.values())\n", + "\n", + "for ax, (year, combined_gdf) in zip(axes, sorted(processed_data.items())):\n", + " combined_gdf.plot(\n", + " column='kg_per_mwh',\n", + " cmap='YlGnBu',\n", + " linewidth=0.8,\n", + " ax=ax,\n", + " edgecolor='0.8',\n", + " vmax=vmax,\n", + " vmin=vmin\n", + " )\n", + "\n", + " # Add labels for each subregion\n", + " combined_gdf['coords'] = combined_gdf['geometry'].apply(lambda x: x.representative_point().coords[0])\n", + " for idx, row in combined_gdf.iterrows():\n", + " x, y = 
row['coords']\n", + " # Manually adjust positions for overlapping labels\n", + " if row['Name'] == 'NYLI':\n", + " x_offset = 0.9\n", + " y_offset = -0.9\n", + " elif row['Name'] == 'NYCW':\n", + " x_offset = 0.5\n", + " y_offset = 1\n", + " elif row['Name'] == 'PRMS':\n", + " x_offset = 0\n", + " y_offset = 0.7\n", + " else:\n", + " x_offset = 0\n", + " y_offset = 0\n", + " ax.annotate(\n", + " text=row['Name'],\n", + " xy=(x + x_offset, y + y_offset),\n", + " horizontalalignment='center',\n", + " fontsize=8,\n", + " color='white',\n", + " path_effects=[pe.withStroke(linewidth=2, foreground=\"black\")]\n", + " )\n", + "\n", + " # Adjust plot limits to include the new regions\n", + " ax.set_xlim(-130, -65)\n", + " ax.set_ylim(20, 50)\n", + "\n", + " # Simplify the title to display only the year\n", + " ax.set_title(f\"{year}\", fontsize=15)\n", + "\n", + " ax.axis('off')\n", + "\n", + "# Adjust layout and add a colorbar\n", + "plt.tight_layout()\n", + "fig.subplots_adjust(right=0.85, hspace=0.05) # Set hspace close to zero for minimal vertical spacing\n", + "\n", + "# Additional adjustment to overlap rows if needed\n", + "# Manually adjust each axis position to achieve a slight negative spacing effect\n", + "for i, ax in enumerate(axes):\n", + " pos = ax.get_position()\n", + " if i >= 2: # Only move the bottom row up\n", + " ax.set_position([pos.x0, pos.y0 + 0.12, pos.width, pos.height]) # Adjust as needed\n", + "\n", + "cbar_ax = fig.add_axes([0.88, 0.24, 0.03, 0.7]) # Adjust as needed\n", + "\n", + "sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))\n", + "sm._A = [] # Empty array for the scalar mappable\n", + "cbar = fig.colorbar(sm, cax=cbar_ax)\n", + "cbar.set_label('kg CO₂e/MWh', fontsize=12)\n", + "\n", + "plt.savefig(\"egrid_carbon_intensity_subplots.png\", dpi=300, bbox_inches='tight')\n", + "plt.savefig(\"egrid_carbon_intensity_subplots.pdf\", dpi=150, bbox_inches='tight')\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from zipfile import ZipFile\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "from shapely.affinity import scale, translate\n", + "import matplotlib.patheffects as pe\n", + "import warnings\n", + "\n", + "# Suppress warnings for cleaner output\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Directory to store downloaded data\n", + "local_data_dir = \"./data/\"\n", + "os.makedirs(local_data_dir, exist_ok=True)\n", + "\n", + "# eGRID Data URLs\n", + "egrid_urls = {\n", + " 2014: {\n", + " \"data_zip\": \"https://www.epa.gov/system/files/other-files/2023-01/egrid_historical_files_1996-2016.zip\",\n", + " \"shapefile_zip\": \"https://www.epa.gov/sites/default/files/2017-01/egrid_subregions.zip\",\n", + " },\n", + " 2020: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2022-09/eGRID2020_Data_v2.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/system/files/other-files/2022-01/egrid2020_subregions.kmz',\n", + " },\n", + " 2022: {\n", + " \"egrid_xlsx\": 'https://www.epa.gov/system/files/documents/2023-01/egrid2022_data.xlsx',\n", + " \"subregions_kmz\": 'https://www.epa.gov/system/files/other-files/2023-01/egrid2022_subregions.kmz',\n", + " },\n", + "}\n", + "\n", + "# Conversion factor from pounds to kilograms\n", + "LBS_PER_KG = 
0.45359237\n", + "\n", + "# Function to download files\n", + "def download_file(url, local_path):\n", + " if not os.path.exists(local_path):\n", + " print(f\"Downloading from {url}...\")\n", + " response = requests.get(url, verify=False) # Added verify=False to handle SSL issues\n", + " if response.status_code == 200:\n", + " with open(local_path, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded to {local_path}.\")\n", + " else:\n", + " print(f\"Failed to download {url}. Status code: {response.status_code}\")\n", + " else:\n", + " print(f\"{local_path} already downloaded.\")\n", + "\n", + "# Function to extract ZIP files\n", + "def extract_zip(zip_path, extract_to):\n", + " if not os.path.exists(extract_to):\n", + " os.makedirs(extract_to, exist_ok=True)\n", + " with ZipFile(zip_path, 'r') as zip_ref:\n", + " zip_ref.extractall(extract_to)\n", + " print(f\"Extracted {zip_path} to {extract_to}.\")\n", + " else:\n", + " print(f\"{extract_to} already exists. Extraction skipped.\")\n", + "\n", + "# Function to process data for 2014\n", + "def process_2014_data(urls):\n", + " print(\"Processing 2014 data with specialized logic...\")\n", + "\n", + " # URLs for the 2014 shapefile and data file\n", + " data_file_url = urls[\"data_zip\"]\n", + " shapefile_url = urls[\"shapefile_zip\"]\n", + "\n", + " # Paths for downloaded ZIP files\n", + " data_file_zip_path = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016.zip\")\n", + " shapefile_zip_path = os.path.join(local_data_dir, \"egrid_subregions_2014.zip\")\n", + "\n", + " # Directory paths for extracted files\n", + " data_file_dir = os.path.join(local_data_dir, \"egrid_historical_files_1996-2016\")\n", + " shapefile_dir = os.path.join(local_data_dir, \"egrid_subregions_2014\")\n", + "\n", + " # Download and extract data files\n", + " download_file(data_file_url, data_file_zip_path)\n", + " extract_zip(data_file_zip_path, data_file_dir)\n", + "\n", + " # Download and extract shapefile\n", + " download_file(shapefile_url, shapefile_zip_path)\n", + " extract_zip(shapefile_zip_path, shapefile_dir)\n", + "\n", + " # Adjust data_file_dir to account for nested directories\n", + " nested_data_dir = os.path.join(data_file_dir, \"egrid_historical_files_1996-2016\")\n", + " if os.path.exists(nested_data_dir):\n", + " data_file_dir = nested_data_dir\n", + "\n", + " # Path to the 2014 data file\n", + " data_file_path = os.path.join(data_file_dir, \"eGRID2014_Data_v2.xlsx\")\n", + "\n", + " # Verify that the data file exists\n", + " if not os.path.exists(data_file_path):\n", + " print(f\"Data file {data_file_path} not found.\")\n", + " return None\n", + "\n", + " # Load carbon intensity data from the Excel file\n", + " try:\n", + " # Read the specific worksheet and columns\n", + " intensity_df = pd.read_excel(data_file_path, sheet_name=\"SRL14\", usecols=[\"SUBRGN\", \"SRC2ERTA\"], skiprows=1)\n", + " intensity_df[\"SRC2ERTA\"] = pd.to_numeric(intensity_df[\"SRC2ERTA\"], errors=\"coerce\")\n", + " intensity_df = intensity_df.dropna(subset=[\"SRC2ERTA\"])\n", + "\n", + " # Convert lbs to kg\n", + " intensity_df[\"kg_per_mwh\"] = intensity_df[\"SRC2ERTA\"] * LBS_PER_KG\n", + "\n", + " # Keep only the required columns\n", + " intensity_df = intensity_df[['SUBRGN', 'kg_per_mwh']]\n", + "\n", + " # Remove Puerto Rico (not available in 2014)\n", + " intensity_df = intensity_df[~intensity_df['SUBRGN'].str.contains('PR', na=False)]\n", + "\n", + " print(\"Sample of carbon intensity data for 2014:\")\n", + " 
print(intensity_df.head())\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read eGRID data for 2014: {e}\")\n", + " return None\n", + "\n", + " return intensity_df\n", + "\n", + "# Function to process data for a given year (2020 or 2022)\n", + "def process_year_data(year, urls, include_puerto_rico=True):\n", + " print(f\"Processing {year} data...\")\n", + "\n", + " # Paths for Excel and subregions files\n", + " egrid_xlsx_url = urls['egrid_xlsx']\n", + " egrid_xlsx_path = os.path.join(local_data_dir, f\"egrid{year}_data.xlsx\")\n", + "\n", + " subregions_url = urls['subregions_kmz']\n", + " subregions_path = os.path.join(local_data_dir, f\"egrid{year}_subregions.kmz\")\n", + "\n", + " # Download files\n", + " download_file(egrid_xlsx_url, egrid_xlsx_path)\n", + " download_file(subregions_url, subregions_path)\n", + "\n", + " # Load carbon intensity data\n", + " try:\n", + " sheet_name = f'SRL{str(year)[-2:]}'\n", + " egrid_intensity_df = pd.read_excel(egrid_xlsx_path, sheet_name=sheet_name)\n", + "\n", + " # Use the relevant column for CO₂ intensity\n", + " co2_column_name = [col for col in egrid_intensity_df.columns if 'CO2 total output emission rate (lb/MWh)' in col]\n", + " if co2_column_name:\n", + " co2_column_name = co2_column_name[0]\n", + " else:\n", + " raise ValueError(\"CO₂ emission rate column not found.\")\n", + "\n", + " # Extract subregion acronym and carbon intensity\n", + " region_intensity = egrid_intensity_df[['eGRID subregion acronym', co2_column_name]].copy()\n", + " region_intensity.columns = ['SUBRGN', 'SRC2ERTA']\n", + "\n", + " # Convert to numeric and handle NaNs\n", + " region_intensity['SRC2ERTA'] = pd.to_numeric(region_intensity['SRC2ERTA'], errors='coerce')\n", + " region_intensity = region_intensity.dropna(subset=['SRC2ERTA'])\n", + "\n", + " # Convert lbs to kg\n", + " region_intensity['kg_per_mwh'] = region_intensity['SRC2ERTA'] * LBS_PER_KG\n", + " region_intensity = region_intensity[['SUBRGN', 'kg_per_mwh']]\n", + " region_intensity['SUBRGN'] = region_intensity['SUBRGN'].astype(str)\n", + "\n", + " # Optionally remove Puerto Rico\n", + " if not include_puerto_rico:\n", + " region_intensity = region_intensity[~region_intensity['SUBRGN'].str.contains('PR', na=False)]\n", + "\n", + " print(f\"Sample of carbon intensity data for {year}:\")\n", + " print(region_intensity.head())\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to read eGRID data for {year}: {e}\")\n", + " return None, None\n", + "\n", + " # Load subregions into GeoDataFrame\n", + " try:\n", + " # Extract the KML file from the KMZ\n", + " with ZipFile(subregions_path, 'r') as kmz:\n", + " kml_filename = [name for name in kmz.namelist() if name.endswith('.kml')][0]\n", + " kmz.extract(kml_filename, local_data_dir)\n", + " kml_path = os.path.join(local_data_dir, kml_filename)\n", + " gdf = gpd.read_file(kml_path, driver='KML')\n", + " os.remove(kml_path)\n", + "\n", + " # Check for 'Name' column\n", + " if 'Name' not in gdf.columns:\n", + " raise ValueError(\"Subregion name column not found in geospatial data.\")\n", + "\n", + " # Optionally remove Puerto Rico from the geodataframe\n", + " if not include_puerto_rico:\n", + " gdf = gdf[~gdf['Name'].str.contains('PR', na=False)]\n", + "\n", + " # Ensure geometries are in WGS84 CRS (EPSG:4326)\n", + " if gdf.crs != 'EPSG:4326':\n", + " print(f\"Reprojecting geometries to WGS84 for {year}.\")\n", + " gdf = gdf.to_crs(epsg=4326)\n", + "\n", + " print(f\"Subregions in {year} geodataframe: {gdf['Name'].unique()}\")\n", + 
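"\n", + "        # Optional sanity check: intensity rows whose SUBRGN has no matching 'Name' here\n", + "        # will silently disappear in the later left merge onto this geodataframe.\n", + "        missing = set(region_intensity['SUBRGN']) - set(gdf['Name'])\n", + "        if missing:\n", + "            print(f\"Note: no geometry found for subregions: {missing}\")\n", +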
"\n", + " except Exception as e:\n", + " print(f\"Failed to read subregions for {year}: {e}\")\n", + " return None, None\n", + "\n", + " return region_intensity, gdf\n", + "\n", + "# Adjust regions (Alaska, Hawaii, and Puerto Rico)\n", + "def adjust_regions_delta(gdf):\n", + " # Define subregions for Alaska, Hawaii, and Puerto Rico\n", + " alaska_subregions = ['AKGD', 'AKMS']\n", + " hawaii_subregions = ['HIMS', 'HIOA', 'HIMS/HIOA']\n", + " puerto_rico_subregions = ['PR', 'PRMS', 'PRMSA']\n", + "\n", + " # Separate the regions\n", + " mainland_gdf = gdf[~gdf['Name'].isin(alaska_subregions + hawaii_subregions + puerto_rico_subregions)]\n", + " alaska_gdf = gdf[gdf['Name'].isin(alaska_subregions)]\n", + " hawaii_gdf = gdf[gdf['Name'].isin(hawaii_subregions)]\n", + " puerto_rico_gdf = gdf[gdf['Name'].isin(puerto_rico_subregions)]\n", + "\n", + " # Alaska: Scale and translate\n", + " if not alaska_gdf.empty:\n", + " alaska_gdf = alaska_gdf.copy()\n", + "\n", + " # Compute the centroid of Alaska (combined geometries)\n", + " alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Desired location for Alaska after translation\n", + " alaska_desired_location = (-120, 27) # Adjust as needed\n", + "\n", + " # Apply scaling with the common origin (alaska_centroid)\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=0.41, yfact=0.5, origin=alaska_centroid)\n", + " )\n", + "\n", + " # After scaling, compute the new centroid\n", + " scaled_alaska_centroid = alaska_gdf.geometry.unary_union.centroid\n", + "\n", + " # Compute translation amounts based on the desired location\n", + " delta_x = alaska_desired_location[0] - scaled_alaska_centroid.x\n", + " delta_y = alaska_desired_location[1] - scaled_alaska_centroid.y\n", + "\n", + " # Apply translation to each geometry\n", + " alaska_gdf['geometry'] = alaska_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Alaska data is not available.\")\n", + "\n", + " # Hawaii: Scale and translate\n", + " if not hawaii_gdf.empty:\n", + " hawaii_gdf = hawaii_gdf.copy()\n", + " hawaii_centroid = hawaii_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=1.5, yfact=1.5, origin=hawaii_centroid)\n", + " )\n", + "\n", + " # Desired location for Hawaii after translation\n", + " hawaii_desired_location = (-105, 22) # Adjust as needed\n", + " delta_x = hawaii_desired_location[0] - hawaii_centroid.x\n", + " delta_y = hawaii_desired_location[1] - hawaii_centroid.y\n", + "\n", + " # Apply translation\n", + " hawaii_gdf['geometry'] = hawaii_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Hawaii data is not available.\")\n", + "\n", + " # Puerto Rico: Scale and translate\n", + " if not puerto_rico_gdf.empty:\n", + " puerto_rico_gdf = puerto_rico_gdf.copy()\n", + " pr_centroid = puerto_rico_gdf.geometry.unary_union.centroid\n", + "\n", + " # Apply scaling\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: scale(geom, xfact=2.5, yfact=2.5, origin=pr_centroid)\n", + " )\n", + "\n", + " # Desired location for Puerto Rico after translation\n", + " pr_desired_location = (-75, 20) # Adjust as needed\n", + " delta_x = pr_desired_location[0] - pr_centroid.x\n", + " delta_y = pr_desired_location[1] - 
pr_centroid.y\n", + "\n", + " # Apply translation\n", + " puerto_rico_gdf['geometry'] = puerto_rico_gdf['geometry'].apply(\n", + " lambda geom: translate(geom, xoff=delta_x, yoff=delta_y)\n", + " )\n", + " else:\n", + " print(\"Puerto Rico data is not available.\")\n", + "\n", + " # Combine all regions back into a single GeoDataFrame\n", + " combined_gdf = pd.concat(\n", + " [mainland_gdf, alaska_gdf, hawaii_gdf, puerto_rico_gdf], ignore_index=True\n", + " )\n", + "\n", + " return combined_gdf\n", + "\n", + "# Process data for 2014\n", + "intensity_df_2014 = process_2014_data(egrid_urls[2014])\n", + "if intensity_df_2014 is None:\n", + " print(\"Failed to process 2014 data.\")\n", + " exit()\n", + "\n", + "# Process data for 2020, excluding Puerto Rico for 2014-2020 delta\n", + "region_intensity_2020_no_pr, gdf_2020_no_pr = process_year_data(2020, egrid_urls[2020], include_puerto_rico=False)\n", + "if region_intensity_2020_no_pr is None or gdf_2020_no_pr is None:\n", + " print(\"Failed to process 2020 data.\")\n", + " exit()\n", + "\n", + "# Merge 2014 and 2020 data on 'SUBRGN'\n", + "merged_intensity_2014_2020 = pd.merge(intensity_df_2014, region_intensity_2020_no_pr, on='SUBRGN', suffixes=('_2014', '_2020'))\n", + "\n", + "# Compute the delta\n", + "merged_intensity_2014_2020['delta_kg_per_mwh'] = merged_intensity_2014_2020['kg_per_mwh_2020'] - merged_intensity_2014_2020['kg_per_mwh_2014']\n", + "\n", + "print(\"Merged intensity data with delta from 2014 to 2020:\")\n", + "print(merged_intensity_2014_2020.head())\n", + "\n", + "# Merge with the 2020 geodataframe\n", + "gdf_delta_2014_2020 = gdf_2020_no_pr.merge(merged_intensity_2014_2020[['SUBRGN', 'delta_kg_per_mwh']], left_on='Name', right_on='SUBRGN', how='left')\n", + "\n", + "# Adjust regions\n", + "gdf_delta_2014_2020 = adjust_regions_delta(gdf_delta_2014_2020)\n", + "\n", + "gdf_delta_2014_2020['geometry'] = gdf_delta_2014_2020['geometry'].simplify(tolerance=0.1)\n", + "\n", + "\n", + "# Plot delta from 2014 to 2020\n", + "fig, ax = plt.subplots(figsize=(12, 8))\n", + "\n", + "# Define diverging colormap\n", + "cmap = plt.cm.RdBu_r\n", + "\n", + "# Define vmax and vmin for color scaling\n", + "vmax = gdf_delta_2014_2020['delta_kg_per_mwh'].abs().max()\n", + "vmin = -vmax\n", + "\n", + "gdf_delta_2014_2020.plot(\n", + " column='delta_kg_per_mwh',\n", + " cmap=cmap,\n", + " linewidth=0.8,\n", + " ax=ax,\n", + " edgecolor='0.8',\n", + " vmax=vmax,\n", + " vmin=vmin\n", + ")\n", + "\n", + "# Define a function to calculate brightness based on RGB values\n", + "def is_dark_color(value, cmap, vmin, vmax):\n", + " \"\"\"Returns True if the color is dark, False otherwise.\"\"\"\n", + " norm_value = (value - vmin) / (vmax - vmin) # Normalize value to be between 0 and 1\n", + " rgb = cmap(norm_value)[:3] # Get RGB from colormap\n", + " brightness = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) # Perceived brightness formula\n", + " return brightness < 0.5 # Threshold to determine if color is \"dark\"\n", + "\n", + "# Add labels for each subregion\n", + "gdf_delta_2014_2020['coords'] = gdf_delta_2014_2020['geometry'].apply(lambda x: x.representative_point().coords[0])\n", + "for idx, row in gdf_delta_2014_2020.iterrows():\n", + " x, y = row['coords']\n", + " # Manually adjust positions for overlapping labels\n", + " if row['Name'] == 'NYLI':\n", + " x_offset = 0.9\n", + " y_offset = -0.9\n", + " elif row['Name'] == 'NYCW':\n", + " x_offset = 0.5\n", + " y_offset = 1\n", + " elif row['Name'] == 'PRMS':\n", + " x_offset = 0\n", + " 
+ "\n",
+ "# Now process data for 2020 and 2022, including Puerto Rico\n",
+ "region_intensity_2020_with_pr, gdf_2020_with_pr = process_year_data(2020, egrid_urls[2020], include_puerto_rico=True)\n",
+ "region_intensity_2022, gdf_2022 = process_year_data(2022, egrid_urls[2022], include_puerto_rico=True)\n",
+ "\n",
+ "if region_intensity_2020_with_pr is None or gdf_2020_with_pr is None or region_intensity_2022 is None or gdf_2022 is None:\n",
+ " print(\"Failed to process 2020 or 2022 data.\")\n",
+ " raise SystemExit\n",
+ "\n",
+ "# Merge 2020 and 2022 data on 'SUBRGN'\n",
+ "merged_intensity_2020_2022 = pd.merge(region_intensity_2020_with_pr, region_intensity_2022, on='SUBRGN', suffixes=('_2020', '_2022'))\n",
+ "\n",
+ "# Compute the delta\n",
+ "merged_intensity_2020_2022['delta_kg_per_mwh'] = merged_intensity_2020_2022['kg_per_mwh_2022'] - merged_intensity_2020_2022['kg_per_mwh_2020']\n",
+ "\n",
+ "print(\"Merged intensity data with delta from 2020 to 2022:\")\n",
+ "print(merged_intensity_2020_2022.head())\n",
+ "\n",
+ "# Merge with the 2022 geodataframe\n",
+ "gdf_delta_2020_2022 = gdf_2022.merge(merged_intensity_2020_2022[['SUBRGN', 'delta_kg_per_mwh']], left_on='Name', right_on='SUBRGN', how='left')\n",
+ "\n",
+ "# Adjust regions\n",
+ "gdf_delta_2020_2022 = adjust_regions_delta(gdf_delta_2020_2022)\n",
+ "\n",
+ "gdf_delta_2020_2022['geometry'] = gdf_delta_2020_2022['geometry'].simplify(tolerance=0.1)\n",
+ "\n",
+ "\n",
+ "# Plot delta from 2020 to 2022\n",
+ "fig, ax = plt.subplots(figsize=(12, 8))\n",
+ "\n",
+ "# Define diverging colormap\n",
+ "cmap = plt.cm.RdBu_r\n",
+ "\n",
+ "# Fixed symmetric color limits (kg CO₂e/MWh), chosen manually for this map\n",
+ "# vmax = gdf_delta_2020_2022['delta_kg_per_mwh'].abs().max()\n",
+ "vmax = 75\n",
+ "vmin = -75\n",
+ "# vmin = -vmax\n",
+ "\n",
+ "gdf_delta_2020_2022.plot(\n",
+ " column='delta_kg_per_mwh',\n",
+ " cmap=cmap,\n",
+ " linewidth=0.8,\n",
+ " ax=ax,\n",
+ " edgecolor='0.8',\n",
+ " vmax=vmax,\n",
+ " vmin=vmin\n",
+ ")\n",
+ "\n",
+ "# Add labels for each subregion\n",
+ "gdf_delta_2020_2022['coords'] = gdf_delta_2020_2022['geometry'].apply(lambda x: x.representative_point().coords[0])\n",
+ "for idx, row in gdf_delta_2020_2022.iterrows():\n",
+ " x, y = row['coords']\n",
+ " # Manually adjust positions for overlapping labels\n",
+ " if row['Name'] == 'NYLI':\n",
+ " x_offset = 0.9\n",
+ " y_offset = -0.9\n",
+ " elif row['Name'] == 'NYCW':\n",
+ " x_offset = 0.5\n",
+ " y_offset = 1\n",
+ " elif row['Name'] == 'PRMS':\n",
+ " x_offset = 0\n",
+ " y_offset = 0.4\n",
+ " else:\n",
+ " x_offset = 0\n",
+ " y_offset = 0\n",
+ "\n",
+ " # Determine color based on brightness\n",
+ " color = 'white' if is_dark_color(row['delta_kg_per_mwh'], cmap, vmin, vmax) else 'black'\n",
+ "\n",
+ " # Force black for NYCW so the offset label stays legible\n",
+ " if row['Name'] == 'NYCW':\n",
+ " color = 'black'\n",
+ " ax.annotate(\n",
+ " text=row['Name'],\n",
+ " xy=(x + x_offset, y + y_offset),\n",
+ " horizontalalignment='center',\n",
+ " fontsize=13,\n",
+ " color=color,\n",
+ " # path_effects=[pe.withStroke(linewidth=1.5, foreground=\"black\")]\n",
+ " )\n",
+ "\n",
+ "# Customize plot appearance\n",
+ "ax.set_xlim(-130, -65)\n",
+ "ax.set_ylim(17, 50)\n",
+ "\n",
+ "# ax.set_title(\"Change in eGRID Subregion CO₂ Intensity from 2020 to 2022 (kg/MWh)\", fontsize=15)\n",
+ "ax.axis('off')\n",
+ "\n",
+ "cbar_ax = fig.add_axes([0.92, 0.3, 0.02, 0.4])\n",
+ "\n",
+ "# Create the colorbar using the custom axis\n",
+ "sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))\n",
+ "sm.set_array([]) # public-API equivalent of the old sm._A = [] hack\n",
+ "cbar = fig.colorbar(sm, cax=cbar_ax, pad=0.1)\n",
+ "cbar.set_label('Change in kg CO₂e/MWh', fontsize=12)\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.savefig(\"egrid_delta_2020_2022.pdf\", dpi=150)\n",
+ "plt.savefig(\"egrid_delta_2020_2022.png\", dpi=300, transparent=True)\n",
+ "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ENV3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 +} diff --git a/scripts/efficiency.ipynb b/scripts/efficiency.ipynb new file mode 100644 index 0000000..bbb3776 --- /dev/null +++ b/scripts/efficiency.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [
+ "import os\n",
+ "import requests\n",
+ "import zipfile\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import json\n",
+ "import matplotlib.colors as mcolors\n",
+ "from shapely.geometry import Polygon\n",
+ "import matplotlib.patches as mpatches\n",
+ "\n",
+ "# Directory to store shapefiles\n",
+ "shapefile_dir = \"shapefile\"\n",
+ "os.makedirs(shapefile_dir, exist_ok=True)\n",
+ "\n",
+ "# URLs for individual files in the GitHub repository and UACE Census TIGER data\n",
+ "natural_earth_files = {\n",
+ " \"ne_10m_admin_0_countries\": [\n",
+ " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_0_countries.shp\",\n",
+ " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_0_countries.shx\",\n",
+ " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_0_countries.dbf\",\n",
+ " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_0_countries.prj\",\n",
+ " ],\n",
+ " \"ne_10m_admin_1_states_provinces\": [\n",
+ " 
\"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_1_states_provinces.shp\",\n", + " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_1_states_provinces.shx\",\n", + " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_1_states_provinces.dbf\",\n", + " \"https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/10m_cultural/ne_10m_admin_1_states_provinces.prj\",\n", + " ]\n", + "}\n", + "\n", + "uace_url = \"https://www2.census.gov/geo/tiger/TIGER2024/UAC20/tl_2024_us_uac20.zip\"\n", + "uace_zip_path = os.path.join(shapefile_dir, \"tl_2024_us_uac20.zip\")\n", + "\n", + "# Function to download each file\n", + "def download_files(file_urls):\n", + " for url in file_urls:\n", + " filename = os.path.join(shapefile_dir, url.split('/')[-1])\n", + " if not os.path.exists(filename):\n", + " print(f\"Downloading {filename}...\")\n", + " response = requests.get(url)\n", + " response.raise_for_status()\n", + " with open(filename, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded {filename}.\")\n", + " else:\n", + " print(f\"{filename} already exists.\")\n", + "\n", + "# Function to download and extract the UACE ZIP file\n", + "def download_and_extract_uace(url, zip_path):\n", + " if not os.path.exists(zip_path):\n", + " print(f\"Downloading UACE data from {url}...\")\n", + " response = requests.get(url)\n", + " response.raise_for_status()\n", + " with open(zip_path, 'wb') as f:\n", + " f.write(response.content)\n", + " print(f\"Downloaded {zip_path}.\")\n", + " \n", + " # Extract UACE shapefile\n", + " with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n", + " zip_ref.extractall(shapefile_dir)\n", + " print(f\"Extracted UACE shapefiles to {shapefile_dir}.\")\n", + "\n", + "# Download Natural Earth files\n", + "for file_urls in natural_earth_files.values():\n", + " download_files(file_urls)\n", + "\n", + "# Download and extract UACE shapefile\n", + "download_and_extract_uace(uace_url, uace_zip_path)\n", + "\n", + "# Load the shapefiles using the original filenames\n", + "world = gpd.read_file(os.path.join(shapefile_dir, \"ne_10m_admin_0_countries.shp\"))\n", + "states_provinces = gpd.read_file(os.path.join(shapefile_dir, \"ne_10m_admin_1_states_provinces.shp\"))\n", + "uace_shapefile = gpd.read_file(os.path.join(shapefile_dir, \"tl_2024_us_uac20.shp\"))\n", + "\n", + "# Filter for the United States in the world shapefile\n", + "us_boundary = world[world['NAME'] == 'United States of America']\n", + "\n", + "\n", + "def process_state_deltas(state_name, year_ranges, mode_type, metric_column):\n", + " # Determine the modes based on mode_type\n", + " if mode_type == \"bus\":\n", + " modes = ['MB', 'RB', 'CB']\n", + " title_suffix = \"Bus Modes\"\n", + " elif mode_type == \"train\":\n", + " modes = ['LR', 'HR', 'YR', 'CR']\n", + " title_suffix = \"Train Modes\"\n", + " else:\n", + " raise ValueError(\"Invalid mode_type. 
Use 'bus' or 'train'.\")\n",
+ "\n",
+ " # Define the column title for plot labels based on the metric;\n",
+ " # \"All Fuels (Wh/pkm)\" values are converted to Wh/km below, so label accordingly\n",
+ " if metric_column == \"All Fuels (Wh/pkm)\":\n",
+ " metric_title = \"Fuel Efficiency (Wh/km)\"\n",
+ " elif metric_column == \"Average Passengers\":\n",
+ " metric_title = \"Average Passengers\"\n",
+ " else:\n",
+ " metric_title = metric_column\n",
+ "\n",
+ " def load_year_data(year):\n",
+ " json_file = f\"../src/emcommon/resources/ntd{year}_intensities.json\"\n",
+ " with open(json_file, 'r') as f:\n",
+ " data = json.load(f)\n",
+ " df = pd.DataFrame(data['records'])\n",
+ "\n",
+ " # Copy so the coercions below operate on a real frame, not a view\n",
+ " # (avoids pandas' SettingWithCopyWarning)\n",
+ " mode_df = df[df['Mode'].isin(modes)].copy()\n",
+ "\n",
+ " mode_df[metric_column] = pd.to_numeric(mode_df[metric_column], errors='coerce')\n",
+ " mode_df['Unlinked Passenger Trips'] = pd.to_numeric(mode_df['Unlinked Passenger Trips'], errors='coerce')\n",
+ " mode_df['Average Passengers'] = pd.to_numeric(mode_df['Average Passengers'], errors='coerce')\n",
+ " mode_df = mode_df.dropna(subset=[metric_column, 'Unlinked Passenger Trips', 'UACE Code', 'Average Passengers'])\n",
+ "\n",
+ " # Convert Wh/pkm to Wh/km by multiplying by Average Passengers\n",
+ " if metric_column == \"All Fuels (Wh/pkm)\":\n",
+ " mode_df[metric_column] = mode_df[metric_column] * mode_df[\"Average Passengers\"]\n",
+ "\n",
+ " mode_df['UACE Code'] = mode_df['UACE Code'].astype(str).str.zfill(5)\n",
+ " # Trip-weighted mean of the metric across all records in each urban area\n",
+ " grouped = mode_df.groupby('UACE Code').apply(\n",
+ " lambda x: pd.Series({\n",
+ " f'Weighted {metric_column}': (x[metric_column] * x['Unlinked Passenger Trips']).sum() / x['Unlinked Passenger Trips'].sum(),\n",
+ " })\n",
+ " ).reset_index()\n",
+ "\n",
+ " return grouped\n",
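+ "\n",
+ " # Worked example of the conversion and weighting above (illustrative\n",
+ " # numbers, not NTD data): a record at 1500 Wh/pkm with 8 average\n",
+ " # passengers becomes 1500 * 8 = 12000 Wh/km; two such records at\n",
+ " # 12000 and 9000 Wh/km over 3e6 and 1e6 unlinked trips average to\n",
+ " # (12000*3e6 + 9000*1e6) / 4e6 = 11250 Wh/km for their urban area.\n",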
+ "\n",
+ " # Determine the number of plots based on the number of year ranges\n",
+ " num_plots = len(year_ranges)\n",
+ " figsize = (9 * num_plots, 8) # Adjust the figure size based on the number of plots\n",
+ "\n",
+ " # Create a figure with the appropriate number of subplots\n",
+ " fig, axs = plt.subplots(1, num_plots, figsize=figsize, constrained_layout=True)\n",
+ " if num_plots == 1:\n",
+ " axs = [axs] # Ensure axs is always a list for consistency\n",
+ "\n",
+ " # fig.subplots_adjust(top=0.88) # ignored under constrained_layout (matplotlib warns); suptitle spacing is set via y below\n",
+ " titley = 0.93\n",
+ " if state_name == \"North Carolina\":\n",
+ " titley = 0.76\n",
+ " elif state_name == \"Massachusetts\":\n",
+ " titley = 0.82\n",
+ " elif state_name == \"Colorado\":\n",
+ " titley = 0.86\n",
+ " fig.suptitle(f\"{state_name} {mode_type.title()} - {metric_title}\", fontsize=20, y=titley)\n",
+ "\n",
+ " # Define color normalization and color map based on metric and mode type\n",
+ " if metric_column == \"All Fuels (Wh/pkm)\":\n",
+ " norm = mcolors.Normalize(vmin=-800, vmax=800)\n",
+ " elif metric_column == \"Average Passengers\":\n",
+ " if mode_type == \"bus\":\n",
+ " norm = mcolors.Normalize(vmin=-4, vmax=4)\n",
+ " elif mode_type == \"train\":\n",
+ " norm = mcolors.Normalize(vmin=-40, vmax=40)\n",
+ " else:\n",
+ " norm = mcolors.Normalize(vmin=-4, vmax=4) # Default case if needed\n",
+ "\n",
+ " if metric_column == \"All Fuels (Wh/pkm)\":\n",
+ " cmap = 'RdYlGn_r'\n",
+ " else:\n",
+ " cmap = 'RdYlGn'\n",
+ "\n",
+ " # Filter for the specified state\n",
+ " state = states_provinces[states_provinces['name'] == state_name]\n",
+ " state_boundary = state.unary_union # Get the boundary as a single geometry\n",
+ "\n",
+ " for ax, (year_start, year_end) in zip(axs, year_ranges):\n",
+ " # Load data for both years\n",
+ " data_start = load_year_data(year_start)\n",
+ " data_end = load_year_data(year_end)\n",
+ "\n",
+ " # Merge data for both years to calculate the delta\n",
+ " data_delta = data_start.merge(data_end, on=\"UACE Code\", suffixes=(f'_{year_start}', f'_{year_end}'))\n",
+ " data_delta[f'Delta {metric_column}'] = data_delta[f'Weighted {metric_column}_{year_end}'] - data_delta[f'Weighted {metric_column}_{year_start}']\n",
+ "\n",
+ " # Merge with UACE shapefile for spatial plotting\n",
+ " uace_shapefile['GEOID20'] = uace_shapefile['GEOID20'].astype(str).str.zfill(5)\n",
+ " merged_gdf = uace_shapefile.merge(data_delta, left_on='GEOID20', right_on='UACE Code', how='inner')\n",
+ "\n",
+ " # Identify UACE regions with data and without data (no data)\n",
+ " uace_with_data = merged_gdf.copy()\n",
+ " uace_no_data = uace_shapefile[~uace_shapefile['GEOID20'].isin(uace_with_data['GEOID20'])]\n",
+ "\n",
+ " # Clip UACE regions to the state boundary. The TIGER layer is NAD83 and\n",
+ " # Natural Earth is WGS84; the two are treated as interchangeable here,\n",
+ " # which is fine at map scale.\n",
+ " clipped_with_data = gpd.overlay(uace_with_data, gpd.GeoDataFrame(geometry=[state_boundary], crs=uace_with_data.crs), how='intersection')\n",
+ " clipped_no_data = gpd.overlay(uace_no_data, gpd.GeoDataFrame(geometry=[state_boundary], crs=uace_no_data.crs), how='intersection')\n",
+ "\n",
+ " # Plot the delta values for the specified time range\n",
+ " clipped_with_data.plot(\n",
+ " ax=ax,\n",
+ " column=f'Delta {metric_column}',\n",
+ " cmap=cmap,\n",
+ " legend=False,\n",
+ " edgecolor='grey',\n",
+ " linewidth=0.5,\n",
+ " norm=norm,\n",
+ " )\n",
+ "\n",
+ " # Plot the UACE regions with no data using hatch pattern\n",
+ " clipped_no_data.plot(\n",
+ " ax=ax,\n",
+ " color=\"none\",\n",
+ " edgecolor=\"grey\",\n",
+ " hatch=\"////////\",\n",
+ " linewidth=0.5,\n",
+ " )\n",
+ "\n",
+ " # Overlay state boundary\n",
+ " state.plot(ax=ax, color=\"none\", edgecolor=\"black\", linewidth=1.5)\n",
+ "\n",
+ " # Set title and bounds for each subplot\n",
+ " ax.set_title(f\"{year_start}-{year_end} Delta\", fontsize=16)\n",
+ " if state_name == \"Massachusetts\":\n",
+ " ax.set_xlim([-73.5, -69.9])\n",
+ " ax.set_ylim([41.2, 42.9])\n",
+ " elif state_name == \"Colorado\":\n",
+ " ax.set_xlim([-109.1, -102.0])\n",
+ " ax.set_ylim([36.9, 41.0])\n",
+ " elif state_name == \"North Carolina\":\n",
+ " ax.set_xlim([-84.3, -75.5])\n",
+ " ax.set_ylim([33.8, 36.6])\n",
+ "\n",
+ " ax.set_aspect('equal')\n",
+ "\n",
+ " # Add a single colorbar for the entire figure\n",
+ " sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n",
+ " sm.set_array([]) # Dummy array for colorbar, via the public API\n",
+ " cbar = fig.colorbar(sm, ax=axs, orientation='vertical', fraction=0.02, pad=0.04, shrink=0.5)\n",
+ " cbar.set_label(f\"Delta {metric_title}\")\n",
+ "\n",
+ " # Create a custom legend entry for \"No Data\" with hatching\n",
+ " no_data_patch = mpatches.Patch(facecolor=\"none\", edgecolor=\"grey\", hatch=\"////////\", label=\"No Data\")\n",
+ " if state_name == \"North Carolina\":\n",
+ " patch_y = 0.25\n",
+ " else:\n",
+ " patch_y = 0.14\n",
+ " fig.legend(handles=[no_data_patch], loc=\"lower right\", bbox_to_anchor=(0.93, patch_y), fontsize=12)\n",
+ "\n",
+ " # Save and display the combined figure for the state and mode type\n",
+ " year_range_str = \"-\".join([f\"{start}_{end}\" for start, end in year_ranges])\n",
+ " plt.savefig(f\"delta_{metric_column.replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '').lower()}_{title_suffix.replace(' ', '_').lower()}_{state_name.replace(' ', '_').lower()}_{year_range_str}.pdf\")\n",
+ "\n",
+ " plt.show()\n",
+ "\n",
+ "\n",
+ "# Specify states and year ranges for deltas\n",
+ "states = [\"Massachusetts\", \"Colorado\", \"North Carolina\"]\n",
+ "# year_ranges = [(2018, 2020), (2020, 2022)]\n",
+ "year_ranges = [(2018, 2022)]\n",
+ "\n",
+ "# Generate delta plots for each state and mode type for both metrics\n",
+ "# for mode_type in [\"bus\", \"train\"]:\n",
+ "for mode_type in [\"bus\"]:\n",
+ " for state in states:\n",
+ " process_state_deltas(state, year_ranges, mode_type, \"All Fuels (Wh/pkm)\") # For fuel efficiency\n",
+ " process_state_deltas(state, year_ranges, mode_type, \"Average Passengers\") # For average occupancy\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
+ "\n",
+ "def calculate_national_and_uace_average_passengers(years, uace=None, mode_type=\"bus\"):\n",
+ " \"\"\"\n",
+ " Calculate the trip-weighted national average bus passengers and,\n",
+ " optionally, the same figure for a specific UACE.\n",
+ "\n",
+ " Parameters:\n",
+ " years (list): List of years to process.\n",
+ " uace (str): UACE code to filter on. If None, only calculates national average.\n",
+ " mode_type (str): Mode type to process (default: 'bus').\n",
+ " \"\"\"\n",
+ " # Define bus modes\n",
+ " if mode_type == \"bus\":\n",
+ " modes = ['MB', 'RB', 'CB'] # Bus modes\n",
+ " else:\n",
+ " raise ValueError(\"Currently only 'bus' mode is supported.\")\n",
+ "\n",
+ " # Directory containing the JSON files\n",
+ " json_dir = \"../src/emcommon/resources/\"\n",
+ "\n",
+ " # Loop through each year and calculate averages\n",
+ " for year in years:\n",
+ " json_file = os.path.join(json_dir, f\"ntd{year}_intensities.json\")\n",
+ " if not os.path.exists(json_file):\n",
+ " print(f\"File for year {year} not found: {json_file}\")\n",
+ " continue\n",
+ "\n",
+ " with open(json_file, 'r') as f:\n",
+ " data = json.load(f)\n",
+ "\n",
+ " # Convert to DataFrame\n",
+ " df = pd.DataFrame(data['records'])\n",
+ "\n",
+ " # Filter for bus modes (copy to avoid SettingWithCopyWarning below)\n",
+ " mode_df = df[df['Mode'].isin(modes)].copy()\n",
+ "\n",
+ " # Ensure numeric columns\n",
+ " mode_df['Average Passengers'] = pd.to_numeric(mode_df['Average Passengers'], errors='coerce')\n",
+ " mode_df['Unlinked Passenger Trips'] = pd.to_numeric(mode_df['Unlinked Passenger Trips'], errors='coerce')\n",
+ "\n",
+ " # Drop rows with missing values\n",
+ " mode_df = mode_df.dropna(subset=['Average Passengers', 'Unlinked Passenger Trips'])\n",
+ "\n",
+ " # Calculate the weighted national average for passengers\n",
+ " total_passenger_trips = mode_df['Unlinked Passenger Trips'].sum()\n",
+ " weighted_avg_passengers = (mode_df['Average Passengers'] * mode_df['Unlinked Passenger Trips']).sum() / total_passenger_trips\n",
+ " print(f\"Year {year}: Weighted National Average Passengers for Bus = {weighted_avg_passengers:.2f}\")\n",
+ "\n",
+ " # If UACE is provided, filter and calculate for specific UACE\n",
+ " if uace:\n",
+ " # Normalize codes to 5-digit strings, matching the plotting cell above\n",
+ " uace_df = mode_df[mode_df['UACE Code'].astype(str).str.zfill(5) == str(uace).zfill(5)]\n",
+ "\n",
+ " if not uace_df.empty:\n",
+ " total_uace_trips = uace_df['Unlinked Passenger Trips'].sum()\n",
+ " weighted_uace_avg_passengers = (uace_df['Average Passengers'] * uace_df['Unlinked Passenger Trips']).sum() / total_uace_trips\n",
+ " print(f\"Year {year}, UACE {uace}: Weighted Average Passengers for Bus = {weighted_uace_avg_passengers:.2f}\")\n",
+ " else:\n",
+ " print(f\"Year {year}: No data for UACE {uace}\")\n",
+ "\n",
+ "# Define the years to process\n",
+ "years = [2018, 2019, 2020, 2021, 2022]\n",
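+ "\n",
+ "# Quick sanity check of the trip-weighting used above (illustrative numbers,\n",
+ "# not NTD data): records with 10 and 20 average passengers over 1e6 and 3e6\n",
+ "# unlinked trips should average (10*1e6 + 20*3e6) / 4e6 = 17.5.\n",
+ "check = pd.DataFrame({'Average Passengers': [10.0, 20.0], 'Unlinked Passenger Trips': [1e6, 3e6]})\n",
+ "assert (check['Average Passengers'] * check['Unlinked Passenger Trips']).sum() / check['Unlinked Passenger Trips'].sum() == 17.5\n",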
+ "\n", + "# Call the function with UACE filtering\n", + "calculate_national_and_uace_average_passengers(years, uace=\"23527\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ENV3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}