From b38e55420519c0c9d0cfa8d8db3ecfc1db73f892 Mon Sep 17 00:00:00 2001
From: ktehranchi <83722342+ktehranchi@users.noreply.github.com>
Date: Tue, 27 Feb 2024 10:18:53 -0800
Subject: [PATCH 01/10] Update env to pypsa==0.27.0

---
 workflow/Snakefile                            |   4 +-
 workflow/config/tests/config.test_simple.yaml |  17 +-
 workflow/envs/environment.yaml                |  27 +-
 workflow/rules/build_electricity.smk          |  10 +
 workflow/scripts/add_electricity.py           |  13 +-
 workflow/scripts/build_base_network.py        |   5 +-
 workflow/scripts/build_demand.py              |   6 +-
 workflow/scripts/cluster_network_eur.py       | 286 ++++++++----------
 workflow/scripts/simplify_network.py          |  21 +-
 workflow/tests/test_yaml_structure.py         |  44 +++
 10 files changed, 236 insertions(+), 197 deletions(-)
 create mode 100644 workflow/tests/test_yaml_structure.py

diff --git a/workflow/Snakefile b/workflow/Snakefile
index c2ecf247..58b7a134 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -66,8 +66,8 @@ wildcard_constraints:
 
 # Merge subworkflow configs and main config
 # configfile: "config/tests/config.validation.yaml"
-# configfile: "config/tests/config.test_simple.yaml"
-configfile: "config/config.default.yaml"
+configfile: "config/tests/config.test_simple.yaml"
+# configfile: "config/config.default.yaml"
 configfile: "config/config.cluster.yaml"
 configfile: "config/config.osw.yaml"
 configfile: "config/config.plotting.yaml"
diff --git a/workflow/config/tests/config.test_simple.yaml b/workflow/config/tests/config.test_simple.yaml
index 1e9e7af8..db252965 100644
--- a/workflow/config/tests/config.test_simple.yaml
+++ b/workflow/config/tests/config.test_simple.yaml
@@ -12,7 +12,7 @@ run:
 scenario:
   interconnect: [western] #"usa|texas|western|eastern"
   clusters: [40]
-  opts: [Co2L0.1, Co2L0.2]
+  opts: [Co2L0.1,]
   ll: [v1.0]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
@@ -29,7 +29,7 @@ countries: [US]
 
 snapshots:
   start: "2019-01-01"
-  end: "2020-01-01"
+  end: "2019-01-16"
   inclusive: 'left'
 
 # docs :
@@ -260,18 +260,13 @@ clustering:
     algorithm: kmeans
     feature: solar+onwind-time
     aggregation_zones: 'balancing_area' # [balancing_area, state]
+    exclude_carriers: []
+    consider_efficiency_classes: false
   aggregation_strategies:
     generators:
-      p_nom_max: sum # use "min" for more conservative assumptions
-      p_nom_min: sum
-      p_min_pu: mean
-      marginal_cost: mean
       committable: any
-      ramp_limit_up: mean
-      ramp_limit_down: mean
-      efficiency: mean
-    buses:
-      state: max # temp fix. When fixing state aggregation- change add electricity such that region info not is use is removed.
+      ramp_limit_up: max
+      ramp_limit_down: max
 
 focus_weights:
   # California: 0.5
diff --git a/workflow/envs/environment.yaml b/workflow/envs/environment.yaml
index 4f2af980..c40f0617 100644
--- a/workflow/envs/environment.yaml
+++ b/workflow/envs/environment.yaml
@@ -1,20 +1,22 @@
-name: pypsa-usa
+name: pypsa-usa-new
 channels:
 - conda-forge
 - bioconda
 
 dependencies:
-- python==3.11.6
-- pip==23.3
-- pypsa==0.25.1
-- linopy==0.2.6
+- python>=3.8
+- pip
+
+- pypsa>=0.26.1
+- atlite>=0.2.9
+- linopy
 
-- atlite==0.2.11
 - dask==2023.7.0
 - dask-core==2023.7.0
 
 # Dependencies of the workflow itself
-- pandas>=0.24.0,<2.1
+- pandas>=2.1
+- xarray>=2023.11.0
 - xlrd==2.0.1
 - openpyxl==3.1.2
 - pycountry==22.3.5
@@ -25,17 +27,15 @@ dependencies:
 - pytables==3.9.1
 - lxml==4.9.3
 - numpy==1.26.0
-- xarray==2023.6.0
 - netcdf4==1.6.4
 - networkx==3.1
 - scipy==1.11.3
 - shapely==2.0.2
 - progressbar2==4.3.2
-- pyomo==6.6.2
 - matplotlib==3.8.0
 - plotly==5.17.0
-- powerplantmatching==0.5.7
-
+- graphviz
+- powerplantmatching
 
 # Keep in conda environment when calling ipython
 - ipython==8.16.1
@@ -44,14 +44,14 @@ dependencies:
 - ipykernel==6.25.2
 
 # GIS dependencies:
+- geopy==2.4.0
 - cartopy==0.22.0
 - descartes==1.1.0
 - rasterio==1.3.8
-- geopandas==0.14.0
+- geopandas>=0.11.0
 - geopandas-base==0.14.0
 
 # TODO: check these dependencies
-- geopy==2.4.0
 - tqdm==4.66.1
 - pytz==2023.3.post1
 - country_converter==1.0.0
@@ -62,3 +62,4 @@ dependencies:
   - vresutils==0.3.1
   - tsam>=1.1.0
   - gurobipy==10.0.3
+  - highspy
\ No newline at end of file
diff --git a/workflow/rules/build_electricity.smk b/workflow/rules/build_electricity.smk
index 31c9cf2c..015c30a9 100644
--- a/workflow/rules/build_electricity.smk
+++ b/workflow/rules/build_electricity.smk
@@ -324,6 +324,16 @@ rule simplify_network:
 
 
 rule cluster_network:
+    params:
+        cluster_network=config["clustering"]["cluster_network"],
+        conventional_carriers=config["electricity"].get("conventional_carriers", []),
+        renewable_carriers=config["electricity"]["renewable_carriers"],
+        aggregation_strategies=config["clustering"].get("aggregation_strategies", {}),
+        custom_busmap=config["enable"].get("custom_busmap", False),
+        focus_weights=config.get("focus_weights", None),
+        max_hours=config["electricity"]["max_hours"],
+        length_factor=config["lines"]["length_factor"],
+        costs=config["costs"],
     input:
         network=RESOURCES + "{interconnect}/elec_s.nc",
         regions_onshore=RESOURCES + "{interconnect}/regions_onshore.geojson",
diff --git a/workflow/scripts/add_electricity.py b/workflow/scripts/add_electricity.py
index e47939f4..f8a8ed9c 100755
--- a/workflow/scripts/add_electricity.py
+++ b/workflow/scripts/add_electricity.py
@@ -731,7 +731,11 @@ def match_plant_to_bus(n, plants):
     return plants_matched
 
 
-def attach_renewable_capacities_to_atlite(n, plants_df, renewable_carriers):
+def attach_renewable_capacities_to_atlite(
+    n: pypsa.Network, 
+    plants_df: pd.DataFrame,
+    renewable_carriers: list,
+):
     plants = plants_df.query(
         "bus_assignment in @n.buses.index",
     )
@@ -928,14 +932,14 @@ def attach_wind_and_solar(
             p_nom_max_bus = (
                 ds["p_nom_max"]
                 .to_dataframe()
-                .merge(bus2sub, left_on="bus", right_on="sub_id")
+                .merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
                 .set_index("bus_id")
                 .p_nom_max
             )
             weight_bus = (
                 ds["weight"]
                 .to_dataframe()
-                .merge(bus2sub, left_on="bus", right_on="sub_id")
+                .merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
                 .set_index("bus_id")
                 .weight
             )
@@ -943,12 +947,11 @@ def attach_wind_and_solar(
                 ds["profile"]
                 .transpose("time", "bus")
                 .to_pandas()
-                .T.merge(bus2sub, left_on="bus", right_on="sub_id")
+                .T.merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
                 .set_index("bus_id")
                 .drop(columns="sub_id")
                 .T
             )
-
             if supcar == "offwind":
                 capital_cost = capital_cost.to_frame().reset_index()
                 capital_cost.bus = capital_cost.bus.astype(int)
diff --git a/workflow/scripts/build_base_network.py b/workflow/scripts/build_base_network.py
index 97bf24b2..00e07409 100644
--- a/workflow/scripts/build_base_network.py
+++ b/workflow/scripts/build_base_network.py
@@ -98,7 +98,7 @@ def add_buses_from_file(
         interconnect=buses.interconnect,
         x=buses.lon,
         y=buses.lat,
-        sub_id=buses.sub_id,
+        sub_id=buses.sub_id.astype(int),
         substation_off=False,
         poi=False,
         LAF_states=buses.LAF_states,
@@ -317,7 +317,7 @@ def add_offshore_buses(n: pypsa.Network, offshore_buses: pd.DataFrame) -> pypsa.
         interconnect="Offshore",
         x=offshore_buses.lon,
         y=offshore_buses.lat,
-        sub_id=offshore_buses.sub_id.astype(str),
+        sub_id=offshore_buses.sub_id.astype(int),
         substation_off=True,
         poi_sub=False,
         poi_bus=False,
@@ -726,6 +726,7 @@ def main(snakemake):
     )
     lines_gis.to_csv(snakemake.output.lines_gis)
 
+    
     # export network
     n.export_to_netcdf(snakemake.output.network)
 
diff --git a/workflow/scripts/build_demand.py b/workflow/scripts/build_demand.py
index 387b035f..78b43b66 100644
--- a/workflow/scripts/build_demand.py
+++ b/workflow/scripts/build_demand.py
@@ -149,7 +149,7 @@ def prepare_efs_demand(
         year=planning_horizons[0],
         month=1,
         day=1,
-    ) + pd.to_timedelta(demand["LocalHourID"] - 1, unit="H")
+    ) + pd.to_timedelta(demand["LocalHourID"] - 1, unit="h")
     demand["UTC_Time"] = demand.groupby(["State"])["DateTime"].transform(local_to_utc)
     demand.drop(columns=["LocalHourID", "DateTime"], inplace=True)
     demand.set_index("UTC_Time", inplace=True)
@@ -173,7 +173,11 @@ def prepare_efs_demand(
         col = demand[column].reset_index()
         demand_new[column] = col.groupby('UTC_Time').apply(lambda group: group.loc[group.drop(columns='UTC_Time').first_valid_index()]).drop(columns='UTC_Time')
 
+    #take the intersection of the demand and the snapshots by hour of year
+    hoy = (n.snapshots.dayofyear - 1) * 24 + n.snapshots.hour
+    demand_new = demand_new.loc[hoy]
     demand_new.index = n.snapshots
+
     n.buses.rename(columns={"LAF_states": "LAF"}, inplace=True)
     return disaggregate_demand_to_buses(n, demand_new)
 
diff --git a/workflow/scripts/cluster_network_eur.py b/workflow/scripts/cluster_network_eur.py
index 52d09d3b..45c44130 100644
--- a/workflow/scripts/cluster_network_eur.py
+++ b/workflow/scripts/cluster_network_eur.py
@@ -1,12 +1,17 @@
-# SPDX-FileCopyrightText: : 2017-2022 The PyPSA-Eur Authors
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2017-2023 The PyPSA-Eur Authors
 #
 # SPDX-License-Identifier: MIT
+
 # coding: utf-8
+
+# ADAPTED FROM PyPSA-Eur for PyPSA-USA
 """
 Creates networks clustered to ``{cluster}`` number of zones with aggregated
 buses, generators and transmission corridors.
 
-**Relevant Settings**
+Relevant Settings
+-----------------
 
 .. code:: yaml
 
@@ -24,10 +29,11 @@
         length_factor:
 
 .. seealso::
-    Documentation of the configuration file ``config.yaml`` at
-    :ref:`renewable_cf`, :ref:`solving_cf`, :ref:`lines_cf`
+    Documentation of the configuration file ``config/config.yaml`` at
+    :ref:`toplevel_cf`, :ref:`renewable_cf`, :ref:`solving_cf`, :ref:`lines_cf`
 
-**Inputs**
+Inputs
+------
 
 - ``resources/regions_onshore_elec_s{simpl}.geojson``: confer :ref:`simplify`
 - ``resources/regions_offshore_elec_s{simpl}.geojson``: confer :ref:`simplify`
@@ -35,26 +41,28 @@
 - ``networks/elec_s{simpl}.nc``: confer :ref:`simplify`
 - ``data/custom_busmap_elec_s{simpl}_{clusters}.csv``: optional input
 
-**Outputs**
+Outputs
+-------
 
 - ``resources/regions_onshore_elec_s{simpl}_{clusters}.geojson``:
 
-    # .. image:: ../img/regions_onshore_elec_s_X.png
-    #     :scale: 33 %
+    .. image:: img/regions_onshore_elec_s_X.png
+        :scale: 33 %
 
 - ``resources/regions_offshore_elec_s{simpl}_{clusters}.geojson``:
 
-    # .. image:: ../img/regions_offshore_elec_s_X.png
-    #     :scale: 33 %
+    .. image:: img/regions_offshore_elec_s_X.png
+        :scale: 33 %
 
 - ``resources/busmap_elec_s{simpl}_{clusters}.csv``: Mapping of buses from ``networks/elec_s{simpl}.nc`` to ``networks/elec_s{simpl}_{clusters}.nc``;
 - ``resources/linemap_elec_s{simpl}_{clusters}.csv``: Mapping of lines from ``networks/elec_s{simpl}.nc`` to ``networks/elec_s{simpl}_{clusters}.nc``;
 - ``networks/elec_s{simpl}_{clusters}.nc``:
 
-    # .. image:: ../img/elec_s_X.png
-    #     :scale: 40  %
+    .. image:: img/elec_s_X.png
+        :scale: 40  %
 
-**Description**
+Description
+-----------
 
 .. note::
 
@@ -83,15 +91,38 @@
     **Is it possible to run the model without the** ``simplify_network`` **rule?**
 
         No, the network clustering methods in the PyPSA module
-        `pypsa.networkclustering <https://github.com/PyPSA/PyPSA/blob/master/pypsa/networkclustering.py>`_
+        `pypsa.clustering.spatial <https://github.com/PyPSA/PyPSA/blob/master/pypsa/clustering/spatial.py>`_
         do not work reliably with multiple voltage levels and transformers.
 
 .. tip::
-    The rule :mod:`cluster_all_networks` runs
+    The rule :mod:`cluster_networks` runs
     for all ``scenario`` s in the configuration file
     the rule :mod:`cluster_network`.
-"""
 
+Exemplary unsolved network clustered to 512 nodes:
+
+.. image:: img/elec_s_512.png
+    :scale: 40  %
+    :align: center
+
+Exemplary unsolved network clustered to 256 nodes:
+
+.. image:: img/elec_s_256.png
+    :scale: 40  %
+    :align: center
+
+Exemplary unsolved network clustered to 128 nodes:
+
+.. image:: img/elec_s_128.png
+    :scale: 40  %
+    :align: center
+
+Exemplary unsolved network clustered to 37 nodes:
+
+.. image:: img/elec_s_37.png
+    :scale: 40  %
+    :align: center
+"""
 
 import logging
 import warnings
@@ -104,21 +135,16 @@
 import pyomo.environ as po
 import pypsa
 import seaborn as sns
-from _helpers import configure_logging
-from _helpers import export_network_for_gis_mapping
-from _helpers import get_aggregation_strategies
-from _helpers import update_p_nom_max
-from pypsa.clustering.spatial import busmap_by_greedy_modularity
-from pypsa.clustering.spatial import busmap_by_hac
-from pypsa.clustering.spatial import busmap_by_kmeans
-from pypsa.clustering.spatial import get_clustering_from_busmap
+from _helpers import configure_logging, update_p_nom_max
+from pypsa.clustering.spatial import (
+    busmap_by_greedy_modularity,
+    busmap_by_hac,
+    busmap_by_kmeans,
+    get_clustering_from_busmap,
+)
 
 warnings.filterwarnings(action="ignore", category=UserWarning)
 
-import os, sys
-
-sys.path.append(os.path.join(os.getcwd(), "subworkflows", "pypsa-eur", "scripts"))
-
 from add_electricity import load_costs
 
 idx = pd.IndexSlice
@@ -131,17 +157,15 @@ def normed(x):
 
 
 def weighting_for_country(n, x):
-    # conv_carriers = {'OCGT','CCGT','PHS', 'hydro'}
-    gen = n.generators.groupby(  # .loc[n.generators.carrier.isin(conv_carriers)]
-        "bus",
-    ).p_nom.sum().reindex(
-        n.buses.index,
-        fill_value=0.0,
-    ) + n.storage_units.groupby(  # .loc[n.storage_units.carrier.isin(conv_carriers)]
-        "bus",
+    conv_carriers = {"OCGT", "CCGT", "PHS", "hydro"}
+    gen = n.generators.loc[n.generators.carrier.isin(conv_carriers)].groupby(
+        "bus"
+    ).p_nom.sum().reindex(n.buses.index, fill_value=0.0) + n.storage_units.loc[
+        n.storage_units.carrier.isin(conv_carriers)
+    ].groupby(
+        "bus"
     ).p_nom.sum().reindex(
-        n.buses.index,
-        fill_value=0.0,
+        n.buses.index, fill_value=0.0
     )
     load = n.loads_t.p_set.mean().groupby(n.loads.bus).sum()
 
@@ -154,7 +178,6 @@ def weighting_for_country(n, x):
 
 
 def get_feature_for_hac(n, buses_i=None, feature=None):
-
     if buses_i is None:
         buses_i = n.buses.index
 
@@ -165,8 +188,7 @@ def get_feature_for_hac(n, buses_i=None, feature=None):
     if "offwind" in carriers:
         carriers.remove("offwind")
         carriers = np.append(
-            carriers,
-            network.generators.carrier.filter(like="offwind").unique(),
+            carriers, n.generators.carrier.filter(like="offwind").unique()
         )
 
     if feature.split("-")[1] == "cap":
@@ -185,7 +207,7 @@ def get_feature_for_hac(n, buses_i=None, feature=None):
         for carrier in carriers:
             gen_i = n.generators.query("carrier == @carrier").index
             attach = n.generators_t.p_max_pu[gen_i].rename(
-                columns=n.generators.loc[gen_i].bus,
+                columns=n.generators.loc[gen_i].bus
             )
             feature_data = pd.concat([feature_data, attach], axis=0)[buses_i]
 
@@ -218,7 +240,6 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
     ), f"Number of clusters must be {len(N)} <= n_clusters <= {N.sum()} for this selection of countries."
 
     if focus_weights is not None:
-
         total_focus = sum(list(focus_weights.values()))
 
         assert (
@@ -226,9 +247,6 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
         ), "The sum of focus weights must be less than or equal to 1."
 
         for country, weight in focus_weights.items():
-            if country == "Offshore":
-                L[(country, "0")] = weight + weight**2
-                L.pipe(normed)
             L[country] = weight / len(L[country])
 
         remainder = [
@@ -239,9 +257,7 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
         logger.warning("Using custom focus weights for determining number of clusters.")
 
     assert np.isclose(
-        L.sum(),
-        1.0,
-        rtol=1e-3,
+        L.sum(), 1.0, rtol=1e-3
     ), f"Country weights L must sum up to 1.0 when distributing clusters. Is {L.sum()}."
 
     m = po.ConcreteModel()
@@ -259,7 +275,7 @@ def n_bounds(model, *n_id):
     opt = po.SolverFactory(solver_name)
     if not opt.has_capability("quadratic_objective"):
         logger.warning(
-            f"The configured solver `{solver_name}` does not support quadratic objectives. Falling back to `ipopt`.",
+            f"The configured solver `{solver_name}` does not support quadratic objectives. Falling back to `ipopt`."
         )
         opt = po.SolverFactory("ipopt")
 
@@ -294,8 +310,7 @@ def fix_country_assignment_for_hac(n):
             m = n[n.buses.country == country].copy()
 
             _, labels = csgraph.connected_components(
-                m.adjacency_matrix(),
-                directed=False,
+                m.adjacency_matrix(), directed=False
             )
 
             component = pd.Series(labels, index=m.buses.index)
@@ -307,16 +322,16 @@ def fix_country_assignment_for_hac(n):
                 ].index[0]
 
                 neighbor_bus = n.lines.query(
-                    "bus0 == @disconnected_bus or bus1 == @disconnected_bus",
+                    "bus0 == @disconnected_bus or bus1 == @disconnected_bus"
                 ).iloc[0][["bus0", "bus1"]]
-                new_country = list(set(n.buses.loc[neighbor_bus].country) - {country})[
-                    0
-                ]
+                new_country = list(
+                    set(n.buses.loc[neighbor_bus].country) - set([country])
+                )[0]
 
                 logger.info(
                     f"overwriting country `{country}` of bus `{disconnected_bus}` "
                     f"to new country `{new_country}`, because it is disconnected "
-                    "from its inital inter-country transmission grid.",
+                    "from its initial inter-country transmission grid."
                 )
                 n.buses.at[disconnected_bus, "country"] = new_country
         return n
@@ -328,16 +343,13 @@ def fix_country_assignment_for_hac(n):
     if (algorithm != "hac") and (feature is not None):
         logger.warning(
             f"Keyword argument feature is only valid for algorithm `hac`. "
-            f"Given feature `{feature}` will be ignored.",
+            f"Given feature `{feature}` will be ignored."
         )
 
     n.determine_network_topology()
 
     n_clusters = distribute_clusters(
-        n,
-        n_clusters,
-        focus_weights=focus_weights,
-        solver_name=solver_name,
+        n, n_clusters, focus_weights=focus_weights, solver_name=solver_name
     )
 
     def busmap_for_country(x):
@@ -349,28 +361,19 @@ def busmap_for_country(x):
 
         if algorithm == "kmeans":
             return prefix + busmap_by_kmeans(
-                n,
-                weight,
-                n_clusters[x.name],
-                buses_i=x.index,
-                **algorithm_kwds,
+                n, weight, n_clusters[x.name], buses_i=x.index, **algorithm_kwds
             )
         elif algorithm == "hac":
             return prefix + busmap_by_hac(
-                n,
-                n_clusters[x.name],
-                buses_i=x.index,
-                feature=feature.loc[x.index],
+                n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]
             )
         elif algorithm == "modularity":
             return prefix + busmap_by_greedy_modularity(
-                n,
-                n_clusters[x.name],
-                buses_i=x.index,
+                n, n_clusters[x.name], buses_i=x.index
             )
         else:
             raise ValueError(
-                f"`algorithm` must be one of 'kmeans' or 'hac'. Is {algorithm}.",
+                f"`algorithm` must be one of 'kmeans', 'hac' or 'modularity'. Is {algorithm}."
             )
 
     return (
@@ -394,23 +397,17 @@ def clustering_for_n_clusters(
     extended_link_costs=0,
     focus_weights=None,
 ):
-
-    bus_strategies, generator_strategies = get_aggregation_strategies(
-        aggregation_strategies,
-    )
-
     if not isinstance(custom_busmap, pd.Series):
         busmap = busmap_for_n_clusters(
-            n,
-            n_clusters,
-            solver_name,
-            focus_weights,
-            algorithm,
-            feature,
+            n, n_clusters, solver_name, focus_weights, algorithm, feature
         )
     else:
         busmap = custom_busmap
 
+    line_strategies = aggregation_strategies.get("lines", dict())
+    generator_strategies = aggregation_strategies.get("generators", dict())
+    one_port_strategies = aggregation_strategies.get("one_ports", dict())
+
     clustering = get_clustering_from_busmap(
         n,
         busmap,
@@ -418,8 +415,9 @@ def clustering_for_n_clusters(
         aggregate_generators_carriers=aggregate_carriers,
         aggregate_one_ports=["Load", "StorageUnit"],
         line_length_factor=line_length_factor,
+        line_strategies=line_strategies,
         generator_strategies=generator_strategies,
-        bus_strategies=bus_strategies,
+        one_port_strategies=one_port_strategies,
         scale_link_capital_costs=False,
     )
 
@@ -429,7 +427,10 @@ def clustering_for_n_clusters(
             n.links.eval("underwater_fraction * length").div(nc.links.length).dropna()
         )
         nc.links["capital_cost"] = nc.links["capital_cost"].add(
-            (nc.links.length - n.links.length).clip(lower=0).mul(extended_link_costs),
+            (nc.links.length - n.links.length)
+            .clip(lower=0)
+            .mul(extended_link_costs)
+            .dropna(),
             fill_value=0,
         )
 
@@ -437,7 +438,6 @@ def clustering_for_n_clusters(
 
 
 def cluster_regions(busmaps, input=None, output=None):
-
     busmap = reduce(lambda x, y: x.map(y), busmaps[1:], busmaps[0])
 
     for which in ("regions_onshore", "regions_offshore"):
@@ -460,114 +460,96 @@ def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
 
 
 if __name__ == "__main__":
-    print("Running clustering.py directly")
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
 
-        snakemake = mock_snakemake(
-            "cluster_network",
-            interconnect="western",
-            clusters="100",
-        )
+        snakemake = mock_snakemake("cluster_network", interconnect="western", clusters="40")
     configure_logging(snakemake)
 
-    n = pypsa.Network(snakemake.input.network)
-    focus_weights = snakemake.config.get("focus_weights", None)
-
-    n.buses.drop(columns=["state", "balancing_area", "sub_id"], inplace=True)
+    params = snakemake.params
+    solver_name = snakemake.config["solving"]["solver"]["name"]
 
-    renewable_carriers = pd.Index(
-        [
-            tech
-            for tech in n.generators.carrier.unique()
-            if tech in snakemake.config["renewable"]
-        ],
-    )
+    n = pypsa.Network(snakemake.input.network)
 
+    exclude_carriers = params.cluster_network.get("exclude_carriers", [])  # optional key
+    aggregate_carriers = set(n.generators.carrier) - set(exclude_carriers)
+    conventional_carriers = set(params.conventional_carriers)
     if snakemake.wildcards.clusters.endswith("m"):
         n_clusters = int(snakemake.wildcards.clusters[:-1])
-        aggregate_carriers = snakemake.config["electricity"].get(
-            "conventional_carriers",
-        )
+        aggregate_carriers = conventional_carriers & aggregate_carriers
+    elif snakemake.wildcards.clusters.endswith("c"):
+        n_clusters = int(snakemake.wildcards.clusters[:-1])
+        aggregate_carriers = aggregate_carriers - conventional_carriers
     elif snakemake.wildcards.clusters == "all":
         n_clusters = len(n.buses)
-        aggregate_carriers = None  # All
     else:
         n_clusters = int(snakemake.wildcards.clusters)
-        aggregate_carriers = None  # All
+
+    if params.cluster_network.get("consider_efficiency_classes", False):
+        carriers = []
+        for c in aggregate_carriers:
+            gens = n.generators.query("carrier == @c")
+            low = gens.efficiency.quantile(0.10)
+            high = gens.efficiency.quantile(0.90)
+            if low >= high:
+                carriers += [c]
+            else:
+                labels = ["low", "medium", "high"]
+                suffix = pd.cut(
+                    gens.efficiency, bins=[0, low, high, 1], labels=labels
+                ).astype(str)
+                carriers += [f"{c} {label} efficiency" for label in labels]
+                n.generators.carrier.update(gens.carrier + " " + suffix + " efficiency")
+        aggregate_carriers = carriers
 
     if n_clusters == len(n.buses):
         # Fast-path if no clustering is necessary
         busmap = n.buses.index.to_series()
         linemap = n.lines.index.to_series()
         clustering = pypsa.clustering.spatial.Clustering(
-            n,
-            busmap,
-            linemap,
-            linemap,
-            pd.Series(dtype="O"),
+            n, busmap, linemap, linemap, pd.Series(dtype="O")
         )
     else:
-        line_length_factor = snakemake.config["lines"]["length_factor"]
         Nyears = n.snapshot_weightings.objective.sum() / 8760
 
         hvac_overhead_cost = load_costs(
             snakemake.input.tech_costs,
-            snakemake.config["costs"],
-            snakemake.config["electricity"]["max_hours"],
+            params.costs,
+            params.max_hours,
             Nyears,
         ).at["HVAC overhead", "capital_cost"]
 
-        def consense(x):
-            v = x.iat[0]
-            assert (
-                x == v
-            ).all() or x.isnull().all(), "The `potential` configuration option must agree for all renewable carriers, for now!"
-            return v
-
-        aggregation_strategies = snakemake.config["clustering"].get(
-            "aggregation_strategies",
-            {},
-        )
-        # translate str entries of aggregation_strategies to pd.Series functions:
-        aggregation_strategies = {
-            p: {k: getattr(pd.Series, v) for k, v in aggregation_strategies[p].items()}
-            for p in aggregation_strategies.keys()
-        }
-        custom_busmap = snakemake.config["enable"].get("custom_busmap", False)
+        custom_busmap = params.custom_busmap
         if custom_busmap:
             custom_busmap = pd.read_csv(
-                snakemake.input.custom_busmap,
-                index_col=0,
-                squeeze=True,
+                snakemake.input.custom_busmap, index_col=0
-            )
+            ).squeeze("columns")
             custom_busmap.index = custom_busmap.index.astype(str)
             logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
 
-        cluster_config = snakemake.config.get("clustering", {}).get(
-            "cluster_network",
-            {},
-        )
-
         clustering = clustering_for_n_clusters(
             n,
             n_clusters,
             custom_busmap,
             aggregate_carriers,
-            line_length_factor,
-            aggregation_strategies,
-            snakemake.config["solving"]["solver"]["name"],
-            cluster_config.get("algorithm", "hac"),
-            cluster_config.get("feature", "solar+onwind-time"),
+            params.length_factor,
+            params.aggregation_strategies,
+            solver_name,
+            params.cluster_network["algorithm"],
+            params.cluster_network["feature"],
             hvac_overhead_cost,
-            focus_weights,
+            params.focus_weights,
         )
 
     update_p_nom_max(clustering.network)
 
+    if params.cluster_network.get("consider_efficiency_classes"):
+        labels = [f" {label} efficiency" for label in ["low", "medium", "high"]]
+        nc = clustering.network
+        nc.generators["carrier"] = nc.generators.carrier.replace(labels, "", regex=True)
+
     clustering.network.meta = dict(
-        snakemake.config,
-        **dict(wildcards=dict(snakemake.wildcards)),
+        snakemake.config, **dict(wildcards=dict(snakemake.wildcards))
     )
     clustering.network.export_to_netcdf(snakemake.output.network)
     for attr in (
diff --git a/workflow/scripts/simplify_network.py b/workflow/scripts/simplify_network.py
index eedbcf48..39e85e22 100644
--- a/workflow/scripts/simplify_network.py
+++ b/workflow/scripts/simplify_network.py
@@ -86,16 +86,16 @@ def aggregate_to_substations(
         aggregate_one_ports=["Load", "StorageUnit"],
         line_length_factor=1.0,
         bus_strategies={
-            "type": np.max,
-            "Pd": np.sum,
+            "type": 'max',
+            "Pd": 'sum',
         },
         generator_strategies={
-            "marginal_cost": np.mean,
-            "p_nom_min": np.sum,
-            "p_min_pu": np.mean,
-            "p_max_pu": np.mean,
-            "ramp_limit_up": np.max,
-            "ramp_limit_down": np.max,
+            "marginal_cost": 'mean',
+            "p_nom_min": 'sum',
+            "p_min_pu": 'mean',
+            "p_max_pu": 'mean',
+            "ramp_limit_up": 'max',
+            "ramp_limit_down": 'max',
         },
     )
 
@@ -129,13 +129,12 @@ def aggregate_to_substations(
     network_s.buses["x"] = substations.x
     network_s.buses["y"] = substations.y
     network_s.buses["substation_lv"] = True
-    network_s.buses["substation_off"] = True
     network_s.buses["country"] = (
         zone  # country field used bc pypsa-eur aggregates based on country boundary
     )
-    network_s.buses["state"] = substations.state
-    network_s.buses["balancing_area"] = substations.balancing_area
     network_s.lines["type"] = np.nan
+
+    network_s.buses.drop(columns = ['balancing_area', 'state', 'substation_off', 'sub_id'], inplace=True)
     return network_s
 
 
diff --git a/workflow/tests/test_yaml_structure.py b/workflow/tests/test_yaml_structure.py
new file mode 100644
index 00000000..6c1cae89
--- /dev/null
+++ b/workflow/tests/test_yaml_structure.py
@@ -0,0 +1,44 @@
+import yaml
+
+def load_yaml_file(filepath):
+    with open(filepath, 'r') as file:
+        return yaml.safe_load(file)
+
+def compare_structures(data1, data2, path=''):
+    if type(data1) != type(data2):
+        print(f"Type mismatch at {path}: {type(data1).__name__} vs {type(data2).__name__}")
+        return False
+
+    if isinstance(data1, dict):
+        for key in data1:
+            if key not in data2:
+                print(f"Missing key in second structure at {path}: {key}")
+                continue
+            compare_structures(data1[key], data2[key], path=f"{path}.{key}" if path else key)
+        for key in data2:
+            if key not in data1:
+                print(f"Missing key in first structure at {path}: {key}")
+        return True
+    elif isinstance(data1, list):
+        # For simplicity, just compare the first item if it exists, assuming homogeneous lists
+        if data1 and data2:
+            compare_structures(data1[0], data2[0], path=f"{path}[0]")
+        elif not data1 and data2 or data1 and not data2:
+            print(f"List length mismatch or one is empty at {path}")
+        return True
+    else:
+        # This part ignores values if they are not container types
+        return True
+
+def test_yaml_structure(filepath1, filepath2):
+    data1 = load_yaml_file(filepath1)
+    data2 = load_yaml_file(filepath2)
+    
+    print("Comparing structure...")
+    if compare_structures(data1, data2):
+        print("The structures match.")
+    else:
+        print("The structures do not match.")
+
+# Example usage
+test_yaml_structure('../config/tests/config.test_simple.yaml', '../config/config.default.yaml')

From 58813d02373ed4df7f136a04b934bb634ed5c79e Mon Sep 17 00:00:00 2001
From: ktehranchi <83722342+ktehranchi@users.noreply.github.com>
Date: Tue, 27 Feb 2024 17:36:32 -0800
Subject: [PATCH 02/10] update plotting for new env

---
 workflow/Snakefile               |   2 -
 workflow/scripts/plot_figures.py | 134 +++++++++++--------------------
 workflow/scripts/summary.py      |  85 ++++----------------
 3 files changed, 60 insertions(+), 161 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 58b7a134..8a2587c5 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -22,7 +22,6 @@ FIGURES_SINGLE = [
     "production_bar",
     "production_area",
     "emissions_area",
-    "emissions_accumulated",
     "emissions_accumulated_tech",
     "emissions_map",
     "renewable_potential_map",
@@ -38,7 +37,6 @@ FIGURES_VALIDATE = [
 FIGURES_SINGLE_HTML = [
     "production_area_html",
     "emissions_area_html",
-    # "emissions_node_html",
     "emissions_region_html",
     "emissions_accumulated_tech_html",
 ]
diff --git a/workflow/scripts/plot_figures.py b/workflow/scripts/plot_figures.py
index 7ee2b0d2..c1b64d93 100644
--- a/workflow/scripts/plot_figures.py
+++ b/workflow/scripts/plot_figures.py
@@ -73,7 +73,6 @@
     get_capacity_brownfield,
     get_capacity_base,
     get_demand_base,
-    get_operational_costs,
     get_capital_costs,
 )
 from add_electricity import (
@@ -117,7 +116,7 @@ def get_color_palette(n: pypsa.Network) -> pd.Series:
         "co2": "k",
     }
 
-    return pd.concat([colors, pd.Series(additional)])
+    return pd.concat([colors, pd.Series(additional)]).to_dict()
 
 
 def get_bus_scale(interconnect: str) -> float:
@@ -274,12 +273,10 @@ def plot_region_emissions_html(n: pypsa.Network, save: str, **wildcards) -> None
     """
 
     # get data
-
     emissions = get_node_emissions_timeseries(n).mul(1e-6)  # T -> MT
-    emissions = emissions.groupby(n.buses.country, axis=1).sum()
+    emissions = emissions.T.groupby(n.buses.country).sum().T
 
     # plot data
-
     fig = px.area(
         emissions,
         x=emissions.index,
@@ -323,37 +320,6 @@ def plot_node_emissions_html(n: pypsa.Network, save: str, **wildcards) -> None:
     fig.write_html(save)
 
 
-def plot_accumulated_emissions(n: pypsa.Network, save: str, **wildcards) -> None:
-    """
-    Plots accumulated emissions.
-    """
-
-    # get data
-
-    emissions = get_tech_emissions_timeseries(n).mul(1e-6).sum(axis=1)  # T -> MT
-    emissions = emissions.cumsum().to_frame("co2")
-
-    # plot
-
-    color_palette = get_color_palette(n)
-
-    fig, ax = plt.subplots(figsize=(14, 4))
-
-    emissions.plot.area(
-        ax=ax,
-        alpha=0.7,
-        legend="reverse",
-        color=color_palette.to_dict(),
-    )
-
-    ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
-    ax.set_title(create_title("Accumulated Emissions", **wildcards))
-    ax.set_ylabel("Emissions [MT]")
-    fig.tight_layout()
-
-    fig.savefig(save)
-
-
 def plot_accumulated_emissions_tech(n: pypsa.Network, save: str, **wildcards) -> None:
     """
     Plots accumulated emissions by technology.
@@ -381,7 +347,7 @@ def plot_accumulated_emissions_tech(n: pypsa.Network, save: str, **wildcards) ->
         ax=ax,
         alpha=0.7,
         legend="reverse",
-        color=color_palette.to_dict(),
+        color=color_palette,
     )
 
     ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
@@ -421,7 +387,7 @@ def plot_accumulated_emissions_tech_html(
         emissions,
         x=emissions.index,
         y=emissions.columns,
-        color_discrete_map=color_palette.to_dict(),
+        color_discrete_map=color_palette,
     )
 
     title = create_title("Technology Accumulated Emissions", **wildcards)
@@ -458,7 +424,7 @@ def plot_hourly_emissions_html(n: pypsa.Network, save: str, **wildcards) -> None
         emissions,
         x=emissions.index,
         y=emissions.columns,
-        color_discrete_map=color_palette.to_dict(),
+        color_discrete_map=color_palette,
     )
 
     title = create_title("Technology Emissions", **wildcards)
@@ -488,7 +454,6 @@ def plot_hourly_emissions(n: pypsa.Network, save: str, **wildcards) -> None:
     emissions = emissions.rename(columns=n.carriers.nice_name)
 
     # plot
-
     color_palette = get_color_palette(n)
 
     fig, ax = plt.subplots(figsize=(14, 4))
@@ -497,7 +462,7 @@ def plot_hourly_emissions(n: pypsa.Network, save: str, **wildcards) -> None:
         ax=ax,
         alpha=0.7,
         legend="reverse",
-        color=color_palette.to_dict(),
+        color=color_palette,
     )
 
     ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
@@ -546,7 +511,7 @@ def plot_production_html(
         energy_mix,
         x=energy_mix.index,
         y=[c for c in energy_mix.columns if c != "Demand"],
-        color_discrete_map=color_palette.to_dict(),
+        color_discrete_map=color_palette,
     )
     fig.add_trace(
         go.Scatter(
@@ -589,7 +554,7 @@ def plot_production_area(
             energy_mix[carrier + "_discharger"] = energy_mix[carrier].clip(lower=0.0001)
             energy_mix[carrier + "_charger"] = energy_mix[carrier].clip(upper=-0.0001)
             energy_mix = energy_mix.drop(columns=carrier)
-    # energy_mix = energy_mix[[x for x in carriers_2_plot if x in energy_mix]]
+
     energy_mix = energy_mix.rename(columns=n.carriers.nice_name)
 
     color_palette = get_color_palette(n)
@@ -607,7 +572,7 @@ def plot_production_area(
             energy_mix[snapshots].plot.area(
                 ax=ax,
                 alpha=0.7,
-                color=color_palette.to_dict(),
+                color=color_palette,
             )
             demand[snapshots].plot.line(ax=ax, ls="-", color="darkblue")
 
@@ -635,34 +600,25 @@ def plot_production_bar(
     **wildcards,
 ) -> None:
     """
-    Plot production per carrier.
+    Plot dispatch per carrier.
     """
 
     # get data
-
-    energy_mix = (
-        get_energy_timeseries(n)
-        # .rename(columns={"battery charger": "battery", "battery discharger": "battery"})
-        .groupby(level=0, axis=1)
-        .sum()
-        .sum()
-        .mul(1e-3)  # MW -> GW
-    )
-    energy_mix = pd.DataFrame(energy_mix, columns=["Production"]).reset_index(
-        names="carrier",
-    )
-    energy_mix = energy_mix[
-        energy_mix.carrier.isin([x for x in carriers_2_plot if x != "battery"])
-    ].copy()
-    energy_mix["color"] = energy_mix.carrier.map(n.carriers.color)
-    energy_mix["carrier"] = energy_mix.carrier.map(n.carriers.nice_name)
-
-    # plot
+    energy_mix = n.statistics.dispatch().mul(1e-3)  # MW -> GW
+    energy_mix.name = "dispatch" 
+    energy_mix = energy_mix[energy_mix.index.get_level_values('component').isin(['Generator', 'StorageUnit'])]
+    energy_mix = energy_mix.groupby("carrier").sum().reset_index()
+    color_palette = get_color_palette(n)
 
     fig, ax = plt.subplots(figsize=(10, 10))
-    sns.barplot(data=energy_mix, y="carrier", x="Production", palette=energy_mix.color)
+    sns.barplot(
+        data=energy_mix, 
+        y="carrier", 
+        x="dispatch", 
+        palette=color_palette,
+        )
 
-    ax.set_title(create_title("Production [GWh]", **wildcards))
+    ax.set_title(create_title("Dispatch [GWh]", **wildcards))
     ax.set_ylabel("")
     # ax.set_xlabel("")
     fig.tight_layout()
@@ -681,22 +637,20 @@ def plot_costs_bar(
 
     # get data
 
-    operational_costs = get_operational_costs(n).sum().mul(1e-9)  # $ -> M$
-    capital_costs = get_capital_costs(n).mul(1e-9)  # $ -> M$
+    opex = n.statistics.opex().mul(1e-6)  # $ -> M$
+    capex = get_capital_costs(n).mul(1e-6)  # $ -> M$
 
     costs = pd.concat(
-        [operational_costs, capital_costs],
+        [opex, capex],
         axis=1,
-        keys=["OPEX", "CAPEX"],
-    ).reset_index()
-    costs = costs[costs.carrier.isin(carriers_2_plot)]
-    costs["carrier"] = costs.carrier.map(n.carriers.nice_name)
+        keys=["OPEX", "CAPEX"]).reset_index()
     costs = costs.groupby("carrier").sum().reset_index()  # groups batteries
 
     # plot data
-
     fig, ax = plt.subplots(figsize=(10, 10))
-    color_palette = n.carriers.reset_index().set_index("nice_name").to_dict()["color"]
+    color_palette = get_color_palette(n)
+
+
     sns.barplot(
         y="carrier",
         x="CAPEX",
@@ -830,7 +784,7 @@ def plot_demand_map(
 
     bus_values = get_demand_base(n).mul(1e-3)
     line_values = n.lines.s_nom
-    link_values = n.links.p_nom.replace(0)
+    link_values = n.links.p_nom.replace(to_replace={pd.NA: 0})
 
     # plot data
     title = create_title("Network Demand", **wildcards)
@@ -931,7 +885,7 @@ def plot_base_capacity_map(
         n=n,
         bus_values=bus_values,
         line_values=line_values,
-        link_values=n.links.p_nom.replace(0),
+        link_values=n.links.p_nom.replace(to_replace={pd.NA: 0}),
         regions=regions,
         line_scale=line_scale,
         bus_scale=bus_scale,
@@ -984,7 +938,7 @@ def plot_opt_capacity_map(
         n=n,
         bus_values=bus_values,
         line_values=line_values,
-        link_values=n.links.p_nom.replace(0),
+        link_values=n.links.p_nom.replace(to_replace={pd.NA: 0}),
         regions=regions,
         line_scale=line_scale,
         bus_scale=bus_scale,
@@ -1046,7 +1000,7 @@ def plot_new_capacity_map(
         n=n,
         bus_values=bus_values,
         line_values=line_values,
-        link_values=n.links.p_nom.replace(0),
+        link_values=n.links.p_nom.replace(to_replace={pd.NA: 0}),
         regions=regions,
         line_scale=line_scale,
         bus_scale=bus_scale,
@@ -1181,8 +1135,8 @@ def plot_capacity_additions_bar(
     # plot data (option 2)
     # using matplotlib for tech group colours
 
-    # color_palette = get_color_palette(n)
-    color_palette = n.carriers.reset_index().set_index("nice_name").to_dict()["color"]
+
+    color_palette = get_color_palette(n)
     color_mapper = [color_palette[carrier] for carrier in capacity.index]
     bar_height = 0.35
 
@@ -1315,7 +1269,12 @@ def plot_capacity_additions_bar(
         retirement_method,
         **snakemake.wildcards,
     )
-    plot_costs_bar(n, carriers, snakemake.output["costs_bar"], **snakemake.wildcards)
+    plot_costs_bar(
+        n, 
+        carriers, 
+        snakemake.output["costs_bar"], 
+        **snakemake.wildcards
+    )
     plot_production_bar(
         n,
         carriers,
@@ -1334,17 +1293,16 @@ def plot_capacity_additions_bar(
         snakemake.output["production_area_html"],
         **snakemake.wildcards,
     )
-    plot_hourly_emissions(n, snakemake.output["emissions_area"], **snakemake.wildcards)
+    plot_hourly_emissions(
+        n, 
+        snakemake.output["emissions_area"], 
+        **snakemake.wildcards
+    )
     plot_hourly_emissions_html(
         n,
         snakemake.output["emissions_area_html"],
         **snakemake.wildcards,
     )
-    plot_accumulated_emissions(
-        n,
-        snakemake.output["emissions_accumulated"],
-        **snakemake.wildcards,
-    )
     plot_accumulated_emissions_tech(
         n,
         snakemake.output["emissions_accumulated_tech"],
diff --git a/workflow/scripts/summary.py b/workflow/scripts/summary.py
index 545a4900..c71b99e3 100644
--- a/workflow/scripts/summary.py
+++ b/workflow/scripts/summary.py
@@ -254,71 +254,8 @@ def _economic_retirement(c: str) -> pd.DataFrame:
 # COSTS
 ###
 
-
-def get_operational_costs(n: pypsa.Network) -> pd.DataFrame:
-
-    def _get_energy_one_port(c: pypsa.components.Component) -> pd.DataFrame:
-        return c.pnl.p.abs()
-
-    def _get_energy_multi_port(c: pypsa.components.Component) -> pd.DataFrame:
-        return c.pnl.p0.abs()
-
-    totals = []
-    for c in n.iterate_components(n.one_port_components | n.branch_components):
-        if c.name in ("Generator", "StorageUnit", "Store"):
-            production = _get_energy_one_port(c)
-        elif c.name in ("Link"):
-            production = _get_energy_multi_port(c)
-        else:
-            continue
-
-        marginal_cost = c.pnl.marginal_cost
-        marginal_cost_static = {}
-        for item in [x for x in c.df.index if x not in marginal_cost.columns]:
-            marginal_cost_static[item] = [c.df.at[item, "marginal_cost"]] * len(
-                marginal_cost,
-            )
-        marginal_cost = pd.concat(
-            [
-                marginal_cost,
-                pd.DataFrame(marginal_cost_static, index=marginal_cost.index),
-            ],
-            axis=1,
-        )
-
-        opex = (
-            (production * marginal_cost).fillna(0).groupby(c.df.carrier, axis=1).sum()
-        )
-
-        totals.append(opex)
-
-    return pd.concat(totals, axis=1)
-
-
 def get_capital_costs(n: pypsa.Network) -> pd.DataFrame:
-
-    def _get_new_capacity_MW(c: pypsa.components.Component) -> pd.DataFrame:
-        return (c.df.p_nom_opt - c.df.p_nom).map(lambda x: x if x > 0 else 0)
-
-    def _get_new_capacity_MWh(c: pypsa.components.Component) -> pd.DataFrame:
-        return (c.df.e_nom_opt - c.df.e_nom).map(lambda x: x if x > 0 else 0)
-
-    totals = []
-    for c in n.iterate_components(n.one_port_components | n.branch_components):
-        if c.name in ("Generator", "StorageUnit", "Link"):
-            new_capacity = _get_new_capacity_MW(c)
-        elif c.name in ("Store"):
-            new_capacity = _get_new_capacity_MWh(c)
-        else:
-            continue
-
-        capital_costs = c.df.capital_cost
-
-        capex = (new_capacity * capital_costs).fillna(0).groupby(c.df.carrier).sum()
-
-        totals.append(capex)
-
-    return pd.concat(totals)
+    return n.statistics.capex() - n.statistics.installed_capex()
 
 
 ###
@@ -342,26 +279,28 @@ def get_node_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
                 eff_static[gen] = [c.df.at[gen, "efficiency"]] * len(eff)
             eff = pd.concat([eff, pd.DataFrame(eff_static, index=eff.index)], axis=1)
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0)
+            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
 
             totals.append(
                 (
                     c.pnl.p.mul(1 / eff)
                     .mul(co2_factor)
-                    .groupby(n.generators.bus, axis=1)
+                    .T.groupby(n.generators.bus)
                     .sum()
+                    .T
                 ),
             )
         elif c.name == "Link":  # efficiency taken into account by using p0
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0)
+            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
 
             totals.append(
                 (
                     c.pnl.p0.mul(co2_factor)
-                    .groupby(n.links.bus0, axis=1)
+                    .T.groupby(n.links.bus0)
                     .sum()
                     .rename_axis(index={"bus0": "bus"})
+                    .T
                 ),
             )
     return pd.concat(totals, axis=1)
@@ -389,16 +328,20 @@ def get_tech_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
                 (
                     c.pnl.p.mul(1 / eff)
                     .mul(co2_factor)
-                    .groupby(n.generators.carrier, axis=1)
+                    .T.groupby(n.generators.carrier)
                     .sum()
+                    .T
                 ),
             )
         elif c.name == "Link":  # efficiency taken into account by using p0
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0)
+            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
 
             totals.append(
-                (c.pnl.p0.mul(co2_factor).groupby(n.links.carrier, axis=1).sum()),
+                (c.pnl.p0.mul(co2_factor)
+                .T.groupby(n.links.carrier)
+                .sum()
+                .T),
             )
     return pd.concat(totals, axis=1)
 

From bf4891a261234af21c07fb428174e02b3bd47102 Mon Sep 17 00:00:00 2001
From: Kamran <ktehranchi@stanford.edu>
Date: Tue, 27 Feb 2024 18:15:27 -0800
Subject: [PATCH 03/10] update configs to be consistent

---
 workflow/config/config.default.yaml           | 19 ++++++---------
 workflow/config/tests/config.test.yaml        | 23 ++++++++-----------
 workflow/config/tests/config.test_simple.yaml |  4 ++--
 workflow/config/tests/config.validation.yaml  | 17 +++++---------
 workflow/envs/environment.yaml                |  2 +-
 5 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/workflow/config/config.default.yaml b/workflow/config/config.default.yaml
index 5ffc5600..ed6f2327 100644
--- a/workflow/config/config.default.yaml
+++ b/workflow/config/config.default.yaml
@@ -17,7 +17,7 @@ scenario:
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
   planning_horizons:
-  - 2030   #  # Uncomment to use NREL EFS Demand Forecasts (2030, 2040, 2050)
+  - 2030   #(2030, 2040, 2050)
 
 foresight:  # Only Single Stage Currently
 
@@ -90,7 +90,7 @@ electricity:
     Store: [] #[H2]
     Link: [] #[H2 pipeline]
 
-  demand: #EFS used for given planning_horizons year
+  demand: #EFS used for given planning_horizons year (only ref/mod implemented)
     EFS_case: reference # reference, medium, high
     EFS_speed: moderate # slow, moderate, rapid
 
@@ -259,19 +259,14 @@ clustering:
   cluster_network:
     algorithm: kmeans
     feature: solar+onwind-time
-    aggregation_zones: 'balancing_area' # [balancing_area, state]
+    aggregation_zones: 'state' # [balancing_area, state]
+    exclude_carriers: []
+    consider_efficiency_classes: false
   aggregation_strategies:
     generators:
-      p_nom_max: sum # use "min" for more conservative assumptions
-      p_nom_min: sum
-      p_min_pu: mean
-      marginal_cost: mean
       committable: any
-      ramp_limit_up: mean
-      ramp_limit_down: mean
-      efficiency: mean
-    buses:
-      state: max # temp fix. When fixing state aggregation- change add electricity such that region info not is use is removed.
+      ramp_limit_up: max
+      ramp_limit_down: max
 
 focus_weights:
   # California: 0.5
diff --git a/workflow/config/tests/config.test.yaml b/workflow/config/tests/config.test.yaml
index 62b87545..05e16cfb 100644
--- a/workflow/config/tests/config.test.yaml
+++ b/workflow/config/tests/config.test.yaml
@@ -13,12 +13,12 @@ run:
 scenario:
   interconnect: western #"usa|texas|western|eastern"
   clusters: [30, 100]
-  opts: [Co2L1.0-4H, Co2L0.75-4H, Co2L0.5-4H, Co2L0.25-4H, Co2L0.0-4H]
+  opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE,]
   ll: [vopt, v1.15]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
   planning_horizons:
-  - 2030   #  # Uncomment to use NREL EFS Demand Forecasts (2030, 2040, 2050)
+  - 2030   #(2030, 2040, 2050)
 
 foresight:  # Only Single Stage Currently
 
@@ -78,7 +78,7 @@ electricity:
   retirement: economic # "economic" or "technical"
 
   operational_reserve:
-    activate: true
+    activate: false
     epsilon_load: 0.02
     epsilon_vres: 0.02
     contingency: 4000
@@ -242,7 +242,7 @@ costs:  # based on the potentials, assuming  (0.1 kW/m2 and 10 m2/person)
     battery: 0.
     battery inverter: 0.
   emission_prices: # in currency per tonne emission, only used with the option Ep
-    co2: 0.
+    co2: 20.0
 
 # docs :
 sector:
@@ -263,19 +263,14 @@ clustering:
   cluster_network:
     algorithm: kmeans
     feature: solar+onwind-time
-    aggregation_zones: 'balancing_area' # [balancing_area, state]
+    aggregation_zones: 'state' # [balancing_area, state]
+    exclude_carriers: []
+    consider_efficiency_classes: false
   aggregation_strategies:
     generators:
-      p_nom_max: sum # use "min" for more conservative assumptions
-      p_nom_min: sum
-      p_min_pu: mean
-      marginal_cost: mean
       committable: any
-      ramp_limit_up: mean
-      ramp_limit_down: mean
-      efficiency: mean
-    buses:
-      state: max # temp fix. When fixing state aggregation- change add electricity such that region info not is use is removed.
+      ramp_limit_up: max
+      ramp_limit_down: max
 
 focus_weights:
   # California: 0.5
diff --git a/workflow/config/tests/config.test_simple.yaml b/workflow/config/tests/config.test_simple.yaml
index db252965..0f0b582d 100644
--- a/workflow/config/tests/config.test_simple.yaml
+++ b/workflow/config/tests/config.test_simple.yaml
@@ -17,7 +17,7 @@ scenario:
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
   planning_horizons:
-  - 2030   #  # Uncomment to use NREL EFS Demand Forecasts (2030, 2040, 2050)
+  - 2030   #(2030, 2040, 2050)
 
 foresight:  # Only Single Stage Currently
 
@@ -259,7 +259,7 @@ clustering:
   cluster_network:
     algorithm: kmeans
     feature: solar+onwind-time
-    aggregation_zones: 'balancing_area' # [balancing_area, state]
+    aggregation_zones: 'state' # [balancing_area, state]
     exclude_carriers: []
     consider_efficiency_classes: false
   aggregation_strategies:
diff --git a/workflow/config/tests/config.validation.yaml b/workflow/config/tests/config.validation.yaml
index e3a55070..23883785 100644
--- a/workflow/config/tests/config.validation.yaml
+++ b/workflow/config/tests/config.validation.yaml
@@ -17,7 +17,7 @@ scenario:
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
   planning_horizons:
-  # - 2030   #  # Uncomment to use NREL EFS Demand Forecasts (2030, 2040, 2050)
+  # - 2030   #(2030, 2040, 2050)
 
 foresight:  # Only Single Stage Currently
 
@@ -260,19 +260,14 @@ clustering:
   cluster_network:
     algorithm: kmeans
     feature: solar+onwind-time
-    aggregation_zones: 'balancing_area' # balancing_area, country, or state. # Currently issue in State aggregation
+    aggregation_zones: 'balancing_area' # [balancing_area, state]
+    exclude_carriers: []
+    consider_efficiency_classes: false
   aggregation_strategies:
     generators:
-      p_nom_max: sum # use "min" for more conservative assumptions
-      p_nom_min: sum
-      p_min_pu: mean
-      marginal_cost: mean
       committable: any
-      ramp_limit_up: mean
-      ramp_limit_down: mean
-      efficiency: mean
-    buses:
-      state: max # temp fix. When fixing state aggregation- change add electricity such that region info not is use is removed.
+      ramp_limit_up: max
+      ramp_limit_down: max
 
 focus_weights:
   # California: 0.5
diff --git a/workflow/envs/environment.yaml b/workflow/envs/environment.yaml
index c40f0617..06fcfb73 100644
--- a/workflow/envs/environment.yaml
+++ b/workflow/envs/environment.yaml
@@ -1,4 +1,4 @@
-name: pypsa-usa-new
+name: pypsa-usa
 channels:
 - conda-forge
 - bioconda

From 7237cc5a60aba9d0800eee8fd752f064962e8b96 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 Feb 2024 02:25:55 +0000
Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 workflow/config/tests/config.test.yaml        |  2 +-
 workflow/config/tests/config.test_simple.yaml |  2 +-
 workflow/envs/environment.yaml                |  2 +-
 workflow/scripts/add_electricity.py           | 10 ++-
 workflow/scripts/build_base_network.py        |  1 -
 workflow/scripts/build_demand.py              |  6 +-
 workflow/scripts/cluster_network_eur.py       | 76 +++++++++++++------
 workflow/scripts/plot_figures.py              | 35 +++++----
 workflow/scripts/simplify_network.py          | 20 ++---
 workflow/scripts/summary.py                   | 24 ++++--
 workflow/tests/test_yaml_structure.py         | 22 ++++--
 11 files changed, 128 insertions(+), 72 deletions(-)

diff --git a/workflow/config/tests/config.test.yaml b/workflow/config/tests/config.test.yaml
index 05e16cfb..5fdad016 100644
--- a/workflow/config/tests/config.test.yaml
+++ b/workflow/config/tests/config.test.yaml
@@ -13,7 +13,7 @@ run:
 scenario:
   interconnect: western #"usa|texas|western|eastern"
   clusters: [30, 100]
-  opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE,]
+  opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE]
   ll: [vopt, v1.15]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
diff --git a/workflow/config/tests/config.test_simple.yaml b/workflow/config/tests/config.test_simple.yaml
index 0f0b582d..e6b0d2c3 100644
--- a/workflow/config/tests/config.test_simple.yaml
+++ b/workflow/config/tests/config.test_simple.yaml
@@ -12,7 +12,7 @@ run:
 scenario:
   interconnect: [western] #"usa|texas|western|eastern"
   clusters: [40]
-  opts: [Co2L0.1,]
+  opts: [Co2L0.1]
   ll: [v1.0]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
diff --git a/workflow/envs/environment.yaml b/workflow/envs/environment.yaml
index 06fcfb73..a25a80e0 100644
--- a/workflow/envs/environment.yaml
+++ b/workflow/envs/environment.yaml
@@ -62,4 +62,4 @@ dependencies:
   - vresutils==0.3.1
   - tsam>=1.1.0
   - gurobipy==10.0.3
-  - highspy
\ No newline at end of file
+  - highspy
diff --git a/workflow/scripts/add_electricity.py b/workflow/scripts/add_electricity.py
index 19d2133a..626a9c9d 100755
--- a/workflow/scripts/add_electricity.py
+++ b/workflow/scripts/add_electricity.py
@@ -735,7 +735,7 @@ def match_plant_to_bus(n, plants):
 
 
 def attach_renewable_capacities_to_atlite(
-    n: pypsa.Network, 
+    n: pypsa.Network,
     plants_df: pd.DataFrame,
     renewable_carriers: list,
 ):
@@ -935,14 +935,14 @@ def attach_wind_and_solar(
             p_nom_max_bus = (
                 ds["p_nom_max"]
                 .to_dataframe()
-                .merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
+                .merge(bus2sub[["bus_id", "sub_id"]], left_on="bus", right_on="sub_id")
                 .set_index("bus_id")
                 .p_nom_max
             )
             weight_bus = (
                 ds["weight"]
                 .to_dataframe()
-                .merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
+                .merge(bus2sub[["bus_id", "sub_id"]], left_on="bus", right_on="sub_id")
                 .set_index("bus_id")
                 .weight
             )
@@ -950,7 +950,9 @@ def attach_wind_and_solar(
                 ds["profile"]
                 .transpose("time", "bus")
                 .to_pandas()
-                .T.merge(bus2sub[['bus_id','sub_id']], left_on="bus", right_on="sub_id")
+                .T.merge(
+                    bus2sub[["bus_id", "sub_id"]], left_on="bus", right_on="sub_id"
+                )
                 .set_index("bus_id")
                 .drop(columns="sub_id")
                 .T
diff --git a/workflow/scripts/build_base_network.py b/workflow/scripts/build_base_network.py
index 00e07409..14da2b5e 100644
--- a/workflow/scripts/build_base_network.py
+++ b/workflow/scripts/build_base_network.py
@@ -726,7 +726,6 @@ def main(snakemake):
     )
     lines_gis.to_csv(snakemake.output.lines_gis)
 
-    
     # export network
     n.export_to_netcdf(snakemake.output.network)
 
diff --git a/workflow/scripts/build_demand.py b/workflow/scripts/build_demand.py
index 09fb9275..b3f5a0c1 100644
--- a/workflow/scripts/build_demand.py
+++ b/workflow/scripts/build_demand.py
@@ -180,12 +180,12 @@ def prepare_efs_demand(
             .apply(
                 lambda group: group.loc[
                     group.drop(columns="UTC_Time").first_valid_index()
-                ]
+                ],
             )
             .drop(columns="UTC_Time")
         )
 
-    #take the intersection of the demand and the snapshots by hour of year
+    # take the intersection of the demand and the snapshots by hour of year
     hoy = (n.snapshots.dayofyear - 1) * 24 + n.snapshots.hour
     demand_new = demand_new.loc[hoy]
     demand_new.index = n.snapshots
@@ -263,7 +263,7 @@ def main(snakemake):
         )
     else:
         raise ValueError(
-            "Invalid demand_type. Supported values are 'ads', and 'pypsa-usa'."
+            "Invalid demand_type. Supported values are 'ads', and 'pypsa-usa'.",
         )
 
     demand_per_bus.to_csv(snakemake.output.demand, index=True)
diff --git a/workflow/scripts/cluster_network_eur.py b/workflow/scripts/cluster_network_eur.py
index 45c44130..f30fa0b8 100644
--- a/workflow/scripts/cluster_network_eur.py
+++ b/workflow/scripts/cluster_network_eur.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: : 2017-2023 The PyPSA-Eur Authors
 #
 # SPDX-License-Identifier: MIT
@@ -159,13 +158,14 @@ def normed(x):
 def weighting_for_country(n, x):
     conv_carriers = {"OCGT", "CCGT", "PHS", "hydro"}
     gen = n.generators.loc[n.generators.carrier.isin(conv_carriers)].groupby(
-        "bus"
+        "bus",
     ).p_nom.sum().reindex(n.buses.index, fill_value=0.0) + n.storage_units.loc[
         n.storage_units.carrier.isin(conv_carriers)
     ].groupby(
-        "bus"
+        "bus",
     ).p_nom.sum().reindex(
-        n.buses.index, fill_value=0.0
+        n.buses.index,
+        fill_value=0.0,
     )
     load = n.loads_t.p_set.mean().groupby(n.loads.bus).sum()
 
@@ -188,7 +188,8 @@ def get_feature_for_hac(n, buses_i=None, feature=None):
     if "offwind" in carriers:
         carriers.remove("offwind")
         carriers = np.append(
-            carriers, n.generators.carrier.filter(like="offwind").unique()
+            carriers,
+            n.generators.carrier.filter(like="offwind").unique(),
         )
 
     if feature.split("-")[1] == "cap":
@@ -207,7 +208,7 @@ def get_feature_for_hac(n, buses_i=None, feature=None):
         for carrier in carriers:
             gen_i = n.generators.query("carrier == @carrier").index
             attach = n.generators_t.p_max_pu[gen_i].rename(
-                columns=n.generators.loc[gen_i].bus
+                columns=n.generators.loc[gen_i].bus,
             )
             feature_data = pd.concat([feature_data, attach], axis=0)[buses_i]
 
@@ -257,7 +258,9 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
         logger.warning("Using custom focus weights for determining number of clusters.")
 
     assert np.isclose(
-        L.sum(), 1.0, rtol=1e-3
+        L.sum(),
+        1.0,
+        rtol=1e-3,
     ), f"Country weights L must sum up to 1.0 when distributing clusters. Is {L.sum()}."
 
     m = po.ConcreteModel()
@@ -275,7 +278,7 @@ def n_bounds(model, *n_id):
     opt = po.SolverFactory(solver_name)
     if not opt.has_capability("quadratic_objective"):
         logger.warning(
-            f"The configured solver `{solver_name}` does not support quadratic objectives. Falling back to `ipopt`."
+            f"The configured solver `{solver_name}` does not support quadratic objectives. Falling back to `ipopt`.",
         )
         opt = po.SolverFactory("ipopt")
 
@@ -310,7 +313,8 @@ def fix_country_assignment_for_hac(n):
             m = n[n.buses.country == country].copy()
 
             _, labels = csgraph.connected_components(
-                m.adjacency_matrix(), directed=False
+                m.adjacency_matrix(),
+                directed=False,
             )
 
             component = pd.Series(labels, index=m.buses.index)
@@ -322,16 +326,16 @@ def fix_country_assignment_for_hac(n):
                 ].index[0]
 
                 neighbor_bus = n.lines.query(
-                    "bus0 == @disconnected_bus or bus1 == @disconnected_bus"
+                    "bus0 == @disconnected_bus or bus1 == @disconnected_bus",
                 ).iloc[0][["bus0", "bus1"]]
                 new_country = list(
-                    set(n.buses.loc[neighbor_bus].country) - set([country])
+                    set(n.buses.loc[neighbor_bus].country) - {country},
                 )[0]
 
                 logger.info(
                     f"overwriting country `{country}` of bus `{disconnected_bus}` "
                     f"to new country `{new_country}`, because it is disconnected "
-                    "from its initial inter-country transmission grid."
+                    "from its initial inter-country transmission grid.",
                 )
                 n.buses.at[disconnected_bus, "country"] = new_country
         return n
@@ -343,13 +347,16 @@ def fix_country_assignment_for_hac(n):
     if (algorithm != "hac") and (feature is not None):
         logger.warning(
             f"Keyword argument feature is only valid for algorithm `hac`. "
-            f"Given feature `{feature}` will be ignored."
+            f"Given feature `{feature}` will be ignored.",
         )
 
     n.determine_network_topology()
 
     n_clusters = distribute_clusters(
-        n, n_clusters, focus_weights=focus_weights, solver_name=solver_name
+        n,
+        n_clusters,
+        focus_weights=focus_weights,
+        solver_name=solver_name,
     )
 
     def busmap_for_country(x):
@@ -361,19 +368,28 @@ def busmap_for_country(x):
 
         if algorithm == "kmeans":
             return prefix + busmap_by_kmeans(
-                n, weight, n_clusters[x.name], buses_i=x.index, **algorithm_kwds
+                n,
+                weight,
+                n_clusters[x.name],
+                buses_i=x.index,
+                **algorithm_kwds,
             )
         elif algorithm == "hac":
             return prefix + busmap_by_hac(
-                n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]
+                n,
+                n_clusters[x.name],
+                buses_i=x.index,
+                feature=feature.loc[x.index],
             )
         elif algorithm == "modularity":
             return prefix + busmap_by_greedy_modularity(
-                n, n_clusters[x.name], buses_i=x.index
+                n,
+                n_clusters[x.name],
+                buses_i=x.index,
             )
         else:
             raise ValueError(
-                f"`algorithm` must be one of 'kmeans' or 'hac'. Is {algorithm}."
+                f"`algorithm` must be one of 'kmeans' or 'hac'. Is {algorithm}.",
             )
 
     return (
@@ -399,7 +415,12 @@ def clustering_for_n_clusters(
 ):
     if not isinstance(custom_busmap, pd.Series):
         busmap = busmap_for_n_clusters(
-            n, n_clusters, solver_name, focus_weights, algorithm, feature
+            n,
+            n_clusters,
+            solver_name,
+            focus_weights,
+            algorithm,
+            feature,
         )
     else:
         busmap = custom_busmap
@@ -496,7 +517,9 @@ def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
             else:
                 labels = ["low", "medium", "high"]
                 suffix = pd.cut(
-                    gens.efficiency, bins=[0, low, high, 1], labels=labels
+                    gens.efficiency,
+                    bins=[0, low, high, 1],
+                    labels=labels,
                 ).astype(str)
                 carriers += [f"{c} {label} efficiency" for label in labels]
                 n.generators.carrier.update(gens.carrier + " " + suffix + " efficiency")
@@ -507,7 +530,11 @@ def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
         busmap = n.buses.index.to_series()
         linemap = n.lines.index.to_series()
         clustering = pypsa.clustering.spatial.Clustering(
-            n, busmap, linemap, linemap, pd.Series(dtype="O")
+            n,
+            busmap,
+            linemap,
+            linemap,
+            pd.Series(dtype="O"),
         )
     else:
         Nyears = n.snapshot_weightings.objective.sum() / 8760
@@ -522,7 +549,9 @@ def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
         custom_busmap = params.custom_busmap
         if custom_busmap:
             custom_busmap = pd.read_csv(
-                snakemake.input.custom_busmap, index_col=0, squeeze=True
+                snakemake.input.custom_busmap,
+                index_col=0,
+                squeeze=True,
             )
             custom_busmap.index = custom_busmap.index.astype(str)
             logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
@@ -549,7 +578,8 @@ def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
         nc.generators["carrier"] = nc.generators.carrier.replace(labels, "", regex=True)
 
     clustering.network.meta = dict(
-        snakemake.config, **dict(wildcards=dict(snakemake.wildcards))
+        snakemake.config,
+        **dict(wildcards=dict(snakemake.wildcards)),
     )
     clustering.network.export_to_netcdf(snakemake.output.network)
     for attr in (
diff --git a/workflow/scripts/plot_figures.py b/workflow/scripts/plot_figures.py
index c1b64d93..e8e8c5da 100644
--- a/workflow/scripts/plot_figures.py
+++ b/workflow/scripts/plot_figures.py
@@ -605,18 +605,22 @@ def plot_production_bar(
 
     # get data
     energy_mix = n.statistics.dispatch().mul(1e-3)  # MW -> GW
-    energy_mix.name = "dispatch" 
-    energy_mix = energy_mix[energy_mix.index.get_level_values('component').isin(['Generator', 'StorageUnit'])]
+    energy_mix.name = "dispatch"
+    energy_mix = energy_mix[
+        energy_mix.index.get_level_values("component").isin(
+            ["Generator", "StorageUnit"]
+        )
+    ]
     energy_mix = energy_mix.groupby("carrier").sum().reset_index()
     color_palette = get_color_palette(n)
 
     fig, ax = plt.subplots(figsize=(10, 10))
     sns.barplot(
-        data=energy_mix, 
-        y="carrier", 
-        x="dispatch", 
+        data=energy_mix,
+        y="carrier",
+        x="dispatch",
         palette=color_palette,
-        )
+    )
 
     ax.set_title(create_title("Dispatch [GWh]", **wildcards))
     ax.set_ylabel("")
@@ -643,14 +647,14 @@ def plot_costs_bar(
     costs = pd.concat(
         [opex, capex],
         axis=1,
-        keys=["OPEX", "CAPEX"]).reset_index()
+        keys=["OPEX", "CAPEX"],
+    ).reset_index()
     costs = costs.groupby("carrier").sum().reset_index()  # groups batteries
 
     # plot data
     fig, ax = plt.subplots(figsize=(10, 10))
     color_palette = get_color_palette(n)
 
-
     sns.barplot(
         y="carrier",
         x="CAPEX",
@@ -1135,7 +1139,6 @@ def plot_capacity_additions_bar(
     # plot data (option 2)
     # using matplotlib for tech group colours
 
-
     color_palette = get_color_palette(n)
     color_mapper = [color_palette[carrier] for carrier in capacity.index]
     bar_height = 0.35
@@ -1270,10 +1273,10 @@ def plot_capacity_additions_bar(
         **snakemake.wildcards,
     )
     plot_costs_bar(
-        n, 
-        carriers, 
-        snakemake.output["costs_bar"], 
-        **snakemake.wildcards
+        n,
+        carriers,
+        snakemake.output["costs_bar"],
+        **snakemake.wildcards,
     )
     plot_production_bar(
         n,
@@ -1294,9 +1297,9 @@ def plot_capacity_additions_bar(
         **snakemake.wildcards,
     )
     plot_hourly_emissions(
-        n, 
-        snakemake.output["emissions_area"], 
-        **snakemake.wildcards
+        n,
+        snakemake.output["emissions_area"],
+        **snakemake.wildcards,
     )
     plot_hourly_emissions_html(
         n,
diff --git a/workflow/scripts/simplify_network.py b/workflow/scripts/simplify_network.py
index 39e85e22..553b7e53 100644
--- a/workflow/scripts/simplify_network.py
+++ b/workflow/scripts/simplify_network.py
@@ -86,16 +86,16 @@ def aggregate_to_substations(
         aggregate_one_ports=["Load", "StorageUnit"],
         line_length_factor=1.0,
         bus_strategies={
-            "type": 'max',
-            "Pd": 'sum',
+            "type": "max",
+            "Pd": "sum",
         },
         generator_strategies={
-            "marginal_cost": 'mean',
-            "p_nom_min": 'sum',
-            "p_min_pu": 'mean',
-            "p_max_pu": 'mean',
-            "ramp_limit_up": 'max',
-            "ramp_limit_down": 'max',
+            "marginal_cost": "mean",
+            "p_nom_min": "sum",
+            "p_min_pu": "mean",
+            "p_max_pu": "mean",
+            "ramp_limit_up": "max",
+            "ramp_limit_down": "max",
         },
     )
 
@@ -134,7 +134,9 @@ def aggregate_to_substations(
     )
     network_s.lines["type"] = np.nan
 
-    network_s.buses.drop(columns = ['balancing_area', 'state', 'substation_off', 'sub_id'], inplace=True)
+    network_s.buses.drop(
+        columns=["balancing_area", "state", "substation_off", "sub_id"], inplace=True
+    )
     return network_s
 
 
diff --git a/workflow/scripts/summary.py b/workflow/scripts/summary.py
index c71b99e3..67bd5895 100644
--- a/workflow/scripts/summary.py
+++ b/workflow/scripts/summary.py
@@ -254,6 +254,7 @@ def _economic_retirement(c: str) -> pd.DataFrame:
 # COSTS
 ###
 
+
 def get_capital_costs(n: pypsa.Network) -> pd.DataFrame:
     return n.statistics.capex() - n.statistics.installed_capex()
 
@@ -279,7 +280,11 @@ def get_node_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
                 eff_static[gen] = [c.df.at[gen, "efficiency"]] * len(eff)
             eff = pd.concat([eff, pd.DataFrame(eff_static, index=eff.index)], axis=1)
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
+            co2_factor = (
+                c.df.carrier.map(n.carriers.co2_emissions)
+                .fillna(0)
+                .infer_objects(copy=False)
+            )
 
             totals.append(
                 (
@@ -292,7 +297,11 @@ def get_node_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
             )
         elif c.name == "Link":  # efficiency taken into account by using p0
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
+            co2_factor = (
+                c.df.carrier.map(n.carriers.co2_emissions)
+                .fillna(0)
+                .infer_objects(copy=False)
+            )
 
             totals.append(
                 (
@@ -335,13 +344,14 @@ def get_tech_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
             )
         elif c.name == "Link":  # efficiency taken into account by using p0
 
-            co2_factor = c.df.carrier.map(n.carriers.co2_emissions).fillna(0).infer_objects(copy=False)
+            co2_factor = (
+                c.df.carrier.map(n.carriers.co2_emissions)
+                .fillna(0)
+                .infer_objects(copy=False)
+            )
 
             totals.append(
-                (c.pnl.p0.mul(co2_factor)
-                .T.groupby(n.links.carrier)
-                .sum()
-                .T),
+                (c.pnl.p0.mul(co2_factor).T.groupby(n.links.carrier).sum().T),
             )
     return pd.concat(totals, axis=1)
 
diff --git a/workflow/tests/test_yaml_structure.py b/workflow/tests/test_yaml_structure.py
index 6c1cae89..b2c9ebfe 100644
--- a/workflow/tests/test_yaml_structure.py
+++ b/workflow/tests/test_yaml_structure.py
@@ -1,12 +1,16 @@
 import yaml
 
+
 def load_yaml_file(filepath):
-    with open(filepath, 'r') as file:
+    with open(filepath) as file:
         return yaml.safe_load(file)
 
-def compare_structures(data1, data2, path=''):
+
+def compare_structures(data1, data2, path=""):
     if type(data1) != type(data2):
-        print(f"Type mismatch at {path}: {type(data1).__name__} vs {type(data2).__name__}")
+        print(
+            f"Type mismatch at {path}: {type(data1).__name__} vs {type(data2).__name__}"
+        )
         return False
 
     if isinstance(data1, dict):
@@ -14,7 +18,9 @@ def compare_structures(data1, data2, path=''):
             if key not in data2:
                 print(f"Missing key in second structure at {path}: {key}")
                 continue
-            compare_structures(data1[key], data2[key], path=f"{path}.{key}" if path else key)
+            compare_structures(
+                data1[key], data2[key], path=f"{path}.{key}" if path else key
+            )
         for key in data2:
             if key not in data1:
                 print(f"Missing key in first structure at {path}: {key}")
@@ -30,15 +36,19 @@ def compare_structures(data1, data2, path=''):
         # This part ignores values if they are not container types
         return True
 
+
 def test_yaml_structure(filepath1, filepath2):
     data1 = load_yaml_file(filepath1)
     data2 = load_yaml_file(filepath2)
-    
+
     print("Comparing structure...")
     if compare_structures(data1, data2):
         print("The structures match.")
     else:
         print("The structures do not match.")
 
+
 # Example usage
-test_yaml_structure('../config/tests/config.test_simple.yaml', '../config/config.default.yaml')
+test_yaml_structure(
+    "../config/tests/config.test_simple.yaml", "../config/config.default.yaml"
+)

From ed48378ee68a34ab0dd445a1ea41c8e0f3ae64fc Mon Sep 17 00:00:00 2001
From: Kamran <ktehranchi@stanford.edu>
Date: Tue, 27 Feb 2024 18:55:51 -0800
Subject: [PATCH 05/10] update configs. add sector rule resources. removes use
 of local clustering script.

---
 workflow/config/config.cluster.yaml           |  4 ++--
 workflow/config/tests/config.test.yaml        |  4 ++--
 workflow/config/tests/config.test_simple.yaml |  4 ++--
 workflow/rules/build_electricity.smk          | 12 ++++--------
 workflow/rules/build_sector.smk               |  5 +++++
 workflow/run_slurm.sh                         |  2 +-
 workflow/scripts/cluster_network_eur.py       |  1 +
 7 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/workflow/config/config.cluster.yaml b/workflow/config/config.cluster.yaml
index c8681777..fb3f2424 100644
--- a/workflow/config/config.cluster.yaml
+++ b/workflow/config/config.cluster.yaml
@@ -5,8 +5,8 @@ __default__:
   walltime: 00:30:00   # time limit for each job
   cpus_per_task: 1   # number of cores per job
   chdir: $GROUP_HOME/kamran/pypsa-usa/workflow
-  output: logs/slurm/{rule}-%j.out
-  error: logs/slurm/err_{rule}-%j.err
+  output: logs/slurm/{rule}/log-%j.out
+  error: logs/slurm/{rule}/errlog-%j.err
 
 build_renewable_profiles:
   walltime: 02:00:00
diff --git a/workflow/config/tests/config.test.yaml b/workflow/config/tests/config.test.yaml
index 05e16cfb..81ee0b32 100644
--- a/workflow/config/tests/config.test.yaml
+++ b/workflow/config/tests/config.test.yaml
@@ -12,9 +12,9 @@ run:
 # docs :
 scenario:
   interconnect: western #"usa|texas|western|eastern"
-  clusters: [30, 100]
+  clusters: [40]
   opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE,]
-  ll: [vopt, v1.15]
+  ll: [v1.05]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
   planning_horizons:
diff --git a/workflow/config/tests/config.test_simple.yaml b/workflow/config/tests/config.test_simple.yaml
index 0f0b582d..d3360575 100644
--- a/workflow/config/tests/config.test_simple.yaml
+++ b/workflow/config/tests/config.test_simple.yaml
@@ -12,7 +12,7 @@ run:
 scenario:
   interconnect: [western] #"usa|texas|western|eastern"
   clusters: [40]
-  opts: [Co2L0.1,]
+  opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE]
   ll: [v1.0]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
@@ -29,7 +29,7 @@ countries: [US]
 
 snapshots:
   start: "2019-01-01"
-  end: "2019-01-16"
+  end: "2019-02-01"
   inclusive: 'left'
 
 # docs :
diff --git a/workflow/rules/build_electricity.smk b/workflow/rules/build_electricity.smk
index d90e6058..a7ce4581 100644
--- a/workflow/rules/build_electricity.smk
+++ b/workflow/rules/build_electricity.smk
@@ -237,7 +237,7 @@ rule build_demand:
         BENCHMARKS + "{interconnect}/build_demand"
     threads: 1
     resources:
-        mem_mb=10000,
+        mem_mb=12000,
     script:
         "../scripts/build_demand.py"
 
@@ -318,8 +318,6 @@ rule simplify_network:
     threads: 2
     resources:
         mem_mb=10000,
-    group:
-        "agg_network"
     script:
         "../scripts/simplify_network.py"
 
@@ -361,10 +359,8 @@ rule cluster_network:
     threads: 1
     resources:
         mem_mb=10000,
-    group:
-        "agg_network"
     script:
-        "../scripts/cluster_network_eur.py"
+        "../scripts/subworkflows/pypsa-eur/scripts/cluster_network.py"
 
 
 rule add_extra_components:
@@ -381,7 +377,7 @@ rule add_extra_components:
     resources:
         mem_mb=4000,
     group:
-        "agg_network"
+        "prepare"
     script:
         "../scripts/add_extra_components.py"
 
@@ -406,7 +402,7 @@ rule prepare_network:
     resources:
         mem_mb=4000,
     group:
-        "agg_network"
+        "prepare"
     log:
         "logs/prepare_network",
     script:
diff --git a/workflow/rules/build_sector.smk b/workflow/rules/build_sector.smk
index beb7b5f0..ed18dee9 100644
--- a/workflow/rules/build_sector.smk
+++ b/workflow/rules/build_sector.smk
@@ -31,6 +31,11 @@ rule add_sectors:
     output:
         network=RESOURCES
         + "{interconnect}/elec_s_{clusters}_ec_l{ll}_{opts}_{sector}.nc",
+    group: 
+        "prepare"
+    threads: 1
+    resources:
+        mem_mb=4000,        
     script:
         "../scripts/add_sectors.py"
 
diff --git a/workflow/run_slurm.sh b/workflow/run_slurm.sh
index 75d71459..4b9d6b1d 100644
--- a/workflow/run_slurm.sh
+++ b/workflow/run_slurm.sh
@@ -1,2 +1,2 @@
 # SLURM specifications made in default.cluster.yaml & the individual rules
-snakemake --cluster "sbatch -A {cluster.account} --mail-type ALL --mail-user {cluster.email} -p {cluster.partition} -t {cluster.walltime} -o {cluster.output} -e {cluster.error} -c {threads} --mem {resources.mem_mb}" --cluster-config config/config.cluster.yaml --jobs 10 --latency-wait 10
+snakemake --cluster "sbatch -A {cluster.account} --mail-type ALL --mail-user {cluster.email} -p {cluster.partition} -t {cluster.walltime} -o {cluster.output} -e {cluster.error} -c {threads} --mem {resources.mem_mb}" --cluster-config config/config.cluster.yaml --jobs 10 --latency-wait 60
diff --git a/workflow/scripts/cluster_network_eur.py b/workflow/scripts/cluster_network_eur.py
index 45c44130..26dea6bb 100644
--- a/workflow/scripts/cluster_network_eur.py
+++ b/workflow/scripts/cluster_network_eur.py
@@ -128,6 +128,7 @@
 import warnings
 from functools import reduce
 
+import os
 import geopandas as gpd
 import matplotlib.pyplot as plt
 import numpy as np

From cfd862ff5bb75afa0f2d0b05b20114ed4fac19e3 Mon Sep 17 00:00:00 2001
From: Kamran <ktehranchi@stanford.edu>
Date: Tue, 27 Feb 2024 18:56:28 -0800
Subject: [PATCH 06/10] rm old clustering script

---
 workflow/scripts/cluster_network_eur.py | 565 ------------------------
 1 file changed, 565 deletions(-)
 delete mode 100644 workflow/scripts/cluster_network_eur.py

diff --git a/workflow/scripts/cluster_network_eur.py b/workflow/scripts/cluster_network_eur.py
deleted file mode 100644
index 26dea6bb..00000000
--- a/workflow/scripts/cluster_network_eur.py
+++ /dev/null
@@ -1,565 +0,0 @@
-# -*- coding: utf-8 -*-
-# SPDX-FileCopyrightText: : 2017-2023 The PyPSA-Eur Authors
-#
-# SPDX-License-Identifier: MIT
-
-# coding: utf-8
-
-# ADAPTED FROM PyPSA-Eur for PyPSA-USA
-"""
-Creates networks clustered to ``{cluster}`` number of zones with aggregated
-buses, generators and transmission corridors.
-
-Relevant Settings
------------------
-
-.. code:: yaml
-
-    clustering:
-      cluster_network:
-      aggregation_strategies:
-
-    focus_weights:
-
-    solving:
-        solver:
-            name:
-
-    lines:
-        length_factor:
-
-.. seealso::
-    Documentation of the configuration file ``config/config.yaml`` at
-    :ref:`toplevel_cf`, :ref:`renewable_cf`, :ref:`solving_cf`, :ref:`lines_cf`
-
-Inputs
-------
-
-- ``resources/regions_onshore_elec_s{simpl}.geojson``: confer :ref:`simplify`
-- ``resources/regions_offshore_elec_s{simpl}.geojson``: confer :ref:`simplify`
-- ``resources/busmap_elec_s{simpl}.csv``: confer :ref:`simplify`
-- ``networks/elec_s{simpl}.nc``: confer :ref:`simplify`
-- ``data/custom_busmap_elec_s{simpl}_{clusters}.csv``: optional input
-
-Outputs
--------
-
-- ``resources/regions_onshore_elec_s{simpl}_{clusters}.geojson``:
-
-    .. image:: img/regions_onshore_elec_s_X.png
-        :scale: 33 %
-
-- ``resources/regions_offshore_elec_s{simpl}_{clusters}.geojson``:
-
-    .. image:: img/regions_offshore_elec_s_X.png
-        :scale: 33 %
-
-- ``resources/busmap_elec_s{simpl}_{clusters}.csv``: Mapping of buses from ``networks/elec_s{simpl}.nc`` to ``networks/elec_s{simpl}_{clusters}.nc``;
-- ``resources/linemap_elec_s{simpl}_{clusters}.csv``: Mapping of lines from ``networks/elec_s{simpl}.nc`` to ``networks/elec_s{simpl}_{clusters}.nc``;
-- ``networks/elec_s{simpl}_{clusters}.nc``:
-
-    .. image:: img/elec_s_X.png
-        :scale: 40  %
-
-Description
------------
-
-.. note::
-
-    **Why is clustering used both in** ``simplify_network`` **and** ``cluster_network`` **?**
-
-        Consider for example a network ``networks/elec_s100_50.nc`` in which
-        ``simplify_network`` clusters the network to 100 buses and in a second
-        step ``cluster_network``` reduces it down to 50 buses.
-
-        In preliminary tests, it turns out, that the principal effect of
-        changing spatial resolution is actually only partially due to the
-        transmission network. It is more important to differentiate between
-        wind generators with higher capacity factors from those with lower
-        capacity factors, i.e. to have a higher spatial resolution in the
-        renewable generation than in the number of buses.
-
-        The two-step clustering allows to study this effect by looking at
-        networks like ``networks/elec_s100_50m.nc``. Note the additional
-        ``m`` in the ``{cluster}`` wildcard. So in the example network
-        there are still up to 100 different wind generators.
-
-        In combination these two features allow you to study the spatial
-        resolution of the transmission network separately from the
-        spatial resolution of renewable generators.
-
-    **Is it possible to run the model without the** ``simplify_network`` **rule?**
-
-        No, the network clustering methods in the PyPSA module
-        `pypsa.clustering.spatial <https://github.com/PyPSA/PyPSA/blob/master/pypsa/clustering/spatial.py>`_
-        do not work reliably with multiple voltage levels and transformers.
-
-.. tip::
-    The rule :mod:`cluster_networks` runs
-    for all ``scenario`` s in the configuration file
-    the rule :mod:`cluster_network`.
-
-Exemplary unsolved network clustered to 512 nodes:
-
-.. image:: img/elec_s_512.png
-    :scale: 40  %
-    :align: center
-
-Exemplary unsolved network clustered to 256 nodes:
-
-.. image:: img/elec_s_256.png
-    :scale: 40  %
-    :align: center
-
-Exemplary unsolved network clustered to 128 nodes:
-
-.. image:: img/elec_s_128.png
-    :scale: 40  %
-    :align: center
-
-Exemplary unsolved network clustered to 37 nodes:
-
-.. image:: img/elec_s_37.png
-    :scale: 40  %
-    :align: center
-"""
-
-import logging
-import warnings
-from functools import reduce
-
-import os
-import geopandas as gpd
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import pyomo.environ as po
-import pypsa
-import seaborn as sns
-from _helpers import configure_logging, update_p_nom_max
-from pypsa.clustering.spatial import (
-    busmap_by_greedy_modularity,
-    busmap_by_hac,
-    busmap_by_kmeans,
-    get_clustering_from_busmap,
-)
-
-warnings.filterwarnings(action="ignore", category=UserWarning)
-
-from add_electricity import load_costs
-
-idx = pd.IndexSlice
-
-logger = logging.getLogger(__name__)
-
-
-def normed(x):
-    return (x / x.sum()).fillna(0.0)
-
-
-def weighting_for_country(n, x):
-    conv_carriers = {"OCGT", "CCGT", "PHS", "hydro"}
-    gen = n.generators.loc[n.generators.carrier.isin(conv_carriers)].groupby(
-        "bus"
-    ).p_nom.sum().reindex(n.buses.index, fill_value=0.0) + n.storage_units.loc[
-        n.storage_units.carrier.isin(conv_carriers)
-    ].groupby(
-        "bus"
-    ).p_nom.sum().reindex(
-        n.buses.index, fill_value=0.0
-    )
-    load = n.loads_t.p_set.mean().groupby(n.loads.bus).sum()
-
-    b_i = x.index
-    g = normed(gen.reindex(b_i, fill_value=0))
-    l = normed(load.reindex(b_i, fill_value=0))
-
-    w = g + l
-    return (w * (100.0 / w.max())).clip(lower=1.0).astype(int)
-
-
-def get_feature_for_hac(n, buses_i=None, feature=None):
-    if buses_i is None:
-        buses_i = n.buses.index
-
-    if feature is None:
-        feature = "solar+onwind-time"
-
-    carriers = feature.split("-")[0].split("+")
-    if "offwind" in carriers:
-        carriers.remove("offwind")
-        carriers = np.append(
-            carriers, n.generators.carrier.filter(like="offwind").unique()
-        )
-
-    if feature.split("-")[1] == "cap":
-        feature_data = pd.DataFrame(index=buses_i, columns=carriers)
-        for carrier in carriers:
-            gen_i = n.generators.query("carrier == @carrier").index
-            attach = (
-                n.generators_t.p_max_pu[gen_i]
-                .mean()
-                .rename(index=n.generators.loc[gen_i].bus)
-            )
-            feature_data[carrier] = attach
-
-    if feature.split("-")[1] == "time":
-        feature_data = pd.DataFrame(columns=buses_i)
-        for carrier in carriers:
-            gen_i = n.generators.query("carrier == @carrier").index
-            attach = n.generators_t.p_max_pu[gen_i].rename(
-                columns=n.generators.loc[gen_i].bus
-            )
-            feature_data = pd.concat([feature_data, attach], axis=0)[buses_i]
-
-        feature_data = feature_data.T
-        # timestamp raises error in sklearn >= v1.2:
-        feature_data.columns = feature_data.columns.astype(str)
-
-    feature_data = feature_data.fillna(0)
-
-    return feature_data
-
-
-def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
-    """
-    Determine the number of clusters per country.
-    """
-    L = (
-        n.loads_t.p_set.mean()
-        .groupby(n.loads.bus)
-        .sum()
-        .groupby([n.buses.country, n.buses.sub_network])
-        .sum()
-        .pipe(normed)
-    )
-
-    N = n.buses.groupby(["country", "sub_network"]).size()
-
-    assert (
-        n_clusters >= len(N) and n_clusters <= N.sum()
-    ), f"Number of clusters must be {len(N)} <= n_clusters <= {N.sum()} for this selection of countries."
-
-    if focus_weights is not None:
-        total_focus = sum(list(focus_weights.values()))
-
-        assert (
-            total_focus <= 1.0
-        ), "The sum of focus weights must be less than or equal to 1."
-
-        for country, weight in focus_weights.items():
-            L[country] = weight / len(L[country])
-
-        remainder = [
-            c not in focus_weights.keys() for c in L.index.get_level_values("country")
-        ]
-        L[remainder] = L.loc[remainder].pipe(normed) * (1 - total_focus)
-
-        logger.warning("Using custom focus weights for determining number of clusters.")
-
-    assert np.isclose(
-        L.sum(), 1.0, rtol=1e-3
-    ), f"Country weights L must sum up to 1.0 when distributing clusters. Is {L.sum()}."
-
-    m = po.ConcreteModel()
-
-    def n_bounds(model, *n_id):
-        return (1, N[n_id])
-
-    m.n = po.Var(list(L.index), bounds=n_bounds, domain=po.Integers)
-    m.tot = po.Constraint(expr=(po.summation(m.n) == n_clusters))
-    m.objective = po.Objective(
-        expr=sum((m.n[i] - L.loc[i] * n_clusters) ** 2 for i in L.index),
-        sense=po.minimize,
-    )
-
-    opt = po.SolverFactory(solver_name)
-    if not opt.has_capability("quadratic_objective"):
-        logger.warning(
-            f"The configured solver `{solver_name}` does not support quadratic objectives. Falling back to `ipopt`."
-        )
-        opt = po.SolverFactory("ipopt")
-
-    results = opt.solve(m)
-    assert (
-        results["Solver"][0]["Status"] == "ok"
-    ), f"Solver returned non-optimally: {results}"
-
-    return pd.Series(m.n.get_values(), index=L.index).round().astype(int)
-
-
-def busmap_for_n_clusters(
-    n,
-    n_clusters,
-    solver_name,
-    focus_weights=None,
-    algorithm="kmeans",
-    feature=None,
-    **algorithm_kwds,
-):
-    if algorithm == "kmeans":
-        algorithm_kwds.setdefault("n_init", 1000)
-        algorithm_kwds.setdefault("max_iter", 30000)
-        algorithm_kwds.setdefault("tol", 1e-6)
-        algorithm_kwds.setdefault("random_state", 0)
-
-    def fix_country_assignment_for_hac(n):
-        from scipy.sparse import csgraph
-
-        # overwrite country of nodes that are disconnected from their country-topology
-        for country in n.buses.country.unique():
-            m = n[n.buses.country == country].copy()
-
-            _, labels = csgraph.connected_components(
-                m.adjacency_matrix(), directed=False
-            )
-
-            component = pd.Series(labels, index=m.buses.index)
-            component_sizes = component.value_counts()
-
-            if len(component_sizes) > 1:
-                disconnected_bus = component[
-                    component == component_sizes.index[-1]
-                ].index[0]
-
-                neighbor_bus = n.lines.query(
-                    "bus0 == @disconnected_bus or bus1 == @disconnected_bus"
-                ).iloc[0][["bus0", "bus1"]]
-                new_country = list(
-                    set(n.buses.loc[neighbor_bus].country) - set([country])
-                )[0]
-
-                logger.info(
-                    f"overwriting country `{country}` of bus `{disconnected_bus}` "
-                    f"to new country `{new_country}`, because it is disconnected "
-                    "from its initial inter-country transmission grid."
-                )
-                n.buses.at[disconnected_bus, "country"] = new_country
-        return n
-
-    if algorithm == "hac":
-        feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature)
-        n = fix_country_assignment_for_hac(n)
-
-    if (algorithm != "hac") and (feature is not None):
-        logger.warning(
-            f"Keyword argument feature is only valid for algorithm `hac`. "
-            f"Given feature `{feature}` will be ignored."
-        )
-
-    n.determine_network_topology()
-
-    n_clusters = distribute_clusters(
-        n, n_clusters, focus_weights=focus_weights, solver_name=solver_name
-    )
-
-    def busmap_for_country(x):
-        prefix = x.name[0] + x.name[1] + " "
-        logger.debug(f"Determining busmap for country {prefix[:-1]}")
-        if len(x) == 1:
-            return pd.Series(prefix + "0", index=x.index)
-        weight = weighting_for_country(n, x)
-
-        if algorithm == "kmeans":
-            return prefix + busmap_by_kmeans(
-                n, weight, n_clusters[x.name], buses_i=x.index, **algorithm_kwds
-            )
-        elif algorithm == "hac":
-            return prefix + busmap_by_hac(
-                n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]
-            )
-        elif algorithm == "modularity":
-            return prefix + busmap_by_greedy_modularity(
-                n, n_clusters[x.name], buses_i=x.index
-            )
-        else:
-            raise ValueError(
-                f"`algorithm` must be one of 'kmeans' or 'hac'. Is {algorithm}."
-            )
-
-    return (
-        n.buses.groupby(["country", "sub_network"], group_keys=False)
-        .apply(busmap_for_country)
-        .squeeze()
-        .rename("busmap")
-    )
-
-
-def clustering_for_n_clusters(
-    n,
-    n_clusters,
-    custom_busmap=False,
-    aggregate_carriers=None,
-    line_length_factor=1.25,
-    aggregation_strategies=dict(),
-    solver_name="cbc",
-    algorithm="hac",
-    feature=None,
-    extended_link_costs=0,
-    focus_weights=None,
-):
-    if not isinstance(custom_busmap, pd.Series):
-        busmap = busmap_for_n_clusters(
-            n, n_clusters, solver_name, focus_weights, algorithm, feature
-        )
-    else:
-        busmap = custom_busmap
-
-    line_strategies = aggregation_strategies.get("lines", dict())
-    generator_strategies = aggregation_strategies.get("generators", dict())
-    one_port_strategies = aggregation_strategies.get("one_ports", dict())
-
-    clustering = get_clustering_from_busmap(
-        n,
-        busmap,
-        aggregate_generators_weighted=True,
-        aggregate_generators_carriers=aggregate_carriers,
-        aggregate_one_ports=["Load", "StorageUnit"],
-        line_length_factor=line_length_factor,
-        line_strategies=line_strategies,
-        generator_strategies=generator_strategies,
-        one_port_strategies=one_port_strategies,
-        scale_link_capital_costs=False,
-    )
-
-    if not n.links.empty:
-        nc = clustering.network
-        nc.links["underwater_fraction"] = (
-            n.links.eval("underwater_fraction * length").div(nc.links.length).dropna()
-        )
-        nc.links["capital_cost"] = nc.links["capital_cost"].add(
-            (nc.links.length - n.links.length)
-            .clip(lower=0)
-            .mul(extended_link_costs)
-            .dropna(),
-            fill_value=0,
-        )
-
-    return clustering
-
-
-def cluster_regions(busmaps, input=None, output=None):
-    busmap = reduce(lambda x, y: x.map(y), busmaps[1:], busmaps[0])
-
-    for which in ("regions_onshore", "regions_offshore"):
-        regions = gpd.read_file(getattr(input, which))
-        regions = regions.reindex(columns=["name", "geometry"]).set_index("name")
-        regions_c = regions.dissolve(busmap)
-        regions_c.index.name = "name"
-        regions_c = regions_c.reset_index()
-        regions_c.to_file(getattr(output, which))
-
-
-def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
-    busmap = busmap_for_n_clusters(n, n_clusters)
-    cs = busmap.unique()
-    cr = sns.color_palette("hls", len(cs))
-    n.plot(bus_colors=busmap.map(dict(zip(cs, cr))))
-    if fn is not None:
-        plt.savefig(fn, bbox_inches="tight")
-    del cs, cr
-
-
-if __name__ == "__main__":
-    if "snakemake" not in globals():
-        from _helpers import mock_snakemake
-
-        snakemake = mock_snakemake("cluster_network", simpl="", clusters="37")
-    configure_logging(snakemake)
-
-    params = snakemake.params
-    solver_name = snakemake.config["solving"]["solver"]["name"]
-
-    n = pypsa.Network(snakemake.input.network)
-
-    exclude_carriers = params.cluster_network["exclude_carriers"]
-    aggregate_carriers = set(n.generators.carrier) - set(exclude_carriers)
-    conventional_carriers = set(params.conventional_carriers)
-    if snakemake.wildcards.clusters.endswith("m"):
-        n_clusters = int(snakemake.wildcards.clusters[:-1])
-        aggregate_carriers = params.conventional_carriers & aggregate_carriers
-    elif snakemake.wildcards.clusters.endswith("c"):
-        n_clusters = int(snakemake.wildcards.clusters[:-1])
-        aggregate_carriers = aggregate_carriers - conventional_carriers
-    elif snakemake.wildcards.clusters == "all":
-        n_clusters = len(n.buses)
-    else:
-        n_clusters = int(snakemake.wildcards.clusters)
-
-    if params.cluster_network.get("consider_efficiency_classes", False):
-        carriers = []
-        for c in aggregate_carriers:
-            gens = n.generators.query("carrier == @c")
-            low = gens.efficiency.quantile(0.10)
-            high = gens.efficiency.quantile(0.90)
-            if low >= high:
-                carriers += [c]
-            else:
-                labels = ["low", "medium", "high"]
-                suffix = pd.cut(
-                    gens.efficiency, bins=[0, low, high, 1], labels=labels
-                ).astype(str)
-                carriers += [f"{c} {label} efficiency" for label in labels]
-                n.generators.carrier.update(gens.carrier + " " + suffix + " efficiency")
-        aggregate_carriers = carriers
-
-    if n_clusters == len(n.buses):
-        # Fast-path if no clustering is necessary
-        busmap = n.buses.index.to_series()
-        linemap = n.lines.index.to_series()
-        clustering = pypsa.clustering.spatial.Clustering(
-            n, busmap, linemap, linemap, pd.Series(dtype="O")
-        )
-    else:
-        Nyears = n.snapshot_weightings.objective.sum() / 8760
-
-        hvac_overhead_cost = load_costs(
-            snakemake.input.tech_costs,
-            params.costs,
-            params.max_hours,
-            Nyears,
-        ).at["HVAC overhead", "capital_cost"]
-
-        custom_busmap = params.custom_busmap
-        if custom_busmap:
-            custom_busmap = pd.read_csv(
-                snakemake.input.custom_busmap, index_col=0, squeeze=True
-            )
-            custom_busmap.index = custom_busmap.index.astype(str)
-            logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
-
-        clustering = clustering_for_n_clusters(
-            n,
-            n_clusters,
-            custom_busmap,
-            aggregate_carriers,
-            params.length_factor,
-            params.aggregation_strategies,
-            solver_name,
-            params.cluster_network["algorithm"],
-            params.cluster_network["feature"],
-            hvac_overhead_cost,
-            params.focus_weights,
-        )
-
-    update_p_nom_max(clustering.network)
-
-    if params.cluster_network.get("consider_efficiency_classes"):
-        labels = [f" {label} efficiency" for label in ["low", "medium", "high"]]
-        nc = clustering.network
-        nc.generators["carrier"] = nc.generators.carrier.replace(labels, "", regex=True)
-
-    clustering.network.meta = dict(
-        snakemake.config, **dict(wildcards=dict(snakemake.wildcards))
-    )
-    clustering.network.export_to_netcdf(snakemake.output.network)
-    for attr in (
-        "busmap",
-        "linemap",
-    ):  # also available: linemap_positive, linemap_negative
-        getattr(clustering, attr).to_csv(snakemake.output[attr])
-
-    cluster_regions((clustering.busmap,), snakemake.input, snakemake.output)
-
-    output_path = os.path.dirname(snakemake.output[0]) + "_clustered_"
-    export_network_for_gis_mapping(clustering.network, output_path)

From 26bde9facb4a1a1f0c9f83df5d8291e6a7b5e08c Mon Sep 17 00:00:00 2001
From: Kamran <ktehranchi@stanford.edu>
Date: Wed, 28 Feb 2024 00:20:10 -0800
Subject: [PATCH 07/10] updates configs

---
 workflow/config/config.cluster.yaml           | 4 ++--
 workflow/config/config.default.yaml           | 1 +
 workflow/config/tests/config.test.yaml        | 1 +
 workflow/config/tests/config.test_simple.yaml | 7 ++++---
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/workflow/config/config.cluster.yaml b/workflow/config/config.cluster.yaml
index fb3f2424..c3d2fd1c 100644
--- a/workflow/config/config.cluster.yaml
+++ b/workflow/config/config.cluster.yaml
@@ -5,8 +5,8 @@ __default__:
   walltime: 00:30:00   # time limit for each job
   cpus_per_task: 1   # number of cores per job
   chdir: $GROUP_HOME/kamran/pypsa-usa/workflow
-  output: logs/slurm/{rule}/log-%j.out
-  error: logs/slurm/{rule}/errlog-%j.err
+  output: logs/{rule}/log-%j.out
+  error: logs/{rule}/errlog-%j.err
 
 build_renewable_profiles:
   walltime: 02:00:00
diff --git a/workflow/config/config.default.yaml b/workflow/config/config.default.yaml
index ed6f2327..6275eca6 100644
--- a/workflow/config/config.default.yaml
+++ b/workflow/config/config.default.yaml
@@ -73,6 +73,7 @@ electricity:
   co2base: 226.86e+6 #base_from_2020 Locations of the 250 MMmt of CO2 emissions from the WECC 2021.
   gaslimit: false # global gas usage limit of X MWh_th
   retirement: economic # "economic" or "technical"
+  SAFE_reservemargin: 0.15
 
   operational_reserve:
     activate: false
diff --git a/workflow/config/tests/config.test.yaml b/workflow/config/tests/config.test.yaml
index 81ee0b32..def60f33 100644
--- a/workflow/config/tests/config.test.yaml
+++ b/workflow/config/tests/config.test.yaml
@@ -76,6 +76,7 @@ electricity:
   co2base: 226.86e+6 #base_from_2020 Locations of the 250 MMmt of CO2 emissions from the WECC 2021.
   gaslimit: false # global gas usage limit of X MWh_th
   retirement: economic # "economic" or "technical"
+  SAFE_reservemargin: 0.15
 
   operational_reserve:
     activate: false
diff --git a/workflow/config/tests/config.test_simple.yaml b/workflow/config/tests/config.test_simple.yaml
index d3360575..1415ac23 100644
--- a/workflow/config/tests/config.test_simple.yaml
+++ b/workflow/config/tests/config.test_simple.yaml
@@ -12,7 +12,7 @@ run:
 scenario:
   interconnect: [western] #"usa|texas|western|eastern"
   clusters: [40]
-  opts: [Co2L0.30-4H-Ep-EQ0.05c-SAFE]
+  opts: [Co2L0.30-3H-Ep-SAFE]
   ll: [v1.0]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
@@ -72,6 +72,7 @@ electricity:
   co2base: 225.0e+6 #base_from_2020 Locations of the 250 MMmt of CO2 emissions from the WECC 2021.
   gaslimit: false # global gas usage limit of X MWh_th
   retirement: economic # "economic" or "technical"
+  SAFE_reservemargin: 0.15
 
   operational_reserve:
     activate: false
@@ -254,10 +255,10 @@ sector:
 clustering:
   simplify_network:
     to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
-    algorithm: kmeans # choose from: [hac, kmeans]
+    algorithm: hac # choose from: [hac, kmeans]
     feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
   cluster_network:
-    algorithm: kmeans
+    algorithm: hac
     feature: solar+onwind-time
     aggregation_zones: 'state' # [balancing_area, state]
     exclude_carriers: []

From 1fb469dbdfe0dddbc659fe7af62beb5a80fa9529 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 Feb 2024 08:21:54 +0000
Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 workflow/config/tests/config.test.yaml | 2 +-
 workflow/rules/build_sector.smk        | 4 ++--
 workflow/scripts/add_electricity.py    | 4 +++-
 workflow/scripts/plot_figures.py       | 2 +-
 workflow/scripts/simplify_network.py   | 3 ++-
 workflow/tests/test_yaml_structure.py  | 9 ++++++---
 6 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/workflow/config/tests/config.test.yaml b/workflow/config/tests/config.test.yaml
index cc682c22..5b584592 100644
--- a/workflow/config/tests/config.test.yaml
+++ b/workflow/config/tests/config.test.yaml
@@ -13,7 +13,7 @@ run:
 scenario:
   interconnect: western #"usa|texas|western|eastern"
   clusters: [40]
-  opts: [Co2L0.30-4H-Ep-SAFE,]
+  opts: [Co2L0.30-4H-Ep-SAFE]
   ll: [v1.05]
   scope: "total" # "urban", "rural", or "total"
   sector: "" # G
diff --git a/workflow/rules/build_sector.smk b/workflow/rules/build_sector.smk
index ed18dee9..b73d843e 100644
--- a/workflow/rules/build_sector.smk
+++ b/workflow/rules/build_sector.smk
@@ -31,11 +31,11 @@ rule add_sectors:
     output:
         network=RESOURCES
         + "{interconnect}/elec_s_{clusters}_ec_l{ll}_{opts}_{sector}.nc",
-    group: 
+    group:
         "prepare"
     threads: 1
     resources:
-        mem_mb=4000,        
+        mem_mb=4000,
     script:
         "../scripts/add_sectors.py"
 
diff --git a/workflow/scripts/add_electricity.py b/workflow/scripts/add_electricity.py
index 626a9c9d..1074e5d6 100755
--- a/workflow/scripts/add_electricity.py
+++ b/workflow/scripts/add_electricity.py
@@ -951,7 +951,9 @@ def attach_wind_and_solar(
                 .transpose("time", "bus")
                 .to_pandas()
                 .T.merge(
-                    bus2sub[["bus_id", "sub_id"]], left_on="bus", right_on="sub_id"
+                    bus2sub[["bus_id", "sub_id"]],
+                    left_on="bus",
+                    right_on="sub_id",
                 )
                 .set_index("bus_id")
                 .drop(columns="sub_id")
diff --git a/workflow/scripts/plot_figures.py b/workflow/scripts/plot_figures.py
index e8e8c5da..6a196019 100644
--- a/workflow/scripts/plot_figures.py
+++ b/workflow/scripts/plot_figures.py
@@ -608,7 +608,7 @@ def plot_production_bar(
     energy_mix.name = "dispatch"
     energy_mix = energy_mix[
         energy_mix.index.get_level_values("component").isin(
-            ["Generator", "StorageUnit"]
+            ["Generator", "StorageUnit"],
         )
     ]
     energy_mix = energy_mix.groupby("carrier").sum().reset_index()
diff --git a/workflow/scripts/simplify_network.py b/workflow/scripts/simplify_network.py
index 553b7e53..8add69aa 100644
--- a/workflow/scripts/simplify_network.py
+++ b/workflow/scripts/simplify_network.py
@@ -135,7 +135,8 @@ def aggregate_to_substations(
     network_s.lines["type"] = np.nan
 
     network_s.buses.drop(
-        columns=["balancing_area", "state", "substation_off", "sub_id"], inplace=True
+        columns=["balancing_area", "state", "substation_off", "sub_id"],
+        inplace=True,
     )
     return network_s
 
diff --git a/workflow/tests/test_yaml_structure.py b/workflow/tests/test_yaml_structure.py
index b2c9ebfe..099593d8 100644
--- a/workflow/tests/test_yaml_structure.py
+++ b/workflow/tests/test_yaml_structure.py
@@ -9,7 +9,7 @@ def load_yaml_file(filepath):
 def compare_structures(data1, data2, path=""):
     if type(data1) != type(data2):
         print(
-            f"Type mismatch at {path}: {type(data1).__name__} vs {type(data2).__name__}"
+            f"Type mismatch at {path}: {type(data1).__name__} vs {type(data2).__name__}",
         )
         return False
 
@@ -19,7 +19,9 @@ def compare_structures(data1, data2, path=""):
                 print(f"Missing key in second structure at {path}: {key}")
                 continue
             compare_structures(
-                data1[key], data2[key], path=f"{path}.{key}" if path else key
+                data1[key],
+                data2[key],
+                path=f"{path}.{key}" if path else key,
             )
         for key in data2:
             if key not in data1:
@@ -50,5 +52,6 @@ def test_yaml_structure(filepath1, filepath2):
 
 # Example usage
 test_yaml_structure(
-    "../config/tests/config.test_simple.yaml", "../config/config.default.yaml"
+    "../config/tests/config.test_simple.yaml",
+    "../config/config.default.yaml",
 )

From f28c336aceec23960e3c0c013e66bd9a72bd7532 Mon Sep 17 00:00:00 2001
From: ktehranchi <83722342+ktehranchi@users.noreply.github.com>
Date: Wed, 28 Feb 2024 00:23:09 -0800
Subject: [PATCH 09/10] update docs

---
 docs/source/configtables/clustering.csv |  8 ++++----
 docs/source/configtables/lines.csv      | 26 ++++++++++++-------------
 docs/source/configtables/opts.csv       | 24 +++++++++++------------
 workflow/repo_data/agg_p_nom_minmax.csv |  2 ++
 4 files changed, 31 insertions(+), 29 deletions(-)
 create mode 100644 workflow/repo_data/agg_p_nom_minmax.csv

diff --git a/docs/source/configtables/clustering.csv b/docs/source/configtables/clustering.csv
index 5fd8f67a..a4572885 100644
--- a/docs/source/configtables/clustering.csv
+++ b/docs/source/configtables/clustering.csv
@@ -2,10 +2,10 @@
 simplify_network:,,,
 to_substations,bool,"{true, false}",Implementation curerntly overrides to true. Network is simplified to substation nodes with positive or negative power injection.
 algorithm,str,{'kmeans'},
-feature,str," {'solar+onwind-time', 'solar+onwind-cap', 'solar-time', 'solar-cap', 'solar+offwind-cap'}",For HAC clustering. Currenntly unused in pypsa-usa.
+feature,str," {'solar+onwind-time', 'solar+onwind-cap', 'solar-time', 'solar-cap', 'solar+offwind-cap'}",For HAC clustering.
 cluster_network:,,,
 algorithm,str,{'kmeans'},
-feature,str," {'solar+onwind-time', 'solar+onwind-cap', 'solar-time', 'solar-cap', 'solar+offwind-cap'}",For HAC clustering. Currenntly unused in pypsa-usa.
-aggregation_zones,str,"{'balancing_area', 'state', 'country'}",Boundaries of GIS shapes that are to be respected in clustering. Retain if you would like to analyze expansion within a given zone.
+feature,str," {'solar+onwind-time', 'solar+onwind-cap', 'solar-time', 'solar-cap', 'solar+offwind-cap'}",For HAC clustering.
+aggregation_zones,str,"{'balancing_area', 'state'}",Boundaries of GIS shapes that are to be respected in clustering. Retain if you would like to analyze expansion within a given zone.
 aggregation_strategies:,,,
-table --> {key},str,"{'mean','max','min',etc}","Specifiy the method of aggregating fields within the generators, buses tables. "
+table --> {key},str,"{'mean','max','min',etc}","Specifiy the method of aggregating fields within the generators, buses tables. "
\ No newline at end of file
diff --git a/docs/source/configtables/lines.csv b/docs/source/configtables/lines.csv
index 40bbe297..53390070 100644
--- a/docs/source/configtables/lines.csv
+++ b/docs/source/configtables/lines.csv
@@ -1,13 +1,13 @@
-,Unit,Values,Description
-types,--,"Values should specify a `line type in PyPSA <https://pypsa.readthedocs.io/en/latest/components.html#line-types>`_. Keys should specify the corresponding voltage level (e.g. 220., 300. and 380. kV)","Specifies line types to assume for the different voltage levels of the ENTSO-E grid extraction. Should normally handle voltage levels 220, 300, and 380 kV"
-s_max_pu,--,"Value in [0.,1.]","Correction factor for line capacities (`s_nom`) to approximate :math:`N-1` security and reserve capacity for reactive power flows"
-s_nom_max,MW,"float","Global upper limit for the maximum capacity of each extendable line."
-max_extension,MW,"float","Upper limit for the extended capacity of each extendable line."
-length_factor,--,float,"Correction factor to account for the fact that buses are *not* connected by lines through air-line distance."
-under_construction,--,"One of {'zero': set capacity to zero, 'remove': remove completely, 'keep': keep with full capacity}","Specifies how to handle lines which are currently under construction."
-dynamic_line_rating,,,
--- activate,bool,"true or false","Whether to take dynamic line rating into account"
--- cutout,--,"Should be a folder listed in the configuration ``atlite: cutouts:`` (e.g. 'europe-2013-era5') or reference an existing folder in the directory ``cutouts``. Source module must be ERA5.","Specifies the directory where the relevant weather data ist stored."
--- correction_factor,--,"float","Factor to compensate for overestimation of wind speeds in hourly averaged wind data"
--- max_voltage_difference,deg,"float","Maximum voltage angle difference in degrees or 'false' to disable"
--- max_line_rating,--,"float","Maximum line rating relative to nominal capacity without DLR, e.g. 1.3 or 'false' to disable"
+,Unit,Values,Description
+types,--,"Values should specify a `line type in PyPSA <https://pypsa.readthedocs.io/en/latest/components.html#line-types>`_. Keys should specify the corresponding voltage level (e.g. 220., 300. and 380. kV)",Specifies line types to assume for the different voltage levels of the TAMU Network.
+s_max_pu,--,"Value in [0.,1.]",Correction factor for line capacities (`s_nom`) to approximate :math:`N-1` security and reserve capacity for reactive power flows
+s_nom_max,MW,float,Global upper limit for the maximum capacity of each extendable line.
+max_extension,MW,float,Upper limit for the extended capacity of each extendable line.
+length_factor,--,float,Correction factor to account for the fact that buses are *not* connected by lines through air-line distance.
+under_construction,--,"One of {'zero': set capacity to zero, 'remove': remove completely, 'keep': keep with full capacity}",Specifies how to handle lines which are currently under construction.
+dynamic_line_rating,,,
+-- activate,bool,true or false,Whether to take dynamic line rating into account
+#NAME?,--,Should be a folder listed in the configuration ``atlite: cutouts:`` (e.g. 'europe-2013-era5') or reference an existing folder in the directory ``cutouts``. Source module must be ERA5.,Specifies the directory where the relevant weather data is stored.
+#NAME?,--,float,Factor to compensate for overestimation of wind speeds in hourly averaged wind data
+#NAME?,deg,float,Maximum voltage angle difference in degrees or 'false' to disable
+#NAME?,--,float,"Maximum line rating relative to nominal capacity without DLR, e.g. 1.3 or 'false' to disable"
\ No newline at end of file
diff --git a/docs/source/configtables/opts.csv b/docs/source/configtables/opts.csv
index b468be6e..e98df9ee 100644
--- a/docs/source/configtables/opts.csv
+++ b/docs/source/configtables/opts.csv
@@ -1,12 +1,12 @@
-Trigger, Description, Definition, Status
-``nH``; i.e. ``2H``-``6H``, Resample the time-resolution by averaging over every ``n`` snapshots, ``prepare_network``: `average_every_nhours() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L110>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L146>`__), In active use
-``nSEG``; e.g. ``4380SEG``, "Apply time series segmentation with `tsam <https://tsam.readthedocs.io/en/latest/index.html>`_ package to ``n`` adjacent snapshots of varying lengths based on capacity factors of varying renewables, hydro inflow and load.", ``prepare_network``: apply_time_segmentation(), In active use
-``Co2L``, Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit``. If a float is appended an overall emission limit relative to the emission level given in ``electricity: co2base`` is added (e.g. ``Co2L0.05`` limits emissisions to 5% of what is given in ``electricity: co2base``), ``prepare_network``: `add_co2limit() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L19>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L154>`__, In active use
-``Ep``, Add cost for a carbon-dioxide price configured in ``costs: emission_prices: co2`` to ``marginal_cost`` of generators (other emission types listed in ``network.carriers`` possible as well), ``prepare_network``: `add_emission_prices() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L24>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L158>`__, In active use
-``CCL``, Add minimum and maximum levels of generator nominal capacity per carrier for individual countries. These can be specified in the file linked at ``electricity: agg_p_nom_limits`` in the configuration. File defaults to ``data/agg_p_nom_minmax.csv``., ``solve_network``, In active use
-``EQ``, "Require each country or node to on average produce a minimal share of its total consumption itself. Example: ``EQ0.5c`` demands each country to produce on average at least 50% of its consumption; ``EQ0.5`` demands each node to produce on average at least 50% of its consumption.", ``solve_network``, In active use
-``ATK``, "Require each node to be autarkic. Example: ``ATK`` removes all lines and links. ``ATKc`` removes all cross-border lines and links.", ``prepare_network``, In active use
-``BAU``, Add a per-``carrier`` minimal overall capacity; i.e. at least ``40GW`` of ``OCGT`` in Europe; configured in ``electricity: BAU_mincapacities``, ``solve_network``: `add_opts_constraints() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/solve_network.py#L66>`__, Untested
-``SAFE``, Add a capacity reserve margin of a certain fraction above the peak demand to which renewable generators and storage do *not* contribute. Ignores network., ``solve_network`` `add_opts_constraints() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/solve_network.py#L73>`__, Untested
-``carrier+{c|p|m}factor``,"Alter the capital cost (``c``), installable potential (``p``) or marginal costs (``m``) of a carrier by a factor. Example: ``solar+c0.5`` reduces the capital cost of solar to 50\% of original values.", ``prepare_network``, In active use
-``CH4L``,"Add an overall absolute gas limit. If configured in ``electricity: gaslimit`` it is given in MWh thermal, if a float is appended, the overall gaslimit is assumed to be given in TWh thermal (e.g. ``CH4L200`` limits gas dispatch to 200 TWh termal)", ``prepare_network``: ``add_gaslimit()``, In active use
+Trigger, Description, Definition, Status,
+``nH``; i.e. ``2H``-``6H``, Resample the time-resolution by averaging over every ``n`` snapshots, ``prepare_network``: `average_every_nhours() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L110>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L146>`__), In active use,
+``nSEG``; e.g. ``4380SEG``,"Apply time series segmentation with `tsam <https://tsam.readthedocs.io/en/latest/index.html>`_ package to ``n`` adjacent snapshots of varying lengths based on capacity factors of varying renewables, hydro inflow and load.", ``prepare_network``: apply_time_segmentation(), In active use,
+``Co2L``, Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit``. If a float is appended an overall emission limit relative to the emission level given in ``electricity: co2base`` is added (e.g. ``Co2L0.05`` limits emissions to 5% of what is given in ``electricity: co2base``), ``prepare_network``: `add_co2limit() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L19>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L154>`__, In active use,
+``Ep``, Add cost for a carbon-dioxide price configured in ``costs: emission_prices: co2`` to ``marginal_cost`` of generators (other emission types listed in ``network.carriers`` possible as well), ``prepare_network``: `add_emission_prices() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L24>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L158>`__, In active use,
+``CCL``, Add minimum and maximum levels of generator nominal capacity per carrier for individual regions. These can be specified in the file linked at ``electricity: agg_p_nom_limits`` in the configuration. File defaults to ``data/agg_p_nom_minmax.csv``., ``solve_network``, In active use,
+``EQ``,"Require each region or node to on average produce a minimal share of its total consumption itself. Example: ``EQ0.5c`` demands each region to produce on average at least 50% of its consumption; ``EQ0.5`` demands each node to produce on average at least 50% of its consumption.", ``solve_network``, In active use,
+``BAU``, Add a per-``carrier`` minimal overall capacity; i.e. at least ``40GW`` of ``OCGT`` in a given Interconnect; configured in ``electricity: BAU_mincapacities``, ``solve_network``: `add_opts_constraints() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/solve_network.py#L66>`__, Untested,
+``SAFE``, Add a capacity reserve margin (a.k.a Planning Reserve Margin) of a certain fraction above the peak demand to which renewable generators and storage do *not* contribute. Ignores network constraints., ``solve_network`` `add_opts_constraints() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/solve_network.py#L73>`__, Untested,
+``carrier+{c|p|m}factor``,"Alter the capital cost (``c``), installable potential (``p``) or marginal costs (``m``) of a carrier by a factor. Example: ``solar+c0.5`` reduces the capital cost of solar to 50\% of original values.", ``prepare_network``, In active use,
+``CH4L``,"Add an overall absolute gas limit. If configured in ``electricity: gaslimit`` it is given in MWh thermal, if a float is appended, the overall gaslimit is assumed to be given in TWh thermal (e.g. ``CH4L200`` limits gas dispatch to 200 TWh termal)", ``prepare_network``: ``add_gaslimit()``, In active use,
+``ATK``," ""Require each node to be autarkic. Example: ``ATK`` removes all lines and links. ``ATKc`` removes all cross-border lines and links.""", ``prepare_network``, In active use,
\ No newline at end of file
diff --git a/workflow/repo_data/agg_p_nom_minmax.csv b/workflow/repo_data/agg_p_nom_minmax.csv
new file mode 100644
index 00000000..9089f16c
--- /dev/null
+++ b/workflow/repo_data/agg_p_nom_minmax.csv
@@ -0,0 +1,2 @@
+country,carrier,min,max
+California,solar,0,
\ No newline at end of file

From f09e3095b98efefb09af239e341831b77d05cff1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 Feb 2024 08:23:29 +0000
Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 docs/source/configtables/clustering.csv | 2 +-
 docs/source/configtables/lines.csv      | 2 +-
 docs/source/configtables/opts.csv       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/configtables/clustering.csv b/docs/source/configtables/clustering.csv
index a4572885..34a6efa8 100644
--- a/docs/source/configtables/clustering.csv
+++ b/docs/source/configtables/clustering.csv
@@ -8,4 +8,4 @@ algorithm,str,{'kmeans'},
 feature,str," {'solar+onwind-time', 'solar+onwind-cap', 'solar-time', 'solar-cap', 'solar+offwind-cap'}",For HAC clustering.
 aggregation_zones,str,"{'balancing_area', 'state'}",Boundaries of GIS shapes that are to be respected in clustering. Retain if you would like to analyze expansion within a given zone.
 aggregation_strategies:,,,
-table --> {key},str,"{'mean','max','min',etc}","Specifiy the method of aggregating fields within the generators, buses tables. "
\ No newline at end of file
+table --> {key},str,"{'mean','max','min',etc}","Specify the method of aggregating fields within the generators, buses tables. "
diff --git a/docs/source/configtables/lines.csv b/docs/source/configtables/lines.csv
index 53390070..d9a00040 100644
--- a/docs/source/configtables/lines.csv
+++ b/docs/source/configtables/lines.csv
@@ -10,4 +10,4 @@ dynamic_line_rating,,,
 #NAME?,--,Should be a folder listed in the configuration ``atlite: cutouts:`` (e.g. 'europe-2013-era5') or reference an existing folder in the directory ``cutouts``. Source module must be ERA5.,Specifies the directory where the relevant weather data is stored.
 #NAME?,--,float,Factor to compensate for overestimation of wind speeds in hourly averaged wind data
 #NAME?,deg,float,Maximum voltage angle difference in degrees or 'false' to disable
-#NAME?,--,float,"Maximum line rating relative to nominal capacity without DLR, e.g. 1.3 or 'false' to disable"
\ No newline at end of file
+#NAME?,--,float,"Maximum line rating relative to nominal capacity without DLR, e.g. 1.3 or 'false' to disable"
diff --git a/docs/source/configtables/opts.csv b/docs/source/configtables/opts.csv
index e98df9ee..0812e404 100644
--- a/docs/source/configtables/opts.csv
+++ b/docs/source/configtables/opts.csv
@@ -9,4 +9,4 @@ Trigger, Description, Definition, Status,
 ``SAFE``, Add a capacity reserve margin (a.k.a Planning Reserve Margin) of a certain fraction above the peak demand to which renewable generators and storage do *not* contribute. Ignores network constraints., ``solve_network`` `add_opts_constraints() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/solve_network.py#L73>`__, Untested,
 ``carrier+{c|p|m}factor``,"Alter the capital cost (``c``), installable potential (``p``) or marginal costs (``m``) of a carrier by a factor. Example: ``solar+c0.5`` reduces the capital cost of solar to 50\% of original values.", ``prepare_network``, In active use,
 ``CH4L``,"Add an overall absolute gas limit. If configured in ``electricity: gaslimit`` it is given in MWh thermal, if a float is appended, the overall gaslimit is assumed to be given in TWh thermal (e.g. ``CH4L200`` limits gas dispatch to 200 TWh termal)", ``prepare_network``: ``add_gaslimit()``, In active use,
-``ATK``," ""Require each node to be autarkic. Example: ``ATK`` removes all lines and links. ``ATKc`` removes all cross-border lines and links.""", ``prepare_network``, In active use,
\ No newline at end of file
+``ATK``," ""Require each node to be autarkic. Example: ``ATK`` removes all lines and links. ``ATKc`` removes all cross-border lines and links.""", ``prepare_network``, In active use,