diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 65e933960..cf5b93895 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -20,6 +20,14 @@ This part of documentation collects descriptive release notes to capture the mai
 * Add a list of PyPSA-Earth applications in academic and industrial projects `PR #1255 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/1255>`__
 
 
+* Computational improvements of build_osm_network `PR #845 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/845>`__
+
+* Boost computational performances of set_lines_ids with cKDTree by scipy `PR #806 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/806>`__
+
+* Boost computational performances of set_substation_ids using DBSCAN `PR #799 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/799>`__
+
+* Boost computational performances of fix_overpassing_line `PR #807 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/807>`__
+
 **Minor Changes and bug-fixing**
 
 * Added electricity bus to Fischer-Tropsch in prepare_sector_network.py `PR #1226 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/1226>`__
diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py
index 867262abc..b4e7e6f4c 100644
--- a/scripts/build_osm_network.py
+++ b/scripts/build_osm_network.py
@@ -17,8 +17,10 @@
     read_osm_config,
     to_csv_nafix,
 )
-from shapely.geometry import LineString, Point
-from shapely.ops import linemerge, split
+from scipy.spatial import cKDTree
+from shapely.geometry import LineString, MultiLineString, Point
+from shapely.ops import linemerge, nearest_points, split
+from sklearn.cluster import DBSCAN
 from tqdm import tqdm
 
 logger = create_logger(__name__)
@@ -41,76 +43,32 @@ def line_endings_to_bus_conversion(lines):
     return lines
 
 
-# tol in m
-def set_substations_ids(buses, distance_crs, tol=2000):
-    """
-    Function to set substations ids to buses, accounting for location
-    tolerance.
-
-    The algorithm is as follows:
-
-    1. initialize all substation ids to -1
-    2. if the current substation has been already visited [substation_id < 0], then skip the calculation
-    3. otherwise:
-        1. identify the substations within the specified tolerance (tol)
-        2. when all the substations in tolerance have substation_id < 0, then specify a new substation_id
-        3. otherwise, if one of the substation in tolerance has a substation_id >= 0, then set that substation_id to all the others;
-           in case of multiple substations with substation_ids >= 0, the first value is picked for all
+def set_substations_ids(buses, distance_crs, tol=5000):
     """
+    Assigns station IDs to buses based on their proximity.
 
-    buses["station_id"] = -1
+    Parameters:
+    - buses: GeoDataFrame object representing the buses data.
+    - distance_crs: Coordinate reference system (CRS) to convert the geometry to.
+    - tol: Tolerance distance in chosen CRS to define cluster proximity.
 
-    # create temporary series to execute distance calculations using m as reference distances
-    temp_bus_geom = buses.geometry.to_crs(distance_crs)
+    Returns:
+    - None. Modifies the 'station_id' column in the 'buses' GeoDataFrame.
 
-    # set tqdm options for substation ids
-    tqdm_kwargs_substation_ids = dict(
-        ascii=False,
-        unit=" buses",
-        total=buses.shape[0],
-        desc="Set substation ids ",
-    )
+    Example:
+    set_substations_ids(buses_data, 'EPSG:3857', tol=5000)
+    """
 
-    station_id = 0
-    for i, row in tqdm(buses.iterrows(), **tqdm_kwargs_substation_ids):
-        if buses.loc[i, "station_id"] >= 0:
-            continue
+    # Convert the geometry to EPSG:3857
+    tmp_geometry = buses.geometry.to_crs(distance_crs)
 
-        # get substations within tolerance
-        close_nodes = np.flatnonzero(
-            temp_bus_geom.distance(temp_bus_geom.loc[i]) <= tol
-        )
+    coords = tmp_geometry.apply(lambda geom: np.array(geom.coords[0])).to_list()
 
-        if len(close_nodes) == 1:
-            # if only one substation is in tolerance, then the substation is the current one iì
-            # Note that the node cannot be with substation_id >= 0, given the preliminary check
-            # at the beginning of the for loop
-            buses.loc[buses.index[i], "station_id"] = station_id
-            # update station id
-            station_id += 1
-        else:
-            # several substations in tolerance
-            # get their ids
-            subset_substation_ids = buses.loc[buses.index[close_nodes], "station_id"]
-            # check if all substation_ids are negative (<0)
-            all_neg = subset_substation_ids.max() < 0
-            # check if at least a substation_id is negative (<0)
-            some_neg = subset_substation_ids.min() < 0
-
-            if all_neg:
-                # when all substation_ids are negative, then this is a new substation id
-                # set the current station_id and increment the counter
-                buses.loc[buses.index[close_nodes], "station_id"] = station_id
-                station_id += 1
-            elif some_neg:
-                # otherwise, when at least a substation_id is non-negative, then pick the first value
-                # and set it to all the other substations within tolerance
-                sub_id = -1
-                for substation_id in subset_substation_ids:
-                    if substation_id >= 0:
-                        sub_id = substation_id
-                        break
-                buses.loc[buses.index[close_nodes], "station_id"] = sub_id
+    # Perform DBSCAN on the coordinates
+    db = DBSCAN(eps=tol, min_samples=1).fit(coords)
+
+    # Add the cluster labels to the GeoDataFrame
+    buses["station_id"] = db.labels_
 
 
 def set_lines_ids(lines, buses, distance_crs):
@@ -118,69 +76,66 @@ def set_lines_ids(lines, buses, distance_crs):
     Function to set line buses ids to the closest bus in the list.
     """
     # set tqdm options for set lines ids
-    tqdm_kwargs_line_ids = dict(
-        ascii=False,
-        unit=" lines",
-        total=lines.shape[0],
-        desc="Set line bus ids ",
-    )
+    lines_d = lines.to_crs(distance_crs)
+    buses_d = buses.to_crs(distance_crs)
 
     # initialization
     lines["bus0"] = -1
     lines["bus1"] = -1
 
-    busesepsg = buses.to_crs(distance_crs)
-    linesepsg = lines.to_crs(distance_crs)
-
-    for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids):
-        # select buses having the voltage level of the current line
-        buses_sel = busesepsg[
-            (buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"])
-        ]
+    for key, lines_sel in lines_d.groupby(["voltage", "dc"]):
+        buses_sel = buses_d.query(f"voltage == {key[0]} and dc == {key[1]}")
 
         # find the closest node of the bus0 of the line
-        bus0_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[0]).idxmin()
-        lines.loc[i, "bus0"] = buses.loc[bus0_id, "bus_id"]
-
-        # check if the line starts exactly in the node, otherwise modify the linestring
-        distance_bus0 = busesepsg.geometry.loc[bus0_id].distance(
-            row.geometry.boundary.geoms[0]
+        bus0_points = np.array(
+            list(
+                lines_sel.geometry.boundary.apply(
+                    lambda x: (x.geoms[0].x, x.geoms[0].y)
+                )
+            )
         )
-        if distance_bus0 > 0.0:
-            # the line does not start in the node, thus modify the linestring
-            lines.loc[i, "geometry"] = linemerge(
-                [
-                    LineString(
-                        [
-                            buses.loc[bus0_id, "geometry"],
-                            lines.loc[i, "geometry"].boundary.geoms[0],
-                        ]
-                    ),
-                    lines.loc[i, "geometry"],
-                ]
+        bus1_points = np.array(
+            list(
+                lines_sel.geometry.boundary.apply(
+                    lambda x: (x.geoms[1].x, x.geoms[1].y)
+                )
             )
-
-        # find the closest node of the bus1 of the line
-        bus1_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin()
-        lines.loc[i, "bus1"] = buses.loc[bus1_id, "bus_id"]
-
-        # check if the line ends exactly in the node, otherwise modify the linestring
-        distance_bus1 = busesepsg.geometry.loc[bus1_id].distance(
-            row.geometry.boundary.geoms[1]
         )
-        if distance_bus1 > 0.0:
-            # the line does not end in the node, thus modify the linestring
-            lines.loc[i, "geometry"] = linemerge(
-                [
-                    lines.loc[i, "geometry"],
-                    LineString(
-                        [
-                            lines.loc[i, "geometry"].boundary.geoms[1],
-                            buses.loc[bus1_id, "geometry"],
-                        ]
-                    ),
-                ]
+        points_buses = np.array(list(buses_sel.geometry.apply(lambda x: (x.x, x.y))))
+
+        btree = cKDTree(points_buses)
+        dist0, idx0 = btree.query(bus0_points, k=1)  # find closest points of bus0
+        dist1, idx1 = btree.query(bus1_points, k=1)  # find closest points of bus1
+
+        # set bus0 and bus1
+        lines.loc[lines_sel.index, "bus0"] = buses_sel.bus_id.iloc[idx0].values
+        lines.loc[lines_sel.index, "bus1"] = buses_sel.bus_id.iloc[idx1].values
+
+        # check if the line starts exactly in the bus0, otherwise modify the linestring
+        bus0_linestring = (
+            lines.loc[lines_sel.index]
+            .apply(
+                lambda x: LineString([buses.geometry.loc[x["bus0"]], x["bus_0_coors"]]),
+                axis=1,
             )
+            .set_crs(crs=lines.crs)
+        )
+        bus1_linestring = (
+            lines.loc[lines_sel.index]
+            .apply(
+                lambda x: LineString([x["bus_1_coors"], buses.geometry.loc[x["bus1"]]]),
+                axis=1,
+            )
+            .set_crs(crs=lines.crs)
+        )
+
+        # update geometry with left and right linestrings to match bus0 and bus1
+        lines.loc[lines_sel.index, "geometry"] = (
+            lines.loc[lines_sel.index]
+            .union(bus0_linestring)
+            .union(bus1_linestring)
+            .apply(linemerge)
+        )
 
     return lines, buses
 
@@ -503,14 +458,6 @@ def set_lv_substations(buses):
     return buses
 
 
-# Note tolerance = 0.01 means around 700m
-# TODO: the current tolerance is high to avoid an issue in the Nigeria case where line 565939360-1
-#       seems to be interconnected to both ends, but at the eastern one, the node is actually not connected
-#       another line seems to be exactly touching the node, but from the data point of view it only fly over it.
-#       There may be the need to split a line in several segments in the case the line is within tolerance with
-#       respect to a node
-
-
 def merge_stations_lines_by_station_id_and_voltage(
     lines, buses, geo_crs, distance_crs, tol=2000
 ):
@@ -520,19 +467,19 @@ def merge_stations_lines_by_station_id_and_voltage(
     """
 
     logger.info(
-        "Stage 3a/4: Set substation ids with tolerance of %.2f km" % (tol / 1000)
+        "Stage 4a/5: Set substation ids with tolerance of %.2f km" % (tol / 1000)
     )
 
     # set substation ids
     set_substations_ids(buses, distance_crs, tol=tol)
 
-    logger.info("Stage 3b/4: Merge substations with the same id")
+    logger.info("Stage 4b/5: Merge substations with the same id")
 
     # merge buses with same station id and voltage
     if not buses.empty:
         buses = merge_stations_same_station_id(buses)
 
-    logger.info("Stage 3c/4: Specify the bus ids of the line endings")
+    logger.info("Stage 4c/5: Specify the bus ids of the line endings")
 
     # set the bus ids to the line dataset
     lines, buses = set_lines_ids(lines, buses, distance_crs)
@@ -545,7 +492,7 @@ def merge_stations_lines_by_station_id_and_voltage(
     # set substation_lv
     set_lv_substations(buses)
 
-    logger.info("Stage 3d/4: Add converters to lines")
+    logger.info("Stage 4d/5: Add converters to lines")
 
     # append fake converters
     # lines = pd.concat([lines, converters], ignore_index=True)
@@ -558,171 +505,131 @@ def merge_stations_lines_by_station_id_and_voltage(
     return lines, buses
 
 
-def create_station_at_equal_bus_locations(
-    lines, buses, geo_crs, distance_crs, tol=2000
-):
-    # V1. Create station_id at same bus location
-    # - We saw that buses are not connected exactly at one point, they are
-    #   usually connected to a substation "area" (analysed on maps)
-    # - Create station_id at exactly the same location might therefore be not
-    #   always correct
-    # - Though as you can see below, it might be still sometime the case.
-    #   Examples are **station 4** (2 lines with the same voltage connect at the
-    #   same point) and **station 23** (4 lines with two different voltages connect
-    #   at the same point)
-    # TODO: Filter out the generator lines - defined as going from generator to
-    #       the next station which is connected to a load. Excluding generator
-    #       lines make probably sense because they are not transmission expansion
-    #       relevant. For now we simplify and include generator lines.
-
-    # If same location/geometry make station
-    bus_all = buses
-
-    # set substation ids
-    set_substations_ids(buses, distance_crs, tol=tol)
-
-    # set the bus ids to the line dataset
-    lines, buses = set_lines_ids(lines, buses, distance_crs)
-
-    # update line endings
-    lines = line_endings_to_bus_conversion(lines)
-
-    # For each station number with multiple buses make lowest voltage `substation_lv = TRUE`
-    set_lv_substations(bus_all)
-
-    # TRY: Keep only buses that are not duplicated & lv_substation = True
-    # TODO: Check if this is necessary. What effect do duplicates have?
-    bus_all = bus_all[bus_all["substation_lv"] == True]
-
-    lines = connect_stations_same_station_id(lines, buses)
-
-    return lines, buses
-
-
-def _split_linestring_by_point(linestring, points):
+def fix_overpassing_lines(lines, buses, distance_crs, tol=1):
     """
-    Function to split a linestring geometry by multiple inner points.
+    Snap buses to lines that are within a certain tolerance. It does this by
+    first buffering the buses by the tolerance distance, and then performing a
+    spatial join to find all lines that intersect with the buffers. For each
+    group of lines that intersect with a buffer, the function identifies the
+    points that overpass the line (i.e., are not snapped to the line), and then
+    snaps those points to the nearest point on the line. The line is then split
+    at each snapped point, resulting in a new set of lines that are snapped to
+    the buses. The function returns a GeoDataFrame containing the snapped
+    lines, and the original GeoDataFrame containing the buses.
 
     Parameters
     ----------
-    lstring : LineString
-        Linestring of the line to be split
-    points : list
-        List of points to split the linestring
-
-    Return
-    ------
-    list_lines : list
-        List of linestring to split the line
+    lines : GeoDataFrame
+        GeoDataFrame containing the lines
+    buses : GeoDataFrame
+        GeoDataFrame containing the buses
+    distance_crs : str
+        Coordinate reference system to use for distance calculations
+    tol : float
+        Tolerance in meters to snap the buses to the lines
+
+    Returns
+    -------
+    lines : GeoDataFrame
+        GeoDataFrame containing the lines
     """
+    if lines.empty:
+        return lines, buses
 
-    list_linestrings = [linestring]
+    df_l = lines.copy()  # can use lines directly without copying
+    # drop all columns except id and geometry for buses
+    df_p = buses.copy()
 
-    for p in points:
-        # execute split to all lines and store results
-        temp_list = [split(l, p) for l in list_linestrings]
-        # nest all geometries
-        list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms]
+    line_id_str = "line_id"
+    bus_id_str = "bus_id"
 
-    return list_linestrings
+    # change crs to distance based
+    df_l = df_l.to_crs(distance_crs)
+    df_p = df_p.to_crs(distance_crs)
 
+    # set index to bus_id
+    df_p.set_index(bus_id_str, inplace=True)
 
-def fix_overpassing_lines(lines, buses, distance_crs, tol=1):
-    """
-    Function to avoid buses overpassing lines with no connection when the bus
-    is within a given tolerance from the line.
+    # Buffer points to create areas for spatial join
+    buffer_df = df_p.buffer(tol).to_frame()
 
-    Parameters
-    ----------
-    lines : GeoDataFrame
-        Geodataframe of lines
-    buses : GeoDataFrame
-        Geodataframe of substations
-    tol : float
-        Tolerance in meters of the distance between the substation and the line
-        below which the line will be split
-    """
+    # Spatial join to find lines intersecting point buffers
+    joined = gpd.sjoin(df_l, buffer_df, how="inner", op="intersects")
 
-    lines_to_add = []  # list of lines to be added
-    lines_to_split = []  # list of lines that have been split
+    # group lines by their index
+    group_lines = joined.groupby(level=0)
 
-    lines_epsgmod = lines.to_crs(distance_crs)
-    buses_epsgmod = buses.to_crs(distance_crs)
+    # iterate over the groups, TODO: change to apply
+    for i, group in group_lines:
+        line_id = df_l.loc[i, line_id_str]  # pick the line id that represents the group
+        line_geom = df_l.loc[i, "geometry"]
 
-    # set tqdm options for substation ids
-    tqdm_kwargs_substation_ids = dict(
-        ascii=False,
-        unit=" lines",
-        total=lines.shape[0],
-        desc="Verify lines overpassing nodes ",
-    )
+        # number of points that intersect with the line
+        num_points = len(group)
 
-    for l in tqdm(lines.index, **tqdm_kwargs_substation_ids):
-        # bus indices being within tolerance from the line
-        bus_in_tol_epsg = buses_epsgmod[
-            buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol
-        ]
+        # get the indices of the points that intersect with the line
+        points_indexes = group["index_right"].tolist()
 
-        # exclude endings of the lines
-        bus_in_tol_epsg = bus_in_tol_epsg[
-            (
-                (
-                    bus_in_tol_epsg.geometry.distance(
-                        lines_epsgmod.geometry.loc[l].boundary.geoms[0]
-                    )
-                    > tol
-                )
-                | (
-                    bus_in_tol_epsg.geometry.distance(
-                        lines_epsgmod.geometry.loc[l].boundary.geoms[1]
-                    )
-                    > tol
-                )
-            )
-        ]
+        # get the geometries of the points that intersect with the line
+        all_points = df_p.loc[points_indexes, "geometry"]
 
-        if not bus_in_tol_epsg.empty:
-            # add index of line to split
-            lines_to_split.append(l)
+        # discard points related to the extrema points (the buses) of each line
+        distance_from_buses = all_points.distance(line_geom.boundary)
+        overpassing_points = list(all_points[distance_from_buses > tol])
 
-            buses_locs = buses.geometry.loc[bus_in_tol_epsg.index]
+        # if no overpassing points are identified, skip iteration
+        if len(overpassing_points) == 0:
+            continue
 
-            # get new line geometries
-            new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs)
-            n_geoms = len(new_geometries)
+        # find all the nearest points on the line to the points that intersect with the line
+        nearest_points_list = [
+            nearest_points(line_geom, point)[0] for point in overpassing_points
+        ]
 
-            # create temporary copies of the line
-            df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms)
-            # update geometries
-            df_append["geometry"] = new_geometries
-            # update name of the line
-            df_append["line_id"] = [
-                str(df_append["line_id"].iloc[0]) + f"_{id}" for id in range(n_geoms)
-            ]
+        # sort the nearest points based on their distance from the start point of the line
+        nearest_points_list.sort(key=lambda point: line_geom.project(point))
 
-            lines_to_add.append(df_append)
+        # split the line at each nearest point using the split function
+        split_line = [line_geom]
+        for point in nearest_points_list:
+            # Split the line at the current point
+            # The split function returns a GeometryCollection, so we need to convert it to a list
+            split_lines = split(split_line[-1], point)
+            split_line = split_line[:-1] + list(split_lines.geoms)
 
-    if not lines_to_add:
-        return lines, buses
+        # convert the split line to a multilinestring
+        split_line = MultiLineString(split_line)
 
-    df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True))
-    df_to_add.set_crs(lines.crs, inplace=True)
-    df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True)
+        # replace the line with the split line in lines df
+        df_l.loc[i, "geometry"] = split_line
 
-    # update length
-    df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length
+    # explode the multilinestrings (not recommended, but included for completion)
+    # exploding the df should be done at the last step
+    # if an operation requires separate lines, it should be done using df.explode().apply(your_function)
+    # which is a lot more memory efficient
+    df_l = df_l.explode(index_parts=True).reset_index()
 
-    # update line endings
-    df_to_add = line_endings_to_bus_conversion(df_to_add)
+    # revise line_id to account for part index
+    df_l[line_id_str] = (
+        df_l[line_id_str].astype(str) + "_" + df_l["level_1"].astype(str)
+    )
+    df_l.drop(columns=["level_0", "level_1"], inplace=True)
+
+    # update line endings (included for completion, the scope of the function should be limited to fixing overpassing lines)
+    # commented out due to errors in the bus conversion function
+    # df_l = line_endings_to_bus_conversion(df_l)
 
-    # remove original lines
-    lines.drop(lines_to_split, inplace=True)
+    # update length
+    df_l["length"] = df_l.to_crs(distance_crs).geometry.length
 
-    lines = df_to_add if lines.empty else pd.concat([lines, df_to_add])
+    # return to original crs
+    df_l = df_l.to_crs(lines.crs)
 
-    lines = gpd.GeoDataFrame(lines.reset_index(drop=True), crs=lines.crs)
+    # remove lines that are rings (included for completion), TODO: this should be a separate function
+    df_l = df_l[~df_l.geometry.is_ring].reset_index(drop=True)
 
-    return lines, buses
+    # buses should not be returned as they are not changed, but included for completion
+    return df_l, buses
 
 
 def force_ac_lines(df, col="tag_frequency"):