From e0dc8912a68bcdb3a93758166f8569392df12056 Mon Sep 17 00:00:00 2001 From: wangzhao0217 <74598734+wangzhao0217@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:37:05 +0000 Subject: [PATCH] add p2 python (#22) * add python code * remove cache * Remove bash code from materials * add python code * remove cache * Add cache, try to fix actions * Add draft pixi code * Start tidy-up of p2 * Add timetable * Update pr workflow * Remove yaml pkg from workflows --------- Co-authored-by: robinlovelace --- .gitattributes | 2 + .github/workflows/pr.yml | 2 +- .github/workflows/publish.yml | 2 +- .gitignore | 4 + p1/index.qmd | 2 +- p2/index.qmd | 351 +++++++++++++++++++++++++++++++--- pixi.toml | 28 +++ schedule.qmd | 7 +- timetable.csv | 12 ++ 9 files changed, 382 insertions(+), 28 deletions(-) create mode 100644 .gitattributes create mode 100644 pixi.toml create mode 100644 timetable.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..07fe41c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# GitHub syntax highlighting +pixi.lock linguist-language=YAML linguist-generated=true diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 87ab33f..a529de4 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -31,7 +31,7 @@ jobs: sudo apt-get update sudo apt-get install python3 sudo apt-get install python3-pip - python3 -m pip install jupyter + python3 -m pip install jupyter jupyter-cache - name: Build Quarto Project run: quarto render \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3b0f2ed..80f109b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -34,7 +34,7 @@ jobs: sudo apt-get update sudo apt-get install python3 sudo apt-get install python3-pip - python3 -m pip install jupyter + python3 -m pip install jupyter jupyter-cache - name: Render and Publish uses: quarto-dev/quarto-actions/publish@v2 diff --git a/.gitignore b/.gitignore index 4f021ce..b4e12fb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ docs _freeze/ *.csv *.ics + +# pixi environments +.pixi +*.egg-info diff --git a/p1/index.qmd b/p1/index.qmd index b5d10fe..f8d4181 100644 --- a/p1/index.qmd +++ b/p1/index.qmd @@ -185,7 +185,7 @@ ve = get_stats19(year = 2019, type = "veh") ## Python ``` python -# Todo: start with installing stats19 Python package +# pip install stats19 import stats19 ac = stats19.get_stats19(year = 2019, type = "collision") ca = stats19.get_stats19(year = 2019, type = "cas") diff --git a/p2/index.qmd b/p2/index.qmd index f68dac7..bd44fa2 100644 --- a/p2/index.qmd +++ b/p2/index.qmd @@ -6,7 +6,11 @@ title: "Origin-destination data" toc: true execute: cache: true + eval: false + warning: false + message: false bibliography: ../tds.bib +jupyter: python3 --- # Review Homework @@ -28,31 +32,55 @@ Install the packages by typing in the following commands into RStudio (you do no If you need to install any of these packages use: -```{r echo = T, results = 'hide', eval = FALSE} -install.packages("sf") # Install a package from CRAN -remotes::install_github("Nowosad/spDataLarge") # install from GitHub using the remotes package +::: {.panel-tabset group="language"} + +## R + +```{r} +#| eval: false +if (!require("pak")) install.packages("pak") +pak::pkg_install(c("sf", "tidyverse", "remotes")) +# GitHub pkgs +pak::pkg_install("Nowosad/spDataLarge") ``` -```{r echo = T, results = 'hide', warning=FALSE, message=FALSE} +```{r} library(sf) # vector data package 
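# (sf links R to the GDAL, GEOS and PROJ libraries, which power file
# reading, geometry operations and coordinate reference system handling)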
library(tidyverse) # tidyverse packages
library(spData) # spatial data package
```

## Python

```{python, eval=FALSE}
# Install necessary packages (uncomment if not already installed)
# !pip install geopandas pandas matplotlib seaborn

import geopandas as gpd # vector data package
import pandas as pd # data manipulation
import matplotlib.pyplot as plt # plotting
import seaborn as sns # advanced plotting
# Note: geopandas no longer bundles sample datasets (geopandas.datasets was
# deprecated and removed in geopandas 1.0), so below we read Natural Earth
# data directly from a URL instead
```

:::

1. Check your packages are up-to-date with `update.packages()` in R (or equivalent in Python)
1. Create a project folder with an appropriate name for this session (e.g. `practical2`)
1. Create appropriate folders for code, data and anything else (e.g. images)
1. Create a script called `learning-OD.R` (or `learning-OD.py` if you are working in Python), e.g. with the following commands:

```sh
mkdir code
code code/learning-OD.R # for R; the `code` command opens a file in VS Code
code code/learning-OD.py # for Python
```

## Basic sf operations

We will start with a simple map of the world. Load the `world` object from the `spData` package. Notice the use of `::` to say that you want the `world` object from the `spData` package.

```{r, echo = T, results = 'hide'}
world = spData::world
```

```{python, eval=FALSE}
world = gpd.read_file(
    'https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_countries.zip'
)
```

Use some basic R functions to explore the `world` object, e.g. `class(world)`, `dim(world)`, `head(world)`, `summary(world)`. Also view the `world` object by clicking on it in the Environment panel.

`sf` objects can be plotted with `plot()`.

```{r}
plot(world)
```

```{python, eval=FALSE}
print(type(world)) # Equivalent to class(world)
print(world.shape) # Equivalent to dim(world)
print(world.head()) # Equivalent to head(world)
print(world.describe()) # Equivalent to summary(world)

# Plotting the world GeoDataFrame
world.plot(figsize=(12, 8))
plt.title('World Map')
plt.show()
```

Note that this makes a map of each column in the data frame. Try some other plotting options:

```{r}
plot(world[3:6])
plot(world["pop"])
```

```{python, eval=FALSE}
# Since world is a GeoDataFrame, GeoPandas plots the geometry coloured by a
# column (column names vary between Natural Earth releases; check world.columns)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
world.plot(column='POP_EST', ax=axes[0])
world.plot(column='GDP_MD', ax=axes[1]) # GDP in millions of US dollars
world.plot(column='CONTINENT', ax=axes[2])
plt.show()
```

## Basic spatial operations

Load the `nz` and `nz_height` datasets from the `spData` package.

```{r, echo = T, results = 'hide'}
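# Two sf datasets ship with spData: nz contains polygons for New Zealand's
# 16 regions; nz_height contains points for the country's 101 highest peaks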
nz = spData::nz
nz_height = spData::nz_height
```

```{python, eval=FALSE}
nz = gpd.read_file("https://github.com/Nowosad/spData_files/raw/refs/heads/main/data/nz.gpkg")
nz_height = gpd.read_file("https://github.com/Nowosad/spData_files/raw/refs/heads/main/data/nz_height.gpkg")
```

We can use `tidyverse` functions like `filter` and `select` on `sf` objects in the same way you did in Practical 1.

```{r, echo = T, results = 'hide'}
canterbury = nz |> filter(Name == "Canterbury")
canterbury_height = nz_height[canterbury, ]
```

```{python, eval=FALSE}
canterbury = nz[nz['Name'] == 'Canterbury']
# the spatial subsetting step is shown in the next block
```

In this case we filtered the `nz` object to only include places called `Canterbury` and then did an intersection to find objects in the `nz_height` object that are in Canterbury.

This syntax is not very clear, but it is equivalent to:

```{r, eval=FALSE}
canterbury_height = nz_height[canterbury, , op = st_intersects]
```

```{python, eval=FALSE}
canterbury_height = gpd.overlay(nz_height, canterbury, how='intersection')
```

There are many different types of relationships you can use with `op`. Try `?st_intersects()` to see more. For example this would give all the places not in Canterbury:

```{r, eval=FALSE}
nz_height[canterbury, , op = st_disjoint]
```

```{python, eval=FALSE}
# A geopandas equivalent: keep the points that do NOT intersect Canterbury
# (sjoin's old `op` argument is now called `predicate`, and it does not
# support 'disjoint', so we negate an intersects test instead)
canterbury_geom = canterbury.union_all() # use .unary_union on geopandas < 1.0
nz_height_disjoint = nz_height[~nz_height.intersects(canterbury_geom)]
```

![Topological relations between vector geometries, inspired by Figures 1 and 2 in Egenhofer and Herring (1990). The relations for which the function(x, y) is true are printed for each geometry pair, with x represented in pink and y represented in blue. The nature of the spatial relationship for each pair is described by the Dimensionally Extended 9-Intersection Model string.](https://r.geocompx.org/figures/relations-1.png)

First we will load some sample data:

```{r, echo=FALSE}
od_data = stplanr::od_data_sample
zone = stplanr::cents_sf
```

```{python, eval=FALSE}
import pandas as pd
od_data = pd.read_csv('https://github.com/ropensci/stplanr/releases/download/v1.2.2/od_data_sample.csv')
```

You can click on the data in the environment panel to view it or use `head(od_data)`.

Now we will rename one of the columns from `foot` to `walk`:

```{r, echo=FALSE}
od_data = od_data |>
  rename(walk = foot)
```

```{python, eval=FALSE}
od_data.rename(columns={'foot': 'walk'}, inplace=True)
```

Next we will make a new dataset, `od_data_walk`, by taking `od_data` and piping it (`|>`) to `filter` the data frame to only include rows where `walk > 0`. Then `select` a few of the columns and calculate two new columns, `proportion_walk` and `proportion_drive`.

```{r, echo=FALSE}
od_data_walk = od_data |>
  filter(walk > 0) |>
  select(geo_code1, geo_code2, all, car_driver, walk) |>
  mutate(proportion_walk = walk / all,
         proportion_drive = car_driver / all)
```

```{python, eval=FALSE}
od_data_walk = od_data[od_data['walk'] > 0].copy()
od_data_walk = od_data_walk[['geo_code1', 'geo_code2', 'all', 'car_driver', 'walk']]
od_data_walk['proportion_walk'] = od_data_walk['walk'] / od_data_walk['all']
od_data_walk['proportion_drive'] = od_data_walk['car_driver'] / od_data_walk['all']
```

We can use the generic `plot` function to view the relationships between variables.

```{r}
plot(od_data_walk)
```

```{python, eval=FALSE}
sns.pairplot(od_data_walk)
plt.show()
```

R has built-in modelling functions such as `lm`. Let's make a simple model to predict the proportion of people who walk based on the proportion of people who drive.

```{r, echo=FALSE}
model1 = lm(proportion_walk ~ proportion_drive, data = od_data_walk)
od_data_walk$proportion_walk_predicted = model1$fitted.values
```

```{python, eval=FALSE}
# pip install statsmodels
import statsmodels.formula.api as smf

model1 = smf.ols('proportion_walk ~ proportion_drive', data=od_data_walk).fit()
od_data_walk['proportion_walk_predicted'] = model1.fittedvalues
```

We can use the `ggplot2` package to graph our model predictions.

```{r}
ggplot(od_data_walk) +
  geom_point(aes(proportion_drive, proportion_walk)) +
  geom_line(aes(proportion_drive, proportion_walk_predicted))
```

```{python, eval=FALSE}
# Sort by the x variable so the prediction line draws left-to-right
od_data_walk = od_data_walk.sort_values('proportion_drive')

plt.figure(figsize=(8, 6))
plt.scatter(od_data_walk['proportion_drive'], od_data_walk['proportion_walk'], label='Observed')
plt.plot(od_data_walk['proportion_drive'], od_data_walk['proportion_walk_predicted'], color='red', label='Predicted')
plt.xlabel('Proportion Drive')
plt.ylabel('Proportion Walk')
plt.legend()
plt.show()
```

Exercises

1. What is the class of the data in `od_data`?

@@ -180,24 +293,41 @@ Exercises
```{r, echo=FALSE, eval=FALSE}
#1
class(od_data)
```

```{python, eval=FALSE}
print("Class of od_data:", type(od_data))
```

```{r, echo=FALSE, eval=FALSE}
#2
od_data_walk = od_data |>
  filter(walk > 0)
nrow(od_data_walk) / nrow(od_data) * 100
```

```{python, eval=FALSE}
od_data_walk = od_data[od_data['walk'] > 0].copy()
percentage_walk = (len(od_data_walk) / len(od_data)) * 100
print(f"Percentage of OD pairs where at least one person walks: {percentage_walk:.1f}%")
```

```{r, echo=FALSE, eval=FALSE}
#3
od_data = od_data |>
  filter(bicycle > 0) |>
  mutate(perc_cycle = (bicycle/all) * 100)
```

```{python, eval=FALSE}
od_data_cycle = od_data[od_data['bicycle'] > 0].copy()
od_data_cycle['perc_cycle'] = (od_data_cycle['bicycle'] / od_data_cycle['all']) * 100
```

```{r, echo=FALSE, eval=FALSE}
#4
od_data_new = od_data |>
  filter(walk > 0, bicycle > 0) |>
  select(bicycle, walk, all)

model = lm(walk ~ bicycle, weights = all, data = od_data_new)
od_data_new$walk_predicted = model$fitted.values

ggplot(od_data_new) +
  geom_point(aes(bicycle, walk, size = all)) +
  geom_line(aes(bicycle, walk_predicted))
```

```{python, eval=FALSE}
od_data_new = od_data[(od_data['walk'] > 0) & (od_data['bicycle'] > 0)].copy()
od_data_new = od_data_new[['bicycle', 'walk', 'all']]

# Weighted linear regression
import statsmodels.api as sm

weights = od_data_new['all']
X = sm.add_constant(od_data_new['bicycle'])
wls_model = sm.WLS(od_data_new['walk'], X, weights=weights)
results = wls_model.fit()
od_data_new['walk_predicted'] = results.fittedvalues

# Plotting the relationship (sorted so the fitted line draws cleanly)
od_data_new = od_data_new.sort_values('bicycle')
plt.figure(figsize=(8, 6))
plt.scatter(od_data_new['bicycle'], od_data_new['walk'], s=od_data_new['all']*0.1, label='Data')
plt.plot(od_data_new['bicycle'], od_data_new['walk_predicted'], color='red', label='Fitted Line')
plt.xlabel('Bicycle')
plt.ylabel('Walk')
plt.legend()
plt.show()
```

```{r, echo=FALSE, eval=FALSE}
#5
desire_lines = stplanr::od2line(flow = od_data, zones = zone)
plot(desire_lines)
# save zone as GeoJSON for use in Python
sf::st_write(zone, "zone.geojson")
```

```{python, eval=FALSE}
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString

od_data = pd.read_csv('https://github.com/ropensci/stplanr/releases/download/v1.2.2/od_data_sample.csv')

zones = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/zones.geojson')

# Ensure the CRS is set (replace 'epsg:4326' with your actual CRS if different)
if zones.crs is None:
    zones.set_crs(epsg=4326, inplace=True)

# If zones are polygons, compute centroids
if zones.geom_type.isin(['Polygon', 'MultiPolygon']).any():
    print("Creating centroids representing desire line start and end points.")
    zones['geometry'] = zones.centroid

# Create a mapping from 'geo_cod' to 'geometry'
geo_cod_to_geometry = dict(zip(zones['geo_cod'], zones['geometry']))

# Map origin and destination geometries
od_data['geometry_o'] = od_data['geo_code1'].map(geo_cod_to_geometry)
od_data['geometry_d'] = od_data['geo_code2'].map(geo_cod_to_geometry)

# Check for any missing matches
missing_origins = od_data[od_data['geometry_o'].isnull()]
missing_destinations = od_data[od_data['geometry_d'].isnull()]

if not missing_origins.empty:
    print(f"Missing origin geometries for {len(missing_origins)} records")
if not missing_destinations.empty:
    print(f"Missing destination geometries for {len(missing_destinations)} records")

# Remove rows with missing geometries
od_data.dropna(subset=['geometry_o', 'geometry_d'], inplace=True)

# Create LineString geometries for desire lines
od_data['geometry'] = od_data.apply(
    lambda row: LineString([row['geometry_o'], row['geometry_d']]), axis=1
)

# Create a GeoDataFrame for the desire lines
desire_lines = gpd.GeoDataFrame(od_data, geometry='geometry', crs=zones.crs)

# Plot the desire lines
desire_lines.plot()
```

# Processing origin-destination data in Bristol

This section is based on [Chapter 12 of Geocomputation with R](https://geocompr.robinlovelace.net/transport.html). You should read this chapter in full in your own time.

```{r, echo = T, results = 'hide'}
od = spDataLarge::bristol_od
zones = spDataLarge::bristol_zones
```

```{python, eval=FALSE}
# These release assets are assumed to mirror spDataLarge::bristol_od
# and spDataLarge::bristol_zones
od = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/bristol_od.geojson')

zones = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/bristol_zones.geojson')

if zones.crs is None:
    zones.set_crs(epsg=4326, inplace=True)

# If zones are polygons, compute centroids
if zones.geom_type.isin(['Polygon', 'MultiPolygon']).any():
    print("Creating centroids representing desire line start and end points.")
    zones['geometry'] = zones.centroid

# Create a mapping from 'geo_code' to 'geometry'
geo_code_to_geometry = dict(zip(zones['geo_code'], zones['geometry']))

# Map origin and destination geometries (bristol_od stores zone ids in 'o' and 'd')
od['geometry_o'] = od['o'].map(geo_code_to_geometry)
od['geometry_d'] = od['d'].map(geo_code_to_geometry)

# Check for any missing matches
missing_origins = od[od['geometry_o'].isnull()]
missing_destinations = od[od['geometry_d'].isnull()]

if not missing_origins.empty:
    print(f"Missing origin geometries for {len(missing_origins)} records")
if not missing_destinations.empty:
    print(f"Missing destination geometries for {len(missing_destinations)} records")

# Remove rows with missing geometries
od.dropna(subset=['geometry_o', 'geometry_d'], inplace=True)

# Create LineString geometries for desire lines
od['geometry'] = od.apply(
    lambda row: LineString([row['geometry_o'], row['geometry_d']]), axis=1
)

# Create a GeoDataFrame for the desire lines
desire_lines = gpd.GeoDataFrame(od, geometry='geometry', crs=zones.crs)

# Plot the desire lines
desire_lines.plot()
```

Explore these datasets using the functions you have already learnt (e.g. `head`, `nrow`).

You will notice that the `od` dataset has shared id values with the `zones` dataset. We can use these to make desire lines between each zone. But first we must filter out trips that start and end in the same zone.

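As a quick check before filtering, count how many OD pairs start and end in the same zone (a minimal sketch, assuming the `od` object loaded above):

```{r, eval=FALSE}
sum(od$o == od$d) # number of intra-zonal OD pairs
nrow(od) # total number of OD pairs
```
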
@@ -242,6 +490,18 @@ You will notice that the `od` datasets has shared id values with the `zones` dat od_inter = filter(od, o != d) desire_lines = od2line(od_inter, zones) ``` + +```{python, eval=FALSE} +# Filter OD data where origin and destination are different +od_inter = od[od['o'] != od['d']].copy() + +od_inter = od_inter.merge(zones[['geo_code', 'geometry']], left_on='o', right_on='geo_code', how='left') +od_inter.rename(columns={'geometry': 'origin_geometry'}, inplace=True) +od_inter = od_inter.merge(zones[['geo_code', 'geometry']], left_on='d', right_on='geo_code', how='left') +od_inter.rename(columns={'geometry': 'destination_geometry'}, inplace=True) + +``` + Let's calculate the percentage of trips that are made by active travel ```{r, echo = T, results = 'hide'} @@ -249,6 +509,10 @@ desire_lines$Active = (desire_lines$bicycle + desire_lines$foot) / desire_lines$all * 100 ``` +```{python, eval=FALSE} +desire_lines['Active'] = (desire_lines['bicycle'] + desire_lines['foot']) / desire_lines['all'] * 100 +``` + Now use `tmap` to make a plot showing the number of trips and the percentage of people using active travel. ```{r, echo = T, results = 'hide', warning=FALSE, message=FALSE} @@ -263,12 +527,41 @@ tm_shape(desire_lines) + # Define the data frame used to make the map tm_scale_bar() # Add a scale bar to the map ``` +```{python, eval=FALSE} +desire_lines = desire_lines.sort_values('Active') + +# Normalize line widths for plotting +max_trips = desire_lines['all'].max() +desire_lines['linewidth'] = (desire_lines['all'] / max_trips) * 5 + +# Plotting desire lines with active travel percentage +fig, ax = plt.subplots(figsize=(12, 10)) +desire_lines.plot( + ax=ax, + column='Active', + cmap='plasma', + linewidth=desire_lines['linewidth'], + alpha=0.7, + legend=True +) +plt.title('Desire Lines with Active Travel Percentage') + +# Add basemap (optional) +# ctx.add_basemap(ax, crs=desire_lines.crs.to_string()) + +plt.show() +``` + Now that we have geometry attached to our data we can calculate other variables of interest. For example let's calculate the distacne travelled and see if it relates to the percentage of people who use active travel. ```{r} desire_lines$distance_direct_m = as.numeric(st_length(desire_lines)) ``` +```{python, eval=FALSE} +desire_lines['distance_direct_m'] = desire_lines.geometry.length +``` + Note the use of `as.numeric` by default `st_length` and many other functions return a special type of result with `unit`. Here we force the results back into the basic R numerical value. But be careful! The units you get back depend on the coordinate reference system, so check your data before you assume what values mean. ```{r, warning=FALSE, message=FALSE} @@ -277,6 +570,16 @@ ggplot(desire_lines) + geom_smooth(aes(x = distance_direct_m, y = Active)) ``` +```{python, eval=FALSE} +plt.figure(figsize=(8, 6)) +sns.scatterplot(data=desire_lines, x='distance_direct_m', y='Active', size='all', legend=False) +sns.regplot(data=desire_lines, x='distance_direct_m', y='Active', scatter=False, color='red') +plt.xlabel('Distance (meters)') +plt.ylabel('Active Travel Percentage') +plt.title('Active Travel vs Distance') +plt.show() +``` + The blue line is a smoothed average of the data. It shows a common concept in transport research, the distance decay curve. In this case it shows that the longer the journey the less likely people are to use active travel. But this concept applies to all kinds of travel decisions. 
For example you are more likely to travel to a nearby coffee shop than a far away coffee shop. Different types of trip have different curves, but most people always have a bias for shorter trips. diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 0000000..eaf7a67 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,28 @@ +[project] +channels = ["conda-forge"] +description = "Add a short description here" +name = "TDStests" +platforms = ["win-64"] +version = "0.1.0" + +[tasks] + +[dependencies] +jupyter = "*" +jupyter-cache = "*" +geopandas = "*" +matplotlib = "*" +shapely = "*" +seaborn = "*" +quarto = "*" +r-base = "*" +r-irkernel = "*" +r-tidyverse = "*" +r-sf = "*" +r-quarto = "*" +r-nycflights13 = "*" +r-remotes = "*" +r-DT = "*" +r-reticulate = "*" +r-spData = "*" +r-pak = "*" \ No newline at end of file diff --git a/schedule.qmd b/schedule.qmd index 9a09e33..bc0c758 100644 --- a/schedule.qmd +++ b/schedule.qmd @@ -6,8 +6,12 @@ execute: ```{r} #| include: false -# Aim: create ical of all lectures and practicals of TDS library(tidyverse) +``` + +```{r} +#| eval: false +# Aim: create ical of all lectures and practicals of TDS # Start date of week 1 (source: https://ses.leeds.ac.uk/info/21630/timetabling/1384/teaching-week-patterns-202425) w_start = as.Date("2024-09-30") w_start @@ -141,6 +145,7 @@ system("gh release upload 2025 timetable.ics timetable.csv") ``` ```{r} +tt_csv = readr::read_csv("timetable.csv") timetable = tt_csv |> mutate( session_code = paste0( diff --git a/timetable.csv b/timetable.csv new file mode 100644 index 0000000..4e70452 --- /dev/null +++ b/timetable.csv @@ -0,0 +1,12 @@ +summary,description,date,duration,location +TDS Practical 1: intro,Introduction to transport data science,2025-01-30,3,Richard Hughes Cluster (1.40) +TDS deadline 1,Computer set-up,2025-01-31,0,Online - Teams +TDS Practical 2: od,Origin-destination data,2025-02-06,3,Richard Hughes Cluster (1.40) +TDS Practical 3: routing,Routing,2025-02-13,3,Richard Hughes Cluster (1.40) +TDS seminar 1,Seminar 1: TBC,2025-02-20,1,Institute for Transport Studies 1.11 +TDS deadline 2,Draft portfolio,2025-02-21,0,Online - Teams +TDS Practical 4: getting,Getting transport data,2025-03-06,3,Richard Hughes Cluster (1.40) +TDS seminar 2,"Seminar 2 Will Deakin, Network Rail",2025-03-20,3,Institute for Transport Studies 1.11 +TDS Practical 5: visualisation,Visualising transport data,2025-03-27,3,Richard Hughes Cluster (1.40) +TDS Practical 6: project,Project work,2025-05-01,3,Richard Hughes Cluster (1.40) +TDS deadline 3,"Deadline: coursework, 2pm",2025-05-16,0,Online - Teams