Skip to content

Commit

Permalink
find{Arrival,Departure}: fall back to matching by trip_headsign ✅📝
Browse files Browse the repository at this point in the history
  • Loading branch information
derhuerst committed Jan 8, 2024
1 parent 7005b58 commit 1918b63
Show file tree
Hide file tree
Showing 17 changed files with 324 additions and 18 deletions.
3 changes: 3 additions & 0 deletions build-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const {Client} = require('pg')
const QueryStream = require('pg-query-stream')
const stops = require('./lib/prepare-stable-ids/stops')
const routes = require('./lib/prepare-stable-ids/routes')
const tripHeadsigns = require('./lib/prepare-stable-ids/trip-headsigns')

const ARRS_DEPS_WITH_STABLE_IDS = readFileSync(require.resolve('./lib/arrivals_departures_with_stable_ids.sql'))
const FIND_ARR_DEP = readFileSync(require.resolve('./lib/find_arr_dep.sql'))
Expand Down Expand Up @@ -83,6 +84,8 @@ BEGIN;
await convert(stops)
console.error('routes')
await convert(routes)
console.error('trip headsigns')
await convert(tripHeadsigns)

process.stdout.write(`
CREATE INDEX ON trips (trip_id);
Expand Down
95 changes: 80 additions & 15 deletions lib/find-arrival-departure.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const inBerlinTime = (d) => {
.toISO({suppressMilliseconds: true})
}

const createFindArrDep = (gtfsRtInfo, gtfsInfo, type, debug, queryName, queryText) => async (_) => {
const createFindArrDep = (gtfsRtInfo, gtfsInfo, type, debug, baseQuery, withHeadsignQuery) => async (_) => {
debug(type, _)

// todo: DRY with stopovers matching in find-trip
Expand Down Expand Up @@ -45,10 +45,10 @@ const createFindArrDep = (gtfsRtInfo, gtfsInfo, type, debug, queryName, queryTex
const lineStableIds = getStableRouteIds(_.line).map(([id]) => id)

// todo: search by trip ID + date?
const query = {
const _query0 = {
// allow `pg` to create a prepared statement
name: queryName,
text: queryText,
name: baseQuery.name,
text: baseQuery.text,
values: [
// todo: filter using `station_stable_id`?
[...stopStableIds, ...stationStableIds],
Expand All @@ -57,13 +57,59 @@ const createFindArrDep = (gtfsRtInfo, gtfsInfo, type, debug, queryName, queryTex
timeFrameEnd,
],
}
debug('query', query)
debug('query', _query0)

const {rows: matched} = await db.query(query)
const _res0 = await db.query(_query0)
let matched = _res0.rows
if (matched.length > 1) {
debug('more than 1 match; first two matches:', matched)
// todo: try again but also filter by headsign
return null

// todo: this is too hafas-client-specific
// gtfs term: "headsign"
// hafas-client [0] term: "direction"
// [0] https://github.com/public-transport/hafas-client
if (!_.direction) {
debug(type + ' has no .direction, giving up')
return null;
}
// This assumes that the headsign/direction contains a stop name, which is not the case in every region on earth.
// todo [breaking]: make gtfs(Rt)Info.normalizeTripHeadsign mandatory
const normalizeDirection = 'normalizeTripHeadsign' in gtfsRtInfo
? gtfsRtInfo.normalizeTripHeadsign
: gtfsRtInfo.normalizeStopName

// todo: handle trip_headsign being optional in GTFS
const _query1 = {
// allow `pg` to create a prepared statement
name: withHeadsignQuery.name,
text: withHeadsignQuery.text,
values: [
..._query0.values,
normalizeDirection(_.direction),
],
}
debug('querying with the trip headsign to narrow down', _query1)

const _res1 = await db.query(_query1)
matched = _res1.rows
if (matched.length > 1) {
debug('with headsign still >1 matches; first two:', matched)
// With the VBB data, this happens
// - at a terminus stop,
// - where one "run" ends (and the respective stop_times entry specifies a departure_time)
// - while another "run" of the same trip starts simultaneously, but
// - only with lines that have an unchanging headsign (shuttle service, cycles, etc).
// todo: prevent this by querying a *subsequent* arrival/departure (when finding a departure, a previous one for when finding an arrival).
// WHERE EXISTS (
// SELECT *
// FROM arrivals_departures_with_stable_ids ad2
// -- find a previous arrival/departure on the same trip & date
// WHERE ad2.trip_id = arrivals_departures_with_stable_ids.trip_id
// AND ad2.date = arrivals_departures_with_stable_ids.date
// AND ad2.stop_sequence_consec < arrivals_departures_with_stable_ids.stop_sequence_consec
// )
return null
}
}
const m = matched[0]
if (!m) return null
Expand Down Expand Up @@ -130,12 +176,12 @@ const createCachedFindArrDep = (gtfsRtInfo, gtfsInfo, type) => {
} else {
throw new Error(`invalid type (${type}), must be arrival or departure`)
}
const queryText = `\
const _baseQuery = (addColumns, joins, conditions) => `\
SELECT DISTINCT ON (trip_id, "date", stop_sequence_consec)
*,
(date::date)::text as date_as_text
FROM arrivals_departures_with_stable_ids
WHERE stop_stable_id = ANY($1)
ad.*,
(date::date)::text as date_as_text${addColumns}
FROM arrivals_departures_with_stable_ids ad
${joins}WHERE stop_stable_id = ANY($1)
AND route_stable_id = ANY($2)
AND t_${type} >= $3::timestamp with time zone
AND t_${type} <= $4::timestamp with time zone
Expand All @@ -145,11 +191,30 @@ AND t_${type} <= $4::timestamp with time zone
-- arrival/departure time.
AND "date" >= dates_filter_min($3::timestamp with time zone)
AND "date" <= dates_filter_max($4::timestamp with time zone)
LIMIT 2
${conditions}LIMIT 2
`
const baseQuery = {
name: queryName,
text: _baseQuery('', '', ''),
}
const withHeadsignQuery = {
name: queryName + '_with_headsign',
text: _baseQuery(
// `normalized_trip_headsign` column from `trips_normalized_headsigns`
`,
normalized_trip_headsign`,
// join `trips_normalized_headsigns`
`\
LEFT JOIN trips_normalized_headsigns tnh ON ad.trip_id = tnh.trip_id
`,
// filter by `normalized_trip_headsign`
`\
AND normalized_trip_headsign = $5
`),
}

return withCaching(
createFindArrDep(gtfsRtInfo, gtfsInfo, type, debug, queryName, queryText),
createFindArrDep(gtfsRtInfo, gtfsInfo, type, debug, baseQuery, withHeadsignQuery),
// todo:
// With HAFAS, `tripId` uniquely identifies a vehicle going from A to B
// at *one point in time*. Within GTFS semantics, a "trip" happens on
Expand Down
1 change: 1 addition & 0 deletions lib/find-trip.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ WITH dep AS (
-- arrival/departure time.
AND "date" >= dates_filter_min($3::timestamp with time zone)
AND "date" <= dates_filter_max($4::timestamp with time zone)
-- todo: what if there is >1 match? match by headsign?
LIMIT 1
)
SELECT DISTINCT ON (trip_id, date, stop_sequence)
Expand Down
52 changes: 52 additions & 0 deletions lib/prepare-stable-ids/trip-headsigns.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
'use strict'

const csv = require('../csv')

// SQL query that reads the ID and the (raw, not yet normalized) headsign of
// every row in `trips`. Exported below as `query`.
const TRIPS = `
SELECT
trip_id,
trip_headsign
FROM trips
`

// SQL preamble: creates the `trips_normalized_headsigns` table (one normalized
// headsign per trip_id, both columns NOT NULL) and then opens a
// `COPY … FROM STDIN` in CSV format, so everything that follows in the
// generated SQL stream is read as table rows until the end-of-data marker.
// NOTE(review): presumably the rows are the ones written by the row handler
// via `csv.write()` — confirm against the `convert()` helper in build-index.js.
const beforeAll = `
CREATE TABLE trips_normalized_headsigns (
trip_id TEXT NOT NULL,
FOREIGN KEY (trip_id) REFERENCES trips,
normalized_trip_headsign TEXT NOT NULL
);
COPY trips_normalized_headsigns FROM STDIN csv;
`

/**
 * Creates the per-row handler that writes one CSV row
 * `[trip_id, normalized headsign]` for each row of the `TRIPS` query.
 *
 * @param {object} gtfsRtInfo - GTFS-RT side configuration (not used here).
 * @param {object} gtfsInfo - GTFS (Static) side configuration. Must provide
 *   `normalizeStopName`; `normalizeTripHeadsign` is used instead if present.
 * @returns {function({trip_id: string, trip_headsign: ?string}): void}
 *   Handler to be called once per trip row.
 */
const createOnTrip = (gtfsRtInfo, gtfsInfo) => {
	// This assumes that the headsign contains a stop name, which is not the case in every region on earth.
	// todo [breaking]: make gtfs(Rt)Info.normalizeTripHeadsign mandatory
	const normalizeTripHeadsign = 'normalizeTripHeadsign' in gtfsInfo
		? gtfsInfo.normalizeTripHeadsign
		: gtfsInfo.normalizeStopName

	const onTrip = ({trip_id, trip_headsign}) => {
		// `trip_headsign` is optional in GTFS, so it may be missing here.
		// Passing null/undefined to a string-based normalizer would throw, and
		// `normalized_trip_headsign` is declared NOT NULL anyway, so we write
		// no row for such a trip: it simply cannot be matched by headsign.
		if (trip_headsign === null || trip_headsign === undefined) return

		const normalizedHeadsign = normalizeTripHeadsign(trip_headsign)

		csv.write([
			trip_id,
			normalizedHeadsign,
		])
	}
	return onTrip
}

// SQL epilogue: the escaped `\\.` yields a literal `\.` line, which is the
// end-of-data marker terminating the `COPY … FROM STDIN` opened in `beforeAll`.
// Afterwards, both columns of the new table get an index.
const afterAll = `\
\\.
CREATE INDEX ON trips_normalized_headsigns (trip_id);
CREATE INDEX ON trips_normalized_headsigns (normalized_trip_headsign);
`

// Conversion task description: the query to run, the SQL to emit before and
// after the rows, and the factory for the per-row handler.
// NOTE(review): presumably consumed by `convert()` in build-index.js, which is
// called with this module — confirm the expected shape there.
module.exports = {
query: TRIPS,
beforeAll,
createOnRow: createOnTrip,
afterAll,
}
24 changes: 21 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,28 +45,45 @@ const normalizeLineName = (name) => {
return slugg(name.replace(/([a-zA-Z]+)\s+(\d+)/g, '$1$2'))
}

module.exports = {normalizeStopName, normalizeLineName}
module.exports = {
normalizeStopName,
normalizeLineName,
// With VBB vehicles, the headsign is almost always the last stop.
normalizeTripHeadsign: normalizeStopName,
}
```

We're going to create two files that specify how to handle the GTFS-RT & GTFS (Static) data, respectively:

```js
// gtfs-rt-info.js
const {normalizeStopName, normalizeLineName} = require('./normalize.js')
const {
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
} = require('./normalize.js')

module.exports = {
endpointName: 'vbb-hafas',
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
}
```

```js
// gtfs-info.js
const {normalizeStopName, normalizeLineName} = require('./normalize.js')
const {
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
} = require('./normalize.js')

module.exports = {
endpointName: 'vbb-gtfs',
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
}
```

Expand Down Expand Up @@ -225,6 +242,7 @@ The size of this additional index depends on how many stable IDs your logic gene
endpointName: string,
normalizeStopName: (name: string, stop: FptfStop) => string,
normalizeLineName(name: string, line: FptfLine) => string,
normalizeTripHeadsign(headsign: string) => string,
}
```

Expand Down
2 changes: 2 additions & 0 deletions test/gtfs-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
const {
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
} = require('./normalize')

// GTFS (Static) side configuration used by the tests: an endpoint name plus
// the normalization functions imported from ./normalize.
const gtfsInfo = {
endpointName: 'gtfs',
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
}

module.exports = gtfsInfo
2 changes: 2 additions & 0 deletions test/gtfs-rt-info.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
const {
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
} = require('./normalize')

// GTFS-RT side configuration used by the tests: an endpoint name plus the
// normalization functions imported from ./normalize.
const gtfsRtInfo = {
endpointName: 'gtfs-rt',
normalizeStopName,
normalizeLineName,
normalizeTripHeadsign,
}

module.exports = gtfsRtInfo
2 changes: 2 additions & 0 deletions test/headsign-matching/agency.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone
1,S-Bahn Berlin GmbH,http://www.s-bahn-berlin.de,Europe/Berlin,de,
2 changes: 2 additions & 0 deletions test/headsign-matching/calendar.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
1883,0,0,0,0,0,0,0,20231221,20241214
42 changes: 42 additions & 0 deletions test/headsign-matching/calendar_dates.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
service_id,date,exception_type
1883,20240108,1
1883,20240115,1
1883,20240122,1
1883,20240129,1
1883,20240205,1
1883,20240212,1
1883,20240109,1
1883,20240116,1
1883,20240123,1
1883,20240130,1
1883,20240206,1
1883,20240213,1
1883,20240110,1
1883,20240117,1
1883,20240124,1
1883,20240131,1
1883,20240207,1
1883,20240214,1
1883,20240111,1
1883,20240118,1
1883,20240125,1
1883,20240201,1
1883,20240208,1
1883,20240215,1
1883,20240112,1
1883,20240119,1
1883,20240126,1
1883,20240202,1
1883,20240209,1
1883,20240106,1
1883,20240113,1
1883,20240120,1
1883,20240127,1
1883,20240203,1
1883,20240210,1
1883,20240107,1
1883,20240114,1
1883,20240121,1
1883,20240128,1
1883,20240204,1
1883,20240211,1
21 changes: 21 additions & 0 deletions test/headsign-matching/index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Integration test for headsign-based matching: creates a fresh database,
# imports the GTFS fixture files from this directory, builds the matching
# index, then runs the Node.js test.

set -e
set -o pipefail
# Run relative to this script's directory. "$0" is quoted so that a script
# path containing whitespace does not get word-split.
cd "$(dirname "$0")"

# Print the PG* connection settings for debugging; `|| true` keeps `set -e`
# from aborting the script when no such variable is set.
env | grep '^PG' || true

set -x

psql -c 'CREATE DATABASE test_headsign_matching'
export PGDATABASE=test_headsign_matching

# Convert the GTFS fixture files (*.csv) to SQL and load them.
../../node_modules/.bin/gtfs-to-sql \
	-d --trips-without-shape-id \
	*.csv \
	| psql -b

# Generate the index SQL from the test configuration and load it.
../../build-index.js ../gtfs-rt-info.js ../gtfs-info.js | psql -b

node test-find-departure.js
2 changes: 2 additions & 0 deletions test/headsign-matching/routes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
route_id,agency_id,route_short_name,route_long_name,route_type,route_color,route_text_color,route_desc
22009_700,1,S1A,,700,,,
18 changes: 18 additions & 0 deletions test/headsign-matching/stop_times.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_headsign
225318702,14:50:00,14:50:00,de:11000:900058101::8,0,0,0,
225318702,14:54:00,14:54:00,de:11000:900054104::2,1,0,0,
225318702,14:58:00,14:58:00,de:11000:900054109::5,2,0,0,
225318702,15:04:00,15:04:00,de:11000:900057103::3,3,0,0,
225318702,15:12:00,15:12:00,de:11000:900012101::1,4,0,0,
225318702,15:15:00,15:15:00,de:11000:900100022::1,5,0,0,
225318702,15:17:00,15:17:00,de:11000:900100301::1,6,0,0,
225318702,15:24:00,15:24:00,de:11000:900100001::6,7,0,0,
225318415,15:24:00,15:24:00,de:11000:900100001::6,0,0,0,
225318415,15:29:00,15:29:00,de:11000:900100301::1,1,0,0,
225318415,15:32:00,15:32:00,de:11000:900100022::1,2,0,0,
225318415,15:35:00,15:35:00,de:11000:900012101::1,3,0,0,
225318415,15:43:00,15:43:00,de:11000:900058108::1,4,0,0,
225318415,15:44:00,15:44:00,de:11000:900057101::1,5,0,0,
225318415,15:49:00,15:49:00,de:11000:900054109::5,6,0,0,
225318415,15:54:00,15:54:00,de:11000:900054104::2,7,0,0,
225318415,15:58:00,15:58:00,de:11000:900058101::8,8,0,0,
Loading

0 comments on commit 1918b63

Please sign in to comment.