tweak & add comments

OpenDataVBB · Nov 28, 2024 · 4b9e0ba
1 parent 82a1969
commit 4b9e0ba
Show file tree

Hide file tree

Showing 4 changed files with 10 additions and 9 deletions.
diff --git a/lib/match-with-schedule-trip.js b/lib/match-with-schedule-trip.js
@@ -218,8 +218,8 @@ SELECT
 	-- todo: use tz
 FROM arrivals_departures ad
 WHERE True
--- We can't use a mere \`trip_id = ANY(SELECT trip_id FROM matches)\` because, as of v14, PostgreSQL fails to "push down" the join/filtering [2] into \`arrivals_departures\` even though \`matches\` is materialized and known to be small. By forcing PostgreSQL to "collect" the values from \`matches\` using \`array()\` [0][1], we guide it to todo. (The same applies to \`date\`.)
--- This technique *does not* work (i.e. is slow) when using \`IN\` to filter with (trip_id, date) pairs – using two separate \`= ANY()\` filters is not equivalent, after all! –, so we first filter using \`= ANY()\` on trip_id & date for speed, and then filter on the pairs for correctness.
+-- We can't use a mere \`trip_id = ANY(SELECT trip_id FROM matches)\` because, as of v14, PostgreSQL fails to "push down" the join/filtering [2] into \`arrivals_departures\` even though \`matches\` is materialized and known to be small. By forcing PostgreSQL to "collect" the values from \`matches\` using \`array()\` [0][1], we guide it to first collect results and then filter. (The same applies to \`date\`.)
+-- This technique *does not* work (i.e. is slow) when using \`IN\` to filter with (trip_id, date) pairs – using two separate \`= ANY()\` filters is not equivalent, after all! –, so we first filter using \`= ANY()\` on trip_id & date for speed, and then additionally filter on the pairs for correctness.
 -- [0] https://stackoverflow.com/a/15007154/1072129
 -- [1] https://dba.stackexchange.com/a/189255/289704
 -- [2] https://stackoverflow.com/a/66626205/1072129
@@ -372,8 +372,8 @@ SELECT
 	-- todo: use tz
 FROM arrivals_departures ad
 WHERE True
--- We can't use a mere \`trip_id = ANY(SELECT trip_id FROM matches)\` because, as of v14, PostgreSQL fails to "push down" the join/filtering [2] into \`arrivals_departures\` even though \`matches\` is materialized and known to be small. By forcing PostgreSQL to "collect" the values from \`matches\` using \`array()\` [0][1], we guide it to todo. (The same applies to \`date\`.)
--- This technique *does not* work (i.e. is slow) when using \`IN\` to filter with (trip_id, date) pairs – using two separate \`= ANY()\` filters is not equivalent, after all! –, so we first filter using \`= ANY()\` on trip_id & date for speed, and then filter on the pairs for correctness.
+-- We can't use a mere \`trip_id = ANY(SELECT trip_id FROM matches)\` because, as of v14, PostgreSQL fails to "push down" the join/filtering [2] into \`arrivals_departures\` even though \`matches\` is materialized and known to be small. By forcing PostgreSQL to "collect" the values from \`matches\` using \`array()\` [0][1], we guide it to first collect results and then filter. (The same applies to \`date\`.)
+-- This technique *does not* work (i.e. is slow) when using \`IN\` to filter with (trip_id, date) pairs – using two separate \`= ANY()\` filters is not equivalent, after all! –, so we first filter using \`= ANY()\` on trip_id & date for speed, and then additionally filter on the pairs for correctness.
 -- [0] https://stackoverflow.com/a/15007154/1072129
 -- [1] https://dba.stackexchange.com/a/189255/289704
 -- [2] https://stackoverflow.com/a/66626205/1072129

diff --git a/lib/match.js b/lib/match.js
@@ -134,8 +134,7 @@ const runGtfsMatching = async (cfg, opt = {}) => {
 		],
 	})
 	// NATS gives separate sequence numbers to both a) messages in a stream and b) messages as (re-)received by a consumer.
-	// We currently use `msg.seq`. – todo: what is that?
-	// todo [breaking]: change ot use the stream sequence or remove this metric, as it's misleading!
+	// We currently use `msg.seq`, which is the stream sequence (not the consumer sequence) of the message.
 	const natsMsgSeq = new Gauge({
 		name: 'nats_msg_seq',
 		help: 'sequence number of the latest NATS message being processed',
@@ -255,7 +254,7 @@ const runGtfsMatching = async (cfg, opt = {}) => {
 		const tReceived = Date.now()
 		const {
 			subject,
-			seq,
+			seq, // stream sequence, not consumer sequence
 			redelivered,
 			data,
 		} = msg

diff --git a/lib/soft-exit.js b/lib/soft-exit.js
@@ -1,3 +1,5 @@
+// copied from https://github.com/derhuerst/hafas-gtfs-rt-feed/blob/8.2.6/lib/soft-exit.js
+
 const withSoftExit = (softExit) => {
 	let softExiting = false
 	const onExitSignal = () => {

diff --git a/lib/vdv-aus-istfahrt-as-gtfs-rt-tripupdate.js b/lib/vdv-aus-istfahrt-as-gtfs-rt-tripupdate.js
@@ -175,7 +175,7 @@ const createFormatVdvAusIstFahrtAsGtfsRtTripUpdate = async (cfg) => {
 			// todo: expose istFahrt.Besetztgrad as VehiclePosition.occupancy_status?
 		}
 
-		// not part of the GTFS Realtime spec, we just use it for matching
+		// not part of the GTFS Realtime spec, we just use it for matching and/or debug-logging
 		const route_short_name = istFahrt.LinienText || null
 		Object.defineProperty(tripUpdate.trip, kRouteShortName, {value: route_short_name})
 		Object.defineProperty(tripUpdate, kFahrtID, {value: istFahrt.FahrtID ?? null})
@@ -196,4 +196,4 @@ export {
 	kUmlaufID,
 	unixTimestampFromIso8601,
 	createFormatVdvAusIstFahrtAsGtfsRtTripUpdate,
-}
+}