Skip to content

Commit

Permalink
Merge branch 'dev' into DT-462-add-stop-time-validator
Browse files Browse the repository at this point in the history
  • Loading branch information
miles-grant-ibigroup authored Nov 26, 2024
2 parents db482fd + a8a376c commit d3c1520
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 48 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
# Install node 22+ for running maven-semantic-release.
- name: Use Node.js 22.X
# Install node 20.11.0 (LTS) for running maven-semantic-release.
- name: Use Node.js 20.X
uses: actions/setup-node@v1
with:
node-version: 22
node-version: 20.11.0
- name: Install maven-semantic-release
# FIXME: Enable cache for node packages (add package.json?)
run: |
Expand Down Expand Up @@ -64,10 +64,10 @@ jobs:
#
# The git commands get the commit hash of the HEAD commit and the commit just before HEAD.
# Install node 20.11.0 (LTS) for running maven-semantic-release.
- name: Use Node.js 22.X
- name: Use Node.js 20.X
uses: actions/setup-node@v1
with:
node-version: 22
node-version: 20.11.0
- name: Run maven-semantic-release
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
Expand Down
14 changes: 7 additions & 7 deletions src/main/java/com/conveyal/gtfs/PatternBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;
import java.util.Set;

import static com.conveyal.gtfs.loader.JdbcGtfsLoader.copyFromFile;
import static com.conveyal.gtfs.model.Entity.INT_MISSING;
Expand All @@ -40,7 +39,7 @@ public PatternBuilder(Feed feed) throws SQLException {
connection = feed.getConnection();
}

public void create(Map<TripPatternKey, Pattern> patterns, Set<String> patternIdsLoadedFromFile) {
public void create(Map<TripPatternKey, Pattern> patterns, boolean usePatternsFromFeed) {
String patternsTableName = feed.getTableNameWithSchemaPrefix("patterns");
String tripsTableName = feed.getTableNameWithSchemaPrefix("trips");
String patternStopsTableName = feed.getTableNameWithSchemaPrefix("pattern_stops");
Expand All @@ -53,13 +52,14 @@ public void create(Map<TripPatternKey, Pattern> patterns, Set<String> patternIds
LOG.info("Creating pattern and pattern stops tables.");
Statement statement = connection.createStatement();
statement.execute(String.format("alter table %s add column pattern_id varchar", tripsTableName));
if (patternIdsLoadedFromFile.isEmpty()) {
// No patterns were loaded from file so the pattern table has not previously been created.
if (!usePatternsFromFeed) {
// If no patterns were loaded from file, create the pattern table. Conversely, if the patterns loaded
// from file have been superseded by generated patterns, recreate the table to start afresh.
patternsTable.createSqlTable(connection, null, true);
}
patternStopsTable.createSqlTable(connection, null, true);
try (PrintStream patternForTripsFileStream = createTempPatternForTripsTable(tempPatternForTripsTextFile, statement)) {
processPatternAndPatternStops(patternsTable, patternStopsTable, patternForTripsFileStream, patterns, patternIdsLoadedFromFile);
processPatternAndPatternStops(patternsTable, patternStopsTable, patternForTripsFileStream, patterns, usePatternsFromFeed);
}
updateTripPatternIds(tempPatternForTripsTextFile, statement, tripsTableName);
createIndexes(statement, patternsTableName, patternStopsTableName, tripsTableName);
Expand All @@ -80,7 +80,7 @@ private void processPatternAndPatternStops(
Table patternStopsTable,
PrintStream patternForTripsFileStream,
Map<TripPatternKey, Pattern> patterns,
Set<String> patternIdsLoadedFromFile
boolean usePatternsFromFeed
) throws SQLException {
// Generate prepared statements for inserts.
String insertPatternSql = patternsTable.generateInsertSql(true);
Expand All @@ -90,7 +90,7 @@ private void processPatternAndPatternStops(
for (Map.Entry<TripPatternKey, Pattern> entry : patterns.entrySet()) {
Pattern pattern = entry.getValue();
LOG.debug("Batching pattern {}", pattern.pattern_id);
if (!patternIdsLoadedFromFile.contains(pattern.pattern_id)) {
if (!usePatternsFromFeed) {
// Only insert generated patterns if the feed-provided patterns are not being used
// (those were already imported from file).
pattern.setStatementParameters(insertPatternStatement, true);
patternTracker.addBatch();
Expand Down
38 changes: 33 additions & 5 deletions src/main/java/com/conveyal/gtfs/PatternFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,34 @@ public void processTrip(Trip trip, Iterable<StopTime> orderedStopTimes) {
* Once all trips have been processed, call this method to produce the final Pattern objects representing all the
* unique sequences of stops encountered. Returns map of patterns to their keys so that downstream functions can
* make use of trip pattern keys for constructing pattern stops or other derivative objects.
*
* There is no viable relationship between patterns that are loaded from a feed (patternsFromFeed) and patterns
* generated here. Process ordering is used to update the pattern id and name if patterns from a feed are available.
* E.g. The first pattern loaded from a feed will be used to update the first pattern created here.
*/
public Map<TripPatternKey, Pattern> createPatternObjects(Map<String, Stop> stopById, SQLErrorStorage errorStorage) {
public Map<TripPatternKey, Pattern> createPatternObjects(
Map<String, Stop> stopById,
List<Pattern> patternsFromFeed,
SQLErrorStorage errorStorage
) {
// Make pattern ID one-based to avoid any JS type confusion between an ID of zero vs. null value.
int nextPatternId = 1;
int patternsFromFeedIndex = 0;
boolean usePatternsFromFeed = canUsePatternsFromFeed(patternsFromFeed);
// Create an in-memory list of Patterns because we will later rename them before inserting them into storage.
// Use a LinkedHashMap so we can retrieve the entrySets later in the order of insertion.
Map<TripPatternKey, Pattern> patterns = new LinkedHashMap<>();
// TODO assign patterns sequential small integer IDs (may include route)
for (TripPatternKey key : tripsForPattern.keySet()) {
Collection<Trip> trips = tripsForPattern.get(key);
Pattern pattern = new Pattern(key.stops, trips, null);
// Overwrite long UUID with sequential integer pattern ID
pattern.pattern_id = Integer.toString(nextPatternId++);
if (usePatternsFromFeed) {
pattern.pattern_id = patternsFromFeed.get(patternsFromFeedIndex).pattern_id;
pattern.name = patternsFromFeed.get(patternsFromFeedIndex).name;
} else {
// Overwrite long UUID with sequential integer pattern ID
pattern.pattern_id = Integer.toString(nextPatternId++);
}
// FIXME: Should associated shapes be a single entry?
pattern.associatedShapes = new HashSet<>();
trips.stream().forEach(trip -> pattern.associatedShapes.add(trip.shape_id));
Expand All @@ -87,13 +102,26 @@ public Map<TripPatternKey, Pattern> createPatternObjects(Map<String, Stop> stopB
.setBadValue(pattern.associatedShapes.toString()));
}
patterns.put(key, pattern);
patternsFromFeedIndex++;
}
if (!usePatternsFromFeed) {
// Name patterns before storing in SQL database if they have not already been provided with a feed.
renamePatterns(patterns.values(), stopById);
}
// Name patterns before storing in SQL database.
renamePatterns(patterns.values(), stopById);
LOG.info("Total patterns: {}", tripsForPattern.keySet().size());
return patterns;
}

/**
* If there is a difference in the number of patterns provided by a feed and the number of patterns generated here,
* the patterns provided by the feed are rejected.
*/
public boolean canUsePatternsFromFeed(List<Pattern> patternsFromFeed) {
boolean usePatternsFromFeed = patternsFromFeed.size() == tripsForPattern.keySet().size();
LOG.info("Using patterns from feed: {}", usePatternsFromFeed);
return usePatternsFromFeed;
}

/**
* Destructively rename the supplied collection of patterns.
* This process requires access to all the stops in the feed.
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ public enum NewGTFSErrorType {
FIELD_VALUE_TOO_LONG(Priority.MEDIUM, "Field value has too many characters."),

// Shared stops-specific errors.
MULTIPLE_SHARED_STOPS_GROUPS(Priority.HIGH, "A GTFS stop belongs to more than one shared-stop group."),
SHARED_STOP_GROUP_MUTLIPLE_PRIMARY_STOPS(Priority.HIGH, "A Shared-stop group has multiple primary stops."),
SHARED_STOP_GROUP_ENTITY_DOES_NOT_EXIST(Priority.MEDIUM, "The stop referenced by a shared-stop does not exist."),
MULTIPLE_SHARED_STOPS_GROUPS(Priority.HIGH, "A GTFS stop belongs to more than one shared-stop group, or belongs to the same shared-stop group twice."),
SHARED_STOP_GROUP_MULTIPLE_PRIMARY_STOPS(Priority.HIGH, "A shared-stop group has multiple primary stops."),
SHARED_STOP_GROUP_ENTITY_DOES_NOT_EXIST(Priority.MEDIUM, "The stop referenced by a shared-stop does not exist in the feed it was said to exist in."),

// Unknown errors.
OTHER(Priority.LOW, "Other errors.");
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ private void populateDefaultEditorValues(Connection connection, String tablePref
LOG.info(updateOtherSql);
int calendarsUpdated = statement.executeUpdate(updateOtherSql);
LOG.info("Updated description for {} calendars", calendarsUpdated);

// Check if there are duplicate descriptions, in which case set the description to be the service_id which is unique
String avoidDuplicateDescriptionSql = String.format(
"update %1$scalendar set description = service_id " +
"from (select description, count(*) as duplicate_count from %1$scalendar group by description) as duplicate_descriptions " +
"where %1$scalendar.description = duplicate_descriptions.description and duplicate_descriptions.duplicate_count > 1",
tablePrefix
);
LOG.info(avoidDuplicateDescriptionSql);
int duplicatesAvoided = statement.executeUpdate(avoidDuplicateDescriptionSql);
LOG.info("Updated duplicate descriptions for {} calendars", duplicatesAvoided);
}
if (Table.TRIPS.name.equals(table.name)) {
// Update use_frequency field for patterns. This sets all patterns that have a frequency trip to use
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -1502,7 +1502,7 @@ private void ensureReferentialIntegrity(
// Special case for schedule_exceptions where for exception type 10 and service_id is also a key.
String calendarDateServiceKey = "custom_schedule";
Field calendarDateServiceKeyField = table.getFieldForName(calendarDateServiceKey);
String calendarDateServiceKeyVal = jsonObject.get(calendarDateServiceKey).asText();
String calendarDateServiceKeyVal = jsonObject.get(calendarDateServiceKey).get(0).asText();
TIntSet calendarDateServiceUniqueIds = getIdsForCondition(tableName, calendarDateServiceKey, calendarDateServiceKeyVal, connection);
checkUniqueIdsAndUpdateReferencingTables(
calendarDateServiceUniqueIds,
Expand Down Expand Up @@ -1726,7 +1726,7 @@ private void updateReferencingTables(
connection.rollback();
if (entityClass.getSimpleName().equals("Stop")) {
String patternStopLookup = String.format(
"select distinct p.id, r.id " +
"select distinct p.id, r.id, r.route_short_name, r.route_id " +
"from %s.pattern_stops ps " +
"inner join " +
"%s.patterns p " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,20 @@
import com.conveyal.gtfs.PatternFinder;
import com.conveyal.gtfs.TripPatternKey;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.BatchTracker;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.Requirement;
import com.conveyal.gtfs.loader.Table;
import com.conveyal.gtfs.model.Pattern;
import com.conveyal.gtfs.model.PatternStop;
import com.conveyal.gtfs.model.Route;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.model.StopTime;
import com.conveyal.gtfs.model.Trip;
import org.apache.commons.dbutils.DbUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static com.conveyal.gtfs.loader.JdbcGtfsLoader.copyFromFile;
import static com.conveyal.gtfs.model.Entity.INT_MISSING;
import static com.conveyal.gtfs.model.Entity.setDoubleParameter;
import static com.conveyal.gtfs.model.Entity.setIntParameter;

/**
* Groups trips together into "patterns" that share the same sequence of stops.
Expand Down Expand Up @@ -72,19 +52,18 @@ public void validateTrip (Trip trip, Route route, List<StopTime> stopTimes, List
*/
@Override
public void complete(ValidationResult validationResult) {
Set<String> patternIds = new HashSet<>();
List<Pattern> patternsFromFeed = new ArrayList<>();
for(Pattern pattern : feed.patterns) {
patternIds.add(pattern.pattern_id);
patternsFromFeed.add(pattern);
}
LOG.info("Finding patterns...");
Map<String, Stop> stopById = new HashMap<>();
for (Stop stop : feed.stops) {
stopById.put(stop.stop_id, stop);
}
// Although patterns may have already been loaded from file, the trip patterns are still required.
Map<TripPatternKey, Pattern> patterns = patternFinder.createPatternObjects(stopById, errorStorage);
patternBuilder.create(patterns, patternIds);
Map<TripPatternKey, Pattern> patterns = patternFinder.createPatternObjects(stopById, patternsFromFeed, errorStorage);
patternBuilder.create(patterns, patternFinder.canUsePatternsFromFeed(patternsFromFeed));
}

}

Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.TestUtils;
import com.conveyal.gtfs.loader.EntityPopulator;
import com.conveyal.gtfs.loader.FeedLoadResult;
import com.conveyal.gtfs.loader.JDBCTableReader;
import com.conveyal.gtfs.loader.Table;
import com.conveyal.gtfs.model.Pattern;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import javax.sql.DataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import static com.conveyal.gtfs.GTFS.load;
import static com.conveyal.gtfs.GTFS.validate;
import static com.conveyal.gtfs.TestUtils.getResourceFileName;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.greaterThan;


class PatternFinderValidatorTest {

    /** Name of the throwaway database created for this test class. */
    private static String testDBName;

    private static DataSource testDataSource;

    @BeforeAll
    public static void setUpClass() {
        // Create a fresh, uniquely named database so runs cannot interfere with existing data.
        testDBName = TestUtils.generateNewDB();
        String dbConnectionUrl = String.format("jdbc:postgresql://localhost/%s", testDBName);
        testDataSource = TestUtils.createTestDataSource(dbConnectionUrl);
    }

    @AfterAll
    public static void tearDownClass() {
        TestUtils.dropDB(testDBName);
    }

    /**
     * When the patterns loaded from a feed line up with the generated patterns, the feed-provided
     * pattern ids should be carried through to the pattern stops table.
     */
    @Test
    void canUseFeedPatterns() throws SQLException {
        String fileName = getResourceFileName("real-world-gtfs-feeds/RABA.zip");
        FeedLoadResult feedLoadResult = load(fileName, testDataSource);
        String testNamespace = feedLoadResult.uniqueIdentifier;
        validate(testNamespace, testDataSource);
        checkPatternStopsAgainstFeedPatterns(fileName, testNamespace);
    }

    /**
     * Remove one pattern from the feed so that there is a mismatch between the patterns loaded and the patterns
     * generated. This will result in the generated patterns taking precedence over the loaded patterns.
     */
    @Test
    void canRevertToGeneratedPatterns() throws SQLException {
        String fileName = getResourceFileName("real-world-gtfs-feeds/RABA.zip");
        FeedLoadResult feedLoadResult = load(fileName, testDataSource);
        String testNamespace = feedLoadResult.uniqueIdentifier;
        String patternIdToExclude = "2k3j";
        executeSqlStatement(String.format(
            "delete from %s where pattern_id = '%s'",
            String.format("%s.%s", testNamespace, Table.PATTERNS.name),
            patternIdToExclude
        ));
        validate(testNamespace, testDataSource);
        JDBCTableReader<Pattern> patterns = new JDBCTableReader<>(Table.PATTERNS,
            testDataSource,
            testNamespace + ".",
            EntityPopulator.PATTERN
        );
        // Every pattern that survived validation (now generated, not feed-provided) must have pattern stops.
        for (Pattern pattern : patterns.getAllOrdered()) {
            assertThatSqlQueryYieldsRowCountGreaterThanZero(generateSql(testNamespace, pattern.pattern_id));
        }
    }

    /** A feed without a patterns file should fall back to generated patterns. */
    @Test
    void canUseGeneratedPatterns() throws SQLException, IOException {
        String zipFileName = TestUtils.zipFolderFiles("fake-agency", true);
        FeedLoadResult feedLoadResult = load(zipFileName, testDataSource);
        String testNamespace = feedLoadResult.uniqueIdentifier;
        validate(testNamespace, testDataSource);
        checkPatternStopsAgainstFeedPatterns(zipFileName, testNamespace);
    }

    /**
     * Assert that every pattern present in the feed file has at least one pattern stop row in the
     * snapshot namespace.
     */
    private void checkPatternStopsAgainstFeedPatterns(String zipFileName, String testNamespace) throws SQLException {
        GTFSFeed feed = GTFSFeed.fromFile(zipFileName);
        for (String key : feed.patterns.keySet()) {
            Pattern pattern = feed.patterns.get(key);
            assertThatSqlQueryYieldsRowCountGreaterThanZero(generateSql(testNamespace, pattern.pattern_id));
        }
    }

    /** Build a query returning all pattern stop rows for the given pattern id. */
    private String generateSql(String testNamespace, String patternId) {
        return String.format(
            "select * from %s where pattern_id = '%s'",
            String.format("%s.%s", testNamespace, Table.PATTERN_STOP.name),
            patternId
        );
    }

    private void assertThatSqlQueryYieldsRowCountGreaterThanZero(String sql) throws SQLException {
        // try-with-resources so the connection, statement and result set are released even on
        // assertion failure (the original leaked all three).
        try (
            Connection connection = testDataSource.getConnection();
            Statement statement = connection.createStatement();
            ResultSet rs = statement.executeQuery(sql)
        ) {
            int recordCount = 0;
            while (rs.next()) recordCount++;
            assertThat(recordCount, greaterThan(0));
        }
    }

    private void executeSqlStatement(String sql) throws SQLException {
        // try-with-resources so the connection is returned to the pool (the original closed only
        // the statement and leaked the connection).
        try (
            Connection connection = testDataSource.getConnection();
            Statement statement = connection.createStatement()
        ) {
            statement.execute(sql);
            connection.commit();
        }
    }
}
Binary file added src/test/resources/real-world-gtfs-feeds/RABA.zip
Binary file not shown.

0 comments on commit d3c1520

Please sign in to comment.