Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Google Feed Validator (Near) Parity #236

Merged
merged 7 commits into from
Jun 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -357,5 +357,11 @@
<artifactId>graphql-java</artifactId>
<version>11.0</version>
</dependency>
<!-- Contains special utils for things like converting escaped strings to unescaped strings for logging. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.6</version>
</dependency>
</dependencies>
</project>
6 changes: 5 additions & 1 deletion src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public enum NewGTFSErrorType {
LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."),
ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."),
INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."),
FARE_TRANSFER_MISMATCH(Priority.MEDIUM, "A fare that does not permit transfers has a non-zero transfer duration."),
FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."),
FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."),
COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."),
NUMBER_PARSING(Priority.MEDIUM, "Unable to parse number from value."),
Expand All @@ -35,7 +37,7 @@ public enum NewGTFSErrorType {
TABLE_IN_SUBDIRECTORY(Priority.HIGH, "Rather than being at the root of the zip file, a table was nested in a subdirectory."),
TABLE_MISSING_COLUMN_HEADERS(Priority.HIGH, "Table is missing column headers."),
TABLE_TOO_LONG(Priority.MEDIUM, "Table is too long to record line numbers with a 32-bit integer, overflow will occur."),
TIME_ZONE_FORMAT(Priority.MEDIUM, "Time zone format should be X."),
TIME_ZONE_FORMAT(Priority.MEDIUM, "Time zone format should match value from the Time Zone Database https://en.wikipedia.org/wiki/List_of_tz_database_time_zones."),
REQUIRED_TABLE_EMPTY(Priority.MEDIUM, "This table is required by the GTFS specification but is empty."),
FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."),
ROUTE_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a route is identical to its name, so does not add any information."),
Expand All @@ -49,7 +51,9 @@ public enum NewGTFSErrorType {
STOP_LOW_POPULATION_DENSITY(Priority.HIGH, "A stop is located in a geographic area with very low human population density."),
STOP_NAME_MISSING(Priority.MEDIUM, "A stop does not have a name."),
STOP_GEOGRAPHIC_OUTLIER(Priority.HIGH, "This stop is located very far from the middle 90% of stops in this feed."),
STOP_TIME_UNUSED(Priority.LOW, "This stop time allows neither pickup nor drop off and is not a timepoint, so it serves no purpose and should be removed from trip."),
STOP_UNUSED(Priority.MEDIUM, "This stop is not referenced by any trips."),
TIMEPOINT_MISSING_TIMES(Priority.MEDIUM, "This stop time is marked as a timepoint, but is missing both arrival and departure times."),
TRIP_EMPTY(Priority.HIGH, "This trip is defined but has no stop times."),
TRIP_HEADSIGN_CONTAINS_ROUTE_NAME(Priority.LOW, "A trip headsign contains the route name, but should only contain information to distinguish it from other trips for the route."),
TRIP_HEADSIGN_SHOULD_DESCRIBE_DESTINATION_OR_WAYPOINTS(Priority.LOW, "A trip headsign begins with 'to' or 'towards', but should begin with destination or direction and optionally include waypoints with 'via'"),
Expand Down
105 changes: 64 additions & 41 deletions src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import com.conveyal.gtfs.model.Calendar;
import com.conveyal.gtfs.model.CalendarDate;
import com.conveyal.gtfs.model.Entity;
import com.conveyal.gtfs.model.FareAttribute;
import com.conveyal.gtfs.model.Frequency;
import com.conveyal.gtfs.model.PatternStop;
import com.conveyal.gtfs.model.Route;
import com.conveyal.gtfs.model.ScheduleException;
Expand Down Expand Up @@ -68,21 +70,21 @@ public interface EntityPopulator<T> {
T populate (ResultSet results, TObjectIntMap<String> columnForName) throws SQLException;

EntityPopulator<Agency> AGENCY = (result, columnForName) -> {
Agency agency = new Agency();
agency.agency_id = getStringIfPresent(result, "agency_id", columnForName);
agency.agency_name = getStringIfPresent(result, "agency_name", columnForName);
agency.agency_url = getUrlIfPresent (result, "agency_url", columnForName);
agency.agency_timezone = getStringIfPresent(result, "agency_timezone", columnForName);
agency.agency_lang = getStringIfPresent(result, "agency_lang", columnForName);
agency.agency_phone = getStringIfPresent(result, "agency_phone", columnForName);
agency.agency_fare_url = getUrlIfPresent (result, "agency_fare_url", columnForName);
agency.agency_email = getStringIfPresent(result, "agency_email", columnForName);
agency.agency_branding_url = getUrlIfPresent (result, "agency_branding_url", columnForName);
Agency agency = new Agency();
agency.agency_id = getStringIfPresent(result, "agency_id", columnForName);
agency.agency_name = getStringIfPresent(result, "agency_name", columnForName);
agency.agency_url = getUrlIfPresent (result, "agency_url", columnForName);
agency.agency_timezone = getStringIfPresent(result, "agency_timezone", columnForName);
agency.agency_lang = getStringIfPresent(result, "agency_lang", columnForName);
agency.agency_phone = getStringIfPresent(result, "agency_phone", columnForName);
agency.agency_fare_url = getUrlIfPresent (result, "agency_fare_url", columnForName);
agency.agency_email = getStringIfPresent(result, "agency_email", columnForName);
agency.agency_branding_url = getUrlIfPresent (result, "agency_branding_url", columnForName);
return agency;
};

EntityPopulator<Calendar> CALENDAR = (result, columnForName) -> {
Calendar calendar = new Calendar();
Calendar calendar = new Calendar();
calendar.service_id = getStringIfPresent(result, "service_id", columnForName);
calendar.start_date = getDateIfPresent (result, "start_date", columnForName);
calendar.end_date = getDateIfPresent (result, "end_date", columnForName);
Expand All @@ -104,10 +106,31 @@ public interface EntityPopulator<T> {
return calendarDate;
};

/**
 * Builds a {@link FareAttribute} by reading each of its fields from the supplied result set. Every value is fetched
 * through the matching *IfPresent helper, which is given the column name and the columnForName lookup.
 */
EntityPopulator<FareAttribute> FARE_ATTRIBUTE = (result, columnForName) -> {
    final FareAttribute fare = new FareAttribute();
    // Identifiers.
    fare.fare_id = getStringIfPresent(result, "fare_id", columnForName);
    fare.agency_id = getStringIfPresent(result, "agency_id", columnForName);
    // Cost and how it is paid.
    fare.price = getDoubleIfPresent(result, "price", columnForName);
    fare.payment_method = getIntIfPresent(result, "payment_method", columnForName);
    // Transfer rules.
    fare.transfers = getIntIfPresent(result, "transfers", columnForName);
    fare.transfer_duration = getIntIfPresent(result, "transfer_duration", columnForName);
    return fare;
};

/**
 * Builds a {@link Frequency} by reading each of its fields from the supplied result set. Every value is fetched
 * through the matching *IfPresent helper, which is given the column name and the columnForName lookup.
 */
EntityPopulator<Frequency> FREQUENCY = (result, columnForName) -> {
    final Frequency freq = new Frequency();
    // The trip this frequency entry belongs to.
    freq.trip_id = getStringIfPresent(result, "trip_id", columnForName);
    // Start/end of the period and the headway within it, all read as ints.
    freq.start_time = getIntIfPresent(result, "start_time", columnForName);
    freq.end_time = getIntIfPresent(result, "end_time", columnForName);
    freq.headway_secs = getIntIfPresent(result, "headway_secs", columnForName);
    freq.exact_times = getIntIfPresent(result, "exact_times", columnForName);
    return freq;
};

EntityPopulator<ScheduleException> SCHEDULE_EXCEPTION = (result, columnForName) -> {
ScheduleException scheduleException = new ScheduleException();
scheduleException.name = getStringIfPresent(result, "name", columnForName);
scheduleException.dates = getDateListIfPresent(result, "dates", columnForName);
scheduleException.name = getStringIfPresent (result, "name", columnForName);
scheduleException.dates = getDateListIfPresent (result, "dates", columnForName);
scheduleException.exemplar = exemplarFromInt(getIntIfPresent(result, "exemplar", columnForName));
scheduleException.customSchedule = getStringListIfPresent(result, "custom_schedule", columnForName);
scheduleException.addedService = getStringListIfPresent(result, "added_service", columnForName);
Expand All @@ -116,22 +139,22 @@ public interface EntityPopulator<T> {
};

EntityPopulator<Route> ROUTE = (result, columnForName) -> {
Route route = new Route();
route.route_id = getStringIfPresent(result, "route_id", columnForName);
route.agency_id = getStringIfPresent(result, "agency_id", columnForName);
route.route_short_name = getStringIfPresent(result, "route_short_name", columnForName);
route.route_long_name = getStringIfPresent(result, "route_long_name", columnForName);
route.route_desc = getStringIfPresent(result, "route_desc", columnForName);
route.route_type = getIntIfPresent (result, "route_type", columnForName);
route.route_color = getStringIfPresent(result, "route_color", columnForName);
route.route_text_color = getStringIfPresent(result, "route_text_color", columnForName);
route.route_url = getUrlIfPresent (result, "route_url", columnForName);
route.route_branding_url = getUrlIfPresent (result, "route_branding_url", columnForName);
Route route = new Route();
route.route_id = getStringIfPresent(result, "route_id", columnForName);
route.agency_id = getStringIfPresent(result, "agency_id", columnForName);
route.route_short_name = getStringIfPresent(result, "route_short_name", columnForName);
route.route_long_name = getStringIfPresent(result, "route_long_name", columnForName);
route.route_desc = getStringIfPresent(result, "route_desc", columnForName);
route.route_type = getIntIfPresent (result, "route_type", columnForName);
route.route_color = getStringIfPresent(result, "route_color", columnForName);
route.route_text_color = getStringIfPresent(result, "route_text_color", columnForName);
route.route_url = getUrlIfPresent (result, "route_url", columnForName);
route.route_branding_url = getUrlIfPresent (result, "route_branding_url", columnForName);
return route;
};

EntityPopulator<Stop> STOP = (result, columnForName) -> {
Stop stop = new Stop();
Stop stop = new Stop();
stop.stop_id = getStringIfPresent(result, "stop_id", columnForName);
stop.stop_code = getStringIfPresent(result, "stop_code", columnForName);
stop.stop_name = getStringIfPresent(result, "stop_name", columnForName);
Expand All @@ -148,7 +171,7 @@ public interface EntityPopulator<T> {
};

EntityPopulator<Trip> TRIP = (result, columnForName) -> {
Trip trip = new Trip();
Trip trip = new Trip();
trip.trip_id = getStringIfPresent(result, "trip_id", columnForName);
trip.route_id = getStringIfPresent(result, "route_id", columnForName);
trip.service_id = getStringIfPresent(result, "service_id", columnForName);
Expand All @@ -163,26 +186,26 @@ public interface EntityPopulator<T> {
};

EntityPopulator<ShapePoint> SHAPE_POINT = (result, columnForName) -> {
ShapePoint shapePoint = new ShapePoint();
shapePoint.shape_id = getStringIfPresent(result, "shape_id", columnForName);
shapePoint.shape_pt_lat = getDoubleIfPresent(result, "shape_pt_lat", columnForName);
shapePoint.shape_pt_lon = getDoubleIfPresent(result, "shape_pt_lon", columnForName);
shapePoint.shape_pt_sequence = getIntIfPresent(result, "shape_pt_sequence", columnForName);
ShapePoint shapePoint = new ShapePoint();
shapePoint.shape_id = getStringIfPresent(result, "shape_id", columnForName);
shapePoint.shape_pt_lat = getDoubleIfPresent(result, "shape_pt_lat", columnForName);
shapePoint.shape_pt_lon = getDoubleIfPresent(result, "shape_pt_lon", columnForName);
shapePoint.shape_pt_sequence = getIntIfPresent (result, "shape_pt_sequence", columnForName);
shapePoint.shape_dist_traveled = getDoubleIfPresent(result, "shape_dist_traveled", columnForName);
return shapePoint;
};

EntityPopulator<StopTime> STOP_TIME = (result, columnForName) -> {
StopTime stopTime = new StopTime();
stopTime.trip_id = getStringIfPresent(result, "trip_id", columnForName);
stopTime.arrival_time = getIntIfPresent (result, "arrival_time", columnForName);
stopTime.departure_time = getIntIfPresent (result, "departure_time", columnForName);
stopTime.stop_id = getStringIfPresent(result, "stop_id", columnForName);
stopTime.stop_sequence = getIntIfPresent (result, "stop_sequence", columnForName);
stopTime.stop_headsign = getStringIfPresent(result, "stop_headsign", columnForName);
stopTime.pickup_type = getIntIfPresent (result, "pickup_type", columnForName);
stopTime.drop_off_type = getIntIfPresent (result, "drop_off_type", columnForName);
stopTime.timepoint = getIntIfPresent (result, "timepoint", columnForName);
StopTime stopTime = new StopTime();
stopTime.trip_id = getStringIfPresent(result, "trip_id", columnForName);
stopTime.arrival_time = getIntIfPresent (result, "arrival_time", columnForName);
stopTime.departure_time = getIntIfPresent (result, "departure_time", columnForName);
stopTime.stop_id = getStringIfPresent(result, "stop_id", columnForName);
stopTime.stop_sequence = getIntIfPresent (result, "stop_sequence", columnForName);
stopTime.stop_headsign = getStringIfPresent(result, "stop_headsign", columnForName);
stopTime.pickup_type = getIntIfPresent (result, "pickup_type", columnForName);
stopTime.drop_off_type = getIntIfPresent (result, "drop_off_type", columnForName);
stopTime.timepoint = getIntIfPresent (result, "timepoint", columnForName);
stopTime.shape_dist_traveled = getDoubleIfPresent(result, "shape_dist_traveled", columnForName);
return stopTime;
};
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/com/conveyal/gtfs/loader/Feed.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ public class Feed {
public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
// public final TableReader<Fare> fares;
public final TableReader<FareAttribute> fareAttributes;
public final TableReader<Frequency> frequencies;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
Expand All @@ -57,7 +58,8 @@ public Feed (DataSource dataSource, String tablePrefix) {
if (tablePrefix != null && !tablePrefix.endsWith(".")) tablePrefix += ".";
this.tablePrefix = tablePrefix == null ? "" : tablePrefix;
agencies = new JDBCTableReader(Table.AGENCY, dataSource, tablePrefix, EntityPopulator.AGENCY);
// fares = new JDBCTableReader(Table.FARES, dataSource, tablePrefix, EntityPopulator.FARE);
fareAttributes = new JDBCTableReader(Table.FARE_ATTRIBUTES, dataSource, tablePrefix, EntityPopulator.FARE_ATTRIBUTE);
frequencies = new JDBCTableReader(Table.FREQUENCIES, dataSource, tablePrefix, EntityPopulator.FREQUENCY);
calendars = new JDBCTableReader(Table.CALENDAR, dataSource, tablePrefix, EntityPopulator.CALENDAR);
calendarDates = new JDBCTableReader(Table.CALENDAR_DATES, dataSource, tablePrefix, EntityPopulator.CALENDAR_DATE);
routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE);
Expand Down Expand Up @@ -89,6 +91,8 @@ public ValidationResult validate () {
List<FeedValidator> feedValidators = Arrays.asList(
new MisplacedStopValidator(this, errorStorage, validationResult),
new DuplicateStopsValidator(this, errorStorage),
new FaresValidator(this, errorStorage),
new FrequencyValidator(this, errorStorage),
new TimeZoneValidator(this, errorStorage),
new NewTripTimesValidator(this, errorStorage),
new NamesValidator(this, errorStorage));
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/loader/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ public Table keyFieldIsNotUnique() {
}

/** Fluent method to set whether the table has a compound key, e.g., transfers#to_stop_id. */
private Table hasCompoundKey() {
public Table hasCompoundKey() {
this.compoundKey = true;
return this;
}
Expand Down
26 changes: 26 additions & 0 deletions src/main/java/com/conveyal/gtfs/validator/FaresValidator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.error.NewGTFSErrorType;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.model.FareAttribute;

/**
 * Feed validator for fare attributes. At present its only check is that the transfers and transfer_duration fields
 * of each fare attribute agree with one another.
 */
public class FaresValidator extends FeedValidator {

    /**
     * @param feed the feed whose fare attributes will be checked
     * @param errorStorage passed through to the {@link FeedValidator} superclass
     */
    public FaresValidator(Feed feed, SQLErrorStorage errorStorage) {
        super(feed, errorStorage);
    }

    /**
     * Iterates over every fare attribute in the feed and registers a
     * {@link NewGTFSErrorType#FARE_TRANSFER_MISMATCH} error for each one whose transfers value is zero while its
     * transfer_duration is greater than zero.
     */
    @Override
    public void validate() {
        for (FareAttribute fare : feed.fareAttributes) {
            boolean transfersNotPermitted = fare.transfers == 0;
            boolean definesTransferDuration = fare.transfer_duration > 0;
            // Only the combination "no transfers permitted" plus "a duration for which a transfer is valid" is
            // contradictory; anything else is left alone.
            if (!transfersNotPermitted || !definesTransferDuration) {
                continue;
            }
            registerError(fare, NewGTFSErrorType.FARE_TRANSFER_MISMATCH);
        }
    }
}
Loading