Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Field length validator ltr 2 #277

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions src/main/java/com/conveyal/gtfs/GTFS.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package com.conveyal.gtfs;

import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.FeedLoadResult;
import com.conveyal.gtfs.loader.JdbcGtfsExporter;
import com.conveyal.gtfs.loader.JdbcGtfsLoader;
import com.conveyal.gtfs.loader.JdbcGtfsSnapshotter;
import com.conveyal.gtfs.loader.SnapshotResult;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import com.conveyal.gtfs.validator.FeedValidator;
import com.conveyal.gtfs.validator.FeedValidatorCreator;
import com.conveyal.gtfs.validator.ValidationResult;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.Files;
Expand Down Expand Up @@ -94,19 +93,13 @@ public static SnapshotResult makeSnapshot (String feedId, DataSource dataSource)
return result;
}

/**
* Shorthand for calling validate() without custom validators.
*/
public static ValidationResult validate (String feedId, DataSource dataSource) {
return validate(feedId, dataSource, null);
}

/**
* Once a feed has been loaded into the database, examine its contents looking for various problems and errors.
*/
public static ValidationResult validate (String feedId, DataSource dataSource, BiFunction<Feed, SQLErrorStorage, List<FeedValidator>> customValidatorRequest) {
public static ValidationResult validate (String feedId, DataSource dataSource, FeedValidatorCreator... additionalValidators) {
Feed feed = new Feed(dataSource, feedId);
return feed.validate(customValidatorRequest);
ValidationResult result = feed.validate(additionalValidators);
return result;
}

/**
Expand Down
56 changes: 21 additions & 35 deletions src/main/java/com/conveyal/gtfs/loader/Feed.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.conveyal.gtfs.loader;

import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.error.NewGTFSError;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.model.*;
Expand All @@ -14,9 +13,7 @@
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;

import static com.conveyal.gtfs.error.NewGTFSErrorType.VALIDATOR_FAILED;

Expand All @@ -33,20 +30,15 @@ public class Feed {
// This may be the empty string if the feed is stored in the root ("public") schema.
public final String tablePrefix;

public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
public final TableReader<FareAttribute> fareAttributes;
public final TableReader<Frequency> frequencies;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
// public final TableReader<ShapePoint> shapePoints;
public final TableReader<StopTime> stopTimes;

/* A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. */
// TODO remove this and use only NewGTFSErrors in Validators, loaded into a JDBC table
public final List<GTFSError> errors = new ArrayList<>();
public final TableReader<Frequency> frequencies;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
public final TableReader<StopTime> stopTimes;

/**
* Create a feed that reads tables over a JDBC connection. The connection should already be set to the right
Expand All @@ -66,36 +58,32 @@ public Feed (DataSource dataSource, String tablePrefix) {
routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE);
stops = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP);
trips = new JDBCTableReader(Table.TRIPS, dataSource, tablePrefix, EntityPopulator.TRIP);
// shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
stopTimes = new JDBCTableReader(Table.STOP_TIMES, dataSource, tablePrefix, EntityPopulator.STOP_TIME);
}

/**
* Shorthand for calling validate() without custom validators.
*/
public ValidationResult validate () {
return validate(null);
}

/**
* Run the standard validation checks for this feed and store the validation errors in the database. Optionally,
* takes one or more {@link FeedValidatorCreator} in the form of lambda method refs (e.g., {@code MTCValidator::new}),
* which this method will instantiate and run after the standard validation checks have been completed.
*
* TODO check whether validation has already occurred, overwrite results.
* TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded data is 100% visible.
* That would also avoid having to reconnect the error storage to the DB.
* TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded
* data is 100% visible. That would also avoid having to reconnect the error storage to the DB.
*/
public ValidationResult validate (BiFunction<Feed, SQLErrorStorage, List<FeedValidator>> customValidatorRequest) {
public ValidationResult validate (FeedValidatorCreator... additionalValidators) {
long validationStartTime = System.currentTimeMillis();
// Create an empty validation result that will have its fields populated by certain validators.
ValidationResult validationResult = new ValidationResult();
// Error tables should already be present from the initial load.
// Reconnect to the existing error tables.
SQLErrorStorage errorStorage = null;
SQLErrorStorage errorStorage;
try {
errorStorage = new SQLErrorStorage(dataSource.getConnection(), tablePrefix, false);
} catch (SQLException | InvalidNamespaceException ex) {
throw new StorageException(ex);
}
int errorCountBeforeValidation = errorStorage.getErrorCount();

// Create list of standard validators to run on every feed.
List<FeedValidator> feedValidators = Lists.newArrayList(
new MisplacedStopValidator(this, errorStorage, validationResult),
new DuplicateStopsValidator(this, errorStorage),
Expand All @@ -105,18 +93,16 @@ public ValidationResult validate (BiFunction<Feed, SQLErrorStorage, List<FeedVal
new NewTripTimesValidator(this, errorStorage),
new NamesValidator(this, errorStorage)
);

// Add additional validators, if any provided.
List<FeedValidator> customValidators = null;
if (customValidatorRequest != null) customValidators = customValidatorRequest.apply(this, errorStorage);
if (customValidators != null) feedValidators.addAll(customValidators);
// Create additional validators specified in this method's args and add to list of feed validators to run.
for (FeedValidatorCreator creator : additionalValidators) {
if (creator != null) feedValidators.add(creator.create(this, errorStorage));
}

for (FeedValidator feedValidator : feedValidators) {
String validatorName = feedValidator.getClass().getSimpleName();
try {
LOG.info("Running {}.", validatorName);
int errorCountBefore = errorStorage.getErrorCount();
// todo why not just pass the feed and errorstorage in here?
feedValidator.validate();
LOG.info("{} found {} errors.", validatorName, errorStorage.getErrorCount() - errorCountBefore);
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;

/**
* A functional interface used to create {@link FeedValidator} instances. This interface supports the ability to pass
* validators by lambda method references ({@code SomeValidator::new}) to the {@link Feed#validate(FeedValidatorCreator...)}
* method in order to run special validation checks on specific feeds (e.g., {@link MTCValidator} should be run only on
* GTFS files loaded for those MTC operators). To instantiate the FeedValidator, simply call the
* {@link #create(Feed, SQLErrorStorage)} method, passing in the feed and errorStorage arguments. This is in lieu of
* instantiating the FeedValidator with those arguments in the constructor because these objects are not available before
* the validate method is invoked.
*/
@FunctionalInterface
public interface FeedValidatorCreator {
/**
* The callback that instantiates and returns instances of custom FeedValidator objects
* constructed using the provided feed and error storage objects.
* @param feed The feed being validated.
* @param errorStorage The object that handles error storage.
*/
FeedValidator create(Feed feed, SQLErrorStorage errorStorage);
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public boolean validate(Feed feed, boolean repair) {
if (trip.shape_id == null) {
isValid = false;
if (missingShapeErrorCount < errorLimit) {
feed.errors.add(new MissingShapeError(trip));
// FIXME store MissingShape errors
// feed.errors.add(new MissingShapeError(trip));
}
missingShapeErrorCount++;
continue;
Expand Down Expand Up @@ -111,7 +112,8 @@ public boolean validate(Feed feed, boolean repair) {
// check if first stop is x times closer to end of shape than the beginning or last stop is x times closer to start than the end
if (distanceFirstStopToStart > (distanceFirstStopToEnd * distanceMultiplier) && distanceLastStopToEnd > (distanceLastStopToStart * distanceMultiplier)) {
if (reversedTripShapeErrorCount < errorLimit) {
feed.errors.add(new ReversedTripShapeError(trip));
// FIXME store ReversedTripShape errors
// feed.errors.add(new ReversedTripShapeError(trip));
}
reversedTripShapeErrorCount++;
isValid = false;
Expand All @@ -120,7 +122,8 @@ public boolean validate(Feed feed, boolean repair) {
if (missingCoordinatesErrorCount > 0) {
for (Map.Entry<ShapePoint, List<String>> shapeError : missingShapesMap.entrySet()) {
String[] tripIdList = shapeError.getValue().toArray(new String[shapeError.getValue().size()]);
feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList));
// FIXME store ShapeMissingCoordinates errors
// feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList));
}
}
return isValid;
Expand Down
33 changes: 18 additions & 15 deletions src/test/java/com/conveyal/gtfs/GTFSTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,18 @@


import com.conveyal.gtfs.error.NewGTFSErrorType;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.FeedLoadResult;
import com.conveyal.gtfs.loader.SnapshotResult;
import com.conveyal.gtfs.storage.ErrorExpectation;
import com.conveyal.gtfs.storage.ExpectedFieldType;
import com.conveyal.gtfs.storage.PersistenceExpectation;
import com.conveyal.gtfs.storage.RecordExpectation;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import com.conveyal.gtfs.validator.FeedValidator;
import com.conveyal.gtfs.validator.FeedValidatorCreator;
import com.conveyal.gtfs.validator.MTCValidator;
import com.conveyal.gtfs.validator.ValidationResult;
import com.csvreader.CsvReader;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
Expand All @@ -38,12 +35,9 @@
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.function.BiFunction;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

Expand Down Expand Up @@ -326,10 +320,13 @@ public void canLoadFeedWithLongFieldValues () {
);
assertThat(
"Long-field-value test passes",
runIntegrationTestOnFolder("fake-agency-mtc-long-fields", nullValue(), expectations, errorExpectations,
(feed, errorSource) -> {
return Lists.newArrayList(new MTCValidator(feed, errorSource));
}),
runIntegrationTestOnFolder(
"fake-agency-mtc-long-fields",
nullValue(),
expectations,
errorExpectations,
MTCValidator::new
),
equalTo(true)
);
}
Expand Down Expand Up @@ -361,7 +358,7 @@ private boolean runIntegrationTestOnFolder(
Matcher<Object> fatalExceptionExpectation,
PersistenceExpectation[] persistenceExpectations,
ErrorExpectation[] errorExpectations,
BiFunction<Feed, SQLErrorStorage, List<FeedValidator>> customValidatorRequest
FeedValidatorCreator customValidator
) {
LOG.info("Running integration test on folder {}", folderName);
// zip up test folder into temp zip file
Expand All @@ -372,7 +369,13 @@ private boolean runIntegrationTestOnFolder(
e.printStackTrace();
return false;
}
return runIntegrationTestOnZipFile(zipFileName, fatalExceptionExpectation, persistenceExpectations, errorExpectations, customValidatorRequest);
return runIntegrationTestOnZipFile(
zipFileName,
fatalExceptionExpectation,
persistenceExpectations,
errorExpectations,
customValidator
);
}

/**
Expand Down Expand Up @@ -407,7 +410,7 @@ private boolean runIntegrationTestOnZipFile(
Matcher<Object> fatalExceptionExpectation,
PersistenceExpectation[] persistenceExpectations,
ErrorExpectation[] errorExpectations,
BiFunction<Feed, SQLErrorStorage, List<FeedValidator>> customValidatorRequest
FeedValidatorCreator customValidator
) {
String testDBName = TestUtils.generateNewDB();
String dbConnectionUrl = String.join("/", JDBC_URL, testDBName);
Expand All @@ -424,7 +427,7 @@ private boolean runIntegrationTestOnZipFile(
// load and validate feed
LOG.info("load and validate GTFS file {}", zipFileName);
FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource);
ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource, customValidatorRequest);
ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource, customValidator);

assertThat(validationResult.fatalException, is(fatalExceptionExpectation));
namespace = loadResult.uniqueIdentifier;
Expand Down