Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
linglp committed Sep 19, 2024
2 parents 41b5ebe + 05836c9 commit 85eed54
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pdoc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.3.0
PYTHON_VERSION: 3.10
PYTHON_VERSION: "3.10"

steps:
#----------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ jobs:
curl -sSL https://install.python-poetry.org \
| python3 - --version ${{ env.POETRY_VERSION }};
poetry config virtualenvs.create true;
poetry config virtualenvs.in-project false;
poetry config virtualenvs.in-project true;
#----------------------------------------------
# install dependencies and root project
#----------------------------------------------
Expand Down
33 changes: 24 additions & 9 deletions schematic/models/validate_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import pandas as pd
import requests
from jsonschema import ValidationError
from synapseclient.core.exceptions import SynapseNoCredentialsError
from synapseclient import File
from synapseclient.core.exceptions import SynapseNoCredentialsError

from schematic.schemas.data_model_graph import DataModelGraphExplorer
from schematic.store.synapse import SynapseStorage
Expand Down Expand Up @@ -479,10 +479,17 @@ def generate_filename_error(
Errors: list[str] Error details for further storage.
warnings: list[str] Warning details for further storage.
"""
if error_type == "path does not exist":
error_message = f"The file path '{invalid_entry}' on row {row_num} does not exist in the file view."
elif error_type == "mismatched entityId":
error_message = f"The entityId for file path '{invalid_entry}' on row {row_num} does not match the entityId for the file in the file view"
error_messages = {
"mismatched entityId": f"The entityId for file path '{invalid_entry}' on row {row_num}"
" does not match the entityId for the file in the file view.",
"path does not exist": f"The file path '{invalid_entry}' on row {row_num} does not exist in the file view.",
"entityId does not exist": f"The entityId for file path '{invalid_entry}' on row {row_num}"
" does not exist in the file view.",
"missing entityId": f"The entityId is missing for file path '{invalid_entry}' on row {row_num}.",
}
error_message = error_messages.get(error_type, None)
if not error_message:
raise KeyError(f"Unsupported error type provided: '{error_type}'")

error_list, warning_list = GenerateError.raise_and_store_message(
dmge=dmge,
Expand Down Expand Up @@ -2075,12 +2082,11 @@ def filename_validation(
fileview = self.synStore.storageFileviewTable.reset_index(drop=True)
# is each manifest Filename present among the paths in the file view?
files_in_view = manifest["Filename"].isin(fileview["path"])
entity_ids_in_view = manifest["entityId"].isin(fileview["id"])
# join manifest rows to the file view on path, then check that each manifest entityId equals the file view id
joined_df = manifest.merge(
fileview, how="outer", left_on="Filename", right_on="path"
fileview, how="left", left_on="Filename", right_on="path"
)
# cover case where there are more files in dataset than in manifest
joined_df = joined_df.loc[~joined_df["Component"].isna()].reset_index(drop=True)

entity_id_match = joined_df["id"] == joined_df["entityId"]

Expand All @@ -2089,6 +2095,14 @@ def filename_validation(
manifest_with_errors["Error"] = pd.NA
manifest_with_errors.loc[~entity_id_match, "Error"] = "mismatched entityId"
manifest_with_errors.loc[~files_in_view, "Error"] = "path does not exist"
manifest_with_errors.loc[
~entity_ids_in_view, "Error"
] = "entityId does not exist"
manifest_with_errors.loc[
(manifest_with_errors["entityId"].isna())
| (manifest_with_errors["entityId"] == ""),
"Error",
] = "missing entityId"

# Generate errors
invalid_entries = manifest_with_errors.loc[
Expand All @@ -2098,7 +2112,8 @@ def filename_validation(
vr_errors, vr_warnings = GenerateError.generate_filename_error(
val_rule=val_rule,
attribute_name="Filename",
row_num=str(index),
# +2 maps the 0-based DataFrame index to the manifest row number
# (1-based, after the header row), consistent with other validation functions
row_num=str(index + 2),
invalid_entry=data["Filename"],
error_type=data["Error"],
dmge=self.dmge,
Expand Down
5 changes: 3 additions & 2 deletions tests/data/mock_manifests/InvalidFilenameManifest.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Component,Filename,entityId
MockFilename,schematic - main/MockFilenameComponent/txt1.txt,syn61682653
MockFilename,schematic - main/MockFilenameComponent/txt2.txt,syn61682660
MockFilename,schematic - main/MockFilenameComponent/txt3.txt,syn61682662
MockFilename,schematic - main/MockFilenameComponent/txt3.txt,syn61682653
MockFilename,schematic - main/MockFilenameComponent/this_file_does_not_exist.txt,syn61682653
MockFilename,schematic - main/MockFilenameComponent/txt4.txt,syn6168265
MockFilename,schematic - main/MockFilenameComponent/txt5.txt,
MockFilename,schematic - main/MockFilenameComponent/txt6.txt,
39 changes: 31 additions & 8 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,14 +693,13 @@ def test_filename_manifest(self, helpers, dmge):
project_scope=["syn23643250"],
dataset_scope="syn61682648",
)

# Check errors
assert (
GenerateError.generate_filename_error(
val_rule="filenameExists",
val_rule="filenameExists syn61682648",
attribute_name="Filename",
row_num="3",
invalid_entry="schematic - main/MockFilenameComponent/txt4.txt",
row_num="4",
invalid_entry="schematic - main/MockFilenameComponent/txt3.txt",
error_type="mismatched entityId",
dmge=dmge,
)[0]
Expand All @@ -709,17 +708,41 @@ def test_filename_manifest(self, helpers, dmge):

assert (
GenerateError.generate_filename_error(
val_rule="filenameExists",
val_rule="filenameExists syn61682648",
attribute_name="Filename",
row_num="4",
invalid_entry="schematic - main/MockFilenameComponent/txt5.txt",
row_num="5",
invalid_entry="schematic - main/MockFilenameComponent/this_file_does_not_exist.txt",
error_type="path does not exist",
dmge=dmge,
)[0]
in errors
)

assert len(errors) == 2
assert (
GenerateError.generate_filename_error(
val_rule="filenameExists syn61682648",
attribute_name="Filename",
row_num="6",
invalid_entry="schematic - main/MockFilenameComponent/txt4.txt",
error_type="entityId does not exist",
dmge=dmge,
)[0]
in errors
)

assert (
GenerateError.generate_filename_error(
val_rule="filenameExists syn61682648",
attribute_name="Filename",
row_num="7",
invalid_entry="schematic - main/MockFilenameComponent/txt6.txt",
error_type="missing entityId",
dmge=dmge,
)[0]
in errors
)

assert len(errors) == 4
assert len(warnings) == 0

def test_filename_manifest_exception(self, helpers, dmge):
Expand Down
Loading

0 comments on commit 85eed54

Please sign in to comment.