From 4df1d5d506594c49807a555c5844adafd1991683 Mon Sep 17 00:00:00 2001 From: Cameron Tew Date: Wed, 2 Dec 2020 14:42:14 -0800 Subject: [PATCH] Relaxes `split_if_relative_reference` validation constraints in C++ and Python. See also: https://www.hl7.org/fhir/references.html#literal. Closes #24. PiperOrigin-RevId: 345316065 --- cc/google/fhir/json_parser.cc | 1 + cc/google/fhir/references.cc | 10 ++----- cc/google/fhir/references.h | 19 +++++++++++++ cc/google/fhir/testutil/generator.cc | 1 + cc/google/fhir/util.h | 4 --- py/google/fhir/r4/references_test.py | 9 ------- py/google/fhir/references.py | 27 ++++++++++--------- py/google/fhir/stu3/references_test.py | 8 ------ .../r4/validation/reference.invalid.ndjson | 3 --- testdata/r4/validation/reference.valid.ndjson | 1 + .../stu3/validation/reference.invalid.ndjson | 3 --- .../stu3/validation/reference.valid.ndjson | 1 + 12 files changed, 40 insertions(+), 47 deletions(-) diff --git a/cc/google/fhir/json_parser.cc b/cc/google/fhir/json_parser.cc index 11289067f..795ab4f1e 100644 --- a/cc/google/fhir/json_parser.cc +++ b/cc/google/fhir/json_parser.cc @@ -41,6 +41,7 @@ #include "google/fhir/status/statusor.h" #include "google/fhir/stu3/profiles.h" #include "google/fhir/util.h" +#include "google/fhir/references.h" #include "proto/google/fhir/proto/annotations.pb.h" #include "include/json/json.h" #include "re2/re2.h" diff --git a/cc/google/fhir/references.cc b/cc/google/fhir/references.cc index 465cb44ff..2d39a2142 100644 --- a/cc/google/fhir/references.cc +++ b/cc/google/fhir/references.cc @@ -173,14 +173,8 @@ absl::Status SplitIfRelativeReference(Message* reference) { return absl::OkStatus(); } - // We're permissive about various full url schemes. - static LazyRE2 kUrlReference = {"(http|https|urn):.*"}; - if (RE2::FullMatch(uri_string, *kUrlReference)) { - // There's no way to rewrite the URI, but it's valid as is. - return absl::OkStatus(); - } - return InvalidArgumentError(absl::StrCat( - "String \"", uri_string, "\" cannot be parsed as a reference.")); + // There's no way to rewrite the URI, but it's valid as is. + return absl::OkStatus(); } namespace internal { diff --git a/cc/google/fhir/references.h b/cc/google/fhir/references.h index c297183d4..18fd11f4a 100644 --- a/cc/google/fhir/references.h +++ b/cc/google/fhir/references.h @@ -40,6 +40,25 @@ absl::StatusOr GetReferenceFieldForR } // namespace internal +// If possible, parses a reference `uri` field into more structured fields. +// This is only possible for two forms of reference uris: +// * Relative references of the form $TYPE/$ID, e.g., "Patient/1234" +// In this case, this will be parsed to a proto of the form: +// {patient_id: {value: "1234"}} +// * Fragments of the form "#$FRAGMENT", e.g., "#vs1". In this case, this would +// be parsed into a proto of the form: +// {fragment: {value: "vs1"} } +// +// If the reference URI matches one of these schemas, the `uri` field will be +// cleared, and the appropriate structured fields set. +// +// If it does not match any of these schemas, the reference will be unchanged, +// and an OK status will be returned. +// +// If the message is not a valid FHIR Reference proto, this will return a +// failure status. +absl::Status SplitIfRelativeReference(::google::protobuf::Message* reference); + // Return the full string representation of a reference. absl::StatusOr ReferenceProtoToString( const ::google::protobuf::Message& reference); diff --git a/cc/google/fhir/testutil/generator.cc b/cc/google/fhir/testutil/generator.cc index ea78cda36..8c4c55398 100644 --- a/cc/google/fhir/testutil/generator.cc +++ b/cc/google/fhir/testutil/generator.cc @@ -21,6 +21,7 @@ #include "google/fhir/annotations.h" #include "google/fhir/fhir_types.h" #include "google/fhir/proto_util.h" +#include "google/fhir/references.h" #include "google/fhir/util.h" #include "include/json/json.h" diff --git a/cc/google/fhir/util.h b/cc/google/fhir/util.h index 9364df23c..857abfe39 100644 --- a/cc/google/fhir/util.h +++ b/cc/google/fhir/util.h @@ -49,10 +49,6 @@ namespace google { namespace fhir { -// Splits relative references into their components, for example, "Patient/ABCD" -// will result in the patientId field getting the value "ABCD". -absl::Status SplitIfRelativeReference(::google::protobuf::Message* reference); - // Builds an absl::Time from a time-like fhir Element. // Must have a value_us field. template diff --git a/py/google/fhir/r4/references_test.py b/py/google/fhir/r4/references_test.py index e2281c1fb..23860ab3c 100644 --- a/py/google/fhir/r4/references_test.py +++ b/py/google/fhir/r4/references_test.py @@ -70,14 +70,5 @@ def testSplitIfRelativeReference_withUrlScheme_succeeds(self): datatypes_pb2.String( value='http://acme.com/ehr/fhir/Practitioner/2323-33-4')) - def testSplitIfRelativeReference_withMalformedUri_raises(self): - ref = datatypes_pb2.Reference(uri=datatypes_pb2.String(value='InvalidUri')) - - with self.assertRaises(ValueError) as e: - references.split_if_relative_reference(ref) - - self.assertIsInstance(e.exception, ValueError) - - if __name__ == '__main__': absltest.main() diff --git a/py/google/fhir/references.py b/py/google/fhir/references.py index e8cba3475..e1f46004f 100644 --- a/py/google/fhir/references.py +++ b/py/google/fhir/references.py @@ -27,7 +27,6 @@ _INTERNAL_REFERENCE_PATTERN = re.compile( r'^(?P[0-9A-Za-z_]+)/(?P[A-Za-z0-9.-]{1,64})' r'(?:/_history/(?P[A-Za-z0-9.-]{1,64}))?$') -_URL_REFERENCE_PATTERN = re.compile(r'^(http|https|urn):.*$') def _validate_reference(reference: message.Message): @@ -68,13 +67,25 @@ def populate_typed_reference_id(reference_id: message.Message, resource_id: str, def split_if_relative_reference(reference: message.Message): - """Splits relative references into their components. + """If possible, parses a `Reference` `uri` into more structured fields. - For example, a reference with the uri set to "Patient/ABCD" will be changed - into a reference with the patientId set to "ABCD". + This is only possible for two forms of reference uris: + * Relative references of the form $TYPE/$ID, e.g., "Patient/1234" + In this case, this will be parsed to a proto of the form: + {patient_id: {value: "1234"}} + * Fragments of the form "#$FRAGMENT", e.g., "#vs1". In this case, this would + be parsed into a proto of the form: + {fragment: {value: "vs1"} } + + If the reference URI matches one of these schemas, the `uri` field will be + cleared, and the appropriate structured fields set. Otherwise, the reference + will be unchanged. Args: reference: The FHIR reference to potentially split. + + Raises: + ValueError: If the message is not a valid FHIR Reference proto. """ _validate_reference(reference) uri_field = reference.DESCRIPTOR.fields_by_name.get('uri') @@ -83,12 +94,6 @@ def split_if_relative_reference(reference: message.Message): uri = proto_utils.get_value_at_field(reference, uri_field) - # We're permissive about various full URL schemes. If we're able to find a - # match, the URI is valid as-is. - url_match = re.fullmatch(_URL_REFERENCE_PATTERN, uri.value) - if url_match is not None: - return # URI is valid - internal_match = re.fullmatch(_INTERNAL_REFERENCE_PATTERN, uri.value) if internal_match is not None: # Note that we make the reference_id off of the reference before adding it, @@ -125,8 +130,6 @@ def split_if_relative_reference(reference: message.Message): proto_utils.set_value_at_field(reference, fragment_field, fragment) return - raise ValueError(f'String {uri.value!r} cannot be parsed as a reference.') - def reference_to_string(reference: message.Message) -> str: """Returns a reference URI for a typed reference message.""" diff --git a/py/google/fhir/stu3/references_test.py b/py/google/fhir/stu3/references_test.py index d7f610e3f..77f49d688 100644 --- a/py/google/fhir/stu3/references_test.py +++ b/py/google/fhir/stu3/references_test.py @@ -70,14 +70,6 @@ def testSplitIfRelativeReference_withUrlScheme_succeeds(self): datatypes_pb2.String( value='http://acme.com/ehr/fhir/Practitioner/2323-33-4')) - def testSplitIfRelativeReference_withMalformedUri_raises(self): - ref = datatypes_pb2.Reference(uri=datatypes_pb2.String(value='InvalidUri')) - - with self.assertRaises(ValueError) as e: - references.split_if_relative_reference(ref) - - self.assertIsInstance(e.exception, ValueError) - if __name__ == '__main__': absltest.main() diff --git a/testdata/r4/validation/reference.invalid.ndjson b/testdata/r4/validation/reference.invalid.ndjson index fe35b00ff..c8d52b417 100644 --- a/testdata/r4/validation/reference.invalid.ndjson +++ b/testdata/r4/validation/reference.invalid.ndjson @@ -1,7 +1,4 @@ -{"reference": "Patient/spaces not allowed"} -{"reference": "Patient/\"notallowed"} {"reference": "Unknown/resourceType"} -{"reference": "#spaces not allowed"} {"uri": "invalidField"} {"patient_id": "unknownField"} {"patientId": "unknownField"} \ No newline at end of file diff --git a/testdata/r4/validation/reference.valid.ndjson b/testdata/r4/validation/reference.valid.ndjson index 551bb77bc..579e22995 100644 --- a/testdata/r4/validation/reference.valid.ndjson +++ b/testdata/r4/validation/reference.valid.ndjson @@ -5,3 +5,4 @@ {"reference": "FamilyMemberHistory/1"} {"reference": "urn:uuid:49a86246-4004-42eb-9bdc-f542f93f9228"} {"display": "Display Field Only"} +{"reference": "Practitioner?identifier=http://hl7.org/fhir/sid/us-npi|9999999959"} diff --git a/testdata/stu3/validation/reference.invalid.ndjson b/testdata/stu3/validation/reference.invalid.ndjson index fe35b00ff..c8d52b417 100644 --- a/testdata/stu3/validation/reference.invalid.ndjson +++ b/testdata/stu3/validation/reference.invalid.ndjson @@ -1,7 +1,4 @@ -{"reference": "Patient/spaces not allowed"} -{"reference": "Patient/\"notallowed"} {"reference": "Unknown/resourceType"} -{"reference": "#spaces not allowed"} {"uri": "invalidField"} {"patient_id": "unknownField"} {"patientId": "unknownField"} \ No newline at end of file diff --git a/testdata/stu3/validation/reference.valid.ndjson b/testdata/stu3/validation/reference.valid.ndjson index 551bb77bc..0d6e365a2 100644 --- a/testdata/stu3/validation/reference.valid.ndjson +++ b/testdata/stu3/validation/reference.valid.ndjson @@ -5,3 +5,4 @@ {"reference": "FamilyMemberHistory/1"} {"reference": "urn:uuid:49a86246-4004-42eb-9bdc-f542f93f9228"} {"display": "Display Field Only"} +{"reference": "Practitioner?identifier=http://hl7.org/fhir/sid/us-npi|9999999959"} \ No newline at end of file