From ec170659b03c6db3d1242664b8455366955481b7 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Wed, 1 Nov 2023 16:24:12 +0100 Subject: [PATCH 1/7] code formatting --- tests/test_web_resource.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_web_resource.py b/tests/test_web_resource.py index 4826731f..d27e1b7d 100644 --- a/tests/test_web_resource.py +++ b/tests/test_web_resource.py @@ -139,10 +139,12 @@ def test_turtle(self): # self.assertEqual(60, len(turtle_WR.get_rdf())) def test_MassBank(self): - mb = WebResource("https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001") + mb = WebResource( + "https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001" + ) kg = mb.get_rdf() - #print(kg.serialize(format="turtle")) - #logging.info(f"{len(kg)} loaded RDF triples") + # print(kg.serialize(format="turtle")) + # logging.info(f"{len(kg)} loaded RDF triples") self.assertGreater(len(kg), 70) def test_n3(self): From 592ad73a94f09b59df6a785a257e0dabeac79145 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Mon, 4 Dec 2023 17:32:32 +0100 Subject: [PATCH 2/7] Store harvesting and validation statistics --- app.py | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/app.py b/app.py index b57f02e6..9d25f980 100644 --- a/app.py +++ b/app.py @@ -54,7 +54,7 @@ from metrics import test_metric from metrics.FAIRMetricsFactory import FAIRMetricsFactory from metrics.WebResource import WebResource -from metrics.Evaluation import Result +from metrics.Evaluation import Result, Evaluation from profiles.bioschemas_shape_gen import validate_any_from_KG from profiles.bioschemas_shape_gen import validate_any_from_microdata from metrics.util import SOURCE, inspect_onto_reg @@ -502,9 +502,21 @@ def get(self): args = reqparse.parse_args() url = args["url"] + eval = Evaluation() + eval.set_start_time() + eval.set_target_uri(url) + eval.set_reason("metadata harvesting, 
success score == metadata size") + web_res = WebResource(url) - data_str = web_res.get_rdf().serialize(format="json-ld") + kg = web_res.get_rdf() + size = len(kg) + data_str = kg.serialize(format="json-ld") data_json = json.loads(data_str) + + eval.set_score(size) + eval.set_end_time() + eval.persist(source="API") + return data_json @@ -616,17 +628,21 @@ def get(self): """Validate an RDF JSON-LD graph against Bioschemas profiles""" args = reqparse.parse_args() url = args["url"] + + eval = Evaluation() + eval.set_start_time() + eval.set_target_uri(url) + eval.set_reason("bioschemas metadata validation") web_res = WebResource(url) kg = web_res.get_rdf() results = {} - + # Evaluate only profile with conformsTo results_conformsto = dyn_evaluate_profile_with_conformsto(kg) # Try to match and evaluate all found corresponding profiles results_type = evaluate_profile_from_type(kg) - print(results_type) for result_key in results_conformsto.keys(): results[result_key] = results_conformsto[result_key] @@ -635,6 +651,9 @@ def get(self): if result_key not in results: results[result_key] = results_type[result_key] + eval.set_end_time() + eval.persist(source="API") + # TODO Try similarity match her for profiles that are not matched return results @@ -648,6 +667,11 @@ def get(self): args = reqparse.parse_args() url = args["url"] + eval = Evaluation() + eval.set_start_time() + eval.set_target_uri(url) + eval.set_reason("bioschemas metadata validation (from conforms_to)") + web_res = WebResource(url) kg = web_res.get_rdf() @@ -656,6 +680,9 @@ def get(self): # TODO Try similarity match her for profiles that are not matched + eval.set_end_time() + eval.persist(source="API") + return results_conformsto @@ -667,12 +694,19 @@ def get(self): args = reqparse.parse_args() url = args["url"] + eval = Evaluation() + eval.set_start_time() + eval.set_target_uri(url) + eval.set_reason("bioschemas metadata validation (from types)") + web_res = WebResource(url) kg = web_res.get_rdf() # Try to match 
and evaluate all found corresponding profiles results_type = evaluate_profile_from_type(kg) - print(results_type) + + eval.set_end_time() + eval.persist(source="API") # TODO Try similarity match her for profiles that are not matched From 8191151b9d5d9393c1e1ecafe307f1182597bf22 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Mon, 4 Dec 2023 17:32:55 +0100 Subject: [PATCH 3/7] fixed dependency --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 818e2ced..fc645363 100644 --- a/environment.yml +++ b/environment.yml @@ -26,7 +26,7 @@ dependencies: - gitpython - pip - cachetools==5.0.0 - - flask-restx + - flask-restx==1.0.3 - flask-swagger-ui - numpy - sphinx From 569ea5f3f594298dbc51438ff83369b416cd9ba9 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Mon, 4 Dec 2023 17:33:23 +0100 Subject: [PATCH 4/7] type annotations --- metrics/Evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/Evaluation.py b/metrics/Evaluation.py index cedf8f02..e6d3a3e4 100644 --- a/metrics/Evaluation.py +++ b/metrics/Evaluation.py @@ -99,15 +99,15 @@ def set_start_time(self): def set_end_time(self): self.end_time = self.get_current_time() - def set_recommendations(self, recommendation_text): + def set_recommendations(self, recommendation_text: str): self.recommendation = recommendation_text # used by FAIRMetrics, will probably be replaced by logs - def set_reason(self, r): + def set_reason(self, r: str): self.reason = r # used by FAIRMetrics, will probably be replaced by logs - def append_reason(self, r): + def append_reason(self, r: str): self.reason = self.reason + "\n" + r def set_web_resource(self, web_resource): From ea7545c4f29655f24e8bbea0602c5f20be4dc31c Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Mon, 4 Dec 2023 17:34:10 +0100 Subject: [PATCH 5/7] fixed logs --- profiles/Profile.py | 10 +++++----- profiles/ProfileFactory.py | 2 +-
profiles/bioschemas_shape_gen.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/profiles/Profile.py b/profiles/Profile.py index ca0fb929..0aa5ca4e 100755 --- a/profiles/Profile.py +++ b/profiles/Profile.py @@ -102,7 +102,7 @@ def gen_SHACL_from_profile(self): fc:{{shape_name}} a sh:NodeShape ; - + {% for c in target_classes %} sh:targetClass {{c}}, {{c.replace("sc:", "scs:")}} ; {% endfor %} @@ -180,8 +180,8 @@ def validate_shape(self, knowledge_graph, shacl_shape): results = results_graph.query(report_query) # print("VALIDATION RESULTS") - print(knowledge_graph.serialize(format="turtle")) - print(shacl_shape) + # print(knowledge_graph.serialize(format="turtle")) + # print(shacl_shape) # print(results_text) # print(conforms) # print(results_graph.serialize(format="turtle")) @@ -208,8 +208,8 @@ def validate_shape(self, knowledge_graph, shacl_shape): errors.append(f'{r["path"]}') else: errors.append(f'{r["path"]}') - print(errors) - print(warnings) + # print(errors) + # print(warnings) return conforms, warnings, errors def match_sub_kgs_from_profile(self, kg): diff --git a/profiles/ProfileFactory.py b/profiles/ProfileFactory.py index 1366ca33..39ad8be4 100644 --- a/profiles/ProfileFactory.py +++ b/profiles/ProfileFactory.py @@ -472,7 +472,7 @@ def evaluate_profile_with_conformsto(kg): if ct_profile is not None: shacl_shape = ct_profile.get_shacl_shape() - print(shacl_shape) + # print(shacl_shape) conforms, warnings, errors = ct_profile.validate_shape( sub_kg, shacl_shape ) diff --git a/profiles/bioschemas_shape_gen.py b/profiles/bioschemas_shape_gen.py index b9d43c2c..4f7ef5d7 100644 --- a/profiles/bioschemas_shape_gen.py +++ b/profiles/bioschemas_shape_gen.py @@ -590,7 +590,7 @@ def validate_any_from_KG(kg): # print(o.n3(kg.namespace_manager)) if o.n3(kg.namespace_manager) in bs_profiles.keys(): # print() - # print(f"Trying to validate {s} as a(n) {o} resource") + print(f"Trying to validate {s} as a(n) {o} resource") shacl_shape, ref_profile = 
gen_SHACL_from_target_class( o.n3(kg.namespace_manager) ) From fa34f40bb49d476f6744a65759d9fc3220e7c3f2 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Mon, 4 Dec 2023 17:35:32 +0100 Subject: [PATCH 6/7] formatting --- metrics/util.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/metrics/util.py b/metrics/util.py index d2b364f9..a7a8afbe 100644 --- a/metrics/util.py +++ b/metrics/util.py @@ -1,5 +1,5 @@ -from time import time -from SPARQLWrapper import SPARQLWrapper, N3 +# from time import time +# from SPARQLWrapper import SPARQLWrapper, N3 from rdflib import Graph, ConjunctiveGraph, URIRef import requests import metrics.statistics as stats @@ -63,6 +63,7 @@ def __str__(self): # DOI regex regex = r"10.\d{4,9}\/[-._;()\/:A-Z0-9]+" + # Dynamicaly generates a table with FAIR metrics implementations def gen_metrics(): metrics = [] @@ -407,7 +408,6 @@ def inspect_onto_reg(kg, is_inspect_ui): emit("done_check", table_content) for c in table_content["classes"]: - c["tag"]["OLS"] = ask_OLS(c["name"]) if is_inspect_ui: emit("done_check", table_content) @@ -430,7 +430,6 @@ def inspect_onto_reg(kg, is_inspect_ui): table_content["classes_false"].append(c["name"]) for p in table_content["properties"]: - p["tag"]["OLS"] = ask_OLS(p["name"]) if is_inspect_ui: emit("done_check", table_content) @@ -693,7 +692,6 @@ def extract_rdf_from_html(uri): def extruct_to_rdf(extruct_str): - g = ConjunctiveGraph() for md in extruct_str["json-ld"]: From c23c77380223350a4da63462dafe713d9651c570 Mon Sep 17 00:00:00 2001 From: Alban Gaignard Date: Thu, 7 Dec 2023 16:58:49 +0100 Subject: [PATCH 7/7] code formatting --- app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 9d25f980..f8d16b60 100644 --- a/app.py +++ b/app.py @@ -628,7 +628,7 @@ def get(self): """Validate an RDF JSON-LD graph against Bioschemas profiles""" args = reqparse.parse_args() url = args["url"] - + eval = Evaluation() eval.set_start_time() 
eval.set_target_uri(url) @@ -637,7 +637,7 @@ def get(self): web_res = WebResource(url) kg = web_res.get_rdf() results = {} - + # Evaluate only profile with conformsTo results_conformsto = dyn_evaluate_profile_with_conformsto(kg)