From ec170659b03c6db3d1242664b8455366955481b7 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Wed, 1 Nov 2023 16:24:12 +0100
Subject: [PATCH 1/7] code formatting

---
 tests/test_web_resource.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_web_resource.py b/tests/test_web_resource.py
index 4826731f..d27e1b7d 100644
--- a/tests/test_web_resource.py
+++ b/tests/test_web_resource.py
@@ -139,10 +139,12 @@ def test_turtle(self):
         # self.assertEqual(60, len(turtle_WR.get_rdf()))
 
     def test_MassBank(self):
-        mb = WebResource("https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001")
+        mb = WebResource(
+            "https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001"
+        )
         kg = mb.get_rdf()
-        #print(kg.serialize(format="turtle"))
-        #logging.info(f"{len(kg)} loaded RDF triples")
+        # print(kg.serialize(format="turtle"))
+        # logging.info(f"{len(kg)} loaded RDF triples")
         self.assertGreater(len(kg), 70)
 
     def test_n3(self):

From 592ad73a94f09b59df6a785a257e0dabeac79145 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Mon, 4 Dec 2023 17:32:32 +0100
Subject: [PATCH 2/7] Store harvesting and validation statistics

---
 app.py | 44 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/app.py b/app.py
index b57f02e6..9d25f980 100644
--- a/app.py
+++ b/app.py
@@ -54,7 +54,7 @@
 from metrics import test_metric
 from metrics.FAIRMetricsFactory import FAIRMetricsFactory
 from metrics.WebResource import WebResource
-from metrics.Evaluation import Result
+from metrics.Evaluation import Result, Evaluation
 from profiles.bioschemas_shape_gen import validate_any_from_KG
 from profiles.bioschemas_shape_gen import validate_any_from_microdata
 from metrics.util import SOURCE, inspect_onto_reg
@@ -502,9 +502,21 @@ def get(self):
         args = reqparse.parse_args()
         url = args["url"]
 
+        eval = Evaluation()
+        eval.set_start_time()
+        eval.set_target_uri(url)
+        eval.set_reason("metadata harvesting, success score == metadata size")
+
         web_res = WebResource(url)
-        data_str = web_res.get_rdf().serialize(format="json-ld")
+        kg = web_res.get_rdf()
+        size = len(kg)
+        data_str = kg.serialize(format="json-ld")
         data_json = json.loads(data_str)
+
+        eval.set_score(size)
+        eval.set_end_time()
+        eval.persist(source="API")
+
         return data_json
 
 
@@ -616,17 +628,21 @@ def get(self):
         """Validate an RDF JSON-LD graph against Bioschemas profiles"""
         args = reqparse.parse_args()
         url = args["url"]
+        
+        eval = Evaluation()
+        eval.set_start_time()
+        eval.set_target_uri(url)
+        eval.set_reason("bioschemas metadata validation")
 
         web_res = WebResource(url)
         kg = web_res.get_rdf()
         results = {}
-
+        
         # Evaluate only profile with conformsTo
         results_conformsto = dyn_evaluate_profile_with_conformsto(kg)
 
         # Try to match and evaluate all found corresponding profiles
         results_type = evaluate_profile_from_type(kg)
-        print(results_type)
 
         for result_key in results_conformsto.keys():
             results[result_key] = results_conformsto[result_key]
@@ -635,6 +651,9 @@ def get(self):
             if result_key not in results:
                 results[result_key] = results_type[result_key]
 
+        eval.set_end_time()
+        eval.persist(source="API")
+
         # TODO Try similarity match her for profiles that are not matched
 
         return results
@@ -648,6 +667,11 @@ def get(self):
         args = reqparse.parse_args()
         url = args["url"]
 
+        eval = Evaluation()
+        eval.set_start_time()
+        eval.set_target_uri(url)
+        eval.set_reason("bioschemas metadata validation (from conforms_to)")
+
         web_res = WebResource(url)
         kg = web_res.get_rdf()
 
@@ -656,6 +680,9 @@ def get(self):
 
         # TODO Try similarity match her for profiles that are not matched
 
+        eval.set_end_time()
+        eval.persist(source="API")
+
         return results_conformsto
 
 
@@ -667,12 +694,19 @@ def get(self):
         args = reqparse.parse_args()
         url = args["url"]
 
+        eval = Evaluation()
+        eval.set_start_time()
+        eval.set_target_uri(url)
+        eval.set_reason("bioschemas metadata validation (from types)")
+
         web_res = WebResource(url)
         kg = web_res.get_rdf()
 
         # Try to match and evaluate all found corresponding profiles
         results_type = evaluate_profile_from_type(kg)
-        print(results_type)
+
+        eval.set_end_time()
+        eval.persist(source="API")
 
         # TODO Try similarity match her for profiles that are not matched
 

From 8191151b9d5d9393c1e1ecafe307f1182597bf22 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Mon, 4 Dec 2023 17:32:55 +0100
Subject: [PATCH 3/7] fixed dependency

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 818e2ced..fc645363 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,7 +26,7 @@ dependencies:
   - gitpython
   - pip
   - cachetools==5.0.0
-  - flask-restx
+  - flask-restx==1.0.3
   - flask-swagger-ui
   - numpy
   - sphinx

From 569ea5f3f594298dbc51438ff83369b416cd9ba9 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Mon, 4 Dec 2023 17:33:23 +0100
Subject: [PATCH 4/7] type annotations

---
 metrics/Evaluation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/metrics/Evaluation.py b/metrics/Evaluation.py
index cedf8f02..e6d3a3e4 100644
--- a/metrics/Evaluation.py
+++ b/metrics/Evaluation.py
@@ -99,15 +99,15 @@ def set_start_time(self):
     def set_end_time(self):
         self.end_time = self.get_current_time()
 
-    def set_recommendations(self, recommendation_text):
+    def set_recommendations(self, recommendation_text: str):
         self.recommendation = recommendation_text
 
     # used by FAIRMetrics, will probably be replaced by logs
-    def set_reason(self, r):
+    def set_reason(self, r: str):
         self.reason = r
 
     # used by FAIRMetrics, will probably be replaced by logs
-    def append_reason(self, r):
+    def append_reason(self, r: str):
         self.reason = self.reason + "\n" + r
 
     def set_web_resource(self, web_resource):

From ea7545c4f29655f24e8bbea0602c5f20be4dc31c Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Mon, 4 Dec 2023 17:34:10 +0100
Subject: [PATCH 5/7] fixed logs

---
 profiles/Profile.py              | 10 +++++-----
 profiles/ProfileFactory.py       |  2 +-
 profiles/bioschemas_shape_gen.py |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/profiles/Profile.py b/profiles/Profile.py
index ca0fb929..0aa5ca4e 100755
--- a/profiles/Profile.py
+++ b/profiles/Profile.py
@@ -102,7 +102,7 @@ def gen_SHACL_from_profile(self):
 
             fc:{{shape_name}}
                 a sh:NodeShape ;
-                
+
                 {% for c in target_classes %}
                 sh:targetClass {{c}}, {{c.replace("sc:", "scs:")}} ;
                 {% endfor %}
@@ -180,8 +180,8 @@ def validate_shape(self, knowledge_graph, shacl_shape):
 
         results = results_graph.query(report_query)
         # print("VALIDATION RESULTS")
-        print(knowledge_graph.serialize(format="turtle"))
-        print(shacl_shape)
+        # print(knowledge_graph.serialize(format="turtle"))
+        # print(shacl_shape)
         # print(results_text)
         # print(conforms)
         # print(results_graph.serialize(format="turtle"))
@@ -208,8 +208,8 @@ def validate_shape(self, knowledge_graph, shacl_shape):
                     errors.append(f'{r["path"]}')
                 else:
                     errors.append(f'{r["path"]}')
-        print(errors)
-        print(warnings)
+        # print(errors)
+        # print(warnings)
         return conforms, warnings, errors
 
     def match_sub_kgs_from_profile(self, kg):
diff --git a/profiles/ProfileFactory.py b/profiles/ProfileFactory.py
index 1366ca33..39ad8be4 100644
--- a/profiles/ProfileFactory.py
+++ b/profiles/ProfileFactory.py
@@ -472,7 +472,7 @@ def evaluate_profile_with_conformsto(kg):
 
             if ct_profile is not None:
                 shacl_shape = ct_profile.get_shacl_shape()
-                print(shacl_shape)
+                # print(shacl_shape)
                 conforms, warnings, errors = ct_profile.validate_shape(
                     sub_kg, shacl_shape
                 )
diff --git a/profiles/bioschemas_shape_gen.py b/profiles/bioschemas_shape_gen.py
index b9d43c2c..4f7ef5d7 100644
--- a/profiles/bioschemas_shape_gen.py
+++ b/profiles/bioschemas_shape_gen.py
@@ -590,7 +590,7 @@ def validate_any_from_KG(kg):
         # print(o.n3(kg.namespace_manager))
         if o.n3(kg.namespace_manager) in bs_profiles.keys():
             # print()
-            # print(f"Trying to validate {s} as a(n) {o} resource")
+            print(f"Trying to validate {s} as a(n) {o} resource")
             shacl_shape, ref_profile = gen_SHACL_from_target_class(
                 o.n3(kg.namespace_manager)
             )

From fa34f40bb49d476f6744a65759d9fc3220e7c3f2 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Mon, 4 Dec 2023 17:35:32 +0100
Subject: [PATCH 6/7] formatting

---
 metrics/util.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/metrics/util.py b/metrics/util.py
index d2b364f9..a7a8afbe 100644
--- a/metrics/util.py
+++ b/metrics/util.py
@@ -1,5 +1,5 @@
-from time import time
-from SPARQLWrapper import SPARQLWrapper, N3
+# from time import time
+# from SPARQLWrapper import SPARQLWrapper, N3
 from rdflib import Graph, ConjunctiveGraph, URIRef
 import requests
 import metrics.statistics as stats
@@ -63,6 +63,7 @@ def __str__(self):
 # DOI regex
 regex = r"10.\d{4,9}\/[-._;()\/:A-Z0-9]+"
 
+
 # Dynamicaly generates a table with FAIR metrics implementations
 def gen_metrics():
     metrics = []
@@ -407,7 +408,6 @@ def inspect_onto_reg(kg, is_inspect_ui):
         emit("done_check", table_content)
 
     for c in table_content["classes"]:
-
         c["tag"]["OLS"] = ask_OLS(c["name"])
         if is_inspect_ui:
             emit("done_check", table_content)
@@ -430,7 +430,6 @@ def inspect_onto_reg(kg, is_inspect_ui):
             table_content["classes_false"].append(c["name"])
 
     for p in table_content["properties"]:
-
         p["tag"]["OLS"] = ask_OLS(p["name"])
         if is_inspect_ui:
             emit("done_check", table_content)
@@ -693,7 +692,6 @@ def extract_rdf_from_html(uri):
 
 
 def extruct_to_rdf(extruct_str):
-
     g = ConjunctiveGraph()
 
     for md in extruct_str["json-ld"]:

From c23c77380223350a4da63462dafe713d9651c570 Mon Sep 17 00:00:00 2001
From: Alban Gaignard <alban.gaignard@univ-nantes.fr>
Date: Thu, 7 Dec 2023 16:58:49 +0100
Subject: [PATCH 7/7] code formatting

---
 app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app.py b/app.py
index 9d25f980..f8d16b60 100644
--- a/app.py
+++ b/app.py
@@ -628,7 +628,7 @@ def get(self):
         """Validate an RDF JSON-LD graph against Bioschemas profiles"""
         args = reqparse.parse_args()
         url = args["url"]
-        
+
         eval = Evaluation()
         eval.set_start_time()
         eval.set_target_uri(url)
@@ -637,7 +637,7 @@ def get(self):
         web_res = WebResource(url)
         kg = web_res.get_rdf()
         results = {}
-        
+
         # Evaluate only profile with conformsTo
         results_conformsto = dyn_evaluate_profile_with_conformsto(kg)