Skip to content

Commit

Permalink
Merge pull request #209 from IFB-ElixirFr/better-stats
Browse files Browse the repository at this point in the history
Keep track of metadata harvesting and validation in stats
  • Loading branch information
albangaignard authored Dec 7, 2023
2 parents 7e857bf + c23c773 commit 3b31bf3
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 23 deletions.
42 changes: 38 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from metrics import test_metric
from metrics.FAIRMetricsFactory import FAIRMetricsFactory
from metrics.WebResource import WebResource
from metrics.Evaluation import Result
from metrics.Evaluation import Result, Evaluation
from profiles.bioschemas_shape_gen import validate_any_from_KG
from profiles.bioschemas_shape_gen import validate_any_from_microdata
from metrics.util import SOURCE, inspect_onto_reg
Expand Down Expand Up @@ -502,9 +502,21 @@ def get(self):
args = reqparse.parse_args()
url = args["url"]

eval = Evaluation()
eval.set_start_time()
eval.set_target_uri(url)
eval.set_reason("metadata harvesting, success score == metadata size")

web_res = WebResource(url)
data_str = web_res.get_rdf().serialize(format="json-ld")
kg = web_res.get_rdf()
size = len(kg)
data_str = kg.serialize(format="json-ld")
data_json = json.loads(data_str)

eval.set_score(size)
eval.set_end_time()
eval.persist(source="API")

return data_json


Expand Down Expand Up @@ -617,6 +629,11 @@ def get(self):
args = reqparse.parse_args()
url = args["url"]

eval = Evaluation()
eval.set_start_time()
eval.set_target_uri(url)
eval.set_reason("bioschemas metadata validation")

web_res = WebResource(url)
kg = web_res.get_rdf()
results = {}
Expand All @@ -626,7 +643,6 @@ def get(self):

# Try to match and evaluate all found corresponding profiles
results_type = evaluate_profile_from_type(kg)
print(results_type)

for result_key in results_conformsto.keys():
results[result_key] = results_conformsto[result_key]
Expand All @@ -635,6 +651,9 @@ def get(self):
if result_key not in results:
results[result_key] = results_type[result_key]

eval.set_end_time()
eval.persist(source="API")

# TODO Try similarity match here for profiles that are not matched

return results
Expand All @@ -648,6 +667,11 @@ def get(self):
args = reqparse.parse_args()
url = args["url"]

eval = Evaluation()
eval.set_start_time()
eval.set_target_uri(url)
eval.set_reason("bioschemas metadata validation (from conforms_to)")

web_res = WebResource(url)
kg = web_res.get_rdf()

Expand All @@ -656,6 +680,9 @@ def get(self):

# TODO Try similarity match here for profiles that are not matched

eval.set_end_time()
eval.persist(source="API")

return results_conformsto


Expand All @@ -667,12 +694,19 @@ def get(self):
args = reqparse.parse_args()
url = args["url"]

eval = Evaluation()
eval.set_start_time()
eval.set_target_uri(url)
eval.set_reason("bioschemas metadata validation (from types)")

web_res = WebResource(url)
kg = web_res.get_rdf()

# Try to match and evaluate all found corresponding profiles
results_type = evaluate_profile_from_type(kg)
print(results_type)

eval.set_end_time()
eval.persist(source="API")

# TODO Try similarity match here for profiles that are not matched

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies:
- gitpython
- pip
- cachetools==5.0.0
- flask-restx
- flask-restx==1.0.3
- flask-swagger-ui
- numpy
- sphinx
Expand Down
6 changes: 3 additions & 3 deletions metrics/Evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ def set_start_time(self):
def set_end_time(self):
self.end_time = self.get_current_time()

def set_recommendations(self, recommendation_text):
def set_recommendations(self, recommendation_text: str):
self.recommendation = recommendation_text

# used by FAIRMetrics, will probably be replaced by logs
def set_reason(self, r):
def set_reason(self, r: str):
self.reason = r

# used by FAIRMetrics, will probably be replaced by logs
def append_reason(self, r):
def append_reason(self, r: str):
self.reason = self.reason + "\n" + r

def set_web_resource(self, web_resource):
Expand Down
8 changes: 3 additions & 5 deletions metrics/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from time import time
from SPARQLWrapper import SPARQLWrapper, N3
# from time import time
# from SPARQLWrapper import SPARQLWrapper, N3
from rdflib import Graph, ConjunctiveGraph, URIRef
import requests
import metrics.statistics as stats
Expand Down Expand Up @@ -63,6 +63,7 @@ def __str__(self):
# DOI regex
regex = r"10.\d{4,9}\/[-._;()\/:A-Z0-9]+"


# Dynamically generates a table with FAIR metrics implementations
def gen_metrics():
metrics = []
Expand Down Expand Up @@ -407,7 +408,6 @@ def inspect_onto_reg(kg, is_inspect_ui):
emit("done_check", table_content)

for c in table_content["classes"]:

c["tag"]["OLS"] = ask_OLS(c["name"])
if is_inspect_ui:
emit("done_check", table_content)
Expand All @@ -430,7 +430,6 @@ def inspect_onto_reg(kg, is_inspect_ui):
table_content["classes_false"].append(c["name"])

for p in table_content["properties"]:

p["tag"]["OLS"] = ask_OLS(p["name"])
if is_inspect_ui:
emit("done_check", table_content)
Expand Down Expand Up @@ -693,7 +692,6 @@ def extract_rdf_from_html(uri):


def extruct_to_rdf(extruct_str):

g = ConjunctiveGraph()

for md in extruct_str["json-ld"]:
Expand Down
10 changes: 5 additions & 5 deletions profiles/Profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def gen_SHACL_from_profile(self):
fc:{{shape_name}}
a sh:NodeShape ;
{% for c in target_classes %}
sh:targetClass {{c}}, {{c.replace("sc:", "scs:")}} ;
{% endfor %}
Expand Down Expand Up @@ -180,8 +180,8 @@ def validate_shape(self, knowledge_graph, shacl_shape):

results = results_graph.query(report_query)
# print("VALIDATION RESULTS")
print(knowledge_graph.serialize(format="turtle"))
print(shacl_shape)
# print(knowledge_graph.serialize(format="turtle"))
# print(shacl_shape)
# print(results_text)
# print(conforms)
# print(results_graph.serialize(format="turtle"))
Expand All @@ -208,8 +208,8 @@ def validate_shape(self, knowledge_graph, shacl_shape):
errors.append(f'{r["path"]}')
else:
errors.append(f'{r["path"]}')
print(errors)
print(warnings)
# print(errors)
# print(warnings)
return conforms, warnings, errors

def match_sub_kgs_from_profile(self, kg):
Expand Down
2 changes: 1 addition & 1 deletion profiles/ProfileFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def evaluate_profile_with_conformsto(kg):

if ct_profile is not None:
shacl_shape = ct_profile.get_shacl_shape()
print(shacl_shape)
# print(shacl_shape)
conforms, warnings, errors = ct_profile.validate_shape(
sub_kg, shacl_shape
)
Expand Down
2 changes: 1 addition & 1 deletion profiles/bioschemas_shape_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def validate_any_from_KG(kg):
# print(o.n3(kg.namespace_manager))
if o.n3(kg.namespace_manager) in bs_profiles.keys():
# print()
# print(f"Trying to validate {s} as a(n) {o} resource")
print(f"Trying to validate {s} as a(n) {o} resource")
shacl_shape, ref_profile = gen_SHACL_from_target_class(
o.n3(kg.namespace_manager)
)
Expand Down
8 changes: 5 additions & 3 deletions tests/test_web_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,12 @@ def test_turtle(self):
# self.assertEqual(60, len(turtle_WR.get_rdf()))

def test_MassBank(self):
mb = WebResource("https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001")
mb = WebResource(
"https://massbank.eu/MassBank/RecordDisplay?id=MSBNK-RIKEN_IMS-LQB00001"
)
kg = mb.get_rdf()
#print(kg.serialize(format="turtle"))
#logging.info(f"{len(kg)} loaded RDF triples")
# print(kg.serialize(format="turtle"))
# logging.info(f"{len(kg)} loaded RDF triples")
self.assertGreater(len(kg), 70)

def test_n3(self):
Expand Down

0 comments on commit 3b31bf3

Please sign in to comment.