Skip to content

Commit

Permalink
Merge pull request #192 from IFB-ElixirFr/fix-http-redirect
Browse files: browse the repository at this point in the history
Fix http redirect
  • Loading branch information
albangaignard authored Apr 18, 2023
2 parents 2f2b7b2 + fe2f5d7 commit 96a5cda
Showing 1 changed file with 8 additions and 17 deletions.
25 changes: 8 additions & 17 deletions metrics/WebResource.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,8 +22,6 @@
import os
import re

from json.decoder import JSONDecodeError

from metrics.util import clean_kg_excluding_ns_prefix

logger = logging.getLogger("DEV")
Expand Down Expand Up @@ -104,21 +102,13 @@ def __init__(self, url, rdf_graph=None) -> None:
"dct", URIRef("http://purl.org/dc/terms/")
)

# self.kg_links_header = ConjunctiveGraph(identifier="http://webresource/links_headers")
# self.kg_auto = ConjunctiveGraph(identifier="http://webresource/auto")
# self.kg_brut = ConjunctiveGraph(identifier="http://webresource/brut")
# self.kg_links_html = ConjunctiveGraph(identifier="http://webresource/links_html")
# self.kg_html = ConjunctiveGraph(identifier="http://webresource/html")

self.init_kgs()

# b = [self.get_var_name(el) for el in kg_list]
# print(b)

if rdf_graph is None:

# get headers of the resource
response = requests.head(url)
# response = requests.head(url)
response = requests.get(url)
self.headers = response.headers
self.status_code = response.status_code

Expand Down Expand Up @@ -147,7 +137,7 @@ def __init__(self, url, rdf_graph=None) -> None:
self.url, rdf_format, self.kg_auto
)

# if no rdf found: brutforce testing each RDF formats regardless of mimetypes
# if no RDF was found: brute-force test each RDF format, regardless of MIME type
if len(self.kg_auto) == 0:
rdf_str = response.text
for rdf_format in self.RDF_MEDIA_TYPES_MAPPING.keys():
Expand Down Expand Up @@ -409,7 +399,6 @@ def get_rdf_from_mimetype_match(self, url, rdf_format, kg):
logging.debug("Getting RDF from: " + rdf_format)

kg_temp = ConjunctiveGraph()

response = requests.get(url)

if response.status_code == 200:
Expand All @@ -421,18 +410,20 @@ def get_rdf_from_mimetype_match(self, url, rdf_format, kg):
format=rdf_format,
publicID=url,
)
for s, p, o in kg_temp:
kg.add((s, p, o))

kg += kg_temp
# for s, p, o in kg_temp:
# kg.add((s, p, o))
# print("######################")
# for namespace in kg.namespaces():
# print(namespace)
logging.debug(len(kg_temp))
except Exception as err:
# on a UnicodeDecodeError, run the fallback below; for any other error, do nothing and continue to the next format
if type(err).__name__ == "UnicodeDecodeError":
print(err)
print("ERROR UNICODE")
kg = self.handle_unicodedecodeerror(url, kg, response)
print(len(kg))

return kg

Expand Down

0 comments on commit 96a5cda

Please sign in to comment.