-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathqueryProcessor.py
71 lines (62 loc) · 2.9 KB
/
queryProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
from processor import Processor
from sqlite3 import connect
from pandas import read_sql, read_sql_query
from sparql_dataframe import get
class QueryProcessor(Processor):
    """Look up a single entity in the configured data source.

    The data source is whatever ``getDbPathOrUrl()`` (provided by the
    ``Processor`` base class) returns: either the path of a SQLite
    database file or the URL of an HTTP(S) SPARQL endpoint.
    """

    def __init__(self):
        super().__init__()

    def getEntityById(self, entityId: str):
        """Return a data frame describing the entity identified by *entityId*.

        Args:
            entityId: Identifier to search for. In the SQLite case it is
                compared against ``EntitiesWithMetadata.id``,
                ``Annotations.annotation_ids`` and ``Images.image_ids``.
                In the SPARQL case it is used as the IRI of the subject.

        Returns:
            * SQLite source: every column of ``EntitiesWithMetadata``,
              ``Annotations`` and ``Images`` for the matching rows, plus a
              trimmed ``creator_name`` and an ``entityId_table`` column
              naming the table whose identifier matched.
            * SPARQL source: the columns ``id``, ``label``, ``items`` and
              ``type`` produced by the SELECT query below.
            * Anything else: ``Error!!!`` is printed and an empty data frame
              is returned.
        """
        # Imported locally so that the fallback branch can return an empty
        # frame without touching the file-level import block.
        from pandas import DataFrame

        # TODO(review): the original author asked whether JSON / RDF
        # triplestore results need extra wrapping here - still open.
        path = self.getDbPathOrUrl()

        if os.path.isfile(path) or path.endswith(".db"):
            # The identifier is bound as a named parameter instead of being
            # interpolated into the SQL text, so quotes or SQL fragments in
            # entityId cannot alter the statement.
            query = """
                SELECT
                    EntitiesWithMetadata.*,
                    Annotations.*,
                    Images.*,
                    TRIM(Creators.creator_name) AS creator_name,
                    CASE
                        WHEN EntitiesWithMetadata.id = :entity_id THEN 'EntitiesWithMetadata'
                        WHEN Annotations.annotation_ids = :entity_id THEN 'Annotations'
                        WHEN Images.image_ids = :entity_id THEN 'Images'
                        ELSE NULL
                    END AS entityId_table
                FROM EntitiesWithMetadata
                LEFT JOIN Annotations ON EntitiesWithMetadata.metadata_internal_id = Annotations.annotation_targets
                LEFT JOIN Images ON Annotations.annotation_bodies = Images.images_internal_id
                LEFT JOIN Creators ON EntitiesWithMetadata.creator = Creators.creator_internal_id
                WHERE EntitiesWithMetadata.id = :entity_id
                    OR Annotations.annotation_ids = :entity_id
                    OR Images.image_ids = :entity_id
            """
            # Connect with the same value that was just validated above.
            # A sqlite3 connection used as a context manager only commits or
            # rolls back; it must be closed explicitly to release the file.
            con = connect(path)
            try:
                return read_sql(query, con, params={"entity_id": entityId})
            finally:
                con.close()

        if path.startswith(("https:", "http:")):
            # NOTE(review): entityId is substituted straight into the IRI
            # below; no parameter binding is used for the SPARQL query, so
            # callers must pass a trusted, well-formed IRI.
            query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                PREFIX pomodoro: <https://github.com/datasci2023/datascience/class/>
                PREFIX feslegen: <https://github.com/datasci2023/datascience/attribute/>
                PREFIX spaghetti: <https://github.com/datasci2023/datascience/relation/>
                SELECT ?id ?label ?items (strafter(str(?t), 'class/') AS ?type)
                WHERE {
                    ?id rdf:type ?t ;
                        rdfs:label ?label .
                    OPTIONAL {?id spaghetti:items ?items}
                    FILTER (?id = <%s>)
                }
            """ % entityId
            # Third positional argument kept as True, as in the original call
            # (presumably the POST flag of sparql_dataframe.get - confirm).
            return get(path, query, True)

        # Neither a SQLite file nor an HTTP(S) endpoint. The previous code
        # printed the message and then crashed on unbound locals; report the
        # problem and hand back an empty result instead.
        print("Error!!!")
        return DataFrame()