Skip to content

Commit

Permalink
fix default entity mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
ShubhamVashisth7 committed Jun 10, 2022
1 parent fb85f36 commit b02bf8b
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
11 changes: 11 additions & 0 deletions feature_discovery/src/api/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ def detect_entities(config):
return execute_query(config, query)


def get_number_of_relations(config, column_id: str):
    """Count the certainty-annotated relations that start at ``column_id``.

    Builds a SPARQL query over quoted (RDF-star style) triples whose subject
    is ``<column_id>`` and which carry a ``data:withCertainty`` annotation,
    then runs it through ``execute_query`` asking for a JSON result.

    :param config: passed through unchanged to ``execute_query``.
    :param column_id: IRI of the column, interpolated between ``<`` and ``>``
        as the subject of the quoted triple.
    :return: whatever ``execute_query`` returns for ``return_type='json'``;
        the count is bound to the ``Number_of_relations`` variable.
    """
    # NOTE: ``?column_id`` inside the query text is a free SPARQL variable
    # (the object of the relation), not the Python argument of the same name.
    sparql_template = """
SELECT (COUNT(?relation) as ?Number_of_relations)
WHERE
{
<<<%s> ?relation ?column_id>> data:withCertainty ?Score.
}
"""
    return execute_query(config, sparql_template % column_id, return_type='json')


def get_pkfk_relations(config):
query = """
# SELECT DISTINCT ?Primary_table ?Primary_column ?Foreign_table ?Foreign_column ?Pkfk_score (?Distinct_values/?Total_values as ?Primary_key_uniqueness_ratio) ?Primary_table_id ?Primary_column_id ?Foreign_table_id ?Foreign_column_id
Expand Down
30 changes: 25 additions & 5 deletions feature_discovery/src/graph_builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,33 @@ def __dump_triples(self):

def __annotate_default_entity(self, table_id):
    """Add the ``hasDefaultEntity`` triple for ``table_id`` to ``self.triples``.

    The default entity is chosen from ``self.default_entities`` (a mapping of
    candidate column id -> uniqueness ratio) as follows:

    1. Nothing is emitted when no candidate was collected for the table.
    2. The candidate with the highest uniqueness ratio wins.
    3. When several candidates share that highest ratio, the one with the
       most relations (as reported by ``get_number_of_relations``) wins;
       the earliest candidate is kept when the relation counts tie as well,
       so a column is always selected.

    The emitted statement is a quoted triple annotated with the winning
    uniqueness ratio as ``kgfarm:confidence``.

    :param table_id: IRI of the table the default entity belongs to.
    """
    if not self.default_entities:
        # No entity candidates for this table: max() below would raise on an
        # empty sequence, and there is nothing meaningful to annotate.
        return

    triple_format = '<{}> <{}> <{}>'
    uniqueness_ratio = max(self.default_entities.values())
    # Dict order is preserved, so ties are resolved in insertion order.
    candidates = [candidate_column_id
                  for candidate_column_id, uniqueness in self.default_entities.items()
                  if uniqueness == uniqueness_ratio]

    column_id = candidates[0]
    if len(candidates) > 1:
        # Several columns are equally unique: prefer the best-connected one.
        # Starting below zero guarantees the first candidate is selected even
        # when every candidate has zero relations.
        max_number_of_relations = -1
        for candidate_column_id in candidates:
            n_relations = int(get_number_of_relations(
                self.config, candidate_column_id)[0]['Number_of_relations']['value'])
            if max_number_of_relations < n_relations:
                column_id = candidate_column_id
                max_number_of_relations = n_relations

    kgfarm = self.ontology.get('kgfarm')
    self.triples.add('<<' + triple_format.format(table_id, kgfarm + 'hasDefaultEntity', column_id)
                     + '>> <' + kgfarm + 'confidence>'
                     + ' "{}"^^xsd:double.'.format(str(uniqueness_ratio)))

def __annotate_entity_and_feature_view_mapping(self, column_id, entity_name, table_id, uniqueness_ratio):
triple_format = '<{}> <{}> <{}>'
Expand Down Expand Up @@ -74,7 +94,7 @@ def annotate_entity_mapping(self):
uniqueness_ratio = entity_info['Primary_key_uniqueness_ratio']

if table_id != table_to_process:
self.__annotate_default_entity(table_id)
self.__annotate_default_entity(table_to_process)
table_to_process = table_id
self.default_entities = {}

Expand Down

0 comments on commit b02bf8b

Please sign in to comment.