-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathdbpedia_rules.pl
131 lines (110 loc) · 3.81 KB
/
dbpedia_rules.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
:- use_module(library(tabling)).
:- use_module(library(sparqlprog/emulate_builtins)).
%! band(?Band) is nondet.
%
% True if Band is an instance of the class dbont:'Band', i.e. the triple
% =|Band rdf:type dbont:'Band'|= holds.
band(Band) :-
    rdf(Band, rdf:type, dbont:'Band').
%! has_shared_band_member(?Band1, ?Band2, ?SharedMember) is nondet.
%
% Relates two bands through a member they have in common,
% e.g. Ronnie James Dio for Rainbow and Black Sabbath.
%
% True if SharedMember has dbont:associatedBand links to both Band1 and
% Band2, and Band1 and Band2 are distinct.
%
has_shared_band_member(Band1, Band2, SharedMember) :-
    rdf(SharedMember, dbont:associatedBand, Band1),
    rdf(SharedMember, dbont:associatedBand, Band2),
    % exclude the trivial pairing of a band with itself
    Band1 \= Band2.
%! has_shared_genre(?Entity1, ?Entity2, ?SharedGenre) is nondet.
%
% Relates two entities (e.g. two bands) through a genre they have in common.
%
% True if SharedGenre is a dbont:genre of both Entity1 and Entity2, and
% Entity1 and Entity2 are distinct. The rule itself does not check that
% either entity is a band; combine it with band/1 when that is required.
%
has_shared_genre(Entity1, Entity2, SharedGenre) :-
    rdf(Entity1, dbont:genre, SharedGenre),
    rdf(Entity2, dbont:genre, SharedGenre),
    % exclude the trivial pairing of an entity with itself
    Entity1 \= Entity2.
%! genre_pair(?Genre1, ?Genre2, ?Entity) is nondet.
%
% True if Entity has both Genre1 and Genre2 as dbont:genre values, and
% Genre1 and Genre2 are distinct. This is the counterpart of
% has_shared_genre/3: it pairs genres through a common entity rather
% than entities through a common genre.
%
genre_pair(Genre1, Genre2, Entity) :-
    rdf(Entity, dbont:genre, Genre1),
    rdf(Entity, dbont:genre, Genre2),
    % exclude the trivial pairing of a genre with itself
    Genre1 \= Genre2.
%! similarity_by_genre(?EntityA, ?EntityB, ?Sim)
%
% Sim is the Jaccard similarity between two entities, computed over
% their genres:
%
% ==
% | genres(A) /\ genres(B) | / | genres(A) \/ genres(B) |
% ==
%
% The two entities should be of comparable types (e.g. two bands, or
% two books).
%
% If the entities share no genre the ratio is 0; if they have exactly
% the same genres it is 1.
%
% Every shared genre counts the same, regardless of how *meaningful*
% sharing it is: a very common genre such as 'pop' weighs as much as a
% rare one such as 'psytrance'. See pair_genre_sum_ic/3 and genre_ic/2
% for metrics based on information content.
%
similarity_by_genre(EntityA, EntityB, Sim) :-
    get_all_genres(EntityA, GenresA),
    get_all_genres(EntityB, GenresB),
    jaccard(GenresA, GenresB, Sim).
%! jaccard(+SetA:ordset, +SetB:ordset, -Sim:number) is det.
%
% Sim is the Jaccard index of two ordered sets:
%
% ==
% | SetA /\ SetB | / | SetA \/ SetB |
% ==
%
% Both arguments must be ordered sets (sorted, duplicate-free lists),
% as required by ord_intersection/3 and ord_union/3.
%
% When both sets are empty the union is empty as well; Sim is then
% defined as 0 instead of evaluating 0/0, which would raise an
% arithmetic evaluation error.
%
jaccard(SA, SB, Sim) :-
    ord_union(SA, SB, Union),
    length(Union, NU),
    (   NU =:= 0
    ->  % nothing on either side: no overlap to measure
        Sim = 0
    ;   ord_intersection(SA, SB, Inter),
        length(Inter, NI),
        Sim is NI/NU
    ).
%! get_all_genres(?Entity, ?Genres:list)
%
% Genres is the list of every Genre for which
% =|rdf(Entity, dbont:genre, Genre)|= holds, collected with
% service_query_all/4 against the =dbpedia= service.
%
% NOTE(review): callers in this file hand Genres to ord_intersection/3
% and ord_union/3, so the result is presumably an ordered set - confirm
% against service_query_all/4.
%
get_all_genres(Entity, Genres) :-
    service_query_all(dbpedia,
                      Genre,
                      rdf(Entity, dbont:genre, Genre),
                      Genres).
:- table get_num_bands/1.

%! get_num_bands(?NumBands) is det
%
% NumBands is the total number of bands in the database, obtained by
% evaluating num_bands/1 against the =dbpedia= service through ??/2.
%
% The predicate is tabled (see the directive above), so the answer is
% cached and repeated calls do not issue new SPARQL queries.
%
get_num_bands(NumBands) :-
    ??(dbpedia, num_bands(NumBands)).
%! num_bands(?NumBands)
%
% NumBands is the number of distinct Band for which band/1 holds.
%
% get_num_bands/1 evaluates this rule remotely via ??/2, which is where
% the count(distinct(_)) aggregate template is interpreted.
%
num_bands(NumBands) :-
    aggregate(count(distinct(Band)), band(Band), NumBands).
%! get_genre_num_bands(?Genre, ?NumBands) is nondet.
%! get_genre_num_bands(+Genre, ?NumBands) is det.
%
% NumBands is the number of bands that are categorized as Genre,
% obtained by evaluating genre_num_bands/2 against the =dbpedia=
% service through ??/2.
%
% Tabling is currently switched off for this predicate (the directive
% below is commented out).
%
%%%%:- table get_genre_num_bands/2.
get_genre_num_bands(Genre, NumBands) :-
    ??(dbpedia, genre_num_bands(Genre, NumBands)).
%! genre_num_bands(?Genre, ?NumBands)
%
% Grouping by Genre, NumBands is the number of distinct Band such that
% =|rdf(Band, dbont:genre, Genre)|= and band(Band) both hold.
%
% get_genre_num_bands/2 evaluates this rule remotely via ??/2.
%
genre_num_bands(Genre, NumBands) :-
    aggregate_group(count(distinct(Band)),
                    [Genre],
                    ( rdf(Band, dbont:genre, Genre),
                      band(Band)
                    ),
                    NumBands).
%! pair_genre_sum_ic(?BandA, ?BandB, ?SumIC)
%
% For a pair of bands, SumIC is the sum of the information content
% (see genre_ic/2) of the genres the two bands have in common.
%
% BandB is obtained from the =dbpedia= service as a band that shares at
% least one genre with BandA. The genre lists of both bands are then
% fetched and intersected locally. The final aggregate/3 call fails if
% no shared genre yields an IC.
%
% Example: =|pair_genre_sum_ic(dbr:'Metallica', dbr:'Megadeth', IC)|=
pair_genre_sum_ic(BandA, BandB, SumIC) :-
    get_all_genres(BandA, GenresA),
    ??(dbpedia, ( band(BandB),
                  has_shared_genre(BandA, BandB, _)
                )),
    get_all_genres(BandB, GenresB),
    ord_intersection(GenresA, GenresB, Shared),
    debug(dbpedia,
          '~w vs ~w :: INTERSECTION(~w + ~w) = ~w',
          [BandA, BandB, GenresA, GenresB, Shared]),
    aggregate(sum(IC),
              Genre^( member(Genre, Shared),
                      genre_ic(Genre, IC)
                    ),
              SumIC).
%! genre_ic(?Genre, ?InformationContent:float) is nondet.
%
% InformationContent is the information content (IC) of Genre. The
% higher the IC, the rarer - and therefore the more 'surprising' or
% information-rich - the genre is.
%
% For example, many bands are pop, so pop has a low IC; progressive
% sludge metal is comparatively rare and has a high IC.
%
% ==
% InformationContent = -log2( Pr(Genre) )
% ==
%
% where Pr(Genre) is the number of bands with that genre (from
% get_genre_num_bands/2) divided by the total number of bands (from
% get_num_bands/1).
%
genre_ic(Genre, IC) :-
    get_genre_num_bands(Genre, NumWithGenre),
    debug(dbpedia, '|bands| in ~w = ~w', [Genre, NumWithGenre]),
    get_num_bands(NumBands),
    debug(dbpedia, 'Total bands = ~w', [NumBands]),
    % log base 2, written as a quotient of natural logarithms
    seval(-log(NumWithGenre/NumBands)/log(2), IC).