-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpKLDvr.m
59 lines (47 loc) · 1.66 KB
/
pKLDvr.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
function y=pKLDvr(X1,X2,nQ);
%function y=pKLDvr(X1,X2,nQ);
%
% X1 and X2 are random samples from two distributions for comparison
% nQ is the number of quantiles to use to partition the joint sample
%
% X1 is anticipated to be an array size n x 1 (i.e. a set of observed data)
% X2 is anticipated to be another sample size m x 1 (i.e. another set of observed data)
% or a set of samples (possibly simulated from the same fitted model) of size m x p
%
% KL divergence is calculated for (sub)sets of equal maximum size between X1 and each column of X2
% The mean KL divergence is then output
if nargin==0;
n=1000;
p=100;
X1=randn(n,1);
X2=randn(n,p);
nQ=10;
end;
n1=size(X1,1);
[n2,p]=size(X2);
% Use the minimum of n1 and n2 for KLD calculations
n=min(n1,n2);
% Loop over the p samples from X2, calculate KLD between X1 and X2(:,j)
KL=nan(p,1);
for j=1:p;
% Randomly resample X1 and X2(:,j) with replacement for sample size n
tX1=randsample(X1,n,1); % Third input imposes resampling with replacement
tX2=randsample(X2(:,j),n,1); % Third input imposes resampling with replacement
% Find quantile levels
Edg=quantile([tX1;tX2],linspace(0,1,nQ+1))';
Edg=Edg(2:end); % Omit smallest quantile at cdf=0
% Estimate empirical cdf
F1=sum(tX1*ones(1,nQ)<=ones(n,1)*Edg')'/n;
F2=sum(tX2*ones(1,nQ)<=ones(n,1)*Edg')'/n;
% Symmetric KLD
KL(j)=0.5*sum(F1.*log(F1./F2)+F2.*log(F2./F1));
% Flag NaN output
if isnan(KL(j))==1;
fprintf(1,'pKLDvr returning NaN. Consider reducing number of quantiles\n');
y=NaN;
return;
end;
end;
% Report mean KL over p samples from X2
y=mean(KL);
return;