Skip to content

Commit

Permalink
feat: add threshold for clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
ObservedObserver committed Jan 13, 2020
1 parent 464e7bb commit 4cdaac3
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 14 deletions.
4 changes: 2 additions & 2 deletions packages/frontend/src/workers/cluster.worker.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* eslint no-restricted-globals: 0 */
import { kruskalMST } from 'visual-insights';

// Value handed to kruskalMST as its third (`threshold`) argument by the `cluster` handler below.
const PearsonThreshold = 0.5;
function sum (arr) {
let ans = 0;
let len = arr.length;
Expand All @@ -15,7 +15,7 @@ const cluster = (e) => {
const { spaces, maxGroupNumber } = e.data;
let result = [];
for (let space of spaces) {
const { edgesInMST, groups } = kruskalMST(space.matrix, maxGroupNumber);
const { edgesInMST, groups } = kruskalMST(space.matrix, maxGroupNumber, PearsonThreshold);
let measureGroups = new Map();
for (let i = 0; i < groups.length; i++) {
if (!measureGroups.has(groups[i])) {
Expand Down
10 changes: 7 additions & 3 deletions packages/visual-insights/src/dashboard/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import cluster from "../insights/cluster";
import aggregate from 'cube-core';
import { normalize, entropy } from "../impurityMeasure";
import { crammersV } from './utils';
import { CrammersVThreshold, PearsonCorrelation } from '../insights/config';

interface DashBoardSpace {
dimensions: string[];
Expand Down Expand Up @@ -48,7 +49,8 @@ export function getDashBoardSubspace (dataSource: DataSource, dimensions: string
const measureGroups = cluster({
matrix: correlationMatrix,
measures,
groupMaxSize: Math.round(measures.length / 6) // todo: make a config: max 6 measures in a dashboard
groupMaxSize: Math.round(measures.length / 6), // todo: make a config: max 6 measures in a dashboard
threshold: PearsonCorrelation.weak
})

const dimCorrelationMatrix = dimensions.map(d => dimensions.map(d => 0));
Expand Down Expand Up @@ -119,7 +121,8 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da
const measureGroups = cluster({
matrix: dashBoardSpace.correlationMatrix,
measures: measures,
groupMaxSize: Math.round(measures.length / 3) // todo: make a config: max 3 measures in a chart
groupMaxSize: Math.round(measures.length / 3), // todo: make a config: max 3 measures in a chart
threshold: PearsonCorrelation.strong
});
for (let group of measureGroups) {
const meaIndexList = group.map(mea => measures.indexOf(mea))
Expand All @@ -145,7 +148,8 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da
matrix: dimensionCorrelationMatrix,
measures: dimensions,
groupMaxSize: 2, // todo: make a config: max 2 dimensions in a chart
limitSize: true
limitSize: true,
threshold: CrammersVThreshold
})

const dimGroupEntropyMatrix = getEntropyMatrix(dimensionGroups, measures, dataSource);
Expand Down
13 changes: 7 additions & 6 deletions packages/visual-insights/src/insights/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function unionWithEffect (parents: number[], sizes: number[], n1: number, n2: nu
* @param matrix adjacency matrix
* @param groupNumber number of groups generated by clustering
*/
function kruskal(matrix: number[][], groupNumber: number): Map<number, number[]> {
function kruskal(matrix: number[][], groupNumber: number, threshold: number | undefined = 0): Map<number, number[]> {
const edges = turnAdjMatrix2List(matrix);
edges.sort((a, b) => b[1] - a[1]);
const parents = matrix.map((m, i) => i);
Expand All @@ -68,7 +68,7 @@ function kruskal(matrix: number[][], groupNumber: number): Map<number, number[]>
parents[i] = find(parents, i)
}
let set = new Set(parents);
if (set.size <= groupNumber){
if (set.size <= groupNumber || edge[1] < threshold){
break;
}
}
Expand Down Expand Up @@ -158,7 +158,7 @@ export function kruskalMSTWithLimitSize(matrix: number[][], limitSize: number =
* @param matrix
* @param groupNumber number of group generated by clustering
*/
export function kruskalMST(matrix: number[][], groupNumber: number = 4) {
export function kruskalMST(matrix: number[][], groupNumber: number = 4, threshold: number | undefined = 0) {
const edges = turnAdjMatrix2List(matrix);
edges.sort((a, b) => b[1] - a[1]);

Expand All @@ -179,7 +179,7 @@ export function kruskalMST(matrix: number[][], groupNumber: number = 4) {
let set = new Set(parents);
// TODO:
// + use kruskalMST instead of kruskal.
if (set.size <= groupNumber) {
if (set.size <= groupNumber || edge[1] < threshold) {
inCutEdge = true;
} else {
groups = [...parents]
Expand All @@ -198,15 +198,16 @@ interface ClusterProps {
method?: string;
groupMaxSize?: number;
limitSize?: boolean;
threshold?: number;
}

function cluster ({ matrix, measures, method = 'kruskal', groupMaxSize = 4, limitSize = false }: ClusterProps): string[][] {
function cluster ({ matrix, measures, method = 'kruskal', groupMaxSize = 4, limitSize = false, threshold = 0 }: ClusterProps): string[][] {
// const groups = kruskal({ matrix, groupMaxSize });
let groups;
if (limitSize) {
groups = kruskalWithLimitSize(matrix, groupMaxSize)
} else {
groups = kruskal(matrix, groupMaxSize)
groups = kruskal(matrix, groupMaxSize, threshold)
}

let ans: string[][] = [];
Expand Down
9 changes: 8 additions & 1 deletion packages/visual-insights/src/insights/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,11 @@ export const Depth = 4;

export const VisualLimit = 8;

export const TopKPercentField = 0.8;
export const TopKPercentField = 0.8;

// Clustering `threshold` used when grouping dimensions by their correlation matrix
// (presumably Cramér's V values — confirm against `crammersV`).
export const CrammersVThreshold = 0.3;

// Clustering `threshold` levels used when grouping measures by their correlation matrix
// (presumably Pearson coefficients — confirm against the matrix builder).
export const PearsonCorrelation = {
strong: 0.5, // passed by getDashBoardView when grouping measures
weak: 0.3 // passed by getDashBoardSubspace when grouping measures
};
6 changes: 4 additions & 2 deletions packages/visual-insights/src/insights/impurity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import aggregate from 'cube-core';
import { entropy, normalize } from '../impurityMeasure';
import { DataSource, OperatorType } from '../commonTypes';
import { crammersV } from '../dashboard/utils';
import { CrammersVThreshold } from './config';
import cluster from './cluster';
// insights like outlier and trend both request high impurity of dimension.
const maxVisualChannel = 8;
Expand Down Expand Up @@ -44,7 +45,8 @@ export function getDimSetsBasedOnClusterGroups(dataSource: DataSource, dimension
let groups: string[][] = cluster({
matrix: dimCorrelationMatrix,
measures: dimensions,
groupMaxSize: Math.round(dimensions.length / maxDimNumberInView)
groupMaxSize: Math.round(dimensions.length / maxDimNumberInView),
threshold: CrammersVThreshold
});
// todo: maybe a threshold would be better?
for (let group of groups) {
Expand Down Expand Up @@ -79,7 +81,7 @@ export function correlation(dataSource: DataSource, fieldX: string, fieldY: stri
export type FieldsFeature = [string[], any, number[][]];
function analysisDimensions(dataSource: DataSource, dimensions: string[], measures: string[], operator: OperatorType | undefined = 'sum'): FieldsFeature[] {
let impurityList: FieldsFeature[] = [];
let dimSet = getCombination(dimensions)
let dimSet = getDimSetsBasedOnClusterGroups(dataSource, dimensions);
for (let dset of dimSet) {
let impurity = {};
let aggData = aggregate({
Expand Down

0 comments on commit 4cdaac3

Please sign in to comment.