# SyntheticDataset2_functions.py

from helperFunctions import *
import numpy as np
import seaborn as sns
import os
import pandas as pd
import matplotlib.pyplot as plt

# Module-level side effect: runs once at import time and applies seaborn's
# plotting defaults with font elements scaled up 5x. Presumably chosen because
# the figures created below are very large (18-20 inch) - confirm before
# importing this module alongside other plotting code, as the setting is global.
sns.set(font_scale=5)

def generate_synthetic_dataset2_pairwise():
    """Build and display the 'Pairwise Correlation Only' synthetic point cloud.

    Three Gaussian clusters (standard deviation 0.05 in unit-square
    coordinates) are drawn around fixed centres. Each cluster receives 25
    points of each of two cell types, and every pair taken from C_1, C_2, C_3
    shares exactly one cluster, so no single cluster contains all three types.
    Coordinates are multiplied by 1000 before being passed to
    ``generatePointCloud`` together with a [0, 1000] x [0, 1000] domain.

    Side effects:
        * Reseeds NumPy's global random state with 1536, so every call makes
          the same random draws.
        * Draws the resulting cloud via ``visualisePointCloud``.

    Returns:
        The object produced by ``generatePointCloud``, after a categorical
        'Celltype' label has been attached with ``addLabels``.
    """
    np.random.seed(1536)

    cellTypes = ['$C_1$', '$C_2$', '$C_3$']
    centres = np.asarray([[0.2, 0.2], [0.5, 0.8], [0.8, 0.2]])
    # Cell types present in the cluster at the matching row of `centres`.
    typesPerCluster = [['$C_1$', '$C_2$'], ['$C_1$', '$C_3$'], ['$C_2$', '$C_3$']]

    # Coordinate chunks are collected here and stacked once at the end.
    coordBlocks = [np.empty(shape=(0, 2))]
    labels = []

    # Uniform background noise, one batch per cell type. The count is zero, so
    # these batches are empty; the calls are kept so the sequence of np.random
    # calls is unchanged.
    nBackground = 0
    for cellType in cellTypes:
        coordBlocks.append(np.random.rand(nBackground, 2))
        labels += [cellType] * nBackground

    sigma = 0.05
    nPoints = 25
    for centre, (firstType, secondType) in zip(centres, typesPerCluster):
        # One draw of 2*nPoints points per cluster: the first half is labelled
        # with the first cell type, the second half with the second.
        coordBlocks.append(centre + sigma * np.random.randn(2 * nPoints, 2))
        labels += [firstType] * nPoints + [secondType] * nPoints

    points = np.vstack(coordBlocks)

    pointCloud = generatePointCloud(
        'Pairwise Correlation Only',
        points * 1000,
        domain=([[0, 1000], [0, 1000]]),
    )
    pointCloud.addLabels('Celltype', 'categorical', labels)

    # Show the generated cloud, coloured by cell type.
    visualisePointCloud(pointCloud, 'Celltype', markerSize=100)

    return pointCloud

def generate_synthetic_dataset2_3way():
    """Build and display the 'Three-way correlation' synthetic point cloud.

    Three Gaussian clusters (standard deviation 0.05 in unit-square
    coordinates) are drawn around the same fixed centres used by
    ``generate_synthetic_dataset2_pairwise``, but here every cluster receives
    25 points of each of C_1, C_2 and C_3, so all three types co-occur in every
    cluster. Coordinates are multiplied by 1000 before being passed to
    ``generatePointCloud`` together with a [0, 1000] x [0, 1000] domain.

    Note:
        This function does not seed NumPy's global random state, so the points
        it produces depend on that state at call time (for example on whether
        ``generate_synthetic_dataset2_pairwise`` was called just before).

    Side effects:
        Draws the resulting cloud via ``visualisePointCloud``.

    Returns:
        The object produced by ``generatePointCloud``, after a categorical
        'Celltype' label has been attached with ``addLabels``.
    """
    cellTypes = ['$C_1$', '$C_2$', '$C_3$']
    centres = np.asarray([[0.2, 0.2], [0.5, 0.8], [0.8, 0.2]])
    # Every cluster contains all three cell types.
    typesPerCluster = [
        ['$C_1$', '$C_2$', '$C_3$'],
        ['$C_1$', '$C_2$', '$C_3$'],
        ['$C_1$', '$C_2$', '$C_3$'],
    ]

    # Coordinate chunks are collected here and stacked once at the end.
    coordBlocks = [np.empty(shape=(0, 2))]
    labels = []

    # Uniform background noise, one batch per cell type. The count is zero, so
    # these batches are empty; the calls are kept so the sequence of np.random
    # calls is unchanged.
    nBackground = 0
    for cellType in cellTypes:
        coordBlocks.append(np.random.rand(nBackground, 2))
        labels += [cellType] * nBackground

    sigma = 0.05
    nPoints = 25
    for centre, clusterTypes in zip(centres, typesPerCluster):
        # One draw of 3*nPoints points per cluster, labelled in three
        # consecutive runs of nPoints (first, second, third cell type).
        coordBlocks.append(centre + sigma * np.random.randn(3 * nPoints, 2))
        for cellType in clusterTypes[:3]:
            labels += [cellType] * nPoints

    points = np.vstack(coordBlocks)

    pointCloud = generatePointCloud(
        'Three-way correlation',
        points * 1000,
        domain=([[0, 1000], [0, 1000]]),
    )
    pointCloud.addLabels('Celltype', 'categorical', labels)

    # Show the generated cloud, coloured by cell type.
    visualisePointCloud(pointCloud, 'Celltype', markerSize=100)

    return pointCloud


def cross_pcf__synthetic_dataset2(pc_pairwise=None, pc_threewise=None):
    """Plot the pairwise cross-PCFs for both Synthetic Dataset II point clouds.

    For each point cloud a new 20x20 inch figure is created containing one
    curve per cell-type pair (C_1/C_2, C_1/C_3, C_2/C_3), as returned by
    ``pairCorrelationFunction``, plus a dotted reference line at 1.

    Args:
        pc_pairwise: Point cloud carrying a 'Celltype' label, e.g. the result
            of ``generate_synthetic_dataset2_pairwise()``. When omitted, a
            module-level variable named ``pc_pairwise`` is used if one exists;
            otherwise the cloud is generated (which also draws it).
        pc_threewise: Same, for ``generate_synthetic_dataset2_3way()`` and a
            module-level ``pc_threewise``.

    Returns:
        None. The figures are left open on the current matplotlib backend.
    """
    # The original body read both clouds from module scope, but nothing in this
    # file assigns them there (they are locals of the generator functions), so
    # a plain call raised NameError. Explicit arguments win; a module-level
    # variable is still honoured so any script that set one keeps working.
    module_scope = globals()
    if pc_pairwise is None:
        pc_pairwise = module_scope.get('pc_pairwise')
    if pc_pairwise is None:
        # Generated first: this call seeds the global RNG that the three-way
        # generator then continues from.
        pc_pairwise = generate_synthetic_dataset2_pairwise()
    if pc_threewise is None:
        pc_threewise = module_scope.get('pc_threewise')
    if pc_threewise is None:
        pc_threewise = generate_synthetic_dataset2_3way()

    # PCF settings are identical for every curve, so define them once.
    maxR = 1000
    annulusStep = 10
    annulusWidth = 50
    cellPairs = [['$C_1$', '$C_2$'], ['$C_1$', '$C_3$'], ['$C_2$', '$C_3$']]

    for pc in (pc_pairwise, pc_threewise):
        plt.figure(figsize=(20, 20))
        # Dotted reference line at g(r) = 1.
        plt.gca().axhline(1, c='k', linestyle=':', lw=4)

        for cellpair in cellPairs:
            # The third return value (per-point contributions) is not plotted.
            r, pcf, _ = pairCorrelationFunction(
                pc, 'Celltype', cellpair,
                maxR=maxR, annulusStep=annulusStep, annulusWidth=annulusWidth,
            )
            # Character 3 of '$C_k$' is the digit k, giving '$g_{C_i C_j}(r)$'.
            label = '$g_{C_' + cellpair[0][3] + ' C_' + cellpair[1][3] + '}(r)$'
            plt.plot(r, pcf, lw=5, label=label)

        # Raw string: '\m' is not a valid escape sequence in a normal literal.
        plt.xlabel(r'Radius, $r$ ($\mu$m)')
        plt.ylim([0, 15])
        plt.xlim([0, 1000])
        plt.title(pc.name)
        plt.legend()
    
    
def ncf_synthetic_dataset1(pc_pairwise=None, pc_threewise=None, *, drawCircles=False, redoBootstrap=False):
    """Plot the three-way neighbourhood correlation function for Synthetic Dataset II.

    Despite the "dataset1" in its name (kept so existing callers keep working),
    this function operates on the two Synthetic Dataset II point clouds. For
    each cloud it calls ``neighbourhoodCorrelationFunction`` for the triple
    (C_1, C_2, C_3) with ``maxR=300``, histograms the radii of the returned
    circles, and produces two 18x18 inch figures: observed vs. expected (CSR)
    counts, and their ratio.

    Args:
        pc_pairwise: Point cloud carrying a 'Celltype' label, e.g. the result
            of ``generate_synthetic_dataset2_pairwise()``. When omitted, a
            module-level variable named ``pc_pairwise`` is used if one exists;
            otherwise the cloud is generated (which also draws it).
        pc_threewise: Same, for ``generate_synthetic_dataset2_3way()`` and a
            module-level ``pc_threewise``.
        drawCircles: If true, also draw every returned circle over the point
            cloud, coloured by radius.
        redoBootstrap: If true, re-estimate the CSR expectation from 1,000,000
            random triplets (slow) instead of using the precalculated table.

    Returns:
        None. The figures are left open on the current matplotlib backend.
    """
    maxR = 300
    cellTypes = ['$C_1$', '$C_2$', '$C_3$']

    # The original body read the names below from module scope, but nothing in
    # this file assigns them there, so a plain call raised NameError. Explicit
    # arguments win; module-level variables are still honoured so any script
    # that set them keeps working; otherwise sensible defaults are used.
    module_scope = globals()
    if pc_pairwise is None:
        pc_pairwise = module_scope.get('pc_pairwise')
    if pc_pairwise is None:
        # Generated first: this call seeds the global RNG that the three-way
        # generator then continues from.
        pc_pairwise = generate_synthetic_dataset2_pairwise()
    if pc_threewise is None:
        pc_threewise = module_scope.get('pc_threewise')
    if pc_threewise is None:
        pc_threewise = generate_synthetic_dataset2_3way()

    # Defaults mirror the values hard-coded in the dataset generators.
    nPoints = module_scope.get('nPoints', 25)
    nBackground = module_scope.get('nBackground', 0)

    # Precalculated expected counts under CSR, one value per radius bin.
    # NOTE(review): the entries look like multiples of 50**3 / 1e6, i.e. they
    # appear to correspond to nA = nB = nC = 50 and 1e6 bootstrap samples.
    precomputed_bootstrap = np.asarray([0.375,1.375,4.875,17.625,39.25,65.875,103.75,143.75,208.875,276.5,364.625,464,578.25,713.75,856.125,997.625,1151.5,1325.88,1489.88,1684.62,1875.88,2075.88,2280.38,2495.5,2682.62,2887.62,3072.5,3305.75,3487.88,3617])

    bins = module_scope.get('bins')
    if bins is None:
        # The bin edges are not defined anywhere in this file. The table above
        # fixes their count (one more edge than values); equal-width bins on
        # [0, maxR] are ASSUMED here - TODO confirm against the script that
        # produced the table.
        bins = np.linspace(0, maxR, len(precomputed_bootstrap) + 1)

    # Number of points of each cell type, used to scale the bootstrap counts.
    # NOTE(review): 2*nPoints matches the pairwise dataset; the three-way
    # generator places 3*nPoints points of each type - confirm whether the
    # expectation should be rescaled for that cloud.
    nA = 2*nPoints + nBackground
    nB = 2*nPoints + nBackground
    nC = 2*nPoints + nBackground

    if redoBootstrap:
        # The CSR estimate does not depend on the point cloud, so it is
        # computed once here rather than once per cloud.
        bootstrappedRadii = []
        nBootstrap = 1000000
        for i in range(nBootstrap):
            if i % 10000 == 0:
                print(i)
            # One uniformly random point of each cell type in the unit square.
            points_temp = np.random.rand(3, 2)
            pc_temp = generatePointCloud('temp', points_temp, domain=[[0, 1], [0, 1]])
            pc_temp.addLabels('Celltype', 'categorical', cellTypes)
            circles_temp, _ = neighbourhoodCorrelationFunction(pc_temp, 'Celltype', cellTypes, maxR=2)
            bootstrappedRadii.append(circles_temp[0][2])
        # As this was generated in mm, not mu m
        bootstrappedRadii = [v*1000 for v in bootstrappedRadii]

        vals_bootstrap, _ = np.histogram(bootstrappedRadii, bins=bins)
        vals_bootstrap = nA*nB*nC*vals_bootstrap/nBootstrap
    else:
        vals_bootstrap = precomputed_bootstrap

    for pc in (pc_pairwise, pc_threewise):
        circles, _ = neighbourhoodCorrelationFunction(pc, 'Celltype', cellTypes, maxR=maxR)

        # Random drawing order for the circles. Shuffled unconditionally, as
        # before, so the global RNG stream is consumed identically whether or
        # not the circles are drawn.
        order = np.arange(len(circles))
        np.random.shuffle(order)

        if drawCircles:
            visualisePointCloud(pc, 'Celltype', markerSize=100)
            for idx in order:
                # Each circle is indexed as (centre x, centre y, radius, ...).
                centreX, centreY, radius = circles[idx][0], circles[idx][1], circles[idx][2]
                colour = plt.cm.plasma(radius/maxR)
                # Translucent edge. Previously this colour was built and then
                # ignored in favour of the opaque one.
                edgeColour = list(colour)
                edgeColour[3] = 0.25
                patch = plt.Circle((centreX, centreY), radius, ec=edgeColour, fc=[0, 0, 0, 0], zorder=-1)
                plt.gca().add_patch(patch)

            plt.gca().axis('equal')

        circles = np.asarray(circles)
        # Histogram the radius column only: np.histogram flattens its input, so
        # passing the whole array also counted the centre coordinates.
        radii = circles[:, 2] if circles.size else np.empty(0)
        # Computed on both paths; previously it was missing whenever the
        # bootstrap was regenerated.
        vals_observed, rs = np.histogram(radii, bins=bins)

        # Observed vs. expected counts, plotted against the right bin edges.
        plt.figure(figsize=(18, 18))
        plt.plot(rs[1:], vals_bootstrap, label='Expectation (CSR)', lw=5)
        plt.plot(rs[1:], vals_observed, label='Observation', lw=5)
        # Radii are in micrometres (coordinates are scaled to a 0-1000
        # domain), matching the ratio figure below; the label used to say mm.
        plt.xlabel(r'$r$ ($\mu$m)')
        plt.ylabel('Number')
        plt.legend()
        plt.title(pc.name)

        # Ratio of observed to expected counts: the NCF itself.
        plt.figure(figsize=(18, 18))
        plt.plot(rs[1:], vals_observed/vals_bootstrap, lw=5)
        plt.gca().axhline(1, c='k', linestyle=':', lw=5)
        # Raw string: '\m' is not a valid escape sequence in a normal literal.
        plt.xlabel(r'$r$ ($\mu$m)')
        plt.ylabel('NCF$_{C_1 C_2 C_3}(r)$')
        plt.title(pc.name)