diff --git a/MVPA/stimulusSelection.py b/MVPA/stimulusSelection.py
index e18442a..cda2f1b 100644
--- a/MVPA/stimulusSelection.py
+++ b/MVPA/stimulusSelection.py
@@ -1,40 +1,48 @@
 # -*- coding: utf-8 -*-
 """
 GA based stimulus selector
-Creates a population of wowasaurus rex
-3 chromosomes, each with 10 'proteins'
-Permutation based CX and mutation (preserves uniqueness)
-
+
 @ Created on Sat Feb 21 13:10:50 2015
 @author: Calvin Leather
+
+To do-
+Crossover algorithm is somehow producing a few repeats
+Consider single-item/hetero 'buddy' term in fitness function
+Stick with 2pop ks test or use JS divergence? Or its square?
+
 """
-#%% Imorts
+
+#%% Imports
 import numpy as np
 import pandas as pd
 from deap import base, creator, tools
-import random, operator
-from sklearn.neighbors.kde import KernelDensity
 import matplotlib.pyplot as plt
-from scipy.stats import mstats
+from scipy.stats import kstest, ks_2samp
 from scoop import futures
+import random, operator, seaborn
+
 print 'defining variables'
 
 # Three col CSV (Item-Code, Option-Type, Value)
-csv_filepath='/home/brain/Desktop/Custom MVPA Workflow/SampleData.csv'
+csv_filepath='/home/brain/Desktop/Custom MVPA Workflow/options.csv'
 
 #%% Magic Numbers
-population_size=20
-
-# SALT
+nepochs, ngen, npop, cxpb, mutpb = 4,60,250, 0.4, 0.2
+HOFsize=1 #number of individuals to put in each epoc
+HallOfFame=[]
 
-n_single=20 #1
+SID='a03'
+n_single=20 #1
 n_hetero=15 #2
 n_homo=22 #3
 n_genome=n_single+n_hetero+n_homo
-n_target=10 #Desired number in each catagory (maybe make this 11 options for the single items)
+n_target=10 #Desired number in each catagory
+chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
+
+random.seed()
 
-#%%"""===========define fitness and functions================="""
+#%%===========define fitness and functions=================%%#
 
 print 'defining functions'
 
@@ -42,20 +50,26 @@ def evalMax(individual):
     indiv=dictionaryLookup(individual)
-    rangeCost=np.power((np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2])), 2)
-    distanceCost=0
-    normalityCost=mstats.normaltest(indiv[0])[1]+mstats.normaltest(indiv[1])[1]+mstats.normaltest(indiv[2])[1]
-    cost=400*rangeCost+distanceCost+100*normalityCost
+    similarityCost= 15/np.sum(np.in1d(individual[0][0],individual[0][1]))
+    rangeCost=1/(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))
+    uniformCost=1/(kstest(indiv[0],'uniform')[1]+kstest(indiv[1],'uniform')[1]+kstest(indiv[2],'uniform')[1]+.00001)
+    distanceCost=1/(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
+    cost=rangeCost+1.5*uniformCost+distanceCost+similarityCost
     return (cost,)
 
-# Creates the initial generation
-def createIndividual(species):
+# Creates the initial generation
+def createIndividual():
     #Creates an individual with unique sample
-    return [random.sample(range(n_single),n_target),
-            random.sample(range(n_hetero),n_target),
-            random.sample(range(n_homo), n_target)]
+    x=[np.random.choice(valueDictionary[1].keys(),n_target+1),
+       np.random.choice(valueDictionary[2].keys(),n_target),
+       np.random.choice(valueDictionary[3].keys(),n_target)]
+
+    return[np.random.choice(valueDictionary[1].keys(),n_target+1),
+           np.random.choice(valueDictionary[2].keys(),n_target),
+           np.random.choice(valueDictionary[3].keys(),n_target)]
 
-# Crossover algo
+
+# Crossover algorithm
 def nonReplicatingCross(ind1, ind2):
     chromosomeNumber = random.randint(0,2)
     indLength = len(ind1[chromosomeNumber])
@@ -64,18 +78,21 @@ def nonReplicatingCross(ind1, ind2):
     child2 = np.zeros(indLength)
     child1[0:cxpoint]=ind2[chromosomeNumber][0:cxpoint]
     child2[0:cxpoint]=ind1[chromosomeNumber][0:cxpoint]
-    index1=0
-    index2=0
+    index1, index2=0,0
+    cont1, cont2=1,1
     for item1 in ind1[chromosomeNumber]:
         if cxpoint+index1==indLength:
             break
         try:
             np.where(child1==item1)[0][0]
+            continue
         except IndexError:
             child1[cxpoint+index1]= item1
             index1=index1+1
-
-
+    cont1==0
+    x=np.where(child1==0)[0]
+    for i in x:
+        child1[i]=np.random.choice(valueDictionary[chromosomeNumber+1].keys())
     for item2 in ind2[chromosomeNumber]:
         if cxpoint+index2==indLength:
             break
@@ -84,59 +101,72 @@ def nonReplicatingCross(ind1, ind2):
         except IndexError:
             child2[cxpoint+index2]= item2
             index2=index2+1
+    cont2==0
+    x=np.where(child2==0)[0]
+    for i in x:
+        child2[i]=np.random.choice(valueDictionary[chromosomeNumber+1].keys())
     ind1[chromosomeNumber]=child1
     ind2[chromosomeNumber]=child2
-    index1=0
-    index2=0
-
+    index1, index2=0,0
+
+
     return ind1, ind2
-
-# Mutation fucntion
+
+#Mutation algorithm
 def nonReplicatingMutate(ind,indpb):
+    ind=np.asarray(ind)
-    for i in range(1,len(ind[0])):
-        if random.random() < indpb:
-            working = True
-            while working:
-                randomN=random.randint(0,n_single)
-                try:
-                    np.where(ind[0]==randomN)[0][0]
-                except IndexError:
-                    ind[0][i]=randomN
-                    working=False
-    for i in range(1,len(ind[1])):
-        if random.random() < indpb:
-            working = True
-            while working:
-                randomN=random.randint(0,n_hetero)
-                try:
-                    np.where(ind[1]==randomN)[0][0]
-                except IndexError:
-                    ind[1][i]=randomN
-                    working=False
-    for i in range(1,len(ind[2])):
-        if random.random() < indpb:
-            working = True
-            while working:
-                randomN=random.randint(0,n_homo)
-                try:
-                    np.where(ind[2]==randomN)[0][0]
-                except IndexError:
-                    ind[2][i]=randomN
-                    working=False
+    for chro in range(0,3):
+        for i in range(1,len(ind[chro])):
+            if random.random() < indpb:
+                working = True
+                while working:
+                    randomN=np.random.choice(valueDictionary[chro+1].keys())
+                    if randomN in valueDictionary[chro+1].keys()==0:
+                        print 'noooooooo'
+                    try:
+                        np.where(ind[0]==randomN)[0][0]
+                    except IndexError:
+                        ind[chro][i]=randomN
+                        working=False
     return ind
     del ind
-
+#Maps genotype onto phenotype (item number onto value)
 def dictionaryLookup(individual):
-    indiv=np.ndarray(shape=(3,10), dtype=float)
+    indiv=[np.zeros(n_target+1), np.zeros(n_target), np.zeros(n_target)]
     for chro in range(0,3):
         for i in range(len(individual[0][chro])):
             indiv[chro][i]=valueDictionary[chro+1][individual[0][chro][i]]
     return indiv
+
+#stores top n individuals of an epoch in a list
+def custHallOfFame(population,maxaddsize):
+    for i in tools.selBest(population, k=maxaddsize):
+        HallOfFame.append(i)
+
-#%%"""==============import data from csv======================"""
+#%%==============import data from csv======================%%#
+raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=None)
+print raw_choice_dataset
+raw_choice_dataset=raw_choice_dataset[raw_choice_dataset[0]==SID]
+
+valueDictionary={}
+for x in range(1,4):
+    placeholderValueDictionary={}
+    for rows in raw_choice_dataset[raw_choice_dataset[3].astype(int)==x].iterrows():
+        rows[1][6]=random.randint(100,140) # change this once modeling is done
+        placeholderValueDictionary[int(rows[1][1])] =float(rows[1][6])
+    valueDictionary[x]=placeholderValueDictionary
+
+bundleLookup={}
+for x in raw_choice_dataset[raw_choice_dataset[3].astype(int)==3].iterrows():
+    bundleLookup[int(x[1][1])]=int(x[1][4]),int(x[1][5])
+
+
+
+"""
 raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=None)
 raw_choice_dataset.dropna()
@@ -144,10 +174,11 @@ def dictionaryLookup(individual):
 for x in range(1,4):
     placeholderValueDictionary={}
     for i in raw_choice_dataset[(raw_choice_dataset[1]==x)].index:
-        placeholderValueDictionary[raw_choice_dataset[0][i]] = raw_choice_dataset[2][i]
+        placeholderValueDictionary[raw_choice_dataset[0][i]] = random.random()
     valueDictionary[x]=placeholderValueDictionary
+"""
+#%%===============initialize toolbox=======================%%#
-#%%"""===============initialize toolbox======================="""
 print 'initializing'
 creator.create("FitnessMax", base.Fitness, weights=(-1.0,))
 creator.create("Individual", list, typecode="d", fitness=creator.FitnessMax)
@@ -157,11 +188,10 @@ def dictionaryLookup(individual):
 stats.register("mean", np.mean)
 stats.register("min", np.min)
-HOF = tools.HallOfFame(maxsize=10)
-
 toolbox = base.Toolbox()
-toolbox.register("create_individual", createIndividual, 1)
+toolbox.register("HOF", custHallOfFame, maxaddsize=3)
+toolbox.register("create_individual", createIndividual)
 toolbox.register("individuals", tools.initRepeat, creator.Individual, toolbox.create_individual, n=1)
 toolbox.register("population", tools.initRepeat, list, toolbox.individuals)
@@ -173,55 +203,70 @@ def dictionaryLookup(individual):
 toolbox.register("select", tools.selTournament, tournsize=2)
 toolbox.register("map", futures.map)
-#%%"""======================main=============================="""
+
+#%%======================main==============================%%#
+
 print 'main'
+
 if __name__ == "__main__":
-    pop = toolbox.population(n=200)
-    ngen, cxpb, mutpb = 80, 0.4, 0.2
-
-    fitnesses = toolbox.map(toolbox.evaluate, pop)
-
-    for ind, fit in zip(pop, fitnesses):
-        ind.fitness.values = fit
-    print 'beginning'
-
-    for g in range(ngen):
-        print g
-        offspring = toolbox.select(pop, len(pop))
-        offspring = map(toolbox.clone, offspring)
+    for epoch in xrange(nepochs):
+        print 'beginning epoch %s of %s' %(epoch+1,nepochs)
+        pop = toolbox.population(n=npop)
+
-        for child1, child2 in zip(offspring[::2], offspring[1::2]):
-            if random.random() < cxpb:
-                child1[0], child2[0] = toolbox.mate(child1[0], child2[0])
-                del child1.fitness.values, child2.fitness.values
-
-        for mutant in offspring:
-            if random.random() < mutpb:
-                mutant[0]=toolbox.mutate(mutant[0])
-                del mutant.fitness.values
+        fitnesses = toolbox.map(toolbox.evaluate, pop)
-        invalids = [ind for ind in offspring if not ind.fitness.valid]
-        fitnesses = toolbox.map(toolbox.evaluate, invalids)
-        for ind, fit in zip(invalids, fitnesses):
-            ind.fitness.values = fit
+        for ind, fit in zip(pop, fitnesses):
+            ind.fitness.values = fit
+
+        for g in range(ngen):
+            if g%5==0:
+                print g
+            offspring = toolbox.select(pop, len(pop))
+            offspring = map(toolbox.clone, offspring)
-        pop[:] = offspring
+            for child1, child2 in zip(offspring[::2], offspring[1::2]):
+                if random.random() < cxpb:
+                    child1[0], child2[0] = toolbox.mate(child1[0], child2[0])
+                    del child1.fitness.values, child2.fitness.values
-    a=tools.selBest(pop,k=1)[0][0]
-    print a
-    print np.sum(tools.selBest(pop,k=1)[0][0])
+            for mutant in offspring:
+                if random.random() < mutpb:
+                    mutant[0]=toolbox.mutate(mutant[0])
+                    del mutant.fitness.values
+
+            invalids = [ind for ind in offspring if not ind.fitness.valid]
+            fitnesses = toolbox.map(toolbox.evaluate, invalids)
+            for ind, fit in zip(invalids, fitnesses):
+                ind.fitness.values = fit
+
+            pop[:] = offspring
+        a=dictionaryLookup(tools.selBest(pop,k=1)[0])
-
-    for i in range(0,3):
-        ones=np.ones(1000)
-        kde=KernelDensity(kernel='gaussian', bandwidth=.05).fit(zip(a[i], ones))
-        x_grid=np.linspace(0, 1, 1000)
-        pdf = np.exp(kde.score_samples(zip(x_grid, ones)))
-        plt.plot(x_grid, pdf, linewidth=3, alpha=.5)
+        toolbox.HOF(pop)
+        del pop
+
+
+#%%===============reporting and graphing======================%%#
+#Graph, color order - blue green red purple gold light_blue
+f, axes=plt.subplots(len(HallOfFame),1, figsize=(7,2*len(HallOfFame)))
+plt.tight_layout(pad=3)
+num=0
+
+with open('output.txt', 'w') as output_text:
+    output_text.write("Results for %s individuals, %s generations and %s epochs\n%s\n" %(npop,ngen,nepochs, SID))
+    for x in HallOfFame:
+        for i in dictionaryLookup(x):
+            seaborn.kdeplot(i, shade=True, bw=10, ax=axes[num])
+
+        axes[num].set_title("{0:.3f}".format(evalMax(x)[0]))
+        if num==0:
+            axes[num].set_title("Graphs of Possible Solutions\n{0} individuals, {1} generations, {2} epoch \n\n\n\n{3:.3f}".format(npop,ngen, nepochs,evalMax(x)[0]))
+        output_text.write('%s. Similarity- %s, %s\n'%(num,np.sum(np.in1d(x[0][0],x[0][1])),np.sum(np.in1d(x[0][1],x[0][2]))))
+        output_text.write("%s\n\n" %x[0])
+        num=num+1
-#%% TO-DO's
+plt.savefig('/home/brain/Desktop/Custom MVPA Workflow/multipage.pdf', format='pdf', bbox_inches='tight', pad_inches=1)
-# make the single chromisome 11 long
-# add a term to the fitness function for the "buddy" items.
-# Jensen-Shannon?
\ No newline at end of file
+print 'Program complete'
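The new file header flags that the crossover is "somehow producing a few repeats". For reference, below is a minimal, self-contained sketch of a one-point crossover that repairs duplicates on a single chromosome, assuming the same representation the script uses (each chromosome is a list of unique item codes). The function and helper names are hypothetical and this is not the repo's nonReplicatingCross or its DEAP wiring; it only illustrates one way to guarantee uniqueness after the prefix swap.

# Illustrative sketch only: uniqueness-preserving one-point crossover for one
# chromosome. Hypothetical names; not the repo's nonReplicatingCross.
import random


def unique_one_point_cross(parent1, parent2, rng=random):
    """Return two children of the same length with no repeated item codes."""
    assert len(parent1) == len(parent2)
    n = len(parent1)
    cxpoint = rng.randint(1, n - 1)  # split point, at least one gene per side

    def build_child(prefix_donor, filler_donor):
        child = list(prefix_donor[:cxpoint])   # copy prefix from one parent
        for code in filler_donor:              # fill remaining slots from the other,
            if len(child) == n:                # skipping codes already present
                break
            if code not in child:
                child.append(code)
        return child

    return build_child(parent2, parent1), build_child(parent1, parent2)


if __name__ == "__main__":
    p1 = random.sample(range(20), 10)
    p2 = random.sample(range(20), 10)
    c1, c2 = unique_one_point_cross(p1, p2)
    assert len(set(c1)) == len(c1) and len(set(c2)) == len(c2)  # no repeats
    print(sorted(c1))
    print(sorted(c2))

Because the repair only draws codes from the two parents, each child stays inside the sampled item pool and never has leftover zero placeholders to back-fill, which is where a random top-up (as in the diff's np.where(child==0) step) can reintroduce duplicates.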