-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplotters.py
2642 lines (2242 loc) · 107 KB
/
plotters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
""" Tools for plotting, including for plotting several related plots as part
of larger subplots.
HISTORY
10.07.2024 Read through everything apart from plotLineWithError, findSigEnds,
plotHeatmapFromDf
"""
import os
from copy import deepcopy
import numpy as np
import pandas as pd
import scipy.stats as spStats
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.cm as mplCm
import matplotlib.colors as pltColours
from matplotlib.backends.backend_pdf import PdfPages
import mne
from . import helpers
def setMatplotlibDefaults():
    """ Apply the Matplotlib rcParams values that I commonly use.

    All line-like widths are set to 0.5, the font is set to 10 pt Arial
    (sans-serif family), and the PDF and PS font types are set to 42.
    """
    thinWidth = 0.5
    widthKeys = ('lines.linewidth',
                 'lines.markeredgewidth',
                 'axes.linewidth',
                 'xtick.major.width',
                 'ytick.major.width',
                 'patch.linewidth')
    settings = {thisKey: thinWidth for thisKey in widthKeys}

    settings.update({
        'font.size': 10,
        # Font type 42 is used for keeping text as text when moving to
        # vector graphics editing software
        'pdf.fonttype': 42,
        'ps.fonttype': 42,
        'font.family': 'sans-serif',
        'font.sans-serif': 'Arial',
    })

    for thisKey, thisValue in settings.items():
        matplotlib.rcParams[thisKey] = thisValue
def _makeCarefulProperty(name: str, keys: list[str] = None):
    """ Build a property that may only be used in a strict, set-once way.

    The returned property raises a ValueError if...
        - It is read while the underlying attribute is still None
        - It is assigned to when the underlying attribute is already not None
        - 'keys' was provided and the keys of the dict being read or
            assigned do not match 'keys' exactly

    The class using the property is responsible for initialising the
    underlying attribute ('_' + name) to None before first use.

    INPUT
    name: str. Name to use for the underlying attribute. A '_' will be
        prepended.
    keys: list[str]. Optional. The keys that are expected in the dict
        stored in this property.

    OUTPUT
    A property object, to be assigned as a class attribute.
    """
    attrName = '_'+name

    def checkKeys(candidate):
        # Only enforced when the caller supplied the expected keys
        if keys is None:
            return
        if set(candidate.keys()) != set(keys):
            raise ValueError(f'The {name} attribute does not have the '+
                             'expected keys.')

    def getter(self):
        stored = getattr(self, attrName)
        if stored is None:
            raise ValueError(f'The {name} attribute has not yet been set.')
        checkKeys(stored)
        return stored

    def setter(self, value):
        if getattr(self, attrName) is not None:
            raise ValueError(f'The {name} attribute cannot be set twice.')
        checkKeys(value)
        setattr(self, attrName, value)

    return property(getter, setter)
class Formatter():
""" Stores and manipulates a dataset. Helpful for getting a dataset into
the format required for making a specific plot.
ATTRIBUTES
data: pandas dataframe storing the current version of the dataset. Index
is arbitrary.
"""
def __init__(self, data: pd.DataFrame) -> None:
"""
INPUT
data: pandas dataframe. The data that we want to manipulate. The
index will be ignored. Only a deepcopy of the input dataframe
will be maniptulated.
"""
data = deepcopy(data)
self.data = data.reset_index(drop=True)
def average(self, within: str | list[str],
keep: list[str] = None,
drop: list[str] = None,
checkEqual: bool = True,
avType: str | float ='mean'):
""" Average the data within specified groups, and optionally dropping
some columns.
within: str | list[str]. The names of one or more columns.
Averaging is performed seperately for each unique combination of
these variables (i.e. we average within the groups defined by
these variables).
keep: None | list[str]. List of column names. If provided, checks that
all columns in the dataset match list(within) + keep + drop.
Provide (possibly empty) lists for both keep and drop or neither of
them.
drop: None | list[str]. List of column names. If provided, these
columns will be dropped. May not contain column names that were
also passed as 'within' or 'keep'. Provide (possibly empty) lists
for both keep and drop or neither of them.
checkEqual: bool. If true, check that there are the same number
of cases in each group.
avType: str. What type of averaging to perform. Options are...
'mean'
'median'
A number between 0 and 1: In this case does not find the mean, but
the value of the quantile corresponding to the passed number.
"""
if not isinstance(within, list):
assert isinstance(within, str)
within = [within]
if (keep is None) and (drop is None):
pass
elif (keep is not None) and (drop is not None):
expectCols = within + keep + drop
assert len(expectCols) == len(set(expectCols)), 'Duplicates'
expectCols = set(expectCols)
assert expectCols == set(self.data.columns), 'Unexpected columns'
self.data = self.data.drop(columns=drop)
else:
raise ValueError('Provide both \'keep\' and \'drop\' or neither.')
grouped = self.data.groupby(within)
if checkEqual:
checkGroupsEqual(grouped)
if avType == 'mean':
avData = grouped.mean()
elif avType == 'median':
avData = grouped.median()
elif isinstance(avType, float):
avData = grouped.quantile(q=avType)
else:
raise ValueError('Unrecognised option for type of average')
helpers.checkDfLevels(avData, indexLvs=within)
avData = avData.reset_index(allow_duplicates=False)
assert len(set(avData.columns)) == len(avData.columns)
if keep is not None:
assert set(avData.columns) == set(within + keep)
self.data = avData
def dBin(self, strategy: str,
opts: dict,
binName: str,
sepFor: None | str | list[str] = None,
binAvName: None | str = None):
""" Bin data and add a new column to the data, givng the bin to
which each case belongs (and optionally a column giving the average
value of the binning variable in each bin).
INPUT
strategy: str. Binning strategy. Options are...
'col': The values in a column directly as the bins.
'specifiedEdges': The x-values are binned based on specified bin
edges.
opts: dict. Depends on the value of strategy. The keys and
values for each strategy option are as follows.
'col': Keys and values...
'col': The name of the column to use directly as the bins
'specifiedEdges': Keys and values...
'col': The name of the column upon which the binning is
to be conducted.
'edges': Sequence of scalars giving the bin edges to use.
binName: str. The name of the column that will be added to the data
giving the bin to which each case belongs.
sepFor: None | str | list[str]. Name or names of columns in the
dataset. Bining is performed seperately for each unique
combination of these variables.
binAvName: None | str. If provided add a second new column to the
data with name specified by binAvName. This column gives the
average value of varaible used for binning (i.e. opts['col']) in
the bin to which the case belongs. Averages are computed seperately
for each seperation sepecified by sepFor. Hence this value will be
the same for all cases that fall in the same bin, and which have
the same unique combination for the variables specified in sepFor.
TODO Add option to bin by percentile See groupby(...)[var].rank(pct=true)
"""
startCases = len(self.data)
startCols = deepcopy(list(self.data.columns))
if sepFor is None:
self.data = self._binAll(self.data, strategy, opts, binName,
binAvName=binAvName)
else:
if isinstance(sepFor, str):
sepFor = [sepFor]
assert isinstance(sepFor, list)
grouped = self.data.groupby(sepFor)
processed = []
for _, group in grouped:
thisProcessed = self._binAll(group, strategy, opts, binName,
binAvName=binAvName)
processed.append(thisProcessed)
processed = pd.concat(processed, verify_integrity=True)
assert len(processed) == len(startCases)
expectCols = startCols + [binName]
if binAvName is not None:
expectCols.append(binAvName)
assert list(processed.columns) == expectCols
self.data = processed
def _binAll(self, data: pd.DataFrame,
strategy: str,
opts: dict,
binName: str,
binAvName: None | str = None):
""" Perform binning of all input data. Similar functionality to .dBin()
except operates on the input, not on self.data, and cannot perform
binning seperately for different subsets of the data.
INPUT
data: pandas dataframe to perform the binning on. Data is deepcopied
to ensure no mutation of the input data.
strategy, opts, binName, binAvName: Same as input to the method .dBin()
OUTPUT
data: pandas dataframe. Copy of the input data with the binning
perormed and the results of the binning stored in a new column
with name binName. A further new column with name binAvName is
added if this input is provided.
"""
data = deepcopy(data)
if binName in data.columns:
raise ValueError('Binning data cannot be assigned to requested '+
'columns because it already exists.')
if (binAvName is not None) and (binAvName in data.columns):
raise ValueError('Average value of binning variable cannot be '+
'assigned to the requested column because '+
'the column already exists.')
if strategy == 'col':
assert set(opts.keys()) == {'col'}
data[binName] = data[opts['col']]
elif strategy == 'specifiedEdges':
assert set(opts.keys()) == {'col', 'edges'}
data[binName] = pd.cut(data[opts['col']],
bins=opts['edges'],
labels=False)
assert not np.any(np.isnan(data[binName]))
else:
raise ValueError('Unrecognised binning option')
if binAvName is not None:
avFrameCols = [binName, opts['col']]
avDataFormatter = Formatter(data.loc[:, avFrameCols])
avDataFormatter.average(within=binName, checkEqual=False)
avData = avDataFormatter.data
assert list(avData.columns) == avFrameCols
avData = avData.rename(columns={opts['col']: binAvName},
errors='raise')
oldLen = len(data)
data = pd.merge(data, avData, how='left', on=binName,
suffixes=(False, False),
validate='many_to_one')
assert len(data) == oldLen
return data
def runSeriesStats(self, xVar: str, yVar: str, dBin: str,
obsID: None | str = None,
runPerm: bool = True,
checkEqual: bool = True,
checkSame: bool = True,
checkOrder: bool = True):
""" Run statistics on series data.
INPUT
xVar: str. Name of column giving the values of the independent
variable.
yVar: str. Name of column giving the values of the dependent variable.
dBin: str. Name of column giving the bin to which each datapoint
belongs. Averaging, and statistical tests are performed
for each bin. For example, this might give a binned version of the
xVar, or xVar itself, if xVar takes discrete values. Data will
be returned in assending order of this binning variable, therefore
it must be numeric. Bins which are adjacent acording to numeric
ordering of this binning variable are also treated as ajdacent
for cluster-based permutation tests.
obsID: None | str. Only needed if runPerm or checkSame are True.
In this case it should be the name of a column giving the
independent observation to which each datapoint belongs.
In these cases, dBin takes on the more specific meaning of
identifying the different repeated measurements that make up
each observeration. E.g. obsID could be a unique identifier for
each participant.
runPerm: bool. If true, run threshold-free cluster based permutation
tests.
checkEqual: bool. If true, check that there are the same number
of cases in each dBin. Must be true if runPerm is True.
checkSame: bool. Check that the same observations (identified through
obsID) are present in each comparison. Must be true if runPerm is
True.
checkOrder: bool. If true, check that when data is ordered following
the values of the binning variable ('dBin'), that the mean
x-value increases across bins.
OUTPUT
stats: dataframe. Gives the results of the statistical tests.
Index is arbitrary. Has the one row for each dBin, and the
following columns...
xVar+'_mean': Mean x-value of data in the dBin
yVar+'_mean': Mean y-value of data in the dBin
yVar+'_SEM': Standard error of the mean y-value
yVar+'_sig': (Only present if runPerm is True.) bool. True if
y-value for this dBin is significantly different from zero
in threhsold-free cluster-based permutation test.
"""
if runPerm:
assert checkEqual
assert checkSame
if runPerm or checkSame:
assert obsID
data = deepcopy(self.data)
grouped = data.groupby(dBin)
if checkEqual:
checkGroupsEqual(grouped)
if checkSame:
checkSameMeasures(data, obsID, dBin)
xMean, _, xBin = groupedToMeanSd(grouped[xVar])
yMean, ySem, xBinForY = groupedToMeanSd(grouped[yVar])
assert np.array_equal(xBin, xBinForY)
assert np.all(np.diff(xBin) > 0)
if checkOrder:
assert np.all(np.diff(xMean) > 0)
stats = {
(xVar+'_mean'): xMean,
(yVar+'_mean'): yMean,
(yVar+'_SEM'): ySem
}
if obsID is not None:
pVals, xBinForP = runSeriesClstTest(self.data, xBin=dBin,
yVar=yVar, obsID=obsID)
assert np.array_equal(xBin, xBinForP)
sig = pVals < 0.05
assert sig.shape == stats[yVar+'_mean'].shape
stats[yVar+'_sig'] = sig
print(f'{np.sum(sig)} of {len(sig)} bins significant.')
stats = pd.DataFrame(stats)
return stats
def runMultiStats(self,
depVar: str,
dBin: str | list[str],
avVars: None | str | list[str] = None,
checkEqual: bool = True,
checkSame: str = None,
fdr: bool = True):
""" Run multiple statistical tests comparing values to zero, before
performing FDR correction if requested.
INPUT
depVar: str. Name of the columns containing the dependent variable
that we want to perform statistics on.
dBin: str | list[str]. Name or names of columns in the
dataset. Statistics are performed seperately for each unique
combination of these variables. That is, each unique combination
of these variables defines a bin of data, within which averaging
and a statistical comparison is performed. Results of these
multiple individual statistical comparsions are corrected using
FDR correction if requested.
avVars: None | str | list[str]. Names of any columns containing
variables that we would like to average (within the bins defined
by dBin) but not perform statisitcs on. E.g. could be helpful for
determining average values of independent variables.
checkEqual: bool. If true, check there are an equal number of cases
in each comparison performed.
checkSame: None | str. If str, should be the name of a column in the
dataset giving the independent observation to which each
measurement belongs. In this case check that the same observations
are present in each comparison.
fdr: bool. If true, peform False Discovery Rate correction.
OUTPUT
stats: dataframe. Gives the results of the statistical tests.
Index is arbitrary. Has the one row for each unique combination of
the bin variables, and the following columns...
depVar+'_mean': Gives the mean of the dependent variable within
the dBin.
depVar+'_sig' or depVar+'_fdr_sig': Bool. True for for
significant points (FDR corrected if requested).
Each of the avVars with the suffex '_mean': For each of the
avVars gives the mean within the dBin.
Each of the dBin vars: The binning variables.
"""
assert isinstance(depVar, str)
if avVars is None:
allAvVars = [depVar]
elif isinstance(avVars, str):
allAvVars = [depVar, avVars]
elif isinstance(avVars, list):
allAvVars = [depVar] + avVars
else:
raise TypeError('Unsupported input')
if isinstance(dBin, str):
dBin = [dBin]
data = deepcopy(self.data)
grouped = data.groupby(dBin)
if checkEqual:
checkGroupsEqual(grouped)
if checkSame:
checkSameMeasures(data, obsID=checkSame, repMeas=dBin)
def tTestCol(df):
assert np.ndim(df) == 1
return spStats.ttest_1samp(df, popmean=0, axis=None).pvalue
stats = data.groupby(dBin).agg(pValue=pd.NamedAgg(
column=depVar, aggfunc=tTestCol))
helpers.checkDfLevels(stats, indexLvs=dBin)
assert list(stats.columns) == ['pValue']
assert stats.dtypes['pValue'] == 'float'
if fdr:
stats[depVar+'_fdr_sig'], _ = mne.stats.fdr_correction(
stats['pValue'])
else:
stats[depVar+'_sig'] = stats['pValue'] < 0.05
stats = stats.drop(columns='pValue')
mean = grouped[allAvVars].mean()
if isinstance(mean, pd.Series):
mean = mean.to_frame()
assert list(mean.columns) == allAvVars
mean = mean.add_suffix('_mean', axis='columns')
assert len(stats) == len(mean)
oldLen = len(stats)
stats = pd.merge(mean, stats,
how='inner',
on=dBin,
suffixes=(False, False),
validate='one_to_one')
assert len(stats) == len(mean) == oldLen
helpers.checkDfLevels(stats, indexLvs=dBin)
stats = stats.reset_index(allow_duplicates=False)
if fdr:
sigCol = depVar + '_fdr_sig'
else:
sigCol = depVar + '_sig'
expect = [thisCol+'_mean' for thisCol in allAvVars] + [sigCol] + dBin
assert set(stats.columns) == set(expect)
assert len(np.unique(stats.columns)) == len(stats.columns)
return stats
class Plotter():
    """ Base class that stores and plots the data for one subplot.
    Subclasses are expected to provide the actual plotting through .plot().

    ATTRIBUTES
    mayShareAx: bool. If true plotting using axes that are shared with other
        subplots is ok.
    axisLabels: None | dict. Stores the labels for the x- and y-axes. Keys
        are 'xLabel' and 'yLabel'. Can only be set once.
    title: None | dict. Stores a complete specification of the plot title.
        Keys are 'txt', 'rotation' and 'weight'. Can only be set once.
    ax: None | axis. Once plotting is performed, the axis used is stored here.
    """
    mayShareAx = False

    axisLabels = _makeCarefulProperty('axisLabels', ['xLabel', 'yLabel'])
    title = _makeCarefulProperty('title', ['txt', 'rotation', 'weight'])

    def __init__(self, xLabel=None, yLabel=None,
                 titleTxt=None, titleRot=0, titleWeight='normal') -> None:
        """
        INPUT
        xLabel: str | None. Axis label.
        yLabel: str | None. Axis label.
        titleTxt: str | None. Text for title.
        titleRot: scalar. Rotation of title text.
        titleWeight: str. Specification of font weight.
        """
        # The careful properties require their underlying attributes to
        # exist, and to be None, before the first assignment
        self._axisLabels = None
        self._title = None

        self.axisLabels = dict(xLabel=xLabel, yLabel=yLabel)
        self.title = dict(txt=titleTxt, rotation=titleRot, weight=titleWeight)
        self.ax = None

    def plot(self, ax):
        """ Make the subplot. Not implemented in this base class.

        INPUT
        ax: Axis to plot onto
        """
        raise NotImplementedError

    def addTitle(self, ax):
        """ Add the stored title to a specified axis. The title is drawn
        at the current default font size.

        INPUT
        ax: The axis to add the title to
        """
        titleSpec = self.title
        defaultSize = matplotlib.rcParams['font.size']
        ax.set_title(titleSpec['txt'],
                     rotation=titleSpec['rotation'],
                     fontweight=titleSpec['weight'],
                     fontsize=defaultSize,
                     multialignment='left')
class CustomPlotter(Plotter):
    """ Wraps an arbitrary user-supplied plotting function, so that any kind
    of plot can be used with the core features of MultiPlotter.

    ATTRIBUTES
    mayShareAx: bool. If true plotting using axes that are shared with other
        subplots is ok.
    axisLabels: None | dict. Stores the labels for the x- and y-axes.
    title: None | dict. Stores a complete specification of the plot title.
    ax: None | axis. Once plotting is performed, the axis used is stored here.
    data: Same as input to the constructor.
    plotFun: Same as input to the constructor.
    """

    def __init__(self, data, plotFun,
                 xLabel=None, yLabel=None,
                 titleTxt=None, titleRot=0, titleWeight='normal') -> None:
        """
        INPUT
        data: any format. The data from which the data for plotting will be
            derived, or that data itself.
        plotFun: function. It should accept two arguments passed by position,
            data and ax. Data will be this class' self.data attribute, and ax
            will be a matplotlib axis to plot on to. Should perform the
            plotting (apart from adding a title and x and y axis labels).
            Any output will be ignored.
        xLabel: str | None. Axis label.
        yLabel: str | None. Axis label.
        titleTxt: str | None. Text for title.
        titleRot: scalar. Rotation of title text.
        titleWeight: str. Specification of font weight.
        """
        super().__init__(xLabel, yLabel, titleTxt, titleRot, titleWeight)
        self.plotFun = plotFun
        self.data = data

    def plot(self, ax):
        """ Make the subplot: run the user's plotting function, then add
        any axis labels that were provided, and the title.

        INPUT
        ax: Axis to plot onto
        """
        self.plotFun(self.data, ax)

        # Labels left as None are not applied at all
        xLabel = self.axisLabels['xLabel']
        if xLabel is not None:
            ax.set_xlabel(xLabel)
        yLabel = self.axisLabels['yLabel']
        if yLabel is not None:
            ax.set_ylabel(yLabel)

        self.addTitle(ax)
        self.ax = ax
class SeriesPlotter(Plotter):
    """ Stores and plots the data for one or multiple series on a single
    subplot.

    ATTRIBUTES
    mayShareAx: bool. If true plotting using axes that are shared with other
        subplots is ok.
    ax: None | axis. Once plotting is performed, the axis used is stored here.
    seriesData: list of dataframe. Same as input to the
        constructor but always a list.
    sColours: list of str. Same as input to the constructor.
    sLabels: list of str. Same as input to the constructor.
    axisLabels: None | dict. Stores the labels for the x- and y-axes.
    title: None | dict. Stores a complete specification of the plot title.
    vLines: None | dict. Same as input to constructor.
    hLine: scalar | False. Same as input to constructor.
    legendSpec: dict. Stores a complete specification of the legend
        associated with the plot. Keys are...
            'label': list of str. Labels for the entries in the legend.
            'colour': list of str. Colours for the entries in the legend.
    """
    mayShareAx = True

    # Make sure the colours, labels and legend can only be set once, so that
    # they don't accidentally come out of sync with each other (or with
    # the legends of other plots, if plotting multiple subplots)
    sColours = _makeCarefulProperty('sColours')
    sLabels = _makeCarefulProperty('sLabels')
    legendSpec = _makeCarefulProperty('legendSpec', ['label', 'colour'])

    def __init__(self, seriesData, sColours, sLabels,
                 xLabel=None, yLabel=None,
                 vLines=None, hLine=False,
                 titleTxt=None, titleRot=0, titleWeight='normal'):
        """
        INPUT
        seriesData: dataframe | list of dataframe. Each element of the list is
            a dataframe that specifies a series to plot. One dataframe to plot
            only a single series. Index of the dataframe is ignored. Columns
            should be...
                X: str. The column giving the x-points to plot.
                Y: str. The column giving the y-points to plot.
                posErr: str. The column giving the length of the
                    error bar from the data point to the top of the error bar.
                    Hence the total length of the error bar will be twice this
                    value.
                sig: str (optional). The column giving boolean values where
                    true indicates that a data point was significant.
        sColours: list of str. List as long as seriesData, specifying the
            colour to use for each series.
        sLabels: list of str. List as long as seriesData, specifying the label
            for each series in the legend
        xLabel: str | None. Axis label.
        yLabel: str | None. Axis label.
        vLines: None | dict. If None, no vertical lines are added. Otherwise
            keys are...
                vLines: dict. Keys are strings to be used as labels,
                    and values are scalars giving the x-location in
                    data coordinates for the vertical line
                addVLabels: bool. Whether to add labels to the vertical
                    lines.
        hLine: scalar or False. If not False, then adds a horizontal line
            at the value given (in data coordinates)
        titleTxt: str | None. Text for title.
        titleRot: scalar. Rotation of title text.
        titleWeight: str. Specification of font weight.
        """
        # The careful properties require their underlying attributes to
        # exist, and to be None, before the first assignment
        self._sColours = None
        self._sLabels = None
        self._legendSpec = None
        super().__init__(xLabel, yLabel, titleTxt, titleRot, titleWeight)

        if isinstance(seriesData, pd.DataFrame):
            seriesData = [seriesData]
        assert isinstance(seriesData, list)

        self.seriesData = seriesData
        self.sColours = sColours
        self.sLabels = sLabels
        self.legendSpec = self.findLegend()
        self.vLines = vLines
        self.hLine = hLine

    def findLegend(self):
        """ Find the details of the legend to use.

        OUTPUT
        legendSpec: dict. Keys are...
            'label': list of str. Labels for the entries in the legend.
            'colour': list of str. Colours for the entries in the legend.
        """
        # This function isn't doing much at the moment but will become
        # helpful when we want to implement the option of suppressing
        # particular series from the legend.
        legendSpec = dict()
        legendSpec['label'] = self.sLabels
        legendSpec['colour'] = self.sColours
        return legendSpec

    def plot(self, ax):
        """ Make the subplot.

        INPUT
        ax: Axis to plot onto
        """
        for thisSeries in self.seriesData:
            assert np.all(np.isin(list(thisSeries.columns),
                                  ['X', 'Y', 'posErr', 'sig']))

        plotLineWithError(self.seriesData, self.sColours,
                          hLine=self.hLine, ax=ax,
                          xLabel=self.axisLabels['xLabel'],
                          yLabel=self.axisLabels['yLabel'])

        # vLines defaults to None, in which case there is nothing to draw
        if self.vLines is not None:
            addVLines(ax, self.vLines['vLines'], self.vLines['addVLabels'])

        self.addLegend(ax)
        self.addTitle(ax)
        self.ax = ax

    def addLegend(self, ax):
        """ Add a legend to a specified axis

        INPUT
        ax: The axis to add the legend to
        """
        legSpec = self.legendSpec
        assert set(legSpec.keys()) == set(['label', 'colour'])

        # Plot an empty line for each entry, purely so that the entry
        # appears in the legend with the right label and colour
        for thisLabel, thisColour in zip(legSpec['label'],
                                         legSpec['colour']):
            ax.plot([], [], label=thisLabel, color=thisColour)

        ax.legend(frameon=False, loc='upper left')
class ColourPlotter(Plotter):
""" Stores and plots the data for one colour-based plot (e.g. a heatmap)
on a single subplot.
ATTRIBUTES
mayShareAx: bool. If true plotting using axes that are shared with other
subplots is ok.
cBarSpec: None | dict. Stores a final and complete specification of all the
details required to produce the colour bar associated with the plot.
axisLabels: None | dict. Stores the labels for the x- and y-axes.
title: None | dict. Stores a complete specification of the plot title.
ax: None | axis. Once plotting is performed, the axis used is stored here.
colourData: dict. Same as input to the constructor.
draftCBarSpec: None | dict. Stores a draft of the cBarSpec based on user
requested settings. Should not be used for plotting, but rather a
finalised cBarSpec should be created using finaliseCBarSpec. Keys
are...
cLabel: str | None. Label for the colour bar.
cBarCenter: None | scalar. If not none, colour bar is to be
centred on this value.
cMap: str. The colour map to use.
cBar: None or handle of the colourbar associated with the plot.
"""
cBarSpec = _makeCarefulProperty('cBarSpec', ['cMap', 'cNorm', 'cMin',
'cMax', 'cLabel'])
def __init__(self, colourData, xLabel=None, yLabel=None,
cLabel=None, cBarCentre=None, cMap='RdBu_r',
titleTxt=None, titleRot=0, titleWeight='normal'):
"""
INPUT
colourData: dataframe. Contains the data to convert to colours and
plot on the heatmap. All provided data will be plotted. Index is
arbitrary. Has the following columns:
C: The values to convert to colours and plot. All provided data
will be used to set the range of the colourbar.
xLabel: str | None. Axis label.
yLabel: str | None. Axis label.
cLabel: str | None. Label for the colourbar.
cBarCentre: None | scalar. If not none, ensure the colour bar is
centred on this value.
cMap: str. The colour map to use.
titleTxt: str | None. Text for title.
titleRot: scalar. Rotation of title text.
titleWeight: str. Specification of font weight.
"""
self._cBarSpec = None
super().__init__(xLabel, yLabel, titleTxt, titleRot, titleWeight)
self.colourData = colourData
self.draftCBarSpec = {
'cLabel': cLabel,
'cBarCentre': cBarCentre,
'cMap': cMap
}
self.cBar = None
def plot(self, ax):
""" Make the subplot.
INPUT
ax: Axis to plot onto
"""
raise NotImplementedError
def findColourRange(self):
""" Find the smallest colour range that would include all colour
data and meet all requirements on the nature of the colour bar.
Defaults to -0.001 to 0.001 if there is no data to plot.
OUTPUT
cMin: scalar. Bottom of the smallest possible colour bar range.
cMax: scalar. Top of the smallest possible colour bar range.
cCenter: None | scalar. If not None, then gives the value that has
been requested to be at the centre of the colour scale.
"""
vals = self.colourData['C'].to_numpy(copy=True)
vals = np.unique(vals)
if len(vals) > 1:
cMin = np.min(vals)
cMax = np.max(vals)
elif len(vals) == 1:
vals = vals[0]
assert np.ndim(vals) == 0
cMax = np.abs(vals)
cMin = -cMax
else:
cMin = -0.001
cMax = 0.001
for lim in [cMin, cMax]:
assert not np.isnan(lim)
cBarCentre = self.draftCBarSpec['cBarCentre']
if cBarCentre is not None:
cMin, cMax = findCenteredScale(cMin, cMax, cBarCentre)
return cMin, cMax, cBarCentre
def finaliseCBarSpec(self, cMin, cMax):
""" Finalise the cBarSpec attribute, using the draft version and the
input arguments. Check that the specification is consistent with the
requested properties.
INPUT
cMin, cMax: scalar. The bottom and top of the finalised colour bar
scale.
"""
cBarSpec = dict()
cBarSpec['cMap'] = self.draftCBarSpec['cMap']
cBarSpec['cNorm'] = pltColours.Normalize
cBarSpec['cMin'] = cMin
cBarSpec['cMax'] = cMax
cBarSpec['cLabel'] = self.draftCBarSpec['cLabel']
self.checkCBarSpec(cBarSpec=cBarSpec, colourData=self.colourData['C'])
self.cBarSpec = cBarSpec
def checkCBarSpec(self, cBarSpec=None, colourData=None):
""" Run a number of checks on the cBarSpec.
INPUT
cBarSpec: None | dict. The cBarSpec to check. If None, checks
self.cBarSpec
colourData: None | dataframe | pandas series. If provided, it is
checked that all the values in the dataframe are within the
colorbar range.
"""
if cBarSpec is None:
cBarSpec = self.cBarSpec
assert set(cBarSpec.keys()) == set(['cMap', 'cNorm', 'cMin', 'cMax',
'cLabel'])
if self.draftCBarSpec['cBarCentre'] is not None:
assert self.draftCBarSpec['cBarCentre'] == ((cBarSpec['cMax'] +
cBarSpec['cMin']) /2)
if (colourData is not None) and (len(colourData) > 0):
assert np.min(colourData.to_numpy()) >= cBarSpec['cMin']
assert np.max(colourData.to_numpy()) <= cBarSpec['cMax']
def addColourBar(self, ax):
    """ Plot a colourbar in the passed axis, and store its handle in
    self.cBar.

    INPUT
    ax: The axis to use for the colourbar

    RAISES
    AssertionError: If self.cBarSpec does not have the expected keys, or
        if a colourbar is already stored in self.cBar.
    """
    cBarSpec = self.cBarSpec
    assert set(cBarSpec.keys()) == set(['cMap', 'cNorm', 'cMin', 'cMax',
                                        'cLabel'])
    # Check this before drawing anything, so that a failure does not leave
    # an untracked colourbar behind on the axis.
    assert self.cBar is None, 'About to overwrite existing colourbar'

    scalarMappable, _ = self.makeColourMapper()
    cbar = plt.colorbar(scalarMappable, cax=ax)
    cbar.outline.set_visible(False)
    cbar.set_label(cBarSpec['cLabel'])
    self.cBar = cbar
def removeColourBar(self):
    """ Delete the colourbar that belongs to this plot, and clear the
    handle stored in self.cBar.

    A colourbar must currently be stored in self.cBar, otherwise an
    AssertionError is raised.
    """
    existing = self.cBar
    assert existing is not None
    existing.remove()
    self.cBar = None
def addColourBarOverPlot(self, ax):
    """ Add a new axis to the figure at exactly the position of the passed
    axis, and draw the colourbar in that new axis.

    INPUT
    ax: The axis over which to create the colourbar
    """
    # The new axis is given the position of the existing one, so that it
    # sits directly on top of it.
    overlayAx = ax.get_figure().add_axes(ax.get_position())
    self.addColourBar(overlayAx)
def makeColourMapper(self):
    """ Build the matplotlib ScalarMappable used for mapping scalars to
    colours, based on self.cBarSpec.

    OUTPUT
    scalarMappable: matplotlib ScalarMappable instance.
    cBarNorm: The matplotlib normaliser instance used to create the
        scalar mappable. E.g. an initialised instance of
        pltColours.Normalize.
    """
    spec = self.cBarSpec
    expectedKeys = set(['cMap', 'cNorm', 'cMin', 'cMax', 'cLabel'])
    assert set(spec.keys()) == expectedKeys

    # spec['cNorm'] holds the normaliser class, which is initialised here
    # with the colour bar limits.
    cBarNorm = spec['cNorm'](vmin=spec['cMin'], vmax=spec['cMax'])
    return mplCm.ScalarMappable(norm=cBarNorm, cmap=spec['cMap']), cBarNorm
class HeatmapPlotter(ColourPlotter):
""" Stores and plots the data for one heatmap in a single subplot.
ATTRIBUTES
mayShareAx: bool. If true plotting using axes that are shared with other
subplots is ok.
cBarSpec: None | dict. Stores a final and complete specification of all the
details required to produce the colour bar associated with the plot.
axisLabels: None | dict. Stores the labels for the x- and y-axes.
title: None | dict. Stores a complete specification of the plot title.
ax: None | axis. Once plotting is performed, the axis used is stored here.
colourData: dict. Same as input to the constructor.
draftCBarSpec: None | dict. Stores a draft of the cBarSpec based on user
requested plotting. Should not be used for plotting, but rather a
finalised cBarSpec should be created using finaliseCBarSpec. Keys
are...
cLabel: str | None. Label for the colour bar.
cBarCentre: None | scalar. If not none, colour bar is to be
centred on this value.
cMap: str. The colour map to use.
cBar: None or handle of the colourbar associated with the plot, if a
colourbar has been plotted.
axisData: bool. Same as input to the constructor.
xOrder, yOrder: Same as input to the constructor.
"""
def __init__(self, colourData, xLabel=None, yLabel=None,
cLabel=None, cBarCentre=None, cMap='RdBu_r',
titleTxt=None, titleRot=0, titleWeight='normal',
axisData='interval', xOrder=None, yOrder=None):
"""
INPUT
colourData: dataframe. Contains the data to convert to colours and
plot on the heatmap. All provided data will be plotted. Index is
arbitrary. Has the following columns:
C: The values to convert to colours and plot. All provided data
will be used to set the range of the colourbar.
X: The x-position associated with each colour value if
axisData='interval', or a label associated with each
colour value if axisData='nominal'
Y: Same as 'X' but for the y-axis
sig: str (optional). The column giving boolean values where
true indicates that a data point was significant.
xLabel: str | None. Axis label.
yLabel: str | None. Axis label.
cLabel: str | None. Label for the colourbar.
cBarCentre: None | scalar. If not none, ensure the colour bar is
centred on this value.
cMap: str. The colour map to use.
titleTxt: str | None. Text for title.
titleRot: scalar. Rotation of title text.
titleWeight: str. Specification of font weight.
axisData: What type of data forms the 'X' and 'Y' values in
colourData? Options are...
'interval': 'X' and 'Y' values will be sorted prior to plotting and
not all tick labels will be displayed. Data must be numeric.
'nominal': All tick labels will be displayed. Using shared axes
with this kind of plot is not yet implemented.
xOrder, yOrder: None | list. Must be None unless axisData='nominal'
in which case these inputs can be used to specify the order with
which to plot items along the x- and y-axes.
"""
super().__init__(colourData, xLabel, yLabel,
cLabel, cBarCentre, cMap,
titleTxt, titleRot, titleWeight)